defmodule Timex.Parse.ZoneInfo.Parser do
  @moduledoc """
  This module is responsible for parsing binary zoneinfo (TZif) files,
  such as those found in `/usr/share/zoneinfo`.

  The entry points are `parse/1` (for a binary already in memory) and
  `parse_file/1` (for a path on disk). Both return `{:ok, %Zone{}}` on
  success or `{:error, reason}` on failure.
  """

  # See https://tools.ietf.org/id/draft-murchison-tzdist-tzif-00.html for details

  defmodule Zone do
    @moduledoc """
    Represents the data retrieved from a binary tzfile.
    """

    @type t :: %__MODULE__{}

    # Maximum version encountered
    defstruct version: nil,
              # Transition times
              transitions: [],
              # Leap second adjustments
              leaps: [],
              # POSIX-TZ rule that describes the zone for future dates
              rule: nil
  end

  defmodule Header do
    @moduledoc false

    # Six big-endian 4 byte integers
    # count of UTC/local indicators
    defstruct utc_count: 0,
              # count of standard/wall indicators
              wall_count: 0,
              # number of leap seconds
              leap_count: 0,
              # number of transition times
              transition_count: 0,
              # number of local time types (never zero)
              type_count: 0,
              # total number of characters of the zone abbreviations string
              abbrev_length: 0
  end

  defmodule TransitionInfo do
    @moduledoc false

    # total ISO 8601 offset (std + dst)
    defstruct gmt_offset: 0,
              # The time at which this transition starts
              starts_at: 0,
              # Is this transition in daylight savings time
              is_dst?: false,
              # The lookup index of the abbreviation
              abbrev_index: 0,
              # The zone abbreviation
              abbreviation: "N/A",
              # Whether transitions are standard or wall
              is_std?: true,
              # Whether transitions are UTC or local
              is_utc?: false
  end

  defmodule LeapSecond do
    @moduledoc false

    # The time at which this leap second occurs
    defstruct epoch: 0,
              # The number of leap seconds to be applied to UTC on/after epoch
              correction: 0
  end

  defmodule Rule do
    @moduledoc false

    defstruct std_abbr: nil,
              std_offset: 0,
              dst_abbr: nil,
              dst_offset: 0,
              start_time: nil,
              end_time: nil
  end

  defguardp is_digit(c) when c >= ?0 and c <= ?9
  defguardp is_alphabetic(c) when (c >= ?A and c <= ?Z) or (c >= ?a and c <= ?z)

  ##############
  # Macros defining common bitstring modifier combinations in zoneinfo files

  defmacrop char() do
    quote do: size(1) - unit(8) - integer
  end

  defmacrop bytes(size) do
    quote do: binary - size(unquote(size)) - unit(8)
  end

  defmacrop integer_32bit_be do
    quote do: big - size(4) - unit(8) - integer
  end

  defmacrop integer_64bit_be do
    quote do: big - size(8) - unit(8) - integer
  end

  defmacrop signed_char_be do
    quote do: big - size(1) - unit(8) - signed - integer
  end

  defmacrop unsigned_char_be do
    quote do: big - size(1) - unit(8) - unsigned - integer
  end

  @doc """
  Parses a binary representing a valid zoneinfo file.

  Parses the timezone information inside, and returns it as a Zone struct.

  Returns `{:ok, zone}` on success. Returns `{:error, reason}` when the binary
  does not start with the `TZif` magic, declares an unsupported version, has a
  truncated header, or carries a malformed POSIX TZ footer.
  """
  @spec parse(binary) :: {:ok, Zone.t()} | {:error, term}
  def parse(<<?T, ?Z, ?i, ?f, version::bytes(1), _reserved::bytes(15), rest::binary>>) do
    # The version byte is NUL for version 1 and an ASCII digit afterwards
    version =
      case version do
        <<0>> -> 1
        <<?2>> -> 2
        <<?3>> -> 3
        byte -> {:error, {:invalid_tzfile_version, byte}, rest}
      end

    with v when is_integer(v) <- version,
         {:ok, zoneinfo, _} <- parse_versioned_content(v, rest) do
      {:ok, zoneinfo}
    else
      # Internal errors carry the unparsed remainder; callers only get the reason
      {:error, reason, _} -> {:error, reason}
      {:error, reason} -> {:error, reason}
    end
  end

  def parse(_) do
    {:error, :invalid_zoneinfo_content}
  end

  @doc """
  Like `parse/1`, but expects a file path to parse.

  Returns `{:error, message}` when no file exists at `path`. Raises
  `File.Error` if the file exists but cannot be read.
  """
  @spec parse_file(binary) :: {:ok, Zone.t()} | {:error, term}
  def parse_file(path) when is_binary(path) do
    if File.exists?(path) do
      path
      |> File.read!()
      |> parse()
    else
      {:error, "No zoneinfo file at #{path}"}
    end
  end

  # Parses the content of a tzinfo file based on the version format
  defp parse_versioned_content(version, data)

  # Version 1 files contain a single (32-bit) data block
  defp parse_versioned_content(1, data) do
    with {:ok, zone, rest} <- parse_content(1, data, %Zone{version: 1}) do
      transitions = Enum.sort_by(zone.transitions, fn tx -> tx.starts_at end)
      leaps = Enum.sort_by(zone.leaps, fn leap -> leap.epoch end)
      {:ok, %Zone{zone | transitions: transitions, leaps: leaps}, rest}
    end
  end

  # Version 2+ files contain the version 1 block, followed by a second header
  # and a 64-bit data block, followed by the POSIX TZ footer
  defp parse_versioned_content(version, data) do
    expected_version =
      case version do
        1 -> 0
        2 -> ?2
        3 -> ?3
      end

    with {:ok, zone1, rest} <- parse_content(1, data, %Zone{version: 1}),
         {:header, <<?T, ?Z, ?i, ?f, ^expected_version, _reserved::bytes(15), rest::binary>>} <-
           {:header, rest},
         {:ok, zone2, rest} <- parse_content(version, rest, %Zone{version: version}) do
      # Append the second set of zone info to the first set
      transitions =
        zone1.transitions
        |> Enum.concat(zone2.transitions)
        |> Enum.sort_by(fn tx -> tx.starts_at end)

      leaps =
        zone1.leaps
        |> Enum.concat(zone2.leaps)
        |> Enum.sort_by(fn leap -> leap.epoch end)

      zone = %Zone{
        version: zone2.version,
        transitions: transitions,
        leaps: leaps,
        rule: zone2.rule
      }

      {:ok, zone, rest}
    else
      {:header, bytes} ->
        {:error, {:invalid_version_header, version}, bytes}

      # Errors from either data block must reach parse/1 untouched; without
      # this clause they would raise WithClauseError
      {:error, _, _} = err ->
        err
    end
  end

  # Parsing the content of a tzinfo file starting with the header
  #
  # ## Header Format
  #
  #     +---------------+---+
  #     |  magic    (4) | <-+-- version (1)
  #     +---------------+---+---------------------------------------+
  #     |           [unused - reserved for future use] (15)         |
  #     +---------------+---------------+---------------+-----------+
  #     |  isutccnt (4) |  isstdcnt (4) |  leapcnt  (4) |
  #     +---------------+---------------+---------------+
  #     |  timecnt  (4) |  typecnt  (4) |  charcnt  (4) |
  #     ---
  #
  # ## 32-bit Body Format
  #
  #     |  transition times          (timecnt x 4)   ...
  #     +-----------------------------------------------+
  #     |  transition time index     (timecnt)       ...
  #     +-----------------------------------------------+
  #     |  local time type records   (typecnt x 6)   ...
  #     +-----------------------------------------------+
  #     |  time zone designations    (charcnt)       ...
  #     +-----------------------------------------------+
  #     |  leap second records       (leapcnt x 8)   ...
  #     +-----------------------------------------------+
  #     |  standard/wall indicators  (isstdcnt)      ...
  #     +-----------------------------------------------+
  #     |  UTC/local indicators      (isutccnt)      ...
  #     +-----------------------------------------------+
  #
  # ## 64-bit Body Format
  #
  #     |  transition times          (timecnt x 8)   ...
  #     +-----------------------------------------------+
  #     |  transition time index     (timecnt)       ...
  #     +-----------------------------------------------+
  #     |  local time type records   (typecnt x 6)   ...
  #     +-----------------------------------------------+
  #     |  time zone designations    (charcnt)       ...
  #     +-----------------------------------------------+
  #     |  leap second records       (leapcnt x 12)  ...
  #     +-----------------------------------------------+
  #     |  standard/wall indicators  (isstdcnt)      ...
  #     +-----------------------------------------------+
  #     |  UTC/local indicators      (isutccnt)      ...
  #     +---+---------------------------------------+---+
  #     | NL|  POSIX TZ string (0...)               |NL |
  #     +---+---------------------------------------+---+
  defp parse_content(version, <<header_raw::bytes(24), rest::binary>>, zone) do
    {utc_count, header_raw} = parse_i32(header_raw)
    {wall_count, header_raw} = parse_i32(header_raw)
    {leap_count, header_raw} = parse_i32(header_raw)
    {tx_count, header_raw} = parse_i32(header_raw)
    {type_count, header_raw} = parse_i32(header_raw)
    {abbrev_length, _} = parse_i32(header_raw)

    header = %Header{
      utc_count: utc_count,
      wall_count: wall_count,
      leap_count: leap_count,
      transition_count: tx_count,
      type_count: type_count,
      abbrev_length: abbrev_length
    }

    parse_transition_times(version, rest, header, zone)
  end

  # Fewer than 24 bytes remain, so the six counts cannot be read
  defp parse_content(_version, data, _zone) do
    {:error, :invalid_zoneinfo_header, data}
  end

  # Parse the number of transition times in this zone
  defp parse_transition_times(version, data, %Header{transition_count: tx_count} = header, zone) do
    {times, rest} = parse_array(data, tx_count, &parse_int(version, &1))
    parse_transition_info(version, rest, header, %Zone{zone | transitions: times})
  end

  # Parse transition time info for this zone
  defp parse_transition_info(
         version,
         data,
         %Header{transition_count: tx_count, type_count: type_count} = header,
         %Zone{transitions: transitions} = zone
       ) do
    # One index per transition time, pointing into the local time type records
    {indices, rest} = parse_array(data, tx_count, &parse_uchar/1)

    {txinfos, rest} =
      parse_array(rest, type_count, fn data ->
        {gmt_offset, next} = parse_i32(data)
        {is_dst, next} = parse_char(next)
        {abbrev_index, next} = parse_uchar(next)

        info = %TransitionInfo{
          gmt_offset: gmt_offset,
          is_dst?: is_dst == 1,
          abbrev_index: abbrev_index
        }

        {info, next}
      end)

    # Pair every transition time with the local time type it switches to
    txs =
      indices
      |> Enum.map(&Enum.at(txinfos, &1))
      |> Enum.zip(transitions)
      |> Enum.map(fn {info, time} -> %{info | starts_at: time} end)

    parse_abbreviations(version, rest, header, %Zone{zone | transitions: txs})
  end

  # Parses zone abbreviations for this zone
  defp parse_abbreviations(
         version,
         data,
         %Header{abbrev_length: len} = header,
         %Zone{transitions: transitions} = zone
       ) do
    <<abbrevs::bytes(len), rest::binary>> = data

    txinfos =
      Enum.map(transitions, fn %TransitionInfo{abbrev_index: idx} = tx ->
        # The abbreviation runs from its index up to the next NUL byte
        {:ok, abbrev, _} = parse_null_terminated_str(:binary.part(abbrevs, idx, len - idx))
        %{tx | abbreviation: abbrev}
      end)

    parse_leap_seconds(version, rest, header, %Zone{zone | transitions: txinfos})
  end

  # Parses leap second information for this zone
  defp parse_leap_seconds(version, data, %Header{leap_count: count} = header, zone) do
    {leaps, rest} =
      parse_array(data, count, fn data ->
        # The occurrence time is 4 or 8 bytes wide depending on the block
        # version; the correction is always 4 bytes
        {epoch, next} = parse_int(version, data)
        {correction, next} = parse_i32(next)

        leap = %LeapSecond{
          epoch: epoch,
          correction: correction
        }

        {leap, next}
      end)

    parse_flags(version, rest, header, %Zone{zone | leaps: leaps})
  end

  # Parses the trailing flags in the zoneinfo binary
  #
  # NOTE(review): the indicators are looked up by the position of the
  # transition in the list, not by the index of its local time type record -
  # confirm this is intended, since the counts come from isstdcnt/isutccnt.
  defp parse_flags(version, data, %Header{utc_count: utc_count, wall_count: wall_count}, zone) do
    {is_std_indicators, rest} = parse_array(data, wall_count, &parse_char/1)
    {is_utc_indicators, rest} = parse_array(rest, utc_count, &parse_char/1)

    transitions =
      zone.transitions
      |> Enum.with_index()
      |> Enum.map(fn {tx, i} ->
        is_std? = Enum.at(is_std_indicators, i) == 1
        is_utc? = Enum.at(is_utc_indicators, i) == 1
        %{tx | is_std?: is_std?, is_utc?: is_utc?}
      end)

    if version > 1 do
      parse_posixtz_string(version, rest, %Zone{zone | transitions: transitions})
    else
      {:ok, %Zone{zone | transitions: transitions}, rest}
    end
  end

  # Parses the newline-enclosed POSIX TZ footer of a version 2+ file:
  #
  #     stdoffset[dst[offset][,start[/time],end[/time]]]
  defp parse_posixtz_string(_version, <<?\n, rest::binary>>, zone) do
    with {:ok, format_str, rest} <- parse_newline_terminated_str(rest),
         {:ok, rule, format_rest} <- parse_tz(format_str) do
      {:ok, %Zone{zone | rule: rule}, format_rest <> rest}
    end
  end

  defp parse_posixtz_string(_version, rest, _zone) do
    {:error, {:invalid_format, "expected newline to follow set of utc/local indicators"}, rest}
  end

  # An empty TZ string means the zone has no rule for future dates
  defp parse_tz(""), do: {:ok, nil, ""}
  defp parse_tz(str), do: parse_tz(:std_abbr, str, %Rule{})

  # std
  defp parse_tz(:std_abbr, str, rule) do
    with {:ok, abbr, rest} <- parse_abbrev(str) do
      # Until a dst abbreviation is seen, dst mirrors std
      parse_tz(:std_offset, rest, %Rule{rule | std_abbr: abbr, dst_abbr: abbr})
    end
  end

  # offset
  defp parse_tz(:std_offset, str, rule) do
    with {:ok, offset, rest} <- parse_offset(str) do
      parse_tz(:dst_abbr, rest, %Rule{rule | std_offset: offset, dst_offset: offset})
    else
      {:error, nil, ""} ->
        {:ok, rule, ""}

      {:error, nil, rest} ->
        parse_tz(:dst_abbr, rest, rule)

      {:error, _, _} = err ->
        err
    end
  end

  # dst[offset][,...]
  defp parse_tz(:dst_abbr, str, rule) do
    with {:ok, abbr, rest} <- parse_abbrev(str),
         rule = %Rule{rule | dst_abbr: abbr} do
      # dst_offset is optional, and may or may not be followed by a comma and start/end rule
      # if the offset is not present.
      case rest do
        <<>> ->
          {:ok, rule, ""}

        <<?,, rest::binary>> ->
          parse_tz(:rule_period, rest, rule)

        _ ->
          parse_tz(:dst_offset, rest, rule)
      end
    end
  end

  # offset[,...]
  defp parse_tz(:dst_offset, str, rule) do
    with {:ok, offset, rest} <- parse_offset(str),
         rule = %Rule{rule | dst_offset: offset} do
      case rest do
        <<>> ->
          {:ok, rule, ""}

        <<?,, rest::binary>> ->
          parse_tz(:rule_period, rest, rule)

        _ ->
          # Keep the {:error, reason, rest} shape every caller matches on
          {:error, :invalid_tz_rule_format, rest}
      end
    else
      {:error, nil, ""} ->
        {:ok, rule, ""}

      {:error, nil, <<?,, rest::binary>>} ->
        parse_tz(:rule_period, rest, rule)

      {:error, _, _} = err ->
        err
    end
  end

  # start[/time],end[/time]
  defp parse_tz(:rule_period, str, rule) do
    case String.split(str, ",", parts: 2, trim: false) do
      [start_dt, end_dt] ->
        with {:ok, start_time, _} <- parse_posixtz_datetime(start_dt),
             {:ok, end_time, rest} <- parse_posixtz_datetime(end_dt) do
          {:ok, %Rule{rule | start_time: start_time, end_time: end_time}, rest}
        else
          {:ok, _, rest} ->
            {:error, :expected_comma, rest}

          {:error, _, _} = err ->
            err
        end

      _ ->
        {:error, :expected_datetime_range, str}
    end
  end

  # Parses one side of the start/end rule: a date in one of the three POSIX
  # forms (Mm.n.d, Jn, n), optionally followed by "/time"
  defp parse_posixtz_datetime(str) do
    result =
      case str do
        <<?M, rest::binary>> ->
          parse_month_week_day(rest)

        <<?J, rest::binary>> ->
          parse_julian_day(rest, allow_leap_days: false)

        _ ->
          parse_julian_day(str, allow_leap_days: true)
      end

    with {:ok, date, rest} <- result do
      case rest do
        <<?/, rest::binary>> ->
          with {:ok, time, rest} <- parse_time(rest) do
            {:ok, {date, time}, rest}
          end

        _ ->
          # No explicit time given, the change happens at 02:00:00
          {:ok, {date, Timex.Time.new!(2, 0, 0, 0)}, rest}
      end
    end
  end

  # Parses "m.n.d" (month, week of month, day of week)
  defp parse_month_week_day(str) do
    case String.split(str, ".", parts: 3, trim: false) do
      [m, n, rest] ->
        case Integer.parse(rest) do
          {d, rest} ->
            with {:ok, date} <- parse_month_week_day(m, n, d) do
              {:ok, date, rest}
            else
              {:error, reason} ->
                {:error, reason, str}
            end

          :error ->
            {:error, :expected_day_number, str}
        end

      _ ->
        {:error, :invalid_month_week_day, str}
    end
  end

  # Validates the three components; returns {:ok, {:mwd, {m, n, d}}} or
  # {:error, reason}
  defp parse_month_week_day(m, n, d) do
    with {:ok, m} <- to_integer(m),
         {:ok, n} <- to_integer(n),
         {:ok, d} <- to_integer(d) do
      cond do
        m < 1 or m > 12 ->
          {:error, :invalid_month}

        n < 1 or n > 5 ->
          {:error, :invalid_week_of_month}

        d < 0 or d > 6 ->
          {:error, :invalid_week_day}

        true ->
          {:ok, {:mwd, {m, n, d}}}
      end
    else
      # to_integer/1 fails with a 3-tuple
      {:error, _, _} ->
        {:error, :invalid_number}

      :error ->
        {:error, :invalid_number}
    end
  end

  # Parses a day-of-year number; `opts` selects the zero-based form that
  # counts Feb 29 or the one-based "J" form that does not
  defp parse_julian_day(str, opts) do
    with {:ok, day, rest} <- parse_integer_unsigned(str) do
      allow_leaps? = Keyword.get(opts, :allow_leap_days, true)

      cond do
        # Day of year including Feb 29
        allow_leaps? and day >= 0 and day <= 365 ->
          {:ok, {:julian, day, opts}, rest}

        allow_leaps? ->
          {:error, {:invalid_julian_day, day}, str}

        # Day of year without Feb 29, i.e. day 59 is Feb 28, and day 60 is Mar 1
        day >= 1 and day <= 365 ->
          {:ok, {:julian, day, opts}, rest}

        true ->
          {:error, {:invalid_julian_day, day}, str}
      end
    end
  end

  # Abbreviations are either bare alphabetic names or enclosed in <...>
  defp parse_abbrev(<<?<, rest::binary>>), do: parse_quoted_abbrev(rest)
  defp parse_abbrev(str), do: parse_unquoted_abbrev(str)

  defp parse_quoted_abbrev(str, acc \\ "")

  defp parse_quoted_abbrev(<<?>, rest::binary>>, acc) when byte_size(acc) < 3,
    do: {:error, {:invalid_quoted_abbreviation, acc}, rest}

  defp parse_quoted_abbrev(<<?>, rest::binary>>, acc),
    do: {:ok, acc, rest}

  defp parse_quoted_abbrev(<<c::utf8, rest::binary>>, acc),
    do: parse_quoted_abbrev(rest, acc <> <<c::utf8>>)

  defp parse_quoted_abbrev(<<>>, acc),
    do: {:error, :unclosed_quoted_abbreviation, acc}

  # Bytes that are not valid UTF-8 cannot be part of an abbreviation
  defp parse_quoted_abbrev(rest, acc),
    do: {:error, {:invalid_quoted_abbreviation, acc}, rest}

  defp parse_unquoted_abbrev(str, acc \\ "")

  defp parse_unquoted_abbrev(<<c::utf8, rest::binary>>, acc) when is_alphabetic(c),
    do: parse_unquoted_abbrev(rest, acc <> <<c::utf8>>)

  defp parse_unquoted_abbrev(rest, acc) when byte_size(acc) < 3,
    do: {:error, {:invalid_unquoted_abbreviation, acc}, rest}

  defp parse_unquoted_abbrev(rest, acc),
    do: {:ok, acc, rest}

  # Parses "[+|-]hh[:mm[:ss]]" into a signed number of seconds
  defp parse_offset(<<sign::utf8, rest::binary>>) when sign in [?+, ?-],
    do: parse_offset(rest, sign)

  defp parse_offset(str) when is_binary(str),
    do: parse_offset(str, ?+)

  defp parse_offset(str, sign) do
    sign = if sign == ?+, do: 1, else: -1

    with {:ok, time, rest} <- parse_time(str),
         {seconds, _} <- Timex.Time.to_seconds_after_midnight(time) do
      {:ok, sign * seconds, rest}
    end
  end

  # Parses "hh[:mm[:ss]]"; missing or out-of-range minute/second components
  # are treated as zero
  defp parse_time(str) do
    case parse_integer_unsigned(str) do
      {:ok, hh, <<?:, rest::binary>>} when hh >= 0 and hh <= 24 ->
        case parse_integer_unsigned(rest) do
          {:ok, mm, <<?:, rest::binary>>} when mm >= 0 and mm < 60 ->
            case parse_integer_unsigned(rest) do
              {:ok, ss, rest} when ss >= 0 and ss < 60 ->
                {:ok, Timex.Time.new!(hh, mm, ss, 0), rest}

              _ ->
                {:ok, Timex.Time.new!(hh, mm, 0, 0), rest}
            end

          {:ok, mm, rest} when mm >= 0 and mm < 60 ->
            {:ok, Timex.Time.new!(hh, mm, 0, 0), rest}

          _ ->
            {:ok, Timex.Time.new!(hh, 0, 0, 0), rest}
        end

      {:ok, hh, rest} when hh >= 0 and hh <= 24 ->
        {:ok, Timex.Time.new!(hh, 0, 0, 0), rest}

      {:ok, _, _} ->
        {:error, :invalid_hour, str}

      {:error, _, _} = err ->
        err
    end
  end

  # Accepts an already-parsed integer or a string holding one. Range checks
  # are left to the caller, so negative integers are passed through.
  defp to_integer(n) when is_integer(n), do: {:ok, n}

  defp to_integer(s) when is_binary(s) do
    with {:ok, n, _} <- parse_integer_signed(s) do
      {:ok, n}
    end
  end

  defp parse_integer_signed(str) do
    case Integer.parse(str) do
      {value, rest} ->
        {:ok, value, rest}

      _ ->
        {:error, :invalid_number, str}
    end
  end

  # Consumes leading ASCII digits; fails if there are none
  defp parse_integer_unsigned(str, acc \\ "")

  defp parse_integer_unsigned(<<c::utf8, rest::binary>>, acc) when is_digit(c) do
    parse_integer_unsigned(rest, acc <> <<c::utf8>>)
  end

  defp parse_integer_unsigned(rest, acc) when byte_size(acc) > 0 do
    {:ok, String.to_integer(acc), rest}
  end

  defp parse_integer_unsigned(rest, _) do
    {:error, :invalid_number, rest}
  end

  ################
  # Parses an array of a primitive type, ex:
  #   parse_array(<<"test">>, 2, &parse_uchar/1) => {[?t, ?e], "st"}
  ###
  defp parse_array(data, 0, _parser), do: {[], data}

  defp parse_array(data, count, parser) when is_binary(data) and is_function(parser) do
    do_parse_array(data, count, parser, [])
  end

  defp do_parse_array(data, 0, _, acc), do: {Enum.reverse(acc), data}

  defp do_parse_array(data, count, parser, acc) do
    {item, next} = parser.(data)
    do_parse_array(next, count - 1, parser, [item | acc])
  end

  #################
  # Data Type Parsers
  #
  # Transition times, UT offsets and leap second fields are signed two's
  # complement values in TZif, so the integer readers decode as signed.

  # Version 1 blocks use 4-byte time values, later versions use 8-byte ones
  defp parse_int(1, bin), do: parse_i32(bin)
  defp parse_int(_, bin), do: parse_i64(bin)

  defp parse_i32(<<val::signed-integer_32bit_be(), rest::binary>>), do: {val, rest}
  defp parse_i64(<<val::signed-integer_64bit_be(), rest::binary>>), do: {val, rest}
  defp parse_char(<<val::signed_char_be(), rest::binary>>), do: {val, rest}
  defp parse_uchar(<<val::unsigned_char_be(), rest::binary>>), do: {val, rest}

  # Reads bytes up to (and consuming) the first NUL byte, or to end of input
  defp parse_null_terminated_str(bin), do: parse_null_terminated_str(bin, <<>>)
  defp parse_null_terminated_str(<<>>, acc), do: {:ok, acc, ""}
  defp parse_null_terminated_str(<<0, rest::binary>>, acc), do: {:ok, acc, rest}

  defp parse_null_terminated_str(<<c::char(), rest::binary>>, acc) do
    parse_null_terminated_str(rest, acc <> <<c::char()>>)
  end

  # Reads bytes up to (and consuming) the first newline, or to end of input
  defp parse_newline_terminated_str(bin), do: parse_newline_terminated_str(bin, <<>>)
  defp parse_newline_terminated_str(<<>>, acc), do: {:ok, acc, ""}
  defp parse_newline_terminated_str(<<?\n, rest::binary>>, acc), do: {:ok, acc, rest}

  defp parse_newline_terminated_str(<<c::char(), rest::binary>>, acc) do
    parse_newline_terminated_str(rest, acc <> <<c::char()>>)
  end
end