defmodule Timex.Parse.Timezones.Posix do
  @moduledoc """
  Parses POSIX-style timezones:

  ## Format

  POSIX-style timezones are of the format: `stdoffset[dst[offset][,start[/time],end[/time]]]`

  Where `std`/`dst` are the abbreviations of the standard and daylight saving zones
  (at least three characters; either alphabetic, or arbitrary characters quoted
  between `<` and `>`), and `start`/`end` are dates in one of the following formats:

  The `Mm.n.d` format, where:

    - `Mm` (1-12) for 12 months
    - `n` (1-5) 1 for the first week and 5 for the last week in the month
    - `d` (0-6) 0 for Sunday and 6 for Saturday

  The `Jn` format, where `n` is the julian day and leap days are excluded.

  Or the `n` format, where `n` is the julian day, and leap days are included.

  Offsets are optional, except for the `std` offset, and can be preceded by a sign.
  The offset indicates the time added to the local time to obtain UTC time.
  The offsets may be hours; hours and minutes; and hours, minutes, and seconds -
  colon separated between components.

  NOTE: The sign of the offset is opposite the usual expectation, positive numbers are
  west of GMT, and negative numbers are east of GMT, this is because the offset is the
  time added to _local_ time to arrive at UTC, rather than the other way around.

  For more info, see: https://pubs.opengroup.org/onlinepubs/9699919799/

  ## Example

  TZ = `CST6CDT,M3.2.0/2:00:00,M11.1.0/2:00:00`

  This represents a change to daylight saving time at 2:00 AM on the second Sunday
  in March and a change back at 2:00 AM on the first Sunday in November, keeping a
  6 hour offset from GMT during standard time every year.

  The breakdown of the string is:

    - `CST6CDT` is the timezone name (constructed by concatenating the abbreviation and offset of std/dst)
    - `CST` is the standard abbreviation
    - `6` is the offset from `CST` to get `UTC`
    - `CDT` is the DST abbreviation
    - There is no offset from `CDT`, so the standard assumes the offset is one hour ahead of `CST`, or `5`
    - `,M3` is the third month
    - `.2` is second week of the month
    - `.0` is the day of the week (Sunday in this case)
    - `/2:00:00` is the time at which `CST` changes to `CDT`; defaults to `2:00:00` if not specified
    - `,M11` is the eleventh month
    - `.1` is the first week of the month
    - `.0` is the day of the week
    - `/2:00:00` is the time at which `CDT` changes back to `CST`; defaults to `2:00:00` if not specified
  """
  alias Timex.PosixTimezone, as: TZ

  defguardp is_digit(c) when c >= ?0 and c <= ?9
  defguardp is_alphabetic(c) when (c >= ?A and c <= ?Z) or (c >= ?a and c <= ?z)

  # Binary segment specifier for a single byte, usable as `<<c::char(), rest::binary>>`.
  defmacrop char() do
    quote do: size(1) - unit(8) - integer
  end

  @doc """
  Parses the POSIX timezone rule found on the first line of `s`.

  Only the text up to the first newline (or the end of the binary) is treated as
  the rule; everything after that newline is handed back as part of the remainder.

  Returns `{:ok, tz, rest}` on success, where `tz` is a `Timex.PosixTimezone` struct,
  or `nil` when the first line is empty. On failure the error tuple produced by the
  failing parsing step is returned unchanged (typically `{:error, reason, rest}`).
  """
  def parse(s) when is_binary(s) do
    with {:ok, format_str, rest} <- parse_newline_terminated_str(s),
         {:ok, tz, format_rest} <- parse_tz(format_str) do
      {:ok, finalize(tz), format_rest <> rest}
    end
  end

  # Fills in the fields which are derived rather than parsed: the zone name, and the
  # DST abbreviation/offset when they were omitted from the input.
  #
  # Offsets are stored with the sign flipped relative to the POSIX notation (see
  # `parse_offset/2`), so "one hour ahead" means the DST offset is 3600 *greater*.
  defp finalize(%TZ{std_abbr: std, std_offset: soffs, dst_abbr: dst, dst_offset: nil} = tz)
       when is_binary(dst) do
    # DST exists, but offset is unset, so the standard dictates that this means an
    # hour ahead of standard
    %TZ{tz | name: "#{std}#{to_offset(soffs)}#{dst}", dst_offset: soffs + 3600}
  end

  defp finalize(%TZ{std_abbr: std, std_offset: soffs, dst_abbr: nil, dst_offset: nil} = tz) do
    # No DST, so set the abbreviation to STD and set the offset to the same.
    # The name uses the POSIX rendering of the offset, like the other clauses do.
    %TZ{tz | name: "#{std}#{to_offset(soffs)}", dst_abbr: std, dst_offset: soffs}
  end

  defp finalize(
         %TZ{name: nil, std_abbr: std, std_offset: soffs, dst_abbr: dst, dst_offset: doffs} = tz
       ) do
    # Construct the full name for this zone
    if doffs - soffs == 3600 do
      # The DST offset is one hour ahead of the STD offset, so we can omit it; this
      # yields the same name as when the DST offset was not given at all.
      %TZ{tz | name: "#{std}#{to_offset(soffs)}#{dst}"}
    else
      %TZ{tz | name: "#{std}#{to_offset(soffs)}#{dst}#{to_offset(doffs)}"}
    end
  end

  defp finalize(nil), do: nil

  # Renders a stored offset (in seconds) back into POSIX notation, i.e. with the sign
  # flipped, as `h`, `h:mm` or `h:mm:ss`, using the shortest form which loses nothing.
  defp to_offset(0), do: "0"

  defp to_offset(n) do
    posix = n * -1
    sign = if posix < 0, do: "-", else: ""
    # Split the magnitude so that minutes/seconds never carry their own sign
    total = abs(posix)
    hours = div(total, 3600)
    minutes = div(rem(total, 3600), 60)
    seconds = rem(total, 60)

    cond do
      seconds == 0 and minutes == 0 ->
        "#{sign}#{hours}"

      seconds == 0 ->
        "#{sign}#{hours}:#{pad2(minutes)}"

      :else ->
        "#{sign}#{hours}:#{pad2(minutes)}:#{pad2(seconds)}"
    end
  end

  # Zero-pads a non-negative integer to two digits. `String.pad_leading/3` only
  # accepts binaries, so the integer has to be converted first.
  defp pad2(value), do: value |> Integer.to_string() |> String.pad_leading(2, "0")

  # Entry point of the rule parser; an empty rule is not an error, it yields no zone.
  defp parse_tz(""), do: {:ok, nil, ""}
  defp parse_tz(str), do: parse_tz(:std_abbr, str, %TZ{})

  # The three-argument clauses form a small state machine which consumes the rule
  # left to right: std abbreviation, std offset, dst abbreviation, dst offset and
  # finally the start/end period.
  defp parse_tz(:std_abbr, str, rule) do
    with {:ok, abbr, rest} <- parse_abbrev(str) do
      parse_tz(:std_offset, rest, %TZ{rule | std_abbr: abbr})
    end
  end

  defp parse_tz(:std_offset, str, rule) do
    with {:ok, offset, rest} <- parse_offset(str) do
      parse_tz(:dst_abbr, rest, %TZ{rule | std_offset: offset})
    else
      {:error, nil, ""} ->
        {:error, :invalid_offset, ""}

      {:error, nil, rest} ->
        parse_tz(:dst_abbr, rest, rule)

      {:error, _, _} = err ->
        err
    end
  end

  # The whole `dst...` part is optional; a rule such as `EST5` has no DST at all
  defp parse_tz(:dst_abbr, "", rule), do: {:ok, rule, ""}

  # dst[offset][,...]
  defp parse_tz(:dst_abbr, str, rule) do
    with {:ok, abbr, rest} <- parse_abbrev(str),
         rule = %TZ{rule | dst_abbr: abbr} do
      # dst_offset is optional, and may or may not be followed by a comma and
      # start/end rule if the offset is not present.
      case rest do
        <<>> ->
          {:ok, rule, ""}

        <<?,, rest::binary>> ->
          parse_tz(:rule_period, rest, rule)

        _ ->
          parse_tz(:dst_offset, rest, rule)
      end
    end
  end

  # offset[,...]
  defp parse_tz(:dst_offset, str, rule) do
    with {:ok, offset, rest} <- parse_offset(str),
         rule = %TZ{rule | dst_offset: offset} do
      case rest do
        <<>> ->
          {:ok, rule, ""}

        <<?,, rest::binary>> ->
          parse_tz(:rule_period, rest, rule)

        _ ->
          {:error, :invalid_tz_rule_format}
      end
    else
      {:error, nil, ""} ->
        {:ok, rule, ""}

      {:error, nil, <<?,, rest::binary>>} ->
        parse_tz(:rule_period, rest, rule)

      {:error, _, _} = err ->
        err
    end
  end

  # start[/time],end[/time] - the leading comma has already been consumed
  defp parse_tz(:rule_period, str, rule) do
    case String.split(str, ",", parts: 2, trim: false) do
      [start_dt, end_dt] ->
        with {:ok, dst_start, _} <- parse_posixtz_datetime(start_dt),
             {:ok, dst_end, rest} <- parse_posixtz_datetime(end_dt) do
          {:ok, %TZ{rule | dst_start: dst_start, dst_end: dst_end}, rest}
        else
          {:ok, _, rest} ->
            {:error, :expected_comma, rest}

          {:error, _, _} = err ->
            err
        end

      _ ->
        {:error, :expected_datetime_range, str}
    end
  end

  # Parses one `date[/time]` component into `{date, time}`; the time defaults to
  # 02:00:00 when it is not given.
  defp parse_posixtz_datetime(str) do
    result =
      case str do
        <<?M, rest::binary>> ->
          parse_month_week_day(rest)

        <<?J, rest::binary>> ->
          parse_julian_day(rest, allow_leap_days: false)

        _ ->
          parse_julian_day(str, allow_leap_days: true)
      end

    with {:ok, date, rest} <- result do
      case rest do
        <<?/, rest::binary>> ->
          with {:ok, time, rest} <- parse_time(rest) do
            {:ok, {date, time}, rest}
          end

        _ ->
          {:ok, {date, Timex.Time.new!(2, 0, 0, 0)}, rest}
      end
    end
  end

  # Parses the `m.n.d` part of an `Mm.n.d` date (the `M` is already consumed) into
  # `{:mwd, {month, week, weekday}}`.
  defp parse_month_week_day(str) do
    case String.split(str, ".", parts: 3, trim: false) do
      [m, n, rest] ->
        # The weekday may be followed by `/time`, so it is parsed as a prefix
        case Integer.parse(rest) do
          {d, rest} ->
            with {:ok, date} <- parse_month_week_day(m, n, d) do
              {:ok, date, rest}
            else
              {:error, reason} ->
                {:error, reason, str}
            end

          :error ->
            {:error, :expected_day_number, str}
        end

      _ ->
        {:error, :invalid_month_week_day, str}
    end
  end

  # Validates the ranges of the month (1-12), week (1-5) and weekday (0-6)
  defp parse_month_week_day(m, n, d) do
    with {:ok, m} <- to_integer(m),
         {:ok, n} <- to_integer(n),
         {:ok, d} <- to_integer(d) do
      cond do
        m < 1 or m > 12 ->
          {:error, :invalid_month}

        n < 1 or n > 5 ->
          {:error, :invalid_week_of_month}

        d < 0 or d > 6 ->
          {:error, :invalid_week_day}

        :else ->
          {:ok, {:mwd, {m, n, d}}}
      end
    else
      # `to_integer/1` reports failures as a three-element tuple; callers of this
      # function expect a two-element one.
      {:error, reason, _rest} ->
        {:error, reason}
    end
  end

  # Parses a julian day number. With `allow_leap_days: true` (the default) the valid
  # range is 0-365 and the result is tagged `:julian_leap`; otherwise the valid range
  # is 1-365 and the result is tagged `:julian`.
  defp parse_julian_day(str, opts) do
    with {:ok, day, rest} <- parse_integer_unsigned(str) do
      allow_leaps? = Keyword.get(opts, :allow_leap_days, true)

      cond do
        # Day of year including Feb 29
        allow_leaps? and day >= 0 and day <= 365 ->
          {:ok, {:julian_leap, day}, rest}

        allow_leaps? ->
          {:error, {:invalid_julian_day, day}, str}

        # Day of year without Feb 29, i.e. day 59 is Feb 28, and day 60 is Mar 1
        day >= 1 and day <= 365 ->
          {:ok, {:julian, day}, rest}

        :else ->
          {:error, {:invalid_julian_day, day}, str}
      end
    end
  end

  # An abbreviation is either quoted (`<...>`, any bytes) or unquoted (alphabetic
  # only); both forms require at least three bytes.
  defp parse_abbrev(<<?<, rest::binary>>), do: parse_quoted_abbrev(rest)
  defp parse_abbrev(str), do: parse_unquoted_abbrev(str)

  defp parse_quoted_abbrev(str, acc \\ "")

  defp parse_quoted_abbrev(<<?>, rest::binary>>, acc) when byte_size(acc) < 3,
    do: {:error, {:invalid_quoted_abbreviation, acc}, rest}

  defp parse_quoted_abbrev(<<?>, rest::binary>>, acc),
    do: {:ok, acc, rest}

  defp parse_quoted_abbrev(<<c::char(), rest::binary>>, acc),
    do: parse_quoted_abbrev(rest, acc <> <<c::char()>>)

  defp parse_quoted_abbrev(<<>>, acc),
    do: {:error, :unclosed_quoted_abbreviation, acc}

  defp parse_unquoted_abbrev(str, acc \\ "")

  defp parse_unquoted_abbrev(<<c::char(), rest::binary>>, acc) when is_alphabetic(c),
    do: parse_unquoted_abbrev(rest, acc <> <<c::char()>>)

  defp parse_unquoted_abbrev(rest, acc) when byte_size(acc) < 3,
    do: {:error, {:invalid_unquoted_abbreviation, acc}, rest}

  defp parse_unquoted_abbrev(rest, acc),
    do: {:ok, acc, rest}

  # Parses an optionally signed `hh[:mm[:ss]]` offset into seconds.
  defp parse_offset(<<sign::char(), rest::binary>>) when sign in [?+, ?-],
    do: parse_offset(rest, sign)

  defp parse_offset(str) when is_binary(str),
    do: parse_offset(str, ?+)

  defp parse_offset(str, sign) do
    # POSIX offsets are the time added to local time to reach UTC, so the sign is
    # flipped here: a positive (or unsigned) offset is stored as negative seconds.
    sign = if sign == ?+, do: -1, else: 1

    with {:ok, time, rest} <- parse_time(str),
         {seconds, _} <- Timex.Time.to_seconds_after_midnight(time) do
      {:ok, sign * seconds, rest}
    end
  end

  # Parses `hh[:mm[:ss]]`. Components which are missing, or which fail validation,
  # are treated as zero; only a missing or out of range hour is an error.
  # NOTE(review): an hour of 24 passes the guards below and is handed to
  # `Timex.Time.new!/3` - confirm that function accepts it.
  defp parse_time(str) do
    case parse_integer_unsigned(str) do
      {:ok, hh, <<?:, rest::binary>>} when hh >= 0 and hh <= 24 ->
        case parse_integer_unsigned(rest) do
          {:ok, mm, <<?:, rest::binary>>} when mm >= 0 and mm < 60 ->
            case parse_integer_unsigned(rest) do
              {:ok, ss, rest} when ss >= 0 and ss < 60 ->
                {:ok, Timex.Time.new!(hh, mm, ss), rest}

              _ ->
                {:ok, Timex.Time.new!(hh, mm, 0), rest}
            end

          {:ok, mm, rest} when mm >= 0 and mm < 60 ->
            {:ok, Timex.Time.new!(hh, mm, 0), rest}

          _ ->
            {:ok, Timex.Time.new!(hh, 0, 0), rest}
        end

      {:ok, hh, rest} when hh >= 0 and hh <= 24 ->
        {:ok, Timex.Time.new!(hh, 0, 0), rest}

      {:ok, _, _} ->
        {:error, :invalid_hour, str}

      {:error, :invalid_number, str} ->
        # `nil` as the reason signals "no time present" to the callers, which lets
        # them treat the offset as optional.
        {:error, nil, str}
    end
  end

  # Normalizes a value which is either already an integer, or a binary holding one,
  # to `{:ok, integer}`; range checks are left to the caller.
  defp to_integer(n) when is_integer(n), do: {:ok, n}

  defp to_integer(s) when is_binary(s) do
    with {:ok, n, _} <- parse_integer_signed(s) do
      {:ok, n}
    end
  end

  # Parses an optionally signed integer prefix of `str`
  defp parse_integer_signed(str) do
    case Integer.parse(str) do
      {value, rest} ->
        {:ok, value, rest}

      _ ->
        {:error, :invalid_number, str}
    end
  end

  # Parses the longest run of leading ASCII digits of the input as an integer
  defp parse_integer_unsigned(str, acc \\ "")

  defp parse_integer_unsigned(<<c::char(), rest::binary>>, acc) when is_digit(c) do
    parse_integer_unsigned(rest, acc <> <<c::char()>>)
  end

  defp parse_integer_unsigned(rest, acc) when byte_size(acc) > 0 do
    {:ok, String.to_integer(acc), rest}
  end

  defp parse_integer_unsigned(rest, _) do
    {:error, :invalid_number, rest}
  end

  # Splits the input at the first newline, returning the text before it and the text
  # after it; the newline itself is dropped.
  defp parse_newline_terminated_str(bin),
    do: parse_newline_terminated_str(bin, <<>>)

  defp parse_newline_terminated_str(<<>>, acc),
    do: {:ok, acc, ""}

  defp parse_newline_terminated_str(<<?\n, rest::binary>>, acc),
    do: {:ok, acc, rest}

  defp parse_newline_terminated_str(<<c::char(), rest::binary>>, acc) do
    parse_newline_terminated_str(rest, acc <> <<c::char()>>)
  end
end