defmodule Phoenix.LiveView.Tokenizer do
@moduledoc false
@space_chars ~c"\s\t\f"
@quote_chars ~c"\"'"
@stop_chars ~c">/=\r\n" ++ @quote_chars ++ @space_chars
defmodule ParseError do
@moduledoc false
defexception [:file, :line, :column, :description]
@impl true
def message(exception) do
location =
exception.file
|> Path.relative_to_cwd()
|> Exception.format_file_line_column(exception.line, exception.column)
"#{location} #{exception.description}"
end
def code_snippet(source, meta, indentation \\ 0) do
line_start = max(meta.line - 3, 1)
line_end = meta.line
digits = line_end |> Integer.to_string() |> byte_size()
number_padding = String.duplicate(" ", digits)
indentation = String.duplicate(" ", indentation)
source
|> String.split(["\r\n", "\n"])
|> Enum.slice((line_start - 1)..(line_end - 1))
|> Enum.map_reduce(line_start, fn
expr, line_number when line_number == line_end ->
arrow = String.duplicate(" ", meta.column - 1) <> "^"
acc = "#{line_number} | #{indentation}#{expr}\n #{number_padding}| #{arrow}"
{acc, line_number + 1}
expr, line_number ->
line_number_padding = String.pad_leading("#{line_number}", digits)
{"#{line_number_padding} | #{indentation}#{expr}", line_number + 1}
end)
|> case do
{[], _} ->
""
{snippet, _} ->
Enum.join(["\n #{number_padding}|" | snippet], "\n")
end
end
end
def finalize(_tokens, file, {:comment, line, column}, source) do
message = "expected closing `-->` for comment"
meta = %{line: line, column: column}
raise_syntax_error!(message, meta, %{source: source, file: file, indentation: 0})
end
def finalize(tokens, _file, _cont, _source) do
tokens
|> strip_text_token_fully()
|> Enum.reverse()
|> strip_text_token_fully()
end
@doc """
Initiate the Tokenizer state.
### Params
* `indentation` - An integer that indicates the current indentation.
* `file` - Can be either a file or a string "nofile".
* `source` - The contents of the file as binary used to be tokenized.
* `tag_handler` - Tag handler to classify the tags. See `Phoenix.LiveView.TagEngine`
behaviour.
"""
def init(indentation, file, source, tag_handler) do
%{
file: file,
column_offset: indentation + 1,
braces: :enabled,
context: [],
source: source,
indentation: indentation,
tag_handler: tag_handler
}
end
@doc """
Tokenize the given text according to the given params.
### Params
* `text` - The content to be tokenized.
* `meta` - A keyword list with `:line` and `:column`. Both must be integers.
* `tokens` - A list of tokens.
* `cont` - An atom that is `:text`, `:style`, or `:script`, or a tuple
{:comment, line, column}.
* `state` - The tokenizer state that must be initiated by `Tokenizer.init/4`
### Examples
iex> alias Phoenix.LiveView.Tokenizer
iex> state =
Tokenizer.init(indent, file, [text: ""], HTMLEngine)
iex> Tokenizer.tokenize(state)
{[
{:close, :tag, "section", %{column: 16, line: 1}},
{:tag, "div", [], %{column: 10, line: 1, closing: :self}},
{:tag, "section", [], %{column: 1, line: 1}}
], {:text, :enabled}}
"""
def tokenize(text, meta, tokens, cont, state) do
line = Keyword.get(meta, :line, 1)
column = Keyword.get(meta, :column, 1)
case cont do
{:text, braces} -> handle_text(text, line, column, [], tokens, %{state | braces: braces})
:style -> handle_style(text, line, column, [], tokens, state)
:script -> handle_script(text, line, column, [], tokens, state)
{:comment, _, _} -> handle_comment(text, line, column, [], tokens, state)
end
end
## handle_text
defp handle_text("\r\n" <> rest, line, _column, buffer, acc, state) do
handle_text(rest, line + 1, state.column_offset, ["\r\n" | buffer], acc, state)
end
defp handle_text("\n" <> rest, line, _column, buffer, acc, state) do
handle_text(rest, line + 1, state.column_offset, ["\n" | buffer], acc, state)
end
defp handle_text(" rest, line, column, buffer, acc, state) do
handle_doctype(rest, line, column + 9, [" rest, line, column, buffer, acc, state) do
handle_doctype(rest, line, column + 9, [" rest, line, column, buffer, acc, state) do
state = update_in(state.context, &[:comment_start | &1])
handle_comment(rest, line, column + 4, ["" <> rest, line, column, buffer, _state) do
{:text, rest, line, column + 3, ["-->" | buffer]}
end
defp handle_comment(<>, line, column, buffer, state) do
handle_comment(rest, line, column + 1, [char_or_bin(c) | buffer], state)
end
defp handle_comment(<<>>, line, column, buffer, _state) do
{:ok, line, column, buffer}
end
## handle_tag_open
defp handle_tag_open(text, line, column, acc, state) do
case handle_tag_name(text, column, []) do
{:ok, name, new_column, rest} ->
meta = %{line: line, column: column - 1, inner_location: nil, tag_name: name}
case state.tag_handler.classify_type(name) do
{:error, message} ->
raise_syntax_error!(message, %{line: line, column: column}, state)
{type, name} ->
acc = [{type, name, [], meta} | acc]
handle_maybe_tag_open_end(rest, line, new_column, acc, state)
end
:error ->
message =
"expected tag name after <. If you meant to use < as part of a text, use < instead"
meta = %{line: line, column: column}
raise_syntax_error!(message, meta, state)
end
end
## handle_tag_close
defp handle_tag_close(text, line, column, acc, state) do
case handle_tag_name(text, column, []) do
{:ok, name, new_column, ">" <> rest} ->
meta = %{
line: line,
column: column - 2,
inner_location: {line, column - 2},
tag_name: name
}
case state.tag_handler.classify_type(name) do
{:error, message} ->
raise_syntax_error!(message, meta, state)
{type, name} ->
acc = [{:close, type, name, meta} | acc]
handle_text(rest, line, new_column + 1, [], acc, pop_braces(state))
end
{:ok, _, new_column, _} ->
message = "expected closing `>`"
meta = %{line: line, column: new_column}
raise_syntax_error!(message, meta, state)
:error ->
message = "expected tag name after "
meta = %{line: line, column: column}
raise_syntax_error!(message, meta, state)
end
end
## handle_tag_name
defp handle_tag_name(<> = text, column, buffer)
when c in @stop_chars do
done_tag_name(text, column, buffer)
end
defp handle_tag_name(<>, column, buffer) do
handle_tag_name(rest, column + 1, [char_or_bin(c) | buffer])
end
defp handle_tag_name(<<>>, column, buffer) do
done_tag_name(<<>>, column, buffer)
end
defp done_tag_name(_text, _column, []) do
:error
end
defp done_tag_name(text, column, buffer) do
{:ok, buffer_to_string(buffer), column, text}
end
## handle_maybe_tag_open_end
defp handle_maybe_tag_open_end("\r\n" <> rest, line, _column, acc, state) do
handle_maybe_tag_open_end(rest, line + 1, state.column_offset, acc, state)
end
defp handle_maybe_tag_open_end("\n" <> rest, line, _column, acc, state) do
handle_maybe_tag_open_end(rest, line + 1, state.column_offset, acc, state)
end
defp handle_maybe_tag_open_end(<>, line, column, acc, state)
when c in @space_chars do
handle_maybe_tag_open_end(rest, line, column + 1, acc, state)
end
defp handle_maybe_tag_open_end("/>" <> rest, line, column, acc, state) do
acc = normalize_tag(acc, line, column + 2, true, state)
handle_text(rest, line, column + 2, [], acc, state)
end
defp handle_maybe_tag_open_end(">" <> rest, line, column, acc, state) do
case normalize_tag(acc, line, column + 1, false, state) do
[{:tag, "script", _, _} | _] = acc ->
handle_script(rest, line, column + 1, [], acc, state)
[{:tag, "style", _, _} | _] = acc ->
handle_style(rest, line, column + 1, [], acc, state)
acc ->
handle_text(rest, line, column + 1, [], acc, push_braces(state))
end
end
defp handle_maybe_tag_open_end("{" <> rest, line, column, acc, state) do
handle_root_attribute(rest, line, column + 1, acc, state)
end
defp handle_maybe_tag_open_end(<<>>, line, column, _acc, state) do
message = ~S"""
expected closing `>` or `/>`
Make sure the tag is properly closed. This may happen if there
is an EEx interpolation inside a tag, which is not supported.
For instance, instead of
Content
do
Content
If @id is nil or false, then no attribute is sent at all.
Inside {...} you can place any Elixir expression. If you want
to interpolate in the middle of an attribute value, instead of
Text
you can pass an Elixir string with interpolation:
Text
"""
raise_syntax_error!(message, %{line: line, column: column}, state)
end
defp handle_maybe_tag_open_end(text, line, column, acc, state) do
handle_attribute(text, line, column, acc, state)
end
## handle_attribute
defp handle_attribute(text, line, column, acc, state) do
case handle_attr_name(text, column, []) do
{:ok, name, new_column, rest} ->
attr_meta = %{line: line, column: column}
{text, line, column, value} = handle_maybe_attr_value(rest, line, new_column, state)
acc = put_attr(acc, name, attr_meta, value)
state =
if name == "phx-no-curly-interpolation" and state.braces == :enabled and
not script_or_style?(acc) do
%{state | braces: 0}
else
state
end
handle_maybe_tag_open_end(text, line, column, acc, state)
{:error, message, column} ->
meta = %{line: line, column: column}
raise_syntax_error!(message, meta, state)
end
end
defp script_or_style?([{:tag, name, _, _} | _]) when name in ~w(script style), do: true
defp script_or_style?(_), do: false
## handle_root_attribute
defp handle_root_attribute(text, line, column, acc, state) do
case handle_interpolation(text, line, column, [], 0, state) do
{:ok, value, new_line, new_column, rest} ->
meta = %{line: line, column: column}
acc = put_attr(acc, :root, meta, {:expr, value, meta})
handle_maybe_tag_open_end(rest, new_line, new_column, acc, state)
{:error, message} ->
# We do column - 1 to point to the opening {
meta = %{line: line, column: column - 1}
raise_syntax_error!(message, meta, state)
end
end
## handle_attr_name
defp handle_attr_name(<>, column, _buffer)
when c in @quote_chars do
{:error, "invalid character in attribute name: #{<>}", column}
end
defp handle_attr_name(<>, column, [])
when c in @stop_chars do
{:error, "expected attribute name", column}
end
defp handle_attr_name(<> = text, column, buffer)
when c in @stop_chars do
{:ok, buffer_to_string(buffer), column, text}
end
defp handle_attr_name(<>, column, buffer) do
handle_attr_name(rest, column + 1, [char_or_bin(c) | buffer])
end
defp handle_attr_name(<<>>, column, _buffer) do
{:error, "unexpected end of string inside tag", column}
end
## handle_maybe_attr_value
defp handle_maybe_attr_value("\r\n" <> rest, line, _column, state) do
handle_maybe_attr_value(rest, line + 1, state.column_offset, state)
end
defp handle_maybe_attr_value("\n" <> rest, line, _column, state) do
handle_maybe_attr_value(rest, line + 1, state.column_offset, state)
end
defp handle_maybe_attr_value(<>, line, column, state)
when c in @space_chars do
handle_maybe_attr_value(rest, line, column + 1, state)
end
defp handle_maybe_attr_value("=" <> rest, line, column, state) do
handle_attr_value_begin(rest, line, column + 1, state)
end
defp handle_maybe_attr_value(text, line, column, _state) do
{text, line, column, nil}
end
## handle_attr_value_begin
defp handle_attr_value_begin("\r\n" <> rest, line, _column, state) do
handle_attr_value_begin(rest, line + 1, state.column_offset, state)
end
defp handle_attr_value_begin("\n" <> rest, line, _column, state) do
handle_attr_value_begin(rest, line + 1, state.column_offset, state)
end
defp handle_attr_value_begin(<>, line, column, state)
when c in @space_chars do
handle_attr_value_begin(rest, line, column + 1, state)
end
defp handle_attr_value_begin("\"" <> rest, line, column, state) do
handle_attr_value_quote(rest, ?", line, column + 1, [], state)
end
defp handle_attr_value_begin("'" <> rest, line, column, state) do
handle_attr_value_quote(rest, ?', line, column + 1, [], state)
end
defp handle_attr_value_begin("{" <> rest, line, column, state) do
handle_attr_value_as_expr(rest, line, column + 1, state)
end
defp handle_attr_value_begin(_text, line, column, state) do
message =
"invalid attribute value after `=`. Expected either a value between quotes " <>
"(such as \"value\" or \'value\') or an Elixir expression between curly braces (such as `{expr}`)"
meta = %{line: line, column: column}
raise_syntax_error!(message, meta, state)
end
## handle_attr_value_quote
defp handle_attr_value_quote("\r\n" <> rest, delim, line, _column, buffer, state) do
column = state.column_offset
handle_attr_value_quote(rest, delim, line + 1, column, ["\r\n" | buffer], state)
end
defp handle_attr_value_quote("\n" <> rest, delim, line, _column, buffer, state) do
column = state.column_offset
handle_attr_value_quote(rest, delim, line + 1, column, ["\n" | buffer], state)
end
defp handle_attr_value_quote(<>, delim, line, column, buffer, _state) do
value = buffer_to_string(buffer)
{rest, line, column + 1, {:string, value, %{delimiter: delim}}}
end
defp handle_attr_value_quote(<>, delim, line, column, buffer, state) do
handle_attr_value_quote(rest, delim, line, column + 1, [char_or_bin(c) | buffer], state)
end
defp handle_attr_value_quote(<<>>, delim, line, column, _buffer, state) do
message = """
expected closing `#{<>}` for attribute value
Make sure the attribute is properly closed. This may also happen if
there is an EEx interpolation inside a tag, which is not supported.
Instead of
>
do
Where @some_attributes must be a keyword list or a map.
"""
meta = %{line: line, column: column}
raise_syntax_error!(message, meta, state)
end
## handle_attr_value_as_expr
defp handle_attr_value_as_expr(text, line, column, state) do
case handle_interpolation(text, line, column, [], 0, state) do
{:ok, value, new_line, new_column, rest} ->
{rest, new_line, new_column, {:expr, value, %{line: line, column: column}}}
{:error, message} ->
# We do column - 1 to point to the opening {
meta = %{line: line, column: column - 1}
raise_syntax_error!(message, meta, state)
end
end
## handle_interpolation
defp handle_interpolation("\r\n" <> rest, line, _column, buffer, braces, state) do
handle_interpolation(rest, line + 1, state.column_offset, ["\r\n" | buffer], braces, state)
end
defp handle_interpolation("\n" <> rest, line, _column, buffer, braces, state) do
handle_interpolation(rest, line + 1, state.column_offset, ["\n" | buffer], braces, state)
end
defp handle_interpolation("}" <> rest, line, column, buffer, 0, _state) do
value = buffer_to_string(buffer)
{:ok, value, line, column + 1, rest}
end
defp handle_interpolation(~S(\}) <> rest, line, column, buffer, braces, state) do
handle_interpolation(rest, line, column + 2, [~S(\}) | buffer], braces, state)
end
defp handle_interpolation(~S(\{) <> rest, line, column, buffer, braces, state) do
handle_interpolation(rest, line, column + 2, [~S(\{) | buffer], braces, state)
end
defp handle_interpolation("}" <> rest, line, column, buffer, braces, state) do
handle_interpolation(rest, line, column + 1, ["}" | buffer], braces - 1, state)
end
defp handle_interpolation("{" <> rest, line, column, buffer, braces, state) do
handle_interpolation(rest, line, column + 1, ["{" | buffer], braces + 1, state)
end
defp handle_interpolation(<>, line, column, buffer, braces, state) do
handle_interpolation(rest, line, column + 1, [char_or_bin(c) | buffer], braces, state)
end
defp handle_interpolation(<<>>, _line, _column, _buffer, _braces, _state) do
{:error,
"""
expected closing `}` for expression
In case you don't want `{` to begin a new interpolation, \
you may write it using `{` or using `<%= "{" %>`\
"""}
end
## helpers
@compile {:inline, ok: 2, char_or_bin: 1}
defp ok(acc, cont), do: {acc, cont}
defp char_or_bin(c) when c <= 127, do: c
defp char_or_bin(c), do: <>
defp buffer_to_string(buffer) do
IO.iodata_to_binary(Enum.reverse(buffer))
end
defp text_to_acc(buffer, acc, line, column, context)
defp text_to_acc([], acc, _line, _column, _context),
do: acc
defp text_to_acc(buffer, acc, line, column, context) do
meta = %{line_end: line, column_end: column}
meta =
if context == [] do
meta
else
Map.put(meta, :context, trim_context(context))
end
[{:text, buffer_to_string(buffer), meta} | acc]
end
defp trim_context([:comment_end, :comment_start | [_ | _] = rest]), do: trim_context(rest)
defp trim_context(rest), do: Enum.reverse(rest)
defp push_braces(%{braces: :enabled} = state), do: state
defp push_braces(%{braces: braces} = state), do: %{state | braces: braces + 1}
defp pop_braces(%{braces: :enabled} = state), do: state
defp pop_braces(%{braces: 1} = state), do: %{state | braces: :enabled}
defp pop_braces(%{braces: braces} = state), do: %{state | braces: braces - 1}
defp put_attr([{type, name, attrs, meta} | acc], attr, attr_meta, value) do
attrs = [{attr, value, attr_meta} | attrs]
[{type, name, attrs, meta} | acc]
end
defp normalize_tag([{type, name, attrs, meta} | acc], line, column, self_close?, state) do
attrs = Enum.reverse(attrs)
meta = %{meta | inner_location: {line, column}}
meta =
cond do
type == :tag and state.tag_handler.void?(name) -> Map.put(meta, :closing, :void)
self_close? -> Map.put(meta, :closing, :self)
true -> meta
end
[{type, name, attrs, meta} | acc]
end
defp strip_text_token_fully(tokens) do
with [{:text, text, _} | rest] <- tokens,
"" <- String.trim_leading(text) do
strip_text_token_fully(rest)
else
_ -> tokens
end
end
defp raise_syntax_error!(message, meta, state) do
raise ParseError,
file: state.file,
line: meta.line,
column: meta.column,
description: message <> ParseError.code_snippet(state.source, meta, state.indentation)
end
end