Hey there, so here is my small project where I try to implement a simple Lexer and Parser for my programming language.
This is the syntax of it:
fu main() {
$$ This is a comment
let name = "Name";
display("Hello Nerd ${name}");
}
I now post all of my files and codes:
lexer.ex:
defmodule Lexer do
def find_inok_file() do
case File.ls!(".") do
files ->
Enum.find_value(files, fn file ->
Path.extname(file) == ".inok" && file
end)
end
end
def read_inok_file(file_path) do
case Path.extname(file_path) do
".inok" ->
case File.read(file_path) do
{:ok, file_contents} ->
{file_path, file_contents}
{:error, _} ->
{file_path, ""}
end
_ ->
{file_path, ""}
end
end
def tokenize() do
case find_inok_file() do
nil ->
{:error, "No .inok file found in directory"}
file_path ->
case read_inok_file(file_path) do
{_, ""} ->
{:error, "File is empty"}
{_, file_contents} ->
Regex.scan(
~r/let|fu|main|display|\{|\}|\(|\)|;|(?<=\$\$).*?(?=\$\$)/ms,
file_contents
)
|> Enum.map(fn [token_value] -> {determine_token_type(token_value), token_value} end)
end
end
end
def determine_token_type(token_value) do
case token_value do
"let" -> {:let, token_value}
"fu" -> {:function, token_value}
"main" -> {:main, token_value}
"display" -> {:display, token_value}
"{" -> {:open_brace, token_value}
"}" -> {:close_brace, token_value}
"(" -> {:open_paren, token_value}
")" -> {:close_paren, token_value}
";" -> {:semicolon, token_value}
"$$" -> {:comment, token_value}
_ -> {:unknown, token_value}
end
end
end
parser.ex
defmodule Parser do
def parse(tokens) do
IO.inspect(tokens, label: "parse tokens")
case tokens do
[] ->
{:error, "Unexpected end of input"}
[{:function, _} = function_token | rest_tokens] ->
parse_function(rest_tokens, function_token)
[{:let, _} = _let_token, {:unknown, variable_token}, expression_token, {:semicolon, _} | rest_tokens] ->
statements = parse_statements(rest_tokens, [[:let, variable_token, expression_token]])
{:ok, {:function, "main", [], Enum.reverse(statements)}}
[{:display, _} = _display_token, {:unknown, expression_token}, {:semicolon, _} | rest_tokens] ->
statements = parse_statements(rest_tokens, [[:display, expression_token]])
{:ok, {:function, "main", [], Enum.reverse(statements)}}
_ ->
{:error, "Unexpected token"}
end
end
defp parse_function(tokens, function_token) do
case tokens do
[] ->
{:error, "Unexpected end of input"}
[{:main, _} = main_token, {:open_paren, _}, {:close_paren, _}, {:open_brace, _} | rest_tokens] ->
statements = parse_statements(rest_tokens, [])
{:ok, {function_token, main_token, [], statements}}
_ ->
{:error, "Unexpected token"}
end
end
defp parse_statements(tokens, acc) do
IO.inspect(tokens, label: "parse_statements tokens")
case tokens do
[] ->
{:error, "Unexpected end of input"}
[{:close_brace, _} | rest_tokens] ->
{:ok, Enum.reverse(acc), rest_tokens}
[{:let, _}, {:unknown, variable_token}, {:equal, _}, expression_token, {:semicolon, _} | rest_tokens] ->
parse_statements(rest_tokens, [[:let, variable_token, expression_token]] ++ acc)
[{:display, _}, expression_token, {:semicolon, _} | rest_tokens] ->
parse_statements(rest_tokens, [[:display, expression_token]] ++ acc)
[{kind, token} | _] ->
{:error, "Unexpected token: #{inspect(kind)} '#{inspect(token)}'"}
end
end
end
Here is my output (debugging purposes):
iex(61)> tokens = Lexer.tokenize
[
{{:function, "fu"}, "fu"},
{{:main, "main"}, "main"},
{{:open_paren, "("}, "("},
{{:close_paren, ")"}, ")"},
{{:open_brace, "{"}, "{"},
{{:let, "let"}, "let"},
{{:semicolon, ";"}, ";"},
{{:display, "display"}, "display"},
{{:open_paren, "("}, "("},
{{:open_brace, "{"}, "{"},
{{:close_brace, "}"}, "}"},
{{:close_paren, ")"}, ")"},
{{:semicolon, ";"}, ";"},
{{:close_brace, "}"}, "}"}
]
iex(62)> Parser.parse(tokens)
parse tokens: [
{{:function, "fu"}, "fu"},
{{:main, "main"}, "main"},
{{:open_paren, "("}, "("},
{{:close_paren, ")"}, ")"},
{{:open_brace, "{"}, "{"},
{{:let, "let"}, "let"},
{{:semicolon, ";"}, ";"},
{{:display, "display"}, "display"},
{{:open_paren, "("}, "("},
{{:open_brace, "{"}, "{"},
{{:close_brace, "}"}, "}"},
{{:close_paren, ")"}, ")"},
{{:semicolon, ";"}, ";"},
{{:close_brace, "}"}, "}"}
]
{:error, "Unexpected token"}
I literally don’t know why it doesn’t match | throws an error saying that there is an unexpected token
I also created a main file to produce an error message that might help, but for me it didn’t (yet)
main.ex
defmodule Main do
def run() do
case Lexer.tokenize() do
{:ok, tokens} ->
case Parser.parse(tokens) do
{:ok, function} ->
execute_function(function)
{:error, message} ->
IO.puts("Error: #{message}")
end
{:error, message} ->
IO.puts("Error: #{message}")
end
end
def execute_function({_, :main, _, statements}) do
Enum.each(statements, fn statement ->
execute_statement(statement)
end)
end
def execute_statement({_, :let, variable, expression}) do
value = evaluate_expression(expression)
assign_variable(variable, value)
end
def execute_statement({_, :display, expression}) do
value = evaluate_expression(expression)
IO.puts(value)
end
def evaluate_expression(value) do
value
end
def assign_variable(variable, value) do
# do nothing
end
end
=>
** (CaseClauseError) no case clause matching: [{{:function, "fu"}, "fu"}, {{:main, "main"}, "main"}, {{:open_paren, "("}, "("}, {{:close_paren, ")"}, ")"}, {{:open_brace, "{"}, "{"}, {{:let, "let"}, "let"}, {{:semicolon, ";"}, ";"}, {{:display, "display"}, "display"}, {{:open_paren, "("}, "("}, {{:open_brace, "{"}, "{"}, {{:close_brace, "}"}, "}"}, {{:close_paren, ")"}, ")"}, {{:semicolon, ";"}, ";"}, {{:close_brace, "}"}, "}"}]
(inok 0.1.0) lib/inok.ex:3: Main.run/0
iex:1: (file)
I hope that someone can help me out of here, I literally spent over three hours debugging this now with several IO.inspects and multiple rewrites of my Parser … but everything ends up in the same problem.
Thanks!