Hi, guys
In other forum we test different languages. My project is Elixir based.
Task: Custom file parser. Similar to Markdown.
- line starts with # header (1…6)
- empty line block (paragraph)
- In the begining of the output file ToC linked to headers
- Links line start with [txt] url
- Links in text [txt] replaced by link generated above
- /text become bold/italic
My code exec time is ~450ms (i7,ssd):
Are you see some slow blocks/elements?
Avg time for reading file 12MB is 90/100ms
defmodule Markdown do
def parse(source_file, target_file) do
source = File.read!(source_file)
|> String.splitter("\n")
|> Enum.to_list
links_worker = Task.async(fn ->
Enum.reduce(source, %{}, fn(i, acc) ->
ltr = String.slice(i, 0, 1)
if ltr == "[", do: Map.merge(acc, form_link(i)), else: acc
end)
end)
html = source
|> Stream.transform(
fn() -> %{ tag: "", html: "", header: [], line: 0 } end,
fn(i, acc) ->
ltr = String.slice(i, 0, 1)
if(ltr == "[") do
{[nil], acc}
else
data = parse_line(ltr, i, acc)
{[data.html], data}
end
end,
fn(acc) ->
toc = acc.header
|> Enum.reduce("", fn(x, acc) -> acc <> "<li>#{x}</li>" end)
html = "<!DOCTYPE html><html lang=\"zxx\"><head><title>Parser output</title><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"/></head><body><ul>#{toc}</ul>"
File.write(target_file, html, [:delayed_write])
acc
end)
|> Enum.reduce("", fn(i, acc) -> if !is_nil(i), do: acc <> i, else: acc end)
|> String.split(~r/\*/)
|> Stream.transform(1, fn i, acc ->
if rem(acc, 2) == 0, do: {["<b>#{i}</b>"], acc + 1}, else: {[i], acc + 1}
end)
|> Enum.join
|> String.splitter(["[", "]"])
|> Enum.to_list
|> (fn(html, worker) ->
links = Task.await(worker)
place_links(html, links)
end).(links_worker)
|> Enum.join
File.write(target_file, html, [:append, :delayed_write])
end
def place_links([], _) do
["</body></html>"]
end
def place_links([head | tail], links) do
html = head
|> (fn x, links -> if Map.has_key?(links, head), do: Map.get(links, head), else: x end).(links)
[html] ++ place_links(tail, links)
end
defp form_link(src) do
[_, txt, url] = String.splitter(src, ["[", "]"]) |> Enum.to_list
Map.put(%{}, txt, "<a href=\"#{url}\">#{txt}</a>")
end
# Block start/end
defp parse_line(start, _, acc) when start == "" do
html = ""
|> (fn(_,acc) -> if String.length(acc.tag) > 0, do: "</#{acc.tag}>" end).(acc)
%{ tag: "", html: html, header: acc.header, line: acc.line + 1 }
end
# Header
defp parse_line(start, line, acc) when start == "#" do
{tag_num, content} = head_num(line)
link = "<a href=\"#_#{acc.line}\" id=\"#{acc.line}\">#{content}</a>"
html = "<h#{tag_num}>#{content} <a href=\"##{acc.line}\" id=\"_#{acc.line}\">⇫</a></h#{tag_num}>"
%{ tag: "h#{tag_num}", html: html, header: acc.header ++ [link], line: acc.line + 1 }
end
# Paragraph
defp parse_line(_, line, acc) do
html = line
|> (fn(line, acc) -> if acc.tag != "p", do: "<p>" <> line, else: line end).(acc)
%{ tag: "p", html: html, header: acc.header, line: acc.line + 1 }
end
# Match header tag
defp head_num("######" <> rest), do: {6, String.trim(rest)}
defp head_num("#####" <> rest), do: {5, String.trim(rest)}
defp head_num("####" <> rest), do: {4, String.trim(rest)}
defp head_num("###" <> rest), do: {3, String.trim(rest)}
defp head_num("##" <> rest), do: {2, String.trim(rest)}
defp head_num("#" <> rest), do: {1, String.trim(rest)}
end
[source, target] = System.argv
if File.exists?(source) do
:timer.tc(Markdown, :parse, [source, target])
|> IO.inspect
else
IO.puts "Source file does not exist"
end