Not sure where this will eventually go, but for now I’m using it to get a feel for streams and processes. ‘sowpods.txt’ (https://en.wikipedia.org/wiki/SOWPODS) is a large file containing a single word per line. I won’t describe it beyond that unless asked, since I want to learn how to convey intent in Elixir as well as understand how to write more idiomatic code. I would appreciate any and all thoughts, critiques, and anything else that would help make this better.
defmodule WordsByLetter do
  @moduledoc """
  A set of words held in an `Agent`.

  The agent's state is a `MapSet`; it is registered under the name given to
  `start_link/1`, and that name (or the pid) is what `add/2` and `lookup/2`
  expect as their first argument.
  """

  @doc """
  Starts an agent registered as `name` whose initial state is an empty `MapSet`.

  Returns whatever `Agent.start_link/2` returns.
  """
  @spec start_link(GenServer.name()) :: Agent.on_start()
  def start_link(name) do
    Agent.start_link(&MapSet.new/0, name: name)
  end

  @doc """
  Inserts `word` into the set held by the agent `dict`. Returns `:ok`.
  """
  @spec add(Agent.agent(), term()) :: :ok
  def add(dict, word) do
    Agent.update(dict, fn words -> MapSet.put(words, word) end)
  end

  @doc """
  Returns `true` when `word` is in the set held by the agent `dict`,
  `false` otherwise.
  """
  @spec lookup(Agent.agent(), term()) :: boolean()
  def lookup(dict, word) do
    Agent.get(dict, fn words -> MapSet.member?(words, word) end)
  end
end
defmodule WordList do
  @moduledoc """
  Routes raw lines of a word list into per-letter `WordsByLetter` agents.

  Each word is stored in the agent named after its first character, e.g.
  `"apple"` goes to `:adict`. Those agents must already be running; this
  module never starts them.
  """

  @doc """
  Starts a `Task` that runs `process/1` on `wordlist`.

  Returns the `%Task{}`; pass it to `Task.await/2` to wait for completion.
  """
  def spawn(wordlist) do
    Task.async(fn -> process(wordlist) end)
  end

  @doc """
  Trims every line in `wordlist` and adds each resulting word to the agent
  chosen by `split/1`.

  Lines that are empty after trimming are skipped, so a stray blank line does
  not crash the whole chunk. Returns `:ok`.
  """
  def process(wordlist) do
    # Lazy stages: one pass over the chunk, no intermediate lists.
    wordlist
    |> Stream.map(&String.trim/1)
    |> Stream.reject(&(&1 == ""))
    |> Stream.map(&split/1)
    |> Enum.each(fn %{dict: dict, word: word} -> WordsByLetter.add(dict, word) end)
  end

  @doc """
  Pairs `word` with the name of the agent that should hold it.

  The name is the first grapheme of `word` followed by `"dict"`, as an atom.

  NOTE: the atom is created from the input itself. Atoms are never garbage
  collected, so only feed this words from a trusted source with a bounded set
  of initial characters.

  ## Examples

      iex> WordList.split("apple")
      %{dict: :adict, word: "apple"}
  """
  def split(word) do
    dict_name = String.to_atom(String.first(word) <> "dict")
    %{dict: dict_name, word: word}
  end
end
# Number of lines handed to each worker task.
chunk_size = 2500

# Start one named agent per initial letter (:adict .. :zdict). Matching on
# {:ok, _pid} aborts the script right here if an agent fails to start, rather
# than later with a :noproc exit on the first add.
Enum.each(?a..?z, fn letter ->
  {:ok, _pid} = WordsByLetter.start_link(String.to_atom(<<letter>> <> "dict"))
end)

# Load the word list, one task per chunk of lines.
#   * chunk_every/2 also emits the final, shorter chunk, so the tail of the
#     file is loaded too.
#   * Enum.map is eager on purpose: every task is started before the first one
#     is awaited, so the chunks are processed concurrently.
#   * The await has no timeout because all tasks compete for the same agents
#     and the first one may take longer than the 5 s default.
"sowpods.txt"
|> File.stream!([:utf8, :read])
|> Stream.chunk_every(chunk_size)
|> Enum.map(&WordList.spawn/1)
|> Enum.each(&Task.await(&1, :infinity))

# Spot checks; each prints "true" or "false".
IO.puts(WordsByLetter.lookup(:adict, "aa"))
IO.puts(WordsByLetter.lookup(:ddict, "dog"))
IO.puts(WordsByLetter.lookup(:qdict, "queen"))