Every year I help my friend with his taxes: he has an itinerary of places that he’s been to for work, and I scrape the Google Maps API to get driving distances. This year I did it in Elixir. The major challenge was figuring out how to load external libraries (CSV, JSON) into an .exs script (no framework). Mix isn’t like Python’s pip (and that’s a really good thing TM), but every once in a while you just want it to do what you want it to. I can’t say that what I did is necessarily “best practice”, but after a lot of hemming and hawing, I figured out a reasonable solution. I’m leaving it here to hopefully help/inspire other people who need to do something quick 'n dirty:
# The script takes exactly one argument: the itinerary CSV to process.
# Any other argument count fails the match below straight away.
[filename] = System.argv()

if not File.exists?(filename), do: raise("bad filename.")

# Base name without the ".csv" extension; reused for the output file name.
file_root = Path.basename(filename, ".csv")
#
# Before running, install jason, csv and parallel_stream as mix archives:
#
#   mix archive.install hex jason 1.1.2
#   mix archive.install hex csv 2.3.1
#   mix archive.install hex parallel_stream 1.0.6
#
# Put the ebin directory of each installed archive on the code path.
# A missing directory raises immediately with the command that installs it,
# instead of surfacing later as an undefined-function error.
~w(jason 1.1.2 csv 2.3.1 parallel_stream 1.0.6)
|> Enum.chunk_every(2)
|> Enum.each(fn [lib, ver] ->
  ebin = Path.expand("~/.mix/archives/#{lib}-#{ver}/#{lib}-#{ver}/ebin")

  File.dir?(ebin) ||
    raise("missing #{ebin}; run: mix archive.install hex #{lib} #{ver}")

  Code.prepend_path(ebin)
end)

# Make sure the libraries are really there: Code.ensure_compiled/1 reports
# failure through its return value, so match on the success tuple.
for mod <- [Jason, CSV] do
  {:module, ^mod} = Code.ensure_compiled(mod)
end
# Marshal the address book into a map. Every decoded row of addresses.csv
# is turned into a tuple and used as one map entry, so each row has to hold
# exactly two columns (key, then value).
addresses =
  "addresses.csv"
  |> File.stream!()
  |> CSV.decode!()
  |> Map.new(&List.to_tuple/1)
# Decode the itinerary CSV. `driving_lines` is walked once here for
# validation and is reused further down when the distances are looked up.
driving_lines =
  filename
  |> File.stream!()
  |> CSV.decode!()

# Check that every start/destination name has an entry in `addresses`.
# The first unknown name raises, reporting the 1-based line number.
driving_lines
|> Stream.with_index()
|> Enum.each(fn
  # the first line only holds the column titles.
  {_, 0} ->
    :ok

  # rows with a blank start or destination are thrown away by the later
  # processing step, so they must not abort the run here.
  {[_, start_name, dest_name | _], _} when start_name == "" or dest_name == "" ->
    :ok

  {[_, start_name, dest_name | _], line} ->
    Map.has_key?(addresses, start_name) || raise "bad line #{line + 1}, #{start_name}"
    Map.has_key?(addresses, dest_name) || raise "bad line #{line + 1}, #{dest_name}"

  # rows with fewer than three columns carry no route to validate.
  _ ->
    :ok
end)
# this time I will remember to deactivate this API key
# The key can be supplied through the GOOGLE_MAPS_API_KEY environment
# variable so that a real key never has to be pasted into the script; the
# placeholder below is only the fallback.
api_key = System.get_env("GOOGLE_MAPS_API_KEY") || "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

# Build the Google Distance Matrix request URL for one start/destination
# pair of street addresses.
#
# Both addresses are form-encoded: spaces become "+", and characters that
# would otherwise terminate or split the query string (such as "&", "#",
# "+", "%") are percent-escaped.
curl_query = fn start_addr, dest_addr ->
  start_http = URI.encode_www_form(start_addr)
  dest_http = URI.encode_www_form(dest_addr)

  "https://maps.googleapis.com/maps/api/distancematrix/json?units=imperial&origins=#{start_http}&destinations=#{dest_http}&key=#{api_key}"
end
# Replace the start/destination names of every itinerary row with their full
# addresses and append the driving distance in miles (or "?" when the API
# response holds no distance). The reduce accumulator is a cache keyed by
# {start_name, dest_name}, so each distinct route is requested only once.
{rows, _} =
  driving_lines
  |> Stream.with_index()
  # get rid of data rows with a blank start or destination. The title row
  # (index 0) and rows too short to hold a route are always passed through,
  # so the clauses of the reducer below see them.
  |> Stream.reject(fn
    {[_, a, b | _], index} when index > 0 -> a == "" || b == ""
    _ -> false
  end)
  |> Enum.map_reduce(%{}, fn
    # ignore the first line, which is just for fun titles.
    {line, 0}, cache ->
      {line, cache}

    {[r, start_name, dest_name | _], _}, cache ->
      # dereference the start and destination addresses from our dictionary.
      start_addr = addresses[start_name]
      dest_addr = addresses[dest_name]
      route = {start_name, dest_name}

      miles =
        case cache do
          # if our start and destination are cached, then use that instead.
          %{^route => cached} ->
            cached

          _ ->
            # run the curl command
            {res, status} =
              System.cmd("curl", ["--stderr", "/dev/null", curl_query.(start_addr, dest_addr)])

            status == 0 ||
              raise "curl exited with status #{status} for #{start_name} -> #{dest_name}"

            # destructure google's JSON. Use the numeric "value" (metres)
            # rather than the display "text": the text carries thousands
            # separators ("1,234 mi") and switches to feet for short hops,
            # both of which a plain Float.parse/1 would misread.
            case Jason.decode!(res) do
              %{"rows" => [%{"elements" => [%{"distance" => %{"value" => meters}}]}]}
              when is_number(meters) ->
                Float.round(meters / 1609.344, 1)

              _ ->
                "?"
            end
        end

      # put the line back together, this time with full addresses and
      # distances for the IRS, and cache the result in the accumulator map.
      {[r, start_addr, dest_addr, miles], Map.put(cache, route, miles)}

    # lines which don't conform to our dogma get released
    {line, _}, cache ->
      {line, cache}
  end)
# Encode the processed rows back into CSV and write them to
# "<file_root>-finished.csv".
output = File.stream!(file_root <> "-finished.csv")

rows
|> CSV.encode()
|> Enum.into(output)