I think I managed to solve it, but I suspect there is a more elegant solution out there:
defmodule Scratch do
  @moduledoc """
  Scratch module that streams a CSV file and turns every data row into a map
  keyed by column name.
  """

  NimbleCSV.define(MyCSVParser, [])

  # Example arguments for `process_csv/3`.
  #
  # `core_cols` is looked up with the header taken from `custom_col_pos`
  # (`core_cols[header]`), so it has to be keyed by the CSV header and hold the
  # name that should replace it:
  #
  # core_cols = %{
  #   'DOB' => :date_of_birth,
  #   'Plan_Member_ID' => :external_id,
  #   'First_Name' => :first_name,
  #   'Last_Name' => :last_name,
  #   'Phone' => :phone
  # }
  #
  # custom_col_pos = %{
  #   0 => 'Plan_Member_ID',
  #   1 => 'First_Name',
  #   2 => 'Phone',
  #   3 => 'Last_Name',
  #   4 => 'DOB',
  #   5 => 'HbA1c',
  #   6 => 'Hypertension',
  #   7 => 'Children',
  #   8 => 'Gender',
  #   9 => 'Pain'
  # }

  @doc """
  Streams the CSV file at `file_path`, skips its header row, converts every
  remaining row into a map and prints that map with the label `"MappedRow"`.

  ## Parameters

    * `file_path` - path of the CSV file to read.
    * `custom_col_pos` - map from zero-based column index to header name.
    * `core_cols` - map from header name to the key that should be used
      instead of the header; headers without an entry are used as-is.

  Returns `:ok` once the whole stream has been consumed.

  Raises `KeyError` when a row contains a column index that is missing from
  `custom_col_pos`.
  """
  @spec process_csv(Path.t(), map(), map()) :: :ok
  def process_csv(file_path, %{} = custom_col_pos, %{} = core_cols) do
    file_path
    |> File.stream!(read_ahead: 1000)
    |> MyCSVParser.parse_stream(skip_headers: true)
    |> Stream.map(&map_row(&1, custom_col_pos, core_cols))
    # TODO: parse the date of birth, e.g.
    # {:ok, date_of_birth} = DateTimeParser.parse_date(dob)
    |> Stream.each(&IO.inspect(&1, label: "MappedRow"))
    |> Stream.run()
  end

  # Builds the `column_name => formatted_cell` map for a single parsed row.
  defp map_row(row, custom_col_pos, core_cols) do
    row
    |> Enum.with_index()
    |> Map.new(fn {cell, index} ->
      header = Map.fetch!(custom_col_pos, index)
      # Fall back to the raw header when there is no core-column replacement.
      {core_cols[header] || header, format_col(cell)}
    end)
  end

  # Normalises one cell: copies the binary and lower-cases it.
  #
  # `Macro.underscore/1` is deliberately not used here. It is meant for
  # identifiers and module names and rewrites "." as "/", which corrupted
  # values such as "6.3" into "6/3".
  defp format_col(str) do
    str |> :binary.copy() |> String.downcase()
  end
end
The output is (trimmed):
MappedRow: %{
:date_of_birth => "1/1/1974",
:external_id => "120511",
:first_name => "jane",
:last_name => "doe",
:phone => "1112223333",
'Children' => "n",
'Gender' => "f",
'HbA1c' => "6/3",
'Hypertension' => "y",
'Pain' => "y"
}
As you can see in the output, I need to do some data-type-specific transformation, e.g. HbA1c should be `6.3`, not `6/3`. So I guess I could use pattern matching on `format_col` function variants with regular expressions, but I'm not sure if this is the best approach?