Refactoring nested data transformations

Hello everybody,

I currently work on a live booking buffering system (people looking into reserving block a reservation while they look or buy, for a determined time), and have to constantly go from reservations by-user to reservations by-day or by-hour. Everything works fine now, but the transforms are quite verbose because of my heavy use of maps to keep access times low.

For example, I have this (working) code :

defmodule Sample do

@doc """
  Goes from
  %{user_id %{day %{hour {status, timestamp}}}
  where user_id : binary
  day : Date
  hour : Time
  status : {atom, socket, reservation-uuid}
  timestamp : integer
  to
  %{day: [{user_id, {status, timestamp}, hour}]}
  """
  def from_by_user_to_by_day(by_user) do
    users = by_user
            |> Map.keys
    users
             |> Enum.flat_map(
                  fn uid ->
                    usermap = Map.get(by_user, uid)
                    days = Map.keys(usermap)
                    Enum.flat_map(
                      days,
                      fn day ->
                        daymap = Map.get(usermap, day)
                        hours = Map.keys(daymap)
                        Enum.map(
                          hours,
                          fn hour ->
                            status = Map.get(daymap, hour)
                            {uid, status, hour, day}
                          end
                        )
                      end
                    )
                  end
                )
    |> Enum.reduce(
      %{},
      fn {user_guest_id, status, hour, day}, acc ->
        Map.put(acc, day, [{user_guest_id, status, hour} | Map.get(acc, day, [])])
      end
    )
  end

end

And cleaned it up into this :

defmodule Sample do

def from_by_user_to_by_day_n(data_by_user) do
    data_by_user
      |> Map.keys() # Gets user ids
      |> user_data_to_list_form(data_by_user) # Makes it linear
      |> Enum.reduce(%{}, &list_form_to_day_form_reducer/2) # Makes it daily
  end

  def user_data_to_list_form(user_guest_ids, data_by_user) do
    user_guest_ids
    |> Enum.flat_map(fn uid -> user_data_to_list_form_a(uid, data_by_user) end)
  end

  def user_data_to_list_form_a(uid, data_by_user) do
      user_map = Map.get(data_by_user, uid)
      Map.keys(user_map) |> # Those are days
        Enum.flat_map(fn day -> user_data_to_list_form_b(day, uid, user_map) end)
  end

  def user_data_to_list_form_b(day, uid, user_map) do
      day_map = Map.get(user_map, day)
      Map.keys(day_map) # Those are hours
       |> Enum.map(fn hour -> user_data_to_list_form_c(hour, day, uid, day_map) end)
  end

  def user_data_to_list_form_c(hour, day, uid, day_map) do
      status = Map.get(day_map, hour)
      {uid, status, hour, day}
  end

  def list_form_to_day_form_reducer({user_guest_id, status, hour, day}, acc) do
    lines = Map.get(acc, day, [])
    line = {user_guest_id, status, hour}
    Map.put(acc, day, [line | lines])
  end

end

This is still not satisfying, since all the transforms are quite similar with the exception that every stage of the transformation needs access to bound variables of the preceding stage.

I’m really skeptical, because it seems I don’t see an obvious generalisation of those transforms that would allow me to shorten (and clarify) this up.

The function I use to walk this structure to notify users of expired options and flush them is even more nested in the same fashion.

Any ideas ? I’m not looking for someone to give a solution, but for pointers on what I obviously missed.

Have a nice day !

1 Like

One common pattern that stands out in the above is this:

def do_things_with_map(some_map) do
  some_map
  |> Map.keys()
  |> Enum.flat_map(fn key -> do_things_with_each_key(key, some_map) end)
end

def do_things_with_each_key(key, some_map) do
  data_for_key = Map.get(some_map, key)
  # do something with key and data_for_key
end

Using both a key and a value from a map is a good indicator you should be iterating over the pairs from the map instead:

def do_things_with_map(some_map) do
  some_map
  |> Enum.flat_map(&do_things_with_each_key/1)
end

def do_things_with_each_key({key, data_for_key}) do
  # do something with key and data_for_key
end

Hello,
Thanks for your advice.
I yet have to remember that Enum offers iteration on {k,v} tuples.

Maybe I should write this generic do_thing_with_enumerable / do_thing_with_each_member function couple and specialize it with larger function heads that inejct surrounding state.

I managed to refactor the other part, the one that purges expired options…

I had a lot of things like this, where I reduce to a maybe-empty list and returns some map unchanged or with the non-empty list, but nested to 4 levels :

Enum.reduce(%{}, fn {day, hours}, out_b ->
    newhours = hours
         |> Enum.reduce(%{}, fn item, out_c ->
            item
              |> do_things
              |> to_reduce
              |> it_to_a_list
         end)
    if Map.keys(newhours)
       |> length == 0 do
      out_b
    else
      Map.put(out_b, day, newhours)
    end
end)

It became much clearer after seeing this pattern :

def put_or_pass_if_empty(enumerable, value, map, key) do
    if enumerable
       |> length == 0 do
      map
    else
      Map.put(map, key, value)
    end
  end

  def pop_empty(a, b, c, d), do: put_or_pass_if_empty(a, b, c, d)

  def filter_options(options, day, hour) do
       # ...
  end

  def filter_options_reducer({hour, list}, out, day) do
      opts = filter_options(list, day, hour)
      pop_empty(opts, opts, out, hour)
  end

  def cleaned_mess(byuser) do
     byuser
      |> Enum.reduce(
           %{},
           fn {user, days}, out_a ->
             newdays = days
               |> Enum.reduce(
                    %{},
                    fn {day, hours}, out_b ->
                      newhours = hours
                        |> Enum.reduce(%{}, fn a, b -> filter_options_reducer(a, b, day) end)
                      pop_empty(Map.keys(newhours), newhours, out_b, day)
                    end)
             pop_empty(Map.keys(newdays), newdays, out_a, user)
           end)
     end

Still messy, but on the right track. Thanks for pointing patterns !