Unable to figure out how to use map and reduce in this case

I have a list of categories. Each category contains articles, and each article has 3 kinds of metrics:

# Sample input: one map per article, tagged with its category and carrying
# one `val` per trait. The `arcticle_id` spelling is kept as-is because the
# snippets further down refer to that exact key.
all_data = [
  %{category_id: 1, arcticle_id: 1, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 1, arcticle_id: 62, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 1, arcticle_id: 33, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},

  %{category_id: 12, arcticle_id: 3, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 13, arcticle_id: 14, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 12, arcticle_id: 6, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]}
]

I want to get rid of the articles by grouping the entries that share a category id and summing their “val”s per trait. Something like this:

# Desired result: one map per category, with `val` summed per trait across
# all of that category's articles.
all_data2 = [
  %{category_id: 1, data: [%{trait_id: 1, val: 9}, %{trait_id: 2, val: 39}, %{trait_id: 3, val: 1296}]},
  %{category_id: 12, data: [%{trait_id: 1, val: 6}, %{trait_id: 2, val: 26}, %{trait_id: 3, val: 864}]},
  %{category_id: 13, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]}
]

What’s the best way to do this? I know I need to use reduce, but how exactly?

How do you expect article_id to be grouped? In the resulting map for category_id: 1 you have article_id: 1, but for category_id: 12 you’ve got article_id: 6.

I think he meant get rid of articles… but forgot to remove them.

What he wants, is to group by categories, then group by trait_id, then sum values.

Typo…

Maybe Enum.group_by might help

# Group the entries by category; each grouped entry has its (misspelled)
# :arcticle_id key dropped.
Enum.group_by(all_data, fn entry -> entry.category_id end, &Map.delete(&1, :arcticle_id))

outputs

%{1 => [%{category_id: 1,
     data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13},
      %{trait_id: 3, val: 432}]},
   %{category_id: 1,
     data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13},
      %{trait_id: 3, val: 432}]},
   %{category_id: 1,
     data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13},
      %{trait_id: 3, val: 432}]}],
  12 => [%{category_id: 12,
     data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13},
      %{trait_id: 3, val: 432}]},
   %{category_id: 12,
     data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13},
      %{trait_id: 3, val: 432}]}],
  13 => [%{category_id: 13,
     data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13},
      %{trait_id: 3, val: 432}]}]}

Not quite what you want, but close…

veryyyy close.
idiot.

# Unfinished attempt: the reducer's pattern and body below are still
# placeholders (`????` / `# ???`), so this snippet does not compile as written.
res1 = [
  %{category_id: 1, arcticle_id: 1, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 1, arcticle_id: 62, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 1, arcticle_id: 33, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},

  %{category_id: 12, arcticle_id: 3, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 13, arcticle_id: 14, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 12, arcticle_id: 6, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]}
] 
|> Enum.group_by(&(&1.category_id)) 
# `category_id3` is an unbound variable used as a map key inside this pattern;
# that is what the compiler error quoted below complains about.
|> Enum.reduce(fn(%{category_id3 => [%{category_id: category_id2, arcticle_id: arcticle_id2, data: ????}]}, acc) ->
    # ???
end)

And also, why am I getting an error involving category_id3?

illegal use of variable category_id3 inside map key match, maps can only match on existing variable by using ^category_id3

But it shouldn’t be a variable. How to do a pattern match on it?

You are using an arrow instead of :

defmodule Test do
  @moduledoc """
  Groups article entries by category.
  """

  @doc """
  Builds a map from each `category_id` to the trait maps seen for that category.

  The `:data` lists of entries sharing a category are concatenated and then
  de-duplicated with `Enum.uniq/1`; the `val`s are not summed.
  """
  def reduce(data) when is_list(data) do
    Enum.reduce(data, %{}, fn %{category_id: category_id, data: traits}, grouped ->
      combined =
        case Map.get(grouped, category_id) do
          # First entry for this category: take its traits as they are.
          nil -> traits
          seen -> Enum.uniq(seen ++ traits)
        end

      Map.put(grouped, category_id, combined)
    end)
  end
end

# Sample input: one map per article, tagged with its category and carrying
# one `val` per trait.
all_data = [
  %{category_id: 1, arcticle_id: 1, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 1, arcticle_id: 62, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 1, arcticle_id: 33, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},

  %{category_id: 12, arcticle_id: 3, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 13, arcticle_id: 14, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]},
  %{category_id: 12, arcticle_id: 6, data: [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13}, %{trait_id: 3, val: 432}]}
]

Test.reduce(all_data)

outputs

%{1 => [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13},
   %{trait_id: 3, val: 432}],
  12 => [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13},
   %{trait_id: 3, val: 432}],
  13 => [%{trait_id: 1, val: 3}, %{trait_id: 2, val: 13},
   %{trait_id: 3, val: 432}]}

Instead of Enum.uniq you might want to merge the lists from :data somehow?

I think he asks to sum the value…

oh right

defmodule Test do
  @moduledoc """
  Sums trait values per category across a list of article entries.
  """

  @doc """
  Collapses `data` into one map per category, with `val` summed per `trait_id`.

  Each entry is matched as a map with `:category_id` and a `:data` list of
  `%{trait_id: _, val: _}` maps. The result is a list of
  `%{category_id: _, data: [%{trait_id: _, val: _}]}` maps. Both the outer
  list and each `:data` list are built by prepending, so they come out in the
  reverse of the intermediate maps' iteration order.
  """
  def reduce(data) when is_list(data) do
    data
    |> Enum.reduce(%{}, &add_entry/2)
    |> categories_to_list()
  end

  # Folds one entry into the running totals, shaped as
  # %{category_id => %{trait_id => summed_val}}.
  defp add_entry(%{category_id: category_id, data: traits}, totals) do
    sums = for %{trait_id: trait_id, val: val} <- traits, into: %{}, do: {trait_id, val}

    Map.update(totals, category_id, sums, fn known ->
      Map.merge(known, sums, fn _trait_id, left, right -> left + right end)
    end)
  end

  # Turns the nested totals map back into the list-of-maps shape of the input.
  defp categories_to_list(totals) do
    Enum.reduce(totals, [], fn {category_id, sums}, out ->
      [%{category_id: category_id, data: traits_to_list(sums)} | out]
    end)
  end

  # Turns %{trait_id => total} into a list of %{trait_id: _, val: _} maps.
  defp traits_to_list(sums) do
    Enum.reduce(sums, [], fn {trait_id, total}, out ->
      [%{trait_id: trait_id, val: total} | out]
    end)
  end
end

then Test.reduce(all_data) returns

[%{category_id: 13,
   data: [%{trait_id: 3, val: 432}, %{trait_id: 2, val: 13},
    %{trait_id: 1, val: 3}]},
 %{category_id: 12,
   data: [%{trait_id: 3, val: 864}, %{trait_id: 2, val: 26},
    %{trait_id: 1, val: 6}]},
 %{category_id: 1,
   data: [%{trait_id: 3, val: 1296}, %{trait_id: 2, val: 39},
    %{trait_id: 1, val: 9}]}]
1 Like

why so difficult?

Which part do you find difficult?

I guess it can be done without reshaping the data, but then summing the vals would be a bit more involved.

defmodule Category do
  def merge(categories) do
    sum_traits = fn kept, incoming ->
      merge_maps(incoming ++ kept, :trait_id, :val, &(&1 + &2))
    end

    # Entries sharing a :category_id collapse into the first one seen; their
    # :data lists are combined by summing :val per :trait_id.
    merge_maps(categories, :category_id, :data, sum_traits)
  end

  # Folds `maps` into one map per distinct `map[key]`. The first map seen for a
  # key is kept whole; each later one only contributes its `merge_key` value,
  # combined via `fun.(kept_value, new_value)`. Returns the merged maps as a list.
  defp merge_maps(maps, key, merge_key, fun) do
    merged =
      Enum.reduce(maps, %{}, fn map, by_key ->
        id = map[key]

        case by_key do
          %{^id => kept} ->
            combined = fun.(kept[merge_key], map[merge_key])
            %{by_key | id => put_in(kept, [merge_key], combined)}

          _ ->
            Map.put(by_key, id, map)
        end
      end)

    Map.values(merged)
  end
end

(On line 4 you could write &(&1 + &2) as &+/2, the forum’s syntax highlighter breaks with &+/2, so I used the long form.)

https://hexdocs.pm/elixir/Map.html#update/4

3 Likes

How to remove “article_id”, will it require adding Enum.map?

There are several ways to accomplish that, but I would use Enum.map/2 and Map.delete/2.

# The sample data in this thread spells the key :arcticle_id, so that is the
# key to delete; Map.delete/2 with :article_id would silently change nothing.
Enum.map(categories, &Map.delete(&1, :arcticle_id))
1 Like
# One result map per category: gather every trait map belonging to that
# category, then total the `val`s per `trait_id`.
for {category_id, trait_lists} <- Enum.group_by(all_data, & &1.category_id, & &1.data) do
  vals_by_trait =
    trait_lists
    |> List.flatten()
    |> Enum.group_by(& &1.trait_id, & &1.val)

  totals = for {trait_id, vals} <- vals_by_trait, do: %{trait_id: trait_id, val: Enum.sum(vals)}

  %{category_id: category_id, data: totals}
end

You lose a bit of ordering, but if that matters it’s easily recovered.

2 Likes