Creating a nested list from a flat list

I’m new to functional programming and I’d like to know the idiomatic way to convert a flat list of maps into a nested list.

For some background, I’m experimenting with Ecto. I have a table with a recursive relationship to itself. That is to say that there is a parent_id field which has a foreign key pointing to the id field of the same table. I’m trying to get a list of just the rows where the parent_id is null at the top level with all of its children in a nested list.

flat = [
    %{id: 1, name: "region 1", parent_region_id: nil},
    %{id: 2, name: "subregion 1", parent_region_id: 1},
    %{id: 3, name: "subregion 2", parent_region_id: 1},
    %{id: 4, name: "region 2", parent_region_id: nil},
    %{id: 5, name: "subregion 3", parent_region_id: 4},
    %{id: 6, name: "subregion 4", parent_region_id: 4}
  ]

nested = [
    %{
      id: 1,
      name: "region 1",
      parent_region_id: nil,
      subregions: [%{id: 2, name: "subregion 1", parent_id: 1}, %{id: 3, name: "subregion 2", parent_region_id: 1}]
    },
    %{
      id: 4,
      name: "region 2",
      parent_region_id: nil,
      subregions: [%{id: 5, subregion: "subregion 3", parent_region_id: 4}, %{id: 6, name: "subregion 4", parent_region_id: 4}]
    }
  ]

I can achieve this by using Repo.preload(:subregions), but that would add an unnecessary join. Any tips on an elegant way to convert flat into nested?

Here is the migration script.

defmodule Bazaar.Repo.Migrations.CreateRegions do
  use Ecto.Migration

  def change do
    create table(:regions) do
      add :name, :string
      add :parent_region_id, references(:regions), null: true

      timestamps()
    end
  end
end

And this is the schema.

defmodule Bazaar.Geoscheme.Region do
  use Ecto.Schema
  import Ecto.Changeset

  schema "regions" do
    field(:name, :string)

    belongs_to(:parent_region, Bazaar.Geoscheme.Region)
    has_many(:subregions, Bazaar.Geoscheme.Region, foreign_key: :parent_region_id)

    timestamps()
  end

  @doc false
  def changeset(region, attrs) do
    region
    |> cast(attrs, [:name, :parent_region_id])
    |> validate_required([:name])
  end
end
defmodule Tools.TreeFromList do
  def build_tree(nodes, config) do
    by_parent = Enum.group_by(nodes, & &1[config.parent_id_key])
    Enum.map(by_parent[config.root_parent], &build_tree_(&1, by_parent, config))
  end

  defp build_tree_(node, nodes_by_parent, config) do
    children =
      Enum.map(
        Map.get(nodes_by_parent, node[config.node_id_key], []),
        &build_tree_(&1, nodes_by_parent, config)
      )

    config.build_tree_node.(node, children)
  end
end
defmodule TreeFromListTest do
  use ExUnit.Case

  @tag :build_tree
  test "tree from list" do
    data = [
      %{id: 1, name: "F1", parent_id: nil},
      %{id: 2, name: "F2", parent_id: nil},
      %{id: 6, name: "F6", parent_id: 3},
      %{id: 4, name: "F4", parent_id: 2},
      %{id: 5, name: "F5", parent_id: 3},
      %{id: 3, name: "F3", parent_id: 1}
    ]

    config = %{
      build_tree_node: &Map.put(&1, :children, &2),
      parent_id_key: :parent_id,
      node_id_key: :id,
      root_parent: nil
    }

    assert [
             %{
               children: [
                 %{
                   children: [
                     %{children: [], name: "F6", parent_id: 3, id: 6},
                     %{children: [], name: "F5", parent_id: 3, id: 5}
                   ],
                   name: "F3",
                   parent_id: 1,
                   id: 3
                 }
               ],
               name: "F1",
               parent_id: nil,
               id: 1
             },
             %{
               children: [%{children: [], name: "F4", parent_id: 2, id: 4}],
               name: "F2",
               parent_id: nil,
               id: 2
             }
           ] == Tools.TreeFromList.build_tree(data, config)
  end
end
2 Likes
flat = [
    %{id: 1, name: "region 1", parent_region_id: nil, subregions: []},
    %{id: 2, name: "subregion 1", parent_region_id: 1, subregions: []},
    %{id: 3, name: "subregion 2", parent_region_id: 1, subregions: []},
    %{id: 4, name: "region 2", parent_region_id: nil, subregions: []},
    %{id: 5, name: "subregion 3", parent_region_id: 4, subregions: []},
    %{id: 6, name: "subregion 4", parent_region_id: 5, subregions: []}
  ]

defmodule Nester do
  def as_nested(flat) do
    lookup = Enum.group_by(flat, & &1.parent_region_id)

    with_subregions(nil, lookup)
  end

  defp with_subregions(parent_id, lookup) do
    lookup
    |> Map.get(parent_id, [])
    |> Enum.map(&nest_one(&1, lookup))
  end

  defp nest_one(row, lookup) do
    %{row | subregions: with_subregions(row.id, lookup)}
  end
end

Nester.as_nested(flat)
# result
[
  %{
    id: 1,
    name: "region 1",
    parent_region_id: nil,
    subregions: [
      %{id: 2, name: "subregion 1", parent_region_id: 1, subregions: []},
      %{id: 3, name: "subregion 2", parent_region_id: 1, subregions: []}
    ]
  },
  %{
    id: 4,
    name: "region 2",
    parent_region_id: nil,
    subregions: [
      %{id: 5, name: "subregion 3", parent_region_id: 4, subregions: []},
      %{id: 6, name: "subregion 4", parent_region_id: 4, subregions: []}
    ]
  }
]

The key step here is splitting the calculation into two parts:

  • the Enum.group_by that pulls together the subregions lists
  • the recursive process of taking a node and filling out its subregions
4 Likes

Awesome! This worked as is when piping my Repo.all into it. Now let’s see if I actually understand it.

Is the following correct?

This bit & &1.parent_region_id creates an anonymous function and returns the parent_region_id of the first argument which is then used to group the map elements.

Everything else seems comprehensible.

1 Like

That is 100% correct!