Is there a better way to traverse nested maps than this?

Is there a better way to traverse a tree structure and accumulate leaves than this code?

  def traverse_start(value, acc_list) when is_list(value) do
    for v <- value, reduce: acc_list do
      acc -> acc ++ traverse(v, acc_list)
    end
    |> Enum.uniq()
  end

  def traverse_start(value, acc_list) when is_map(value) do
    for {k, v} <- value, reduce: acc_list do
      acc -> acc ++ traverse([k], v, acc_list)
    end
    |> Enum.uniq()
  end

  def traverse(keys, value, acc_list) when is_map(value) do
    # keys |> IO.inspect
    for {k, v} <- value, reduce: acc_list ++ [keys] do
      acc -> acc ++ traverse(keys ++ [k], v, acc)
    end
  end

  def traverse(keys, value, acc_list) when is_list(value) do
    # keys |> IO.inspect
    for v <- value, reduce: acc_list ++ [keys] do
      acc -> (acc ++ traverse(keys, v, acc)) |> Enum.uniq()
    end
  end

  def traverse(keys, _value, _acc_list) do
    # keys |> IO.inspect
    [keys]
  end

This code traverses a map or list and gets a list of unique paths. It’s used to get column names for a CSV file from an array of JSON documents.

    columns = Report.traverse_start(data_list, []) |> Enum.sort()

input

    data_list = [
      %{},
      %{
        "a" => 1,
        "b" => %{"bb" => 2},
        "c" => %{"cc" => %{"ccc" => 3}},
        "n" => nil,
        "u" => %{"uu" => nil},
        "w" => %{"ww" => %{"www" => nil}},
        "r" => [],
        "z" => %{}
      },
      %{
        "a" => 1,
        "b" => %{"bb" => 2},
        "c" => %{"cc" => %{"ccc" => 3}},
        "n" => nil,
        "u" => %{"uu" => nil},
        "w" => %{"ww" => %{"www" => nil}},
        "r" => [
          %{
            "a" => 1,
            "b" => %{"bb" => 2},
            "c" => %{"cc" => %{"ccc" => 3}},
            "n" => nil,
            "u" => %{"uu" => nil},
            "w" => %{"ww" => %{"www" => nil}},
            "r" => [
              %{
                "a" => 1,
                "b" => %{"bb" => 2},
                "c" => %{"cc" => %{"ccc" => 3}},
                "n" => nil,
                "u" => %{"uu" => nil},
                "w" => %{"ww" => %{"www" => nil}},
                "r" => [],
                "z" => %{}
              },
              %{
                "d" => 4
              }
            ],
            "z" => %{}
          },
          %{
            "d" => 4
          }
        ]
      }
    ]

output

   columns = [
      ["a"],
      ["b"],
      ["b", "bb"],
      ["c"],
      ["c", "cc"],
      ["c", "cc", "ccc"],
      ["n"],
      ["r"],
      ["r", "a"],
      ["r", "b"],
      ["r", "b", "bb"],
      ["r", "c"],
      ["r", "c", "cc"],
      ["r", "c", "cc", "ccc"],
      ["r", "d"],
      ["r", "n"],
      ["r", "r"],
      ["r", "r", "a"],
      ["r", "r", "b"],
      ["r", "r", "b", "bb"],
      ["r", "r", "c"],
      ["r", "r", "c", "cc"],
      ["r", "r", "c", "cc", "ccc"],
      ["r", "r", "d"],
      ["r", "r", "n"],
      ["r", "r", "r"],
      ["r", "r", "u"],
      ["r", "r", "u", "uu"],
      ["r", "r", "w"],
      ["r", "r", "w", "ww"],
      ["r", "r", "w", "ww", "www"],
      ["r", "r", "z"],
      ["r", "u"],
      ["r", "u", "uu"],
      ["r", "w"],
      ["r", "w", "ww"],
      ["r", "w", "ww", "www"],
      ["r", "z"],
      ["u"],
      ["u", "uu"],
      ["w"],
      ["w", "ww"],
      ["w", "ww", "www"],
      ["z"]
    ]
2 Likes

You’ve got a LOT of O(n^2) going on here from appending to lists. Here is a more constant time answer:

defmodule PathFinder do
  def paths(collection) when is_map(collection) or is_list(collection) do
    Enum.flat_map(collection, &paths/1)
  end
  
  def paths({k, v}) do
    case paths(v) do
      [] ->
        [[k]]
      subpaths ->
        Enum.map(subpaths, fn subpath -> [k | subpath] end)
    end
  end
  
  def paths(_leaf) do
    []
  end
end

Tree traversal is often a lot easier to do as a body recursive function, particularly when your output shape is also pretty tree-like.

Notably these do NOT produce identical results, but I think that might be actually what you want. My version produces only full paths to leaf nodes, your answer includes paths like ["b"] that do not point to leaf nodes.

6 Likes

How would you include paths like [“b”] that do not point to leaf nodes?

Do you need that in your CSV? If they don’t point to any data, why make a column out of it?

Gonna leave that as an exercise for the reader :wink: Hint: it’s in how you return the subpaths here:

      subpaths ->
        Enum.map(subpaths, fn subpath -> [k | subpath] end)

You want to make sure to include [k] itself as one of the subpaths.

Deduping is also pretty straight forward. I’d probably just rename paths to do_paths and then have def paths(val), do: val |> do_paths |> Enum.uniq.

5 Likes

This worked for me.

[[k]] ++ Enum.map(subpaths, fn subpath -> [k | subpath] end)

Your code processed my data more than twice as fast.

2 Likes

If you have columns for the parent map with its fields user, user.name, user.email, then you can filter by user to see if some rows don’t have a property for user.

1 Like

Why not

[[k] | Enum.map(subpaths, fn subpath -> [k | subpath] end)]

(even though the compiler would probably rewrite it to this anyway in this case)

1 Like

Yeah I’d consider either fine, the compiler does indeed rewrite it to be identical in both cases.

1 Like