Minimal sub arborescence

Hi, all,

I’m trying to solve a problem with an arborescence (a fancy name for a directed acyclic graph which has a single “root” that has a unique path to every other vertex).

Now I’m trying to find the minimal sub arborescence given an arborescence and a list of vertices that must appear in the sub arborescence.

For example, given an arborescence

    a
   / \
  b   c
 /|\   \
d e f   g

and a list of vertices

[c, d, e]

The algorithm should return

    a
   / \
  b   c
 / \   
d   e 

and when given the same arborescence but the vertices list [d, f], it should return

  b
 / \
d   f

I’m using libgraph now, but it’s okay to switch to Erlang’s digraph if needed.

Here’s my code for now:

defmodule Arborescences do

  @doc """
  Finds the closest common ancestor vertex of the vertices `v1` and `v2` in the given arborescence `graph`.
  """
  @spec closest_common_ancestor(Graph.t(), Graph.vertex(), Graph.vertex()) :: nil | Graph.vertex()
  def closest_common_ancestor(graph, v1, v2) do
    with root when not is_nil(root) <- Graph.arborescence_root(graph) do
      case {v1, v2} do
        {^root, _v2} -> root
        {_v1, ^root} -> root
        _ ->
          path1 = Graph.dijkstra(graph, root, v1)
          path2 = Graph.dijkstra(graph, root, v2)

          # Meh!
          List.last(path1 -- (path1 -- path2))
      end
    end
  end

  @doc """
  Finds the closest common ancestor vertex of all the vertices in `vertices` in an arborescence `graph`.
  """
  @spec closest_common_ancestor(Graph.t(), [Graph.vertex()]) :: nil | Graph.vertex()
  def closest_common_ancestor(_graph, []), do: nil

  def closest_common_ancestor(graph, [vertex]) do
    if Graph.has_vertex?(graph, vertex), do: vertex, else: nil
  end

  def closest_common_ancestor(graph, [v1, v2 | rest]) do
    ancestor = closest_common_ancestor(graph, v1, v2)
    closest_common_ancestor(graph, [ancestor | rest])
  end

  @doc """
  Finds the minimal sub arborescence containing all the vertices in `vertices` of the given arborescence `graph`.
  """
  @spec minimal_sub_arborscence(Graph.t(), [Graph.vertex()]) :: Graph.t()
  def minimal_sub_arborscence(graph, vertices) do
    do_minimal_sub_arborscence(graph, Enum.uniq(vertices))
  end

  defp do_minimal_sub_arborscence(_graph, []) do
    Graph.new(type: :directed)
  end

  defp do_minimal_sub_arborscence(graph, [vertex]) do
    if Graph.has_vertex?(graph, vertex) do
      Graph.new(type: :directed) |> Graph.add_vertex(vertex)
    else
      Graph.new(type: :directed)
    end
  end

  defp do_minimal_sub_arborscence(graph, vertices) do
    subroot = closest_common_ancestor(graph, vertices)
    for dist <- vertices, reduce: Graph.new(type: :directed) do
      subgraph ->
        graph
        |> Graph.dijkstra(subroot, dist)
        |> Kernel.||([subroot])
        |> Enum.chunk_every(2, 1, :discard)
        |> Enum.reduce(subgraph, fn [v1, v2], subgraph ->
          Graph.add_edges(subgraph, Graph.edges(graph, v1, v2))
        end)
    end
  end
end

This code is far from optimal because it’s doing Dijkstra pathfinding too many times. How can I optimize such algorithm?

Thanks! :smiley:

2 Likes

I was playing with this during lunch and came up with this:

defmodule Arby do
  @doc """
  Finds the minimal sub arborescence containing all the vertices in `vertices` of the given arborescence `graph`.
  """
  @spec minimal_sub_arborscence(Graph.t(), Graph.label()) :: Graph.t()
  def minimal_sub_arborscence(graph, vertices) do
    if Graph.is_arborescence?(graph) do
      do_minimal_sub_arborscence(graph, vertices)
    else
      raise("Graph should be arborescence!")
    end
  end

  defp do_minimal_sub_arborscence(graph, vertices) do
    vertices
    |> Enum.map(&Graph.Utils.vertex_id/1)
    |> then(&fully_connected_vertex_ids(graph, &1))
    |> MapSet.to_list()
    |> then(&Map.take(graph.vertices, &1))
    |> Map.values()
    |> then(&Graph.subgraph(graph, &1))
  end

  @doc """
  Considers the given arborescence `graph` and the given list of vertex_ids,
  returning a MapSet of vertex_ids representing the minimal set
  required for all vertexes to be connected.
  """
  @spec fully_connected_vertex_ids(Graph.t(), [Graph.vertex_id()]) :: MapSet.t()
  def fully_connected_vertex_ids(graph, vertex_ids) do
    fully_connected_vertex_ids(graph, vertex_ids, MapSet.new(vertex_ids))
  end

  @spec fully_connected_vertex_ids(Graph.t(), [Graph.vertex_id()], MapSet.t()) :: MapSet.t()
  def fully_connected_vertex_ids(_graph, [], accumulated_vertex_ids) do
    accumulated_vertex_ids
  end

  def fully_connected_vertex_ids(
        graph,
        [vertex_id | remaining_vertex_ids],
        accumulated_vertex_ids
      ) do
    fully_connected_vertex_ids(
      graph,
      remaining_vertex_ids,
      MapSet.union(accumulated_vertex_ids, Map.get(graph.in_edges, vertex_id))
    )
  end
end

I think it’s better because I am fully exploiting the arborescence structure. In such a structure, every “vertex” has exactly 1 parent. So the fully_connected_vertex_ids/2 function just uses graph.in-edges to “get the parent” of each vertex we care about, until we have a final MapSet of vertex_ids representing a fully connected set of vertexes.

Once you have this, it’s a simple matter of making a subgraph.

Here’s my test module for that code:

defmodule ArbyTest do
  use ExUnit.Case
  doctest Arby

  @doc """
  It looks like this:

      a
    / \
    b   c
  /|\   \
  d e f   g

  """
  def example_graph() do
    Graph.new()
    |> Graph.add_edge(:a, :b)
    |> Graph.add_edge(:b, :d)
    |> Graph.add_edge(:b, :e)
    |> Graph.add_edge(:b, :f)
    |> Graph.add_edge(:a, :c)
    |> Graph.add_edge(:c, :g)
  end

  describe "minimal_sub_arborscence/2" do
    test "gets minimal_graph with reduced set of vetices" do
      graph = example_graph()

      for {input, expected_output} <- [
            {[:c, :d, :e], [:a, :b, :c, :d, :e]},
            {[:d, :f], [:b, :d, :f]}
          ] do
        assert graph
               |> Arby.minimal_sub_arborscence(input)
               |> Map.get(:vertices)
               |> Map.values() == expected_output
      end
    end
  end

  describe "arborescence assumptions" do
    test "arborescent graphs permit only 1 parent per vertex" do
      graph =
        example_graph()
        |> Graph.add_edge(:z, :b)

      refute Graph.is_arborescence?(graph)
    end
  end
end

I took another look at this and realized my code is wrong. For the extra test cases:

            {[:b, :d, :g], [:a, :b, :c, :d, :g]},
            {[:b, :d], [:b, :d]},

it fails

Thank you so much for answering my question. I really appreciate that.

I’m thinking maybe it’s easier to solve the problem by converting the arborescence to an inverted tree then finding the minimal sub inverted tree. Just find the path of each given vertex to the root, then eliminate the common part of all the paths except for the first vertex in it.

Building an inverted tree can be done in O(|V|), and the worst case of finding the sub inverted tree can be O(|V|^2), which is not a big deal considering each of my graphs can have at most several tens of vertices. I’ll try this idea tomorrow.

In a way, you kind of already have the inverted tree in the in_edges. That maps vertexes to their parent vertex.

No need to thank me! I was using the Graph lib recently anyway and I found the question interesting and fun to play with. Post any more solutions here!

1 Like

Thank you for your advice. From that, I came up with this piece of code:

defmodule Arborescences do
  def minimal_sub_arborescence(graph, vertices) do
    if Graph.is_arborescence?(graph) do
      do_minimal_sub_arborescence(graph, vertices)
    else
      raise "Not an arborescence"
    end
  end

  defp do_minimal_sub_arborescence(_graph, [root]) do
    Graph.new(type: :directed)
    |> Graph.add_vertex(root)
  end

  defp do_minimal_sub_arborescence(graph, vertices) do
    in_edges = Enum.flat_map(vertices, &Graph.in_edges(graph, &1))

    parents =
      in_edges
      |> Stream.map(&Map.get(&1, :v1))
      |> Enum.uniq()

    parent_arborescence = do_minimal_sub_arborescence(graph, parents)
    Graph.add_edges(parent_arborescence, in_edges)
  end
end

And after tweaking a while, a tail-recursive solution:

defmodule G do
  def minimal_sub_arborescence(graph, vertices) do
    if Graph.is_arborescence?(graph) do
      do_minimal_sub_arborescence(graph, vertices, & &1)
    else
      raise "Not an arborescence"
    end
  end

  defp do_minimal_sub_arborescence(_graph, [root], continuation) do
    Graph.new(type: :directed)
    |> Graph.add_vertex(root)
    |> continuation.()
  end

  defp do_minimal_sub_arborescence(graph, vertices, continuation) do
    in_edges = Enum.flat_map(vertices, &Graph.in_edges(graph, &1))

    parent_vertices =
      in_edges
      |> Stream.map(&Map.get(&1, :v1))
      |> Enum.uniq()

    do_minimal_sub_arborescence(graph, parent_vertices, fn acc_arborescence ->
      acc_arborescence
      |> Graph.add_edges(in_edges)
      |> continuation.()
    end)
  end
end

That’s pretty good! The only test case that fails is when you use [:b, :d]. It mistakenly includes :a in the minimal subgraph.

1 Like

Yup. I’ll see what I can do to fix it.

I found that I mistakenly assumed that all the vertices in the argument vertices are on the same level, which is definitely not the case.

I finally made it.

I eventually went back to the approach of finding all paths from the root to each given vertex and eliminating the common part of the paths, only this time I didn’t use Dijkstra.

defmodule G do
  def minimal_sub_arborescence(graph, vertices) do
    if Graph.is_arborescence?(graph) do
      do_minimal_sub_arborescence(graph, vertices)
    else
      raise "Not an arborescence"
    end
  end

  defp do_minimal_sub_arborescence(_graph, [vertex]) do
    Graph.new(type: :directed)
    |> Graph.add_vertex(vertex)
  end

  defp do_minimal_sub_arborescence(graph, vertices) do
    paths = Enum.map(vertices, &path_from_root(graph, &1))

    edges = Stream.unfold(paths, fn paths ->
      {
        Enum.map(paths, fn
          [] -> nil
          [h|_] -> h
        end),
        Enum.map(paths, fn
          [] -> []
          [_|t] -> t
        end)
      }
    end)
    |> Stream.drop_while(&all_the_same?/1)
    |> Stream.take_while(&Enum.any?/1)
    |> Stream.flat_map(& &1)
    |> Stream.reject(&is_nil/1)

    Graph.new(type: :directed)
    |> Graph.add_edges(edges)
  end

  defp all_the_same?([_]), do: false
  defp all_the_same?([a, a]), do: true
  defp all_the_same?([a, a | t]), do: all_the_same?([a | t])
  defp all_the_same?([_, _ | _]), do: false

  defp path_from_root(graph, vertex) do
    Stream.unfold(Graph.in_edges(graph, vertex), fn
      [] -> nil
      [edge] -> {edge, Graph.in_edges(graph, edge.v1)}
    end)
    |> Enum.take_while(&not is_nil(&1))
    |> Enum.reverse()
  end
end
4 Likes

well done!

1 Like

Thanks :blush: