I’m trying to fix this issue raised for libgraph:
I think the problem is that the current delete vertex code loops over every entry in the collections of total edges, in edges, and out edges for every deletion. I thought that could be improved by looping over the total edges once and simply reducing into the new collections all at the same time. In my simple test of a million vertices each with 999_998 edges I found very minimal if any real difference in the time it takes to run the deletion compared to the current implementation. Can anyone figure out why my intuition about this problem is wrong? Or alternatively why my test is wrong?
current implementation:
def delete_vertex(
%__MODULE__{out_edges: oe, in_edges: ie, edges: em, vertex_identifier: vertex_identifier} =
g,
v
) do
vs = g.vertices
ls = g.vertex_labels
with v_id <- vertex_identifier.(v),
true <- Map.has_key?(vs, v_id),
oe <- Map.delete(oe, v_id),
ie <- Map.delete(ie, v_id),
vs <- Map.delete(vs, v_id),
ls <- Map.delete(ls, v_id) do
oe = for {id, ns} <- oe, do: {id, MapSet.delete(ns, v_id)}, into: %{}
ie = for {id, ns} <- ie, do: {id, MapSet.delete(ns, v_id)}, into: %{}
em = for {{id1, id2}, _} = e <- em, v_id != id1 && v_id != id2, do: e, into: %{}
%__MODULE__{g | vertices: vs, vertex_labels: ls, out_edges: oe, in_edges: ie, edges: em}
else
_ -> g
end
end
attempt to fix:
def delete_vertex(
%__MODULE__{vertices: vs, vertex_labels: ls, out_edges: oe, in_edges: ie, edges: em, vertex_identifier: vertex_identifier} =
g,
v
) do
with v_id <- vertex_identifier.(v),
true <- Map.has_key?(vs, v_id),
oe <- Map.delete(oe, v_id),
ie <- Map.delete(ie, v_id),
vs <- Map.delete(vs, v_id),
ls <- Map.delete(ls, v_id) do
{em, oe, ie} =
em
|> Enum.reduce({em, oe, ie}, fn
{{v_id, id2} = k, _}, {e, o, i} -> {Map.delete(e, k), o, Map.update!(i, id2, fn ns -> MapSet.delete(ns, v_id) end)}
{{id1, v_id} = k, _}, {e, o, i} -> {Map.delete(e, k), Map.update!(o, id1, fn ns -> MapSet.delete(ns, v_id) end), i}
_, acc -> acc
end)
%__MODULE__{g | vertices: vs, vertex_labels: ls, out_edges: oe, in_edges: ie, edges: em}
else
_ -> g
end
end
simple test in iex:
g = Graph.new()
vs = 1..1_000_000 |> Enum.to_list()
vs |> Enum.each(fn i -> Graph.add_vertex(g, i) end)
vs |> Enum.zip(Enum.reverse(vs)) |> Enum.each(fn {i,o} -> Graph.add_edge(g, i, o) end)
vs |> Enum.zip(Enum.reverse(vs)) |> Enum.each(fn {i,o} -> Graph.add_edge(g, o, i) end)
:timer.tc(fn -> 1..1_000_000//7 |> Enum.each(fn i -> Graph.delete_vertex(g, i) end) end, :millisecond)