How to spawn many, many processes really fast?

I played with that problem and had 4 processes each spawn 10 processes, each of which spawns 10 more processes, and so on, to spawn 40K processes in total.

It is fast, but not that much faster than spawning them sequentially. And if you do that then I don’t know how you could supervise those processes.

For better synchronization I would pick a timestamp 3 seconds in the future, spawn all processes sequentially, and have them wait until that precise timestamp to start the logic in sync.

Here is my test code:

defmodule Serv do
  @moduledoc """
  Minimal GenServer used as the leaf process of the spawn experiment.

  On initialisation each server links itself to `parent` and reports back
  with a `{:started, id}` message. Its state is just the `id`.
  """

  use GenServer

  @doc """
  Starts a server that is not linked to the calling process.

  The link is created inside `init/1` against `parent` instead, so the
  process that calls this function may terminate without affecting the server.

  Returns the result of `GenServer.start/2`.
  """
  @spec start_nolink(term(), pid()) :: GenServer.on_start()
  def start_nolink(id, parent) do
    GenServer.start(__MODULE__, {id, parent})
  end

  @impl true
  def init({id, parent}) do
    # Link to the controlling process rather than to the caller of start/2.
    Process.link(parent)
    # Tell the controller that this server is up.
    send(parent, {:started, id})
    {:ok, id}
  end
end

defmodule Spawner do
  @moduledoc """
  Starts `Serv` processes through a tree of intermediate spawner processes.

  Every range in the scheme adds one level to the tree; the values picked
  along a path are combined, base 10, into the id of the server at the leaf.
  """

  @doc """
  Spawns the root of the spawner tree for `scheme`, a list of ranges.

  Every started server is given `parent` as the process to link and report to.
  Returns the pid of the spawned root process, or the result of
  `Serv.start_nolink/2` when `scheme` is empty.
  """
  def rec_spawn(parent, scheme), do: rec_spawn(parent, scheme, 0)

  # No ranges left: `prefix` is the complete id, so start the actual server.
  defp rec_spawn(parent, [], prefix), do: Serv.start_nolink(prefix, parent)

  # Spawn one process for this level. It recurses once per value of the first
  # range, appending that value as the next decimal digit of the id.
  defp rec_spawn(parent, [digits | rest], prefix) do
    shifted = prefix * 10

    spawn(fn ->
      for digit <- digits, do: rec_spawn(parent, rest, shifted + digit)
    end)
  end
end

defmodule Control do
  @moduledoc """
  Helpers for the controlling process: compute the highest expected server id
  and wait until every server has reported in.
  """

  @doc """
  Waits until the calling process has at least `max + 1` queued messages, then
  removes every `{:started, id}` message from its mailbox.

  `max` is the highest expected id; ids start at 0, hence the `+ 1`.
  Progress is printed every 100 ms while waiting.

  Note that the mailbox length counts every queued message, not only
  `{:started, id}` ones.

  Returns `:ok`.
  """
  @spec check_started(non_neg_integer()) :: :ok
  def check_started(max) do
    await_started(max + 1)
  end

  # Polls the mailbox length instead of receiving, so the messages stay queued
  # until all of them have arrived.
  defp await_started(expected) do
    case Process.info(self(), :message_queue_len) do
      # `>=` rather than an exact match: unrelated messages in the mailbox
      # must not crash the check with a CaseClauseError.
      {:message_queue_len, n} when n >= expected ->
        IO.puts("all started OK")

        flush_all()

      {:message_queue_len, n} ->
        IO.puts("started #{n}/#{expected}")
        Process.sleep(100)
        await_started(expected)
    end
  end

  # Drops all queued `{:started, _}` messages without blocking.
  defp flush_all() do
    receive do
      {:started, _} ->
        flush_all()
    after
      0 -> :ok
    end
  end

  @doc """
  Returns the highest id produced for `ranges`.

  The maximum of each range is treated as one decimal digit, with the last
  range as the least significant digit. The fixed base 10 mirrors the
  `sum * 10` step in `Spawner`. An empty list yields `0`.
  """
  @spec sum_ranges([Range.t()]) :: integer()
  def sum_ranges(ranges) do
    {sum, _} =
      List.foldr(ranges, {0, 1}, fn range, {sum, size} ->
        {sum + Enum.max(range) * size, size * 10}
      end)

    sum
  end
end

# Driver: the current process acts as the parent that every server links to
# and reports to.
parent = self()

# 4 * 10 * 10 * 10 * 10 = 40_000 servers, with ids 0..39_999.
ranges = [0..3, 0..9, 0..9, 0..9, 0..9]

Spawner.rec_spawn(parent, ranges)

# Wait until every server has sent its `{:started, id}` message.
Control.check_started(Control.sum_ranges(ranges))

2 Likes