DynamicSupervisor: how to start child iff it doesn't exist?

I’ve got this bit of code; I’m wondering why create_or_open_room always creates a new room, even when there is an existing child process with the same ID:

If a child specification with the specified ID already exists, child_spec is discarded and this function returns an error with :already_started or :already_present if the corresponding child process is running or not, respectively.
Supervisor.start_child/2 — Elixir v1.17.3

defmodule FooApp.Application do
  # See https://hexdocs.pm/elixir/Application.html
  # for more information on OTP Applications
  @moduledoc false

  use Application

  require Logger

  def create_or_open_room(<<room_name::binary>>) do # FIXME this is supposed to be idempotent
    case DynamicSupervisor.start_child(
      FooApp.RoomSupervisor,
      %{id: room_name, start: {GenServer, :start_link, [FooApp.Application.Model, room_name]}}
    ) do
      {:ok, pid} -> Logger.debug("=====STARTED #{inspect pid}====="); pid
      :ignore -> raise "unreachable"
      {:error, {:already_started, pid}} -> Logger.debug("=====IDEMPOTENCE OK #{inspect pid}====="); pid
      {:error, error} -> raise inspect error
    end
  end

  @impl true
  def start(_type, _args) do
    children = [
      FooAppWeb.Telemetry,
      FooApp.Repo,
      {Ecto.Migrator,
        repos: Application.fetch_env!(:fooApp, :ecto_repos),
        skip: skip_migrations?()},
      # {DNSCluster, query: Application.get_env(:fooApp, :dns_cluster_query) || :ignore},
      # {Phoenix.PubSub, name: FooApp.PubSub}, # for general app communication
      {Phoenix.PubSub, name: FooApp.RoomPubSub}, # ONLY for room statechange announcements, to avoid collision between UGC room names and an actual system topic
      # Start the Finch HTTP client for sending emails
      # {Finch, name: FooApp.Finch},
      # Start a worker by calling: FooApp.Worker.start_link(arg)
      # {FooApp.Worker, arg},
      {DynamicSupervisor, name: FooApp.RoomSupervisor},
      # Start to serve requests, typically the last entry
      FooAppWeb.Endpoint
    ]

    # See https://hexdocs.pm/elixir/Supervisor.html
    # for other strategies and supported options
    opts = [strategy: :one_for_one, name: FooApp.Supervisor]
    Supervisor.start_link(children, opts)
  end

  # Tell Phoenix to update the endpoint configuration
  # whenever the application is updated.
  @impl true
  def config_change(changed, _new, removed) do
    FooAppWeb.Endpoint.config_change(changed, removed)
    :ok
  end

  defp skip_migrations?() do
    # By default, sqlite migrations are run when using a release
    System.get_env("RELEASE_NAME") != nil
  end
end
FooApp.Application.Model (a boring GenServer, not relevant, including for completeness)
defmodule FooApp.Application.Model do
  use GenServer

  require Logger

  defmodule State do
    @enforce_keys [:name, :resources]
    defstruct [:name, :resources, :_pubsub_topic]
  end

  defmodule Resource do
    @enforce_keys [:name]
    defstruct [:name, status: "undefined"]

    @allowable_statuses ["green", "green-with-exception", "red"]
    def status_ok(status) do
      status in @allowable_statuses
    end
  end

  defp list_update_such(list, fun_pred, fun) do
    # TODO consider overhauling/replacing this with a new data structure entirely
    # https://elixirforum.com/t/need-to-display-resources-in-the-exact-order-they-are-declared-ordered-map/67829/4?u=james_e
    Enum.map(list, &if fun_pred.(&1) do fun.(&1) else &1 end)
  end

  @impl true
  def init(name) do
    resource_names = [ # FIXME actually load this from ecto
      "FROG-0",
      "FROG-1",
      "FROG-2",
      "FROG-3",
      "FROG-4",
      "FROG-5",
      "FROG-6",
      "FROG-7",
    ];
    {:ok, %State{
      name: name,
      resources: resource_names |> Enum.map(&%Resource{name: &1}),
      _pubsub_topic: name
    }}
  end

  @impl true
  def handle_continue(:broadcast_statechange, state) do
    # TODO this seems located inappropriately far from the call to subscribe; is there a better pattern for this?
    Phoenix.PubSub.broadcast!(FooApp.RoomPubSub, state._pubsub_topic, {:statechange, state});
    {:noreply, state}
  end

  @impl true
  def handle_call(:get_state, _from, state) do
    {:reply, state, state}
  end

  @impl true
  def handle_call({:act, actor, action}, from, state) do
    case action do
      {:set_resource_status, resource_name, new_status} -> (
        with \
          true <- actor === "director" || {:error, "Unauthorized"},
          resources = state.resources,
          true <- Resource.status_ok(new_status) || {:error, "Invalid status"}
        do
          {:reply, :ok, %{state |
            resources: list_update_such(resources, &(&1.name === resource_name), &%{&1 | status: new_status})
          }, {:continue, :broadcast_statechange}}
        else
          {:error, error} -> {:reply, {:error, error}, state}
        end
      )

      _ -> (
        Logger.warning("Client #{inspect from} attempted invalid action");
        {:reply, {:error, "Action incongruent with current state"}, state}
      )
    end
  end
end

The docs for DynamicSupervisor.start_child mention that the id is required but ignored:

Note that while the :id field is still required in the spec, the value is ignored and therefore does not need to be unique.

1 Like

Yeah, I think this applies only to named genservers, AKA starting them with option name: ProcessName.

As @al2o3cr said, the :id is ignored. Though if my memory serves you can achieve what you like by supplying a :name… which is what @D4no0 said.

If none of that works you could just use DynamicSupervisor.which_children and try to look for the already-started child there before attempting to start a new one.

I see… I adapted the code with that in mind, and now it seems to work perfectly:

  defp start_child_idempotent(supervisor, id, module, init_arg, opt \\ []) do
    # https://elixirforum.com/t/dynamicsupervisor-how-to-start-child-iff-it-doesnt-exist/67858/5?u=james_e
    reg_name = {:global, {"#{__MODULE__}.start_child_idempotent", GenServer.whereis(supervisor), id}} # FIXME: maybe convert to https://hexdocs.pm/elixir/1.17/Registry.html#module-using-in-via at some point
    case DynamicSupervisor.start_child(
      supervisor, %{
      id: nil, # https://hexdocs.pm/elixir/1.16/DynamicSupervisor.html#start_child/2:~:text=while%20the%20:id%20field%20is%20still%20required,the%20value%20is%20ignored
      start: {GenServer, :start_link, [module, init_arg, opt ++ [name: reg_name]]}
    }) do
      {:ok, pid} -> {:ok, pid}
      {:error, {:already_started, pid}} -> {:ok, pid}
      on_start_child -> on_start_child
    end

  def create_or_open_room(<<room_name::binary>>) do
    start_child_idempotent(
      FooApp.RoomSupervisor, room_name,
      FooApp.Application.Model, room_name
    )
  end

Thanks all for the tips. I don’t know how I missed that notice about the spec being wack.

Come on now, spec is not “wack”, you just need a name if you want to, you know, have a named process. :smiley:

1 Like

I was referring the @spec — where the DynamicSupervisor’s child’s id field is required, yet ignored — that was a red herring that confused me when trying to implement this.

The ID here isn’t meant to be a global name, only a local ID within the scope of that specific DynamicSupervisor.

1 Like

Can’t edit the post because it’s too old; here’s an implementation for posterity with proper documentation

@doc """
Exactly like [DynamicSupervisor.start_child/2](https://hexdocs.pm/elixir/1.17/DynamicSupervisor.html#start_child/2)
except that the "id" field is not [disregarded](https://hexdocs.pm/elixir/1.17/Supervisor.html#module-child-specification).

Only works with simple GenServer supervisees for now.

## Usage

    start_child_idempotent(
      Foo.Application.DynamicSupervisorForModelInstances, %{
      id: locally_unique_id_for_this_model_instance, start: {GenServer, :start_link, [
      Foo.Application.Model,
      model_arg
    ]}})
"""
@spec start_child_idempotent(Supervisor.supervisor(), Supervisor.child_spec()) :: DynamicSupervisor.on_start_child()
def start_child_idempotent(dynamic_supervisor, child_spec = %{id: id, start: _}) do
  # https://hexdocs.pm/elixir/1.17/GenServer.html#module-name-registration
  # FIXME: https://hexdocs.pm/elixir/1.17/Registry.html#module-using-in-via
  reg_name = {:global, {
    "#{__MODULE__}.start_child_idempotent",
    GenServer.whereis(dynamic_supervisor) || dynamic_supervisor,
    id
  }}

  # https://hexdocs.pm/elixir/1.15/DynamicSupervisor.html#start_child/2:~:text=while%20the%20:id%20field%20is%20still%20required,the%20value%20is%20ignored
  child_spec = Map.update!(child_spec, :start, fn
    {m = GenServer, f = :start_link, [m1, x]} -> {m, f, [m1, x, [name: reg_name]]}
    {m = GenServer, f = :start_link, [m1, x, opts]} -> {m, f, [m1, x, opts ++ [name: reg_name]]}
    {m, f = :start_link, [x, opts]} when is_list(opts) -> {m, f, [x, opts ++ [name: reg_name]]}
    start -> raise %ArgumentError{message: "only GenServer style start_link children supported at this time.\n#{inspect start}"}
  end)

  # https://github.com/elixir-lang/elixir/blob/v1.12.2/lib/elixir/lib/gen_server.ex#L929
  # https://github.com/erlang/otp/blob/OTP-24.2.1/lib/stdlib/src/gen.erl#L83
  case DynamicSupervisor.start_child(dynamic_supervisor, child_spec) do
    {:ok, pid} -> {:ok, pid}
    {:error, {:already_started, pid}} -> {:ok, pid}
    other -> other
  end
end
1 Like

Glad you made it work for you but I still don’t understand how can you not ensure the DynamicSupervisor gets started at the, ahem, start of your application.

This function — just like DynamicSupervisor.start_child/2 — requires you to pass in an existing, running DynamicSupervisor. Starting that process is handled elsewhere.