defmodule MyApp.Stack do
use GenServer
def start_link(opts) do
GenServer.start_link(__MODULE__, opts, name: opts[:stack_name])
end
@impl GenServer
def init(opts) do
{:ok, {do_track_stack_singleton(opts), opts}}
end
@impl GenServer
def handle_info({:DOWN, _, :process, _pid, _reason}, {_pids, opts} = _state) do
{:noreply, {do_track_stack_singleton(opts), opts}}
end
defp do_track_stack_singleton(stack_opts) do
stack_module = stack_opts[:server_module]
process_name = stack_opts[:process_name]
pid =
case GenServer.start_link(stack_module, stack_opts, name: {:global, process_name}) do
{:ok, pid} ->
pid
{:error, {:already_started, pid}} ->
pid
end
Process.monitor(pid)
pid
end
end
In test file,
defmodule MyApp.StackTest do
use ExUnit.Case, async: true
import Assertions
alias MyApp.Operation
alias MyApp.Stack
alias MyApp.Server
@process_name :stack_cron
@opts [
intended_run_time_utc: ~T[08:00:00.000000],
operation_module: Operation,
process_name: @process_name,
run_interval_milliseconds: 86_400_000,
server_module: Server,
timeout: :timer.seconds(2)
]
defp start(_) do
{:ok, stack_pid} =
{Stack, @opts}
|> start_supervised(restart: :temporary)
child_pid = :global.whereis_name(@process_name)
%{
child_pid: child_pid,
stack_pid: stack_pid
}
end
describe "While running Stack" do
setup [:start]
test "if stack child exits Stack restarts it", %{
child_pid: child_pid,
stack_pid: stack_pid
} do
{pids, _opts} = :sys.get_state(stack_pid)
Process.exit(child_pid, :kill)
{other_pids, _opts} = :sys.get_state(stack_pid)
assert pids == other_pids
end
end
end
This test seems to fail intermittently with the following error message:
** (exit) exited in: :sys.get_state(#PID<0.1124.0>)
** (EXIT) killed
code: {other_pids, _opts} = :sys.get_state(stack_pid)
From what I debugged, if I remove the line which exists the child_pid, and run this test for around 30times, I do not see this error. But when I add back this line, this test seems to fail once/twice in 30times. If I am not wrong, somewhere when existing the child_pid, it also kills the parent pid. But I am not able to come to a conclusion as to why it is intermittent. Or I could be entirely wrong about the child process killing parent process. Any idea how to resolve this flaky test?