This is all great, and gives me some direction for sure!
It’s a timeout.
# Options handed to Logflare.Tracker as its single start argument.
tracker_opts = [
  name: Logflare.Tracker,
  pubsub_server: Logflare.PubSub,
  broadcast_period: 1_000,
  down_period: 5_000,
  permdown_period: 30_000,
  pool_size: 1,
  log_level: :debug
]

# Child specifications, kept in their original order (presumably passed to a
# Supervisor, which would start them top to bottom - confirm at the call site).
children = [
  # Clustering and persistence
  {Cluster.Supervisor, [topologies, [name: Logflare.ClusterSupervisor]]},
  supervisor(Logflare.Repo, []),
  # PubSub is listed before the tracker, which refers to it via :pubsub_server
  supervisor(Phoenix.PubSub.PG2, [[name: Logflare.PubSub, fastlane: Phoenix.Channel.Server]]),
  worker(Logflare.Tracker, [tracker_opts]),
  supervisor(LogflareTelemetry.Supervisor, []),
  # Caches and auxiliary processes
  Logflare.Users.Cache,
  Logflare.Sources.Cache,
  Logflare.Logs.RejectedLogEvents,
  {Task.Supervisor, name: Logflare.TaskSupervisor},
  # Source-related supervisors and metrics
  supervisor(Logflare.Sources.Counters, []),
  supervisor(Logflare.Sources.RateCounters, []),
  supervisor(Logflare.Source.Supervisor, []),
  supervisor(Logflare.SystemMetricsSup, []),
  # The web endpoint is the last entry
  supervisor(LogflareWeb.Endpoint, [])
]
We’re catching the sigterm from Kubernetes.
Our SIGTERM handler… just delays the shutdown to let our Broadway pipelines drain after traffic stops. But I will issue a graceful Tracker shutdown there and see if that helps, and/or try manually killing all the websockets on that node.
defmodule Logflare.SigtermHandler do
  @moduledoc false
  # `:gen_event` handler for OS signal events.
  #
  # * `:sigterm` - logs, then schedules `:proceed_with_sigterm` after the configured
  #   grace period; when that message arrives the VM is stopped with `:init.stop/0`.
  # * any other event - logged and handed to OTP's default `:erl_signal_handler`.
  #
  # The grace period is read from the `:logflare` application environment key
  # `:sigterm_shutdown_grace_period_ms` when the handler is initialised.
  @behaviour :gen_event
  require Logger

  # Reads the grace period at runtime (not at compile time) so that release/runtime
  # configuration is honoured. Raises if the key is missing, which makes installing
  # the handler fail instead of silently running without a grace period.
  @impl true
  def init(_) do
    Logger.info("#{__MODULE__} is being initialized...")
    grace_period = Application.fetch_env!(:logflare, :sigterm_shutdown_grace_period_ms)
    {:ok, %{grace_period: grace_period}}
  end

  # Timer message scheduled by the `:sigterm` clause of `handle_event/2`.
  @impl true
  def handle_info(:proceed_with_sigterm, state) do
    Logger.warn("#{__MODULE__}: shutdown grace period reached, stopping the app...")
    :init.stop()
    {:ok, state}
  end

  # A gen_event handler sees every message sent to its manager; ignore unrelated ones
  # rather than raising FunctionClauseError (which would remove this handler).
  def handle_info(_msg, state) do
    {:ok, state}
  end

  # Delays the shutdown: the timer message is sent to self(), i.e. the process running
  # this callback, and is picked up by `handle_info/2` above.
  @impl true
  def handle_event(:sigterm, %{grace_period: grace_period} = state) do
    Logger.warn("#{__MODULE__}: SIGTERM received: waiting for #{grace_period / 1_000} seconds")
    Process.send_after(self(), :proceed_with_sigterm, grace_period)
    {:ok, state}
  end

  # Everything else gets the default OTP treatment. The default handler is called
  # directly instead of re-notifying :erl_signal_server: a notify would return :ok
  # (not a valid callback result) and would re-deliver the event to this handler if
  # it is installed on that manager. `inspect/1` keeps logging safe for any term.
  def handle_event(ev, state) do
    Logger.warn(
      "#{__MODULE__}: has received a system signal: #{inspect(ev)} and redirected it to :erl_signal_server"
    )

    :erl_signal_handler.handle_event(ev, state)
  end

  # No calls are expected; log and acknowledge with :ok.
  @impl true
  def handle_call(msg, state) do
    Logger.warn("#{__MODULE__} has received an unexpected call: #{inspect(msg)}")
    {:ok, :ok, state}
  end
end