@josevalim here is my telemetry logic, I’m log queries that takes over 700ms (and I have about 10 por minute)
1 - I’m not sure if this is the correct way to log things.
2 - I’ve already tried disabling logging completely, with no effect.
IMPORT: before I said 35 thousand requests per second, but it is per minute.
#
# TODO: enviar para sentry errors de authentification ?
#
defmodule D4Web.Telemetry do
require Logger
use Supervisor
import Telemetry.Metrics
@slow System.convert_time_unit(700, :millisecond, :native)
def start_link(arg) do
Supervisor.start_link(__MODULE__, arg, name: __MODULE__)
end
def init(_arg) do
children = [
{:telemetry_poller, measurements: measurements(), period: 30_000}
# {Telemetry.Metrics.ConsoleReporter, metrics: metrics()}
]
setup()
Supervisor.init(children, strategy: :one_for_one)
end
def metrics() do
[
last_value("vm.memory.total", unit: {:byte, :megabyte}),
last_value("vm.total_run_queue_lengths.total"),
last_value("vm.total_run_queue_lengths.cpu"),
last_value("vm.total_run_queue_lengths.io"),
last_value("d4.cache.size.value", tags: [:cache, :node]),
last_value("d4.presence.size.value")
]
end
def measurements() do
[
{D4.Cache, :dispatch_cache_size, []},
{D4Web.Presence, :dispatch_online_user, []}
]
end
def setup do
events = [
#
# phoenix
# https://hexdocs.pm/phoenix/Phoenix.Logger.html
#
# [:plug_adapter, :call, :start],
# [:plug_adapter, :call, :stop],
# [:plug_adapter, :call, :exception],
# [:phoenix, :endpoint, :start],
# [:phoenix, :endpoint, :stop],
# [:phoenix, :router_dispatch, :start],
[:phoenix, :router_dispatch, :stop],
[:phoenix, :router_dispatch, :exception],
# [:phoenix, :error_rendered],
# [:phoenix, :socket_connected],
# [:phoenix, :channel_joined],
# [:phoenix, :channel_handled_in],
#
# absinthe graphql
#
# [:absinthe, :execute, :operation, :start], # when the operation starts
# [:absinthe, :execute, :operation, :stop], # when the operation finishes
# [:absinthe, :subscription, :publish, :start], # when a subscription starts
# [:absinthe, :subscription, :publish, :stop], # when a subscription finishes
# [:absinthe, :resolve, :field, :start], # when field resolution starts
# [:absinthe, :resolve, :field, :stop], # when field resolution finishes
# [:absinthe, :middleware, :batch, :start], # when the batch processing starts
# [:absinthe, :middleware, :batch, :stop], # when the batch processing finishes
#
# ecto
# https://hexdocs.pm/ecto/Ecto.Repo.html#module-telemetry-events
#
[:d4, :repo, :query],
# app
[:d4_web, :graphql, :stop],
[:d4, :repo, :black_word],
[:d4, :repo, :black_email],
[:d4, :authorization],
[:d4_pay, :request, :stop],
[:d4_pay, :request, :exception],
# [:d4, :pglistener, :ok],
[:d4, :pglistener, :error],
[:d4_web, :rate_limit]
# [:d4, :pg, :d4error]
]
events =
if Application.get_env(:d4, :prod?),
do: events ++ [[:d4_web, :presences]],
else: events
unless Application.get_env(:d4, :test?) do
:telemetry.attach_many("d4-telemetry", events, &D4Web.Telemetry.handle_event/4, nil)
end
end
# #
# #
# #
# def handle_event([:phoenix, :router_dispatch, :stop], %{duration: time}, %{conn: conn}, _) do
# path = conn.request_path
# if path not in ["/api/graphiql", "/favicon.ico", "/ping", "/sw.js", "/manifest.json"] do
# level = if conn.status >= 500,
# do: :error,
# else: :info
# log level, "REQUEST", %{
# status: conn.status,
# path: path,
# method: conn.method,
# duration: parse_time(time)
# }
# end
# end
def handle_event([:phoenix, :router_dispatch, :stop], %{duration: time}, %{conn: conn}, _) do
path = conn.request_path
if String.starts_with?(path, "/wh/") do
level =
if conn.status == 200 || conn.status == 201,
do: :info,
else: :error
log(level, "REQUEST", %{
status: conn.status,
path: path,
method: conn.method,
duration: parse_time(time)
})
end
end
#
#
#
def handle_event(
[:phoenix, :router_dispatch, :exception],
_,
%{kind: kind, reason: reason, stacktrace: stacktrace},
_
) do
log(:error, "REQUEST", %{
exception: Exception.format(kind, reason, stacktrace)
})
end
#
# measurements:
# %{start: 10, stop: 10, time: 2}
# metadata
# %{resolution: resolution, identifier: identifier, path: path}
# TODO: distinguir cs_error (warnig) de error (error)
#
def handle_event([:d4_web, :graphql, :stop], %{duration: time}, metadata, _config) do
errors = %{}
level = :info
{level, errors} =
if Enum.any?(metadata.errors || []),
do: {:error, Map.merge(errors, %{errors: metadata.errors})},
else: {level, errors}
{level, errors} =
if match?(%{ok: false}, metadata.value),
do: {:warning, Map.merge(errors, %{cs_errors: metadata.value.errors})},
else: {level, errors}
data =
Map.merge(errors, %{
identifier: metadata.identifier,
path: metadata.path,
duration: parse_time(time)
})
log(level, "GRAPHQL", data)
end
#
# measurements:
# %{decode_time: 4000, query_time: 6335000, queue_time: 74000, total_time: 6413000}
# metadata:
# %{parameters: [1], source: 'users', query: "the sql query string" }
#
#
def handle_event([:d4, :repo, :query], measurements, metadata, _config) do
query_time = measurements[:query_time] || 0
queue_time = measurements[:queue_time] || 0
if query_time + queue_time > @slow do
idle_time = measurements[:idle_time] || 0
log(:warning, "QUERY", %{
slow: true,
source: metadata.source,
duration: parse_time(query_time),
idle: parse_time(idle_time),
queue: parse_time(queue_time),
db: parse_time(query_time),
sql: metadata.query,
params: metadata.params
})
end
end
#
#
#
def handle_event([:d4, :repo, :black_word], _, metadata, _config) do
log(:warning, "BLACK_WORD", %{
word: metadata.word,
text: metadata.text
})
end
#
#
#
def handle_event([:d4, :repo, :black_email], _, metadata, _config) do
log(:warning, "BLACK_EMAIL", %{email: metadata.email})
end
#
#
#
def handle_event([:d4, :authorization], _, %{struct: struct, action: action}, _config) do
log(:error, "AUTHORIZATION", %{
action: action,
struct: extract_struct(struct)
})
end
#
# mensurements %{total, joins, leaves}
#
def handle_event([:d4_web, :presences], mensurements, _, _) do
log(:info, "PRESENCES", mensurements)
end
#
# D4 PAY (MERCADO PAGO)
#
def handle_event(
[:d4_pay, :request, :stop],
%{duration: duration},
%{response: response} = meta,
_
) do
sc = response.status_code
kd = if sc in [200, 201], do: :info, else: :error
# bd = if sc not in [200, 201], do: response.body, else: nil
log(kd, "D4PAY_REQUEST", %{
duration: parse_time(duration),
path: meta[:path],
status_code: sc
})
end
def handle_event([:d4_pay, :request, :exception], %{duration: duration}, meta, _) do
log(:error, "D4PAY_REQUEST", %{
path: meta[:path],
duration: parse_time(duration),
exception: Exception.message(meta.reason),
params: meta.params
})
end
#
# D4_PGLISTENER
#
def handle_event([:d4, :pglistener, :ok], _, meta, _) do
log(:info, "D4_PGLISTENER", meta)
end
def handle_event([:d4, :pglistener, :error], _, meta, _) do
log(:error, "D4_PGLISTENER", meta)
end
#
# D4_WEB RATE_LIMIT
#
def handle_event([:d4_web, :rate_limit], _, %{key: {path, _}, limit: limit, name: name}, _) do
log(:warning, "RATE_LIMIT", %{path: path, limit: limit, name: name})
end
def handle_event([:d4_web, :rate_limit], _, %{key: key, limit: limit, name: name}, _) do
key = inspect(key)
log(:warning, "RATE_LIMIT", %{key: key, limit: limit, name: name})
end
#
# Handle no handler
#
def handle_event(event, measurements, metadata, _config) do
log(:warning, "TELEMETRY", %{
message: "No telemetry handler for #{inspect(event)} with #{inspect(measurements)} and metadata #{inspect(Map.keys(metadata))}"
})
end
#
#
#
defp log(level, what, telemetry) do
Logger.log(level, fn -> {what, telemetry: telemetry} end)
end
defp parse_time(diff) do
us = System.convert_time_unit(diff || 0, :native, :microsecond)
div(us, 1000)
end
defp extract_struct(%_{id: id} = struct) do
"#{D4.Helpers.get_type_name(struct)}:#{id}"
end
defp extract_struct(%_{} = struct) do
"#{D4.Helpers.get_type_name(struct)}"
end
defp extract_struct(_) do
""
end
end