Weird N+1 select queries behaviour on put_assoc

In my changeset, I have the following behavior that connects objects together:

  def link_delegates(changeset) do
      changeset = %Changeset{changeset | data: Repo.preload(changeset.data, :rncp_delegates)}
      rncp_delegates = get_field(changeset, :rncp_delegates)
      included_delegates = get_field(changeset, :included_delegates)
      excluded_delegates = get_field(changeset, :excluded_delegates)
      delegates =
        (Enum.uniq(rncp_delegates ++ included_delegates) -- excluded_delegates)
        |> Enum.sort_by(&(&1.id))

      changeset
      |> put_assoc(:delegates, delegates)
  end

When I execute Repo.get(Certification, 2014) |> Certification.changeset() |> Repo.update(), all the preload go well, I end up with a list of %Delegate{}s but at Repo.update() time, before the actual SQL UPDATE happens, I get:

[debug] QUERY OK db=0.1ms idle=839.7ms
begin []
[debug] QUERY OK source="certifications" db=20.6ms
SELECT c0."id", c0."rncp_id", c0."is_rncp_active", c0."is_active", c0."slug", c0."acronym", c0."label", c0."level", c0."last_rncp_import_date", c0."end_of_rncp_validity", c0."activities", c0."abilities", c0."activity_area", c0."accessible_job_type", c0."newer_certification_id", c0."inserted_at", c0."updated_at", d1."id" FROM "certifications" AS c0 INNER JOIN "delegates" AS d1 ON d1."id" = ANY($1) INNER JOIN "certifications_delegates" AS c2 ON c2."delegate_id" = d1."id" WHERE (c2."certification_id" = c0."id") ORDER BY d1."id" [[15]]
[debug] QUERY OK source="certifications" db=72.5ms
SELECT c0."id", c0."rncp_id", c0."is_rncp_active", c0."is_active", c0."slug", c0."acronym", c0."label", c0."level", c0."last_rncp_import_date", c0."end_of_rncp_validity", c0."activities", c0."abilities", c0."activity_area", c0."accessible_job_type", c0."newer_certification_id", c0."inserted_at", c0."updated_at", d1."id" FROM "certifications" AS c0 INNER JOIN "delegates" AS d1 ON d1."id" = ANY($1) INNER JOIN "certifications_delegates" AS c2 ON c2."delegate_id" = d1."id" WHERE (c2."certification_id" = c0."id") ORDER BY d1."id" [[16]]
[debug] QUERY OK source="certifications" db=76.7ms
SELECT c0."id", c0."rncp_id", c0."is_rncp_active", c0."is_active", c0."slug", c0."acronym", c0."label", c0."level", c0."last_rncp_import_date", c0."end_of_rncp_validity", c0."activities", c0."abilities", c0."activity_area", c0."accessible_job_type", c0."newer_certification_id", c0."inserted_at", c0."updated_at", d1."id" FROM "certifications" AS c0 INNER JOIN "delegates" AS d1 ON d1."id" = ANY($1) INNER JOIN "certifications_delegates" AS c2 ON c2."delegate_id" = d1."id" WHERE (c2."certification_id" = c0."id") ORDER BY d1."id" [[18]]
[debug] QUERY OK source="certifications" db=47.3ms
SELECT c0."id", c0."rncp_id", c0."is_rncp_active", c0."is_active", c0."slug", c0."acronym", c0."label", c0."level", c0."last_rncp_import_date", c0."end_of_rncp_validity", c0."activities", c0."abilities", c0."activity_area", c0."accessible_job_type", c0."newer_certification_id", c0."inserted_at", c0."updated_at", d1."id" FROM "certifications" AS c0 INNER JOIN "delegates" AS d1 ON d1."id" = ANY($1) INNER JOIN "certifications_delegates" AS c2 ON c2."delegate_id" = d1."id" WHERE (c2."certification_id" = c0."id") ORDER BY d1."id" [[19]]
[debug] QUERY OK source="certifications" db=82.3ms
SELECT c0."id", c0."rncp_id", c0."is_rncp_active", c0."is_active", c0."slug", c0."acronym", c0."label", c0."level", c0."last_rncp_import_date", c0."end_of_rncp_validity", c0."activities", c0."abilities", c0."activity_area", c0."accessible_job_type", c0."newer_certification_id", c0."inserted_at", c0."updated_at", d1."id" FROM "certifications" AS c0 INNER JOIN "delegates" AS d1 ON d1."id" = ANY($1) INNER JOIN "certifications_delegates" AS c2 ON c2."delegate_id" = d1."id" WHERE (c2."certification_id" = c0."id") ORDER BY d1."id" [[23]]
[debug] QUERY OK source="certifications" db=19.5ms
SELECT c0."id", c0."rncp_id", c0."is_rncp_active", c0."is_active", c0."slug", c0."acronym", c0."label", c0."level", c0."last_rncp_import_date", c0."end_of_rncp_validity", c0."activities", c0."abilities", c0."activity_area", c0."accessible_job_type", c0."newer_certification_id", c0."inserted_at", c0."updated_at", d1."id" FROM "certifications" AS c0 INNER JOIN "delegates" AS d1 ON d1."id" = ANY($1) INNER JOIN "certifications_delegates" AS c2 ON c2."delegate_id" = d1."id" WHERE (c2."certification_id" = c0."id") ORDER BY d1."id" [[25]]
[debug] QUERY OK source="certifications" db=56.2ms
SELECT c0."id", c0."rncp_id", c0."is_rncp_active", c0."is_active", c0."slug", c0."acronym", c0."label", c0."level", c0."last_rncp_import_date", c0."end_of_rncp_validity", c0."activities", c0."abilities", c0."activity_area", c0."accessible_job_type", c0."newer_certification_id", c0."inserted_at", c0."updated_at", d1."id" FROM "certifications" AS c0 INNER JOIN "delegates" AS d1 ON d1."id" = ANY($1) INNER JOIN "certifications_delegates" AS c2 ON c2."delegate_id" = d1."id" WHERE (c2."certification_id" = c0."id") ORDER BY d1."id" [[26]]
[debug] QUERY OK source="certifications" db=57.8ms
SELECT c0."id", c0."rncp_id", c0."is_rncp_active", c0."is_active", c0."slug", c0."acronym", c0."label", c0."level", c0."last_rncp_import_date", c0."end_of_rncp_validity", c0."activities", c0."abilities", c0."activity_area", c0."accessible_job_type", c0."newer_certification_id", c0."inserted_at", c0."updated_at", d1."id" FROM "certifications" AS c0 INNER JOIN "delegates" AS d1 ON d1."id" = ANY($1) INNER JOIN "certifications_delegates" AS c2 ON c2."delegate_id" = d1."id" WHERE (c2."certification_id" = c0."id") ORDER BY d1."id" [[55]]

which happens for unknown reasons as everything’s already loaded.

I’m sure this has to do with the put_assoc as if I remove it, the behavior doesn’t happen.

Anyone has an idea? I’m aware this can be very context-specific hence not easy to debug, but any hint on why SELECT queries happen on put_assoc is welcome!

Thanks

I continued my investigations and realized that:

delegates = Repo.all(Ecto.Query.from(d in Delegate, where: d.id in ^[191,190,192,193]))

Repo.get(Certification, 5426) 
|> Repo.preload(:delegates) 
|> Ecto.Changeset.change() 
|> Ecto.Changeset.put_assoc(:delegates, delegates)

if there is no change, it returns:

#Ecto.Changeset<action: nil, changes: %{}, errors: [],
 data: #Vae.Certification<>, valid?: true>

But if one association changes, I get a list of changes for all associations:

#Ecto.Changeset<
  action: nil,
  changes: %{
    delegates: [
      #Ecto.Changeset<action: :replace, changes: %{}, errors: [],
       data: #Vae.Delegate<>, valid?: true>,
      #Ecto.Changeset<action: :update, changes: %{}, errors: [],
       data: #Vae.Delegate<>, valid?: true>,
      #Ecto.Changeset<action: :update, changes: %{}, errors: [],
       data: #Vae.Delegate<>, valid?: true>,
      #Ecto.Changeset<action: :update, changes: %{}, errors: [],
       data: #Vae.Delegate<>, valid?: true>
    ]
  },
  errors: [],
  data: #Certification<>,
  valid?: true
>

And then when the Repo.update() happens, it loops over the delegates list, which for some unknown reason reload the object.

changeset.changes.delegates |> List.last() |> Repo.update()

reloads the %Delegate{}.

So the question is: why Repo.update(changeset) with action: :update and changes: %{} does anything?

Shouldn’t those be filtered out? Or is there anything else to do?

Thanks

FI, I just wrote:

      def put_assoc_no_useless_updates(changeset, key, value) do
        %Ecto.Changeset{changes: changes, data: data} = changeset = put_assoc(changeset, key, value)
        %{changeset | changes: Vae.Map.map_values(changes, fn {k, v} ->
          case v do
            v when is_list(v) ->
              Enum.reject(v, fn el ->
                case el do
                  %Ecto.Changeset{action: :update, changes: %{}, data: assoc} -> Enum.member?(Map.get(data, key), assoc)
                  _ -> false
                end
              end)
            v -> v
          end
        end) |> Enum.into(%{})}
      end

and it seems to do the job.

Comments appreciated. :smiley:

I’d guess put_assoc is loading the Delegate rows to figure out which ones need updating. Are you sure put_assoc on :delegates is what you want? Looks like there’s a join table involved - if you just want to link existing records you should be touching the join records instead.