How to create the right Ecto query for the given SQL query

peerreynders · December 5, 2018, 8:37pm

Based on the information given it seems you have declared a many_to_many association across

planner <-planner_id- planner_dishes -dish_id-> dishes

that is responsible for the “extra” JOIN that you are seeing in the Ecto generated SQL when you use assoc(planner, :dishes) - making it impossible to split the query where you need to.

So either stop using that association and/or reformulate the query.

Example: Starting with a monolithic query:

# file: music_db/priv/repo/playground.exs
#
# http://www.pragmaticprogrammer.com/titles/wmecto
# https://pragprog.com/titles/wmecto/source_code
# http://media.pragprog.com/titles/wmecto/code/wmecto-code.zip
#
# pg_ctl -D /usr/local/var/postgres start
# mix format ./priv/repo/playground.exs
# mix run ./priv/repo/playground.exs
#

defmodule AppInfo do
  def string() do
    Application.loaded_applications()
    |> Enum.map(&to_app_keyword/1)
    |> Enum.sort_by(&map_app_name/1)
    |> Enum.map_join(", ", &app_keyword_to_string/1)
  end

  defp to_app_keyword({app, _, vsn}),
    do: {app, vsn}

  defp app_keyword_to_string({app, vsn}),
    do: Atom.to_string(app) <> ": " <> to_string(vsn)

  defp map_app_name({app, _}),
    do: app
end

# SELECT a.name, COUNT(g_jazz.id), COUNT(g_live.id) FROM artists a
# LEFT OUTER JOIN albums b ON a.id = b.artist_id
# LEFT OUTER JOIN albums_genres ag ON b.id = ag.album_id
# LEFT OUTER JOIN genres g_jazz ON ag.genre_id = g_jazz.id AND g_jazz.name = 'jazz'
# LEFT OUTER JOIN genres g_live ON ag.genre_id = g_live.id AND g_live.name = 'live'
# GROUP BY a.id;

defmodule Playground do
  import Ecto.Query
  alias MusicDB.Repo
  alias MusicDB.{Artist, Album, AlbumGenre, Genre}

  def play do
    IO.puts(AppInfo.string())

    base_query =
      from(a in Artist,
        left_join: b in Album,
        on: a.id == b.artist_id,
        left_join: ag in AlbumGenre,
        on: b.id == ag.album_id,
        left_join: g_jazz in Genre,
        on: ag.genre_id == g_jazz.id and g_jazz.name == "jazz",
        left_join: g_live in Genre,
        on: ag.genre_id == g_live.id and g_live.name == "live",
        group_by: a.id,
        select: %{name: a.name, jazz: count(g_jazz.id), live: count(g_live.id)},
        order_by: [asc: a.name]
      )

    results = Repo.all(base_query)

    results
  end
end

IO.inspect(Playground.play())

It can be broken down into composable parts:

defmodule Playground do
  import Ecto.Query
  alias MusicDB.Repo
  alias MusicDB.{Artist, Album, AlbumGenre, Genre}

  def base_query do
    from(a in Artist,
      left_join: b in Album,
      on: a.id == b.artist_id,
      left_join: ag in AlbumGenre,
      on: b.id == ag.album_id,
      group_by: a.id,
      select: %{name: a.name},
      order_by: [asc: a.name]
    )
  end

  def count_jazz(query) do
    from([a, b, ag] in query,
      left_join: g_jazz in Genre,
      on: ag.genre_id == g_jazz.id and g_jazz.name == "jazz",
      select_merge: %{jazz: count(g_jazz.id)}
    )
  end

  def count_live(query) do
    from([a, b, ag] in query,
      left_join: g_live in Genre,
      on: ag.genre_id == g_live.id and g_live.name == "live",
      select_merge: %{live: count(g_live.id)}
    )
  end

  def play do
    IO.puts(AppInfo.string())

    results =
      base_query()
      |> count_jazz()
      |> count_live()
      |> Repo.all()

    results
  end
end

$ mix run priv/repo/playground.exs
asn1: 5.0.7, compiler: 7.2.6, connection: 1.0.4, crypto: 4.3.3, db_connection: 2.0.1, decimal: 1.5.0, ecto: 3.0.1, ecto_sql: 3.0.0, elixir: 1.7.4, hex: 0.18.2, inets: 7.0.2, kernel: 6.1, logger: 1.7.4, mix: 1.7.4, music_db: 0.1.0, poison: 3.1.0, postgrex: 0.14.0, public_key: 1.6.2, ssl: 9.0.3, stdlib: 3.6, telemetry: 0.2.0

15:33:27.953 [debug] QUERY OK source="artists" db=1.3ms decode=0.7ms queue=1.2ms
SELECT a0."name", count(g3."id"), count(g4."id") FROM "artists" AS a0 LEFT OUTER JOIN "albums" AS a1 ON a0."id" = a1."artist_id" LEFT OUTER JOIN "albums_genres" AS a2 ON a1."id" = a2."album_id" LEFT OUTER JOIN "genres" AS g3 ON (a2."genre_id" = g3."id") AND (g3."name" = 'jazz') LEFT OUTER JOIN "genres" AS g4 ON (a2."genre_id" = g4."id") AND (g4."name" = 'live') GROUP BY a0."id" ORDER BY a0."name" []
[
  %{jazz: 2, live: 0, name: "Bill Evans"},
  %{jazz: 1, live: 1, name: "Bobby Hutcherson"},
  %{jazz: 2, live: 1, name: "Miles Davis"}
]
$

However with respect to associations there can be problems like given the following schemas:

  schema "artists" do
    field(:name)
    field(:birth_date, :date)
    field(:death_date, :date)
    timestamps()

    has_many(:albums, Album)
    has_many(:tracks, through: [:albums, :tracks])
  end

  schema "albums" do
    field(:title, :string)
    timestamps()

    belongs_to(:artist, Artist)
    has_many(:tracks, Track)
    many_to_many(:genres, Genre, join_through: "albums_genres")
  end

  schema "albums_genres" do
    belongs_to(:albums, Album)
    belongs_to(:genres, Genre)
  end

  schema "genres" do
    field(:name)
    field(:wiki_tag)
    timestamps()

    many_to_many(:albums, Album, join_through: "albums_genres")
  end

In this case the many_to_many association is getting in the way of being able to “split” the query in the right place. So in order to use associations the query has to be modified:

defmodule Playground do
  import Ecto.Query
  alias MusicDB.Repo
  alias MusicDB.{Artist}

  def base_query do
    from(a in Artist,
      left_join: b in assoc(a, :albums),
      left_join: g in assoc(b, :genres),
      group_by: a.id,
      select: %{name: a.name},
      order_by: [asc: a.name]
    )
  end

  def count_jazz(query) do
    from([a, b, g] in query,
      select_merge: %{jazz: filter(count(1), g.name == "jazz")}
    )
  end

  def count_live(query) do
    from([a, b, g] in query,
      select_merge: %{live: filter(count(1), g.name == "live")}
    )
  end

  def play do
    IO.puts(AppInfo.string())

    results =
      base_query()
      |> count_jazz()
      |> count_live()
      |> Repo.all()

    results
  end
end

$ mix run priv/repo/playground.exs
asn1: 5.0.7, compiler: 7.2.6, connection: 1.0.4, crypto: 4.3.3, db_connection: 2.0.1, decimal: 1.5.0, ecto: 3.0.1, ecto_sql: 3.0.0, elixir: 1.7.4, hex: 0.18.2, inets: 7.0.2, kernel: 6.1, logger: 1.7.4, mix: 1.7.4, music_db: 0.1.0, poison: 3.1.0, postgrex: 0.14.0, public_key: 1.6.2, ssl: 9.0.3, stdlib: 3.6, telemetry: 0.2.0

15:17:31.436 [debug] QUERY OK source="artists" db=1.4ms decode=0.7ms queue=1.9ms
SELECT a0."name", count(1) FILTER (WHERE g2."name" = 'jazz'), count(1) FILTER (WHERE g2."name" = 'live') FROM "artists" AS a0 LEFT OUTER JOIN "albums" AS a1 ON a1."artist_id" = a0."id" LEFT OUTER JOIN "albums_genres" AS a3 ON a3."album_id" = a1."id" LEFT OUTER JOIN "genres" AS g2 ON a3."genre_id" = g2."id" GROUP BY a0."id" ORDER BY a0."name" []
[
  %{jazz: 2, live: 0, name: "Bill Evans"},
  %{jazz: 1, live: 1, name: "Bobby Hutcherson"},
  %{jazz: 2, live: 1, name: "Miles Davis"}
]
$