Converting SQL into Ecto

Hello Everyone,

I would like to ask for help converting the following into an Ecto equivalent:

SELECT
emails.*
FROM
(SELECT thread_id, MAX(internal_date) AS internal_date
FROM emails
GROUP BY thread_id) AS last_email
INNER JOIN
emails
ON
emails.thread_id = last_email.thread_id AND
emails.internal_date = last_email.internal_date
ORDER BY last_email.internal_date DESC

The tricky part to me is this bit:

FROM
(SELECT thread_id, MAX(internal_date) AS internal_date
FROM emails
GROUP BY thread_id) AS last_email

I’m not sure how you achieve the AS last_email part so that it can be referenced later in the query in Ecto.

Any insight would be greatly appreciated.

:wave:

You might be able to use a subquery to express FROM (SELECT ... as a join. Or maybe a fragment.

last_email =
  "emails"
  |> group_by([e], e.thread_id)
  |> select([e], %{thread_id: e.thread_id, internal_date: max(e.internal_date)})

query =
  "emails"
  |> join(:left, [e], l in subquery(last_email), on: e.thread_id == l.thread_id and e.internal_date == l.internal_date)
  |> select([:id])

Repo.to_sql(:all, query)

results in

SELECT e0."id"
FROM "emails" AS e0
LEFT OUTER JOIN (
  SELECT e0."thread_id" AS "thread_id", max(e0."internal_date") AS "internal_date"
  FROM "emails" AS e0
  GROUP BY e0."thread_id"
) AS s1 ON (e0."thread_id" = s1."thread_id") AND (e0."internal_date" = s1."internal_date");
1 Like

BTW, you can write this using “distinct on” without joins.

# file: music_db/priv/repo/playground.exs
#
# http://www.pragmaticprogrammer.com/titles/wmecto
# https://pragprog.com/titles/wmecto/source_code
# http://media.pragprog.com/titles/wmecto/code/wmecto-code.zip
#
# pg_ctl -D /usr/local/var/postgres start
# mix format ./priv/repo/playground.exs
# mix run ./priv/repo/playground.exs
#

defmodule AppInfo do
  def string() do
    Application.loaded_applications()
    |> Enum.map(&to_app_keyword/1)
    |> Enum.sort_by(&map_app_name/1)
    |> Enum.map_join(", ", &app_keyword_to_string/1)
  end

  defp to_app_keyword({app, _, vsn}),
    do: {app, vsn}

  defp app_keyword_to_string({app, vsn}),
    do: "#{app}: #{vsn}"

  defp map_app_name({app, _}),
    do: app
end

defmodule Playground do
  import Ecto.Query
  alias MusicDB.Repo
  alias MusicDB.{Track}

  # Using GROUP BY / MAX
  def query1 do
    last_track =
      from(t in Track,
        group_by: t.album_id,
        select: %{album_id: t.album_id, index: max(t.index)}
      )

    from(l in subquery(last_track),
      join: t in Track,
      on: [album_id: l.album_id, index: l.index],
      order_by: [desc: t.index],
      select: t
    )
  end

  # Using DISTINCT ON
  # https://hexdocs.pm/ecto/Ecto.Query.html#distinct/3-keywords-examples
  def query2 do
    last_track =
      from(t in Track,
        distinct: [asc: t.album_id],
        order_by: [desc: t.index],
        select: t
      )

    from(t in subquery(last_track),
      order_by: [desc: :index]
    )
  end

  def play(query) do
    # Ecto.Adapters.SQL.to_sql(:all, Repo, query)

    query
    |> Repo.all()
  end
end

IO.puts(AppInfo.string())
IO.inspect(Playground.play(Playground.query1()))
IO.inspect(Playground.play(Playground.query2()))
$ mix run ./priv/repo/playground.exs
asn1: 5.0.7, compiler: 7.2.7, connection: 1.0.4, crypto: 4.3.3, db_connection: 2.0.3, decimal: 1.6.0, ecto: 3.0.5, ecto_sql: 3.0.3, elixir: 1.7.4, hex: 0.18.2, inets: 7.0.2, kernel: 6.1.1, logger: 1.7.4, mix: 1.7.4, music_db: 0.1.0, poison: 3.1.0, postgrex: 0.14.1, public_key: 1.6.3, ssl: 9.0.3, stdlib: 3.6, telemetry: 0.2.0

12:53:11.377 [debug] QUERY OK db=4.4ms decode=0.6ms queue=1.4ms
SELECT
  t1."id",
  t1."title",
  t1."duration",
  t1."index",
  t1."number_of_plays",
  t1."inserted_at",
  t1."updated_at",
  t1."album_id"
FROM
  (
    SELECT
      t0."album_id" AS "album_id",
      max(t0."index") AS "index"
    FROM "tracks" AS t0
    GROUP BY t0."album_id"
  ) AS s0
  INNER JOIN "tracks" AS t1 ON (t1."album_id" = s0."album_id") AND (t1."index" = s0."index")
ORDER BY t1."index" DESC []

[
  %MusicDB.Track{
    __meta__: #Ecto.Schema.Metadata<:loaded, "tracks">,
    album: #Ecto.Association.NotLoaded<association :album is not loaded>,
    album_id: 3,
    duration: 489,
    duration_string: nil,
    id: 20,
    index: 10,
    inserted_at: ~N[2018-11-06 13:40:37],
    number_of_plays: 0,
    title: "All of You",
    updated_at: ~N[2018-11-06 13:40:37]
  },
  %MusicDB.Track{
    __meta__: #Ecto.Schema.Metadata<:loaded, "tracks">,
    album: #Ecto.Association.NotLoaded<association :album is not loaded>,
    album_id: 4,
    duration: 325,
    duration_string: nil,
    id: 29,
    index: 9,
    inserted_at: ~N[2018-11-06 13:40:37],
    number_of_plays: 0,
    title: "Blue In Green",
    updated_at: ~N[2018-11-06 13:40:37]
  },
  %MusicDB.Track{
    __meta__: #Ecto.Schema.Metadata<:loaded, "tracks">,
    album: #Ecto.Association.NotLoaded<association :album is not loaded>,
    album_id: 1,
    duration: 481,
    duration_string: nil,
    id: 5,
    index: 5,
    inserted_at: ~N[2018-11-06 13:40:37],
    number_of_plays: 0,
    title: "Flamenco Sketches",
    updated_at: ~N[2018-11-06 13:40:37]
  },
  %MusicDB.Track{
    __meta__: #Ecto.Schema.Metadata<:loaded, "tracks">,
    album: #Ecto.Association.NotLoaded<association :album is not loaded>,
    album_id: 2,
    duration: 1061,
    duration_string: nil,
    id: 10,
    index: 5,
    inserted_at: ~N[2018-11-06 13:40:37],
    number_of_plays: 0,
    title: "No Blues",
    updated_at: ~N[2018-11-06 13:40:37]
  },
  %MusicDB.Track{
    __meta__: #Ecto.Schema.Metadata<:loaded, "tracks">,
    album: #Ecto.Association.NotLoaded<association :album is not loaded>,
    album_id: 5,
    duration: 844,
    duration_string: nil,
    id: 33,
    index: 4,
    inserted_at: ~N[2018-11-06 13:40:37],
    number_of_plays: 0,
    title: "Song Of Songs",
    updated_at: ~N[2018-11-06 13:40:37]
  }
]

12:53:11.388 [debug] QUERY OK db=1.0ms queue=1.3ms
SELECT
  s0."id",
  s0."title",
  s0."duration",
  s0."index",
  s0."number_of_plays",
  s0."inserted_at",
  s0."updated_at",
  s0."album_id"
FROM
  (
    SELECT DISTINCT ON (t0."album_id")
      t0."id" AS "id",
      t0."title" AS "title",
      t0."duration" AS "duration",
      t0."index" AS "index",
      t0."number_of_plays" AS "number_of_plays",
      t0."inserted_at" AS "inserted_at",
      t0."updated_at" AS "updated_at",
      t0."album_id" AS "album_id"
    FROM "tracks" AS t0
    ORDER BY t0."album_id", t0."index" DESC
  ) AS s0
ORDER BY s0."index" DESC []

[
  %MusicDB.Track{
    __meta__: #Ecto.Schema.Metadata<:loaded, "tracks">,
    album: #Ecto.Association.NotLoaded<association :album is not loaded>,
    album_id: 3,
    duration: 489,
    duration_string: nil,
    id: 20,
    index: 10,
    inserted_at: ~N[2018-11-06 13:40:37],
    number_of_plays: 0,
    title: "All of You",
    updated_at: ~N[2018-11-06 13:40:37]
  },
  %MusicDB.Track{
    __meta__: #Ecto.Schema.Metadata<:loaded, "tracks">,
    album: #Ecto.Association.NotLoaded<association :album is not loaded>,
    album_id: 4,
    duration: 325,
    duration_string: nil,
    id: 29,
    index: 9,
    inserted_at: ~N[2018-11-06 13:40:37],
    number_of_plays: 0,
    title: "Blue In Green",
    updated_at: ~N[2018-11-06 13:40:37]
  },
  %MusicDB.Track{
    __meta__: #Ecto.Schema.Metadata<:loaded, "tracks">,
    album: #Ecto.Association.NotLoaded<association :album is not loaded>,
    album_id: 1,
    duration: 481,
    duration_string: nil,
    id: 5,
    index: 5,
    inserted_at: ~N[2018-11-06 13:40:37],
    number_of_plays: 0,
    title: "Flamenco Sketches",
    updated_at: ~N[2018-11-06 13:40:37]
  },
  %MusicDB.Track{
    __meta__: #Ecto.Schema.Metadata<:loaded, "tracks">,
    album: #Ecto.Association.NotLoaded<association :album is not loaded>,
    album_id: 2,
    duration: 1061,
    duration_string: nil,
    id: 10,
    index: 5,
    inserted_at: ~N[2018-11-06 13:40:37],
    number_of_plays: 0,
    title: "No Blues",
    updated_at: ~N[2018-11-06 13:40:37]
  },
  %MusicDB.Track{
    __meta__: #Ecto.Schema.Metadata<:loaded, "tracks">,
    album: #Ecto.Association.NotLoaded<association :album is not loaded>,
    album_id: 5,
    duration: 844,
    duration_string: nil,
    id: 33,
    index: 4,
    inserted_at: ~N[2018-11-06 13:40:37],
    number_of_plays: 0,
    title: "Song Of Songs",
    updated_at: ~N[2018-11-06 13:40:37]
  }
]
$
1 Like

Thank you so much for your very thorough solution. I greatly appreciate the two approaches you showed. It’s funny, I was using another approach that you showed for a different question on here. Then your answer came in. The solution is fast, and produces the result that I need so thank you again.

I simply “executed” the two suggestions by @idi527 and @gleb that were already present - in this particular case I just wanted to try out PostgreSQL’s DISTINCT ON.

I’d probably stick with the GROUP BY/MAX approach - likely because I’ve been already been familiar with self-joins to solve this particular type of problem. Still not sure that DISTINCT ON needs to exist (I tend to stick to ANSI SQL capabilities).