Ecto: Query composition vs. subquery, difference?

Elixir/Ecto newbie question. It seems to me these two don’t have a difference.

query = from q in MyApp.Books
query2 = from q2 in query, select q2.id
query3 = from q3 in subquery(query), select q3.id

query2 and query3 seem to do the same thing. What can subquery do that simple query composition can’t?

Subqueries are literally executed separately, while composition will just compose everything into one result table. This only becomes important when you do joins and stuff like group-by and such.

1 Like
$ iex -S mix
Erlang/OTP 21 [erts-10.0.7] [source] [64-bit] [smp:8:8] [ds:8:8:10] [async-threads:1] [hipe] [dtrace]

Interactive Elixir (1.7.3) - press Ctrl+C to exit (type h() ENTER for help)
iex(1)> alias MusicDB.{Repo,Album}
[MusicDB.Repo, MusicDB.Album]
iex(2)> import Ecto.Query
Ecto.Query
iex(3)> query = from(q in Album)
#Ecto.Query<from a in MusicDB.Album>
iex(4)> query2 = from(q2 in query, [select: q2.id])
#Ecto.Query<from a in MusicDB.Album, select: a.id>
iex(5)> query3 = from(q3 in subquery(query), [select: q3.id])
#Ecto.Query<from a in subquery(from a in MusicDB.Album), select: a.id>
iex(6)> Ecto.Adapters.SQL.to_sql(:all, Repo, query)
{"SELECT a0.\"id\", a0.\"title\", a0.\"inserted_at\", a0.\"updated_at\", a0.\"artist_id\" FROM \"albums\" AS a0",
 []}
iex(7)> Ecto.Adapters.SQL.to_sql(:all, Repo, query2)
{"SELECT a0.\"id\" FROM \"albums\" AS a0", []}
iex(8)> Ecto.Adapters.SQL.to_sql(:all, Repo, query3)
{"SELECT s0.\"id\" FROM (SELECT a0.\"id\" AS \"id\", a0.\"title\" AS \"title\", a0.\"inserted_at\" AS \"inserted_at\", a0.\"updated_at\" AS \"updated_at\", a0.\"artist_id\" AS \"artist_id\" FROM \"albums\" AS a0) AS s0",
 []}
iex(9)> Repo.all(query2)

15:36:37.711 [debug] QUERY OK source="albums" db=1.4ms
SELECT a0."id" FROM "albums" AS a0 []
[1, 2, 3, 4, 5]
iex(10)> Repo.all(query3)

15:36:44.797 [debug] QUERY OK db=2.0ms
SELECT s0."id" FROM (SELECT a0."id" AS "id", a0."title" AS "title", a0."inserted_at" AS "inserted_at", a0."updated_at" AS "updated_at", a0."artist_id" AS "artist_id" FROM "albums" AS a0) AS s0 []
[1, 2, 3, 4, 5]
iex(11)> 

So roughly it’s:

SELECT
  albums.id
FROM
  albums

versus

SELECT
  SUB.id
FROM (
  SELECT
    albums.*
  FROM
    albums
) AS SUB

or alternately

$ psql music_db
psql (10.5)
Type "help" for help.

music_db=# SELECT albums.id FROM albums;
 id 
----
  1
  2
  3
  4
  5
(5 rows)

music_db=# SELECT SUB.id FROM (SELECT albums.* FROM albums) AS SUB;
 id 
----
  1
  2
  3
  4
  5
(5 rows)

music_db=# 

http://www.postgresqltutorial.com/postgresql-subquery/

Subquery example: Find album ids that have above average total durations:

$ iex -S mix
Erlang/OTP 21 [erts-10.0.7] [source] [64-bit] [smp:8:8] [ds:8:8:10] [async-threads:1] [hipe] [dtrace]

Interactive Elixir (1.7.3) - press Ctrl+C to exit (type h() ENTER for help)
iex(1)> alias MusicDB.{Repo,Track}
[MusicDB.Repo, MusicDB.Track]
iex(2)> import Ecto.Query
Ecto.Query
iex(3)> query_totals = from(t in Track, [
...(3)>   group_by: t.album_id,
...(3)>   select: %{id: t.album_id, total: sum(t.duration)}
...(3)> ])
#Ecto.Query<from t in MusicDB.Track, group_by: [t.album_id],
 select: %{id: t.album_id, total: sum(t.duration)}>
iex(4)> query_avg = from(d in subquery(query_totals),[
...(4)>   select: %{avg_duration: avg(d.total)}
...(4)> ])
#Ecto.Query<from t in subquery(from t in MusicDB.Track,
  group_by: [t.album_id],
  select: %{id: t.album_id, total: sum(t.duration)}),
 select: %{avg_duration: avg(t.total)}>
iex(5)> query_longer = from(d in subquery(query_totals), [
...(5)>   inner_join: a in subquery(query_avg), on: a.avg_duration < d.total,
...(5)>   select: d.id
...(5)> ])
#Ecto.Query<from t0 in subquery(from t in MusicDB.Track,
  group_by: [t.album_id],
  select: %{id: t.album_id, total: sum(t.duration)}),
 join: t1 in subquery(from t in subquery(from t in MusicDB.Track,
  group_by: [t.album_id],
  select: %{id: t.album_id, total: sum(t.duration)}),
  select: %{avg_duration: avg(t.total)}),
 on: t1.avg_duration < t0.total, select: t0.id>
iex(6)> Ecto.Adapters.SQL.to_sql(:all, Repo, query_longer)
{"SELECT s0.\"id\" FROM (SELECT t0.\"album_id\" AS \"id\", sum(t0.\"duration\") AS \"total\" FROM \"tracks\" AS t0 GROUP BY t0.\"album_id\") AS s0 INNER JOIN (SELECT avg(s0.\"total\") AS \"avg_duration\" FROM (SELECT t0.\"album_id\" AS \"id\", sum(t0.\"duration\") AS \"total\" FROM \"tracks\" AS t0 GROUP BY t0.\"album_id\") AS s0) AS s1 ON s1.\"avg_duration\" < s0.\"total\"",
 []}
iex(7)> Repo.all(query_longer)

16:37:19.439 [debug] QUERY OK db=2.7ms
SELECT s0."id" FROM (SELECT t0."album_id" AS "id", sum(t0."duration") AS "total" FROM "tracks" AS t0 GROUP BY t0."album_id") AS s0 INNER JOIN (SELECT avg(s0."total") AS "avg_duration" FROM (SELECT t0."album_id" AS "id", sum(t0."duration") AS "total" FROM "tracks" AS t0 GROUP BY t0."album_id") AS s0) AS s1 ON s1."avg_duration" < s0."total" []
[3, 2]
iex(8)> Repo.all(query_avg)

16:37:19.446 [debug] QUERY OK db=5.0ms
SELECT avg(s0."total") FROM (SELECT t0."album_id" AS "id", sum(t0."duration") AS "total" FROM "tracks" AS t0 GROUP BY t0."album_id") AS s0 []
[%{avg_duration: #Decimal<3232.6000000000000000>}]
iex(9)> Repo.all(query_totals)

16:37:19.450 [debug] QUERY OK source="tracks" db=1.7ms
SELECT t0."album_id", sum(t0."duration") FROM "tracks" AS t0 GROUP BY t0."album_id" []
[
  %{id: 3, total: 3456},
  %{id: 5, total: 3057},
  %{id: 4, total: 2540},
  %{id: 2, total: 4491},
  %{id: 1, total: 2619}
]
iex(10)> 
1 Like