Ecto query without FROM

Coffei · July 31, 2023, 2:57pm

Hi!

I have some old SQL queries that I’d like to migrate to Ecto and I found that I don’t know how to do that The queries use multiple CTEs and use Postgres JSON functions to format the result into a single JSON, so the app just uses the result and doesn’t need to do any post-processing. It looks something like this:

WITH selected_posts AS (SELECT id, type FROM posts WHERE category = $1),
     comments AS (SELECT id, text FROM selected_posts sp INNER JOIN comments c 
                  ON c.post_id = sp.id 
                  ORDER BY c.score DESC),
     posts_agg AS (SELECT type, count(id) FROM selected_posts GROUP BY type)
SELECT json_build_object(
  'posts_aggregated', coalesce((SELECT array_agg(json_build_object(
          'type', type
          'count', count
        )) FROM posts_agg), ARRAY[]::json[]),
  'posts_count', SELECT COUNT(id) FROM selected_posts,
  'top_comments', coalesce((SELECT array_agg(json_build_object(
          'author', author
          'text', text
        )) FROM comments LIMIT 5), ARRAY[]::json[])
)

The gist is that query selected_posts is evaluated only once while extracting different information from the query. Note this is not the real thing, it’s only an example to illustrate the approach.

The pure Ecto solution I started with was to have separate query for every field in the response (in this case 3 queries for posts_aggregated, posts_count and top_comments. That works great most of the time, but if the first query, that all other originate from, takes a lot of time and returns lots of rows this work is duplicated in every query. In the CTE-query Postgres could figure this out and only fetch it once (or you could even materialize the CTE manually to enforce this behaviour).

So I wanted to rewrite the query in it’s original form in Ecto, but I am not sure how to write only a select without from. E.g. is it possible to write SELECT 1 in Ecto?

The only workaround I found was to use UNNEST(ARRAY[1]) as from, something like the following. But this seems like a hack, is there a better solution?

from(
  x in fragment("UNNEST(ARRAY[1])"), 
  select: %{a: subquery(...), b: subquery(...), c: subquery(...)}
)

ruslandoga · July 31, 2023, 3:17pm

You can probably select from a cte or a subquery:

defmodule YeTest do
  use ExUnit.Case

  setup do
    pid = Ecto.Adapters.SQL.Sandbox.start_owner!(Ye.Repo, shared: true)
    on_exit(fn -> Ecto.Adapters.SQL.Sandbox.stop_owner(pid) end)
  end

  import Ecto.Query

  @result """
  SELECT json_build_object(
    'posts_aggregated', coalesce((SELECT array_agg(json_build_object('type', type, 'count', count)) FROM posts_agg), ARRAY[]::json[]),
    'posts_count', (SELECT COUNT(id) FROM selected_posts),
    'top_comments', coalesce((SELECT array_agg(json_build_object('author', author, 'text', text)) FROM comments LIMIT 5), ARRAY[]::json[])
  ) AS json
  """

  test "cte" do
    selected_posts =
      from(p in "posts", where: p.category == ^"funny", select: map(p, [:id, :type]))

    comments =
      from(p in "selected_posts",
        inner_join: c in "comments",
        on: c.post_id == p.id,
        order_by: [desc: c.score],
        select: map(c, [:id, :text, :author])
      )

    posts_agg =
      from(p in "selected_posts", group_by: p.type, select: %{type: p.type, count: count(p.id)})

    query =
      "result"
      |> with_cte("selected_posts", as: ^selected_posts)
      |> with_cte("comments", as: ^comments)
      |> with_cte("posts_agg", as: ^posts_agg)
      |> with_cte("result", as: fragment(@result))
      # json needs to be cast to string otherwise Ecto would decode it
      |> select([r], %{json: type(r.json, :string)})

    assert Ye.Repo.all(query) == [
             %{
               json:
                 "{\"posts_aggregated\" : [{\"type\" : \"text\", \"count\" : 1},{\"type\" : \"image\", \"count\" : 1}], \"posts_count\" : 2, \"top_comments\" : [{\"author\" : \"author 2\", \"text\" : \"comment 2\"},{\"author\" : \"author 0\", \"text\" : \"comment 0\"}]}"
             }
           ]
  end
end

json_build_object can be made “dynamic” with a fragment + literal like here but that’s a hack.

mayel · July 31, 2023, 3:33pm

Have you tried MyRepo.query (bypassing Ecto.Query): Ecto.Adapters.SQL — Ecto SQL v3.10.1

LostKobrakai · July 31, 2023, 5:50pm

defmodule MyFragments do
  @doc "sql array_agg"
  defmacro array_agg(field) do
    quote do
      fragment("array_agg(?)", unquote(field))
    end
  end

  @doc "sql json_build_object"
  defmacro json_build_object(pairs) do
    list = Enum.flat_map(pairs, fn {key, value} -> [Atom.to_string(key), value] end)
    length = length(list)
    fragment = "?" |> List.duplicate(length) |> Enum.join(", ")

    quote do
      fragment(
        unquote("json_build_object(#{fragment})"),
        unquote_splicing(list)
      )
    end
  end
end

import MyFragments

category = "something"

selected_posts = 
  from p in "posts", 
    where: p.category == ^category, 
    select: %{id: p.id, type: p.type}

comments = 
  from sp in "selected_posts",
   join: c in "comments",
   on: c.post_id == sp.id,
   order_by: [desc: c.score],
   select: %{id: sp.id, text: c.id}

post_agg = 
  from sp in "selected_posts",
    group_by: sp.type,
    select: %{
      type: sp.type,
      count: count(sp.id)
    }

posts_aggregated = 
  from pa in "posts_agg", 
    select: 
      json_build_object(
        type: pa.type,
        count: pa.count
      )
      |> array_agg()

post_count = from sp in "selected_posts", select: count(sp.id)

top_comments = 
  from c in "comments", 
    limit: 5,
    select: 
      json_build_object(
        author: c.author,
        text: c.text
      )
      |> array_agg()

query = 
  from f in json_build_object(
    posts_aggregated: coalesce(subquery(posts_aggregated), type([], {:array, :json})),
    posts_count: subquery(post_count),
    top_comments: coalesce(subquery(top_comments), type([], {:array, :json}))
  )
  |> with_cte("selected_posts", as: ^selected_posts)
  |> with_cte("comments", as: ^comments)
  |> with_cte("posts_agg", as: ^post_agg)
  |> select(f)

This is quite the mix between CTE and subqueries, but that should be it for the most part. The most important thing to do for such complex queries really is a few additional macros around functions, so you don’t need to litter fragments all over the place.

Coffei · August 1, 2023, 12:57pm

Thanks all.

@ruslandoga Replacing the final select with SQL fragment is an option. I was trying to stick with native Ecto.Query as much as possible.

@mayel I am using that now, but then you need to have the query in SQL. I wanted to rewrite SQL into Ecto.

@LostKobrakai First of all thanks for the json_build_object macro, that is amazing! The query itself doesn’t work for me. It seems that while subquery in select works fine, the same doesn’t work in from.

import Ecto.Query

# this works
from(
  f in "posts",
  select: fragment(
    "json_build_object(?, ?)", 
    "count", 
    subquery(from(c in "comments", select: count(c.id))))
)

# this doesn't work
# ** (Ecto.Query.CompileError) `subquery(from(c in "comments", select: count(c.id)))` is not a valid query expression.
from(
  f in fragment(
    "json_build_object(?, ?)",
    "count",
    subquery(from(c in "comments", select: count(c.id)))
  ),
  select: f.f0
)

Finally after reviewing all your solutions I realized I could move one of the subqueries from subquery in select to from, as it always returns a single row. So I ended up with something like this:

WITH selected_posts AS (SELECT id, type FROM posts WHERE category = $1),
  comments AS (SELECT id, text FROM selected_posts sp INNER JOIN comments c 
               ON c.post_id = sp.id 
               ORDER BY c.score DESC),
  posts_agg AS (SELECT type, count(id) FROM selected_posts GROUP BY type),
  posts_agg_res AS (SELECT array_agg(json_build_object('type', type, 'count', count)) res FROM posts_agg)
SELECT json_build_object(
  'posts_aggregated', coalesce(res, ARRAY[]::json[]),
  'posts_count', SELECT COUNT(id) FROM selected_posts,
  'top_comments', coalesce((SELECT array_agg(json_build_object(
          'author', author
          'text', text
         )) FROM comments LIMIT 5), ARRAY[]::json[])
FROM posts_agg_res
)

which translates to ecto roughly as follows

import Ecto.Query
import MyFragments

selected_posts = from(p in "posts", where: p.category == "top", select: %{id: p.id, type: p.type})

comments =
  from(sp in "selected_posts",
    inner_join: c in "comments",
    on: c.post_id == sp.id,
    order_by: [desc: c.score],
    select: %{id: c.id, text: c.text, author: c.author},
    limit: 5
  )

posts_agg =
  from(p in "selected_posts", group_by: p.type, select: %{type: p.type, count: count(p.id)})

posts_agg_res =
  from(p in "posts_agg", select: array_agg(json_build_object(type: p.type, count: p.count)))

# ignored coalesce here for simplicity
from(p in "posts_agg_res",
  select: %{
    posts_aggregated: p.res,
    posts_count: subquery(from(p in "selected_posts", select: count(p.id))),
    top_comments:
      subquery(
        from(c in "comments", select: array_agg(json_build_object(author: c.author, text: c.text)))
      )
  }
)

I am happy with this solution as it doesn’t feel like hack anymore. It’s still not a general solution, but should work in similar queries, because such queries already rely on subqueries that return a single row, so one of them can be moved to FROM.