{account, {inflow, outflow}} =
Repo.one(from account in App.Account,
left_join: t0 in App.Transaction,
on: t0.account_id == account.id
and t0.user_id == ^current_user.id
and t0.deleted == false
and t0.type == "inflow",
left_join: t1 in App.Transaction,
on: t1.account_id == account.id
and t1.user_id == ^current_user.id
and t1.deleted == false
and t1.type == "outflow",
where: account.id == ^id and t1.user_id == t0.user_id,
group_by: account.id,
select: {account, {sum(t0.amount), sum(t1.amount)}})
and though the second join is coming back as expected, the first table seems to be doubling up on the SUM. For example, if there is one transaction of 1,000, it will instead return 2,000, plus whatever the right amount of the second table’s total is.
Not sure what could be going on here, any help is much appreciated!
Have you tried querying your database with plain sql to see what tables are actually returned? It might be due to how joins work, here’s an example (both inflows and outflows are duplicated):
asd@/tmp:test> select * from account;
+------+--------+
| id | name |
|------+--------|
| 1 | test |
+------+--------+
SELECT 1
Time: 0.007s
asd@/tmp:test> select
a0.id,
sum(t0.amount),
sum(t1.amount)
from account a0
left outer join transaction t0
on t0.account_id = a0.id
and t0.type = 'inflow'
left outer join transaction t1
on t1.account_id = a0.id
and t1.type = 'outflow'
group by a0.id;
+------+-------+-------+
| id | sum | sum |
|------+-------+-------|
| 1 | 4000 | 4000 |
+------+-------+-------+
SELECT 1
Time: 0.011s
Try running your query without group by and sum to see what data they actually operate on:
asd@/tmp:test> select
a0.id,
t0.amount,
t1.amount
from account a0
left outer join transaction t0
on t0.account_id = a0.id
and t0.type = 'inflow'
left outer join transaction t1
on t1.account_id = a0.id
and t1.type = 'outflow';
+------+----------+----------+
| id | amount | amount |
|------+----------+----------|
| 1 | 1000 | 1500 |
| 1 | 1000 | 500 |
| 1 | 1000 | 1500 |
| 1 | 1000 | 500 |
+------+----------+----------+
SELECT 4
Time: 0.012s
Ecto does deduplicate the values returned in joins besides if you use streaming.
Can you provide a quick example? I’ve never noticed it does that.
I think I’ve actually encountered several issues just on this forum where people had similar problems to OP due to ecto returning exactly what’s in the joined tables – duplicates.
What you are trying to accomplish is usually done with a UNION in SQL, not JOIN. Roughly:
SELECT account_id, amount as inflow, NULL as outflow FROM transaction WHERE type = 'inflow' AND account_id = ACCOUNT_ID
1, 1000, NULL
1, 1000, NULL
SELECT account_id, NULL as inflow, amount as outflow FROM transaction WHERE type = 'outflow' AND account_id = ACCOUNT_ID
1, NULL, 500
1, NULL, 1500
(
SELECT account_id, amount as inflow, NULL as outflow FROM transaction WHERE type = 'inflow' AND account_id = ACCOUNT_ID
) UNION ALL (
SELECT account_id, NULL as inflow, amount as outflow FROM transaction WHERE type = 'outflow' AND account_id = ACCOUNT_ID
)
1, 1000, NULL
1, 1000, NULL
1, NULL, 500
1, NULL, 1500
SELECT account_id, SUM(inflow), SUM(outflow) FROM (
SELECT account_id, amount as inflow, NULL as outflow FROM transaction WHERE type = 'inflow' AND account_id = ACCOUNT_ID
) UNION ALL (
SELECT account_id, NULL as inflow, amount as outflow FROM transaction WHERE type = 'outflow' AND account_id = ACCOUNT_ID
)
GROUP BY account_id
1, 2000, 2000
Ecto doesn’t support unions so the easiest alternative is two separate queries - one for inflow another for outflow.
It might not do it for “sum” operations (that part I missed), but if you’re loading assocations you don’t suddenly get duplicated posts for a user, just because you happened to join comments as well.
Sort of. If you’re doing a preload with a join, the rows come out of the database with duplicates, and then Ecto de-duplicates them. There are plenty of situations where Ecto doesn’t de-duplicate stuff, ie:
iex(2)> length Repo.all from u in User
401
iex(3)> length Repo.all from u in User, join: m in assoc(u, :memberships)
674
Will return multiple copies of the same user if they have have more than one membership. This is a good thing really, it provides a nice SQL like experience.
In a way though the OP’s question is a whole other scenario. The duplication happens at the SQL level, and so does the aggregation, so there isn’t even an opportunity for Ecto to deduplicate even if it wanted to.
Associations are purely Ecto functionality so it is free to do whatever it wants. JOINs on the other hand have to follow established convention - especially as it is the persistence engine that implements it.
FYI: You can use Ecto.Multi to pass multiple statements to Ecto at once (which would make it possible to fake a union). Example:
alias MusicDB.{Repo,Album,Track}
import Ecto.Query
alias Ecto.Multi
duration_query = fn (album_id) ->
from(a in Album, [
join: t in Track, on: a.id == t.album_id,
where: a.id == ^album_id,
group_by: a.id,
select: {a.id, sum(t.duration)}
])
end
combine_result = fn
(results, _, value) when results == %{} ->
[value]
(results, key, value) ->
others = Map.get(results, key)
[value|others]
end
album_duration = fn (album_id, key) ->
fn results ->
result =
album_id
|> duration_query.()
|> Repo.one()
{:ok, combine_result.(results, key, result)}
end
end
make_multi = fn (first_id, second_id) ->
Multi.new()
|> Multi.run(:first, album_duration.(first_id, nil))
|> Multi.run(:second, album_duration.(second_id, :first))
end
snd = fn {_,y} -> y end
demo = fn artist_id, first_id, second_id ->
[second_duration, first_duration] =
make_multi.(first_id, second_id)
|> Repo.transaction() # {:ok, %{first: [{3, 3456}], second: [{4, 2540}, {3, 3456}]}}
|> snd.() # %{first: [{3, 3456}], second: [{4, 2540}, {3, 3456}]}
|> Map.get(:second) # [{4, 2540}, {3, 3456}]
|> Enum.map(snd) # [2540, 3456]
{artist_id, first_duration, second_duration}
end
demo.(2,3,4)
iex(10)> demo.(2,3,4)
12:20:04.512 [debug] QUERY OK db=0.1ms
begin []
12:20:04.522 [debug] QUERY OK source="albums" db=2.3ms
SELECT a0."id", sum(t1."duration") FROM "albums" AS a0 INNER JOIN "tracks" AS t1 ON a0."id" = t1."album_id" WHERE (a0."id" = $1) GROUP BY a0."id" [3]
12:20:04.524 [debug] QUERY OK source="albums" db=0.3ms
SELECT a0."id", sum(t1."duration") FROM "albums" AS a0 INNER JOIN "tracks" AS t1 ON a0."id" = t1."album_id" WHERE (a0."id" = $1) GROUP BY a0."id" [4]
12:20:04.525 [debug] QUERY OK db=0.2ms
commit []
{2, 3456, 2540}
iex(11)>
def query(id, current_user, type) do
from(account in App.Account, [
left_join: t in App.Transaction,
on: t.account_id == account.id
and t.user_id == ^current_user.id
and t.deleted == false
and t.type == ^type,
where: account.id == ^id,
group_by: account.id,
select: {account, sum(t.amount)}
])
end
run it once with "inflow" and then again with "outflow" - and then pull the combined results together.
FWIW in Ecto master there’s support for the filter expressions. Using them, this could be achieved with a simpler query:
{account, inflow, outflow} =
Repo.one(from a in App.Account,
left_join: t in App.Transaction,
on: t.account_id == a.id and t.user_id == ^current_user.id and not t.deleted,
where: a.id == ^id,
group_by: a.id,
select: {a, filter(sum(t.amount), t.type == "inflow"), filter(sum(t.amount), t.type == "outflow")})
alias MusicDB.{Repo,Album,Track}
import Ecto.Query
artist_id = 2
query = from(ar in Artist, [
join: a in Album, on: ar.id == a.artist_id,
join: t in Track, on: a.id == t.album_id,
where: ar.id == ^artist_id,
group_by: ar.id,
select: {ar, {fragment("SUM(CASE WHEN ? = ? THEN ? ELSE 0 END)", t.album_id, 3, t.duration), fragment("SUM(CASE WHEN ? = ? THEN ? ELSE 0 END)", t.album_id, 4, t.duration)}}
])
Repo.one(query)
iex(5)> Repo.one(query)
14:57:13.174 [debug] QUERY OK source="artists" db=4.0ms decode=2.7ms
SELECT a0."id", a0."name", a0."birth_date", a0."death_date", a0."inserted_at", a0."updated_at", SUM(CASE WHEN t2."album_id" = 3 THEN t2."duration" ELSE 0 END), SUM(CASE WHEN t2."album_id" = 4 THEN t2."duration" ELSE 0 END) FROM "artists" AS a0 INNER JOIN "albums" AS a1 ON a0."id" = a1."artist_id" INNER JOIN "tracks" AS t2 ON a1."id" = t2."album_id" WHERE (a0."id" = $1) GROUP BY a0."id" [2]
{%MusicDB.Artist{
__meta__: #Ecto.Schema.Metadata<:loaded, "artists">,
albums: #Ecto.Association.NotLoaded<association :albums is not loaded>,
birth_date: nil,
death_date: nil,
id: 2,
inserted_at: ~N[2018-06-16 20:29:41.481934],
name: "Bill Evans",
tracks: #Ecto.Association.NotLoaded<association :tracks is not loaded>,
updated_at: ~N[2018-06-16 20:29:41.481940]
}, {3456, 2540}}
iex(6)>
That essentially results in a CROSS JOIN, not a UNION:
iex(1)> Repo.all(from(a in Artist, [
...(1)> join: b in Artist, on: fragment("true"),
...(1)> select: {a.name, b.name}
...(1)> ]))
15:36:31.196 [debug] QUERY OK source="artists" db=1.9ms
SELECT a0."name", a1."name" FROM "artists" AS a0 INNER JOIN "artists" AS a1 ON true []
[
{"Miles Davis", "Miles Davis"},
{"Miles Davis", "Bill Evans"},
{"Miles Davis", "Bobby Hutcherson"},
{"Bill Evans", "Miles Davis"},
{"Bill Evans", "Bill Evans"},
{"Bill Evans", "Bobby Hutcherson"},
{"Bobby Hutcherson", "Miles Davis"},
{"Bobby Hutcherson", "Bill Evans"},
{"Bobby Hutcherson", "Bobby Hutcherson"}
]
iex(2)> Repo.all(from(a in Artist, [
...(2)> cross_join: b in Artist,
...(2)> select: {a.name, b.name}
...(2)> ]))
15:36:31.204 [debug] QUERY OK source="artists" db=1.8ms
SELECT a0."name", a1."name" FROM "artists" AS a0 CROSS JOIN "artists" AS a1 []
[
{"Miles Davis", "Miles Davis"},
{"Miles Davis", "Bill Evans"},
{"Miles Davis", "Bobby Hutcherson"},
{"Bill Evans", "Miles Davis"},
{"Bill Evans", "Bill Evans"},
{"Bill Evans", "Bobby Hutcherson"},
{"Bobby Hutcherson", "Miles Davis"},
{"Bobby Hutcherson", "Bill Evans"},
{"Bobby Hutcherson", "Bobby Hutcherson"}
]
iex(3)>