Ecto.Multi (Repo.transaction) with functions that are using different Repo configurations to the same DB

dgamidov · January 27, 2017, 4:53pm

Hello!

Just want to share my experience with you.
Maybe it will save some hours for somebody.

I have 2 applications in umbrella application:

Secutity
Store

Application Security can be used in other projects.
Each application has its own repository, configured to the same database (Store is sharing Security functional):

repo_transaction_umbrella/config/dev.exs:

use Mix.Config

config :security, Security.Repo,
  adapter: Ecto.Adapters.Postgres,
  username: "postgres",
  password: "postgres",
  database: "repo_transaction_umbrella_dev",
  hostname: "localhost",
  pool_size: 10

config :store, Store.Repo,
  adapter: Ecto.Adapters.Postgres,
  username: "postgres",
  password: "postgres",
  database: "repo_transaction_umbrella_dev",
  hostname: "localhost",
  pool_size: 10

repo_transaction_umbrella/apps/security/lib/security/contexts/user.ex:

defmodule Security.User do
  alias Security.{Repo, UserSchema}

  def add(params) do
    changeset = UserSchema.build_changeset(%UserSchema{}, params)
    Repo.insert(changeset)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/profile.ex:

defmodule Store.Profile do
  alias Store.{Repo, ProfileSchema}

  def add(params) do
    changeset = ProfileSchema.build_changeset(%ProfileSchema{}, params)
    Repo.insert(changeset)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/customer.ex:

defmodule Store.Customer do
  alias Ecto.Multi
  alias Store.{Repo, Profile}
  alias Security.User

  def add(params) when is_map(params) do
    multi =
      Multi.new
      |> Multi.run(:user, fn _ -> User.add(params) end)
      |> Multi.run(:profile, fn %{user: user} ->
        params
        |> Map.put_new("user_id", user.id)
        |> Profile.add()
      end)

    case Repo.transaction(multi) do
      {:ok, result} -> {:ok, result}
      {:error, :user, changeset, %{}} -> {:error, :user, changeset}
      {:error, :profile, changeset, %{}} -> {:error, :profile, changeset}
    end
  end
end

I try to add new customer with Store.Customer.add/1 and incorrect parameters (name is empty):

iex(1)> params = %{"username" => "user_1", "name" => ""}
%{"name" => "", "username" => "user_1"}

iex(2)> alias Store.Customer
Store.Customer

iex(3)> Customer.add(params)

19:16:00.727 [debug] QUERY OK db=0.3ms
begin []

19:16:00.765 [debug] QUERY OK db=3.6ms
INSERT INTO "security"."users" ("username","inserted_at","updated_at") VALUES ($1,$2,$3) RETURNING "id" ["user_1", {{2017, 1, 27}, {16, 16, 0, 745833}}, {{2017, 1, 27}, {16, 16, 0, 753016}}]

19:16:00.768 [debug] QUERY OK db=0.4ms
rollback []
{:error, :profile,
 #Ecto.Changeset<action: :insert, changes: %{user_id: 110},
  errors: [name: {"can't be blank", [validation: :required]}],
  data: #Store.ProfileSchema<>, valid?: false>}

Looks ok - transaction started, insert ok, then rollback (because second insert failed).
But, in fact, ROLLBACK did nothing with this INSERT.

Reason - separate Repo configuration, separate sessions in PostgreSQL.
Log statement from postgresql.conf helped to figure it out:

repo_transaction_umbrella_dev_(postgres)_[BEGIN]_{00000}_14/6LOG:  execute POSTGREX_BEGIN: BEGIN
repo_transaction_umbrella_dev_(postgres)_[INSERT]_{00000}_30/6LOG:  execute <unnamed>: INSERT INTO "security"."users" ("username","inserted_at","updated_at") VALUES ($1,$2,$3) RETURNING "id"
repo_transaction_umbrella_dev_(postgres)_[INSERT]_{00000}_30/6DETAIL:  parameters: $1 = 'user_1', $2 = '2017-01-27 15:56:00.280901', $3 = '2017-01-27 15:56:00.287823'
repo_transaction_umbrella_dev_(postgres)_[ROLLBACK]_{00000}_14/6LOG:  execute POSTGREX_ROLLBACK: ROLLBACK

We started transaction with BEGIN in session 14, then did INSERT in session 30, then ROLLBACK in session 14.
Because Store.Customer.add/1 executes Security.User.add/1 (Security.Repo) and Store.Profile.add/1 (Store.Repo) in transaction from Store.Repo.

The solution is simple - pass the same Repo as you are using for Repo.transaction to functions as parameter if there are functions with different repo configurations in your transaction:

repo_transaction_umbrella/apps/security/lib/security/contexts/user.ex:

defmodule Security.User do
  alias Security.{Repo, UserSchema}

  def add(params) do
    changeset = UserSchema.build_changeset(%UserSchema{}, params)
    Repo.insert(changeset)
  end

  def add(params, repo) do
    changeset = UserSchema.build_changeset(%UserSchema{}, params)
    repo.insert(changeset)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/profile.ex:

defmodule Store.Profile do
  alias Store.{Repo, ProfileSchema}

  def add(params) do
    changeset = ProfileSchema.build_changeset(%ProfileSchema{}, params)
    Repo.insert(changeset)
  end

  def add(params, repo) do
    changeset = ProfileSchema.build_changeset(%ProfileSchema{}, params)
    repo.insert(changeset)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/customer.ex:

defmodule Store.Customer do
  alias Ecto.Multi
  alias Store.{Repo, Profile}
  alias Security.User

  def add(params) when is_map(params) do
    multi =
      Multi.new
      |> Multi.run(:user, fn _ -> User.add(params, Repo) end)
      |> Multi.run(:profile, fn %{user: user} ->
        params
        |> Map.put_new("user_id", user.id)
        |> Profile.add(Repo)
      end)

    case Repo.transaction(multi) do
      {:ok, result} -> {:ok, result}
      {:error, :user, changeset, %{}} -> {:error, :user, changeset}
      {:error, :profile, changeset, %{}} -> {:error, :profile, changeset}
    end
  end
end

I have loaded this test umbrella application to GitHub.

dgamidov · January 27, 2017, 7:19pm

May be there can be a check that all multi operations run in one session?
@Ecto-Core-Team

DmytroNasyrov · January 27, 2017, 7:35pm

Nice try. But this approach should fail when you use different postgres users in repos or separated databases. Maybe it should be better to remove ecto from both apps and add a new “repo” OTP app started as a dependency or to split database into two to get a real separation of concerns and forget about Multi.
Just thinking. I have the same two apps - one for authentication, another…for other things but with separated databases and they are linked on API layer.

josevalim · January 27, 2017, 8:02pm

Exactly. There is no way it can work as @dgamidov expects because different repositories are meant to be different repositories. You cannot have transactional guarantees across repositories. What if one repository decides to use MySQL and the other still uses Postgres, how would that even work?

In my opinion, you have an artificial boundary. If you are building two services that are supposedly isolated but they still use the same data source, then those two services are not actually isolated. They are still one and the same, as the example posted above seems to reveal. Or you need to actually provide proper isolation (i.e. move to actually different databases and handle the consequences) or make sure the shared service receives the repository they are meant to act on instead of defining its own.

dgamidov · January 27, 2017, 8:15pm

@josevalim I don’t expect it to work in separate databases.
It is understandable. I just wondering if Ecto can warn that multi was built from operations for different repositories.

dgamidov · January 27, 2017, 8:16pm

@DmytroNasyrov and in my case I must have DB integrity between store.profile (user_id) and security.user(id) with FK.

DmytroNasyrov · January 27, 2017, 8:21pm

I don’t think they are different. They only run as different apps, but this is the same repo. Actually you have joined them later in some other layer with sync Multi request.

dgamidov · January 27, 2017, 8:23pm

Repositories are different, because they have different configuration (but to same database), different names, pools of processes.

DmytroNasyrov · January 27, 2017, 8:29pm

I hope that works for you. From my point of view - different repos are two data containers which can be easily replaced with another one data container with the same API. Actually repo is an API to the database or even map or list instead. And integrity should be checked only in a layer that contains both of repos as a dependency applications.

josevalim · January 27, 2017, 8:33pm

Sorry about misreading your original post. I don’t think we can warn as you can do anything you want in the anonymous function. You could even call the repository dynamically. An implementation would be quite complex and require an unecessary central coordination for repos.

dgamidov · January 27, 2017, 8:41pm

Sure. It seems to be hard to implement. And anyway there should stay a possibility to shoot yourselves in the leg

josevalim · January 27, 2017, 9:22pm

Again, this is mostly a design/transactions issue. If you send an e-mail from inside a multi, you are going to have the exact same issues, and there is no saving from shooting yourself in the foot in there. I don’t see any reason why we should treat completely separate repositories as a special case. Any operation with side-effects won’t be reverted.

Also please read my reply carefully. I did not say it is hard to implement but complex. In the sense that the cost of supporting this feature is not worth it because it does not protect you from the root issue: which comes from design or a lack of misunderstanding of how transactions work.

There are more complex issues, such as SQL injection attacks, with a larger potential for shooting yourself in the foot which we gladly tackled, because it was worth it (or even necessary).

dgamidov · January 27, 2017, 9:35pm

Sorry, maybe I said it in a wrong way. With hard to implement I meant that to make it work Ecto should be acknowledged about processes ID(?) from database connection pool that are using to run all transactions from Multi and handling these through all layers. And it doesn’t seem to be worth.
And about shooting - just a joke, not connected to Ecto, of course!

andre1sk · January 27, 2017, 11:39pm

It could work even for different dbs if you just stick to PG but without Ecto as PG has two phase commit.

dgamidov · January 28, 2017, 4:26am

Thanks! Didn’t know it.

dgamidov · January 28, 2017, 5:45am

Thanks @josevalim and @DmytroNasyrov for pointing to right direction

Thought about it all night and understood that the best way to proper solve this problem - get rid of functions with side effects in Multi and get rid of several Repos:

repo_transaction_umbrella/apps/security/lib/security/contexts/user.ex:

defmodule Security.User do
  alias Security.{Repo, UserSchema}

  def add(params) do
    changeset = build_changeset(params)
    Repo.insert(changeset)
  end

  def build_changeset(params) do
    changeset = UserSchema.build_changeset(%UserSchema{}, params)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/profile.ex:

defmodule Store.Profile do
  alias Store.{Repo, ProfileSchema}

  def add(params) do
    changeset = build_changeset(params)
    Repo.insert(changeset)
  end

  def build_changeset(params) do
    changeset = ProfileSchema.build_changeset(%ProfileSchema{}, params)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/customer.ex:

defmodule Store.Customer do
  alias Ecto.Multi
  alias Store.{Repo, Profile}
  alias Security.User

  def add(params) when is_map(params) do
    user_changeset = User.build_changeset(params)

    multi =
      Multi.new
      |> Multi.insert(:user, user_changeset)
      |> Multi.run(:profile, fn %{user: user} ->
        params
        |> Map.put_new("user_id", user.id)
        |> Profile.build_changeset()
        |> Repo.insert()
      end)

    case Repo.transaction(multi) do
      {:ok, result} -> {:ok, result}
      {:error, :user, changeset, %{}} -> {:error, :user, changeset}
      {:error, :profile, changeset, %{}} -> {:error, :profile, changeset}
    end
  end
end

All work is in one transaction:

repo_transaction_umbrella_dev_(postgres)_[BEGIN]_{00000}_10/6LOG:  execute POSTGREX_BEGIN: BEGIN
repo_transaction_umbrella_dev_(postgres)_[INSERT]_{00000}_10/6LOG:  execute <unnamed>: INSERT INTO "security"."users" ("username","inserted_at","updated_at") VALUES ($1,$2,$3) RETURNING "id"
repo_transaction_umbrella_dev_(postgres)_[INSERT]_{00000}_10/6DETAIL:  parameters: $1 = 'user_1', $2 = '2017-01-28 05:29:55.411862', $3 = '2017-01-28 05:29:55.411871'
repo_transaction_umbrella_dev_(postgres)_[ROLLBACK]_{00000}_10/6LOG:  execute POSTGREX_ROLLBACK: ROLLBACK