Ecto.Multi (Repo.transaction) with functions that are using different Repo configurations to the same DB

Hello!

Just want to share my experience with you.
Maybe it will save some hours for somebody.

I have 2 applications in umbrella application:

  • Secutity
  • Store

Application Security can be used in other projects.
Each application has its own repository, configured to the same database (Store is sharing Security functional):

repo_transaction_umbrella/config/dev.exs:

use Mix.Config

config :security, Security.Repo,
  adapter: Ecto.Adapters.Postgres,
  username: "postgres",
  password: "postgres",
  database: "repo_transaction_umbrella_dev",
  hostname: "localhost",
  pool_size: 10

config :store, Store.Repo,
  adapter: Ecto.Adapters.Postgres,
  username: "postgres",
  password: "postgres",
  database: "repo_transaction_umbrella_dev",
  hostname: "localhost",
  pool_size: 10

repo_transaction_umbrella/apps/security/lib/security/contexts/user.ex:

defmodule Security.User do
  alias Security.{Repo, UserSchema}

  def add(params) do
    changeset = UserSchema.build_changeset(%UserSchema{}, params)
    Repo.insert(changeset)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/profile.ex:

defmodule Store.Profile do
  alias Store.{Repo, ProfileSchema}

  def add(params) do
    changeset = ProfileSchema.build_changeset(%ProfileSchema{}, params)
    Repo.insert(changeset)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/customer.ex:

defmodule Store.Customer do
  alias Ecto.Multi
  alias Store.{Repo, Profile}
  alias Security.User

  def add(params) when is_map(params) do
    multi =
      Multi.new
      |> Multi.run(:user, fn _ -> User.add(params) end)
      |> Multi.run(:profile, fn %{user: user} ->
        params
        |> Map.put_new("user_id", user.id)
        |> Profile.add()
      end)

    case Repo.transaction(multi) do
      {:ok, result} -> {:ok, result}
      {:error, :user, changeset, %{}} -> {:error, :user, changeset}
      {:error, :profile, changeset, %{}} -> {:error, :profile, changeset}
    end
  end
end

I try to add new customer with Store.Customer.add/1 and incorrect parameters (name is empty):

iex(1)> params = %{"username" => "user_1", "name" => ""}
%{"name" => "", "username" => "user_1"}

iex(2)> alias Store.Customer
Store.Customer

iex(3)> Customer.add(params)

19:16:00.727 [debug] QUERY OK db=0.3ms
begin []

19:16:00.765 [debug] QUERY OK db=3.6ms
INSERT INTO "security"."users" ("username","inserted_at","updated_at") VALUES ($1,$2,$3) RETURNING "id" ["user_1", {{2017, 1, 27}, {16, 16, 0, 745833}}, {{2017, 1, 27}, {16, 16, 0, 753016}}]

19:16:00.768 [debug] QUERY OK db=0.4ms
rollback []
{:error, :profile,
 #Ecto.Changeset<action: :insert, changes: %{user_id: 110},
  errors: [name: {"can't be blank", [validation: :required]}],
  data: #Store.ProfileSchema<>, valid?: false>}

Looks ok - transaction started, insert ok, then rollback (because second insert failed).
But, in fact, ROLLBACK did nothing with this INSERT.

Reason - separate Repo configuration, separate sessions in PostgreSQL.
Log statement from postgresql.conf helped to figure it out:

repo_transaction_umbrella_dev_(postgres)_[BEGIN]_{00000}_14/6LOG:  execute POSTGREX_BEGIN: BEGIN
repo_transaction_umbrella_dev_(postgres)_[INSERT]_{00000}_30/6LOG:  execute <unnamed>: INSERT INTO "security"."users" ("username","inserted_at","updated_at") VALUES ($1,$2,$3) RETURNING "id"
repo_transaction_umbrella_dev_(postgres)_[INSERT]_{00000}_30/6DETAIL:  parameters: $1 = 'user_1', $2 = '2017-01-27 15:56:00.280901', $3 = '2017-01-27 15:56:00.287823'
repo_transaction_umbrella_dev_(postgres)_[ROLLBACK]_{00000}_14/6LOG:  execute POSTGREX_ROLLBACK: ROLLBACK

We started transaction with BEGIN in session 14, then did INSERT in session 30, then ROLLBACK in session 14.
Because Store.Customer.add/1 executes Security.User.add/1 (Security.Repo) and Store.Profile.add/1 (Store.Repo) in transaction from Store.Repo.

The solution is simple - pass the same Repo as you are using for Repo.transaction to functions as parameter if there are functions with different repo configurations in your transaction:

repo_transaction_umbrella/apps/security/lib/security/contexts/user.ex:

defmodule Security.User do
  alias Security.{Repo, UserSchema}

  def add(params) do
    changeset = UserSchema.build_changeset(%UserSchema{}, params)
    Repo.insert(changeset)
  end

  def add(params, repo) do
    changeset = UserSchema.build_changeset(%UserSchema{}, params)
    repo.insert(changeset)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/profile.ex:

defmodule Store.Profile do
  alias Store.{Repo, ProfileSchema}

  def add(params) do
    changeset = ProfileSchema.build_changeset(%ProfileSchema{}, params)
    Repo.insert(changeset)
  end

  def add(params, repo) do
    changeset = ProfileSchema.build_changeset(%ProfileSchema{}, params)
    repo.insert(changeset)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/customer.ex:

defmodule Store.Customer do
  alias Ecto.Multi
  alias Store.{Repo, Profile}
  alias Security.User

  def add(params) when is_map(params) do
    multi =
      Multi.new
      |> Multi.run(:user, fn _ -> User.add(params, Repo) end)
      |> Multi.run(:profile, fn %{user: user} ->
        params
        |> Map.put_new("user_id", user.id)
        |> Profile.add(Repo)
      end)

    case Repo.transaction(multi) do
      {:ok, result} -> {:ok, result}
      {:error, :user, changeset, %{}} -> {:error, :user, changeset}
      {:error, :profile, changeset, %{}} -> {:error, :profile, changeset}
    end
  end
end

I have loaded this test umbrella application to GitHub.

3 Likes

May be there can be a check that all multi operations run in one session?
@Ecto-Core-Team

1 Like

Nice try. But this approach should fail when you use different postgres users in repos or separated databases. Maybe it should be better to remove ecto from both apps and add a new “repo” OTP app started as a dependency or to split database into two to get a real separation of concerns and forget about Multi.
Just thinking. I have the same two apps - one for authentication, another…for other things but with separated databases and they are linked on API layer.

3 Likes

Exactly. There is no way it can work as @dgamidov expects because different repositories are meant to be different repositories. You cannot have transactional guarantees across repositories. What if one repository decides to use MySQL and the other still uses Postgres, how would that even work?

In my opinion, you have an artificial boundary. If you are building two services that are supposedly isolated but they still use the same data source, then those two services are not actually isolated. They are still one and the same, as the example posted above seems to reveal. Or you need to actually provide proper isolation (i.e. move to actually different databases and handle the consequences) or make sure the shared service receives the repository they are meant to act on instead of defining its own.

4 Likes

@josevalim I don’t expect it to work in separate databases.
It is understandable. I just wondering if Ecto can warn that multi was built from operations for different repositories.

1 Like

@DmytroNasyrov and in my case I must have DB integrity between store.profile (user_id) and security.user(id) with FK.

1 Like

I don’t think they are different. They only run as different apps, but this is the same repo. Actually you have joined them later in some other layer with sync Multi request.

1 Like

Repositories are different, because they have different configuration (but to same database), different names, pools of processes.

1 Like

I hope that works for you. From my point of view - different repos are two data containers which can be easily replaced with another one data container with the same API. Actually repo is an API to the database or even map or list instead. And integrity should be checked only in a layer that contains both of repos as a dependency applications.

1 Like

Sorry about misreading your original post. :slight_smile: I don’t think we can warn as you can do anything you want in the anonymous function. You could even call the repository dynamically. An implementation would be quite complex and require an unecessary central coordination for repos.

2 Likes

Sure. It seems to be hard to implement. And anyway there should stay a possibility to shoot yourselves in the leg :joy:

1 Like

Again, this is mostly a design/transactions issue. If you send an e-mail from inside a multi, you are going to have the exact same issues, and there is no saving from shooting yourself in the foot in there. I don’t see any reason why we should treat completely separate repositories as a special case. Any operation with side-effects won’t be reverted.

Also please read my reply carefully. I did not say it is hard to implement but complex. In the sense that the cost of supporting this feature is not worth it because it does not protect you from the root issue: which comes from design or a lack of misunderstanding of how transactions work.

There are more complex issues, such as SQL injection attacks, with a larger potential for shooting yourself in the foot which we gladly tackled, because it was worth it (or even necessary).

3 Likes

Sorry, maybe I said it in a wrong way. With hard to implement I meant that to make it work Ecto should be acknowledged about processes ID(?) from database connection pool that are using to run all transactions from Multi and handling these through all layers. And it doesn’t seem to be worth.
And about shooting - just a joke, not connected to Ecto, of course!

1 Like

It could work even for different dbs if you just stick to PG but without Ecto as PG has two phase commit.

2 Likes

Thanks! Didn’t know it.

1 Like

Thanks @josevalim and @DmytroNasyrov for pointing to right direction :slight_smile:

Thought about it all night and understood that the best way to proper solve this problem - get rid of functions with side effects in Multi and get rid of several Repos:

repo_transaction_umbrella/apps/security/lib/security/contexts/user.ex:

defmodule Security.User do
  alias Security.{Repo, UserSchema}

  def add(params) do
    changeset = build_changeset(params)
    Repo.insert(changeset)
  end

  def build_changeset(params) do
    changeset = UserSchema.build_changeset(%UserSchema{}, params)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/profile.ex:

defmodule Store.Profile do
  alias Store.{Repo, ProfileSchema}

  def add(params) do
    changeset = build_changeset(params)
    Repo.insert(changeset)
  end

  def build_changeset(params) do
    changeset = ProfileSchema.build_changeset(%ProfileSchema{}, params)
  end
end

repo_transaction_umbrella/apps/store/lib/store/contexts/customer.ex:

defmodule Store.Customer do
  alias Ecto.Multi
  alias Store.{Repo, Profile}
  alias Security.User

  def add(params) when is_map(params) do
    user_changeset = User.build_changeset(params)

    multi =
      Multi.new
      |> Multi.insert(:user, user_changeset)
      |> Multi.run(:profile, fn %{user: user} ->
        params
        |> Map.put_new("user_id", user.id)
        |> Profile.build_changeset()
        |> Repo.insert()
      end)

    case Repo.transaction(multi) do
      {:ok, result} -> {:ok, result}
      {:error, :user, changeset, %{}} -> {:error, :user, changeset}
      {:error, :profile, changeset, %{}} -> {:error, :profile, changeset}
    end
  end
end

All work is in one transaction:

repo_transaction_umbrella_dev_(postgres)_[BEGIN]_{00000}_10/6LOG:  execute POSTGREX_BEGIN: BEGIN
repo_transaction_umbrella_dev_(postgres)_[INSERT]_{00000}_10/6LOG:  execute <unnamed>: INSERT INTO "security"."users" ("username","inserted_at","updated_at") VALUES ($1,$2,$3) RETURNING "id"
repo_transaction_umbrella_dev_(postgres)_[INSERT]_{00000}_10/6DETAIL:  parameters: $1 = 'user_1', $2 = '2017-01-28 05:29:55.411862', $3 = '2017-01-28 05:29:55.411871'
repo_transaction_umbrella_dev_(postgres)_[ROLLBACK]_{00000}_10/6LOG:  execute POSTGREX_ROLLBACK: ROLLBACK
2 Likes