How to properly sanitize multiple ts_vector fields with reusable macro?

Thanks a lot for helping me.

I’ve starred https://github.com/raidcorp/searchy and will keep an eye on it. The reason why I’ve done this on my own is because I need to rank results and search in related tables.

So I’ve ended up with this one:

At first I’ve added pg_term extension for incomplete word search:

defmodule MyApp.Repo.Migrations.AddPgTrgmExtension do
  @moduledoc """
  Create postgres pg_trgm extension and indices
  """

  use Ecto.Migration

  def up do
    execute("CREATE EXTENSION pg_trgm")
  end

  def down do
    execute("DROP EXTENSION pg_trgm")
  end
end

Then migrated searchable tables and add tsvector field, trigger and index:

defmodule MyApp.Repo.Migrations.AddPerformersSearch do
  use Ecto.Migration

  def up do
    alter table("performers") do
      add :tsvector, :tsvector
    end

    create index(:performers, [:tsvector], using: "GIN")

    execute("""
    CREATE OR REPLACE FUNCTION performers_tsvector_trigger()
    RETURNS trigger AS $$
    begin
      new.tsvector := setweight(to_tsvector('russian', coalesce(new.bio, '')), 'A');

      return new;
    end
    $$ LANGUAGE plpgsql;
    """)

    execute("""
    CREATE TRIGGER performers_tsvector_update
    BEFORE INSERT OR UPDATE ON performers
    FOR EACH ROW EXECUTE PROCEDURE performers_tsvector_trigger();
    """)

    alter table("users") do
      add :tsvector, :tsvector
    end

    create index(:users, [:tsvector], using: "GIN")

    execute("""
    CREATE OR REPLACE FUNCTION users_tsvector_trigger()
    RETURNS trigger AS $$
    begin
      new.tsvector := setweight(to_tsvector('russian', coalesce(new.last_name, '')), 'A') ||
      setweight(to_tsvector('russian', coalesce(new.first_name, '')), 'B') ||
      setweight(to_tsvector('russian', coalesce(new.email, '')), 'C') ||
      setweight(to_tsvector('russian', coalesce(new.phone, '')), 'C') ||
      setweight(to_tsvector('russian', coalesce(new.city, '')), 'D');

      return new;
    end
    $$ LANGUAGE plpgsql;
    """)

    execute("""
    CREATE TRIGGER users_tsvector_update
    BEFORE INSERT OR UPDATE ON users
    FOR EACH ROW EXECUTE PROCEDURE users_tsvector_trigger();
    """)

    alter table("topics") do
      add :tsvector, :tsvector
    end

    create index(:topics, [:tsvector], using: "GIN")

    execute("""
    CREATE OR REPLACE FUNCTION topics_tsvector_trigger()
    RETURNS trigger AS $$
    begin
      new.tsvector := setweight(to_tsvector('russian', coalesce(new.name, '')), 'A');

      return new;
    end
    $$ LANGUAGE plpgsql;
    """)

    execute("""
    CREATE TRIGGER topics_tsvector_update
    BEFORE INSERT OR UPDATE ON topics
    FOR EACH ROW EXECUTE PROCEDURE topics_tsvector_trigger();
    """)
  end

  def down do
    execute("DROP TRIGGER performers_tsvector_update on performers;")
    execute("DROP FUNCTION performers_tsvector_trigger();")

    alter table("performers") do
      remove :tsvector
    end

    drop index("performers", [:tsvector])

    execute("DROP TRIGGER users_tsvector_update on performers;")
    execute("DROP FUNCTION users_tsvector_trigger();")

    alter table("users") do
      remove :tsvector
    end

    drop index("users", [:tsvector])

    execute("DROP TRIGGER topics_tsvector_update on performers;")
    execute("DROP FUNCTION topics_tsvector_trigger();")

    alter table("topics") do
      remove :tsvector
    end

    drop index("topics", [:tsvector])
  end
end

Added tsvector type taken from https://github.com/raidcorp/searchy source:

defmodule MyApp.Ecto.Types.TSVector do
  use Ecto.Type

  def type, do: :tsvector

  def cast(tsvector), do: {:ok, tsvector}

  def load(tsvector), do: {:ok, tsvector}

  def dump(tsvector), do: {:ok, tsvector}

  def embed_as(_), do: :self

  def equal?(term1, term2), do: term1 == term2
end

Added tsvector field to all searchable ecto schemas:

    field :tsvector, Proling.Ecto.Types.TSVector

Added Search.Helpers module for convience:

defmodule MyApp.Search.Helpers do
  defmacro tsquery(tsvector, terms, language \\ "russian") do
    quote do
      fragment(
        "? @@ to_tsquery(?, ?)",
        unquote(tsvector),
        unquote(language),
        unquote(terms)
      )
    end
  end

  defmacro tsrankcd(tsvector, terms, language \\ "russian") do
    quote do
      fragment(
        "ts_rank_cd(?, to_tsquery(?, ?))",
        unquote(tsvector),
        unquote(language),
        unquote(terms)
      )
    end
  end
end

And finally added API for using in context:

defmodule MyApp.Production.Performer.Search do
  import Ecto.Query
  import MyApp.Search.Helpers

  @spec search(Ecto.Query.t(), any()) :: Ecto.Query.t()
  def search(query, search_term) do
    search_term = normalize(search_term)

    from q in query,
      join: p in assoc(q, :profile),
      left_join: t in assoc(q, :topics),
      where:
        tsquery(p.tsvector, ^search_term) or tsquery(q.tsvector, ^search_term) or
          tsquery(t.tsvector, ^search_term),
      order_by: [
        desc: tsrankcd(p.tsvector, ^search_term),
        desc: tsrankcd(q.tsvector, ^search_term),
        desc: tsrankcd(t.tsvector, ^search_term)
      ],
      distinct: [q.id]
  end

  defp normalize(terms, operator \\ "&") do
    terms
    |> String.downcase()
    |> String.trim()
    |> String.replace(~r/\(|\)\[|\]\{|\}/u, "")
    |> String.replace(~r/\s+/u, " #{operator} ")
    |> (&(&1 <> ":*")).()
  end
end

I’m gonna star @thiagomajesk answer with searchy because it`s source code greatly led me to the final destination. Also I would like to mention this awesome answer - Search Bar Feature (Phoenix Searching)

Hope this step-by-step example could help someone to get things done.

3 Likes