Does anyone have experience tuning Bumblebee for better results with the RoBERTa-based nomic-ai/nomic-embed-text-v2-moe model? I'm seeing poor retrieval performance in Elixir, even though the same setup works well with all-MiniLM-L6-v2, and a Python version of the same pipeline (using the nomic model as well as all-MiniLM) gives correct results.
# EmbeddingGenserver
@model_id "nomic-ai/nomic-embed-text-v2-moe"
...
@impl true
def handle_continue(:model_loader, _state) do
  # Force the RoBERTa implementation for the MoE checkpoint
  {:ok, model} =
    Bumblebee.load_model({:hf, @model_id},
      module: Bumblebee.Text.Roberta,
      architecture: :base
    )

  {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, @model_id}, type: :roberta)
...
@impl true
def handle_call({:embed, input}, _from, state) do
  # Mean-pool the hidden state and L2-normalize, to mirror the Python mean_pooling + F.normalize
  serving =
    Bumblebee.Text.text_embedding(state.model, state.tokenizer,
      output_attribute: :hidden_state,
      output_pool: :mean_pooling,
      embedding_processor: :l2_norm
    )
...
# Embedding
def ingest(input, task \\ "search_document") do
  ...
  # Prefix each document with the nomic task instruction before embedding
  input_mask =
    input |> Stream.map(fn i -> "#{task}: #{i}" end) |> Enum.to_list()

  embedding = EmbeddingGenserver.embed(input_mask)

  Stream.zip(input, embedding)
  |> Stream.map(fn {text, embedding} ->
    Repo.insert(%Document{text: text, embedding: embedding}, prefix: tenant)
  end)
  |> Stream.run()
end
...
def retrieve(text, task \\ "search_document", k \\ 3) do
  ...
  [embedding] = EmbeddingGenserver.embed("#{task}: #{text}")

  # <=> is pgvector's cosine distance operator; the select returns cosine similarity
  Repo.all(
    from d in Document,
      prefix: ^tenant,
      select: {fragment("1 - (? <=> ?::vector)", d.embedding, ^embedding), d.text},
      order_by: cosine_distance(d.embedding, ^embedding),
      limit: ^k
  )
end
...
**Poor similarity (Elixir):**
iex> Embedding.retrieve("project")
[
{0.9799439930982597,
"Contact Timeframe: After submitting the form on ccdcare.com or via this chat, our recruiters typically reach out within 24 to 48 hours. Thank you for your patience."},
{0.9761637491887082,
"Applying for Non-Agent Positions: Thank you for your interest. Staff positions are posted on our LinkedIn account. If the position you’re seeking isn’t listed, stay updated via our social media channels."},
{0.976003851891173,
"Campaigns with High Pay: Hourly rates depend on the campaign you qualify for. Our salaries start at RD$ 260.00 p/h, with full compensation including base salary and hourly incentives. Complete the recruitment process for detailed information."}
]
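All three scores are bunched tightly between 0.976 and 0.98, regardless of the query. One sanity check I can run from iex is to look at the raw vector coming out of the serving, roughly like this (a sketch; it assumes embed/1 returns plain lists of floats, as when inserting into pgvector above, otherwise Nx.to_flat_list/1 first):

[vec] = EmbeddingGenserver.embed("search_document: project")
length(vec)                                                       # embedding dimension
:math.sqrt(Enum.reduce(vec, 0.0, fn x, acc -> acc + x * x end))   # ~1.0 if L2-normalized

For comparison, here is the Python version that gives correct results: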
import numpy as np
import psycopg2
import torch
import torch.nn.functional as F
from pgvector.psycopg2 import register_vector
from transformers import AutoTokenizer, AutoModel

tokenizer_embed = AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v2-moe')
model_embed = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v2-moe', trust_remote_code=True)
model_embed.eval()

def embedd(text: list):
    # Mean-pool the token embeddings, weighted by the attention mask
    def mean_pooling(model_output, attention_mask):
        token_embeddings = model_output[0]
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

    encoded_input = tokenizer_embed(text, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        model_output = model_embed(**encoded_input)

    embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
    embeddings = F.normalize(embeddings, p=2, dim=1)
    return np.array(embeddings)[0]
...
def retrieve_similar_documents_pgvector(query_text, top_k=5):
    query_embedding = embedd([f"search_document: {query_text}"]).squeeze().tolist()
    connection = None
    try:
        connection = psycopg2.connect(user="postgres",
                                      password="postgres",
                                      host="localhost",
                                      port="5432",
                                      database="...")
        register_vector(connection)
        cursor = connection.cursor()
        cursor.execute("""
            SELECT text, 1 - (embedding <=> %s::vector) AS cosine_similarity
            FROM ceidy.documents_copy
            ORDER BY cosine_similarity DESC
            LIMIT %s
        """, (query_embedding, top_k))
        results = cursor.fetchall()
        return results  # [(text, cosine_similarity), ...]
    except (Exception, psycopg2.Error) as error:
        print("Error during retrieval:", error)
        return []
    finally:
        if connection:
            cursor.close()
            connection.close()
...
results = retrieve_similar_documents_pgvector("project", top_k=3)
for text, score in results:
    print(f"Score: {score:.4f} | Text: {text}")
**The similarity looks good (Python).** Output:
Score: 0.7170 | Text: Spanish Projects: We do not currently have Spanish-language projects available. Please follow our social media for updates.
Score: 0.6636 | Text: Part-Time Projects: We currently have part-time projects available. Please share your direct number, email, ID number, and full name so a recruitment representative can assist you. Don’t miss out—apply today!
Score: 0.5672 | Text: Overseas Remote Work: At this moment, we do not have an option for overseas remote work. Please stay updated through our social media for any changes.
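If it helps narrow things down, the next step I plan to try is exporting one vector from the Python script and comparing it against the Elixir one for the same input, roughly like this (the JSON file name is hypothetical and this assumes Jason is available; both vectors are already L2-normalized, so the dot product is the cosine similarity):

# "nomic_py_vec.json" is a hypothetical dump of embedd(["search_document: project"]) from the Python side
py_vec = "nomic_py_vec.json" |> File.read!() |> Jason.decode!()
[ex_vec] = EmbeddingGenserver.embed("search_document: project")
Enum.zip(ex_vec, py_vec) |> Enum.reduce(0.0, fn {a, b}, acc -> acc + a * b end)
# a value close to 1.0 would mean both sides produce the same embedding and the problem is elsewhere

Has anyone gotten this model working with Bumblebee, or is forcing it through Bumblebee.Text.Roberta with architecture: :base the wrong approach here?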