Does the following code actually batch requests? If I call `test_serving`
5 times concurrently, are those requests put in the same batch?
@doc """
Builds the `Nx.Serving` used for linear-regression predictions.

`get_linreg_model/0` and `get_x_scaler/0` are called once per invocation of
this function; their results are captured by the closures below.

Client preprocessing stacks the input along axis 1, transforms it with the
scaler, and wraps the result in a single-entry `Nx.Batch`. The client info
returned alongside the batch is `true`.
"""
def serving do
  linreg = get_linreg_model()
  x_scaler = get_x_scaler()

  predict = &LinearRegression.predict(linreg, &1)

  predict
  |> Nx.Serving.jit(defn_options())
  |> Nx.Serving.client_preprocessing(fn input ->
    # No dbg/1 here: this closure runs for every request, so debug output
    # would be printed on each call.
    scaled =
      input
      |> Nx.stack(axis: 1)
      |> then(&MinMaxScaler.transform(x_scaler, &1))

    {Nx.Batch.stack([scaled]), true}
  end)
end
@doc """
Runs one prediction through the serving registered under `__MODULE__`.

Reads `funda_house.parquet` from the current user's home directory, discards
the "Vraagprijs per m2" column, sends the remaining data through
`Nx.Serving.batched_run/2`, and converts the result to a plain number.
"""
def test_serving do
  parquet_path = Path.join(System.user_home!(), "funda_house.parquet")

  # NOTE(review): DF is presumably an alias for Explorer.DataFrame - confirm.
  house_features =
    parquet_path
    |> DF.from_parquet!()
    |> DF.discard("Vraagprijs per m2")

  __MODULE__
  |> Nx.Serving.batched_run(house_features)
  |> Nx.flatten()
  |> Nx.squeeze()
  |> Nx.to_number()
end
This is the child spec:
# Child spec for the serving process, registered under the name LinRegPredictor.
# The serving itself is built by calling LinRegPredictor.serving/0.
# NOTE(review): per the Nx.Serving docs, `batch_size` is the maximum number of
# entries per batch and `batch_timeout` is given in milliseconds - confirm
# against the installed Nx version.
{Nx.Serving,
serving: LinRegPredictor.serving(),
name: LinRegPredictor,
batch_size: 10,
batch_timeout: 3000},