Great post! In case anyone else runs into the same issue: the latest `silero_vad.onnx` in the Silero VAD GitHub repo (version 5) expects different inputs than the version used in this post (version 4). The inputs `h` and `c` are now combined into a single state tensor: `{"state", "Float32", [2, nil, 128]}`
Here is the updated code:
# Starting accumulator for Kino.listen/3: a zero-filled {2, 1, 128} tensor
# used as the model's single combined "state" input.
init_state = %{state: Nx.broadcast(0.0, {2, 1, 128})}

live_audio
|> Kino.Control.stream()
|> Kino.listen(init_state, fn
  %{event: :audio_chunk, chunk: samples}, %{state: prev_state} ->
    # Wrapping the chunk in a list adds a leading axis to the input tensor.
    frames = Nx.tensor([samples])
    # The sample rate is passed as a signed 64-bit scalar.
    sample_rate = Nx.tensor(16_000, type: :s64)

    # Input order is {audio, state, sample rate}; the model hands back its
    # output together with the state to feed into the next chunk.
    {speech, next_state} = Ortex.run(model, {frames, prev_state, sample_rate})

    # Collapse the output to a plain number (presumably the speech
    # probability for this chunk - confirm against the model docs).
    probability = Nx.to_number(Nx.squeeze(speech))

    # Plot against the current OS time; `window: 1000` bounds the number of
    # points kept on the chart.
    Kino.VegaLite.push(chart, %{x: :os.system_time(), y: probability}, window: 1000)

    # Carry the updated state forward to the next audio chunk.
    {:cont, %{state: next_state}}
end)