Nx and Whisper, "cannot build an empty tensor"

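I made a workaround that seems to fix the problem when an empty tensor is generated. This happens when the audio ends at a moment exactly divisible by the chunk length (20 seconds): the last chunk then starts right at the end of the audio, so ffmpeg returns no data for it. The lines marked with `+` are my changes.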

      0..stat.duration//chunk_time
      |> dbg()
      |> Task.async_stream(
        fn ss ->
          args = ~w(-ac 1 -ar 16k -f f32le -ss #{ss} -t #{chunk_time} -v quiet -)
          {data, 0} = System.cmd("ffmpeg", ["-i", path] ++ args)
+          if byte_size(data) > 0 do
+            {ss, Nx.Serving.batched_run(WhisperServing, Nx.from_binary(data, :f32))}
+          else
+            {ss, ""}
+          end
        end,
        max_concurrency: 2,
        timeout: :infinity
      )
      |> Enum.map(fn
+        {:ok, {ss, %{results: [%{text: text}]}}} ->
+          func.(ss, text)
+        {:ok, {ss, ""}} ->
+          func.(ss, "")
      end)
1 Like