Best way to parse bytes from a file and turn them into struct fields

I started learning Elixir day before yesterday and have already started porting my MP4 parsing library written in Python to Elixir.
I am dealing with parsing a collection of bytes from the MP4 file and converting them into struct fields. For e.g. this is how i am parsing for one of the structs -

# Movie Header Box
defmodule Mvhd do
  defstruct(
    name: :mvhd,
    creation_time: 0,
    modification_time: 0,
    timescale: 0,
    duration: 0,
    next_track_id: 0
  )
end

defimpl Box, for: Mvhd do
  def parse(_, file, _) do
    <<version::integer-32>> = IO.binread(file, 4)

    mvhd = %Mvhd{}

    mvhd =
      if version == 0 do
        <<creation_time::integer-32>> = IO.binread(file, 4)
        <<modification_time::integer-32>> = IO.binread(file, 4)
        <<timescale::integer-32>> = IO.binread(file, 4)
        <<duration::integer-32>> = IO.binread(file, 4)

        mvhd
        |> Map.put(:creation_time, creation_time)
        |> Map.put(:modification_time, modification_time)
        |> Map.put(:timescale, timescale)
        |> Map.put(:duration, duration)
      else
        <<creation_time::integer-64>> = IO.binread(file, 8)
        <<modification_time::integer-64>> = IO.binread(file, 8)
        <<timescale::integer-32>> = IO.binread(file, 4)
        <<duration::integer-64>> = IO.binread(file, 8)

        mvhd
        |> Map.put(:creation_time, creation_time)
        |> Map.put(:modification_time, modification_time)
        |> Map.put(:timescale, timescale)
        |> Map.put(:duration, duration)
      end

    :file.position(file, {:cur, 76})
    <<next_track_id::integer-32>> = IO.binread(file, 4)
    mvhd |> Map.put(:next_track_id, next_track_id)
  end
end

This is based on whatever i’ve scavenged so far on the internet and it works.
I do not want to use any third party bytes-parsing/parser-combinator libraries for now.
But i do want experienced Elixir devs to suggest a better way (if any) to rewrite the above code.
Thank you!

defimpl Box, for: Mvhd do
  def parse(_, file, _) do
    # NOTE: consider error handling. IO.binread can return either:
    #   * less bytes than you asked for (causing a MatchError)
    #   * :eof or {:error, reason}, also a MatchError
    <<version::integer-32>> = IO.binread(file, 4)

    mvhd = %Mvhd{}

    mvhd =
      # NOTE: consider breaking these out into helper functions
      #       that pattern-match on version
      if version == 0 do
        # NOTE: consider doing a single larger read and pattern-matching
        #       all the fields in one <<>> expression
        <<creation_time::integer-32>> = IO.binread(file, 4)
        <<modification_time::integer-32>> = IO.binread(file, 4)
        <<timescale::integer-32>> = IO.binread(file, 4)
        <<duration::integer-32>> = IO.binread(file, 4)

        # NOTE: consider either a struct literal or record update syntax here
        #       instead of Map.put
        mvhd
        |> Map.put(:creation_time, creation_time)
        |> Map.put(:modification_time, modification_time)
        |> Map.put(:timescale, timescale)
        |> Map.put(:duration, duration)
      else
        <<creation_time::integer-64>> = IO.binread(file, 8)
        <<modification_time::integer-64>> = IO.binread(file, 8)
        <<timescale::integer-32>> = IO.binread(file, 4)
        <<duration::integer-64>> = IO.binread(file, 8)

        mvhd
        |> Map.put(:creation_time, creation_time)
        |> Map.put(:modification_time, modification_time)
        |> Map.put(:timescale, timescale)
        |> Map.put(:duration, duration)
      end

    :file.position(file, {:cur, 76})
    <<next_track_id::integer-32>> = IO.binread(file, 4)
    mvhd |> Map.put(:next_track_id, next_track_id)
  end
end
1 Like

The following is a little more idiomatic Elixir using pattern matching to extract the head fields based upon the version. Its for ideas only. A couple of comments:

  1. I think mp4 is big-endian so I noted that in the pattern matches
  2. Using multiple function heads that pattern match on some binary data (like version) is quite a common approach
  3. Reading a single chunk of data reduces the number of IOs and allows pattern matching on the whole header which should be more efficient
  4. According to this reference the mvhd header box version is different to integer-32-big?

→ 1 byte version = 8-bit unsigned value
- if version is 1 then date and duration values are 8 bytes in length
→ 3 bytes flags = 24-bit hex flags (current = 0)

# Movie Header Box
defmodule Mvhd do
  defstruct(
    name: :mvhd,
    creation_time: 0,
    modification_time: 0,
    timescale: 0,
    duration: 0,
    next_track_id: 0
  )

  def parse(_, file, _) do
    {creation_time, modification_time, timescale, duration, next_track} =
      extract_meta(IO.binread(file, 112))

    %__MODULE__{
      creation_time: creation_time,
      modification_time: modification_time,
      timescale: timescale,
      duration: duration,
      next_track_id: next_track
    }
  end

  def extract_meta(<<0::integer-32-big, rest::binary>>) do
    <<
      creation_time::integer-32-big,
      modification_time::integer-32-big,
      timescale::integer-32-big,
      duration::integer-32-big,
      _skip::binary-size(76),
      next_track_id::integer-32-big
    >> = rest

    {creation_time, modification_time, timescale, duration, next_track_id}
  end

  def extract_meta(<<_version::integer-32-big, rest::binary >>) do
    <<
      creation_time::integer-64-big,
      modification_time::integer-64-big,
      timescale::integer-32-big,
      duration::integer-64-big,
      _skip::binary-size(76),
      next_track_id::integer-32-big
    >> = rest

    {creation_time, modification_time, timescale, duration, next_track_id}
  end
end
4 Likes