Deconstructing a list with only two values

I have a list that only has two values in it, and I want to place the elements in a map. Is there an easy way to deconstruct the list so that I can get the last value without it being a list itself?

Here’s what I’m currently doing:

def split_metadata(metadata) do
  key_value_pairs =
    metadata
    |> String.split(:binary.compile_pattern([":", ";"]), trim: true)
    |> Stream.chunk_every(2, 2, :discard)
    |> Enum.reduce(%{}, fn [key | [value | []]], acc -> Map.put(acc, key, value) end)
end
iex > "build:1.2;branch:git_something_develop;branchname:weeble;key:value;woot:" |> Artifact.split_metadata()
%{
  "branch" => "git_something_develop",
  "branchname" => "weeble",
  "build" => "1.2",
  "key" => "value"
}

That works just fine, but it’s kind of ugly, the [ key | [value | []]] part that is.

I know there’s a List.first and a List.last, but that’s ugly too.

[key, value] is equivalent to [key | [value | []]]. In other words:

iex(1)> [a, b] = [1, 2]
[1, 2]
iex(2)> a
1
iex(3)> b
2
5 Likes

Doh, I see what I’m doing wrong. I’m doing [ key | value ] instead of [ key , value ].

Also, you could use Map.new(&List.to_tuple/1) instead of your reduce call, it does effectively the same thing.

3 Likes

Nice, thanks for the tip!

Some unsolicited remarks.

Apparently this isn’t unusual:

|> String.split(:binary.compile_pattern([":", ";"]), trim: true)

Without too much effort I found another, similar example:

https://gist.github.com/ltfschoen/a52109ea5a9e020fa4abcaacbc43e8af

Given the context this approach may be reasonable.

However on first blush that transformation would set a red flag for me on review. If I had to put it into words I would say:

The transformation is (willfully) disregarding the natural shape of the data (or it is ignoring the implied, natural boundaries/seams).

Effectively both “:” and “;” are treated as delimiters with identical meaning which isn’t the intention of the format. The test case didn’t make me feel any better. Without much imagination it could be rewritten as:

"build:1.2;branch:git_something_develop;branchname:weeble;woot:;key:value"

and the code as implemented would return:

%{"branch" => "git_something_develop", "branchname" => "weeble", "build" => "1.2", "woot" => "key"}

Well, that is never gonna happen …

Fine.

But for the sake of argument: valueless keys aren’t unheard of (e.g. disabled, readonly, required attributes in an input HTML tag). For the moment lets assume that we want to keep those keys - for example:

  defp split_pairs(pair, list) do
    case String.split(pair, ":") do
      [key, value] ->
        [{String.trim(key), String.trim(value)} | list]

      _ ->
        # ignore "non-pair"
        list
    end
  end

  defp to_key_values({key, ""}, list),
    do: [{key, key} | list]

  defp to_key_values({_, _} = key_value, list),
    do: [key_value | list]

  defp to_key_values(_, list),
    # ignore values that "don't fit"
    do: list

  def split(metadata) do
    metadata
    |> String.split(";")
    |> List.foldl([], &split_pairs/2)
    |> List.foldl([], &to_key_values/2)
    |> Map.new()
  end

or when discarding valueless keys:

  defp split_pairs1(pair, list) do
    case String.split(pair, ":", trim: true) do
      [key, value] ->
        [{String.trim(key), String.trim(value)} | list]

      _ ->
        # ignore "non-pair"
        list
    end
  end

  def split1(metadata) do
    metadata
    |> String.split(";")
    |> List.foldl([], &split_pairs1/2)
    |> Map.new()
  end

In my opinion both of these examples take (better) advantage of the natural, implied shape of the data. The other approach feels more like “smashing everything to bits and then picking up the pieces”.

1 Like

@peerreynders, thanks for unsolicited advice. You were definitely right here. I was ignoring the intended shape of the data being passed in and was just smashing it all together. I’ve re-written it as follows:

defmodule CloudVaultEx.Artifact.MetadataMapper do
  @moduledoc """
  Artifact metadata comes in a string format that should have two delimiters
  separating keys/values and key/value pairs from each other.  ";" is used to
  delimit key/value pairs, while ":" is used to delimit keys and values.

  This module provides helper functions to transform a metadata string into
  a Map.

  Example:

  "build:1.2;branch:git_something_develop;branchname:weeble;key:value;"
  |> MetadataMapper.map_metadata()

  result:
  %{
    "build" => "1.2",
    "branch" => "git_something_develop",
    "branchname" => "weeble",
    "key" => "value"
  }
  """

  @doc """
  Transforms a metadata string into a map. Key/Value pairs that have empty
  string values are discarded. Key/Value pairs where the key has spaces are
  also discarded.

  ## Examples

    iex> metadata_string = "build:1.2;branch:git_something_develop"
    iex> map_metadata(metadata_string)
    %{
      "build" => "1.2",
      "branch" => "git_something_develop"
    }

    iex> metadata_string = "build:1.2;key_with_no_value:;branch:git_something_develop"
    iex> map_metadata(metadata_string)
    %{
      "build" => "1.2",
      "branch" => "git_something_develop"
    }

    iex> metadata_string = "build:1.2;key_with_space_value: ;branch:git_something_develop"
    iex> map_metadata(metadata_string)
    %{
      "build" => "1.2",
      "branch" => "git_something_develop"
    }

    iex> metadata_string = "build:1.2;key with spaces:value;branch:git_something_develop"
    iex> map_metadata(metadata_string)
    %{
      "build" => "1.2",
      "branch" => "git_something_develop"
    }
  """
  def map_metadata(""), do: %{}

  def map_metadata(metadata_string) when is_binary(metadata_string) do
    metadata_string
    |> split_key_value_pairs()
    |> split_key_values()
    |> transform_to_map()
  end

  @doc """
  Splits a metadata string into key/value pairs.  Key/Value pairs are delimited
  by the ';' character.

  ## Examples

    iex> metadata = "build:1.2;branch:git_something_develop"
    iex> split_key_value_pairs(metadata)
    ["build:1.2", "branch:git_something_develop"]

    iex> metadata = "build:1.2;branch:git_something_develop;key:"
    iex> split_key_value_pairs(metadata)
    ["build:1.2", "branch:git_something_develop", "key:"]

    iex> metadata = "build:1.2;key_with_space_value: ;branch:git_something_develop"
    iex> split_key_value_pairs(metadata)
    ["build:1.2", "key_with_space_value:", "branch:git_something_develop" ]

    iex> metadata = "build:1.2"
    iex> split_key_value_pairs(metadata)
    ["build:1.2"]

    iex> split_key_value_pairs("this string does not match the shape we expect")
    []
  """
  def split_key_value_pairs(metadata_string) when is_binary(metadata_string) do
    case String.contains?(metadata_string, ";") do
      true ->
        metadata_string
        |> String.split(";", trim: true)
        |> Enum.map(&String.trim/1)

      false ->
        case String.contains?(metadata_string, ":") do
          true -> [String.trim(metadata_string)]
          false -> []
        end
    end
  end

  @doc """
  Splits a list of metadata key/value pairs into a list of tuples where the
  first element in each tuple is the key, and the second element is the value.
  Any key/value pair where the value is an empty string is discarded from the
  final list. Keys may not have spaces.


  Keys and values are delimited by the ':' character.

  ## Examples

    iex> metadata = ["build:1.2", "branch:git_something_develop"]
    iex> split_key_values(metadata)
    [{"build", "1.2"}, {"branch", "git_something_develop"}]

    iex> metadata = ["build:1.2", "branch:git_something_develop", "key:"]
    iex> split_key_values(metadata)
    [{"build", "1.2"}, {"branch", "git_something_develop"}]

    iex> split_key_values([])
    []

    iex> split_key_values(["this string:does not match the shape we expect"])
    []
  """
  def split_key_values([]), do: []

  def split_key_values(metadata_list) when is_list(metadata_list) do
    metadata_list
    |> Stream.filter(&(!String.ends_with?(&1, ":")))
    |> Stream.map(&String.split(&1, ":"))
    |> Stream.reject(fn [key, _] -> String.contains?(key, " ") end)
    |> Enum.map(&List.to_tuple(&1))
  end

  @doc """
  Transforms a list of key/value tuples into a Map where the first element
  of the tuple is the key, and the second value is the value.

  ## Examples:

    iex> key_values = [{"build", "1.2"}, {"branch", "git_something_develop"}]
    iex> transform_to_map(key_values)
    %{
      "build" => "1.2",
      "branch" => "git_something_develop"
    }

    iex> transform_to_map([])
    %{}
  """
  def transform_to_map([]), do: %{}

  def transform_to_map(key_value_tuples) when is_list(key_value_tuples) do
    key_value_tuples
    |> Map.new()
  end
end
2 Likes

Yikes. I didn’t actually expect you to change anything - I just wanted to present my personal impression. Now I’m wondering whether I pushed you too far.

Again, my personal take for “consideration”:

  def transform_to_map(key_value_tuples) when is_list(key_value_tuples) do
    key_value_tuples
    |> Map.new()
  end

As per style guide would be written as

  def transform_to_map(key_value_tuples) when is_list(key_value_tuples) do
    Map.new(key_value_tuples)
  end

or

  def transform_to_map(key_value_tuples) when is_list(key_value_tuples),
    do: Map.new(key_value_tuples)

which raises the question: does this function provide any value beyond Map.new/1? I personally have no qualms about single line functions as long as the name is more clear than than a cryptic one liner.

  def split_key_values(metadata_list) when is_list(metadata_list) do
    metadata_list
    |> Stream.filter(&(!String.ends_with?(&1, ":")))
    |> Stream.map(&String.split(&1, ":"))
    |> Stream.reject(fn [key, _] -> String.contains?(key, " ") end)
    |> Enum.map(&List.to_tuple(&1))
  end

If your sample string has a representative length then Stream might be overkill here.

Pipelining is useful but it can sometimes lead to fragmentation of logic. This way may be crystal clear to you and then it’s fine.

I would lean towards more functions and less pipes. A grossly exaggerated example:

  def split(metadata) do
    metadata
    |> String.split(";")
    |> List.foldl([], &key_value_cons_list/2)
    |> Map.new()
  end

  # reducer logic
  # i.e. cons key_value to list if "pair" is one
  defp key_value_cons_list(pair, list) do
    case to_key_value(pair) do
      {:ok, key_value} ->
        [key_value | list]

      _ ->
        # ignore "non-pair"
        list
    end
  end

  # the string-to-key_value transformation happens here
  defp to_key_value(pair) do
    with [fst, snd] <- String.split(pair, ":", trim: true),
         {{:ok, key}, {:ok, value}} <- {to_key(fst), to_value(snd)} do
      {:ok, {key, value}}
    else
      _ ->
        :error
    end
  end

  # sanitize key
  defp to_key(name) do
    key = String.trim(name)
    if(String.contains?(key, " "), do: :error, else: {:ok, key})
  end

  # sanitize value
  defp to_value(value),
    do: {:ok, String.trim(value)}

It’s a balancing act.

Further

  def split_key_value_pairs(metadata_string) when is_binary(metadata_string) do
    case String.contains?(metadata_string, ";") do
      true ->
        metadata_string
        |> String.split(";", trim: true)
        |> Enum.map(&String.trim/1)

      false ->
        case String.contains?(metadata_string, ":") do
          true -> [String.trim(metadata_string)]
          false -> []
        end
    end
  end

I don’t think the branching is adding any value. This code should focus on separating the “key value candidates” from one another. Whether or not any particular candidate is a key value should be resolved separately.

1 Like