Nimble_csv dump with each column quoted

I want to dump some data to a CSV file where each column value is separated with a semicolon and wrapped inside double quotes. The semicolon is no problem, but the double quotes are.

This does not work, no double quotes:

NimbleCSV.define(MyParser, separator: ";", escape: "\"")
data = [["firstname", "lastname"], ["Foo", "Bar"]]
IO.iodata_to_binary MyParser.dump_to_iodata(data)

That will return:

"firstname;lastname\nFoo;Bar\n"

But i want this:

"\"firstname\";\"lastname\"\n\"Foo\";\"Bar\""

How to wrap each column with double quotes?

:wave:

Would

NimbleCSV.define(MyParser, separator: ";", escape: "\"")
data = [["firstname", "lastname"], ["Foo", "Bar"]]

data
|> Enum.map(fn row ->
  Enum.map(row, fn value -> [?", value, ?"] end)
end)
|> MyParser.dump_to_iodata()
|> IO.iodata_to_binary 

work?

As an example

iex(1)> ?"
34
iex(2)> [?", "hello", ?"]
[34, "hello", 34]
iex(3)> IO.iodata_to_binary [?", "hello", ?"]
"\"hello\""

I useda an iolist here since I suspect it might be a bit more efficient than "\"#{value}\"".

iex(1)> data = [["\"firstname\"", "\"lastname\""], ["\"Foo\"", "\"Bar\""]]
[["\"firstname\"", "\"lastname\""], ["\"Foo\"", "\"Bar\""]]
iex(2)> IO.iodata_to_binary MyParser.dump_to_iodata(data)
"\"\"\"firstname\"\"\";\"\"\"lastname\"\"\"\n\"\"\"Foo\"\"\";\"\"\"Bar\"\"\"\n"
iex(3)> 

… so not exactly.

rfc4180

  1. Each field may or may not be enclosed in double quotes (however
    some programs, such as Microsoft Excel, do not use double quotes
    at all). If fields are not enclosed with double quotes, then
    double quotes may not appear inside the fields.

NimbleCSV has no problem consuming

"\"firstname\";\"lastname\"\n\"Foo\";\"Bar\""

it just doesn’t seem interested in producing it (likely to avoid unnecessarily bloating the output).

iex(1)> MyParser.parse_string("\"firstname\";\"lastname\"\n\"Foo\";\"Bar\"",[headers: false])
[["firstname", "lastname"], ["Foo", "Bar"]]
iex(2)> 
  1. Fields containing line breaks (CRLF), double quotes, and commas
    should be enclosed in double-quotes.

NimbleCSV will only use the double quotes when absolutely necessary, e.g.:

iex(1)> data = [["firstname", "lastname"], ["Fo\no", "Bar"]]
[["firstname", "lastname"], ["Fo\no", "Bar"]]
iex(2)> IO.iodata_to_binary MyParser.dump_to_iodata(data)   
"firstname;lastname\n\"Fo\no\";Bar\n"
iex(3)> data = [["firstname", "lastname"], ["Fo\"o", "Bar"]]
[["firstname", "lastname"], ["Fo\"o", "Bar"]]
iex(4)> IO.iodata_to_binary MyParser.dump_to_iodata(data)   
"firstname;lastname\n\"Fo\"\"o\";Bar\n"
iex(5)> 

that last example also demonstrating

  1. If double-quotes are used to enclose fields, then a double-quote
    appearing inside a field must be escaped by preceding it with
    another double quote.
3 Likes

Thank you for your great explanation! Considering rfc4180, it all makes sense of course. I would welcome a force_escape option for NimbleCSV though.

Now I’m going to find out how many clients rely on our current (quoted) CSV output format.

#
# relies on implementation details !!!
# may have undesirable side effects !!!
# USE AT YOUR OWN RISK !!!
#

defmodule Nimble do

  @separator ";"
  @line_separator "\n"
  @escape "\""

  # http://elixir-recipes.github.io/strings/list-with-alphabet/
  @lower for n <- ?a..?z, do: << n :: utf8 >> 
  @upper for n <- ?A..?Z, do: << n :: utf8 >>
  @numeric for n <- ?0..?9, do: << n :: utf8 >>
  @symbol for n <- [?_,?-,?+,?/,?*], do: << n :: utf8 >>

  @reserved [@escape, @separator, @line_separator] ++ @lower ++ @upper ++ @numeric ++ @symbol

  def make_options do
    [
      separator: @separator,
      escape: @escape,
      line_separator: @line_separator,
      reserved: @reserved
    ]
  end

end

NimbleCSV.define(MyParser, Nimble.make_options())
iex(1)> data = [["firstname", "lastname"], ["Foo", "Bar"]]
[["firstname", "lastname"], ["Foo", "Bar"]]
iex(2)> IO.iodata_to_binary MyParser.dump_to_iodata(data)
"\"firstname\";\"lastname\"\n\"Foo\";\"Bar\"\n"
iex(3)> 

… and that still doesn’t cover an empty string.

This seems like a more reasonable approach:

defmodule Nimble do

  @separator ?;
  @line_separator ?\n
  @escape ?"

  def make_options do
    [
      separator: << @separator :: utf8 >>,
      line_separator: << @line_separator :: utf8 >>,
      escape: << @escape :: utf8 >>
    ]
  end

  defp escape_entry([@escape, value, @escape] = entry) when is_binary(value),
    do: entry
  defp escape_entry(value) when is_binary(value),
    do: [@escape, value, @escape]

  defp escape_row([], ys),
    do: :lists.reverse(ys)
  defp escape_row([entry, sep | xs], ys),
    do: escape_row(xs, [sep, escape_entry(entry)|ys])

  def escape_row(row),
    do: escape_row(row,[])

  defp escape_list([], ys),
    do: :lists.reverse(ys)
  defp escape_list([row | xs], ys),
    do: escape_list(xs, [escape_row(row)|ys])

  def escape_list(list),
    do: escape_list(list,[])

end

NimbleCSV.define(MyParser, Nimble.make_options())
iex(1)> data = [["firstname", "lastname"], ["Fo;o", "Bar"]]
[["firstname", "lastname"], ["Fo;o", "Bar"]]
iex(2)> IO.iodata_to_binary(Nimble.escape_list(MyParser.dump_to_iodata(data)))
"\"firstname\";\"lastname\"\n\"Fo;o\";\"Bar\"\n"
iex(3)> data = [["firstname", "lastname"], ["", "Bar"]]                       
[["firstname", "lastname"], ["", "Bar"]]
iex(4)> IO.iodata_to_binary(Nimble.escape_list(MyParser.dump_to_iodata(data)))
"\"firstname\";\"lastname\"\n\"\";\"Bar\"\n"
iex(5)>