Trying to write a simple nimble_parsec parser

Hi,

I’m trying to write a parser using NimbleParsec (v1.1.0) that parses arbitrary text wrapped in a delimiter character such as an underscore.

Here are some examples of what I’m hoping the parser will do:

"hello there" => ["hello there"]
"hello _Alex_!" => ["hello ", "_Alex_", "!"]
"Ũnicode is _allŏwed everywhere_ _ŏk?_" => ["Ũnicode is ", "_allŏwed everywhere_", "_ŏk?_"]

Here’s my attempt:

# First attempt at splitting text into plain chunks and `_wrapped_` chunks.
# NOTE: as written this does NOT split the input; the whole string comes back
# as a single chunk (e.g. "hi _there_" => ["hi _there_"]). The comments below
# explain why.
defmodule Txtparser do
  import NimbleParsec

  # Intended to match a run of plain text that stops before a wrapped string.
  # The empty range list puts no restriction on the codepoints accepted, so `_`
  # is matched as well and `utf8_string` runs greedily to the end of the input.
  # `lookahead_not` is only evaluated afterwards, at the position where
  # `utf8_string` stopped; it does not limit how far `utf8_string` reads.
  defcombinatorp :plain_string,
    utf8_string([], min: 1)
    |> lookahead_not(parsec(:wrapped_string))

  # An opening underscore, plain text, and a closing underscore.
  # This refers to :plain_string, which in turn refers back to this combinator
  # (a circular reference, resolved at runtime through `parsec/1`).
  # NOTE(review): because :plain_string also accepts `_`, the closing
  # underscore is presumably swallowed before `string("_")` can match it.
  defcombinatorp :wrapped_string,
    string("_")
    |> parsec(:plain_string)
    |> string("_")

  # Public entry point: repeatedly tries a wrapped string first and falls back
  # to plain text, until neither alternative matches.
  defparsec(:parse,
    repeat(
      choice(
        [parsec(:wrapped_string), parsec(:plain_string)]
      )
    )
  )
end

But I think I’m misunderstanding how lookahead_not works, as the following is happening:

"hi _there_" => ["hi _there_"]

instead of this desired result:

"hi _there_" => ["hi ", "_there_"]
1 Like

maybe I’m missing something. how are you getting them split into separate chunks? When I run your code on "foo _bar_ foo" I get

{:ok, ["foo _bar_ foo"], "", %{}, {1, 0}, 13}

I want them to be split into chunks, but currently they aren’t.

a few NimbleParsec tips:

  1. try not to use defcombinatorp
  2. don’t be afraid to break up into smaller testable chunks
  3. try to limit circular references.
  4. test test test, I like having the Mix.env == :test defparsec block pattern
  5. don’t be afraid to set variables in your “module-building-script”
  6. use post_traverse

here is what I got:

defmodule Np do
  @moduledoc """
  Splits text into plain chunks and chunks wrapped in underscores.

  `parse/1` returns the usual NimbleParsec result tuple; the second element is
  the list of chunks, each one a binary:

      Np.parse("hi _there_")
      #=> {:ok, ["hi ", "_there_"], "", _context, _position, _offset}

      Np.parse("_a_ _b_")
      #=> {:ok, ["_a_", " ", "_b_"], "", _context, _position, _offset}
  """

  import NimbleParsec

  # One or more codepoints other than the `_` delimiter.
  basic_chars =
    utf8_string([not: ?_], min: 1)

  # `_`, a non-empty run of non-underscore text, then a closing `_`.
  # Emits three separate tokens ("_", text, "_"); they are only joined into a
  # single binary where the combinator is used inside `parser` below.
  wrapped_string =
    string("_")
    |> concat(basic_chars)
    |> string("_")

  # Plain text: consume one codepoint at a time, checking BEFORE each codepoint
  # that no wrapped string starts at the current position.
  #
  # Doing the lookahead first (rather than after consuming the codepoint) lets
  # a plain run be a single character long, e.g. the space in "_a_ _b_" or the
  # leading space in " _b_". It also means no extra trailing `utf8_char` is
  # needed to pick up the last character before a wrapped string.
  plain_string =
    times(
      lookahead_not(wrapped_string)
      |> utf8_char([]),
      min: 1
    )
    |> post_traverse(:post_plain)

  # Wrapped strings are tried first; anything else is collected as plain text.
  # `repeat` stops at end of input because `plain_string` needs at least one
  # codepoint.
  parser =
    repeat(
      choice([
        wrapped_string
        |> post_traverse(:post_wrapped),
        plain_string
      ])
    )

  defparsec(:parse, parser)

  # post_traverse callbacks. Both receive the tokens accumulated by their
  # combinator in reverse order and replace them with one joined binary.
  #
  # NOTE(review): the `{acc, context}` return shape matches NimbleParsec
  # v1.1.0; later releases are believed to expect `{rest, acc, context}`.
  # Confirm against the version pinned in the project before upgrading.
  defp post_wrapped(_rest, parsed, context, _line, _offset) do
    {join_tokens(parsed), context}
  end

  defp post_plain(_rest, parsed, context, _line, _offset) do
    {join_tokens(parsed), context}
  end

  # Restores source order and flattens the mix of binaries and codepoints
  # into a single-element accumulator holding one binary.
  defp join_tokens(reversed_tokens) do
    [reversed_tokens |> Enum.reverse() |> List.to_string()]
  end

  # Expose the building blocks as public parsers in the test environment only,
  # so each piece can be unit-tested on its own.
  if Mix.env() == :test do
    defparsec(:basic_chars, basic_chars)
    defparsec(:wrapped_string, wrapped_string)
    defparsec(:plain_string, plain_string)
  end
end

np_test.exs

defmodule NpTest do
  use ExUnit.Case

  # Passes when `outcome` is a successful parse whose token list is exactly
  # `expected`; the remaining tuple elements are ignored.
  defmacrop assert_parses(expected, outcome) do
    quote bind_quoted: [expected: expected, outcome: outcome] do
      assert {:ok, ^expected, _rest, _context, _position, _offset} = outcome
    end
  end

  # Like `assert_parses`, but additionally requires the unparsed remainder of
  # the input to equal `remainder`.
  defmacrop assert_leaves(expected, remainder, outcome) do
    quote bind_quoted: [expected: expected, remainder: remainder, outcome: outcome] do
      assert {:ok, ^expected, ^remainder, _context, _position, _offset} = outcome
    end
  end

  # Passes when `outcome` is a parse error of any kind.
  defmacrop assert_fails(outcome) do
    quote bind_quoted: [outcome: outcome] do
      assert {:error, _reason, _rest, _context, _position, _offset} = outcome
    end
  end

  test "basic_chars" do
    assert_parses(["foo"], Np.basic_chars("foo"))
    assert_leaves(["foo "], "_foo_", Np.basic_chars("foo _foo_"))
    assert_fails(Np.basic_chars("_foo_"))
  end

  test "wrapped_string" do
    assert_parses(["_", "foo", "_"], Np.wrapped_string("_foo_"))
    assert_leaves(["_", "foo", "_"], " bar", Np.wrapped_string("_foo_ bar"))
  end

  test "plain string" do
    assert_parses(["foo"], Np.plain_string("foo"))
    assert_leaves(["foo "], "_bar_", Np.plain_string("foo _bar_"))
  end
end
8 Likes

Amazing, thanks so much! I’ll dig into this and play around with it now - thank you!

1 Like