Function data changed depending on return value?

This question does sound weird as Elixir should have immutability but this is actually what is happening!

I have this code:

    html = # get the HTML from somewhere but this html is correct

    {:ok, file} = File.open("html", [:write])
    IO.binwrite(file, html)
    File.close(file)

    new_html = remove_unused(html)

    {:ok, file} = File.open("new_html", [:write])
    IO.binwrite(file, new_html)
    File.close(file)

    new_html
  end

  # Removes every `[<name>.start] ... [<name>.end]` span (markers included)
  # from `body` and returns the resulting string.
  #
  # When the scan finds no span, `body` is returned unchanged. Otherwise each
  # span found by the scan is removed once, in the order it was found.
  #
  # Debug side effects: when spans are found, all matches are dumped to the
  # file "regex" and match N is dumped to a file named "N", both relative to
  # the current working directory. A failed dump raises `File.Error`.
  defp remove_unused(body) do
    case Regex.scan(~r/\[[\s\S]*?\.start\][\s\S]*?\[[\s\S]*?\.end\]/, body) do
      [] ->
        body

      matches ->
        # File.write!/2 opens, writes and closes in a single call, so no file
        # handle is left open if the write fails part-way.
        File.write!("regex", matches)

        indexed = Enum.with_index(matches)

        # Enum.each/2 instead of Enum.map/2: the dumps are pure side effects
        # and the mapped list was never used.
        Enum.each(indexed, fn {match, index} ->
          File.write!("#{index}", "#{match}")
        end)

        Enum.reduce(indexed, body, fn {match, _index}, remaining ->
          # `match` is the list returned by Regex.scan/2 for one hit (a single
          # element here, since the regex has no capture groups);
          # String.replace/4 accepts a list of strings as its pattern.
          String.replace(remaining, match, "", global: false)
        end)
    end
  end

Where I want to replace [something.start], [something.end] and everything between them with an empty string.

When the function returns new_html, that data is malformed. html appears to change right after it is assigned, even though it is never reassigned afterwards, and the resulting html is almost empty (it has the closing statements but almost nothing in between).
(I checked this with using the files for debugging)

Now look at this… if I change the return value of the function to html instead of new_html, the html is not malformed and all of the data in the files is correct, but that way I can’t access the value of new_html when I invoke the function! (Reading the data from the file is not an option.)

Could you provide an MRE (minimal reproducible example)?

Let’s say the html is:

<body>
    [data.start]
        <h1> a </h1>
    [data.end]

    [ads.start]
        <h2>  b </h2>
    [ads.end]
</body>

(I can’t really say how the data is transformed but the output html is the same in the code which is the last html provided!)

When I send over some params and return new_html the value of html and new_html is:

<body>
    

    
        <h2>  b </h2>
    
</body>

But when I return html, the value of html and new_html is (this should be the result even when returning new_html):

<body>
    
        <h1> a </h1>
    
        <h1> a </h1>
    

    
        <h2>  b </h2>
    
</body>

Edit: Having those extra lines (\n) is nothing to worry about, the data just needs to be present. And just in case this is my Elixir/Erlang:

Erlang/OTP 22 [erts-10.6] [source] [64-bit] [smp:8:8] [ds:8:8:10] [async-threads:1] [hipe]

Elixir 1.9.4 (compiled with Erlang/OTP 22)

How do you check for the value of html here?


Could you perhaps create some github repository with example data and a README that explains your expectations and what you get instead?

I can’t really create a repo, as it’s using multiple technologies and there are some bits of code that are kind of crucial for this which I don’t want to be public (sorry)!

And as for how I am checking the value of html, in the opening post, I write the value of it into a file and the value of new_html into a file as well.

In my earlier post with the MRE, I gave the input data and what should be expected for both html and new_html. Both of them should have the same value (which they kind of do), but that value is not correct.

Sorry, but I do not see anything obvious breaking, and all I did was to rename stuff and to replace your file writes to IO.inspect:

$ cat foo.exs
defmodule M do
  @moduledoc """
  Debugging harness that strips `[<name>.start] ... [<name>.end]` spans from a
  string and traces every intermediate value with `IO.inspect/2`.
  """

  @doc """
  Prints `html`, removes all marker spans via `remove_unused/1`, prints the
  result and returns it.
  """
  def strip_html(html) do
    IO.inspect(html, label: :html_in)

    stripped = remove_unused(html)
    IO.inspect(stripped, label: :html_out)
  end

  @doc """
  Returns `body` with each marker span found by the scan removed once.

  When nothing matches, `body` is returned untouched and nothing is printed.
  """
  def remove_unused(body) do
    found = Regex.scan(~r/\[[\s\S]*?\.start\][\s\S]*?\[[\s\S]*?\.end\]/, body)
    drop_matches(found, body)
  end

  # Nothing matched: hand the input back as-is.
  defp drop_matches([], body), do: body

  # At least one match: trace the input, then remove the matches one by one.
  defp drop_matches([_ | _] = found, body) do
    IO.inspect(body, label: :in_removed_body_in)

    found
    |> Enum.with_index()
    |> Enum.reduce(body, &drop_match/2)
  end

  # Removes the first occurrence of a single match, tracing before and after.
  defp drop_match({hit, position}, current) do
    IO.inspect(hit, label: "match #{position}")
    IO.inspect(current, label: "body before #{position}")

    remaining = String.replace(current, hit, "", global: false)
    IO.inspect(remaining, label: "body after #{position}")
  end
end

# Sample document with two marker spans; both are expected to be stripped.
sample_html = """
<body>
    [data.start]
        <h1> a </h1>
    [data.end]

    [ads.start]
        <h2>  b </h2>
    [ads.end]
</body>
"""

M.strip_html(sample_html)
$ elixir foo.exs
html_in: "<body>\n    [data.start]\n        <h1> a </h1>\n    [data.end]\n\n    [ads.start]\n        <h2>  b </h2>\n    [ads.end]\n</body>\n"
in_removed_body_in: "<body>\n    [data.start]\n        <h1> a </h1>\n    [data.end]\n\n    [ads.start]\n        <h2>  b </h2>\n    [ads.end]\n</body>\n"
match 0: ["[data.start]\n        <h1> a </h1>\n    [data.end]"]
body before 0: "<body>\n    [data.start]\n        <h1> a </h1>\n    [data.end]\n\n    [ads.start]\n        <h2>  b </h2>\n    [ads.end]\n</body>\n"
body after 0: "<body>\n    \n\n    [ads.start]\n        <h2>  b </h2>\n    [ads.end]\n</body>\n"
match 1: ["[ads.start]\n        <h2>  b </h2>\n    [ads.end]"]
body before 1: "<body>\n    \n\n    [ads.start]\n        <h2>  b </h2>\n    [ads.end]\n</body>\n"
body after 1: "<body>\n    \n\n    \n</body>\n"
html_out: "<body>\n    \n\n    \n</body>\n"

I’ve read the comments several times but still can’t figure out what’s incorrect and how it should be changed.

1 Like

Basically data mid-function is changed depending on the return value of the function. The output of the files and the variables themselves differ if I return html or new_html at the end on the function (they essentially shouldn’t differ) and I have no clue why as this might be a bug in Elixir but I don’t know.

If I return html the logic works correctly and the correct data is written in the files, but if I return new_html elixir just goes haywire!

Then provide a fully reproducible example.

The code you have shown in this thread does not reproduce the issue when inspecting rather than writing to random files.

1 Like

I can’t really give exactly all of the code but I’ll add a bit more pseudo-code. (I have the same issues even when inspecting but I am using files to have “logs” more permanent and clear)

The pseudo-code for initially editing the HTML:

Input:

<body>
    [data.start]
        <h1> a </h1>
    [data.end]

    [ads.start]
        <h2>  b </h2>
    [ads.end]
</body>

Logic:

input = # (the half-html I had just written above)

html = # Do a function which parses the html (I can't really show this part of the code)

When I am returning html as the value at the end of the main function (not remove_unused), the value of html from the above-mentioned parse logic is:

<body>
    
        <h1> a </h1>
    
        <h1> a </h1>
    

    
        <h2>  b </h2>
    
</body>

If I were to now run the remove_unused function on the current state of html, the output should be the same as the input as regex shouldn’t find anything meeting the ~r/\[[\s\S]*?\.start\][\s\S]*?\[[\s\S]*?\.end\]/ requirement.

Now ideally I would get new_html as the return value of remove_unused(html) and for it to have the same value of html. Which it actually does!

new_html = remove_unused(html)

# new_html is actually the same as html

But if I try to return the new_html as the value at the end of the function (again, not remove_unused but the one which calls remove_unused) (to literally just change the last line of the function), the value of html after the initial parse of input is:

<body>
    

    
        <h2>  b </h2>
    
</body>

You can see that the <h1> headers are now missing. I didn’t change any input at all, I just changed the last line which is the return value of the function.

Of course, as html is now changed to a different value (for reasons unknown as, of course, the return line is way after assigning html) the value of new_html is changed and thus the output of the method and files.

I think I know what the problem is there: greediness of the regular expressions. In other words, a.*a will match as much as it can, so in the string foo abba bar it will match abba ba. To make it non-greedy you need to use a.*?a, and then it will work as expected.

He already uses *? everywhere in the regex, so it’s not about greediness.

But this is probably exactly what we need to know.

Does it return a string or something else? Please at least tell us what IO.inspect html looks like, exactly.

Sounds like you want the function to return earlier?

I don’t know what’s even happening anymore. Elixir went completely haywire!

For this I’ll have to post a bit more code how the html is formed. I can’t really give how the variables inside it are created:

html =
  Enum.reduce(indexed_list, body, fn({original_line, index}, buffer_body) ->
    String.replace(buffer_body, "[#{type}.start]#{original_line}[#{type}.end]", Enum.at(replaced_list_of_elements, index), global: false)
  end)

I did what @NobbZ said for inspecting, here are the results.

I firstly removed all of the regex stuff after getting the html and then just returned the html:

html =
  Enum.reduce(indexed_list, body, fn({original_line, index}, buffer_body) ->
    String.replace(buffer_body, "[#{type}.start]#{original_line}[#{type}.end]", Enum.at(replaced_list_of_elements, index), global: false)
  end)

IO.inspect html

html # return

And the value which I get is:

"<body>\r\n    [data.start]\r\n        <h1> a </h1>\r\n    [data.end]\r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
"<body>\r\n    \r\n        <h1> a </h1>\r\n    \r\n        <h1> a </h1>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>" 

The worst part is that I directly after that render the HTML and when it’s rendered, I only get (which is the same as what I get in the file):

<body>
    
        <h1> a </h1>
    
        <h1> a </h1>
    

    
        <h2>  b </h2>
    
</body>

So the inspected value contains the rendered value but even contains kind-of a half-baked value as well?

Now when I do:

html =
  Enum.reduce(indexed_list, body, fn({original_line, index}, buffer_body) ->
    String.replace(buffer_body, "[#{type}.start]#{original_line}[#{type}.end]", Enum.at(replaced_list_of_elements, index), global: false)
  end)

IO.inspect html

new_html = remove_unused(html)

IO.inspect new_html

html

The output for both is:

"<body>\r\n    [data.start]\r\n        <h1> a </h1>\r\n    [data.end]\r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
"<body>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
"<body>\r\n    \r\n        <h1> a </h1>\r\n    \r\n        <h1> a </h1>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
"<body>\r\n    \r\n        <h1> a </h1>\r\n    \r\n        <h1> a </h1>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"

And it is still being rendered as:



<body>
    
        <h1> a </h1>
    
        <h1> a </h1>
    

    
        <h2>  b </h2>
    
</body>

And now when I finally change the return value of the function:

html =
  Enum.reduce(indexed_list, body, fn({original_line, index}, buffer_body) ->
    String.replace(buffer_body, "[#{type}.start]#{original_line}[#{type}.end]", Enum.at(replaced_list_of_elements, index), global: false)
  end)

IO.inspect html

new_html = remove_unused(html)

IO.inspect new_html

new_html

The inspect is:

"<body>\r\n    [data.start]\r\n        <h1> a </h1>\r\n    [data.end]\r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
"<body>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
"<body>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
"<body>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"

And this is rendered:

<body>
    

    
        <h2>  b </h2>
    
</body>

Edit: The return value is then put into bamboo as the html of an e-mail!

You have 2 inspect calls but 4 values displayed? That does not make sense, can you please provide a :label to all inspect calls to distinguish outputs?

Also please make sure that you don’t print during remove unused.

Also you are talking about “rendering”, where does that happen and how? Is it rendering from the files you wrote or do you send the values there?

Because what you have shown us wouldn’t cause any rendering of intermediate values.

It is sent as an email and then rendered as one once the user opens it. I send them to mailtrap and it is written there under the HTML section like I pasted in the post above.

And I am still getting double calls for different values! Here are my results:

First pair:

html =
  Enum.reduce(indexed_list, body, fn({original_line, index}, buffer_body) ->
    String.replace(buffer_body, "[#{type}.start]#{original_line}[#{type}.end]", Enum.at(replaced_list_of_elements, index), global: false)
  end)

IO.inspect html, label: :only_html

html
only_html: "<body>\r\n    [data.start]\r\n        <h1> a </h1>\r\n    [data.end]\r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
only_html: "<body>\r\n    \r\n        <h1> a </h1>\r\n    \r\n        <h1> a </h1>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"

Second pair:

html =
  Enum.reduce(indexed_list, body, fn({original_line, index}, buffer_body) ->
    String.replace(buffer_body, "[#{type}.start]#{original_line}[#{type}.end]", Enum.at(replaced_list_of_elements, index), global: false)
  end)

IO.inspect html, label: :only_html

new_html = remove_unused(html)

IO.inspect new_html, label: :new_html

html
only_html: "<body>\r\n    [data.start]\r\n        <h1> a </h1>\r\n    [data.end]\r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
new_html: "<body>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
only_html: "<body>\r\n    \r\n        <h1> a </h1>\r\n    \r\n        <h1> a </h1>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
new_html: "<body>\r\n    \r\n        <h1> a </h1>\r\n    \r\n        <h1> a </h1>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"

Third pair:

html =
  Enum.reduce(indexed_list, body, fn({original_line, index}, buffer_body) ->
    String.replace(buffer_body, "[#{type}.start]#{original_line}[#{type}.end]", Enum.at(replaced_list_of_elements, index), global: false)
  end)

IO.inspect html, label: :only_html

new_html = remove_unused(html)

IO.inspect new_html, label: :new_html

new_html
only_html: "<body>\r\n    [data.start]\r\n        <h1> a </h1>\r\n    [data.end]\r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
new_html: "<body>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
only_html: "<body>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"
new_html: "<body>\r\n    \r\n\r\n    \r\n        <h2>  b </h2>\r\n    \r\n</body>"

Do you call that function directly from iex during debugging this issue or do you have another function which you call that then wraps the one you are currently debugging and calls it twice? Perhaps even with the output from the last call?

Yeah, I just fixed the issue… I uninstalled Elixir completely from my system, rebooted, reinstalled it, rebooted again, and it seems the issue no longer exists!

Edit: It was anticlimactic, I know!