Stateful algorithms map to reducers in FP

stevensonmt · June 11, 2022, 6:34pm

As I continue working through Leetcode, I find a lot of problems that are not conducive to immutable data structures, such as sliding window problems using two pointers. What I have found is that I can usually circumvent this by using a reduce function to track any “mutable” variables. For instance this problem I solved in Rust with:

pub fn min_operations(nums: Vec<i32>, x: i32) -> i32 {
        let max = nums.len();
        let target: i32 = nums.iter().sum::<i32>() - x;
        if target == 0 {
            return max as i32
        }
        let mut i = 0;
        let mut sub_sum = 0;
        let mut max_len = 0;
        let mut found = false;
        for j in 0..max {
            sub_sum += nums[j];
            while i <= j && sub_sum > target {
                sub_sum -= nums[i];
                i += 1;
            }
            if sub_sum == target {
                found = true;
                max_len = max_len.max(j - i + 1);
            }
        }
        if found {
            (max - max_len) as i32
        } else {
            -1
        }
    }

You have to track the two pointers i and j as well as the sub_sum and max_len values. This implementation iterates over the end of the subarray with appropriate sum, but it could be implemented to iterate over the beginning. In either case you know the range of values to iterate over, which takes care of tracking one variable. To track the other 3 variables I used a 3 tuple accumulator in a reduce function like so:

defmodule Solution do
  @spec min_operations(nums :: [integer], x :: integer) :: integer
  def min_operations(nums, x) do
    max = Enum.count(nums)
    target = Enum.sum(nums) - x 
    if target == 0 do
      max
    else
      nums_arr = nums |> Enum.with_index() |> Map.new(fn {v, k} -> {k, v} end)
      max_len = 
        0..max - 1
        |> Enum.reduce({-1, 0, 0}, fn j, {ml, ss, i} ->
                          
                      {uml, uss, ui} = find_max_sub(nums_arr, i, j, ml, target, ss + nums_arr[j]) 
                        {max(uml, ml), uss, ui} 
                    end)
        |> elem(0)
      if max_len < 0 do
        max_len 
      else
        max - max_len
      end
    end
  end
  
      
  def find_max_sub(_nums, start, finish, max_len, target, sub_sum) when start > finish or sub_sum < target, do: {max_len, sub_sum, start}
  def find_max_sub(_, start, finish, max_len, target, target), do: {max(max_len, finish - start + 1), target, start}
  def find_max_sub(nums, start, finish, max_len, target, sub_sum) do
    find_max_sub(nums, start + 1, finish, max_len, target, sub_sum - nums[start])
  end
end

I have two questions:

Is this an anti-pattern that should be avoided and if so, why?
How can I implement this sort of algorithm in a more functional style?

eksperimental · June 11, 2022, 7:04pm

Definitely break it up into helper functions.calculate_max(nums, target), max_len(nums)

al2o3cr · June 11, 2022, 7:14pm

Using a tuple as an accumulator is a pretty common pattern; past 4 or 5 elements it might be time to use a map or a struct instead.

Using the accumulator to carry state between iterations is a really common pattern - for instance, it’s the whole idea behind Stream.transform/3.

stevensonmt · June 11, 2022, 8:28pm

Do you mean something like this:

defmodule Solution do
  @spec min_operations(nums :: [integer], x :: integer) :: integer
  def min_operations(nums, x) do
    target = Enum.sum(nums) - x 
    max = Enum.count(nums)
    find_min_ops(nums, target, max)
  end
  
  def find_min_ops(_, 0, max), do: max
  def find_min_ops(nums, target, max) do
    nums
    |> Enum.with_index()
    |> Map.new(fn {v, k} -> {k, v} end)
    |> max_len(target, max)
    |> min_ops(max)
  end
      
  def max_len(nums_arr, target, max) do
    0..max - 1
    |> Enum.reduce({-1, 0, 0}, fn j, {ml, ss, i} ->
                      {uml, uss, ui} = find_max_sub(nums_arr, i, j, ml, target, ss + nums_arr[j]) 
                      {max(uml, ml), uss, ui} 
                    end)
    |> elem(0)
  end
      
  def min_ops(max_sub_len, _) when max_sub_len < 0, do: max_sub_len
  def min_ops(max_sub_len, max), do: max - max_sub_len
      
  def find_max_sub(_nums, start, finish, max_len, target, sub_sum) when start > finish or sub_sum < target, do: {max_len, sub_sum, start}
  def find_max_sub(_, start, finish, max_len, target, target), do: {max(max_len, finish - start + 1), target, start}
  def find_max_sub(nums, start, finish, max_len, target, sub_sum) do
    find_max_sub(nums, start + 1, finish, max_len, target, sub_sum - nums[start])
  end
end

eksperimental · June 12, 2022, 12:27am

Yes, exactly like that.
min_operations is the only public function, the rest you can use defp in order to make them private.

cjbottaro · June 12, 2022, 11:14pm

One thing that I have always loved about Elixir is that it favors practicality.

If your reduce function’s aggregator is huge and cumbersome, consider just making an :ets table for the duration of the function. Basically gives you the mutability that you’re used to, and if you have a decent after clause, then you won’t memory leak.

But over time, you should get really comfortable with Enum.reduce and a map accumulator. Or just recursive functions as above.