I don’t believe Explorer supports interpolation at this time. Until it does, I think you’ll need to use a workaround.
Here is an example workaround:
alias Explorer.DataFrame, as: DF
alias Explorer.Series, as: S
require DF
defmodule Interpolate do
def evenly_spaced(x_step, df, x_col, y_col) do
n_rows = DF.n_rows(df)
last = DF.slice(df, n_rows - 1, 1)[[x_col, y_col]]
DF.new(
x1: S.head(df[x_col], n_rows - 1),
x2: S.tail(df[x_col], n_rows - 1),
y1: S.head(df[y_col], n_rows - 1),
y2: S.tail(df[y_col], n_rows - 1),
)
|> DF.to_rows_stream()
|> Stream.map(fn %{"x1" => x1, "y1" => y1, "x2" => x2, "y2" => y2} ->
m = (y2 - y1) / (x2 - x1)
x = x1..x2//x_step |> Enum.to_list() |> S.from_list()
n = S.size(x)
x = S.head(x, n - 1)
y = x |> S.subtract(x1) |> S.multiply(m) |> S.add(y1)
DF.new(%{x_col => x, y_col => y})
end)
|> Enum.reduce(&DF.concat_rows(&2, &1))
|> DF.concat_rows(last)
end
end
# Build data
data = [
["2023-02-13", 100],
["2023-02-15", 100.01],
["2023-02-16", 100.09],
["2023-02-17", 101.02],
["2023-02-20", 105.00],
["2023-02-22", 103.06]
]
df = (
data
|> Enum.map(fn [date, value] ->
%{date: Date.from_iso8601!(date), value: value}
end)
|> DF.new()
)
# Cast date column to integer.
# (Units make working with Date/Durations a bit awkward.)
df = DF.mutate(df, date_int: cast(date, :integer))
# Run interpolation.
interp = Interpolate.evenly_spaced(1, df, "date_int", "value")
# Cast back to date.
interp = DF.mutate(interp, date: cast(date_int, :date))
# Print result.
interp[["date", "value"]] |> DF.print(limit: :infinity)
# +---------------------------------------------+
# | Explorer DataFrame: [rows: 10, columns: 2] |
# +------------------+--------------------------+
# | date | value |
# | <date> | <float> |
# +==================+==========================+
# | 2023-02-13 | 100.0 |
# +------------------+--------------------------+
# | 2023-02-14 | 100.005 |
# +------------------+--------------------------+
# | 2023-02-15 | 100.01 |
# +------------------+--------------------------+
# | 2023-02-16 | 100.09 |
# +------------------+--------------------------+
# | 2023-02-17 | 101.02 |
# +------------------+--------------------------+
# | 2023-02-18 | 102.34666666666666 |
# +------------------+--------------------------+
# | 2023-02-19 | 103.67333333333333 |
# +------------------+--------------------------+
# | 2023-02-20 | 105.0 |
# +------------------+--------------------------+
# | 2023-02-21 | 104.03 |
# +------------------+--------------------------+
# | 2023-02-22 | 103.06 |
# +------------------+--------------------------+
This workaround will be quite slow. If you need a performant solution, you’ll need to shell out to Polars proper.
Also, note that because of the simplicity of your use case (evenly spaced dates), I was able to use a simple algorithm. In the general case is more complicated.