Expected data structure for training a Multi Input / Multi Output Axon model

Hey everyone.

So I’ve been developing a model; it started as a straightforward logistic regression model and has evolved into a multi-input / multi-output model.

Here is what the model looks like

# Builds the multi-input / multi-output Axon model.
#
# Inputs (by Axon.input name): "cpu" {nil, 2}, "memory" {nil, 3}, "disk" {nil, 2}.
# Each prediction head is a 2-unit sigmoid dense layer, so every output is a
# pair of scores per example.
def model do
    # Create three input tensors for CPU, Memory, and Disk
    input_cpu = Axon.input("cpu", shape: {nil, 2})
    input_memory = Axon.input("memory", shape: {nil, 3})
    input_disk = Axon.input("disk", shape: {nil, 2})

    # Create separate prediction paths for each resource
    cpu_prediction =
      Axon.dense(input_cpu, 2, activation: :sigmoid, name: "cpu")

    # Memory gets an extra hidden layer before its sigmoid head.
    memory_prediction =
      input_memory
      |> Axon.dense(8, activation: :relu)
      |> Axon.dense(2, activation: :sigmoid, name: "memory")

    disk_prediction =
      Axon.dense(input_disk, 2, activation: :sigmoid, name: "disk")

    # Combine outputs into a single model with multiple outputs
    # NOTE(review): as the author discovers later in this thread, this map
    # container is the bug — with the positional losses list used in train/2,
    # the container must be the tuple {cpu_prediction, memory_prediction,
    # disk_prediction} instead of a map.
    Axon.container(
      %{
        cpu: cpu_prediction,
        memory: memory_prediction,
        disk: disk_prediction
      },
      name: "results"
    )
end

Here is what the training loop looks like:

 # Runs the training loop over `data` (enumerable of {inputs, targets} pairs).
 #
 # Options:
 #   :save       - when true, persists the resulting state via dump_state/1 (default false)
 #   :state      - initial Axon.ModelState (default: empty state, i.e. fresh init)
 #   :iterations - iterations per epoch (default 100)
 #   :epochs     - number of epochs (default 100)
 #
 # Returns the trained model state.
 def train(data, opts \\ []) do
    save? = Keyword.get(opts, :save, false)
    model = model()

    state = Keyword.get(opts, :state) || Axon.ModelState.empty()
    iterations = Keyword.get(opts, :iterations, 100)
    epochs = Keyword.get(opts, :epochs, 100)

    # Losses and weights for each output cpu, memory, disk
    # NOTE(review): this keyword list pairs losses with outputs positionally
    # (duplicate keys are intentional) — presumably this is why the model's
    # container must be a tuple rather than a map; confirm against the Axon
    # docs for Axon.Loop.trainer/3.
    losses = [binary_cross_entropy: 0.2, binary_cross_entropy: 0.4, binary_cross_entropy: 0.4]

    state =
      model
      |> Axon.Loop.trainer(losses, Polaris.Optimizers.adamw(learning_rate: 0.01))
      |> Axon.Loop.run(data, state, iterations: iterations, epochs: epochs)

    # Optionally persist the trained state before returning it.
    if save? do
      dump_state(state)
    end

    state
end

I’ve tried the following data structures:

# Attempt 1: map inputs (string keys) with map targets (atom keys).
# This did not work with the map-based Axon.container above.
training_data = [
  # Example 1: Good placement (plenty of resources)
  {
    %{
      "cpu" => Nx.tensor([[0.05, 0.825]]),    # [requested, available]
      "memory" => Nx.tensor([[0.0625, 0.65, 0.10]]),
      "disk" => Nx.tensor([[0.004, 0.55]])
    },
    %{
      cpu: Nx.tensor([[1.0, 0.0]]),      # Good placement
      memory: Nx.tensor([[1.0, 0.0]]),   # Good placement
      disk: Nx.tensor([[1.0, 0.0]])      # Good placement
    }
  },

  # Example 2: Bad placement (scarce resources)
  {
    %{
      "cpu" => Nx.tensor([[0.05, 0.12]]),     # Low available CPU
      "memory" => Nx.tensor([[0.0625, 0.15, 0.010]]), # Low available memory
      "disk" => Nx.tensor([[0.004, 0.10]])     # Low available disk
    },
    %{
      cpu: Nx.tensor([[0.0, 1.0]]),      # Bad placement
      memory: Nx.tensor([[0.0, 1.0]]),   # Bad placement
      disk: Nx.tensor([[0.0, 1.0]])      # Bad placement
    }
  }
]
# Attempt 2: tuple inputs with tuple targets. Also did not work — the inputs
# were declared with Axon.input names ("cpu", "memory", "disk"), and named
# inputs are supplied as a map keyed by those names, not as a bare tuple.
training_data = [
  # Example 1: Good placement
  {
    # Inputs as a positional tuple (cpu, memory, disk)
    {
      Nx.tensor([[0.05, 0.825]]),
      Nx.tensor([[0.0625, 0.65, 0.010]]),
      Nx.tensor([[0.004, 0.55]])
    },
    # Targets as a positional tuple (cpu, memory, disk)
    {
      Nx.tensor([[1.0, 0.0]]),
      Nx.tensor([[1.0, 0.0]]),
      Nx.tensor([[1.0, 0.0]])
    }
  },
  # Example 2: Bad placement
  {
    {
      Nx.tensor([[0.05, 0.12]]),
      Nx.tensor([[0.0625, 0.15, 0.010]]),
      Nx.tensor([[0.004, 0.10]])
    },
    {
      Nx.tensor([[0.0, 1.0]]),
      Nx.tensor([[0.0, 1.0]]),
      Nx.tensor([[0.0, 1.0]])
    }
  }
]
# Attempt 3: unbatched rank-1 tensors. Also did not work — note the model's
# inputs are declared with rank-2 shapes ({nil, 2} / {nil, 3} / {nil, 2}),
# so these 1-D tensors drop the batch dimension.
training_data = [
  # Each training example
  {
    # Inputs tuple
    {
      Nx.tensor([0.05, 0.825]),    # cpu - shape {2}
      Nx.tensor([0.0625, 0.65, 0.010]),   # memory - shape {3}
      Nx.tensor([0.004, 0.55])     # disk - shape {2}
    },
    # Targets tuple
    {
      Nx.tensor([1.0, 0.0]),    # cpu target - shape {2}
      Nx.tensor([1.0, 0.0]),    # memory target - shape {2}
      Nx.tensor([1.0, 0.0])     # disk target - shape {2}
    }
  }
]

None of the above examples seem to work. Any suggestions?

I managed to figure out where I went wrong.

The Axon.container call is wrong. The output format needs to be a tuple, not a map.

So this is invalid

# Change
Axon.container(
      %{
        cpu: cpu_prediction,
        memory: memory_prediction,
        disk: disk_prediction
      },
      name: "results"
    )

# to

 Axon.container(
   {cpu_prediction, memory_prediction, disk_prediction},
   name: "results"
  )

Then the training data set should look like the following:

# Working structure (per the author, once Axon.container takes a tuple):
# inputs as a map keyed by the Axon.input names, targets as a tuple whose
# order matches the container tuple {cpu, memory, disk}.
training_data = [
  {
    # Input map with string keys matching the Axon.input names
    # NOTE(review): these are rank-1 tensors although the inputs were declared
    # {nil, n}; presumably Axon handles the batch dimension here — confirm.
    %{
      "cpu" => Nx.tensor([0.05, 0.825]),
      "memory" => Nx.tensor([0.0625, 0.65, 0.75]),
      "disk" => Nx.tensor([0.004, 0.55])
    },
    # Target outputs still as a tuple, in container order
    {
      Nx.tensor([1.0, 0.0]),    # cpu target
      Nx.tensor([1.0, 0.0]),    # memory target
      Nx.tensor([1.0, 0.0])     # disk target
    }
  }
]

I am now able to start the training loop. I have no one but myself to blame. I followed Claude down a rabbit hole. I was very happy with my simple logistic regression model.

Then I went to ask it how to split the output into cpu, memory, and disk instead of just having a single output, and it gave me the model you see above. In its defense, most of the model was correct — the only part that’s wrong is the Axon.container bit. But in my inexperience I naively assumed it would work.

2 Likes