Iterating through a page from Notion's API

martins · June 19, 2024, 7:11pm

Hi,

I’m using Notion as a source for my data.
Here I’m parsing through the data to extract the first paragraph found after a H1 tag with the content Description.

This works, but I’m not sure if this is the most elegant way.

  def return_first_paragraph_after_description_header(page) do
    heading_content = "Description"
    page = Jason.Formatter.pretty_print(page) |> Jason.decode!()

    results =
      page["results"]

    text =
      Enum.with_index(results, fn r, i ->
        if r["heading_1"] do
          heading_object = r["heading_1"]
          h1_text = Enum.at(heading_object["rich_text"], 0)["plain_text"]

          if h1_text == heading_content do
            paragraph = Enum.at(results, i + 1)
            rich_text = paragraph["paragraph"]["rich_text"]

            Enum.at(rich_text, 0)["plain_text"]
          end
        end
      end)

    # Text is now:
    # [nil, nil, "Here is an example", nil, nil, nil, nil, nil, nil]
    # Remove the nils
    text = Enum.reject(text, fn x -> x == nil end)
    Enum.join(text, "")
  end

I don’t like that I have to do the extra step with removing the nils.

al2o3cr · June 19, 2024, 7:25pm

I’ve got two questions:

#1: what is this code for?

#2: (related) What does page look like?

martins · June 19, 2024, 7:31pm

Hi @al2o3cr,

#1 It makes it easier to read the json, but I guess I can remove it now as I don’t have to inspect and look at the data anymore.

#2
Here’s the relevant part of page. This is one of the list items found in results.
I.e. what I’m iterating through by using Enum.with_index

 "heading_1": {
        "rich_text": [
          {
            "type": "text",
            "text": {
              "content": "Description",
              "link": null
            },
            "annotations": {
              "bold": false,
              "italic": false,
              "strikethrough": false,
              "underline": false,
              "code": false,
              "color": "default"
            },
            "plain_text": "Description",
            "href": null
          }
        ],
        "is_toggleable": false,
        "color": "default"
      }
    },
    {
      "object": "block",
      "id": "6c4b3452-aa3a-4205-8c8d-acbad2da3316",
      "parent": {
        "type": "page_id",
        "page_id": "fd7eaa90-2f09-4399-b21e-0e06cc327daa"
      },
      "created_time": "2024-06-17T14:34:00.000Z",
      "last_edited_time": "2024-06-19T09:41:00.000Z",
      "created_by": {
        "object": "user",
        "id": "56cf7590-cfc3-49d2-a84d-4fbad33a4d16"
      },
      "last_edited_by": {
        "object": "user",
        "id": "56cf7590-cfc3-49d2-a84d-4fbad33a4d16"
      },
      "has_children": false,
      "archived": false,
      "in_trash": false,
      "type": "paragraph",
      "paragraph": {
        "rich_text": [
          {
            "type": "text",
            "text": {
              "content": "Here is an example",
              "link": null
            },

codeanpeace · June 19, 2024, 9:22pm

Here’s one approach to write it in a more functionally elegant way:

use Enum.find, Enum.find_index, Enum.find_value to short circuit after finding the first truthy result
abstract out the “path” to access the plain text from each result
use get_in, put_in, update_in to traverse the nested json map of results

def return_first_paragraph_after_description_header(page) do
  results = Jason.decode!(page)["results"]
  plain_text_path = ["rich_text", Access.at(0), "plain_text"]
  description_header_index = Enum.find_index(results, fn result -> get_in(result, ["heading_1" | plain_text_path]) == "Description" end)
  get_in(results, [Access.at(description_header_index + 1), "paragraph" | plain_text_path])
end
# note: like in the code you shared, this assumes that the next result represents a paragraph

martins · June 27, 2024, 7:09am

Thanks, @codeanpeace! That solution looks more like an elegant Elixir-solution.