HTTPoison (Hackney) Follow Redirect without Reusing Authorization Headers

I noticed that the follow_redirect: true option used in HTTPoison (and therefore Hackney), when combined with an Authorization header (in my case basic auth), makes the subsequent redirected requests with the same Authorization header. Curl, for example, does not seem to do that, at least not by default with the -L option; it looks like the --location-trusted option is to be used precisely when the user wants to reuse basic auth headers on redirects.

I want to use the Authorization header only on the first request and follow redirects without it. I stumbled upon an issue with Twilio media URLs with this where they require authentication for a media URL that redirects twice - to their CDN and then S3 - but the last URL, the S3 one, returns an error (unsupported authorization type) if a request with an Authorization header is made (or at least with the one required for the first URL), it works without any authorization though.

My question is: is it possible to use the headers passed to HTTPoison.get!/3 only for the first request and follow redirects without them? I guess the question is whether it’s possible in Hackney in general, and if not, is there a not-too-complex way to override this?

You can manually do the redirects, adapting the headers/options on each request, but it’s kinda busywork that follow_redirect: true hopes to avoid unless you happen upon a case like this.

Here’s some code I came up with once upon a time to deal with a similar scenario where specific insight was needed to know info about the hops involved in the redirect.

A couple things to point out:

  1. You need to build in a safety valve, i.e. a maximum number of redirects, otherwise you could end up in an endless redirect.
  2. The redirect targets need to be qualified, e.g. by prepending the full hostname, so this recursive process keeps a little bit of state.

Here’s my code – put together into a single module. Hopefully this is useful to you: you can see where you could manipulate the options between successive requests.

defmodule RecursiveClient do
  @moduledoc """
  This module augments the functionality available in another HTTP client (e.g.
  `HTTPoison`) so we can know more about requests that _redirect_. Specifically,
  we want to know the final URL where a series of redirects landed.

  Whereas `HTTPoison` only discloses the originally requested URL (when `:follow_redirect`
  is true), this module keeps track of *both* the originally requested URL and the
  ultimate URL that finally yielded a successful response.

  Note: only `:get` requests are currently supported.
  """

  use HTTPoison.Base

  import HTTPClients

  alias HTTPoison.Response

  @default_client HTTPoison
  @hard_limit_max_redirects 10
  @custom_response_header "X-Original-Request-URL"

  @doc """
  Issues a GET request to `url` with no extra headers and no extra options.

  Delegates to `RecursiveClient.get/3`.
  """
  @impl true
  def get(url) do
    get(url, [], [])
  end

  @doc """
  Issues a GET request to `url` with the given `headers` and no extra options.

  Delegates to `RecursiveClient.get/3`.
  """
  @impl true
  def get(url, headers) do
    get(url, headers, [])
  end

  @doc """
  Recursively makes a GET request to the given `url`, following redirects up to
  `max_redirect` times, while keeping track of the URL that initiated the sequence
  of requests as well as the URL that eventually yielded content.

  The client's own redirect handling is always switched off
  (`follow_redirect: false`) so that every hop is issued by this module.
  On success, the originally requested `url` is prepended to the response
  headers under `#{@custom_response_header}`.

  ## Options

    - `:client` an HTTP client module implementing the `HTTPoison.Base`
      callbacks. Default: `#{inspect(@default_client)}`
    - `:max_redirect` non-negative integer providing a hard-stop on the number
      of redirects this will follow. `0` means the original request is made
      but no redirect is followed. Default `#{@hard_limit_max_redirects}`

  All remaining options are handed to the client unchanged.

  ## Returns

    - `{:ok, %HTTPoison.Response{}}` for the response that ended the chain.
    - `{:error, %HTTPoison.Error{}}` otherwise. An `HTTPoison.Error` produced
      further down is returned untouched; any other failure reason (for example
      `"Too many redirects"`) is carried in the `:reason` field.

  ## Examples

      iex> RecursiveClient.get("https://blog.ahrefs.com", [], client: HTTPoison, max_redirect: 3)
      {:ok, %HTTPoison.Response{
        body: _,
        headers: [{"#{@custom_response_header}", "https://blog.ahrefs.com"} | _],
        request: _,
        request_url: "https://ahrefs.com/blog/",
        status_code: 200
      }}

      iex> {:ok, response} = RecursiveClient.get("https://blog.ahrefs.com")
      iex> RecursiveClient.requested_resolved(response)
      {:ok, "https://blog.ahrefs.com", "https://ahrefs.com/blog/"}
  """
  @impl true
  def get(url, headers, options) do
    {http_client, options} = Keyword.pop(options, :client, @default_client)
    {max_redirect, options} = Keyword.pop(options, :max_redirect, @hard_limit_max_redirects)
    options = Keyword.put(options, :follow_redirect, false)

    # The third argument counts redirects already followed. Starting at 0 means
    # the initial request never counts against the limit, so `max_redirect: n`
    # really allows n redirects (and `max_redirect: 0` still issues the request).
    case make_recursive_request({http_client, :get, [url, headers, options]}, max_redirect, 0) do
      {:ok, response} ->
        {:ok, append_custom_response_header(url, response)}

      # Already in the shape callers expect; do not overwrite its reason.
      {:error, %HTTPoison.Error{} = error} ->
        {:error, error}

      # Keep the real cause instead of reporting every failure as an
      # unreachable host.
      {:error, reason} ->
        {:error, %HTTPoison.Error{id: url, reason: reason}}
    end
  end

  # Stop clause: once the hop counter has passed the allowed maximum, give up
  # instead of issuing another request.
  defp make_recursive_request(_request, max_redirect, n_redirect)
       when n_redirect > max_redirect do
    {:error, "Too many redirects"}
  end

  # Performs one hop. The head of the argument list is taken to be the URL, and
  # the tail is reused unchanged for any follow-up request so that only the URL
  # differs between hops.
  defp make_recursive_request({client, function, [url | rest]}, max_redirect, n_redirect) do
    if is_valid_url?(url) do
      # Invoked by handle_response/2 when the response redirects elsewhere.
      follow = fn next_url ->
        make_recursive_request({client, function, [next_url | rest]}, max_redirect, n_redirect + 1)
      end

      handle_response(make_request(client, function, [url | rest]), follow)
    else
      {:error, "Invalid url: #{inspect(url)}"}
    end
  end

  # A 3xx response: work out where it points and hand that URL to `follow`.
  # If the target cannot be determined, the error tuple from target_url/1 is
  # returned as-is.
  #
  # NOTE(review): this clause expects redirects to arrive tagged as
  # `{:error, %Response{}}`. Presumably make_request/3 (imported from
  # HTTPClients, not shown here) re-tags non-success responses that way;
  # confirm, since a bare HTTPoison call returns 3xx as `{:ok, response}`.
  defp handle_response({:error, %Response{status_code: code} = redirect}, follow)
       when code >= 300 and code <= 399 do
    with {:ok, next_url} <- target_url(redirect) do
      follow.(next_url)
    end
  end

  # Anything that is not a redirect is passed straight back to the caller.
  defp handle_response(result, _follow) do
    result
  end

  # Records the originally requested URL on the response by prepending it as a
  # custom header, so the extra information travels inside the regular
  # Response struct rather than in a new return shape.
  defp append_custom_response_header(original_url, %Response{headers: existing} = response) do
    %{response | headers: [{@custom_response_header, original_url} | existing]}
  end

  @doc """
  Reports whether `url` looks like a usable HTTP(S) URL.

  A URL is accepted when, after `URI.parse/1`, its scheme is `"http"` or
  `"https"` and its host is neither `nil` nor the empty string.
  See https://github.com/johno/is_url/blob/master/lib/is_url.ex

  ## Examples
      iex> RecursiveClient.is_valid_url?("https://blog.ahrefs.com")
      true

      iex> RecursiveClient.is_valid_url?("invalid_url")
      false
  """
  @spec is_valid_url?(binary) :: boolean()
  def is_valid_url?(url) when is_binary(url) do
    %URI{scheme: scheme, host: host} = URI.parse(url)

    scheme in ["https", "http"] and host not in ["", nil]
  end

  @doc """
  Given a redirect response (status 300-399), returns the fully qualified URL
  it redirects to.

  The target is read from the response's `Location` header (matched
  case-insensitively) and qualified against the response's `request_url`.
  Returns `{:error, reason}` when no `Location` header is present or the
  location cannot be turned into a URL. There is no clause for non-redirect
  status codes.

  ## Examples

      iex> RecursiveClient.target_url(%HTTPoison.Response{
      ...>   body: "",
      ...>   headers: [
      ...>     {"Location", "https://ahrefs.com/blog/"}
      ...>   ],
      ...>   request: %HTTPoison.Request{
      ...>     body: "",
      ...>     headers: [],
      ...>     method: :get,
      ...>     options: [follow_redirect: false],
      ...>     params: %{},
      ...>     url: "https://blog.ahrefs.com"
      ...>   },
      ...>   request_url: "https://blog.ahrefs.com",
      ...>   status_code: 301
      ...> })
      {:ok, "https://ahrefs.com/blog/"}
  """
  @spec target_url(response_struct :: HTTPoison.Response.t()) ::
          {:ok, String.t()} | {:error, any()}
  def target_url(%Response{request_url: origin, headers: response_headers, status_code: code})
      when code >= 300 and code <= 399 do
    case get_location(response_headers) do
      {:ok, location} -> fully_qualify_location(origin, location)
      failure -> failure
    end
  end

  # Resolves the `location` of a redirect against the `request_url` that
  # produced it and returns `{:ok, absolute_url_string}`.
  #
  # Resolution follows RFC 3986 section 5.2 via `URI.merge/2`, so:
  #   * an absolute `location` replaces the request URL entirely; userinfo,
  #     query and fragment of the previous hop are NOT carried over to the
  #     new target (important when the previous URL embedded credentials);
  #   * an absolute-path `location` ("/foo") keeps only scheme and authority;
  #   * a relative `location` ("foo") is resolved against the request path.
  #
  # `URI.merge/2` raises `ArgumentError` when `request_url` is not absolute;
  # that is converted into an `{:error, message}` tuple.
  defp fully_qualify_location(request_url, location) do
    resolved =
      request_url
      |> URI.merge(location)
      |> URI.to_string()

    {:ok, resolved}
  rescue
    e in ArgumentError ->
      {:error,
       "Invalid redirect location #{inspect(location)} returned from #{request_url}; #{inspect(e)}"}
  end

  # Looks up the `Location` header, comparing header names case-insensitively.
  # Returns `{:ok, value}` for the first match with a truthy value, or
  # `{:error, "Location not found"}` when there is none.
  defp get_location(headers) do
    found =
      Enum.find_value(headers, :error, fn {name, value} ->
        if String.downcase(name) == "location", do: value, else: false
      end)

    case found do
      :error -> {:error, "Location not found"}
      location -> {:ok, location}
    end
  end
end

It’s possible some other HTTP client has more nuanced rules around its redirects that would avoid a custom solution like this for your use-case :thinking:

2 Likes

Thanks a lot! Your idea might very much help, I thought a little about it and it might be the route I go if nothing better pops up.