You can manually do the redirects, adapting the headers/options on each request, but it’s kinda busywork that follow_redirect: true
hopes to avoid unless you happen upon a case like this.
Here’s some code I came up with once upon a time to deal with a similar scenario where specific insight was needed to know info about the hops involved in the redirect.
A couple things to point out:
- You need to build in a safety valve, i.e. a maximum number of redirects; otherwise you could end up in an endless redirect loop.
- The redirect targets need to be fully qualified, e.g. by prepending the scheme and hostname of the previous request, so this recursive process keeps a little bit of state.
Here’s my code – put together into a single module. Hopefully this is useful to you: you can see where you could manipulate the options between successive requests.
defmodule RecursiveClient do
  @moduledoc """
  This module augments the functionality available in another HTTP client (e.g.
  `HTTPoison`) so we can know more about requests that _redirect_. Specifically,
  we want to know the final URL where a series of redirects landed.

  Whereas `HTTPoison` only discloses the originally requested URL (when `:follow_redirect`
  is true), this module keeps track of *both* the originally requested URL and the
  ultimate URL that finally yielded a successful response.

  Note: only `:get` requests are currently supported.
  """

  use HTTPoison.Base

  import HTTPClients

  alias HTTPoison.Response

  @default_client HTTPoison
  @hard_limit_max_redirects 10
  @custom_response_header "X-Original-Request-URL"

  @doc """
  See `RecursiveClient.get/3`
  """
  @impl true
  def get(url), do: get(url, [], [])

  @doc """
  See `RecursiveClient.get/3`
  """
  @impl true
  def get(url, headers), do: get(url, headers, [])

  @doc """
  Recursively makes a GET request to the given `url` following any redirects up to
  `max_redirect` times while keeping track of the URL that initiated the sequence
  of requests as well as the URL that eventually yielded content.

  On success the originally requested URL is prepended to the response headers
  under `#{@custom_response_header}`. On failure an `HTTPoison.Error` is returned
  whose `:reason` carries the underlying cause (e.g. `"Too many redirects"`).

  ## Options

    - `:client` an HTTP client module implementing the `HTTPoison.Base`
      callbacks. Default: `#{inspect(@default_client)}`
    - `:max_redirect` non-negative integer providing a hard-stop on the number
      of redirects this will follow; `0` performs only the initial request.
      Default `#{@hard_limit_max_redirects}`

  Any other option is passed through to the client, except `:follow_redirect`,
  which is always forced to `false` because this module follows redirects itself.

  ## Examples

      iex> RecursiveClient.get("https://blog.ahrefs.com", [], client: HTTPoison, max_redirect: 3)
      {:ok, %HTTPoison.Response{
        body: _,
        headers: [{"#{@custom_response_header}", "https://blog.ahrefs.com"} | _],
        request: _,
        request_url: "https://ahrefs.com/blog/",
        status_code: 200
      }}

      iex> {:ok, response} = RecursiveClient.get("https://blog.ahrefs.com")
      iex> RecursiveClient.requested_resolved(response)
      {:ok, "https://blog.ahrefs.com", "https://ahrefs.com/blog/"}
  """
  @impl true
  def get(url, headers, options) do
    {http_client, options} = Keyword.pop(options, :client, @default_client)
    {max_redirect, options} = Keyword.pop(options, :max_redirect, @hard_limit_max_redirects)

    # Redirects are followed here, one hop at a time, so the wrapped client must
    # not follow them on its own.
    options = Keyword.put(options, :follow_redirect, false)

    # The counter starts at 0: the initial request is not a redirect, so
    # `max_redirect: n` allows exactly n redirects to be followed.
    case make_recursive_request({http_client, :get, [url, headers, options]}, max_redirect, 0) do
      {:ok, response} -> {:ok, append_custom_response_header(url, response)}
      {:error, error} -> {:error, to_httpoison_error(url, error)}
    end
  end

  @doc """
  Takes in a URL and determines if URL is valid or invalid.

  A URL is considered valid when its scheme is `http` or `https` and it has a
  non-empty host.

  See https://github.com/johno/is_url/blob/master/lib/is_url.ex

  ## Examples

      iex> RecursiveClient.is_valid_url?("https://blog.ahrefs.com")
      true

      iex> RecursiveClient.is_valid_url?("invalid_url")
      false
  """
  @spec is_valid_url?(binary) :: boolean()
  def is_valid_url?(url) when is_binary(url) do
    %URI{scheme: scheme, host: host} = URI.parse(url)
    scheme in ["http", "https"] and host not in [nil, ""]
  end

  @doc """
  Given a redirect response, this returns a fully qualified url to which it redirects.

  The `Location` header (matched case-insensitively) is resolved against the
  response's `request_url`, so relative redirect targets are supported. Returns
  `{:error, reason}` when there is no `Location` header or it cannot be resolved.
  Only responses with a 3xx status code are accepted.

  ## Examples

      iex> RecursiveClient.target_url(%HTTPoison.Response{
      ...>   body: "",
      ...>   headers: [
      ...>     {"Location", "https://ahrefs.com/blog/"}
      ...>   ],
      ...>   request: %HTTPoison.Request{
      ...>     body: "",
      ...>     headers: [],
      ...>     method: :get,
      ...>     options: [follow_redirect: false],
      ...>     params: %{},
      ...>     url: "https://blog.ahrefs.com"
      ...>   },
      ...>   request_url: "https://blog.ahrefs.com",
      ...>   status_code: 301
      ...> })
      {:ok, "https://ahrefs.com/blog/"}
  """
  @spec target_url(response_struct :: HTTPoison.Response.t()) ::
          {:ok, String.t()} | {:error, any()}
  def target_url(%Response{request_url: request_url, headers: headers, status_code: status_code})
      when status_code in 300..399 do
    with {:ok, location} <- get_location(headers) do
      fully_qualify_location(request_url, location)
    end
  end

  # Safety valve: stop once more redirects have been followed than allowed.
  defp make_recursive_request(_, max_redirect, redirects_followed)
       when redirects_followed > max_redirect do
    {:error, "Too many redirects"}
  end

  # The first of the args is assumed to be the URL to request.
  # We shift it off because we may need to replace it with a new URL
  defp make_recursive_request(
         {client, function, [url | other_args] = args},
         max_redirect,
         redirects_followed
       ) do
    if is_valid_url?(url) do
      # NOTE(review): make_request/3 is not defined in this module; presumably it
      # comes from the imported HTTPClients module - confirm.
      client
      |> make_request(function, args)
      |> handle_response(fn new_request_url ->
        make_recursive_request(
          {client, function, [new_request_url | other_args]},
          max_redirect,
          redirects_followed + 1
        )
      end)
    else
      {:error, "Invalid url: #{inspect(url)}"}
    end
  end

  # Follows a redirect by handing the resolved target to `redirect_callback`;
  # any other result is returned untouched.
  #
  # NOTE(review): only `{:error, %Response{}}` with a 3xx status is treated as a
  # redirect. This assumes make_request/3 tags non-success responses as errors;
  # if it returns `{:ok, %Response{}}` for 3xx, redirects are never followed -
  # confirm against the make_request/3 implementation.
  defp handle_response(
         {:error, %Response{status_code: status_code} = response},
         redirect_callback
       )
       when status_code in 300..399 do
    case target_url(response) do
      {:ok, new_request_url} ->
        redirect_callback.(new_request_url)

      # Couldn't determine the new request URL
      {:error, error} ->
        {:error, error}
    end
  end

  defp handle_response(response, _), do: response

  # Adds a custom response header to the Response: we stash the info
  # there so it is in a place that is compatible with the defined behaviour.
  defp append_custom_response_header(original_url, %Response{headers: headers} = response) do
    %{response | headers: [{@custom_response_header, original_url} | headers]}
  end

  # Wraps a failure in an `HTTPoison.Error` so `get/3` keeps the error shape of
  # the behaviour, while preserving the actual reason instead of discarding it.
  defp to_httpoison_error(url, %HTTPoison.Error{reason: reason}) do
    %HTTPoison.Error{id: url, reason: reason}
  end

  defp to_httpoison_error(url, reason) do
    %HTTPoison.Error{id: url, reason: reason}
  end

  # Resolves `location` (absolute or relative) against `request_url` using
  # `URI.merge/2`, so a relative target does not inherit the query string or
  # fragment of the request that produced the redirect.
  defp fully_qualify_location(request_url, location) do
    {:ok, request_url |> URI.merge(location) |> URI.to_string()}
  rescue
    # Raised when the location cannot be resolved, e.g. because `request_url`
    # is not an absolute URI.
    e in ArgumentError ->
      {:error,
       "Invalid redirect location #{inspect(location)} returned from #{request_url}; #{inspect(e)}"}
  end

  # Finds the value of the `Location` header; header names are compared
  # case-insensitively.
  defp get_location(headers) do
    location =
      Enum.find_value(headers, fn {name, value} ->
        if String.downcase(name) == "location", do: value
      end)

    case location do
      nil -> {:error, "Location not found"}
      location -> {:ok, location}
    end
  end
end
It’s possible some other HTTP client has more nuanced rules around its redirects that would avoid a custom solution like this for your use-case 