Fetch the latest copy of a page from the Wayback Machine

A simple way to fetch the latest copy of a given URL from the Wayback Machine, without including the banner.

  1. Fetch the metadata (like https://archive.org/wayback/available?url=https://example.com)
  2. Append id_ to the timestamp in the returned URL to remove the banner (http://web.archive.org/web/20221212143457/https://example.com/ becomes http://web.archive.org/web/20221212143457id_/https://example.com/).

Here's an example function with vanilla JS:

/**
 * Given a URL, return the latest copy of that URL in the Wayback Machine.
 * To do this, find the last available response for the URL, then modify the
 * snapshot URL to omit the default banner in the UI.
 *
 * For https://example.com, this fetches
 * https://archive.org/wayback/available?url=https%3A%2F%2Fexample.com
 * and gets a response like:
 *
 *    url: "https://example.com",
 *    archived_snapshots: {
 *      closest: {
 *        status: "200",
 *        available: true,
 *        url: "http://web.archive.org/web/20221212143457/https://example.com/",
 *        timestamp: "20221212143457",
 *      },
 *    }
 *
 * Then "id_" is appended to the timestamp in the snapshot URL and that URL is
 * fetched. In this case:
 * http://web.archive.org/web/20221212143457id_/https://example.com/
 *
 * @param {string} url - The page whose archived copy should be fetched.
 * @returns {Promise<{
 *   timings: { fetch_metadata: number, fetch_wayback: number },
 *   metadata: object,
 *   response: Response,
 *   text: string
 * }>} `metadata` is the parsed availability response, `response` is the
 *   Response for the banner-less snapshot and `text` is its body. Both timings
 *   are milliseconds elapsed since the start of the call, so `fetch_wayback`
 *   includes the time spent on the metadata lookup.
 * @throws {Error} If `url` is falsy, if the availability lookup returns a
 *   non-2xx status, or if no snapshot is reported for `url`.
 */
async function get_wayback_response(url) {
  if (!url) {
    throw new Error("No URL provided");
  }
  const result = {
    timings: {},
  };
  const start = performance.now();

  // Build the query through URLSearchParams so that "?", "&" and "#" inside
  // the target URL are percent-encoded instead of being parsed as part of the
  // availability query itself.
  const metadata_url = new URL("https://archive.org/wayback/available");
  metadata_url.searchParams.set("url", url);

  const resp = await fetch(metadata_url);
  if (!resp.ok) {
    // Fail with a clear message instead of an opaque JSON parse error.
    throw new Error(
      `Wayback availability lookup failed with HTTP ${resp.status} for ${url}`
    );
  }
  result.metadata = await resp.json();
  result.timings.fetch_metadata = performance.now() - start;

  const closest = result.metadata?.archived_snapshots?.closest;
  if (!closest?.url) {
    throw new Error(`No snapshot available from wayback server for ${url}`);
  }

  // Appending "id_" to the timestamp excludes the banner. Without the "g"
  // flag only the first "/web/<digits>" is rewritten, so a "/web/" segment
  // inside the archived page's own path is left untouched.
  const constructed_url = closest.url.replace(/\/web\/(\d+)/, "/web/$1id_");
  result.response = await fetch(constructed_url);
  result.text = await result.response.text();
  // Measured from `start`, i.e. cumulative with the metadata lookup.
  result.timings.fetch_wayback = performance.now() - start;

  return result;
}