Fetch the latest copy of a page from the Wayback Machine
A simple way to fetch the latest copy of a given URL from the Wayback Machine, without including the banner.
- Fetch the snapshot metadata (for example, `https://archive.org/wayback/available?url=https://example.com`).
- Append `id_` to the timestamp in the returned snapshot URL to remove the banner (`http://web.archive.org/web/20221212143457/https://example.com/` becomes `http://web.archive.org/web/20221212143457id_/https://example.com/`).
Here's an example function with vanilla JS:
/**
 * Given a URL, return the latest copy of that URL in the Wayback Machine.
 * To do this, find the closest available snapshot for the URL, then modify
 * the snapshot URL so the default Wayback banner is omitted.
 *
 * For https://example.com, this fetches
 * https://archive.org/wayback/available?url=https%3A%2F%2Fexample.com
 * and expects a response like:
 *
 *   url: "https://example.com",
 *   archived_snapshots: {
 *     closest: {
 *       status: "200",
 *       available: true,
 *       url: "http://web.archive.org/web/20221212143457/https://example.com/",
 *       timestamp: "20221212143457",
 *     },
 *   }
 *
 * It then appends "id_" to the timestamp in the snapshot URL and fetches
 * that. In this case:
 * http://web.archive.org/web/20221212143457id_/https://example.com/
 *
 * @param {string} url - The page URL to look up.
 * @returns {Promise<object>} An object with:
 *   - `metadata`: the parsed JSON from the availability endpoint,
 *   - `response`: the fetch Response for the banner-less snapshot,
 *   - `text`: the body of that response as text,
 *   - `timings.fetch_metadata`: ms elapsed until the metadata was parsed,
 *   - `timings.fetch_wayback`: ms elapsed (from the same start) until the
 *     snapshot body was read, i.e. it includes the metadata lookup.
 * @throws {Error} If `url` is falsy, if the availability request returns a
 *   non-2xx status, or if no snapshot URL is present in the metadata.
 */
async function get_wayback_response(url) {
  if (!url) {
    throw new Error("No URL provided");
  }
  const result = {
    timings: {},
  };
  const start = performance.now();

  // Build the query via URLSearchParams so that "&", "#", "?" etc. inside
  // the target URL are percent-encoded instead of truncating the parameter.
  const metadata_url = new URL("https://archive.org/wayback/available");
  metadata_url.searchParams.set("url", url);

  const resp = await fetch(metadata_url);
  if (!resp.ok) {
    throw new Error(
      `Wayback availability request failed with HTTP ${resp.status} for ${url}`
    );
  }
  result.metadata = await resp.json();
  result.timings.fetch_metadata = performance.now() - start;

  const closest = result.metadata?.archived_snapshots?.closest;
  if (!closest?.url) {
    throw new Error(`No snapshot available from wayback server for ${url}`);
  }

  // Adding "id_" right after the timestamp excludes the banner.
  // The match is anchored and non-greedy so it stops at the FIRST
  // "/web/<digits>" (the Wayback timestamp), even when the archived page's
  // own path also contains "/web/".
  const constructed_url = closest.url.replace(/^(.*?\/web\/\d+)/, "$1id_");

  result.response = await fetch(constructed_url);
  result.text = await result.response.text();
  result.timings.fetch_wayback = performance.now() - start;
  return result;
}