else/ArchiveIt/archive_it.py
2015-08-02 21:28:40 -07:00

54 lines
No EOL
1.6 KiB
Python

import requests
import sys
import traceback
# Headers attached to the submission POST request.
HEADERS = {
    'User-Agent': (
        'archive_it commandline tool for archive.is submissions v1.1'
    ),
    'Referer': 'http://archive.is/',
    'Origin': 'http://archive.is',
}

# Address the submission form data is POSTed to.
URL_SUBMIT = 'https://archive.is/submit/'
def archive(url, anyway=0):
    """Submit *url* to archive.is and return the address of the snapshot.

    Parameters:
        url: address of the page to archive.
        anyway: when truthy, an ``anyway=1`` form field is sent along with
            the URL (the "already archived" error below tells the user to
            pass it in order to overwrite).

    Returns:
        The snapshot URL (a string) taken from the ``Refresh`` response
        header.  When that header is missing, or contains no archive.is
        address, the ``requests`` response object is returned instead so
        the caller can inspect what the server sent.

    Raises:
        Exception: if the response carries a ``Link`` header, which this
            tool interprets as "the page is already archived".
    """
    data = {'url': url}
    if anyway:
        # The form field is literally named 'anyway'; using the flag's
        # value as the key would send a field called "1".
        data['anyway'] = 1
    response = requests.post(URL_SUBMIT, data=data, headers=HEADERS)
    headers = response.headers

    # Checked outside any try/except so the error really reaches the caller.
    if 'link' in headers:
        link = headers['link']
        try:
            when = extract_timestamp(link)
        except Exception:
            # The timestamp only decorates the message; if it cannot be
            # parsed, show the raw header rather than hide the real error.
            when = link
        raise Exception(
            '\nLink already archived: %s\n'
            'Pass parameter `anyway=1` to overwrite.\n' % (when,))

    refresh = headers.get('refresh')
    if refresh is None:
        return response

    # NOTE(review): the header is presumably of the form
    # "0;url=http://archive.is/XXXX" -- confirm against a live response.
    for item in refresh.split(';'):
        if 'archive.is' in item:
            # Split on the first '=' only, so a '=' inside the URL survives.
            _, sep, target = item.partition('=')
            if sep:
                return target.strip()

    # Refresh header present but unusable: hand back the full response.
    return response
def extract_timestamp(link):
    """Pull the ``from`` parameter out of a ``Link`` header value.

    The header is split on ``;`` and every ``key=value`` piece is collected
    into a dict (a later duplicate key overwrites an earlier one).  Keys
    and values are stripped of surrounding whitespace; any quotes around a
    value are kept as-is.

    Parameters:
        link: the raw ``Link`` header string.

    Returns:
        The value stored under ``from`` when present; otherwise the whole
        dict of parsed parameters (empty if nothing could be parsed).
    """
    fields = {}
    for item in link.split(';'):
        # Partition on the first '=' so values containing '=' stay intact.
        key, sep, value = item.partition('=')
        if sep:
            # Pieces after a ';' start with a space; strip it so that the
            # 'from' lookup below can actually match.
            fields[key.strip()] = value.strip()
    return fields.get('from', fields)
# Command-line entry point: archive the URL given as the first argument and
# print either the snapshot address or whatever the server sent instead.
if __name__ == '__main__':
    if len(sys.argv) == 1:
        print('Use: > archive_it.py http://www.website.com/page')
        # sys.exit() rather than quit(): quit is injected by the `site`
        # module for interactive sessions and is not guaranteed to exist.
        sys.exit()
    url = sys.argv[1]
    try:
        response = archive(url)
        if isinstance(response, str):
            # Success: archive() returned the snapshot URL.
            print(response)
        elif isinstance(response, requests.models.Response):
            print('Did not get the expected response. Here\'s what we got:')
            print(response)
            print(response.headers)
            print(response.text)
        else:
            # Anything else is also unexpected; show it instead of exiting
            # without any output.
            print('Did not get the expected response. Here\'s what we got:')
            print(repr(response))
    except Exception:
        # `except Exception` lets Ctrl-C (KeyboardInterrupt) and SystemExit
        # through instead of trapping them like a bare `except:` would.
        traceback.print_exc()