From 5f0453369e798066e7b9e84323a1c9c5185eae25 Mon Sep 17 00:00:00 2001 From: Ethan Dalool Date: Tue, 17 Aug 2021 14:07:44 -0700 Subject: [PATCH] Delete instathief.py. --- Instathief/instathief.py | 158 --------------------------------------- 1 file changed, 158 deletions(-) delete mode 100644 Instathief/instathief.py diff --git a/Instathief/instathief.py b/Instathief/instathief.py deleted file mode 100644 index a726269..0000000 --- a/Instathief/instathief.py +++ /dev/null @@ -1,158 +0,0 @@ -import argparse -import bs4 -import datetime -import json -import os -import requests -import sys - -# pip install voussoirkit -from voussoirkit import clipext -from voussoirkit import downloady - - -''' ''' -STRFTIME = '%Y%m%d-%H%M%S' -# strftime used for filenames when downloading - -URL_PROFILE = 'https://www.instagram.com/{username}' -URL_QUERY = 'https://www.instagram.com/query/' - -PAGE_QUERY_TEMPLATE = ''' -ig_user({user_id}) -{{ - media.after({end_cur}, {count}) - {{ - count, - nodes - {{ - code, - date, - display_src, - id, - video_url - }}, - page_info - }} -}} -'''.replace('\n', '').replace(' ', '') - -USERAGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36' -''' ''' - -last_cookie = None - -def download_media(media_list): - for media in media_list: - timestamp = datetime.datetime.utcfromtimestamp(media['created']) - timestamp = timestamp.strftime(STRFTIME) - basename = downloady.basename_from_url(media['url']) - extension = os.path.splitext(basename)[1] - - filename = timestamp + extension - downloady.download_file( - url=media['url'], - localname=filename, - callback_progress=downloady.progress2, - overwrite=False, - ) - -def get_page(user_id, end_cur, count, cookies): - query = PAGE_QUERY_TEMPLATE.format( - count=count, - end_cur=end_cur, - user_id=user_id, - ) - headers = { - 'referer': 'https://www.instagram.com/', - 'user-agent': USERAGENT, - 'x-csrftoken': cookies['csrftoken'], - } - data = { - 'q': query, - 'ref': 'users::show', - } - - response = requests.post( - url=URL_QUERY, - cookies=cookies, - data=data, - headers=headers, - ) - response.raise_for_status() - page = response.json() - return page - -def get_user_info(username): - global last_cookie - url = URL_PROFILE.format(username=username) - response = requests.get(url) - response.raise_for_status() - - text = response.text - soup = bs4.BeautifulSoup(text, 'html.parser') - - scripts = soup.find_all('script') - for script in scripts: - if 'window._sharedData' in script.text: - break - else: - raise Exception('Did not find expected javascript') - - user_data = script.text - user_data = user_data.split('window._sharedData = ')[1].rstrip(';').strip() - user_data = json.loads(user_data) - user_data = user_data['entry_data']['ProfilePage'][0]['user'] - - user_id = user_data['id'] - page_info = user_data['media']['page_info'] - if page_info['has_next_page']: - end_cur = page_info['start_cursor'] - # Minus 1 because the queries use "after" parameters for pagination, and - # if we just take this cursor then we will only get items after it. - end_cur = int(end_cur) - 1 - else: - end_cur = None - - user_data = { - 'user_id': user_id, - 'end_cur': end_cur, - 'cookies': response.cookies, - } - last_cookie = response.cookies - return user_data - -def get_user_media(username): - user_info = get_user_info(username) - end_cur = user_info.pop('end_cur') - - while True: - page = get_page(count=50, end_cur=end_cur, **user_info) - page = page['media'] - - posts = page['nodes'] - for post in posts: - timestamp = post['date'] - media_url = post.get('video_url') or post.get('display_src') - ret = { - 'created': timestamp, - 'url': media_url - } - yield ret - - page_info = page['page_info'] - if page_info['has_next_page']: - end_cur = page_info['end_cursor'] - else: - break - - -def main(): - username = sys.argv[1] - media = get_user_media(username) - for (timestamp, url) in media: - print(url) - - -if __name__ == '__main__': - main()