def update_languages():
    """Detect the languages of every hashtag name and store them in the sheet.

    Reads the "hashtag" worksheet, runs ``detect_langs`` on each row's
    ``name`` and stores the ``lang`` attribute of every detection result as a
    comma-separated string under the row's ``languages`` key. The whole
    worksheet is then overwritten with the updated rows.

    Rows for which detection raises are reported on stdout and left unchanged.
    """
    sheet = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag", "FORMULA")
    label_list, hashtag_list = gspread.convert_to_dict_data(sheet)

    for row in hashtag_list:
        tag_name = row['name']
        print(tag_name)
        try:
            languages = [candidate.lang for candidate in detect_langs(tag_name)]
            print(languages)
        except Exception as e:
            # Best effort: one undetectable name must not abort the whole run.
            print(e)
        else:
            row['languages'] = ','.join(languages)

    values = gspread.convert_to_sheet_values(label_list, hashtag_list)
    gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', {'values': values})
    print("SUCCESS!! update_languages")
def update_hashtag():
    """Merge freshly fetched hashtag records into the "hashtag" worksheet.

    Records returned by ``get_hashtag()`` are matched against the existing
    rows by ``name``: a matching row is updated in place with the record's
    values, anything else is appended as a new row. The merged rows are then
    sorted by ``num`` in descending order (missing or falsy values count as 0)
    and written back to the worksheet.
    """
    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag", "FORMULA")
    label_list, hashtag_list = gspread.convert_to_dict_data(response)

    # Name -> row index built once, instead of scanning every row for every
    # incoming record. setdefault keeps the FIRST row of a duplicated name,
    # which is the row a front-to-back scan would have found.
    rows_by_name = {}
    for row in hashtag_list:
        if 'name' in row:
            rows_by_name.setdefault(row['name'], row)

    new_num = 0
    for d in get_hashtag():
        name = d['name']
        row = rows_by_name.get(name)
        if row is None:
            hashtag_list.append(d)
            # Register the new row so a repeated record updates it rather
            # than being appended a second time.
            rows_by_name[name] = d
            print("NEW!!:", d.get('page'), d.get('name'))
            new_num += 1
            continue
        row.update(d)
    print("new:", new_num)

    hashtag_list = sorted(hashtag_list, key=lambda k: k.get('num', 0) or 0, reverse=True)
    body = {
        'values': gspread.convert_to_sheet_values(label_list, hashtag_list)
    }
    gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', body)
    print("SUCCESS!! update_hashtag")
def get_spots():
    """Scrape the spot listing of every known city and store the new spots.

    Cities are read from the "city" worksheet and already-known spots from
    the "spot" worksheet. For each city the paginated listing at
    ``BASE_URL + city['href'] + '?page=N'`` is walked starting at page 1.
    Every ``<li>`` link inside ``<main>`` whose text is not an already-known
    spot name is appended as a new row (``city``, ``spot``, ``page``,
    ``href``).

    Paging for a city ends at the first page that raises (for example when
    the page has no ``<main>`` element); the exception is printed, not
    re-raised. The "spot" worksheet is rewritten after each city.
    """
    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "city", "FORMULA")
    _, city_list = gspread.convert_to_dict_data(response)
    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "spot", "FORMULA")
    label_list, spot_list = gspread.convert_to_dict_data(response)

    # NOTE(review): only names that were in the sheet at start-up are tracked;
    # spots appended during this run are not added, so a name met twice in one
    # run is stored twice. Confirm whether that is intended.
    spot_names = {spot.get('spot') for spot in spot_list}

    for city in city_list:
        num = 1
        new_num = 0
        while True:
            # Reset before the try: if get_driver() raises, the finally clause
            # must not hit an unbound name (first page) or quit the previous
            # page's already-closed driver again.
            driver = None
            try:
                driver = get_driver()
                page = '?page=%s' % (num)
                print(page)
                driver.get(BASE_URL + city['href'] + page)
                sleep(1)
                soup = BeautifulSoup(driver.page_source, "lxml")
                # A page without <main> makes find() return None; the
                # resulting AttributeError below is what ends the paging.
                main_tag = soup.find("main")
                for li in main_tag.find_all("li"):
                    a_tag = li.find("a")
                    if not a_tag:
                        continue
                    spot = a_tag.text
                    if spot in spot_names:
                        continue
                    spot_list.append({
                        'city': city['city'],
                        'spot': spot,
                        'page': num,
                        'href': a_tag.get('href'),
                    })
                    print("NEW!", spot)
                    new_num += 1
                num += 1
            except Exception as e:
                pprint(e)
                break
            finally:
                if driver is not None:
                    driver.quit()
        print("NEW", new_num)

        values = gspread.convert_to_sheet_values(label_list, spot_list)
        body = {'values': values}
        gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'spot', body)
    print("SUCCESS!! get_spots")
def get_location_japan():
    """Scrape the paginated JP location listing and store the new cities.

    Already-known cities are read from the "city" worksheet. The listing at
    ``BASE_URL + "/explore/locations/JP/" + '?page=N'`` is walked starting at
    page 1. Every ``<li>`` link inside ``<main>`` whose text is not an
    already-known city name is appended as a new row (``city``, ``page``,
    ``href``).

    Paging ends at the first page that raises (for example when the page has
    no ``<main>`` element); the exception is printed, not re-raised. The
    "city" worksheet is then overwritten with all rows.
    """
    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "city", "FORMULA")
    label_list, city_list = gspread.convert_to_dict_data(response)

    # NOTE(review): only names that were in the sheet at start-up are tracked;
    # cities appended during this run are not added to this set.
    city_names = {city.get('city') for city in city_list}

    url = "/explore/locations/JP/"
    print(url)
    num = 1
    while True:
        # Reset before the try: if get_driver() raises, the finally clause
        # must not hit an unbound name (first page) or quit the previous
        # page's already-closed driver again.
        driver = None
        try:
            driver = get_driver()
            page = '?page=%s' % (num)
            print("page:", num)
            driver.get(BASE_URL + url + page)
            sleep(1)
            soup = BeautifulSoup(driver.page_source, "lxml")
            # A page without <main> makes find() return None; the resulting
            # AttributeError below is what ends the paging.
            main_tag = soup.find("main")
            for li in main_tag.find_all("li"):
                a_tag = li.find("a")
                if not a_tag:
                    continue
                city = a_tag.text
                if city in city_names:
                    continue
                city_list.append({
                    'city': city,
                    'page': num,
                    'href': a_tag.get('href'),
                })
                print("NEW!", city)
            num += 1
        except Exception as e:
            pprint(e)
            break
        finally:
            if driver is not None:
                driver.quit()

    body = {'values': gspread.convert_to_sheet_values(label_list, city_list)}
    gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'city', body)
    print("SUCCESS!! get_location_japan")
def add_hashtag_detail():
    """Fetch details for every "hashtag" worksheet row that has no ``num``.

    Opens a browser driver, submits ``INSTAGRAM_USERNAME`` /
    ``INSTAGRAM_PASSWORD`` on the ``login_url`` form, then for every row
    without a truthy ``num`` merges the result of
    ``get_hashtag_detail(driver, name)`` into the row. Progress is written to
    the sheet periodically. At the end the rows are sorted by ``num``
    (descending, missing or falsy values count as 0) and written once more.

    Any exception is printed rather than raised, and the driver is shut down
    in all cases.
    """
    # Bound before the try so the finally clause stays safe even when
    # get_driver() itself raises.
    driver = None
    try:
        driver = get_driver()

        # Login
        print("LOGIN START!!")
        driver.get(login_url)
        username_field = driver.find_element_by_xpath(usernamePath)
        username_field.send_keys(INSTAGRAM_USERNAME)
        password_field = driver.find_element_by_xpath(passwordPath)
        password_field.send_keys(INSTAGRAM_PASSWORD)
        password_field.send_keys(Keys.RETURN)
        # Fixed wait after submitting the form, presumably to let the login
        # complete before any further navigation.
        sleep(30)
        print("LOGIN FINISH!!")

        response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag", "FORMULA")
        label_list, hashtag_list = gspread.convert_to_dict_data(response)

        # Starts at 1, so it is always one ahead of the rows processed so far.
        count = 1
        for index, hashtag in enumerate(hashtag_list):
            # Show progress.
            if index % 100 == 0:
                print("index:", index)

            if hashtag.get('num'):
                continue

            # Save every 100 items. Checked only for rows that are about to be
            # processed: testing it before the skip above re-saved the whole
            # sheet on every skipped row while count sat on a multiple of 100.
            if count % 100 == 0:
                body = {
                    'values': gspread.convert_to_sheet_values(label_list, hashtag_list)
                }
                gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', body)
                print("count:", count)

            data = get_hashtag_detail(driver, hashtag['name'])
            hashtag.update(data)
            count += 1
        # count started at 1, so subtract it to report the rows actually updated.
        print("new:", count - 1)

        hashtag_list = sorted(hashtag_list, key=lambda k: k.get('num', 0) or 0, reverse=True)
        body = {
            'values': gspread.convert_to_sheet_values(label_list, hashtag_list)
        }
        gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', body)
        print("SUCCESS!! add_hashtag_detail")
    except Exception as e:
        pprint(e)
    finally:
        if driver is not None:
            driver.quit()
def add_hashtag_list(limit=5):
    """Discover related hashtags and append the unknown ones to the sheet.

    Opens a browser driver, submits ``INSTAGRAM_USERNAME`` /
    ``INSTAGRAM_PASSWORD`` on the ``login_url`` form, then inspects the first
    ``limit`` rows of the "hashtag" worksheet. For every inspected row whose
    ``languages`` value contains ``'ja'``, each tag in the ``hashtag_set``
    returned by ``get_hashtag_detail(driver, name)`` that is not yet in the
    sheet is appended as a new row (``name``, ``update_at``). At the end the
    rows are sorted by ``num`` (descending) and written back.

    Any exception is printed rather than raised, and the driver is shut down
    in all cases.

    Args:
        limit: Number of leading rows to inspect. Defaults to 5, the value
            that used to be hard-coded; pass ``None`` to inspect every row.
    """
    # Bound before the try so the finally clause stays safe even when
    # get_driver() itself raises.
    driver = None
    try:
        driver = get_driver()

        # Login
        print("LOGIN START!!")
        driver.get(login_url)
        username_field = driver.find_element_by_xpath(usernamePath)
        username_field.send_keys(INSTAGRAM_USERNAME)
        password_field = driver.find_element_by_xpath(passwordPath)
        password_field.send_keys(INSTAGRAM_PASSWORD)
        password_field.send_keys(Keys.RETURN)
        # NOTE(review): unlike add_hashtag_detail there is no wait after
        # submitting the form; confirm the login has completed by the time
        # get_hashtag_detail navigates.
        print("LOGIN FINISH!!")

        response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag", "FORMULA")
        label_list, hashtag_list = gspread.convert_to_dict_data(response)

        # Known names as a set: constant-time duplicate checks instead of a
        # scan over every row for every candidate tag.
        known_names = {row.get('name') for row in hashtag_list}

        count = 1
        new_num = 0
        # The slice is a copy, so appending to hashtag_list below is safe.
        for index, hashtag in enumerate(hashtag_list[:limit]):
            # Show progress.
            if index % 100 == 0:
                print("index:", index)

            # A missing or empty "languages" cell means "not Japanese" rather
            # than an error that aborts the whole run.
            if 'ja' not in (hashtag.get('languages') or ''):
                continue

            # Save every 100 items. Checked only for rows that are about to be
            # processed, so skipped rows cannot trigger repeated saves.
            if count % 100 == 0:
                body = {
                    'values': gspread.convert_to_sheet_values(label_list, hashtag_list)
                }
                gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', body)
                print("count:", count)

            data = get_hashtag_detail(driver, hashtag['name'])
            for new_tag in data.get('hashtag_set', set()):
                if new_tag in known_names:
                    continue
                hashtag_list.append({
                    'name': new_tag,
                    'update_at': data.get('update_at'),
                })
                known_names.add(new_tag)
                print(new_tag)
                new_num += 1
            count += 1

        hashtag_list = sorted(hashtag_list, key=lambda k: k.get('num', 0) or 0, reverse=True)
        body = {
            'values': gspread.convert_to_sheet_values(label_list, hashtag_list)
        }
        gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', body)
        print("new:", new_num)
        # The message used to name add_hashtag_detail (copy-paste slip).
        print("SUCCESS!! add_hashtag_list")
    except Exception as e:
        pprint(e)
    finally:
        if driver is not None:
            driver.quit()
def update_spread_sheet():
    """Export view counts of the top 20 hashtags to two worksheets.

    Reads the 20 documents of the Firestore ``hashtags`` collection with the
    highest ``view_count``. Each key of a document's ``stats`` mapping (with
    ``_`` replaced by ``/``) becomes a period column holding that entry's
    ``view_count``. Two worksheets of ``SHEET_ID`` are then written with
    ``majorDimension`` set to ``COLUMNS``:

    * ``視聴回数合計`` - the recorded view count for every period.
    * ``視聴回数(日別)`` - for every period after the first, the difference
      from the hashtag's previous recorded count. A hashtag's first recorded
      period is left blank, and the earliest period column is dropped.
    """
    helper_firestore.initialize_firebase()
    query = (
        firestore.client()
        .collection('hashtags')
        .order_by('view_count', direction=firestore.Query.DESCENDING)
        .limit(20)
    )
    docs = query.get()

    header = 'ハッシュタグ'
    periods = set()  # set membership instead of scanning a growing list
    hashtag_list = []
    for doc in docs:
        hashtag = doc.to_dict()
        hashtag[header] = hashtag['cha_name']
        for raw_period, stat in hashtag['stats'].items():
            period = raw_period.replace('_', '/')
            periods.add(period)
            hashtag[period] = stat['view_count']
        hashtag_list.append(hashtag)

    # NOTE(review): periods are ordered as plain strings; this is only
    # chronological if the stats keys are zero-padded. Confirm the key format.
    period_list = sorted(periods)

    body = {
        'values': gspread.convert_to_sheet_values([header] + period_list, hashtag_list),
        'majorDimension': 'COLUMNS',
    }
    gspread.update_sheet_values(SHEET_ID, '視聴回数合計', body, valueInputOption='RAW')

    # Turn the stored counts into per-period differences, in place.
    for hashtag in hashtag_list:
        prev_count = 0
        for period in period_list:
            count = hashtag.get(period)
            if not count:
                # No (or zero) count recorded for this period: leave the cell
                # as it is and keep the previous baseline.
                continue
            if prev_count == 0:
                # First recorded period for this hashtag: nothing to diff against.
                hashtag[period] = ''
            else:
                hashtag[period] = count - prev_count
            prev_count = count

    # Drop the earliest period column. Slicing, unlike ``del lst[0]``, also
    # copes with the case where no period was collected at all.
    body = {
        'values': gspread.convert_to_sheet_values([header] + period_list[1:], hashtag_list),
        'majorDimension': 'COLUMNS',
    }
    gspread.update_sheet_values(SHEET_ID, '視聴回数(日別)', body, valueInputOption='RAW')
    print('SUCCESS: update_spread_sheet')