示例#1
0
def update_languages():
    """Detect the languages of each hashtag name and store them in the sheet.

    Reads the "hashtag" sheet, runs ``detect_langs`` on every row's ``name``
    and writes the detected language codes, comma-joined, into that row's
    ``languages`` field. A row whose detection raises is reported and left
    untouched. Finally the whole sheet is rewritten in a single update.
    """
    sheet_response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag",
                                              "FORMULA")
    label_list, hashtag_list = gspread.convert_to_dict_data(sheet_response)

    for row in hashtag_list:
        tag_name = row['name']
        print(tag_name)

        try:
            languages = [
                candidate.lang for candidate in detect_langs(tag_name)
            ]
            print(languages)
        except Exception as e:
            # Best effort: report the failure and move on to the next row.
            print(e)
            continue

        # The row dict is the same object held by hashtag_list, so mutating
        # it in place updates the list entry.
        row['languages'] = ','.join(languages)

    sheet_values = gspread.convert_to_sheet_values(label_list, hashtag_list)
    gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag',
                                {'values': sheet_values})
    print("SUCCESS!! update_languages")
示例#2
0
def update_hashtag():
    """Merge the result of ``get_hashtag()`` into the "hashtag" sheet.

    Every fetched item is matched against the existing rows by ``name``:
    a match is updated in place with the fetched fields, anything else is
    appended as a new row. The rows are then sorted by ``num`` in descending
    order (missing or falsy ``num`` counts as 0) and written back to the
    sheet in one update.
    """
    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag",
                                        "FORMULA")
    label_list, hashtag_list = gspread.convert_to_dict_data(response)

    # Index rows by name once instead of scanning the whole list for every
    # fetched item. setdefault keeps the FIRST row for a duplicated name,
    # which is the row a linear first-match search would have picked.
    index_by_name = {}
    for position, row in enumerate(hashtag_list):
        if 'name' in row:
            index_by_name.setdefault(row['name'], position)

    data = get_hashtag()

    new_num = 0
    for d in data:
        name = d['name']
        index = index_by_name.get(name)

        if index is None:
            # Register the new row so a later item with the same name
            # updates it rather than being appended a second time.
            index_by_name[name] = len(hashtag_list)
            hashtag_list.append(d)
            print("NEW!!:", d.get('page'), d.get('name'))
            new_num += 1
            continue

        hashtag_list[index].update(d)

    print("new:", new_num)
    hashtag_list = sorted(hashtag_list,
                          key=lambda k: k.get('num', 0) or 0,
                          reverse=True)
    body = {
        'values': gspread.convert_to_sheet_values(label_list, hashtag_list)
    }
    gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', body)
    print("SUCCESS!! update_hashtag")
示例#3
0
def get_spots():
    """Scrape the spot listing of every city and add unseen spots to the sheet.

    Reads the "city" and "spot" sheets. For each city it loads
    ``BASE_URL + city['href'] + '?page=N'`` for N = 1, 2, ... in a fresh
    driver, parses the ``<main>`` element and looks at the first ``<a>`` of
    every ``<li>``. Anchors whose text is not among the spot names that were
    in the sheet at start are appended with their city, page number and href.

    Paging for a city stops as soon as any step raises; for instance, when
    the page has no ``<main>`` element ``soup.find`` returns ``None`` and the
    following ``find_all`` call fails. The exception is printed, not
    re-raised. The "spot" sheet is rewritten after each city.
    """
    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "city", "FORMULA")
    _, city_list = gspread.convert_to_dict_data(response)

    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "spot", "FORMULA")
    label_list, spot_list = gspread.convert_to_dict_data(response)
    # NOTE(review): this set is never extended with spots added during the
    # run, so a name seen on several pages/cities is appended each time.
    # Confirm whether that is intended before changing it.
    spot_names = {spot.get('spot') for spot in spot_list}

    for city in city_list:
        num = 1
        new_num = 0
        while True:
            # Reset per page: if get_driver() raises, ``finally`` must not
            # hit an unbound name (first page) or quit the previous page's
            # already-closed driver a second time (later pages).
            driver = None
            try:
                driver = get_driver()
                page = '?page=%s' % (num)
                print(page)
                driver.get(BASE_URL + city['href'] + page)
                sleep(1)
                html_source = driver.page_source
                soup = BeautifulSoup(html_source, "lxml")

                main_tag = soup.find("main")
                list_tags = main_tag.find_all("li")
                for li in list_tags:
                    a_tag = li.find("a")

                    if not a_tag:
                        continue

                    spot = a_tag.text
                    if spot in spot_names:
                        continue

                    spot_list.append({
                        'city': city['city'],
                        'spot': spot,
                        'page': num,
                        'href': a_tag.get('href'),
                    })
                    print("NEW!", spot)
                    new_num += 1

                num += 1

            except Exception as e:
                # Any failure ends paging for this city.
                pprint(e)
                break

            finally:
                if driver is not None:
                    driver.quit()

        print("NEW", new_num)
        # Save after every city so finished cities survive a later failure.
        values = gspread.convert_to_sheet_values(label_list, spot_list)
        body = {'values': values}
        gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'spot', body)

    print("SUCCESS!! get_spots")
示例#4
0
def get_location_japan():
    """Scrape the Japan location listing and add unseen cities to the sheet.

    Reads the "city" sheet, then loads
    ``BASE_URL + "/explore/locations/JP/" + '?page=N'`` for N = 1, 2, ... in a
    fresh driver per page. For every ``<li>`` inside ``<main>`` the first
    ``<a>`` is inspected; anchors whose text is not among the city names that
    were in the sheet at start are appended with their page number and href.

    Paging stops as soon as any step raises; for instance, when the page has
    no ``<main>`` element ``soup.find`` returns ``None`` and the following
    ``find_all`` call fails. The exception is printed, not re-raised.
    Afterwards the "city" sheet is rewritten once.
    """
    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "city", "FORMULA")
    label_list, city_list = gspread.convert_to_dict_data(response)
    # NOTE(review): this set is not extended with cities added during the
    # run, so a name listed on several pages is appended each time. Confirm
    # whether that is intended before changing it.
    city_names = {city.get('city') for city in city_list}

    url = "/explore/locations/JP/"
    print(url)

    num = 1
    while True:
        # Reset per page: if get_driver() raises, ``finally`` must not hit an
        # unbound name (first page) or quit the previous page's
        # already-closed driver a second time (later pages).
        driver = None
        try:
            driver = get_driver()
            page = '?page=%s' % (num)
            print("page:", num)
            driver.get(BASE_URL + url + page)
            sleep(1)
            html_source = driver.page_source
            soup = BeautifulSoup(html_source, "lxml")

            main_tag = soup.find("main")
            list_tags = main_tag.find_all("li")
            for li in list_tags:
                a_tag = li.find("a")

                if not a_tag:
                    continue

                city = a_tag.text
                if city in city_names:
                    continue

                city_list.append({
                    'city': city,
                    'page': num,
                    'href': a_tag.get('href'),
                })
                print("NEW!", city)

            num += 1

        except Exception as e:
            # Any failure ends the paging loop.
            pprint(e)
            break

        finally:
            if driver is not None:
                driver.quit()

    body = {'values': gspread.convert_to_sheet_values(label_list, city_list)}
    gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'city', body)
    print("SUCCESS!! get_location_japan")
示例#5
0
def add_hashtag_detail():
    """Fetch details for every hashtag row that has no ``num`` yet.

    Logs in through the browser driver, reads the "hashtag" sheet and, for
    each row whose ``num`` is missing or falsy, merges the result of
    ``get_hashtag_detail(driver, name)`` into the row. Progress is written
    back to the sheet after every 100 fetched rows, and once more (sorted by
    ``num`` descending) at the end.

    Any exception is printed and swallowed; the driver is always closed if it
    was created.
    """
    # Bound before ``try`` so ``finally`` is safe when get_driver() raises.
    driver = None
    try:
        driver = get_driver()

        # Login
        print("LOGIN START!!")
        driver.get(login_url)

        usernameField = driver.find_element_by_xpath(usernamePath)
        usernameField.send_keys(INSTAGRAM_USERNAME)

        passwordField = driver.find_element_by_xpath(passwordPath)
        passwordField.send_keys(INSTAGRAM_PASSWORD)

        passwordField.send_keys(Keys.RETURN)
        # Fixed wait after submitting the login form.
        sleep(30)
        print("LOGIN FINISH!!")

        response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag",
                                            "FORMULA")
        label_list, hashtag_list = gspread.convert_to_dict_data(response)

        # Number of rows actually fetched (starts at 0 so the final "new:"
        # line reports the real total).
        count = 0
        for index, hashtag in enumerate(hashtag_list):

            # Show progress
            if index % 100 == 0:
                print("index:", index)

            if hashtag.get('num'):
                continue

            data = get_hashtag_detail(driver, hashtag['name'])
            # The row dict is shared with hashtag_list; update it in place.
            hashtag.update(data)
            count += 1

            # Save every 100 fetched rows. The check sits right after the
            # increment so a run of skipped rows cannot re-trigger the same
            # save on every iteration.
            if count % 100 == 0:
                body = {
                    'values':
                    gspread.convert_to_sheet_values(label_list, hashtag_list)
                }
                gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag',
                                            body)
                print("count:", count)

        print("new:", count)
        hashtag_list = sorted(hashtag_list,
                              key=lambda k: k.get('num', 0) or 0,
                              reverse=True)
        body = {
            'values': gspread.convert_to_sheet_values(label_list, hashtag_list)
        }
        gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', body)
        print("SUCCESS!! add_hashtag_detail")

    except Exception as e:
        pprint(e)

    finally:
        if driver is not None:
            driver.quit()
示例#6
0
def add_hashtag_list():
    """Append hashtags discovered via existing Japanese hashtags to the sheet.

    Logs in through the browser driver, reads the "hashtag" sheet and, for
    each processed row whose ``languages`` contains ``'ja'``, calls
    ``get_hashtag_detail(driver, name)`` and appends every entry of its
    ``hashtag_set`` whose name is not yet in the sheet. New rows carry only
    ``name`` and ``update_at``. The sheet is saved after every 100 processed
    rows and once more (sorted by ``num`` descending) at the end.

    Any exception is printed and swallowed; the driver is always closed if it
    was created.
    """
    # Bound before ``try`` so ``finally`` is safe when get_driver() raises.
    driver = None
    try:
        driver = get_driver()

        # Login
        print("LOGIN START!!")
        driver.get(login_url)

        usernameField = driver.find_element_by_xpath(usernamePath)
        usernameField.send_keys(INSTAGRAM_USERNAME)

        passwordField = driver.find_element_by_xpath(passwordPath)
        passwordField.send_keys(INSTAGRAM_PASSWORD)

        passwordField.send_keys(Keys.RETURN)
        # NOTE(review): add_hashtag_detail waits 30 s after submitting the
        # login form; no wait happens here. Confirm whether one is needed.
        print("LOGIN FINISH!!")

        response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag",
                                            "FORMULA")
        label_list, hashtag_list = gspread.convert_to_dict_data(response)

        # Names already in the sheet (kept current as rows are appended) so
        # the duplicate check is a set lookup instead of a full list scan.
        known_names = {row['name'] for row in hashtag_list if 'name' in row}

        count = 0
        new_num = 0
        # NOTE(review): only the first five rows are processed; this looks
        # like a leftover debugging limit. Confirm before removing it.
        for index, hashtag in enumerate(hashtag_list[:5]):

            # Show progress
            if index % 100 == 0:
                print("index:", index)

            # Rows without a languages value are skipped instead of raising
            # and aborting the whole run.
            if 'ja' not in (hashtag.get('languages') or ''):
                continue

            data = get_hashtag_detail(driver, hashtag['name'])
            hashtag_set = data.get('hashtag_set', set())

            for new_tag in hashtag_set:
                if new_tag in known_names:
                    continue

                known_names.add(new_tag)
                hashtag_list.append({
                    'name': new_tag,
                    'update_at': data.get('update_at'),
                })
                print(new_tag)
                new_num += 1

            count += 1

            # Save every 100 processed rows. The check sits right after the
            # increment so skipped rows cannot re-trigger the same save.
            if count % 100 == 0:
                body = {
                    'values':
                    gspread.convert_to_sheet_values(label_list, hashtag_list)
                }
                gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag',
                                            body)
                print("count:", count)

        hashtag_list = sorted(hashtag_list,
                              key=lambda k: k.get('num', 0) or 0,
                              reverse=True)
        body = {
            'values': gspread.convert_to_sheet_values(label_list, hashtag_list)
        }
        gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', body)
        print("new:", new_num)
        # The message previously named add_hashtag_detail (copy-paste slip).
        print("SUCCESS!! add_hashtag_list")

    except Exception as e:
        pprint(e)

    finally:
        if driver is not None:
            driver.quit()
示例#7
0
def update_spread_sheet():
    """Export view counts of the top 20 hashtags to two spreadsheet tabs.

    Queries the ``hashtags`` Firestore collection for the 20 documents with
    the highest ``view_count``. Each key of a document's ``stats`` mapping
    (with ``_`` replaced by ``/``) becomes a period label, and that entry's
    ``view_count`` becomes the value for the period.

    Two sheets are written with ``majorDimension='COLUMNS'``:

    * ``視聴回数合計``: the per-period values as stored.
    * ``視聴回数(日別)``: the difference from the previous period that has a
      value. A hashtag's first valued period has no predecessor and is
      blanked, and the earliest period label is dropped from the sheet.
    """
    helper_firestore.initialize_firebase()
    ref = firestore.client().collection('hashtags')

    query = ref \
        .order_by('view_count', direction=firestore.Query.DESCENDING) \
        .limit(20)

    docs = query.get()

    # A set gives O(1) de-duplication of period labels across documents.
    periods = set()
    hashtag_list = []

    for doc in docs:
        hashtag = doc.to_dict()
        hashtag['ハッシュタグ'] = hashtag['cha_name']

        for _period, stat in hashtag['stats'].items():
            period = _period.replace('_', '/')
            periods.add(period)
            hashtag[period] = stat['view_count']

        hashtag_list.append(hashtag)

    # Plain string sort; presumably the keys are zero-padded dates so this is
    # chronological -- TODO confirm against the stored data.
    period_labels = sorted(periods)

    total_labels = ['ハッシュタグ'] + period_labels
    body = {
        'values': gspread.convert_to_sheet_values(total_labels, hashtag_list),
        'majorDimension': 'COLUMNS',
    }
    gspread.update_sheet_values(SHEET_ID,
                                '視聴回数合計',
                                body,
                                valueInputOption='RAW')

    # Turn the stored per-period values into per-period differences, in place.
    for hashtag in hashtag_list:

        prev_count = 0
        for period in period_labels:

            count = hashtag.get(period)
            if not count:
                # Missing or zero value: leave the cell as is and keep the
                # previous baseline.
                continue

            if prev_count == 0:
                # No earlier value to subtract from.
                hashtag[period] = ''

            else:
                hashtag[period] = count - prev_count

            prev_count = count

    # Drop the earliest period. Slicing (rather than ``del [0]``) also copes
    # with the case where no periods were collected at all.
    daily_labels = ['ハッシュタグ'] + period_labels[1:]
    body = {
        'values': gspread.convert_to_sheet_values(daily_labels, hashtag_list),
        'majorDimension': 'COLUMNS',
    }

    gspread.update_sheet_values(SHEET_ID,
                                '視聴回数(日別)',
                                body,
                                valueInputOption='RAW')

    print('SUCCESS: update_spread_sheet')