Пример #1
0
    def next_batch(self):
        """Collect tweets for one day and advance the search cursor.

        Calls ``premium_search`` for the 24-hour window that starts at
        ``self.next_datetime``, passing each returned token back in until
        at least ``self.tweets_per_day`` tweets have been received or the
        token ``"Not existed!"`` is returned.

        When the search raises an error whose message starts with code
        ``88`` or ``429`` the method logs it, pauses via ``program_sleep(61)``
        and retries with the same token. Any other error is logged and
        ends the day's search early. In every case ``self.next_datetime``
        is moved forward by one day before returning.

        Returns:
            list: one 8-element list per tweet, in the order tweet_id,
            full_text, created_at, language, hashtags_str, mentions_str,
            favorite_count, retweet_count.
        """
        # Order of the values stored for each tweet.
        row_fields = (
            "tweet_id",
            "full_text",
            "created_at",
            "language",
            "hashtags_str",
            "mentions_str",
            "favorite_count",
            "retweet_count",
        )
        stamp_format = "%Y%m%d%H%M"
        one_day = timedelta(days=1)

        collected_rows = []
        from_date_str = self.next_datetime.strftime(stamp_format)
        to_date_str = (self.next_datetime + one_day).strftime(stamp_format)

        print(f"Searching for date {from_date_str}...")
        write_to_log(
            self.log_file,
            f'-------[{datetime.now().strftime("%H:%M:%S")}] Start searching for date {from_date_str}-------'
        )

        received = 0
        page_token = None
        while received < self.tweets_per_day and page_token != "Not existed!":
            try:
                page, page_token = premium_search(
                    product=self.product,
                    label=self.label,
                    query=self.query,
                    from_date=from_date_str,
                    to_date=to_date_str,
                    next_token=page_token)
                received += len(page)
                # Append row by row so rows gathered before a failure are kept.
                for tweet in page:
                    collected_rows.append(
                        [tweet[field] for field in row_fields])
            except Exception as e:
                # The text before the first ":" of the message is the code.
                error_code = str(e).partition(":")[0]
                if error_code not in ("88", "429"):
                    # Not a rate-limit error: record it and give up on this day.
                    write_to_log(self.log_file, f"{e}")
                    break
                print("Rate limit exceeded!")
                write_to_log(
                    self.log_file,
                    f'**[{datetime.now().strftime("%H:%M:%S")}]** Rate limit exceeded! Next date to search: {from_date_str}'
                )
                program_sleep(61)

        self.next_datetime += one_day
        write_to_log(
            self.log_file,
            f'-------[{datetime.now().strftime("%H:%M:%S")}] Finished searching for date {from_date_str}-------\n'
        )
        return collected_rows
            create_table(table_name=t.name,
                         cols_constraints_dict=t.cols_const,
                         cur=CUR,
                         primary_key=t.pk,
                         foreign_keys=t.fks)
            CONN.commit()

        if CLEAR_TABLE:
            for t in [RETWEETS]:
                clear_table(t.name, CUR)
                CONN.commit()
        """recent search"""
        end_time = datetime.strptime("2020-07-02_21:53:20",
                                     "%Y-%m-%d_%H:%M:%S")
        start_time = datetime.strptime("2020-07-02_21:00:00",
                                       "%Y-%m-%d_%H:%M:%S")
        jrs = JULY_RETWEET_SEARCH(PRODUCT, LABEL, SEARCH_QUERY, start_time,
                                  end_time, BEARER_TOKEN, CONN, CUR)
        while jrs.has_next_batch():
            try:
                jrs.save_next_batch()
            except Exception as e:
                if str(e) == "Rate limit exceeded!":
                    program_sleep(60)

    except:
        print(traceback.format_exc())
    """close db connection"""
    CONN.commit()
    CONN.close()
Пример #3
0
        else:
            self.next_id_ptr = len(self.tweets_id_list)


if __name__ == "__main__":
    # Script entry point: open the database, then repeatedly call
    # save_next_batch() on an UPDATE_STREAM_RETWEETS object until
    # has_next_batch() is false or an unexpected error occurs.

    # db connection
    CONN = sqlite3.connect(DB_FILE)
    try:
        CUR = CONN.cursor()

        # authorization
        BEARER_TOKEN = BearerTokenAuth(consumer_key, consumer_secret)

        usr = UPDATE_STREAM_RETWEETS(BEARER_TOKEN, CONN, CUR)
        usr.add_text_column()

        while usr.has_next_batch():
            try:
                usr.save_next_batch()
            except Exception as e:
                error_msg = str(e)
                if error_msg != "[Self Defined]Limit Exceeded!":
                    # Unknown failure: report it and stop processing batches.
                    print(error_msg)
                    break
                # Limit hit: wait 900 seconds, then retry the same batch.
                print("Limite Exceeded!")
                program_sleep(900)

        # Commit only on the normal path so an unexpected failure above
        # does not persist half-finished work.
        CONN.commit()
    finally:
        # close db connection -- always, even if setup or the loop raised
        # (including KeyboardInterrupt during the long sleep).
        CONN.close()

    print("Finished updating!")
                CONN.commit()
        """recent search"""
        while True:
            checkpoint_timestamp = datetime.now().astimezone(
                pytz.utc) - timedelta(minutes=1)
            start_time = checkpoint_timestamp - timedelta(days=7) + timedelta(
                minutes=1)
            rs = RECENT_SEARCH(SEARCH_QUERY, start_time, checkpoint_timestamp,
                               BEARER_TOKEN, CONN, CUR)
            while rs.has_next_batch() and (datetime.now().astimezone(
                    pytz.utc) - checkpoint_timestamp < timedelta(days=1)):
                try:
                    rs.save_next_batch()
                except Exception as e:
                    if "429" in str(e) or "Rate limit exceeded" in str(e):
                        program_sleep(900)
                rs.update_start_time(datetime.now().astimezone(pytz.utc) -
                                     timedelta(days=7) + timedelta(minutes=1))

            if datetime.now().astimezone(
                    pytz.utc) - checkpoint_timestamp < timedelta(days=1):
                sleep_time = (
                    checkpoint_timestamp + timedelta(days=1) -
                    datetime.now().astimezone(pytz.utc)).total_seconds()
                program_sleep(math.ceil(sleep_time))

    except:
        print(traceback.format_exc())
    """close db connection"""
    CONN.commit()
    CONN.close()
Пример #5
0
        for t in [REGULAR_TWEETS, RETWEETS]:
            create_table(table_name=t.name, cols_constraints_dict=t.cols_const, cur=CUR, primary_key=t.pk, foreign_keys=t.fks)
            CONN.commit()

        if CLEAR_TABLE:
            for t in [REGULAR_TWEETS, RETWEETS]:
                clear_table(t.name, CUR)
                CONN.commit()

        """filter stream"""
        fs = FILTERED_STREAM(BEARER_TOKEN, FILTER_RULES, CONN, CUR)
        fs.rules_setup()

        timeout = 0
        while True:
            try:
                fs.stream_connect()
                timeout = 0
            except Exception as e:
                if str(e).startswith("429"):
                    program_sleep(2 ** timeout)
                    timeout += 1
                else:
                    print(e)
    except:
        print(traceback.format_exc())

    """close db connection"""
    CONN.commit()
    CONN.close()