# NOTE(review): whitespace-collapsed fragment. The enclosing function and its
# loop/`if` headers start before this line, so the real nesting is not visible
# here. As stored, everything after the first '#' is lexically a comment; the
# original line breaks must be restored before this can run (Python 2 syntax).
#
# Loop tail, in textual order: append the cleaned tweet `mtweet` to `tweets`;
# in the `else` branch (matching header outside this view) print 'sleeping...'
# and call time.sleep(WAIT); then call utils.bq_data_insert with PROJECT_ID,
# os.environ['BQ_DATASET'], os.environ['BQ_TABLE'] and the `tweets` batch,
# reset `tweets` to an empty list, increment `count`, and print a progress
# line (count, count_max, current time, insert response) when count % 25 == 0.
#
# Entry point: takes the last '/'-separated segment of PUBSUB_TOPIC, names the
# subscription "tweets-<segment>", builds credentials plus BigQuery and
# Pub/Sub clients through `utils`, and calls create_subscription. Any
# exception from that call is printed and otherwise ignored (see the TODO
# about checking for an existing subscription). It then calls write_to_bq and
# prints 'exited write loop' afterwards.
continue tweets.append(mtweet) else: # pause before checking again print 'sleeping...' time.sleep(WAIT) response = utils.bq_data_insert(bigquery, PROJECT_ID, os.environ['BQ_DATASET'], os.environ['BQ_TABLE'], tweets) tweets = [] count += 1 if count % 25 == 0: print ("processing count: %s of %s at %s: %s" % (count, count_max, datetime.datetime.now(), response)) if __name__ == '__main__': topic_info = PUBSUB_TOPIC.split('/') topic_name = topic_info[-1] sub_name = "tweets-%s" % topic_name print "starting write to BigQuery...." credentials = utils.get_credentials() bigquery = utils.create_bigquery_client(credentials) pubsub = utils.create_pubsub_client(credentials) try: # TODO: check if subscription exists first subscription = create_subscription(pubsub, PROJECT_ID, sub_name) except Exception, e: print e write_to_bq(pubsub, sub_name, bigquery) print 'exited write loop'
# NOTE(review): whitespace-collapsed fragment. The enclosing function and its
# loop/`if` headers start before this line, so the real nesting is not visible
# here. As stored, everything after the first '#' is lexically a comment; the
# original line breaks must be restored before this can run (Python 2 syntax).
#
# Loop tail, in textual order: pass `tweet` through utils.cleanup to get
# `mtweet`; skip it when it contains the key 'delete'; when it contains
# 'limit', print it and skip it; otherwise append it to `tweets`. In the
# `else` branch (matching header outside this view) print 'sleeping...' and
# call time.sleep(WAIT). Then call utils.bq_data_insert with PROJECT_ID,
# os.environ['BQ_DATASET'], os.environ['BQ_TABLE'] and the `tweets` batch
# (return value discarded) and reset `tweets` to an empty list.
#
# Entry point: names the subscription "tweets-<PROJECT_ID>", builds
# credentials plus BigQuery and Pub/Sub clients through `utils`, and calls
# create_subscription. Any exception from that call is printed and otherwise
# ignored (see the TODO about checking for an existing subscription). It then
# calls write_to_bq(pubsub, sub_name, bigquery).
mtweet = utils.cleanup(tweet) # We only want to write tweets to BigQuery; we'll skip # 'delete' and 'limit' information. if 'delete' in mtweet: continue if 'limit' in mtweet: print mtweet continue tweets.append(mtweet) else: # pause before checking again print 'sleeping...' time.sleep(WAIT) utils.bq_data_insert(bigquery, PROJECT_ID, os.environ['BQ_DATASET'], os.environ['BQ_TABLE'], tweets) tweets = [] if __name__ == '__main__': sub_name = "tweets-%s" % PROJECT_ID print "starting write to BigQuery...." credentials = utils.get_credentials() bigquery = utils.create_bigquery_client(credentials) pubsub = utils.create_pubsub_client(credentials) try: # TODO: check if subscription exists first subscription = create_subscription(pubsub, PROJECT_ID, sub_name) except Exception, e: print e write_to_bq(pubsub, sub_name, bigquery)
# NOTE(review): whitespace-collapsed fragment. It opens at an `except` clause
# whose `try` and the enclosing function/loop headers are outside this view,
# so the real nesting is not visible here. As stored, everything after the
# first '#' is lexically a comment; the original line breaks must be restored
# before this can run (Python 2 syntax).
#
# Handler and loop tail, in textual order: print the caught exception and
# increment `redis_errors`; once it exceeds `allowed_redis_errors`, print
# "Too many redis-related errors: exiting." and return, otherwise `continue`.
# Then pass `tweet` through utils.cleanup, skip results containing the key
# 'delete' or 'limit', and append the rest to `tweets`. Call
# utils.bq_data_insert with PROJECT_ID, os.environ['BQ_DATASET'],
# os.environ['BQ_TABLE'] and the `tweets` batch, reset `tweets`, increment
# `count`, and print a progress line (count, count_max, current time, insert
# response) when count % 25 == 0.
#
# Entry point: prints a start message, builds the BigQuery client with
# utils.create_bigquery_client() (no arguments) and calls write_to_bq with it.
except Exception, e: print e redis_errors += 1 if redis_errors > allowed_redis_errors: print "Too many redis-related errors: exiting." return continue # First do some massaging of the raw data mtweet = utils.cleanup(tweet) # We only want to write tweets to BigQuery; we'll skip 'delete' and # 'limit' information. if 'delete' in mtweet: continue if 'limit' in mtweet: continue tweets.append(mtweet) # try to insert the tweets into bigquery response = utils.bq_data_insert(bigquery, PROJECT_ID, os.environ['BQ_DATASET'], os.environ['BQ_TABLE'], tweets) tweets = [] count += 1 if count % 25 == 0: print ("processing count: %s of %s at %s: %s" % (count, count_max, datetime.datetime.now(), response)) if __name__ == '__main__': print "starting write to BigQuery...." bigquery = utils.create_bigquery_client() write_to_bq(bigquery)
# NOTE(review): whitespace-collapsed fragment. It opens at `print e`; where
# `e` is bound and the enclosing function/loop headers are outside this view,
# so the real nesting is not visible here. As stored, everything after the
# first '#' is lexically a comment; the original line breaks must be restored
# before this can run (Python 2 syntax).
#
# Loop tail, in textual order: print `e` and increment `redis_errors`; once
# it exceeds `allowed_redis_errors`, print
# "Too many redis-related errors: exiting." and return, otherwise `continue`.
# Then pass `tweet` through utils.cleanup, skip results containing the key
# 'delete' or 'limit', and append the rest to `tweets`. Call
# utils.bq_data_insert with PROJECT_ID, os.environ['BQ_DATASET'],
# os.environ['BQ_TABLE'] and the `tweets` batch, reset `tweets`, increment
# `count`, and print a progress line (count, count_max, current time, insert
# response) when count % 25 == 0.
#
# Entry point: prints a start message, builds the BigQuery client with
# utils.create_bigquery_client() (no arguments) and calls write_to_bq with it.
print e redis_errors += 1 if redis_errors > allowed_redis_errors: print "Too many redis-related errors: exiting." return continue # First do some massaging of the raw data mtweet = utils.cleanup(tweet) # We only want to write tweets to BigQuery; we'll skip 'delete' and # 'limit' information. if 'delete' in mtweet: continue if 'limit' in mtweet: continue tweets.append(mtweet) # try to insert the tweets into bigquery response = utils.bq_data_insert(bigquery, PROJECT_ID, os.environ['BQ_DATASET'], os.environ['BQ_TABLE'], tweets) tweets = [] count += 1 if count % 25 == 0: print("processing count: %s of %s at %s: %s" % (count, count_max, datetime.datetime.now(), response)) if __name__ == '__main__': print "starting write to BigQuery...." bigquery = utils.create_bigquery_client() write_to_bq(bigquery)