def __init__(self, api=None):
    """Open today's JSONL output file (append mode) and remember the day."""
    StreamListener.__init__(self, api)
    today = datetime.date.today()
    self.d = today
    self.out_prefix = "D:/workspace/python/MyPythonApp/works/output/filter"
    # One output file per calendar day, named <prefix>-YYYY-MM-DD.jsonl
    out_name = "%s-%s.jsonl" % (self.out_prefix, today.isoformat())
    self.fout = open(out_name, 'a')
    self.day = today.day
def __init__(self, db_path, verbose, *args, **kwargs):
    """Open (or create) the SQLite geo-tweet database and initialise the listener.

    Args:
        db_path: path to the SQLite database file.
        verbose: truthy to enable verbose behaviour elsewhere in the class.
        *args, **kwargs: forwarded to StreamListener.__init__.
    """
    self._verbose = verbose
    # BUG FIX: the original called sqlite3.connect(options.db_path), reading a
    # module-level global and silently ignoring the db_path parameter.
    self._conn = sqlite3.connect(db_path)
    c = self._conn.cursor()
    # Create the capture table if it does not exist yet.
    cmd = """
        CREATE TABLE IF NOT EXISTS "capture_geotweet" (
        "id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, -- table ID
        "time" varchar(100) NOT NULL, -- time in string, e.g., "Sun Jun 22 14:24:56 +0000 2014"
        "text" varchar(200) NOT NULL, -- content of tweet
        "lat" decimal NOT NULL, -- latitude of tweet (all tweets in the database are geotagged)
        "lng" decimal NOT NULL, -- longitude of tweet
        "rt" bool NOT NULL, -- whether the tweet was retweeted
        "rt_count" integer NOT NULL, -- the number of times the tweet has been retweeted
        "tw_id" varchar(50) NOT NULL -- the Twitter assigned ID
        , "epoch_time" integer); -- time of tweet in seconds since Thursday Jan 1st, 1970 00:00:00 GMT
        """
    c.execute(cmd)
    self._conn.commit()
    # Twitter's created_at format, e.g. "Sun Jun 22 14:24:56 +0000 2014".
    self._time_format = '%a %b %d %H:%M:%S +0000 %Y'
    # FIX: raw string — the original non-raw "[\W...]" contained invalid
    # escape sequences (DeprecationWarning); the pattern itself is unchanged.
    self._pattern = re.compile(r"[\W,.:/\']+")
    StreamListener.__init__(self, *args, **kwargs)
def __init__(self, is_ai=False):
    """Listener that forwards tweets to a Kafka cluster over SASL/GSSAPI."""
    StreamListener.__init__(self)
    self._is_ai = is_ai
    brokers = [
        'kfk-brk-1.au.adaltas.cloud:6667',
        'kfk-brk-2.au.adaltas.cloud:6667',
        'kfk-brk-3.au.adaltas.cloud:6667',
    ]
    self._kafka_producer = KafkaProducer(
        bootstrap_servers=brokers,
        security_protocol='SASL_PLAINTEXT',
        sasl_mechanism='GSSAPI',
    )
def __init__(self, collection, stream_name, data_dir, seed=False, duration_minutes=15, tweets_per_record=25000):
    """Prepare WARC rotation state for streaming tweets into data_dir.

    Files rotate every duration_minutes; records rotate every
    tweets_per_record tweets.
    """
    StreamListener.__init__(self)
    log.info("Streaming %s tweets for %s collection into %s. Rotating files every %s minutes. Rotating "
             "records every %s tweets",
             stream_name, collection, data_dir, duration_minutes, tweets_per_record)
    self.collection = collection
    self.stream_name = stream_name
    self.duration_minutes = duration_minutes
    self.tweets_per_record = tweets_per_record
    self.data_dir = data_dir
    self.seed = seed
    # Ensure the destination directory exists.
    if not os.path.exists(self.data_dir):
        os.makedirs(self.data_dir)
    # Rotation bookkeeping, filled in once streaming starts.
    self.period_start = None
    self.period_start_time = None
    self.warc = None
    self.warc_filepath = None
    self.segment_origin_id = None
    self.payload = ""
    self.segment = 1
    self.segment_total_length = 0
    self.tweet_count = 0
    # These will be set by StreamDecorator.
    self.session = None
    self.url = None
def __init__(self, api=None, fobj=None):
    """Write output to fobj (stdout by default) and append a restart marker
    to the rate-limit log."""
    StreamListener.__init__(self, api)
    self.fobj = sys.stdout if fobj is None else fobj
    self.ratelimit = open('ratelimits.log', 'a')
    self.ratelimit.write('\n\n# Stream restarted on {0}'.format(time.strftime('%c')))
def __init__(self, db_path, verbose, *args, **kwargs):
    """Open (or create) the SQLite geo-tweet database and initialise the listener.

    Args:
        db_path: path to the SQLite database file.
        verbose: truthy to enable verbose behaviour elsewhere in the class.
        *args, **kwargs: forwarded to StreamListener.__init__.
    """
    self._verbose = verbose
    # BUG FIX: the original called sqlite3.connect(options.db_path), reading a
    # module-level global and silently ignoring the db_path parameter.
    self._conn = sqlite3.connect(db_path)
    c = self._conn.cursor()
    # Create the capture table if it does not exist yet.
    cmd = """
        CREATE TABLE IF NOT EXISTS "capture_geotweet" (
        "id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, -- table ID
        "time" varchar(100) NOT NULL, -- time in string, e.g., "Sun Jun 22 14:24:56 +0000 2014"
        "text" varchar(200) NOT NULL, -- content of tweet
        "lat" decimal NOT NULL, -- latitude of tweet (all tweets in the database are geotagged)
        "lng" decimal NOT NULL, -- longitude of tweet
        "rt" bool NOT NULL, -- whether the tweet was retweeted
        "rt_count" integer NOT NULL, -- the number of times the tweet has been retweeted
        "tw_id" varchar(50) NOT NULL -- the Twitter assigned ID
        , "epoch_time" integer); -- time of tweet in seconds since Thursday Jan 1st, 1970 00:00:00 GMT
        """
    c.execute(cmd)
    self._conn.commit()
    # Twitter's created_at format, e.g. "Sun Jun 22 14:24:56 +0000 2014".
    self._time_format = '%a %b %d %H:%M:%S +0000 %Y'
    # FIX: raw string — the original non-raw "[\W...]" contained invalid
    # escape sequences (DeprecationWarning); the pattern itself is unchanged.
    self._pattern = re.compile(r"[\W,.:/\']+")
    StreamListener.__init__(self, *args, **kwargs)
def __init__(self, hashtags, session, engine):
    """Track the given hashtags and keep a database session/engine pair."""
    StreamListener.__init__(self)
    self.eu = EncodingUtils()
    self.cpt = 0  # FIXME: test if useful
    self.hashtags = self.format_hashtags(hashtags)
    # bridge to the db
    self.engine = engine
    self.session = session
def __init__(self, botname, taghandler, customhandler):
    """Compile the @botname command patterns and store the handlers.

    Args:
        botname: the bot's Twitter handle (without the leading '@').
        taghandler: handler object for tag get/set/delete commands.
        customhandler: handler object for everything else.
    """
    self.botname = botname
    # FIX: raw strings — the original non-raw patterns contained invalid
    # escape sequences (\=, \/, \s -> DeprecationWarning). The regexes
    # themselves are byte-identical.
    # NOTE(review): the class [\=\=] matches a single '=' only; if '=='
    # was intended, the pattern needs ' *== *' instead — confirm.
    self.rsettag = re.compile(r"^@%s +@?([a-zA-Z0-9_-]+) *[\=\=] *([^\s]+)$" % self.botname, re.IGNORECASE)
    self.rgettag = re.compile(r"^@%s +@?([a-zA-Z0-9_-]+) *$" % self.botname, re.IGNORECASE)
    self.rdeltag = re.compile(r"^@%s +\/del +([^\s]+) *$" % self.botname, re.IGNORECASE)
    StreamListener.__init__(self)
    self.taghandler = taghandler
    self.customhandler = customhandler
def __init__(self, keywords, collection, *args):
    """Attach the tracked keywords and a pymongo collection to the listener.

    Args:
        keywords: list of keywords to track.
        collection: pymongo connection to a MongoDB collection.
        *args: forwarded to StreamListener.__init__.
    """
    self.collection = collection
    self.keywords = keywords
    StreamListener.__init__(self, *args)
def __init__(self):
    """Connect to local MongoDB and precompile the text-cleaning regexes."""
    StreamListener.__init__(self)
    self.dataAmount = 0
    client = MongoClient()
    self.client = client
    # Collection "twitters" in database "test".
    self.twitterCollection = client.test.twitters
    # Precompiled patterns for text preprocessing.
    self.wordMacher = re.compile(r'[a-z]+')
    self.urlMacher = re.compile(r'http[s]?://[\S]+')
    self.atMacher = re.compile(r'[\S]*@[\S]+')
def __init__(self, timer):
    """Record the collection start time and intended run duration.

    FIX: converted Python 2 print statements to the print() function,
    for consistency with the Python 3 constructs used elsewhere in
    this file (the statement form is a syntax error under Python 3).

    Args:
        timer: run duration (units decided by the caller — confirm).
    """
    self.inc = 0
    StreamListener.__init__(self)
    # report the start of data collection...
    print("Gathering data at %s" % (str(ctime())))
    self.startTime = time()
    print("Start Time = %s" % (str(ctime())))
    self.timer = timer
    self.count = 0
def __init__(self, timer):
    """Record the collection start time and intended run duration.

    FIX: converted Python 2 print statements to the print() function,
    for consistency with the Python 3 constructs used elsewhere in
    this file (the statement form is a syntax error under Python 3).

    Args:
        timer: run duration (units decided by the caller — confirm).
    """
    self.inc = 0
    StreamListener.__init__(self)
    # report the start of data collection...
    print("Gathering data at %s" % (str(ctime())))
    self.startTime = time()
    print("Start Time = %s" % (str(ctime())))
    self.timer = timer
    self.count = 0
def __init__(self, output_file, time_limit):
    """Remember when listening started, how long to listen, and where to write.

    Args:
        output_file: destination file for collected tweets.
        time_limit: how long to keep listening.
    """
    # initiate superclass's constructor
    StreamListener.__init__(self)
    self.start_time = datetime.datetime.now()   # listener start time
    self.time_limit = time_limit                # time limit for listening
    self.output_file = output_file              # output destination
def __init__(self, api=None):
    # Open three timestamped CSV outputs (status, user, delete) plus a log
    # file, write one header row into each, and reset the stream counters.
    StreamListener.__init__(self, api=api)
    # Shared filename prefix, e.g. ./data/202406221424
    ts = time.strftime("./data/%Y%m%d%H%M")
    # --- status CSV: one row per streamed status --------------------------
    self.statusf = open(ts+'_status.csv','w',newline='')
    self.statusw = csv.writer(self.statusf)
    self.statusw.writerow(['id', 'created_at', 'coordinates',
        'hashtags', 'user_mentions', 'symbols', 'urls',
        'media',
        'in_reply_to_screen_name',
        'in_reply_to_user_id_str',
        'in_reply_to_status_id_str',
        'place', 'retweeted_status_id', 'source',
        'text', 'user id'
        # some other attributes exsits, they are list below
        #, status.withheld_copyright, \#optional
        #status.withheld_in_countries, \#optional
        #status.withheld_scope, \#optional
        #status.truncated, \#default False
        #status.retweeted, status.retweet_count, \#for no rt
        #status.scopes, possibly_sensitive, \
        #status.lang, status.fiter_level, \lang=en
        #status.favorited, status.favorite_count, \
        #status.current_user_retweet, \
        #status.contributors, status.annotations \
        ])
    # --- user CSV: one row per tweet author -------------------------------
    self.userf = open(ts+'_user.csv','w',newline='')
    self.userw = csv.writer(self.userf)
    self.userw.writerow(['created_at', 'default_profile',
        #user.default_profile_image, \
        'description',
        #user.entities, \
        'favourites_count',
        #user.follow_request_sent, user.following,\#relate to given user
        'followers_count', 'friends_count',
        'geo_enabled', 'id_str', 'is_translator',
        'lang', 'listed_count', 'location',
        #user.notifications, \
        'name',
        #user.profile_background_color, user.profile_background_image_url, \
        #user.profile_background_image_url_https, user.profile_background_tile, \
        #user.profile_banner_url, user.profile_image_url, \
        #user.profile_image_url_https, user.profile_link_color, \
        #user.profile_sidebar_border_color, user.profile_sidebar_fill_color, \
        #user.profile_text_color, user.profile_use_background_image, \
        'protected', 'screen_name',
        #user.show_all_inline_media, user.status, \
        'statuses_count', 'time_zone', 'user.url',
        #user.utc_offset, \
        #user.withheld_in_countries, user.withheld_scope,
        'verified'])
    # --- delete CSV: one row per deletion notice --------------------------
    self.deletef = open(ts+'_delete.csv','w',newline='')
    self.deletew = csv.writer(self.deletef)
    self.deletew.writerow(['status_id','user_id'])
    self.logf = open('stream.log','a')
    # Stream counters: statuses seen, errors seen, elapsed/accumulated time.
    self.count = 0
    self.err_count = 0
    self.time = 0.0
def __init__(self, count=100):
    """Custom listener that ends after a given number of streamed Tweets.

    :param count: number of Tweets to stream
    """
    StreamListener.__init__(self)
    self.max_count = count   # stop after this many tweets
    self.counter = 0         # tweets seen so far
    self.tweets = []         # collected tweets
def __init__(self, count=100):
    """Custom listener that ends after a given number of streamed Tweets.

    :param count: number of Tweets to stream
    """
    StreamListener.__init__(self)
    self.max_count = count   # stop after this many tweets
    self.counter = 0         # tweets seen so far
    self.tweets = []         # collected tweets
def __init__(self, useBrowser, ofile, colorDict):
    """Initialise the tweet map, the UTF-8 log file and the counters."""
    StreamListener.__init__(self)
    self.useBrowser = useBrowser
    self.ofile = ofile
    self.twMap, self.controller = mapper.initMap(self.ofile, self.useBrowser)
    self.allCoordinates = []
    self.flog = codecs.open("log.txt", mode="w", encoding="utf-8")
    self.gcCount = 0       # geocode counter
    self.tweetCount = 0    # tweet counter
    self.colorDict = colorDict
def __init__(self, timer):
    """Record start time/duration and open a local Postgres connection.

    FIX: converted Python 2 print statements to the print() function,
    for consistency with the Python 3 constructs used elsewhere in
    this file (the statement form is a syntax error under Python 3).

    Args:
        timer: run duration (units decided by the caller — confirm).
    """
    self.inc = 0
    StreamListener.__init__(self)
    # report the start of data collection...
    print("Gathering data at %s" % (str(ctime())))
    self.startTime = time()
    print("Start Time = %s" % (str(ctime())))
    self.timer = timer
    self.count = 0
    # NOTE(review): credentials are hard-coded (masked here); prefer loading
    # them from configuration or the environment.
    self.conn = psycopg2.connect(database="postgres",
                                 user="******",
                                 password="******",
                                 host="localhost",
                                 port="5432")
def __init__(self, timer):
    """Record start time/duration and connect to the local MongoDB 'tweets' db.

    FIX: converted Python 2 print statements to the print() function,
    for consistency with the Python 3 constructs used elsewhere in
    this file (the statement form is a syntax error under Python 3).

    Args:
        timer: run duration (units decided by the caller — confirm).
    """
    self.inc = 0
    StreamListener.__init__(self)
    # report the start of data collection...
    print("Gathering data at %s" % (str(ctime())))
    self.startTime = time()
    print("Start Time = %s" % (str(ctime())))
    ### When initialized, connects to MongoDB database tweets
    self.db = pymongo.MongoClient().tweets
    self.timer = timer
    self.count = 0
def __init__(self, *args, **kwargs):
    """Pop bot-specific options out of kwargs, then initialise the stream.

    Recognised kwargs (removed before delegation): state_dir (required),
    post_replies (default False), gather (default None).
    """
    # Bot options must be popped before kwargs reach StreamListener.
    self.gather = kwargs.pop('gather', None)
    self.post_replies = kwargs.pop('post_replies', False)
    state_dir = kwargs.pop('state_dir')
    StreamListener.__init__(self, *args, **kwargs)
    self.me = self.api.me()
    self._state = State.load(self.me.screen_name, state_dir)
    if self.gather:
        os.makedirs(self.gather, exist_ok=True)
def __init__(self, useBrowser, filename, colorDict):
    """Initialise the tweet map, the UTF-8 log file and the counters."""
    StreamListener.__init__(self)
    self.useBrowser = useBrowser
    self.ofile = filename
    self.twMap, self.controller = mapper.initMap(self.ofile, self.useBrowser)
    self.allCoordinates = []
    self.flog = codecs.open("log.txt", mode="w", encoding="utf-8")
    self.gcCount = 0       # geocode counter
    self.tweetCount = 0    # tweet counter
    self.colorDict = colorDict
def __init__(self, path, fileName, StartDate):
    """Open the first file of a rotating JSON output series."""
    StreamListener.__init__(self)
    self.fileName = r'{0}/{1}'.format(path, fileName)
    self.start_date = StartDate
    self.fileNumber = 1
    # First output file: <path>/<fileName>_<StartDate>_1.json
    first_path = r'{0}_{1}_{2}.json'.format(self.fileName, self.start_date, self.fileNumber)
    self.base_file = open(first_path, 'a+', encoding="utf8")
    self.dataCount = 0
    self.maxCount = 100
def __init__(self, timer):
    """Record start time/duration and open the sentiment corpus CSV.

    FIX: converted Python 2 print statements to the print() function,
    for consistency with the Python 3 constructs used elsewhere in
    this file (the statement form is a syntax error under Python 3).

    Args:
        timer: run duration (units decided by the caller — confirm).
    """
    self.inc = 0
    StreamListener.__init__(self)
    # report the start of data collection...
    print("Gathering data at %s" % (str(ctime())))
    self.startTime = time()
    print("Start Time = %s" % (str(ctime())))
    self.timer = timer
    self.count = 0
    self.f = open('sentiment_corpus.csv', 'a')
    self.writer = csv.writer(self.f, lineterminator='\n')
def __init__(self, user_id_to_monitor, handle_status_callback):
    """Monitor one Twitter handle and report hashtags via a callback.

    :param user_id_to_monitor: a valid Twitter user handle
    :param handle_status_callback: callable taking
        (hashtag, originating_twitter_user_id, tweet); invoked on every
        hashtag in a tweet that mentions the monitored user.
    """
    StreamListener.__init__(self)
    self.handle_status = handle_status_callback
    self.user_id_to_monitor = user_id_to_monitor
def __init__(self, name, output_dir):
    """Open an hourly-rotated bz2 output file inside output_dir."""
    StreamListener.__init__(self)
    self.name = name
    self.output_dir = output_dir
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # Current UTC hour, e.g. '2024-01-01-13'; it names the output file.
    current_dt = datetime.utcfromtimestamp(int(time.time())).strftime('%Y-%m-%d-%H')
    out_path = os.path.join(self.output_dir, '{0}.bz2'.format(current_dt))
    self.output = bz2.open(out_path, 'at')
    # Epoch timestamp at which the next hourly rotation is due.
    hour_start = (datetime.strptime(current_dt, '%Y-%m-%d-%H') - datetime(1970, 1, 1)).total_seconds()
    self.next_hour_ts = hour_start + 3600
def __init__(self, api=None, **options):
    """Read display options for the listener.

    BUG FIX: the original assigned the value of every option to
    ``self.print_data``, so each assignment clobbered the previous one and
    only the final ``print_data`` lookup survived. Each option now gets its
    own attribute (names inferred from the option keys — confirm against
    the methods that consume them).
    """
    StreamListener.__init__(self, api)
    self.json = import_simplejson()
    self.print_data = options.get("print_data", False)
    self.created_at = options.get("created_at", False)
    self.user = options.get("user", False)
    self.id = options.get("id", False)
    self.favorite_count = options.get("favorite_count", False)
    self.retweet_count = options.get("retweet_count", False)
    self.text = options.get("text", False)
    self.source_url = options.get("source_url", False)
    # Seconds to wait between handled items — 0 means no delay.
    self.delay = int(options.get("delay", 0))
def __init__(self, api=None, basename=None):
    """Set up a time-rotated tweet logger and a rate-limit log file.

    Args:
        api: forwarded to StreamListener.__init__.
        basename: base filename for the rotating tweet log
            (defaults to 'tweets').
    """
    StreamListener.__init__(self, api)
    if basename is None:
        basename = 'tweets'
    # BUG FIX: the original hard-coded "tweets" here, so the basename
    # parameter was computed but never used.
    logHandler = TimedRotatingFileHandler(basename, when="H", interval=8)
    logFormatter = logging.Formatter('%(message)s')
    logHandler.setFormatter(logFormatter)
    logger = logging.getLogger('MyLogger')
    logger.addHandler(logHandler)
    logger.setLevel(logging.INFO)
    self.logHandler = logHandler
    self.logger = logger
    # Append a restart marker to the rate-limit log.
    self.ratelimit = open('ratelimit.log', 'a')
    data = "\n\n# Stream restarted: {0}\n".format(time.strftime('%c'))
    print(data, file=self.ratelimit)
def __init__(self, api=None, stream_options=None, engine=None, session_maker=None):
    """Initialise both the tweepy listener and the SQLAlchemy layer."""
    stream_options = {} if stream_options is None else stream_options
    StreamListener.__init__(self, api=api)
    AlchemyDatabase.__init__(self, engine=engine,
                             session_maker=session_maker,
                             declarative_base=SQLTwitterBase)
    self._auth = None
    self._stream = None
    self._stream_options = stream_options
    self.session = self.get_session()
def __init__(self, node, southwest, northeast, extras):
    """Prepare streaming state for a geographic bounding box.

    Note: `extras` is accepted but not used in this constructor.
    """
    logging.info("Beginning TwitterStreamer init")
    StreamListener.__init__(self)
    # Lock and target location
    self.lock = threading.Lock()
    self.buckets = {}
    self.deltas = {}
    self.uploaders = {}
    # Bounding box: [sw_lng, sw_lat, ne_lng, ne_lat].
    self.location = [southwest[LONG], southwest[LAT],
                     northeast[LONG], northeast[LAT]]
    self.event_manager = EventManager()
    # Upload handler
    self.node = node
    logging.info("TwitterStreamer init successful")
def __init__(self, api=None, **options):
    # Threaded listener: incoming tweets feed a bounded queue that a daemon
    # worker (_listen) drains; the *_time options throttle bot actions.
    #
    # Recognised options (all optional): verbose_level, queue_size,
    # list_size, locked, retweet_time, fav_time, follow_time, error_time,
    # empty_time, interaction_time, load_timeline, autostart.
    StreamListener.__init__(self, api)
    Verbose.__init__(self, options.get("verbose_level", 1))
    # Bounded hand-off queue between the stream callback and the worker.
    self.queue = Queue(maxsize=options.get("queue_size", 20))
    self.tweet_list = OrganizedList(options.get("list_size", 250))
    # Daemon worker thread; not started here — presumably by start() below.
    self.queue_thread = Thread(target=self._listen)
    self.queue_thread.daemon = True
    self.running = False
    self.locked = options.get("locked", True)
    # Throttling intervals — presumably seconds; confirm with consumers.
    self.retweet_time = options.get("retweet_time", 10)
    self.fav_time = options.get("fav_time", 10)
    self.follow_time = options.get("follow_time", 10)
    self.error_time = options.get("error_time", 10)
    self.empty_time = options.get("empty_time", 10)
    self.interaction_time = options.get("interaction_time", 0)
    # Optional side effects last: preload the timeline, then autostart.
    if options.get("load_timeline", True):
        self._load_timeline()
    if options.get("autostart", True):
        self.start()
def __init__(self, count, keyword, api=None):
    """Track one keyword and a target tweet count."""
    StreamListener.__init__(self, api=api)
    self.keyword = keyword
    self.count = count
def __init__(self, datacollectionobject):
    """Collect tweets and their timestamps for the given collection object."""
    StreamListener.__init__(self)
    self.datacollectionobject = datacollectionobject
    self.tweet_times = []
    self.tweets = []
def __init__(self, count, keyword, api=None):
    """Track one keyword and a target tweet count."""
    StreamListener.__init__(self, api=api)
    self.keyword = keyword
    self.count = count
def __init__(self, api=None):
    """Open today's JSONL output file (append mode) and remember the day."""
    StreamListener.__init__(self, api)
    today = datetime.date.today()
    self.d = today
    self.out_prefix = "D:/workspace/python/MyPythonApp/works/output/filter"
    # One output file per calendar day, named <prefix>-YYYY-MM-DD.jsonl
    out_name = "%s-%s.jsonl" % (self.out_prefix, today.isoformat())
    self.fout = open(out_name, 'a')
    self.day = today.day
def __init__(self):
    """Initialise the base listener and log a startup debug message."""
    StreamListener.__init__(self)
    debuglog.debug("Starting")
def __init__(self, output):
    """Keep the output sink and initialise the base listener."""
    StreamListener.__init__(self)
    self.output = output
def __init__(self):
    """Initialise the base listener and log a startup debug message."""
    StreamListener.__init__(self)
    debuglog.debug("Starting")
def __init__(self, timeout: int, tweet_db):
    """Store the stream timeout and the tweet database handle.

    :param timeout: timeout value (assumed seconds — confirm with caller)
    :param tweet_db: handle to the tweet storage backend
    """
    StreamListener.__init__(self)
    self.tweet_db = tweet_db
    self.timeout = timeout
def __init__(self):
    """Wire the listener to a Kafka tweet producer."""
    StreamListener.__init__(self)
    self.tweetProducer = TweetProducer('172.17.0.3:9092')
def __init__(self):
    """No extra state — simply initialise the base listener."""
    StreamListener.__init__(self)
def __init__(self, producer, count=10000, verbose=False):
    """Stream up to `count` tweets into `producer`.

    :param producer: object receiving streamed tweets
    :param count: maximum number of tweets to handle
    :param verbose: enable verbose behaviour elsewhere in the class
    """
    StreamListener.__init__(self)
    self.producer = producer
    self.verbose = verbose
    self.counter = 0         # tweets handled so far
    self.max_count = count   # stop threshold
def __init__(self, api):
    """Keep the API client for use in the stream callbacks."""
    StreamListener.__init__(self)
    self.api = api
def __init__(self):
    """Create the Elasticsearch helper used to index tweets."""
    StreamListener.__init__(self)
    self.es = eslib.ElasticSearchForTweets()
def __init__(self, keywords):
    """Initialise the database, remember the tracked keywords and build the
    VADER sentiment analyser."""
    StreamListener.__init__(self)
    init_db()
    self.keywords = keywords
    self.sentiment_model = SentimentIntensityAnalyzer()
def __init__(self, num_tweets_to_grab=1000, min_num_retweets=0, spreadsheet=None):
    """Grab tweets into a spreadsheet, filtering by retweet count.

    BUG FIX: the original default ``spreadsheet=TweetSpreadsheet()`` was
    evaluated once at function-definition time, so every instance created
    without an explicit spreadsheet shared the same object. A fresh
    TweetSpreadsheet is now created per instance.

    Args:
        num_tweets_to_grab: stop after this many tweets.
        min_num_retweets: ignore tweets with fewer retweets than this.
        spreadsheet: destination spreadsheet; a new TweetSpreadsheet
            is created when omitted.
    """
    StreamListener.__init__(self)
    if spreadsheet is None:
        spreadsheet = TweetSpreadsheet()
    self.counter = 0
    self.num_tweets_to_grab = num_tweets_to_grab
    self.spreadsheet = spreadsheet
    self.min_num_retweets = min_num_retweets
def __init__(self, player, api=None):
    """Keep the player object driven by incoming statuses."""
    StreamListener.__init__(self, api)
    self._player = player
def __init__(self, output_file):
    """Write captured tweets under /scratch/data/<output_file>."""
    StreamListener.__init__(self)
    self.output_file = os.path.join("/scratch/data/", output_file)
    self.tmpData = ""
    self.tweet_count = 0
def __init__(self):
    """Connect to S3 and open the first tweet output file."""
    StreamListener.__init__(self)
    self.s3 = S3Connection(aws_access_key, aws_secret_key)
    self.start_new_tweet_file()
def __init__(self, api=None):
    """Delegate straight to the base tweepy listener."""
    StreamListener.__init__(self, api=api)
def __init__(self, store, api=None):
    """Remember the backing data store and clear the stop flag."""
    StreamListener.__init__(self, api=api)
    self.data_store = store
    # presumably set elsewhere to request shutdown — confirm with consumers
    self._stop = False
def __init__(self, bot):
    """Keep a reference to the owning bot and zero the tweet counter."""
    StreamListener.__init__(self, api=None)
    self.count = 0
    self.bot = bot
def __init__(self, monitor_these_user_ids, api=None):
    """Monitor the given user ids and open the status pickle file.

    BUG FIX: the pickle target was opened in text mode ('w'); pickle data
    is binary, and pickle.dump to a text-mode file raises TypeError on
    Python 3 — open with 'wb' instead. (Assumes the file is written with
    the pickle module, as the name suggests — confirm against the writer.)

    Args:
        monitor_these_user_ids: user ids whose statuses are tracked.
        api: accepted for interface compatibility; not forwarded here.
    """
    StreamListener.__init__(self)
    self.monitor_these_user_ids = monitor_these_user_ids
    self.pickle_file = open('/tmp/tweet_status.pickle', 'wb')
def __init__(self, queue, *args, **kwargs):
    """Forward all remaining arguments to StreamListener; keep the queue."""
    StreamListener.__init__(self, *args, **kwargs)
    self.queue = queue
def __init__(self):
    """Start with an empty list of forwarding destinations."""
    StreamListener.__init__(self)
    self.destinations = []
def __init__(self, handler_class, api=None, **opts):
    """Instantiate handler_class(**opts) to process incoming tweets.

    NOTE(review): `api` is accepted but never forwarded to
    StreamListener — confirm whether that is intentional.
    """
    StreamListener.__init__(self)
    self.handler = handler_class(**opts)
def __init__(self):
    """Create the listener plus a reusable niconico helper."""
    StreamListener.__init__(self)
    # Create one niconico-processing instance here and reuse it inside
    # the on_status method.
    self.n = niconico()
def __init__(self, runfor=1):
    """Record the start time and how long the listener should run.

    BUG FIX: the original called ``StreamListener.__init__()`` without
    ``self``, which raises TypeError (missing required instance argument);
    ``self`` is now passed explicitly.

    Args:
        runfor: run duration (units decided by the caller — confirm).
    """
    StreamListener.__init__(self)
    self.starttime = datetime.datetime.now()
    self.runfor = runfor
def __init__(self, q=Queue()): StreamListener.__init__(self) self.latest_tweet = None self.q = q num_threads = 4 '''