def watchlist(self):
    '''Watch a fixed list of user_id.

    Steps, in order:
      1. Open a database handle from the ``db_*`` entries of ``self.config``.
      2. Empty ``target_users`` and reload it from ``seed.lst``.
      3. Crawl ``seed.lst`` via ``self.crawl``.
      4. Select the distinct ``friend_id`` values of the target users and
         hand them to ``misc.write_to_files`` under the name
         ``initial_friends``.
      5. Enter the generate-crawl-update loop (``self.twalerloop``).

    The database handle is released even when one of the steps raises;
    the exception itself still propagates to the caller.
    '''
    db = misc.mysql_db(self.config['db_server'],
                       self.config['db_username'],
                       self.config['db_password'],
                       self.config['db_database'],
                       self.logger)
    try:
        # Clean up database first
        db.execute('DELETE FROM target_users')

        # TODO still have problem
        db.execute(
            'LOAD DATA LOCAL INFILE "seed.lst" INTO TABLE target_users '
            'FIELDS TERMINATED BY "\\t" LINES TERMINATED BY "\\n"')

        # Get that list first
        self.crawl('seed.lst')

        # Get that list's friend second
        db.execute(
            'SELECT DISTINCT friend_id FROM friends, target_users '
            'WHERE friends.user_id = target_users.user_id')
        results = db.cursor.fetchall()
    finally:
        # Release the handle on failure too, so an error in any step above
        # does not leave it open. The original code released it through
        # __del__(); that call is kept because no other close method of the
        # wrapper is visible here.
        db.__del__()

    misc.write_to_files(results, 'initial_friends',
                        self.config['seed_per_file'], 'utf')

    # Enter the generate-crawl-update loop
    self.twalerloop()
def __init__(self, config, logger):
    '''Store the configuration and logger, then set up seeds dir and DB.

    config -- mapping; this method reads the keys ``dir_seeds``,
              ``db_server``, ``db_username``, ``db_password`` and
              ``db_database``.
    logger -- object with an ``error(msg)`` method; it is also passed on
              to ``misc.mysql_db``.

    A missing ``dir_seeds`` key raises ``KeyError``. Any exception raised
    while creating the seeds directory or opening the database is printed
    and logged, not re-raised; in that case ``self.db`` is left unset.
    '''
    self.config = config
    self.logger = logger
    dir_seeds = self.config['dir_seeds']
    try:
        if not os.path.exists(dir_seeds):
            os.makedirs(dir_seeds)
        self.db = misc.mysql_db(self.config['db_server'],
                                self.config['db_username'],
                                self.config['db_password'],
                                self.config['db_database'],
                                self.logger)
    except Exception as e:
        # print_exc() prints the traceback of the exception being handled.
        # print_stack() would only print the call stack leading to this
        # handler and never show where the failure actually occurred.
        traceback.print_exc()
        self.logger.error(str(e))