# NOTE(review): the three statements below are the tail of a function whose
# `def` line lies before this chunk; it returns the number of rows in the
# annotations table.  `db_conn` and `annotations_table` are presumably bound
# by that missing header — confirm against the full file.
cursor = db_conn.cursor()
# A table name cannot be a bound SQL parameter, hence the .format() here.
cursor.execute("SELECT COUNT(*) FROM {}".format(annotations_table))
return cursor.fetchone()[0]


if __name__ == "__main__":
    # Command-line driver: annotate tweets from a JSON file for one keyword.
    parser = argparse.ArgumentParser(description='Tweet annotator')
    parser.add_argument('tweet_file', help='JSON tweets file for annotation')
    parser.add_argument('keyword', help='Keyword we wish to disambiguate (determines table name and used to filter tweets)')
    parser.add_argument('--skipto', default=None, type=int, help="Skip forwards to this tweet id, continue from the next tweet")
    args = parser.parse_args()
    print("These are our args:")
    print(args)
    print(args.tweet_file, args.keyword)

    # Create (or open) the per-keyword tables; both names are derived from
    # the keyword inside sql_convenience.create_all_tables.
    annotations_table, spotlight_table = sql_convenience.create_all_tables(args.keyword)
    tweets = tweet_generators.get_tweets(open(args.tweet_file))

    # we can skip through Tweets we've already seen in the same file by
    # specifying a tweet id to jump to
    if args.skipto is not None:
        for tweet in tweets:
            if tweet['id'] == args.skipto:
                break  # continue after this tweet

    for tweet in tweets:
        # `unicode` marks this as Python 2 code; under Python 3 this line
        # would need `str` instead.
        tweet_text = unicode(tweet['text'])
        annotate = True
        # determine if this is an English tweet or not
        tweet_text_bytesutf8 = tweet_text.encode('utf-8')
        # cld.detect takes UTF-8 bytes and is unpacked here into a language
        # guess plus reliability details for the tweet text.
        language_name, language_code, is_reliable, text_bytes_found, details = cld.detect(tweet_text_bytesutf8)
        # NOTE(review): the rest of this loop body continues past the end of
        # this chunk.
# Tail of the NER-annotation script driver.  `parser`, `config`,
# `sql_convenience` and `opencalais_ner` are created/imported before this
# chunk.
parser.add_argument('nerengine', help='NER engine type (only "opencalais" at present)')
parser.add_argument(
    '--drop', default=False, action="store_true",
    help='Drops the keyword destination table so we do all annotations again')
args = parser.parse_args()
print(args)

# Choose the named-entity-recognition engine class from the CLI argument.
if args.nerengine == "opencalais":
    ner = opencalais_ner.OpenCalaisNER
else:
    # Fix: the original executed a bare `1 / 0` here, crashing with an
    # opaque ZeroDivisionError for an unknown engine; report a clear usage
    # error (exits with status 2) instead.
    parser.error('unknown nerengine "{}": only "opencalais" is supported'.format(args.nerengine))

destination_table = "{}_{}".format(args.nerengine, args.keyword)
cursor = config.db_conn.cursor()
if args.drop:
    # Table names cannot be bound as SQL parameters, so the name is
    # interpolated; it is derived from local CLI arguments, not remote input.
    sql = "DROP TABLE IF EXISTS {}".format(destination_table)
    print("Dropping table: {}".format(sql))
    cursor.execute(sql)

# create_all_tables yields the canonical table names.  (The original also
# hand-built `annotations_table = "annotations_<keyword>"` above, but that
# value was dead — immediately overwritten here — so it has been removed.)
annotations_table, destination_table = sql_convenience.create_all_tables(
    args.keyword)
engine = ner(annotations_table, destination_table)
engine.annotate_all_messages()
# Tail of the NER-annotation script driver (argparse setup begins before
# this chunk; `parser`, `config`, `sql_convenience` and `opencalais_ner`
# are defined earlier in the file).
parser.add_argument(  # NOTE(review): call opener reconstructed — the chunk starts at `"keyword", ...`
    "keyword",
    help="Keyword we wish to disambiguate (determines table name and used to filter tweets)"
)
parser.add_argument("nerengine", help='NER engine type (only "opencalais" at present)')
parser.add_argument(
    "--drop",
    default=False,
    action="store_true",
    help="Drops the keyword destination table so we do all annotations again",
)
args = parser.parse_args()
print(args)

# Select the NER engine implementation requested on the command line.
if args.nerengine == "opencalais":
    ner = opencalais_ner.OpenCalaisNER
else:
    # Fix: previously a bare `1 / 0` raised an opaque ZeroDivisionError for
    # an unrecognised engine; fail with an explicit usage error instead.
    parser.error('unknown nerengine "{}": only "opencalais" is supported'.format(args.nerengine))

destination_table = "{}_{}".format(args.nerengine, args.keyword)
cursor = config.db_conn.cursor()
if args.drop:
    # A table name cannot be a bound SQL parameter; it is interpolated here
    # and comes from the local command line, not untrusted input.
    sql = "DROP TABLE IF EXISTS {}".format(destination_table)
    print("Dropping table: {}".format(sql))
    cursor.execute(sql)

# The canonical table names come from create_all_tables.  (The original's
# hand-built `annotations_table = "annotations_<keyword>"` assignment was
# dead code — immediately overwritten here — and has been removed.)
annotations_table, destination_table = sql_convenience.create_all_tables(args.keyword)
engine = ner(annotations_table, destination_table)
engine.annotate_all_messages()