## input arguments
# Parses the command line for the pattern-position feature script and builds
# the `setting` metadata dict from the resulting config values.
import getopt

# Extra usage lines handed to config.help() for this script's own options.
add_opts = [
    ('-b', ['-b: percentage of beginning section']),
    ('-m', ['-m: percentage of middle section']),
    ('-e', ['-e: percentage of ending section']),
    ('-l', ['-l: minimum occurrence of a pattern', ' n: at least occurs < n > times for each pattern'])
]

try:
    opts, args = getopt.getopt(
        sys.argv[1:], 'hb:m:e:l:v',
        ['help', 'begPercentage=', 'midPercentage=', 'endPercentage=', 'min_count=', 'verbose'])
except getopt.GetoptError:
    config.help(config.patternPositionFeat_name, addon=add_opts, exit=2)

for opt, arg in opts:
    # Every long option declared to getopt above is matched here; previously
    # --begPercentage/--midPercentage/--endPercentage/--min_count were parsed
    # successfully but then silently dropped.
    if opt in ('-h', '--help'):
        config.help(config.patternPositionFeat_name, addon=add_opts)
    elif opt in ('-b', '--begPercentage'):
        config.begPercentage = int(arg.strip())
    elif opt in ('-m', '--midPercentage'):
        config.midPercentage = int(arg.strip())
    elif opt in ('-e', '--endPercentage'):
        config.endPercentage = int(arg.strip())
    elif opt in ('-l', '--min_count'):
        config.min_count = int(arg.strip())
    elif opt in ('-v', '--verbose'):
        config.verbose = True

## insert metadata
setting = {
    "feature_name": "pattern_position",
    "section": "b" + str(config.begPercentage) + "_m" + str(config.midPercentage) + "_e" + str(config.endPercentage),
    "min_count": config.min_count
}
return True if __name__ == '__main__': import getopt add_opts = [ ('setting_id', ['<setting_id>: specify setting ID(s) (e.g., 537086fcd4388c7e81676914, or 537086fcd4388c7e81676914,537c6c90d4388c0e27069e7b)', ' which can be retrieved from the mongo collection features.settings' ]), ] arg_idx = 2 if len(sys.argv) > 1 and not sys.argv[1].startswith('-') else 1 try: opts, args = getopt.getopt(sys.argv[arg_idx:],'hvo',['help', 'verbose', 'overwrite']) setting_id_str = sys.argv[1].strip() except: config.help('toSVM', addon=add_opts, args=['<setting_id>'], exit=2) ## read options for opt, arg in opts: if opt in ('-h', '--help'): config.help('toSVM',args=['setting_id'], addon=add_opts) elif opt in ('-v','--verbose'): config.verbose = True elif opt in ('-o','--overwrite'): config.overwrite = True ## set log level loglevel = logging.DEBUG if config.verbose else logging.INFO logging.basicConfig(format='[%(levelname)s] %(message)s', level=loglevel) run()
# Command-line handling for the SVM runner: a positional setting ID followed
# by optional flags.
import getopt

# Usage text shown by config.help().
add_opts = [
    ('setting_id', ['<setting_id>: specify a setting ID (e.g., 537086fcd4388c7e81676914)',
                    ' which can be retrieved from the mongo collection features.settings']),
    ('--quiet', ['--quiet: run svm in quiet mode']),
    ('--core', ['-c, --core: multi-core for svm']),
    ('--param', ['--param: parameter string for libsvm (e.g., use "c4b1" or "-c 4 -b 1" to represent the libsvm parameters -c 4 -b 1)'])
]

# If the first argument is not an option it is the positional setting ID,
# so option parsing starts after it.
arg_idx = 2 if len(sys.argv) > 1 and not sys.argv[1].startswith('-') else 1

try:
    # 'c:' / 'core=' are accepted because the help text above documents
    # "-c, --core"; the original '--multi=' spelling keeps working.
    opts, args = getopt.getopt(
        sys.argv[arg_idx:], 'hvop:qc:',
        ['help', 'verbose', 'overwrite', 'list', 'param=', 'multi=', 'quiet', 'core='])
    setting_id = sys.argv[1].strip()
except (getopt.GetoptError, IndexError):
    # GetoptError: unrecognised option; IndexError: no arguments given.
    config.help('run_svm', addon=add_opts, args=['<setting_id>'], exit=2)

svm_params = []

## read options
for opt, arg in opts:
    if opt in ('-h', '--help'):
        config.help('run_svm', addon=add_opts)
    elif opt in ('-q', '--quiet'):
        quiet_mode = True
    elif opt in ('-s', '--scale'):
        # NOTE(review): -s/--scale is not declared to getopt above, so this
        # branch cannot currently be reached.
        scale_mode = True
    elif opt in ('-p', '--param'):
        svm_params = parse_params(arg.strip())
    elif opt in ('-c', '--core', '--multi'):
        cores = int(arg.strip())
    elif opt in ('-v', '--verbose'):
        config.verbose = True
    elif opt in ('-o', '--overwrite'):
        config.overwrite = True
    # NOTE(review): '--list' is accepted by getopt but has no handler here.

## set log level
loglevel = logging.DEBUG if config.verbose else logging.INFO
## input arguments
# Parses the command line, configures logging and logs the mongo target.
import getopt

# Extra usage lines handed to config.help().
add_opts = [
    ('-n', ['-n or --minCount: filter out patterns with minimum count', ' k: minimum count']),
    ('--debug', ['--debug: run in debug mode']),
    ('--remove', ['--remove: remove self count'])
]

try:
    # opts, args = getopt.getopt(sys.argv[1:],'hf:n:c:vr:',['help', 'featureValueType=', 'minCount=', 'cut=', 'verbose', 'debug'])
    opts, args = getopt.getopt(
        sys.argv[1:], 'hn:vr:o',
        ['help', 'minCount=', 'verbose', 'debug', 'overwrite', 'remove'])
except getopt.GetoptError:
    config.help(program, addon=add_opts, exit=2)

for opt, arg in opts:
    if opt in ('-h', '--help'):
        config.help(program, addon=add_opts)
    # elif opt in ('-f', '--featureValueType'): config.featureValueType = arg.strip()
    elif opt in ('-n', '--minCount'):
        config.minCount = int(arg.strip())
    elif opt in ('-c', '--cut'):
        # NOTE(review): -c/--cut is not declared in the active getopt spec,
        # so this branch cannot currently be reached.
        config.cutoffPercentage = int(arg.strip())
    elif opt in ('-v', '--verbose'):
        config.verbose = True
    elif opt in ('-o', '--overwrite'):
        config.overwrite = True
    elif opt in ('-r', '--remove'):
        # '-r' is listed explicitly: the old test `opt in ('--remove')` was a
        # substring check on a plain string and matched '-r' only by accident.
        # NOTE(review): '-r' is declared as 'r:' and so consumes an argument,
        # which is ignored here.
        remove = True
    elif opt in ('--debug',):
        config.debug = True

loglevel = logging.DEBUG if config.verbose else logging.INFO
logging.basicConfig(format='[%(levelname)s] %(message)s', level=loglevel)

logging.debug('connecting mongodb at %s/%s' % (config.mongo_addr, config.db_name))
co_feature = db['features.keyword']

## input arguments
# Reads -k/--keyword_type, --lemma and -v/--verbose into config, then builds
# the `setting` metadata dict for the keyword feature.
import getopt

add_opts = [
    ('-k', ['-k: keyword set in WordNetAffect', ' 0: basic', ' 1: extend']),
    ('--lemma', ['--lemma: use word lemma when looking for keywords'])
]

try:
    opts, args = getopt.getopt(
        sys.argv[1:],
        'hk:v',
        ['help', 'keyword_type=', 'lemma', 'verbose'],
    )
except getopt.GetoptError:
    config.help(config.keywordFeat_name, addon=add_opts, exit=2)

# Numeric -k value -> keyword set name; any other number leaves config untouched.
keyword_sets = {0: 'basic', 1: 'extend'}

for opt, arg in opts:
    if opt in ('-h', '--help'):
        config.help(config.keywordFeat_name, addon=add_opts)
    elif opt in ('-k', '--keyword_type'):
        choice = int(arg.strip())
        if choice in keyword_sets:
            config.keyword_type = keyword_sets[choice]
    elif opt == '--lemma':
        config.lemma = True
    elif opt in ('-v', '--verbose'):
        config.verbose = True

## insert metadata
setting = {
    "feature_name": "keyword",
    "keyword_type": config.keyword_type,
    "lemma": config.lemma,
}
# Command-line handling for the SVM evaluation script, followed by selection
# of the mongo collections it works on.
import getopt

# Usage text shown by config.help(); defaults are interpolated from the
# current values of update_all, param and root.
add_opts = [
    ('--setting', ['--setting: specify a setting ID (e.g., 537b00e33681df445d93d57e)',
                   ' which can be retrieved from the mongo collection features.settings']),
    ('--all', ['-a, --all: evaluate and update all current experiments, default: '+str(update_all)+' )']),
    ('--param', ['--param: parameter string for libsvm (e.g., use "b1c4", default: '+param+' )']),
    ('--path', ['-p, --path: path to local files (default: '+root+' )']),
    ('--inter', ['-i, --inter: intersection with 2005 Mishne05'])
]

try:
    # 'p:' and 'path=' are declared so that the documented and handled
    # -p/--path option is actually accepted instead of raising GetoptError.
    # Side effect: the abbreviations --p/--pa are now ambiguous between
    # --param and --path.
    opts, args = getopt.getopt(
        sys.argv[1:], 'hvaip:',
        ['help', 'verbose', 'setting=', 'param=', 'all', 'inter', 'path='])
except getopt.GetoptError:
    config.help('run_svm', addon=add_opts, exit=2)

## read options
for opt, arg in opts:
    if opt in ('-h', '--help'):
        config.help('run_svm', addon=add_opts)
    elif opt in ('-a', '--all'):
        update_all = True
    elif opt in ('-i', '--inter'):
        intersection = True
    elif opt in ('--param',):
        # One-element tuple: with the plain string '--param' the `in` test is
        # a substring check and would also capture '-p'.
        param = arg.strip()
    elif opt in ('-p', '--path'):
        root = arg.strip()
    elif opt in ('--setting',):
        setting_id = arg.strip()
    elif opt in ('-v', '--verbose'):
        config.verbose = True

## select collections
co_svm_eval = db[config.co_svm_eval_name]
co_svm_out = db[config.co_svm_out_name]
co_svm_gold = db[config.co_svm_gold_name]
print >> sys.stdout, '='*40 print >> sys.stdout, 'Avg. LJ40K:', round(avg_LJ40K,4) print >> sys.stdout, 'Avg. Mishne05:', round(avg_Mishne05,4) print >> sys.stdout, 'Avg. Overall:', round(avg_shared,4) # print >> sys.stderr, avg_LJ40K, avg_Mishne05, avg_shared return avg_LJ40K, avg_Mishne05, avg_shared if __name__ == '__main__': import getopt try: opts, args = getopt.getopt(sys.argv[1:],'hp:d:g:s:l:vo',['help','ps_function=', 'ds_function=', 'sig_function=', 'smoothing=', 'limit=', 'verbose', 'overwrite']) except getopt.GetoptError: config.help('evaluation', exit=2) for opt, arg in opts: if opt in ('-h', '--help'): config.help('evaluation') elif opt in ('-p','--ps_function'): config.ps_function_type = int(arg.strip()) elif opt in ('-d','--ds_function'): config.ds_function_type = int(arg.strip()) elif opt in ('-g','--sig_function'): config.sig_function_type = int(arg.strip()) elif opt in ('-s','--smoothing'): config.smoothing_type = int(arg.strip()) elif opt in ('-l','--limit'): config.min_count = int(arg.strip()) elif opt in ('-v','--verbose'): config.verbose = True elif opt in ('-o','--overwrite'): config.overwrite = True ## fetch from collection config.co_docscore_name = '_'.join([config.co_docscore_prefix] + config.getOpts(fields=config.opt_fields[config.ev_name], full=False)) # if cannot find the fetch target collection
mdoc = { 'udocID': doc['udocID'], 'gold_emotion': gold_emotion, 'scores': scores } co_docscore.insert( mdoc ) if __name__ == '__main__': import getopt try: opts, args = getopt.getopt(sys.argv[1:],'hp:d:g:s:l:vo',['help','ps_function=', 'ds_function=', 'sig_function=', 'smoothing=', 'limit=', 'verbose', 'overwrite']) except getopt.GetoptError: config.help(config.ds_name, exit=2) for opt, arg in opts: if opt in ('-h', '--help'): config.help(config.ds_name) elif opt in ('-p','--ps_function'): config.ps_function_type = int(arg.strip()) elif opt in ('-d','--ds_function'): config.ds_function_type = int(arg.strip()) elif opt in ('-g','--sig_function'): config.sig_function_type = int(arg.strip()) elif opt in ('-s','--smoothing'): config.smoothing_type = int(arg.strip()) elif opt in ('-l','--limit'): config.min_count = int(arg.strip()) elif opt in ('-v','--verbose'): config.verbose = True elif opt in ('-o','--overwrite'): config.overwrite = True ## select mongo collections co_emotions = db[config.co_emotions_name] co_docs = db[config.co_docs_name] co_pats = db[config.co_pats_name]
docIDs.append( {'filename': fn, 'topic': topic, 'ldocID': ldocID, 'udocID': udocID} ) return docIDs if __name__ == '__main__': import getopt add_opts = [ ('--path', ['-p or --path: specify the input corpus path']), ('--database', ['-d or --database: specify the destination database name']), ] try: opts, args = getopt.getopt(sys.argv[1:],'hp:d:',['help','path=', 'database=']) except getopt.GetoptError: config.help('extract_dependency', addon=add_opts, exit=2) ## read options for opt, arg in opts: if opt in ('-h', '--help'): config.help('extract_dependency', addon=add_opts) elif opt in ('-p','--path'): corpus_root = arg.strip() elif opt in ('-d','--database'): config.db_name = arg.strip() if not corpus_root: print 'specify the input corpus path: e.g., python extract_dependency.py -p /corpus/NTCIR/' exit(-1) if not corpus_root: print 'specify the destination database name: e.g., python extract_dependency.py -d NTCIR' exit(-1)
co_patscore.create_index('pattern') print >> sys.stderr, 'done.' if __name__ == '__main__': import getopt try: opts, args = getopt.getopt( sys.argv[1:], 'hp:s:vo', ['help', 'ps_function=', 'smoothing=', 'verbose', 'overwrite']) except getopt.GetoptError: config.help(config.ps_name, exit=2) for opt, arg in opts: if opt in ('-h', '--help'): config.help(config.ps_name) elif opt in ('-p', '--ps_function'): config.ps_function_type = int(arg.strip()) elif opt in ('-s', '--smoothing'): config.smoothing_type = int(arg.strip()) elif opt in ('-v', '--verbose'): config.verbose = True elif opt in ('-o', '--overwrite'): config.overwrite = True # check if fetch source existed co_lexicon_existed = config.co_lexicon_name in db.collection_names() if not co_lexicon_existed:
mongo_collection.save(doc) if __name__ == '__main__': import getopt add_opts = [ ('--database', ['-d or --database: specify the destination database name']), ('--topic', ['--topic: identify as "topic" rather than "emotion"']), ] try: opts, args = getopt.getopt(sys.argv[1:],'hd:v',['help','database=','topic','verbose']) except getopt.GetoptError: config.help('extract_pattern', addon=add_opts, exit=2) ## read options for opt, arg in opts: if opt in ('-h', '--help'): config.help('extract_pattern', addon=add_opts) elif opt in ('-d','--database'): config.db_name = arg.strip() elif opt in ('-v','--verbose'): config.verbose = True elif opt in ('--topic'): topic_or_emotion = 'topic' db = mc[config.db_name] co_deps = db[config.co_deps_name] co_pats = db[config.co_pats_name] # # rule = [('subj',1), ('cop', 1)] # rule = [('prep', 0), ('subj',0), ('obj',0)] # # targets = ['JJ']
## select mongo collections
co_emotions = db[config.co_emotions_name]
co_docs = db[config.co_docs_name]
co_pats = db[config.co_pats_name]
co_nestedLexicon = db['lexicon.nested']

## target mongo collections
co_setting = db['features.settings']
co_feature = db['features.pattern']

## input arguments
# Reads -l/--min_count and -v/--verbose into config.
import getopt

try:
    opts, args = getopt.getopt(sys.argv[1:], 'hl:v', ['help', 'min_count=', 'verbose'])
except getopt.GetoptError:
    config.help(config.patternFeat_name, exit=2)

for opt, arg in opts:
    if opt in ('-h', '--help'):
        config.help(config.patternFeat_name)
    elif opt in ('-l', '--min_count'):
        # getopt reports the long form as '--min_count' (declared above); the
        # previous test for '--limit' never matched, so it was ignored.
        config.min_count = int(arg.strip())
    elif opt in ('-v', '--verbose'):
        config.verbose = True

## insert metadata
setting = {
    "feature_name": "pattern",
    "min_count": config.min_count
}

## print confirm message
config.print_confirm(setting.items(), bar=40, halt=True)
]), ('-r', [ '-r: remove self count', " 0: dont't remove anything", ' 1: minus-one', ' f: minus-frequency' ]), ('-p', ['-p: use position lexicon'])] try: opts, args = getopt.getopt(sys.argv[1:], 'hb:m:e:f:n:c:r:pv', [ 'help', 'begPercentage=', 'midPercentage=', 'endPercentage=', 'featureValueType=', 'minCount=', 'cut', 'verbose' ]) except getopt.GetoptError: config.help(config.patternEmotionPositionFeat_name, addon=add_opts, exit=2) for opt, arg in opts: if opt in ('-h', '--help'): config.help(config.patternEmotionPositionFeat_name, addon=add_opts) elif opt in ('-b'): config.begPercentage = int(arg.strip()) elif opt in ('-m'): config.midPercentage = int(arg.strip()) elif opt in ('-e'): config.endPercentage = int(arg.strip()) elif opt in ('-f'): config.featureValueType = arg.strip() elif opt in ('-n'): config.minCount = int(arg.strip())
'<setting_id>: specify a setting ID (e.g., 537086fcd4388c7e81676914)', ' which can be retrieved from the mongo collection features.settings' ]), ('--quiet', ['--quiet: run svm in quiet mode']), ('--core', ['-c, --core: multi-core for svm']), ('--param', [ '--param: parameter string for libsvm (e.g., use "c4b1" or "-c 4 -b 1" to represent the libsvm parameters -c 4 -b 1)' ])] arg_idx = 2 if len(sys.argv) > 1 and not sys.argv[1].startswith('-') else 1 try: opts, args = getopt.getopt(sys.argv[arg_idx:], 'hvop:q', [ 'help', 'verbose', 'overwrite', 'list', 'param=', 'multi=', 'quiet' ]) setting_id = sys.argv[1].strip() except: config.help('run_svm', addon=add_opts, args=['<setting_id>'], exit=2) svm_params = [] ## read options for opt, arg in opts: if opt in ('-h', '--help'): config.help('run_svm', addon=add_opts) elif opt in ('-q', '--quiet'): quiet_mode = True elif opt in ('-s', '--scale'): scale_mode = True elif opt in ('-p', '--param'): svm_params = parse_params(arg.strip()) elif opt in ('--multi'): cores = int(arg.strip()) elif opt in ('-v', '--verbose'): config.verbose = True elif opt in ('-o', '--overwrite'): config.overwrite = True ## set log level loglevel = logging.DEBUG if config.verbose else logging.INFO logging.basicConfig(format='[%(levelname)s] %(message)s', level=loglevel)
('-m', ['-m: percentage of middle section']), ('-e', ['-e: percentage of ending section']), ('-k', [ '-k: keyword set in WordNetAffect', ' 0: basic', ' 1: extend' ]), ('--lemma', ['--lemma: use word lemma when looking for keywords'])] try: opts, args = getopt.getopt(sys.argv[1:], 'hb:m:e:k:v', [ 'help', 'begPercentage=', 'midPercentage=', 'endPercentage=', 'keyword_type=', 'lemma', 'verbose' ]) except getopt.GetoptError: config.help(config.keywordPositionFeat_name, addon=add_opts, exit=2) for opt, arg in opts: if opt in ('-h', '--help'): config.help(config.keywordPositionFeat_name, addon=add_opts) elif opt in ('-b'): config.begPercentage = int(arg.strip()) elif opt in ('-m'): config.midPercentage = int(arg.strip()) elif opt in ('-e'): config.endPercentage = int(arg.strip()) elif opt in ('-k', '--keyword_type'): if int(arg.strip()) == 0: config.keyword_type = 'basic' elif int(arg.strip()) == 1: config.keyword_type = 'extend' elif opt in ('--lemma'): config.lemma = True elif opt in ('-v', '--verbose'): config.verbose = True
# Source mongo collections.
co_emotions = db[config.co_emotions_name]
co_docs = db[config.co_docs_name]
co_pats = db[config.co_pats_name]
co_nestedLexicon = db['lexicon.nested']

## target mongo collections
co_setting = db['features.settings']
co_feature = db['features.pattern']

## input arguments
# Reads -l/--min_count and -v/--verbose into config.
import getopt

try:
    opts, args = getopt.getopt(sys.argv[1:], 'hl:v',
                               ['help', 'min_count=', 'verbose'])
except getopt.GetoptError:
    config.help(config.patternFeat_name, exit=2)

for opt, arg in opts:
    if opt in ('-h', '--help'):
        config.help(config.patternFeat_name)
    elif opt in ('-l', '--min_count'):
        # getopt reports the long form as '--min_count' (declared above); the
        # previous test for '--limit' never matched, so it was ignored.
        config.min_count = int(arg.strip())
    elif opt in ('-v', '--verbose'):
        config.verbose = True

## insert metadata
setting = {"feature_name": "pattern", "min_count": config.min_count}

## print confirm message
config.print_confirm(setting.items(), bar=40, halt=True)

## insert metadata
# The value returned by insert() is kept, as a string, as this run's setting ID.
setting_id = str(co_setting.insert(setting))