'Print deprels. The option can be "UD", "langspec", or "UD+langspec".') opt_parser.add_argument( '--catvals', default=None, help= 'Print category=value pairs. The option can be "UD", "langspec", or "UD+langspec". This distinction is based on the feature, not the value.' ) opt_parser.add_argument( '--sort', default='freq', help= 'Sort the values by their frequency (freq) or alphabetically (alph). Default: %(default)s.' ) args = opt_parser.parse_args() #Parsed command-line arguments args.output = "-" inp, out = file_util.in_out(args, multiple_files=True) trees = file_util.trees(inp) stats = Stats() try: for comments, tree in trees: stats.tree_count += 1 for cols in tree: stats.count_cols(cols) except: traceback.print_exc() print >> sys.stderr, "\n\n ------- STATS MAY BE EMPTY OR INCOMPLETE ----------" pass if args.stats: stats.print_basic_stats(out) if args.jsonstats:
# Command-line driver for the validator: build the tag sets to check against,
# run the validation, then report the outcome on stderr and via the exit code
# (0 when no errors were counted, 1 otherwise).
# NOTE(review): this chunk arrived with its line breaks and indentation
# stripped; the nesting below is reconstructed from the statement order and
# should be confirmed against the original file.
if args.quiet:
    # Quiet mode also turns off args.echo_input.
    args.echo_input = False

# Sets of tags for every column that needs to be checked. Going by the
# warnings below, an entry left as None means that column is not checked.
tagsets = {POSTAG: None, CPOSTAG: None, FEATS: None, DEPREL: None, DEPS: None}

if args.lang:
    tagsets[DEPREL] = load_set("deprel.ud", "deprel." + args.lang, validate_langspec=True)
    if tagsets[DEPREL] is None:
        warn(
            (
                u"The language-specific file data/deprel.%s could not be found. "
                u"Dependency relations will not be checked.\n"
                u"Please add the language-specific dependency relations using "
                u"python conllu-stats.py --deprels=langspec yourdata/*.conllu > data/deprel.%s\n"
                u" Also please check that file for erroneous relations. "
                u"It's okay if the file is empty, but it must exist.\n\n"
            ) % (args.lang, args.lang),
            "Language specific data missing",
            lineno=False,
        )
    # The DEPS column is checked against the same set as DEPREL.
    tagsets[DEPS] = tagsets[DEPREL]

    tagsets[FEATS] = load_set("feat_val.ud", "feat_val." + args.lang)
    if tagsets[FEATS] is None:
        warn(
            (
                u"The language-specific file data/feat_val.%s could not be found. "
                u"Feature=value pairs will not be checked.\n"
                u"Please add the language-specific pairs using "
                u"python conllu-stats.py --catvals=langspec yourdata/*.conllu > data/feat_val.%s "
                u"It's okay if the file is empty, but it must exist.\n \n\n"
            ) % (args.lang, args.lang),
            "Language specific data missing",
            lineno=False,
        )

    # NOTE(review): assumed to sit inside `if args.lang:` with the other
    # load_set calls; it does not use args.lang itself, so confirm the nesting.
    tagsets[CPOSTAG] = load_set("cpos.ud", None)

inp, out = file_util.in_out(args)
validate(inp, out, args, tagsets)

if not error_counter:
    if not args.quiet:
        print >> sys.stderr, "*** PASSED ***"
    sys.exit(0)
else:
    if not args.quiet:
        # items() is used for the total as well as for the breakdown below, so
        # both lines rely on the same mapping method.
        print >> sys.stderr, "*** FAILED *** with %d errors" % sum(count for _kind, count in error_counter.items())
        # NOTE(review): the per-type breakdown is assumed to be suppressed by
        # --quiet together with the summary line; confirm the nesting.
        for k, v in sorted(error_counter.items()):
            print >> sys.stderr, k, "errors:", v
    sys.exit(1)
# NOTE(review): numTruetag and numWords are not defined anywhere in this chunk,
# so this print looks like the last statement of a function that starts above
# it. The chunk's indentation was lost; confirm where this line belongs.
print("Accuracy: ", numTruetag / numWords)

# Command-line interface: two optional positionals, input and output.
opt_parser = argparse.ArgumentParser(description="CoNLL-U validation script")
# NOTE(review): the group is created, but both arguments below are added to
# opt_parser directly, so nothing in this chunk is attached to io_group.
io_group = opt_parser.add_argument_group("Input / output options")
opt_parser.add_argument(
    "input",
    nargs="?",
    help='Input file name, or "-" or nothing for standard input.',
)
opt_parser.add_argument(
    "output",
    nargs="?",
    help='Output file name, or "-" or nothing for standard output.',
)
args = opt_parser.parse_args()  # Parsed command-line arguments

inp, out = file_util.in_out(args)

# Materialise the trees as a list before handing them to makeCorpus.
trees = list(file_util.trees(inp))
makeCorpus(trees)
makeModel(100)
# getAccuracyBestTag(25)
# getAccuracyBestSeqTag(25)

# Tag one hard-coded sample sentence and print the result.
print(
    "Tag kalimat: Ahli rekayasa optik mendesain komponen dari instrumen optik seperti lensa."
)
sample_tokens = (
    " Ahli rekayasa optik mendesain komponen dari instrumen optik seperti lensa"
).split()
print("Tag menggunakan best tag: ", tagSentence(sample_tokens))
if not cat==u"CPOSTAG" and ((u"UD" in which and cat in ud_cats) or (u"langspec" in which and cat not in ud_cats)): print >> out, cat_is_val if __name__=="__main__": opt_parser = argparse.ArgumentParser(description='Script for basic stats generation. Assumes a validated input.') opt_parser.add_argument('input', nargs='+', help='Input file name (can be several files), or "-" or nothing for standard input.') opt_parser.add_argument('--stats',action='store_true',default=False, help='Print basic stats') opt_parser.add_argument('--jsonstats',action='store_true',default=False, help='Print basic stats as json dictionary') opt_parser.add_argument('--deprels',default=None,help='Print deprels. The option can be "UD", "langspec", or "UD+langspec".') opt_parser.add_argument('--catvals',default=None,help='Print category=value pairs. The option can be "UD", "langspec", or "UD+langspec". This distinction is based on the feature, not the value.') opt_parser.add_argument('--sort',default='freq',help='Sort the values by their frequency (freq) or alphabetically (alph). Default: %(default)s.') args = opt_parser.parse_args() #Parsed command-line arguments args.output="-" inp,out=file_util.in_out(args,multiple_files=True) trees=file_util.trees(inp) stats=Stats() try: for comments,tree in trees: stats.tree_count+=1 for cols in tree: stats.count_cols(cols) except: traceback.print_exc() print >> sys.stderr, "\n\n ------- STATS MAY BE EMPTY OR INCOMPLETE ----------" pass if args.stats: stats.print_basic_stats(out) if args.jsonstats: