from xml.etree.cElementTree import ElementTree, tostring

from daeso.utils.cli import DaesoArgParser
from daeso.utils.etree import write

# TODO:
# - this is not too smart yet - you can do this with sed,
#   but escaping all the slashes is a pain
# - windows support

# Command-line interface; the module docstring doubles as the description.
parser = DaesoArgParser(description=__doc__)

# One or more corpus files (positional).
parser.add_argument(
    "corpus",
    help="parallel graph corpus",
    nargs="+")

# Takes exactly two values (the "from" and "to" prefixes); both default to
# the empty string.
parser.add_argument(
    "-p", "--path-prefix-pair",
    metavar="DIR",
    nargs=2,
    default=["", ""],
    help="a pair of path prefixes specifying what to change from and to")

# Boolean flag, False unless given on the command line.
parser.add_argument(
    "-t", "--test",
    action="store_true",
    help="perform a dry run without changing anything for real (implies -V)")
corresponding graphs in the "from" and "to" graphbanks respectively. The graphbanks are assumed to be in GraphML format, unless specified otherwise by means of the --source-graphbank-format and --target-graphbank-format option. The default set of alignment relations for the parallel graph corpus is the Daeso set, but you can change it using the --relations option. """ + epilog parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument("-p", "--parallel-text-corpora", metavar="CORPUS", nargs="+", default=(), help='parallel text corpora') parser.add_argument("-s", "--source-graphbanks", metavar="GRAPHBANK", nargs="+", default=(), help='source graphbanks') parser.add_argument("-t", "--target-graphbanks", metavar="GRAPHBANK", nargs="+", default=(),
Automatically takes care of the internal references to graph bank files. Usage is similar to the "cp" shell command. """ __authors__ = 'Erwin Marsi <*****@*****.**>' from os.path import isdir, basename, join, samefile, exists from sys import stderr from daeso.utils.cli import DaesoArgParser from daeso.pgc.corpus import ParallelGraphCorpus, LOAD_NONE parser = DaesoArgParser(description=__doc__) parser.add_argument("source", nargs="+", help="source parallel graph corpus file") parser.add_argument("target", help="either a target parallel graph corpus file or " "a target directory") parser.add_argument("-o", "--overwrite", action="store_true", help="overwrite existing file") args = parser.parse_args() if isdir(args.target): target_is_dir = True
The graphbanks are assumed to be in GraphML format, unless specified otherwise by means of the --source-graphbank-format and --target-graphbank-format option. The default set of alignment relations for the parallel graph corpus is the Daeso set, but you can change it using the --relations option. """ + epilog parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument( "-p", "--parallel-text-corpora", metavar="CORPUS", nargs="+", default=(), help='parallel text corpora') parser.add_argument( "-s", "--source-graphbanks", metavar="GRAPHBANK", nargs="+", default=(), help='source graphbanks') parser.add_argument( "-t", "--target-graphbanks", metavar="GRAPHBANK", nargs="+", default=(),
import sys

from daeso.utils.cli import DaesoArgParser
from daeso.utils.opsys import multiglob
from daeso.pgc.corpus import ParallelGraphCorpus

# Command-line interface; the module docstring doubles as the description.
parser = DaesoArgParser(description=__doc__)

# One or more corpus file names or quoted file name patterns (positional).
parser.add_argument(
    "file",
    nargs="+",
    metavar="FILE",
    help="parallel graph corpus filename, "
         "or quoted file name pattern for parallel graph corpora")

# Boolean flag, False unless given on the command line.
parser.add_argument(
    "-f", "--format",
    action="store_true",
    help="output indented XML")

# Boolean flag; help text corrected from "ouput" to "output".
parser.add_argument(
    "-V", "--verbose",
    action="store_true",
    help="verbose output to stderr")
import imp from daeso.utils.cli import DaesoArgParser from daeso.utils.opsys import multiglob from daeso.pgc.corpus import ParallelGraphCorpus from daeso_nl.ga.setup import set_up_corpus_aligner parser = DaesoArgParser(description=__doc__, version=__version__) parser.add_argument( "pgc_files", nargs="+", metavar="FILE", help="parallel graph corpus file" ) parser.add_argument( "-c", "--config", metavar="FILE", help="configuration file to set up a corpus aligner") parser.add_argument( "-x", "--clear", action="store_true", help="remove all existing alignments" ) parser.add_argument(
def expand_globs(corpus_dir, globs): files = [] for pattern in globs: files.extend(relglob(corpus_dir, pattern)) return files parser = DaesoArgParser(description=__doc__, version=__version__) parser.add_argument( "pgc_glob", nargs="+", help=("glob (i.e. filename pattern) for parallel graph corpora, " "interpreted relative to the corpus base directory " "(cf. --corpus_dir)")) parser.add_argument( "-c", "--corpus-dir", default=corpus_dir, help="pgc filenames are interpreted relative to this base directory " "(default is '" + corpus_dir + "')") parser.add_argument( "-d", "--dev-bins", type=int, default=10,
__author__ = 'Erwin Marsi <*****@*****.**>' __version__ = "0.9" from sys import exit from daeso.utils.cli import DaesoArgParser from daeso_nl.alpino.server import start_server, DEFAULT_HOST, DEFAULT_PORT parser = DaesoArgParser(description=__doc__, version="%(prog)s version " + __version__) parser.add_argument("-H", "--host", default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT), metavar="HOST[:PORT]", help="name or IP address of host (default is '%s') " "optionally followed by a port number " "(default is %d)" % (DEFAULT_HOST, DEFAULT_PORT)) parser.add_argument("-c", "--command", help="command line to start Alpino parser") parser.add_argument("-o", "--out_dir", help="directory for writing temporary files") parser.add_argument('-l', '--log', action='store_true', help="log requests") parser.add_argument('-V', '--verbose', action='store_true',
reports difference in text alignments between two parallel text corpora """ from daeso.utils.cli import DaesoArgParser from daeso.ptc.diff import print_diff __authors__ = 'Erwin Marsi <*****@*****.**>' parser = DaesoArgParser(description=__doc__) parser.add_argument( "true_corpus", help="parallel text corpus containing true alignments") parser.add_argument( "pred_corpus", help="parallel text corpus containing predicted alignments") parser.add_argument( "-t", "--tag", default="s", help='only consider alignments involving this tag (defaults is "s"') parser.add_argument( "-e", "--encoding", default="utf-8",
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__author__ = "Erwin Marsi <*****@*****.**>"

from daeso.utils.cli import DaesoArgParser
from graeph.release import version, description

# Description and version string are taken from graeph.release; the
# description is stripped of surrounding whitespace before use.
parser = DaesoArgParser(
    version=version,
    description=description.strip())

# Optional positional argument (nargs="?"): at most one corpus file.
parser.add_argument(
    "corpus_file",
    nargs="?",
    metavar="FILE",
    help="parallel graph corpus file")

parser.add_argument(
    "-d", "--dot_exec",
    metavar="FILE",
    help='"dot" graph drawing program')

# Boolean flag, False unless given on the command line.
parser.add_argument(
    "-r", "--redirect",
    action='store_true',
    help="redirect output written to stdout and stderr streams "
         "to a pop-up window")

args = parser.parse_args()
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__version__ = "1.0"
__author__ = "Erwin Marsi"

from htxt.gui import Hitaext
from daeso.utils.cli import DaesoArgParser

# Shown as the program description in the help output (after stripping the
# surrounding whitespace). Spelling corrected: "aligment" -> "alignment".
description = """
Hitaext: hierarchical text alignment tool
"""

parser = DaesoArgParser(description=description.strip(), version=__version__)

# Optional positional argument (nargs="?"): at most one corpus file.
parser.add_argument(
    "corpus_file",
    metavar="FILE",
    nargs="?",
    help="parallel text corpus file")

# Boolean flag, False unless given on the command line.
parser.add_argument(
    "-r", "--redirect",
    action='store_true',
    help="redirect output written to stdout and stderr streams "
         "to a pop-up window")

args = parser.parse_args()

# Hand the parsed arguments to the GUI application and enter its main loop.
app = Hitaext(cl_args=args, redirect=args.redirect)
app.MainLoop()
from glob import glob
from sys import stderr
from xml.etree.cElementTree import ElementTree, tostring

from daeso.utils.cli import DaesoArgParser
from daeso.utils.etree import write

# TODO:
# - this is not too smart yet - you can do this with sed,
#   but escaping all the slashes is a pain
# - windows support

# Command-line interface; the module docstring doubles as the description.
parser = DaesoArgParser(description=__doc__)

# One or more corpus files (positional).
parser.add_argument(
    "corpus",
    help="parallel graph corpus",
    nargs="+")

# Takes exactly two values (the "from" and "to" prefixes); both default to
# the empty string.
parser.add_argument(
    "-p", "--path-prefix-pair",
    metavar="DIR",
    nargs=2,
    default=["", ""],
    help="a pair of path prefixes specifying what to change from and to")

# Boolean flag, False unless given on the command line.
parser.add_argument(
    "-t", "--test",
    action="store_true",
    help="perform a dry run without changing anything for real (implies -V)")
import os import sys from daeso.utils.cli import DaesoArgParser from daeso.pgc.corpus import ParallelGraphCorpus def log(s): if args.verbose: print >>sys.stderr, "***", s parser = DaesoArgParser(description=__doc__) parser.add_argument( "filename", metavar="FILE", help="parallel graph corpus" ) parser.add_argument( "-f", "--format", action="store_true", help="output indented XML" ) parser.add_argument( "-p", "--parts", default=2, type=int, metavar="N", help="number of parts"
__version__ = "1.0"
__author__ = "Erwin Marsi"

from htxt.gui import Hitaext
from daeso.utils.cli import DaesoArgParser

# Shown as the program description in the help output (after stripping the
# surrounding whitespace). Spelling corrected: "aligment" -> "alignment".
description = """
Hitaext: hierarchical text alignment tool
"""

parser = DaesoArgParser(description=description.strip(), version=__version__)

# Optional positional argument (nargs="?"): at most one corpus file.
parser.add_argument(
    "corpus_file",
    metavar="FILE",
    nargs="?",
    help="parallel text corpus file")

# Boolean flag, False unless given on the command line.
parser.add_argument(
    "-r", "--redirect",
    action='store_true',
    help="redirect output written to stdout and stderr streams "
         "to a pop-up window")

args = parser.parse_args()

# Hand the parsed arguments to the GUI application and enter its main loop.
app = Hitaext(cl_args=args, redirect=args.redirect)
app.MainLoop()
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
dump instance base

Converts a Timbl instance file to a Timbl instance base file given a graph
aligner configuration. Timbl options and filenames are taken from the
configuration file (cf. the "timbl_inst_fname" and "timbl_ib_fname"
attributes).
"""

__authors__ = 'Erwin Marsi <*****@*****.**>'
__version__ = "0.9"

import imp

from daeso.utils.cli import DaesoArgParser
from daeso_nl.ga.setup import dump_inst_base

# Command-line interface: a single required positional argument naming the
# configuration file.
parser = DaesoArgParser(version=__version__, description=__doc__)
parser.add_argument(
    "config",
    help="configuration file to set up a corpus aligner",
    metavar="FILE")

args = parser.parse_args()

# The configuration file is loaded as a Python module named "config" and
# passed on to dump_inst_base.
config = imp.load_source("config", args.config)
dump_inst_base(config)
from glob import glob from daeso.utils.cli import DaesoArgParser, epilog from daeso.gb.gbstats import gb_stats epilog = """ Remarks: * Many columns will have zero values, because a parallel graph corpus is required to get alignment information; see pgc_stats.py * Failed parses will only be excluded for graph banks in 'alpino' format. """ + epilog parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument("pattern", help="*quoted* pattern for graph bank files") parser.add_argument("-F", "--format", metavar="STRING", default="alpino", dest="format", help="treebank format (defaults to 'alpino')") parser.add_argument("-a", "--with-all", action="store_true", dest="with_all", help="include all, sets options -efp") parser.add_argument("-e",
import sys
import socket
from xmlrpclib import ServerProxy, Fault

from daeso.utils.cli import DaesoArgParser
from daeso_nl.alpino.server import DEFAULT_HOST, DEFAULT_PORT

# Command-line interface; the description comes from the module docstring
# and the version from the module-level __version__.
parser = DaesoArgParser(version=__version__, description=__doc__)

# Server address. Both the default value and the help text are filled in
# from DEFAULT_HOST and DEFAULT_PORT.
parser.add_argument(
    "-H", "--host",
    metavar="HOST[:PORT]",
    default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT),
    help="name or IP address of host (default is '%s') "
         "optionally followed by a port number "
         "(default is %d)" % (DEFAULT_HOST, DEFAULT_PORT))

parser.add_argument(
    "-i", "--input-encoding",
    metavar="utf8|latin1|ascii|...",
    default="utf8",
    help="character encoding of input (default is utf8)")

parser.add_argument(
    "-o", "--output-encoding",
    metavar="utf8|latin1|ascii|...",
    default="utf8",
    help="character encoding of output (default is utf8)")
__authors__ = 'Erwin Marsi <*****@*****.**>'

from os.path import isdir, basename, join, samefile, exists
from sys import stderr

from daeso.utils.cli import DaesoArgParser
from daeso.pgc.corpus import ParallelGraphCorpus, LOAD_NONE

# Command-line interface; the module docstring doubles as the description.
parser = DaesoArgParser(description=__doc__)

# One or more source files, followed by a single target.
parser.add_argument(
    "source",
    help="source parallel graph corpus file",
    nargs="+")

parser.add_argument(
    "target",
    help="either a target parallel graph corpus file or "
         "a target directory")

# Boolean flag, False unless given on the command line.
parser.add_argument(
    "-o", "--overwrite",
    help="overwrite existing file",
    action="store_true")

args = parser.parse_args()
zip_arch.write(corpus_filename, arch_filename) corpus = ParallelGraphCorpus(inf=corpus_filename, graph_loading=LOAD_NONE) for gb in corpus._graphbanks(): gb_filename = gb.get_file_path() # add graphbank files to archive arch_filename = os.path.join(arch_dir, os.path.basename(gb_filename)) zip_arch.write(gb_filename, arch_filename) zip_arch.close() parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument("zip_file", metavar="ZIP_FILE", help="filename of zip archive") parser.add_argument("pgc_files", nargs="+", metavar="CORPUS_FILE", help="parallel graph corpus filename, " "or quoted file name pattern for parallel graph corpora") args = parser.parse_args() pgc_zip(args.zip_file, args.pgc_files)
from daeso.utils.cli import DaesoArgParser, epilog from daeso.pgc.pgcstats import pgc_stats epilog = """ Examples: $ pgc_stats.py -efpu "*.pgc" Remarks: * Failed parses will only be exluded for graph banks in 'alpino' format. """ + epilog parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument("pattern", help="*quoted* pattern for parallel graph corpus files") parser.add_argument("-a", "--with-all", action="store_true", dest="with_all", help="include all, sets options -efpru") #parser.add_argument("-c", "--csv", action="store_true", #dest="csv", #help="output in comma separated values") parser.add_argument("-e", "--with-empty-nodes", action="store_true", dest="with_empty_nodes",
import os import sys from daeso.utils.cli import DaesoArgParser from daeso.pgc.corpus import ParallelGraphCorpus def log(s): if args.verbose: print >> sys.stderr, "***", s parser = DaesoArgParser(description=__doc__) parser.add_argument("filename", metavar="FILE", help="parallel graph corpus") parser.add_argument("-f", "--format", action="store_true", help="output indented XML") parser.add_argument("-p", "--parts", default=2, type=int, metavar="N", help="number of parts") parser.add_argument("-s", "--size",
__author__ = 'Erwin Marsi <*****@*****.**>' __version__ = "0.9" from sys import exit from daeso.utils.cli import DaesoArgParser from daeso_nl.alpino.server import start_server, DEFAULT_HOST, DEFAULT_PORT parser = DaesoArgParser(description=__doc__, version="%(prog)s version " + __version__) parser.add_argument("-H", "--host", default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT), metavar="HOST[:PORT]", help="name or IP address of host (default is '%s') " "optionally followed by a port number " "(default is %d)" % (DEFAULT_HOST, DEFAULT_PORT)) parser.add_argument("-c", "--command", help="command line to start Alpino parser") parser.add_argument("-o", "--out_dir", help="directory for writing temporary files") parser.add_argument('-l', '--log', action='store_true', help="log requests") parser.add_argument('-V',
for gb in corpus._graphbanks(): gb_filename = gb.get_file_path() # add graphbank files to archive arch_filename = os.path.join( arch_dir, os.path.basename(gb_filename) ) zip_arch.write(gb_filename, arch_filename) zip_arch.close() parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument( "zip_file", metavar="ZIP_FILE", help="filename of zip archive") parser.add_argument( "pgc_files", nargs="+", metavar="CORPUS_FILE", help="parallel graph corpus filename, " "or quoted file name pattern for parallel graph corpora") args = parser.parse_args() pgc_zip(args.zip_file, args.pgc_files)
""" evaluation of text alignment in parallel text corpora reports precision, recall and F-score on alignment for a certain tag for one or more pairs of true and predicted parallel text corpora """ from daeso.utils.cli import DaesoArgParser from daeso.ptc.evaluate import eval_alignment __authors__ = "Erwin Marsi <*****@*****.**>" parser = DaesoArgParser(description=__doc__) parser.add_argument("-t", "--true_corpora", nargs="+", help="parallel text corpus containing true alignments") parser.add_argument("-p", "--pred_corpora", nargs="+", help="parallel text corpus containing predicted alignments") parser.add_argument("--tag", default="s", help='only consider alignments involving this tag (defaults is "s"') args = parser.parse_args() assert len(args.true_corpora) == len(args.pred_corpora) eval_alignment(zip(args.true_corpora, args.pred_corpora), args.tag)
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
parallel text corpus diff

reports difference in text alignments between two parallel text corpora
"""

from daeso.utils.cli import DaesoArgParser
from daeso.ptc.diff import print_diff

__authors__ = 'Erwin Marsi <*****@*****.**>'

# Command-line interface; the module docstring doubles as the description.
parser = DaesoArgParser(description=__doc__)

# Two required positional arguments: the true and the predicted corpus.
parser.add_argument(
    "true_corpus",
    help="parallel text corpus containing true alignments")

parser.add_argument(
    "pred_corpus",
    help="parallel text corpus containing predicted alignments")

# Help text corrected: "defaults is" -> "default is", and the missing
# closing parenthesis added.
parser.add_argument(
    "-t", "--tag",
    default="s",
    help='only consider alignments involving this tag (default is "s")')

parser.add_argument(
    "-e", "--encoding",
    default="utf-8",
    help='character encoding of output (default is "utf-8")')
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
dump instance base

Converts a Timbl instance file to a Timbl instance base file given a graph
aligner configuration. Timbl options and filenames are taken from the
configuration file (cf. the "timbl_inst_fname" and "timbl_ib_fname"
attributes).
"""

__authors__ = 'Erwin Marsi <*****@*****.**>'
__version__ = "0.9"

import imp

from daeso.utils.cli import DaesoArgParser
from daeso_nl.ga.setup import dump_inst_base

# Command-line interface: a single required positional argument naming the
# configuration file.
parser = DaesoArgParser(version=__version__, description=__doc__)
parser.add_argument(
    "config",
    help="configuration file to set up a corpus aligner",
    metavar="FILE")

args = parser.parse_args()

# The configuration file is loaded as a Python module named "config" and
# passed on to dump_inst_base.
config = imp.load_source("config", args.config)
dump_inst_base(config)
__version__ = "0.9" import imp import sys from daeso.utils.cli import DaesoArgParser from daeso_nl.ga.setup import set_up_align_server from daeso_nl.ga.server import (start_server, DEFAULT_HOST, DEFAULT_PORT) #------------------------------------------------------------------------------- parser = DaesoArgParser(description=__doc__, version=__version__) parser.add_argument("config", metavar="FILE", help="configuration file to set up a graph align server") parser.add_argument( "-H", "--host", default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT), metavar="HOST[:PORT]", help="name or IP address of host (default is '%s') " % DEFAULT_HOST + "optionally followed by a port number (default is %d)" % DEFAULT_PORT) parser.add_argument("-l", "--log", action="store_true", help="log requests") args = parser.parse_args() try:
banks.source.get_file_path(), banks.target.get_file_path(), graphs.source.id, graphs.target.id, nodes.source, nodes.target ] + columns print delimiter.join(columns).encode("utf-8") parser = DaesoArgParser(description=__doc__.strip()) parser.add_argument( "corpus", nargs="+", metavar="FILE", help="parallel graph corpus file" ) parser.add_argument( "-d", "--delimiter", default="\t", help="column delimiter string (default is tab character '\\t')" ) parser.add_argument( "-V", "--verbose", action="store_true", help="print graph pair number, from and to bank id's, from and to graph id's, " "and from and to node id's" )
from daeso.gb.gbstats import gb_stats epilog = """ Remarks: * Many columns will have zero values, because a parallel graph corpus is required to get alignment information; see pgc_stats.py * Failed parses will only be excluded for graph banks in 'alpino' format. """ + epilog parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument( "pattern", help="*quoted* pattern for graph bank files") parser.add_argument( "-F", "--format", metavar="STRING", default="alpino", dest="format", help="treebank format (defaults to 'alpino')") parser.add_argument( "-a", "--with-all", action="store_true", dest="with_all", help="include all, sets options -efp")
if verbose: banks = graph_pair.get_banks() columns = [ banks.source.get_file_path(), banks.target.get_file_path(), graphs.source.id, graphs.target.id, nodes.source, nodes.target ] + columns print delimiter.join(columns).encode("utf-8") parser = DaesoArgParser(description=__doc__.strip()) parser.add_argument("corpus", nargs="+", metavar="FILE", help="parallel graph corpus file") parser.add_argument( "-d", "--delimiter", default="\t", help="column delimiter string (default is tab character '\\t')") parser.add_argument( "-V", "--verbose", action="store_true", help= "print graph pair number, from and to bank id's, from and to graph id's, " "and from and to node id's")
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__author__ = "Erwin Marsi <*****@*****.**>"

from daeso.utils.cli import DaesoArgParser
from graeph.release import version, description

# Description and version string are taken from graeph.release; the
# description is stripped of surrounding whitespace before use.
parser = DaesoArgParser(
    version=version,
    description=description.strip())

# Optional positional argument (nargs="?"): at most one corpus file.
parser.add_argument(
    "corpus_file",
    nargs="?",
    metavar="FILE",
    help="parallel graph corpus file")

parser.add_argument(
    "-d", "--dot_exec",
    metavar="FILE",
    help='"dot" graph drawing program')

# Boolean flag, False unless given on the command line.
parser.add_argument(
    "-r", "--redirect",
    action='store_true',
    help="redirect output written to stdout and stderr streams "
         "to a pop-up window")

args = parser.parse_args()
__authors__ = 'Erwin Marsi <*****@*****.**>' __version__ = "0.9" import imp from daeso.utils.cli import DaesoArgParser from daeso.utils.opsys import multiglob from daeso.pgc.corpus import ParallelGraphCorpus from daeso_nl.ga.setup import set_up_corpus_aligner parser = DaesoArgParser(description=__doc__, version=__version__) parser.add_argument("pgc_files", nargs="+", metavar="FILE", help="parallel graph corpus file") parser.add_argument("-c", "--config", metavar="FILE", help="configuration file to set up a corpus aligner") parser.add_argument("-x", "--clear", action="store_true", help="remove all existing alignments") parser.add_argument("-i", "--in-place", action="store_true",
""" from daeso.utils.cli import DaesoArgParser from daeso.pgc.corpus import ParallelGraphCorpus from daeso.pgc.diff import pgc_diff __authors__ = 'Erwin Marsi <*****@*****.**>' parser = DaesoArgParser(description=__doc__) parser.add_argument( "corpus1", help="first parallel graph corpus") parser.add_argument( "corpus2", help="second parallel graph corpus") parser.add_argument( "-1", "--first_annotator", metavar="NAME", default="First annotator", help="name of annotator of first corpus") parser.add_argument( "-2", "--second_annotator",
def expand_globs(corpus_dir, globs):
    """
    Expand filename patterns relative to a base directory.

    Each pattern in globs is passed to relglob together with corpus_dir,
    and the results are concatenated, in pattern order, into one list.
    """
    files = []
    for pattern in globs:
        files.extend(relglob(corpus_dir, pattern))
    return files


# Command-line interface; the description comes from the module docstring
# and the version from the module-level __version__.
parser = DaesoArgParser(description=__doc__, version=__version__)

# One or more glob patterns (positional). The help text now names the
# option as it is actually declared below ("--corpus-dir"); it previously
# referred to a non-existent "--corpus_dir" spelling.
parser.add_argument(
    "pgc_glob",
    nargs="+",
    help=("glob (i.e. filename pattern) for parallel graph corpora, "
          "interpreted relative to the corpus base directory "
          "(cf. --corpus-dir)"))

# Base directory; defaults to the module-level corpus_dir value, which is
# also spliced into the help text.
parser.add_argument(
    "-c", "--corpus-dir",
    default=corpus_dir,
    help="pgc filenames are interpreted relative to this base directory "
         "(default is '" + corpus_dir + "')")

parser.add_argument(
    "-d", "--dev-bins",
    type=int,
    default=10,
    help="number of bins used for development data (default is 10)")
# - check for at least two input files __authors__ = 'Erwin Marsi <*****@*****.**>' from daeso.utils.cli import DaesoArgParser from daeso.pgc.agreement import run_eval parser = DaesoArgParser(description=__doc__) parser.add_argument( "corpus_fns", nargs="+", metavar="corpus", help="parallel graph corpus file (at least two are required)") parser.add_argument( "-a", "--annotator", dest="annotators", metavar="CC", action="append", help="initials of the annotator of a parallel graph corpus files " "(default is 'A1', 'A2', etc.) Repeat this option as many times as " "there are corpus files") parser.add_argument( "-p", "--pickle", dest="pickle_fn",
__version__ = '0.9'

import sys
import socket
from xmlrpclib import ServerProxy, Fault

from daeso.utils.cli import DaesoArgParser
from daeso_nl.alpino.server import DEFAULT_HOST, DEFAULT_PORT
from daeso_nl.alpino.client import alpino_client

# Command-line interface; the description comes from the module docstring.
parser = DaesoArgParser(version=__version__, description=__doc__)

# Server address. Both the default value and the help text are filled in
# from DEFAULT_HOST and DEFAULT_PORT.
parser.add_argument(
    "-H", "--host",
    metavar="HOST[:PORT]",
    default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT),
    help="name or IP address of host (default is '%s') "
         "optionally followed by a port number "
         "(default is %d)" % (DEFAULT_HOST, DEFAULT_PORT))

parser.add_argument(
    "-i", "--input-encoding",
    metavar="utf8|latin1|ascii|...",
    default="utf8",
    help="character encoding of input (default is utf8)")

parser.add_argument(
    "-o", "--output-encoding",
    metavar="utf8|latin1|ascii|...",
    default="utf8",
    help="character encoding of output (default is utf8)")
import sys

from daeso.utils.cli import DaesoArgParser
from daeso_nl.ga.setup import set_up_align_server
from daeso_nl.ga.server import (
    start_server,
    DEFAULT_HOST,
    DEFAULT_PORT,
)

#-------------------------------------------------------------------------------

# Command-line interface; the description comes from the module docstring
# and the version from the module-level __version__.
parser = DaesoArgParser(version=__version__, description=__doc__)

# Required positional argument naming the configuration file.
parser.add_argument(
    "config",
    help="configuration file to set up a graph align server",
    metavar="FILE")

# Server address. The help text is built from two separately formatted
# halves, one for DEFAULT_HOST and one for DEFAULT_PORT.
parser.add_argument(
    "-H", "--host",
    metavar="HOST[:PORT]",
    default="%s:%d" % (DEFAULT_HOST, DEFAULT_PORT),
    help="name or IP address of host (default is '%s') " % DEFAULT_HOST +
         "optionally followed by a port number (default is %d)" % DEFAULT_PORT)

# Boolean flag, False unless given on the command line.
parser.add_argument(
    "-l", "--log",
    help="log requests",
    action="store_true")

args = parser.parse_args()
use pgc_diff.py """ # TODO: # - check for at least two input files __authors__ = 'Erwin Marsi <*****@*****.**>' from daeso.utils.cli import DaesoArgParser from daeso.pgc.agreement import run_eval parser = DaesoArgParser(description=__doc__) parser.add_argument( "corpus_fns", nargs="+", metavar="corpus", help="parallel graph corpus file (at least two are required)") parser.add_argument( "-a", "--annotator", dest="annotators", metavar="CC", action="append", help="initials of the annotator of a parallel graph corpus files " "(default is 'A1', 'A2', etc.) Repeat this option as many times as " "there are corpus files") parser.add_argument("-p", "--pickle",
epilog = """ Examples: $ pgc_stats.py -efpu "*.pgc" Remarks: * Failed parses will only be exluded for graph banks in 'alpino' format. """ + epilog parser = DaesoArgParser(description=__doc__, epilog=epilog) parser.add_argument( "pattern", help="*quoted* pattern for parallel graph corpus files") parser.add_argument( "-a", "--with-all", action="store_true", dest="with_all", help="include all, sets options -efpru") #parser.add_argument("-c", "--csv", action="store_true", #dest="csv", #help="output in comma separated values") parser.add_argument( "-e", "--with-empty-nodes", action="store_true",
for one or more pairs of true and predicted parallel text corpora """ from daeso.utils.cli import DaesoArgParser from daeso.ptc.evaluate import eval_alignment __authors__ = 'Erwin Marsi <*****@*****.**>' parser = DaesoArgParser(description=__doc__) parser.add_argument( "-t", "--true_corpora", nargs = "+", help="parallel text corpus containing true alignments") parser.add_argument( "-p", "--pred_corpora", nargs = "+", help="parallel text corpus containing predicted alignments") parser.add_argument( "--tag", default="s", help='only consider alignments involving this tag (defaults is "s"') args = parser.parse_args()
# TODO:
# - silence warning about meta-data

__authors__ = 'Erwin Marsi <*****@*****.**>'

import sys

from daeso.utils.cli import DaesoArgParser
from daeso.utils.opsys import multiglob
from daeso.pgc.corpus import ParallelGraphCorpus

# Command-line interface; the module docstring doubles as the description.
parser = DaesoArgParser(description=__doc__)

# One or more corpus file names or quoted file name patterns (positional).
parser.add_argument(
    "file",
    nargs="+",
    metavar="FILE",
    help="parallel graph corpus filename, "
         "or quoted file name pattern for parallel graph corpora")

# Boolean flag, False unless given on the command line.
parser.add_argument(
    "-f", "--format",
    action="store_true",
    help="output indented XML")

# Boolean flag; help text corrected from "ouput" to "output".
parser.add_argument(
    "-V", "--verbose",
    action="store_true",
    help="verbose output to stderr")

args = parser.parse_args()