def pgc_zip(zip_filename, pgc_filenames):
    """
    Pack parallel graph corpora and their graphbank files into a zip archive.

    All members are stored flat inside a single archive directory named
    after the zip file (its basename without extension).

    @param zip_filename: path of the zip archive to create; it is opened in
    "w" mode

    @param pgc_filenames: corpus filename(s) or pattern(s), expanded with
    multiglob()

    If anything fails while collecting files, the archive handle is still
    closed before the exception propagates (an incomplete archive may be
    left on disk).
    """
    arch_dir = os.path.splitext(os.path.basename(zip_filename))[0]
    zip_arch = zipfile.ZipFile(zip_filename, "w")

    # try/finally rather than a bare close() at the end, so that the handle
    # is not leaked when reading a corpus or writing a member raises
    try:
        for corpus_filename in multiglob(pgc_filenames):
            # add corpus to archive
            arch_filename = os.path.join(
                arch_dir, os.path.basename(corpus_filename))
            zip_arch.write(corpus_filename, arch_filename)

            # the corpus is read only to find out which graphbank files it
            # refers to (graph_loading=LOAD_NONE presumably skips loading
            # the graphs themselves -- confirm against ParallelGraphCorpus)
            corpus = ParallelGraphCorpus(inf=corpus_filename,
                                         graph_loading=LOAD_NONE)

            for gb in corpus._graphbanks():
                gb_filename = gb.get_file_path()
                # add graphbank files to archive
                arch_filename = os.path.join(
                    arch_dir, os.path.basename(gb_filename))
                zip_arch.write(gb_filename, arch_filename)
    finally:
        zip_arch.close()
def pgc_zip(zip_filename, pgc_filenames):
    """
    Create a zip archive holding parallel graph corpora plus the graphbank
    files they reference.

    Every file is stored under one archive directory whose name is the
    basename of zip_filename without its extension; only the basename of
    each source file is kept.

    @param zip_filename: path of the zip archive to write (opened in "w"
    mode)

    @param pgc_filenames: corpus filename(s) or pattern(s), expanded with
    multiglob()

    The archive is closed even when an error occurs half-way, after which
    the error propagates to the caller.
    """
    arch_dir = os.path.splitext(os.path.basename(zip_filename))[0]

    def arch_path(filename):
        # location of a file inside the archive: <arch_dir>/<basename>
        return os.path.join(arch_dir, os.path.basename(filename))

    zip_arch = zipfile.ZipFile(zip_filename, "w")

    # guarantee close() on every exit path; previously an exception while
    # loading a corpus or adding a member left the archive handle open
    try:
        for corpus_filename in multiglob(pgc_filenames):
            # add corpus to archive
            zip_arch.write(corpus_filename, arch_path(corpus_filename))

            # open the corpus just to enumerate its graphbanks
            corpus = ParallelGraphCorpus(inf=corpus_filename,
                                         graph_loading=LOAD_NONE)

            for gb in corpus._graphbanks():
                # add graphbank files to archive
                gb_filename = gb.get_file_path()
                zip_arch.write(gb_filename, arch_path(gb_filename))
    finally:
        zip_arch.close()
# -f/--format: boolean flag (help text: output indented XML)
parser.add_argument(
    "-f", "--format", action="store_true", help="output indented XML"
)
# -V/--verbose: boolean flag that enables the log() messages below
parser.add_argument(
    "-V", "--verbose", action="store_true", help="verbose output to stderr"
)
args = parser.parse_args()

pgc_fns = multiglob(args.file)

# Without this check an empty expansion would surface as a bare IndexError
# on pgc_fns[0]; report it as a regular command line error instead.
if not pgc_fns:
    parser.error("no parallel graph corpus files found")


def log(s):
    """Write message s to stderr, prefixed with "***", if --verbose is set."""
    if args.verbose:
        # same output as the Python 2 statement 'print >>sys.stderr, "***", s'
        # but also valid under Python 3
        sys.stderr.write("*** %s\n" % s)


# the first file provides the base corpus; all others are merged into it
log("Reading corpus from " + pgc_fns[0])
corpus = ParallelGraphCorpus(inf=pgc_fns[0])

for fn in pgc_fns[1:]:
    log("Joining corpus from " + fn)
    # __iadd__ also checks if another corpus is compatible w.r.t. relations
    # and meta-data
    corpus += ParallelGraphCorpus(inf=fn)
"--config", metavar="FILE", help="configuration file to set up a corpus aligner") parser.add_argument("-x", "--clear", action="store_true", help="remove all existing alignments") parser.add_argument("-i", "--in-place", action="store_true", help="modify input file(s)") args = parser.parse_args() if args.config: config = imp.load_source("config", args.config) corpus_aligner = set_up_corpus_aligner(config) else: from daeso_nl.ga.corpus import CorpusAligner corpus_aligner = CorpusAligner() for inf in multiglob(args.pgc_files): corpus = ParallelGraphCorpus(inf=inf) corpus_aligner.align(corpus, clear=args.clear) if args.in_place: corpus.write(outf=inf, pprint=True) else: corpus.write(pprint=True)
# Remaining command line options: two boolean switches
parser.add_argument(
    "-x",
    "--clear",
    action="store_true",
    help="remove all existing alignments",
)
parser.add_argument(
    "-i",
    "--in-place",
    action="store_true",
    help="modify input file(s)",
)
args = parser.parse_args()

# Obtain an aligner: a plain CorpusAligner unless a config file was given,
# in which case the config is loaded as a module and used for the set-up.
if not args.config:
    from daeso_nl.ga.corpus import CorpusAligner
    corpus_aligner = CorpusAligner()
else:
    config = imp.load_source("config", args.config)
    corpus_aligner = set_up_corpus_aligner(config)

# Align each matching corpus and write it out pretty-printed, back to its
# own file when --in-place was requested.
for inf in multiglob(args.pgc_files):
    corpus = ParallelGraphCorpus(inf=inf)
    corpus_aligner.align(corpus, clear=args.clear)

    write_options = {"pprint": True}
    if args.in_place:
        write_options["outf"] = inf
    corpus.write(**write_options)
# NOTE(review): the next two lines are the tail of a call that starts before
# this chunk -- presumably the parser.add_argument(...) for the file argument.
help="parallel graph corpus filename, "
"or quoted file name pattern for parallel graph corpora")
# -f/--format: boolean flag (help text: output indented XML)
parser.add_argument("-f", "--format", action="store_true", help="output indented XML")
# -V/--verbose: boolean flag that enables the log() messages below
# NOTE(review): "ouput" in the help text is a typo for "output"
parser.add_argument("-V", "--verbose", action="store_true", help="verbose ouput to stderr")
args = parser.parse_args()

# expand the file argument into the corpus filenames to join
pgc_fns = multiglob(args.file)

def log(s):
    """Print message s to stderr, prefixed with "***", if --verbose is set."""
    if args.verbose:
        # Python 2 print statement redirected to stderr
        print >> sys.stderr, "***", s

# the first file provides the base corpus
# NOTE(review): pgc_fns[0] raises IndexError if pgc_fns is empty
log("Reading corpus from " + pgc_fns[0])
corpus = ParallelGraphCorpus(inf=pgc_fns[0])

for fn in pgc_fns[1:]:
    log("Joining corpus from " + fn)
    # __iadd__ also checks if another corpus is compatible w.r.t. relations
    # and meta-data
    # NOTE(review): the statement this comment refers to lies past the end of
    # this chunk