def main(args):
    """
    Subcommand main: move a text span from one subdocument to another.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    For every document of the source corpus, the span `args.span` is
    moved to the document that has the same key except for its `subdoc`,
    which is `args.target`. The span must either start at offset 0 or
    end at the end of the source text. Both modified documents are saved
    to the output directory and their diffs are printed on stderr.

    Raises `ValueError` if the target subdocument is not in the target
    corpus, or if the span touches neither end of the source document.
    """
    output_dir = get_output_dir(args, default_overwrite=True)
    span_start = args.span.char_start
    span_end = args.span.char_end
    src_corpus = read_source_corpus(args)
    tgt_corpus = read_target_corpus(args)
    renames = compute_renames(tgt_corpus, src_corpus)

    for src_key, old_src in src_corpus.items():
        # target key = source key with its subdoc swapped for args.target
        tgt_key = copy.copy(src_key)
        tgt_key.subdoc = args.target
        print(src_key, tgt_key, file=sys.stderr)
        if tgt_key not in tgt_corpus:
            raise ValueError("Uh-oh! we don't have %s in the corpus" % tgt_key)
        old_tgt = tgt_corpus[tgt_key]

        if span_start == 0:
            # move up: the head of the source (up to span_end) is handed
            # to move_portion with tgt_split=-1
            new_src, new_tgt = move_portion(
                renames, old_src, old_tgt,
                span_end,  # src_split
                tgt_split=-1)
        elif span_end != len(old_src.text()):
            raise ValueError("Sorry, can only move to the start or to the "
                             "end of a document at the moment")
        else:
            # move down: move_portion takes its material from the *start*
            # of its source argument, so the tail of the source is first
            # detached into a temporary document, which is then moved
            # whole (src_split=-1) to the front of the target
            new_src, detached_tail = split_doc(old_src, span_start)
            _, new_tgt = move_portion(
                renames, detached_tail, old_tgt,
                -1,  # src_split
                tgt_split=0)
            # the whitespace that separated the two halves left with the
            # detached tail; give the remaining source a trailing one back
            evil_set_text(new_src, new_src.text() + ' ')

        # diff report, usable as a suggested commit message
        report_lines = [
            "======= TO %s ========" % tgt_key,
            show_diff(old_tgt, new_tgt),
            "^------ FROM %s" % src_key,
            show_diff(old_src, new_src),
            "",
        ]
        print("\n".join(report_lines), file=sys.stderr)

        # write out both modified documents
        save_document(output_dir, src_key, new_src)
        save_document(output_dir, tgt_key, new_tgt)

    announce_output_dir(output_dir)
def main(args):
    """
    Subcommand main: move a text span from one subdocument to another.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    For every key of the source corpus, the span `args.span` is moved to
    the document that has the same key except for its `subdoc`, which is
    `args.target`. The span must either start at offset 0 or end at the
    end of the source document's text span. Both modified documents are
    saved to the output directory and their diffs are printed on stderr.

    Exits through `sys.exit` with a message if the target subdocument is
    not in the target corpus, or if the span touches neither end of the
    source document.
    """
    output_dir = get_output_dir(args, default_overwrite=True)
    span_start = args.span.char_start
    span_end = args.span.char_end
    src_corpus = read_source_corpus(args)
    tgt_corpus = read_target_corpus(args)
    renames = compute_renames(tgt_corpus, src_corpus)

    for src_key in src_corpus:
        # target key = source key with its subdoc swapped for args.target
        tgt_key = copy.copy(src_key)
        tgt_key.subdoc = args.target
        print(src_key, tgt_key, file=sys.stderr)
        if tgt_key not in tgt_corpus:
            sys.exit("Uh-oh! we don't have %s in the corpus" % tgt_key)
        old_src = src_corpus[src_key]
        old_tgt = tgt_corpus[tgt_key]

        if span_start == 0:
            # span sits at the start of the source
            new_src, new_tgt = move_portion(
                renames, old_src, old_tgt,
                src_split=span_end, tgt_split=-1)
        elif span_end != old_src.text_span().char_end:
            sys.exit("Sorry, can only move to the start or to the "
                     "end of a document at the moment")
        else:
            # span sits at the end of the source: detach it into a
            # temporary document, then move that document whole
            new_src, detached_tail = split_doc(old_src, span_start)
            _, new_tgt = move_portion(
                renames, detached_tail, old_tgt,
                src_split=-1, tgt_split=0)

        report_lines = [
            "======= TO %s ========" % tgt_key,
            show_diff(old_tgt, new_tgt),
            "^------ FROM %s" % src_key,
            show_diff(old_src, new_src),
            "",
        ]
        print("\n".join(report_lines), file=sys.stderr)

        save_document(output_dir, src_key, new_src)
        save_document(output_dir, tgt_key, new_tgt)

    announce_output_dir(output_dir)
def main(args):
    """
    Subcommand main: insert a document into each requested target document.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    The directory `args.insert` is read with `LiveInputReader` and must
    yield exactly one document. That document is handed whole
    (`src_split=-1`) to `move_portion` for every document selected from
    `args.corpus` by `is_requested(args)`, with `args.start` as the
    target split point. Each modified target is saved to the output
    directory and its diff is printed on stderr.

    Exits through `sys.exit` with a message when the insert directory
    yields no document or more than one.
    """
    output_dir = get_output_dir(args, default_overwrite=True)

    # the material to insert: exactly one document is expected
    src_reader = educe.stac.LiveInputReader(args.insert)
    src_corpus = src_reader.slurp(src_reader.files())
    if not src_corpus:
        sys.exit("Insert dir must have exactly one .aa/.ac pair (none found)")
    elif len(src_corpus) > 1:
        sys.exit("Insert dir must have exactly one .aa/.ac pair (%d found)" %
                 len(src_corpus))
    # `values()[0]` only works on Python 2, where values() is a list;
    # on Python 3 it is a non-subscriptable view, so take the first
    # (and only) item through an iterator instead
    src_doc = next(iter(src_corpus.values()))

    # the documents to insert into
    reader = educe.stac.Reader(args.corpus)
    tgt_files = reader.filter(reader.files(), is_requested(args))
    tgt_corpus = reader.slurp(tgt_files)

    renames = compute_renames(tgt_corpus, src_corpus)

    for tgt_k, tgt_doc in tgt_corpus.items():
        _, new_tgt_doc = move_portion(renames, src_doc, tgt_doc,
                                      -1,  # src_split: the whole source
                                      tgt_split=args.start)
        diffs = ["======= INSERT IN %s ========" % tgt_k,
                 show_diff(tgt_doc, new_tgt_doc)]
        report = "\n".join(diffs)
        if sys.version_info[0] < 3:
            # Python 2: keep the explicit UTF-8 encoding of the report.
            # On Python 3 the text is printed as is, because printing a
            # bytes object would show its b'...' repr, not the diff.
            report = report.encode('utf-8')
        print(report, file=sys.stderr)
        save_document(output_dir, tgt_k, new_tgt_doc)

    announce_output_dir(output_dir)
def main(args):
    """
    Subcommand main: insert a document into each requested target document.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    The directory `args.insert` is read with `LiveInputReader` and must
    yield exactly one document. That document is handed whole
    (`src_split=-1`) to `move_portion` for every document selected from
    `args.corpus` by `is_requested(args)`, with `args.start` as the
    target split point. Each modified target is saved to the output
    directory and its diff is printed on stderr.

    Exits through `sys.exit` with a message when the insert directory
    yields no document or more than one.
    """
    output_dir = get_output_dir(args, default_overwrite=True)

    # the material to insert: exactly one document is expected
    src_reader = educe.stac.LiveInputReader(args.insert)
    src_corpus = src_reader.slurp(src_reader.files())
    if not src_corpus:
        sys.exit("Insert dir must have exactly one .aa/.ac pair (none found)")
    elif len(src_corpus) > 1:
        sys.exit("Insert dir must have exactly one .aa/.ac pair (%d found)" %
                 len(src_corpus))
    # `values()[0]` only works on Python 2, where values() is a list;
    # on Python 3 it is a non-subscriptable view, so take the first
    # (and only) item through an iterator instead
    src_doc = next(iter(src_corpus.values()))

    # the documents to insert into
    reader = educe.stac.Reader(args.corpus)
    tgt_files = reader.filter(reader.files(), is_requested(args))
    tgt_corpus = reader.slurp(tgt_files)

    renames = compute_renames(tgt_corpus, src_corpus)

    for tgt_k, tgt_doc in tgt_corpus.items():
        _, new_tgt_doc = move_portion(renames, src_doc, tgt_doc,
                                      -1,  # src_split: the whole source
                                      tgt_split=args.start)
        diffs = ["======= INSERT IN %s ========" % tgt_k,
                 show_diff(tgt_doc, new_tgt_doc)]
        report = "\n".join(diffs)
        if sys.version_info[0] < 3:
            # Python 2: keep the explicit UTF-8 encoding of the report.
            # On Python 3 the text is printed as is, because printing a
            # bytes object would show its b'...' repr, not the diff.
            report = report.encode('utf-8')
        print(report, file=sys.stderr)
        save_document(output_dir, tgt_k, new_tgt_doc)

    announce_output_dir(output_dir)
def main(args):
    """
    Subcommand main: transfer a span of text to a sibling subdocument.

    You shouldn't need to call this yourself if you're using
    `config_argparser`.

    Each source document gives up the span `args.span` to the document
    whose key matches its own except for `subdoc`, which is
    `args.target`. Only a span that begins at offset 0 or stops at the
    end of the source text can be moved. The rewritten source and target
    are saved to the output directory; a diff of both goes to stderr.

    Raises `ValueError` when the target subdocument is missing from the
    target corpus, or when the span is at neither end of the source.
    """
    output_dir = get_output_dir(args, default_overwrite=True)
    first_char = args.span.char_start
    last_char = args.span.char_end
    sources = read_source_corpus(args)
    targets = read_target_corpus(args)
    renames = compute_renames(targets, sources)

    for src_id, src_before in sources.items():
        # look up the target subdoc: same key, different subdoc
        tgt_id = copy.copy(src_id)
        tgt_id.subdoc = args.target
        print(src_id, tgt_id, file=sys.stderr)
        if tgt_id not in targets:
            raise ValueError("Uh-oh! we don't have %s in the corpus" % tgt_id)
        tgt_before = targets[tgt_id]

        if first_char == 0:
            # move up
            src_after, tgt_after = move_portion(
                renames, src_before, tgt_before,
                last_char,  # src_split
                tgt_split=-1)
        elif last_char == len(src_before.text()):
            # move down: move_portion inserts src[0:src_split] between
            # tgt[0:tgt_split] and tgt[tgt_split:], so src[first_char:]
            # is cut off into a scratch document that is then moved whole
            src_after, scratch_doc = split_doc(src_before, first_char)
            _, tgt_after = move_portion(
                renames, scratch_doc, tgt_before,
                -1,  # src_split
                tgt_split=0)
            # the separating whitespace went with the scratch document,
            # so a fresh one is appended to what remains of the source
            padded_text = src_after.text() + ' '
            evil_set_text(src_after, padded_text)
        else:
            raise ValueError("Sorry, can only move to the start or to the "
                             "end of a document at the moment")

        # diff output, meant as a suggested commit message
        tgt_banner = "======= TO %s ========" % tgt_id
        tgt_diff = show_diff(tgt_before, tgt_after)
        src_banner = "^------ FROM %s" % src_id
        src_diff = show_diff(src_before, src_after)
        print("\n".join([tgt_banner, tgt_diff, src_banner, src_diff, ""]),
              file=sys.stderr)

        # dump the modified documents
        save_document(output_dir, src_id, src_after)
        save_document(output_dir, tgt_id, tgt_after)

    announce_output_dir(output_dir)