def on_predict(args):
    """Build the prediction generator for the "difference" service.

    An endless cycle of the arXiv export URL is wrapped by ``metaize`` and
    fed into ``elmo_difference_pipe``; the resulting stream is then passed
    through ``forget_except`` with the keys ``html_path``, ``css`` and
    ``html``.

    Args:
        args: Mapping that must contain ``'best_model_path'``; its value is
            forwarded as ``difference_model_path``.

    Returns:
        Whatever ``forget_except`` returns for the wrapped pipeline.
    """
    endless_sources = metaize(itertools.cycle(["http://export.arxiv.org/"]))
    predictions = elmo_difference_pipe(
        endless_sources,
        difference_model_path=args['best_model_path'],
        service_id="difference",
    )
    return forget_except(predictions, keys=['html_path', 'css', 'html'])
def annotate_uploaded_file(file, service_id):
    """Run a single uploaded file through the difference pipeline.

    ``BEST_MODELS`` is pretty-printed first (diagnostic output). The file is
    then wrapped by ``metaize`` and handed to ``elmo_difference_single_pipe``
    together with the model path stored under
    ``BEST_MODELS["difference"]['best_model_path']``.

    Args:
        file: The uploaded file reference; passed to ``metaize`` as a
            one-element list.
        service_id: Forwarded unchanged to the pipeline.

    Returns:
        The first item the pipeline produces, or ``None`` if it produces
        nothing.
    """
    pprint(BEST_MODELS)

    # Wrap the upload before looking up the model path, so failures surface
    # in the same order as the pipeline call evaluates its arguments.
    single_upload = metaize([file])
    model_path = BEST_MODELS["difference"]['best_model_path']

    results = elmo_difference_single_pipe(
        single_upload,
        difference_model_path=model_path,
        service_id=service_id,
    )
    return next(results, None)
def annotate_difference_elmo():
    """Start ``model_in_the_loop`` for the ELMo "difference" service.

    Model and collection locations come from ``config``. Training drains
    ``elmo_difference_model_pipe`` over the supplied sample files, and
    prediction is delegated to the module-level ``on_predict``. The training
    rate is configured in ``'size'`` mode against ``train_over.conll3`` inside
    the collection path.

    Returns:
        None. The result of ``model_in_the_loop`` is discarded.
    """
    model_dir = config.ELMO_DIFFERENCE_MODEL_PATH
    collection_path = config.ELMO_DIFFERENCE_COLLECTION_PATH

    def train_on_samples(args):
        # Materialise the training pipeline so every sample is processed.
        training_run = elmo_difference_model_pipe(
            metaize(args['samples_files']),
            collection_step=args['training_rate'],
        )
        return list(training_run)

    rate_file = config.ELMO_DIFFERENCE_COLLECTION_PATH + "/train_over.conll3"

    model_in_the_loop(
        model_dir=model_dir,
        collection_path=collection_path,
        on_train=train_on_samples,
        service_id="difference",
        on_predict=on_predict,
        training_rate_mode='size',
        training_rate_file=rate_file,
    )
def make_box_layout_model(self):
    """Train the layout model on the pickled collection files and print the result.

    The pipe is obtained from ``self.ant("annotation.collection", "model")``.
    Input files are every path matching ``<COLLECTION_PATH>.*.pickle``, and
    the model is written under ``hidden_folder + layout_model_path + "/test"``
    as configured in ``config``. The pipe runs with ``training=True`` and
    ``collection_step="_testcase"``; its fully consumed output is printed.
    """
    collection_to_model = self.ant("annotation.collection", "model")
    target_model_path = config.hidden_folder + config.layout_model_path + "/test"
    pickle_pattern = config.COLLECTION_PATH + ".*.pickle"

    # glob.glob already returns a list, so no extra copy is needed.
    pickled_files = glob.glob(pickle_pattern)

    outcome = list(
        collection_to_model(
            metaize(pickled_files),
            training=True,
            collection_step="_testcase",
            layout_model_path=target_model_path,
        )
    )
    print(outcome)
def fill_library():
    """Pull up to 100 items through the filling pipeline, logging any failure.

    An endless cycle of the ``"pdfs"`` source is wrapped by ``metaize``,
    capped at 100 entries with ``itertools.islice``, run through
    ``filling_pipe`` and then through ``forget_except`` with the key
    ``html_path``. The results themselves are discarded; only the side
    effects of the pipeline matter here.

    Any exception raised while building or draining the pipeline is logged
    with its traceback and swallowed, so this best-effort warm-up never
    propagates an error to the caller.

    Returns:
        None.
    """
    # The former ``while not x`` wrapper always ran exactly once (``x`` was
    # never reassigned and both paths ended in ``break``), so a single
    # attempt is made here directly.
    try:
        capped_sources = itertools.islice(
            metaize(itertools.cycle(["pdfs"])), 100
        )
        gen = forget_except(filling_pipe(capped_sources), keys=["html_path"])
        for _ in range(100):
            # The default of None keeps an early-exhausted generator from
            # raising StopIteration.
            next(gen, None)
    except Exception:
        logging.exception("Getting first 100 threw")
# NOTE(review): the line below is a fragment. The loop belongs to a definition
# whose header is outside this view, and it is fused with the script entry block.
#
# Loop part: for every (text, meta) pair from `iterator`
#   - derives meta['txt_path'] (html_path + ".txt") and meta['mp3_path']
#     (txt_path + ".mp3"),
#   - joins the text parts with blank lines and writes them to the .txt file,
#   - logs the text and the target mp3 path,
#   - runs `text2wave` and then `lame` through os.popen, using the fixed
#     intermediate file "out.wave" (a relative path),
#   - yields (meta['mp3_path'], meta).
# NOTE(review): the log message slices text[100:], i.e. everything AFTER the
# first 100 characters; presumably text[:100] was intended - confirm.
# NOTE(review): the paths are interpolated unquoted into shell command strings;
# verify they can never contain spaces or shell metacharacters.
#
# Script part (`if __name__ == "__main__"`): builds a PathAnt, prints its graph,
# takes the first element of find_best_model() as the model path, runs the
# "pdf" -> "mp3" pipe on ./../test/glue.pdf, pretty-prints the result and plays
# the first produced mp3 with mplayer. `model_pat` is an alias that is not used
# again in the visible code.
for text, meta in iterator: txt_path = meta['html_path'] + ".txt" meta['txt_path'] = txt_path meta['mp3_path'] = txt_path + '.mp3' text = "\n\n".join(text) with open(txt_path, "w") as f: f.write(text) self.logger.info( f"encoding \"{text[100:]}\" as mp3 on path {meta['mp3_path']}" ) os.popen( f"text2wave -eval '(nitech_us_bdl_arctic_hts)' {meta['txt_path']} -o out.wave" ).read() os.popen(f"lame out.wave {meta['mp3_path']}").read() yield meta['mp3_path'], meta if __name__ == "__main__": from core.pathant.PathAnt import PathAnt from layout.model_helpers import find_best_model from helpers.list_tools import metaize ant = PathAnt() print(ant.graph()) model_path = model_pat = find_best_model()[0] pipe = ant("pdf", "mp3") res = list(pipe(metaize(["./../test/glue.pdf"]), model_path=model_path)) pprint(res) os.popen(f"mplayer {res[0][0]}").read()