def wordUpdate():
    """Re-translate a sentence with user-supplied attention overrides,
    word corrections, and UNK replacements; return the updated beam tree."""
    data = request.get_json()
    sentence = data["sentence"]
    attentionOverrideMap = data["attentionOverrideMap"]
    correctionMap = data["correctionMap"]
    unk_map = data["unk_map"]
    beam_size = int(data["beam_size"])
    beam_length = float(data["beam_length"])
    beam_coverage = float(data["beam_coverage"])

    translation, attn, translations = seq2seq_model.translate(
        sentence, beam_size, beam_length=beam_length,
        beam_coverage=beam_coverage,
        attention_override_map=attentionOverrideMap,
        correction_map=correctionMap, unk_map=unk_map)

    beam = translationsToTree(translations)
    return jsonify({"beam": beam})
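# Illustrative JSON body for wordUpdate above. The key names are exactly the
# ones the handler reads; the values shown are hypothetical:
#
#   {
#     "sentence": "Das ist ein Test .",
#     "attentionOverrideMap": {...},   # presumably per-position attention overrides
#     "correctionMap": {...},          # user corrections of target words
#     "unk_map": {...},                # replacements for UNK tokens
#     "beam_size": 3,
#     "beam_length": 0.6,
#     "beam_coverage": 0.4
#   }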
def translate():
    """Translate a single sentence and return the translation, its
    attention matrix, and the beam search tree."""
    data = request.get_json()
    sentence = data["sentence"]
    beam_size = int(data["beam_size"])
    beam_length = float(data["beam_length"])
    beam_coverage = float(data["beam_coverage"])

    translation, attn, translations = seq2seq_model.translate(
        sentence, beam_size, beam_length=beam_length,
        beam_coverage=beam_coverage, apply_bpe=False)

    res = {}
    res["sentence"] = sentence
    res["translation"] = " ".join(translation)
    res["attention"] = attn
    res["beam"] = translationsToTree(translations)
    return jsonify(res)
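# Shape of the JSON response assembled in translate() above (values are
# illustrative; "attention" is whatever seq2seq_model.translate returns):
#
#   {
#     "sentence": "...",               # echoed source sentence
#     "translation": "...",            # space-joined target tokens
#     "attention": [[...], ...],       # attention weights
#     "beam": {...}                    # tree built by translationsToTree
#   }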
def retranslate(document_id):
    """Re-translate all sentences of a document that the user has not
    corrected and report how many translations changed."""
    document = get_document(document_id)
    scorer = Scorer()
    extractor = DomainSpecificExtractor(
        source_file=document.filepath,
        train_source_file="myseq2seq/data/wmt14/train.tok.clean.bpe.32000.de",
        train_vocab_file="myseq2seq/train_vocab.pkl")
    keyphrases = extractor.extract_keyphrases()

    num_changes = 0
    for i, sentence in enumerate(document.sentences):
        if sentence.corrected:
            # Never overwrite sentences the user has already corrected.
            continue
        translation, attn, translations = seq2seq_model.translate(
            sentence.source)
        beam = translationsToTree(translations)

        score = scorer.compute_scores(sentence.source, " ".join(translation),
                                      attn, keyphrases)
        score["order_id"] = i

        translation_text = " ".join(translation)
        if translation_text != sentence.translation:
            num_changes += 1
            # [:-4] strips what appears to be a trailing end-of-sequence
            # marker; replacing "@@ " removes BPE separators before diffing.
            sentence.diff = html_diff(
                sentence.translation[:-4].replace("@@ ", ""),
                translation_text[:-4].replace("@@ ", ""))

        sentence.translation = translation_text
        sentence.beam = beam
        sentence.score = score
        sentence.attention = attn

    save_document(document, document_id)
    return jsonify({"numChanges": num_changes})
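# html_diff is defined elsewhere in this project. Below is a minimal sketch
# of a word-level HTML diff with the same call signature, assuming the intent
# is to mark removed words with <del> and added words with <ins> (the name
# html_diff_sketch and the tag choice are assumptions, not the real helper):

import difflib

def html_diff_sketch(old, new):
    old_words, new_words = old.split(), new.split()
    out = []
    for op, i1, i2, j1, j2 in difflib.SequenceMatcher(
            None, old_words, new_words).get_opcodes():
        if op == "equal":
            out.extend(old_words[i1:i2])
        if op in ("delete", "replace"):
            out.append("<del>" + " ".join(old_words[i1:i2]) + "</del>")
        if op in ("insert", "replace"):
            out.append("<ins>" + " ".join(new_words[j1:j2]) + "</ins>")
    return " ".join(out)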
def documentUpload():
    """Handle a document upload: save the file, translate and score every
    sentence, and persist the resulting document."""
    if 'file' not in request.files:
        return redirect(request.url)
    file = request.files['file']
    # If the user does not select a file, the browser may submit an
    # empty part without a filename.
    if file.filename == '':
        return redirect(request.url)
    if file and allowed_file(file.filename):
        document_name = request.args.get("document_name")
        document_id = uuid4()  # avoid shadowing the built-in id()
        filename = secure_filename(file.filename)
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(filepath)

        user = User.query.filter_by(username=get_jwt_identity()).first()
        dbDocument = DBDocument(id=document_id, name=document_name, user=user)
        document = Document(str(document_id), document_name, dict(), filepath)
        sentences = document.load_content(filename)

        # Rewrite the file without BPE separators, one sentence per line and
        # no trailing newline. Note the parentheses around the conditional:
        # without them the last sentence would be written as "".
        with open(filepath, "w") as f:
            for i, sentence in enumerate(sentences):
                f.write(sentence.replace("@@ ", "")
                        + ("\n" if i < len(sentences) - 1 else ""))

        extractor = DomainSpecificExtractor(
            source_file=filepath,
            train_source_file="myseq2seq/data/wmt14/train.tok.clean.bpe.32000.de",
            train_vocab_file="myseq2seq/train_vocab.pkl")
        keyphrases = extractor.extract_keyphrases(n_results=30)
        scorer = Scorer()

        print("Translating {} sentences".format(len(sentences)))
        for i, source in enumerate(sentences):
            translation, attn, translations = seq2seq_model.translate(
                source, beam_size=3, beam_length=1, beam_coverage=1)
            print("Translated {} of {}".format(i + 1, len(sentences)))
            beam = translationsToTree(translations[:3])

            score = scorer.compute_scores(source, " ".join(translation),
                                          attn, keyphrases)
            score["order_id"] = i

            sentence = Sentence(i, source, " ".join(translation), attn, beam,
                                score)
            document.sentences.append(sentence)
        print("Finished translation")

        document.keyphrases = [{"name": k, "occurrences": f, "active": False}
                               for (k, f) in keyphrases]

        db.session.add(dbDocument)
        db.session.commit()
        save_document(document, document_id)
        return jsonify({})
    return jsonify({})
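# Illustrative client call for documentUpload above, assuming the view is
# mounted at an /upload route (the decorator is not shown in this file) and
# that JWT auth guards it, as the get_jwt_identity() call suggests:
#
#   curl -H "Authorization: Bearer <token>" \
#        -F "file=@report.de.txt" \
#        "http://localhost:5000/upload?document_name=Report"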