def __init__(self, model_path):
    """Build the web application that serves the model stored in ``model_path``.

    Changes the working directory to ``model_path``, appends a start-up
    entry to the log file, loads ``config.json`` and stores the engine, the
    two pipelines and the shared state on the application object before
    registering the POST routes.

    Args:
        model_path: Directory holding ``config.json`` and the model files.
    """
    # Resolve the directory BEFORE changing into it. With a relative
    # ``model_path`` every later use (config, log, engine) would otherwise
    # be resolved a second time, relative to the new working directory.
    model_path = os.path.abspath(model_path)
    os.chdir(model_path)

    self._app = web.Application()
    config_path = os.path.join(model_path, "config.json")
    log_path = self._set_up_log(model_path)

    # Record the start-up time in the log.
    with open(log_path, 'a+') as log_file:
        time_string = time.strftime("%m/%d/%Y, %H:%M:%S", time.localtime())
        log_file.write(f'--Starting Server--\n{time_string}\n\n')

    with open(config_path, 'r') as config_file:
        config = json.load(config_file)

    # Objects shared with the request handlers.
    self._app['nmt'] = Pangeanmt(model_path)
    self._app['pipeline'] = Pipeline(
        config['pipeline_config'], config['src_lang'], config['tgt_lang']
    )
    self._app['pipeline_tgt'] = Pipeline(
        config['pipeline_config_tgt'], config['tgt_lang']
    )
    self._app['model_path'] = model_path
    self._app['lock'] = asyncio.Lock()
    self._app['sem'] = asyncio.Semaphore()
    self._app['ol'] = config['online_learning']['active']
    self._app['log_path'] = log_path

    # All endpoints are POST routes; the handlers are defined elsewhere.
    self._app.router.add_post('/save', save)
    self._app.router.add_post('/train', train)
    self._app.router.add_post('/isready', ready)
    self._app.router.add_post('/translate', translate)
class Engine:
    """Run in-memory segments through the pipelines and an external model.

    The configuration is read from ``config.json`` in the current working
    directory; it supplies the pipeline settings and the language codes.
    """

    def __init__(self):
        """Load ``config.json`` and build the source and target pipelines."""
        with open('config.json', 'r') as file:
            self._config = json.load(file)
        self._src_pipeline = Pipeline(
            self._config['pipeline_config'],
            self._config['src_lang'],
            self._config['tgt_lang'],
        )
        self._tgt_pipeline = Pipeline(
            self._config['pipeline_config_tgt'],
            self._config['tgt_lang'],
        )

    def train(self, p, src_text, tgt_text):
        """Train ``p`` on every (source, target) pair and return it.

        Args:
            p: Model object exposing ``train(src, tgt)``.
            src_text: Indexable sequence of source segments.
            tgt_text: Indexable sequence of target segments; it is indexed
                with the same positions as ``src_text``.

        Returns:
            The same ``p`` object after training.
        """
        # Positional indexing is kept so that any indexable container that
        # worked before keeps working.
        for i in range(len(src_text)):
            if (i + 1) % 10 == 0:
                print(f'Trained with {i+1} segments.')
            src_prep = self._src_pipeline.preprocess_str(src_text[i])
            tgt_prep = self._tgt_pipeline.preprocess_str(tgt_text[i])
            p.train(src_prep, tgt_prep)
        return p

    def translate(self, p, src_text, tgt_text, batch_size=30):
        """Translate ``src_text`` in batches and return the post-processed text.

        Args:
            p: Model object exposing ``translate(list_of_segments)``; each
                returned item must have a ``tgt`` sequence of tokens.
            src_text: Sliceable sequence of source segments.
            tgt_text: Unused; kept so existing callers do not break.
            batch_size: Number of segments sent to the model per call.

        Returns:
            A list with one post-processed translation per source segment.
        """
        translations_post = []
        for start in range(0, len(src_text), batch_size):
            # Slicing already clamps at the end of the sequence, so the last
            # (possibly shorter) batch needs no special case.
            segs = src_text[start:start + batch_size]
            segs_prep = [self._src_pipeline.preprocess_str(seg) for seg in segs]
            for trans in p.translate(segs_prep):
                tgt = ' '.join(trans.tgt)
                # NOTE(review): post-processing goes through the source
                # pipeline (built with both languages) - confirm intended.
                translations_post.append(
                    self._src_pipeline.postprocess_str(tgt)
                )
            # Report progress on the running total. The previous check
            # tested ``start + 1`` and could never fire, because ``start``
            # only takes multiples of the batch size.
            done = start + len(segs)
            if done % batch_size == 0:
                print(f'Translated {done} segments.')
        return translations_post
def __init__(self, in_file, ref_file):
    """Load the configuration, build both pipelines and locate the data files.

    Args:
        in_file: Name of the input file inside the ``data`` directory.
        ref_file: Name of the reference file inside the ``data`` directory.
    """
    with open('config.json', 'r') as config_handle:
        self._config = json.load(config_handle)

    settings = self._config
    # The source pipeline receives both language codes, the target pipeline
    # only the target one.
    self._src_pipeline = Pipeline(
        settings['pipeline_config'],
        settings['src_lang'],
        settings['tgt_lang'],
    )
    self._tgt_pipeline = Pipeline(
        settings['pipeline_config_tgt'],
        settings['tgt_lang'],
    )

    data_dir = 'data'
    self._in_file = os.path.join(data_dir, in_file)
    self._ref_file = os.path.join(data_dir, ref_file)
def __init__(self):
    """Read ``config.json`` from the working directory and create the pipelines."""
    with open('config.json', 'r') as handle:
        raw_config = handle.read()
    self._config = json.loads(raw_config)

    cfg = self._config
    # Source side: pipeline settings plus both language codes.
    src_args = (cfg['pipeline_config'], cfg['src_lang'], cfg['tgt_lang'])
    self._src_pipeline = Pipeline(*src_args)
    # Target side: its own settings plus the target language code.
    tgt_args = (cfg['pipeline_config_tgt'], cfg['tgt_lang'])
    self._tgt_pipeline = Pipeline(*tgt_args)
class Engine:
    """Train and translate from files under ``data`` using an external model.

    The configuration is read from ``config.json`` in the current working
    directory; it supplies the pipeline settings and the language codes.
    """

    def __init__(self, in_file, ref_file):
        """Load ``config.json``, build the pipelines and locate the data files.

        Args:
            in_file: Name of the source file inside the ``data`` directory.
            ref_file: Name of the reference file inside the ``data`` directory.
        """
        with open('config.json', 'r') as file:
            self._config = json.load(file)
        self._src_pipeline = Pipeline(
            self._config['pipeline_config'],
            self._config['src_lang'],
            self._config['tgt_lang'],
        )
        self._tgt_pipeline = Pipeline(
            self._config['pipeline_config_tgt'],
            self._config['tgt_lang'],
        )
        self._in_file = os.path.join('data', in_file)
        self._ref_file = os.path.join('data', ref_file)

    def train(self):
        """Create a model and train it on the input/reference line pairs.

        Returns:
            The trained model created with ``Pangeanmt('.')``.
        """
        p = Pangeanmt('.')
        with open(self._in_file, 'r') as src_file, \
                open(self._ref_file, 'r') as tgt_file:
            # Walk both files in lockstep and stop at the shorter one. The
            # previous ``readline()`` pairing returned '' once the reference
            # file ran out, so the model was trained on empty targets.
            for src_seg, tgt_seg in zip(src_file, tgt_file):
                src = self._src_pipeline.preprocess_str(src_seg)
                tgt = self._tgt_pipeline.preprocess_str(tgt_seg)
                p.train(src, tgt)
        return p

    def translate_file(self, p, output_file):
        """Translate the input file line by line into ``output_file``.

        Args:
            p: Model object exposing ``translate(list_of_segments)``; each
                returned item must have a ``tgt`` sequence of tokens.
            output_file: Path of the file to (over)write, one translation
                per line.
        """
        with open(self._in_file, 'r') as in_file, \
                open(output_file, 'w') as out_file:
            for seg in in_file:
                seg_prep = self._src_pipeline.preprocess_str(seg)
                translation = p.translate([seg_prep])
                tgt = ' '.join(translation[0].tgt)
                # NOTE(review): post-processing goes through the source
                # pipeline (built with both languages) - confirm intended.
                tgt = self._src_pipeline.postprocess_str(tgt)
                out_file.write(f'{tgt}\n')

    def gen_config(self, alpha):
        """Back up ``config.json`` and write a copy with a new learning rate.

        The current file is moved to ``<old learning rate>_config.json`` and
        a new ``config.json`` is written with ``opts.learning_rate`` set to
        ``alpha``.

        Args:
            alpha: New learning rate to store in the configuration.
        """
        lr = self._config['opts']['learning_rate']
        # ``os.replace`` overwrites an existing backup on every platform;
        # ``os.rename`` raises on Windows when the target already exists.
        os.replace('config.json', f'{lr}_config.json')
        self._config['opts']['learning_rate'] = alpha
        with open('config.json', 'w') as config_file:
            json.dump(self._config, config_file)
def __init__(self):
    """Parse ``config.json`` and set up the source and target pipelines."""
    with open("config.json", "r") as config_file:
        self._config = json.load(config_file)

    # Collect the constructor arguments by key, then build each pipeline:
    # first the source one (both language codes), then the target one.
    source_keys = ("pipeline_config", "src_lang", "tgt_lang")
    source_args = [self._config[key] for key in source_keys]
    self._src_pipeline = Pipeline(*source_args)

    target_keys = ("pipeline_config_tgt", "tgt_lang")
    target_args = [self._config[key] for key in target_keys]
    self._tgt_pipeline = Pipeline(*target_args)
class Engine:
    """Train and translate from a table using the pipelines and a model.

    A table is any object exposing ``shape`` and positional ``iat[row, col]``
    access (the original comments describe it as a DataFrame). Column 0
    holds the source segments and column 1 the target segments.
    """

    def __init__(self):
        """Load ``config.json`` and build the source and target pipelines."""
        with open("config.json", "r") as file:
            self._config = json.load(file)
        self._src_pipeline = Pipeline(
            self._config["pipeline_config"],
            self._config["src_lang"],
            self._config["tgt_lang"],
        )
        self._tgt_pipeline = Pipeline(
            self._config["pipeline_config_tgt"], self._config["tgt_lang"]
        )

    def train_from_table(self, p, table):
        """Train ``p`` on every row of ``table``.

        Args:
            p: Model object exposing ``train(src, tgt)``.
            table: Table with sources in column 0 and targets in column 1.

        Returns:
            A tuple ``(p, res)`` where ``res`` maps ``"src_prep"`` and
            ``"tgt_prep"`` to the lists of preprocessed segments.
        """
        res = {"src_prep": [], "tgt_prep": []}
        # shape[0] is the number of rows.
        for i in range(table.shape[0]):
            if (i + 1) % 10 == 0:
                print(f"Trained with {i+1} segments.")
            src_prep = self._src_pipeline.preprocess_str(table.iat[i, 0])
            tgt_prep = self._tgt_pipeline.preprocess_str(table.iat[i, 1])
            res["src_prep"].append(src_prep)
            res["tgt_prep"].append(tgt_prep)
            p.train(src_prep, tgt_prep)
        return p, res

    def no_train_translate_from_table(self, p, table):
        """Translate column 0 of ``table``; results are keyed ``"original"``.

        Args:
            p: Model object exposing ``translate(list_of_segments)``.
            table: Table with the source segments in column 0.

        Returns:
            ``{"original": [translation, ...]}``.
        """
        return {"original": self._translate_column(p, table)}

    def translate_from_table(self, p, table, j):
        """Translate column 0 of ``table``; results are keyed ``"tgts_<j>"``.

        Args:
            p: Model object exposing ``translate(list_of_segments)``.
            table: Table with the source segments in column 0.
            j: Value interpolated into the result key.

        Returns:
            ``{f"tgts_{j}": [translation, ...]}``.
        """
        return {f"tgts_{j}": self._translate_column(p, table)}

    def _translate_column(self, p, table):
        """Return the post-processed translation of every row's column 0."""
        translations = []
        # shape[0] is the number of rows.
        for i in range(table.shape[0]):
            if (i + 1) % 10 == 0:
                print(f"Translated {i+1} segments.")
            seg_prep = self._src_pipeline.preprocess_str(table.iat[i, 0])
            translation = p.translate([seg_prep])
            tgt = " ".join(translation[0].tgt)
            # NOTE(review): post-processing goes through the source
            # pipeline (built with both languages) - confirm intended.
            translations.append(self._src_pipeline.postprocess_str(tgt))
        return translations