Exemplo n.º 1
0
def train_wrapper(seed_fn, source_fn, target_fn, reverse=False, mx_path=None,
                  train_size=5000):
    """Learn a translation matrix from a seed dictionary and save it to disk.

    The seed dictionary is read with ``read_dict``, the source and target
    spaces are built with ``Space.build`` (restricted to the words that occur
    in the seed dictionary) and normalized, and ``train_tm`` is run on them.
    The resulting matrix is written to ``<mx_path>.npy`` and the words used
    for training are pickled to ``<mx_path>.train_wds``, where ``<mx_path>``
    is whatever ``default_output_fn`` returns.

    Args:
        seed_fn: Seed dictionary file, passed to ``read_dict``.
        source_fn: Source space file, passed to ``Space.build``.
        target_fn: Target space file, passed to ``Space.build``.
        reverse: Forwarded to ``read_dict`` as its ``reverse`` keyword.
        mx_path: Output path prefix, resolved through ``default_output_fn``
            together with the three input file names.
        train_size: Forwarded to ``train_tm``.

    Returns:
        The ``(tm, used_for_train)`` pair returned by ``train_tm``.
    """
    logging.info("Training...")
    seed_trans = read_dict(seed_fn, reverse=reverse)

    # We only need to load the vectors for the words in the training data;
    # the semantic spaces contain additional words.
    source_words = set(seed_trans.iterkeys())
    target_words = set().union(*seed_trans.itervalues())

    source_sp = Space.build(source_fn, lexicon=source_words)
    source_sp.normalize()

    target_sp = Space.build(target_fn, lexicon=target_words)
    target_sp.normalize()

    logging.info("Learning the translation matrix")
    tm, used_for_train = train_tm(source_sp, target_sp, seed_trans, train_size)

    mx_path = default_output_fn(mx_path, seed_fn, source_fn, target_fn)
    logging.info("Saving the translation matrix to {}".format(mx_path))
    np.save('{}.npy'.format(mx_path), tm)
    # Use a context manager so the pickle is flushed and the handle closed
    # deterministically. The open mode is left unchanged so existing readers
    # of the .train_wds file keep working.
    with open('{}.train_wds'.format(mx_path), mode='w') as train_wds_file:
        pickle.dump(used_for_train, train_wds_file)

    return tm, used_for_train
Exemplo n.º 2
0
 def load_tr_mx(self):
     """Make sure a translation matrix and its training words are set.

     If ``self.args.mx_path`` is set, the path is resolved through
     ``default_output_fn`` and ``self.exclude_from_test`` / ``self.tr_mx``
     are loaded from ``<mx_path>.train_wds`` / ``<mx_path>.npy``.
     Otherwise both attributes must already be populated.

     Raises:
         Exception: if ``mx_path`` is given although a matrix or training
             words are already set, or if neither source provides them.
     """
     if self.args.mx_path:
         # Compare against None explicitly: truth-testing a multi-element
         # numpy array raises ValueError instead of reaching the intended
         # error below.
         if self.tr_mx is not None or self.exclude_from_test:
             raise Exception(
                 "Translation mx or training words specified amibiguously.")
         else:
             self.args.mx_path = default_output_fn(
                 self.args.mx_path, self.args.seed_fn, self.args.source_fn,
                 self.args.target_fn)
             logging.info("Loading from {}".format(self.args.mx_path))
             # NOTE: unpickling can execute arbitrary code; only point
             # mx_path at files from a trusted source.
             with open('{}.train_wds'.format(
                     self.args.mx_path)) as train_wds_file:
                 self.exclude_from_test = pickle.load(train_wds_file)
             self.tr_mx = np.load('{}.npy'.format(self.args.mx_path))
     elif self.tr_mx is None or not self.exclude_from_test:
         raise Exception('Translation matrix or training words unspecified')