# NOTE(review): this excerpt starts inside an if/else whose `if` header is
# not visible here, and its line breaks were lost; the nesting below is
# reconstructed from syntax only -- confirm against the original file.
logger.setLevel(logging.WARN)
else:
    logger.setLevel(logging.INFO)

# Echo the run configuration to stderr.
print >> stderr, 'Instances file: %s' % instances_file
print >> stderr, 'Model file: %s' % model
print >> stderr, 'Word vector file: %s' % word_vector_file
print >> stderr, 'lambda_reg: %20.18f' % lambda_reg
print >> stderr, 'Max iterations: %d' % maxiter
# The seed line is only printed when _seed is truthy.
if _seed:
    print >> stderr, 'Random seed: %s' % _seed
print >> stderr, ''

# Load the word vectors from word_vector_file and query their embsize()
# (presumably the embedding dimensionality -- TODO confirm).
print >> stderr, 'load Word2Vec Model...'
word_vectors = WordVectors.load_vectors(word_vector_file)
embsize = word_vectors.embsize()

# Build the instances from the instances file; the second value returned by
# prepare_data is discarded.
print >> stderr, 'preparing data...'
instances, _, total_internal_node = prepare_data(word_vectors, instances_file)

print >> stderr, 'init. RAE parameters...'
timer = Timer()
timer.tic()
# Convert a supplied seed to int; otherwise keep it as None.
# NOTE(review): `!= None` would idiomatically be `is not None`.
if _seed != None:
    _seed = int(_seed)
else:
    _seed = None
print >> stderr, 'seed: %s' % str(_seed)
# Initial parameter vector, built from embsize and the (possibly None) seed.
theta0 = init_theta(embsize, _seed=_seed)
# NOTE(review): the line breaks of this excerpt were lost; the nesting below
# is reconstructed from syntax only -- confirm against the original file.
# Echo the run configuration to stderr.
print >> stderr, "Source Word vector file: %s" % source_word_vector_file
print >> stderr, "Target Word vector file: %s" % target_word_vector_file
print >> stderr, "lambda_reg: %20.18f" % lambda_reg
print >> stderr, "lambda_reg_L: %20.18f" % lambda_reg_L
print >> stderr, "lambda_reg_rec: %20.18f" % lambda_reg_rec
print >> stderr, "lambda_reg_sem: %20.18f" % lambda_reg_sem
print >> stderr, "alpha: %20.18f" % alpha
print >> stderr, "Max iterations: %d" % maxiter
print >> stderr, "Max iterations_la: %d" % maxiter_la
# The seed line is only printed when _seed is truthy.
if _seed:
    print >> stderr, "Random seed: %s" % _seed
print >> stderr, ""

print >> stderr, "load word vectors..."
# Load the word-vector input into the word_vectors objects (source and target)
source_word_vectors = WordVectors.load_vectors(source_word_vector_file)
target_word_vectors = WordVectors.load_vectors(target_word_vector_file)
# embsize is the dimensionality of the word vectors
source_embsize = source_word_vectors.embsize()
target_embsize = target_word_vectors.embsize()

print >> stderr, "preparing data..."
# Load the training phrase data, converting the phrases into arrays of
# instances; the second value returned by prepare_data is discarded
source_instances, _, source_total_internal_node = prepare_data(source_word_vectors, source_instances_file)
target_instances, _, target_total_internal_node = prepare_data(target_word_vectors, target_instances_file)

print >> stderr, "init. RAE parameters..."
timer = Timer()
timer.tic()
# Convert a supplied seed to int.
# NOTE(review): `!= None` would idiomatically be `is not None`.
if _seed != None:
    _seed = int(_seed)
else:
    # NOTE(review): the body of this branch lies beyond the visible excerpt.
# NOTE(review): the line breaks of this excerpt were lost; the nesting below
# is reconstructed from syntax only -- confirm against the original file.
# Echo the run configuration to stderr.
print >> stderr, 'Source Word vector file: %s' % src_word_vector_file
print >> stderr, 'Target Word vector file: %s' % trg_word_vector_file
print >> stderr, 'lambda_reg: %20.18f' % lambda_reg
print >> stderr, 'lambda_reg_L: %20.18f' % lambda_reg_L
print >> stderr, 'lambda_reg_rec: %20.18f' % lambda_reg_rec
print >> stderr, 'lambda_reg_sem: %20.18f' % lambda_reg_sem
print >> stderr, 'alpha: %20.18f' % alpha
print >> stderr, 'Max iterations: %d' % maxiter
print >> stderr, 'Max iterations_la: %d' % maxiter_la
# The seed line is only printed when _seed is truthy.
if _seed:
    print >> stderr, 'Random seed: %s' % _seed
print >> stderr, ''

print >> stderr, 'load word vectors...'
# Load the word-vector input into the word_vectors objects (source and target)
src_word_vectors = WordVectors.load_vectors( src_word_vector_file )
trg_word_vectors = WordVectors.load_vectors( trg_word_vector_file )
# embsize is the dimensionality of the word vectors
src_embsize = src_word_vectors.embsize()
trg_embsize = trg_word_vectors.embsize()

print >> stderr, 'preparing data...'
# Load the training phrase data, converting the phrases into arrays of
# instances; the second value returned by prepare_data is discarded
src_instances, _, src_total_internal_node = prepare_data( src_word_vectors, src_instances_file )
trg_instances, _, trg_total_internal_node = prepare_data( trg_word_vectors, trg_instances_file )

print >> stderr, 'init. RAE parameters...'
timer = Timer()
timer.tic()
# Convert a supplied seed to int.
# NOTE(review): `!= None` would idiomatically be `is not None`.
if _seed != None:
    _seed = int(_seed)
else:
    # NOTE(review): the body of this branch lies beyond the visible excerpt.
# Configure logging, echo the run configuration, then load the word vectors
# and the instances before the RAE parameters are initialised.
# NOTE(review): the line breaks of this excerpt were lost; the nesting below
# is reconstructed from syntax only -- confirm against the original file.

# Log at WARNING when checking_grad is set, at INFO otherwise.
if checking_grad:
    logger.setLevel(logging.WARNING)
else:
    logger.setLevel(logging.INFO)

# Echo the run configuration to stderr.
print >> stderr, 'Instances file: %s' % instances_file
print >> stderr, 'Model file: %s' % model
print >> stderr, 'Word vector file: %s' % word_vector_file
print >> stderr, 'lambda_reg: %20.18f' % lambda_reg
print >> stderr, 'Max iterations: %d' % maxiter
# The seed line is only printed when _seed is truthy.
if _seed:
    print >> stderr, 'Random seed: %s' % _seed
print >> stderr, ''

# Load the word vectors from word_vector_file and query their embsize()
# (presumably the embedding dimensionality -- TODO confirm).
print >> stderr, 'load word vectors...'
word_vectors = WordVectors.load_vectors(word_vector_file)
embsize = word_vectors.embsize()

# Build the instances from the instances file; the second value returned by
# prepare_data is discarded.
print >> stderr, 'preparing data...'
instances, _, total_internal_node = prepare_data(word_vectors, instances_file)

print >> stderr, 'init. RAE parameters...'
timer = Timer()
timer.tic()

# Convert a supplied seed to int. When no seed was given, _seed is already
# None, so no else-branch is needed.
if _seed is not None:
    _seed = int(_seed)
print >> stderr, 'seed: %s' % str(_seed)