parser.add_argument('output', help='output file') options = parser.parse_args() phrases_file = options.phrases word_vector_file = options.word_vector_file theta_file = options.theta output_file = options.output print >> stderr, 'load word vectors...' word_vectors = WordVectors.load_vectors(word_vector_file) embsize = word_vectors.embsize() # print "embsize: ",embsize # print "theta file: ",theta_file print >> stderr, 'load RAE parameters...' theta = unpickle(theta_file) rae = RecursiveAutoencoder.build(theta, embsize) total_cost = 0 total_instance_num = 0 total_internal_node_num = 0 print '='*63 print '%20s %20s %20s' % ('all', 'avg/node', 'internal node') print '-'*63 with Reader(phrases_file) as reader, Writer(output_file) as writer: for phrase in reader: instance = Instance.parse_from_str(phrase, word_vectors) words_embedded = word_vectors[instance.words] root_node, cost = rae.forward(words_embedded)
print >> stderr, 'lambda_reg_sem: %20.18f' % lambda_reg_sem print >> stderr, 'alpha: %20.18f' % alpha print >> stderr, 'Max iterations_la: %d' % maxiter_la if _seed: print >> stderr, 'Random seed: %s' % _seed print >> stderr, '' print >> stderr, 'load word vectors...' # 载入词向量的输入放入word_vectors中 src_word_vectors = WordVectors.load_vectors( src_word_vector_file ) trg_word_vectors = WordVectors.load_vectors( trg_word_vector_file ) #embsize为词向量的维度 src_embsize = src_word_vectors.embsize() trg_embsize = trg_word_vectors.embsize() src_theta_opt = unpickle( src_theta_file ) trg_theta_opt = unpickle( trg_theta_file ) theta_opt = [] theta_opt.extend( src_theta_opt ) theta_opt.extend( trg_theta_opt ) src_instances, _, src_total_internal_node, bad_src_instances,\ trg_instances, _, trg_total_internal_node, bad_trg_instances\ = prepare_data_la( src_word_vectors, src_instances_file,\ trg_word_vectors, trg_instances_file ) timer = Timer() timer.tic() if _seed != None: _seed = int(_seed) else:
parser.add_argument('word_vector_file', help='word vector file') parser.add_argument('theta', help='RAE parameter file (pickled)') parser.add_argument('output', help='output file') options = parser.parse_args() phrases_file = options.phrases word_vector_file = options.word_vector_file theta_file = options.theta output_file = options.output print >> stderr, 'load word vectors...' word_vectors = WordVectors.load_vectors(word_vector_file) embsize = word_vectors.embsize() print >> stderr, 'load RAE parameters...' theta = unpickle(theta_file) rae = RecursiveAutoencoder.build(theta, embsize) total_cost = 0 total_instance_num = 0 total_internal_node_num = 0 print '=' * 63 print '%20s %20s %20s' % ('all', 'avg/node', 'internal node') print '-' * 63 with Reader(phrases_file) as reader, Writer(output_file) as writer: for phrase in reader: instance = Instance.parse_from_str(phrase, word_vectors) words_embedded = word_vectors[instance.words] root_node, cost = rae.forward(words_embedded)