Exemplo n.º 1
0
      logger.setLevel(logging.WARN)
    else:
      logger.setLevel(logging.INFO)
        
    # Echo the run configuration to stderr before any work starts.
    print >> stderr, 'Instances file: %s' % instances_file
    print >> stderr, 'Model file: %s' % model
    print >> stderr, 'Word vector file: %s' % word_vector_file
    print >> stderr, 'lambda_reg: %20.18f' % lambda_reg
    print >> stderr, 'Max iterations: %d' % maxiter
    # Only reported when _seed is truthy; a None/empty seed prints nothing.
    if _seed:
      print >> stderr, 'Random seed: %s' % _seed
    print >> stderr, ''

    print >> stderr, 'load Word2Vec Model...'
    word_vectors = WordVectors.load_vectors(word_vector_file)
    embsize = word_vectors.embsize()

    print >> stderr, 'preparing data...'
    # The second element returned by prepare_data is not used here.
    instances, _, total_internal_node = prepare_data(word_vectors, instances_file)

    print >> stderr, 'init. RAE parameters...'
    timer = Timer()
    timer.tic()
    # Coerce a supplied seed to int. None is tested by identity, and a
    # missing seed simply stays None (no explicit re-assignment needed).
    if _seed is not None:
      _seed = int(_seed)
    print >> stderr, 'seed: %s' % str(_seed)

    theta0 = init_theta(embsize, _seed=_seed)
Exemplo n.º 2
0
    # Echo the remaining run configuration to stderr.
    print >> stderr, "Source Word vector file: %s" % source_word_vector_file
    print >> stderr, "Target Word vector file: %s" % target_word_vector_file
    print >> stderr, "lambda_reg: %20.18f" % lambda_reg
    print >> stderr, "lambda_reg_L: %20.18f" % lambda_reg_L
    print >> stderr, "lambda_reg_rec: %20.18f" % lambda_reg_rec
    print >> stderr, "lambda_reg_sem: %20.18f" % lambda_reg_sem
    print >> stderr, "alpha: %20.18f" % alpha
    print >> stderr, "Max iterations: %d" % maxiter
    print >> stderr, "Max iterations_la: %d" % maxiter_la
    # Only reported when _seed is truthy.
    if _seed:
        print >> stderr, "Random seed: %s" % _seed
    print >> stderr, ""

    print >> stderr, "load word vectors..."
    # Load the word vectors from the input files into the *_word_vectors objects.
    source_word_vectors = WordVectors.load_vectors(source_word_vector_file)
    target_word_vectors = WordVectors.load_vectors(target_word_vector_file)
    # embsize is the dimensionality of the word vectors.
    source_embsize = source_word_vectors.embsize()
    target_embsize = target_word_vectors.embsize()

    print >> stderr, "preparing data..."
    # Load the training phrase data, converting the phrases into an array of
    # instances stored in *_instances. The second returned element is unused.
    source_instances, _, source_total_internal_node = prepare_data(source_word_vectors, source_instances_file)
    target_instances, _, target_total_internal_node = prepare_data(target_word_vectors, target_instances_file)
    print >> stderr, "init. RAE parameters..."
    # NOTE(review): Timer.tic() presumably starts timing the initialisation - confirm against Timer.
    timer = Timer()
    timer.tic()
    if _seed != None:
        _seed = int(_seed)
    else:
Exemplo n.º 3
0
        # Echo the remaining run configuration to stderr.
        print >> stderr, 'Source Word vector file: %s' % src_word_vector_file 
        print >> stderr, 'Target Word vector file: %s' % trg_word_vector_file 
        print >> stderr, 'lambda_reg: %20.18f' % lambda_reg
        print >> stderr, 'lambda_reg_L: %20.18f' % lambda_reg_L
        print >> stderr, 'lambda_reg_rec: %20.18f' % lambda_reg_rec
        print >> stderr, 'lambda_reg_sem: %20.18f' % lambda_reg_sem
        print >> stderr, 'alpha: %20.18f' % alpha
        print >> stderr, 'Max iterations: %d' % maxiter
        print >> stderr, 'Max iterations_la: %d' % maxiter_la
        # Only reported when _seed is truthy.
        if _seed:
            print >> stderr, 'Random seed: %s' % _seed
        print >> stderr, ''

        print >> stderr, 'load word vectors...'
        # Load the word vectors from the input files into the *_word_vectors objects.
        src_word_vectors = WordVectors.load_vectors( src_word_vector_file )
        trg_word_vectors = WordVectors.load_vectors( trg_word_vector_file )
        # embsize is the dimensionality of the word vectors.
        src_embsize = src_word_vectors.embsize()
        trg_embsize = trg_word_vectors.embsize()
       
        print >> stderr, 'preparing data...' 
        # Load the training phrase data, converting the phrases into an array of
        # instances stored in *_instances. The second returned element is unused.
        src_instances, _, src_total_internal_node = prepare_data( src_word_vectors, src_instances_file )
        trg_instances, _, trg_total_internal_node = prepare_data( trg_word_vectors, trg_instances_file )
        print >> stderr, 'init. RAE parameters...'
        # NOTE(review): Timer.tic() presumably starts timing the initialisation - confirm against Timer.
        timer = Timer()
        timer.tic()
        if _seed != None:
            _seed = int(_seed)
        else:
Exemplo n.º 4
0
        # Log only warnings and above while gradient checking; INFO otherwise.
        if checking_grad:
            logger.setLevel(logging.WARN)
        else:
            logger.setLevel(logging.INFO)

        # Echo the run configuration to stderr before any work starts.
        print >> stderr, 'Instances file: %s' % instances_file
        print >> stderr, 'Model file: %s' % model
        print >> stderr, 'Word vector file: %s' % word_vector_file
        print >> stderr, 'lambda_reg: %20.18f' % lambda_reg
        print >> stderr, 'Max iterations: %d' % maxiter
        # Only reported when _seed is truthy; a None/empty seed prints nothing.
        if _seed:
            print >> stderr, 'Random seed: %s' % _seed
        print >> stderr, ''

        print >> stderr, 'load word vectors...'
        word_vectors = WordVectors.load_vectors(word_vector_file)
        embsize = word_vectors.embsize()

        print >> stderr, 'preparing data...'
        # The second element returned by prepare_data is not used here.
        instances, _, total_internal_node = prepare_data(
            word_vectors, instances_file)

        print >> stderr, 'init. RAE parameters...'
        timer = Timer()
        timer.tic()
        # Coerce a supplied seed to int. None is tested by identity, and a
        # missing seed simply stays None (no explicit re-assignment needed).
        if _seed is not None:
            _seed = int(_seed)
        print >> stderr, 'seed: %s' % str(_seed)