示例#1
0
    def create_adjustors(self):
        """Create the momentum rule and the momentum / learning-rate adjustors.

        Sets ``self.momentum_adjustor`` and ``self.momentum_rule``
        unconditionally.  ``self.learning_rate_adjustor`` is set only when
        ``self.lr_monitor_decay`` or ``self.lr_lin_decay`` is truthy; the
        monitor-based adjustor takes precedence when both are set.
        """
        # Both schedules run from epoch 1 up to the configured epoch limit.
        first_epoch = 1
        last_epoch = self.max_epochs

        # Nesterov momentum starts at 0.5; the adjustor targets 0.99.
        self.momentum_adjustor = learning_rule.MomentumAdjustor(
            final_momentum=.99, start=first_epoch, saturate=last_epoch)
        self.momentum_rule = learning_rule.Momentum(
            .5, nesterov_momentum=True)

        if self.lr_monitor_decay:
            # Learning rate driven by the 'train_objective' channel.
            monitor_settings = dict(high_trigger=1.,
                                    shrink_amt=0.9,
                                    low_trigger=.95,
                                    grow_amt=1.1,
                                    channel_name='train_objective')
            self.learning_rate_adjustor = MonitorBasedLRAdjuster(
                **monitor_settings)
            return

        if self.lr_lin_decay:
            # The value of lr_lin_decay itself is used as the decay factor.
            self.learning_rate_adjustor = LinearDecayOverEpoch(
                start=first_epoch,
                saturate=last_epoch,
                decay_factor=self.lr_lin_decay)
示例#2
0
# Build the MLP from the previously defined layers; the number of visible
# (input) units is taken from the training dataset.
ann = mlp.MLP(layers, nvis=ds_train.nr_inputs)

#####################################
# Define Training
#####################################

# Cost: the model's own cost (cost_from_X) plus an L1 weight-decay penalty.
# The two coefficients are presumably one per layer -- TODO confirm against
# the layer list.
# NOTE(review): L1_cost is not referenced again in the visible part of this
# script; confirm it is actually passed to the trainer.
L1_cost = PL.costs.cost.SumOfCosts([PL.costs.cost.MethodCost(method='cost_from_X'), PL.costs.mlp.L1WeightDecay(coeffs=[0.1, 0.01])])

# Momentum schedule: the rule starts at 0.5 and the adjustor targets 0.99
# between epoch 1 and epoch 20.
initial_momentum = .5
final_momentum = .99
start = 1
saturate = 20
momentum_adjustor = learning_rule.MomentumAdjustor(final_momentum, start, saturate)
momentum_rule = learning_rule.Momentum(initial_momentum)
 
# Learning-rate schedule.  `start` and `saturate` are re-bound here, so the
# momentum values above are no longer available under those names.
# NOTE(review): start = .1 is passed where the momentum schedule uses an
# epoch number (1); confirm that a fractional start is intended.
start = .1
saturate = 20
decay_factor = .00001
learning_rate_adjustor = sgd.LinearDecayOverEpoch(start, saturate, decay_factor)

# Termination criterion monitoring the 'objective' channel with N=20 and
# prop_decrease=0.0 (presumably: stop once the channel has not decreased
# for 20 epochs -- the earlier "50 epochs" wording did not match N).
termination_criterion = MonitorBased(channel_name='objective', N=20, prop_decrease=0.0)
 
# create Stochastic Gradient Descent trainer 
trainer = sgd.SGD(learning_rate=.001,
                    batch_size=10,
                    monitoring_dataset=ds_valid, 
示例#3
0
def main():
    """Train a one-hidden-layer dropout MLP and track per-epoch MAE.

    Command-line arguments (read from ``sys.argv``):
        1. base_name    -- prefix for the log file, pickled models and
                           saved prediction arrays
        2. n_epoch      -- number of training epochs to run
        3. n_hidden     -- number of units in the hidden layer
        4. include_rate -- dropout include probability (1 - dropout rate)
                           for the input of the output layer

    After every epoch the mean absolute error (MAE) is computed on the
    validation, test, 1000G and GTEx sets and on a fixed sample of 5000
    training rows.  One tab-separated line per epoch is printed and
    appended to ``<base_name>.log``.  The learning rate is multiplied by
    ``decay_factor`` whenever the training-sample MAE went up, and is
    clamped at ``l_rate_min``.  The model and the matching predictions
    are saved whenever the validation MAE or the 1000G MAE reaches a new
    minimum.

    The log and model files are opened with ``with`` so they are closed
    even if training raises, and the float32 casts of the evaluation
    inputs are done once before the epoch loop instead of every epoch.
    """
    base_name = sys.argv[1]  # prefix of every output file
    n_epoch = int(sys.argv[2])  # number of epochs to run
    n_hidden = int(sys.argv[3])  # hidden-layer width
    include_rate = float(sys.argv[4])  # dropout include probability

    in_size = 943  # number of input units
    out_size = 4760  # number of output units
    b_size = 200  # mini-batch size
    l_rate = 5e-4  # initial learning rate
    l_rate_min = 1e-5  # lower bound for the learning rate
    decay_factor = 0.9  # learning-rate multiplier when training MAE rises
    lr_scale = 3.0  # W_lr_scale of the output layer
    momentum = 0.5
    # sqrt(6 / (fan_in + fan_out)) per layer; only entry 0 is used below
    # because the output layer is given a fixed irange.
    init_vals = np.sqrt(6.0/(np.array([in_size, n_hidden])+np.array([n_hidden, out_size])))

    print('loading data...')
    # Training / validation / test splits.
    X_tr = np.load('bgedv2_X_tr_float64.npy')
    Y_tr = np.load('bgedv2_Y_tr_0-4760_float64.npy')
    Y_tr_target = np.array(Y_tr)
    X_va = np.load('bgedv2_X_va_float64.npy')
    Y_va = np.load('bgedv2_Y_va_0-4760_float64.npy')
    Y_va_target = np.array(Y_va)
    X_te = np.load('bgedv2_X_te_float64.npy')
    Y_te = np.load('bgedv2_Y_te_0-4760_float64.npy')
    Y_te_target = np.array(Y_te)

    # Additional evaluation sets.
    X_1000G = np.load('1000G_X_float64.npy')
    Y_1000G = np.load('1000G_Y_0-4760_float64.npy')
    Y_1000G_target = np.array(Y_1000G)
    X_GTEx = np.load('GTEx_X_float64.npy')
    Y_GTEx = np.load('GTEx_Y_0-4760_float64.npy')
    Y_GTEx_target = np.array(Y_GTEx)

    # Fixed seed so the same 5000 training rows are monitored on every run.
    random.seed(0)
    # NOTE(review): 88807 is hard-coded -- presumably the number of rows in
    # X_tr; confirm against the data files.
    monitor_idx_tr = random.sample(range(88807), 5000)

    data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'), y=Y_tr.astype('float32'))
    X_tr_monitor, Y_tr_monitor_target = X_tr[monitor_idx_tr, :], Y_tr_target[monitor_idx_tr, :]

    # Network: tanh hidden layer followed by a linear output layer.
    h1_layer = p2_md_mlp.Tanh(layer_name='h1', dim=n_hidden, irange=init_vals[0], W_lr_scale=1.0, b_lr_scale=1.0)
    o_layer = p2_md_mlp.Linear(layer_name='y', dim=out_size, irange=0.0001, W_lr_scale=lr_scale, b_lr_scale=1.0)
    model = p2_md_mlp.MLP(nvis=in_size, layers=[h1_layer, o_layer], seed=1)

    # Dropout only on the input of the output layer; the input of 'h1' is
    # always kept.  Kept inputs are rescaled by 1 / include_rate.
    dropout_cost = p2_ct_mlp_dropout.Dropout(input_include_probs={'h1':1.0, 'y':include_rate},
                                             input_scales={'h1':1.0,
                                                           'y':np.float32(1.0/include_rate)})
    algorithm = p2_alg_sgd.SGD(batch_size=b_size, learning_rate=l_rate,
                               learning_rule = p2_alg_lr.Momentum(momentum),
                               termination_criterion=p2_termcri.EpochCounter(max_epochs=1000),
                               cost=dropout_cost)
    train = pylearn2.train.Train(dataset=data_tr, model=model, algorithm=algorithm)
    train.setup()

    # Compiled forward pass used to produce predictions for evaluation.
    x = T.matrix()
    y = model.fprop(x)
    f = theano.function([x], y)

    # The evaluation inputs never change, so cast them to float32 once.
    X_va_f32 = X_va.astype('float32')
    X_te_f32 = X_te.astype('float32')
    X_tr_monitor_f32 = X_tr_monitor.astype('float32')
    X_1000G_f32 = X_1000G.astype('float32')
    X_GTEx_f32 = X_GTEx.astype('float32')

    # 10.0 is the initial reference value for every "old"/"best" MAE.
    MAE_va_old = 10.0
    MAE_va_best = 10.0
    MAE_tr_old = 10.0
    MAE_te_old = 10.0
    MAE_1000G_old = 10.0
    MAE_1000G_best = 10.0
    MAE_GTEx_old = 10.0

    with open(base_name + '.log', 'w') as outlog:
        log_str = '\t'.join(map(str, ['epoch', 'MAE_va', 'MAE_va_change', 'MAE_te', 'MAE_te_change',
                                  'MAE_1000G', 'MAE_1000G_change', 'MAE_GTEx', 'MAE_GTEx_change',
                                  'MAE_tr', 'MAE_tr_change', 'learing_rate', 'time(sec)']))
        print(log_str)
        outlog.write(log_str + '\n')
        sys.stdout.flush()

        for epoch in range(0, n_epoch):
            t_old = time.time()
            # One call to the training algorithm on the training set.
            train.algorithm.train(train.dataset)

            # Predictions for every evaluation set.
            Y_va_hat = f(X_va_f32).astype('float64')
            Y_te_hat = f(X_te_f32).astype('float64')
            Y_tr_hat_monitor = f(X_tr_monitor_f32).astype('float64')
            Y_1000G_hat = f(X_1000G_f32).astype('float64')
            Y_GTEx_hat = f(X_GTEx_f32).astype('float64')

            # Mean absolute error against the targets.
            MAE_va = np.abs(Y_va_target - Y_va_hat).mean()
            MAE_te = np.abs(Y_te_target - Y_te_hat).mean()
            MAE_tr = np.abs(Y_tr_monitor_target - Y_tr_hat_monitor).mean()
            MAE_1000G = np.abs(Y_1000G_target - Y_1000G_hat).mean()
            MAE_GTEx = np.abs(Y_GTEx_target - Y_GTEx_hat).mean()

            # Relative change with respect to the previous epoch.
            MAE_va_change = (MAE_va - MAE_va_old)/MAE_va_old
            MAE_te_change = (MAE_te - MAE_te_old)/MAE_te_old
            MAE_tr_change = (MAE_tr - MAE_tr_old)/MAE_tr_old
            MAE_1000G_change = (MAE_1000G - MAE_1000G_old)/MAE_1000G_old
            MAE_GTEx_change = (MAE_GTEx - MAE_GTEx_old)/MAE_GTEx_old

            MAE_va_old = MAE_va
            MAE_te_old = MAE_te
            MAE_tr_old = MAE_tr
            MAE_1000G_old = MAE_1000G
            MAE_GTEx_old = MAE_GTEx

            t_new = time.time()
            l_rate = train.algorithm.learning_rate.get_value()
            log_str = '\t'.join(map(str, [epoch+1, '%.6f'%MAE_va, '%.6f'%MAE_va_change, '%.6f'%MAE_te, '%.6f'%MAE_te_change,
                                      '%.6f'%MAE_1000G, '%.6f'%MAE_1000G_change, '%.6f'%MAE_GTEx, '%.6f'%MAE_GTEx_change,
                                      '%.6f'%MAE_tr, '%.6f'%MAE_tr_change, '%.5f'%l_rate, int(t_new-t_old)]))
            print(log_str)
            outlog.write(log_str + '\n')
            sys.stdout.flush()

            # Shrink the learning rate when the training MAE increased,
            # but never below l_rate_min.
            if MAE_tr_change > 0:
                l_rate = l_rate*decay_factor
            if l_rate < l_rate_min:
                l_rate = l_rate_min

            train.algorithm.learning_rate.set_value(np.float32(l_rate))

            # New best validation MAE: keep the model and its predictions.
            if MAE_va < MAE_va_best:
                MAE_va_best = MAE_va
                with open(base_name + '_bestva_model.pkl', 'wb') as outmodel:
                    pkl.dump(model, outmodel)
                np.save(base_name + '_bestva_Y_te_hat.npy', Y_te_hat)
                np.save(base_name + '_bestva_Y_va_hat.npy', Y_va_hat)

            # New best 1000G MAE: keep the model and its predictions.
            if MAE_1000G < MAE_1000G_best:
                MAE_1000G_best = MAE_1000G
                with open(base_name + '_best1000G_model.pkl', 'wb') as outmodel:
                    pkl.dump(model, outmodel)
                np.save(base_name + '_best1000G_Y_1000G_hat.npy', Y_1000G_hat)
                np.save(base_name + '_best1000G_Y_GTEx_hat.npy', Y_GTEx_hat)

        print('MAE_va_best : %.6f' % (MAE_va_best))
        print('MAE_1000G_best : %.6f' % (MAE_1000G_best))
        outlog.write('MAE_va_best : %.6f' % (MAE_va_best) + '\n')
        outlog.write('MAE_1000G_best : %.6f' % (MAE_1000G_best) + '\n')
 l2 = RectifiedLinear(layer_name='l2',
                      irange=ir,
                      dim=dim,
                      max_col_norm=1.)
 l3 = RectifiedLinear(layer_name='l3',
                      irange=ir,
                      dim=dim,
                      max_col_norm=1.)
 output = Softmax(layer_name='y',
                  n_classes=9,
                  irange=ir,
                  max_col_norm=mcn_out)
 mdl = MLP([l1, l2, l3, output], nvis=X2.shape[1])
 trainer = sgd.SGD(learning_rate=lr,
                   batch_size=bs,
                   learning_rule=learning_rule.Momentum(mm),
                   cost=Dropout(default_input_include_prob=ip,
                                default_input_scale=1 / ip),
                   termination_criterion=EpochCounter(epochs),
                   seed=seed)
 decay = sgd.LinearDecayOverEpoch(start=2, saturate=20, decay_factor=.1)
 experiment = Train(dataset=training,
                    model=mdl,
                    algorithm=trainer,
                    extensions=[decay])
 experiment.main_loop()
 epochs_current = epochs
 for s in range(n_add):
     trainer = sgd.SGD(learning_rate=lr * .1,
                       batch_size=bs,
                       learning_rule=learning_rule.Momentum(mm),
示例#5
0
def main():
    """Train a three-hidden-layer dropout MLP and track per-epoch MAE.

    Command-line arguments (read from ``sys.argv``):
        1. base_name    -- prefix for the log file, pickled models and
                           saved prediction arrays
        2. n_epoch      -- number of training epochs to run
        3. n_hidden     -- number of units in each hidden layer
        4. include_rate -- dropout include probability for the inputs of
                           layers 'h2', 'h3' and 'y'

    After every epoch the mean absolute error (MAE) is computed on the
    validation, test, 1000G and GTEx sets and on a fixed sample of 5000
    training rows.  One tab-separated line per epoch is printed and
    appended to ``<base_name>.log``.  The learning rate is multiplied by
    ``decay_factor`` whenever the training-sample MAE went up, and is
    clamped at ``l_rate_min``.  The model and the matching predictions
    are saved whenever the validation MAE or the 1000G MAE reaches a new
    minimum.

    The log and model files are opened with ``with`` so they are closed
    even if training raises, and the float32 casts of the evaluation
    inputs are done once before the epoch loop instead of every epoch.
    """
    base_name = sys.argv[1]
    n_epoch = int(sys.argv[2])
    n_hidden = int(sys.argv[3])
    include_rate = float(sys.argv[4])

    in_size = 943  # number of input units
    out_size = 4760  # number of output units
    b_size = 200  # mini-batch size
    l_rate = 3e-4  # initial learning rate
    l_rate_min = 1e-5  # lower bound for the learning rate
    decay_factor = 0.9  # learning-rate multiplier when training MAE rises
    lr_scale = 3.0  # W_lr_scale of every layer except 'h1'
    momentum = 0.5
    # sqrt(6 / (fan_in + fan_out)) per layer; entries 0-2 initialise the
    # hidden layers, the output layer is given a fixed irange.
    init_vals = np.sqrt(6.0/(np.array([in_size, n_hidden, n_hidden, n_hidden])+np.array([n_hidden, n_hidden, n_hidden, out_size])))

    print('loading data...')

    # Training / validation / test splits.
    X_tr = np.load('bgedv2_X_tr_float64.npy')
    Y_tr = np.load('bgedv2_Y_tr_4760-9520_float64.npy')
    Y_tr_target = np.array(Y_tr)
    X_va = np.load('bgedv2_X_va_float64.npy')
    Y_va = np.load('bgedv2_Y_va_4760-9520_float64.npy')
    Y_va_target = np.array(Y_va)
    X_te = np.load('bgedv2_X_te_float64.npy')
    Y_te = np.load('bgedv2_Y_te_4760-9520_float64.npy')
    Y_te_target = np.array(Y_te)

    # Additional evaluation sets.
    X_1000G = np.load('1000G_X_float64.npy')
    Y_1000G = np.load('1000G_Y_4760-9520_float64.npy')
    Y_1000G_target = np.array(Y_1000G)
    X_GTEx = np.load('GTEx_X_float64.npy')
    Y_GTEx = np.load('GTEx_Y_4760-9520_float64.npy')
    Y_GTEx_target = np.array(Y_GTEx)

    # Fixed seed so the same 5000 training rows are monitored on every run.
    random.seed(0)
    # NOTE(review): 88807 is hard-coded -- presumably the number of rows in
    # X_tr; confirm against the data files.
    monitor_idx_tr = random.sample(range(88807), 5000)

    data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'), y=Y_tr.astype('float32'))
    X_tr_monitor, Y_tr_monitor_target = X_tr[monitor_idx_tr, :], Y_tr_target[monitor_idx_tr, :]

    # Network: three tanh hidden layers followed by a linear output layer.
    h1_layer = p2_md_mlp.Tanh(layer_name='h1', dim=n_hidden, irange=init_vals[0], W_lr_scale=1.0, b_lr_scale=1.0)
    h2_layer = p2_md_mlp.Tanh(layer_name='h2', dim=n_hidden, irange=init_vals[1], W_lr_scale=lr_scale, b_lr_scale=1.0)
    h3_layer = p2_md_mlp.Tanh(layer_name='h3', dim=n_hidden, irange=init_vals[2], W_lr_scale=lr_scale, b_lr_scale=1.0)
    o_layer = p2_md_mlp.Linear(layer_name='y', dim=out_size, irange=0.0001, W_lr_scale=lr_scale, b_lr_scale=1.0)
    model = p2_md_mlp.MLP(nvis=in_size, layers=[h1_layer, h2_layer, h3_layer, o_layer], seed=1)

    # Dropout on the inputs of 'h2', 'h3' and 'y'; the input of 'h1' is
    # always kept.  Kept inputs are rescaled by 1 / include_rate.
    dropout_cost = p2_ct_mlp_dropout.Dropout(input_include_probs={'h1':1.0, 'h2':include_rate, 'h3':include_rate,
                                                                   'y':include_rate},
                                             input_scales={'h1':1.0, 'h2':np.float32(1.0/include_rate),
                                                           'h3':np.float32(1.0/include_rate),
                                                           'y':np.float32(1.0/include_rate)})

    algorithm = p2_alg_sgd.SGD(batch_size=b_size, learning_rate=l_rate,
                               learning_rule = p2_alg_lr.Momentum(momentum),
                               termination_criterion=p2_termcri.EpochCounter(max_epochs=1000),
                               cost=dropout_cost)

    train = pylearn2.train.Train(dataset=data_tr, model=model, algorithm=algorithm)
    train.setup()

    # Compiled forward pass used to produce predictions for evaluation.
    x = T.matrix()
    y = model.fprop(x)
    f = theano.function([x], y)

    # The evaluation inputs never change, so cast them to float32 once.
    X_va_f32 = X_va.astype('float32')
    X_te_f32 = X_te.astype('float32')
    X_tr_monitor_f32 = X_tr_monitor.astype('float32')
    X_1000G_f32 = X_1000G.astype('float32')
    X_GTEx_f32 = X_GTEx.astype('float32')

    # 10.0 is the initial reference value for every "old"/"best" MAE.
    MAE_va_old = 10.0
    MAE_va_best = 10.0
    MAE_tr_old = 10.0
    MAE_te_old = 10.0
    MAE_1000G_old = 10.0
    MAE_1000G_best = 10.0
    MAE_GTEx_old = 10.0

    with open(base_name + '.log', 'w') as outlog:
        log_str = '\t'.join(map(str, ['epoch', 'MAE_va', 'MAE_va_change', 'MAE_te', 'MAE_te_change',
                                  'MAE_1000G', 'MAE_1000G_change', 'MAE_GTEx', 'MAE_GTEx_change',
                                  'MAE_tr', 'MAE_tr_change', 'learing_rate', 'time(sec)']))
        print(log_str)
        outlog.write(log_str + '\n')
        sys.stdout.flush()

        for epoch in range(0, n_epoch):
            t_old = time.time()
            # One call to the training algorithm on the training set.
            train.algorithm.train(train.dataset)

            # Predictions for every evaluation set.
            Y_va_hat = f(X_va_f32).astype('float64')
            Y_te_hat = f(X_te_f32).astype('float64')
            Y_tr_hat_monitor = f(X_tr_monitor_f32).astype('float64')
            Y_1000G_hat = f(X_1000G_f32).astype('float64')
            Y_GTEx_hat = f(X_GTEx_f32).astype('float64')

            # Mean absolute error against the targets.
            MAE_va = np.abs(Y_va_target - Y_va_hat).mean()
            MAE_te = np.abs(Y_te_target - Y_te_hat).mean()
            MAE_tr = np.abs(Y_tr_monitor_target - Y_tr_hat_monitor).mean()
            MAE_1000G = np.abs(Y_1000G_target - Y_1000G_hat).mean()
            MAE_GTEx = np.abs(Y_GTEx_target - Y_GTEx_hat).mean()

            # Relative change with respect to the previous epoch.
            MAE_va_change = (MAE_va - MAE_va_old)/MAE_va_old
            MAE_te_change = (MAE_te - MAE_te_old)/MAE_te_old
            MAE_tr_change = (MAE_tr - MAE_tr_old)/MAE_tr_old
            MAE_1000G_change = (MAE_1000G - MAE_1000G_old)/MAE_1000G_old
            MAE_GTEx_change = (MAE_GTEx - MAE_GTEx_old)/MAE_GTEx_old

            MAE_va_old = MAE_va
            MAE_te_old = MAE_te
            MAE_tr_old = MAE_tr
            MAE_1000G_old = MAE_1000G
            MAE_GTEx_old = MAE_GTEx

            t_new = time.time()
            l_rate = train.algorithm.learning_rate.get_value()
            log_str = '\t'.join(map(str, [epoch+1, '%.6f'%MAE_va, '%.6f'%MAE_va_change, '%.6f'%MAE_te, '%.6f'%MAE_te_change,
                                      '%.6f'%MAE_1000G, '%.6f'%MAE_1000G_change, '%.6f'%MAE_GTEx, '%.6f'%MAE_GTEx_change,
                                      '%.6f'%MAE_tr, '%.6f'%MAE_tr_change, '%.5f'%l_rate, int(t_new-t_old)]))
            print(log_str)
            outlog.write(log_str + '\n')
            sys.stdout.flush()

            # Shrink the learning rate when the training MAE increased,
            # but never below l_rate_min.
            if MAE_tr_change > 0:
                l_rate = l_rate*decay_factor
            if l_rate < l_rate_min:
                l_rate = l_rate_min

            train.algorithm.learning_rate.set_value(np.float32(l_rate))

            # New best validation MAE: keep the model and its predictions.
            if MAE_va < MAE_va_best:
                MAE_va_best = MAE_va
                with open(base_name + '_bestva_model.pkl', 'wb') as outmodel:
                    pkl.dump(model, outmodel)
                np.save(base_name + '_bestva_Y_te_hat.npy', Y_te_hat)
                np.save(base_name + '_bestva_Y_va_hat.npy', Y_va_hat)

            # New best 1000G MAE: keep the model and its predictions.
            if MAE_1000G < MAE_1000G_best:
                MAE_1000G_best = MAE_1000G
                with open(base_name + '_best1000G_model.pkl', 'wb') as outmodel:
                    pkl.dump(model, outmodel)
                np.save(base_name + '_best1000G_Y_1000G_hat.npy', Y_1000G_hat)
                np.save(base_name + '_best1000G_Y_GTEx_hat.npy', Y_GTEx_hat)

        print('MAE_va_best : %.6f' % (MAE_va_best))
        print('MAE_1000G_best : %.6f' % (MAE_1000G_best))
        outlog.write('MAE_va_best : %.6f' % (MAE_va_best) + '\n')
        outlog.write('MAE_1000G_best : %.6f' % (MAE_1000G_best) + '\n')
示例#6
0
def main():
    """Train a one-hidden-layer dropout MLP on genotype/phenotype data.

    Command-line arguments (read from ``sys.argv``):
        1. base_name    -- prefix for the log file, pickled model and
                           saved prediction arrays
        2. n_epoch      -- number of training epochs to run
        3. n_hidden     -- number of units in the hidden layer
        4. include_rate -- dropout include probability for the input of
                           the output layer

    After every epoch the mean absolute error (MAE) is computed on the
    validation and test sets and on a fixed sample of 5000 training rows.
    One tab-separated line per epoch is printed and appended to
    ``<base_name>.log``.  The learning rate is multiplied by
    ``decay_factor`` whenever the training-sample MAE went up, and is
    clamped at ``l_rate_min``.  The model and its predictions are saved
    whenever the validation MAE reaches a new minimum.

    The training targets are bound to ``Y_tr_target``, the name the
    monitoring code reads (the copy was previously stored under another
    name, so the lookup raised NameError).  The log and model files are
    opened with ``with`` so they are closed even if training raises, and
    the float32 casts of the evaluation inputs are done once before the
    epoch loop.
    """
    base_name = sys.argv[1]  # prefix of every output file
    n_epoch = int(sys.argv[2])  # number of epochs to run
    n_hidden = int(sys.argv[3])  # hidden-layer width
    include_rate = float(sys.argv[4])  # dropout include probability

    in_size = 1001  # number of input units
    out_size = 1  # number of output units
    b_size = 200  # mini-batch size (passed to SGD as batch_size)
    l_rate = 5e-4  # initial learning rate
    l_rate_min = 1e-5  # lower bound for the learning rate
    decay_factor = 0.9  # learning-rate multiplier when training MAE rises
    lr_scale = 3.0  # W_lr_scale of the output layer
    momentum = 0.5
    # sqrt(6 / (fan_in + fan_out)) per layer; only entry 0 is used below
    # because the output layer is given a fixed irange.
    init_vals = np.sqrt(6.0 / (np.array([in_size, n_hidden]) +
                               np.array([n_hidden, out_size])))

    print('loading data...')

    # Training / validation / test splits.
    X_tr = np.load('geno_X_tr_float64.npy')
    Y_tr = np.load('pheno_Y_tr_0-4760_float64.npy')
    Y_tr_target = np.array(Y_tr)
    X_va = np.load('geno_X_va_float64.npy')
    Y_va = np.load('pheno_Y_va_0-4760_float64.npy')
    Y_va_target = np.array(Y_va)
    # NOTE(review): this file name lacks the 'X_' part that the training
    # and validation inputs have -- confirm it matches the file on disk.
    X_te = np.load('geno_te_float64.npy')
    Y_te = np.load('pheno_Y_te_0-4760_float64.npy')
    Y_te_target = np.array(Y_te)

    # Fixed seed so the same 5000 training rows are monitored on every run.
    random.seed(0)
    # NOTE(review): 88807 is hard-coded -- confirm it matches the number of
    # rows in X_tr for this dataset.
    monitor_idx_tr = random.sample(range(88807), 5000)

    data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'),
                                         y=Y_tr.astype('float32'))
    X_tr_monitor = X_tr[monitor_idx_tr, :]
    Y_tr_monitor_target = Y_tr_target[monitor_idx_tr, :]

    # Network: tanh hidden layer followed by a linear output layer.
    h1_layer = p2_md_mlp.Tanh(layer_name='h1',
                              dim=n_hidden,
                              irange=init_vals[0],
                              W_lr_scale=1.0,
                              b_lr_scale=1.0)
    o_layer = p2_md_mlp.Linear(layer_name='y',
                               dim=out_size,
                               irange=0.0001,
                               W_lr_scale=lr_scale,
                               b_lr_scale=1.0)
    model = p2_md_mlp.MLP(nvis=in_size, layers=[h1_layer, o_layer], seed=1)

    # Dropout only on the input of the output layer; the input of 'h1' is
    # always kept.  Kept inputs are rescaled by 1 / include_rate.
    dropout_cost = p2_ct_mlp_dropout.Dropout(
        input_include_probs={'h1': 1.0, 'y': include_rate},
        input_scales={'h1': 1.0, 'y': np.float32(1.0 / include_rate)})
    algorithm = p2_alg_sgd.SGD(
        batch_size=b_size,
        learning_rate=l_rate,
        learning_rule=p2_alg_lr.Momentum(momentum),
        termination_criterion=p2_termcri.EpochCounter(max_epochs=1000),
        cost=dropout_cost)
    train = pylearn2.train.Train(dataset=data_tr,
                                 model=model,
                                 algorithm=algorithm)
    train.setup()

    # Compiled forward pass used to produce predictions for evaluation.
    x = T.matrix()
    y = model.fprop(x)
    f = theano.function([x], y)

    # The evaluation inputs never change, so cast them to float32 once.
    X_va_f32 = X_va.astype('float32')
    X_te_f32 = X_te.astype('float32')
    X_tr_monitor_f32 = X_tr_monitor.astype('float32')

    # 10.0 is the initial reference value for every "old"/"best" MAE.
    MAE_va_old = 10.0
    MAE_va_best = 10.0
    MAE_tr_old = 10.0
    MAE_te_old = 10.0

    with open(base_name + '.log', 'w') as outlog:
        log_str = '\t'.join(
            map(str, [
                'epoch', 'MAE_va', 'MAE_va_change', 'MAE_te', 'MAE_te_change',
                'MAE_tr', 'MAE_tr_change', 'learing_rate', 'time(sec)'
            ]))
        print(log_str)
        outlog.write(log_str + '\n')
        # Push the header out immediately instead of waiting for the
        # stdout buffer to fill.
        sys.stdout.flush()

        for epoch in range(0, n_epoch):
            t_old = time.time()
            # One call to the training algorithm on the training set.
            train.algorithm.train(train.dataset)

            Y_va_hat = f(X_va_f32).astype('float64')
            Y_te_hat = f(X_te_f32).astype('float64')
            Y_tr_hat_monitor = f(X_tr_monitor_f32).astype('float64')

            # Mean absolute error against the targets.
            MAE_va = np.abs(Y_va_target - Y_va_hat).mean()
            MAE_te = np.abs(Y_te_target - Y_te_hat).mean()
            MAE_tr = np.abs(Y_tr_monitor_target - Y_tr_hat_monitor).mean()

            # Relative change with respect to the previous epoch.
            MAE_va_change = (MAE_va - MAE_va_old) / MAE_va_old
            MAE_te_change = (MAE_te - MAE_te_old) / MAE_te_old
            MAE_tr_change = (MAE_tr - MAE_tr_old) / MAE_tr_old

            MAE_va_old = MAE_va
            MAE_te_old = MAE_te
            MAE_tr_old = MAE_tr

            t_new = time.time()
            l_rate = train.algorithm.learning_rate.get_value()
            log_str = '\t'.join(
                map(str, [
                    epoch + 1,
                    '%.6f' % MAE_va,
                    '%.6f' % MAE_va_change,
                    '%.6f' % MAE_te,
                    '%.6f' % MAE_te_change,
                    '%.6f' % MAE_tr,
                    '%.6f' % MAE_tr_change,
                    '%.5f' % l_rate,
                    int(t_new - t_old)
                ]))
            print(log_str)
            outlog.write(log_str + '\n')
            sys.stdout.flush()

            # Shrink the learning rate when the training MAE increased,
            # but never below l_rate_min.
            if MAE_tr_change > 0:
                l_rate = l_rate * decay_factor
            if l_rate < l_rate_min:
                l_rate = l_rate_min

            train.algorithm.learning_rate.set_value(np.float32(l_rate))

            # New best validation MAE: keep the model and its predictions.
            if MAE_va < MAE_va_best:
                MAE_va_best = MAE_va
                with open(base_name + '_bestva_model.pkl', 'wb') as outmodel:
                    pkl.dump(model, outmodel)
                np.save(base_name + '_bestva_Y_te_hat.npy', Y_te_hat)
                np.save(base_name + '_bestva_Y_va_hat.npy', Y_va_hat)

        print('MAE_va_best : %.6f' % (MAE_va_best))
        outlog.write('MAE_va_best : %.6f' % (MAE_va_best) + '\n')
示例#7
0
def main():
    training_data, validation_data, test_data, std_scale = load_training_data()
    kaggle_test_features = load_test_data(std_scale)

    ###############
    # pylearn2 ML
    hl1 = mlp.Sigmoid(layer_name='hl1', dim=200, irange=.1, init_bias=1.)
    hl2 = mlp.Sigmoid(layer_name='hl2', dim=100, irange=.1, init_bias=1.)

    # create Softmax output layer
    output_layer = mlp.Softmax(9, 'output', irange=.1)
    # create Stochastic Gradient Descent trainer that runs for 400 epochs
    trainer = sgd.SGD(learning_rate=.05,
                      batch_size=300,
                      learning_rule=learning_rule.Momentum(.5),
                      termination_criterion=MonitorBased(
                          channel_name='valid_objective',
                          prop_decrease=0.,
                          N=10),
                      monitoring_dataset={
                          'valid': validation_data,
                          'train': training_data
                      })

    layers = [hl1, hl2, output_layer]
    # create neural net
    model = mlp.MLP(layers, nvis=93)

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_objective',
        save_path='pylearn2_results/pylearn2_test.pkl')

    velocity = learning_rule.MomentumAdjustor(final_momentum=.6,
                                              start=1,
                                              saturate=250)
    decay = sgd.LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)
    ######################

    experiment = Train(dataset=training_data,
                       model=model,
                       algorithm=trainer,
                       extensions=[watcher, velocity, decay])

    experiment.main_loop()

    #load best model and test
    ################
    model = serial.load('pylearn2_results/pylearn2_test.pkl')
    # get an prediction of the accuracy from the test_data
    test_results = model.fprop(theano.shared(test_data[0],
                                             name='test_data')).eval()

    print test_results.shape
    loss = multiclass_log_loss(test_data[1], test_results)

    print 'Test multiclass log loss:', loss

    out_file = 'pylearn2_results/' + str(loss) + 'ann'
    #exp.save(out_file + '.pkl')

    #save the kaggle results

    results = model.fprop(
        theano.shared(kaggle_test_features, name='kaggle_test_data')).eval()
    save_results(out_file + '.csv', kaggle_test_features, results)
示例#8
0
# Two-class softmax output layer.
output = mlp.Softmax(n_classes=2, layer_name='y', irange=.01)

# Alternative output layer (hinge loss), currently disabled:
#output = mlp.HingeLoss(layer_name='y',n_classes=2,irange=.05)

# Layer stack; l1..l5 are defined earlier in the script (not visible here).
#layers = [l5, l6, output]
layers = [l1, l2, l3, l4, l5, output]

# Number of visible units = number of elements in one flattened example.
ann = mlp.MLP(layers, nvis=X[0].reshape(-1).shape[0])

lr = 0.1
epochs = 400
# SGD with momentum, stopped by an epoch counter built from `epochs`, and
# monitored on both `ds` ('train') and `ds_test` ('valid').
trainer = sgd.SGD(
    learning_rate=lr,
    batch_size=100,
    learning_rule=learning_rule.Momentum(.05),
    # Dropout cost is disabled here, so no `cost` argument is passed.
    # Remember, default dropout is .5
    #cost=Dropout(input_include_probs={'l1': .5},
    #             input_scales={'l1': 1.}),
    termination_criterion=EpochCounter(epochs),
    monitoring_dataset={
        'train': ds,
        'valid': ds_test
    })

# Save the model to 'saved_clf.pkl' based on the 'valid_roc_auc' channel.
# NOTE(review): nothing in the visible code creates a 'valid_roc_auc'
# channel -- confirm it is added elsewhere (e.g. by an extension).
watcher = best_params.MonitorBasedSaveBest(channel_name='valid_roc_auc',
                                           save_path='saved_clf.pkl')

# Momentum schedule from the initial .05 towards .9, with start=1 and
# saturate=250 (well before the 400-epoch limit).
velocity = learning_rule.MomentumAdjustor(final_momentum=.9,
                                          start=1,
                                          saturate=250)
示例#9
0
    save_path = 'valid_best_fold%d.pkl' % fold
    print save_path
    
    images_train = images[train_index]
    y_train = y[train_index]
    images_train, y_train = shuffle(images_train, y_train, random_state=7)
    X_train = DenseDesignMatrix(X=images_train, y=y_train,view_converter=view_converter)
    
    images_test = images[test_index]
    y_test = y[test_index]
    X_test = DenseDesignMatrix(X=images_test, y=y_test,view_converter=view_converter)
            
    if retrain:
        print "training on", X_train.X.shape, 'testing on', X_test.X.shape
        trainer = sgd.SGD(learning_rate=learn_rate, batch_size=batch_size,
                          learning_rule=learning_rule.Momentum(momentum_start),
                          cost=Dropout(
                                       input_include_probs={'l1':1., 'l2':1., 'l3':1., 'l4':1., 'l5':1., 'l6':1.},
                                       input_scales={'l1':1., 'l2':1., 'l3':1., 'l4':1., 'l5':1., 'l6':1.}
                                       ),
                          termination_criterion=EpochCounter(max_epochs=max_epochs),
                          monitoring_dataset={'train':X_train, 'valid':X_test},
                          )
        
        
        input_space = Conv2DSpace(shape=(central_window_shape, central_window_shape),
                    axes = axes,
                    num_channels = 1)
                    
        ann = mlp.MLP(layers, input_space=input_space)
示例#10
0
    random.seed(0)   #设置生成随机数用的整数起始值。调用任何其他random模块函数之前调用这个函数
    monitor_idx_tr = random.sample(range(88807), 5000)   #监测训练
    #将训练数据集类型设为32位浮点型,The DenseDesignMatrix class and related code Functionality for representing data that can be described as a dense matrix (rather than a sparse matrix) with each row containing an example and each column corresponding to a different feature.
    data_tr = p2_dt_dd.DenseDesignMatrix(X=X_tr.astype('float32'), y=Y_tr.astype('float32'))
    X_tr_monitor, Y_tr_monitor_target = X_tr[monitor_idx_tr, :], Y_tr_target[monitor_idx_tr, :]
    #一个隐层,用Tanh()作激活函数; 输出层用线性函数作激活函数
    h1_layer = p2_md_mlp.Tanh(layer_name='h1', dim=n_hidden, irange=init_vals[0], W_lr_scale=1.0, b_lr_scale=1.0) 
    o_layer = p2_md_mlp.Linear(layer_name='y', dim=out_size, irange=0.0001, W_lr_scale=lr_scale, b_lr_scale=1.0)
    #Multilayer Perceptron;nvis(Number of “visible units” input units)  layers(a list of layer objects,最后1层指定MLP的输出空间) 
    model = p2_md_mlp.MLP(nvis=in_size, layers=[h1_layer, o_layer], seed=1)
    dropout_cost = p2_ct_mlp_dropout.Dropout(input_include_probs={'h1':1.0, 'y':include_rate}, 
                                             input_scales={'h1':1.0, 
                                                           'y':np.float32(1.0/include_rate)})
    #随机梯度下降法
    algorithm = p2_alg_sgd.SGD(batch_size=b_size, learning_rate=l_rate, 
                               learning_rule = p2_alg_lr.Momentum(momentum),
                               termination_criterion=p2_termcri.EpochCounter(max_epochs=1000),
                               cost=dropout_cost)
    #训练 根据前面的定义 :dataset为一个密集型矩阵,model为MLP多层神经网络,algorithm为SGD
    train = pylearn2.train.Train(dataset=data_tr, model=model, algorithm=algorithm)
    train.setup()

    x = T.matrix()             #定义为一个二维数组
    #fprop(state_below) does the forward prop transformation
    y = model.fprop(x)  
    f = theano.function([x], y)  #定义一个function函数,输入为x,输出为y

    MAE_va_old = 10.0      #平均绝对误差
    MAE_va_best = 10.0
    MAE_tr_old = 10.0      #训练误差
    MAE_te_old = 10.0
def main(x):
    """Train a two-hidden-layer ReLU regressor for one hyperparameter setting.

    Parameters
    ----------
    x : indexable
        Hyperparameters, read by position:
        ``x[0]`` units in layer 'l1', ``x[1]`` units in layer 'l2',
        ``x[2]`` learning rate, ``x[3]`` momentum,
        ``x[4]`` dropout include probability for the input of 'l1',
        ``x[5]`` exponential learning-rate decay factor.

    Returns
    -------
    The value returned by ``get_error_from_model`` for the model saved at
    ``output_model_file``.
    """
    l1_units = x[0]
    l2_units = x[1]
    initial_lr = x[2]
    momentum_value = x[3]
    l1_include_prob = x[4]
    lr_decay_factor = x[5]

    lr_floor = 1e-7

    def read_xy(csv_path):
        # The last CSV column is the target; all columns before it are
        # features.  The target is reshaped in place to a column vector.
        table = np.loadtxt(csv_path, delimiter=',')
        features = table[:, 0:-1]
        target = table[:, -1]
        target.shape = (target.shape[0], 1)
        return features, target

    x_train, y_train = read_xy(train_file)
    x_valid, y_valid = read_xy(validation_file)

    train_set = DenseDesignMatrix(X=x_train, y=y_train)
    valid_set = DenseDesignMatrix(X=x_valid, y=y_valid)

    # Hidden layers constrain the column norms of their weights
    # (max_col_norm) instead of using weight decay.
    hidden_1 = mlp.RectifiedLinear(layer_name='l1',
                                   irange=.001,
                                   dim=l1_units,
                                   max_col_norm=1.)
    hidden_2 = mlp.RectifiedLinear(layer_name='l2',
                                   irange=.001,
                                   dim=l2_units,
                                   max_col_norm=1.)
    linear_out = mlp.Linear(dim=1, layer_name='y', irange=.0001)

    mdl = mlp.MLP([hidden_1, hidden_2, linear_out], nvis=x_train.shape[1])

    # Learning-rate decay is attached to the trainer as an update callback.
    lr_decay = sgd.ExponentialDecay(decay_factor=lr_decay_factor,
                                    min_lr=lr_floor)

    # Dropout is configured for the input of 'l1' only, with scale 1.
    dropout_cost = Dropout(input_include_probs={'l1': l1_include_prob},
                           input_scales={'l1': 1.})

    # Termination is driven by 'valid_objective' with N=10 and
    # prop_decrease=0.001 (0.1% of the objective, per the original note).
    stop_rule = MonitorBased(channel_name="valid_objective",
                             prop_decrease=0.001,
                             N=10)

    trainer = sgd.SGD(learning_rate=initial_lr,
                      batch_size=128,
                      learning_rule=learning_rule.Momentum(momentum_value),
                      update_callbacks=[lr_decay],
                      cost=dropout_cost,
                      termination_criterion=stop_rule,
                      monitoring_dataset={'train': train_set,
                                          'valid': valid_set})

    # Save the model tracked by the 'valid_objective' channel.
    save_best = best_params.MonitorBasedSaveBest(
        channel_name='valid_objective', save_path=output_model_file)

    experiment = Train(dataset=train_set,
                       model=mdl,
                       algorithm=trainer,
                       extensions=[save_best])
    experiment.main_loop()

    # Score the saved model, not the final training state.
    error = get_error_from_model(output_model_file)
    print("*** error: {} ***".format(error))
    return error