Пример #1
0
    def loop(weights, training, score, gradient, learningrate, reducelr,
             gdfunction, maxratio):
        '''A gradient descent loop for either batch or stochastic passes.

        Repeatedly applies gdfunction to produce candidate weights.
        Candidates are accepted when the training RMSE drops; otherwise the
        step is retried with a reduced learning rate.  Learning stops once
        an accepted step improves the error by less than the maxratio
        threshold (i.e. diminishing returns).

        weights      -- initial weight vector
        training     -- list of DataPoint instances to train on
        score        -- function [weights features --> score]
        gradient     -- function [score feature label --> gradient]
        learningrate -- learning rate parameter (lambda)
        reducelr     -- function [number --> number] to reduce learning rate
        gdfunction   -- function with a signature like GradientDescent.batch
        maxratio     -- maximum newerror:olderror ratio before stopping

        Returns the final accepted weight vector.
        '''
        INDENT = '  '
        print INDENT * 2 + 'Initial learning rate:', learningrate

        # initialize the error (RMSE of the starting weights on the
        # training set) so the first iteration has a baseline to beat
        error = stats.rmse(
            [score(weights, dp.features) for dp in training],
            [dp.label for dp in training])
        print INDENT * 2 + 'Initial Training RMSE:', error

        # loop
        while True:

            # calculate new weights & error
            newweights = gdfunction(weights, training, score, gradient,
                                    learningrate)
            try:
                newerror = stats.rmse(
                    [score(newweights, dp.features) for dp in training],
                    [dp.label for dp in training])
            except OverflowError:
                # the step diverged; force the retry branch below by
                # making newerror strictly worse than the current error
                newerror = error + 1
                print INDENT * 3 + 'Training RMSE: Overflow'
            else:
                # 'v' marks an improving step, '^' a worsening one
                if newerror <= error:
                    print INDENT * 2 + 'Training RMSE: v', newerror
                else:
                    print INDENT * 3 + 'Training RMSE: ^', newerror

            # figure out what to do next
            if newerror <= error:
                # ratio must be computed BEFORE error is overwritten
                ratio = newerror / error
                # error went down; accept error and weights
                error = newerror
                weights = newweights
                # do we stop?  ratio near 1 means negligible improvement
                if ratio > maxratio:
                    print INDENT * 2 + 'Finished learning; error ratio:',
                    print ratio, '>', maxratio
                    break
            else:
                # error went up; retry with a smaller lambda
                learningrate = reducelr(learningrate)
                print INDENT * 3 + 'Retrying with learning rate:', learningrate

        # done
        return weights
Пример #2
0
def comparing_stat_patch_list(pred, y):
    """Build legend patches comparing distribution stats of pred vs y.

    Blue patches describe the reference series ``y``, red patches describe
    the predictions ``pred``, and a single black patch carries the RMSE
    between the two.  Returned in the order: y stats, pred stats, RMSE.
    """
    def _dist_patches(values, color):
        # One patch per statistic; label formats match the plot legend.
        return [
            mpatches.Patch(color=color,
                           label='mean:' + ('%03.6f' % np.mean(values))),
            mpatches.Patch(color=color,
                           label='std:' + ('%03.6f' % np.std(values))),
            mpatches.Patch(color=color,
                           label='skewness:' +
                           ('%03.3f' % stats.skewness(values))),
            mpatches.Patch(color=color,
                           label='kurtosis:' +
                           ('%03.3f' % stats.kurtosis(values))),
        ]

    reference_patches = _dist_patches(y, 'blue')
    prediction_patches = _dist_patches(pred, 'red')
    error_patch = mpatches.Patch(
        color='black', label='RMSE:' + ('%03.6f' % stats.rmse(pred, y)))
    return reference_patches + prediction_patches + [error_patch]
Пример #3
0
def main2(gdname, gdfunction, training, regression, learningrate):
    '''Perform gradient descent with a learner and analyze the results.

    gdname       -- short name of the descent variant, used in the output
                    filename
    gdfunction   -- function with a signature like GradientDescent.batch
    training     -- list of DataPoint instances to train on
    regression   -- object/module providing .model and .gradient
    learningrate -- either a plain number (fixed rate) or an
                    (initial_rate, reduce_function) pair for a dynamic rate

    Evaluates on the module-level ``testing`` set (defined elsewhere in the
    file -- TODO confirm) and writes ROC (fpr, tpr) pairs to a file named
    from the regression, descent variant, learning rate and AUC.
    '''

    # learning rate: a pair unpacks as (initial, reducer); a bare number
    # raises TypeError on unpacking and falls back to a fixed rate
    try:
        initiallr, reducelr = learningrate
        suffix = 'dynamic' + str(initiallr)
    except TypeError:
        initiallr = learningrate
        reducelr = lambda x: x
        suffix = initiallr

    # learn, starting from an all-zero weight vector; 0.99 is the
    # maxratio stopping threshold for GradientDescent.loop
    weights = GradientDescent.loop(
        len(training[0].features) * [0.0],
        training,
        regression.model,
        regression.gradient,
        initiallr,
        reducelr,
        gdfunction,
        0.99)

    # test on the held-out set
    terror = stats.rmse(
        [regression.model(weights, dp.features) for dp in testing],
        [dp.label for dp in testing])
    print INDENT * 2 + 'Testing RMSE:', terror

    ## produce a result set
    results = [resultset.DataResult(dp.label,
                                    regression.model(weights, dp.features)) \
               for dp in testing]

##    ## find a good operating point
##    op = resultset.minerrop(results)
##    print INDENT * 2 + 'Operating Point:', op

##    ## assign predictions
##    results = resultset.applyop(op, results)

    ## output roc data
    roc = resultset.rocdata(results)
    auc = resultset.auc(roc)
    with open('{}-{}_lambda={}_auc={}'.format(regression.__name__, gdname,
                                       suffix, auc).lower(),
              mode='wb') as fd:
        for fpr, tpr in roc:
            fd.write('{}, {}\n'.format(fpr, tpr))
Пример #4
0
        # prediction
        # NOTE(review): this fragment's enclosing function starts before the
        # visible source; classifierResult presumably maps anomaly scores to
        # actual values -- confirm against the surrounding loop.
        # Sorting (score, value) pairs descending and taking the first gives
        # the highest-confidence prediction.
        confidence, prediction = sorted(zip(classifierResult[1],
                                            classifierResult["actualValues"]),
                                        reverse=True)[0]

    #print json.dumps(REZ)

    # hamming distances
#  print("---")
#  keys = list(hamming.keys())
#  keys.sort()
#  bits = hamming[keys[100]]
#  prev = None
#  for i in range(len(keys)):
#    if prev is not None:
#      print "%s:\tprev=%s\tvalue=%s" % (i,
#         numpy.count_nonzero(prev != hamming[keys[i]]),
#         numpy.count_nonzero(bits != hamming[keys[i]]))
#    prev = hamming[keys[i]]

# error stats
    # summarize prediction quality over the whole run: negative
    # log-likelihood, mean absolute error, mean absolute percentage
    # error, and root-mean-square error
    errorNll = nll(consumptions, predictions)
    errorMae = mae(consumptions, predictions)
    errorMape = mape(consumptions, predictions)
    errorRmse = rmse(consumptions, predictions)
    print("---")
    print("mape: %s" % errorMape)
    print("mae: %s" % errorMae)
    print("rmse: %s" % errorRmse)
    print("nll_1000: %s" % errorNll)
Пример #5
0
def alphasense_compute(dataFrame, t_incl=False, h_incl=False):
    '''Evaluate Alphasense NO2/O3 correction formulas 1-4 for every sensor.

    dataFrame -- 2-D array; columns 1 and 2 appear to hold the NO2 and O3
                 reference measurements, and per-sensor channel groups start
                 at column 3.  NOTE(review): layout inferred from the
                 indexing below -- confirm against the caller.
    t_incl    -- each sensor group carries a leading temperature column
    h_incl    -- each sensor group carries a leading humidity column

    Returns (err_no2, err_o3): arrays of shape (n_sensors, 4, 5) holding
    [mae, rmse, mape, coeff_deter, pearson] per sensor per formula.
    '''
    col_skip = 3
    # Each sensor occupies a contiguous column group: the optional
    # temperature/humidity columns first, then 2 NO2 electrode columns,
    # then 2 OX electrode columns.  This unifies the four hard-coded
    # offset branches of the original (t&h: step 6/offset 2; one of them:
    # step 5/offset 1; neither: step 4/offset 0).
    extra = int(bool(t_incl)) + int(bool(h_incl))
    group = 4 + extra
    col_no2 = (lambda i: range((col_skip + group * i + extra),
                               (col_skip + group * i + extra + 2)))
    col_ox = (lambda i: range((col_skip + group * i + extra + 2),
                              (col_skip + group * i + extra + 4)))

    err_no2 = np.zeros([len(NO2_WE_0T), 4, 5])
    err_o3 = np.zeros([len(NO2_WE_0T), 4, 5])

    # the five error metrics, in the slot order used by the result arrays
    metrics = (stats.mae, stats.rmse, stats.mape, stats.coeff_deter,
               stats.pearson)

    def _record(err, sensor, formula, truth, prediction):
        # Fill the five metric slots for one sensor/formula combination.
        for k, metric in enumerate(metrics):
            err[sensor, formula, k] = metric(truth, prediction)

    # iterate over sensors until the column group runs past the frame
    for i in xrange(np.size(dataFrame, 1)):
        if col_ox(i)[-1] >= np.size(dataFrame, 1):
            break

        nx = dataFrame[:, col_no2(i)]
        ox = dataFrame[:, col_ox(i)]

        # formula 1 (electronic-offset correction only)
        pred_no2 = formula1(nx[:, 0], nx[:, 1], NO2_WE_0E[i], NO2_AE_0E[i],
                            NTn, SENSITIVITY_NO2[i])
        _record(err_no2, i, 0, dataFrame[:, 1], pred_no2)

        pred = formula1(ox[:, 0], ox[:, 1], O3_WE_0E[i], O3_AE_0E[i], NTo,
                        SENSITIVITY_O3[i])
        # the OX electrode responds to NO2 + O3, so subtract the NO2
        # estimate before scoring against the O3 reference
        _record(err_o3, i, 0, dataFrame[:, 2], pred - pred_no2)

        # formula 2
        pred_no2 = formula2(nx[:, 0], nx[:, 1], NO2_WE_0E[i], NO2_AE_0E[i],
                            NO2_WE_0T[i], NO2_AE_0T[i], KTn,
                            SENSITIVITY_NO2[i])
        # BUG FIX: the original scored the stale formula-1 OX prediction
        # ('pred') in these slots instead of the fresh NO2 prediction,
        # unlike formulas 1, 3 and 4 which all score pred_no2.
        _record(err_no2, i, 1, dataFrame[:, 1], pred_no2)

        pred = formula2(ox[:, 0], ox[:, 1], O3_WE_0E[i], O3_AE_0E[i],
                        O3_WE_0T[i], O3_AE_0T[i], KTo, SENSITIVITY_O3[i])
        _record(err_o3, i, 1, dataFrame[:, 2], pred - pred_no2)

        # formula 3
        pred_no2 = formula3(nx[:, 0], nx[:, 1], NO2_WE_0E[i], NO2_AE_0E[i],
                            NO2_WE_0T[i], NO2_AE_0T[i], K_Tn,
                            SENSITIVITY_NO2[i])
        _record(err_no2, i, 2, dataFrame[:, 1], pred_no2)

        pred = formula3(ox[:, 0], ox[:, 1], O3_WE_0E[i], O3_AE_0E[i],
                        O3_WE_0T[i], O3_AE_0T[i], K_To, SENSITIVITY_O3[i])
        _record(err_o3, i, 2, dataFrame[:, 2], pred - pred_no2)

        # formula 4
        pred_no2 = formula4(nx[:, 0], nx[:, 1], NO2_WE_0E[i], NO2_AE_0E[i],
                            NO2_WE_0T[i], NO2_AE_0T[i], K__Tn,
                            SENSITIVITY_NO2[i])
        _record(err_no2, i, 3, dataFrame[:, 1], pred_no2)

        pred = formula4(ox[:, 0], ox[:, 1], O3_WE_0E[i], O3_AE_0E[i],
                        O3_WE_0T[i], O3_AE_0T[i], K__To, SENSITIVITY_O3[i])
        _record(err_o3, i, 3, dataFrame[:, 2], pred - pred_no2)

    return err_no2, err_o3