# Assumed imports for this excerpt (not shown): numpy as np, pylearn2.datasets
# as ds, plus os and pickle; psda and da are project-specific data-access modules.
def convert_to_pylearn_ds(train, valid, test):
    """Convert lists of (X, y) pairs into pylearn2 DenseDesignMatrix datasets."""
    train_X, train_y = map(np.array, zip(*train))
    valid_X, valid_y = map(np.array, zip(*valid))
    test_X, test_y = map(np.array, zip(*test))

    # wrap each split in a pylearn2 dataset
    return ds.DenseDesignMatrix(X=train_X, y=train_y),\
        ds.DenseDesignMatrix(X=valid_X, y=valid_y),\
        ds.DenseDesignMatrix(X=test_X, y=test_y)


def create_dataset(schema, tables, ids, n_classes, which=None):
    """Build a dataset of whole-home 'use' windows labeled by the fraction of
    usage drawn by air1 + furnace1 at prediction_index, binned into n_classes
    one-hot classes. window_length, window_stride, prediction_index,
    classes_to_onehot and args are module-level values set elsewhere."""
    all_instances = psda.generate_instances_for_appliances_by_dataids(
        schema, tables, ['use', 'air1', 'furnace1'], ids, sample_rate='15T')

    X_arrays = []
    y_arrays = []
    # left edges of n_classes equal-width bins on [0, 1)
    sorted_classes = np.linspace(0, 1, n_classes + 1)[:-1]
    for instances, dataid in zip(all_instances, ids):
        use = instances[0].traces[0]
        use.series.fillna(0, inplace=True)
        # clip to a tiny positive value so the ratio below never divides by zero
        use.series = use.series.astype(float).clip(0.0000001)
        air1 = instances[1].traces[0]
        furnace1 = instances[2].traces[0]
        total_air = da.utils.aggregate_traces([air1, furnace1], {})
        total_air.series.fillna(0, inplace=True)
        total_air.series = total_air.series.astype(float)
        ratio_series = total_air.series / use.series
        ratios = da.appliance.ApplianceTrace(ratio_series, {})
        use_windows = use.get_windows(window_length, window_stride)
        ratio_windows = ratios.get_windows(window_length, window_stride)
        X_arrays.append(use_windows)
        # label each window by the binned ratio at prediction_index
        ratio_windows = ratio_windows[:, prediction_index].clip(0, 1)
        classes = np.searchsorted(sorted_classes, ratio_windows,
                                  side='right') - 1
        y_arrays.append(classes_to_onehot(classes, n_classes))
    X = np.concatenate(X_arrays, axis=0)
    y = np.concatenate(y_arrays, axis=0)
    dataset = ds.DenseDesignMatrix(X=X, y=y)
    with open(os.path.join(args.data_dir, args.prefix + '_' + which + '.pkl'),
              'w') as f:
        pickle.dump(dataset, f)
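    # (inside a loop over home ids i) collect each home's pickled validation and
    # test splits; the matching train_car reads are not shown in this excerpt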
    with open("../../data/pylearn2/valid_car_{:02d}.pkl".format(i), 'r') as f:
        valid_sets.append(pickle.load(f))

    with open("../../data/pylearn2/test_car_{:02d}.pkl".format(i), 'r') as f:
        test_sets.append(pickle.load(f))

train_X = np.concatenate([train_set.X for train_set in train_sets], axis=0)
valid_X = np.concatenate([valid_set.X for valid_set in valid_sets], axis=0)
test_X = np.concatenate([test_set.X for test_set in test_sets], axis=0)
train_y = np.concatenate([train_set.y for train_set in train_sets], axis=0)
valid_y = np.concatenate([valid_set.y for valid_set in valid_sets], axis=0)
test_y = np.concatenate([test_set.y for test_set in test_sets], axis=0)

print train_X.shape
print valid_X.shape
print test_X.shape

train_set = ds.DenseDesignMatrix(X=train_X, y=train_y)
valid_set = ds.DenseDesignMatrix(X=valid_X, y=valid_y)
test_set = ds.DenseDesignMatrix(X=test_X, y=test_y)

with open("../../data/pylearn2/train_car_all.pkl", 'w') as f:
    pickle.dump(train_set, f)

with open("../../data/pylearn2/valid_car_all.pkl", 'w') as f:
    pickle.dump(valid_set, f)

with open("../../data/pylearn2/test_car_all.pkl", 'w') as f:
    pickle.dump(test_set, f)
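    # build appliance-detection train/valid/test arrays straight from the
    # database; 24 * 4 * 7 and 24 * 4 give one-week windows strided by one day,
    # assuming 15-minute samples (96 per day)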
    tables = [
        u'validated_01_2014',
        u'validated_02_2014',
        u'validated_03_2014',
        u'validated_04_2014',
        u'validated_05_2014',
    ]

    db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
    psda.set_url(db_url)

    window_length = 24 * 4 * 7
    window_stride = 24 * 4
    train, valid, test = psda.get_appliance_detection_arrays(
        schema, tables, args.appliance, window_length, window_stride, 10)
    train_dataset = ds.DenseDesignMatrix(X=train[0], y=train[1])
    valid_dataset = ds.DenseDesignMatrix(X=valid[0], y=valid[1])
    test_dataset = ds.DenseDesignMatrix(X=test[0], y=test[1])

    with open(
            '{data_dir}/{prefix}_train.pkl'.format(data_dir=args.data_dir,
                                                   prefix=args.prefix),
            'w') as f:
        pickle.dump(train_dataset, f)

    with open(
            '{data_dir}/{prefix}_valid.pkl'.format(data_dir=args.data_dir,
                                                   prefix=args.prefix),
            'w') as f:
        pickle.dump(valid_dataset, f)
Example #5
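# Assumed imports for this example (omitted from the excerpt); the pylearn2
# module paths are the library's standard ones, and epochsMode is a module-level
# setting defined elsewhere in the original script.
import time

import numpy
import requests
import theano
from pylearn2 import datasets
from pylearn2.models import mlp
from pylearn2.termination_criteria import EpochCounter
from pylearn2.training_algorithms import sgd

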
def evaluateTeam(eventCodeList, teamNumber):
    """Pull a team's matches from The Blue Alliance and train two small nets:
    one estimating boulder scoring and one estimating defense crossings."""
    print "###   saving example dataset for team #" + str(teamNumber)
    roboDataset = [[[], []], [[], []]]
    reattempts = 10
    brokenRequest = False
    for event in eventCodeList:
        r = []
        print "requesting matches at", event, "for team", teamNumber
        rStr = 'http://www.thebluealliance.com/api/v2/team/frc' + str(
            teamNumber
        ) + '/event/' + event + '/matches?X-TBA-App-Id=frc4534:auto-scouting:2'
        try:
            r = requests.get(rStr)
        except:
            print "first match event request for team", teamNumber, "failed, beginning reattempts", r
            time.sleep(2)
            while r == [] and reattempts > 0:
                try:
                    r = requests.get(rStr)
                except:
                    pass
                time.sleep(2)
                reattempts -= 1
                print reattempts, "more attempts to request matches at", event, "for team", teamNumber
            if r == []:
                brokenRequest = True
        if brokenRequest:
            print "broken request, team", teamNumber, ", event:", event, ". internet disconnected/unreachable?"
        if not brokenRequest:
            for i in r.json():
                try:
                    stringMatchData = [[], []]
                    numMatchData = [[], []]
                    if "frc" + str(
                            teamNumber) in i['alliances']['blue']['teams']:
                        alliance = 'blue'
                    elif "frc" + str(
                            teamNumber) in i['alliances']['red']['teams']:
                        alliance = 'red'
                    else:
                        alliance = 'team is not in match alliances...'
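                    # position 1 is always the low bar in the 2016 game, so it is
                    # hard-coded; positions 2-5 come from the score breakdown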
                    stringMatchData[0].append('E_LowBar')
                    stringMatchData[0].append(
                        i['score_breakdown'][alliance]['position2'])
                    stringMatchData[0].append(
                        i['score_breakdown'][alliance]['position3'])
                    stringMatchData[0].append(
                        i['score_breakdown'][alliance]['position4'])
                    stringMatchData[0].append(
                        i['score_breakdown'][alliance]['position5'])
                    stringMatchData[1].append(
                        i['score_breakdown'][alliance]['position1crossings'])
                    stringMatchData[1].append(
                        i['score_breakdown'][alliance]['position2crossings'])
                    stringMatchData[1].append(
                        i['score_breakdown'][alliance]['position3crossings'])
                    stringMatchData[1].append(
                        i['score_breakdown'][alliance]['position4crossings'])
                    stringMatchData[1].append(
                        i['score_breakdown'][alliance]['position5crossings'])
                    incompleteMatchDataError = False
                    # map defense names to the numeric codes used as inputs
                    defense_codes = {
                        'E_LowBar': 0,
                        'A_Portcullis': 1,
                        'A_ChevalDeFrise': 2,
                        'B_Moat': 3,
                        'B_Ramparts': 4,
                        'C_Drawbridge': 5,
                        'C_SallyPort': 6,
                        'D_RockWall': 7,
                        'D_RoughTerrain': 8,
                    }
                    for j in stringMatchData[0]:
                        if j in defense_codes:
                            numMatchData[0].append(defense_codes[j])
                        else:
                            incompleteMatchDataError = True
                    for j in stringMatchData[1]:
                        numMatchData[1].append(j)
                    if not incompleteMatchDataError:
                        for j in numMatchData[0]:
                            roboDataset[0][0].append([j])
                        for j in numMatchData[1]:
                            roboDataset[0][1].append([j])
                        #roboDataset[0][0].append(numMatchData[0])
                        #roboDataset[0][1].append(numMatchData[1])

                    roboDataset[1][1].append([
                        i['score_breakdown'][alliance]['autoBouldersLow'],
                        i['score_breakdown'][alliance]['autoBouldersHigh'],
                        i['score_breakdown'][alliance]['teleopBouldersLow'],
                        i['score_breakdown'][alliance]['teleopBouldersHigh']
                    ])
                    roboDataset[1][0].append([0])
                except:
                    # skip matches with missing or malformed score breakdowns
                    print "exception in event " + event + ", team " + str(
                        teamNumber) + ", match #" + str(i['match_number'])

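    # Boulder-scoring network: a small MLP with a single constant input
    # (every X row is [0], nvis=1) and four linear outputs, one per boulder
    # stat (auto low/high, teleop low/high); with a constant input it in
    # effect learns the team's average boulder counts.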
    hidden_layer_1 = mlp.Tanh(layer_name='hidden1',
                              dim=16,
                              irange=.1,
                              init_bias=1.)
    hidden_layer_2 = mlp.Tanh(layer_name='hidden2',
                              dim=8,
                              irange=.1,
                              init_bias=1.)
    output_layer = mlp.Linear(layer_name='output',
                              dim=4,
                              irange=.1,
                              init_bias=1.)
    layers = [hidden_layer_1, hidden_layer_2, output_layer]
    trainer = sgd.SGD(learning_rate=.05,
                      batch_size=10,
                      termination_criterion=EpochCounter(epochsMode))
    ann = mlp.MLP(layers, nvis=1)
    roboDataset[1][0] = numpy.array(roboDataset[1][0])
    roboDataset[1][1] = numpy.array(roboDataset[1][1])
    try:
        ds = datasets.DenseDesignMatrix(X=roboDataset[1][0],
                                        y=roboDataset[1][1])
    except IndexError:
        print "IndexError in dataset creation for team", teamNumber, ",", "length of dataset=", len(
            roboDataset[1])
    ret = [[], []]
    start = time.time()
    # only train for teams with enough matches to give reasonably reliable results
    if len(roboDataset[1][1]) > 4:
        print "Scoring team", teamNumber, "in goals"
        trainer.setup(ann, ds)
        print "training for <=", epochsMode, "epochs (team", teamNumber, ")"
        while True:
            trainer.train(dataset=ds)
            ann.monitor.report_epoch()
            if not trainer.continue_learning(ann):
                break
        print "network training time:", int(
            time.time() - start), "seconds for team", teamNumber
        inputs = numpy.array([[0]])
        for i in ann.fprop(theano.shared(inputs, name='inputs')).eval()[0]:
            ret[0].append(i)

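    # Defense-crossing network: same architecture but one output; each X row is
    # the code (0-8) of a defense in one of the alliance's five positions and y
    # is that defense's crossing count, so evaluating codes 0-8 below estimates
    # the team's expected crossings per defense.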
    hidden_layer_1 = mlp.Tanh(layer_name='hidden1',
                              dim=16,
                              irange=.1,
                              init_bias=1.)
    hidden_layer_2 = mlp.Tanh(layer_name='hidden2',
                              dim=8,
                              irange=.1,
                              init_bias=1.)
    output_layer = mlp.Linear(layer_name='output',
                              dim=1,
                              irange=.1,
                              init_bias=1.)
    layers = [hidden_layer_1, hidden_layer_2, output_layer]
    trainer = sgd.SGD(learning_rate=.05,
                      batch_size=10,
                      termination_criterion=EpochCounter(epochsMode))
    ann = mlp.MLP(layers, nvis=1)
    roboDataset[0][0] = numpy.array(roboDataset[0][0])
    roboDataset[0][1] = numpy.array(roboDataset[0][1])
    try:
        ds = datasets.DenseDesignMatrix(X=roboDataset[0][0],
                                        y=roboDataset[0][1])
    except IndexError:
        print "IndexError in dataset creation for team", teamNumber, ",", "length of dataset=", len(
            roboDataset[0][1])
    start = time.time()
    # only train for teams with enough matches to give reasonably reliable results
    if len(roboDataset[0][1]) > 4:
        print "Scoring team", teamNumber, "in defenses"
        trainer.setup(ann, ds)
        print "training for <=", epochsMode, "epochs (team", teamNumber, ")"
        while True:
            trainer.train(dataset=ds)
            ann.monitor.report_epoch()
            if not trainer.continue_learning(ann):
                break
        print "network training time:", int(
            time.time() - start), "seconds for team", teamNumber
        # inputs = numpy.array([[0]])
        inputs = [[0], [1], [2], [3], [4], [5], [6], [7], [8]]
        for i in inputs:
            ret[1].append(
                ann.fprop(theano.shared(numpy.array([i]),
                                        name='inputs')).eval()[0][0])
        # for i in ann.fprop(theano.shared(inputs, name='inputs')).eval()[0]:
        #     ret.append(i)
    return ret
Example #6
import argparse
import os
import pickle

import numpy as np
import pylearn2.datasets as ds

parser = argparse.ArgumentParser()
parser.add_argument("data_dir", help="data directory")
parser.add_argument("old_prefix", help="prefix for old files")
parser.add_argument("new_prefix", help="prefix for new files")
args = parser.parse_args()

for name in ["train", "valid", "test"]:
    old_filename = os.path.join(args.data_dir,
                                args.old_prefix + "_" + name + ".pkl")
    new_filename = os.path.join(args.data_dir,
                                args.new_prefix + "_" + name + ".pkl")
    with open(old_filename, 'r') as f:
        dataset = pickle.load(f)

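    # keep only the examples whose one-hot label is not class 0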
    new_dataset_X = []
    new_dataset_y = []
    for input_array, class_array in zip(dataset.X, dataset.y):
        class_ = np.argmax(class_array)
        if not class_ == 0:
            new_dataset_X.append(input_array)
            new_dataset_y.append(class_array)

    new_dataset = ds.DenseDesignMatrix(X=np.array(new_dataset_X),
                                       y=np.array(new_dataset_y))

    with open(new_filename, 'w') as f:
        pickle.dump(new_dataset, f)