Example #1
# The opening of this fit call is missing from the example; the arguments
# below imply a Keras model.fit on the dtgen data generator, so the first
# two lines are reconstructed (EPOCHS and next_train_batch are assumptions)
model.fit(x=dtgen.next_train_batch(),
          epochs=EPOCHS,
          steps_per_epoch=dtgen.steps['train'],
          validation_data=dtgen.next_valid_batch(),
          validation_steps=dtgen.steps['valid'],
          callbacks=callbacks,
          shuffle=True,
          verbose=1)

model.save(f"saved_model/Flor/{INPUT_SOURCE_NAME}_filter")


# Predict a single image
PREDICT_IMAGE_SRC = "hello.png"
tokenizer = Tokenizer(chars=CHARSET_BASE, max_text_length=MAX_TEXT_LENGTH)
img = preproc(PREDICT_IMAGE_SRC, input_size=INPUT_SHAPE)  # read + resize to the model's input shape
x_test = normalization([img])  # same normalization as used in training

STEPS = 1

out = model.predict(
    x=x_test,
    batch_size=None,
    verbose=0,
    steps=STEPS,
    callbacks=None,
    max_queue_size=10,
    workers=1,
    use_multiprocessing=False
)

steps_done = 0
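
The raw `out` tensor still has to be decoded into text. A minimal sketch of greedy (best-path) CTC decoding, assuming the model emits per-timestep character probabilities, that the CTC blank is the last class index, and that `tokenizer.decode` maps label indices back to characters (all assumptions; adapt to the actual model):

import numpy as np

probs = out[0]                         # (timesteps, vocab_size) for the one image
best_path = np.argmax(probs, axis=-1)  # most likely class at each timestep

blank_idx = probs.shape[-1] - 1        # assumed CTC blank index
decoded, prev = [], None
for idx in best_path:
    # CTC rule: collapse consecutive repeats, then drop blanks
    if idx != prev and idx != blank_idx:
        decoded.append(int(idx))
    prev = idx

print(tokenizer.decode(decoded))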
Example #2
# assumed imports (omitted in the original; `pp`'s module name is a guess)
import heapq
from copy import deepcopy as dc
from sklearn.linear_model import Perceptron
import preprocessing as pp

# splitting the data into 4 monthly parts
mon4, mon5, mon6, mon7 = pp.split_data(data)

# training set: train_d aka x, (mon4, mon5); train_t aka y, (mon6)
# testing set: test_d aka x, (mon4, mon5, mon6); test_t aka y, (mon7)
train_d = dc(mon4)
train_d.extend(mon5)
test_d = dc(train_d)
test_d.extend(mon6)
train_t = dc(mon6)
test_t = dc(mon7)

# processing data
train_d = pp.process_activity(train_d)
train_t = pp.process_activity(train_t)
train_d = pp.normalization(train_d)

train_x,train_y = pp.get_train_data(train_d,train_t)
test_d = pp.process_activity(test_d)
test_d = pp.normalization(test_d)
test_t = pp.process_activity(test_t)

# training a perceptron classifier
pcpt = Perceptron()
pcpt.fit(train_x, train_y)

# selecting the 2000 test samples with the highest decision scores
result = heapq.nlargest(2000, test_d,
                        key=lambda x: pcpt.decision_function([x])[0])

# evaluating the quality of the result (precision/recall/F1)
precision, recall, f1 = pp.get_comments(result, test_t)
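
`pp.get_comments` is not shown here; for reference, a minimal sketch of a set-overlap precision/recall/F1 computation, under the assumption that the predictions and ground truth are comparable collections of hashable items (a hypothetical helper, not the project's actual function):

def evaluate(predicted, truth):
    # true positives are items that appear in both collections
    predicted, truth = set(predicted), set(truth)
    tp = len(predicted & truth)
    precision = tp / len(predicted) if predicted else 0.0
    recall = tp / len(truth) if truth else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1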
Example #3
    import argparse
    parser = argparse.ArgumentParser(description='Building Interactive Intelligent Systems')
    parser.add_argument('-c','--clean', help='set this flag to do data cleaning, default is False', action='store_true')
    parser.add_argument('-mv','--max_vocab', help='predefined max vocab size, no limit if set to -1', required=False, default=-1)
    parser.add_argument('-lr','--learning_rate', required=False, default=0.001)
    parser.add_argument('-i','--num_iter', required=False, default=1)
    parser.add_argument('-fn','--file_name', help='file name', required=False, default='myTest')
    args = vars(parser.parse_args())
    print(args)

    print('[Read the data from twitter-sentiment-testset.csv...]')
    revs, word2idx = data_preprocess('./twitter-sentiment-testset.csv', args['clean'], int(args['max_vocab']))
    
    print('[Extract features from the read data...]')
    data, label = feature_extraction_bow(revs, word2idx)
    data = normalization(data)
    
    # shuffle data
    shuffle_idx = np.arange(len(data))
    np.random.shuffle(shuffle_idx)
    data = data[shuffle_idx]
    label = label[shuffle_idx]


    print('[Start training...]')
    X_train, X_dev, Y_train, Y_dev = train_test_split(data, label, test_size=0.2, random_state=0)
    parameters = model(X_train.T, Y_train.T, X_dev.T, Y_dev.T, args['file_name'], 
                        num_iterations=int(args['num_iter']), learning_rate=float(args['learning_rate']))
    
    print('\n[Start evaluating on the official test set and dump as {}...]'.format(args['file_name']+'.csv'))
    revs, _ = data_preprocess("./twitter-sentiment-testset.csv", args['clean'], int(args['max_vocab']))
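
The snippets above all lean on a project-level `normalization` helper whose body is not shown. A minimal sketch of one common choice, per-feature min-max scaling to [0, 1] (an assumption about what the helper does, not its actual definition):

import numpy as np

def normalization(data):
    # scale every feature column into [0, 1]; constant columns map to 0
    data = np.asarray(data, dtype=np.float64)
    mins = data.min(axis=0)
    span = data.max(axis=0) - mins
    span[span == 0] = 1.0  # guard against division by zero
    return (data - mins) / span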
Example #4
    def setup_data(self, data, idx, sat, sat_properties):
        """Load one satellite's time series for grid cell `idx`, then apply
        aggregation/sampling, band ratios, vegetation indices, normalization,
        and optional cloud-mask and day-of-year bands."""
        if sat_properties[sat]['use']:
            sat_properties[sat]['data'] = data[sat][self.grid_list[idx]]

            if sat in ['planet']:
                self.setup_planet(data, sat, sat_properties)
            if sat in ['s2']:
                self.setup_s2(data, idx, sat, sat_properties)
            if self.include_doy:
                sat_properties[sat]['doy'] = data[f'{sat}_dates'][
                    self.grid_list[idx]][()]
            if sat_properties[sat]['agg']:
                # Aggregate the time series into fixed windows of agg_days
                sat_properties[sat]['data'], sat_properties[sat]['doy'] = \
                    split_and_aggregate(
                        sat_properties[sat]['data'],
                        sat_properties[sat]['doy'],
                        self.agg_days,
                        reduction=sat_properties[sat]['agg_reduction'])

                # After aggregation, recompute the VH/VV ratio band so it
                # stays consistent with the aggregated VH and VV bands
                if sat in ['s1']:
                    sat_data = sat_properties[sat]['data']
                    vh = sat_data[BANDS[sat]['VH'], :, :, :]
                    vv = sat_data[BANDS[sat]['VV'], :, :, :]
                    with np.errstate(divide='ignore', invalid='ignore'):
                        ratio = vh / vv
                    ratio[vv == 0] = 0  # define pixels with VV == 0 as 0
                    sat_data[BANDS[sat]['RATIO'], :, :, :] = ratio

            else:
                # Otherwise sub-sample the time series to num_timesteps frames
                (sat_properties[sat]['data'],
                 sat_properties[sat]['doy'],
                 sat_properties[sat]['cloudmasks']) = preprocess.sample_timeseries(
                     sat_properties[sat]['data'],
                     self.num_timesteps,
                     sat_properties[sat]['doy'],
                     cloud_stack=sat_properties[sat]['cloudmasks'],
                     least_cloudy=self.least_cloudy,
                     sample_w_clouds=self.sample_w_clouds,
                     all_samples=self.all_samples)

            if sat in ['planet'] and self.resize_planet:
                sat_properties[sat]['data'] = imresize(
                    sat_properties[sat]['data'],
                    (sat_properties[sat]['data'].shape[0], self.grid_size,
                     self.grid_size, sat_properties[sat]['data'].shape[3]),
                    anti_aliasing=True,
                    mode='reflect')

            # Include NDVI and GCVI for s2 and planet; compute them before
            # normalization and band selection, but after aggregation
            if self.include_indices and sat in ['planet', 's2']:
                sat_data = sat_properties[sat]['data']
                band_idx = BANDS[sat][str(sat_properties[sat]['num_bands'])]
                nir = sat_data[band_idx['NIR'], :, :, :]
                red = sat_data[band_idx['RED'], :, :, :]
                green = sat_data[band_idx['GREEN'], :, :, :]
                with np.errstate(divide='ignore', invalid='ignore'):
                    ndvi = (nir - red) / (nir + red)
                    gcvi = nir / green - 1
                # zero out pixels where the denominators vanish
                ndvi[(nir + red) == 0] = 0
                gcvi[green == 0] = 0

            # TODO: clean this up; should doy/cloud bands still be included
            # once the data has been aggregated?
            if self.normalize:
                sat_properties[sat]['data'] = preprocess.normalization(
                    sat_properties[sat]['data'], sat, self.country)

            # Concatenate vegetation indices after normalization
            if sat in ['planet', 's2'] and self.include_indices:
                sat_properties[sat]['data'] = np.concatenate(
                    (sat_properties[sat]['data'], np.expand_dims(ndvi, axis=0)), 0)
                sat_properties[sat]['data'] = np.concatenate(
                    (sat_properties[sat]['data'], np.expand_dims(gcvi, axis=0)), 0)

            # Concatenate cloud mask bands
            if sat_properties[sat]['cloudmasks'] is not None and self.include_clouds:
                sat_properties[sat]['cloudmasks'] = preprocess.preprocess_clouds(
                    sat_properties[sat]['cloudmasks'], self.model_name,
                    self.timeslice)
                sat_properties[sat]['data'] = np.concatenate(
                    (sat_properties[sat]['data'],
                     sat_properties[sat]['cloudmasks']), 0)

            # Concatenate doy bands
            if sat_properties[sat]['doy'] is not None and self.include_doy:
                doy_stack = preprocess.doy2stack(
                    sat_properties[sat]['doy'],
                    sat_properties[sat]['data'].shape)
                sat_properties[sat]['data'] = np.concatenate(
                    (sat_properties[sat]['data'], doy_stack), 0)

        return sat_properties
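
For orientation, a hypothetical sketch of the per-satellite `sat_properties` entry this method consumes, inferred purely from the keys read above (all field values are illustrative assumptions, not the project's actual configuration):

# Hypothetical example of one satellite's properties dict, as read by
# setup_data; values here are placeholders for illustration only.
sat_properties = {
    's1': {
        'use': True,              # whether to load this satellite at all
        'data': None,             # filled in by setup_data
        'doy': None,              # day-of-year vector, filled when include_doy
        'cloudmasks': None,       # optional per-timestep cloud masks
        'agg': True,              # aggregate over agg_days vs. sub-sample
        'agg_reduction': 'mean',  # reduction used by split_and_aggregate
        'num_bands': 3,           # selects the band-index table in BANDS
    },
}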