    steps_per_epoch=dtgen.steps['train'],
    validation_data=dtgen.next_valid_batch(),
    validation_steps=dtgen.steps['valid'],
    callbacks=callbacks,
    shuffle=True,
    verbose=1
)

model.save(f"saved_model/Flor/{INPUT_SOURCE_NAME}_filter")

# Predict
PREDICT_IMAGE_SRC = "hello.png"

tokenizer = Tokenizer(chars=CHARSET_BASE, max_text_length=MAX_TEXT_LENGTH)
img = preproc(PREDICT_IMAGE_SRC, input_size=INPUT_SHAPE)
x_test = normalization([img])

STEPS = 1
out = model.predict(
    x=x_test,
    batch_size=None,
    verbose=False,
    steps=STEPS,
    callbacks=None,
    max_queue_size=10,
    workers=1,
    use_multiprocessing=False
)

steps_done = 0
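# The raw `model.predict` output above is a CTC probability matrix, not text.
# Below is a minimal greedy-decoding sketch, assuming a TensorFlow backend and
# that the Tokenizer built above exposes a `decode` method mapping label
# indices back to characters (an assumption about the surrounding codebase).
import numpy as np
import tensorflow as tf

input_length = np.full(out.shape[0], out.shape[1])  # full-length sequences
decoded, _ = tf.keras.backend.ctc_decode(out, input_length=input_length,
                                         greedy=True)

# `decoded[0]` is a dense tensor padded with -1; strip the padding before
# mapping indices back to characters.
for seq in decoded[0].numpy():
    print(tokenizer.decode([int(c) for c in seq if c != -1]))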
from copy import deepcopy as dc
import heapq

from sklearn.linear_model import Perceptron

# Splitting the data into four monthly parts: mon4, mon5, mon6, mon7.
mon4, mon5, mon6, mon7 = pp.split_data(data)

# Training set: train_d (x) = (mon4, mon5); train_t (y) = mon6.
# Testing set:  test_d (x)  = (mon4, mon5, mon6); test_t (y) = mon7.
train_d = dc(mon4)
train_d.extend(mon5)
test_d = dc(train_d)
test_d.extend(mon6)
train_t = dc(mon6)
test_t = dc(mon7)

# Processing and normalizing the data. Note we process the deep copy
# `train_t`, not `mon6` itself, so the test data built from mon6 is untouched.
train_d = pp.process_activity(train_d)
train_t = pp.process_activity(train_t)
train_d = pp.normalization(train_d)
train_x, train_y = pp.get_train_data(train_d, train_t)

test_d = pp.process_activity(test_d)
test_d = pp.normalization(test_d)
test_t = pp.process_activity(test_t)

# Using a perceptron to train the model.
pcpt = Perceptron()
pcpt.fit(train_x, train_y)

# Keeping the 2000 test entries with the highest decision scores.
result = heapq.nlargest(2000, test_d,
                        key=lambda x: pcpt.decision_function(test_d[x]))

# Calculating the quality of the result.
precision, recall, f1 = pp.get_comments(result, test_t)
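# `pp.get_comments` is defined elsewhere; below is a minimal sketch of what
# such a precision/recall/F1 computation could look like, assuming `result`
# and `test_t` are collections of item IDs (hypothetical structure).
def get_comments_sketch(result, test_t):
    predicted, actual = set(result), set(test_t)
    true_positives = len(predicted & actual)
    precision = true_positives / len(predicted) if predicted else 0.0
    recall = true_positives / len(actual) if actual else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    return precision, recall, f1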
import argparse

import numpy as np
from sklearn.model_selection import train_test_split

parser = argparse.ArgumentParser(description='Building Interactive Intelligent Systems')
# Note: `store_true` (not `store_false`) so the flag defaults to False and
# enables cleaning when passed, matching the help text.
parser.add_argument('-c', '--clean', help='set this flag to enable data cleaning, default is False',
                    action='store_true')
parser.add_argument('-mv', '--max_vocab', help='predefined max vocab size; no limit if set to -1',
                    required=False, default=-1)
parser.add_argument('-lr', '--learning_rate', required=False, default=0.001)
parser.add_argument('-i', '--num_iter', required=False, default=1)
parser.add_argument('-fn', '--file_name', help='output file name', required=False, default='myTest')
args = vars(parser.parse_args())
print(args)

print('[Read the data from twitter-sentiment-testset.csv...]')
revs, word2idx = data_preprocess('./twitter-sentiment-testset.csv', args['clean'], int(args['max_vocab']))

print('[Extract features from the read data...]')
data, label = feature_extraction_bow(revs, word2idx)
data = normalization(data)

# Shuffle data and labels with the same permutation.
shuffle_idx = np.arange(len(data))
np.random.shuffle(shuffle_idx)
data = data[shuffle_idx]
label = label[shuffle_idx]

print('[Start training...]')
X_train, X_dev, Y_train, Y_dev = train_test_split(data, label, test_size=0.2, random_state=0)
parameters = model(X_train.T, Y_train.T, X_dev.T, Y_dev.T, args['file_name'],
                   num_iterations=int(args['num_iter']),
                   learning_rate=float(args['learning_rate']))

print('\n[Start evaluating on the official test set and dump as {}...]'.format(args['file_name'] + '.csv'))
revs, _ = data_preprocess('./twitter-sentiment-testset.csv', args['clean'], int(args['max_vocab']))
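# `feature_extraction_bow` is defined elsewhere; below is a minimal
# bag-of-words sketch, assuming each entry in `revs` is a dict with a
# tokenized 'text' and an integer 'label' (hypothetical structure; the real
# output of data_preprocess may differ).
def feature_extraction_bow_sketch(revs, word2idx):
    data = np.zeros((len(revs), len(word2idx)), dtype=np.float32)
    label = np.zeros(len(revs), dtype=np.int64)
    for i, rev in enumerate(revs):
        for token in rev['text']:
            if token in word2idx:
                data[i, word2idx[token]] += 1  # raw term counts
        label[i] = rev['label']
    return data, label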
def setup_data(self, data, idx, sat, sat_properties):
    if sat_properties[sat]['use']:
        sat_properties[sat]['data'] = data[sat][self.grid_list[idx]]

        if sat in ['planet']:
            self.setup_planet(data, sat, sat_properties)
        if sat in ['s2']:
            self.setup_s2(data, idx, sat, sat_properties)
        if self.include_doy:
            sat_properties[sat]['doy'] = data[f'{sat}_dates'][
                self.grid_list[idx]][()]

        if sat_properties[sat]['agg']:
            sat_properties[sat]['data'], sat_properties[sat]['doy'] = \
                split_and_aggregate(
                    sat_properties[sat]['data'],
                    sat_properties[sat]['doy'],
                    self.agg_days,
                    reduction=sat_properties[sat]['agg_reduction'])

            # Replace the VH/VV band with a cleaner band after aggregation??
            if sat in ['s1']:
                # Recompute the VH/VV ratio band, guarding against
                # division by zero.
                with np.errstate(divide='ignore', invalid='ignore'):
                    vh = sat_properties[sat]['data'][BANDS[sat]['VH'], :, :, :]
                    vv = sat_properties[sat]['data'][BANDS[sat]['VV'], :, :, :]
                    sat_properties[sat]['data'][
                        BANDS[sat]['RATIO'], :, :, :] = vh / vv
                sat_properties[sat]['data'][
                    BANDS[sat]['RATIO'], :, :, :][vv == 0] = 0
        else:
            # Sample a fixed number of timesteps when data is not aggregated.
            (sat_properties[sat]['data'],
             sat_properties[sat]['doy'],
             sat_properties[sat]['cloudmasks']) = preprocess.sample_timeseries(
                sat_properties[sat]['data'],
                self.num_timesteps,
                sat_properties[sat]['doy'],
                cloud_stack=sat_properties[sat]['cloudmasks'],
                least_cloudy=self.least_cloudy,
                sample_w_clouds=self.sample_w_clouds,
                all_samples=self.all_samples)

        if sat in ['planet'] and self.resize_planet:
            sat_properties[sat]['data'] = imresize(
                sat_properties[sat]['data'],
                (sat_properties[sat]['data'].shape[0], self.grid_size,
                 self.grid_size, sat_properties[sat]['data'].shape[3]),
                anti_aliasing=True,
                mode='reflect')

        # Include NDVI and GCVI for s2 and planet: calculated before
        # normalization and numband selection, but AFTER aggregation.
        if self.include_indices and sat in ['planet', 's2']:
            with np.errstate(divide='ignore', invalid='ignore'):
                numbands = str(sat_properties[sat]['num_bands'])
                nir = sat_properties[sat]['data'][
                    BANDS[sat][numbands]['NIR'], :, :, :]
                red = sat_properties[sat]['data'][
                    BANDS[sat][numbands]['RED'], :, :, :]
                green = sat_properties[sat]['data'][
                    BANDS[sat][numbands]['GREEN'], :, :, :]
                ndvi = (nir - red) / (nir + red)
                gcvi = nir / green - 1
            # Zero out pixels with zero-valued denominators.
            ndvi[(nir + red) == 0] = 0
            gcvi[green == 0] = 0

        # TODO: Clean this up a bit. No longer include doy/clouds if data is
        # aggregated?
        if self.normalize:
            sat_properties[sat]['data'] = preprocess.normalization(
                sat_properties[sat]['data'], sat, self.country)

        # Concatenate vegetation indices after normalization.
        if sat in ['planet', 's2'] and self.include_indices:
            sat_properties[sat]['data'] = np.concatenate(
                (sat_properties[sat]['data'], np.expand_dims(ndvi, axis=0)), 0)
            sat_properties[sat]['data'] = np.concatenate(
                (sat_properties[sat]['data'], np.expand_dims(gcvi, axis=0)), 0)

        # Concatenate cloud mask bands.
        if sat_properties[sat]['cloudmasks'] is not None and self.include_clouds:
            sat_properties[sat]['cloudmasks'] = preprocess.preprocess_clouds(
                sat_properties[sat]['cloudmasks'], self.model_name,
                self.timeslice)
            sat_properties[sat]['data'] = np.concatenate(
                (sat_properties[sat]['data'],
                 sat_properties[sat]['cloudmasks']), 0)

        # Concatenate day-of-year (doy) bands.
        if sat_properties[sat]['doy'] is not None and self.include_doy:
            doy_stack = preprocess.doy2stack(
                sat_properties[sat]['doy'], sat_properties[sat]['data'].shape)
            sat_properties[sat]['data'] = np.concatenate(
                (sat_properties[sat]['data'], doy_stack), 0)

    return sat_properties
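# Standalone sketch of the vegetation-index arithmetic used in setup_data,
# run on toy 2x2 arrays (illustrative values only; shapes in the real
# pipeline are bands x time x height x width).
import numpy as np

nir = np.array([[0.6, 0.0], [0.5, 0.7]])
red = np.array([[0.2, 0.0], [0.1, 0.3]])
green = np.array([[0.3, 0.0], [0.2, 0.4]])

with np.errstate(divide='ignore', invalid='ignore'):
    ndvi = (nir - red) / (nir + red)  # NDVI = (NIR - RED) / (NIR + RED)
    gcvi = nir / green - 1            # GCVI = NIR / GREEN - 1

# Zero out pixels whose denominator is zero, mirroring the masking above.
ndvi[(nir + red) == 0] = 0
gcvi[green == 0] = 0
print(ndvi)
print(gcvi)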