def get_predictions(age, cp, sex, trestbps, chol, fbs, restecg, thalach,
                    exang, oldpeak, req_model, slope, ca, thal):
    data = pd.DataFrame(
        {
            'age': age,
            'sex': sex,
            'cp': cp,
            'trestbps': trestbps,
            'chol': chol,
            'fbs': fbs,
            'restecg': restecg,
            'thalach': thalach,
            'exang': exang,
            'oldpeak': oldpeak,
            'slope': slope,
            'ca': ca,
            'thal': thal  # thal was accepted by the function but previously omitted from the frame
        },
        index=[0])
    extract_data(data)
    vals = data.iloc[:].values
    if req_model == 'DecisionTree':
        print(req_model)
        return decisionTree.predict(vals)[0]
    elif req_model == 'LogisiticRegression':
        print(req_model)
        print("get Pred LR")
        return logisticRegression.predict(vals)[0]
    elif req_model == 'NaiveBayes':
        print(req_model)
        return naiveBayes.predict(vals)[0]
    else:
        return "Cannot Predict"
def main():
    savepath = './save_point'
    filepath = './save_point/keras_example_checkpoint.h5'

    # Extract MNIST dataset
    train_data_filename = maybe_download('train-images-idx3-ubyte.gz')
    train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')
    test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')
    test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')

    train_data = extract_data(train_data_filename, 60000, dense=False)
    train_data = train_data.reshape((60000, NUM_CHANNELS, IMG_SIZE, IMG_SIZE))
    train_labels = extract_labels(train_labels_filename, 60000, one_hot=True)
    test_data = extract_data(test_data_filename, 10000, dense=False)
    test_data = test_data.reshape((10000, NUM_CHANNELS, IMG_SIZE, IMG_SIZE))
    test_labels = extract_labels(test_labels_filename, 10000, one_hot=True)

    validation_data = train_data[:VALIDATION_SIZE, ...]
    validation_labels = train_labels[:VALIDATION_SIZE, :]
    validation_set = (validation_data, validation_labels)
    train_data = train_data[VALIDATION_SIZE:, ...]
    train_labels = train_labels[VALIDATION_SIZE:, ...]

    # Model construction
    model = Sequential()
    model.add(Convolution2D(32, 3, 3, border_mode='same', input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Convolution2D(64, 3, 3, border_mode='same'))
    model.add(Flatten())
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))

    # Define optimizer and configure training process
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=["accuracy"])

    model.fit(
        train_data,
        train_labels,
        nb_epoch=NUM_EPOCHS,
        batch_size=1000,
        validation_data=validation_set)

    print 'Save model weights'
    if not os.path.isdir(savepath):
        os.mkdir(savepath)
    model.save_weights(filepath, overwrite=True)

    predict = model.predict(test_data, batch_size=1000)
    print 'Test err: %.1f%%' % error_rate(predict, test_labels)
    print 'Test loss: %.4f, accuracy: %.4f' % \
        tuple(model.evaluate(test_data, test_labels, batch_size=1000))
def test_extract_data(self):
    notepad_app_hash = "TxTaPpHaSh"
    self.assertEqual(extract_data(None), None)
    self.assertEqual(extract_data(""), None)
    self.assertEqual(extract_data("bad_input"), None)
    self.assertEqual(
        extract_data("https://www.w3.org/TR/PNG/iso_8859-1.txt"),
        notepad_app_hash)
def build_model_and_evaluate(data, target, classifier="XGB"):
    model = Model1()
    if data == "face":
        df_X = model.fetch_face_data()
    elif data == "text":
        df_X = model.fetch_text_data()
    elif data == "relation":
        df_X = model.fetch_relation_data()
    else:
        raise ValueError("Incorrect data format")

    X, y = utils.extract_data(df_X, label=target)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.20,
                                                        random_state=2)

    # Compare case-insensitively so the default "XGB" matches the "xgb" branch
    if classifier.lower() == "xgb":
        clf = XGBClassifier(n_estimators=200)
    elif classifier.lower() == "svm":
        clf = SGDClassifier()
    else:
        raise ValueError("Incorrect classifier")

    clf = clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    # Return the computed score, not the accuracy_score function itself
    return score
def process_incidents(logger):
    try:
        conn, cur, dict_cur = utils.get_database_connection()
    except Exception as e:
        logger.error("Failed to get database connection")
        sys.exit("Unable to get database connection")

    url = utils.build_extract_url(logger)
    logger.info("Starting the extract")
    results = utils.extract_data(url, logger)
    number_results = len(results)
    logger.info(f"Extracted {number_results} records")

    load_status = utils.load_data(conn, cur, results, logger)
    logger.info(f"Load status: {load_status}")

    if load_status == 'success':
        incidents = utils.get_new_incidents(dict_cur)
        number_incidents = len(incidents)
        logger.info(f"Found {number_incidents} incidents")
        if number_results > 0:
            api = utils.get_twitter_auth()
            for incident in incidents:
                tweet_success = utils.update_status(api, incident, conn, cur)
                if tweet_success:
                    logger.info("Tweet status posted successfully")
                else:
                    logger.error("Failed to post tweet status")

    conn.close()
    cur.close()
    dict_cur.close()
def invoice_template():
    from models import Invoice
    invoice_id = request.form.get('id', None)
    if invoice_id is None:
        return 'wrong parameters sent'
    # Filter on the model column; comparing invoice_id to itself always matched.
    # (Assumes the primary-key column is Invoice.id.)
    invoice = Invoice.query.filter(Invoice.id == invoice_id).first()
    factor_data = extract_data(invoice)
    return render_template('invoice_template.html', **factor_data)
def save(self, name_att, input, data_type):
    print(name_att, input, data_type)
    data = utils.extract_data(input, data_type)
    self.chat_bot.add_att(name_att, data)
    ans = self.chat_bot.save_user()
    if ans == "NEW":
        self.set_next(self.list_answers[1][1])
    else:
        self.set_next(self.list_answers[0][1])
    return ans
def invoice_factor():
    from models import Invoice
    invoice_id = request.form.get('id', None)
    if invoice_id is None:
        return 'wrong parameters sent'
    # Filter on the model column; comparing invoice_id to itself always matched.
    # (Assumes the primary-key column is Invoice.id.)
    invoice = Invoice.query.filter(Invoice.id == invoice_id).first()
    file_path = os.path.join(files_dir, '{}.pdf'.format(invoice.number))
    static_path = os.path.join(static_dir, 'css/style.css')
    factor_data = extract_data(invoice)
    HTML(string=render_template('invoice.html', **factor_data)).write_pdf(
        target=file_path, stylesheets=[static_path])
    return send_file(file_path)
def check_input(self, input):
    input = utils.extract_data(input, self.data_type)
    check = utils.check_input_type(input, self.data_type)
    print("TIMEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE", input, check)
    if check:
        self.chat_bot.add_att(self.attribute, input)
    else:
        inext = self.nodej["except"]
        if inext != 0:
            self.set_next(inext)
            check = True
    return check
def do_where(self, my_df, attr, value, opr):
    tbl, attr = self.extract_ta(attr)
    # if tbl is None:
    #     pass
    # else:
    #     table = self.alias_map[tbl]
    if isinstance(value, list):
        return self.do_dynamic_where(my_df, attr, value[0], opr, value[2],
                                     value[1])
    elif utils.is_float(value) or utils.is_date(value) or utils.is_quoted(value):
        par = utils.extract_data(value)
        return self.do_fix_where(my_df, attr, par, opr)
    else:
        return self.do_dynamic_where(my_df, attr, value, opr)
def build_model_and_evaluate_rms(data, regressor="XGB"):
    model = Model1()
    if data == "face":
        df_X = model.fetch_face_data()
    elif data == "text":
        df_X = model.fetch_text_data()
    elif data == "relation":
        df_X = model.fetch_relation_data()
    else:
        raise ValueError("Incorrect data format")

    X, y = utils.extract_data(df_X, label="personality")
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.20,
                                                        random_state=2)

    # Compare case-insensitively so the default "XGB" matches the "xgb" branch
    if regressor.lower() == "xgb":
        reg = MultiOutputRegressor(
            XGBRegressor(n_estimators=200,
                         max_depth=2,
                         objective="reg:squarederror"))
    elif regressor.lower() == "rf":
        reg = MultiOutputRegressor(RandomForestRegressor(n_estimators=100))
    elif regressor.lower() == "lasso":
        # Placeholder in the original; a Lasso-based regressor still needs to be wired in
        reg = ""
    elif regressor.lower() == "lightgbm":
        reg = MultiOutputRegressor(
            lightgbm.LGBMRegressor(objective="regression"))
    else:
        raise ValueError("Incorrect regressor")

    reg = reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Calculating RMSE for all personality traits
    rmse = []
    for i, value in enumerate(utils.regressor_labels):
        rmse.append(sqrt(mean_squared_error(y_pred[:, i], y_test[value])))
    return rmse
def build_model_and_evaluate(data: List[str], target: str, classifier="XGB"):
    model = Model2EarlyFusion()
    df_X = combine_features(data)

    X, y = utils.extract_data(df_X, label=target)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.20,
                                                        random_state=2)

    # Compare case-insensitively so the default "XGB" matches the "xgb" branch
    if classifier.lower() == "xgb":
        clf = XGBClassifier(n_estimators=200)
    elif classifier.lower() == "svm":
        clf = SGDClassifier()
    else:
        raise ValueError("Incorrect classifier")

    clf = clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    # Return the computed score, not the accuracy_score function itself
    return score
def read(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, nb_classes=2):
    images, labels = extract_data('./data/')
    labels = np.reshape(labels, [-1])
    X_train, X_test, y_train, y_test = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    # Split the held-out 30% in half for validation/test; splitting the full
    # set again here would leak training samples into validation and test.
    X_valid, X_test, y_valid, y_test = train_test_split(
        X_test, y_test, test_size=0.5, random_state=random.randint(0, 100))

    # Tensorflow ordering:
    assert Keras.image_dim_ordering() == 'tf'
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3)
    X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, 3)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3)
    input_shape = (img_rows, img_cols, 3)

    # The data, shuffled and split between train and test sets:
    adfis('X_train shape:', X_train.shape)
    adfis(X_train.shape[0], 'train samples')
    adfis(X_valid.shape[0], 'valid samples')
    adfis(X_test.shape[0], 'test samples')

    # Convert class vectors to binary class matrices:
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_valid = np_utils.to_categorical(y_valid, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_valid = X_valid.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_valid /= 255
    X_test /= 255

    self.X_train = X_train
    self.X_valid = X_valid
    self.X_test = X_test
    self.Y_train = Y_train
    self.Y_valid = Y_valid
    self.Y_test = Y_test
def test_model(model, test_data_path, output_dir, output_file_name, eval_file_name):
    # Read and extract test data set
    test_data = pd.read_csv(test_data_path,
                            sep='\t',
                            names=header_name,
                            header=None,
                            usecols=[0, 1, 2]).iloc[:, 0:3]
    test_data, x_test, y_test = extract_data(test_data)
    print("Correct labels:\n", np.array(y_test), "\n")

    # Start predicting line by line and write to output file
    output_path = os.path.join(output_dir, output_file_name)
    output_file = open(output_path, "w")
    predictions = []
    num_of_correct = 0
    for index, row in test_data.iterrows():
        # Make prediction
        line_prediction, max_score = model.predict_line(row['text'])
        predictions.append(line_prediction)

        # Evaluate if the prediction is correct or not
        line_prediction = "yes" if line_prediction else "no"
        target = "yes" if row['q1_label'] else "no"
        outcome = "correct" if line_prediction == target else "wrong"
        if outcome == 'correct':
            num_of_correct += 1

        # Write result to file
        content = """{} {} {:.2E} {} {}\n""".format(
            row['tweet_id'], line_prediction, max_score, target, outcome)
        output_file.write(content)
    output_file.close()

    print("Predicted labels:\n", predictions)
    print("Trace file produced: ", output_path)

    # TODO: Calculate and print out precision and stats
    evaluate(y_test.tolist(), predictions,
             os.path.join(output_dir, eval_file_name), num_of_correct)
def build_model_and_evaluate_rms(data, regressor="XGB"):
    model = Model2EarlyFusion()
    df_X = combine_features(data)

    X, y = utils.extract_data(df_X, label="personality")
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.20,
                                                        random_state=2)

    reg = MultiOutputRegressor(
        XGBRegressor(n_estimators=200, max_depth=2,
                     objective="reg:squarederror"))
    reg = reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Calculating RMSE for all personality
    rmse = []
    for i, value in enumerate(utils.regressor_labels):
        rmse.append(sqrt(mean_squared_error(y_pred[:, i], y_test[value])))
    return rmse
def __download_data(self, url, out_path):
    """Download a url and save the result in out_path.

    :url: the url to download the records from
    :out_path: the path where the file is saved
    :return: the number of records extracted
    """
    r = requests.get(url)
    log.info('processing url {}'.format(url))
    time.sleep(0.5)
    if r.status_code == 200:
        data = utils.extract_data(r.json())
        if len(data) > 0:
            log.info('saving file {}'.format(out_path))
            with open(out_path, 'w') as f:
                json.dump(data, f)
            return len(data)
        else:
            log.error('no data for url {}'.format(url))
    else:
        log.error('got response {} for url {}'.format(r.status_code, url))
    r.close()
    return 0
def main(): # load config file config = load_config(config_path) # build dict for token (vocab_dict) and char (vocab_c_dict) vocab_dict, vocab_c_dict = build_dict(vocab_path, vocab_char_path) # load pre-trained embedding # W_init: token index * token embeding # embed_dim: embedding dimension W_init, embed_dim = load_word2vec_embedding(word_embedding_path, vocab_dict) K = 3 # generate train/valid examples train_data, sen_cut_train = generate_examples(train_path, vocab_dict, vocab_c_dict, config, "train") dev_data, sen_cut_dev = generate_examples(valid_path, vocab_dict, vocab_c_dict, config, "dev") #------------------------------------------------------------------------ # training process begins hidden_size = config['nhidden'] batch_size = config['batch_size'] coref_model = model.CorefQA(hidden_size, batch_size, K, W_init, config).to(device) if len(sys.argv) > 4 and str(sys.argv[4]) == "load": try: coref_model.load_state_dict(torch.load(torch_model_p)) print("saved model loaded") except: print("no saved model") criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(coref_model.parameters(), lr=config['learning_rate']) # TODO: use hyper-params in paper iter_index = 0 batch_acc_list = [] batch_loss_list = [] dev_acc_list = [] max_iter = int(config['num_epochs'] * len(train_data) / batch_size) print("max iteration number: " + str(max_iter)) while True: # building batch data # batch_xxx_data is a list of batch data (len 15) # [dw, m_dw, qw, m_qw, dc, m_dc, qc, m_qc, cd, m_cd, a, dei, deo, dri, dro] batch_train_data, sen_cut_batch = generate_batch_data(train_data, config, "train", -1, sen_cut_train) # -1 means random sampling # dw, m_dw, qw, m_qw, dc, m_dc, qc, m_qc, cd, m_cd, a, dei, deo, dri, dro = batch_train_data print(len(sen_cut_batch)) # zero the parameter gradients optimizer.zero_grad() # forward pass dw, dc, qw, qc, cd, cd_m = extract_data(batch_train_data) cand_probs = coref_model(dw, dc, qw, qc, cd, cd_m, sen_cut_batch) # B x Cmax answer = torch.tensor(batch_train_data[10]).type(torch.LongTensor) # B x 1 loss = criterion(cand_probs, answer) # evaluation process acc_batch = cal_acc(cand_probs, answer, batch_size) batch_acc_list.append(acc_batch) batch_loss_list.append(loss) dev_acc_list = evaluate_result(iter_index, config, dev_data, batch_acc_list, batch_loss_list, dev_acc_list, coref_model, sen_cut_dev) # save model if iter_index % config['model_save_frequency'] == 0 and len(sys.argv) > 4: torch.save(coref_model.state_dict(), torch_model_p) # back-prop loss.backward() optimizer.step() # check stopping criteria iter_index += 1 if iter_index > max_iter: break
def main():
    savepath = './save_point'
    filepath = './save_point/model_api_checkpoint.h5'

    train_data_filename = maybe_download('train-images-idx3-ubyte.gz')
    train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')
    test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')
    test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')

    train_data = extract_data(train_data_filename, 60000, dense=False)
    train_data = train_data.reshape((60000, NUM_CHANNELS, IMG_SIZE, IMG_SIZE))
    train_labels = extract_labels(train_labels_filename, 60000, one_hot=True)
    test_data = extract_data(test_data_filename, 10000, dense=False)
    test_data = test_data.reshape((10000, NUM_CHANNELS, IMG_SIZE, IMG_SIZE))
    test_labels = extract_labels(test_labels_filename, 10000, one_hot=True)

    validation_data = train_data[:VALIDATION_SIZE, ...]
    validation_labels = train_labels[:VALIDATION_SIZE, :]
    validation_set = (validation_data, validation_labels)
    train_data = train_data[VALIDATION_SIZE:, ...]
    train_labels = train_labels[VALIDATION_SIZE:, ...]

    img = Input(shape=(1, 28, 28))
    conv1 = Convolution2D(32, 3, 3, border_mode='same')(img)
    conv1 = Activation('relu')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2_1 = Convolution2D(64, 3, 3, border_mode='same')(pool1)
    conv2_2 = Convolution2D(64, 5, 5, border_mode='same')(pool1)
    conv2_1 = Activation('relu')(conv2_1)
    conv2_2 = Activation('relu')(conv2_2)
    pool2_1 = MaxPooling2D(pool_size=(2, 2))(conv2_1)
    pool2_2 = MaxPooling2D(pool_size=(2, 2))(conv2_2)
    dense1 = Flatten()(pool2_1)
    dense2 = Flatten()(pool2_2)
    dense = merge([dense1, dense2], mode='concat', concat_axis=1)
    dense = Dense(512)(dense)
    dense = Activation('relu')(dense)
    dense = Dense(256)(dense)
    dense = Activation('relu')(dense)
    dense = Dense(10)(dense)
    output = Activation('softmax')(dense)

    model = Model(input=[img], output=[output])
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9)
    model.compile(
        optimizer=sgd, loss=['categorical_crossentropy'], metrics=["accuracy"])
    model.fit(
        [train_data], [train_labels],
        nb_epoch=1,
        verbose=1,
        batch_size=1000,
        validation_data=validation_set)

    print 'Save model weights'
    if not os.path.isdir(savepath):
        os.mkdir(savepath)
    model.save_weights(filepath, overwrite=True)

    predictions = model.predict([test_data], batch_size=1000)
    print 'Test error: %.1f%%' % error_rate(predictions, test_labels)
    print 'Test loss: %.14f, Test accuracy %.4f' % \
        tuple(model.evaluate([test_data], [test_labels], batch_size=1000))
if __name__ == '__main__':
    tick = time.time()
    args = get_parser(sys.argv[1:])
    # args = get_parser(['--CITY', 'NYK', '--LOG_DIR', 'log',
    #                    '--WITH_TIME', '--normalize_weight'])
    os.environ['CUDA_VISIBLE_DEVICES'] = args.device
    data, dicts = load_data(
        os.path.join(
            args.ROOT, 'data',
            '{}_INTV_processed_voc5_len2_setting_WITH_GPS_WITH_TIME_WITH_USERID.pk'
            .format(args.CITY)))
    args.vocabulary_size = dicts.vocabulary_size
    data, idx = extract_data(data, args)  # put all data_extraction here
    train_data = get_train_data(data)
    dataloader = DataLoader(train_data, args)
    dataloader_time = DataLoader_time(data, args, idx)
    evaluator_emb = Evaluator(args, dicts, mode='emb')
    evaluator_weight = Evaluator(args, dicts, mode='weight')
    logger = Logger(os.path.join(args.LOG_DIR, 'log_txt'))

    graph = tf.Graph()
    with graph.as_default():
        model = STSkipgram(args)
        sess = tf.Session(graph=graph, config=config)
        state = train(graph, sess, model, args, evaluator_emb,
                      evaluator_weight, logger, dataloader, dataloader_time)
        sess.close()
    X = np.concatenate([X] + [np.apply_along_axis(shift, 1, X, vector)
                              for vector in direction_vectors])
    print X.shape
    y = np.concatenate([y for _ in range(len(direction_vectors) + 1)], axis=0)
    print y.shape
    return X, y


# Extract data
train_data_filename = maybe_download('train-images-idx3-ubyte.gz')
train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')
test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')
test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')

X_train = extract_data(train_data_filename, 60000, dense=True)
y_train = extract_labels(train_labels_filename, 60000, one_hot=False)
X_test = extract_data(test_data_filename, 10000, dense=True)
y_test = extract_labels(test_labels_filename, 10000, one_hot=False)

#################################################
# Test for decision tree classifier without dimensionality reduction
Tree = DecisionTreeClassifier()
Tree.fit(X_train, y_train)
print 'Without dimensionality reduction: ', Tree.score(X_test, y_test)

# Dimensionality reduction using PCA (784 -> 64)
pca = PCA(n_components=64)
pca.fit(X_train)
X_train_reduce = pca.transform(X_train)
print( "We have ", aug_func_count, " augmentation function in our model" " with an augmentation factor of ", aug_factor) # Read our dataset dataLog_orig = utils.read_data_log(data_filename) # Get rid of some noisy data... utils.visualize_data(dataLog_orig) dataLog = dataLog_orig.loc[dataLog_orig['throttle'] > 0.25] print("Loaded data info: ") dataLog.info() filenames, steering = utils.extract_data(dataLog, remove_zeros=False, round_steering=True) total_sample = len(filenames) train_files, val_files, train_steering, val_steering = train_test_split( filenames, steering, test_size=0.33, random_state=543) train_samples = len(train_files) val_samples = len(val_files) print("Total Sample: ", total_sample, " Training samples : ", train_samples, " Validation samples: ", val_samples) batch_size = args.batch epochs = args.epochs model = model() model.summary()
def main(argv=None): # pylint: disable=unused-argument # Get the data. train_data_filename = maybe_download('train-images-idx3-ubyte.gz') train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz') test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz') test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz') # Extract it into numpy arrays. train_data = extract_data(train_data_filename, 60000, dense=False) train_labels = extract_labels(train_labels_filename, 60000, one_hot=True) test_data = extract_data(test_data_filename, 10000, dense=False ) test_labels = extract_labels(test_labels_filename, 10000, one_hot=True) # Generate a validation set. validation_data = train_data[:VALIDATION_SIZE, ...] validation_labels = train_labels[:VALIDATION_SIZE] train_data = train_data[VALIDATION_SIZE:, ...] train_labels = train_labels[VALIDATION_SIZE:] num_epochs = NUM_EPOCHS train_size = train_labels.shape[0] # This is where training samples and labels are fed to the graph. # These placeholder nodes will be fed a batch of training data at each # training step using the {feed_dict} argument to the Run() call below. train_data_node = tf.placeholder( tf.float32, shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)) train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, NUM_LABELS)) eval_data = tf.placeholder( tf.float32, shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)) # The variables below hold all the trainable weights. They are passed an # initial value which will be assigned when when we call: # {tf.initialize_all_variables().run()} # First convolutional layer conv1_weights = tf.Variable( tf.truncated_normal([3, 3, NUM_CHANNELS, 32], # 5x5 filter, depth 32. stddev=0.1, seed=SEED)) conv1_biases = tf.Variable(tf.zeros([32])) # Two second convolutional layers 5 x 5 filter, and 3 x 3 filters. conv2_weights = tf.Variable( tf.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=SEED)) conv2_biases = tf.Variable(tf.constant(0.01, shape=[64])) conv2_weights2 = tf.Variable( tf.truncated_normal([3, 3, 32, 64], stddev=0.1, seed=SEED)) conv2_biases2 = tf.Variable(tf.constant(0.01, shape=[64])) # First fully connected layer after conv layer fc1_weights = tf.Variable( # fully connected, depth 512. tf.truncated_normal( [IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 128, 512], stddev=0.05, seed=SEED)) fc1_biases = tf.Variable(tf.constant(0.01, shape=[512])) # Second fully connected layer fc2_weights = tf.Variable( tf.truncated_normal([512, 256], stddev=0.05, seed=SEED)) fc2_biases = tf.Variable(tf.constant(0.1, shape=[256])) # Output layer fc3_weights = tf.Variable( tf.truncated_normal([256, NUM_LABELS], stddev=0.04, seed=SEED)) fc3_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS])) # We will replicate the model structure for the training subgraph, as well # as the evaluation subgraphs, while sharing the trainable parameters. def model(data, train=False): """The Model definition.""" # 2D convolution, with 'SAME' padding (i.e. the output feature map has # the same size as the input). Note that {strides} is a 4D array whose # shape matches the data layout: [image index, y, x, depth]. conv = tf.nn.conv2d(data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME') # Bias and rectified linear non-linearity. relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases)) if train: relu = tf.nn.dropout(relu, .5) # Max pooling. The kernel size spec {ksize} also follows the layout of # the data. Here we have a pooling window of 2, and a stride of 2. 
pool = tf.nn.max_pool(relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') conv = tf.nn.conv2d(pool, conv2_weights, strides=[1, 1, 1, 1], padding='SAME') relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases)) conv2 = tf.nn.conv2d(pool, conv2_weights2, strides=[1, 1, 1, 1], padding='SAME') relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases2)) pool = tf.nn.max_pool(relu, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # Reshape the feature map cuboid into a 2D matrix to feed it to the # fully connected layers. pool = tf.concat(3, [pool, pool2]) pool_shape = pool.get_shape().as_list() reshape = tf.reshape( pool, [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]]) # Fully connected layer. Note that the '+' operation automatically # broadcasts the biases. hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases) hidden = tf.nn.relu(tf.matmul(hidden, fc2_weights) + fc2_biases) # Add a 50% dropout during training only. Dropout also scales # activations such that no rescaling is needed at evaluation time. if train: hidden = tf.nn.dropout(hidden, 0.5, seed=SEED) return tf.matmul(hidden, fc3_weights) + fc3_biases def extract_filter (data): conv = tf.nn.conv2d(data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME') # Bias and rectified linear non-linearity. relu1 = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases)) # Max pooling. The kernel size spec {ksize} also follows the layout of # the data. Here we have a pooling window of 2, and a stride of 2. pool = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') conv = tf.nn.conv2d(pool, conv2_weights, strides=[1, 1, 1, 1], padding='SAME') relu2 = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases)) conv2 = tf.nn.conv2d(pool, conv2_weights2, strides=[1, 1, 1, 1], padding='SAME') relu3 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases2)) return relu1, relu2, relu3 # Training computation: logits + cross-entropy loss. logits = model(train_data_node, True) loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits( logits, train_labels_node)) # L2 regularization for the fully connected parameters. regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) + tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases) + tf.nn.l2_loss(fc3_weights) + tf.nn.l2_loss(fc3_biases)) # Add the regularization term to the loss. loss += 5e-4 * regularizers # Optimizer: set up a variable that's incremented once per batch and # controls the learning rate decay. batch = tf.Variable(0) # Decay once per epoch, using an exponential schedule starting at 0.01. learning_rate = tf.train.exponential_decay( 0.01, # Base learning rate. batch * BATCH_SIZE, # Current index into the dataset. train_size, # Decay step. 0.95, # Decay rate. staircase=True) # Use simple momentum for the optimization. optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(loss, global_step=batch) # Predictions for the current training minibatch. train_prediction = tf.nn.softmax(logits) # Predictions for the test and validation, which we'll compute less often. eval_prediction = tf.nn.softmax(model(eval_data)) # Small utility function to evaluate a dataset by feeding batches of data to # {eval_data} and pulling the results from {eval_predictions}. # Saves memory and enables this to run on smaller GPUs. 
def eval_in_batches(data, sess): """Get all predictions for a dataset by running it in small batches.""" size = data.shape[0] if size < EVAL_BATCH_SIZE: raise ValueError("batch size for evals larger than dataset: %d" % size) predictions = numpy.ndarray(shape=(size, NUM_LABELS), dtype=numpy.float32) for begin in xrange(0, size, EVAL_BATCH_SIZE): end = begin + EVAL_BATCH_SIZE if end <= size: predictions[begin:end, :] = sess.run( eval_prediction, feed_dict={eval_data: data[begin:end, ...]}) else: batch_predictions = sess.run( eval_prediction, feed_dict={eval_data: data[-EVAL_BATCH_SIZE:, ...]}) predictions[begin:, :] = batch_predictions[begin - size:, :] return predictions # Create a local session to run the training. saver = tf.train.Saver() start_time = time.time() with tf.Session() as sess: # Run all the initializers to prepare the trainable parameters. if FLAGS.model: saver.restore(sess, FLAGS.model) # If model exists, load it else: sess.run(tf.initialize_all_variables()) # If there is no model randomly initialize if FLAGS.train: # Loop through training steps. for step in xrange(int(num_epochs * train_size) // BATCH_SIZE): # Compute the offset of the current minibatch in the data. # Note that we could use better randomization across epochs. offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE) batch_data = train_data[offset:(offset + BATCH_SIZE), ...] batch_labels = train_labels[offset:(offset + BATCH_SIZE)] # This dictionary maps the batch data (as a numpy array) to the # node in the graph is should be fed to. feed_dict = {train_data_node: batch_data, train_labels_node: batch_labels} # Run the graph and fetch some of the nodes. _, l, lr, predictions = sess.run( [optimizer, loss, learning_rate, train_prediction], feed_dict=feed_dict) if step % EVAL_FREQUENCY == 0: elapsed_time = time.time() - start_time start_time = time.time() print('Step %d (epoch %.2f), %.1f ms' % (step, float(step) * BATCH_SIZE / train_size, 1000 * elapsed_time / EVAL_FREQUENCY)) print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr)) print('Minibatch error: %.1f%%' % error_rate(predictions, batch_labels)) print('Validation error: %.1f%%' % error_rate( eval_in_batches(validation_data, sess), validation_labels)) sys.stdout.flush() # Finally print the result! test_error = error_rate(eval_in_batches(test_data, sess), test_labels) print('Test error: %.1f%%' % test_error) print ('Optimization done') print ('Save models') if not tf.gfile.Exists("./conv_save"): tf.gfile.MakeDirs("./conv_save") saver_path = saver.save(sess, "./conv_save/model.ckpt") print ('Successfully saved file: %s' % saver_path) else: # If train flag is false, execute image extraction routine print ("Filter extraction routine") aa = train_data[1:2, :, :, :] print (aa.shape) # Run extract filter operations (conv1, conv2 and conv3 layers) images = sess.run(extract_filter(train_data[1:2, :, :, :])) print (images[2].shape) plt.imshow (images[2][0, :, :, 32] * 255 + 255 / 2, cmap='gray') # plt.imshow (images[2][0, :, :, 32], cmap='gray') plt.show () # Save all outputs for i in range (3): filter_shape = images[i].shape img_size = [filter_shape[1], filter_shape[2]] print (img_size)
def twv_variable_calculations(target, yolo_output, keyword_indices, calcs_for_atwv_map, config_dict, threshold): C = config_dict["C"] B = config_dict["B"] K = config_dict["K"] #pred conf: the confidence of every box (4X6X2) #pred_class_all_prob ([4, 6, 2, 1000]) pred_ws, pred_start, pred_end, pred_conf, pred_class_all_prob = utils.extract_data(yolo_output, C, B, K) pred_classes_prob, pred_classes = torch.max(pred_class_all_prob, 3) #max on Classes (K) conf_class_mult, box_index = torch.max(( pred_conf* pred_classes_prob), 2) #max on p*K gt_thresh_idx = (torch.gt(conf_class_mult, threshold).long() * (pred_classes[:,:,0] +1)).nonzero() #for false negatives non_zero_indices = (target + 1).nonzero() #([[0, 0], [1, 1],... for batch, target_keyword in non_zero_indices: target_keyword = target_keyword.item() if target_keyword in keyword_indices: if target_keyword not in calcs_for_atwv_map: calcs_for_atwv_map[target_keyword] = [0, 0, 0] n_true = calcs_for_atwv_map[target_keyword][2] n_true+=1 #true number of occurences of term in corpus calcs_for_atwv_map[target_keyword][2] = n_true count_doubles = {} #count if a word occurred twice in a single example for batch_idx_pred, predict_keyword in gt_thresh_idx: pred_cell = predict_keyword.item() pred_word = pred_classes[batch_idx_pred.item(), pred_cell, 0].item() if pred_word in keyword_indices: if batch_idx_pred.item() not in count_doubles: count_doubles[batch_idx_pred.item()] = [] if pred_word in count_doubles[batch_idx_pred.item()]: #pdb.set_trace() continue #ignore words that appeared twice else: count_doubles[batch_idx_pred.item()].append(pred_word) if pred_word not in calcs_for_atwv_map: calcs_for_atwv_map[pred_word] = [0, 0, 0] #pdb.set_trace() n_correct = calcs_for_atwv_map[pred_word][0] n_spurious = calcs_for_atwv_map[pred_word][1] #find if word really was there exists = 0 for batch, target_keyword in non_zero_indices: if batch.item() == batch_idx_pred.item(): if target_keyword.item() == pred_word: exists = 1 break if batch.item() > batch_idx_pred.item(): break n_correct += exists if exists == 0: n_spurious += 1 calcs_for_atwv_map[pred_word][0] = n_correct calcs_for_atwv_map[pred_word][1] = n_spurious
def convert_yolo_tags(pred, c, b, k, threshold): ''' YOLO's outputs are tags given in format: (cell_i, box_j, (t, delta_t, p_b_{i,j}), p_{c_i}(k) ). This function converts it to tags in the following format: (start, end, word) inputs: pred: prediction or given target labels, in yolo format c: number of cells b: number of timing boxes k: number of keywords threshold: if the product of: p_b_{i,j} * p_{c_i}(k) is greather than the threshold, we predict that a keyword exists. output: final_pred_labels: dictionary, whose keys are the keywords. Every keyword has an array of (start, end) values. ''' pred_ws, pred_start, pred_end, pred_conf, pred_class_prob = utils.extract_data(pred, c, b, k) class_max, class_indices = torch.max(pred_class_prob, 3) conf_max, box_indices = torch.max((pred_conf * class_max), 2) pass_conf = (conf_max >= threshold).float() labels = [] for batch in range(0, pred.size(0)): for cell_i in range(0, pred.size(1)): if pass_conf[batch, cell_i].item() <= 0: continue selected_box_index = box_indices[batch, cell_i].item() selected_class_index = class_indices[batch, cell_i, 0].item() label_start = pred_start[batch, cell_i, selected_box_index].item() label_end = pred_end[batch, cell_i, selected_box_index].item() x = (label_end + label_start)/2 w = pred_ws[batch, cell_i, selected_box_index].item() labels.append([cell_i, x, w, selected_class_index, batch]) width_cell = 1. / c # width per cell final_pred_labels = {} for label in labels: real_x = (label[0] * width_cell + label[1]) # label[1] was already multiple with width cell real_w = label[2] cur_start = (real_x - float(real_w) / 2.0) cur_end = (real_x + float(real_w) / 2.0) cur_class = str(label[4])+ "_" + str(label[3]) # batch_class if cur_class not in final_pred_labels: final_pred_labels[cur_class] = [] else: prev_start = final_pred_labels[cur_class][-1][0] prev_end = final_pred_labels[cur_class][-1][1] if cur_start >= prev_end and cur_end >= prev_start: # -------- # ------- if cur_end - prev_end <= GAP_THRESH: final_pred_labels[cur_class].pop() #remove last item cur_start = prev_start elif cur_start <= prev_end and prev_start <= cur_end: # -------- # ------- final_pred_labels[cur_class].pop() #remove last item cur_start = prev_start elif cur_start >= prev_start and cur_end <= prev_end: # ----------- # ---- final_pred_labels[cur_class].pop() #remove last item cur_start = prev_start cur_end = pred_end elif cur_start >= prev_start and cur_end >= pred_end: # ----- # --------- final_pred_labels[cur_class].pop() #remove last item final_pred_labels[cur_class].append([cur_start, cur_end]) # print "objet- start:{}, end:{}, class:{}".format(pred_start,pred_end, pred_class) return final_pred_labels
        return len(self.data)

    def __getitem__(self, idx):
        return (self.data[idx], self.label[idx])


if __name__ == "__main__":
    # Parameters for the dataset
    chunk_size = 200

    # Load in the input data
    dirs = extract_file_names(
        "/home/alex/Projects/Unsupervised/kepler_q9_variability/")
    data = extract_data(dirs)
    data = split_to_chunk(data, chunk_size)
    datalist = convert_datalist(data)
    datalist = normalise(datalist)
    data_arr = np.vstack(datalist)

    with open("autoencoder_dataset.pkl", "wb") as f:
        pickle.dump(data_arr, f)
    print("Written autoencoder_dataset.pkl")

    ### Plotting
    # for i in range(0, 100):
          train_labels, validation_data, validation_labels, epochs=8)

# model.save("model_saves/NN_model.ckpt")
# model.load("model_saves/NN_model_e7.ckpt")

######################################################
# Test set accuracy for the model
test_predictions = model.predict(test_data)
correct = np.sum(test_predictions == test_labels)
print("Accuracy:", correct / len(test_data))

######################################################
# Labeling the south part of the image
south_img = utils.extract_data("data/test_south.tif")
south_img_shape = np.shape(south_img)
south_data = south_img.reshape(
    (south_img_shape[0] * south_img_shape[1], south_img_shape[2]))
south_data = utils.standardize(south_data)

south_predictions = model.predict(south_data)
south_predictions_img = south_predictions.reshape(
    (south_img_shape[0], south_img_shape[1]))

## applying a denoising filter:
# south_predictions_img = utils.denoise(south_predictions_img)

south_predictions_img = utils.to_RGB(south_predictions_img)
plt.imshow(south_predictions_img)
plt.axis('off')
def yolo_accuracy(prediction, target, C, B, K, T, iou_t=0.5, is_cuda=False): correct_class_high_iou = 0 correct_class_low_iou = 0 wrong_class_high_iou = 0 wrong_class_low_iou = 0 total_correct_class = 0 pred_ws, pred_start, pred_end, pred_conf, pred_class_all_prob = utils.extract_data(prediction, C, B, K) pred_classes_prob, pred_classes = torch.max(pred_class_all_prob, 3) conf_class_mult, box_index = torch.max(( pred_conf* pred_classes_prob), 2) no_object_correct = torch.eq((conf_class_mult < T).float(), 1 - target[:, :, -1]).cpu().sum() no_object_object_wrong = (torch.eq((conf_class_mult < T).float(), target[:, :, -1])).cpu().sum() target_ws, target_start, target_end, target_conf, target_class_all_prob = utils.extract_data(target[:, :, :-1], C, B, K) target_classes_prob, target_classes = torch.max(target_class_all_prob, 3) squeeze_target_start = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \ torch.zeros([target_start.size(0), C]) squeeze_pred_start = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \ torch.zeros([target_start.size(0), C]) squeeze_target_end = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \ torch.zeros([target_start.size(0), C]) squeeze_pred_end = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \ torch.zeros([target_start.size(0), C]) squeeze_target_ws = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \ torch.zeros([target_start.size(0), C]) squeeze_pred_ws = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \ torch.zeros([target_start.size(0), C]) box_indices_array = box_index.cpu().numpy() for row in range(0, box_indices_array.shape[0]): for col in range(0, box_indices_array.shape[1]): squeeze_target_start[row, col] = target_start[row, col, box_indices_array[row, col]] squeeze_pred_start[row, col] = pred_start[row, col, box_indices_array[row, col]] squeeze_target_end[row, col] = target_end[row, col, box_indices_array[row, col]] squeeze_pred_end[row, col] = pred_end[row, col, box_indices_array[row, col]] squeeze_target_ws[row, col] = target_ws[row, col, box_indices_array[row, col]] squeeze_pred_ws[row, col] = pred_ws[row, col, box_indices_array[row, col]] intersect_start = torch.max(squeeze_pred_start, squeeze_target_start) intersect_end = torch.min(squeeze_pred_end, squeeze_target_end) intersect_w = intersect_end - intersect_start iou_mask = torch.eq(torch.eq((conf_class_mult > T).float(), target[:, :, -1]).float(), target[:, :, -1]) iou = intersect_w / (squeeze_pred_ws + squeeze_target_ws - intersect_w) iou_select = iou * iou_mask.float() mean_iou_correct = 0.0 mean_iou_wrong = 0.0 is_object = target[:, :, -1].cpu().numpy() for batch in range(0, box_indices_array.shape[0]): for cell in range(0, box_indices_array.shape[1]): if is_object[batch, cell].item() != 1 or (conf_class_mult > T)[batch,cell].item() !=1: continue if pred_classes[batch, cell, 0].item() != target_classes[batch, cell, 0].item(): # predict object with wrong class if iou_select[batch, cell].item() < iou_t: wrong_class_low_iou += 1 else: wrong_class_high_iou += 1 mean_iou_wrong += iou_select[batch, cell].item() else: # predict object with right class if iou_select[batch, cell].item() < iou_t: correct_class_low_iou += 1 else: correct_class_high_iou += 1 mean_iou_correct += iou_select[batch, cell].item() total_correct_class += 1 return no_object_correct - total_correct_class, no_object_object_wrong, correct_class_high_iou, \ correct_class_low_iou, wrong_class_high_iou, wrong_class_low_iou, total_correct_class, \ 
mean_iou_correct, mean_iou_wrong
def agent_train(self, ns, r, done=False): #convert next state and reward to tensors #next_state_v = torch.tensor([next_state],dtype=dtype) #reward_v = torch.tensor([reward],dtype=dtype) #save the values in the replay buffer self.buffer.push(self.state, self.act, r, ns, done) #set the state to the next state to advance agent self.state = ns #if there are enough samples in replay buffer, perform network updates if len(self.buffer) >= self.BUFFER_SIZE: #get a mini batch from the replay buffer sample = self.buffer.sample(self.BATCH_SIZE) #make the data nice compressed_states, compressed_actions, compressed_next_states, compressed_rewards = utils.extract_data( sample) #critic network training #yt=r(st,at)+γ⋅Q(st+1,μ(st+1)) na_from_tactor_a = self.target_actor.get_action( compressed_next_states) na_from_tactor = na_from_tactor_a.mean(dim=1).unsqueeze(-1) v_from_tcritic = self.target_critic.get_state_value( compressed_next_states, na_from_tactor) #calculate yt=r(st,at)+γ⋅Q(st+1,μ(st+1)) target_v = compressed_rewards.unsqueeze( 1) + self.GAMMA * v_from_tcritic actual_v = self.online_critic.get_state_value( compressed_states, compressed_actions) loss = nn.MSELoss() output = loss(actual_v, target_v) self.optim.zero_grad() output.backward(retain_graph=True) self.optim.step() self.online_critic.value_func.zero_grad() for s, a in zip(compressed_states.split(1), compressed_actions.split(1)): online_v = self.online_critic.get_state_value(s, a) grad_wrt_a = torch.autograd.grad(online_v, (s, a)) action = self.online_actor.get_action(s) action.mean().backward(retain_graph=True) for param in self.online_actor.policy.parameters(): param.data += self.ALPHA * ( param.grad * grad_wrt_a[1].item()) / (self.BATCH_SIZE) self.online_actor.policy.zero_grad() self.online_critic.value_func.zero_grad() # #soft update for param_o, param_t in zip(self.online_actor.policy.parameters(), self.target_actor.policy.parameters()): param_t.data = param_o.data * self.TAU + param_t.data * ( 1 - self.TAU) for param_o, param_t in zip( self.online_critic.value_func.parameters(), self.target_critic.value_func.parameters()): param_t.data = param_o.data * self.TAU + param_t.data * ( 1 - self.TAU) self.online_actor.policy.zero_grad() self.target_actor.policy.zero_grad() self.online_critic.value_func.zero_grad() self.target_critic.value_func.zero_grad() torch.save(self.target_actor.policy.state_dict(), self.agent_name + 'target_actor_state_1.pt') torch.save(self.target_critic.value_func.state_dict(), self.agent_name + 'target_critic_state_1.pt')
def render_GET(self, request):
    request.setHeader(b"content-type", b"application/json")
    request.responseHeaders.addRawHeader(b"content-type", b"application/json")
    return Response.response(Response(request, data=extract_data(request)))
def get_face_data(self, target):
    df_face, _ = utils.load_data_from_csv(dtype="face")
    df_face, y = utils.extract_data(df_face, target, type="face")
    # df_face = preprocess(df_face, dtype="face")
    return df_face, y
df_liwc = pd.merge(df_liwc, df_output, left_on="userId", right_on="userid")
df_nrc = pd.merge(df_nrc, df_output, left_on="userId", right_on="userid")

# drop users with multiple faces, keeping only the first face
df_face.drop_duplicates(subset="userId", keep="first", inplace=True)
df_face = pd.merge(df_face,
                   df_output,
                   left_on="userId",
                   right_on="userid",
                   how="outer")
del df_face["userId"]
df_face.rename(columns={"userid": "userId"}, inplace=True)

# since there were missing faces, fill mean face in place of no-faces
df_face.fillna(df_face.mean(), inplace=True)

X_age_face_train, y_age_face_train = utils.extract_data(df_face, label="age")
# Min Max scale features
X_age_face_train = preprocessing.MinMaxScaleDataframe(X_age_face_train)

X_age_text_train, y_age_text_train = utils.extract_data(df_text, label="age")
X_age_text_train = preprocessing.MinMaxScaleDataframe(X_age_text_train)

"""Code"""
print("Pre-processing data...\n")

# remove pages with count less than threshold (Note: this removes few users as well)
threshold = 5
page_like_count = df_relation.groupby(['like_id']).size()
df_relation['likes_count'] = df_relation['like_id'].apply(
    lambda x: page_like_count.get(x))
df_relation_filtered = df_relation[df_relation['likes_count'] > threshold]
def get_text_data(self, target):
    df_text, _ = utils.load_data_from_csv(dtype="text")
    df_text, y = utils.extract_data(df_text, target, type="text")
    # df_text = preprocess(df_text, dtype="text")
    return df_text, y