def predict_multi_step_ahead(model, test_input, lead_time, parallel=False):
    predicted_value = None
    if parallel:
        for index_cal in range(0, lead_time):
            predicted_value = model.predict(test_input)
            # np.append returns a new array; assign it back so the input window actually slides
            test_input = np.append(test_input[:, 1:], predicted_value, 1)
    else:
        for index_cal in range(0, lead_time):
            predicted_value = model.predict(test_input)
            test_input = np.append(test_input[:, 1:], np.array([predicted_value]).T, 1)
    return predicted_value
def pfa_predict(answer, data):
    current_skills, last_times = data
    seconds_ago = map(
        lambda x: (answer['inserted'] - x).total_seconds() if x is not None else 315360000,
        last_times)
    current_skills = map(
        lambda (skill, secs): skill + TIME_SHIFT / max(secs, 0.001),
        zip(current_skills, seconds_ago))
    if 'number_of_options' in answer and answer['number_of_options'] != len(answer['options']):
        # backward compatibility
        return model.predict_simple(current_skills[0], answer['number_of_options'])
    else:
        return model.predict(current_skills[0], current_skills[1:])
def sample():
    # build sampling graph
    with tf.variable_scope("char-rnn"):
        keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
        cell = model.build_cell(keep_prob)
        inputs = tf.placeholder(dtype=tf.int32, shape=[1, 1], name='inputs')
        initial_state = tf.placeholder(dtype=tf.float32, shape=[1, cell.state_size], name='initial_state')
        logits, final_state = model.predict(inputs, cell, initial_state, keep_prob)

    char2id = text_input.load_from_dump(os.path.join(FLAGS.data_dir, 'vocab.cPickle'))
    id2char = {v: k for k, v in char2id.items()}
    output_str = FLAGS.start_with

    sess = tf.Session()
    state = cell.zero_state(1, dtype=tf.float32).eval(session=sess)
    saver = tf.train.Saver(tf.all_variables())
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if not ckpt or not ckpt.model_checkpoint_path:
        raise IOError('Cannot restore checkpoint file in ' + FLAGS.train_dir)
    saver.restore(sess, ckpt.model_checkpoint_path)

    for c in FLAGS.start_with[:-1]:
        x = np.array([[char2id[c]]])
        _, state = sess.run([logits, final_state],
                            feed_dict={inputs: x, keep_prob: 1., initial_state: state})
    last_id = char2id[FLAGS.start_with[-1]]
    for _ in xrange(FLAGS.max_length - len(FLAGS.start_with)):
        x = np.array([[last_id]])
        logits_value, state = sess.run([logits, final_state],
                                       feed_dict={inputs: x, keep_prob: 1., initial_state: state})
        last_id = _sample_from_logits(logits_value)
        c = id2char[last_id]
        output_str += c
    print output_str
def bi_item(m, tuples, k, func, func_w):
    # with open('u2U.txt', 'r') as f1:
    #     u2U = pickle.loads(f1.read())
    # with open('m2Ucluster.txt', 'r') as f4:
    #     m2Ucluster = pickle.loads(f4.read())
    with open('m2M.txt', 'r') as f1:
        m2M = pickle.loads(f1.read())
    with open('u2Mcluster.txt', 'r') as f2:
        u2Mcluster = pickle.loads(f2.read())
    res = []  # return the list of predictions given the query tuples
    # m = np.asarray(m, order = 'F')  # column-major order
    u2Mcluster = np.asarray(u2Mcluster, order='F')
    simPair = model.similarityPair(u2Mcluster, func)
    for pair in tuples:
        #print pair[0]
        c = findCentroid(m2M, pair[0])
        sim = simPair[c]
        temp = model.knn(sim, k)
        prediction = model.predict(u2Mcluster, temp, sim, func_w)
        pred = prediction[pair[1]] + 3  # plus 3
        if pred > 5:
            pred = 5
        elif pred < 1:
            pred = 1
        res.append(pred)
        #print pred
    return res
def showPredict():
    n_artists = get_n_artists()
    n_days = get_n_days(isX=False, isTrain=False)
    artistIdList, dsList, yReal, yPredict = predict(isOffline=ISOFFLINE)
    yTrain = getPlays(isTrain=True)
    yTrain = yTrain.reshape(n_artists, n_days)
    firstDay = datetime.strptime(dsList[0], '%Y%m%d')
    xData = np.arange(n_days) + date2num(firstDay)
    pdf = PdfPages('../report/analyze.pdf')
    for i in range(n_artists):
        fig = plt.figure()
        ax = plt.axes()
        ax.xaxis.set_major_formatter(DateFormatter('%m%d'))
        yRealData = yReal[i]
        yPredictData = yPredict[i]
        yTrainData = yTrain[i]
        artist_id = artistIdList[i * n_days]
        plt.plot_date(xData, yRealData, fmt='-^g', label='real')
        plt.plot_date(xData, yPredictData, fmt='-vr', label='predict')
        plt.plot_date(xData, yTrainData, fmt='-ob', label='train')
        plt.legend(loc='best', shadow=True)
        plt.xlabel('day')
        plt.ylabel('plays')
        plt.title(artist_id)
        pdf.savefig(fig)
        plt.close()
    pdf.close()
def score(data):
    # Get the prediction
    prediction = predict(data, our_model, final_columns, category_features, averages)
    # save the data and prediction in our db.
    return prediction
def elo_predict(answer, data):
    current_skills, difficulties, place_first_answers_nums, prior_skill, user_first_answers_num = data
    if 'number_of_options' in answer and answer['number_of_options'] != len(answer['options']):
        # backward compatibility
        return model.predict_simple(current_skills[0], answer['number_of_options'])
    else:
        return model.predict(current_skills[0], current_skills[1:])
def pfa_predict(user_id, place_asked_id, options, question_type, inserted, data, time_shift=DEFAULT_TIME_SHIFT):
    current_skills, last_times = data
    seconds_ago = map(
        lambda x: (inserted - x).total_seconds() if x is not None else 315360000,
        last_times)
    current_skills = map(
        lambda (skill, secs): skill + time_shift / max(secs, 0.001),
        zip(current_skills, seconds_ago))
    return model.predict(current_skills[0], current_skills[1:])
def model_testing_independent():
    """Train a model and test it with an independent dataset."""
    print 'Training model and testing it with an independent dataset.'

    #-- directory config
    db_dir = r'/nfs/t2/atlas/database'
    base_dir = r'/nfs/h1/workingshop/huanglijie/autoroi'
    doc_dir = os.path.join(base_dir, 'doc')
    data_dir = os.path.join(base_dir, 'multi-atlas', 'l_sts')

    #-- load session ID list for training
    sessid_file = os.path.join(doc_dir, 'sessid')
    sessid = open(sessid_file).readlines()
    sessid = [line.strip() for line in sessid]

    #-- parameter config
    class_label = [8, 10, 12]
    atlas_num = [40]
    #atlas_num = [1, 5] + range(10, 201, 10)
    #atlas_num = range(1, 201)

    #-- model training
    forest_list, classes_list, spatial_ptn = model.train(sessid, data_dir)

    #-- load mask coordinate derived from training dataset
    mask_coords = lib.load_mask_coord(data_dir)

    #-- load testing dataset
    test_dir = r'/nfs/h1/workingshop/huanglijie/autoroi/multi-atlas/group08'
    loc_dir = os.path.join(test_dir, 'localizer')
    pred_dir = os.path.join(test_dir, 'predicted_files', 'l_sts')
    test_sessid_file = os.path.join(test_dir, 'sessid')
    test_sessid = open(test_sessid_file).readlines()
    test_sessid = [line.strip() for line in test_sessid]

    for subj in test_sessid:
        zstat_file = os.path.join(loc_dir, subj + '_face_obj_zstat.nii.gz')
        feature_name, sample_data = lib.ext_sample(zstat_file, mask_coords, class_label)
        model.predict(sample_data, atlas_num, pred_dir, subj + '_pred.nii.gz',
                      class_label, forest_list, classes_list, spatial_ptn)
def predict():
    data = request.json
    prediction = model.predict(data)
    data['prediction'] = prediction
    db.save_prediction(data)
    resp = {
        'prediction': prediction
    }
    return resp
def process_answer(self, user_id, place_asked_id, place_answered_id, options, inserted):
    skill = self.get_skill(user_id, place_asked_id)
    correct = place_asked_id == place_answered_id
    asked_pred, options_pred = model.predict(
        skill,
        map(lambda i: self.get_skill(user_id, i), options))
    if correct:
        skill += 3.4 * (correct - asked_pred)
    else:
        skill += 0.3 * (correct - asked_pred)
    self._current[user_id, place_asked_id] = skill
async def predict(params: predict_text):
    """
    Predicts the polarity of the tweet and returns its similarity score.
    """
    tweet = params.text
    prediction = tf_model.predict(tweet)
    prediction_db = PredictionModel(
        text=tweet,
        label=prediction["label"],
        score=prediction["score"],
        time=prediction["elapsed_time"],
    )
    db.session.add(prediction_db)
    db.session.commit()
    return prediction
def range_predict(model, X_test, Y_test, params, batch_size=1):
    """
    Make a prediction for a range of input values, by saturating the lstm.
    Returns the predictions (unscaled) and the number of errors.
    """
    input_shape = (1, params['lstm_timesteps'], len(params['columNames']))
    preds = zeros(X_test.shape[0])
    for i in range(0, X_test.shape[0]):
        input_vector = X_test[i].reshape(input_shape)
        # Make a prediction, saturating
        for k in range(0, params['num_saturations']):
            y_hat = model.predict(input_vector, batch_size=batch_size)
        model.reset_states()
        preds[i] = y_hat
    rmse, num_errors = compute.error(Y_test, preds)
    return (preds, rmse, num_errors)
def predict(self, payload):
    question = payload['question'].replace("\\/", "/").encode().decode('unicode_escape')
    html_article = payload['article'].replace("\\/", "/").encode().decode('unicode_escape')
    context = extract_text(html_article)
    answer = predict(question, context, tokenizer=self.tokenizer, model=self.model)
    payload['reader'] = 0
    if len(answer[0]) > 0:
        H, T, img = post_process(html_article, answer, payload['html_url'], tokenizer=self.tokenizer)
        if H != '':
            payload['html_snippet'], payload['text_snippet'], payload['images'] = H, T, img
            payload['reader'] = 1
    return JSONResponse(content=payload)
def predict(military_time, lat, longitude, age, gender) -> float:
    # get proximity score
    location_node = [float(lat), float(longitude)]
    proximity_score = proximity.get_proximity(location_node)

    # get prediction of emergency
    sin_time = encoding.sin_time(
        encoding.military_time_in_minutes_fn(military_time))
    cos_time = encoding.cos_time(
        encoding.military_time_in_minutes_fn(military_time))
    prediction = model.predict(
        np.asarray([int(age), int(gender), sin_time, cos_time]).reshape(1, -1))

    # return multiplication
    # This returns a "safety score".
    # convert to a percentage if necessary by multiplying with 100
    return str(1 - (proximity_score * prediction))
def predict_with_pretrain_model(sample, model):
    # fix dataset
    sample = -sample + 255
    img = Image.fromarray(sample)
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    sample = transform(img).float()
    sample = Variable(sample, volatile=True)
    sample = sample.unsqueeze(0)
    model = model.Net()
    model.load_state_dict(torch.load('./results/lenet.pkl'))
    out = model.predict(sample)
    return out.data[0].tolist()
def upload():
    target = os.path.join(APP_ROUTE, 'images/')
    print(target)
    if not os.path.isdir(target):
        os.mkdir(target)
    file = request.files.getlist("file")[0]
    filename = file.filename
    destination = "/".join([target, filename])
    print(destination)
    file.save(destination)
    result = predict(destination)
    print(result)
    return render_template("complete.html", prediction=result)
def prediction():
    """
    Get request, make a prediction and answer back to sender
    """
    # get data
    r = request
    img_url = r.data

    # predict
    prediction = model.predict(img_url)

    # build a response dict to send back to client
    response = {'message': prediction}
    return jsonify(str(response))
def update():
    conn = sqlite3.connect('data/textpile.db')
    docs, labels = query_train(conn)
    model = train(docs, labels, **param)
    docs, doc_ids = query_predict(conn)
    preds = predict(model, docs, doc_ids, topk=1000)
    conn.execute('DELETE FROM doc_relevance')
    sql = 'INSERT INTO doc_relevance (doc_id, relevance, explain_json) VALUES (?,?,?)'
    res = ((id, sco, json.dumps(exp)) for id, lab, sco, exp in preds)
    conn.executemany(sql, res)
    sql = 'UPDATE meta SET value = ? WHERE key = \'last_updated\''
    now = dt.datetime.utcnow().isoformat(' ')[:19]
    conn.execute(sql, [now])
    conn.commit()
def get_result():
    tol = 0.80
    pictures = glob.glob("./test/**/*.png")
    res = [predict(path)[0][0] for path in tqdm(pictures)]
    tp, tn, fp, fn = 0, 0, 0, 0
    for i in range(700):
        if res[i] > tol:
            tp += 1
        else:
            fn += 1
    for i in range(700, 1400):
        if res[i] > tol:
            fp += 1
        else:
            tn += 1
    return tp, tn, fp, fn, len(pictures)
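
# A minimal follow-on sketch (not part of the original script): turning the raw counts
# returned by get_result() into precision/recall/accuracy. It assumes the same fixed
# 700-positive / 700-negative split used in the loops above.
def summarize_result(tp, tn, fp, fn):
    precision = tp / float(tp + fp) if (tp + fp) else 0.0
    recall = tp / float(tp + fn) if (tp + fn) else 0.0
    accuracy = (tp + tn) / float(tp + tn + fp + fn)
    return precision, recall, accuracy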
def main():
    import time
    from sklearn.metrics import confusion_matrix

    # labeled sample
    l_data_path = "/home/kzk/datasets/uci_csv_ssl_lrate_fixed_1_50_1_98/car/4_l.csv"
    data_l = np.loadtxt(l_data_path, delimiter=" ")
    data_l = np.hstack(
        (data_l, np.reshape(np.ones(data_l.shape[0]), (data_l.shape[0], 1))))
    y_l = data_l[:, 0]
    X_l = data_l[:, 1:]

    # unlabeled sample
    u_data_path = "/home/kzk/datasets/uci_csv_ssl_lrate_fixed_1_50_1_98/car/4_u.csv"
    data_u = np.loadtxt(u_data_path, delimiter=" ")
    data_u = np.hstack(
        (data_u, np.reshape(np.ones(data_u.shape[0]), (data_u.shape[0], 1))))
    X_u = data_u[:, 1:]

    # test sample
    t_data_path = "/home/kzk/datasets/uci_csv_ssl_lrate_fixed_1_50_1_98/car/4_t.csv"
    data_t = np.loadtxt(t_data_path, delimiter=" ")
    data_t = np.hstack(
        (data_t, np.reshape(np.ones(data_t.shape[0]), (data_t.shape[0], 1))))
    y_t = data_t[:, 0]
    X_t = data_t[:, 1:]

    # learn
    st = time.time()
    model = RegularizedHPFSSLClassifier(max_itr=10, threshold=1e-4,
                                        learn_type="online", multi_class="ovo")
    model.learn(X_l, y_l, X_u)
    et = time.time()
    print "Elapsed time: %f [s]" % (et - st)

    # predict
    outputs = []
    for i, x in enumerate(X_t):
        outputs_ = model.predict(x)
        outputs.append(outputs_[0][0])

    # confusion matrix
    cm = confusion_matrix(y_t, outputs)
    print cm
    print 100.0 * np.sum(cm.diagonal()) / len(y_t)
def recognize(image):
    # get mask of bacteria on one frame of video
    shift = 100  # step for sliding window
    size = 100   # sliding window size
    image = np.expand_dims(image, axis=2) / 255  # add a channel dimension and rescale to [0, 1]
    output = np.zeros([image.shape[0], image.shape[1]], dtype="float32")  # raw output mask
    buf1 = np.zeros([1, size, size, 1], dtype="float32")  # buffer 1
    buf2 = np.zeros([size, size], dtype="float32")  # buffer 2
    buf3 = np.zeros([size, size], dtype="float32")  # buffer 3
    for x_shift in range(math.ceil(image.shape[0] / shift)):  # process frame with sliding window
        for y_shift in range(math.ceil(image.shape[1] / shift)):
            x = shift * x_shift
            y = shift * y_shift
            piece = image[x:x + size, y:y + size, :]  # piece of image
            buf1[0, :piece.shape[0], :piece.shape[1], :] = piece
            buf2 = (model.predict(buf1))[0, :, :, 0]  # predict for the piece of image
            output[x:x + piece.shape[0], y:y + piece.shape[1]] += buf2[:piece.shape[0], :piece.shape[1]]
    return output
def get(self):
    # use parser and find the user's query
    args = parser.parse_args()
    user_query = args['query']

    # vectorize the user's query and make a prediction
    uq_vectorized = model.vectorizer_transform(np.array([user_query]))
    prediction = model.predict(uq_vectorized)
    pred_proba = model.predict_proba(uq_vectorized)

    # round the predict proba value and set to new variable
    confidence = round(pred_proba[0], 3)

    # create JSON object
    output = {'intent': prediction.item(0), 'probability': str(confidence)}
    return output
def plotROC(X, y, theta, thresholds, **kwargs):
    '''Plots the ROC (Receiver Operating Characteristic) curve'''
    tpr, fpr = [], []
    for t in thresholds:
        predictions = mod.predict(X, theta, threshold=t)
        cnf = mod.confMatrix(predictions, y)
        tpr.append(cnf[1][1] / (cnf[1][1] + cnf[1][0]))
        fpr.append(cnf[0][1] / (cnf[0][1] + cnf[0][0]))
    createPlot(title=["Receiver Operating Characteristic Curve"],
               xlabel='False Positive Rate', ylabel='True Positive Rate', **kwargs)
    plt.plot(fpr, tpr)
    plt.xlim(0, 1)
    plt.ylim(0, 1)
def get_predication():
    res = {}
    #print(header , "Header printing")
    data = request.get_json()
    name, age, lat, lng = data['name'], data['age'], data['lat'], data['lng']
    disable, outdoor, preparation = data['disable'], data['outdoor'], data['preparation']
    community, local_support = data['community'], data['local_support']
    environment, asset_protection = data['environment'], data['asset_protection']
    res['response'] = 'OK'
    res['output'] = predict(name, age, lat, lng, disable, outdoor, preparation,
                            community, local_support, environment, asset_protection)
    return jsonify(res)
def socket_predict(message):
    text = model.preprocess(message['data'])
    annotations = model.predict(text)
    annotations[0] = annotations[0][1:-1]  # remove first and last [CLS] + [SEP]
    text_tokens, labels = model.align_tokenization_with_labels(text.split(), annotations)
    print(text_tokens)
    print(labels)
    ex = model.generate_sens_viz_cont(text_tokens[0], labels[0][:len(labels[0])])
    ex['settings'] = {}
    html = displacy.render(ex, style='ent', manual=True,
                           options={'colors': VIZ_COLOR_OPTIONS})
    emit('annotations', {'data': html})
def predict():
    if request.method == "POST":
        #data = request.form.get('input') works if you send the data as form data
        data = request.json.get('input')
        #pred, p, n = model.predict("posiwords.txt", "negawords.txt", "allwords.txt", data)
        pred, p, n, rating = model.predict("posiwords.txt", "negawords.txt", "allwords.txt", data)
        #return str(pred)
        if (pred == -1):
            return "The predicted movie review is NEGATIVE. " + "Rating: " + str(rating)
        else:
            return "The predicted movie review is POSITIVE. " + "Rating: " + str(rating)
    else:
        return "No review available"
def run():
    ser = wait_until_serial_port_is_available_and_connect()
    model_path = 'model.joblib'
    if os.path.isfile(model_path) == False:
        print("Training model... This may take some time")
        dataset_path = 'dataset.csv'
        if os.path.isfile(dataset_path) == True:
            model = train_model(dataset_path, model_path, train_test_split_var=False, debug=False)
            print("Model trained!")
        else:
            print("Training unavailable: dataset missing or wrong path")
            return "exit"
    else:
        model = load(model_path)
    google_docs_open = False  # False means Google Docs is closed, True means it is open
    gmail_open = False
    while (1):
        min_confidence = 0.70
        data, threshold_times_crossed = serial_signal_read(ser)
        peaks = find_peaks_num(data)
        gesture_done, confidence = predict([data + [threshold_times_crossed] + [peaks]], model)
        if confidence > min_confidence:
            if gesture_done == 1:
                if google_docs_open == False and gmail_open == False:
                    gesture_1_open_google_docs()
                    google_docs_open = True
                elif google_docs_open == True and gmail_open == False:
                    gesture_1_close()
                    google_docs_open = False
                elif google_docs_open == False and gmail_open == True:
                    gesture_1_close()
                    gmail_open = False
            elif gesture_done == 2 and google_docs_open == True:
                gesture_2_toggle_speach_writing()
            elif gesture_done == 3:
                if google_docs_open == True:
                    gesture_3_selec_copy_paste_to_new_email()
                elif google_docs_open == False and gmail_open == False:
                    gesture_3_seach_IVAD_mails()
                    gmail_open = True
        else:
            # gesture not detected with sufficient confidence.
            pass
async def post(self):
    try:
        # parse request body
        data = tornado.escape.json_decode(self.request.body)
        data['pipeline'] = data['pipeline'].lower()
        data['model_name'] = '%s.%s.%s' % (data['pipeline'].replace('/', '__'),
                                           data['process'], data['target'])

        # perform model prediction
        results = Model.predict(data['model_name'], data['inputs'])

        self.set_status(200)
        self.set_header('content-type', 'application/json')
        self.write(tornado.escape.json_encode(results))
    except Exception as e:
        self.set_status(404)
        self.write(message(404, 'Failed to perform model prediction'))
        raise e
def index():
    if request.method == 'POST':
        uploaded_file = request.files['file']
        if uploaded_file.filename != '':
            if not os.path.exists('static'):
                os.makedirs('static')
            image_path = os.path.join('static', uploaded_file.filename)
            uploaded_file.save(image_path)
            has_problem = request.form.get('problem')
            class_name = model.predict(image_path, has_problem)
            result = {
                'class_name': class_name,
                'image_path': image_path,
            }
            return render_template('result.html', result=result)
    return render_template('index.html')
def get_recommendation():
    '''
    Get top 5 recommended products
    '''
    if request.method == 'POST':
        username = request.form['uname']
        out_data = [[]]
        title = ['Index', 'Product']
        infotext = "Invalid user! Please enter a valid user name."
        if len(username) > 0:
            infotext, out_data = model.predict(username)
        return render_template('index.html', info=infotext, data=out_data, headings=title)
    else:
        return render_template('index.html')
def test_common_usage(self):
    # setup
    recommend_fun = self.recommend_fun()
    if recommend_fun is None:
        return
    env = environment.InMemoryEnvironment()
    stream = model.DefaultAnswerStream(env)
    self.prepare_stream(stream)
    # test
    recommended = recommend_fun(0, range(100), env, 10)
    for target, options in recommended:
        skills = env.current_skills(
            [0 for i in range(len(options) + 1)],
            [target] + options)
        prediction = model.predict(skills[0], skills[1:])[0]
        if env.rolling_success(0) < 0.5:
            self.assertGreater(prediction, 0.5)
        else:
            self.assertLess(prediction, 0.5)
def get(self):
    # use parser and find the user's query
    args = parser.parse_args()
    user_query = args['query']

    # preprocess the user's query and make a prediction
    uq_preprocess = model.numericalImputer_transform(np.array([user_query]))
    prediction = model.predict(uq_preprocess)
    pred_proba = model.predict_proba(uq_preprocess)

    # round the predict proba value and set to new variable
    confidence = round(pred_proba[0], 3)

    # create JSON object
    output = {'prediction': str(prediction), 'probability': confidence}
    return output
def predict(model, sess, iterator, iterator_feed_dict):
    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)
    concat_predictions = {}
    batch_count = 0
    while True:
        try:
            batch_count += 1
            predictions = model.predict(sess)
            if "probabilities" not in concat_predictions:
                concat_predictions["probabilities"] = predictions["probabilities"]
            else:
                concat_predictions["probabilities"] = np.append(
                    concat_predictions["probabilities"], predictions["probabilities"], axis=0)
            if "classes" not in concat_predictions:
                concat_predictions["classes"] = predictions["classes"]
            else:
                concat_predictions["classes"] = np.append(
                    concat_predictions["classes"], predictions["classes"], axis=0)
        except tf.errors.OutOfRangeError:
            break
    return concat_predictions
def main(self, SAVE_DIR="images/favorite", ALLOCATE_PATH="images/allocate",
         MODEL_NAME="use_model/model.h5", IMG_SIZE=256, CLASSES=3):
    basic.mkdir(Path(ALLOCATE_PATH))
    target_list = []
    target_list.extend(Path(SAVE_DIR).glob("*.jpg"))
    target_list.extend(Path(SAVE_DIR).glob("*.png"))
    for target in target_list:
        start = time.time()
        predict = model.predict(target, MODEL_NAME, IMG_SIZE, CLASSES)
        elapsed_time = time.time() - start
        print("time: {}".format(elapsed_time))
        target_genre = self.genre[np.argmax(predict)]
        basic.read_write_img(
            str(Path(ALLOCATE_PATH, target_genre, target.name)), target)
def index():
    try:
        ambient
    except NameError:
        ambient = 25
        module = 30
        irradiation = 0.25
        prediction = 3363
    prediction = int(
        predict(float(ambient), float(module), float(irradiation)))
    return render_template('index.html',
                           ambient=ambient,
                           module=module,
                           irradiation=irradiation,
                           prediction=prediction)
def index():
    if request.method == 'POST':
        url = request.form['url']
        predict = model.predict(url)
        value = predict[1]
        clickbait = predict[2]
        text = predict[3]
        article_title = predict[0]
        model.update(value)
        model.update(clickbait)
        return render_template('results.html',
                               value=value,
                               clickbait=clickbait,
                               text=text,
                               article_title=article_title,
                               url=url)
    else:
        return render_template('index.html')
def run_holts(train, validate, target_variable, exponential, smoothing_level=.1, smoothing_slope=.1):
    # Create model object
    model = Holt(train[target_variable], exponential=exponential)
    # Fit model
    model = model.fit(smoothing_level=smoothing_level,
                      smoothing_slope=smoothing_slope,
                      optimized=False)
    # Create predictions
    y_pred = model.predict(start=validate.index[0], end=validate.index[-1])
    return model, y_pred
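
# A minimal usage sketch for run_holts (not from the original source). It assumes a
# pandas DataFrame with a DatetimeIndex split into train/validate pieces, that Holt is
# imported from statsmodels (from statsmodels.tsa.api import Holt), and that the
# installed statsmodels version still accepts the smoothing_slope keyword used above.
import numpy as np
import pandas as pd

idx = pd.date_range('2020-01-01', periods=100, freq='D')
df = pd.DataFrame({'plays': np.random.rand(100).cumsum()}, index=idx)
train_df, validate_df = df.iloc[:80], df.iloc[80:]
holt_model, holt_pred = run_holts(train_df, validate_df, 'plays', exponential=False)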
def model_predict(model, features, labels, batch_size):
    """Runs prediction on a given model w.r.t. features, labels, and a batch size.

    Args:
        model: a tf.estimator.EstimatorSpec model
        features: map of feature to list of values
        labels: list of values
        batch_size: size of batches

    Returns:
        generator
    """
    def predict_input_fn(features, labels, batch_size):
        return tf.data.Dataset.from_tensor_slices(
            (dict(features), labels)).batch(batch_size)

    return model.predict(
        lambda: predict_input_fn(features, labels, batch_size))
def post():
    blockchain_output = ''
    post_output = ''
    if request.method == 'POST':
        form = request.form.to_dict()
        if len(form.keys()) == 0:
            form = json.loads(request.data)
        classified = predict(form['message'])
        form['category'] = classified[0]
        blockchain_output = requests.post(
            'https://wx44n042ha.execute-api.us-east-1.amazonaws.com/alpha/ourblockreportloglambda',
            json=form).text
        post_output = requests.post(
            'https://gony0gqug0.execute-api.us-east-1.amazonaws.com/beta/post',
            json=form).text
        with open(log_path, 'a') as file:
            file.write(str(form) + '\n')
    body_dict = {'blockchain_output': blockchain_output, 'post_output': post_output}
    resp = Response(json.dumps(body_dict))
    resp.headers['Access-Control-Allow-Origin'] = '*'
    resp.headers["Access-Control-Allow-Headers"] = 'Origin, X-Requested-With, Content-Type, Accept'
    return resp
async def predict_beer_style_multi(brewery_name: int, review_aroma: int, review_appearance: int,
                                   review_palate: int, review_taste: int):
    features = format_features(brewery_name, review_aroma, review_appearance,
                               review_palate, review_taste)
    obs = pd.DataFrame(features)
    # scale num cols
    obs = apply_scaler(obs)
    # transform to embed object
    obs_tensor = single_tensor(obs)
    # predict on embed obj
    model = get_model()
    # return predictions as text string
    answer = predict(obs_tensor, model)
    return answer
def test_algo(model):
    state = init_grid()
    print display_grid(state)
    status = 1
    i = 0
    while (status == 1):
        i += 1
        qval = model.predict(state.reshape(1, 64), batch_size=1)
        action = np.argmax(qval)
        state = make_move(state, action)
        print display_grid(state)
        reward = get_reward(state)
        print reward
        if reward != -1:
            status = 0
        if i > 10:
            status = 0
            print "Too many moves"
def eval_model(model, test_data, test_labels):
    # testset accuracy
    preds_test = model.predict(test_data)
    preds_test_num = np.argmax(preds_test, axis=1)
    classes = list(set(test_labels))
    classes.sort()
    acc_per_class = []
    for i in range(len(classes)):
        instance_class = [
            j for j in range(len(test_labels)) if test_labels[j] == classes[i]
        ]
        acc_i = accuracy_score(test_labels[instance_class],
                               preds_test_num[instance_class])
        acc_per_class.append(acc_i)
    acc = accuracy_score(test_labels, preds_test_num)
    f1 = f1_score(test_labels, preds_test_num, average='macro')
    return acc, f1, acc_per_class
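
# A small self-contained exercise of eval_model (not from the original source). It uses
# a hypothetical DummyModel whose predict() returns per-class scores, plus random labels,
# just to demonstrate the per-class accuracy and macro-F1 computation above.
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

class DummyModel(object):
    def __init__(self, n_classes):
        self.n_classes = n_classes

    def predict(self, data):
        # random "probabilities", one row per sample
        return np.random.rand(len(data), self.n_classes)

test_data = np.random.rand(50, 10)
test_labels = np.random.randint(0, 3, size=50)
acc, f1, acc_per_class = eval_model(DummyModel(3), test_data, test_labels)
print(acc, f1, acc_per_class)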
def answers(self, user_ids, place_ids, env, n):
    user_knowledge_provider = self.user_knowledge_provider(user_ids, place_ids)
    stream = self.stream(env)
    activity = self.activity(user_ids)
    answers = []
    for i in range(n):
        user_id, inserted = activity.next_activity()
        [(place_asked_id, options)] = self.recommend_question(user_id, place_ids, env, 1)
        asked_pred, options_pred = model.predict(
            user_knowledge_provider.get_skill(user_id, place_asked_id),
            map(lambda i: user_knowledge_provider.get_skill(user_id, i), options))
        r = random.random()
        if r < asked_pred:
            place_answered_id = place_asked_id
        elif len(options) == 0:
            place_answered_id = random.choice(place_ids)
        else:
            acc = asked_pred
            place_answered_id = None
            for o, p in zip(options, options_pred):
                acc += p
                if r < acc:
                    place_answered_id = o
        answer = {
            'place_asked': place_asked_id,
            'place_answered': place_answered_id,
            'user': user_id,
            'options': options,
            'inserted': inserted,
            'id': i
        }
        stream.stream_answer(answer)
        user_knowledge_provider.process_answer(
            user_id, place_asked_id, place_answered_id, options, inserted)
        answers.append(answer)
    return answers
def bi_user(m, tuples, k, func, func_w):
    with open('u2U.txt', 'r') as f1:
        u2U = pickle.loads(f1.read())
    with open('m2Ucluster.txt', 'r') as f2:
        m2Ucluster = pickle.loads(f2.read())
    m2Ucluster = m2Ucluster.transpose()
    res = []  # return the list of predictions given the query tuples
    m2Ucluster = np.asarray(m2Ucluster, order='F')
    simPair = model.similarityPair(m2Ucluster, func)
    for pair in tuples:
        #print pair[0]
        c = findCentroid(u2U, pair[1])
        sim = simPair[c]
        temp = model.knn(sim, k)
        prediction = model.predict(m2Ucluster, temp, sim, func_w)
        pred = prediction[pair[0]] + 3  # plus 3
        if pred > 5:
            pred = 5
        elif pred < 1:
            pred = 1
        res.append(pred)
        #print pred
    return res
def train():
    print "Building training graph ..."
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-FLAGS.init_scale, FLAGS.init_scale)
        with tf.variable_scope("char-rnn", initializer=initializer):
            keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
            cell = model.build_cell(keep_prob)
            inputs = tf.placeholder(dtype=tf.int32, shape=[FLAGS.batch_size, FLAGS.num_steps],
                                    name='inputs')
            targets = tf.placeholder(dtype=tf.int32, shape=[FLAGS.batch_size, FLAGS.num_steps],
                                     name='targets')
            lr = tf.placeholder(dtype=tf.float32, shape=[], name='learning_rate')
            initial_state = tf.placeholder(dtype=tf.float32,
                                           shape=[FLAGS.batch_size, cell.state_size],
                                           name='initial_state')
            logits, final_state = model.predict(inputs, cell, initial_state, keep_prob)
            loss = model.loss(logits, targets)
            train_op = model.train_batch(loss, lr)

        # create saver and summary
        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()

        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph_def=sess.graph_def)

        # load data
        print "Loading data ..."
        reader = text_input.TextReader(os.path.join(FLAGS.data_dir, FLAGS.data_file))
        reader.prepare_data()
        train_loader = text_input.DataLoader(os.path.join(FLAGS.data_dir, 'train.cPickle'),
                                             FLAGS.batch_size, FLAGS.num_steps)
        test_loader = text_input.DataLoader(os.path.join(FLAGS.data_dir, 'test.cPickle'),
                                            FLAGS.batch_size, FLAGS.num_steps)

        total_steps = FLAGS.num_epochs * train_loader.num_batch
        save_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        zero_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32).eval(session=sess)
        global_step = 0

        def eval(sess, loader, state):
            test_loss = 0.
            for _ in xrange(loader.num_batch):
                x_batch, y_batch = loader.next_batch()
                feed = {inputs: x_batch, targets: y_batch, keep_prob: 1., initial_state: state}
                state, loss_value = sess.run([final_state, loss], feed_dict=feed)
                test_loss += loss_value
            return test_loss / loader.num_batch

        # training
        for epoch in xrange(FLAGS.num_epochs):
            current_lr = FLAGS.init_lr * (FLAGS.lr_decay ** (max(epoch - FLAGS.decay_after + 1, 0)))
            state = zero_state
            training_loss = 0.
            for _ in xrange(train_loader.num_batch):
                global_step += 1
                start_time = time.time()
                x_batch, y_batch = train_loader.next_batch()
                feed = {inputs: x_batch, targets: y_batch, keep_prob: (1. - FLAGS.dropout),
                        lr: current_lr, initial_state: state}
                state, loss_value, _ = sess.run([final_state, loss, train_op], feed_dict=feed)
                duration = time.time() - start_time
                training_loss += loss_value

                if global_step % FLAGS.log_steps == 0:
                    format_str = ('%s: step %d/%d (epoch %d/%d), loss = %.2f (%.3f sec/batch), lr: %.5f')
                    print(format_str % (datetime.now(), global_step, total_steps, epoch + 1,
                                        FLAGS.num_epochs, loss_value, duration, current_lr))

                if global_step % FLAGS.summary_steps == 0:
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, global_step)

            if epoch % FLAGS.save_epochs == 0:
                saver.save(sess, save_path, global_step)
            train_loader.reset_pointer()

            # epoch summary
            training_loss /= train_loader.num_batch
            summary_writer.add_summary(_summary_for_scalar('training_loss', training_loss), global_step)

            test_loss = eval(sess, test_loader, zero_state)
            test_loader.reset_pointer()
            summary_writer.add_summary(_summary_for_scalar('test_loss', test_loss), global_step)
            print("Epoch %d: training_loss = %.2f, test_loss = %.2f" % (epoch + 1, training_loss, test_loss))
def elo_predict(user_id, place_asked_id, options, question_type, inserted, data):
    current_skills, difficulties, place_first_answers_nums, prior_skill, user_first_answers_num = data
    return model.predict(current_skills[0], current_skills[1:])
if __name__ == '__main__':
    # If the pickle object doesn't exist or the rebuild option
    # is given by the command line.
    if (not os.path.exists(filename_pickle)) or \
            (len(sys.argv) > 1 and sys.argv[1] == 'rebuild'):
        (built_model, final_columns, averages) = buildmodel(label_name,
                                                            category_features,
                                                            non_category_features,
                                                            model=our_model,
                                                            save=True)
        print "Built the model, and saved as pickle"
    else:
        print "Opening pickle..."
        with open(filename_pickle, 'r') as f:
            (built_model, final_columns, averages) = pickle.load(f)
    our_model = built_model

    data = json.dumps({"org_name": "DREAM Project Foundation", "name_length": 51, "event_end": 1363928400, "venue_latitude": 42.9630578, "event_published": 1361978554.0, "user_type": 1, "channels": 11, "currency": "USD", "org_desc": "", "event_created": 1361291193, "event_start": 1363914000, "has_logo": 1, "email_domain": "dreamprojectfoundation.org", "user_created": 1361290985, "payee_name": "", "payout_type": "ACH", "venue_name": "Grand Rapids Brewing Co", "sale_duration2": 30, "venue_address": "1 Ionia Avenue Southwest", "approx_payout_date": 1364360400, "org_twitter": 13.0, "gts": 537.4, "listed": "y", "ticket_types": [{"event_id": 5558108, "cost": 50.0, "availability": 1, "quantity_total": 125, "quantity_sold": 10}], "org_facebook": 13.0, "num_order": 7, "user_age": 0, "body_length": 1474, "description": "<p><span style=\"font-size: medium; font-family: 'book antiqua', palatino;\">Come enjoy a night of music and beer tasting at the new Grand Rapids Brewery while we make an effort to create awareness and raise funds for Dream Project Foundation. The night will include music, Grand Rapids Brewery's finest beer to sample, heavy hors d'oeuvre's and silent auction of artwork directly from the young artists of Dream House.</span></p>\r\n<p> </p>\r\n<p>Who We Are:</p>\r\n<p>DREAM Project Foundation is a small American 501c3 registered non-profit organization, working to break the cycle of human trafficking through community development. As a small, grass roots organization, we focus primarily on prevention and protection which begins with shelter and continues with education, so those vulnerable are aware of the dangers and able to protect themselves.</p>\r\n<p>DREAM Project Foundation was officially founded in 2011 to support the DREAM House children's home based in Thailand on the border of Myanar (Burma). While helping children stay safe from the trafficing is the heart of our mission, we know that in order to end trafficking it must be a collaborative effort for all people and communities. </p>\r\n<p>We at DREAM Project Foundation are determined to fight against this atrocity, focusing on the factors that cause people to be vulnerable targets to traffickers, with most of our work based in SE Asia as it is a major international hub of human trafficking.</p>", "object_id": 5558108, "venue_longitude": -85.6706147, "venue_country": "US", "previous_payouts": [{"name": "", "created": "2013-04-19 03:25:42", "country": "US", "state": "", "amount": 500.0, "address": "", "zip_code": "", "event": 5558108, "uid": 52778636}], "sale_duration": 22.0, "num_payouts": 0, "name": "DREAM Project Foundation - Taste of a Better Future", "country": "US", "delivery_method": 0.0, "has_analytics": 0, "fb_published": 0, "venue_state": "MI", "has_header": 'null', "show_map": 1})

    #print data
    print 'score', score(json.loads(data))

    with open('data/test_new.json') as f:
        test_data = json.load(f)
    for ddd in test_data:
        print ddd['acct_type'], predict(ddd, our_model, final_columns,
                                        category_features, averages)
# Print best validation accuracy and epoch in valid_set
max_val_acc, idx = max((val, idx) for (idx, val) in enumerate(history.history['val_acc']))
print('Maximum accuracy at epoch', '{:d}'.format(idx + 1), '=', '{:.4f}'.format(max_val_acc))

# plot the result
import matplotlib.pyplot as plt

plt.figure()
plt.plot(history.epoch, history.history['acc'], label="acc")
plt.plot(history.epoch, history.history['val_acc'], label="val_acc")
plt.scatter(history.epoch, history.history['acc'], marker='*')
plt.scatter(history.epoch, history.history['val_acc'])
plt.legend(loc='lower right')
plt.show()

plt.figure()
plt.plot(history.epoch, history.history['loss'], label="loss")
plt.plot(history.epoch, history.history['val_loss'], label="val_loss")
plt.scatter(history.epoch, history.history['loss'], marker='*')
plt.scatter(history.epoch, history.history['val_loss'], marker='*')
plt.legend(loc='lower right')
plt.show()

score, acc = model.evaluate(X_test, test_label, batch_size=BATCH_SIZE)
print('Test score:', score)
print('Test accuracy:', acc)
predictions = model.predict(X_test)
preditFval(predictions, test_label)
def main():
    init()
    fit()
    predict(isOffline=ISOFFLINE)
#! /usr/bin/env python
"""
@author: dell
"""
if __name__ == "__main__":
    import music
    import model
    train_examples = music.load_examples('data/train.pkl')
    model.learn(train_examples)
    test_examples = music.load_examples('data/test.pkl')
    test_ratings = model.predict(test_examples)
    for i in range(len(test_examples)):
        test_examples[i]['rating'] = test_ratings[i]
    music.write_examples('submissions/zmusic_predictions.csv', test_examples)
def main(args):
    model_id = build_model_id(args)
    model_path = build_model_path(args, model_id)
    setup_model_dir(args, model_path)
    sys.stdout, sys.stderr = setup_logging(args, model_path)

    x_train, y_train = load_model_data(args.train_file, args.data_name, args.target_name)
    x_validation, y_validation = load_model_data(
        args.validation_file, args.data_name, args.target_name)

    rng = np.random.RandomState(args.seed)

    if args.n_classes > -1:
        n_classes = args.n_classes
    else:
        n_classes = max(y_train) + 1

    n_classes, target_names, class_weight = load_target_data(args, n_classes)

    if len(class_weight) == 0:
        n_samples = len(y_train)
        print('n_samples', n_samples)
        print('classes', range(n_classes))
        print('weights', n_samples / (n_classes * np.bincount(y_train)))
        class_weight = dict(zip(range(n_classes),
                                n_samples / (n_classes * np.bincount(y_train))))
        print('class_weight', class_weight)

    logging.debug("n_classes {0} min {1} max {2}".format(
        n_classes, min(y_train), max(y_train)))

    y_train_one_hot = np_utils.to_categorical(y_train, n_classes)
    y_validation_one_hot = np_utils.to_categorical(y_validation, n_classes)

    logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
    logging.debug("x_train " + str(x_train.shape))

    min_vocab_index = np.min(x_train)
    max_vocab_index = np.max(x_train)
    logging.debug("min vocab index {0} max vocab index {1}".format(
        min_vocab_index, max_vocab_index))

    json_cfg = load_model_json(args, x_train, n_classes)

    logging.debug("loading model")

    sys.path.append(args.model_dir)
    import model
    from model import build_model

    #######################################################################
    # Subsetting
    #######################################################################
    if args.subsetting_function:
        subsetter = getattr(model, args.subsetting_function)
    else:
        subsetter = None

    def take_subset(subsetter, path, x, y, y_one_hot, n):
        if subsetter is None:
            return x[0:n], y[0:n], y_one_hot[0:n]
        else:
            mask = subsetter(path)
            idx = np.where(mask)[0]
            idx = idx[0:n]
            return x[idx], y[idx], y_one_hot[idx]

    x_train, y_train, y_train_one_hot = take_subset(
        subsetter, args.train_file,
        x_train, y_train, y_train_one_hot,
        n=args.n_train)

    x_validation, y_validation, y_validation_one_hot = take_subset(
        subsetter, args.validation_file,
        x_validation, y_validation, y_validation_one_hot,
        n=args.n_validation)

    #######################################################################
    # Preprocessing
    #######################################################################
    if args.preprocessing_class:
        preprocessor = getattr(model, args.preprocessing_class)(seed=args.seed)
    else:
        preprocessor = modeling.preprocess.NullPreprocessor()

    logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
    logging.debug("x_train " + str(x_train.shape))

    model_cfg = ModelConfig(**json_cfg)
    logging.info("model_cfg " + str(model_cfg))
    model = build_model(model_cfg)
    setattr(model, 'stop_training', False)

    logging.info('model has {n_params} parameters'.format(
        n_params=count_parameters(model)))

    if len(args.extra_train_file) > 1:
        callbacks = keras.callbacks.CallbackList()
    else:
        callbacks = []

    save_model_info(args, model_path, model_cfg)

    if not args.no_save:
        if args.save_all_checkpoints:
            filepath = model_path + '/model-{epoch:04d}.h5'
        else:
            filepath = model_path + '/model.h5'
        callbacks.append(ModelCheckpoint(
            filepath=filepath,
            verbose=1,
            save_best_only=not args.save_every_epoch))

    callback_logger = logging.info if args.log else callable_print

    if args.n_epochs < sys.maxsize:
        # Number of epochs overrides patience.  If the number of epochs
        # is specified on the command line, the model is trained for
        # exactly that number; otherwise, the model is trained with
        # early stopping using the patience specified in the model
        # configuration.
        callbacks.append(EarlyStopping(
            monitor='val_loss', patience=model_cfg.patience, verbose=1))

    if args.classification_report:
        cr = ClassificationReport(x_validation, y_validation,
                                  callback_logger,
                                  target_names=target_names)
        callbacks.append(cr)

    if model_cfg.optimizer == 'SGD':
        callbacks.append(SingleStepLearningRateSchedule(patience=10))

    if len(args.extra_train_file) > 1:
        args.extra_train_file.append(args.train_file)
        logging.info("Using the following files for training: " +
                     ','.join(args.extra_train_file))

        train_file_iter = itertools.cycle(args.extra_train_file)
        current_train = args.train_file

        callbacks._set_model(model)
        callbacks.on_train_begin(logs={})

        epoch = batch = 0

        while True:
            x_train, y_train_one_hot = preprocessor.fit_transform(
                x_train, y_train_one_hot)
            x_validation, y_validation_one_hot = preprocessor.transform(
                x_validation, y_validation_one_hot)

            iteration = batch % len(args.extra_train_file)

            logging.info("epoch {epoch} iteration {iteration} - training with {train_file}".format(
                epoch=epoch, iteration=iteration, train_file=current_train))
            callbacks.on_epoch_begin(epoch, logs={})

            n_train = x_train.shape[0]

            callbacks.on_batch_begin(batch, logs={'size': n_train})

            index_array = np.arange(n_train)
            if args.shuffle:
                rng.shuffle(index_array)

            batches = keras.models.make_batches(n_train, model_cfg.batch_size)
            logging.info("epoch {epoch} iteration {iteration} - starting {n_batches} batches".format(
                epoch=epoch, iteration=iteration, n_batches=len(batches)))

            avg_train_loss = avg_train_accuracy = 0.
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]

                if isinstance(model, keras.models.Graph):
                    data = {
                        'input': x_train[batch_ids],
                        'output': y_train_one_hot[batch_ids]
                    }
                    train_loss = model.train_on_batch(data, class_weight=class_weight)
                    train_accuracy = 0.
                else:
                    train_loss, train_accuracy = model.train_on_batch(
                        x_train[batch_ids], y_train_one_hot[batch_ids],
                        accuracy=True, class_weight=class_weight)

                batch_end_logs = {'loss': train_loss, 'accuracy': train_accuracy}

                avg_train_loss = (avg_train_loss * batch_index + train_loss) / (batch_index + 1)
                avg_train_accuracy = (avg_train_accuracy * batch_index + train_accuracy) / (batch_index + 1)

                callbacks.on_batch_end(batch,
                                       logs={'loss': train_loss, 'accuracy': train_accuracy})

            logging.info("epoch {epoch} iteration {iteration} - finished {n_batches} batches".format(
                epoch=epoch, iteration=iteration, n_batches=len(batches)))

            logging.info("epoch {epoch} iteration {iteration} - loss: {loss} - acc: {acc}".format(
                epoch=epoch, iteration=iteration,
                loss=avg_train_loss, acc=avg_train_accuracy))

            batch += 1

            # Validation frequency (this if-block) doesn't necessarily
            # occur in the same iteration as beginning of an epoch
            # (next if-block), so model.evaluate appears twice here.
            kwargs = {'verbose': 0 if args.log else 1}
            pargs = []
            validation_data = {}
            if isinstance(model, keras.models.Graph):
                validation_data = {
                    'input': x_validation,
                    'output': y_validation_one_hot
                }
                pargs = [validation_data]
            else:
                pargs = [x_validation, y_validation_one_hot]
                kwargs['show_accuracy'] = True

            if (iteration + 1) % args.validation_freq == 0:
                if isinstance(model, keras.models.Graph):
                    val_loss = model.evaluate(*pargs, **kwargs)
                    y_hat = model.predict(validation_data)
                    val_acc = accuracy_score(y_validation,
                                             np.argmax(y_hat['output'], axis=1))
                else:
                    val_loss, val_acc = model.evaluate(*pargs, **kwargs)
                logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format(
                    epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc))
                epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc}
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if batch % len(args.extra_train_file) == 0:
                if isinstance(model, keras.models.Graph):
                    val_loss = model.evaluate(*pargs, **kwargs)
                    y_hat = model.predict(validation_data)
                    val_acc = accuracy_score(y_validation,
                                             np.argmax(y_hat['output'], axis=1))
                else:
                    val_loss, val_acc = model.evaluate(*pargs, **kwargs)
                logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format(
                    epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc))
                epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc}
                epoch += 1
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if model.stop_training:
                logging.info("epoch {epoch} iteration {iteration} - done training".format(
                    epoch=epoch, iteration=iteration))
                break

            current_train = next(train_file_iter)
            x_train, y_train = load_model_data(current_train,
                                               args.data_name, args.target_name)
            y_train_one_hot = np_utils.to_categorical(y_train, n_classes)

            if epoch > args.n_epochs:
                break

        callbacks.on_train_end(logs={})
    else:
        x_train, y_train_one_hot = preprocessor.fit_transform(
            x_train, y_train_one_hot)
        x_validation, y_validation_one_hot = preprocessor.transform(
            x_validation, y_validation_one_hot)

        if isinstance(model, keras.models.Graph):
            data = {
                'input': x_train,
                'output': y_train_one_hot
            }
            validation_data = {
                'input': x_validation,
                'output': y_validation_one_hot
            }
            model.fit(data,
                      shuffle=args.shuffle,
                      nb_epoch=args.n_epochs,
                      batch_size=model_cfg.batch_size,
                      validation_data=validation_data,
                      callbacks=callbacks,
                      class_weight=class_weight,
                      verbose=2 if args.log else 1)
            y_hat = model.predict(validation_data)
            print('val_acc %.04f' % accuracy_score(y_validation,
                                                   np.argmax(y_hat['output'], axis=1)))
        else:
            model.fit(x_train, y_train_one_hot,
                      shuffle=args.shuffle,
                      nb_epoch=args.n_epochs,
                      batch_size=model_cfg.batch_size,
                      show_accuracy=True,
                      validation_data=(x_validation, y_validation_one_hot),
                      callbacks=callbacks,
                      class_weight=class_weight,
                      verbose=2 if args.log else 1)
import data
import model
import ngram

qpath = '../MSR_Sentence_Completion_Challenge_V1/Data/Holmes.machine_format.questions.txt'
apath = '../MSR_Sentence_Completion_Challenge_V1/Data/Holmes.machine_format.answers.txt'
questions = data.load_questions(qpath, apath)

vecpath = '../data/holmes_vectors.txt'
# vecpath = '../data/GoogleNews-vectors-negative300.txt'
wordvec, dictionary = data.load_wordvec(vecpath)
# count = ngram.get_count('../data/Holmes_Training_Data_processed.txt', 3)

correct = 0
total = 0
for q in questions:
    print 'predicting', int(100 * total / len(questions)), '%\r',
    result = model.predict(wordvec, dictionary, q)
    # result = ngram.answer(count, q)
    if result == -1:
        pass
    else:
        correct += result
        total += 1
print "Accuracy: ", 1.0 * correct / total
# check if a model has been previously trained
already_trained = os.path.exists(load_path)
if not (args.train or already_trained):
    check_if_ok_to_continue('Model has not been trained. '
                            'Train it now (this may take several hours)? ')
    args.train = True

dataset = model.load_data(args.dataset)
if args.train:
    model.run_training(dataset)

# predict a rating for the user
if args.user_id and (args.movie or args.top):
    instance = dataset.get_ratings(args.user_id)
    ratings = data.unnormalize(instance.ravel())
    output = model.predict(instance, dataset).ravel()
    if args.movie:
        col = dataset.get_col(args.movie)
        rating = output[col]

        # purty stars
        num_stars = int(round(rating * 2))
        stars = ''.join(u'\u2605' for _ in range(num_stars))
        stars += ''.join(u'\u2606' for _ in range(10 - num_stars))

        print("The model predicts that user %s will rate "
              "movie number %s: " % (args.user_id, args.movie))
        print('%1.2f / 5' % rating)
        print(stars)
        print('actual rating: %1.1f' % ratings[col])