def main():
    global done
    t = threading.Thread(target=animate)
    t.start()

    current_date = datetime.today().strftime('%Y%m%d')
    root_directory = utilities.get_root_dir()

    file_directory = "{}/{}".format(root_directory, USERS)
    files = utilities.get_data(file_directory)
    import_report_df = check_users(files)
    print("\tDONE: Check Users import")

    file_directory = "{}/{}".format(root_directory, CONTACT_ROLES)
    files = utilities.get_data(file_directory)
    import_report_df = check_contact_roles(import_report_df, files)
    print("\tDONE: Check Contact Roles import")

    file_directory = "{}/{}".format(root_directory, DO_NOT_FLAGS)
    files = utilities.get_data(file_directory)
    import_report_df = check_do_not_flags(import_report_df, files)
    print("\tDONE: Check Do Not Flag import")

    output_file = "{}/{}_import_results.csv".format(root_directory, current_date)
    utilities.df_to_csv(import_report_df, output_file)
    done = True
    print("\nIMPORT REPORT: {}".format(output_file))
    os.system("open {}".format(output_file))
def upload_file():
    if request.method == "POST":
        subject_id = int(request.form['subject_id'])
        data_no = int(request.form['data_no'])
        start_time = time.time()

        query_subject = get_data(subject_no=subject_id, data_no=data_no)
        query_subject = np.reshape(query_subject, (query_subject.shape[0], 1))

        input_pair_group = np.zeros((109, 2, 3000, 1), dtype=np.float64)
        count = 0
        for i, input_data in enumerate(subject_data):
            input_data = np.reshape(input_data, (input_data.shape[0], 1))
            input_pair_group[i, 0, :, :] = query_subject
            input_pair_group[i, 1, :, :] = input_data

        pred = loaded_model([input_pair_group[:, 0], input_pair_group[:, 1]])
        pred = 1.0 - np.reshape(pred, (pred.shape[0], ))

        subject_results = []
        for i, j in enumerate(pred):
            subject_results.append((i, j))
        subject_results.sort(key=lambda e: e[1], reverse=True)
        end_time = time.time()

        final_result = [i for i in subject_results[:6] if i[1] > 0.7]
        response = {
            "results": final_result,
            "inference_time": round(end_time - start_time, 2)
        }
        return json.dumps(str(response))
def validate(model):
    directory = settings.directory
    datasource = utilities.get_data(settings.testsetpath)
    datagen = utilities.limited_gen_data(datasource)
    settings.saveMean = False
    #model = cnn_lstm.create_cnn_lstm(weightsfile)
    #sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    #model.compile(optimizer=sgd, loss='categorical_crossentropy')
    print('\nin validate method!!!!!!')
    print(type(model))
    posxs = []
    posqs = []
    howmanyaccepted = 0
    counter = 0
    print('looping on test set!')
    for ims, xs, qs in datagen:
        print(len(ims))
        howmanyaccepted += 1
        print(howmanyaccepted)
        inputs = np.zeros([1, 3, 3, 224, 224])
        inputs[0, :] = ims
        out = model.predict(inputs)
        posx = out[0][0][1]  #.mean(0)  # xyz
        posq = out[1][0][1]  #.mean(0)  # wpqr
        actualx = xs[1]  #.mean(0)
        actualq = qs[1]  #.mean(0)
        errx, theta = getError(posx, posq, actualx, actualq)
        posxs.append(errx)
        posqs.append(theta)
        print('error should report here!')
        print('errx ', errx, ' m and ', 'errq ', theta, ' degrees')
    return np.median(posxs), np.median(posqs), howmanyaccepted
def get_data(self):
    # TODO Return the eye position
    print("client.py/Client.get_data")
    data = utilities.get_data()
    if len(data):
        return data
    else:
        return None
def evaluateModel(model_name, model_path):
    clf = get_model(model_name)
    x_train, x_test, y_train, y_test = get_data()
    print('------------- Training Started -------------')
    clf.fit(x_train, y_train)
    print('------------- Training Ended -------------')
    score = clf.score(x_test, y_test)
    print("accuracy: {:.2f}%".format(score * 100.))
    util.save_speaker_model(model_path, clf)
def main(): """ Usage: python main.py [ -c config_file -s config_section ] """ parser = argparse.ArgumentParser() parser.add_argument('-c', '--config_file', default="default.ini") parser.add_argument('-s', '--config_section', default="DEFAULT") args = parser.parse_args() config_file = args.config_file #"default.ini" config_section = args.config_section #"DEFAULT" # "DISJOINT" "TEST" CONFIG = configuration.Configuration(config_file, config_section) n_seed = int(CONFIG.get("random_seed")) if n_seed != -1: random.seed(n_seed) # for reproducibility else: n_seed = None n_run = int(CONFIG.get("n_run")) knn = int(CONFIG.get("knn")) model_type = CONFIG.get("model_type") prediction_type = CONFIG.get("prediction_type") features = set(CONFIG.get("features").split("|")) recalculate_similarity = CONFIG.get_boolean("recalculate_similarity") disjoint_cv = CONFIG.get_boolean("disjoint_cv") try: split_both = CONFIG.get_boolean("pairwise_disjoint") except: split_both = False output_file = CONFIG.get("output_file") n_fold = int(CONFIG.get("n_fold")) n_proportion = int(CONFIG.get("n_proportion")) n_subset = int(CONFIG.get("n_subset")) # for faster results - subsampling drug_disease_file = CONFIG.get("drug_disease_file") drug_side_effect_file = CONFIG.get("drug_side_effect_file") drug_structure_file = CONFIG.get("drug_structure_file") drug_target_file = CONFIG.get("drug_target_file") # Get data data = get_data(drug_disease_file, drug_side_effect_file, drug_structure_file, drug_target_file) # Check prediction accuracy of ML classifier on the data set using the parameters above check_ml(data, n_run, knn, n_fold, n_proportion, n_subset, model_type, prediction_type, features, recalculate_similarity, disjoint_cv, split_both, output_file, model_fun=None, n_seed=n_seed) return
def evaluateModel(model_name): """ generate a model train it test it and display its metrics :param model_name: """ clf = get_model(model_name) x_train, x_test, y_train, y_test = get_data() print('------------- Training Started -------------') clf.fit(x_train, y_train) print('------------- Training Ended -------------') y_pred = clf.predict(x_test) display_metrics(y_pred, y_test)
def get_data(self):
    # print "client.py/Client.get_data"
    # data = str(self.tempName) + ": " + str(self.tempCounter)
    # self.tempCounter += 1
    data = utilities.get_data()
    if len(data):
        # compare by value so repeated readings are actually detected as duplicates
        if data != self.current_data:
            self.current_data = data
            return data
        else:
            # ignore duplicates
            return None
    else:
        return None
def main():
    in_arg = get_input_args()  # Creates and returns command line arguments

    print('\nData Directory:\n', in_arg.data_directory, '\n')
    print('Optional Command Line Arguments:\n',
          'Save Checkpoint [--save_dir]: ', in_arg.save_dir, '\n',
          'Pretrained Network [--arch]: ', in_arg.arch, '\n',
          'Learning Rate [--learning_rate]: ', in_arg.learning_rate, '\n',
          'Hidden Units [--hidden_units]: ', in_arg.hidden_units, '\n',
          'Epochs [--epochs]: ', in_arg.epochs, '\n',
          'GPU [--gpu]: ', in_arg.gpu, '\n')

    if 'checkpoints' not in listdir():  # makes checkpoints folder if it doesn't already exist
        mkdir('checkpoints')

    train_dir, valid_dir, test_dir = util.get_data(
        in_arg.data_directory)  # Returns Train, Validation and Test Directories
    transformed_train, transformed_valid, transformed_test = mod.transform_data(
        train_dir, valid_dir, test_dir)  # Returns transformed datasets
    train_loader, valid_loader, test_loader = mod.load_data(
        transformed_train, transformed_valid, transformed_test)  # Returns Data loaders
    model = mod.build_model(
        util.label_count(train_dir), in_arg.hidden_units, in_arg.arch,
        transformed_train.class_to_idx)  # Returns built model

    epochs = in_arg.epochs  # Epochs initially set by command line argument in_arg.epochs. Can be changed with m.load_checkpoint()
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=in_arg.learning_rate)
    use_gpu = mod.use_gpu(model, in_arg.gpu)  # Returns True or False for GPU use

    mod.train(model, criterion, optimizer, train_loader, valid_loader, use_gpu,
              in_arg.epochs)  # Trains the model. Prints Training Loss, Validation Loss & Validation Accuracy

    mod.save_checkpoint(in_arg.arch, model.classifier.state_dict(),
                        transformed_train.class_to_idx, util.label_count(train_dir),
                        in_arg.hidden_units, in_arg.epochs,
                        in_arg.save_dir)  # Saves classifier and other model parameters to checkpoint
def training(mode, model_path, dataset_folder, class_labels):
    # Read data
    global x_train, y_train, x_test, y_test
    x_train, x_test, y_train, y_test = get_data(dataset_folder, class_labels, flatten=False)
    y_train = np_utils.to_categorical(y_train)
    y_test = np_utils.to_categorical(y_test)

    model_id = mode - 1
    if model_id == 0:
        # Model is CNN, so reshape the data to (samples, height, width, channels)
        in_shape = x_train[0].shape
        x_train = x_train.reshape(x_train.shape[0], in_shape[0], in_shape[1], 1)
        x_test = x_test.reshape(x_test.shape[0], in_shape[0], in_shape[1], 1)
    elif model_id >= len(models):  # >= so an out-of-range id fails here, not with an IndexError below
        sys.stderr.write('Model Not Implemented yet')
        sys.exit(-1)

    model = get_model(class_labels, models[model_id], x_train[0].shape)
    accuracy = evaluateModel(model, model_path)
    return accuracy
import numpy as np

from logistic_regression import LogisticRegression
from utils import bin_feat_heart, con_feat_heart, name_features_heart
import utilities

TRAIN = 'heart_train.csv'
TEST = 'heart_test.csv'

if __name__ == '__main__':
    path = utilities.get_path()
    X_train, y_train = utilities.get_data(path / TRAIN, 10)
    X_test, y_test = utilities.get_data(path / TEST, 10)

    encoder = utilities.OneHotEncoder()
    scaler = utilities.StandardScaler()

    encoder.fit(X_train[:, bin_feat_heart])
    X_train_new = np.hstack(
        (encoder.transform(X_train[:, bin_feat_heart]), X_train[:, con_feat_heart]))
    X_test_new = np.hstack(
        (encoder.transform(X_test[:, bin_feat_heart]), X_test[:, con_feat_heart]))

    scaler.fit(X_train_new)
    X_train_scaled = scaler.transform(X_train_new)
    X_test_scaled = scaler.transform(X_test_new)
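    # A minimal sketch of the likely next step, since the snippet above stops after
    # scaling. It assumes the imported LogisticRegression exposes a scikit-learn-style
    # fit/predict interface; that interface is an assumption, not confirmed by the source.
    model = LogisticRegression()
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    print('test accuracy:', np.mean(y_pred == y_test))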
def data_prep(full_path):
    im_list_tr, att_list_tr, im_list_val, att_list_val, im_list_test, att_list_test = get_data(full_path)
    save2lists(im_list_tr, att_list_tr, 'training_list.lst')
    save2lists(im_list_val, att_list_val, 'valid_list.lst')
    save2lists(im_list_test, att_list_test, 'testing_list.lst')
import numpy as np

from linear_regression import LinearRegression
from utils import mapper, bin_feat_reg, con_feat_reg, name_features_insurance
import utilities

TRAIN = 'insurance_train.csv'
TEST = 'insurance_test.csv'

if __name__ == '__main__':
    path = utilities.get_path()
    X_train, y_train = utilities.get_data(path / TRAIN, 2, mapper)
    X_test, y_test = utilities.get_data(path / TEST, 2, mapper)

    encoder = utilities.OneHotEncoder()
    scaler = utilities.StandardScaler()

    encoder.fit(X_train[:, bin_feat_reg])
    X_train_new = np.hstack(
        (encoder.transform(X_train[:, bin_feat_reg]), X_train[:, con_feat_reg]))
    X_test_new = np.hstack(
        (encoder.transform(X_test[:, bin_feat_reg]), X_test[:, con_feat_reg]))

    scaler.fit(X_train_new)
    X_train_scaled = scaler.transform(X_train_new)
    X_test_scaled = scaler.transform(X_test_new)

    model = LinearRegression(learning_rate=10e-5, penalty='l2')
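    # A minimal sketch of how the model created above might be trained and evaluated.
    # It assumes the custom LinearRegression exposes scikit-learn-style fit/predict
    # methods; the original snippet ends right after construction, so this is an
    # assumption about intent rather than code from the source.
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    print('test RMSE:', np.sqrt(np.mean((y_pred - y_test) ** 2)))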
import os
import cv2
import pickle
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append("/home/hank/libsvm-3.24/python")
from svmutil import *

from utilities import get_data, plot_heatmap, plot_res
from bag_of_features import bag_of_features

train_x, train_y, test_x, test_y = get_data(gray=False)

if os.path.isfile('vocab.pkl'):
    with open('vocab.pkl', 'rb') as handle:
        voc = pickle.load(handle)
    with open('train_features.pkl', 'rb') as handle:
        im_features = pickle.load(handle)
    with open('test_features.pkl', 'rb') as handle:
        test_features = pickle.load(handle)
else:
    im_features, test_features, voc = bag_of_features(train_x, test_x, k=400)
    with open('vocab.pkl', 'wb') as handle:
        pickle.dump(voc, handle, protocol=pickle.HIGHEST_PROTOCOL)
    with open('train_features.pkl', 'wb') as handle:
        pickle.dump(im_features, handle, protocol=pickle.HIGHEST_PROTOCOL)
    with open('test_features.pkl', 'wb') as handle:
        pickle.dump(test_features, handle, protocol=pickle.HIGHEST_PROTOCOL)
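# A hedged sketch of the likely next step: training a libsvm classifier on the
# bag-of-features histograms via svmutil's documented svm_train/svm_predict calls.
# Converting the arrays with .tolist(), the linear kernel ('-t 0'), and the label
# types are assumptions about the data returned above, not part of the source.
svm_model = svm_train(train_y.tolist(), im_features.tolist(), '-t 0 -q')
p_label, p_acc, p_val = svm_predict(test_y.tolist(), test_features.tolist(), svm_model)
print('libsvm test accuracy: {:.2f}%'.format(p_acc[0]))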
from networks import *
import utilities as util
import time
import numpy as np
from skimage.exposure import equalize_hist

imgs, labels, test_imgs, test_labels = util.get_data(40)

start_time = time.time()
print(imgs.shape)
for img in imgs:
    img = util.filter_anisotropic_diffusion(img, n_iter=1, gamma=0.01, kappa=1)
print("time for diffusion filter {}".format(time.time() - start_time))

t1 = time.time()
for img in imgs:
    img = equalize_hist(img)
print("time for HE {}".format(time.time() - t1))

t2 = time.time()
imgs = imgs.reshape(-1, 256 * 256)
for img in imgs:
    img /= np.std(img) + 1e-5
    img -= np.mean(img)
imgs = imgs.reshape(-1, 256, 256, 1)
print("time for rescaling: {}".format(time.time() - t2))
print("overall time for preprocessing: {}".format(time.time() - start_time))
def train():
    try:
        train_data = utilities.get_data(TRAIN_PATH)
        test_data = utilities.get_data(TEST_PATH)
    except Exception as e:
        print(e)
        num_api = numerapi.NumerAPI(PUBLIC_KEY, SECRET_GUY, verbosity="info")
        num_api.download_current_dataset(dest_path='../data/')
        feature_names = utilities.get_feature_names(TRAIN_PATH)
        train_data = utilities.get_data(TRAIN_PATH)
        test_data = utilities.get_data(TEST_PATH)
    feature_names = utilities.get_feature_names(train_data)

    # use pca for dimensionality reduction
    pca = PCA(n_components=N_COMPONENTS)
    pca.fit(train_data[feature_names])
    x_train_pca = pca.transform(train_data[feature_names])
    x_test_pca = pca.transform(test_data[feature_names])

    # corrupt dataset using gaussian noise
    mu, sigma = 0, 0.1
    noise = np.random.normal(mu, sigma, x_train_pca.shape)
    x_train_pca_noise = x_train_pca + noise

    # train an LGBMRegressor model - use random search for parameter tuning
    # with cross validation
    lgb = LGBMRegressor()
    lgb_randomsearch = RandomizedSearchCV(estimator=lgb, cv=CV,
                                          param_distributions=params, n_iter=100)
    lgb_model = lgb_randomsearch.fit(x_train_pca_noise[:100], train_data['target'][:100])
    lgb_model_best = lgb_model.best_estimator_
    lgb_model_best = lgb_model_best.fit(x_train_pca_noise[:100], train_data['target'][:100])

    print("Generating all predictions...")
    train_data['prediction'] = lgb_model_best.predict(x_train_pca_noise)
    test_data['prediction'] = lgb_model_best.predict(x_test_pca)

    train_corrs = evaluation.per_era_score(train_data)
    print('train correlations mean: {}, std: {}'.format(
        train_corrs.mean(), train_corrs.std(ddof=0)))
    #print('avg per-era payout: {}'.format(evaluation.payout(train_corrs).mean()))

    valid_data = test_data[test_data.data_type == 'validation']
    valid_corrs = evaluation.per_era_score(valid_data)
    #valid_sharpe = evaluation.sharpe(valid_data)
    print('valid correlations mean: {}, std: {}'.format(
        valid_corrs.mean(), valid_corrs.std(ddof=0)))
    #print('avg per-era payout {}'.format(evaluation.payout(valid_corrs.mean())))
    #print('valid sharpe: {}'.format(valid_sharpe))

    #live_data = test_data[test_data.data_type == "test"]
    #live_corrs = evaluation.per_era_score(test_data)
    #test_sharpe = evaluation.sharpe(test_data)
    #print('live correlations - mean: {}, std: {}'.format(live_corrs.mean(), live_corrs.std(ddof=0)))
    #print('avg per-era payout is {}'.format(evaluation.payout(live_corrs).mean()))
    #print('live Sharpe: {}'.format(test_sharpe))

    # pickle and save the model
    with open('lgbm_model_round_253.pkl', 'wb') as f:
        pickle.dump(lgb_model, f)

    # save down predictions
    valid_corrs.to_csv('valid_predictions.csv')
def main():
    parser = ArgumentParser()
    parser.add_argument("--output-dir", help="output directory", type=str, required=True)
    parser.add_argument("--month", help="month", type=str, required=True)
    parser.add_argument("--year", help="year", type=str, required=True)
    parser.add_argument(
        "--import-csv",
        help="yes/no : Whether to import existing csv file. Default is 'no'",
        type=str,
        default="no",
    )
    args = parser.parse_args()

    main_dir = args.output_dir
    n_month, n_year = args.month.lower(), args.year
    work_dir = main_dir + "//" + n_month + "_" + n_year
    create_directory(main_dir)
    create_directory(work_dir)

    log_file_write = open(work_dir + "//scrape_en-hi_log_file.txt", mode="w")
    log_file_write.write(f"{n_month, n_year}\n")

    if args.import_csv.lower() == "yes":
        set_import = True
    elif args.import_csv.lower() == "no":
        set_import = False
    else:
        log_file_write.write("\n Please enter a valid option for import-csv")
        return  # stop here; set_import would otherwise be undefined below

    scrape_loc_en = work_dir + "//" + "scrape_file_en_" + n_month + "_" + n_year
    scrape_loc_hi = work_dir + "//" + "scrape_file_hi_" + n_month + "_" + n_year
    create_directory(scrape_loc_hi)
    create_directory(scrape_loc_en)

    url_file_loc = "file:///" + HTML_FOLDER + "//Press Information Bureau."
    filename_url_en = url_file_loc + "_en_" + n_month + "_" + n_year + ".html"
    filename_url_hi = url_file_loc + "_hi_" + n_month + "_" + n_year + ".html"

    ministy_pa_list = pd.read_csv(MINISTRY_NAME_PARALLEL_LOCATION, encoding="utf-16")

    parse_url_en = get_html(filename_url_en)
    parse_url_hi = get_html(filename_url_hi)
    no_of_result_en = int(
        (parse_url_en.find("div", {"class": "search_box_result"}).contents[0]).split()[1]
    )
    no_of_result_hi = int(
        (parse_url_hi.find("div", {"class": "search_box_result"}).contents[0]).split()[1]
    )
    log_file_write.write(f"\nNo of search result in {n_month} of {n_year}:")
    log_file_write.write(f"\n English: {no_of_result_en} \n Hindi: {no_of_result_hi}")
    log_file_write.write(
        f"\nNo of Ministry in English search result: "
        f"{len(parse_url_en.findAll('h3', {'class': 'font104'}))}"
    )
    log_file_write.write(
        f"\nNo of Ministry in Hindi search result: "
        f"{len(parse_url_hi.findAll('h3', {'class': 'font104'}))}"
    )

    # Import or create English dataframe
    df_en = get_data(
        n_month,
        n_year,
        filename_url_en,
        ministy_pa_list,
        "en",
        log_file_write,
        import_data=set_import,
        import_data_dir=work_dir,
    )
    if "PRID" not in df_en.columns.tolist():
        df_en["PRID"] = df_en["Link"].apply(lambda x: x.split("=")[-1])
    log_file_write.write("\n English Dataframe \n")
    log_file_write.write("\n Dataframe Info:\n")
    df_en.info(buf=log_file_write)

    # Write the English dataframe
    df_en.to_csv(
        os.path.join(work_dir, "English_data_" + n_month + "_" + n_year + ".csv"),
        index=False,
        encoding="utf-16",
    )

    # Scrape English documents
    iter_f = df_en.shape[0]
    log_file_write.write("\nStarting scraping for English Document")
    for i in range(iter_f):
        en_scrape_file = (
            scrape_loc_en
            + "//"
            + str(i).zfill(4)
            + "_en_"
            + "_".join(df_en.loc[i, ["English_Ministry_Name"]].values[0].split())
            + "_"
            + df_en.loc[i, ["Posting_Date"]].values[0].strftime("%Y-%m-%d")
            + "_"
            + str(df_en.loc[i, ["PRID"]].values[0])
            + ".txt"
        )
        m = 0
        while m == 0:
            try:
                b = get_html(df_en.Link[i], "lxml")
                m = b.body.form.find(
                    "div", {"class": "innner-page-main-about-us-content-right-part"}
                )
            except:
                log_file_write.write("\nerror:retrying")
                m = 0
        if m is None:
            log_file_write.write(
                f"\nindex: {i}, Link: {df_en.Link[i]}, no english content found"
            )
            continue
        k_en = [
            str(k.get_text()).strip()
            for k in m.findAll(
                ["div", "tr", "td", "p", "ol", "h2", "h3", "h4", "ul", "pre", "span", "li"]
            )
            if len(k.find_parents(["p", "ol", "h2", "h3", "h4", "ul", "pre", "span", "li"])) == 0
        ]
        if len(k_en) == 0:
            log_file_write.write(
                f"\nindex: {i}, Link: {df_en.Link[i]}, no English content in various tags"
            )
            continue
        log_file_write.write(f"\nindex: {i}, number of lines: {len(k_en)}")
        write_scrape_text_file(en_scrape_file, k_en, df_en.English_Ministry_Name[i])
    log_file_write.write("\nDone scraping for English Document")

    # Import or create Hindi dataframe
    df_hi = get_data(
        n_month,
        n_year,
        filename_url_hi,
        ministy_pa_list,
        "hi",
        log_file_write,
        import_data=set_import,
        import_data_dir=work_dir,
    )
    if "PRID" not in df_hi.columns.tolist():
        df_hi["PRID"] = df_hi["Link"].apply(lambda x: x.split("=")[-1])
    log_file_write.write("\nHindi Dataframe \n")
    log_file_write.write("\nDataframe Info:\n")
    df_hi.info(buf=log_file_write)

    # Write the Hindi dataframe
    df_hi.to_csv(
        os.path.join(work_dir, "Hindi_data_" + n_month + "_" + n_year + ".csv"),
        index=False,
        encoding="utf-16",
    )

    # Scrape Hindi documents
    iter_f = df_hi.shape[0]
    log_file_write.write("\nStarting scraping for Hindi Document")
    for i in range(iter_f):
        hi_scrape_file = (
            scrape_loc_hi
            + "//"
            + str(i).zfill(4)
            + "_hi_"
            + "_".join(df_hi.loc[i, ["English_Ministry_Name"]].values[0].split())
            + "_"
            + df_hi.loc[i, ["Posting_Date"]].values[0].strftime("%Y-%m-%d")
            + "_"
            + str(df_hi.loc[i, ["PRID"]].values[0])
            + ".txt"
        )
        m = 0
        while m == 0:
            try:
                b = get_html(df_hi.Link[i], "lxml")
                m = b.body.form.find(
                    "div", {"class": "innner-page-main-about-us-content-right-part"}
                )
            except:
                log_file_write.write("\nerror:retrying")
                m = 0
        if m is None:
            log_file_write.write(
                f"\nindex: {i}, Link: {df_hi.Link[i]}, no hindi content found"
            )
            continue
        k_hi = [
            str(k.get_text()).strip()
            for k in m.findAll(
                ["div", "tr", "td", "p", "ol", "h2", "h3", "h4", "ul", "pre", "span", "li"]
            )
            if len(k.find_parents(["p", "ol", "h2", "h3", "h4", "ul", "pre", "span", "li"])) == 0
        ]
        if len(k_hi) == 0:
            log_file_write.write(
                f"\nindex: {i}, Link: {df_hi.Link[i]}, no hindi content in various tags"
            )
            continue
        log_file_write.write(f"\nindex: {i}, number of lines: {len(k_hi)}")
        write_scrape_text_file(hi_scrape_file, k_hi, df_hi.Hindi_Ministry_Name[i])
    log_file_write.write("\nDone scraping for Hindi Document")

    log_file_write.close()
import numpy as np
#import matplotlib.pyplot as plt
#import os
import sys
#import cgs as cgs  # this is my own script that contains cgs constants
import utilities as util
#import re
#import glob

AU = 149597870700e-3  # km

rp = sys.argv[1]
#N = int(sys.argv[2])  # desired number of particles
R = float(sys.argv[2])  # in km
R /= AU

rp, t = util.get_data(rp, units='raw')


def calc_N(rp, R):
    r = np.sqrt(rp["x"]**2 + rp["y"]**2 + rp["z"]**2)
    indices = np.where(r < R)
    N = len(rp['index1'][indices])
    return N


def carve(rp, R):
    r = np.sqrt(rp["x"]**2 + rp["y"]**2 + rp["z"]**2)
    indices = np.where(r < R)
    util.write_data(rp, 'carved', indices)
    return 1
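# Hypothetical usage of the helpers defined above; the original snippet ends before
# they are called, so this is an assumption about intent, not code from the source.
print('particles inside R:', calc_N(rp, R))
carve(rp, R)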
from keras.layers import Dense, Dropout, Conv1D, Flatten, BatchNormalization, Activation, MaxPooling1D
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
from keras.optimizers import Adam, SGD
# imports needed for Sequential and np_utils used below
from keras.models import Sequential
from keras.utils import np_utils
import numpy as np
import os
import shutil
import time

from utilities import get_data

dataset_path = '3_emotion'
print('Dataset path:', dataset_path)
print('Emotion:', os.listdir(dataset_path))
print('Num emotion:', len(os.listdir(dataset_path)))

x_train, x_test, y_train, y_test = get_data(dataset_path=dataset_path, max_duration=4.0)
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
print('x_train:', x_train.shape)
print('y_train:', y_train.shape)

# create model
model = Sequential()
Ckernel_size = 3
Cstrides = 1
Ppool_size = 2
Pstrides = 2
padding = 'SAME'
acti = 'relu'
def __init__(self):
    self.image_data = get_data()
    self.camera = str
    self.predict = PREDICT()
print "Filtering for rater consensus of: {}%".format( REQUIRED_AGREEMENT_PERCENT ) if REQUIRED_AGREEMENT_PERCENT else "Using rater average" print "Minimum word count threshold: {}".format(WORD_COUNT_MIN) ########################## ########################## # SELECT DATASET # ########################## ########################## dataset_name = "clickdata" # dataset_name = "moviedata" traindata, testdata = get_data(dataset_name=dataset_name, model_technique=MODEL_TECHNIQUE, manual_class_centers=MANUAL_CLASS_CENTERS, num_kmeans_classes=NUM_KMEANS_CLASSES, test_split_percent=TEST_SPLIT_PERCENT, rerandomize=RERANDOMIZE, training_sentence_max=TRAINING_SENTENCE_MAX) ############# # PLOT DATA # ############# # traindata.plot_valences() # traindata.plot_all_data("Traindata (Indiv. User Ratings) ({})".format(traindata.num_ratings())) # traindata.plot_mean_data("Traindata (Mean Ratings) ({})".format(traindata.num_sentences())) # if TEST_SPLIT_PERCENT > 0: # testdata.plot_all_data("Testdata (Indiv. User Ratings) ({})".format(testdata.num_ratings())) # testdata.plot_mean_data("Testdata (Mean Ratings) ({})".format(testdata.num_sentences())) # raw_input("enter")
def rotation_loss3(y_true, y_pred):
    print("####### IN THE ROTATION LOSS FUNCTION #####")
    return BETA * K.sqrt(K.sum(K.square((y_true - y_pred))))


#batchSize=25
nb_epochs = 30000
print("creating the model")
model = cnn_lstm.create_cnn_lstm(startweight)
sgd = settings.optimizer
#sgd = SGD(lr=0.000001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss=[pose_loss3, rotation_loss3])

#for e in range(nb_epoch):
#print("epoch %d" % e)
datasource = utilities.get_data(settings.traindata)
data_gen = utilities.gen_data_batch(datasource)
for i in range(nb_epochs):
    X_batch, Y_batch = next(data_gen)
    #model.train(X_batch,Y_batch)
    #history = model.fit(X_batch, Y_batch,batch_size=32,shuffle=True,nb_epoch=1)
    #print Y_batch[0].shape
    #print Y_batch[1].shape
    #print len(Y_batch)
    history = model.fit(X_batch, Y_batch, nb_epoch=1, batch_size=utilities.batchSize)
    #history = model.fit(X_batch,{'pose_wpqr': Y_batch[1], 'pose_xyz': Y_batch[0]},
                    tuple(text_origin + label_size)], fill="white")
    draw.text(text_origin, label, fill=(0, 0, 0), font=font, color="b")
    del draw
    # plt.imshow(image)
    # image = image.resize((image.size[0]//2,image.size[1]//2))
    plt.imshow(image)
    plt.show()
    ts = int(datetime.now().timestamp() * 10000)
    # plt.imsave(TMP_MOVIE+str(ts)+".png",image)
    plt.close()
    return image


if __name__ == "__main__":
    image_data = get_data()
    for camera in image_data["camera"].unique():
        images = image_data[image_data["camera"] == camera]["path"].values
        images = np.sort(images)
        img_train = images[:len(images) // 2]
        park_data = create_boxes(img_train)
        park_slots = look_for_slots(park_data, img=img_train, plot=False,
                                    PRUNE_TH=1, PRUNE_STEP=10,
                                    MERGE_STEP=50, MERGE_TH=0.8)
        park_slots.drop(park_slots[park_slots["found"] < 3].index, inplace=True)
import cv2
import math
import numpy as np
import glob
from datetime import datetime

from utilities import get_data, plot_res, plot_heatmap

training_set, train_ans, testing_set, test_ans = get_data(gray=True, size=16, normal=True)
training_set = np.array([x.flatten() for x in training_set])
testing_set = np.array([x.flatten() for x in testing_set])

print('=====KNN with cv2.ml.KNearest function=====')
knn = cv2.ml.KNearest_create()
knn.train(training_set, cv2.ml.ROW_SAMPLE, train_ans)
for k in range(5):
    start = datetime.now()
    ret, results, neighbours, dist = knn.findNearest(testing_set, k + 1)
    count = 0
    hit = 0
    for i in range(len(testing_set)):
        if results[i] == test_ans[i]:
            hit += 1
        count += 1
    end = datetime.now()
    print('accu with ', k + 1, ' neighbors = ', hit / count)
("Weekly Comparison", "Post Comparison", "Annual Forecast")) # choose metric to display in "Weekly Comparison" and "Annual Forecast" views metric = st.sidebar.radio("Metric:", ("Pageviews", "RPM", "Earnings")) # choose to display confidence interval in # "Weekly Comparison" and "Annual Forecast" views low_hi = st.sidebar.checkbox("Show Low & High Forecast", value=False) # choose time period for comparison in "Post Comparison" view comparison = st.sidebar.radio("Comparison Period (Post Comparison View Only)", ("Last Week", "Last Year")) # LOGIC TO DISPLAY CHARTS # load latest data df_rpm, df_views, df_holiday = get_data() # fit prophet model and make 365 days of predictions forecast_views, forecast_rpm = fit_predict(df_rpm, df_views, df_holiday) # merge views and rpm together for earnings forecast df = merge_forecast(forecast_rpm, forecast_views, df_rpm, df_views) # create four dfs for weekly plot comparison # next week, this week, last week, last year during the same week next_wk = df_between_dates(df, 0, 1) this_wk = df_between_dates(df, -1, 0) last_wk = df_between_dates(df, -2, -1) last_yr_wk = df_between_dates(df, -53, -52) # plot weekly comparison chart and percentage table
config.gpu_options.allow_growth = True
session = tf.Session(config=config)


def plot_history(acc, val_acc):
    epochs = range(1, len(acc) + 1)
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and Validation acc')
    plt.legend()
    plt.show()


## AlexNet
train_x, train_y, test_x, test_y = get_data(size=150)
train_y = to_categorical(train_y)
test_y = to_categorical(test_y)
train_x.shape


def modeling(input_shape):
    model = Sequential()
    model.add(Conv2D(64, (3, 3), padding='same', input_shape=input_shape))
    model.add(BatchNormalization(momentum=0.5, epsilon=1e-5, gamma_initializer="uniform"))
    model.add(LeakyReLU(alpha=0.1))
    model.add(Conv2D(64, (3, 3), padding='same'))
    model.add(
"ss.[0-9]*[0-9]")) #get list of strings with full paths to each ss file target, impactor, t = util.get_sorted_data(ss_files[0], units='cgs') M_T = np.sum(target['mass']) R_T = np.max(np.sqrt(target['x']**2 + target['y']**2 + target['z']**2)) r_T = np.mean(target['radius']) #calculate approx kinetic and potential energy of each particle in each frame f = open(output, 'w') for frame in ss_files: sys.stdout.write('Current Frame: {0} of {1}\r'.format(frame, ss_files[-1])) sys.stdout.flush() #print("Current Frame: {0}".format(frame)) data, t = util.get_data(frame, units='cgs') v2 = data['xdot']**2 + data['ydot']**2 + data['zdot']**2 r = np.sqrt(data['x']**2 + data['y']**2 + data['z']**2) E_k = 0.5 * data['mass'] * v2 E_pot = -cgs.G * data['mass'] * M_T / r E = E_k + E_pot bound_ind = np.where( (E <= 0.0) & (r > R_T + r_T) ) #indices where particles is on bound orbit, but not touching surface (approx) unbound_ind = np.where(E > 0.0) bound_ind = bound_ind[0] unbound_ind = unbound_ind[0] N_esc = len(unbound_ind) M_esc = np.sum(data['mass'][unbound_ind]) N_disk = len(bound_ind) M_disk = np.sum(data['mass'][bound_ind])
import numpy as np
import old_cnn_lstm as cnn_lstm
from scipy.misc import imread, imresize
from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation
from keras.models import Model
from keras.regularizers import l2
from keras.optimizers import SGD
from custom_layers import PoolHelper  # ,LRN
#import caffe
import cv2
import utilities
from LRN2D import LRN2D as LRN
import settings
from similarityMeasures import getError

directory = settings.directory  # "/usr/prakt/w065/posenet/sm/"
datasource = utilities.get_data(settings.testsetpath)
datagen = utilities.limited_gen_data(datasource)
settings.saveMean = False
#outputDirectory = "/usr/prakt/w065/posenet/TFData/"
#meanFileLocation = 'smmean.binaryproto'
# 'tfsmtrainedweights.h5'#'75batbhessmtrainedweights.h5'#'smtrainedweights.h5'
weightsfile = settings.testweights
# weightsfile='shoptrainedweights.h5'
#poses = []  # will contain poses followed by qs
#images = []
#settings.oldmean=True
# limitingCounter=3
#batchSize=25
nb_epochs = 30000
print("creating the model")
model = posenet.create_posenet(startweight)
sgd = settings.optimizer
#sgd = SGD(lr=0.000001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd,
              loss=[pose_loss12, rotation_loss12,
                    pose_loss12, rotation_loss12,
                    pose_loss3, rotation_loss3])

#for e in range(nb_epoch):
#print("epoch %d" % e)
datasource = utilities.get_data()
data_gen = utilities.gen_data_batch(datasource)
print("beta=", BETA)
for i in range(nb_epochs):
    X_batch, Y_batch = next(data_gen)
    #model.train(X_batch,Y_batch)
    #history = model.fit(X_batch, Y_batch,batch_size=32,shuffle=True,nb_epoch=1)
    history = model.fit(X_batch, {
        'cls1_fc_pose_wpqr': Y_batch[1],
        'cls1_fc_pose_xyz': Y_batch[0],
        'cls2_fc_pose_wpqr': Y_batch[1],
        'cls2_fc_pose_xyz': Y_batch[0],
        'cls3_fc_pose_wpqr': Y_batch[1],
if __name__ == "__main__": if len(sys.argv) != 2: sys.stderr.write('Invalid arguments\n') sys.stderr.write('Usage python2 train_DNN.py <model_number>\n') sys.stderr.write('1 - CNN\n') sys.stderr.write('2 - LSTM\n') sys.exit(-1) n = int(sys.argv[1]) - 1 print ('model given', models[n]) # Read data global x_train, y_train, x_test, y_test x_train, x_test, y_train, y_test = get_data(flatten=False) y_train = np_utils.to_categorical(y_train) y_test = np_utils.to_categorical(y_test) if n == 0: # Model is CNN so have to reshape the data in_shape = x_train[0].shape print(x_train.shape) print(in_shape) x_train = x_train.reshape(x_train.shape[0], in_shape[0], in_shape[1], 1) x_test = x_test.reshape(x_test.shape[0], in_shape[0], in_shape[1], 1) elif n > len(models): sys.stderr.write('Model Not Implemented yet') sys.exit(-1) model = get_model(models[n], x_train[0].shape)
def train():
    try:
        train_data = utilities.get_data(TRAIN_PATH)
        test_data = utilities.get_data(TEST_PATH)
    except Exception as e:
        print(e)
        num_api = numerapi.NumerAPI(PUBLIC_KEY, SECRET_GUY, verbosity="info")
        num_api.download_current_dataset(dest_path='../data/')
        feature_names = utilities.get_feature_names(TRAIN_PATH)
        train_data = utilities.get_data(TRAIN_PATH)
        test_data = utilities.get_data(TEST_PATH)
    feature_names = utilities.get_feature_names(train_data)

    x_train = train_data[feature_names]
    x_test = test_data[feature_names]

    # call autoencoder for dimensionality reduction
    ae = AutoEncoder(x_train.shape, N_COMPONENTS)
    model = ae.build()
    model.compile(optimizer=OPT, loss=LOSS)
    history = model.fit(x_train, x_train,
                        epochs=EPOCHS,
                        batch_size=BATCH_SIZE,
                        verbose=2,
                        validation_data=(x_test, x_test))

    # get the autoencoder representation
    x_train_ae = model.predict(x_train)
    x_test_ae = model.predict(x_test)

    # corrupt dataset using gaussian noise
    #mu, sigma = 0, 0.1
    #noise = np.random.normal(mu, sigma, x_train_pca.shape)
    #x_train_pca_noise = x_train_pca + noise

    # train an LGBMRegressor model - use random search for parameter tuning
    # with cross validation
    lgb = LGBMRegressor()
    lgb_randomsearch = RandomizedSearchCV(estimator=lgb, cv=CV,
                                          param_distributions=params, n_iter=100)
    lgb_model = lgb_randomsearch.fit(x_train_ae[:100], train_data['target'][:100])
    lgb_model_best = lgb_model.best_estimator_
    lgb_model_best = lgb_model_best.fit(x_train_ae[:100], train_data['target'][:100])

    print("Generating all predictions...")
    train_data['prediction'] = lgb_model.predict(x_train_ae)
    test_data['prediction'] = lgb_model.predict(x_test_ae)

    train_corrs = evaluation.per_era_score(train_data)
    print('train correlations mean: {}, std: {}'.format(
        train_corrs.mean(), train_corrs.std(ddof=0)))
    #print('avg per-era payout: {}'.format(evaluation.payout(train_corrs).mean()))

    valid_data = test_data[test_data.data_type == 'validation']
    valid_corrs = evaluation.per_era_score(valid_data)
    #valid_sharpe = evaluation.sharpe(valid_data)
    print('valid correlations mean: {}, std: {}'.format(
        valid_corrs.mean(), valid_corrs.std(ddof=0)))
    #print('avg per-era payout {}'.format(evaluation.payout(valid_corrs.mean())))
    #print('valid sharpe: {}'.format(valid_sharpe))

    #live_data = test_data[test_data.data_type == "test"]
    #live_corrs = evaluation.per_era_score(test_data)
    #test_sharpe = evaluation.sharpe(test_data)
    #print('live correlations - mean: {}, std: {}'.format(live_corrs.mean(), live_corrs.std(ddof=0)))
    #print('avg per-era payout is {}'.format(evaluation.payout(live_corrs).mean()))
    #print('live Sharpe: {}'.format(test_sharpe))

    # pickle and save the model
    with open('lgbm_model_round_253.pkl', 'wb') as f:
        pickle.dump(lgb_model, f)

    # save down predictions
    valid_corrs.to_csv('valid_predictions.csv')