def rd_fr_classify(tran_data, test_data):
    forests = random_fr(tran_data)
    res_clses = []
    cls = []
    for tree in forests:
        res_clses.append(tree.classify(test_data))
    clses_T = map(list, zip(*res_clses))
    for c in clses_T:
        vote_cls = collections.Counter(c).most_common(1)[0][0]
        cls.append(vote_cls)
    accurcy = check_accurcy(test_data, cls)
    return accurcy


if __name__ == '__main__':
    #dataset = read_data("breast-cancer-assignment5.txt")
    dataset = read_data("german-assignment5.txt")
    DiscType = get_disc_val(dataset)
    attrset = range(len(dataset[0]))
    #forests = random_fr(dataset)
    #accurcy = rd_fr_classify(dataset, dataset[1:])
    #print accurcy
    print fcv(dataset, rd_fr_classify)
def main(_):
    print('Loading data info ...')
    FLAGS.word2id, FLAGS.max_sentence_len, FLAGS.max_aspect_len = get_data_info(
        FLAGS.train_fname, FLAGS.test_fname, FLAGS.data_info, FLAGS.pre_processed)

    print('Loading training data and testing data ...')
    train_data = read_data(FLAGS.train_fname, FLAGS.word2id, FLAGS.max_sentence_len,
                           FLAGS.max_aspect_len, FLAGS.train_data, FLAGS.pre_processed)
    test_data = read_data(FLAGS.test_fname, FLAGS.word2id, FLAGS.max_sentence_len,
                          FLAGS.max_aspect_len, FLAGS.test_data, FLAGS.pre_processed)

    print('Loading pre-trained word vectors ...')
    FLAGS.word2vec = load_word_embeddings(FLAGS.embedding_fname, FLAGS.embedding_dim, FLAGS.word2id)

    with tf.Session() as sess:
        model = RAM(FLAGS, sess)
        model.build_model()
        model.run(train_data, test_data)
def main():
    logging.info("[Normalized + Feature Selection] Features: Mean, Std")
    print "Reading data..."
    X, Y = utils.read_data("../files/train.csv")
    print "Preprocessing..."
    X = preprocess(X)
    print "Extracting Features..."
    X = extractFeatures(X)
    Y = [int(x) for x in Y]
    X, Y = np.array(X), np.array(Y)
    classMap = sorted(list(set(Y)))
    accs = []
    rf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    logging.info(rf)
    print "Selecting Features..."
    X = selectFeatures(X, Y, rf)
    folds = 5
    stf = cross_validation.StratifiedKFold(Y, folds)
    logging.info("CV Folds: " + str(folds))
    loss = []
    print "Testing..."
    for i, (train, test) in enumerate(stf):
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        rf.fit(X_train, y_train)
        predicted = rf.predict(X_test)
        probs = rf.predict_proba(X_test)
        probs = [[min(max(x, 0.001), 0.999) for x in y] for y in probs]
        loss.append(utils.logloss(probs, y_test, classMap))
        accs.append(utils.accuracy(predicted, y_test))
        logging.info("Accuracy(Fold {0}): ".format(i) + str(accs[len(accs) - 1]))
        logging.info("Loss(Fold {0}): ".format(i) + str(loss[len(loss) - 1]))
    logging.info("Mean Accuracy: " + str(np.mean(accs)))
    logging.info("Mean Loss: " + str(np.mean(loss)))
def main():
    probes, links = read_data()
    with open("data/nearest_link.pkl", "rb") as fin:
        nearest_link = cPickle.load(fin)
    with open("data/probe_ids.pkl", "rb") as fin:
        probe_ids = cPickle.load(fin)
    dir_angles = []
    for idx, (probe, nearest, probe_id) in enumerate(izip(probes, nearest_link, probe_ids)):
        # print "probe", idx
        if nearest is not None:
            nearest_idx, nearest_dis = nearest
            road_vec = calc_road_vec(probe, links[nearest_idx])
            # print road_vec
            cosine = None
            if idx > 0 and probe_ids[idx - 1] == probe_id:
                cosine = get_cosine(probes[idx - 1], probe, road_vec)
            if idx < len(probes) - 1 and probe_ids[idx + 1] == probe_id:
                cosine1 = get_cosine(probe, probes[idx + 1], road_vec)
                if cosine is None or np.abs(cosine1) > np.abs(cosine):
                    cosine = cosine1
            if cosine is None:
                cosine = 1
            dir_angles.append(cosine)
        else:
            dir_angles.append(None)
        if idx % 10000 == 0:
            print idx
    print len(dir_angles)
    with open("data/dir_angles.pkl", "wb") as fout:
        cPickle.dump(dir_angles, fout, 2)
def main():
    training, target = utils.read_data("../files/train.csv")
    training = [x[1:] for x in training]
    target = [float(x) for x in target]
    test, throwaway = utils.read_data("../files/test.csv")
    test = [x[1:] for x in test]
    rf = RandomForestClassifier(n_estimators=100, min_samples_split=2)
    rf.fit(training, target)
    predicted_probs = rf.predict_proba(test)
    predicted_probs = [[min(max(x, 0.001), 0.999) for x in y] for y in predicted_probs]
    print utils.logloss(predicted_probs, test)
    predicted_probs = [["%f" % x for x in y] for y in predicted_probs]
    utils.write_delimited_file("../files/rf_benchmark.csv", predicted_probs)
def func():
    print "Reading train data"
    train, target = read_data("data/train.csv")
    pca_components = [1, 2, 3, 4, 5, 10, 20, 25, 30, 50, 70, 100]
    pca_components = [1, 20, 50, 75, 100]  # the shorter grid overrides the list above
    pca_fits = []
def show(dir='data'):
    """Show all the results in the same figure.

    Reads all the data files from the data dir.
    """
    if not os.path.isdir(dir):
        raise ValueError('dir does not exist')
    dirs = os.listdir(dir)
    align = 3
    q, r = divmod(len(dirs), align)
    q = q + 1 if r != 0 else q
    rows = q * 100
    columns = align * 10
    start = 1
    # read all the data
    for index, file in enumerate(dirs):
        x, y, ys = read_data(os.path.join(dir, file))
        number = rows + columns + start + index
        # plot the data
        plt.subplot(number)
        plt.title(file.split('.')[0], color='red')
        plt.xlabel('message/bytes')
        plt.ylabel('time/s')
        plt.plot(x, y, 'bo-', x, ys, 'r^-')
    # show the image
    red_patch = mpatches.Patch(color='blue', label='source data')
    blue_patch = mpatches.Patch(color='red', label='scipy fit func')
    plt.legend(handles=[red_patch, blue_patch], bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.show()
def main():
    link_candidates = cPickle.load(open("data/link_candidates.pkl", "rb"))
    print len(link_candidates), " link candidates"
    probes, links = read_data()
    print len(probes), " probes"
    print len(links), " links"
    idx = 0
    nearest_links = []
    for probe, candidates in izip(probes, link_candidates):
        link_dis = dict()
        for cand_id in candidates:
            dis = probe_to_link_dist(links[cand_id], probe)
            if dis < 100:
                link_dis[cand_id] = dis
        # print sorted(link_dis.items(), key=itemgetter(1))[:3]
        if len(link_dis) == 0:
            nearest_links.append(None)
        else:
            nearest_link = min(link_dis.items(), key=itemgetter(1))
            nearest_links.append(nearest_link)
        idx += 1
        if idx % 10000 == 0:
            print idx
    with open("data/nearest_link.pkl", "wb") as fout:
        cPickle.dump(nearest_links, fout, 2)
def optimize(rep, all_bins, filename, all_heights, spikes, output, no_threads, mode):
    '''Initialize and start several optimizations.'''
    init_tree, data = utils.read_data(filename)
    info_tree = [init_tree, data, all_bins, all_heights, spikes]
    tree = FullTree(info_tree[0], info_tree[1], info_tree[2], info_tree[3], info_tree[4])
    no_pop = tree.no_pop
    mutex = multiprocessing.Lock()
    runs_queue = multiprocessing.JoinableQueue()
    optim_queue = multiprocessing.Queue()
    for i in range(no_threads):
        p = multiprocessing.Process(target=threaded_optimize,
                                    args=(mutex, runs_queue, optim_queue, info_tree, ))
        p.daemon = True
        p.start()
    # put the runs in the queue
    param = tree.guess_param()
    runs_queue.put([0, param])
    for i in range(1, rep):
        # generate random initial values around the guessed ones
        init_param = [p + nprand.uniform(-p, p) for p in param]
        runs_queue.put([i, init_param])
    # put a sentinel for each process
    for i in range(no_threads):
        runs_queue.put(None)
    runs_queue.join()
    # all runs are done, report the results
    report_results(rep, optim_queue, tree, output, mode)
def main():
    train_x, train_y = read_data(train_fname)
    ID, test_x = read_data(test_fname, train_mode=False)
    print(ID)
    print(train_x)
    print(np.sum(train_y))

    # random.shuffle works in place and returns None, so build the index list first
    ind = list(range(len(train_x)))
    random.shuffle(ind)
    # train_x, train_y, val_x, val_y = split_data(train_x, train_y, val_ratio=0.2)

    tr = xgb.DMatrix(train_x, label=train_y)
    # val = xgb.DMatrix(val_x, label=val_y)
    te = xgb.DMatrix(test_x)
    param = {'bst:max_depth': 3, 'bst:eta': 0.1, 'silent': 0, 'objective': 'binary:logistic'}
    param['nthread'] = 16
    param['eval_metric'] = 'auc'
    num_round = 100
    xgb.cv(param, tr, num_round, nfold=5, metrics={"error"})
    model = xgb.train(param, tr, num_round, early_stopping_rounds=10)
    ypred = model.predict(te, ntree_limit=model.best_ntree_limit)

    clf = RandomForestClassifier(n_estimators=500, criterion='entropy', max_features='sqrt',
                                 max_depth=None, oob_score=False, n_jobs=-1, verbose=1)
    clf.fit(X=train_x, y=train_y)
    print(clf.score(train_x, train_y))
    pred = clf.predict(X=test_x)
    pred_prob = clf.predict_proba(X=test_x)[:, 1]
    write_answer(output_fname, ID, pred, print_prob=False)
    write_answer("prob_" + output_fname, ID, pred_prob, print_prob=True)
def __init__(self, dataPath, vectorizedPath, kmeansPath, vectorizerPath):
    # Load the k-means model.
    self.km = joblib.load(kmeansPath)
    if not self.km:
        raise NameError('Model not found named %s' % kmeansPath)
    # Load the vectorizer.
    self.vectorizer = joblib.load(vectorizerPath)
    # Load the vectorized data.
    self.vectorized = sp.sparse.csr.csr_matrix(np.load(vectorizedPath))
    # Load the raw data that was vectorized.
    self.data = utils.read_data(dataPath)
def main():
    train_x, train_y = read_data(train_fname)
    ID, test_x = read_data(test_fname, train_mode=False)
    print(ID)
    print(train_x)
    print(np.sum(train_y))
    clf = RandomForestClassifier(n_estimators=500, criterion='entropy', max_features='sqrt',
                                 max_depth=None, oob_score=False, n_jobs=-1, verbose=1)
    clf.fit(X=train_x, y=train_y)
    print(clf.score(train_x, train_y))
    pred = clf.predict(X=test_x)
    pred_prob = clf.predict_proba(X=test_x)[:, 1]
    write_answer(output_fname, ID, pred, print_prob=False)
    write_answer("prob_" + output_fname, ID, pred_prob, print_prob=True)
def eventsview(request):
    check_db()
    if len(Vendor.objects.all()) == 0:
        content = read_data("http://offthegridsf.com/vendors#food")
        vendors = parse_HTML(content)
        create_vendor_models(vendors)
    fb_data = read_data(facebookurl)
    if len(Event.objects.all()) > 0 and fb_data != "ERROR":
        event_data_ready = check_recent_fb_json(fb_data["data"])
        create_event_models(event_data_ready)
        establish_relation()
    elif fb_data != "ERROR":
        print "WERE here"
        next_parameter = fb_data["paging"]["cursors"]["after"]
        next_set = transform_url(facebookurl, next_parameter)
        second = read_data(next_set)
        fulldata = fb_data["data"] + second["data"]
        events = check_less_thirty(fulldata)
        create_event_models(events)
        establish_relation()
    event_list = Event.objects.all().order_by("-event_time")
    context_dict = {"events": event_list}
    return render(request, "GingerBites/events.html", context_dict)
def main():
    probes, links = read_data()
    link_points, belong = flatten_uniq(links)
    print "%d probe points loaded" % len(probes)
    print "%d link points loaded" % len(link_points)
    # probes = np.array(random.sample(probes, 10000))
    # print probes
    probes = np.array(probes)
    link_points = np.array(link_points)
    probes_rad = np.deg2rad(probes)
    link_points_rad = np.deg2rad(link_points)
    knns_kd = nearest_kdtree(probes_rad, link_points_rad, n=100, is_filter=False)
    link_candidates = [set([bel for lat, lon in link_points[knns] for bel in belong[lat, lon]])
                       for knns in knns_kd]
    with open("data/link_candidates.pkl", "wb") as fout:
        cPickle.dump(link_candidates, fout, 2)
def main(): "main program" loansData = read_data() numeric_vars = get_numeric_vars() train_df, train_y, test_df, test_y = load_data(loansData, numeric_vars) print("train_df head\n", train_df[:3]) print("train_y head\n", train_y[:3]) plotdir = make_plotdir() # add scaling train_X, my_scaler = scale_train_data(train_df) test_X = scale_test_data(my_scaler, test_df) regr = linreg() regr.fit(train_X, train_y) # print('regr methods', dir(regr)) # print('columns', list(train_df.columns), 'Intercept') # print('coefs', regr.coef_, regr.intercept_) coefs = sort_coefs(list(train_df.columns), regr.coef_, regr.intercept_) fitpts = regr.predict(train_X) plot_predict_scatter(plotdir, "train", fitpts, train_y) cross_validate(regr, train_X, train_y, cv=10, print_out=True) score = regr.score(train_X, train_y) print('Regression fit R^2 score %.4f' % score) pred = regr.predict(test_X) # pscore = sum(np.array(test_y) == pred) # need np.tol.diff pscore = sum(np.abs(test_y - pred)) / len(test_y) print('Regression predict diff average %.4f' % pscore) # pscore = np.sqrt(sum( (test_y - pred)*(test_y - pred) )) pscore = regr.score(test_X, test_y) print('Regression predict R^2 score %.4f' % pscore) plot_predict_scatter(plotdir, "test", pred, test_y) # try fit with fewer top variables: 5, 4, 3, 2 for top in range(5, 1, -1): new_vars = get_top_vars(coefs, top) print('new_vars', new_vars) run_var_list(new_vars, loansData)
def main():
    print "Reading data..."
    X, Y = utils.read_data("../files/train_10.csv")
    print "Preprocessing..."
    X = preprocess(X)
    print "Extracting Features..."
    X = extractFeatures(X)
    Y = [int(x) for x in Y]
    X, Y = np.array(X), np.array(Y)
    classMap = sorted(list(set(Y)))
    accs = []
    rf = RandomForestClassifier(n_estimators=1000, n_jobs=-1, compute_importances=True, oob_score=True)
    rf.fit(X, Y)
    importantFeatures = []
    for x, i in enumerate(rf.feature_importances_):
        print len(rf.feature_importances_)
        print x, i
        if i > np.average(rf.feature_importances_):
            importantFeatures.append(str(x))
    print 'Most important features:', ', '.join(importantFeatures)
    print rf.oob_score_
def main(): X, Y = utils.read_data("../files/train_10.csv") n_target = len(set(Y)) Y = map(int, Y) folds = 5 stf = cross_validation.StratifiedKFold(Y, folds) loss = [] accs = [] classMap = sorted(list(set(Y))) X, Y = np.array(X), np.array(Y) print "Testing..." for i, (train, test) in enumerate(stf): X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test] probs = [[0.001 for x in range(n_target)] for y in range(len(y_test))] loss.append(utils.logloss(probs, y_test, classMap)) accs.append(utils.accuracy([1]*len(y_test), y_test)) print "Accuracy(Fold {0}): ".format(i) + str(accs[len(accs) - 1]) print "Loss(Fold {0}): ".format(i) + str(loss[len(loss) - 1]) print "Mean Accuracy: " + str(np.mean(accs)) print "Mean Loss: " + str(np.mean(loss))
def main(): probes, links = read_data() with open("data/link_lengths.pkl", "rb") as fin: link_lengths = cPickle.load(fin) with open("data/nearest_link.pkl", "rb") as fin: nearest_link = cPickle.load(fin) idx = 0 dis_ref_nonref_list = [] for probe, nearest in izip(probes, nearest_link): if nearest is not None: nearest_idx, nearest_dis = nearest dis_ref, dis_nonref = dis_ref_nonref(probe, links[nearest_idx], nearest_dis, link_lengths[nearest_idx]) dis_ref_nonref_list.append((dis_ref, dis_nonref)) idx += 1 if idx % 10000 == 0: print idx else: dis_ref_nonref_list.append(None) print len(dis_ref_nonref_list) with open("data/dis_ref_nonref.pkl", "wb") as fout: cPickle.dump(dis_ref_nonref_list, fout, 2)
def test():
    probes, links = read_data()
    link_points, belong = flatten_uniq(links)
    print "%d probe points loaded" % len(probes)
    print "%d link points loaded" % len(link_points)
    probes = np.array(random.sample(probes, 100))
    # print probes
    link_points = np.array(link_points)
    probes_rad = np.deg2rad(probes)
    link_points_rad = np.deg2rad(link_points)
    knns_kd = evaluate(lambda: nearest_kdtree(probes_rad, link_points_rad, n=30, is_filter=True),
                       "kdtree")
    knns_force = evaluate(lambda: nearest_force(probes_rad, link_points_rad, n=30), "brute force")
    for idx, (k1, k2) in enumerate(izip(knns_kd, knns_force)):
        diff = np.setdiff1d(k2, k1)
        if diff.size > 0:
            print diff
            print probes[idx]
            print link_points[diff]
            print haversine_np(probes_rad[idx], link_points_rad[diff])
def main(): probes, links = read_data() with open("data/nearest_link.pkl", "rb") as fin: nearest_link = cPickle.load(fin) with open("data/link_ids.pkl", "rb") as fin: link_ids = cPickle.load(fin) with open("data/dis_ref_nonref.pkl", "rb") as fin: dis_ref_nonref = cPickle.load(fin) with open("data/dir_angles.pkl", "rb") as fin: dir_angles = cPickle.load(fin) with open("data/probe_points.csv", "r") as fin, open("data/Partition6467MatchedPoints.csv", "w") as fout: for idx, (line, nearest, dir_angle, dis_pair) in enumerate(izip(fin, nearest_link, dir_angles, dis_ref_nonref)): if nearest is not None: nearest_idx, nearest_dis = nearest line = line.strip() dis_ref, dis_nonref = dis_pair dir_chr = "F" if dir_angle > 0 else "T" link_id = link_ids[nearest_idx] line += "," + ",".join([link_id, dir_chr, str(dis_ref), str(dis_nonref)]) print line # fout.write(line + "\n") if idx > 10: break
def test():
    link_candidates = cPickle.load(open("data/link_candidates.pkl", "rb"))
    probes, links = read_data()
    d = dict()
    indices = random.sample(range(len(probes)), 10000)
    for idx in indices:
        # for probe, candidates in izip(probes, link_candidates):
        probe = probes[idx]
        candidates = link_candidates[idx]
        link_dis = dict()
        for cand_id in candidates:
            dis = probe_to_link_dist(links[cand_id], probe)
            if dis < 100:
                link_dis[cand_id] = dis
        link_dis = link_dis.items()
        if len(link_dis):
            min_dis = min(link_dis, key=itemgetter(1))[1]
            # print min_dis
            link_dis = [x for x in link_dis if x[1] < min_dis + 10 and min_dis * 3 > x[1]]
        l = len(link_dis)
        if l == 4:
            print link_dis
        d[l] = d.get(l, 0) + 1
    print d
def decode():
    print('Decoding')
    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." % FLAGS.max_train_data_size)
        _, _, all_data_labels = utils.read_data(_buckets, data_dir=FLAGS.data_dir,
                                                sequence_len=FLAGS.seq_len)
        all_data, all_labels = all_data_labels
        all_states = []
        for bucket_id in range(len(_buckets)):
            if len(all_data[bucket_id]) == 0:
                print("  eval: empty bucket %d" % bucket_id)
                continue
            idx = 0
            total_eval_loss = 0
            i = 0
            total_len = len(all_data[bucket_id])
            while idx < total_len:
                encoder_inputs, decoder_inputs, target_weights, idx = model.get_batch(
                    all_data, bucket_id, idx)
                _, eval_loss, _, states = model.step(sess, encoder_inputs, decoder_inputs,
                                                     target_weights, bucket_id, True)
                total_eval_loss += eval_loss
                i += 1
                all_states.append(states)
                print('\rBucket {}, item {}/{}'.format(bucket_id, idx, total_len), end='\r')
                sys.stdout.flush()
            print("  eval: bucket %d loss %.2f" % (bucket_id, total_eval_loss / i))
            sys.stdout.flush()
        np.savez(os.path.join(FLAGS.train_dir, 'decoded_states.npz'),
                 states=np.vstack(all_states), labels=all_labels)
def main(): X, Y = utils.read_data("../files/train_10.csv") Y = map(int, Y) folds = 5 stf = cross_validation.StratifiedKFold(Y, folds) loss = [] svc = svm.SVC(probability=True) accs = [] classMap = sorted(list(set(Y))) X, Y = np.array(X), np.array(Y) print "Testing..." for i, (train, test) in enumerate(stf): X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test] svc.fit(X_train, y_train) predicted = svc.predict(X_test) probs = svc.predict_proba(X_test) probs = [[min(max(x, 0.001), 0.999) for x in y] for y in probs] loss.append(utils.logloss(probs, y_test, classMap)) accs.append(utils.accuracy(predicted, y_test)) print "Accuracy(Fold {0}): ".format(i) + str(accs[len(accs) - 1]) print "Loss(Fold {0}): ".format(i) + str(loss[len(loss) - 1]) print "Mean Accuracy: " + str(np.mean(accs)) print "Mean Loss: " + str(np.mean(loss))
        temp = arg_value.split(",")
        services = temp
    else:
        print "\n** Error ! Unrecognized argument ! Let me print the help **"
        print "\n ----------------------------------------------------------\n"
        helpmain()
        sys.exit(-1)

# # # Mode # # #
import ModeKfold

# we read the data
data = utils.read_data(filename)
obj0 = ModeKfold.ModeKfold()
# we run a mode k-fold cross-validation
res0, time0 = obj0.cross_validation(data, k, "")
if "mode" in services:
    # we print the results
    print_results("Mode", res0, time0, 0)

# # # BigML # # #
if "bigml" in services:
import progressbar
import os

p = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if p not in sys.path:
    sys.path.append(p)
import utils
from models import LR, FM, PNN1, PNN2, FNN, CCPM

train_file = '../../output/fm/train.fm'
test_file = '../../output/fm/test.fm'

input_dim = utils.INPUT_DIM

train_data = utils.read_data(train_file)
# train_data = pkl.load(open('../data/train.yx.pkl', 'rb'))
train_data = utils.shuffle(train_data)
test_data = utils.read_data(test_file)
# test_data = pkl.load(open('../data/test.yx.pkl', 'rb'))
# pkl.dump(train_data, open('../data/train.yx.pkl', 'wb'))
# pkl.dump(test_data, open('../data/test.yx.pkl', 'wb'))

if train_data[1].ndim > 1:
    print('label must be 1-dim')
    exit(0)

print('read finish')
print('train data size:', train_data[0].shape)
print('test data size:', test_data[0].shape)

train_size = train_data[0].shape[0]
from utils import read_data
from intcode.vm import IntcodeVM

raw = read_data(21)[0].split(",")


def attempt(instructions, part):
    assert part in [1, 2]
    if part == 1:
        instructions.append("WALK")
    else:
        instructions.append("RUN")
    text_input = [ord(i) for i in "\n".join(instructions) + "\n"]
    vm = IntcodeVM(raw, input_=text_input)
    out = vm.collect_all_outputs()
    row = []
    for i in out:
        if i == 10:
            # print("".join(row))
            row = []
        else:
            try:
                row.append(chr(i))
            except ValueError:
                # final instruction is puzzle result
                print("Part {}:".format(part))
                print(i)
from collections import Counter
from typing import List

from utils import read_data

INPUT = read_data()


def make_counters(data: List[str]) -> List[Counter]:
    pos_counters = [Counter() for _ in range(8)]
    for line in data:
        for index, char in enumerate(line):
            pos_counters[index].update(char)
    return pos_counters


def part_one(counters: List[Counter]) -> str:
    return "".join([
        sorted(counters[i], key=lambda x: counters[i][x])[-1]
        for i in range(8)
    ])


def part_two(counters: List[Counter]) -> str:
    return "".join([
        sorted(counters[i], key=lambda x: counters[i][x])[0]
        for i in range(8)
    ])


processed_input = make_counters(INPUT.split("\n"))
print(f"Part one password: {part_one(processed_input)}")
print(f"Part two password: {part_two(processed_input)}")
import json
import logging

import tensorflow as tf

import utils
from model import Model
from utils import read_data
from flags import parse_args

FLAGS, unparsed = parse_args()

logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s',
    level=logging.DEBUG)

vocabulary = read_data(FLAGS.text)
print('Data size', len(vocabulary))

with open(FLAGS.dictionary, encoding='utf-8') as inf:
    dictionary = json.load(inf, encoding='utf-8')

with open(FLAGS.reverse_dictionary, encoding='utf-8') as inf:
    reverse_dictionary = json.load(inf, encoding='utf-8')

model = Model(learning_rate=FLAGS.learning_rate, batch_size=FLAGS.batch_size,
              num_steps=FLAGS.num_steps)
model.build()

with tf.Session() as sess:
    summary_string_writer = tf.summary.FileWriter(FLAGS.output_dir, sess.graph)
def evaluate_part_two(expression: str) -> int:
    expression = expression.replace(" ", "")
    while '(' in expression:
        parens = find_matching_parens(expression)
        first_paren = min(parens.keys())
        sub_problem = expression[first_paren + 1:parens[first_paren]]
        sub_value = evaluate_part_two(sub_problem)
        expression = str(sub_value).join(
            (expression[:first_paren], expression[parens[first_paren] + 1:]))
    while '+' in expression:
        plus_loc = expression.index("+")
        left_num, left_num_start = get_left_num(expression, plus_loc)
        right_num, right_num_end = get_right_num(expression, plus_loc)
        expression = str(left_num + right_num).join(
            (expression[:left_num_start], expression[right_num_end + 1:]))
    while '*' in expression:
        mult_loc = expression.index("*")
        left_num, left_num_start = get_left_num(expression, mult_loc)
        right_num, right_num_end = get_right_num(expression, mult_loc)
        expression = str(left_num * right_num).join(
            (expression[:left_num_start], expression[right_num_end + 1:]))
    return int(expression)


INPUT = read_data().split("\n")
part_one_answers = [evaluate_part_one(line) for line in INPUT]
print(f'Part one: {sum(evaluate_part_one(line) for line in INPUT)}')
print(f"Part two: {sum(evaluate_part_two(line) for line in INPUT)}")
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 10 15:05:25 2020

@author: Mathias
"""
#%%
import os

os.chdir('C:/Users/mathi/Desktop/Thesis/Implementations')
from utils import read_data, clean_seqs, eight_to_three_state

path = '../DATA/CB513_formatted.txt'
proteins, seqs, structures = read_data(path, 'DSSP')
seqs, structures = clean_seqs(seqs, structures)
structures_3_state = eight_to_three_state(structures, method='method_1')

#### KEY FIGURES ####
# Number of seqs
print("Number of seqs: {}".format(len(proteins)))

# Total number of aa's
length = 0
for seq in seqs:
    length += len(seq)
print("Total number of aa's: {}".format(length))
import numpy as np
from sklearn.metrics import roc_auc_score

import utils
from models import LR, FM, PNN1, PNN2, FNN, CCPM

# train_file = '../data/train.yx.txt'
# test_file = '../data/test.yx.txt'
train_file = '../data/train.txt'
test_file = '../data/test.txt'
# fm_model_file = '../data/fm.model.txt'

input_dim = utils.INPUT_DIM
train_data = utils.read_data(train_file)
train_data = utils.shuffle(train_data)
test_data = utils.read_data(test_file)

if train_data[1].ndim > 1:
    print 'label must be 1-dim'
    exit(0)

print('read finish')

train_size = train_data[0].shape[0]
test_size = test_data[0].shape[0]
num_feas = len(utils.FIELD_SIZES)

min_round = 1
num_round = 1000
early_stop_round = 50
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import linear_model
import sys

import utils
from sklearn.model_selection import cross_validate
from sklearn.model_selection import ShuffleSplit

train_file = "./data/A1_Data/train.json"
dev_file = "./data/A1_Data/dev.json"
model_file = 'model.pkl'

# command-line arguments override the defaults above
train_file = sys.argv[1]
dev_file = sys.argv[2]
model_file = sys.argv[3]

train_sent, train_y = utils.read_data(train_file)
dev_sent, dev_y = utils.read_data(dev_file)
print(len(train_sent), len(dev_sent))

print("Cleaning data")
train_x = utils.clean_data(train_sent)
dev_x = utils.clean_data(dev_sent)

vectorizer = TfidfVectorizer(max_features=1000000, tokenizer=utils.lemmatize_tokenize,
                             analyzer='word', ngram_range=(1, 2))
temp = train_x + dev_x

print("Transforming data to tfidf")
train_test_x_tfidf = vectorizer.fit_transform(temp)
train_x_tfidf = train_test_x_tfidf[0:len(train_x)]
import numpy as np
from utils import read_data
import tensorflow as tf
import argparse

# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--data', type=str, default='banana', help='Name of the dataset')
args = parser.parse_args()
# ---------------------------------------------------------------------------

X, y, Xt, yt = read_data(key=args.data)

# labels should be 0-1
y = ((y + 1) / 2).reshape(len(y), 1)
yt = ((yt + 1) / 2).reshape(len(yt), 1)

dim = X.shape[1]
inp = tf.placeholder(tf.float32, shape=[None, dim])
labels = tf.placeholder(tf.float32, shape=[None, 1])

# params
hsize = 1024
W1 = tf.get_variable("W1", shape=[dim, hsize], initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.get_variable("b1", shape=[hsize], initializer=tf.zeros_initializer())
W2 = tf.get_variable("W2", shape=[hsize, hsize], initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.get_variable("b2", shape=[hsize], initializer=tf.zeros_initializer())
from apriori import Apriori
from fpgrowth import FPGrowth
from utils import read_data
import time

if __name__ == '__main__':
    transactions, items = read_data('Online Retail.xlsx')
    # transactions, items = {'T1': ['pasta', 'lemon', 'bread', 'orange'],
    #                        'T2': ['pasta', 'lemon'],
    #                        'T3': ['pasta', 'orange', 'cake'],
    #                        'T4': ['pasta', 'lemon', 'orange', 'cake']}, \
    #                       ['pasta', 'lemon', 'bread', 'orange', 'cake']

    t1 = time.time()
    apriori = Apriori()
    apriori.fit(transactions=transactions, items=items, min_support=0.03)
    # result = apriori.predict(['pasta', 'lemon'])
    result = apriori.predict(['84029G', '84029E'])
    print(len(apriori.rules))
    t2 = time.time()
    print(result)
    print(t2 - t1)

    print('--------------------------------------------')

    t1 = time.time()
    fp_growth = FPGrowth()
    fp_growth.fit(transactions=transactions, items=items, min_support=0.03)
    result = fp_growth.predict(['84029G', '84029E'])
    # result = fp_growth.predict(['pasta', 'lemon'])
    print(len(fp_growth.rules))
    t2 = time.time()
def train(self, Config):
    # directory of the training data
    data_dir = os.path.join('./{}'.format(Config.checkpoint_dir), Config.data_dir)
    train_data, train_label = read_data(data_dir, Config)

    """
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_op = tf.train.AdamOptimizer(Config.learning_rate).minimize(self.loss)
    """
    #self.train_op = tf.train.AdamOptimizer(Config.learning_rate).minimize(self.loss)
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
        Config.learning_rate, global_step * Config.batch_size,
        40 * len(train_data) * Config.batch_size, 0.5, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    self.train_op = optimizer.minimize(self.loss, var_list=self.memnet_variables, name="memnet_opt")

    # gradient clip
    """
    global_step = tf.Variable(0, trainable=False)
    #learning_rate = tf.train.exponential_decay(Config.learning_rate, global_step*Config.batch_size, 5*len(train_data)*Config.batch_size, 0.1, staircase=True)
    opt = tf.train.AdamOptimizer(learning_rate=Config.learning_rate)
    grad_and_value = opt.compute_gradients(self.loss)
    clip = tf.Variable(Config.clip_grad, name='clip')
    def ClipIfNotNone(grad):
        if grad is None:
            return grad
        return tf.clip_by_value(grad, -clip, clip)
    capped_gvs = [(ClipIfNotNone(grad), var) for grad, var in grad_and_value]
    self.train_op = opt.apply_gradients(capped_gvs, global_step=global_step)
    """

    tf.global_variables_initializer().run()
    summary_writer = tf.summary.FileWriter("./graph", graph=tf.get_default_graph())
    counter = 0
    start_time = time.time()
    if self.load(self.checkpoint_dir):
        print("Load SUCCESS.")
    else:
        print("Load failed!")
    print("Training...")

    for ep in range(Config.epoch):
        batch_idxs = len(train_data) // Config.batch_size
        # shuffle the training samples for this epoch
        permutation = np.random.permutation(train_data.shape[0])
        #train_data = train_data[permutation, :, :, :]
        #train_label = train_label[permutation, :, :, :]
        for idx in range(0, batch_idxs):
            batch_images = train_data[permutation[idx * Config.batch_size:(idx + 1) * Config.batch_size]]
            batch_labels = train_label[permutation[idx * Config.batch_size:(idx + 1) * Config.batch_size]]
            #permutation = np.random.choice(train_data.shape[0], Config.batch_size)
            #batch_images = train_data[permutation, :, :, :]
            #batch_labels = train_label[permutation, :, :, :]
            counter += 1
            _, err = self.sess.run([self.train_op, self.loss],
                                   feed_dict={self.images: batch_images,
                                              self.labels: batch_labels})
            if counter % 100 == 0:
                summary = self.sess.run(self.merged,
                                        feed_dict={self.images: batch_images,
                                                   self.labels: batch_labels})
                summary_writer.add_summary(summary, counter)
                print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]" \
                      % ((ep + 1), counter, time.time() - start_time, err))
            #if counter % 1000 == 0:
            #    self.save(Config.checkpoint_dir, counter)
            #if err < 0.015:
            #    self.save(Config.checkpoint_dir, counter)
    self.save(Config.checkpoint_dir, counter)
    return params


def test(parameters):
    """ test classifier with test data - no labels
    params - the trained params
    """
    fd = open("test.pred", 'w')
    counter = 0
    test_ans = ''
    test_data = ut.read_data('test')
    for label, feature in test_data:
        pred = mlp1.predict(feats_to_vec(feature), parameters)
        for l, i in ut.L2I.items():
            if i == pred:
                test_ans = l
        counter += 1
        fd.write(test_ans + "\n")
        #print 'line: ', counter, 'prediction: ', test_ans
    fd.close()


if __name__ == '__main__':
    train_data = ut.read_data('train')
    dev_data = ut.read_data('dev')
    params = mlp1.create_classifier(len(ut.F2I), HIDDEN_SIZE, len(ut.L2I))
    trained_params = train_classifier(train_data, dev_data, EPOCH, LR, params)
    print trained_params
    test(trained_params)
import numpy as np
from pylab import *
import argparse
import os
import json

import utils

if __name__ == '__main__':
    cost1 = []
    cost2 = []
    for group in utils.listGroups():
        for fn in utils.listGroupInstances(group):
            print(fn)
            topology = fn.split('.')[0]

            data = utils.read_data(group, 'minCover', topology)
            assert data is not None, topology
            c = int(data['initialCoverSeg'])
            cost1.append(c)

            data = utils.read_data(group, 'minCover_complete', topology)
            assert data is not None, topology
            c = int(data['initialCoverSeg'])
            cost2.append(c)

    print(utils)
    groups = [str(i) for i in range(max(len(cost1), len(cost2)))]
    cost1 = utils.data_to_bar(cost1)
    cost2 = utils.data_to_bar(cost2)
    utils.make_g_barplot([cost1, cost2], groups,
                         ['original IGP', 'ECMP-free and complete IGP'],
                         ['#3CAEA3', '#ED553B'],
                         'segment cost', 'percentage of topologies', '',
                         '../data/plot/minSegCover_segcost_complete.eps', 5)
im_name = "im_test"
# obtain the image
result = get_image(im_name)
rospy.sleep(10)

# get the target coordinates in the camera frame
target_cam = get_target()
target_cam = np.matrix(
    np.concatenate(
        (np.array(target_cam).transpose(), np.ones([1, len(target_cam)])), axis=0))

# get the transformation matrix
H, error = calibrate(data_file)
write_data(data_file, H, "H", type="mat")
H = read_data(data_file, "H")
print(H)

# get the target pose in the robot base frame
target_r = np.dot(H, target_cam)
# transform from homogeneous coordinates to Cartesian coordinates
target_r[:3, :] = target_r[:3, :] / target_r[3, :]
target_r = target_r[:3, :]
target_r = list(target_r.transpose())

# execution for each task
for target in target_r:
    print(target)
    current_pose = group.get_current_pose().pose
    current_pose.position.x = target[0, 0]
    current_pose.position.y = target[0, 1]
    plan_execute(current_pose, mode)

str = raw_input(
import math

from utils import read_data

raw = read_data(22)


def brute_force(data, N):
    cards = list(range(N))
    for row in data:
        if row == "deal into new stack":
            # Reverse ordering
            cards = list(reversed(cards))
        elif row.startswith("cut"):
            cut = int(row[4:])
            if cut < 0:
                cut += N
            cards = cards[cut:] + cards[:cut]
        elif row.startswith("deal with increment"):
            increment = int(row.split()[3])
            new = [0] * N
            for i in range(N):
                new[(i * increment) % N] = cards[i]
            cards = new
        else:
            raise Exception("Unexpected row: {}".format(row))
    return cards
def main(param):
    model_name = param.model_name
    param["model_name"] = "pseudo_" + model_name
    utils.seed_everything(0)

    print('read csv...')
    train, test, submit = utils.read_data("./data")
    train = make_pseudo_labeled_data(train, test)

    print('read wave data...')
    train_wave = utils.read_wave("./data/ecg/" + train["Id"] + ".npy")
    test_wave = utils.read_wave("./data/ecg/" + test["Id"] + ".npy")

    train_y = train["target"]
    train["sex"] = train["sex"].replace({"male": 0, "female": 1})
    test["sex"] = test["sex"].replace({"male": 0, "female": 1})

    human_mask = train['label_type'] == 'human'
    train_meta_human = train[human_mask][["sex", "age"]]
    train_wave_human = train_wave[human_mask]
    train_meta_auto = train[~human_mask][["sex", "age"]]
    train_wave_auto = train_wave[~human_mask]
    train_y_human = train_y[human_mask]
    train_y_auto = train_y[~human_mask]

    kf = StratifiedKFold(n_splits=5, random_state=10, shuffle=True)
    val_preds = np.zeros(train_meta_human.shape[0])
    for (fold, (train_index, val_index)) in enumerate(kf.split(train_meta_human, train_y_human)):
        print(f"{'=' * 20} fold {fold + 1} {'=' * 20}")
        # the model must be re-created for every fold, otherwise state leaks across folds
        model = MODEL_NAMES_DICT[model_name](param)

        train_input_wave = np.concatenate([train_wave_human[train_index], train_wave_auto])
        train_input_meta = np.concatenate([train_meta_human.iloc[train_index], train_meta_auto])
        train_y_concat = np.concatenate([train_y_human.iloc[train_index], train_y_auto])

        val_input_wave = train_wave_human[val_index]
        val_input_meta = train_meta_human.iloc[val_index]
        val_y_concat = train_y_human.iloc[val_index]

        # remember to pass fold; fit returns the validation predictions (val_pred)
        val_pred = model.fit(
            [train_input_wave, train_input_meta], train_y_concat,
            [val_input_wave, val_input_meta], val_y_concat,
            fold
        )
        val_preds[val_index] += val_pred

    print("AUC score:", roc_auc_score(train_y[human_mask], val_preds))
import argparse
import os
import json

import utils

if __name__ == '__main__':
    runtime1 = []
    runtime2 = []
    skip = []
    for group in utils.listGroups():
        for fn in utils.listGroupInstances(group):
            print(fn)
            topology = fn.split('.')[0]

            data = utils.read_data(group, 'maxEDPSegmentModel', topology)
            if data is None:
                skip.append(topology)
                continue
            res = data['pairResults']
            for r in res:
                runtime2.append(float(r['runtime']) / 1e9)

            data = utils.read_data(group, 'maxEDPMip', topology)
            assert data is not None, topology
            res = data['pairResults']
            for r in res:
                runtime1.append(float(r['runtime']) / 1e9)

    x, y = utils.cdf(runtime1, 0.01)
    ax = plt.subplot()
    ax.set_xscale("log")
def _kidney_disease(config):
    tmp_df1 = pd.DataFrame(utils.read_data(config.dataset_dir + '/kidney_disease_data.txt'))
    tmp_df2 = pd.DataFrame(utils.read_data(config.dataset_dir + '/kidney_disease_labels.txt'))
    data_df = pd.concat([tmp_df1, tmp_df2], axis=1)
    data_df.columns = [str(x) for x in range(len(data_df.columns))]
    target_col = str(len(data_df.columns) - 1)
    data_df[[target_col]] = data_df[[target_col]].replace(-1, 0)
    data_df_dropped = data_df.dropna()

    ###
    df = data_df_dropped.drop(columns=[target_col])
    cols = df.columns.tolist()

    data_df = data_df[~data_df[target_col].isnull()]
    null_df = data_df[data_df.isnull().any(axis=1)]
    filled_df = null_df.fillna(np.nan)
    y_test = np.asarray(filled_df[target_col])
    x_test = filled_df.drop(columns=[target_col]).values
    ###

    y = np.asarray(data_df_dropped[target_col])
    df = data_df_dropped.drop(columns=[target_col])
    cols = df.columns.tolist()

    print("x_test {}, y_test {}".format(x_test.shape, y_test.shape))
    print("df {}, y {}".format(df.shape, y.shape))

    if config.mod_split == 'none':
        X = df.values
        data = {'0': X, 'y': y, '0_test': x_test, 'y_test': y_test}
    elif config.mod_split == 'random':
        X = random_split(config, df.values)
        data = {'y': y}
        for i, x in enumerate(X):
            data[str(i)] = x
    elif config.mod_split == 'computation_split':
        X = feature_split(config, df.values)
        data = {'y': y}
        for i, x in enumerate(X):
            data[str(i)] = x
        clusters = feature_split(config, df.values, return_split_sizes=True)
        X_test_split = []
        for cluster in set(clusters):
            indices = [j for j in range(len(clusters)) if clusters[j] == cluster]
            print(indices)
            X_test_split.append(x_test[:, indices])
        for i, x in enumerate(X_test_split):
            data["{}_test".format(i)] = x
        data['y_test'] = y_test
    return data
from diane.analysis_tools import Histogram1D

from utils import read_data

iters, betas = read_data()

workers = {}

for B in betas:
    for t in iters[B]:
        wid = t[2]
        try:
            h = workers[wid]
        except KeyError:
            h = Histogram1D(0, 50000, 100,
                            "execution time of subsequent tasks on the same worker node (master.log)")
            workers[wid] = h
        h.add(t[1] - t[0])

dispersion = Histogram1D(
    0, 1, 100,
    "relative std deviation of the execution time of subsequent iterations on the same worker node (master.log)",
)
samples = Histogram1D(0.5, 50.5, 50,
                      "number of subsequent iterations on the same worker node (master.log)")
# absdisp = Histogram1D(0,1000,100)

for w in workers:
    h = workers[w]
    if h.hits > 1:
        dispersion.add(h.stddev / h.mean)
        samples.add(h.hits)
        # absdisp.add(h.stddev)
def train(self, Config):
    data_dir = os.path.join('./{}'.format(Config.checkpoint_dir), Config.data_dir)
    train_data_mos, train_data_r, train_data_b, train_label_r, train_label_g, train_label_b = read_data(
        data_dir, Config)

    self.train_g = tf.train.AdamOptimizer(Config.learning_rate).minimize(
        self.loss_g, var_list=self.vars_g)
    self.train_r = tf.train.AdamOptimizer(Config.learning_rate).minimize(
        self.loss_r, var_list=self.vars_r)
    self.train_b = tf.train.AdamOptimizer(Config.learning_rate).minimize(
        self.loss_b, var_list=self.vars_b)
    self.train_rgb = tf.train.AdamOptimizer(Config.learning_rate).minimize(
        self.loss_rgb, var_list=[self.vars_r, self.vars_g, self.vars_b])

    tf.global_variables_initializer().run()
    counter = 0
    start_time = time.time()
    if self.load(self.checkpoint_dir):
        print("Load SUCCESS.")
    else:
        print("Load failed!")
    print("Training...")

    for ep in range(Config.epoch):
        batch_idxs = len(train_data_mos) // Config.batch_size
        permutation = np.random.permutation(train_data_mos.shape[0])
        minn = 10000
        for idx in range(0, batch_idxs):
            batch_images_mos = train_data_mos[permutation[idx * Config.batch_size:(idx + 1) * Config.batch_size]]
            batch_images_r = train_data_r[permutation[idx * Config.batch_size:(idx + 1) * Config.batch_size]]
            batch_images_b = train_data_b[permutation[idx * Config.batch_size:(idx + 1) * Config.batch_size]]
            batch_labels_g = train_label_g[permutation[idx * Config.batch_size:(idx + 1) * Config.batch_size]]
            batch_labels_r = train_label_r[permutation[idx * Config.batch_size:(idx + 1) * Config.batch_size]]
            batch_labels_b = train_label_b[permutation[idx * Config.batch_size:(idx + 1) * Config.batch_size]]
            counter += 1

            """
            if ep < Config.epoch // 4 * 1:
                _, err = self.sess.run([self.train_g, self.loss_g],
                                       feed_dict={self.images_mos: batch_images_mos,
                                                  self.labels_g: batch_labels_g})
            elif ep < Config.epoch // 4 * 2:
                _, err = self.sess.run([self.train_r, self.loss_r],
                                       feed_dict={self.images_mos: batch_images_mos,
                                                  self.images_r: batch_images_r,
                                                  self.labels_r: batch_labels_r,
                                                  self.labels_g: batch_labels_g})
            elif ep < Config.epoch // 4 * 3:
                _, err = self.sess.run([self.train_b, self.loss_b],
                                       feed_dict={self.images_mos: batch_images_mos,
                                                  self.images_b: batch_images_b,
                                                  self.labels_b: batch_labels_b,
                                                  self.labels_g: batch_labels_g})
            else:
                _, err = self.sess.run([self.train_rgb, self.loss_rgb],
                                       feed_dict={self.images_mos: batch_images_mos,
                                                  self.images_r: batch_images_r,
                                                  self.images_b: batch_images_b,
                                                  self.labels_r: batch_labels_r,
                                                  self.labels_g: batch_labels_g,
                                                  self.labels_b: batch_labels_b})
            """

            _, err = self.sess.run(
                [self.train_rgb, self.loss_rgb],
                feed_dict={
                    self.images_mos: batch_images_mos,
                    self.images_r: batch_images_r,
                    self.images_b: batch_images_b,
                    self.labels_r: batch_labels_r,
                    self.labels_g: batch_labels_g,
                    self.labels_b: batch_labels_b
                })

            if counter % 100 == 0:
                print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]" \
                      % ((ep + 1), counter, time.time() - start_time, err))
            if counter % 10000 == 0:
                self.save(Config.checkpoint_dir, counter)
            if err <= minn:
                minn = err
                self.save(Config.checkpoint_dir, counter)
    self.save(Config.checkpoint_dir, counter)
from utils import read_data, save_submission
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import log_loss
from sklearn.cross_validation import cross_val_score
from matplotlib import pyplot as plt
from sys import argv, exit

# This file is an attempt to throw everything sklearn has at the problem and create a whole
# bunch of submissions which will then be averaged out.
train_features, train_labels, test_features, ids, outfile = read_data()


def train_et():
    et = ExtraTreesClassifier(n_estimators=500, max_depth=35, min_samples_split=4,
                              min_samples_leaf=2, criterion="entropy")
    et.fit(train_features, train_labels)
    probs = et.predict_proba(test_features)[:, 1]
    save_submission(outfile + "_et", ids, probs)
    # print cross_val_score(et, train_features, train_labels, scoring="log_loss")


def train_rf():
    rf = RandomForestClassifier(n_estimators=100, max_depth=10, min_samples_split=4,
                                min_samples_leaf=2, criterion="entropy")
    rf.fit(train_features, train_labels)
    probs = rf.predict_proba(test_features)[:, 1]
    save_submission(outfile + "_rf", ids, probs)
    print cross_val_score(rf, train_features, train_labels, scoring="log_loss")


def train_ada():
def evaluate(self, testing_data_path):
    tokens, labels = read_data(testing_data_path)
    tokens_transform = self.preprocess_token_transform(tokens)
    labels_transform = self.preprocess_label_transform(labels)
    tf_unary_scores, tf_transition_params, tf_sequence_lengths = self.sess.run(
        [self.logits_tensor, self.tensor_transition_params, self.sequence_lengths],
        feed_dict={self.tensor_tokens: tokens_transform})

    label_corre_cnt = 0
    label_total_cnt = 0
    sentence_corre_cnt = 0
    sentence_total_cnt = len(labels)

    # err_dir = self.params[constants.PARAM_KEY_MODEL_ERR_DIR]
    # err_file = err_dir + '_new_one_entity_oeo2oso.txt'
    # err_file = open(err_file, 'w')
    # true_dir = self.params[constants.PARAM_KEY_MODEL_ERR_DIR]
    true_file = '/Users/liuxiaoan/Downloads/fund_raw_data_new/true_data_new.txt'
    true_file = open(true_file, 'w')
    err_entity = self.read_errdata('/Users/liuxiaoan/Downloads/raw_data/data/music_valid_data_new_entity.txt')
    # print len(err_entity)

    for tf_unary_scores_, y_, sequence_length_, data_index in zip(
            tf_unary_scores, labels_transform, tf_sequence_lengths, range(len(labels_transform))):
        # Remove padding from the scores and tag sequence.
        tf_unary_scores_ = tf_unary_scores_[:sequence_length_]
        y_ = y_[:sequence_length_]
        # Compute the highest scoring sequence.
        viterbi_sequence, _ = tf.contrib.crf.viterbi_decode(tf_unary_scores_, tf_transition_params)
        # Evaluate accuracy.
        # change "o e o" to "o s o"
        if True:
            labels_str = ''.join(str(i) for i in viterbi_sequence)
            try:
                i = labels_str.index('121')
                viterbi_sequence[i + 1] = 4
            except:
                pass
            try:
                i = labels_str.index('131')
                viterbi_sequence[i + 1] = 5
            except:
                pass
        sentence_corre = True
        label_total_cnt += sequence_length_
        sencence_label_corre_ = np.sum(np.equal(viterbi_sequence, y_))
        label_corre_cnt += sencence_label_corre_
        if sencence_label_corre_ == sequence_length_:
            sentence_corre_cnt += 1
            true_file.write(" ".join(tokens[data_index]) + '\n')
            true_file.write(" ".join(labels[data_index]) + '\n')
            true_file.write(" ".join(self.predict([tokens[data_index]])[0]).strip() + '\n')
            # err_file.write(err_entity[data_index] + '\n')
        # else:
        #     if True:
        #         try:
        #             err_file.write(",".join(tokens[data_index]) + '\n')
        #             err_file.write(",".join(labels[data_index]) + '\n')
        #             err_file.write(",".join(self.predict([tokens[data_index]])[0]).strip().replace(' ', ',') + '\n')
        #             err_file.write(err_entity[data_index] + '\n')
        #         except:
        #             continue
    # err_file.close()
    true_file.close()
    logging.info("total label count: %d, label accuracy: %.2f",
                 label_total_cnt, 1.0 * label_corre_cnt / label_total_cnt)
    logging.info("total sentence count: %d, sentence accuracy: %.2f",
                 sentence_total_cnt, 1.0 * sentence_corre_cnt / sentence_total_cnt)
#     timeperiod=moving_average
# )
#
# average_true_range = average_true_range_list[-1]

# settings.init()
# code_name = ('300623', '捷捷微电')
# code_name = ('600145', '*ST新亿')
# code_name = ('601700', '风范股份')
# code_name = ('000725', '京东方A')
code_name = ('002157', '正邦科技')
# code_name = ('300663', '科蓝软件')
# end = '2017-09-26'
end = '2019-02-15'
data = utils.read_data(code_name)
# print(data)
result = enter.check_volume(code_name, data, end_date=end)
print("low atr check {0}'s result: {1}".format(code_name, result))

# rolling_window = 21
# moving_average = 20
#
# average_true_range = ATR(
#     data.high.values[-rolling_window:],
#     data.low.values[-rolling_window:],
#     data.close.values[-rolling_window:],
#     timeperiod=moving_average
# )
# print(data['high'].values)
#
import matplotlib.pyplot as plt
from sklearn import svm
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
import sys
import os

sys.path.append(os.path.abspath("../"))
from utils import read_data, plot_data, plot_decision_function

# Read data
x, labels = read_data("points_class_0.txt", "points_class_1.txt")

# Split data into train and test sets with an 80-20 ratio
X_train, X_test, y_train, y_test = train_test_split(x, labels, test_size=0.2, random_state=0)

print("Displaying data. Close window to continue.")
# Plot data
plot_data(X_train, y_train, X_test, y_test)

print("Training SVM with C=1 ...")
# Make a classifier and fit it on the training data
clf_1 = svm.SVC(kernel='linear', C=1)
clf_1.fit(X_train, y_train)

print("Display decision function (C=1) ...")
# Plot the decision function on training and test data
plot_decision_function(X_train, y_train, X_test, y_test, clf_1)

print("Training SVM with C=1 ...")
def train():
    with tf.Session() as sess:
        # Create model.
        print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size))
        model = create_model(sess, False)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." % FLAGS.max_train_data_size)
        train_set, dev_set, _ = utils.read_data(_buckets, data_dir=FLAGS.data_dir,
                                                sequence_len=FLAGS.seq_len)
        train_bucket_sizes = [len(train_set[b]) for b in range(len(_buckets))]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
        # the size of the i-th training bucket, as used later.
        train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                               for i in range(len(train_bucket_sizes))]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in range(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(train_set, bucket_id)
            _, step_loss, _, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                            target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                # perplexity = math.exp(loss) if loss < 300 else float('inf')
                print("global step %d learning rate %.4f step-time %.2f loss %.2f"
                      % (model.global_step.eval(), model.learning_rate.eval(), step_time, loss))
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.train_dir, "translate.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                # Run evals on development set and print their perplexity.
                for bucket_id in range(len(_buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, _, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                    target_weights, bucket_id, True)
                    # eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf')
                    print("  eval: bucket %d loss %.2f" % (bucket_id, eval_loss))
                sys.stdout.flush()
# IMPORT REQUIRED PACKAGES
import utils
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import models
import os

# DATA PREPROCESSING
# Read the CSV data into two lists: the first contains the data organized by the different action
# potential firing parameters, the second the list of firing parameters used.
data_by_paramters, data_parameters = utils.read_data("trainingDataOnlyKFixed.csv")
# Crop out the stretches of constant voltage
cropped_data_by_paramters = utils.crop_data(data_by_paramters, 1000)
# Randomize the dataset while keeping the parameters and the firing data for those parameters at the same index
rand_data_by_parameters, rand_data_parameters = utils.randomize_data(cropped_data_by_paramters, data_parameters)
# Shrink the randomized data
scaled_rand_data_by_parameters = utils.scale_data_using_data_removal(rand_data_by_parameters, 2)
# Normalize the data to values between -1 and 1 so that large values don't prevent the model from training properly
normalized_data = utils.normalize_data(scaled_rand_data_by_parameters, (-1, 1))
# Split string tags into numbers
parameters_split = utils.prepare_tags(rand_data_parameters)
# Convert the normalized, preprocessed data into a tensor that can be fed into the network
tensor_data = torch.from_numpy(normalized_data).float()
# Convert the parameters into a tensor that can be fed into the network
tensor_parameters = torch.from_numpy(parameters_split).float()

# CREATE MODEL
time_series_dims = normalized_data[0].ndim  # Size of the time series recordings; here it is only voltage
LSTM_hidden_layer = 200  # How many LSTM units are in this layer
num_of_parameters = parameters_split[0].size  # Number of parameters that need to be predicted
learning_rate = 0.000001  # Learning rate for the model
epochs = 2000  # How many times the model will go through the data
model_save_increments = 50  # How often loss is measured and the model is saved
time_series_length = len(normalized_data[0])
hidden_layer_size = 500
num_of_layers = 1
model_type = "models/LSTMDeepModel"
path = model_type + "_" + str(LSTM_hidden_layer) + "_" + str(learning_rate) + "/"
def test_read_data(self):
    invalid_url = "https://graph.facebook.com/v2.2/Offthegridsf/events?date_form=U&fields=id,cover,name,description,start_time,location,end_time,going&access_token=FAKE"
    data = read_data(invalid_url)
    self.assertEqual(data, "ERROR")
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import utils
from methods import methods
from visualization import plots

FILENAME = 'datasets/ILThermo_Tm.csv'
MODEL = 'mlp_regressor'
DIRNAME = 'my_test'

descs = sys.argv[1:] if len(sys.argv) > 1 else None

# Get X matrix and response vector y
df, y_error = utils.read_data(FILENAME)
X, y = utils.molecular_descriptors(df, descs)
Y = np.empty((y.shape[0], 2))
Y[:, 0] = y.ravel()
Y[:, 1] = y_error.ravel()
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.10)
y_train = Y_train[:, 0]
y_test = Y_test[:, 0]
e_train = Y_train[:, 1]
e_test = Y_test[:, 1]

# Normalize testing data using training data
X_train, X_mean, X_std = utils.normalization(X_train)
import numpy as np
from sklearn.decomposition import KernelPCA
from sklearn.cross_validation import StratifiedKFold
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop, Adam, Nadam  # Just trying different optimizers

from utils import (plot_feature_corr, plot_clusters, plot_pca, plot_feature_importance,
                   pr_curve, print_classfication_report, read_data, write_data, score_model)
from sklearn import metrics

batch_size = 128
epochs = 50

X_train, y_train, X_test = read_data()

# Feature diagnostics
plot_feature_corr(X_train)
plot_feature_corr(np.vstack(X_test), stem='test')
plot_pca(X_train, y_train)
plot_clusters(X_train, y_train)
indices_ci = plot_feature_importance(X_train, y_train)

skf = StratifiedKFold(y_train, n_folds=4)
train_index, dev_index = next(iter(skf))
X_dev = X_train[dev_index]
y_dev = y_train[dev_index]
import numpy as np
from pylab import *
import argparse
import os
import json

import utils

if __name__ == '__main__':
    cost1 = []
    for group in utils.listGroups():
        for fn in utils.listGroupInstances(group):
            print(fn)
            topology = fn.split('.')[0]
            data = utils.read_data(group, 'identificationCost', topology)
            assert data is not None, topology
            c = int(max(data['originalIGP']))
            cost1.append(c)

    groups = [str(i) for i in range(len(cost1))]
    cost1 = utils.data_to_bar(cost1)
    utils.make_g_barplot([cost1], groups, ['original IGP'], ['#3CAEA3'],
                         'maximum identification sr-cycle segment cost',
                         'percentage of topologies', '',
                         '../data/plot/minSegCover_identification_orig.eps', 50)

    """
    ax = plt.subplot()
    plot(x, y)
    plt.xlabel("Topology size |G|")
    plt.ylabel("Runtime in seconds")
    plt.savefig('../data/plot/minSegCover_runtime_by_size_complete.eps', format='eps', dpi=600,
                bbox_inches='tight')
    """
import numpy as np
from pylab import *
import argparse
import os
import json

import utils

if __name__ == '__main__':
    times = []
    for group in utils.listGroups():
        for fn in utils.listGroupInstances(group):
            print(fn)
            topology = fn.split('.')[0]
            data = utils.read_data(group, 'cycleCoverGreedyRounding', topology)
            assert data is not None, topology
            t = int(data['runtime'])
            times.append(t / 1e9 / 60)

    print(len(times))
    print(max(times))
    x, y = utils.cdf(times, 0.1)
    plt.plot(x, y)
    plt.xlabel("Runtime of the cycle cover CG algorithm (in minutes)")
    plt.ylabel("Percentage of topologies")
    plt.savefig('../data/plot/minCycleCover_runtime_cdf.eps', format='eps', dpi=600,
                bbox_inches='tight')
for file in sim_list:
    img = cv2.imread('./sim/' + file)
    cvt = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
    resized = cv2.resize(cvt, (400, 200))
    images.append(resized)
    # cv2.imshow("check", resized)
    # cv2.waitKey(0)
labels = predict(path, np.array(images))
show(images, labels)


def pre_trained():
    model = fcn(learning=True)  # don't use this model
    model.load_weights('./fcn_weights_final.h5')
    return model


if __name__ == "__main__":
    # train('last_try.h5', fcn_cv, 'full')
    data = read_data(13)
    imgs, _ = next(synt_generator(data, 'full')(15))
    # use this model
    labels = predict('./last_try.h5', imgs, fcn_cv)
    show_color(imgs, labels)
    check('./fcn_weights_f2.h5')
'''
if 1.0 - sum(data_wh[:-1]) < 0:
    print "less than 0-------------------------", 1 - sum(data_wh[:-1])
data_wh[-1] = 1.0 - sum(data_wh[:-1])  # make sure the weights sum to 1
'''


def get_pre_res(dataset, res_cls):
    '''
    Compare the predicted classes against the labels carried by the
    training set itself and record which samples were classified
    correctly (1) and which were not (0).
    '''
    pre_statis = []
    for d, cls in zip(dataset, res_cls):
        if d[-1] == cls:
            pre_statis.append(1)
        else:
            pre_statis.append(0)
    return pre_statis


if __name__ == '__main__':
    #datasets = read_data("german-assignment5.txt")
    datasets = read_data("breast-cancer-assignment5.txt")
    #datasets = read_data("test.txt")
    DiscType = get_disc_val(datasets)
    AttrSet = range(len(datasets[0]))
    #print ada_classify(datasets[1:255], datasets[255:])
    #print ada_classify(datasets[1:10], datasets[10:])
    print fcv(datasets, ada_classify)
def train(self, config):
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # if config.is_train:
    #     input_setup(self.sess, config)
    # else:
    #     nx, ny, loc = input_setup(self.sess, config)
    if config.is_train:
        data_dir = os.path.join('./{}'.format(config.checkpoint_dir), "data_train_cmq.h5")
        train_data, train_label = read_data(data_dir, ["train_inpt", "train_labl"])
    else:
        data_dir = os.path.join('./{}'.format(config.checkpoint_dir), "data_test_cmq.h5")
        train_data, train_label, test_mask = read_data(
            data_dir, ["test_train", "test_label", "test_mask"])

    # Stochastic gradient descent with the standard backpropagation
    self.train_op = tf.train.GradientDescentOptimizer(
        config.learning_rate).minimize(self.loss)

    # tensorboard
    merged = tf.summary.merge_all()  # merge the graph, training statistics and other summaries
    writer = tf.summary.FileWriter(os.getcwd() + '/logs', self.sess.graph)  # write the training logs to the logs folder

    tf.initialize_all_variables().run()

    counter = 0
    start_time = time.time()

    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    if config.is_train:
        print("Training...")
        # prepare the data used to evaluate PSNR
        # data_dir = os.path.join('./{}'.format(config.checkpoint_dir), "data_test.h5")
        # test_data, test_label = read_data(data_dir, ["test_train", "test_label"])
        # with h5py.File(os.getcwd()+"/checkpoint/data_origin_246.h5", 'r') as hf:
        #     origin_image_246 = np.array(hf.get("origin_data_246"))
        # # find the irrelevant (all-zero) pixels
        # zorelist = []
        # for i in range(origin_image_246.shape[0]):
        #     for j in range(origin_image_246.shape[1]):
        #         if origin_image_246[i][j] == 0.0:
        #             zorelist.append([i, j])

        for ep in xrange(config.epoch):
            # Run by batch images
            batch_idxs = len(train_data) // config.batch_size
            for idx in xrange(0, batch_idxs):
                batch_images = train_data[idx * config.batch_size:(idx + 1) * config.batch_size]
                batch_labels = train_label[idx * config.batch_size:(idx + 1) * config.batch_size]

                counter += 1
                _, err = self.sess.run([self.train_op, self.loss],
                                       feed_dict={
                                           self.images: batch_images,
                                           self.labels: batch_labels
                                       })

                if counter % 10 == 0:
                    print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]"
                          % ((ep + 1), counter, time.time() - start_time, err))

                if counter % 500 == 0:
                    self.save(config.checkpoint_dir, counter)
                    # compute PSNR
                    # self.image_size = 258
                    # self.label_size = 246
                    # result = self.pred.eval({self.images: test_data, self.labels: test_label})
                    # self.image_size = 33
                    # self.label_size = 21
                    # result = result.reshape([246, 246])
                    # for i in range(result.shape[0]):
                    #     for j in range(result.shape[1]):
                    #         if [i, j] in zorelist:
                    #             result[i][j] = 0.0
                    # self.psnr = tf.assign(self.psnr, compare_psnr(origin_image_246, result))
                    # self.sess.run(self.psnr)
                    summary, acc = self.sess.run([merged, self.loss],
                                                 feed_dict={
                                                     self.images: batch_images,
                                                     self.labels: batch_labels
                                                 })
                    writer.add_summary(summary, counter)
    else:
        print("Testing...")

        # data produced by the makeimagebychoose function: compute PSNR directly
        # with h5py.File(os.getcwd()+"/checkpoint/data_test.h5", 'r') as hf:
        #     test_train = np.array(hf.get("test_train"))
        #     test_label = np.array(hf.get("test_label"))
        #     test_psnr = np.array(hf.get("test_psnr"))
        #
        # result = self.pred.eval({self.images: test_train, self.labels: test_label})
        # cont = 0
        # for i in range(result.shape[0]):
        #     label = test_label[i]
        #     label = label.reshape([21, 21])
        #     train = result[i]
        #     train = train.reshape([21, 21])
        #     srcnn_psnr = compare_psnr(label, train)
        #     if srcnn_psnr > test_psnr[i]:
        #         cont = cont + 1
        #         print("**********psnr srcnn:%.3f | bicubic:%.3f**********" % (srcnn_psnr, test_psnr[i]))
        #     else:
        #         print("psnr srcnn:%.3f | bicubic:%.3f" % (srcnn_psnr, test_psnr[i]))
        # print("wins: %d/%d" % (cont, result.shape[0]))
        # end

        # test procedure of the original model
        makeimage33()
        # with h5py.File(os.getcwd()+"/checkpoint/data_test.h5", 'r') as hf:
        #     test_train = np.array(hf.get("test_train"))
        #     test_label = np.array(hf.get("test_label"))
        #     mask = np.array(hf.get("mask"))
        #     loc = np.array((hf.get("loc")))
        #     nx = np.array((hf.get("nx")))
        #     ny = np.array((hf.get("ny")))
        # result = self.pred.eval({self.images: test_train, self.labels: test_label})
        # result = merge_test(result, [nx, ny, loc])
        # result = result.squeeze()
        # #result = result*mask
        # # use matplotlib
        # fig = plt.figure()
        # plt.suptitle("Test")
        #
        # # original image
        # h, w = loc[len(loc) - 1][0] + 21, loc[len(loc) - 1][1] + 21
        # img = np.zeros((h, w, 1))
        # for i, image in enumerate(train_label):
        #     x, y = loc[i]
        #     img[x:x + 21, y:y + 21] = image
        # img = img.squeeze()
        #
        # a = fig.add_subplot(1, 3, 2)
        # plt.imshow(result, cmap="gray")
        # a.set_title("srcnn:%.4f" % compare_psnr(img, result))
        # print("SRCNN:%.4f ssim:%.4f" % (compare_psnr(img, result), compare_ssim(img, result)))
        #
        # a = fig.add_subplot(1, 3, 1)
        # plt.imshow(img, cmap="gray")
        # a.set_title("origin")
        #
        # imgorigin = img[2:, 2:]
        # # bicubic
        # img = scipy.ndimage.interpolation.zoom(img[2:, 2:], (1. / 3), prefilter=False)
        # img = scipy.ndimage.interpolation.zoom(img, (3 / 1.), prefilter=False)
        #
        # print("bicubic:%.4f ssim:%.4f" % (compare_psnr(imgorigin, img), compare_ssim(imgorigin, img)))
        #
        # a = fig.add_subplot(1, 3, 3)
        # plt.imshow(img, cmap="gray")
        # a.set_title("bicubic:%.4f" % compare_psnr(imgorigin, img))
        # plt.show()
        # print("getResult...")
        # end

        # single-image test, background removed before feeding the network
        # with h5py.File(os.getcwd()+"/checkpoint/singleimage.h5", 'r') as hf:
        #     trian_data = np.array(hf.get("trian_data"))
        #     train_label = np.array(hf.get("train_label"))
        #     bicubicpsinr = np.array((hf.get("bicubicpsinr")))
        # result = self.pred.eval({self.images: trian_data, self.labels: train_label})
        # result = result.reshape([result.shape[1], result.shape[2]])
        # train_label = train_label.reshape([train_label.shape[1], train_label.shape[2]])
        # print("srcnn%.4f bicubic%.4f" % (compare_psnr(train_label, result), bicubicpsinr))
        # plt.figure()
        # plt.imshow(result, cmap="gray")
        # plt.show()
        # end of single-image test

        # test code for the makeimageCutByHand function
        # with h5py.File(os.getcwd()+"/checkpoint/data_origin_246.h5", 'r') as hf:
        #     origin_image_246 = np.array(hf.get("origin_data_246"))
        # with h5py.File(os.getcwd()+"/checkpoint/data_origin_258.h5", 'r') as hf:
        #     origin_image_258 = np.array(hf.get("origin_data_258"))
        # count = 0
        # sum = 0
        # for i in range(train_data.shape[0]):
        #     result = train_data[i]
        #     result_label = train_label[i]
        #     maskimage = test_mask[i]
        #     result = result.reshape([1, result.shape[0], result.shape[1], 1])
        #     result_label = result_label.reshape([1, result_label.shape[0], result_label.shape[1], 1])
        #     result = self.pred.eval({self.images: result, self.labels: result_label})
        #     result = result.reshape([result.shape[1], result.shape[2]])
        #     result = result * maskimage
        #     train_image = train_data[i].reshape([train_data[i].shape[0], train_data[i].shape[1]])
        #     psnrsrcnn = compare_psnr(origin_image_246[i], result)
        #     psnrorigin = compare_psnr(origin_image_258[i], train_image)
        #
        #     plt.figure()
        #     plt.subplot(1, 3, 1)
        #     plt.imshow(origin_image_258[i], cmap="gray")
        #     plt.title("orgin:%d" % i)
        #
        #     plt.subplot(1, 3, 2)
        #     plt.imshow(result, cmap="gray")
        #     plt.title("srcnn:%.4f" % psnrsrcnn)
        #
        #     plt.subplot(1, 3, 3)
        #     plt.imshow(train_image, cmap="gray")
        #     plt.title("bicubic:%.4f" % psnrorigin)
        #     plt.show()
        #
        #     if psnrsrcnn > psnrorigin:
        #         print("************** image %d srcnn:%.4f|bicubic:%.4f **************" % (i, psnrsrcnn, psnrorigin))
        #         count = count + 1
        #     else:
        #         print("image %d srcnn:%.4f|bicubic:%.4f " % (i, psnrsrcnn, psnrorigin))
        #         sum = sum + 1
        # print("wins:", count, "/", sum)
        # end of test code

        # test code for the makeimageCut function
        with h5py.File(os.getcwd() + "/checkpoint/data_origin_339.h5", 'r') as hf:
            origin_image_246 = np.array(hf.get("origin_data_339"))
        with h5py.File(os.getcwd() + "/checkpoint/data_origin_351.h5", 'r') as hf:
            origin_image_258 = np.array(hf.get("origin_data_352"))
        count = 0
        sum = 0
        bh_psnr = []
        sh_psnr = []
        bh_ssim = []
        sh_ssim = []
        for i in range(train_data.shape[0]):
            result = train_data[i]
            result_label = train_label[i]
            maskimage = test_mask[i]
            result = result.reshape([1, 351, 351, 1])
            result_label = result_label.reshape([1, 339, 339, 1])
            result = self.pred.eval({
                self.images: result,
                self.labels: result_label
            })
            result = result.reshape([339, 339])
            # for j in range(maskimage.shape[0]):
            #     for k in range(maskimage.shape[1]):
            #         if maskimage[j][k] == 0:
            #             result[j][k] = 0.0
            result = result * maskimage
            train_image = train_data[i].reshape([351, 351])
            psnrsrcnn = compare_psnr(origin_image_246[i], result)
            psnrorigin = compare_psnr(origin_image_258[i], train_image)
            ssimsrcnn = compare_ssim(origin_image_246[i], result)
            ssimorigin = compare_ssim(origin_image_258[i], train_image)
            sh_psnr.append(psnrsrcnn)
            sh_ssim.append(ssimsrcnn)
            bh_psnr.append(psnrorigin)
            bh_ssim.append(ssimorigin)
            # num = num + psnrsrcnn
            # plt.figure()
            # plt.subplot(1, 3, 1)
            # plt.imshow(origin_image_258[i], cmap="gray")
            # plt.title("orgin:%d" % i)
            #
            # plt.subplot(1, 3, 2)
            # plt.imshow(result, cmap="gray")
            # plt.title("srcnn:%.4f" % psnrsrcnn)
            #
            # plt.subplot(1, 3, 3)
            # plt.imshow(train_image, cmap="gray")
            # plt.title("bicubic:%.4f" % psnrorigin)
            # plt.show()
            if psnrsrcnn > psnrorigin:
                print("************** image %d srcnn:%.4f|bicubic:%.4f **************"
                      % (i, psnrsrcnn, psnrorigin))
                count = count + 1
            else:
                print("image %d srcnn:%.4f|bicubic:%.4f " % (i, psnrsrcnn, psnrorigin))
                sum = sum + 1
        print("wins:", count, "/", sum,
              "mean psnr:", np.mean(sh_psnr), "mean ssim:", np.mean(sh_ssim),
              "std:", np.std(sh_psnr, ddof=1), np.std(sh_ssim, ddof=1))
        print("bicubic mean psnr:", np.mean(bh_psnr), "bicubic mean ssim:", np.mean(bh_ssim),
              "std:", np.std(bh_psnr, ddof=1), np.std(bh_ssim, ddof=1))
def main():
    """
    ask for year of interest
    ask for region
    ask for income
    print top 10 life expectancies
    print bottom 10 life expectancies
    repeat
    returns None
    """
    # read data and filter for countries
    data = utils.filter_countries(utils.read_data("worldbank_life_expectancy"))

    # User input
    year_of_interest = int(input("Enter a year of interest (-1 to quit): "))
    if year_of_interest == -1:
        return None
    region = input("Enter a region (all for all regions): ")
    income = input("Enter an income category (all for all categories): ")

    while True:  # loop until the user quits
        newdata = data
        if year_of_interest < 1960 or year_of_interest > 2015:
            print("valid years are 1960 to 2015")
        else:
            valid = True
            if region != "all":
                if utils.valid_region(data, region):
                    newdata = utils.filter_region(data, region)
                else:
                    valid = False
                    print("not valid region")
            if income != "all":
                if utils.valid_income(data, income):
                    newdata = utils.filter_income(newdata, income)  # apply on top of the region filter
                else:
                    valid = False
                    print("not valid income")
            if valid:
                countries = sorted_ranking_data(newdata, year_of_interest)

                print("\nTop 10 life expectancies for %d:" % year_of_interest)
                for i in range(len(countries)):
                    if i < 10:
                        print("%d: %s %f" % ((i + 1), countries[i].country, countries[i].value))

                print("\nBottom 10 life expectancies for %d:" % year_of_interest)
                for i in range(len(countries)):
                    if (len(countries) - i) <= 10:  # last ten entries
                        print("%d: %s %f" % ((i + 1), countries[i].country, countries[i].value))

        # User input
        year_of_interest = int(input("Enter a year of interest (-1 to quit): "))
        if year_of_interest == -1:
            return None
        region = input("Enter a region (all for all regions): ")
        income = input("Enter an income category (all for all categories): ")
    rf = RandomForestClassifier(max_features='auto',
                                oob_score=False,
                                class_weight='balanced',
                                random_state=config.set_seed,
                                n_jobs=-1,
                                verbose=config.verbose)
    clf = GridSearchCV(estimator=rf, param_grid=params, scoring='accuracy',
                       cv=cv, n_jobs=-1)
    clf.fit(X, y)

    print('#### Best Params')
    print(clf.best_params_)
    print('#### Best Score')
    print(clf.best_score_)

    return clf.grid_scores_


if __name__ == '__main__':
    # Train Data
    train_x = read_data(config.d_xtrain)
    train_y = read_data(config.d_ytrain)
    # Test Data
    test_x = read_data(config.d_xtest)

    report = grid_model(param_grid, train_x, train_y, cv=5)
    write_data(config.grid_report_rf, report)
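The param_grid passed to grid_model is defined earlier in the original script and is not shown above; the dictionary below is a purely hypothetical example using standard RandomForestClassifier hyper-parameters, only to illustrate the expected shape of that argument.

# Hypothetical example only -- the real param_grid lives elsewhere in the script.
param_grid = {
    'n_estimators': [200, 500, 1000],
    'max_depth': [None, 10, 30],
    'min_samples_leaf': [1, 5, 10],
}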