def run_contrast_data():
    '''Plots some csv data sample files.'''
    outpath = 'example/sample_output/run_contrast_data/'
    if os.path.isdir(outpath):
        shutil.rmtree(outpath)
    os.makedirs(outpath, exist_ok=True)

    # check data
    fpath1 = 'csv_data/Basement/2020-11-21/172102_000000.csv'
    fpath2 = 'csv_data/Basement/2020-11-24/090936_111111.csv'
    mat1 = data.read_csv(fpath1)
    mat2 = data.read_csv(fpath2)

    # compare '111111' and '000000'
    titles = ['111111', '000000']
    ill.render(mat2, os.path.join(outpath, 'L_1.png'))
    ill.render(mat1, os.path.join(outpath, 'L_0.png'))
    ill.contrast([mat2, mat1], os.path.join(outpath, 'gray_0_1.png'),
                 'Grayscale before denoising', titles, cmap='gray')
    ill.contrast([mat2, mat1], os.path.join(outpath, 'rainbow_0_1.png'),
                 'Heatmap before denoising', titles, cmap='rainbow')

    # denoise by subtraction
    denoise = mat2 - mat1
    titles = ['denoise', 'abs(denoise)']
    ill.render(denoise, os.path.join(outpath, 'L_subtract.png'))
    ill.contrast([denoise, np.abs(denoise)], os.path.join(outpath, 'gray_subtract.png'),
                 'Normalization with Subtraction', titles, cmap='gray')
    ill.contrast([denoise, np.abs(denoise)], os.path.join(outpath, 'rainbow_subtract.png'),
                 'Normalization with Subtraction', titles, cmap='rainbow')

    # denoise by cropping the image
    c_mat2 = mat2[27:, :]
    titles = ['before cropping', 'after cropping']
    ill.render(c_mat2, os.path.join(outpath, 'crop_L_1.png'))
    ill.contrast([mat2, c_mat2], os.path.join(outpath, 'gray_crop.png'),
                 'Normalization after cropping', titles, cmap='gray')
    ill.contrast([mat2, c_mat2], os.path.join(outpath, 'rainbow_crop.png'),
                 'Normalization after cropping', titles, cmap='rainbow')
def train(train_csv):
    data = read_csv(train_csv)
    dict_data = preprocess(data)
    X, Y = convert_to_input(dict_data)
    model = model_generate()
    model.fit(X, Y, epochs=10)
    return model
def __init__(self, g, csv_path):
    SituationBase.__init__(self, g)
    self.FRAME_RATE = 22
    self.log("Reading config %s" % csv_path)
    self.scenes = dict([(rec["Number"], rec)
                        for rec in data.read_csv(csv_path, self.g.game_data)])
    self.map_pane = self.add_pane("MINIMAP", MapPane(self))
    self.set_current_scene("1")
def read_train_test_frames():
    df = data.read_csv(index_col=8)

    # Dependent and independent features
    y_df = df[[data.DEPENDENT]]
    x_df = df[data.independents(df)]

    # Split into training and testing sets
    test_size = .20  # 10e6 observations on rank 3 justifies reduced test size
    split_kwargs = dict(test_size=test_size, random_state=_RANDOM_STATE)
    return train_test_split(x_df, y_df, **split_kwargs)
def plot_heatmaps(csv_files, outpaths, shading='gouraud', cmap='rainbow', roi_boxes=True):
    range_depth = 12.8
    range_width = sin(radians(60)) * range_depth
    x, y = data.get_transform_index()

    # matplotlib
    fig = plt.figure(figsize=(6, 3))
    ax = fig.add_subplot(111)
    fig.suptitle('Range Azimuth Heatmap (-60\N{DEGREE SIGN}, 60\N{DEGREE SIGN})')

    for i in range(len(csv_files)):
        if outpaths[i] == '':
            continue
        ax.set_xlabel('Azimuth [m]')
        ax.set_ylabel('Range [m]')
        ax.set_xlim([-range_width - 0.5, range_width + 0.5])
        ax.set_ylim([0, range_depth + 0.5])

        # plot ROI boxes
        if roi_boxes:
            slots = [53, 54, 55, 56, 57, 58]
            for slot_id in slots:
                reg = data.data_index[str(slot_id)]
                pts = reg.get_corners()
                pts.append(pts[0])  # close the polygon
                x_coord = [p[0] for p in pts]
                y_coord = [p[1] for p in pts]
                ax.plot(x_coord, y_coord, 'black')

        mat = data.read_csv(csv_files[i])
        im = ax.pcolormesh(x, y, mat, cmap=cmap, shading=shading, vmin=0.0, vmax=500)
        cb = fig.colorbar(im)
        fig.tight_layout()
        fig.savefig(outpaths[i])

        # free the per-frame artists before the next iteration
        cb.remove()
        plt.cla()
        gc.collect()

    plt.clf()
    plt.close(fig)
    gc.collect()
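# A minimal usage sketch for plot_heatmaps(); the input CSV is one of the sample
# files referenced in run_contrast_data() above, the output path is a placeholder,
# and entries whose output path is '' are skipped by the function itself.
if __name__ == '__main__':
    plot_heatmaps(
        csv_files=['csv_data/Basement/2020-11-21/172102_000000.csv'],
        outpaths=['example/sample_output/heatmap_0.png'],
        cmap='rainbow',
        roi_boxes=True,
    )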
def __init__(self, sit):
    utils.Pane.__init__(self, sit, 600, 30, 800, 230, (140, 180, 160))
    self.background = data.load_image("MiniMap.png")

    class Location(object):
        def __init__(self, rec):
            self.name = rec["Location"]
            self.x = int(rec["x"])
            self.y = int(rec["y"])

    if not MapPane.locations:
        MapPane.locations = {}
        for rec in data.read_csv("map_locations.csv", self.g.game_data):
            if rec["Location"]:
                loc = Location(rec)
                MapPane.locations[loc.name] = loc

    if not self.g.movement_path:
        self.move_to_location("Apartment")
    else:
        self.render()
    for ex in examples:
        # Ignore NA-scored instances
        if ex[1] == "NA":
            continue
        if len(batch) >= batch_size:
            yield pack_samples(batch)
            batch = [ex]
        else:
            batch.append(ex)
    # Final batch
    if len(batch) > 0:
        yield pack_samples(batch)


if __name__ == "__main__":
    # Test KoBERT
    import data

    labeled = data.read_csv("data/sample.csv")
    b_gen = batch_samples(labeled, 8)
    model = BertSentimentPredictor()
    model.train()
    input_b = next(b_gen)[0]
    print(model(input_b))
    print(model(input_b))
        if len(batch) >= batch_size:
            yield pack_samples(batch)
            batch = [ex]
        else:
            batch.append(ex)
    # Final batch
    if len(batch) > 0:
        yield pack_samples(batch)


if __name__ == "__main__":
    # Test CBiLSTM
    import data

    labeled = data.read_csv("data/sample.csv")
    unlabeled = data.read_csv("data/thaad_relevant.csv")

    # First get the char/jamo vocabulary
    c2i = defaultdict(lambda: len(c2i))
    j2i = defaultdict(lambda: len(j2i))
    c2i["[PAD]"]
    j2i["[PAD]"]
    for i, ex in enumerate(labeled):
        print(f"Reading char & jamo vocabulary from labeled texts: {i}", end="\r")
        for c in ex[0]:
            c2i[c]
        for j in jamo.j2hcj(jamo.h2j(ex[0])):
            j2i[j]
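# The two fragments above share the same batching loop; the enclosing generator
# is not part of either excerpt.  Below is a self-contained sketch of it: the
# signature and the initial `batch = []` are inferred from the call
# `batch_samples(labeled, 8)`, and `_pack_samples` is a hypothetical stand-in
# for the authors' pack_samples() helper so the sketch runs on its own.
def _pack_samples(batch):
    # Hypothetical stand-in: the real helper converts a batch into model inputs.
    return batch


def batch_samples(examples, batch_size):
    batch = []
    for ex in examples:
        if ex[1] == "NA":  # ignore NA-scored instances
            continue
        if len(batch) >= batch_size:
            yield _pack_samples(batch)
            batch = [ex]
        else:
            batch.append(ex)
    if batch:  # final, possibly short, batch
        yield _pack_samples(batch)


# e.g. list(batch_samples([("text", 1)] * 5, 2)) -> batches of sizes 2, 2, 1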
def test(test_csv):
    data = read_csv(test_csv)
    dict_data = preprocess_test(data)
    X, Y = convert_to_input_test(dict_data)
    return (X, Y)
def validate_dataset(csv_path):
    data_v = read_csv(csv_path)
    dict_data = preprocess(data_v)
    X_valid, Y_valid = convert_to_input(dict_data)
    return (X_valid, Y_valid)
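# Hypothetical driver tying together train(), test() and validate_dataset()
# as defined above.  The CSV paths are placeholders, and evaluate() assumes
# the Keras-style model implied by model.fit(X, Y, epochs=10) in train().
if __name__ == "__main__":
    model = train("train.csv")
    X_valid, Y_valid = validate_dataset("valid.csv")
    print("validation:", model.evaluate(X_valid, Y_valid))
    X_test, Y_test = test("test.csv")
    print("test:", model.evaluate(X_test, Y_test))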
#!/usr/bin/python3
import numpy as np
from operator import itemgetter
from day import norm_days, norm_day, get_high, get_low, get_open, get_close
from data import read_csv, read_tdx, match_all, read_dir
from draw import draw_box, plot_row, plot_target
import matplotlib.pyplot as plt
from matplotlib.figure import figaspect

if __name__ == "__main__":
    data, dates = read_csv('./csv1')
    exit()  # early exit: everything below is currently unreachable

    data, dates = read_tdx('./testdata/testdata_1.txt')
    factor = 1
    # data, dates = read_tdx('./sample1.txt')
    # data, dates = read_tdx('./600030.txt')
    # factor = 100
    print(len(data))
    print(len(dates))

    total_days = len(data)
    ldays = 5
    target = data[total_days - ldays:, :]

    data, dates = read_dir('./stock/', -1, read_tdx)
    # cands = data[:total_days - ldays,:]
        y = np.zeros(28)
        for key in lbl:
            y[int(key)] = 1
        labels.append(y)
    return np.array(paths), np.array(labels)


if __name__ == '__main__':
    patience = 5
    earlyThreshold = 0.0005
    num_fold = 5
    mskf = MultilabelStratifiedKFold(n_splits=num_fold, random_state=0)
    fold = 0
    X, y, _ = read_csv()
    for train_index, val_index in mskf.split(X, y):
        X_val, y_val = X[val_index], y[val_index]
        X_train, y_train = X[train_index], y[train_index]
        print("starting fold: {}".format(fold))
        if not os.path.exists(config.submit):
            os.makedirs(config.submit)
        if not os.path.exists(config.weights + config.model_name + os.sep + str(fold)):
            os.makedirs(config.weights + config.model_name + os.sep + str(fold))
        if not os.path.exists(config.best_models):
# coding: utf-8
import matplotlib.pyplot as plt
# %matplotlib inline
import numpy as np
import data as pd
import os
from sklearn import datasets, linear_model

print(os.getcwd())

data = pd.read_csv('ccpv.csv', encoding="utf-8", header=0)
print(data.head())
print(data.shape)
print(data.info())
print(data.iloc[:, [0, 1, 2, 3]])  # positional lookup; .ix was removed from recent pandas

x = data.iloc[:, [0, 1, 2, 3]]
x.head()
y = data[["PE"]]
print(y.head())
print(x.head())

from sklearn.model_selection import train_test_split  # sklearn.cross_validation no longer exists
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

from sklearn.linear_model import LinearRegression
"""Exploratory data analysis""" import datetime as dt import time import matplotlib.pyplot as plt import pandas as pd from pandas.tools.plotting import andrews_curves, autocorrelation_plot, \ lag_plot, parallel_coordinates, scatter_matrix from sklearn.decomposition import PCA, KernelPCA import data if __name__ == '__main__': df = data.read_csv() # Description matrix print('Description:\n{}'.format(df.describe())) # Number of samples for visualization and other compute bound steps nsamples = 500 # Scatterplot # Take a random sample of data rather than visualize all data sample_df = df.sample(nsamples) scatter_kwds = dict(alpha=0.2, figsize=(15, 15)) diagonals = ['hist', 'kde'] for d in diagonals: plt.clf() # Clear any existing figure axes = scatter_matrix(sample_df, diagonal=d, **scatter_kwds) scatter_matrix_fp_fmt = 'output/scatter_matrix_{}.png'
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_arguments()

    # Setting shortcuts
    CLF_OR_REG = args.exp_mode == "clf"  # True is CLF, False is REG
    CUDA = args.cuda_device
    COTR_EPOCH = args.epoch
    BATCH_SIZE = args.batch_size
    N = BATCH_SIZE * 2
    U_SUB_SIZE = BATCH_SIZE * 10

    # Data generators
    labeled = data.read_csv(args.labeled_path)
    unlabeled = data.read_csv(args.unlabeled_path)

    # Char/jamo vocabularies
    if args.model_path is None:
        c2i = defaultdict(lambda: len(c2i))
        j2i = defaultdict(lambda: len(j2i))
        c2i["[PAD]"]
        j2i["[PAD]"]
    else:
        checkpoint = torch.load(args.model_path)
        c2i = checkpoint["c2i"]
        j2i = checkpoint["j2i"]

    # Labeled & unlabeled pool of data
    L = []
from data import (read_csv, build_sentences_labels, handle_uncommon_words,
                  handle_unknown_words, build_sentences, handle_unknown_sentences)
from probs import build_emission_map, build_transition_map

if __name__ == '__main__':
    X_train = read_csv("./data/dev_x.csv")
    X_labels = read_csv("./data/dev_y.csv")
    sentences, labels = build_sentences_labels(X_train, X_labels, k=2)
    sentences = handle_uncommon_words(sentences)
    # transition_map = build_transition_map(labels)
    # emission_map = build_emission_map(sentences, labels)

    vocab = set()
    for sentence in sentences:
        for word in sentence:
            vocab.add(word)

    not_found = []
    test_sentences = build_sentences(read_csv('./data/test_x.csv'), k=2)
    test_sentences = handle_unknown_sentences(test_sentences, vocab)
    for test_sentence in test_sentences:
        for test_word in test_sentence:
            if test_word not in vocab:
                not_found.append(test_word)

    with open('output.txt', 'w') as f:
        for word in not_found:
            f.write(f"{word}\n")
def test(model_paths: Sequence[Text], test_data_paths: Sequence[Text],
         pretrained_model_name: Text, label_col: Text, n_rows: int,
         batch_size: int, verbose: bool):
    width = max(len(p) for p in model_paths + test_data_paths)
    headers = ["precision", "recall", "f1-score", "support"]
    header_fmt = f'{{:<{width}s}} ' + ' {:>9}' * 4
    row_fmt = f'{{:<{width}s}} ' + ' {:>9.3f}' * 3 + ' {:>9}'

    # load the tokenizer model
    tokenizer_for = transformers.AutoTokenizer.from_pretrained
    tokenizer = tokenizer_for(pretrained_model_name)

    # load the pre-trained transformer model
    model_for = transformers.TFAutoModel.from_pretrained
    transformer = model_for(pretrained_model_name)

    test_data_rows = {p: [] for p in test_data_paths}
    for model_path in model_paths:
        tf.keras.backend.clear_session()

        # load the fine-tuned transformer model
        model = models.from_transformer(transformer=transformer, n_outputs=1)
        model.load_weights(model_path).expect_partial()

        for data_path in test_data_paths:
            # tokenize the test data
            df = data.read_csv(data_path=data_path, label_col=label_col, n_rows=n_rows)
            x, y_ref = data.df_to_xy(df=df, tokenizer=tokenizer, label_col=label_col)

            # predict on the test data
            y_pred_scores = model.predict(x, batch_size=batch_size)
            y_pred = (y_pred_scores >= 0.5).astype(int).ravel()

            # evaluate predictions
            stats_arrays = sklearn.metrics.precision_recall_fscore_support(
                y_ref, y_pred, labels=[1])
            stats = [a.item() for a in stats_arrays]
            row = [model_path] + stats
            test_data_rows[data_path].append(row_fmt.format(*row))

            # if requested, print detailed results for this model
            if verbose:
                header = header_fmt.format(data_path, *headers)
                print("=" * len(header))
                print(header)
                print(row_fmt.format(*row))
                print("=" * len(header))
                df.insert(1, "prediction", y_pred_scores)
                print(df)
                print()

    # print results for all models on all datasets
    for data_path, rows in test_data_rows.items():
        print(header_fmt.format(data_path, *headers))
        for row in rows:
            print(row)
        print()
def load_questions(self):
    records = data.read_csv("InterviewQuiz.csv", self.g.game_data)
    QuizSituationBase.questions = dict([(rec["Number"], rec) for rec in records])
    QuizSituationBase.questions_by_q = dict([(rec["Question"], rec) for rec in records])