def generate_balanced_dataset():
    """Assemble a balanced data dict from the pickled training set.

    The training and dev pickles are both loaded. The training articles are
    split by label (1 -> "real", 0 -> "fake"), each group is passed through
    ``balance_dataset``, and the two balanced groups are concatenated
    (real first) together with a matching label list.

    NOTE(review): in the visible code ``bal_data`` is neither returned nor
    saved and the dev set is loaded but unused -- the function may continue
    beyond this excerpt; confirm before relying on its result.
    """
    train_path = '../data_pickle/training.pkl'
    dev_path = '../data_pickle/dev.pkl'

    train_set = load_data(train_path)
    dev_set = load_data(dev_path)

    # Partition the training articles by label.
    real = get_articles_from_label(train_set['data'], train_set['labels'], 1)
    fake = get_articles_from_label(train_set['data'], train_set['labels'], 0)

    # Balance each class independently.
    balanced_real = balance_dataset(real)
    balanced_fake = balance_dataset(fake)

    # Real articles come first, so the labels are a run of 1s then 0s.
    merged_articles = balanced_real + balanced_fake
    merged_labels = [1] * len(balanced_real) + [0] * len(balanced_fake)

    bal_data = dict(data=merged_articles, labels=merged_labels)
def generate_balanced_dataset():
    """Assemble a balanced data dict from the pickled training set.

    The training and dev pickles are both loaded. The training articles are
    split by label (1 -> "real", 0 -> "fake"), each group is passed through
    ``balance_dataset``, and the two balanced groups are concatenated
    (real first) together with a matching label list.

    NOTE(review): in the visible code ``bal_data`` is neither returned nor
    saved and the dev set is loaded but unused -- the function may continue
    beyond this excerpt; confirm before relying on its result.
    """
    train_path = '../data_pickle/training.pkl'
    dev_path = '../data_pickle/dev.pkl'

    train_set = load_data(train_path)
    dev_set = load_data(dev_path)

    # Partition the training articles by label.
    real = get_articles_from_label(train_set['data'], train_set['labels'], 1)
    fake = get_articles_from_label(train_set['data'], train_set['labels'], 0)

    # Balance each class independently.
    balanced_real = balance_dataset(real)
    balanced_fake = balance_dataset(fake)

    # Real articles come first, so the labels are a run of 1s then 0s.
    merged_articles = balanced_real + balanced_fake
    merged_labels = [1] * len(balanced_real) + [0] * len(balanced_fake)

    bal_data = dict(data=merged_articles, labels=merged_labels)
def make():
    """Validate the GUI inputs, run the experiment and collect plot requests.

    Steps, in order:
      1. Read the chosen parameter names from ``variables`` and reject
         duplicates.
      2. Check that every plot axis names either a chosen parameter or a key
         of ``SUPPORTED_FIT_RESULTS``.
      3. Evaluate each parameter value, build and initialize an
         ``Experiment``, build its tree and dump the experiment parameters
         to ``parameters.json`` inside the tree root.
      4. Start the run on ``device`` and wait until ``device.counter``
         reaches the count returned by ``device.run``.
      5. Load the data from ``./tree`` and assemble ``plot_list`` (report
         generation itself is currently commented out).

    Returns:
        1 when input validation fails (an error dialog is shown first);
        otherwise None. Any other failure is reported through an error
        dialog and its traceback is printed to stderr.
    """
    # Local imports so the block does not depend on file-level imports.
    import time
    import traceback

    try:
        chosen = [var.get() for var in variables]
        parameters = {}

        if len(chosen) != len(set(chosen)):
            msgbox.showerror('Error',
                             'choosing the same parameter twice is not allowed\n'
                             'plese delet one')
            return 1

        # Names a plot axis may refer to; built once instead of per axis.
        allowed = set(chosen).union(SUPPORTED_FIT_RESULTS)
        for plot in plots:
            for axis in plots[plot]:
                axis_name = axis.get()
                if axis_name in allowed:
                    continue
                if axis_name == '':
                    msgbox.showerror('Error',
                                     'enpthy field not allowed in plot')
                else:
                    msgbox.showerror('Error',
                                     'parameter '+axis_name+' was not set')
                return 1

        for variable, entry in variables.items():
            # SECURITY NOTE(review): eval() executes whatever text is in the
            # entry field. Acceptable only while the input is trusted;
            # consider ast.literal_eval if plain literals are sufficient.
            parameters[variable.get()] = eval(entry.get())

        experiment1 = Experiment(**parameters)
        print('initializing...')
        experiment1.initialize()
        print('building...')
        tree_root = build_tree(experiment1.Tree)
        with open(tree_root+'\\parameters.json', 'w') as fo:
            json.dump(experiment1.parameters, fo)
        print('build done!')

        count_limit = device.run(experiment1, tree_root,
                                 experiment1.parameters['freqency_list'],
                                 experiment1.parameters['powers_list'])
        print(count_limit)

        # Poll for completion; the short sleep avoids pinning a CPU core.
        while device.counter.value != count_limit:
            time.sleep(0.05)
        print('\ndone!')

        data = experiment.load_data('./tree')

        # Each entry is a 1-tuple holding the list of axis names of a plot;
        # only plots with two or three axes are kept.
        plot_list = []
        for plot in plots:
            axes = plots[plot]
            if len(axes) in (2, 3):
                plot_list.append(([axis.get() for axis in axes],))
        # report_generator.make_report(data, plot_list)
        print('a')
    except Exception:
        # Narrower than a bare ``except:`` so Ctrl-C / SystemExit still
        # propagate; the traceback keeps the real cause visible.
        traceback.print_exc()
        msgbox.showerror('Error',
                         'plese check the following things:\n'
                         '* all values are in brackets\n'
                         '* all values are separated by commas\n'
                         '* make shure that freqency_list and powers are set\n'
                         'example:\n'
                         '\t[1.25, 25, 1.235e+3, 1.74e-6]')
# exp.convert_data(strain) strains = exp.strains strains = ['tdc-1', 'daf-7', 'tph-1'] strains = ['N2'] strain = 'N2' feat = 'roam' save_fig = exp.figname('2016_12_20') save_fig = None for strain in strains: ### Load worms data = exp.load_data(strain) nworms = data.nworms ### Stage time distribution ssort = np.argsort(data.total_time) fig = plt.figure(1) plt.clf() plt.subplot(3, 2, 1) plt.plot(np.array(data.total_time)[ssort]) #for s in range(data.nstages-1): # o = np.argsort(data.stage_durations[:,s]); # plt.plot(np.array(data.total_time)[o]); plt.title('%s %s - total time' % (strain, feat))
# PCA plot of all but the last column of dt, saved as a PDF.
fig = plt.figure(412)
plt.clf()
fplt.plot_pca(dt[:, :-1])
fig.savefig(os.path.join(fig_directory, 'stage_durations_pca.pdf'))

#%% Get stage durations from Roaming Dwelling data set

strain = 'N2'
rd_data = rexp.load_data(strain)
rd_stage_ids = rd_data.stage_switch
rd_stage_dur = rd_data.stage_durations

#%% Get stage durations from automatic detection

xy_stage_ids = np.load(
    os.path.join(exp.data_directory, 'transitions_times.npy'))
# Durations are the differences between consecutive entries along axis 1.
xy_stage_dur = np.diff(xy_stage_ids, axis=1)

#%% Plot Stage Durations

fig = plt.figure(1)
plt.clf()
# NOTE(review): presumably a samples-per-hour conversion factor -- confirm.
rate = 3.0 * 60 * 60
#%% Load / Prepare data data = {} dat_bin = {} dat_mean = {} dat_var = {} dat = {} stage_bins = {} dat_bin_s = {} for strain in strains: print 'processing %s...' % strain data[strain] = exp.load_data(strain) dat[strain] = getattr(data[strain], feat) sbinsb = exp.stage_bins(data[strain], nbins=sbins) dat_bin[strain] = exp.bin_data(dat[strain], sbinsb) dat_mean[strain] = np.mean(dat_bin[strain], axis=0) dat_var[strain] = np.var(dat_bin[strain], axis=0) stage_bins[strain] = exp.stage_bins(data[strain], nbins=1) dat_bin_s[strain] = exp.bin_data(dat[strain], stage_bins[strain]) #%% Order by activity order = {} for strain in strains:
# Map every entry of xy_name to its index in rd_name; -1 means "no match".
xy_to_rd = -np.ones(len(xy_name), dtype=int)
for i in range(len(xy_name)):
    pos = np.nonzero(rd_name == xy_name[i])[0]
    if len(pos) > 0:
        # Store a scalar index: assigning the whole index array to a single
        # element is deprecated in recent NumPy and raises when there is
        # more than one match. The first match is used.
        xy_to_rd[i] = pos[0]

# correct for restarted exp 20/21

### Get stage durations from Roaming Dwelling data set
os.chdir(dir_roaming)
# NOTE(review): imported after the chdir, presumably so the `experiment`
# module of the roaming data set is the one picked up -- confirm.
import experiment as exprd

strain = 'N2'
rd_data = exprd.load_data(strain)
# Sanity check: one row of stage durations per entry in rd_name.
assert rd_data.stage_durations.shape[0] == rd_name.shape[0]

rd_stage_ids = rd_data.stage_switch
rd_stage_dur = rd_data.stage_durations

### Get stage durations from automatic detection
xy_stage_ids = np.load(
    os.path.join(exp.experiment_directory, 'transitions_times.npy'))
# Durations are the differences between consecutive entries along axis 1.
xy_stage_dur = np.diff(xy_stage_ids, axis=1)

### Plot Stage Durations
plt.figure(1)
plt.clf()
reload(exp)
print('working at %s' % exp.base_directory)

#%% Load data sets and compare speed

import scripts.preprocessing.filenames as f

strain = 'daf7'
nworms, exp_names, dir_names = f.filenames(strain=strain)

#%%

# The two data sources spell the strain name differently.
straind = 'daf-7'
strain = 'daf7'

rd_data = dexp.load_data(strain=straind)
rd_speed = rd_data.speed

# Cap a copy of the speeds at their 95th percentile (NaNs ignored when
# computing the percentile) before plotting.
rd_speed_th = rd_speed.copy()
th = np.nanpercentile(rd_speed, 95)
rd_speed_th[rd_speed_th > th] = th

fplt.plot_array(rd_speed_th)

# Load the speed data of every worm.
v = []
for wid in range(nworms):
    v.append(exp.load(strain=strain, dtype='speed', wid=wid, memmap=None))

# Longest speed trace among all worms.
ntimes = max([len(vv) for vv in v])

#v = [];
# -*- coding: utf-8 -*-
"""
Plot the position trace of a single entry of the N2 data set.

Created on Thu Oct 13 01:22:12 2016

@author: ckirst
"""

import experiment as exp
import plot as fplt

# Load the N2 data set and attach the positions to it.
data = exp.add_positions(exp.load_data('N2'))

# Index of the entry to plot (presumably a worm id -- confirm).
wid = 0

fplt.plot_trace(data.positions[wid], data.roam[wid])
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
# `sklearn.cross_validation` was removed in scikit-learn 0.20; prefer its
# replacement and fall back only on old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder
from gensim.models.word2vec import Word2Vec

from experiment import FeatureStacker, WordEmbeddings, Windower, load_data
from experiment import include_features

# Command line: argv[1] is the word2vec model file, argv[2] the data file.
# NOTE(review): `sys` is not imported in the visible excerpt -- confirm it
# is imported above.
model = Word2Vec.load(sys.argv[1])
X, y = load_data(sys.argv[2])

# Split document indices (not the documents themselves) 80/20; the same
# index range is passed twice, so the X and y index lists are identical.
X_train_idx, X_test_idx, y_train_idx, y_test_idx = train_test_split(
    range(len(X)), range(len(X)), test_size=0.2, random_state=2014
)

# Documents stay grouped; labels are flattened across documents.
X_train_docs = [X[i] for i in X_train_idx]
y_train_docs = [label for i in y_train_idx for label in y[i]]
X_test_docs = [X[i] for i in X_test_idx]
y_test_docs = [label for i in y_test_idx for label in y[i]]

# Feature sets to evaluate: each base set, each base set plus embeddings,
# and embeddings alone.
experiments = [("word",), ("word", "pos"), ("word", "pos", "root"),
               ("word", "pos", "root", "rel")]
experiments = experiments + [experiment + ("embeddings",)
                             for experiment in experiments]
experiments += [("embeddings",)]

# One column per experiment, ten rows.
scores = np.zeros((10, len(experiments)))
import seaborn as sb
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
# `sklearn.cross_validation` was removed in scikit-learn 0.20; prefer its
# replacement and fall back only on old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder
from gensim.models.word2vec import Word2Vec

from experiment import FeatureStacker, WordEmbeddings, Windower, load_data
from experiment import include_features

# Command line: argv[1] is the word2vec model file, argv[2] the data file.
# NOTE(review): `sys` is not imported in the visible excerpt -- confirm it
# is imported above.
model = Word2Vec.load(sys.argv[1])
X, y = load_data(sys.argv[2])

# Split document indices (not the documents themselves) 80/20; the same
# index range is passed twice, so the X and y index lists are identical.
X_train_idx, X_test_idx, y_train_idx, y_test_idx = train_test_split(
    range(len(X)), range(len(X)), test_size=0.2, random_state=2014)

# Documents stay grouped; labels are flattened across documents.
X_train_docs = [X[i] for i in X_train_idx]
y_train_docs = [label for i in y_train_idx for label in y[i]]
X_test_docs = [X[i] for i in X_test_idx]
y_test_docs = [label for i in y_test_idx for label in y[i]]

# Feature sets to evaluate: each base set, each base set plus embeddings,
# and embeddings alone.
experiments = [('word', ), ('word', 'pos'), ('word', 'pos', 'root'),
               ('word', 'pos', 'root', 'rel')]
experiments = experiments + [
    experiment + ('embeddings', ) for experiment in experiments
]
experiments += [('embeddings', )]
reload(exp)
print('working at %s' % exp.base_directory)

#%% Load data sets and compare speed

import scripts.preprocessing.filenames as f

strain = 'daf7'
nworms, exp_names, dir_names = f.filenames(strain=strain)

#%%

# The two data sources spell the strain name differently.
straind = 'daf-7'
strain = 'daf7'

rd_data = dexp.load_data(strain=straind)
rd_speed = rd_data.speed

# Cap a copy of the speeds at their 95th percentile (NaNs ignored when
# computing the percentile) before plotting.
rd_speed_th = rd_speed.copy()
th = np.nanpercentile(rd_speed, 95)
rd_speed_th[rd_speed_th > th] = th

fplt.plot_array(rd_speed_th)

# Load the speed data of every worm.
v = []
for wid in range(nworms):
    v.append(exp.load(strain=strain, dtype='speed', wid=wid, memmap=None))

# Longest speed trace among all worms.
ntimes = max([len(vv) for vv in v])

#v = [];
#for wid in range(nworms):
#  print '%d / %d' % (wid, nworms);