def main(argv=None):
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)

    # load dataset
    train_data = util.read_data(FLAGS.data_dir, 'train', FLAGS.sent_len,
                                negative=FLAGS.negative,
                                hierarchical=FLAGS.hierarchical,
                                shuffle=False)
    test_data = util.read_data(FLAGS.data_dir, 'dev', FLAGS.sent_len,
                               negative=FLAGS.negative,
                               hierarchical=FLAGS.hierarchical)

    class_names = None
    relations = None
    if FLAGS.negative:
        class_names = util.load_from_dump(
            os.path.join(FLAGS.data_dir, 'classes.cPickle'))
        relations = util.load_from_dump(
            os.path.join(FLAGS.data_dir, 'relations.cPickle'))

    train(train_data, test_data, class_names=class_names, relations=relations)
def generate_combined(src='all', num=None, method='avg', img_scale=False):
    """
    Generates a combined file from aps files in the src_dir for help in
    identifying zones. The result is serialized as an npy file named
    'combined-<src>-<num>.npy', where <num> is the number of files used in
    the combination. If 'num' is not given, all files are used.
    """
    files = shuffled_files(src)
    if num is None:
        num = len(files)

    sample = np.asarray(util.read_data(files[0]))
    combined = np.zeros(sample.shape)
    for file in files[0:num]:
        file_data = np.asarray(util.read_data(file))
        if img_scale:
            file_data = scipy.misc.bytescale(file_data)
        if method == 'avg':
            combined = combined + file_data
        else:
            combined = np.maximum(combined, file_data)

    if method == 'avg':
        combined = combined / num

    # np.sum(combined, axis=(1,2))
    np.save(f"combined-{src}-{num}", combined)
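# --- Illustrative sketch (not from the original project) ---
# A minimal, self-contained example of the two combination methods above
# ('avg' vs element-wise max), using small synthetic arrays instead of the
# aps files returned by shuffled_files()/util.read_data().
import numpy as np

arrays = [np.random.rand(4, 4) for _ in range(10)]  # stand-ins for the file data

avg_combined = np.zeros_like(arrays[0])
max_combined = np.zeros_like(arrays[0])
for arr in arrays:
    avg_combined += arr                           # running sum for the 'avg' method
    max_combined = np.maximum(max_combined, arr)  # running element-wise max
avg_combined /= len(arrays)                       # finish the average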
def run():
    db = initdb()
    o = read_data("中国城市坐标D.xls")
    d = read_data("中国城市坐标D.xls")
    logger.info("File loaded successfully")
    bmap = BaiduMap(AK, SK)

    # Load the data
    if recov.hasfile:
        datas = recov.recover()
        for data in datas:
            bmap.load_data(**data)
    else:
        for i in o.index:
            for j in d.index:
                if i >= j:
                    continue
                data = {
                    "origin": "%s,%s" % (o.iloc[i].y, o.iloc[i].x),
                    "destination": "%s,%s" % (d.iloc[j].y, d.iloc[j].x)
                }
                bmap.load_data(**data)
    logger.info("Data loading complete")

    # Start writing results
    for result in bmap.get_train_data():
        try:
            db.insert_transit(result)
        except Exception:
            logger.error("Write failed")
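# --- Illustrative sketch (not part of the original module) ---
# The i >= j guard above enumerates each unordered origin-destination pair of
# cities exactly once; the same pairing can be expressed with
# itertools.combinations, as this toy example shows.
from itertools import combinations

city_ids = [0, 1, 2, 3]
pairs = list(combinations(city_ids, 2))
# pairs == [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]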
def get_data():
    train_data, _ = util.read_data(error=0, is_train=True)
    test_data = np.vstack(
        list(map(lambda x: util.read_data(x, False)[0], range(22))))

    scaler = preprocessing.StandardScaler().fit(train_data)
    train_data = scaler.transform(train_data)
    test_data = scaler.transform(test_data)
    return train_data, test_data
def get_test_data():
    test_data = []
    for i in range(22):
        data, _ = util.read_data(error=i, is_train=False)
        test_data.append(data)
    test_data = np.concatenate(test_data)

    train_data, _ = util.read_data(error=0, is_train=True)
    scaler = preprocessing.StandardScaler().fit(train_data)
    test_data = scaler.transform(test_data)
    return test_data
def get_data(n_samples):
    data, _ = util.read_data(error=0, is_train=True)
    train_data = _hstack(data, n_samples)

    test_data = []
    for i in range(22):
        data, _ = util.read_data(error=i, is_train=False)
        test_data.append(_hstack(data, n_samples))
    test_data = np.vstack(test_data)

    scaler = preprocessing.StandardScaler().fit(train_data)
    train_data = scaler.transform(train_data)
    test_data = scaler.transform(test_data)
    return train_data, test_data
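# --- Illustrative sketch (synthetic data, not the project's loaders) ---
# The loaders above share one convention: StandardScaler is fit on the
# training split only, and the same fitted scaler is then applied to the test
# split so both are standardized with the training mean and variance.
import numpy as np
from sklearn import preprocessing

train = np.random.rand(100, 5)
test = np.random.rand(20, 5)

scaler = preprocessing.StandardScaler().fit(train)  # statistics from train only
train_scaled = scaler.transform(train)
test_scaled = scaler.transform(test)                # reuse the training statistics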
def main(fname, threshold, epsilon):
    X, Y = read_data(fname)
    print X.shape, Y.shape
    print "X shape: %s" % str(X.shape)

    startTime = time.time()
    X_index_retained, W, X_index, obj_values = sfs_l21_norm(
        X, Y, threshold, epsilon)
    endTime = time.time()
    print "runtime: %.4f seconds" % (endTime - startTime)

    print "**" * 30
    print "data set name: %s" % fname
    print "selected features index: "
    print "sfs-l21-norm "
    print "threshold = %s, epsilon = %s: " % (str(threshold), str(epsilon)),
    print "%s\n" % str(list(X_index_retained))
    print "selected features weight: \n%s\n" % str(W)
    print "feature indices that passed the gradient validation: \n%s\n" % str(
        X_index)
    print "**" * 30
    print "DONE"

    obj_values_df = pd.DataFrame(obj_values, index=X_index,
                                 columns=["obj_values"])
    columns = ["weight_%d" % i for i in range(Y.shape[1])]
    weights_df = pd.DataFrame(W, index=np.hstack((-1, X_index_retained)),
                              columns=columns)
    df = pd.concat((obj_values_df, weights_df))
    df.plot(subplots=True)
    plt.show()
def main(argv=None):
    restore_param = util.load_from_dump(
        os.path.join(FLAGS.train_dir, 'flags.cPickle'))
    restore_param['train_dir'] = FLAGS.train_dir

    if restore_param.has_key('contextwise') and restore_param['contextwise']:
        source_path = os.path.join(restore_param['data_dir'], "ids")
        target_path = os.path.join(restore_param['data_dir'], "target.txt")
        _, data = util.read_data_contextwise(
            source_path, target_path, restore_param['sent_len'],
            train_size=restore_param['train_size'])
    else:
        source_path = os.path.join(restore_param['data_dir'], "ids.txt")
        target_path = os.path.join(restore_param['data_dir'], "target.txt")
        _, data = util.read_data(source_path, target_path,
                                 restore_param['sent_len'],
                                 train_size=restore_param['train_size'])

    pre, rec = evaluate(data, restore_param)
    util.dump_to_file(os.path.join(FLAGS.train_dir, 'results.cPickle'),
                      {'precision': pre, 'recall': rec})
def get_middle_profile(directory):
    # Change directories
    cwd = os.getcwd()
    os.chdir(directory)

    ### Data ###
    intensity_polar = util.read_data(frame, 'polar_intensity', fargo_par,
                                     id_number=id_number, directory=".")
    if normalize:
        intensity_polar /= np.max(intensity_polar)

    azimuthal_radii, azimuthal_profiles = az.get_profiles(intensity_polar,
                                                          fargo_par, args,
                                                          shift=None)

    # Middle Profile
    middle_i = (num_profiles - 1) / 2
    middle_profile = azimuthal_profiles[middle_i]

    # Return to previous directory
    os.chdir(cwd)

    return middle_profile
def measure_peak_offset(args):
    # Unpack
    i, frame = args

    intensity_polar = util.read_data(frame, 'polar_intensity', fargo_par,
                                     id_number=id_number)

    # Shift and get peak
    shift_c = az.shift_away_from_minimum(intensity_polar, fargo_par)
    intensity_polar = np.roll(intensity_polar, shift_c, axis=-1)
    peak_r_i, peak_phi_i = az.get_peak(intensity_polar, fargo_par)

    # Return peak relative to the center, where the edges are set by a threshold
    intensity_polar /= np.max(intensity_polar)  # normalize
    intensity_polar_at_peak = intensity_polar[peak_r_i, :]  # take azimuthal profile

    left_index = az.my_searchsorted(intensity_polar_at_peak, threshold)
    right_index = len(intensity_polar_at_peak) - az.my_searchsorted(
        intensity_polar_at_peak[::-1], threshold) - 1

    # Convert to theta
    left_theta = theta[left_index] * (180.0 / np.pi)
    right_theta = theta[right_index] * (180.0 / np.pi)
    center_theta = left_theta + (right_theta - left_theta) / 2.0

    peak_theta = theta[peak_phi_i] * (180.0 / np.pi)
    peak_offset = peak_theta - center_theta

    print i, frame, peak_offset, center_theta, peak_theta

    # Store in mp_array
    peak_offsets[i] = peak_offset
def run(fname, epsilon, threshold, label_pos):
    from util import read_data
    X, Y = read_data(fname, label_pos)
    print X.shape, Y.shape
    print "X shape: %s" % str(X.shape)

    X_index_retained, W, X_index, obj_values = sfs_l21_norm(
        X, Y, threshold, epsilon)

    print "**" * 30
    print "data set name: %s" % fname
    print "selected features index: "
    print "grafting l21-norm "
    print "threshold = %s, epsilon = %s: " % (str(threshold), str(epsilon)),
    print "%s\n" % str(list(X_index_retained))
    print "selected features weight: \n%s\n" % str(W)
    print "feature indices that passed the gradient validation: \n%s\n" % str(
        X_index)
    print "**" * 30
    print "DONE"

    obj_values_df = pd.DataFrame(obj_values, index=X_index,
                                 columns=["obj_values"])
    columns = ["weight_%d" % i for i in range(Y.shape[1])]
    weights_df = pd.DataFrame(W, index=np.hstack((-1, X_index_retained)),
                              columns=columns)
    df = pd.concat((obj_values_df, weights_df))
    df.plot(subplots=True)
    plt.show()
def word_pair_train(self, **kargs):
    print('word pair train, %d' % config.USE_WORD_PAIR_NO)
    self.dict_word_pairs = util.load_dict_word_pairs(config.WORD_PAIRS,
                                                     config.USE_WORD_PAIR_NO)
    """
    :type feature: list(tuple(dict, str/int))
                              ^     ^
                              |     |
                          feature  sense
    """
    self.trainData = util.read_data(self.file_name)

    start = time.time()
    feature = []
    for relation in self.trainData:
        feat = {}
        feat.update(feature_functions.word_pairs(relation, self.dict_word_pairs))
        for sense in relation['Sense']:
            num_sense = util.map_sense_to_number(sense)
            if num_sense != -1:
                feature.append((feat, num_sense))
    end = time.time()
    print('extract word pair costs: %f seconds' % (end - start))

    start = time.time()
    model = nltk.MaxentClassifier.train(feature, **kargs)
    end = time.time()
    print('train word pair model costs: %f seconds' % (end - start))

    util.store_model(model, 'model/word_pair.model')
    return model
def data(self):
    # Lazily load the run data on first access
    if not hasattr(self, '_data'):
        self._data = read_data(self.data_file)[self.index]
    return self._data
def read_data(frame):
    """ Step 0: read intensity data """
    intensity = util.read_data(frame, 'intensity', fargo_par,
                               id_number=id_number)
    return intensity
def split_all_data():
    # READ DATA
    dataset_dir = os.path.join('..', 'data', DATASET)
    feats_file = os.path.join(dataset_dir, 'feats.17')
    labels_file = os.path.join(dataset_dir, 'time')
    data = util.read_data(feats_file, labels_file)

    # SHUFFLE DATA
    np.random.seed(1000)
    np.random.shuffle(data)

    # BUILD FOLDER STRUCTURE
    dataset_dir = os.path.join(SPLIT_DIR, DATASET)
    try:
        os.makedirs(dataset_dir)
    except OSError:
        print "skipping folder creation"

    # SPLIT TRAIN/TEST AND SAVE
    fold_indices = cross_validation.KFold(data.shape[0], n_folds=10)
    for fold, index in enumerate(fold_indices):
        print index[0].shape
        train_data = data[index[0]]
        test_data = data[index[1]]

        train_data, scaler = util.normalize_train_data(train_data)
        test_data = util.normalize_test_data(test_data, scaler)

        fold_dir = os.path.join(dataset_dir, str(fold))
        try:
            os.makedirs(fold_dir)
        except OSError:
            print "skipping fold dir"

        np.savetxt(os.path.join(fold_dir, 'train'), train_data, fmt="%.5f")
        np.savetxt(os.path.join(fold_dir, 'test'), test_data, fmt="%.5f")
def points_file(src='all', padding=True):
    """
    Creates a points file for the given 'src' files ('train', 'valid', 'test', etc.)
    """
    files = shuffled_files(src)

    file = os.path.join(config.PSCREENING_HOME, 'points-' + src + '.csv')
    with open(file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        f_count = 0
        for f in files:
            f_count += 1
            print(f"Reading file {f}...")
            file_images = util.read_data(f, as_images=True)
            w, h = file_images[0].size

            print(f"Creating zones...")
            zone_rects = z.create_zones16(file_images)
            if padding:
                zones_config.apply_padding(zone_rects, max_width=w, max_height=h)

            print(f"Write record...")
            for i in range(16):
                row = [[f], [i]] + [list(zone_rects[i][j]) for j in ZONE_EXTRACTIONS]
                row = [val for sublist in row for val in sublist]
                writer.writerow(row)

            print(f"Record #{f_count} completed")
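# --- Illustrative sketch (made-up values) ---
# The CSV rows above are built by concatenating small lists and then
# flattening them with a nested comprehension before writer.writerow():
row = [["file_0001"], [3]] + [[10, 20, 110, 220], [15, 25, 115, 225]]
flat = [val for sublist in row for val in sublist]
# flat == ['file_0001', 3, 10, 20, 110, 220, 15, 25, 115, 225]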
def main(argv=None):
    FLAGS = tf.app.flags.FLAGS
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)

    # load dataset
    # source_path = os.path.join(FLAGS.data_dir, 'ids.txt')
    # target_path = os.path.join(FLAGS.data_dir, 'target.txt')
    source_path = os.path.join(FLAGS.data_dir, 'test_cs_unlabeled_data_combined.txt')
    target_path = os.path.join(FLAGS.data_dir, 'test_cs_labels_combined.txt')

    attention_path = None
    if FLAGS.attention:
        if os.path.exists(os.path.join(FLAGS.data_dir, 'source.att')):
            attention_path = os.path.join(FLAGS.data_dir, 'source.att')
        else:
            raise ValueError("Attention file %s not found." %
                             os.path.join(FLAGS.data_dir, 'source.att'))

    train_data, test_data = util.read_data(
        source_path, target_path, FLAGS.sent_len,
        attention_path=attention_path, train_size=FLAGS.train_size,
        hide_key_phrases=FLAGS.hide_key_phrases)
    train(train_data, test_data)
def main(argv=None):
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)

    # load dataset
    train_data = util.read_data(FLAGS.data_dir, 'train', FLAGS.sent_len,
                                negative=FLAGS.negative,
                                hierarchical=FLAGS.hierarchical)
    test_data = util.read_data(FLAGS.data_dir, 'dev', FLAGS.sent_len,
                               negative=FLAGS.negative,
                               hierarchical=FLAGS.hierarchical)

    train(train_data, test_data)
def main():
    filename = "data/data.csv"
    column_type = {'Review Text': str, 'Sentiment': str, 'Fit': str,
                   'Fabric': str, 'Color': str, 'Style': str, 'Cost': str}
    x_label = 'Review Text'
    y_label_sentiment = 'Sentiment'
    sentiment_labels = ['-1', '0', '1']

    data = read_data(filename, column_type, x_label=x_label,
                     y_label=y_label_sentiment, clean_data=True)
    x = data[x_label]
    y = data.drop([x_label], axis=1)

    # split data: train and test
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                        random_state=50)

    # naive bayes
    w_prob, c_prob = naive_bayes_learner(X_train, y_train[y_label_sentiment],
                                         sentiment_labels, verbose=False)

    expected_class_list = np.zeros(len(y_test))
    predicted_class_list = np.zeros(len(y_test))
    for index, (data, expected_class) in enumerate(zip(X_test, y_test[y_label_sentiment])):
        predicted_class, log_prob = naive_bayes_classifier(w_prob, c_prob,
                                                           sentiment_labels, data)
        expected_class_list[index] = expected_class
        predicted_class_list[index] = predicted_class
        # print predicted_class, expected_class

    print "Total: ", len(expected_class_list)
    print "Correct predictions: ", np.sum(expected_class_list == predicted_class_list)
    print "Accuracy: ", np.mean(expected_class_list == predicted_class_list)
def jsonurl():
    object_path = os.path.join(DATA_PATH, 'objects.json')
    data, errors = read_data(object_path)

    result = {}
    result['data'] = data
    result['errors'] = errors
    return json.dumps(result)
def main():
    conf = Config().parse(create_dir_flag=False)
    conf.name = 'TEST_' + conf.name
    conf.output_dir_path = util.prepare_result_dir(conf)
    gan = InGAN(conf)

    try:
        gan.resume(conf.test_params_path, test_flag=True)
        [input_tensor] = util.read_data(conf)

        if conf.test_video:
            retarget_video(gan, input_tensor,
                           define_video_scales(conf.test_vid_scales), 8,
                           conf.output_dir_path)
        if conf.test_collage:
            generate_collage_and_outputs(conf, gan, input_tensor)
        if conf.test_non_rect:
            test_homo(conf, gan, input_tensor)

        print 'Done with %s' % conf.input_image_path

    except KeyboardInterrupt:
        raise
    except Exception as e:
        # print 'Something went wrong with %s (%d/%d), iter %dk' % (input_image_path, i, n_files, snapshot_iter)
        print_exc()
def __init__(self, model_id, max_deg=50, bounds=None, D=None, vvv=0):
    """
    Constructor for a LogitModel object. The data can be provided directly,
    or it can be read in from file.

    Keyword arguments:
    model_id -- model_id can be either the file name from where the choices
                are read, or an identifier for the current model
    max_deg -- the max degree that will be considered (default: 50)
    bounds -- optional bounds on the model parameters
    D -- 2d-array representing choice options
    vvv -- int representing level of debug output [0:none, 1:some, 2:lots]

    If data is supplied directly, D is a (n*i)x4 matrix, where each choice
    set has i choices, exactly one of which should be chosen. For every
    example we get the following covariates: [choice_id, Y, degree, n_fofs]
    """
    self.id = model_id
    self.vvv = vvv

    if D is not None:
        self.D = D
    elif '.' in model_id:
        self.D = util.read_data(model_id, max_deg, vvv=vvv)
    else:
        self.exception("neither filename nor D are specified..")

    self.n = len(set(self.D.choice_id))  # number of examples
    self.d = max_deg + 1  # number of degrees considered

    # initiate the rest of the parameters
    self.n_it = 0  # number of iterations for optimization
    self.u = [1]  # current parameter value
    self.se = [None]  # current SE value
    self.bounds = bounds  # whether there are bounds for the parameters
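# --- Illustrative sketch (synthetic data) ---
# A toy D matrix in the layout the docstring above describes: one row per
# choice option, columns [choice_id, Y, degree, n_fofs], and exactly one
# chosen option (Y == 1) per choice set. Only the column layout follows the
# docstring; the values are made up.
import pandas as pd

D = pd.DataFrame({
    "choice_id": [0, 0, 0, 1, 1, 1],
    "Y":         [0, 1, 0, 0, 0, 1],  # the chosen option in each set
    "degree":    [2, 5, 1, 3, 4, 7],
    "n_fofs":    [0, 3, 1, 2, 2, 5],
})
n_examples = len(set(D.choice_id))  # 2, as computed in the constructor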
def get_extent(args):
    # Extract args
    i, frame = args

    # Get data and measure extent
    intensity = util.read_data(frame, 'polar_intensity', fargo_par,
                               id_number=id_number)
    extent, azimuthal_profile = az.get_extent(intensity, fargo_par,
                                              normalize=True,
                                              threshold=threshold,
                                              sliver_width=sliver_width)

    # Count peaks
    peaks, _ = find_peaks(azimuthal_profile, height=threshold)
    peak_count = len(peaks)

    # Convert to degrees
    extent *= (180.0 / np.pi)

    print i, frame, extent, peak_count

    # Store
    extents[i] = extent
    peak_counts[i] = peak_count
def full_procedure(frame, show=False):
    """ Every Step """
    # Read Data
    density = util.read_data(frame, 'input_density', fargo_par,
                             id_number=id_number).T  # Note: Transpose!!!!

    # Choose shift option
    if center:
        #### Note: Re-work this section! The input density is already centered,
        #### but you still need the shift for the plot.
        # Center vortex
        if fargo_par["MassTaper"] < 10.1:
            shift_c = az.get_azimuthal_peak(density, fargo_par)
        else:
            gas_fargo_par = util.get_pickled_parameters(
                directory="../cm-size")  ## shorten name?

            ######## Need to extract parameters, and add 'rad' and 'theta' ########
            gas_rad = np.linspace(gas_fargo_par['Rmin'], gas_fargo_par['Rmax'],
                                  gas_fargo_par['Nrad'])
            gas_theta = np.linspace(0, 2 * np.pi, gas_fargo_par['Nsec'])
            gas_fargo_par['rad'] = gas_rad
            gas_fargo_par['theta'] = gas_theta

            gas_surface_density_zero = gas_fargo_par['Sigma0']
            dust_surface_density_zero = gas_surface_density_zero / 100.0

            dust_density = util.read_data(frame, 'dust', gas_fargo_par,
                                          id_number=id_number,
                                          directory="../cm-size")

            # Shift input density with center of dust density
            shift_c = az.get_azimuthal_center(dust_density, gas_fargo_par,
                                              threshold=10.0 * dust_surface_density_zero)
    else:
        shift_c = None

    # Get and plot profiles
    azimuthal_radii, azimuthal_profiles = az.get_profiles(density, fargo_par,
                                                          args, shift=None)
    make_plot(frame, shift_c, azimuthal_radii, azimuthal_profiles, show=show)
def train(self, config):
    if config.is_train:
        input_setup(self.sess, config)
    else:
        nx, ny = input_setup(self.sess, config)  # number of sub-image patches to merge

    if config.is_train:
        data_path = os.path.join("./", config.checkpoint_dir, "train.h5")
    else:
        data_path = os.path.join("./", config.checkpoint_dir, "test.h5")
    train_data, train_label = read_data(data_path)

    self.train_op = tf.train.GradientDescentOptimizer(
        config.learning_rate).minimize(self.loss)
    tf.global_variables_initializer().run()

    counter = 0  # step counter used to decide when to log and checkpoint
    start_time = time.time()

    # Load the existing checkpoint, if any
    if self.load(config.checkpoint_dir):
        print("[*] Load SUCCESS")
    else:
        print("[!] Load Failed")

    if config.is_train:
        print("Train....")
        batch_index = len(train_data) // config.batch_size
        for ep in range(config.epoch):
            for idx in range(batch_index):
                batch_images = train_data[idx * config.batch_size:(idx + 1) * config.batch_size]
                batch_labels = train_label[idx * config.batch_size:(idx + 1) * config.batch_size]

                _, err = self.sess.run([self.train_op, self.loss], {
                    self.images: batch_images,
                    self.labels: batch_labels
                })
                counter += 1

                if counter % 10 == 0:
                    print("Epoch: %2d,step: %2d,time: %4.4f,loss: %.8f" %
                          ((ep + 1), counter, time.time() - start_time, err))
                if counter % 500 == 0:
                    self.save(config.checkpoint_dir, counter)
    else:
        print("Test...")
        result = self.pred.eval({
            self.images: train_data,
            self.labels: train_label
        })
        result = merge(result, [nx, ny])
        result = result.squeeze()  # squeeze(): drop the singleton dimensions of result
        image_path = os.path.join(os.getcwd(), config.sample_dir, "text_image.png")
        imsave(image_path, result)
def main(file_name):
    fl = open(file_name, 'r')
    # measurements = read_separated_data(fl.readlines())
    measurements = read_data(fl.readlines())
    fl.close()

    for accelerometer in measurements:
        compute_movement(map(lambda a: map(float, a), accelerometer))
def run(fname, fname_test=None, threshold=0.2):
    X, Y = read_data(fname)
    print "run grafting algorithm for data set: %s" % fname
    indexes = gft.grafting(np.matrix(X), np.matrix(Y).T, threshold)[0]
    print "selected features index:"
    print indexes
    print "finish grafting algorithm"
    print "**" * 40

    print "run classification:"
    args = {"origin data": np.arange(X.shape[1])}
    args["grafting"] = map(lambda x: int(x), indexes)
    if fname_test is None:
        clf.classifying(X, Y, args=args)
    else:
        X_test, Y_test = read_data(fname_test)
        clf.classifying(X, Y, X_test, Y_test, args=args)
    print "finish classifying"
def __copy_files(files, src_dir, dest_dir, ext='a3daps', to_npy=False):
    for f in files:
        full_file_path = os.path.join(src_dir, f + '.' + ext)
        if to_npy:
            file_data = np.asarray(util.read_data(full_file_path))
            np.save(os.path.join(dest_dir, f + '.npy'), file_data)
        else:
            shutil.copy2(full_file_path, dest_dir)
def sfs(fname, threshold, epsilon):
    # print "dataset: %s" % fname
    X, Y = read_data(fname)

    startTime = time.time()
    sfs_l21_norm_streaming(X, Y, threshold, epsilon)
    endTime = time.time()
    print "%.4f" % (endTime - startTime)
def update(self):
    df_mod = read_data(self.filename)
    if (len(df_mod) % int(self.config['FORECAST_PARAMS']['STEP_SIZE'])) == 0:
        self.df = df_mod[self.df.columns.values]
        self.df_sized = self.df.iloc[::int(self.config['FORECAST_PARAMS']['STEP_SIZE']), :]
        return True
    return False
def opt_threshold(fname, epsilon):
    # print "datasets: %s" % fname
    X, Y = read_data(fname)
    thresholds = np.arange(0.1, 0.51, 0.05)
    for threshold in thresholds:
        X_index_retained = sfs_l21_norm(X, Y, threshold, epsilon)[0]
        print "grafting l21-norm ",
        print "threshold = %s, epsilon = %s: " % (str(threshold), str(epsilon)),
        print "%s" % str(list(X_index_retained))
def main():
    seed = 7
    np.random.seed(seed)

    all_essays, all_avg_scores = read_data()

    if 'reload_data' in sys.argv or not os.path.isfile(os.path.join(data_home, 'glove_vectors.pickle')):
        print('Reloading data...')
        print('Loaded all data...')
        all_glove_data = create_glove_representations(all_essays)
        with open(os.path.join(data_home, 'glove_vectors.pickle'), 'wb') as handle:
            pickle.dump(all_glove_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
        print('Wrote data to pickle file.')
    else:
        with open(os.path.join(data_home, 'glove_vectors.pickle'), 'rb') as handle:
            all_glove_data = pickle.load(handle)
        print('Loaded vectors from Pickle file... ')

    for i in range(1, len(all_glove_data) + 1):
        glove_data = all_glove_data[i]
        avg_scores = [round(x, 0) for x in all_avg_scores[i]]

        # Limit data just for testing
        # X_train, X_test, y_train, y_test = train_test_split(keras_data, keras_labels, train_size=0.9)

        # Split data into test and train
        print('Creating Estimator...')
        Y, keras_length = encode_data_for_keras(avg_scores)
        X_train, X_test, y_train, y_test = train_test_split(glove_data, Y, train_size=0.9)
        # c_y_train, keras_length = encode_data_for_keras(y_train)
        # c_y_test, test_len = encode_data_for_keras(y_test)

        param_matrix = {'activation': ['relu'], }

        rnn = MLP(keras_length)
        rnn.fit(X_train, y_train, epochs=50, batch_size=20)
        scores = rnn.evaluate(X_test, y_test, verbose=0)
        for i in range(len(rnn.metrics_names)):
            print("%s: %.2f%%" % (rnn.metrics_names[i], scores[i] * 100))

        # estimator = KerasClassifier(build_fn=rnn_model, epochs=200, batch_size=20)
        # estimator.fit()
        # Compile model
        # print('Compiling Model...')
        # kfold = KFold(n_splits=1, shuffle=True, random_state=seed)
        # results = cross_val_score(estimator, glove_data, keras_labels, cv=kfold)
        # print("Baseline: %.2f%% (%.2f%%)" % (results.mean(), results.std()))
        #
        # with open(os.path.join(data_home, 'results.pickle'), 'wb') as handle:
        #     pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

        break
def main(test_pattern, model_dir, alphabet, print_tags, print_score):
    alphabet = list(alphabet)

    # Open feature and transition params
    theta = read_model(model_dir)

    # Read test data
    data = read_data(test_pattern)

    test(theta, data, alphabet, print_tags, print_score)
def parse_unlocking_script(script):
    # Returns the type, signatures, public keys, address, and redeem script of the input
    if len(script) in [71, 72, 73]:
        # Pay-to-Public-Key: the unlocking script is the signature
        sig, script = read_data(script)
        assert script == bytes()
        return "p2pk", [sig], None, None, None
    elif len(script) in [105, 106, 107, 137, 138, 139]:
        # Pay-to-Public-Key-Hash (P2PKH): signature and public key
        sig, script = read_data(script)
        pubkey, script = read_data(script)
        assert script == bytes()
        return "p2pkh", [sig], [PublicKey.from_ser(pubkey)], Address.from_pubkey(pubkey), None
    elif script[0] == OP_0:
        # P2SH multisig
        zero, script = read_bytes(script, 1, int, 'little')
        data = []
        while script != bytes():
            d, script = read_data(script)
            data.append(d)
        signatures, redeemScript = data[:-1], data[-1]

        # Address
        address = Address.from_script(redeemScript)
        rs = redeemScript

        # Parsing of redeem script
        m, rs = read_bytes(rs, 1, int, 'little')
        assert len(signatures) == (m - OP_1 + 1), "m = {:d}, len sigs = {:d}".format(
            m, len(signatures))
        pubkeys = []
        while 0 < rs[0] <= OP_PUSHDATA4:
            pubkey, rs = read_data(rs)
            pubkeys.append(PublicKey.from_ser(pubkey))
        n, rs = read_bytes(rs, 1, int, 'little')
        assert len(pubkeys) == (n - OP_1 + 1)
        assert rs[0] == OP_CHECKMULTISIG
        return "p2sh", signatures, pubkeys, address, redeemScript
    else:
        raise ScriptError("cannot parse unlocking script")
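# --- Assumed helper sketch (not the project's implementation) ---
# parse_unlocking_script() relies on a read_data() helper defined elsewhere in
# the module. For the simple push case (a single length byte in 0x01-0x4b
# followed by that many bytes), such a helper could plausibly look like the
# sketch below; it deliberately ignores OP_PUSHDATA1/2/4.
def read_data_sketch(script):
    n = script[0]                      # assumed single-byte push length
    return script[1:1 + n], script[1 + n:]

payload, rest = read_data_sketch(bytes([3, 0xaa, 0xbb, 0xcc, 0x51]))
# payload == b'\xaa\xbb\xcc', rest == b'\x51'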
def load_features_all(fname, laymap):
    dataset_raw = util.read_data(fname, 3)
    dataset_new = np.array([])

    for rix, row in enumerate(dataset_raw):
        if not rix % 50:
            print rix
        new_row = np.append(transform_all(laymap, row[0]), float(row[1]) / 5 - 1)
        dataset_new = np.append(dataset_new, new_row)

    return dataset_new.reshape(len(dataset_raw), 25)
def production_rule_train(self, **kargs):
    print('production rule train, %d' % config.USE_PRODUCTION_RULE_NO)
    arg1_production_rule_dict = util.load_production_rule_dict(
        config.ARG1_PRODUCTOIN_RULE, config.USE_PRODUCTION_RULE_NO)
    arg2_production_rule_dict = util.load_production_rule_dict(
        config.ARG2_PRODUCTOIN_RULE, config.USE_PRODUCTION_RULE_NO)
    both_production_rule_dict = util.load_production_rule_dict(
        config.BOTH_PRODUCTOIN_RULE, config.USE_PRODUCTION_RULE_NO)

    with codecs.open(config.ARG1_PARSETREE) as file:
        arg1_parsetree = file.read().split('\n')
        # for line in file:
        #     arg1_parsetree.append(line)
    with codecs.open(config.ARG2_PARSETREE) as file:
        arg2_parsetree = file.read().split('\n')

    feature = []
    self.trainData = util.read_data(self.file_name)
    for index, relation in enumerate(self.trainData):
        feat = {}
        feat.update(
            feature_functions._train_production_rules(
                index,
                [arg1_production_rule_dict, arg2_production_rule_dict,
                 both_production_rule_dict],
                [arg1_parsetree, arg2_parsetree]))
        for sense in relation['Sense']:
            num_sense = util.map_sense_to_number(sense)
            if num_sense != -1:
                feature.append((feat, num_sense))

    start = time.time()
    try:
        model = nltk.MaxentClassifier.train(feature, **kargs)
    except:
        util.store_model(model, 'model/production_rule.model')
    end = time.time()
    print('train production rules model costs: %f seconds' % (end - start))

    util.store_model(model, 'model/production_rule.model')
    return model
from datetime import date
from datetime import timedelta
from datetime import datetime
from sklearn import linear_model
from sklearn.cross_validation import KFold

import util

non_price_inputs = ['avg-confirmation-time.txt', 'estimated-transaction-volume.txt',
                    'my-wallet-transaction-volume.txt', 'total-bitcoins.txt',
                    'bitcoin-days-destroyed-cumulative.txt', 'hash-rate.txt',
                    'n-orphaned-blocks.txt', 'trade-volume.txt',
                    'bitcoin-days-destroyed.txt', 'market-cap.txt',
                    'n-transactions-excluding-popular.txt', 'transaction-fees.txt',
                    'blocks-size.txt', 'n-transactions-per-block.txt',
                    'tx-trade-ratio.txt', 'cost-per-transaction.txt',
                    'miners-revenue.txt', 'n-transactions.txt',
                    'difficulty.txt', 'my-wallet-n-tx.txt',
                    'n-unique-addresses.txt', 'estimated-transaction-volume-usd.txt',
                    'my-wallet-n-users.txt', 'output-volume.txt']

data = {}
for f in non_price_inputs:
    data[f] = util.read_data('data/' + f)
data['market-price.txt'] = util.read_data('data/market-price.txt')
day_price = util.day_to_price()

# find all the features and prices for 100-600 days before today
all_features = []
Y = []
for test_day in range(100, 600):  # the last 600-100 days from today
    end_day = date.today() - timedelta(days=test_day + 1)
    start_day = date.today() - timedelta(days=test_day + 101)  # look at the last 100 days
    this_day = date.today() - timedelta(days=test_day)
    future_day = this_day + timedelta(days=200)
    y.append(rating)
    # break

    # X = sequence.pad_sequences(X, maxlen=max_len)
    X = np.asarray(X)
    y = np.asarray(y)
    return X, y


def execute_model(X, y):
    kf = KFold(y.shape[0], n_folds=n_fold, shuffle=True)
    results_user = np.array([0.0, 0.0, 0.0, 0.0])
    for train_index, test_index in kf:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        accuracy, precision, recall, f1 = cnn_model(X_train, y_train, X_test, y_test)
        # precision, recall, f1 = bidirectional_lstm(X_train, y_train, X_test, y_test)
        results_user[0] += accuracy
        results_user[1] += precision
        results_user[2] += recall
        results_user[3] += f1
    results_user /= n_fold
    return results_user


if __name__ == '__main__':
    n_count = 0
    data = util.read_data(path.join(util.data_path, util.file_name))
    X, y = build_dataset(data)
    results = execute_model(X, y)
    print results
    util.insert_results('CNN', results[0], results[2], results[1], results[3])
def get_train_data():
    train_data, _ = util.read_data(error=0, is_train=True)
    train_data = preprocessing.StandardScaler().fit_transform(train_data)
    return train_data
#!/usr/bin/python2.7
import sys
import util
import numpy as np

required_fields = util.required_fields
attr_n = util.attr_n

if len(sys.argv) != 3:
    util.print_usage()
    exit()

data = util.read_data(sys.argv[1])
util.init(data)
c_cnt = util.c_cnt
o_cnt = util.o_cnt
simple_attr = util.simple_attr
ranged_attr = util.ranged_attr

# Learning
data = np.array(data)
np.random.shuffle(data)
data = data[:100]

for record in data:
    t = int(record[13])
    # 0 => Healthy
    # 1-4 => Has heart disease
    if t > 4:
        print "Error"
import os

import numpy as np
import tensorflow as tf
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

tf.logging.set_verbosity(tf.logging.INFO)

# Starting the tensorflow session
sess = tf.Session()

# Defining the special tokens that will be used
SENTENCE_START_TOKEN = "<START>"
SENTENCE_END_TOKEN = "<EOS>"
OOV_TOKEN = "<UNK>"
PAD_TOKEN = "<PAD>"

# Reading the data
df = read_data()

# Splitting the data into train, validation and test
msk = np.random.rand(len(df)) < 0.8
df_train = df[msk]
msk2 = np.random.rand(len(df_train)) < 0.8
df_validate = df_train[~msk2]
df_train = df_train[msk2]
df_test = df[~msk]
print("Train size: %s" % len(df_train))
print("Validation size: %s" % len(df_validate))
print("Test size: %s" % len(df_test))

# Creating the English and Hebrew vocabularies
eng_vocab, rev_eng_vocab = get_vocab(df["english_sentences"],
                                     addtional_tokens=[PAD_TOKEN, OOV_TOKEN],
                                     top=None)
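# --- Illustrative sketch (synthetic DataFrame) ---
# The boolean-mask split above sends roughly 80% of the rows to the training
# set and the complement to a held-out set; the same idiom on a toy frame:
import numpy as np
import pandas as pd

toy = pd.DataFrame({"x": range(10)})
mask = np.random.rand(len(toy)) < 0.8
toy_train, toy_rest = toy[mask], toy[~mask]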
def read_xy_p(filename):
    data = ut.read_data(filename)
    return np.array([[row[0] for row in data],
                     [row[1] for row in data]])
import crf
import util

theta = util.read_model('model')      # model directory
data = util.read_data('data/test*')   # regex pattern of binarized text image files,
                                      # each sequence (word, sentence, etc.) in its own file
alphabet = list('etainoshrd')         # list of all possible character labels

predictions = crf.predict(theta, data, alphabet)
for prediction in predictions:
    print ''.join(prediction)
def load_basic(self):
    self.basic = read_data(data_dir + 'basic/basic_%s' % self.index)
def reload(self):
    # Force load the data
    self._data = read_data(self.data_file)[self.index]
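# --- Illustrative sketch (stand-in loader, not read_data) ---
# The data property and reload() above implement a lazy-load / force-reload
# pattern: load once on first access, cache on self._data, and overwrite the
# cache when a fresh read is requested.
class LazyRun(object):
    def __init__(self, index):
        self.index = index

    def _load(self):
        return {"payload": "loaded for index %d" % self.index}

    @property
    def data(self):
        if not hasattr(self, "_data"):   # first access: load and cache
            self._data = self._load()
        return self._data

    def reload(self):
        self._data = self._load()        # force a fresh load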