Example #1
def main(argv=None):
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)

    # load dataset
    train_data = util.read_data(FLAGS.data_dir,
                                'train',
                                FLAGS.sent_len,
                                negative=FLAGS.negative,
                                hierarchical=FLAGS.hierarchical,
                                shuffle=False)
    test_data = util.read_data(FLAGS.data_dir,
                               'dev',
                               FLAGS.sent_len,
                               negative=FLAGS.negative,
                               hierarchical=FLAGS.hierarchical)

    class_names = None
    relations = None
    if FLAGS.negative:
        class_names = util.load_from_dump(
            os.path.join(FLAGS.data_dir, 'classes.cPickle'))
        relations = util.load_from_dump(
            os.path.join(FLAGS.data_dir, 'relations.cPickle'))

    train(train_data, test_data, class_names=class_names, relations=relations)
Example #2
def generate_combined(src='all', num=None, method='avg', img_scale=False):
    """
        Generates a combined file from aps files in the src_dir for help in identifying zones.
        This file is serialized as an npy file named 'combined-{src}-{num}.npy', where 'num' is the
        number of files used in the combination. If 'num' is not given, all files are used.
    """
    files = shuffled_files(src)
    if num is None:
        num = len(files)

    sample = np.asarray(util.read_data(files[0]))
    combined = np.zeros(sample.shape)
    for file in files[0:num]:
        file_data = np.asarray(util.read_data(file))
        if img_scale:
            file_data = scipy.misc.bytescale(file_data)

        if method == 'avg':
            combined = combined + file_data
        else:
            combined = np.maximum(combined, file_data)

    if method == 'avg':
        combined = combined / num
    # np.sum(combined, axis=(1,2))

    np.save(f"combined-{src}-{num}", combined)
Example #3
def run():
    db = initdb()
    o = read_data("中国城市坐标D.xls")
    d = read_data("中国城市坐标D.xls")
    logger.info("成功加载文件")
    bmap = BaiduMap(AK, SK)
    # Load the data
    if recov.hasfile:
        datas = recov.recover()
        for data in datas:
            bmap.load_data(**data)
    else:
        for i in o.index:
            for j in d.index:
                if i >= j:
                    continue
                data = {
                    "origin": "%s,%s" % (o.iloc[i].y, o.iloc[i].x),
                    "destination": "%s,%s" % (d.iloc[j].y, d.iloc[j].x)
                }
                bmap.load_data(**data)
    logger.info("完成数据加载")
    # 开始写入
    for result in bmap.get_train_data():
        try:
            db.insert_transit(result)
        except Exception:
            logger.error("Insert failed")
Example #4
def get_data():
    train_data, _ = util.read_data(error=0, is_train=True)
    test_data = np.vstack(
        list(map(lambda x: util.read_data(x, False)[0], range(22))))
    scaler = preprocessing.StandardScaler().fit(train_data)
    train_data = scaler.transform(train_data)
    test_data = scaler.transform(test_data)
    return train_data, test_data
Example #5
def get_test_data():
    test_data = []
    for i in range(22):
        data, _ = util.read_data(error=i, is_train=False)
        test_data.append(data)
    test_data = np.concatenate(test_data)
    train_data, _ = util.read_data(error=0, is_train=True)
    scaler = preprocessing.StandardScaler().fit(train_data)
    test_data = scaler.transform(test_data)
    return test_data
Example #6
def get_test_data():
    test_data = []
    for i in range(22):
        data, _ = util.read_data(error=i, is_train=False)
        test_data.append(data)
    test_data = np.concatenate(test_data)
    train_data, _ = util.read_data(error=0, is_train=True)
    scaler = preprocessing.StandardScaler().fit(train_data)
    test_data = scaler.transform(test_data)
    return test_data
Example #7
File: dnom.py Project: htz-ecust/DNOM
def get_data(n_samples):
    data, _ = util.read_data(error=0, is_train=True)
    train_data = _hstack(data, n_samples)

    test_data = []
    for i in range(22):
        data, _ = util.read_data(error=i, is_train=False)
        test_data.append(_hstack(data, n_samples))
    test_data = np.vstack(test_data)

    scaler = preprocessing.StandardScaler().fit(train_data)
    train_data = scaler.transform(train_data)
    test_data = scaler.transform(test_data)
    return train_data, test_data
Example #8
def main(fname, threshold, epsilon):
    X, Y = read_data(fname)
    print X.shape, Y.shape
    print "X shape: %s" % str(X.shape)

    startTime = time.time()
    X_index_retained, W, X_index, obj_values = sfs_l21_norm(
        X, Y, threshold, epsilon)
    endTime = time.time()

    print "runtime: %.4f seconds" % (endTime - startTime)

    print "**" * 30
    print "data set name: %s" % fname
    print "selected features index: "
    print "sfs-l21-norm "
    print "threshold = %s, epsilon = %s: " % (str(threshold), str(epsilon)),
    print "%s\n" % str(list(X_index_retained))
    print "selected features weight: \n%s\n" % str(W)
    print "features index that through the gradient validation: \n%s\n" % str(
        X_index)
    print "**" * 30
    print "DONE"

    obj_values_df = pd.DataFrame(obj_values,
                                 index=X_index,
                                 columns=["obj_values"])
    columns = ["weight_%d" % i for i in range(Y.shape[1])]
    weights_df = pd.DataFrame(W,
                              index=np.hstack((-1, X_index_retained)),
                              columns=columns)
    df = pd.concat((obj_values_df, weights_df))
    df.plot(subplots=True)
    plt.show()
Example #9
def main(argv=None):
    restore_param = util.load_from_dump(
        os.path.join(FLAGS.train_dir, 'flags.cPickle'))
    restore_param['train_dir'] = FLAGS.train_dir

    if restore_param.has_key('contextwise') and restore_param['contextwise']:
        source_path = os.path.join(restore_param['data_dir'], "ids")
        target_path = os.path.join(restore_param['data_dir'], "target.txt")
        _, data = util.read_data_contextwise(
            source_path,
            target_path,
            restore_param['sent_len'],
            train_size=restore_param['train_size'])
    else:
        source_path = os.path.join(restore_param['data_dir'], "ids.txt")
        target_path = os.path.join(restore_param['data_dir'], "target.txt")
        _, data = util.read_data(source_path,
                                 target_path,
                                 restore_param['sent_len'],
                                 train_size=restore_param['train_size'])

    pre, rec = evaluate(data, restore_param)
    util.dump_to_file(os.path.join(FLAGS.train_dir, 'results.cPickle'), {
        'precision': pre,
        'recall': rec
    })
Example #10
def get_middle_profile(directory):
    # Change directories
    cwd = os.getcwd()
    os.chdir(directory)

    ### Data ###
    intensity_polar = util.read_data(frame,
                                     'polar_intensity',
                                     fargo_par,
                                     id_number=id_number,
                                     directory=".")
    if normalize:
        intensity_polar /= np.max(intensity_polar)
    azimuthal_radii, azimuthal_profiles = az.get_profiles(intensity_polar,
                                                          fargo_par,
                                                          args,
                                                          shift=None)

    # Middle Profile
    middle_i = (num_profiles - 1) / 2
    middle_profile = azimuthal_profiles[middle_i]

    # Return to previous directory
    os.chdir(cwd)

    return middle_profile
Example #11
def measure_peak_offset(args):
    # Unpack
    i, frame = args

    intensity_polar = util.read_data(frame,
                                     'polar_intensity',
                                     fargo_par,
                                     id_number=id_number)

    # Shift and get peak
    shift_c = az.shift_away_from_minimum(intensity_polar, fargo_par)
    intensity_polar = np.roll(intensity_polar, shift_c, axis=-1)
    peak_r_i, peak_phi_i = az.get_peak(intensity_polar, fargo_par)

    # Return peak relative to the center, where the edges are set by a threshold
    intensity_polar /= np.max(intensity_polar)  # normalize

    intensity_polar_at_peak = intensity_polar[
        peak_r_i, :]  # take azimuthal profile
    left_index = az.my_searchsorted(intensity_polar_at_peak, threshold)
    right_index = len(intensity_polar_at_peak) - az.my_searchsorted(
        intensity_polar_at_peak[::-1], threshold) - 1

    # Convert to theta
    left_theta = theta[left_index] * (180.0 / np.pi)
    right_theta = theta[right_index] * (180.0 / np.pi)
    center_theta = left_theta + (right_theta - left_theta) / 2.0

    peak_theta = theta[peak_phi_i] * (180.0 / np.pi)
    peak_offset = peak_theta - center_theta

    print i, frame, peak_offset, center_theta, peak_theta

    # Store in mp_array
    peak_offsets[i] = peak_offset
Example #12
def run(fname, epsilon, threshold, label_pos):
    from util import read_data
    X, Y = read_data(fname, label_pos)
    print X.shape, Y.shape
    print "X shape: %s" % str(X.shape)
    X_index_retained, W, X_index, obj_values = sfs_l21_norm(
        X, Y, threshold, epsilon)

    print "**" * 30
    print "data set name: %s" % fname
    print "selected features index: "
    print "grafting l21-norm "
    print "threshold = %s, epsilon = %s: " % (str(threshold), str(epsilon)),
    print "%s\n" % str(list(X_index_retained))
    print "selected features weight: \n%s\n" % str(W)
    print "features index that through the gradient validation: \n%s\n" % str(
        X_index)
    print "**" * 30
    print "DONE"

    obj_values_df = pd.DataFrame(obj_values,
                                 index=X_index,
                                 columns=["obj_values"])
    columns = ["weight_%d" % i for i in range(Y.shape[1])]
    weights_df = pd.DataFrame(W,
                              index=np.hstack((-1, X_index_retained)),
                              columns=columns)
    df = pd.concat((obj_values_df, weights_df))
    df.plot(subplots=True)
    plt.show()
Example #13
	def word_pair_train(self, **kargs):	
		print('word pair train, %d' % config.USE_WORD_PAIR_NO)
		self.dict_word_pairs = util.load_dict_word_pairs(config.WORD_PAIRS, config.USE_WORD_PAIR_NO)

		"""
			:type feature: list(tuple(dict, str/int))
										^		^
										|		|
									  feature  sense
		"""
		
		self.trainData = util.read_data(self.file_name)

		start = time.time()
		feature = []
		for relation in self.trainData:
			feat = {}
			feat.update(feature_functions.word_pairs(relation, self.dict_word_pairs))

			for sense in relation['Sense']:
				num_sense = util.map_sense_to_number(sense)
				if num_sense != -1:
					feature.append( (feat,  num_sense) )

		end=time.time()
		print('extract word pair costs: %f seconds' %(end-start))
			
		start = time.time()
		model = nltk.MaxentClassifier.train(feature, **kargs)
		end = time.time()
		print('train word pair model costs: %f seconds' %(end-start))

		util.store_model(model, 'model/word_pair.model')
		return model
		"""
Example #14
File: run.py Project: aaren/lab-waves
 def data(self):
     # Explicitly load the run data
     if not hasattr(self, '_data'):
         self._data = read_data(self.data_file)[self.index]
     else:
         pass
     return self._data
Example #15
def read_data(frame):
    """ Step 0: read intensity data """
    intensity = util.read_data(frame,
                               'intensity',
                               fargo_par,
                               id_number=id_number)
    return intensity
Example #16
def split_all_data():
    # READ DATA
    dataset_dir = os.path.join('..', 'data', DATASET)
    feats_file = os.path.join(dataset_dir, 'feats.17')
    labels_file = os.path.join(dataset_dir, 'time')
    data = util.read_data(feats_file, labels_file)

    # SHUFFLE DATA
    np.random.seed(1000)
    np.random.shuffle(data)

    # BUILD FOLDER STRUCTURE
    dataset_dir = os.path.join(SPLIT_DIR, DATASET)
    try:
        os.makedirs(dataset_dir)
    except OSError:
        print "skipping folder creation"

    # SPLIT TRAIN/TEST AND SAVE
    fold_indices = cross_validation.KFold(data.shape[0], n_folds=10)
    for fold, index in enumerate(fold_indices):
        print index[0].shape
        train_data = data[index[0]]
        test_data = data[index[1]]
        train_data, scaler = util.normalize_train_data(train_data)
        test_data = util.normalize_test_data(test_data, scaler)

        fold_dir = os.path.join(dataset_dir, str(fold))
        try:
            os.makedirs(fold_dir)
        except OSError:
            print "skipping fold dir"
        np.savetxt(os.path.join(fold_dir, 'train'), train_data, fmt="%.5f")
        np.savetxt(os.path.join(fold_dir, 'test'), test_data, fmt="%.5f")
Example #17
def points_file(src='all', padding=True):
    """
        Creates points file for the given 'src' files ('train', 'valid', 'test', etc)
    """

    files = shuffled_files(src)
    file = os.path.join(config.PSCREENING_HOME, 'points-' + src + '.csv')
    with open(file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        f_count = 0
        for f in files:
            f_count += 1
            print(f"Reading file {f}...")
            file_images = util.read_data(f, as_images=True)
            w, h = file_images[0].size
            print(f"Creating zones...")
            zone_rects = z.create_zones16(file_images)
            if padding:
                zones_config.apply_padding(zone_rects,
                                           max_width=w,
                                           max_height=h)
            print(f"Write record...")
            for i in range(16):
                row = [[f], [i]
                       ] + [list(zone_rects[i][j]) for j in ZONE_EXTRACTIONS]
                row = [val for sublist in row for val in sublist]
                writer.writerow(row)
            print(f"Record #{f_count} completed")
Example #18
def main(argv=None):
    FLAGS = tf.app.flags.FLAGS
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)

    # load dataset
    # source_path = os.path.join(FLAGS.data_dir, 'ids.txt')
    # target_path = os.path.join(FLAGS.data_dir, 'target.txt')
    source_path = os.path.join(FLAGS.data_dir,
                               'test_cs_unlabeled_data_combined.txt')
    target_path = os.path.join(FLAGS.data_dir, 'test_cs_labels_combined.txt')
    attention_path = None
    if FLAGS.attention:
        if os.path.exists(os.path.join(FLAGS.data_dir, 'source.att')):
            attention_path = os.path.join(FLAGS.data_dir, 'source.att')
        else:
            raise ValueError("Attention file %s not found.",
                             os.path.join(FLAGS.data_dir, 'source.att'))
    train_data, test_data = util.read_data(
        source_path,
        target_path,
        FLAGS.sent_len,
        attention_path=attention_path,
        train_size=FLAGS.train_size,
        hide_key_phrases=FLAGS.hide_key_phrases)
    train(train_data, test_data)
Example #19
def main(argv=None):
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)

    # load dataset
    train_data = util.read_data(FLAGS.data_dir,
                                'train',
                                FLAGS.sent_len,
                                negative=FLAGS.negative,
                                hierarchical=FLAGS.hierarchical)
    test_data = util.read_data(FLAGS.data_dir,
                               'dev',
                               FLAGS.sent_len,
                               negative=FLAGS.negative,
                               hierarchical=FLAGS.hierarchical)
    train(train_data, test_data)
Example #20
def main():
    filename = "data/data.csv"
    column_type = {'Review Text':str, 'Sentiment':str, 'Fit':str, 'Fabric':str, 'Color':str, 'Style':str, 'Cost':str}
    x_label = 'Review Text'
    y_label_sentiment = 'Sentiment'
    sentiment_labels = ['-1', '0', '1']
    data = read_data(filename, column_type, x_label=x_label, y_label=y_label_sentiment, clean_data=True)
    x = data[x_label]
    y = data.drop([x_label], axis=1)

    ## split data : train and test
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=50)

    # naive bayes

    w_prob, c_prob = naive_bayes_learner(X_train, y_train[y_label_sentiment], sentiment_labels, verbose=False)

    expected_class_list = np.zeros(len(y_test))
    predicted_class_list = np.zeros((len(y_test)))
    for index, (data, expected_class) in enumerate(zip(X_test, y_test[y_label_sentiment])):
        predicted_class, log_prob = naive_bayes_classifier(w_prob, c_prob, sentiment_labels, data)
        expected_class_list[index] = expected_class
        predicted_class_list[index] = predicted_class
        # print predicted_class, expected_class

    print "Total: ", len(expected_class_list)
    print "True positives: ", np.sum(expected_class_list == predicted_class_list)
    print "Accuracy: ", np.mean(expected_class_list == predicted_class_list)
Example #21
def jsonurl():
    object_path = os.path.join(DATA_PATH, 'objects.json')
    data, errors = read_data(object_path)
    result = {}
    result['data'] = data
    result['errors'] = errors
    return json.dumps(result)
Example #22
File: test.py Project: liuguoyou/MISGAN
def main():
    conf = Config().parse(create_dir_flag=False)
    conf.name = 'TEST_' + conf.name
    conf.output_dir_path = util.prepare_result_dir(conf)
    gan = InGAN(conf)

    try:
        gan.resume(conf.test_params_path, test_flag=True)
        [input_tensor] = util.read_data(conf)

        if conf.test_video:
            retarget_video(gan, input_tensor,
                           define_video_scales(conf.test_vid_scales), 8,
                           conf.output_dir_path)
        if conf.test_collage:
            generate_collage_and_outputs(conf, gan, input_tensor)
        if conf.test_non_rect:
            test_homo(conf, gan, input_tensor)

        print 'Done with %s' % conf.input_image_path

    except KeyboardInterrupt:
        raise
    except Exception as e:
        # print 'Something went wrong with %s (%d/%d), iter %dk' % (input_image_path, i, n_files, snapshot_iter)
        print_exc()
Example #23
File: logit.py Project: mindis/choose2grow
    def __init__(self, model_id, max_deg=50, bounds=None, D=None, vvv=0):
        """
        Constructor for a LogitModel object. The data can be provided directly,
        or it can be read in from file.

        Keyword arguments:

        model_id -- model_id can be either the file name from where the choices
            are read, or an idenfier for the current model
        max_deg -- the max degree that will be considered (default: 50)
        D -- 2d-array representing choice options
        vvv -- int representing level of debug output [0:none, 1:some, 2:lots]

        If data is supplied directly, D is a (n*i)x4 matrix, where each
        choice set has i choices, exactly one of which should be chosen.
        For every example we get the following covariates:
           [choice_id, Y, degree, n_fofs]
        """
        self.id = model_id
        self.vvv = vvv

        if D is not None:
            self.D = D
        elif '.' in model_id:
            self.D = util.read_data(model_id, max_deg, vvv=vvv)
        else:
            self.exception("neither filename nor D are specified..")
        self.n = len(set(self.D.choice_id))  # number of examples
        self.d = max_deg + 1  # number of degrees considered

        # initiate the rest of the parameters
        self.n_it = 0  # number of iterations for optimization
        self.u = [1]  # current parameter value
        self.se = [None]  # current SE value
        self.bounds = bounds  # whether there are bounds for the parameters
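Given the docstring above, a minimal sketch of supplying D directly (the column names follow the docstring and the self.D.choice_id access; all values are made up for illustration):

import pandas as pd

# Two choice sets (choice_id 0 and 1) with three options each; exactly one row
# per set has Y = 1, i.e. was chosen. Columns: [choice_id, Y, degree, n_fofs].
D = pd.DataFrame(
    [[0, 1, 5, 2], [0, 0, 3, 0], [0, 0, 1, 1],
     [1, 0, 2, 0], [1, 1, 7, 4], [1, 0, 4, 1]],
    columns=['choice_id', 'Y', 'degree', 'n_fofs'])
model = LogitModel('toy-model', max_deg=50, D=D, vvv=1)  # model.n == 2 choice sets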
Example #24
def get_extent(args):
    # Extract args
    i, frame = args

    # Get data and measure extent
    intensity = util.read_data(frame,
                               'polar_intensity',
                               fargo_par,
                               id_number=id_number)
    extent, azimuthal_profile = az.get_extent(intensity,
                                              fargo_par,
                                              normalize=True,
                                              threshold=threshold,
                                              sliver_width=sliver_width)

    # Count peaks
    peaks, _ = find_peaks(azimuthal_profile, height=threshold)
    peak_count = len(peaks)

    # Convert to degrees
    extent *= (180.0 / np.pi)

    print i, frame, extent, peak_count

    # Store
    extents[i] = extent
    peak_counts[i] = peak_count
Example #25
def full_procedure(frame, show=False):
    """ Every Step """
    # Read Data
    density = util.read_data(frame,
                             'input_density',
                             fargo_par,
                             id_number=id_number).T  # Note: Transpose!!!!

    # Choose shift option
    if center:
        #### Note: Re-work this section! The input density is already centered, but you still need the shift for the plot
        # Center vortex
        if fargo_par["MassTaper"] < 10.1:
            shift_c = az.get_azimuthal_peak(density, fargo_par)
        else:
            gas_fargo_par = util.get_pickled_parameters(
                directory="../cm-size")  ## shorten name?

            ######## Need to extract parameters, and add 'rad' and 'theta' ########
            gas_rad = np.linspace(gas_fargo_par['Rmin'], gas_fargo_par['Rmax'],
                                  gas_fargo_par['Nrad'])
            gas_theta = np.linspace(0, 2 * np.pi, gas_fargo_par['Nsec'])
            gas_fargo_par['rad'] = gas_rad
            gas_fargo_par['theta'] = gas_theta
            gas_surface_density_zero = gas_fargo_par['Sigma0']
            dust_surface_density_zero = gas_surface_density_zero / 100.0

            dust_density = util.read_data(frame,
                                          'dust',
                                          gas_fargo_par,
                                          id_number=id_number,
                                          directory="../cm-size")

            # Shift input density with center of dust density
            shift_c = az.get_azimuthal_center(dust_density,
                                              gas_fargo_par,
                                              threshold=10.0 *
                                              dust_surface_density_zero)
    else:
        shift_c = None

    # Get and plot profiles
    azimuthal_radii, azimuthal_profiles = az.get_profiles(density,
                                                          fargo_par,
                                                          args,
                                                          shift=None)
    make_plot(frame, shift_c, azimuthal_radii, azimuthal_profiles, show=show)
Example #26
    def train(self, config):
        if config.is_train:
            input_setup(self.sess, config)
        else:
            nx, ny = input_setup(self.sess, config)  # number of sub-image patches to merge back together

        if config.is_train:
            data_path = os.path.join("./", config.checkpoint_dir, "train.h5")
        else:
            data_path = os.path.join("./", config.checkpoint_dir, "test.h5")

        train_data, train_label = read_data(data_path)

        self.train_op = tf.train.GradientDescentOptimizer(
            config.learning_rate).minimize(self.loss)

        tf.global_variables_initializer().run()

        counter = 0  # step counter used for logging and checkpoint decisions
        start_time = time.time()
        # Load an existing checkpoint if available
        if self.load(config.checkpoint_dir):
            print("[*] Load SUCCESS")
        else:
            print("[!] Load Failed")
        if config.is_train:
            print("Train....")
            batch_index = len(train_data) // config.batch_size
            for ep in range(config.epoch):
                for idx in range(batch_index):
                    batch_images = train_data[idx *
                                              config.batch_size:(idx + 1) *
                                              config.batch_size]
                    batch_labels = train_label[idx *
                                               config.batch_size:(idx + 1) *
                                               config.batch_size]
                    _, err = self.sess.run([self.train_op, self.loss], {
                        self.images: batch_images,
                        self.labels: batch_labels
                    })
                    counter += 1

                    if counter % 10 == 0:
                        print(
                            "Epoch: %2d,step: %2d,time: %4.4f,loss: %.8f" %
                            ((ep + 1), counter, time.time() - start_time, err))
                    if counter % 500 == 0:
                        self.save(config.checkpoint_dir, counter)
        else:
            print("Test...")
            result = self.pred.eval({
                self.images: train_data,
                self.labels: train_label
            })
            result = merge(result, [nx, ny])
            result = result.squeeze()  # squeeze(): remove singleton dimensions from result
            image_path = os.path.join(os.getcwd(), config.sample_dir,
                                      "text_image.png")
            imsave(image_path, result)
Example #27
def main(file_name):
    fl = open(file_name, 'r')
    #measurements = read_separated_data(fl.readlines())
    measurements = read_data(fl.readlines())
    fl.close()

    for accelerometer in measurements:
        compute_movement(map(lambda a: map(float, a), accelerometer))
Example #28
def run(fname, fname_test=None, threshold=0.2):
    X, Y= read_data(fname)
    print "run grafting algorithm for data set: %s"%fname
    indexes = gft.grafting(np.matrix(X), np.matrix(Y).T, threshold)[0]
    print "selected features index:"
    print indexes
    print "finish grafting algorithm"
    print "**"*40
    print "run classification:"
    args = {"origin data":np.arange(X.shape[1])}
    args["grafting"] = map(lambda x:int(x), indexes)
    if fname_test==None:
        clf.classifying(X, Y, args=args)
    else:
        X_test, Y_test = read_data(fname_test)
        clf.classifying(X, Y, X_test, Y_test, args=args)
    print "finish classifying"
Example #29
def __copy_files(files, src_dir, dest_dir, ext='a3daps', to_npy=False):
    for f in files:
        full_file_path = os.path.join(src_dir, f + '.' + ext)
        if to_npy:
            file_data = np.asarray(util.read_data(full_file_path))
            np.save(os.path.join(dest_dir, f + '.npy'), file_data)
        else:
            shutil.copy2(full_file_path, dest_dir)
Example #30
def main(file_name):
    fl = open(file_name, 'r')
    #measurements = read_separated_data(fl.readlines())
    measurements = read_data(fl.readlines())
    fl.close()

    for accelerometer in measurements:
        compute_movement(map(lambda a:map(float, a), accelerometer))
Example #31
def sfs(fname, threshold, epsilon):
    # print "dataset: %s"%fname
    X, Y = read_data(fname)

    startTime = time.time()
    sfs_l21_norm_streaming(X, Y, threshold, epsilon)
    endTime = time.time()

    print "%.4f" % (endTime - startTime)
Example #32
 def update(self):
     df_mod = read_data(self.filename)
     if (len(df_mod) %
             int(self.config['FORECAST_PARAMS']['STEP_SIZE'])) == 0:
         self.df = df_mod[self.df.columns.values]
         self.df_sized = self.df.iloc[::int(self.config['FORECAST_PARAMS']
                                            ['STEP_SIZE']), :]
         return True
     return False
Example #33
def opt_threshold(fname, epsilon):
    # print "datasets: %s"%fname
    X, Y = read_data(fname)
    thresholds = np.arange(0.1, 0.51, 0.05)
    for threshold in thresholds:
        X_index_retained = sfs_l21_norm(X, Y, threshold, epsilon)[0]
        print "grafting l21-norm ",
        print "threshold = %s, epsilon = %s: "%(str(threshold), str(epsilon)),
        print "%s"%str(list(X_index_retained))
Example #34
def main():
    seed = 7
    np.random.seed(seed)
    all_essays, all_avg_scores = read_data()
    if 'reload_data' in sys.argv or not os.path.isfile(os.path.join(data_home, 'glove_vectors.pickle')):
        print('Reloading data...')
        print('Loaded all data...')

        all_glove_data = create_glove_representations(all_essays)
        with open(os.path.join(data_home, 'glove_vectors.pickle'), 'wb') as handle:
            pickle.dump(all_glove_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
            print('Wrote data to pickle file.')
    else:

        with open(os.path.join(data_home, 'glove_vectors.pickle'), 'rb') as handle:
            all_glove_data = pickle.load(handle)
            print('Loaded vectors from Pickle file... ')

    for i in range(1, len(all_glove_data)+1):
        glove_data = all_glove_data[i]
        avg_scores = [round (x, 0) for x in all_avg_scores[i]]

        # Limit data just for testing

        #X_train, X_test, y_train, y_test = train_test_split(keras_data, keras_labels, train_size=0.9)
        # Split data into test and train
        print('Creating Estimator...')
        Y, keras_length = encode_data_for_keras(avg_scores)
        X_train, X_test, y_train, y_test = train_test_split(glove_data, Y, train_size=0.9)
        # c_y_train, keras_length = encode_data_for_keras(y_train)
        # c_y_test, test_len = encode_data_for_keras(y_test)
        param_matrix = {'activation': ['relu'], }


        rnn = MLP(keras_length)
        rnn.fit(X_train, y_train, epochs=50, batch_size=20)

        scores = rnn.evaluate(X_test, y_test, verbose=0)
        for i in range(len(rnn.metrics_names)):
            print("%s: %.2f%%" % (rnn.metrics_names[i], scores[i]*100))
        # estimator = KerasClassifier(build_fn=rnn_model, epochs=200, batch_size=20)


        # estimator.fit()


        # Compile model
        # print('Compiling Model...')
        # kfold = KFold(n_splits=1, shuffle=True, random_state=seed)
        # results = cross_val_score(estimator, glove_data, keras_labels, cv=kfold)
        # print("Baseline: %.2f%% (%.2f%%)" % (results.mean(), results.std()))
        #
        # with open(os.path.join(data_home, 'results.pickle'), 'wb') as handle:
        #     pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

        break
Example #35
def main(test_pattern, model_dir, alphabet, print_tags, print_score):
	alphabet = list(alphabet)

	# Open feature and transition params
	theta = read_model(model_dir)

	# Read test data
	data = read_data(test_pattern)

	test(theta, data, alphabet, print_tags, print_score)
Example #36
def parse_unlocking_script(script):
    # Returns type, signatures, public keys and address of the input
    if len(script) in [71, 72, 73]:
        # Pay-to-Public-Key: the unlocking script is the signature
        sig, script = read_data(script)
        assert script == bytes()
        return "p2pk", [sig], None, None, None
    elif len(script) in [105, 106, 107, 137, 138, 139]:  #P2PKH
        # Pay-to-Public-Key-Hash: signature and public key
        sig, script = read_data(script)
        pubkey, script = read_data(script)
        assert script == bytes()
        return "p2pkh", [sig], [PublicKey.from_ser(pubkey)
                                ], Address.from_pubkey(pubkey), None
    elif script[0] == OP_0:
        # P2SH multisig
        zero, script = read_bytes(script, 1, int, 'little')
        data = []
        while script != bytes():
            d, script = read_data(script)
            data.append(d)
        signatures, redeemScript = data[:-1], data[-1]

        # Address
        address = Address.from_script(redeemScript)

        rs = redeemScript
        # Parsing of redeem script
        m, rs = read_bytes(rs, 1, int, 'little')
        assert len(signatures) == (m - OP_1 +
                                   1), "m = {:d}, len sigs = {:d}".format(
                                       m, len(signatures))
        pubkeys = []
        while 0 < rs[0] <= OP_PUSHDATA4:
            pubkey, rs = read_data(rs)
            pubkeys.append(PublicKey.from_ser(pubkey))
        n, rs = read_bytes(rs, 1, int, 'little')
        assert len(pubkeys) == (n - OP_1 + 1)
        assert rs[0] == OP_CHECKMULTISIG

        return "p2sh", signatures, pubkeys, address, redeemScript
    else:
        raise ScriptError("cannot parse unlocking script")
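For context, every call above has the form data, rest = read_data(script): it pops one pushed element off the front of a serialized script and returns it together with the remaining bytes. A minimal sketch of such a reader, following the standard Bitcoin push-data encoding (an illustration under that assumption, not the project's actual util.read_data):

def read_pushed_data(script):
    opcode = script[0]
    if opcode < 0x4c:                    # direct push: the opcode itself is the length
        n, offset = opcode, 1
    elif opcode == 0x4c:                 # OP_PUSHDATA1: 1-byte length follows
        n, offset = script[1], 2
    elif opcode == 0x4d:                 # OP_PUSHDATA2: 2-byte little-endian length
        n, offset = int.from_bytes(script[1:3], 'little'), 3
    elif opcode == 0x4e:                 # OP_PUSHDATA4: 4-byte little-endian length
        n, offset = int.from_bytes(script[1:5], 'little'), 5
    else:
        raise ValueError("not a data push")
    return script[offset:offset + n], script[offset + n:]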
Example #37
def load_features_all(fname, laymap):
    dataset_raw = util.read_data(fname, 3)

    dataset_new = np.array([])
    for rix, row in enumerate(dataset_raw):
        if not rix % 50:
            print rix

        new_row = np.append(transform_all(laymap, row[0]),
                            float(row[1]) / 5 - 1)

        dataset_new = np.append(dataset_new, new_row)

    return dataset_new.reshape(len(dataset_raw), 25)
Example #38
	def production_rule_train(self, **kargs):
		print('production rule train, %d' % config.USE_PRODUCTION_RULE_NO)
		arg1_production_rule_dict = util.load_production_rule_dict(config.ARG1_PRODUCTOIN_RULE, config.USE_PRODUCTION_RULE_NO)
		arg2_production_rule_dict = util.load_production_rule_dict(config.ARG2_PRODUCTOIN_RULE, config.USE_PRODUCTION_RULE_NO)
		both_production_rule_dict = util.load_production_rule_dict(config.BOTH_PRODUCTOIN_RULE, config.USE_PRODUCTION_RULE_NO)

		with codecs.open(config.ARG1_PARSETREE) as file:
			arg1_parsetree = file.read().split('\n')
			#for line in file:
			#	arg1_parsetree.append(line)

		with codecs.open(config.ARG2_PARSETREE) as file:
			arg2_parsetree = file.read().split('\n')

		feature = []

		self.trainData = util.read_data(self.file_name)
		for index, relation in enumerate(self.trainData):
			feat = {}
			feat.update(	\
				feature_functions._train_production_rules(index, \
					[ arg1_production_rule_dict, arg2_production_rule_dict, both_production_rule_dict ], \
					[arg1_parsetree, arg2_parsetree])
					)

			for sense in relation['Sense']:
				num_sense = util.map_sense_to_number(sense)
				if num_sense != -1:
					feature.append( (feat,  num_sense) )

		start = time.time()
		try:
			model = nltk.MaxentClassifier.train(feature, **kargs)
		except:
			util.store_model(model, 'model/production_rule.model')

		end = time.time()
		print('train production rules model costs: %f seconds' %(end-start))

		util.store_model(model, 'model/production_rule.model')
		return model
Example #39
from datetime import date
from datetime import timedelta
from datetime import datetime
from sklearn import linear_model
from datetime import timedelta
from sklearn.cross_validation import KFold

non_price_inputs =  ['avg-confirmation-time.txt', 'estimated-transaction-volume.txt', 'my-wallet-transaction-volume.txt', 'total-bitcoins.txt', 
             'bitcoin-days-destroyed-cumulative.txt','hash-rate.txt', 'n-orphaned-blocks.txt','trade-volume.txt', 'bitcoin-days-destroyed.txt','market-cap.txt', 
             'n-transactions-excluding-popular.txt','transaction-fees.txt', 'blocks-size.txt','n-transactions-per-block.txt', 'tx-trade-ratio.txt', 
             'cost-per-transaction.txt','miners-revenue.txt', 'n-transactions.txt', 'difficulty.txt','my-wallet-n-tx.txt', 'n-unique-addresses.txt', 
             'estimated-transaction-volume-usd.txt', 'my-wallet-n-users.txt', 'output-volume.txt']

data = {}
for f in non_price_inputs: 
    data[f] = util.read_data('data/' + f)

data['market-price.txt'] = util.read_data('data/market-price.txt')

day_price = util.day_to_price()


# find all the features and prices for 100-600 days before today
all_features = []
Y = []

for test_day in range(100,600): # the last 600-100 days from today
    end_day = date.today() - timedelta(days = test_day + 1)
    start_day = date.today() - timedelta(days = test_day + 101) # look at the last 100 days
    this_day = date.today() - timedelta(days = test_day)
    future_day = this_day + timedelta(days = 200)
Example #40
        y.append(rating)
        # break
    # X = sequence.pad_sequences(X, maxlen=max_len)
    X = np.asarray(X)
    y = np.asarray(y)

    return X, y


def execute_model(X, y):
    kf = KFold(y.shape[0], n_folds=n_fold, shuffle=True)
    results_user = np.array([0.0, 0.0, 0.0, 0.0])
    for train_index, test_index in kf:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        accuracy, precision, recall, f1 = cnn_model(X_train, y_train, X_test, y_test)
        # precision, recall, f1 = bidirectional_lstm(X_train, y_train, X_test, y_test)
        results_user[0] += accuracy
        results_user[1] += precision
        results_user[2] += recall
        results_user[3] += f1
    results_user /= n_fold
    return results_user

if __name__ == '__main__':
    n_count = 0
    data = util.read_data(path.join(util.data_path, util.file_name))
    X, y = build_dataset(data)
    results = execute_model(X, y)
    print results
    util.insert_results('CNN', results[0], results[2], results[1], results[3])
Example #41
def get_train_data():
    train_data, _ = util.read_data(error=0, is_train=True)
    train_data = preprocessing.StandardScaler().fit_transform(train_data)
    return train_data
Example #42
File: main.py Project: Deryugin/university
#!/usr/bin/python2.7

import sys
import util
import numpy as np

required_fields = util.required_fields
attr_n = util.attr_n

if len(sys.argv) != 3:
    util.print_usage()
    exit()

data = util.read_data(sys.argv[1])
util.init(data)

c_cnt    = util.c_cnt
o_cnt    = util.o_cnt
simple_attr = util.simple_attr
ranged_attr = util.ranged_attr

# Learning
data = np.array(data)
np.random.shuffle(data)
data = data[:100]

for record in data:
    t = int(record[13]) # 0   => Healthy
                        # 1-4 => Has heart disease
    if t > 4:
        print "Error"
Example #43
import os
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

tf.logging.set_verbosity(tf.logging.INFO)

# Starting the tensorflow session
sess = tf.Session()

# Defining the special token that will be used
SENTENCE_START_TOKEN = "<START>"
SENTENCE_END_TOKEN = "<EOS>"
OOV_TOKEN = "<UNK>"
PAD_TOKEN = "<PAD>"

# Reading the data
df = read_data()

# Spliting the data into train, validation and test
msk = np.random.rand(len(df)) < 0.8
df_train = df[msk]
msk2 = np.random.rand(len(df_train)) < 0.8
df_validate = df_train[~msk2]
df_train = df_train[msk2]
df_test = df[~msk]

print("Train size: %s" % len(df_train))
print("Validation size: %s" % len(df_validate))
print("Test size: %s" % len(df_test))

# Creating the english and hebrew vocabularies
eng_vocab, rev_eng_vocab = get_vocab(df["english_sentences"], addtional_tokens=[PAD_TOKEN, OOV_TOKEN], top=None)
Example #44
def read_xy_p (filename):
    data = ut.read_data(filename)
    return np.array([[row[0] for row in data], [row[1] for row in data]])
Example #45
import crf
import util

theta = util.read_model('model')    # model directory
data = util.read_data('data/test*') # regex pattern of binarized text image files,
                                    # each sequence (word, sentence, etc.) in its own file
alphabet = list('etainoshrd')       # list of all possible character labels

predictions = crf.predict(theta, data, alphabet)
for prediction in predictions:
    print ''.join(prediction)
Example #46
File: run.py Project: aaren/lab-waves
 def load_basic(self):
     self.basic = read_data(data_dir + 'basic/basic_%s' % self.index)
Example #47
File: run.py Project: aaren/lab-waves
 def reload(self):
     # Force load the data
     self._data = read_data(self.data_file)[self.index]