Example #1
    def read_from_txt(self,
                      file_name_and_path,
                      test_train_ratio=0.8,
                      valid_train_ratio=0.75):
        """Reads a data set from a .txt file, storing it as three data sets: training, testing, and validation.

        Arguments:
            file_name_and_path: A string describing the file name (and relative path) of the .txt file to read.
            test_train_ratio: A float giving the fraction of the data to use for training; the remainder is used for testing.
            valid_train_ratio: A float giving the fraction of the training data to keep for actual training; the remainder is used for validation.

        Returns:
            Nothing.
        """
        self.__data_loader = data_loader.DataLoader(file_name_and_path,
                                                    test_train_ratio,
                                                    valid_train_ratio)
        self.__data_loader.convert_data_to_1_hot()
        self.__data_loader.split_data()
        training_x, training_y1, training_y2 = self.__data_loader.get_training_data()
        self.__training = data_batcher.DataBatcher(training_x, training_y1,
                                                   training_y2)
        testing_x, testing_y1, testing_y2 = self.__data_loader.get_testing_data()
        self.__testing = data_batcher.DataBatcher(testing_x, testing_y1,
                                                  testing_y2)
        validation_x, validation_y1, validation_y2 = self.__data_loader.get_validation_data()
        self.__validation = data_batcher.DataBatcher(validation_x,
                                                     validation_y1,
                                                     validation_y2)
        self.__headers = self.__data_loader.get_header_data()
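
A minimal stand-alone sketch of the same pipeline, driving data_loader.DataLoader directly with the calls shown above; the file name "data.txt" and the direct imports are assumptions for illustration only.

import data_loader
import data_batcher

# Hypothetical driver for the steps read_from_txt performs: load a
# tab-separated .txt file, one-hot encode it, split it, and batch the
# training portion. "data.txt" is a placeholder path.
loader = data_loader.DataLoader("data.txt", 0.8, 0.75)
loader.convert_data_to_1_hot()
loader.split_data()
train_x, train_y1, train_y2 = loader.get_training_data()
train_batches = data_batcher.DataBatcher(train_x, train_y1, train_y2)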
def dataPlot(data, median):
    n, bins, patches = plt.hist(data,
                                45,
                                density=True,
                                facecolor='g',
                                alpha=0.75)
    plt.xlabel('interval')
    plt.ylabel('Probability')
    plt.title('Histogram')
    plt.grid(True)
    x = np.linspace(0, 40, 20)
    tmp = 1 / median

    print(tmp)
    y = tmp * np.exp(-tmp * x)
    plt.plot(x, y, '-', lw=2)
    plt.show()

    cumu_prob = 0
    cumu_x = 0
    print(n)
    for patch in patches:
        cumu_x += patch.get_width()
        cumu_prob += patch.get_width() * patch.get_height()
        print("0-{}:{}".format(cumu_x, cumu_prob))
    print(median)

    # output to file
    result = [["Distance", "Probability"]]
    cumu_x = 0
    for patch in patches:
        result.append([cumu_x, patch.get_height()])
        cumu_x += patch.get_width()
    dataloader = data_loader.DataLoader()
    dataloader.write_to_file(result, 'distance-histogram.dat', split=' ')
Example #3
def main():
    output_dir = r"./output"
    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)
    run_config = model_helper.get_configure()
    model_fn = get_model_fn()
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
    if hp.model_mode == "train":
        input_path = r"./data/train.recoder"
        train_input_fn = data_loader.file_based_input_fn_builder(
            input_file_path=input_path,
            is_training=True,
            drop_remainder=True,
            mode=hp.model_mode)
        estimator.train(input_fn=train_input_fn, max_steps=hp.train_steps)
    elif hp.model_mode == "predict":
        input_path = r"./data/test.recoder"
        result_path = r"./output/result.txt"
        vocdict = data_loader.DataLoader().voacb_list
        assert tf.gfile.Exists(input_path)
        train_input_fn = data_loader.file_based_input_fn_builder(
            input_file_path=input_path,
            is_training=False,
            drop_remainder=True,
            mode=hp.model_mode)
        with tf.gfile.Open(result_path, mode="w") as f:

            for result in estimator.predict(input_fn=train_input_fn,
                                            yield_single_examples=True,
                                            checkpoint_path=hp.predict_ckpt):
                f.write(
                    model_helper.id2sentence(result["y_hat"], vocdict) + "\n")
def MercerTest():
    data_loader_obj = data_loader.DataLoader([10, 90], test=False)
    batch_iter = data_loader_obj.train_batch_iter(batch_size=100, num_epochs=1)

    # Fake variables for testing
    session = None
    samples_op = None
    output_dir = './exp/eval_mercer_test'
    y_samples_ph = 'y_ph'
    z_samples_ph = 'z_ph'
    num_randos = 1

    class FakeSession:
        def run(self, dummy_op, feed_dict):
            yield 1.5 * feed_dict['y_ph'][0][1] + 3 + 10 * feed_dict['z_ph'][0]

    session = FakeSession()

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    p0_data, p1_data = prepare_energy_data(batch_iter)
    plot_energy_data(p0_data, output_dir)
    plot_energy_data(p1_data, output_dir)
    p0_data = add_gan_data(p0_data, session, samples_op, y_samples_ph,
                           z_samples_ph, num_randos)
    p1_data = add_gan_data(p1_data, session, samples_op, y_samples_ph,
                           z_samples_ph, num_randos)
    plot_energy_data(p0_data, output_dir, step=1)
    plot_energy_data(p1_data, output_dir, step=1)
    create_histogram(p0_data, output_dir, step=1)
    create_histogram(p1_data, output_dir, step=1)
    def setUp(self):
        """Sets up a DataLoader object initialised with a data set containing 100 samples.

        Also creates a temporary text file from which to read the data.

        Arguments:
            Nothing.

        Returns:
            Nothing.
        """
        try:
            open_file = open("tmp.txt", 'w')
        except IOError as excep:
            print('Error writing temp file for testing')
            print(excep)
            sys.exit(2)

        open_file.write("N1\tN2\tN3\tN4\tN5\tN6\tN7\tM8\tM9\tc\n")
        for _ in range(50):
            open_file.write("0\t1\t2\t0\t1\t2\t0\t1\t2\t1\n")
        for _ in range(50):
            open_file.write("2\t1\t0\t2\t1\t0\t2\t1\t0\t0\n")
        open_file.close()

        self.dl = data_loader.DataLoader("tmp.txt", 0.8, 0.75)

        self.dl.convert_data_to_1_hot()

        self.dl.split_data()
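
    # A hedged sketch of a follow-up test (not part of the original class):
    # it relies only on the API shown in Example #1, i.e. that
    # get_training_data() returns three aligned arrays. The exact split
    # sizes depend on how DataLoader interprets the ratios, so only the
    # alignment of the arrays is checked here.
    def test_training_split_is_aligned(self):
        train_x, train_y1, train_y2 = self.dl.get_training_data()
        self.assertEqual(len(train_x), len(train_y1))
        self.assertEqual(len(train_x), len(train_y2))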
Example #6
def trainSVM(d2vModel, model_path, tag_path, input_path):
    print("Initializing data loader")
    loader = dl.DataLoader(input_path, tag_path)
    label_size = loader.tag_cnt
    X = []
    Y = []
    print("Loading data into data loader")
    for content in loader.data:
        for sub_con in content:
            text = sub_con['sentence']
            tags = sub_con['labels']
            text = re.sub(r'[{}]'.format(punction), ' ', text).split(' ')
            # Flatten jieba's generators into a single token list for infer_vector.
            text = [tok for seg in text if seg != '' for tok in jieba.cut(seg)]
            X.append(d2vModel.infer_vector(text))
            Y.append(transferTagVec(tags))
    X = np.array(X)
    Y = np.array(Y)
    Y = Y.transpose()
    print("Beginning trainning SVC")
    for i in range(1, label_size + 1):
        classifier = SVC(gamma='auto')
        print("SVC {} training finished".format(i))
        classifier.fit(X, Y[i - 1])
        joblib.dump(classifier, model_path + str(i) + '.model')
    print("Train process end")
Example #7
def main():
    args = parse_args()

    mp.set_start_method('spawn')  # The 'spawn' start method is used deliberately.
    _logger = log.get_logger(__name__, args)
    _logger.info(print_args(args))

    loaders = []
    file_list = os.listdir(args.train_file)
    random.shuffle(file_list)
    for i in range(args.worker):
        loader = data_loader.DataLoader(args.train_file,
                                        args.dict_file,
                                        separate_conj_stmt=args.direction,
                                        binary=args.binary,
                                        part_no=i,
                                        part_total=args.worker,
                                        file_list=file_list,
                                        norename=args.norename,
                                        filter_abelian=args.fabelian,
                                        compatible=args.compatible)
        loaders.append(loader)
        loader.start_reader()

    # Smoke-test CUDA before training; only touch the GPU if one is available.
    cuda_test = torch.cuda.is_available()
    if cuda_test:
        cuda_tensor = torch.randn(10).cuda()

    net, mid_net, loss_fn = create_models(args, loaders[0], allow_resume=True)
    # Use fake modules to replace the real ones
    net = FakeModule(net)
    if mid_net is not None:
        mid_net = FakeModule(mid_net)
    for i in range(len(loss_fn)):
        loss_fn[i] = FakeModule(loss_fn[i])
    opt = get_opt(net, mid_net, loss_fn, args)

    inqueues = []
    outqueues = []

    plist = []
    for i in range(args.worker):
        recv_p, send_p = Pipe(False)
        recv_p2, send_p2 = Pipe(False)
        inqueues.append(send_p)
        outqueues.append(recv_p2)
        plist.append(
            Process(target=worker,
                    args=(recv_p, send_p2, loaders[i], args, i)))
        plist[-1].start()

    _logger.warning('Training begins')
    train(inqueues, outqueues, net, mid_net, loss_fn, opt, loaders, args,
          _logger)
    loader.destruct()
    for p in plist:
        p.terminate()
    for loader in loaders:
        loader.destruct()
    _logger.warning('Training ends')
Example #8
    def __init__(self, config, path, train_idx, test_idx):

        self.epochs = config.epochs
        self.test_patch_num = config.test_patch_num

        self.model_hyper = models.HyperNet(16, 112, 224, 112, 56, 28, 14,
                                           7).cuda()
        self.model_hyper.train(True)

        self.l1_loss = torch.nn.L1Loss().cuda()

        backbone_params = list(map(id, self.model_hyper.res.parameters()))
        self.hypernet_params = filter(lambda p: id(p) not in backbone_params,
                                      self.model_hyper.parameters())
        self.lr = config.lr
        self.lrratio = config.lr_ratio
        self.weight_decay = config.weight_decay
        paras = [{
            'params': self.hypernet_params,
            'lr': self.lr * self.lrratio
        }, {
            'params': self.model_hyper.res.parameters(),
            'lr': self.lr
        }]
        self.solver = torch.optim.Adam(paras, weight_decay=self.weight_decay)

        train_loader = data_loader.DataLoader(config.dataset,
                                              path,
                                              train_idx,
                                              config.patch_size,
                                              config.train_patch_num,
                                              batch_size=config.batch_size,
                                              istrain=True)
        test_loader = data_loader.DataLoader(config.dataset,
                                             path,
                                             test_idx,
                                             config.patch_size,
                                             config.test_patch_num,
                                             istrain=False)
        self.train_data = train_loader.get_data()
        self.test_data = test_loader.get_data()
def rnn_generator(split, batch_size, n_epochs=1, test=0, partition=0):
    '''
    Put data into a format for RNN training or prediction
    '''
    if test:
        dl = data_loader.DataLoader(split,
                                    test=True,
                                    local_test_data_dir='../..')
    else:
        dl = data_loader.DataLoader(split, test=False)
    for ecal, target in dl.batch_iter(partition, batch_size, n_epochs):
        flat = np.array([[x[:, :, i].flatten() for i in range(x.shape[-1])]
                         for x in ecal])
        X = np.log(1 + flat[:, :-1, :])
        Y = np.log(1 + flat[:, 1:, :])
        P = np.zeros((Y.shape[0], 2))
        P[np.arange(P.shape[0]), np.array([int(t[0]) for t in target])] = 1
        M = np.array([t[1] for t in target])
        X_dict = {'X_input': X, 'P_input': P, 'M_input': M}
        Y_dict = {'output': Y}
        yield (X_dict, Y_dict)
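
A self-contained aside on the log(1 + x) transform used above: np.expm1 inverts it exactly, which is useful when mapping network outputs back to energy space. The array values below are illustrative only.

import numpy as np

# rnn_generator yields X = log(1 + ecal); np.expm1 undoes that transform.
ecal_values = np.array([0.0, 0.5, 10.0, 250.0])
logged = np.log(1 + ecal_values)
recovered = np.expm1(logged)
assert np.allclose(recovered, ecal_values)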
Example #10
def data_generator(args, partition=0):
    splits = args['splits']
    batch_size = args['batch_size']
    test = args['test']
    cropped_width = args['cropped_width']
    log_data = args['log_data']  # TODO
    averaged = args['averaged_data']

    # compute normalization stddev based on width, logging, and averaging
    stddev = STDDEV_MAP[cropped_width, log_data, averaged]

    if averaged:
        data_loader._SCRATCH_DIR = data_loader._AVG_SCRATCH_DIR
        data_loader._FILENAME_REGEX = data_loader._AVG_FILENAME_REGEX
    if test:
        assert not averaged, 'averaged should not be true when testing'
        dl = data_loader.DataLoader(splits,
                                    test=True,
                                    local_test_data_dir='../..')
    else:
        dl = data_loader.DataLoader(splits, test=False)

    for ecals, targets in dl.batch_iter(partition, batch_size, num_epochs=1):
        if cropped_width < data_loader.DATA_DIM[0]:
            ecals = data_loader.truncate_ecals(ecals,
                                               (cropped_width, cropped_width))
        if log_data:
            ecals = data_loader.log_ecals(ecals)
        # normalize using the stddev computed above based on width+logging+averaging
        ecals = data_loader.normalize_ecals(ecals, TRAIN_MEAN, stddev)
        ecals = data_loader.unroll_ecals(ecals)
        # NOTE this is needed to make it fit with the model architecture
        ecals = np.expand_dims(ecals, axis=1)
        particle_types = np.array([y[0] for y in targets])
        input_energies = np.array([y[1] for y in targets])
        yield (ecals, particle_types, input_energies)
Example #11
    def __init__(self):
        # --------------------------------------------
        # Load the data
        # --------------------------------------------
        self.d_loader = data_loader.DataLoader()

        # Build the word dictionaries
        self.word_to_index, self.index_to_word = self.d_loader.set_word_dic()

        # Load the model
        build = test_models.BuildModel(2345, self.d_loader.embedding_dim,
                                       self.d_loader.lstm_hidden_dim)

        model = build.train_model()
        model.load_weights("./datasets/seq2seq_model.h5")

        self.encoder_model, self.decoder_model = build.predict_model()
Example #12
def LabelImages():
    config = config.Config()

    loader = data_loader.DataLoader(config.DataFolder, config.LabelFile)
    _, _, to_label_filenames = loader.LoadUnlabeledImages()
    to_label = set(to_label_filenames)
    _, _, already_labeled_filenames = loader.LoadLabeledImages()
    already_labeled = set(already_labeled_filenames)

    print("To label count: ", len(to_label))
    print("Already labeled count:", len(already_labeled))

    # Randomness of the samples depends on the behavior of set.pop()
    # (not verified).
    while len(to_label) > 0:
        image_filename = to_label.pop()
        DisplayImage(image_filename)
        money, tech_points = PromptUserForAnnotation()
        WriteAnnotationToFile(image_filename, money, tech_points,
                              kLabelingFile)
Example #13
    def __init__(self, file_or_dict):
        file_h5, file_json = None, None
        if type(file_or_dict) is str:
            file_ = file_or_dict
            if '.json' in file_:
                file_json = file_
            elif '.h5' in file_:
                file_h5 = file_
            else:
                raise ValueError(file_)
            self.mag_data = data_loader.DataLoader(file_json=file_json,
                                                   file_h5=file_h5)
            self.data_type = 0
        elif type(file_or_dict) is dict:
            self.data_type = 1
            self.mag_data = file_or_dict
        else:
            raise ValueError(type(file_or_dict))
Example #14
  def _LoadRight(self):
    # Load right points and mark each of them as a 'right point' so they can
    # be separated after binning.
    cust_attrs_to_set = {_DATASET_TYPE_CUSTOM_ATTRIBUTE_NAME: _RIGHT_DATASET}
    self._all_right_points_by_cluster_id = data_loader.DataLoader(
        _RIGHT_FILENAME,
        num_first_rows_to_skip=_NUM_FIRST_ROWS_TO_SKIP_IN_THE_DATA_FILES,
        line_separator=_DATA_FILES_LINE_SEPARATOR,
        x_column=_DATA_FILES_X_COLUMN,
        y_column=_DATA_FILES_Y_COLUMN,
        cluster_id_column=_DATA_FILES_CLUSTER_ID_COLUMN,
        cluster_ids_to_exclude={0, -1000},
        columns_separator_regex=_COLUMNS_SEPARATOR_REGEX
    ).LoadAndReturnPointsDividedByClusterId(
        point_custom_attributes=cust_attrs_to_set)
    print 'Right points are loaded. Clusters are %s' % (', ').join(
        [str(s) for s in self._all_right_points_by_cluster_id.iterkeys()])
Example #15
def main():

    current_th = min_th
    rate_list_auto = []
    interval_list = []
    rate_list_lin = []

    while current_th < max_th:

        loader = data_loader.DataLoader(validation_folder)
        all_ious = []
        all_itervals = []
        while True:
            imgs, gts = loader.get_next()

            if imgs is None:
                break
            # Do the auto tracking
            pred_auto = methods.auto_select(imgs, gts, stride=current_th)
            iou, est_interval = evaluate.evaluate_estimation_iou(
                pred_auto, gts)
            # evaluate the system
            all_ious += iou
            all_itervals.append(est_interval)

            rate_list_auto.append(evaluate.evaluate_accuracy(iou, accuracy_th))

            interval_list.append(1. / est_interval)

            pred_lin = methods.linear_annotation(imgs, gts, stride=current_th)
            iou, est_interval = evaluate.evaluate_estimation_iou(pred_lin, gts)

            rate_list_lin.append(evaluate.evaluate_accuracy(iou, accuracy_th))

            print("Processed data point - ", len(rate_list_auto))

            visualize.visualize_video(imgs, pred_lin, pred_auto, gts)
        current_th += inter_th

        print("Evaluating for TH = ", current_th)

    pickle.dump([rate_list_lin, rate_list_auto, interval_list],
                open("save2f.p", "wb"))
Example #16
    def get_vector(self, inputs, tokenized_corpus, max_word_num, max_sequence_len):
        loader = data_loader.DataLoader(inputs)
        self.data = pd.DataFrame({
            'title': loader.title,
            'context': loader.context,
            'question': loader.question,
            'answer_start': loader.answer_start,
            'answer_end': loader.answer_end,
            'answer_text': loader.answer_text
        })

        self.tokenizer, self.vocabulary = self.create_vocab(tokenized_corpus, max_word_num)

        # tokenization & add tokens, token indexes to columns
        nltk_tokenizer = MosesTokenizer()
        vectors = []
        for i, text_column in enumerate(['context' , 'question']):
            self.data[text_column + '_tk'] = self.data[text_column].apply(lambda i: nltk_tokenizer.tokenize(i.replace('\n', '').strip(), escape=False))
        
            # token to index
            self.data[text_column+'_tk_index'] = self.tokenizer.texts_to_sequences(self.data[text_column + '_tk'].apply(lambda i: ' '.join(i)))
            
            # padding: It returns context, question vectors.
            vectors.append(pad_sequences(self.data[text_column+'_tk_index'], max_sequence_len[i]))

        return vectors
Example #17
def predict_test_dataset(model,
                         fruit_label_enum=create_fruit_labels(fruits=("apple",
                                                                      "banana",
                                                                      "mix"))):
    test_spectrum_path = r"dataset/test_spectrum_after5_anal_5000.npy"
    test_labels_path = r"dataset/test_labels_after5_anal_5000.npy"
    test_data_loader = data_loader.DataLoader(
        "test",
        test_spectrum_path=test_spectrum_path,
        test_labels_path=test_labels_path,
        batch_size=1,
        transform=transform)

    for spectrum, labels in test_data_loader.load_data():
        # convert string representation of labels to int
        labels = np.array([fruit_label_enum[label].value for label in labels])
        data_to_predict = spectrum
        amount_of_data = 1
        if transform:
            data_to_predict = np.reshape(data_to_predict, (-1, 1))
            data_to_predict = transform(data_to_predict).reshape(
                amount_of_data, 1, 2, -1)
        else:
            data_to_predict = torch.from_numpy(
                data_to_predict.reshape(amount_of_data, 1, 2, -1))

        for spectrum in data_to_predict:
            # if transform:
            #     spectrum = transform(spectrum).reshape(1, 1, 2, -1)
            # else:
            spectrum = spectrum.view(1, 1, 2, -1)

            # Run the spectrum through the model
            outputs = model(Variable(spectrum.float()))

            # Softmax converts the raw outputs into probabilities
            outputs = torch.nn.functional.softmax(outputs, dim=1)

            # Get prediction and the confidence (probability) by taking the maximal value of the outputs
            confidence, prediction = torch.max(outputs.data, 1)
            if prediction != labels[0]:
                print("False prediction")
Example #18
def test_model():
    count = 0

    video_g_net = VideoGANGenerator()
    video_g_net.load_state_dict(torch.load(MODEL_FILEPATH))
    video_g_net.eval()

    max_size = len(os.listdir('train'))
    pacman_dataloader = data_loader.DataLoader('train', min(max_size, 500000),
                                               16, 32, 32, 4)
    clips_x, clips_y = pacman_dataloader.get_train_batch()
    clips_x = torch.tensor(np.rollaxis(clips_x, 3, 1))
    clips_y = torch.tensor(np.rollaxis(clips_y, 3, 1))

    # batch_size x noise_size x 1 x 1
    batch_size = 16
    noise_size = 100

    video_images = video_g_net(clips_x)

    save_samples(video_images, count, "test_model")
def my_generator(split,
                 batch_size,
                 slice_start=0,
                 slice_end=24,
                 n_epochs=1,
                 test=False,
                 partition=0):
    '''Wrapper around data_loader.DataLoader that yields batches containing only a particular slice and target.
       This wrapper plays nicely with Keras' model.fit_generator() functionality.
       Notes:
       1) split=[train, valid, test] where train+valid+test=100; there can be an arbitrary number of partitions.
       2) partition_batch_iter(N_batch, N_epochs, partition_index); set N_epochs to 1 if using Keras' fit_generator.'''
    dl = data_loader.DataLoader(split, test=test)
    for ecal, target in dl.batch_iter(partition, batch_size, n_epochs):
        X = np.array([
            np.expand_dims(x[:, :, slice_start:slice_end + 1].mean(axis=2),
                           axis=0) for x in ecal
        ])

        #save the particle type, which is y[0], not the momentum
        Y = np.array([y[0] for y in target])
        yield (X, Y)
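
A self-contained illustration of the shape bookkeeping in my_generator: slicing a single (H, W, D) event along depth, averaging, and adding a channel axis gives a (1, H, W) array, which stacks into the (batch, 1, H, W) input a channels-first Keras model would expect. The 20x20x25 dimensions are assumptions for illustration only.

import numpy as np

# One fake ECAL event: height x width x depth (dimensions are illustrative).
ecal_event = np.random.rand(20, 20, 25)
slice_start, slice_end = 0, 24

averaged = ecal_event[:, :, slice_start:slice_end + 1].mean(axis=2)  # (20, 20)
x = np.expand_dims(averaged, axis=0)                                 # (1, 20, 20)
batch = np.array([x, x])                                             # (2, 1, 20, 20)
print(batch.shape)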
def getData():
    dataloader = data_loader.DataLoader()
    files = os.listdir(INPUT_DIR)
    files.sort()
    result = [["file", "avg", "median"]]
    for file in files[4:5]:
        count = 0
        total_interval = 0
        interval_list = []
        if file.endswith(".csv"):
            data = dataloader.get_data(os.path.join(INPUT_DIR, file))
            for idx in range(1, len(data) - 1):
                delta_t1 = datetime.datetime.strptime(data[idx][1], "%Y-%m-%d %H:%M:%S")
                delta_t2 = datetime.datetime.strptime(data[idx + 1][1], "%Y-%m-%d %H:%M:%S")
                interval = (delta_t2 - delta_t1).seconds
                interval_list.append(interval)
                if interval < 1000 and interval != 0:
                    total_interval += interval
                    count += 1
            print(file, "avg:{}".format(total_interval / count), "median:{}".format(np.median(interval_list)))
            result.append([file, total_interval / count, np.median(interval_list)])
    dataloader.write_to_file(result, './exp-time-interval/interval.txt', split=' ')
def getGaussainDistribute():
    dataloader = data_loader.DataLoader()
    files = os.listdir(INPUT_DIR)
    files.sort()
    count = 0
    total_interval = 0

    for file in files[:]:
        interval_list = []
        if file.endswith(".csv"):
            data = dataloader.get_data(os.path.join(INPUT_DIR, file))
            for idx in range(1, len(data) - 1):
                delta_t1 = datetime.datetime.strptime(data[idx][1], "%Y-%m-%d %H:%M:%S")
                delta_t2 = datetime.datetime.strptime(data[idx + 1][1], "%Y-%m-%d %H:%M:%S")
                interval = (delta_t2 - delta_t1).seconds
                if interval < 200 and interval != 0:
                    total_interval += interval
                    count += 1
                    interval_list.append(interval)

            print(file, "avg:{}".format(total_interval / count), "median:{}".format(np.median(interval_list)))
            dataPlot(interval_list, total_interval / count)
Example #22
def test_DataLoaderMultipleImages(fs):
    kLabeledImage = "labeled.png"
    kUnlabeledImage = "unlabeled.png"
    kLabelPath = "labels.txt"
    kDataFolder = "./"

    images_dim = (10, 5)  # (H, W)

    Image.new("RGB", images_dim[::-1]).save(kLabeledImage)  # (W, H)
    Image.new("RGB", images_dim[::-1]).save(kUnlabeledImage)  # (W, H)

    label_writer = data_loader.LabelWriter(kLabelPath)
    label_writer.WriteLabel(kLabeledImage, 0, 0)

    loader = data_loader.DataLoader(kDataFolder, kLabelPath)
    labeled_images, _, _ = loader.LoadLabeledImages()
    unlabeled_images, _, _ = loader.LoadUnlabeledImages()

    # Test that LoadLabeledImages gave us our labeled image
    assert labeled_images[0].shape == (3, *images_dim)
    # Test that LoadUnlabedImages gave us our unlabeled image
    assert unlabeled_images[0].shape == (3, *images_dim)
Example #23
    def __init__(self, glo_params, vid_params, rn):
        self.netZ = model_video_orig._netZ(glo_params.nz, vid_params.n)
        self.netZ.apply(
            model_video_orig.weights_init)  # init the weights of the model
        self.netZ.cuda()  # on GPU
        self.rn = rn
        self.lr = 0.01
        self.data_loader = data_loader.DataLoader()
        self.netG = model_video_orig.netG_new(glo_params.nz)
        self.netG.apply(model_video_orig.weights_init)
        self.netG.cuda()

        num_devices = torch.cuda.device_count()
        if num_devices > 1:
            print("Using " + str(num_devices) + " GPU's")
            for i in range(num_devices):
                print(torch.cuda.get_device_name(i))
            self.netG = nn.DataParallel(self.netG)

        if load:  # load saved weights if requested
            self.load_weights(counter, self.rn)

        self.vis_n = 100
        fixed_noise = torch.FloatTensor(self.vis_n, glo_params.nz).normal_(
            0, 1)  # for visualize func - Igen
        self.fixed_noise = fixed_noise.cuda()
        self.nag_params = glo_params
        self.vid_params = vid_params
        self.blockResnext = 101

        if VGG:
            self.dist_frame = utils.distance_metric(64, 3, glo_params.force_l2)
        elif LAP:
            self.lap_loss = lap.LapLoss(max_levels=3)
        else:
            self.dist = perceptual_loss_video._resnext_videoDistance(
                self.blockResnext)
def process_train_load_modeling(athletes_name):
    loader = data_loader.DataLoader()
    data_set = loader.load_merged_data(athletes_name=athletes_name)
    sub_dataframe_dict = utility.split_dataframe_by_activities(data_set)
    best_model_dict = {}

    for activity, sub_dataframe in sub_dataframe_dict.items():
        utility.SystemReminder().display_activity_modeling_start(activity)
        sub_dataframe_for_modeling = sub_dataframe[sub_dataframe['Training Stress Score®'].notnull()]
        if sub_dataframe_for_modeling.shape[0] > 20:
            general_features = utility.FeatureManager().get_common_features_among_activities()
            activity_specific_features = utility.FeatureManager().get_activity_specific_features(activity)
            features = [feature for feature in general_features + activity_specific_features
                        if feature in sub_dataframe.columns
                        and not sub_dataframe[feature].isnull().any()]   # Handle columns with null

            def select_best_model():
                min_mae, best_model_type, best_regressor = float('inf'), '', None
                for model_class in [ModelLinearRegression, ModelNeuralNetwork, ModelRandomForest, ModelXGBoost, ModelAdaBoost]:
                    model_type = model_class.__name__[5:]
                    print('\nBuilding {}...'.format(model_type))
                    builder = model_class(sub_dataframe_for_modeling, features)
                    mae, regressor = builder.process_modeling()
                    if model_type != 'NeuralNetwork':
                        utility.save_model(athletes_name, activity, model_type, regressor)
                        if mae < min_mae: min_mae, best_model_type, best_regressor = mae, model_type, regressor
                print("\n***Best model for activity '{}' is {} with mean absolute error: {}***"
                  .format(activity, best_model_type, min_mae))
                if best_regressor is not None:
                    best_model_dict[activity] = best_model_type

            select_best_model()
            utility.SystemReminder().display_activity_modeling_end(activity, True)

        else:
            utility.SystemReminder().display_activity_modeling_end(activity, False)
    utility.update_trainload_model_types(athletes_name, best_model_dict)
Example #25
def test_DataLoaderSingleImage(fs):
    kImagePath = "test.png"
    kLabelPath = "labels.txt"
    kDataFolder = "./"
    kImageCrop = (0, 0, 40, 20)  # (x0, y0, x1, y1)
    kMoney = 10
    kTechPoints = 20

    image = Image.new("RGB", (80, 60))  # Note the format here is (W, H) which
    # is the opposite of torch.
    image.save(kImagePath)

    # Create the label file
    with open(kLabelPath, 'w+'):
        pass

    loader = data_loader.DataLoader(kDataFolder, kLabelPath, kImageCrop)
    unlabeled_images, _, _ = loader.LoadUnlabeledImages()
    unlabeled_image = unlabeled_images[0]

    label_writer = data_loader.LabelWriter(kLabelPath)
    label_writer.WriteLabel(kImagePath, kMoney, kTechPoints)

    labeled_images, labels, labeled_filenames = loader.LoadLabeledImages()
    label = labels[0]
    labeled_image = labeled_images[0]

    np.testing.assert_array_equal(unlabeled_image, labeled_image)

    # Test that we can load labels
    assert int(label["money"]) == kMoney
    assert int(label["tech_points"]) == kTechPoints

    # Test that the crop worked
    assert labeled_image.shape == (3, 20, 40)  # (C, H, W)
Example #26
            self.cuda()
        else:
            self.load_state_dict(
                torch.load(path, map_location=lambda storage, loc: storage))
            self.cpu()


if __name__ == "__main__":
    import data_loader
    import os
    import utils
    import argparse

    use_cuda = torch.cuda.is_available()
    corpus = torch.load(os.path.join(const.DATAPATH, "corpus.pt"))
    dl = data_loader.DataLoader(const.DATAPATH, corpus["word2idx"], cuda=False)
    doc = dl.sample_data()[0]

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--max_len', type=int, default=500)
    parser.add_argument('--span_len', type=int, default=4)
    parser.add_argument('--d_model', type=int, default=512)
    parser.add_argument('--pos_dim', type=int, default=20)
    parser.add_argument('--n_head', type=int, default=8)
    parser.add_argument('--rnn_hidden_size', type=int, default=128)
    parser.add_argument('--dropout', type=float, default=0.5)

    args = parser.parse_args()

    args.word_ebd_weight = corpus["wordW"]
Example #27
from keras.layers import LSTM
import numpy as np
import cPickle
from keras.models import Sequential
import data_loader as dl
from keras.layers import Dense
from keras.layers import Masking
from keras.layers.wrappers import TimeDistributed
from keras.preprocessing.sequence import pad_sequences

data = dl.DataLoader()

X, Y, m = data.load()

X_pad = pad_sequences(X, maxlen=m, padding='post')
Y_pad = pad_sequences(Y, maxlen=m, padding='post')

sample_weights = np.ones((273, m))
for i in xrange(273):
    for j in xrange(m):
        if (X_pad[i][j] == np.zeros(12)).all():
            sample_weights[i][j] = 0

model = Sequential()
accuracies = dict()
for i in range(1, 200, 20):
    mask = np.zeros(12)
    model.add(Masking(mask_value=mask, input_shape=(m, 12)))
    model.add(LSTM(i, return_sequences=True, dropout_W=0.1, dropout_U=0.1))
    model.add(TimeDistributed(Dense(12, activation="softmax")))
    model.compile(optimizer='rmsprop',
    flatten = Flatten()(merged)
    drop = Dropout(0.5)(flatten)
    outputs = Dense(y_train.shape[1], activation='softmax')(drop)

    model = Model(inputs, outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    print(model.summary())

    return model

if __name__ == "__main__":
    config = argparser()
    
    current = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    loader = data_loader.DataLoader(config.corpus_tk, config.trained_word_vector, config.score_corpus)
    loader.load_data()

    x_train, y_train = loader.train
    x_test, y_test = loader.test
    
    # build model
    model = build_model()
    
    # training
    hist = model.fit(x_train, y_train,
                     epochs = config.epoch,
                     batch_size = config.batch_size,
                     validation_data=(x_test, y_test), verbose=2)
    
    # evaluation: confusion matrix & roc curve
Example #29
from datetime import datetime
import data_loader
import numpy as np

dl = data_loader.DataLoader(file_json='/storage/data_2020-02-03/2020-02-03.json1')


tt0 = list(dl.values())[0][0,0]
tt = datetime.fromtimestamp(tt0)
tt1 = datetime.now()


# Timezone seems to be correct...

last_quad = dl['SARBD02-MQUA030:I-SET']
#sarbd01_quad = dl['SARBD01-MQUA020:I-SET']

eduard_optics1 = {
        #'SARUN15.MQUA080.Q1.K1': -9.672893217266694e-01,
        #'SARUN16.MQUA080.Q1.K1': -2.443535112150988e+00,
        #'SARUN17.MQUA080.Q1.K1': +1.608546947532094e+00,
        'SARUN18.MQUA080.Q1.K1': +1.360154558769963e+00,
        'SARUN19.MQUA080.Q1.K1': -1.495693035627149e+00,
        'SARUN20.MQUA080.Q1.K1': -1.072774681910800e+00,
        'SARBD01.MQUA020.Q1.K1': -1.136049185308167e-01,
        }

eduard_optics2 = {
        #'SARUN15.MQUA080.Q1.K1': -2.810125636006008e-01,
        #'SARUN16.MQUA080.Q1.K1': -1.820840559288582e+00,
        #'SARUN17.MQUA080.Q1.K1': +1.581672326954900e+00,
Example #30
    run_config = tf.estimator.RunConfig(model_dir=hp.model_dir,
                                        save_checkpoints_secs=None,
                                        save_checkpoints_steps=hp.save_steps,
                                        keep_checkpoint_max=hp.max_save,
                                        )
    return run_config


def id2sentence(ids_list, vocdict):
    # Convert a list of ids into a sentence.

    sentence = ""
    for i in ids_list:
        if i == 0 or i == 3:
            return sentence
        sentence += vocdict[i]
    sentence += "\n"
    return sentence

def get_batch_size():
    if hp.model_mode == "train":
        return hp.batch_size
    else:
        return hp.batch_size
if __name__ == "__main__":
    vocdict = data_loader.DataLoader().voacb_list
    a = [[5, 192, 344, 23, 343, 4324, 432, 0]]
    print(id2sentence(a, vocdict))