Example #1
def main(args: argparse.Namespace):
    train_arr, test_arr = Data.read_pickle(args.pickle,
                                           'train'), Data.read_pickle(
                                               args.pickle, 'test')
    train = DataLoader(Data(train_arr, args.dev),
                       batch_size=args.bsize,
                       num_workers=args.workers,
                       shuffle=True,
                       collate_fn=Data.collate_fn)
    test = DataLoader(Data(test_arr, args.dev, train.dataset.mean,
                           train.dataset.std),
                      batch_size=args.bsize,
                      num_workers=args.workers,
                      shuffle=True,
                      collate_fn=Data.collate_fn)
    model = PixelCnn(train.dataset.W, train.dataset.C, args.kernel_size,
                     args.layers, args.filters, args.dist_size,
                     getattr(models, args.conv_class))
    if os.path.exists(args.save_path):
        model.load(args.save_path, args.dev)
    elif args.dev == 'cuda':
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    nlls_train, nlls_test = training(train, test, model, optimizer,
                                     args.epochs, args.save_path)
    samples = model.generate_samples(args.n_samples, args.dev,
                                     train.dataset.mean, train.dataset.std)

    save_training_plot(nlls_train, nlls_test, 'NLL (nats/dim)',
                       args.nll_img_path)
    show_samples(samples.cpu().numpy(), args.samples_img_path)
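A minimal sketch of the command-line wiring this main() appears to expect. The flag names mirror the attributes accessed above (args.pickle, args.dev, args.bsize, and so on); the default values and the MaskedConv2d class name are assumptions added for illustration, not part of the original example, and `import argparse` is implied by the argparse.Namespace type hint.

# Hypothetical argparse setup inferred from the attributes used in main() above.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--pickle', required=True)             # pickle holding 'train'/'test' arrays
    parser.add_argument('--dev', default='cuda')               # 'cuda' or 'cpu'
    parser.add_argument('--bsize', type=int, default=128)
    parser.add_argument('--workers', type=int, default=4)
    parser.add_argument('--kernel_size', type=int, default=7)
    parser.add_argument('--layers', type=int, default=8)
    parser.add_argument('--filters', type=int, default=64)
    parser.add_argument('--dist_size', type=int, default=4)
    parser.add_argument('--conv_class', default='MaskedConv2d')  # resolved via getattr(models, ...)
    parser.add_argument('--save_path', default='pixelcnn.pt')
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--n_samples', type=int, default=100)
    parser.add_argument('--nll_img_path', default='nll.png')
    parser.add_argument('--samples_img_path', default='samples.png')
    main(parser.parse_args())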
Example #2
    def setUp(self):
        self.filename1 = 'car.c45-names.txt'  #attributes
        self.filename2 = 'car.data'  # data examples

        self.dataset = Data()
        self.dataset.attr_file = self.filename1
        self.dataset.data_file = self.filename2

        self.dataset.read_attr_data()
        self.dataset.read_examples_data()
Example #3
def load_data(train_path, val_path, glove_path):
    data = Data(train_path, val_path, glove_path)
    train_x_list, _, val_x_list, _ = data.split_sentence()
    data.build_vocab()
    orig_data = train_x_list + val_x_list
    train_data = get_train_data(data.vocab, orig_data)

    print("数据实例个数: {}".format(len(train_data)))

    vocab_size = len(data.vocab) + 1
    print("词表长度为:", vocab_size)

    dist = np.array([v for k, v in data.word_freq.items()])
    dist = np.power(dist, 0.75)
    dist = dist / dist.sum()

    return train_data, data.vocab, vocab_size, dist
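The dist array built above is the word2vec-style negative-sampling distribution: raw word frequencies raised to the 0.75 power and renormalized, which flattens the distribution so very frequent words are sampled somewhat less often. A self-contained illustration with toy frequencies (the values are made up, not from the original data):

import numpy as np

# Hypothetical word counts standing in for data.word_freq.
word_freq = {'the': 100, 'cat': 10, 'sat': 1}
dist = np.array(list(word_freq.values()), dtype=float)
dist = np.power(dist, 0.75)   # 100 -> 31.6, 10 -> 5.6, 1 -> 1.0
dist = dist / dist.sum()      # ~[0.83, 0.15, 0.03] vs. raw frequencies ~[0.90, 0.09, 0.01]
print(dist)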
Example #4
def main(args):
    train_dataset = Data(args.train_manifest)
    train_loader = data_utils.DataLoader(train_dataset, args.batch_size)

    eval_loader = None
    if args.eval_manifest:
        eval_dataset = Data(args.eval_manifest)
        eval_loader = data_utils.DataLoader(eval_dataset, args.batch_size)
    
    net = models_dict[args.model](args)

    if args.train:
        net.train(train_loader, eval_loader)
    elif args.eval:
        net.eval()
    else:
        net.predict()
Example #5
def test0():

    # Generate dataset
    np.random.seed(125)
    random.seed(239)
    n = 100

    model_desc = semopy.model_generation.generate_desc(n_lat=0,
                                                       n_endo=1,
                                                       n_exo=2,
                                                       n_inds=3,
                                                       n_cycles=0)
    params, aux = semopy.model_generation.generate_parameters(model_desc)
    data_gen = semopy.model_generation.generate_data(aux, n)
    data_gen.index = [f's{i}' for i in range(n)]

    # generate random effects
    group1 = pd.DataFrame(data={'group1': np.random.binomial(1, 0.5, size=n)})
    group1.index = [f's{i}' for i in range(n)]

    model_desc = """
    x1 ~ g1 + g2 + group1
    """
    #
    # model_desc = """
    # x1 ~ g1 + g2
    # """

    data_gen['x1'] = data_gen['x1'] + 10 * group1['group1']
    data_gen = concat([data_gen, group1], axis=1)

    data = Data(d_phens=data_gen, show_warning=False)

    model = mtmlModel(model_desc=model_desc, data=data)
    model.opt_bayes()

    print(model.unnormalize_params())

    # -----------------------------------
    # semopy
    sem_old = Model(model_desc)
    sem_old.fit(data_gen)
    print('semopy')
    insp = sem_old.inspect()
    insp = insp.loc[insp['op'] == '~', ['lval', 'rval', 'Estimate']]
    print(insp)

    # semba
    semba_model = semba.Model(model_desc)
    semba_model.fit(data_gen, num_samples=1000)
    print('semba')
    insp = semba_model.inspect()
    insp = insp.loc[insp['op'] == '~', ['lval', 'rval', 'Estimate']]
    print(insp)
    print('params')
    print(params)

    return model_desc, data, model
Example #6
 def val_dataloader(self):
     d_params = Data.parameters
     d_params.update(self.args.dparams_override)
     test_dataset = Data(json_path=self.args.valid_file, **d_params, valid=True)
     return DataLoader(dataset=test_dataset,
                         batch_size=self.args.batch_size,
                         num_workers=self.args.data_workers,
                         collate_fn=collate_fn_padd,
                         pin_memory=True)
Example #7
 def train_dataloader(self):
     d_params = Data.parameters
     d_params.update(self.args.dparams_override)
     train_dataset = Data(json_path=self.args.train_file, **d_params)
     return DataLoader(dataset=train_dataset,
                         batch_size=self.args.batch_size,
                         num_workers=self.args.data_workers,
                         pin_memory=True,
                         collate_fn=collate_fn_padd)
Example #8
def main(args):
    # ===============================================
    # Build Data Loader
    # ===============================================
    train_transformer = transformer.Compose([
        transformer.RandomHorizontalFlip(),
        transformer.RandomScaleCrop(),
        transformer.ArrayToTensor(),
        transformer.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    train_dataset = Data('train',
                         transformer=train_transformer,
                         seed=args.seed,
                         train=True,
                         seq_length=3)
    # validation transform: tensor conversion plus the same normalization as training
    valid_transformer = transformer.Compose([
        transformer.ArrayToTensor(),
        transformer.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    val_dataset = Data('val',
                       transformer=valid_transformer,
                       seed=args.seed,
                       train=True,
                       seq_length=3)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=cfg.workers,
                              pin_memory=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=cfg.workers,
                            pin_memory=True)


    # ===============================================
    # Build Model
    # ===============================================
    model = Net()
    model = model.cuda()
    if args.load:
        pass
    else:
        model.init_weights()
    
    model = DataParallel(model)
Example #9
def data_import(feature_data, label_data):
    with open(feature_data, "r") as fi:
        feature_list = fi.readlines()
    with open(label_data, "r") as fi:
        label_list = fi.readlines()
    data_set = []
    for i in range(len(feature_list)):
        data = Data(list(feature_list[i]), list(label_list[i])[0:1])
        data_set.append(data)
    return Dataset(data_set)
Example #10
def get_dataset(model_block, model, year=2018):
    global THRESHOLD

    kanapki = []
    d = Data(year)

    a = d.load_dataset()
    b = a[:, 0]
    model.mi = np.nanmean(b, axis=(0, 1, 2)).reshape(1, 1, 7)
    model.sigma = np.nanstd(b, axis=(0, 1, 2)).reshape(1, 1, 7)
    # print(b.shape, np.nanmean(a[:,1,:,:,0]))
    # for x, y in a:
    #     if np.nanmean(y[:,:,0]) > 0.8:
    #         print(np.nanmean(x[:,:,0]), np.nanmean(y[:,:,0]))

    for x, y in tqdm(a):
        x_map = model_block.get_input(x)
        kanapki.append(
            Kanapka(
                model_block=model_block,
                features=x_map,
                label=model_block.get_output(y),
            ))
        # FIXME: add [[augment]]
        """try:
            for _ in range(4):
                x_copy = x_map
                x_map = np.swapaxes(x_map, 0, 1)
                if np.array_equal(x_copy, x_map):
                    break
                kanapki.append(
                    Kanapka(
                        model_block=model_block,
                        features=x_map,
                        label=model_block.get_output(y),
                    ))
        except:
            pass"""

    return Dataset(model_block=model_block,
                   kanapki=kanapki,
                   threshold=THRESHOLD, model=model)
Example #11
def data_import(feature_data, label_data, bit):
    with open(feature_data, "r") as fi:
        feature_list = fi.readlines()
    with open(label_data, "r") as fi:
        label_list = fi.readlines()
    data_set = []
    for i in range(len(feature_list)):
        data = Data(list(feature_list[i]), [list(
            label_list[i])[bit]])  # list(label_list[i])[0:1] is the list formed by bit 0 of the output data
        data_set.append(data)
    return Dataset(data_set)
Example #12
def main(input_filepath, output_filepath, interim_filepath, config_filepath,
         tokenization):
    """
    Turns raw data locate in .../raw into processed data locate in .../processed

    Keyword arguments:
    input_filepath -- filepath of raw/input data
    output_filepath -- filepath to put processed data
    interim_filepath -- filepath to folder to save interim/intermediary file
    config_filepath -- filepath to config_file used to set options executions
    tokenization -- columns to apply operation tokenization
    save_interim_files -- save each operation to separate files (append operation into final's filename)
    """
    logger = logging.getLogger(__name__)
    if interim_filepath is not None:
        logger.info("Making final data set from interim data: {0}".format(
            str(interim_filepath)))
    else:
        logger.info("Making final data set from raw data: {0}".format(
            str(input_filepath)))

    logger.info("Processed file will be save in: {0}".format(
        str(output_filepath)))

    with open(config_filepath, "r") as stream:
        try:
            config_data = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)

    # Manage arguments passed by user with config file yaml
    manage_arguments(config_data, tokenization, input_filepath,
                     output_filepath, interim_filepath)

    # Load data
    data = Data(tokenization, input_filepath, output_filepath,
                interim_filepath)

    # Apply operations in file
    data.apply_operations()
Example #13
    def __init__(self, args):
        self.args = args

        data = Data(args.train_path, args.val_path, args.glove_path)
        data.build_vocab()
        train_data, val_data = data.input2tensor()
        embedding_matrix = data.build_embedding_matrix(args.embed_type,
                                                       args.embed_dim)
        train_dataset = MyDataset(train_data, data.max_len)
        val_dataset = MyDataset(val_data, data.max_len)

        self.train_dataloader = DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True)
        self.val_dataloader = DataLoader(val_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=False)

        if args.model_type == 'CNN':
            self.model = CNNModel(args, data.vocab_size,
                                  embedding_matrix).to(args.device)
        else:
            self.model = LSTMNet(args, data.vocab_size,
                                 embedding_matrix).to(args.device)

        self.loss_func = nn.CrossEntropyLoss()
        self.optim = torch.optim.Adam(self.model.parameters(),
                                      lr=args.learning_rate)

        if torch.cuda.is_available():
            print('cuda memory allocated:',
                  torch.cuda.memory_allocated(device=args.device.index))
Example #14
def process_user_inputs(classifier):
    while True:
        print("\nGive your inputs to validate or press ENTER to continue")
        values = classifier.get_data_size()
        data = []
        for i in range(values):
            text = 'Enter value %s: ' % (i + 1)
            _data = input(text)
            if not _data:
                return
            data.append(float(_data))
        predicted_value = classifier.predict(Data(data, None))
        print('Predicted value: %s' % predicted_value)
Example #15
def test1():

    # Generate dataset
    np.random.seed(125)
    random.seed(239)
    n = 100

    model_desc = semopy.model_generation.generate_desc(n_lat=1,
                                                       n_endo=0,
                                                       n_exo=0,
                                                       n_inds=3,
                                                       n_cycles=0)
    # show(model_desc)
    params, aux = semopy.model_generation.generate_parameters(model_desc)
    data_gen = semopy.model_generation.generate_data(aux, n)
    data_gen.index = [f's{i}' for i in range(n)]

    model_desc = """
    eta1 =~ y1 + y2 + y3
    """

    data = Data(d_phens=data_gen)

    model = mtmlModel(model_desc=model_desc, data=data)
    model.show_mod()
    # self = model

    model.opt_bayes()

    print(model.unnormalize_params())

    # -----------------------------------
    # semopy
    sem_old = Model(model_desc)
    sem_old.fit(data_gen)
    print('semopy')
    insp = sem_old.inspect()
    insp = insp.loc[insp['op'] == '~', ['lval', 'rval', 'Estimate']]
    print(insp)

    # semba
    semba_model = semba.Model(model_desc)
    semba_model.fit(data_gen, num_samples=1000)
    print('semba')
    insp = semba_model.inspect()
    insp = insp.loc[insp['op'] == '~', ['lval', 'rval', 'Estimate']]
    print(insp)
    print('params')
    print(params)

    return model_desc, data, model
Example #16
 def __init__(self, path: str):
     """
      Args:
         path: str, path to the data file
     """
     self.train = list()
     self.test = list()
     self._path = path
     self._base = os.path.abspath(".")
     self.split_data(0.5)
     start = time.perf_counter()
     self.data = Data(self.train, self.test)
     end = time.perf_counter()
     print("读数据,建立对象用时 {0:10}".format(end - start))
Example #17
def calc_gain(dataset, entropy, val, attr_index):
    attr_entropy = 0
    total_examples = len(dataset.examples)

    # count for dataset class
    class_count = count_class_values(dataset.examples)
    new_dataset = Data()
    for example in dataset.examples:
        if (example[attr_index] == val):
            new_dataset.examples.append(example)

    attr_entropy = len(
        new_dataset.examples) / total_examples * calc_entropy(new_dataset)

    return attr_entropy
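calc_gain relies on calc_entropy and count_class_values helpers that are not shown on this page. Below is a minimal sketch of what calc_entropy presumably computes (the Shannon entropy of the class distribution), under the assumption that the class label is the last element of each example, as in the car.data examples elsewhere on this page.

import math

def calc_entropy(dataset):
    # Hedged sketch of the helper assumed above: entropy of the class labels,
    # taking the last column of each example as the class.
    total = len(dataset.examples)
    if total == 0:
        return 0.0
    counts = {}
    for example in dataset.examples:
        counts[example[-1]] = counts.get(example[-1], 0) + 1
    return -sum((c / total) * math.log2(c / total) for c in counts.values())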
Example #18
def main():
    config = get_args()
    dataset = Data(config)

    num_features = dataset.features.shape[1]
    num_classes = dataset.labels.max().item() + 1

    model = GCN(config=config, num_features=num_features, num_classes=num_classes)
    solver = Solver(config, model, dataset)

    if torch.cuda.is_available():
        model = model.to('cuda')

    criterion, best_model = solver.train()
    solver.test(criterion, best_model)
Example #19
def main():
    p = Path(__file__).parents[0]
    directory = os.path.abspath(os.path.join(p, "gemini_ETHUSD_d.csv"))
    
    data = Data(directory)
    train, test = data.split_data(test_size=0.2)
    numOfDays = 20
    x_train, x_test, y_train, y_test = data.prepare_data(train, test, numOfDays)
    
    
    model = Model()   
    #hyperparameters tuning
    epochs = 50
    optimizer='adam'
    loss='mean_squared_error'
    activation ='tanh'
    batch_size = 1
    neurons = 30
    
    
    model.LSTM_model(x_train, activation=activation, optimizer=optimizer, loss=loss, neurons=neurons)
    history = model.train(x_train, y_train, x_test, y_test, epochs=epochs, batch_size=batch_size)



    targets = test['Close'][numOfDays:]
    preds = model.predict(x_test).squeeze()
    
    print('MAE: ',mean_absolute_error(preds,y_test))
    
    
    preds = test['Close'].values[:-numOfDays] * (preds + 1)
    preds = pd.Series(index=targets.index, data=preds)
    
    line_plot(targets, preds, 'actual', 'prediction', lw=3)
    line_plot(history.history['loss'], history.history['val_loss'], 'train loss', 'test loss', lw=3)
Example #20
def main():
    args = sys.argv
    if len(args) < 3:
        print("You should provide the attribute and data filenames.")
        filename1 = 'car.c45-names.txt'  # attributes
        filename2 = 'car.data'  # data examples
    else:
        filename1 = str(sys.argv[1])  # attributes
        filename2 = str(sys.argv[2])  # data examples

    dataset = Data()
    dataset.attr_file = filename1
    dataset.data_file = filename2

    dataset.read_attr_data()
    dataset.read_examples_data()

    # Proportion training set to testing set (1 means only training set)
    PROPORTION = 1

    train_dtset = copy.deepcopy(dataset)
    test_dtset = copy.deepcopy(dataset)
    train_dtset.examples, test_dtset.examples = [], []

    total = len(dataset.examples)

    # polluting train dataset
    train_index_list = random.sample(range(total), int(total * PROPORTION))
    train_dtset.examples = [
        dataset.examples[index] for index in train_index_list
        if (dataset.examples[index] not in train_dtset.examples)
    ]

    # polluting test dataset
    test_dtset.examples = [
        ex for ex in dataset.examples if (ex not in train_dtset.examples)
    ]

    print("Computing tree...")
    root = compute_tree(train_dtset, None, None)
    tree_filename = 'results/tree.txt'
    with open(tree_filename, "w") as tree_file:
        write_tree(root, 0, tree_file)
Example #21
def main():
    log = get_logger(LOG_DIR)

    transform_val = Compose([
        Scale((224, 224)),
        ToTensor(),
        Normalize(mean=[0.45, 0.45, 0.45], std=[0.225, 0.225, 0.225])
    ])

    valset = Data(DATA_DIR, training=False, transform=transform_val)

    val_loader = DataLoader(valset, batch_size=1, num_workers=0)

    log.info('Data loaded.')
    log.info('Val samples:{}'.format(len(val_loader)))

    # set device
    device = torch.device('cpu')
    # torch.manual_seed(SEED)

    log.info('Torch Device:{}'.format(device))

    # set model and optimizer
    net = FCNResNet()
    net.to(device)
    # net.init_weights()

    # pretrained resnet weights
    net.load_state_dict(torch.load('./checkpoints/06.22.23.14.41_ep36_val.pt'),
                        strict=False)

    log.info('Model loaded.')

    for i, data in tqdm(enumerate(val_loader)):
        img, gt = data
        img = img.to(device)
        gt = gt.to(device)
        pred = net(img)
        iou = iou_loss(pred, gt)
        pred[pred < 0.5] = 0
        print(iou.detach_().item())

        pred = pred.detach_().numpy().squeeze(0).squeeze(0)
        pred = Image.fromarray((pred * 255).astype(np.uint8))
        pred.save(f'{OUTPUT_DIR}/{i}.png')
Example #22
 def load_data(path: str):
     data = Data()
     with open(path, 'r', encoding='utf-8') as f:
         while True:
             q = f.readline().strip('\n')
             if len(q) <= 0:
                 break
             question = Question(json.loads(q))
             question.parse = ParserTree(f.readline().strip('\n'))
             question.dep = list(DependencyTriple(i[0], i[1], i[2]) for i in json.loads(f.readline().strip('\n')))
             question.ner = json.loads(f.readline().strip('\n'))
             ans_num = int(f.readline().strip('\n'))
             for i in range(ans_num):
                 item = f.readline().strip('\n').split('\t')
                 if len(item) > 1:
                     question.add_answer(json.loads(item[0]), convert_bool(item[1]))
                 else:
                     question.add_answer(json.loads(item[0]))
             data.questions.append(question)
     return data
Example #23
def main(args):
	device = torch.device(args.device)
	# load the previously saved model with the requested name from the models folder
	model = torch.load(f"{args.model_folder}/{args.model_name}", map_location=device)
	model.eval()


	data_transforms = transforms.Compose([ToTensor()])
	
	dataset = Data(args.filename_x, args.filename_y, args.data_root,transform=data_transforms)

	output = {"Super_resolution": []}


	for sample in dataset:
		lores = sample['x'].to(device).float()
		print(lores.shape)
		sures = model(lores.unsqueeze(0)).squeeze(0)
		output["Super_resolution"].append(sures.detach().cpu().data.numpy())
	
	savemat(f"{args.model_folder}/{args.filename_out}", output)
Example #24
def jsma_craft(surrogate_model, source_samples, target_info):
    import tensorflow as tf
    import numpy as np
    from dataset import Data
    from cleverhans.attacks import SaliencyMapMethod

    with tf.variable_scope(surrogate_model.scope):
        sess = tf.get_default_session()

        jsma = SaliencyMapMethod(surrogate_model.tf(), back='tf', sess=sess)

        one_hot_target = np.zeros((1, 10), dtype=np.float32)
        one_hot_target[0, target_info['target_class']] = 1
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': one_hot_target
        }
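        # theta: perturbation added to each modified feature (here +1, i.e. push it to the max);
        # gamma: maximum fraction of input features JSMA may modify before stopping.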

        # Loop over the samples we want to perturb into adversarial examples

        adv_xs = []
        for sample_ind in range(len(source_samples)):
            sample = source_samples.getx(sample_ind)

            adv_xs.append(jsma.generate_np(sample, **jsma_params))

            # TODO remove break
            if sample_ind == 2:
                break

        adv_ys = np.concatenate([one_hot_target] * len(adv_xs))

        adv_xs_d = Data(np.concatenate(adv_xs), adv_ys)

        return source_samples, adv_xs_d
Example #25
def train():

    # parameters for NN-training
    end_epoch = 20
    batch_size = 16
    n_workers = 8

    # use gpu (if available) otherwise cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print('Creating Model')

    # copy the NN-model to GPU/CPU
    net = Net().to(device)

    # load dataset
    train_ds = Data(is_train=True)

    #read the data and put into memory
    train_dl = torch.utils.data.DataLoader(train_ds,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=n_workers)

    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    criterion = Loss()

    plot_dict = {
        'total': [],
        'shape': [],
        'color': [],
        'coord': [],
    }

    print('Started Training')
    for epoch in range(end_epoch):

        losses = AverageMeter()

        bar = Bar(f'Epoch {epoch + 1}/{end_epoch}',
                  fill='#',
                  max=len(train_dl))

        for i, data in enumerate(train_dl, 0):

            img, shape, color, coords, _ = data

            img = img.to(device)
            shape = shape.to(device)
            color = color.to(device)
            coords = coords.to(device)

            optimizer.zero_grad()

            outputs = net(img)
            labels = [shape, color, coords]
            loss, loss_dict = criterion(outputs, labels)
            losses.update(loss.item(), img.size(0))
            loss.backward()
            optimizer.step()

            plot_dict['total'].append(loss.item())
            for k, v in loss_dict.items():
                plot_dict[k].append(v.item())

            summary_string = f'({i + 1}/{len(train_dl)}) | Total: {bar.elapsed_td} | ' \
                             f'ETA: {bar.eta_td:} | loss: {losses.avg:.4f}'

            for k, v in loss_dict.items():
                summary_string += f' | {k}: {v:.4f}'

            bar.suffix = summary_string
            bar.next()

        bar.finish()

    print('Finished Training')

    print(f'loss_avg: {losses.avg:.4f}')

    print('Saving model')

    save_plots(plot_dict)

    torch.save(net.state_dict(), MODEL_PATH)
Example #26
def test():
    test_ds = Data(is_train=False)
    test_dl = torch.utils.data.DataLoader(test_ds,
                                          batch_size=1,
                                          shuffle=True,
                                          num_workers=1)

    print('Creating Model')
    net = Net()
    net.load_state_dict(torch.load(MODEL_PATH, map_location='cpu'))

    # use gpu (if available) otherwise cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = net.to(device).eval()

    result_filename = 'predicted_test_result.csv'
    write_to_file(text='filename,logo-name,x,y,color', file=result_filename)

    color_acc, shape_acc, coord_err = [], [], []

    print('Started Testing')
    with torch.no_grad():
        bar = Bar(f'Test', fill='#', max=len(test_dl))

        for i, data in enumerate(test_dl, 0):
            img, shape, color, coord, fname = data

            img = img.to(device)
            shape = shape.to(device)
            color = color.to(device)
            coord = coord.to(device) * 128

            output = net(img)

            pshape, pcolor, pcoord = output

            _, pshape = torch.max(pshape, 1)
            _, pcolor = torch.max(pcolor, 1)
            pcoord *= 128

            img = img[0]

            pshape, pcolor, pcoord = pshape[0], pcolor[0], pcoord[0]
            tshape, tcolor, tcoord = shape[0], color[0], coord[0]

            pcoord = pcoord.cpu().numpy()
            tcoord = tcoord.cpu().numpy()

            pshape, tshape = label2shape[int(pshape)], label2shape[int(tshape)]
            pcolor, tcolor = label2color[int(pcolor)], label2color[int(tcolor)]

            color_acc.append(pcolor == tcolor)
            shape_acc.append(pshape == tshape)
            coord_err.append(np.absolute(pcoord - tcoord))

            # title for sample images
            title = f'Predicted shape: {pshape}, color: {pcolor}, ' \
                    f'coord: ({int(pcoord[1].round())}/{int(pcoord[0].round())})\n'

            title += f'True shape: {tshape}, color: {tcolor}, ' \
                     f'coord: ({int(tcoord[1])}/{int(tcoord[0])})'

            # print 20 sample images
            if i < 20:
                plt.title(title, y=1.05)
                imshow(img, idx=i)

            ## code for writing to csv and then to xlsx
            fname = fname[0]
            write_to_file(
                f'{fname},'
                f'{pshape},'
                f'{int(pcoord[1].round())},'
                f'{int(pcoord[0].round())},'
                f'{pcolor}', result_filename)

            summary_string = f'Total: {bar.elapsed_td} | ETA: {bar.eta_td:}'

            bar.suffix = summary_string
            bar.next()

        bar.finish()

        read_file = pd.read_csv(result_filename)
        read_file.to_excel('predicted_test_result.xlsx',
                           index=None,
                           header=True)

        # mean error (x,y)
        coord_err = np.array(coord_err)
        mean_error_y = coord_err[:, 0].mean()
        mean_error_x = coord_err[:, 1].mean()

        # std error (x,y)
        std_error_y = coord_err[:, 0].std()
        std_error_x = coord_err[:, 1].std()

        print(f'Mean pixel error x: {mean_error_x:.4f}, y: {mean_error_y:.4f}')
        print(f'Std of error x: {std_error_x:.4f}, y: {std_error_y:.4f}')

        # color accuracy
        color_acc = np.array(color_acc)
        color_acc = (color_acc.sum() / color_acc.shape[0]) * 100
        print(f'Color accuracy: {color_acc:.4f}')

        # shape accuracy
        shape_acc = np.array(shape_acc)
        shape_acc = (shape_acc.sum() / shape_acc.shape[0]) * 100
        print(f'Shape accuracy: {shape_acc:.4f}')
Example #27
def main():
    data = Data()
    logistic_regression = models.LogisticRegression()
    neural_network = models.NeuralNet()
    svm = models.SupportVectorMachine(C=1.0, kernel='rbf', gamma='scale')
    random_forest = models.RandomForest(n_estimators=100,
                                        max_depth=None,
                                        random_state=None)

    # Process dataset
    training_data_features, training_data_labels, mnist_test_data_features, mnist_test_data_labels, \
    usps_test_data_features, usps_test_data_labels, combined_test_data_features, combined_test_data_labels = \
        data.pre_process()

    # Logistic Regression
    logistic_regression.fit(training_data_features,
                            training_data_labels,
                            learning_rate=0.01,
                            epochs=500)
    accuracy_mnist, confusion_mnist = logistic_regression.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = logistic_regression.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = logistic_regression.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Logistic Regression', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Neural Network
    neural_network.fit(training_data_features, training_data_labels, epochs=10)
    accuracy_mnist, confusion_mnist = neural_network.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = neural_network.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = neural_network.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Neural Network', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)

    # Support Vector Machine
    svm.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = svm.predict(mnist_test_data_features,
                                                  mnist_test_data_labels)
    accuracy_usps, confusion_usps = svm.predict(usps_test_data_features,
                                                usps_test_data_labels)
    accuracy_combined, confusion_combined = svm.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('SVM', accuracy_mnist, accuracy_usps, accuracy_combined,
                   confusion_mnist, confusion_usps, confusion_combined)

    # Random Forest
    random_forest.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = random_forest.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = random_forest.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = random_forest.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Random Forest', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)
Example #28
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 criterion = LabelSmoothCELoss(reduction='none')
 skf = KFold(n_splits=3, shuffle=True, random_state=47)
 total_val = 0
 for fold, (train_idx, val_idx) in enumerate(skf.split(train_df), 1):
     train_writer = SummaryWriter(
         log_dir=os.path.join('tbx_log', current_time, str(fold), 'train'))
     val_writer = SummaryWriter(
         log_dir=os.path.join('tbx_log', current_time, str(fold), 'val'))
     best_val = []
     print('=' * 20, 'Fold', fold, '=' * 20)
     model = EfficientNet.from_pretrained(args.model, num_classes=4)
     # model = pretrainedmodels.se_resnext50_32x4d(num_classes=1000, pretrained='imagenet')
     # model.last_linear = nn.Linear(2048,3)
     model = model.to(device)
     train_set = Data(train_df.iloc[train_idx].reset_index(drop=True),
                      train_transform)
     val_set = Data(train_df.iloc[val_idx].reset_index(drop=True),
                    test_transform)
     train_loader = DataLoader(dataset=train_set,
                               batch_size=args.bs,
                               shuffle=True)
     val_loader = DataLoader(dataset=val_set, batch_size=16, shuffle=False)
     optim = torch.optim.Adam(model.parameters(), lr=0.0005)
     # optim = SWA(base_optim, swa_start=770, swa_freq=77, swa_lr=0.0001)
     scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
         optim, T_max=args.epoch + 5, eta_min=5e-6)
     # scheduler_warmup = GradualWarmupScheduler(optim, multiplier=1, total_epoch=5, after_scheduler=scheduler)
     for epoch in range(1, args.epoch + 1):
         model.train()
         running_loss = 0.0
         for i, data in enumerate(train_loader, 0):
Example #29
import torch
import loadModel
from dataset import Data
import Criterion


classifier = loadModel.load()
# device = 'cpu'
device = 'cuda:0'

trainData = Data(test=False)
criterion = Criterion.Criterion()

batch_size = 32
epochs = 200
alpha = 1e-2
momentum = 0.9

for epoch in range(epochs):
    if epoch == 60:
        alpha = 5e-3
    elif epoch == 100:
        alpha = 1e-3
    correct = 0
    count = 0
    for i in range(0, trainData.m, batch_size):
        # print i
        X, y = trainData.sample(batch_size, i)
        classifier.clearGradParam()
        y_pred = classifier.forward(X)
        # print y_pred
Example #30
                        if question.answers[idx].label:
                            ap_list.append(right_count / (ranking_idx + 1))
                            right_count += 1

                assert right_count - 1 == len(
                    question.right_answers
                ), 'Leave out some right answers: %s - %s' % (
                    right_count - 1, len(question.right_answers))

                map_list.append(sum(ap_list) / len(ap_list))

                index += len(question.answers)

            assert len(map_list) == len(
                data.questions
            ), 'Length not equal: MRR List: %s, Question List: %s' % (
                len(map_list), len(data.questions))
            self.score = sum(map_list) / len(map_list)
            return self.score


if __name__ == '__main__':
    d = Data('./data/nlpcc-iccpol-2016.dbqa.testing-data',
             allow_duplicate=True)
    mrr = MRR()
    s = mrr.calculate(d, './result/res.txt')
    print('MRR: ', s)
    m = MAP()
    s = m.calculate(d, './result/res.txt')
    print('MAP: ', s)