示例#1
0
def read_dataset_and_make_features(path):
    """Load one price CSV and derive per-day and per-month return features.

    The function relies on module-level settings: ``sep`` (CSV separator),
    ``date_col``, ``open_col``, ``close_col`` (column names) and
    ``n_days_to_retain`` (number of trailing rows kept for every month).

    Processing steps:
      1. Read only the date/open/close columns and parse the dates.
      2. Reject files with fewer than 390 rows.
      3. Drop rows with missing values and sort by date.
      4. For every calendar month keep only its last ``n_days_to_retain``
         rows; months with fewer rows are removed completely.
      5. For every remaining month compute the cumulative product of the
         daily close/open ratios (``DAYCUMRETURN``) and the ratio of the last
         close to the first open (``MONTHRETURN``).

    Args:
        path: Path of the CSV file to read.

    Returns:
        A DataFrame holding the open/close columns plus ``DAY``, ``MONTH``,
        ``ISJAN``, ``DAYCUMRETURN``, ``MONTHRETURN`` and ``STOCKNAME``, or
        ``None`` when the file cannot be parsed (``ValueError``), is too
        short, or the result still contains missing values.
    """
    try:
        df = pd.read_csv(path, sep=sep, parse_dates=[date_col],
                         usecols=[date_col, open_col, close_col])
    except ValueError:
        # Raised by read_csv, e.g. when the expected columns are absent.
        return None
    if df.shape[0] < 390:  # min ~13 months are needed.
        return None
    df.dropna(inplace=True)
    df.sort_values(by=date_col, inplace=True)

    dates = df[date_col].dt
    df['DAY'] = dates.day
    # Running month number counted from January 1970, so months belonging to
    # different years never share a value.
    df['MONTH'] = (dates.year - 1970) * 12 + dates.month
    df['DAYRETURN'] = df[close_col] / df[open_col]
    df['ISJAN'] = (dates.month == 1).astype(float)
    df.drop(date_col, axis=1, inplace=True)

    # Keep only the last n_days_to_retain rows of every month; months with
    # fewer rows than that are dropped entirely.
    idx_to_drop = []
    for m in df['MONTH'].drop_duplicates():
        idx = df.loc[df['MONTH'] == m].index.tolist()
        if len(idx) < n_days_to_retain:
            idx_to_drop.extend(idx)
        else:
            idx_to_drop.extend(idx[:len(idx) - n_days_to_retain])
    df.drop(idx_to_drop, axis=0, inplace=True)

    df['DAYCUMRETURN'] = 0.0
    df['MONTHRETURN'] = 0.0
    # Recompute the month list: some months may have been removed above.
    for m in df['MONTH'].drop_duplicates():
        mask = df['MONTH'] == m
        month_open = df.loc[mask, open_col].iloc[0]
        month_close = df.loc[mask, close_col].iloc[-1]
        # np.cumprod multiplies left to right, which matches the former
        # explicit loop and avoids the Python-2-only ``xrange``.
        df.loc[mask, 'DAYCUMRETURN'] = np.cumprod(
            df.loc[mask, 'DAYRETURN'].values)
        df.loc[mask, 'MONTHRETURN'] = month_close / month_open

    df.drop(['DAYRETURN'], axis=1, inplace=True)
    df['STOCKNAME'] = utils.get_fname(path).split('.')[0]
    if df.isnull().sum().sum() > 0:
        return None
    return df
示例#2
0
def main():
    """Train or load a GAT model, then report loss and accuracy on the test split.

    All settings come from ``utils.parse_args()``.

    * ``config['load']`` false: the model is trained on the 'train' split with
      Adam, printing running loss/accuracy every ``stats_per_batch`` batches,
      and is optionally saved (``config['save']``) into the
      ``trained_models`` directory next to the current working directory.
    * ``config['load']`` true: the weights are read from that same location.

    In both cases the model is then evaluated on the 'test' split and the
    mean per-batch loss and overall accuracy are printed.
    """
    config = utils.parse_args()

    if config['cuda'] and torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'

    dataset_args = (config['task'], config['dataset'], config['dataset_path'],
                    'train', config['num_layers'], config['self_loop'],
                    config['normalize_adj'], config['transductive'])
    dataset = utils.get_dataset(dataset_args)
    loader = DataLoader(dataset=dataset,
                        batch_size=config['batch_size'],
                        shuffle=True,
                        collate_fn=dataset.collate_wrapper)
    input_dim, output_dim = dataset.get_dims()

    model = models.GAT(input_dim, config['hidden_dims'], output_dim,
                       config['num_heads'], config['dropout'], device)
    model.to(device)

    if not config['load']:
        criterion = utils.get_criterion(config['task'])
        optimizer = optim.Adam(model.parameters(),
                               lr=config['lr'],
                               weight_decay=config['weight_decay'])
        epochs = config['epochs']
        stats_per_batch = config['stats_per_batch']
        num_batches = int(ceil(len(dataset) / config['batch_size']))
        model.train()
        print('--------------------------------')
        print('Training.')
        for epoch in range(epochs):
            print('Epoch {} / {}'.format(epoch + 1, epochs))
            running_loss = 0.0
            num_correct, num_examples = 0, 0
            for (idx, batch) in enumerate(loader):
                features, node_layers, mappings, rows, labels = batch
                features, labels = features.to(device), labels.to(device)
                optimizer.zero_grad()
                out = model(features, node_layers, mappings, rows)
                loss = criterion(out, labels)
                loss.backward()
                optimizer.step()
                # Statistics only; keep them out of the autograd graph.
                with torch.no_grad():
                    running_loss += loss.item()
                    predictions = torch.max(out, dim=1)[1]
                    num_correct += torch.sum(predictions == labels).item()
                    num_examples += len(labels)
                if (idx + 1) % stats_per_batch == 0:
                    running_loss /= stats_per_batch
                    accuracy = num_correct / num_examples
                    print('    Batch {} / {}: loss {}, accuracy {}'.format(
                        idx + 1, num_batches, running_loss, accuracy))
                    running_loss = 0.0
                    num_correct, num_examples = 0, 0
        print('Finished training.')
        print('--------------------------------')

        if config['save']:
            print('--------------------------------')
            directory = os.path.join(os.path.dirname(os.getcwd()),
                                     'trained_models')
            # exist_ok avoids the check-then-create race.
            os.makedirs(directory, exist_ok=True)
            fname = utils.get_fname(config)
            path = os.path.join(directory, fname)
            print('Saving model at {}'.format(path))
            torch.save(model.state_dict(), path)
            print('Finished saving model.')
            print('--------------------------------')

    if config['load']:
        directory = os.path.join(os.path.dirname(os.getcwd()),
                                 'trained_models')
        fname = utils.get_fname(config)
        path = os.path.join(directory, fname)
        # map_location lets a checkpoint saved on GPU be loaded on a
        # CPU-only machine (and vice versa).
        model.load_state_dict(torch.load(path, map_location=device))

    dataset_args = (config['task'], config['dataset'], config['dataset_path'],
                    'test', config['num_layers'], config['self_loop'],
                    config['normalize_adj'], config['transductive'])
    dataset = utils.get_dataset(dataset_args)
    loader = DataLoader(dataset=dataset,
                        batch_size=config['batch_size'],
                        shuffle=False,
                        collate_fn=dataset.collate_wrapper)
    criterion = utils.get_criterion(config['task'])
    stats_per_batch = config['stats_per_batch']
    num_batches = int(ceil(len(dataset) / config['batch_size']))
    model.eval()
    print('--------------------------------')
    print('Testing.')
    running_loss, total_loss = 0.0, 0.0
    num_correct, num_examples = 0, 0
    total_correct, total_examples = 0, 0
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for (idx, batch) in enumerate(loader):
            features, node_layers, mappings, rows, labels = batch
            features, labels = features.to(device), labels.to(device)
            out = model(features, node_layers, mappings, rows)
            batch_loss = criterion(out, labels).item()
            running_loss += batch_loss
            total_loss += batch_loss
            predictions = torch.max(out, dim=1)[1]
            batch_correct = torch.sum(predictions == labels).item()
            num_correct += batch_correct
            total_correct += batch_correct
            num_examples += len(labels)
            total_examples += len(labels)
            if (idx + 1) % stats_per_batch == 0:
                running_loss /= stats_per_batch
                accuracy = num_correct / num_examples
                print('    Batch {} / {}: loss {}, accuracy {}'.format(
                    idx + 1, num_batches, running_loss, accuracy))
                running_loss = 0.0
                num_correct, num_examples = 0, 0
    # Mean of the per-batch losses.
    total_loss /= num_batches
    total_accuracy = total_correct / total_examples
    print('Loss {}, accuracy {}'.format(total_loss, total_accuracy))
    print('Finished testing.')
    print('--------------------------------')
示例#3
0
    # Combine the frames in final_dfs into a single dataset; it is split into
    # train/test parts further down.
    merged = utils.merge_dataframes(final_dfs)
    # Sort by time (the 'MONTH' column).
    merged = utils.sort_by_month(merged, month_col='MONTH', drop=False)
    # Return value is unused, so this presumably cleans infinite values in
    # place -- verify against utils.drop_inf.
    utils.drop_inf(merged)
    # Normalize dataset: only the columns listed in day_month_cols are scaled;
    # the NaN counts printed before and after are a sanity check.
    # NOTE(review): the scaler appears to be fitted on the whole dataset,
    # i.e. before the train/test split below -- confirm this is intended.
    print "NaNs before normalizing dataset", merged.isnull().sum().sum()
    merged_scaled, scaler = utils.normalize_dataset(merged[day_month_cols])
    merged[day_month_cols] = merged_scaled
    print "NaNs after normalizing dataset", merged.isnull().sum().sum()

    # Positional split: the first train_size fraction of the (sorted) rows is
    # the train part, the remainder the test part. train_size == 1.0 means
    # no test part is produced.
    if train_size == 1.0:
        train, test = merged, None
    else:
        bound = int(train_size * merged.shape[0])
        train, test = merged[:bound], merged[bound:]

    # Output file names are derived from namespace.output: the text before the
    # first '.' of its file name gets '_train.csv', '_test.csv' and '_scaler'
    # appended.
    dirname = utils.get_dirname(namespace.output)
    out_fname = utils.get_fname(namespace.output).split('.')[0]
    train_fname = utils.get_path(out_fname + '_train.csv', dirname)
    test_fname = utils.get_path(out_fname + '_test.csv', dirname)
    scaler_fname = utils.get_path(out_fname + '_scaler', dirname)
    print "Writing train to %s" % train_fname
    train.to_csv(train_fname, index=False)
    if test is not None:
        print "Writing test to %s" % test_fname
        test.to_csv(test_fname, index=False)

    # Persist the scaler object returned by normalize_dataset alongside the
    # CSV files.
    print "Dumping scaler to %s" % scaler_fname
    utils.dump_scaler(scaler, scaler_fname)
    print "Done."
示例#4
0
import utils
import matplotlib.pyplot as plt  # for plotting
from scipy.spatial import Delaunay
import numpy as np
import sys

# Interactive driver: the loop repeats while exit_flag equals '1'. The flag is
# not reassigned in the visible part of the loop body.
exit_flag = '1'
while exit_flag == '1':
    # Ask which image category to use and take the first whitespace-separated
    # token of the reply. NOTE(review): an empty input line makes
    # split()[0] raise IndexError.
    print(
        "If you want to see a fish - enter f \nIf you want to see a bird - enter b"
    )
    flag = sys.stdin.readline().split()[0]
    # Ask for the image number (kept as a string).
    print("Enter number of the image \nbirds: 1 - 51, fish: 1-56")
    num = sys.stdin.readline().split()[0]

    # Resolve the data file for the chosen category/number and read its points.
    filename = utils.get_fname(flag, num)
    short_fname = filename.split('/')[-1]  # file name without directories
    pts = utils.read_file(filename)

    # Delaunay triangulation of the points, then keep a subset of triangles
    # chosen from edge-length statistics (mean, std and the factor 1.75) --
    # the exact selection rule lives in utils.select_tri_short_edges.
    tri = Delaunay(pts)
    triang_dist, mean_length, std_length = utils.get_tri_dist(tri, pts)
    tri_sel = utils.select_tri_short_edges(triang_dist, mean_length,
                                           std_length, 1.75)
    # Edges of the selected triangles, then the outer edges among them.
    edges = utils.get_tri_edges(tri_sel)
    m, points_m = utils.get_outer_edges(edges, pts)
    length_edges = utils.get_edges_with_length(m, pts)

    # Sum the second component of every entry -- presumably the edge length,
    # which would make p the outline's perimeter; verify against
    # utils.get_edges_with_length.
    p = 0
    for i in length_edges:
        p += i[1]
示例#5
0
def main(opt, weight_path: str):
    """Evaluate a trained classifier on CIFAR-10 and its corrupted variants.

    For every name in ``opt.corruptions`` the matching dataset is loaded
    ('natural' selects the CIFAR-10 test split, anything else a ``CIFAR10C``
    dataset), the mean of the per-batch top-1 accuracies is recorded, and
    finally the results plus their average are printed and drawn as a bar
    plot stored in ``opt.fig_dir``.

    Args:
        opt: Options object; the attributes read here are ``gpu_id``,
            ``arch``, ``corruptions``, ``data_root``, ``batch_size`` and
            ``fig_dir``.
        weight_path: Checkpoint file. It may hold either a bare state dict or
            a dict that stores the state dict under the key ``'model'``.

    Raises:
        ValueError: If ``opt.arch`` is not a supported architecture.
    """
    device = torch.device(opt.gpu_id)

    # model
    if opt.arch == 'resnet56':
        model = ResNet56()
    else:
        raise ValueError(f'unsupported architecture: {opt.arch!r}')

    # Load the file once and detect the checkpoint layout explicitly instead
    # of relying on a bare ``except`` that would hide unrelated errors.
    checkpoint = torch.load(weight_path, map_location='cpu')
    if isinstance(checkpoint, dict) and 'model' in checkpoint:
        checkpoint = checkpoint['model']
    model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(MEAN, STD)])

    accs = dict()
    with tqdm(total=len(opt.corruptions), ncols=80) as pbar:
        for cname in opt.corruptions:
            # load dataset
            if cname == 'natural':
                dataset = datasets.CIFAR10(
                    os.path.join(opt.data_root, 'cifar10'),
                    train=False,
                    transform=transform,
                    download=True,
                )
            else:
                dataset = CIFAR10C(os.path.join(opt.data_root, 'cifar10-c'),
                                   cname,
                                   transform=transform)
            loader = DataLoader(dataset,
                                batch_size=opt.batch_size,
                                shuffle=False,
                                num_workers=4)

            acc_meter = AverageMeter()
            with torch.no_grad():
                for x, y in loader:
                    x = x.to(device, non_blocking=True)
                    y = y.to(device, dtype=torch.int64, non_blocking=True)

                    z = model(x)
                    # Only the top-1 value is tracked; top-5 is discarded.
                    acc, _ = accuracy(z, y, topk=(1, 5))
                    acc_meter.update(acc.item())

            accs[f'{cname}'] = acc_meter.avg

            pbar.set_postfix_str(f'{cname}: {acc_meter.avg:.2f}')
            pbar.update()

    # Unweighted mean over all evaluated datasets.
    avg = np.mean(list(accs.values()))
    accs['avg'] = avg

    pprint.pprint(accs)
    save_name = get_fname(weight_path)
    create_barplot(accs, save_name + f' / avg={avg:.2f}',
                   os.path.join(opt.fig_dir, save_name + '.png'))
示例#6
0
def _score_batch(model, batch, device):
    """Run the model on one collated batch and return ``(scores, labels)``.

    The scores are the entries of ``out @ out.T`` (``out`` being the model
    output) selected by ``edges.T``; labels are moved to ``device``.
    """
    edges, features, node_layers, mappings, rows, labels = batch
    features, labels = features.to(device), labels.to(device)
    out = model(features, node_layers, mappings, rows)
    all_pairs = torch.mm(out, out.t())
    return all_pairs[edges.T], labels


def _collect_scores(model, loader, device, num_batches):
    """Score every batch of ``loader`` without gradients, printing progress.

    Returns a pair of flat numpy arrays ``(y_true, y_scores)``.
    """
    y_true, y_scores = [], []
    with torch.no_grad():
        for (idx, batch) in enumerate(loader):
            scores, labels = _score_batch(model, batch, device)
            y_true.extend(labels.detach().cpu().numpy())
            y_scores.extend(scores.detach().cpu().numpy())
            print('    Batch {} / {}'.format(idx + 1, num_batches))
    return np.array(y_true).flatten(), np.array(y_scores).flatten()


def _report_roc_auc(model, loader, device, num_batches, title):
    """Print ``title`` and the ROC-AUC of ``loader``; return labels and scores."""
    print('--------------------------------')
    print(title)
    y_true, y_scores = _collect_scores(model, loader, device, num_batches)
    area = roc_auc_score(y_true, y_scores)
    print('ROC-AUC score: {:.4f}'.format(area))
    print('--------------------------------')
    return y_true, y_scores


def _choose_threshold(y_true, y_scores, show_plot):
    """Return the score threshold at the last ROC point where TPR <= TNR.

    When ``show_plot`` is true the TPR and TNR curves are also displayed.
    """
    # roc_curve returns (fpr, tpr, thresholds) -- in that order.
    fpr, tpr, thresholds = roc_curve(y_true, y_scores)
    tnr = 1 - fpr
    if show_plot:
        plt.plot(thresholds, tpr, label='tpr')
        plt.plot(thresholds, tnr, label='tnr')
        plt.xlabel('Threshold')
        plt.title('TPR / TNR vs Threshold')
        plt.legend()
        plt.show()
    balanced = np.where(tpr <= tnr)[0]
    return float(thresholds[balanced[-1]])


def _evaluate_split(model, config, device, split, t, title, footer):
    """Evaluate ``model`` on the dataset split ``split`` using threshold ``t``.

    Prints running loss/accuracy every ``stats_per_batch`` batches, then the
    mean per-batch loss, overall accuracy, ROC-AUC and classification report.
    """
    dataset_args = (config['task'], config['dataset'],
                    config['dataset_path'],
                    config['generate_neg_examples'], split,
                    config['duplicate_examples'],
                    config['repeat_examples'], config['num_layers'],
                    config['self_loop'], config['normalize_adj'])
    dataset = utils.get_dataset(dataset_args)
    loader = DataLoader(dataset=dataset,
                        batch_size=config['batch_size'],
                        shuffle=False,
                        collate_fn=dataset.collate_wrapper)
    criterion = utils.get_criterion(config['task'])
    stats_per_batch = config['stats_per_batch']
    num_batches = int(ceil(len(dataset) / config['batch_size']))
    model.eval()
    print('--------------------------------')
    print(title)
    running_loss, total_loss = 0.0, 0.0
    num_correct, num_examples = 0, 0
    total_correct, total_examples = 0, 0
    y_true, y_scores, y_pred = [], [], []
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for (idx, batch) in enumerate(loader):
            scores, labels = _score_batch(model, batch, device)
            batch_loss = criterion(scores, labels.float()).item()
            running_loss += batch_loss
            total_loss += batch_loss
            predictions = (scores >= t).long()
            batch_correct = torch.sum(predictions == labels.long()).item()
            num_correct += batch_correct
            total_correct += batch_correct
            num_examples += len(labels)
            total_examples += len(labels)
            y_true.extend(labels.detach().cpu().numpy())
            y_scores.extend(scores.detach().cpu().numpy())
            y_pred.extend(predictions.detach().cpu().numpy())
            if (idx + 1) % stats_per_batch == 0:
                running_loss /= stats_per_batch
                accuracy = num_correct / num_examples
                print('    Batch {} / {}: loss {:.4f}, accuracy {:.4f}'.format(
                    idx + 1, num_batches, running_loss, accuracy))
                # ROC-AUC of the current batch is only defined when both
                # classes are present in it.
                if (torch.sum(labels.long() == 0).item() >
                        0) and (torch.sum(labels.long() == 1).item() > 0):
                    area = roc_auc_score(labels.detach().cpu().numpy(),
                                         scores.detach().cpu().numpy())
                    print('    ROC-AUC score: {:.4f}'.format(area))
                running_loss = 0.0
                num_correct, num_examples = 0, 0
    total_loss /= num_batches
    total_accuracy = total_correct / total_examples
    print('Loss {:.4f}, accuracy {:.4f}'.format(total_loss,
                                                total_accuracy))
    y_true = np.array(y_true).flatten()
    y_scores = np.array(y_scores).flatten()
    y_pred = np.array(y_pred).flatten()
    report = classification_report(y_true, y_pred)
    area = roc_auc_score(y_true, y_scores)
    print('ROC-AUC score: {:.4f}'.format(area))
    print('Classification report\n', report)
    print(footer)
    print('--------------------------------')


def main():
    """Train or load a link-prediction model and report its metrics.

    Settings come from ``utils.parse_args()``.

    * ``config['load']`` false: report the training-set ROC-AUC of the
      untrained model, train it, optionally save it, report the training-set
      ROC-AUC again, plot TPR/TNR against the threshold, pick a decision
      threshold and print a classification report on the training set.
    * ``config['load']`` true: load the saved weights, pick the decision
      threshold on the training set (previously the threshold was undefined
      in this mode and evaluation crashed), then evaluate on the 'val' and
      'test' splits.
    """
    # Set up arguments for datasets, models and training.
    config = utils.parse_args()
    config['num_layers'] = len(config['hidden_dims']) + 1

    if config['cuda'] and torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    config['device'] = device

    # Get the dataset, dataloader and model.
    dataset_args = (config['task'], config['dataset'], config['dataset_path'],
                    config['generate_neg_examples'], 'train',
                    config['duplicate_examples'], config['repeat_examples'],
                    config['num_layers'], config['self_loop'],
                    config['normalize_adj'])
    dataset = utils.get_dataset(dataset_args)

    loader = DataLoader(dataset=dataset,
                        batch_size=config['batch_size'],
                        shuffle=True,
                        collate_fn=dataset.collate_wrapper)
    input_dim, output_dim = dataset.get_dims()

    if config['model'] == 'GraphSAGE':
        agg_class = utils.get_agg_class(config['agg_class'])
        model = models.GraphSAGE(input_dim, config['hidden_dims'], output_dim,
                                 config['dropout'], agg_class,
                                 config['num_samples'], config['device'])
    else:
        model = models.GAT(input_dim, config['hidden_dims'], output_dim,
                           config['num_heads'], config['dropout'],
                           config['device'])
        model.apply(models.init_weights)
    model.to(config['device'])
    print(model)

    num_batches = int(ceil(len(dataset) / config['batch_size']))

    if not config['load']:
        # Compute ROC-AUC score for the untrained model (in eval mode so that
        # dropout does not perturb the scores).
        model.eval()
        _report_roc_auc(
            model, loader, device, num_batches,
            'Computing ROC-AUC score for the training dataset before training.'
        )

        # Train.
        use_visdom = config['visdom']
        if use_visdom:
            vis = visdom.Visdom()
            loss_window = None
        criterion = utils.get_criterion(config['task'])
        optimizer = optim.Adam(model.parameters(),
                               lr=config['lr'],
                               weight_decay=config['weight_decay'])
        epochs = config['epochs']
        stats_per_batch = config['stats_per_batch']
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=[300, 600],
                                                   gamma=0.5)
        model.train()
        print('--------------------------------')
        print('Training.')
        for epoch in range(epochs):
            print('Epoch {} / {}'.format(epoch + 1, epochs))
            running_loss = 0.0
            for (idx, batch) in enumerate(loader):
                optimizer.zero_grad()
                scores, labels = _score_batch(model, batch, device)
                loss = criterion(scores, labels.float())
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                if (idx + 1) % stats_per_batch == 0:
                    running_loss /= stats_per_batch
                    print('    Batch {} / {}: loss {:.4f}'.format(
                        idx + 1, num_batches, running_loss))
                    # ROC-AUC of the latest batch, only when it contains
                    # both classes.
                    if (torch.sum(labels.long() == 0).item() >
                            0) and (torch.sum(labels.long() == 1).item() > 0):
                        area = roc_auc_score(labels.detach().cpu().numpy(),
                                             scores.detach().cpu().numpy())
                        print('    ROC-AUC score: {:.4f}'.format(area))
                    running_loss = 0.0
                if use_visdom:
                    if loss_window is None:
                        loss_window = vis.line(Y=[loss.item()],
                                               X=[epoch * num_batches + idx],
                                               opts=dict(xlabel='batch',
                                                         ylabel='Loss',
                                                         title='Training Loss',
                                                         legend=['Loss']))
                    else:
                        vis.line([loss.item()], [epoch * num_batches + idx],
                                 win=loss_window,
                                 update='append')
                # NOTE(review): stepped once per batch, so the milestones
                # [300, 600] are counted in batches, not epochs -- confirm
                # this is intended.
                scheduler.step()
        if use_visdom:
            vis.close(win=loss_window)
        print('Finished training.')
        print('--------------------------------')

        if config['save']:
            print('--------------------------------')
            directory = os.path.join(os.path.dirname(os.getcwd()),
                                     'trained_models')
            if not os.path.exists(directory):
                os.makedirs(directory)
            fname = utils.get_fname(config)
            path = os.path.join(directory, fname)
            print('Saving model at {}'.format(path))
            torch.save(model.state_dict(), path)
            print('Finished saving model.')
            print('--------------------------------')

        # Compute ROC-AUC score after training. Switch to eval mode first so
        # the scores (and the threshold derived from them) are not affected
        # by dropout.
        model.eval()
        y_true, y_scores = _report_roc_auc(
            model, loader, device, num_batches,
            'Computing ROC-AUC score for the training dataset after training.'
        )

        # Plot the true positive rate and true negative rate vs threshold and
        # choose an appropriate threshold.
        t = _choose_threshold(y_true, y_scores, show_plot=True)

        # Generate classification report on the train set.
        total_correct, total_examples = 0, 0
        y_true, y_pred = [], []
        with torch.no_grad():
            for (idx, batch) in enumerate(loader):
                scores, labels = _score_batch(model, batch, device)
                predictions = (scores >= t).long()
                y_true.extend(labels.detach().cpu().numpy())
                y_pred.extend(predictions.detach().cpu().numpy())
                total_correct += torch.sum(predictions == labels.long()).item()
                total_examples += len(labels)
                print('    Batch {} / {}'.format(idx + 1, num_batches))
        print('Threshold: {:.4f}, accuracy: {:.4f}'.format(
            t, total_correct / total_examples))
        y_true = np.array(y_true).flatten()
        y_pred = np.array(y_pred).flatten()
        report = classification_report(y_true, y_pred)
        print('Classification report\n', report)
    else:
        directory = os.path.join(os.path.dirname(os.getcwd()),
                                 'trained_models')
        fname = utils.get_fname(config)
        path = os.path.join(directory, fname)
        # map_location lets a GPU-saved checkpoint load on a CPU-only host.
        model.load_state_dict(torch.load(path, map_location=device))
        model.eval()

        # The decision threshold is not stored with the weights, so derive it
        # from the training set exactly as the training path does.
        print('--------------------------------')
        print('Selecting the classification threshold on the training dataset.')
        y_true, y_scores = _collect_scores(model, loader, device, num_batches)
        t = _choose_threshold(y_true, y_scores, show_plot=False)
        print('Threshold: {:.4f}'.format(t))
        print('--------------------------------')

        # Evaluate on the validation set.
        _evaluate_split(
            model, config, device, 'val', t,
            'Computing ROC-AUC score for the validation dataset after training.',
            'Finished validating.')

        # Evaluate on test set.
        _evaluate_split(
            model, config, device, 'test', t,
            'Computing ROC-AUC score for the test dataset after training.',
            'Finished testing.')
示例#7
0
def main():
    """Train or load a GCN with full-batch updates and print its test accuracy.

    Settings come from ``utils.parse_args()``. When ``config['load']`` is
    false the model is trained for ``config['epochs']`` full-batch steps on
    the training indices and optionally saved (``config['save']``) into the
    ``trained_models`` directory next to the current working directory;
    otherwise the weights are read from that location. The accuracy on the
    test indices is printed in both cases.
    """
    config = utils.parse_args()

    if config['cuda'] and torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'

    dataset_args = (config['task'], config['dataset'], config['dataset_path'],
                    config['num_layers'], config['self_loop'],
                    config['normalize_adj'])
    dataset = utils.get_dataset(dataset_args)

    input_dim, output_dim = dataset.get_dims()
    adj, features, labels, idx_train, idx_val, idx_test = dataset.get_data()
    # The model is moved to `device` below, so its inputs and targets have to
    # live on the same device (assumes get_data() returns torch tensors --
    # TODO confirm). On CPU these calls are no-ops.
    x = features.to(device)
    adj = adj.to(device)
    labels = labels.to(device)
    y_train = labels[idx_train]
    y_test = labels[idx_test]

    model = models.GCN(input_dim, config['hidden_dims'], output_dim,
                       config['dropout'])
    model.to(device)

    if not config['load']:
        criterion = utils.get_criterion(config['task'])
        optimizer = optim.Adam(model.parameters(),
                               lr=config['lr'],
                               weight_decay=config['weight_decay'])
        epochs = config['epochs']
        model.train()
        print('--------------------------------')
        print('Training.')
        for epoch in range(epochs):
            optimizer.zero_grad()
            # Full-batch forward pass; only the training rows enter the loss.
            scores = model(x, adj)[idx_train]
            loss = criterion(scores, y_train)
            loss.backward()
            optimizer.step()
            predictions = torch.max(scores, dim=1)[1]
            num_correct = torch.sum(predictions == y_train).item()
            accuracy = num_correct / len(y_train)
            print('    Training epoch: {}, loss: {:.3f}, accuracy: {:.2f}'.
                  format(epoch + 1, loss.item(), accuracy))
        print('Finished training.')
        print('--------------------------------')

        if config['save']:
            print('--------------------------------')
            directory = os.path.join(os.path.dirname(os.getcwd()),
                                     'trained_models')
            if not os.path.exists(directory):
                os.makedirs(directory)
            fname = utils.get_fname(config)
            path = os.path.join(directory, fname)
            print('Saving model at {}'.format(path))
            torch.save(model.state_dict(), path)
            print('Finished saving model.')
            print('--------------------------------')

    if config['load']:
        directory = os.path.join(os.path.dirname(os.getcwd()),
                                 'trained_models')
        fname = utils.get_fname(config)
        path = os.path.join(directory, fname)
        # map_location lets a GPU-saved checkpoint load on a CPU-only host.
        model.load_state_dict(torch.load(path, map_location=device))
    model.eval()
    print('--------------------------------')
    print('Testing.')
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        scores = model(x, adj)[idx_test]
        predictions = torch.max(scores, dim=1)[1]
        num_correct = torch.sum(predictions == y_test).item()
    accuracy = num_correct / len(y_test)
    print('    Test accuracy: {}'.format(accuracy))
    print('Finished testing.')
    print('--------------------------------')