Example #1
def recombination():
    """
    Detect recombination in VGS data. FASTQ files containing the data are read from
    an input folder and processed to return the sequences before and after a seed sequence.
    The output is one CSV file per FASTQ file and seed sequence.
    """
    setup_dirs(params)
    input_files = get_fastq_files(params)
    output_files = []
    for input_file in input_files:
        for seed_sequence_name, seed_sequence in params.seed_sequences.items():
            L.info('Processing file: {} with seed sequence: {}...'.format(
                input_file, seed_sequence_name))
            processor = Processor(input_file, seed_sequence_name,
                                  seed_sequence.upper())
            try:
                output_file = processor.process_and_write_to_file(
                    params.output_folder)
                if output_file:
                    output_files.append(output_file)
            except Exception as e:
                L.error('    >> Error: {}. Ignoring file...'.format(e))

    L.info('\nDone! Your output is in the following {} files:'.format(
        len(output_files)))
    for output_file in output_files:
        L.info(output_file)
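
Every example on this page calls a project-specific `setup_dirs` helper that the listings do not show. A minimal sketch of what such a helper might do for Examples #1 and #4, assuming `params` exposes `input_folder` and `output_folder` attributes (both names are assumptions, not confirmed by the source):

import os

def setup_dirs(params):
    # Create the input and output folders if they do not exist yet.
    os.makedirs(params.input_folder, exist_ok=True)
    os.makedirs(params.output_folder, exist_ok=True)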
Example #2
def process_dir(root_dir, subdir, template):
    curr_dir = os.path.join(root_dir, subdir)

    # Look for template in current dir
    template_file = os.path.join(curr_dir, config.TEMPLATE_FILE)
    if os.path.exists(template_file):
        template = Template(template_file)

    # look for images in current dir to process
    paths = config.Paths(os.path.join(args['output_dir'], subdir))
    exts = ('*.png', '*.jpg')
    omr_files = sorted(
        [f for ext in exts for f in glob(os.path.join(curr_dir, ext))])

    # Exclude marker image if exists
    if (template and template.marker_path):
        omr_files = [f for f in omr_files if f != template.marker_path]
        print("\n\n\nMarker Found\n\n\n")

    subfolders = sorted([
        file for file in os.listdir(curr_dir)
        if os.path.isdir(os.path.join(curr_dir, file))
    ])
    if omr_files:
        if not template:
            print(f'Error: No template file when processing {curr_dir}.')
            print(
                f'  Place {config.TEMPLATE_FILE} in the directory or specify a template using -t.'
            )
            return

        args_local = args.copy()
        if "OverrideFlags" in template.options:
            args_local.update(template.options["OverrideFlags"])
        print(
            '\n------------------------------------------------------------------'
        )
        print(f'Processing directory {curr_dir} with settings- ')
        print("\tTotal images       : %d" % len(omr_files))
        print("\tCropping Enabled   : " + str(not args_local["noCropping"]))
        print("\tAuto Alignment     : " + str(args_local["autoAlign"]))
        print("\tUsing Template     : " + str(template.path))
        print("\tUsing Marker       : " +
              (str(template.marker_path)
               if template.marker is not None else "N/A"))
        print('')

        utils.setup_dirs(paths)
        output_set = setup_output(paths, template)
        return process_files(omr_files, template, args_local, output_set)
    elif len(subfolders) == 0:
        # A leaf directory is expected to contain images.
        print(f'Note: No valid images or subfolders found in {curr_dir}')

    # recursively process subfolders
    results_lists = []
    for folder in subfolders:
        results_lists.append(
            process_dir(root_dir, os.path.join(subdir, folder), template))
    return results_lists
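
process_dir returns the result of process_files for a leaf directory with images, None on an error or empty leaf, and a nested list of per-subfolder results otherwise, so callers usually flatten the recursion's output. A small hedged helper for that (hypothetical, assuming process_files returns a single, non-list result object):

def flatten_results(results):
    # Recursively flatten nested lists, dropping None entries.
    if results is None:
        return []
    if isinstance(results, list):
        flat = []
        for item in results:
            flat.extend(flatten_results(item))
        return flat
    return [results]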
Example #3
def process_dir(root_dir, subdir, template, kesme_islemi, onizleme):
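    # Turkish identifiers kept from the source: kesme_islemi ("cropping
    # operation") and onizleme ("preview").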
    curr_dir = os.path.join(root_dir, subdir)
    args['noCropping'] = bool(kesme_islemi)
    # Look for template in current dir
    template_file = os.path.join(curr_dir, config.TEMPLATE_FILE)
    if os.path.exists(template_file):
        template = Template(template_file)

    # look for images in current dir to process
    paths = config.Paths(os.path.join(args['output_dir'], subdir))   
    exts = ('*.png', '*.jpg')
    omr_files = sorted(
        [f for ext in exts for f in glob(os.path.join(curr_dir, ext))])

    # Exclude marker image if exists
    if template and template.marker_path:
        omr_files = [f for f in omr_files if f != template.marker_path]

    subfolders = sorted([file for file in os.listdir(
        curr_dir) if os.path.isdir(os.path.join(curr_dir, file))])
    if omr_files:
        if not template:
            print(f'Error: No template found when processing {curr_dir}.')
            print(f'  Place {config.TEMPLATE_FILE} in the working directory or specify one with the -t parameter.')
            return

        args_local = args.copy()
        if "OverrideFlags" in template.options:
            args_local.update(template.options["OverrideFlags"])
        print('\n------------------------------------------------------------------')
        print(f'Processing directory "{curr_dir}" with settings- ')
        print("\tTotal images        : %d" % len(omr_files))
        print("\tCropping Enabled    : " + str(not args_local["noCropping"]))
        print("\tAuto Alignment      : " + str(args_local["autoAlign"]))
        print("\tUsing Template      : " + str(template.path))
        print("\tUsing Marker        : " +
              (str(template.marker_path)
               if template.marker is not None else "N/A"))
        print('')

        utils.setup_dirs(paths)
        output_set = setup_output(paths, template)
        process_files(omr_files, template, args_local, output_set, onizleme)
    elif len(subfolders) == 0:
        # A leaf directory is expected to contain images.
        print(f'Note: No valid images or subfolders found in {curr_dir}.')

    # recursively process subfolders
    for folder in subfolders:
        process_dir(root_dir, os.path.join(subdir, folder), template, kesme_islemi, onizleme)
Example #4
def serotypes():
    """
    Report specific sequences in VGS data. This command reads FASTQ files from the input folder
    specified in Serotype_Report_Parameters, extracts the reads containing each sequence and counts the occurrences.
    The Research Team is using this tool to detect specific sequences in the capsid genes,
    thereby differentiating between various serotypes.
    """
    start = datetime.now()
    setup_dirs(params)
    input_files = get_fastq_files(params)
    L.info('Found {} FASTQ files.'.format(len(input_files)))
    processor = Processor(input_files)
    processor.process()
    L.info('\nDone! The following two files were created:\n')
    L.info(processor.full_output_file)
    L.info(processor.summary_output_file)
    delta = datetime.now() - start
    L.info('\nCommand took {} seconds.'.format(delta.total_seconds()))
Example #5
def main(config):
    setup_dirs(config)
    
    torch.manual_seed(config.random_seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(config.random_seed)
    
    data_loader = get_dataloader( 
        data_dir=config.data_dir, batch_size=config.batch_size,
        random_seed=config.random_seed, is_train=config.train,
        valid_size=config.valid_size, shuffle=config.shuffle,
        pin_memory=torch.cuda.is_available(), 
        cluttered_translated=config.cluttered_translated
    )
    
    trainer = Trainer(config, data_loader)
    
    if config.train:
        trainer.train()
    else:
        trainer.test()
    
    return
Example #6
def train(wandb_track,
          experiment_name,
          epochs,
          task,
          gpu_num=0,
          pretrained='',
          margin=0.4,
          losstype='deepcca'):
    """Train joint embedding networks."""

    epochs = int(epochs)
    gpu_num = int(gpu_num)
    margin = float(margin)

    # Setup the results and device.
    results_dir = setup_dirs(experiment_name)
    if not os.path.exists(results_dir + 'train_results/'):
        os.makedirs(results_dir + 'train_results/')
    train_results_dir = results_dir + 'train_results/'
    device = setup_device(gpu_num)

    #### Hyperparameters #####
    #Initialize wandb
    if wandb_track == 1:
        import wandb
        wandb.init(project=experiment_name)
        config = wandb.config
        config.epochs = epochs

    with open(results_dir + 'hyperparams_train.txt', 'w') as f:
        f.write('Command used to run: python ')
        f.write(' '.join(sys.argv))
        f.write('\n')
        f.write('device in use: ' + str(device))
        f.write('\n')
        f.write('--experiment_name ' + str(experiment_name))
        f.write('\n')
        f.write('--epochs ' + str(epochs))
        f.write('\n')

    # Setup data loaders and models.
    if task == 'cifar10':
        train_loader, test_loader = cifar10_loaders()
        model_A = CIFAREmbeddingNet()
        model_B = CIFAREmbeddingNet()
    elif task == 'mnist':
        train_loader, test_loader = mnist_loaders()
        model_A = MNISTEmbeddingNet()
        model_B = MNISTEmbeddingNet()
    elif task == 'uw':
        uw_data = 'bert'
        train_loader, test_loader = uw_loaders(uw_data)
        if uw_data == 'bert':
            model_A = RowNet(3072, embed_dim=1024)  # Language.
            model_B = RowNet(4096, embed_dim=1024)  # Vision.

    # Finish model setup.
    if pretrained == 'pretrained':  # Load pretrained models to continue training.
        print('Starting from pretrained networks.')
        model_A.load_state_dict(
            torch.load(train_results_dir + 'model_A_state.pt'))
        model_B.load_state_dict(
            torch.load(train_results_dir + 'model_B_state.pt'))
    else:
        print('Starting from scratch to train networks.')

    model_A.to(device)
    model_B.to(device)

    # Initialize the optimizers and loss function.
    optimizer_A = torch.optim.Adam(model_A.parameters(), lr=0.00001)
    optimizer_B = torch.optim.Adam(model_B.parameters(), lr=0.00001)

    # Add learning rate scheduling.
    def lr_lambda(e):
        if e < 50:
            return 0.001
        elif e < 100:
            return 0.0001
        else:
            return 0.00001
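
    # Note: LambdaLR multiplies the optimizer's base lr by lr_lambda(epoch),
    # so with the base lr of 0.00001 above, the effective rates are 1e-8,
    # 1e-9 and 1e-10 rather than the absolute values the lambda suggests.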

    scheduler_A = torch.optim.lr_scheduler.LambdaLR(optimizer_A, lr_lambda)
    scheduler_B = torch.optim.lr_scheduler.LambdaLR(optimizer_B, lr_lambda)

    # Track batch losses.
    loss_hist = []

    # Put models into training mode.
    model_A.train()
    model_B.train()

    # Train.
    # wandb
    if wandb_track == 1:
        wandb.watch(model_A, log="all")
        wandb.watch(model_B, log="all")
    epoch_list = []  # in order to save epoch in a pickle file
    loss_list = []  # in order to save loss in a pickle file
    for epoch in tqdm(range(epochs)):
        epoch_loss = 0.0
        counter = 0
        for data in train_loader:
            data_a = data[0].to(device)
            data_b = data[1].to(device)
            #label = data[2]

            # Zero the parameter gradients.
            optimizer_A.zero_grad()
            optimizer_B.zero_grad()

            # Forward.
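            # Only 'deepcca' defines a batch loss below, so this loop assumes
            # losstype == 'deepcca'; the 'CCA' variant is fit after training.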
            if losstype == 'deepcca':  # Based on Galen Andrew's Deep CCA
                # data_a is from domain A, and data_b is the paired data from domain B.
                embedding_a = model_A(data_a)
                embedding_b = model_B(data_b)
                loss = deepcca(embedding_a,
                               embedding_b,
                               device,
                               use_all_singular_values=True,
                               outdim_size=128)

            # Backward.
            loss.backward()

            # Update.
            optimizer_A.step()
            optimizer_B.step()

            # Save batch loss. Since we are minimizing -corr the loss is negative.
            loss_hist.append(-1 * loss.item())

            epoch_loss += embedding_a.shape[0] * loss.item()

            # Report progress.
            counter += 1
            if counter % 64 == 0:
                print('epoch:', epoch, 'loss:', loss.item())
                if wandb_track == 1:
                    wandb.log({"epoch": epoch, "loss": loss})

        # Save network state at each epoch.
        torch.save(model_A.state_dict(),
                   train_results_dir + 'model_A_state.pt')
        torch.save(model_B.state_dict(),
                   train_results_dir + 'model_B_state.pt')

        # counter equals len(train_loader), so this averages the per-batch loss.
        print('*********** epoch is finished ***********')
        epoch_loss = -1 * epoch_loss
        print('epoch: ', epoch, 'loss(correlation): ', (epoch_loss) / counter)
        epoch_list.append(epoch + 1)
        loss_list.append(epoch_loss / counter)
        pickle.dump(([epoch_list, loss_list]),
                    open(train_results_dir + 'epoch_loss.pkl', "wb"))
        Visualize(train_results_dir + 'epoch_loss.pkl', 'Correlation History',
                  True, 'epoch', 'Correlation (log scale)', None, 'log', None,
                  (14, 7), train_results_dir + 'Figures/')
        # Update learning rate schedulers.
        scheduler_A.step()
        scheduler_B.step()

    # Plot and save batch loss history.
    pickle.dump(([loss_hist[::10]]),
                open(train_results_dir + 'epoch_corr.pkl', "wb"))
    Visualize(train_results_dir + 'epoch_corr.pkl', 'Correlation Batch', False,
              'Batch', 'Correlation (log scale)', None, 'log', None, (14, 7),
              train_results_dir + 'Figures/')

    #### Learn the transformations for CCA ####
    if losstype == "CCA":
        a_base = []
        b_base = []
        no_model = True

        if no_model:  # without using model: using raw data without featurization
            for data in train_loader:
                x = data[0].to(device)
                y = data[1].to(device)
                if task == 'uw':
                    a_base.append(x)
                    b_base.append(y)
                else:
                    a_base.append(x.cpu().detach().numpy())
                    b_base.append(y.cpu().detach().numpy())
        else:
            import torchvision.models as models
            #Either use these models, or use trained models with triplet loss
            res18_model = models.resnet18(pretrained=True)
            # Change the first layer of ResNet to accept 1-channel images instead of 3.
            res18_model.conv1 = torch.nn.Conv2d(1,
                                                64,
                                                kernel_size=7,
                                                stride=2,
                                                padding=3,
                                                bias=False)
            # Select the desired layers
            model_A = torch.nn.Sequential(*list(res18_model.children())[:-2])
            model_B = torch.nn.Sequential(*list(res18_model.children())[:-2])
            model_A.eval()
            model_B.eval()
            for data in train_loader:
                x = data[0].to(device)  # Domain A
                y = data[1].to(device)  # Domain B
                a_base.append(model_A(x).cpu().detach().numpy())
                b_base.append(model_B(y).cpu().detach().numpy())

        # Concatenate predictions.
        a_base = np.concatenate(a_base, axis=0)
        b_base = np.concatenate(b_base, axis=0)
        a_base = np.squeeze(a_base)
        b_base = np.squeeze(b_base)

        if no_model:
            new_a_base = []
            new_b_base = []
            for i in range(len(a_base)):
                new_a_base.append(a_base[i, :, :].flatten())
                new_b_base.append(b_base[i, :, :].flatten())
            new_a_base = np.asarray(new_a_base)
            new_b_base = np.asarray(new_b_base)
            a_base = new_a_base
            b_base = new_b_base

            print('Finished reshaping data, the shape is:', new_a_base.shape)

        from sklearn.cross_decomposition import CCA
        from joblib import dump
        components = 128
        cca = CCA(n_components=components, max_iter=5000)
        cca.fit(a_base, b_base)
        dump(cca, 'Learned_CCA.joblib')
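        # Hedged usage note (not in the original script): the saved transform
        # can later be applied to new paired data with, e.g.,
        #   cca = joblib.load('Learned_CCA.joblib')
        #   a_c, b_c = cca.transform(a_new, b_new)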
    #### End of CCA fit to find the transformations ####

    print('Training Done!')
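
For reference, a hedged example of calling `train` directly with the signature above (all argument values are illustrative, not taken from the source):

train(wandb_track=0,
      experiment_name='uw_deepcca',
      epochs=150,
      task='uw',
      gpu_num=0,
      losstype='deepcca')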
Example #7
import argparse

from args import init_parser, post_processing
import numpy as np
import torch
from train import train_policy
from evaluate import evaluate_policy
from utils import setup_dirs
import random
from envs import make_env

parser = argparse.ArgumentParser(description='SPC')
init_parser(parser)  # See `args.py` for default arguments
args = parser.parse_args()
args = post_processing(args)

if __name__ == '__main__':
    setup_dirs(args)
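    # Seed every RNG source (PyTorch, NumPy, stdlib random) for reproducibility.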
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    if 'carla8' in args.env:
        # run spc on carla0.9 simulator, currently only 0.9.4 is supported
        from envs.CARLA.carla.client import make_carla_client
        from envs.CARLA.carla8 import CarlaEnv
        with make_carla_client('localhost', args.port, 10000) as client:
            env = CarlaEnv(client)
            if args.eval:
                evaluate_policy(args, env)
            else:
                train_policy(args, env, max_steps=args.max_steps)
    else:
        # Truncated in the original listing. Presumably the non-CARLA branch
        # builds the environment with the imported (but otherwise unused)
        # `make_env` and dispatches the same way -- an assumption, not
        # confirmed by the source.
        env = make_env(args)
        if args.eval:
            evaluate_policy(args, env)
        else:
            train_policy(args, env, max_steps=args.max_steps)
Example #8
def test(experiment_name,
         task,
         gpu_num=0,
         pretrained='',
         margin=0.4,
         losstype='deepcca'):
    cosined = False
    embed_dim = 1024
    gpu_num = int(gpu_num)
    margin = float(margin)

    # Setup the results and device.
    results_dir = setup_dirs(experiment_name)
    if not os.path.exists(results_dir + 'test_results/'):
        os.makedirs(results_dir + 'test_results/')
    test_results_dir = results_dir + 'test_results/'

    device = setup_device(gpu_num)

    #### Hyperparameters #####
    #Initialize wandb
    #import wandb
    #wandb.init(project=experiment_name)
    #config = wandb.config

    with open(results_dir + 'hyperparams_test.txt', 'w') as f:
        f.write('Command used to run: python ')
        f.write(' '.join(sys.argv))
        f.write('\n')
        f.write('device in use: ' + str(device))
        f.write('\n')
        f.write('--experiment_name ' + str(experiment_name))
        f.write('\n')

    # Setup data loaders and models based on task.
    if task == 'cifar10':
        train_loader, test_loader = cifar10_loaders()
        model_A = CIFAREmbeddingNet()
        model_B = CIFAREmbeddingNet()
    elif task == 'mnist':
        train_loader, test_loader = mnist_loaders()
        model_A = MNISTEmbeddingNet()
        model_B = MNISTEmbeddingNet()
    elif task == 'uw':
        uw_data = 'bert'
        train_loader, test_loader = uw_loaders(uw_data)
        if uw_data == 'bert':
            model_A = RowNet(3072, embed_dim=1024)  # Language.
            model_B = RowNet(4096, embed_dim=1024)  # Vision.

    # Finish model setup.
    model_A.load_state_dict(
        torch.load(results_dir + 'train_results/model_A_state.pt'))
    model_B.load_state_dict(
        torch.load(results_dir + 'train_results/model_B_state.pt'))
    model_A.to(device)
    model_B.to(device)
    # Put models into evaluation mode.
    model_A.eval()
    model_B.eval()
    """For UW data."""
    ## we use train data to calculate the threshhold for distance.
    # Load saved embeddings if available, to be faster.
    a_train = load_embeddings(test_results_dir + 'lang_embeds_train.npy')
    b_train = load_embeddings(test_results_dir + 'img_embeds_train.npy')

    # Iterate through the train data.
    if a_train is None or b_train is None:
        a_train = []
        b_train = []
        print(
            "Computing embeddings for train data to calculate the distance threshold"
        )
        for data in train_loader:
            anchor_data = data[0].to(device)
            positive_data = data[1].to(device)
            label = data[2]
            a_train.append(model_A(anchor_data).cpu().detach().numpy())
            b_train.append(model_B(positive_data).cpu().detach().numpy())
        print("Finished Computing embeddings for train data")
    #saving embeddings if not already saved
    save_embeddings(test_results_dir + 'lang_embeds_train.npy', a_train)
    save_embeddings(test_results_dir + 'img_embeds_train.npy', b_train)

    a_train = np.concatenate(a_train, axis=0)
    b_train = np.concatenate(b_train, axis=0)

    # Test data
    # For accumulating predictions to check embedding visually using test set.
    # a is embeddings from domain A, b is embeddings from domain B, ys is their labels
    a = []
    b = []
    ys = []
    instance_data = []

    # Load saved test embeddings if available, to be faster.
    a = load_embeddings(test_results_dir + 'lang_embeds.npy')
    b = load_embeddings(test_results_dir + 'img_embeds.npy')
    compute_test_embeddings = a is None or b is None
    if compute_test_embeddings:
        a = []
        b = []

    # Iterate through the test data.
    print("computing embeddings for test data")
    for data in test_loader:
        language_data, vision_data, object_name, instance_name = data
        language_data = language_data.to(device)
        vision_data = vision_data.to(device)
        instance_data.extend(instance_name)
        if compute_test_embeddings:
            a.append(
                model_A(language_data).cpu().detach().numpy())  # Language.
            b.append(model_B(vision_data).cpu().detach().numpy())  # Vision.
        ys.extend(object_name)
    print("finished computing embeddings for test data")
    # Convert string labels to ints.
    labelencoder = LabelEncoder()
    labelencoder.fit(ys)
    ys = labelencoder.transform(ys)

    #saving embeddings if not already saved
    save_embeddings(test_results_dir + 'lang_embeds.npy', a)
    save_embeddings(test_results_dir + 'img_embeds.npy', b)

    # Concatenate predictions.
    a = np.concatenate(a, axis=0)
    b = np.concatenate(b, axis=0)
    ab = np.concatenate((a, b), axis=0)

    ground_truth, predicted, distance = object_identification_task_classifier(
        a, b, ys, a_train, b_train, lamb_std=1, cosine=cosined)

    #### Retrieval task by giving an image and finding the closest word descriptions ####
    ground_truth_word, predicted_word, distance_word = object_identification_task_classifier(
        b, a, ys, b_train, a_train, lamb_std=1, cosine=cosined)
    with open('retrieval_non_pro.csv', mode='w') as retrieval_non_pro:
        csv_file_writer = csv.writer(retrieval_non_pro,
                                     delimiter=',',
                                     quotechar='"',
                                     quoting=csv.QUOTE_MINIMAL)
        csv_file_writer.writerow(
            ['image', 'language', 'predicted', 'ground truth'])
        for i in range(50):
            csv_file_writer.writerow([
                instance_data[0], instance_data[i], predicted_word[0][i],
                ground_truth_word[0][i]
            ])

    precisions = []
    recalls = []
    f1s = []
    precisions_pos = []
    recalls_pos = []
    f1s_pos = []
    #print(classification_report(oit_res[i], 1/np.arange(1,len(oit_res[i])+1) > 0.01))
    for i in range(len(ground_truth)):
        p, r, f, s = precision_recall_fscore_support(ground_truth[i],
                                                     predicted[i],
                                                     warn_for=(),
                                                     average='micro')
        precisions.append(p)
        recalls.append(r)
        f1s.append(f)
        p, r, f, s = precision_recall_fscore_support(ground_truth[i],
                                                     predicted[i],
                                                     warn_for=(),
                                                     average='binary')
        precisions_pos.append(p)
        recalls_pos.append(r)
        f1s_pos.append(f)

    print('\n ')
    print(experiment_name + '_' + str(embed_dim))
    print('MRR,    KNN,    Corr,   Mean F1,    Mean F1 (pos only)')
    print('%.3g & %.3g & %.3g & %.3g & %.3g' %
          (mean_reciprocal_rank(
              a, b, ys, cosine=cosined), knn(a, b, ys, k=5, cosine=cosined),
           corr_between(a, b, cosine=cosined), np.mean(f1s), np.mean(f1s_pos)))

    plt.figure(figsize=(14, 7))
    for i in range(len(ground_truth)):
        fpr, tpr, thres = roc_curve(ground_truth[i],
                                    [1 - e for e in distance[i]],
                                    drop_intermediate=True)
        plt.plot(fpr, tpr, alpha=0.08, color='r')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.savefig(test_results_dir + '_' + str(embed_dim) + '_ROC.svg')

    # Pick a pair, plot distance in A vs distance in B. Should be correlated.
    a_dists = []
    b_dists = []
    for _ in range(3000):
        i1 = random.randrange(len(a))
        i2 = random.randrange(len(a))
        a_dists.append(euclidean(a[i1], a[i2]))
        b_dists.append(euclidean(b[i1], b[i2]))
    #     a_dists.append(cosine(a[i1], a[i2]))
    #     b_dists.append(cosine(b[i1], b[i2]))

    # Plot.
    plt.figure(figsize=(14, 14))
    #plt.title('Check Distance Correlation Between Domains')
    plt.xlim([0, 3])
    plt.ylim([0, 3])
    # plt.xlim([0,max(a_dists)])
    # plt.ylim([0,max(b_dists)])
    # plt.xlabel('Distance in Domain A')
    # plt.ylabel('Distance in Domain B')
    plt.xlabel('Distance in Language Domain')
    plt.ylabel('Distance in Vision Domain')
    #plt.plot(a_dists_norm[0],b_dists_norm[0],'.')
    #plt.plot(np.arange(0,2)/20,np.arange(0,2)/20,'k-',lw=3)
    plt.plot(a_dists, b_dists, 'o', alpha=0.5)
    plt.plot(np.arange(0, 600), np.arange(0, 600), 'k--', lw=3, alpha=0.5)
    #plt.text(-0.001, -0.01, 'Corr: %.3f'%(pearsonr(a_dists,b_dists)[0]),  fontsize=20)
    plt.savefig(test_results_dir + '_' + str(embed_dim) + '_CORR.svg')

    # Inspect embedding distances.
    clas = 5  # Base class.
    i_clas = [i for i in range(len(ys)) if ys[i].item() == clas]
    i_clas_2 = np.random.choice(i_clas, len(i_clas), replace=False)

    clas_ref = 4  # Comparison class.
    i_clas_ref = [i for i in range(len(ys)) if ys[i].item() == clas_ref]

    ac = np.array([a[i] for i in i_clas])
    bc = np.array([b[i] for i in i_clas])

    ac2 = np.array([a[i] for i in i_clas_2])
    bc2 = np.array([b[i] for i in i_clas_2])

    ac_ref = np.array([a[i] for i in i_clas_ref])
    aa_diff_ref = norm(ac[:min(len(ac), len(ac_ref))] -
                       ac_ref[:min(len(ac), len(ac_ref))],
                       ord=2,
                       axis=1)

    ab_diff = norm(ac - bc2, ord=2, axis=1)
    aa_diff = norm(ac - ac2, ord=2, axis=1)
    bb_diff = norm(bc - bc2, ord=2, axis=1)

    # aa_diff_ref = [cosine(ac[:min(len(ac),len(ac_ref))][i],ac_ref[:min(len(ac),len(ac_ref))][i]) for i in range(len(ac[:min(len(ac),len(ac_ref))]))]

    # ab_diff = [cosine(ac[i],bc2[i]) for i in range(len(ac))]
    # aa_diff = [cosine(ac[i],ac2[i]) for i in range(len(ac))]
    # bb_diff = [cosine(bc[i],bc2[i]) for i in range(len(ac))]

    bins = np.linspace(0, 0.1, 100)

    plt.figure(figsize=(14, 7))
    plt.hist(ab_diff, bins, alpha=0.5, label='between embeddings')
    plt.hist(aa_diff, bins, alpha=0.5, label='within embedding A')
    plt.hist(bb_diff, bins, alpha=0.5, label='within embedding B')

    plt.hist(aa_diff_ref,
             bins,
             alpha=0.5,
             label='embedding A, from class ' + str(clas_ref))

    plt.title('Embedding Distances - Class: ' + str(clas))
    plt.xlabel('L2 Distance')
    plt.ylabel('Count')
    plt.legend()

    #labelencoder.classes_
    classes_to_keep = [36, 6, 9, 46, 15, 47, 50, 22, 26, 28]
    print(labelencoder.inverse_transform(classes_to_keep))

    ab_norm = [
        e for i, e in enumerate(ab) if ys[i % len(ys)] in classes_to_keep
    ]
    ys_norm = [e for e in ys if e in classes_to_keep]

    color_index = {list(set(ys_norm))[i]: i
                   for i in range(len(set(ys_norm)))}
    markers = ["o", "v", "^", "s", "*", "+", "x", "D", "h", "4"]
    marker_index = {
        list(set(ys_norm))[i]: markers[i]
        for i in range(len(set(ys_norm)))
    }

    embedding = umap.UMAP(n_components=2).fit_transform(
        ab_norm)  # metric='cosine'
    # Plot UMAP embedding of embeddings for all classes.
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

    mid = len(ys_norm)

    ax1.set_title('Language UMAP')
    for e in list(set(ys_norm)):
        x1 = [
            embedding[:mid, 0][i] for i in range(len(ys_norm))
            if ys_norm[i] == e
        ]
        x2 = [
            embedding[:mid, 1][i] for i in range(len(ys_norm))
            if ys_norm[i] == e
        ]
        ax1.scatter(
            x1,
            x2,
            marker=marker_index[int(e)],
            alpha=0.5,
            c=[sns.color_palette("colorblind", 10)[color_index[int(e)]]],
            label=labelencoder.inverse_transform([int(e)])[0])
    ax1.set_xlim([min(embedding[:, 0]) - 4, max(embedding[:, 0]) + 4])
    ax1.set_ylim([min(embedding[:, 1]) - 4, max(embedding[:, 1]) + 4])
    ax1.grid(True)
    ax1.legend(loc='upper center',
               bbox_to_anchor=(1.1, -0.08),
               fancybox=True,
               shadow=True,
               ncol=5)

    ax2.set_title('Vision UMAP')
    for e in list(set(ys_norm)):
        x1 = [
            embedding[mid::, 0][i] for i in range(len(ys_norm))
            if ys_norm[i] == e
        ]
        x2 = [
            embedding[mid::, 1][i] for i in range(len(ys_norm))
            if ys_norm[i] == e
        ]
        ax2.scatter(
            x1,
            x2,
            marker=marker_index[int(e)],
            alpha=0.5,
            c=[sns.color_palette("colorblind", 10)[color_index[int(e)]]])
    ax2.set_xlim([min(embedding[:, 0]) - 4, max(embedding[:, 0]) + 4])
    ax2.set_ylim([min(embedding[:, 1]) - 4, max(embedding[:, 1]) + 4])
    ax2.grid(True)

    plt.savefig(test_results_dir + '_' + str(embed_dim) + '_UMAP_wl.svg',
                bbox_inches='tight')
Example #9
import argparse
import torch

from datasets import OmniglotDataset, MiniImageNet
from core import NShotTaskSampler, create_nshot_task_label, EvaluateFewShot
from models import VAE
from maml import meta_gradient_step
from train import fit
from callbacks import *
from utils import setup_dirs
from config import PATH

import torch.nn.functional as F
from torchsummary import summary

setup_dirs()
assert torch.cuda.is_available()
device = torch.device('cuda')
torch.backends.cudnn.benchmark = True
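# cudnn.benchmark lets cuDNN auto-tune convolution algorithms, which speeds
# up training when input shapes are fixed.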

####################
#### Parameters ####
####################
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='omniglot')
parser.add_argument('--n', default=1, type=int)
parser.add_argument('--k', default=5, type=int)
parser.add_argument('--q', default=1, type=int)
parser.add_argument('--inner-train-steps', default=1, type=int)
parser.add_argument('--inner-val-steps', default=3, type=int)
parser.add_argument('--inner-lr', default=0.4, type=float)
Example #10
    parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient")
    parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation")
    opt = parser.parse_args()

    opt.shuffle = not opt.dont_shuffle

    model_name = "{0:%Y%m%d_%H%M%S}_WGAN_GP_{1}".format(datetime.datetime.now(), os.path.basename(opt.dset.rstrip('/')))
    opt.model_name = model_name

    print(opt)

    # Setup logging
    model_dir, fig_dir = utils.setup_dirs(opt)

    # Configure dataloader
    dataloader = utils.configure_dataloader(opt.dset, opt.batch_size, opt.img_size, opt.shuffle)

    # Find img_shape
    # img_shape = (opt.channels, opt.img_size, opt.img_size)
    a, _ = next(iter(dataloader))
    img_shape = (a.shape[1], a.shape[2], a.shape[3])

    cuda = torch.cuda.is_available()

    # Loss weight for gradient penalty
    lambda_gp = 10
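    # (A penalty weight of 10 is the value used in the WGAN-GP paper,
    # Gulrajani et al. 2017.)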

    # Initialize generator and discriminator