Example #1
def pooled_processing_fastq(fastqfile_options):
    # Cannot send logger object to functions run in a multiprocessing Pool.
    logger = logging.getLogger(__name__ + '.pooled_processing_fastq')
    print(logger.handlers)
    try:
        fastqfile, options = fastqfile_options
        modelName = path.splitext(path.basename(options.hmm_model))[0]
        fastqBaseName = path.splitext(path.basename(fastqfile))[0]
        fastqFilesBaseName = path.basename(fastqfile)
        fastafile = '%s/%s.fasta' % (path.abspath(
            options.tmp_dir), fastqBaseName)
        hmmOut = '%s/%s-%s-hmmsearched.out' % (path.abspath(
            options.hmm_out_dir), fastqBaseName, modelName)
        hitFile = '%s/%s-positives.out' % (path.abspath(
            options.tmp_dir), fastqBaseName)
        logger.info('Converting fastq to fasta')
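        # Rerun mode: reuse intermediate files from a previous run when they exist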
        if options.rerun:
            peptideFile = '%s/%s-amino.fasta' % (options.amino_dir,
                                                 fastqBaseName)
            if path.isfile(peptideFile):
                logger.info('Performing hmmsearch')
                utils.perform_hmmsearch(peptideFile, options.hmm_model, hmmOut,
                                        options)
            else:
                fastafile = '%s/%s.fasta' % (options.fasta_dir, fastqBaseName)
                logger.info('Translating and searching')
                utils.translate_and_search(fastafile, options.hmm_model,
                                           hmmOut, options)

        elif options.store_peptides:
            logger.info('Translating')
            peptideFile = '%s/%s-amino.fasta' % (path.abspath(
                options.tmp_dir), fastqBaseName)
            frame = '6'  # translate in all six reading frames
            if not options.rerun:
                utils.translate_sequence(fastafile, peptideFile, options,
                                         frame)
            logger.info('Performing hmmsearch')
            utils.perform_hmmsearch(peptideFile, options.hmm_model, hmmOut,
                                    options)
        else:
            logger.info('Translating and searching')
            utils.translate_and_search(fastafile, options.hmm_model, hmmOut,
                                       options)

        logger.info('Start to classify')
        utils.classifier(hmmOut, hitFile, options)
        logger.info('Translating, searching, and classification done')

        fastqPath = path.dirname(
            path.abspath(fastqfile)
        )  # Assuming the path is the same for every input fastqfile
        return fastqFilesBaseName, hitFile
    except KeyboardInterrupt:
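        # KeyboardInterruptError is assumed to be a custom Exception subclass defined
        # elsewhere, since a bare KeyboardInterrupt does not propagate cleanly from Pool workers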
        raise KeyboardInterruptError()
Example #2
                                                     num_workers=1)

            # test for accuracy
            true = 0
            true_adv = 0
            total = len(dataset)
            for image, label in dataloader:
                image = image.cuda()
                label = label.cuda()

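                # add_adv is assumed to return the clean input and its FGSM adversarial counterpart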
                output, adv_out = add_adv(classifier,
                                          image,
                                          label,
                                          'fgsm',
                                          default=True)
                output_class = classifier(output)
                adv_output_class = classifier(adv_out)
                def_out, _, _, _ = model(adv_out)
                cleaned_class = classifier(def_out)

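                # Predicted labels for the clean, adversarial, and defended inputs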
                true_class = torch.argmax(output_class, 1)
                adv_class = torch.argmax(adv_output_class, 1)
                adv_clean_class = torch.argmax(cleaned_class, 1)

                print('attack method fgsm')
                print(f'actual class {true_class}')
                print(f'adversarial class {adv_class}')
                print(f'cleaned class {adv_clean_class}')

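                # true: defended predictions that recover the clean label;
                # true_adv: attacks that failed to change the prediction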
                true += torch.sum(torch.eq(true_class, adv_clean_class))
                true_adv += torch.sum(torch.eq(true_class, adv_class))
Example #3
import pandas as pd
from utils.scraper import *
from utils.classifier import *
from utils.db_controller import *
from utils.json_reader import *

print('Process started... "Scraping profiles"')
scraper = Scraper()
print('Process ended... "Scraping profiles"')

print('Process started... "Reading JSON files"')
reader = json_reader()
posts, comments = reader.get_df()
print('Process ended... "Reading JSON files"')

print('Process started... "Classifying comments"')
classif = classifier(comments.copy())
comments = classif.get_df()
print('Process ended... "Classifying comments"')

print('Process started... "Inserting into DB"')
db = db_controller(posts, comments)
db.insert_into()
print('Process ended... "Inserting into DB"')

print('Cleaning the JSON files.')
reader.clean_files()
Example #4
test_data = datasets.ImageFolder(test_dir, transform=test_transforms)

train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=64,
                                           shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=32)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32)

image_datasets = {
    'train_data': train_data,
    'test_data': test_data,
    'valid_data': valid_data
}

# Creating the classifier with a helper function
model = u.classifier(arch, hidden_units, dropout, categories)

# Using NLLLoss, which expects log-probabilities (LogSoftmax output)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.0008)

# Using a helper function for training and validation
u.train(model, epochs, train_loader, valid_loader, optimizer, criterion,
        device)

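# Store the class-to-index mapping on the model so the checkpoint can map outputs back to categories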
model.class_to_idx = image_datasets['train_data'].class_to_idx

# Saving Checkpoint
u.save_cp(arch, model, hidden_units, dropout, epochs, optimizer, categories,
          save_dir)
Example #5
def parse_fasta_input(options, Results, logger):
    modelName = path.splitext(path.basename(options.hmm_model))[0]
    logger.info('Parsing FASTA files')
    frame = '6'
    for fastafile in options.infiles:
        fastaBaseName = path.splitext(path.basename(fastafile))[0]
        hmmOut = '%s/%s-%s-hmmsearched.out' % (path.abspath(
            options.hmm_out_dir), fastaBaseName, modelName)
        fastaOut = '%s/%s-%s-filtered.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        aminoOut = '%s/%s-%s-filtered-peptides.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        orfFile = '%s/%s-%s-predicted-orfs.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        orfAminoFile = '%s/%s-%s-predicted-orfs-amino.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        hitFile = '%s/%s-positives.out' % (path.abspath(
            options.tmp_dir), fastaBaseName)
        elongated_fasta = '%s/%s-gene-elongated.fasta' % (path.abspath(
            options.tmp_dir), fastaBaseName)
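        # Protein input is searched directly; nucleotide input is translated first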
        if options.protein:
            utils.perform_hmmsearch(fastafile, options.hmm_model, hmmOut,
                                    options)
            utils.classifier(hmmOut, hitFile, options)
            hitDict = utils.create_dictionary(hitFile, options)
            utils.retrieve_fasta(hitDict, fastafile, fastaOut, options)
        else:
            if options.store_peptides:
                peptideFile = '%s/%s-amino.fasta' % (path.abspath(
                    options.tmp_dir), fastaBaseName)
                utils.translate_sequence(fastafile, peptideFile, options,
                                         frame)
                logger.info('Performing hmmsearch')
                utils.perform_hmmsearch(peptideFile, options.hmm_model, hmmOut,
                                        options)
            else:
                utils.translate_and_search(fastafile, options.hmm_model,
                                           hmmOut, options)
            utils.classifier(hmmOut, hitFile, options)
            hitDict = utils.create_dictionary(hitFile, options)
            utils.retrieve_fasta(hitDict, fastafile, fastaOut, options)
            if not path.isfile(fastaOut):
                logger.critical('Could not find file %s', fastaOut)
                # exit()
            else:
                utils.retrieve_surroundings(hitDict, fastafile,
                                            elongated_fasta)
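                # Predict ORFs on the elongated hit regions, with Prodigal by default
                # or ORFfinder when requested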
                if path.isfile(elongated_fasta):
                    if not options.orf_finder:
                        tmpORFfile = '%s/%s-long-orfs.fasta' % (
                            options.tmp_dir, fastaBaseName)
                        predict_orfs_prodigal(elongated_fasta, options.tmp_dir,
                                              tmpORFfile,
                                              options.min_orf_length)
                        orfFile = utils.retrieve_predicted_orfs(
                            options, tmpORFfile)
                    else:
                        tmpORFfile = '%s/%s-long-orfs.fasta' % (
                            options.tmp_dir, fastaBaseName)
                        predict_orfs_orfFinder(elongated_fasta,
                                               options.tmp_dir, tmpORFfile,
                                               options.min_orf_length)
                        orfFile = utils.retrieve_predicted_orfs(
                            options, tmpORFfile)
                if options.store_peptides:
                    options.retrieve_whole = False
                    utils.retrieve_peptides(hitDict, peptideFile, aminoOut,
                                            options)
                else:
                    tmpFastaOut = utils.make_fasta_unique(fastaOut, options)
                    utils.retrieve_predicted_genes_as_amino(options,
                                                            tmpFastaOut,
                                                            aminoOut,
                                                            frame='6')
        Results.count_hits(hitFile)
    if path.isfile(orfFile):
        if not options.orf_finder:
            Results.count_orfs_genomes(orfFile)
        else:
            Results.predictedOrfs = Results.count_contigs(orfFile)

    return orfFile
Example #6
# Load the dataset
trainData = h5_dataset(train_list)
valData = h5_dataset(val_list)
trainDataLoader = DataLoader(trainData,
                             batch_size=bs,
                             shuffle=True,
                             num_workers=4)
valDataLoader = DataLoader(valData,
                           batch_size=bs,
                           shuffle=False,
                           num_workers=4)

# Build the network
feature_dim = trainData.feature.size(1)
net = classifier(feature_dim, numclasses)
optimizer = optim.Adam(net.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
if use_gpu:
    net.cuda()
    criterion = criterion.cuda()

if __name__ == '__main__':
    # Training and testing loop
    for e in range(epoch):
        net.train()
        train_loss = 0
        train_correct = 0
        for i, (batchData, batchLabel) in enumerate(trainDataLoader):
            if use_gpu:
                batchData, batchLabel = batchData.cuda(), batchLabel.cuda()
Example #7
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=True, num_workers=1)

        # adversarial methods
        adv_accuracy = {'fgsm': 0, 'r-fgsm': 0, 'cw': 0, 'mi-fgsm': 0, 'pgd': 0, 'single': 0}

        # test for accuracy
        for adv in adv_accuracy:
            true = 0
            total = len(dataset)
            for image, label in dataloader:
                image = image.cuda()
                label = label.cuda()

                # get model output
                output, adv_out = add_adv(classifier, image, label, adv, default=True)
                output_class = classifier(output)
                def_out, _, _, _ = model(adv_out)
                adv_out_class = classifier(def_out)

                # get model predicted class
                true_class = torch.argmax(output_class, 1)
                adversarial_class = torch.argmax(adv_out_class, 1)

                print(f'attack method {adv}')
                print(f'actual class {true_class}')
                print(f'adversarial class {adversarial_class}')

                # calculate the number of correct classifications
                true += torch.sum(torch.eq(true_class, adversarial_class))

                print(int(true) / total)  # fraction of the whole dataset matched so far
Example #8
                                             num_workers=1)
    # adversarial methods
    adv_list = ['fgsm', 'r-fgsm', 'cw']
    # test for accuracy
    xs = list()
    ys = list()
    advs = list()
    for image, label in dataloader:
        image = image.cuda()
        label = label.cuda()
        batch += 1
        print(batch)
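        # Each attack is run three times; i is forwarded to add_adv
        # (assumed to select the attack strength or variant)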
        for i in range(3):
            for adv in adv_list:
                output, adv_out = add_adv(classifier, image, label, adv, i)
                output = classifier(output)
                adv_class = classifier(adv_out)
                print('attack method {}'.format(adv))
                print('actual class ', torch.argmax(output, 1))
                print('adversarial class ', torch.argmax(adv_class, 1))
                print('====================================')
                xs.append(image.cpu().detach().numpy())
                ys.append(label.cpu().detach().numpy())
                advs.append(adv_out.cpu().detach().numpy())

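    # Flatten the per-batch lists into single arrays before saving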
    adv_x = np.concatenate(advs, axis=0)
    xt = np.concatenate(xs, axis=0)
    yt = np.concatenate(ys, axis=0)

    np.save('../data/' + 'advs_mnist.npy', adv_x)
    np.save('../data/' + 'xs_mnist.npy', xt)