Example #1
def run_main():
    args = arguments.get_arguments()
    pp.pprint(args._to_dicts())

    p, conn = None, None
    env = None

    random.seed(args.seed)
    torch.manual_seed(args.seed)

    choice, input_args = get_choice(args.input)

    if choice == 'remote':
        print("connecting to: " + input_args.host)
        p, conn = connection.connect('ws://' + input_args.host)
    else:
        config, dataset = load_dataset(args)
        env = initialise(config, dataset, args)

    try:
        run_trainer(args, conn, env=env)
    except (KeyboardInterrupt, SystemExit):
        if p is not None:
            p.terminate()
    except Exception:
        traceback.print_exc()
        if p is not None:
            p.terminate()
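Every snippet on this page assumes a project-local get_arguments() helper. As a point of reference only, here is a minimal, purely hypothetical argparse sketch covering the two options this first example reads (--seed and --input); the real arguments module in each project will differ.

import argparse

def get_arguments():
    # Hypothetical sketch only; the project's arguments.get_arguments() may differ.
    parser = argparse.ArgumentParser(description="training entry point")
    parser.add_argument("--seed", type=int, default=0,
                        help="seed passed to random.seed and torch.manual_seed")
    parser.add_argument("--input", type=str, default="local",
                        help="input spec, e.g. a remote host or a local dataset path")
    return parser.parse_args()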
Example #2
def main():

    args = get_arguments()
    print(args)
    np.random.seed(args.seed)

    dataset, train_img_feature, train_data = get_train_data(args)
    dataset, test_img_feature, test_data, val_answers = get_data_test(args)

    train_X = [train_data[u'question'], train_img_feature]
    train_Y = np_utils.to_categorical(train_data[u'answers'], args.nb_classes)

    test_X = [test_data[u'question'], test_img_feature]
    test_Y = np_utils.to_categorical(val_answers, args.nb_classes)

    model_name = importlib.import_module("models." + args.model)
    model = model_name.model(args)
    model.compile(loss='categorical_crossentropy',
                  optimizer=args.optimizer,
                  metrics=['accuracy'])
    model.summary()  # prints model layers with weights

    history = model.fit(train_X,
                        train_Y,
                        batch_size=args.batch_size,
                        nb_epoch=args.nb_epoch,
                        validation_data=(test_X, test_Y))

    return history.history
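model.fit returns a History object, and main() hands back its history dict with one value per epoch for each metric. A short sketch of consuming that result (the exact accuracy key, 'acc' vs 'accuracy', depends on the Keras version):

hist = main()
for epoch, (loss, val_loss) in enumerate(zip(hist['loss'], hist['val_loss'])):
    print("epoch {}: loss={:.4f} val_loss={:.4f}".format(epoch, loss, val_loss))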
Example #3
def test_setup_network():
    # NOTE: Removing any sys.argv to get default arguments
    sys.argv = [""]
    args = get_arguments()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    segmenter, training_loss, validation_loss = setup_network(args, device)
    assert isinstance(segmenter, torch.nn.Module)
    assert isinstance(training_loss, torch.nn.CrossEntropyLoss)
    assert isinstance(validation_loss, dt.engine.MeanIoU)
def test_setup_data_loaders(mocker):
    # NOTE: Removing any sys.argv to get default arguments
    sys.argv = [""]
    args = get_arguments()
    mocker.patch.object(train, "get_datasets", side_effect=get_fake_datasets)
    train_loaders, val_loader = train.setup_data_loaders(args)
    assert len(train_loaders) == args.num_stages
    for train_loader in train_loaders:
        assert isinstance(train_loader, torch.utils.data.DataLoader)
    assert isinstance(val_loader, torch.utils.data.DataLoader)
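Both tests overwrite sys.argv before calling get_arguments(); argparse reads sys.argv[1:], so leaving only a dummy program name forces every option back to its default. A self-contained illustration of the trick:

import argparse
import sys

parser = argparse.ArgumentParser()
parser.add_argument("--random-seed", type=int, default=42)

sys.argv = [""]              # keep only the program-name slot
args = parser.parse_args()   # no flags left to parse, so defaults are used
assert args.random_seed == 42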
Example #5
def code_init():
    args = get_arguments()
    pathdir_init(args)
    if not (args.mode == 'train' and args.submode == 'mutual_kd'):
        print("Using random seed: {}".format(args.seeds))
        seed_torch(seed=args.seeds)
    if args.submode == 'mutual':
        args.loop = args.mutual_model_num
    sys.stdout = Logger(osp.join(args.save_dir, 'Log.txt'))
    printargs(args)
    return args
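seed_torch is a project helper that is not shown here; a common implementation of this kind of seeding helper is sketched below (the repository's own version may set more or fewer flags):

import os
import random

import numpy as np
import torch

def seed_torch(seed=0):
    # Hypothetical sketch of a typical "seed everything" helper.
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False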
Example #6
def main():
    args = get_arguments()
    logger = logging.getLogger(__name__)
    torch.backends.cudnn.deterministic = True
    dt.misc.set_seed(args.random_seed)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Network
    segmenter, training_loss, validation_loss = setup_network(args,
                                                              device=device)
    # Checkpoint
    saver, epoch_start = setup_checkpoint_and_maybe_restore(args,
                                                            model=segmenter)
    # Data
    train_loaders, val_loader = setup_data_loaders(args)
    # Optimisers
    optimisers, schedulers = setup_optimisers_and_schedulers(args,
                                                             model=segmenter)

    total_epoch = epoch_start
    for stage, num_epochs in enumerate(args.epochs_per_stage):
        if stage > 0:
            epoch_start = 0
        for epoch in range(epoch_start, num_epochs):
            logger.info(f"Training: stage {stage} epoch {epoch}")
            dt.engine.train(
                model=segmenter,
                opts=optimisers,
                crits=training_loss,
                dataloader=train_loaders[stage],
                freeze_bn=args.freeze_bn[stage],
            )
            total_epoch += 1
            for scheduler in schedulers:
                scheduler.step(total_epoch)
            if (epoch + 1) % args.val_every[stage] == 0:
                logger.info(f"Validation: stage {stage} epoch {epoch}")
                vals = dt.engine.validate(
                    model=segmenter,
                    metrics=validation_loss,
                    dataloader=val_loader,
                )
                saver.maybe_save(
                    new_val=vals,
                    dict_to_save={
                        "state_dict": segmenter.state_dict(),
                        "epoch": total_epoch,
                    },
                )
Example #7
def main():

    # Read config file for date
    current_date = get_current_date()

    # Get command line arguments
    args = get_arguments()

    # Check date handling commands and execute the corresponding routine
    if args.advance_date is not None:
        current_date = advance_date(args.advance_date)
        print(current_date)
        return current_date
    if args.set_date is not None:
        current_date = set_date(args.set_date)
        print(current_date)
        return current_date

    # Check commands and execute the corresponding routine
    if args.CLI_command.lower() == 'buy':
        print(
            buy(args.product_name, args.buy_date, args.price,
                args.expiration_date))
    elif args.CLI_command.lower() == 'sell':
        print(sell(args.product_name, args.sell_date, args.price))
    elif args.CLI_command.lower() == 'report':

        # Convert yesterday, now, today or date to date
        report_date = None
        if args.yesterday is not None:
            report_date = (datetime.strptime(current_date, '%Y-%m-%d') -
                           timedelta(days=1)).strftime('%Y-%m-%d')
        if args.now is not None:
            report_date = current_date
        if args.today is not None:
            report_date = current_date
        if args.date is not None:
            report_date = args.date
        if report_date is not None:
            print(
                report.show_report(args.report_name, report_date,
                                   args.export_csv, args.show_graph))
        else:
            print(f"ERROR: missing <date>")

    # Unknown command
    else:
        print(
            f"ERROR: unknown command '{args.CLI_command}' <buy, sell, report>")
    return
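The parser behind this CLI is not shown; args carries global date-handling options plus per-command fields (product_name, sell_date, report_name, ...). Purely as an illustration, one way such a parser could be assembled with argparse subparsers (every flag name here is an assumption; the project's real get_arguments() may be organised quite differently):

import argparse

def get_arguments():
    # Hypothetical sketch only; attribute names mirror what main() reads above.
    parser = argparse.ArgumentParser(prog="inventory")
    parser.add_argument("--advance-date", type=int, default=None)
    parser.add_argument("--set-date", default=None)
    sub = parser.add_subparsers(dest="CLI_command")

    buy = sub.add_parser("buy")
    buy.add_argument("--product-name")
    buy.add_argument("--buy-date")
    buy.add_argument("--price", type=float)
    buy.add_argument("--expiration-date")

    sell = sub.add_parser("sell")
    sell.add_argument("--product-name")
    sell.add_argument("--sell-date")
    sell.add_argument("--price", type=float)

    report = sub.add_parser("report")
    report.add_argument("report_name")
    # default=None keeps the "is not None" checks in main() meaningful
    report.add_argument("--yesterday", action="store_true", default=None)
    report.add_argument("--now", action="store_true", default=None)
    report.add_argument("--today", action="store_true", default=None)
    report.add_argument("--date", default=None)
    report.add_argument("--export-csv", action="store_true")
    report.add_argument("--show-graph", action="store_true")

    return parser.parse_args()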
Example #8
File: tagfind.py  Project: fruser/tagfind
def main():
    output, repo, domain = get_arguments()
    archive = 'https://{0}/{1}/archive/master.zip'.format(domain, repo)

    time_format = datetime.now().strftime('%Y-%m-%d_%H.%M.%S')

    utils.LOG.info('Starting repository analysis...')

    output = output + '/' + repo.replace('/','_') + '-' + time_format
    utils.get_package(archive, output)

    feature_files = utils.get_feature_files(output)

    tag_obj_list = utils.parse_files(feature_files)

    utils.stats_output(tag_obj_list)

    utils.LOG.info('Finished...')
Example #9
def main():

    args = get_arguments('generate')

    try:
        checkpoint = torch.load("models/" + args.dataset + "/model.pt")
    except OSError:
        print("No model exists at models/" + args.dataset +
              ". You must train the model before generation.")
        return
    net, _ = get_model_and_data(checkpoint['args'])

    net.load_state_dict(checkpoint['model'])

    for _ in range(args.num_samples):
        print(
            net.get_sample(init_string=args.init_string,
                           max_length=args.max_length,
                           temperature=args.temperature))
def test_setup_optimisers_and_schedulers():
    # NOTE: Removing any sys.argv to get default arguments
    sys.argv = [""]
    args = get_arguments()
    model = DummyEncDecModel()
    optimisers, schedulers = setup_optimisers_and_schedulers(args, model)
    assert len(optimisers) == 2
    assert len(schedulers) == 2
    for optimiser in optimisers:
        assert isinstance(optimiser, torch.optim.Optimizer)
        assert hasattr(optimiser, "state_dict")
        assert hasattr(optimiser, "load_state_dict")
        assert hasattr(optimiser, "step")
        assert hasattr(optimiser, "zero_grad")
    for scheduler in schedulers:
        assert isinstance(scheduler, torch.optim.lr_scheduler._LRScheduler)
        assert hasattr(scheduler, "state_dict")
        assert hasattr(scheduler, "load_state_dict")
        assert hasattr(scheduler, "step")
Example #11
import torch.nn as nn
import torch.nn.functional as F
from datetime import datetime

from tqdm import tqdm

from pose_dataset import *
from pose_resnet import *
from pose_resnet_2d import *
from pose_hrnet import *
import arguments
from make_log import *
from evaluate import *
from loss import *

args = arguments.get_arguments()

# model name
model_name = '{}_nlayer{}_{}_lr{}_batch{}_momentum{}_schedule{}_nepoch{}_{}'.format(
        args.name,
        args.nlayer,
        args.optimizer,
        args.lr,
        args.batch_size,
        args.momentum,
        args.schedule,
        args.nepochs,
        args.arch
    )
logger = make_logger(log_file=model_name)
logger.info("saved model name " + model_name)
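make_logger comes from the project's make_log module; a minimal sketch of what such a factory usually does (file plus console handler on a named logger), offered only as an assumption about its shape:

import logging

def make_logger(log_file, name="train"):
    # Hypothetical sketch; the real make_log.make_logger may configure things differently.
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter("%(asctime)s %(message)s")
    file_handler = logging.FileHandler(log_file + ".log")
    file_handler.setFormatter(formatter)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
    return logger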
Example #12
def main():
    args = get_arguments()
    logger = logging.getLogger(__name__)
    torch.backends.cudnn.deterministic = True
    dt.misc.set_seed(args.random_seed)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Network
    segmenter, training_loss, validation_loss = setup_network(args,
                                                              device=device)
    # Data
    train_loaders, val_loader = setup_data_loaders(args)
    # Optimisers
    optimisers, schedulers = setup_optimisers_and_schedulers(args,
                                                             model=segmenter)
    # Checkpoint
    saver, restart_epoch = setup_checkpoint_and_maybe_restore(
        args,
        model=segmenter,
        optimisers=optimisers,
        schedulers=schedulers,
    )
    # Calculate from which stage and which epoch to restart the training
    total_epoch = restart_epoch
    all_epochs = np.cumsum(args.epochs_per_stage)
    restart_stage = sum(restart_epoch >= all_epochs)
    if restart_stage > 0:
        restart_epoch -= all_epochs[restart_stage - 1]
    for stage in range(restart_stage, args.num_stages):
        if stage > restart_stage:
            restart_epoch = 0
        for epoch in range(restart_epoch, args.epochs_per_stage[stage]):
            logger.info(f"Training: stage {stage} epoch {epoch}")
            dt.engine.train(
                model=segmenter,
                opts=optimisers,
                crits=training_loss,
                dataloader=train_loaders[stage],
                freeze_bn=args.freeze_bn[stage],
                grad_norm=args.grad_norm[stage],
            )
            total_epoch += 1
            for scheduler in schedulers:
                scheduler.step(total_epoch)
            if (epoch + 1) % args.val_every[stage] == 0:
                logger.info(f"Validation: stage {stage} epoch {epoch}")
                vals = dt.engine.validate(
                    model=segmenter,
                    metrics=validation_loss,
                    dataloader=val_loader,
                )
                saver.maybe_save(
                    new_val=vals,
                    dict_to_save={
                        "model": segmenter.state_dict(),
                        "epoch": total_epoch,
                        "optimisers": [optimiser.state_dict() for optimiser in optimisers],
                        "schedulers": [scheduler.state_dict() for scheduler in schedulers],
                    },
                )
Example #13
import sys

import arguments
from Game import Game
from bots import *
from Player import Player


# Change these to edit the default Game parameters
DEFAULT_VERBOSITY = True
DEFAULT_MIN_ROUNDS = 300
DEFAULT_AVERAGE_ROUNDS = 1000
DEFAULT_END_EARLY = False
DEFAULT_PLAYERS = (
    [Player()] * 5 + [Player1()] * 5 + [Player2()] * 5 + [Player3(0.0001)] * 5 +
    [Freeloader()] * 5 + [Alternator()] * 1 + [MaxRepHunter()] * 5 +
    [Random(0.3)] * 2 + [Random(0.96)] * 2 + [FairHunter()] * 2 +
    [BoundedHunter(0.8, 0.999)] * 5 + [AverageHunter()] * 3 + [Pushover()] * 1
)


# Bare minimum test game. See README.md for details.

if __name__ == '__main__':
    (players, options) = arguments.get_arguments()
    # The list of players for the game is made up of
    #   'Player' (your strategy)
    #   bots from get_arguments (the bots to use)
    player_list = players
    # **options -> interpret game options from get_arguments
    #              as a dictionary to unpack into the Game parameters

    winnerMap = {}
    for i in range(200):
        game = Game(player_list, **options)

        winner = game.play_game()
        print("Winner: " + winner)
        sys.stdout.flush()
        if winner not in winnerMap:
Example #14
def main():
    # Get all the command-line arguments
    args = arguments.get_arguments()

    if args.log_file is not None:
        logging.basicConfig(format='%(asctime)s %(message)s',
                            filename=args.log_file,
                            filemode='a',
                            level=logging.DEBUG)
    else:
        logging.basicConfig(format='%(asctime)s %(message)s',
                            level=logging.DEBUG)

    # Log the parsed arguments
    logging.info('Obtained all arguments: %s', str(args))

    logging.info('parsing all data')
    if args.train_data:
        train_data = parse_file(args.train_data, args.dont_preprocess_data,
                                args.dont_lowercase_words, args.just_pad_sents)
    if args.dev_data:
        dev_data = parse_file(args.dev_data, args.dont_preprocess_data,
                              args.dont_lowercase_words, args.just_pad_sents)
    if args.test_data:
        test_data = parse_file(args.test_data, args.dont_preprocess_data,
                               args.dont_lowercase_words, args.just_pad_sents)

    logging.info('train_length: %d', len(train_data))
    logging.info('dev_length: %d', len(dev_data))
    logging.info('test_length: %d', len(test_data))

    char_to_ix, word_to_ix, label_to_ix = get_word_label_ix(
        train_data, args.vocab_size)

    if len(word_to_ix) != args.vocab_size:
        logging.info('vocab_size changed to %d', len(word_to_ix))
        args.vocab_size = len(word_to_ix)

    if args.word_embeds_file is not None and args.word_embeds_file != 'None':
        logging.info('obtaining %s embeddings for words', args.word_embeds)
        word_vectors = extract_embeds(args.word_embeds_file,
                                      args.word_embed_dim, word_to_ix)
    else:
        word_vectors = None

    logging.info('Obtained word_to_ix: %d and label_to_ix: %d ',
                 len(word_to_ix), len(label_to_ix))
    logging.info(label_to_ix)

    if args.model == 'exact_summarunner':
        logging.info('using a word-level bilstm and sent-level bilstm model')

        summarunner = model.summarunner(args, word_to_ix, label_to_ix,
                                        word_vectors)

        logging.info('Created the network')

        logging.info('Training the network')

        # training the network
        train(summarunner, train_data, dev_data, test_data, args)

    else:
        logging.error('no such option for the model yet')
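extract_embeds is expected to return one pre-trained vector per entry of word_to_ix; a typical loader for a GloVe-style text file (one token followed by its vector per line) might look like the sketch below, offered only as an assumption about its behaviour:

import numpy as np

def extract_embeds(embeds_file, embed_dim, word_to_ix):
    # Hypothetical sketch; words missing from the file keep small random vectors.
    vectors = np.random.uniform(-0.1, 0.1, (len(word_to_ix), embed_dim))
    with open(embeds_file, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            word, values = parts[0], parts[1:]
            if word in word_to_ix and len(values) == embed_dim:
                vectors[word_to_ix[word]] = np.asarray(values, dtype=np.float32)
    return vectors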
Example #15
import torch
from torch.autograd import Variable

from arguments import get_arguments
from combine import Combine
from models.vgg import *
from models.dpn import *
from models.lenet import *
from models.senet import *
from models.resnet import *
from models.resnext import *
from models.densenet import *
from models.googlenet import *
from models.mobilenet import *
from models.shufflenet import *
from models.preact_resnet import *
from models.smallnet import *

parser = get_arguments()
opt = parser.parse_args()
img_size = (1, 3, 32, 32)
inputs = torch.randn(1, 3, 32, 32)
# resnet18 = models.resnet18()
# model = VGG16()
arch_config = {}
arch_config['num_classes'] = 10
arch_config['num_channels'] = 3
if opt.E > 1:
    arch_config['balance'] = opt.balance
else:
    arch_config['balance'] = False
model = Combine(opt.arch, opt.E, opt.probs, ensemble=True, **arch_config)
# model = SmallNetOriginal()
y, _ = model(Variable(inputs))
Example #16
def main():
    """
    Main entry point; wrapping the pipeline in main() makes it possible to log uncaught exceptions: https://stackoverflow.com/questions/6234405/logging-uncaught-exceptions-in-python
    """

    args = get_arguments()

    ######################################################################
    #####################START PIPELINE###################################
    ######################################################################
    output = os.path.abspath(args.output)
    group_name = output.split("/")[-1]
    reference = os.path.abspath(args.reference)
    #annotation = os.path.abspath(args.annotation)

    # LOGGING
    # Create log file with date and time
    right_now = str(datetime.datetime.now())
    right_now_full = "_".join(right_now.split(" "))
    log_filename = group_name + "_" + right_now_full + ".log"
    log_folder = os.path.join(output, 'Logs')
    check_create_dir(log_folder)
    log_full_path = os.path.join(log_folder, log_filename)

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    formatter = logging.Formatter('%(asctime)s:%(message)s')

    file_handler = logging.FileHandler(log_full_path)
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)

    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    # stream_handler.setFormatter(formatter)

    logger.addHandler(stream_handler)
    logger.addHandler(file_handler)

    logger.info("\n\n" + BLUE + BOLD +
                "STARTING PIPELINE IN GROUP: " + group_name + END_FORMATTING)

    today = str(datetime.date.today())

    logger.info("ARGUMENTS:")
    logger.info(str(args))

    # Obtain all R1 and R2 from folder
    r1, r2 = extract_read_list(args.input_dir)

    # Check if there are samples to filter out
    sample_list_F = []
    if args.sample_list is None:
        logger.info("\n" + "No samples to filter")
        for r1_file, r2_file in zip(r1, r2):
            sample = extract_sample(r1_file, r2_file)
            sample_list_F.append(sample)
    else:
        logger.info("samples will be filtered")
        sample_list_F = file_to_list(args.sample_list)

    new_samples = check_reanalysis(args.output, sample_list_F)

    logger.info("\n%d samples will be analysed: %s" %
                (len(sample_list_F), ",".join(sample_list_F)))
    logger.info("\n%d NEW samples will be analysed: %s" %
                (len(new_samples), ",".join(new_samples)))
    #DECLARE FOLDERS CREATED IN PIPELINE ################
    #AND KEY FILES ######################################
    #####################################################
    # Annotation related parameters
    #script_dir = os.path.dirname(os.path.realpath(__file__))

    # Output related
    out_qc_dir = os.path.join(output, "Quality")
    out_qc_pre_dir = os.path.join(out_qc_dir, "raw")  # subfolder
    out_variant_dir = os.path.join(output, "Variants")
    out_core_dir = os.path.join(output, "Core")

    out_stats_dir = os.path.join(output, "Stats")
    out_stats_bamstats_dir = os.path.join(
        out_stats_dir, "Bamstats")  # subfolder
    out_stats_coverage_dir = os.path.join(
        out_stats_dir, "Coverage")  # subfolder
    out_compare_dir = os.path.join(output, "Compare")

    out_annot_dir = os.path.join(output, "Annotation")
    out_annot_snpeff_dir = os.path.join(out_annot_dir, "snpeff")  # subfolder
    out_annot_user_dir = os.path.join(out_annot_dir, "user")  # subfolder
    out_annot_user_aa_dir = os.path.join(out_annot_dir, "user_aa")  # subfolder
    out_annot_blast_dir = os.path.join(out_annot_dir, "blast")  # subfolder

    out_species_dir = os.path.join(output, "Species")
    new_sample_number = 0
    for r1_file, r2_file in zip(r1, r2):
        # Extract sample name
        sample = extract_sample(r1_file, r2_file)
        args.sample = sample
        if sample in sample_list_F:
            # VARIANT SAMPLE DIR
            sample_variant_dir = os.path.join(out_variant_dir, sample)

            sample_number = str(sample_list_F.index(sample) + 1)
            sample_total = str(len(sample_list_F))
            if sample in new_samples:
                new_sample_number = str(int(new_sample_number) + 1)
                new_sample_total = str(len(new_samples))
                logger.info("\n" + WHITE_BG + "STARTING SAMPLE: " + sample +
                            " (" + sample_number + "/" + sample_total + ")" + " (" + new_sample_number + "/" + new_sample_total + ")" + END_FORMATTING)
            else:
                logger.info("\n" + WHITE_BG + "STARTING SAMPLE: " + sample +
                            " (" + sample_number + "/" + sample_total + ")" + END_FORMATTING)

            output_final_vcf = os.path.join(
                sample_variant_dir, 'snps.all.ivar.tsv')

            if not os.path.isfile(output_final_vcf):

                ##############START PIPELINE#####################
                #################################################

                # INPUT ARGUMENTS
                ################
                # check_file_exists(r1_file)
                # check_file_exists(r2_file)

                args.output = os.path.abspath(args.output)
                check_create_dir(args.output)

                # QUALITY CHECK in RAW with fastqc
                ######################################################
                check_create_dir(out_qc_dir)

                out_qc_raw_name_r1 = (".").join(r1_file.split(
                    '/')[-1].split('.')[0:-2]) + '_fastqc.html'
                out_qc_raw_name_r2 = (".").join(r2_file.split(
                    '/')[-1].split('.')[0:-2]) + '_fastqc.html'
                output_qc_raw_file_r1 = os.path.join(
                    out_qc_pre_dir, out_qc_raw_name_r1)
                output_qc_raw_file_r2 = os.path.join(
                    out_qc_pre_dir, out_qc_raw_name_r2)

                if os.path.isfile(output_qc_raw_file_r1) and os.path.isfile(output_qc_raw_file_r2):
                    logger.info(YELLOW + DIM + output_qc_raw_file_r1 +
                                " EXISTS\nOmitting QC for sample " + sample + END_FORMATTING)
                else:
                    logger.info(
                        GREEN + "Checking quality in sample " + sample + END_FORMATTING)
                    logger.info("R1: " + r1_file + "\nR2: " + r2_file)
                    fastqc_quality(r1_file, r2_file,
                                   out_qc_pre_dir, args.threads)

                """
                TODO: Human filter
                """

                # VARIANT CALLING WITH SNIPPY
                ###################################################

                output_vcf_sub = os.path.join(
                    sample_variant_dir, "snps.subs.vcf")
                output_vcf = os.path.join(sample_variant_dir, "snps.vcf")

                if os.path.isfile(output_vcf_sub) and os.path.isfile(output_vcf):
                    logger.info(YELLOW + DIM + output_vcf +
                                " EXISTS\nOmitting Variant calling in " + sample + END_FORMATTING)
                else:
                    logger.info(
                        GREEN + "Calling variants with snippy " + sample + END_FORMATTING)
                    run_snippy(r1_file, r2_file, reference, out_variant_dir, sample,
                               threads=args.threads, minqual=10, minfrac=0.1, mincov=1)
                    old_bam = os.path.join(sample_variant_dir, "snps.bam")
                    old_bai = os.path.join(sample_variant_dir, "snps.bam.bai")
                    new_bam = os.path.join(sample_variant_dir, sample + ".bam")
                    new_bai = os.path.join(
                        sample_variant_dir, sample + ".bam.bai")
                    os.rename(old_bam, new_bam)
                    os.rename(old_bai, new_bai)

                #VARIANT FORMAT COMBINATION (REMOVE COMPLEX) ########
                #####################################################
                out_variant_indel_sample = os.path.join(
                    sample_variant_dir, "snps.indel.vcf")
                out_variant_all_sample = os.path.join(
                    sample_variant_dir, "snps.all.vcf")

                if os.path.isfile(out_variant_indel_sample):
                    logger.info(YELLOW + DIM + out_variant_indel_sample +
                                " EXISTS\nOmitting indel filtering in sample " + sample + END_FORMATTING)
                else:
                    logger.info(GREEN + "Filtering INDELS in " +
                                sample + END_FORMATTING)
                    extract_indels(output_vcf)

                if os.path.isfile(out_variant_all_sample):
                    logger.info(YELLOW + DIM + out_variant_all_sample +
                                " EXISTS\nOmitting vcf combination in sample " + sample + END_FORMATTING)
                else:
                    logger.info(GREEN + "Combining vcf in " +
                                sample + END_FORMATTING)
                    merge_vcf(output_vcf_sub, out_variant_indel_sample)

                #VARIANT FORMAT ADAPTATION TO IVAR ##################
                #####################################################
                out_variant_tsv_file = os.path.join(
                    sample_variant_dir, 'snps.all.ivar.tsv')

                if os.path.isfile(out_variant_tsv_file):
                    logger.info(YELLOW + DIM + out_variant_tsv_file +
                                " EXISTS\nOmitting format adaptation for sample " + sample + END_FORMATTING)
                else:
                    logger.info(
                        GREEN + "Adapting variants format in sample " + sample + END_FORMATTING)
                    prior = datetime.datetime.now()
                    vcf_to_ivar_tsv(out_variant_all_sample,
                                    out_variant_tsv_file)
                    after = datetime.datetime.now()
                    print("Done with function in: %s" % (after - prior))

            # SPECIES DETERMINATION
            ###################################################
            check_create_dir(out_species_dir)

            output_species = os.path.join(
                out_species_dir, sample + ".screen.tab")

            if os.path.isfile(output_species):
                logger.info(YELLOW + DIM + output_species +
                            " EXISTS\nOmitting Species determination in " + sample + END_FORMATTING)
            else:
                logger.info(
                    GREEN + "Determining species in " + sample + END_FORMATTING)
                mash_screen(r1_file, out_species_dir, r2_file=r2_file, winner=True, threads=args.threads,
                            mash_database=args.mash_database)

            ########################CREATE STATS AND QUALITY FILTERS########################################################################
            ################################################################################################################################
            #CREATE Bamstats#######################################
            #######################################################
            check_create_dir(out_stats_dir)
            check_create_dir(out_stats_bamstats_dir)
            out_bamstats_name = sample + ".bamstats"
            out_bamstats_file = os.path.join(
                out_stats_bamstats_dir, out_bamstats_name)
            bam_sample_file = os.path.join(sample_variant_dir, sample + ".bam")

            if os.path.isfile(out_bamstats_file):
                logger.info(YELLOW + DIM + out_bamstats_file +
                            " EXISTS\nOmitting Bamstats for sample " + sample + END_FORMATTING)
            else:
                logger.info(GREEN + "Creating bamstats in sample " +
                            sample + END_FORMATTING)
                create_bamstat(
                    bam_sample_file, out_stats_bamstats_dir, sample, threads=args.threads)

            #CREATE Coverage######################################
            #######################################################
            check_create_dir(out_stats_coverage_dir)
            out_coverage_name = sample + ".cov"
            out_coverage_file = os.path.join(
                out_stats_coverage_dir, out_coverage_name)

            if os.path.isfile(out_coverage_file):
                logger.info(YELLOW + DIM + out_coverage_file +
                            " EXISTS\nOmitting Coverage for sample " + sample + END_FORMATTING)
            else:
                logger.info(GREEN + "Creating coverage in sample " +
                            sample + END_FORMATTING)
                create_coverage(bam_sample_file,
                                out_stats_coverage_dir, sample)

    # coverage OUTPUT SUMMARY
    ######################################################
    prior_recal = datetime.datetime.now()
    logger.info(GREEN + "Creating summary report for coverage result in group " +
                group_name + END_FORMATTING)
    obtain_group_cov_stats(out_stats_dir, group_name)
    after_recal = datetime.datetime.now()
    logger.info("Done with report for coverage: %s" %
                (after_recal - prior_recal))

    # READS and VARIANTS OUTPUT SUMMARY
    ######################################################
    logger.info(GREEN + "Creating overall summary report in group " +
                group_name + END_FORMATTING)
    obtain_overal_stats(output, group_name)

    # REMOVE UNCOVERED
    ##############################################################################################################################
    logger.info(GREEN + "Removing low quality samples in group " +
                group_name + END_FORMATTING)
    uncovered_samples = remove_low_quality(
        output, min_coverage=args.coverage20, min_hq_snp=args.min_snp, type_remove='Uncovered')

    if len(uncovered_samples) > 1:
        logger.info(GREEN + "Uncovered samples: " +
                    (",").join(uncovered_samples) + END_FORMATTING)
    else:
        logger.info(GREEN + "NO uncovered samples found" + END_FORMATTING)

    # RUN SNIPPY CORE
    ##############################################################################################################################
    if args.core:
        check_create_dir(out_core_dir)
        logger.info(GREEN + "Running snippy-core " +
                    group_name + END_FORMATTING)
        run_snippy_core(out_variant_dir, out_core_dir, reference)

        logger.info(GREEN + "Adapting core-snp to compare format " +
                    group_name + END_FORMATTING)
        core_vcf_file = os.path.join(out_core_dir, "core.vcf")
        core_vcf_file_adapted = os.path.join(
            out_core_dir, "core.vcf.adapted.tsv")
        core_vcf_file_removed = os.path.join(
            out_core_dir, "core.vcf.adapted.final.tsv")

        core_vcf_df_adapted = import_VCF4_core_to_compare(core_vcf_file)
        core_vcf_df_adapted.to_csv(
            core_vcf_file_adapted, sep="\t", index=False)

        logger.info(GREEN + "Obtaining clustered positions " +
                    group_name + END_FORMATTING)

        close_positions_list = extract_close_snps(
            core_vcf_df_adapted, snps_in_10=1)
        logger.info(GREEN + "Obtaining uncovered positions " +
                    group_name + END_FORMATTING)
        uncovered_list = identify_uncovered(
            out_stats_coverage_dir, min_coverage=10, nocall_fr=0.5)

        logger.debug('Clustered positions in core SNP:\n{}'.format(
            (",".join([str(x) for x in close_positions_list]))))
        logger.debug('Uncovered positions in all samples:\n{}'.format(
            (",".join([str(x) for x in uncovered_list]))))

        to_remove_list = close_positions_list + uncovered_list

        remove_df = remove_position_from_compare(
            core_vcf_df_adapted, to_remove_list)
        remove_df.to_csv(core_vcf_file_removed, sep="\t", index=False)

        ddtb_compare(core_vcf_file_removed, distance=10)

    #ANNOTATION WITH SNPEFF AND USER INPUT ##############
    #####################################################
    logger.info("\n\n" + BLUE + BOLD + "STARTING ANNOTATION IN GROUP: " +
                group_name + END_FORMATTING + "\n")
    check_create_dir(out_annot_dir)
    check_create_dir(out_annot_snpeff_dir)
    # SNPEFF
    if args.snpeff_database != False:
        for root, _, files in os.walk(out_variant_dir):
            for name in files:
                if name == 'snps.all.vcf':
                    sample = root.split('/')[-1]
                    filename = os.path.join(root, name)
                    chrom_filename = os.path.join(
                        root, 'snps.all.chromosome.vcf')
                    out_annot_file = os.path.join(
                        out_annot_snpeff_dir, sample + ".annot")
                    if os.path.isfile(out_annot_file):
                        logger.info(YELLOW + DIM + out_annot_file +
                                    " EXISTS\nOmitting snpEff Annotation for sample " + sample + END_FORMATTING)
                    else:
                        logger.info(
                            GREEN + "Annotating sample with snpEff: " + sample + END_FORMATTING)
                        rename_reference_snpeff(filename, chrom_filename)
                        annotate_snpeff(chrom_filename, out_annot_file,
                                        database=args.snpeff_database)
    else:
        logger.info(YELLOW + DIM + " No SnpEff database supplied, skipping annotation in group " +
                    group_name + END_FORMATTING)
    # USER DEFINED
    if not args.annot_bed and not args.annot_vcf:
        logger.info(
            YELLOW + BOLD + "Omitting User Annotation, no BED or VCF files supplied" + END_FORMATTING)
    else:
        check_create_dir(out_annot_user_dir)
        for root, _, files in os.walk(out_variant_dir):
            for name in files:
                if name == 'snps.all.ivar.tsv':
                    sample = root.split('/')[-1]
                    logger.info(
                        'User bed/vcf annotation in sample {}'.format(sample))
                    filename = os.path.join(root, name)
                    out_annot_file = os.path.join(
                        out_annot_user_dir, sample + ".tsv")
                    user_annotation(
                        filename, out_annot_file, vcf_files=args.annot_vcf, bed_files=args.annot_bed)

    # USER AA DEFINED
    if not args.annot_aa:
        logger.info(
            YELLOW + BOLD + "Omitting User aa Annotation, no AA files supplied" + END_FORMATTING)
    else:
        check_create_dir(out_annot_user_aa_dir)
        for root, _, files in os.walk(out_annot_snpeff_dir):
            if root == out_annot_snpeff_dir:
                for name in files:
                    if name.endswith('.annot'):
                        sample = name.split('.')[0]
                        logger.info(
                            'User aa annotation in sample {}'.format(sample))
                        filename = os.path.join(root, name)
                        out_annot_aa_file = os.path.join(
                            out_annot_user_aa_dir, sample + ".tsv")
                        if os.path.isfile(out_annot_aa_file):
                            user_annotation_aa(
                                out_annot_aa_file, out_annot_aa_file, aa_files=args.annot_aa)
                        else:
                            user_annotation_aa(
                                filename, out_annot_aa_file, aa_files=args.annot_aa)
    # USER FASTA ANNOTATION
    if not args.annot_fasta:
        logger.info(
            YELLOW + BOLD + "Omitting User FASTA Annotation, no FASTA files supplied" + END_FORMATTING)
    else:
        check_create_dir(out_annot_blast_dir)
        for root, _, files in os.walk(out_variant_dir):
            for name in files:
                if name.endswith('.consensus.subs.fa'):
                    filename = os.path.join(root, name)
                    sample = root.split('/')[-1]
                    logger.info(
                        'User FASTA annotation in sample {}'.format(sample))
                    # out_annot_aa_file = os.path.join(
                    #    out_annot_user_aa_dir, sample + ".tsv")
                    for db in args.annot_fasta:
                        make_blast(filename, db, sample, out_annot_blast_dir,
                                   db_type="nucl", query_type="nucl", evalue=0.0001, threads=8)

    # USER AA TO HTML
    if not args.annot_aa:
        logger.info(
            YELLOW + BOLD + "Omitting User aa Annotation to HTML, no AA files supplied" + END_FORMATTING)
    else:
        annotated_samples = []
        logger.info('Adapting annotation to html in {}'.format(group_name))
        for root, _, files in os.walk(out_annot_user_aa_dir):
            if root == out_annot_user_aa_dir:
                for name in files:
                    if name.endswith('.tsv'):
                        sample = name.split('.')[0]
                        annotated_samples.append(sample)
                        filename = os.path.join(root, name)
                        annotation_to_html(filename, sample)
        annotated_samples = [str(x) for x in annotated_samples]
        report_samples_html_all = report_samples_html.replace(
            'ALLSAMPLES', ('","').join(annotated_samples))  # NEW
        with open(os.path.join(out_annot_user_aa_dir, '00_all_samples.html'), 'w+') as f:
            f.write(report_samples_html_all)

    # SNP COMPARISON using tsv variant files
    ######################################################
    logger.info("\n\n" + BLUE + BOLD + "STARTING COMPARISON IN GROUP: " +
                group_name + END_FORMATTING + "\n")

    check_create_dir(out_compare_dir)
    folder_compare = today + "_" + group_name
    path_compare = os.path.join(out_compare_dir, folder_compare)
    check_create_dir(path_compare)
    full_path_compare = os.path.join(path_compare, group_name)

    compare_snp_matrix_recal = full_path_compare + ".revised.final.tsv"
    compare_snp_matrix_recal_intermediate = full_path_compare + ".revised_intermediate.tsv"
    compare_snp_matrix_recal_mpileup = full_path_compare + \
        ".revised_intermediate_vcf.tsv"
    compare_snp_matrix_INDEL_intermediate = full_path_compare + \
        ".revised_INDEL_intermediate.tsv"

    # Create intermediate

    recalibrated_snp_matrix_intermediate = ddbb_create_intermediate(
        out_variant_dir, out_stats_coverage_dir, min_freq_discard=0.1, min_alt_dp=10, only_snp=False)
    # recalibrated_snp_matrix_intermediate.to_csv(
    #     compare_snp_matrix_recal_intermediate, sep="\t", index=False)

    # Remove SNPs from BED file (PE/PPE)

    if args.remove_bed:
        recalibrated_snp_matrix_intermediate = remove_bed_positions(
            recalibrated_snp_matrix_intermediate, args.remove_bed)

    recalibrated_snp_matrix_intermediate.to_csv(
        compare_snp_matrix_recal_intermediate, sep="\t", index=False)

    # Recalibrate intermediate with VCF

    prior_recal = datetime.datetime.now()
    recalibrated_snp_matrix_mpileup = recalibrate_ddbb_vcf_intermediate(
        compare_snp_matrix_recal_intermediate, out_variant_dir, min_cov_low_freq=10)
    recalibrated_snp_matrix_mpileup.to_csv(
        compare_snp_matrix_recal_mpileup, sep="\t", index=False)

    after_recal = datetime.datetime.now()
    logger.debug("Done with recalibration vcf: %s" %
                 (after_recal - prior_recal))

    # Remove SNPs located within INDELs

    compare_snp_matrix_INDEL_intermediate_df = remove_position_range(
        recalibrated_snp_matrix_mpileup)
    compare_snp_matrix_INDEL_intermediate_df.to_csv(
        compare_snp_matrix_INDEL_intermediate, sep="\t", index=False)

    # Extract all positions marked as complex
    complex_variants = extract_complex_list(out_variant_dir)
    logger.debug('Complex positions in all samples:\n{}'.format(
        (",".join([str(x) for x in complex_variants]))))

    # Clean all faulty positions and samples => Final table

    recalibrated_revised_INDEL_df = revised_df(compare_snp_matrix_INDEL_intermediate_df,
                                               path_compare,
                                               complex_pos=complex_variants,
                                               min_freq_include=0.8,
                                               min_threshold_discard_uncov_sample=args.min_threshold_discard_uncov_sample,
                                               min_threshold_discard_uncov_pos=args.min_threshold_discard_uncov_pos,
                                               min_threshold_discard_htz_sample=args.min_threshold_discard_htz_sample,
                                               min_threshold_discard_htz_pos=args.min_threshold_discard_htz_pos,
                                               min_threshold_discard_all_pos=args.min_threshold_discard_all_pos,
                                               min_threshold_discard_all_sample=args.min_threshold_discard_all_sample,
                                               remove_faulty=True,
                                               drop_samples=True,
                                               drop_positions=True,
                                               windows_size_discard=args.window)
    recalibrated_revised_INDEL_df.to_csv(
        compare_snp_matrix_recal, sep="\t", index=False)

    # Matrix to pairwise and mwk

    ddtb_compare(compare_snp_matrix_recal, distance=5)

    logger.info("\n\n" + MAGENTA + BOLD + "COMPARING FINISHED IN GROUP: " +
                group_name + END_FORMATTING + "\n")

    logger.info("\n\n" + MAGENTA + BOLD +
                "#####END OF PIPELINE AUTOSNIPPY ANALYSIS#####" + END_FORMATTING + "\n")
Example #17
import arguments
from Game import Game
from bots import *
from Player import Player

# Change these to edit the default Game parameters
DEFAULT_VERBOSITY = False
DEFAULT_QUIET = False
DEFAULT_MIN_ROUNDS = 300
DEFAULT_AVERAGE_ROUNDS = 1000
DEFAULT_END_EARLY = False
DEFAULT_PLAYERS = [
    Player(),
    Pushover(),
    Freeloader(),
    Alternator(),
    MaxRepHunter(),
    Random(.2),
    Random(.8)
]

# Bare minimum test game. See README.md for details.

if __name__ == '__main__':
    (players, options) = arguments.get_arguments()
    # The list of players for the game is made up of
    #   'Player' (your strategy)
    #   bots from get_arguments (the bots to use)
    player_list = players
    # **options -> interpret game options from get_arguments
    #              as a dictionary to unpack into the Game parameters
    game = Game(player_list, **options)
    game.play_game()
def main():
    args = get_arguments('parser')
    parse_corpus(args.dataset)
Example #19
                        break
            await asyncio.sleep(1)

        if 'error' in response:
            raise ValueError('Timeout exception[' + str(i) + '].')

        logger.log('Polling[' + str(i) + '] complete!')
        response = response['response']['result']

    data = json.loads(response['value'])
    responses[i] = data

if __name__ == "__main__":
    """ Gets the arguments and makes the requests according to them. """

    args = get_arguments()
    responses = [None] * args.requests
    loop = asyncio.get_event_loop()
    loop.run_until_complete(asyncio.gather(
        *[ga_action_request(args, i) for i in range(0, args.requests)]
    ))
    loop.close()
    if args.only_population:
        population = [
            individual
            for response in responses
            for individual in response['population']
        ]
        print(population)
    else:
        print(responses)
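The manual get_event_loop / run_until_complete / close sequence works, but on Python 3.7+ the same fan-out can be expressed with asyncio.run, which creates and closes the loop itself. A minimal equivalent sketch:

async def run_all(args):
    # Same gather as above, wrapped in a coroutine so asyncio.run can own the loop.
    await asyncio.gather(*[ga_action_request(args, i) for i in range(args.requests)])

# asyncio.run(run_all(args))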
Example #20
#!/usr/bin/python3

import re
from glob import glob
from copy import deepcopy
from math import log
from arguments import get_arguments

DOC_DIR = 'test_docs'
NOT_WORD_RE = r'[^\w]'
ops = {'and': set.intersection,
       'or': set.union,
       'not': set.difference}
args = get_arguments()


def idf(term, docs):
    N = len(docs)
    df = count_docs_containing_term(term, [doc[1] for doc in docs])
    if df == 0:
        return 0
    else:
        return log(N/df)


def tf(term, doc):
    count = 0
    for word in doc:
        if word == term:
            count += 1
    return count
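idf() calls a count_docs_containing_term helper that is not shown. A hypothetical reconstruction of it, plus the tf-idf combination these two functions are normally used for:

def count_docs_containing_term(term, doc_word_lists):
    # doc_word_lists is a list of tokenised documents (lists of words)
    return sum(1 for words in doc_word_lists if term in words)


def tf_idf(term, doc_words, docs):
    # docs is the same (name, words) list that idf expects; doc_words is one token list
    return tf(term, doc_words) * idf(term, docs)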
Example #21
from __future__ import division, print_function

import arguments
from Game import Game
from bots import *
from Player import Player


# Change these to edit the default Game parameters
DEFAULT_VERBOSITY = True
DEFAULT_MIN_ROUNDS = 300
DEFAULT_AVERAGE_ROUNDS = 1000
DEFAULT_END_EARLY = False

# Bare minimum test game. See README.md for details.

if __name__ == '__main__':
    (bots, options) = arguments.get_arguments()
    # The list of players for the game is made up of
    #   'Player' (your strategy)
    #   bots from get_arguments (the bots to use)
    players = [Player()] + bots
    # **options -> interpret game options from get_arguments
    #              as a dictionary to unpack into the Game parameters
    game = Game(players, **options)
    game.play_game()
Example #22
def main():

    args = arguments.get_arguments()

    if args.log_file is not None:
        logging.basicConfig(format='%(asctime)s %(message)s',
                            filename=args.log_file,
                            filemode='a',
                            level=logging.DEBUG)
    else:
        logging.basicConfig(format='%(asctime)s %(message)s',
                            level=logging.DEBUG)

    #logging.basicConfig(format='%(asctime)s %(message)s', filename='example.log', filemode='w', level=logging.DEBUG)
    #logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
    #logging.debug('This message should go to the log file')
    #logging.info('So should this')
    #logging.warning('And this, too')

    # Get all the command-line arguments
    #args = arguments.get_arguments()
    logging.info('Obtained all arguments: %s', str(args))

    if args.train_data:
        train_data = parse_file(args.train_data, args.dont_preprocess_data,
                                args.dont_lowercase_words,
                                args.share_hiddenstates, args.just_pad_sents)
    if args.dev_data:
        dev_data = parse_file(args.dev_data, args.dont_preprocess_data,
                              args.dont_lowercase_words,
                              args.share_hiddenstates, args.just_pad_sents)
    if args.test_data:
        test_data = parse_file(args.test_data, args.dont_preprocess_data,
                               args.dont_lowercase_words,
                               args.share_hiddenstates, args.just_pad_sents)
    if args.test_data2:
        test_data2 = parse_file(args.test_data2, args.dont_preprocess_data,
                                args.dont_lowercase_words,
                                args.share_hiddenstates, args.just_pad_sents)

    if args.model == 'char_bilstm' or args.model == 'char_bilstm-crf':
        if args.train_data:
            #train_data_word = train_data
            train_data_word = parse_file(args.train_data, True, True,
                                         args.share_hiddenstates,
                                         args.just_pad_sents)
            #train_data_word = parse_file2(args.train_data, args.dont_lowercase_words, args.share_hiddenstates)
        if args.dev_data:
            #dev_data_word = dev_data
            dev_data_word = parse_file(args.dev_data, True, True,
                                       args.share_hiddenstates,
                                       args.just_pad_sents)
            #dev_data_word = parse_file2(args.dev_data, args.dont_lowercase_words, args.share_hiddenstates)
        if args.test_data:
            #test_data_word = test_data
            test_data_word = parse_file(args.test_data, True, True,
                                        args.share_hiddenstates,
                                        args.just_pad_sents)
            #test_data_word = parse_file2(args.test_data, args.dont_lowercase_words, args.share_hiddenstates)
        if args.test_data2:
            #test_data2_word = test_data2
            test_data2_word = parse_file(args.test_data2, True, True,
                                         args.share_hiddenstates,
                                         args.just_pad_sents)
            #test_data2_word = parse_file2(args.test_data2, args.dont_lowercase_words, args.share_hiddenstates)

    if args.combine_train_dev:
        logging.info('combining training and dev data')
        train_data = train_data + dev_data
        if args.model == 'char_bilstm' or args.model == 'char_bilstm-crf':
            train_data_word = train_data_word + dev_data_word

    if args.combine_train_dev_test:
        logging.info('combining training, dev data and test data')
        train_data = train_data + dev_data + test_data
        if args.model == 'char_bilstm' or args.model == 'char_bilstm-crf':
            train_data_word = train_data_word + dev_data_word + test_data_word

    #Check the initializer if needed
    # Declare a DynetParams object
    #dyparams = dy.DynetParams()
    # Fetch the command line arguments (optional)
    #dyparams.from_args()
    # Set some parameters manualy (see the command line arguments documentation)
    #dyparams.set_mem(2048)
    #dyparams.set_random_seed(666)
    # Initialize with the given parameters
    #dyparams.init() # or init_from_params(dyparams)

    logging.info('parsing all data')
    logging.info('train_length: %d', len(train_data))
    logging.info('dev_length: %d', len(dev_data))
    logging.info('test_length: %d', len(test_data))
    logging.info('test_length2: %d', len(test_data2))

    _, word_to_ix, tag_to_ix = get_word_label_ix(train_data, args.vocab_size)
    if args.model == 'char_bilstm' or args.model == 'char_bilstm-crf':
        char_to_ix, _, _ = get_word_label_ix(train_data_word, args.vocab_size)

    if len(word_to_ix) != args.vocab_size:
        logging.info('vocab_size changed to %d', len(word_to_ix))
        args.vocab_size = len(word_to_ix)

    if args.word_embeds_file is not None and args.word_embeds_file != 'None':
        logging.info('obtaining %s embeddings for words', args.word_embeds)
        word_vectors = extract_embeds(args.word_embeds_file,
                                      args.word_embed_dim, word_to_ix)
        '''
		if args.use_pretrained_embed2:
			word_vectors2 = extract_embeds(args.word_embeds_file2, args.word_embed_dim, word_to_ix)
			new_word_vectors = []
			for i,j in zip(word_vectors, word_vectors2):
				new_word_vectors.append(i+j)
			word_vectors = new_word_vectors
		'''
        #word_vectors2 = extract_embeds('utils/final_polyglot_embeds.txt', args.word_embed_dim, word_to_ix2)
        #new_word_vectors = []
        #for i,j in zip(word_vectors,word_vectors2):
        #	temp_vector = i+j
        #	new_word_vectors.append(temp_vector)

        if args.model == 'char_bilstm' or args.model == 'char_bilstm-crf':
            #logging.info('obtaining %s embeddings for chars',args.word_embeds)
            #char_vectors = extract_embeds(args.word_embeds_file, args.word_embed_dim, char_to_ix)
            char_vectors = None

    else:
        word_vectors = None
        if args.model == 'char_bilstm' or args.model == 'char_bilstm-crf':
            char_vectors = None

    #word_vectors = extract_embeds('../glove.twitter.27B/glove.twitter.27B.25d.txt', 25, word_to_ix)
    #word_vectors = None

    logging.info('Obtained word_to_ix: %d and tag_to_ix: %d ', len(word_to_ix),
                 len(tag_to_ix))
    logging.info(tag_to_ix)

    if args.model == 'char_bilstm':
        logging.info(char_to_ix)
        logging.info('using a char-level bilstm and word-level bilstm model')
        #char_vectors=None
        char_bilstm = model.ensemble_char_BiLSTMNetwork(
            args, char_to_ix, word_to_ix, tag_to_ix, args.num_basis,
            word_vectors, char_vectors)  #, new_word_vectors)

        logging.info('Created the network')

        logging.info('Training the network')
        # training the network
        char_train(char_bilstm, train_data, dev_data, test_data, test_data2,
                   train_data_word, dev_data_word, test_data_word,
                   test_data2_word, args.epochs, args.batch_size, args,
                   tag_to_ix)

    else:
        logging.error('no such option for the model yet')
Example #23
def main():

    args = get_arguments('train')
    net, poems_split_by_words = get_model_and_data(args)

    random.shuffle(poems_split_by_words)
    training_num_poems = int(
        len(poems_split_by_words) * args.training_testing_split)
    testing_num_poems = len(poems_split_by_words) - training_num_poems
    training_poems = poems_split_by_words[:training_num_poems]
    testing_poems = poems_split_by_words[training_num_poems:]

    training_poems = training_poems[:5]
    testing_poems = testing_poems[:1]
    testing_poems = training_poems

    print("*** Num training poems: {0} ***".format(training_num_poems))
    print("*** Num testing poems: {0} ***".format(testing_num_poems))
    print("*** Vocabulary size: {0} ***".format(len(net.word2i)))

    num_params = 0
    for param in list(net.parameters()):
        product_of_dimensions = 1
        for dimension in param.size():
            product_of_dimensions *= dimension
        num_params += product_of_dimensions
    print("*** Num Params: {0} ***".format(num_params))

    if not args.no_cuda and torch.cuda.is_available():
        print("Created net. Sending to GPU ...")
        net.cuda()
    print("GatedLSTM:", net)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=args.lr)

    split_training_poems = [net.split_poem(poem) for poem in training_poems]
    split_testing_poems = [net.split_poem(poem) for poem in testing_poems]

    for epoch in range(args.epochs):

        for i, (input, output) in enumerate(split_training_poems):

            loss = train(net, criterion, optimizer, input, output)

            if i % args.loss_every == args.loss_every - 1:
                print("Step {0}, Loss {1}".format(
                    epoch * len(split_training_poems) + i, loss))

            if i % args.example_every == args.example_every - 1:
                print("Example:")
                print(repr(net.get_sample()))
                print("\n")

            if i % args.eval_every == args.eval_every - 1:
                testing_losses = [
                    train(net,
                          criterion,
                          optimizer,
                          split_poem[0],
                          split_poem[1],
                          update_model=False)
                    for split_poem in split_testing_poems
                ]
                testing_loss = sum(testing_losses) / len(testing_losses)
                print("Validation Loss {0}".format(testing_loss))

    checkpoint = {
        'model': net.state_dict(),
        'optimizer': optimizer.state_dict(),
        'args': args
    }

    if not os.path.exists("models/" + args.dataset):
        os.makedirs("models/" + args.dataset)
    torch.save(checkpoint, "models/" + args.dataset + "/model.pt")
    print("*** Saved model. ***")
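The manual parameter-count loop above is equivalent to a one-liner built on Tensor.numel():

num_params = sum(param.numel() for param in net.parameters())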
Example #24
def main():
    """Create the model and start the training."""
    args = get_arguments()

    cudnn.enabled = True
    n_discriminators = 5

    # create teacher & student
    student_net = UNet(3, n_classes=args.num_classes)
    teacher_net = UNet(3, n_classes=args.num_classes)
    student_params = list(student_net.parameters())

    # teacher doesn't need gradient as it's just an EMA of the student
    teacher_params = list(teacher_net.parameters())
    for param in teacher_params:
        param.requires_grad = False

    student_net.train()
    student_net.cuda(args.gpu)
    teacher_net.train()
    teacher_net.cuda(args.gpu)

    cudnn.benchmark = True
    unsup_weights = [
        args.unsup_weight5, args.unsup_weight6, args.unsup_weight7,
        args.unsup_weight8, args.unsup_weight9
    ]
    lambda_adv_tgts = [
        args.lambda_adv_tgt5, args.lambda_adv_tgt6, args.lambda_adv_tgt7,
        args.lambda_adv_tgt8, args.lambda_adv_tgt9
    ]

    # create a list of discriminators
    discriminators = []
    for dis_idx in range(n_discriminators):
        discriminators.append(FCDiscriminator(num_classes=args.num_classes))
        discriminators[dis_idx].train()
        discriminators[dis_idx].cuda(args.gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    max_iters = args.num_steps * args.iter_size * args.batch_size
    src_set = REFUGE(True,
                     domain='REFUGE_SRC',
                     is_transform=True,
                     augmentations=aug_student,
                     aug_for_target=aug_teacher,
                     max_iters=max_iters)
    src_loader = data.DataLoader(src_set,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=args.num_workers,
                                 pin_memory=True)

    src_loader_iter = enumerate(src_loader)
    tgt_set = REFUGE(True,
                     domain='REFUGE_DST',
                     is_transform=True,
                     augmentations=aug_student,
                     aug_for_target=aug_teacher,
                     max_iters=max_iters)
    tgt_loader = data.DataLoader(tgt_set,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=args.num_workers,
                                 pin_memory=True)

    tgt_loader_iter = enumerate(tgt_loader)
    student_optimizer = optim.SGD(student_params,
                                  lr=args.learning_rate,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
    teacher_optimizer = optim_weight_ema.WeightEMA(teacher_params,
                                                   student_params,
                                                   alpha=args.teacher_alpha)
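
    # NOTE (assumption): optim_weight_ema.WeightEMA is not shown in this
    # snippet. Its step() is assumed to apply the usual mean-teacher update
    #   teacher_param = alpha * teacher_param + (1 - alpha) * student_param
    # roughly equivalent to:
    #   for t_p, s_p in zip(teacher_params, student_params):
    #       t_p.data.mul_(args.teacher_alpha).add_(s_p.data, alpha=1 - args.teacher_alpha)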

    d_optimizers = []
    for idx in range(n_discriminators):
        optimizer = optim.Adam(discriminators[idx].parameters(),
                               lr=args.learning_rate_D,
                               betas=(0.9, 0.99))
        d_optimizers.append(optimizer)

    calc_bce_loss = torch.nn.BCEWithLogitsLoss()

    # labels for adversarial training
    source_label, tgt_label = 0, 1
    for i_iter in range(args.num_steps):

        total_seg_loss = 0
        seg_loss_vals = [0] * n_discriminators
        adv_tgt_loss_vals = [0] * n_discriminators
        d_loss_vals = [0] * n_discriminators
        unsup_loss_vals = [0] * n_discriminators

        for d_optimizer in d_optimizers:
            d_optimizer.zero_grad()
            adjust_learning_rate_D(d_optimizer, i_iter, args)

        student_optimizer.zero_grad()
        adjust_learning_rate(student_optimizer, i_iter, args)
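
        # NOTE (assumption): adjust_learning_rate / adjust_learning_rate_D are
        # not defined in this snippet; adversarial domain-adaptation code
        # commonly uses a polynomial decay, e.g. (exponent 0.9 is a common choice):
        #   lr = args.learning_rate * (1 - i_iter / args.num_steps) ** 0.9
        #   for group in student_optimizer.param_groups:
        #       group['lr'] = lr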

        for sub_i in range(args.iter_size):

            # ******** Optimize source network with segmentation loss ********
            # As we don't change the discriminators, their parameters are fixed
            for discriminator in discriminators:
                for param in discriminator.parameters():
                    param.requires_grad = False

            _, src_batch = next(src_loader_iter)
            _, _, src_images, src_labels, _ = src_batch
            src_images = Variable(src_images).cuda(args.gpu)

            # calculate the segmentation losses
            sup_preds = list(student_net(src_images))
            seg_losses, total_seg_loss = [], 0
            for idx, sup_pred in enumerate(sup_preds):
                sup_interp_pred = sup_pred  # no interpolation is applied here
                # a dice loss could be used instead, e.g. dice_loss(src_labels, sup_interp_pred)
                seg_loss = Weighted_Jaccard_loss(src_labels, sup_interp_pred,
                                                 args.class_weights, args.gpu)
                seg_losses.append(seg_loss)
                total_seg_loss += seg_loss * unsup_weights[idx]
                seg_loss_vals[idx] += seg_loss.item() / args.iter_size
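
            # NOTE (assumption): Weighted_Jaccard_loss is not defined in this
            # snippet; a weighted soft Jaccard (IoU) loss typically has the
            # form, per class c with weight w_c:
            #   1 - sum_c(w_c * (intersection_c + eps) / (union_c + eps))
            # computed from the softmax probabilities and one-hot labels.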

            _, tgt_batch = next(tgt_loader_iter)
            tgt_images0, tgt_lbl0, tgt_images1, tgt_lbl1, _ = tgt_batch
            tgt_images0 = Variable(tgt_images0).cuda(args.gpu)
            tgt_images1 = Variable(tgt_images1).cuda(args.gpu)

            # calculate ensemble losses
            stu_unsup_preds = list(student_net(tgt_images1))
            tea_unsup_preds = teacher_net(tgt_images0)
            total_mse_loss = 0
            for idx in range(n_discriminators):
                # softmax over the class dimension (N x C x H x W prediction maps)
                stu_unsup_probs = F.softmax(stu_unsup_preds[idx], dim=1)
                tea_unsup_probs = F.softmax(tea_unsup_preds[idx], dim=1)

                unsup_loss = calc_mse_loss(stu_unsup_probs, tea_unsup_probs,
                                           args.batch_size)
                unsup_loss_vals[idx] += unsup_loss.item() / args.iter_size
                total_mse_loss += unsup_loss * unsup_weights[idx]

            total_mse_loss = total_mse_loss / args.iter_size
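
            # NOTE (assumption): calc_mse_loss is not shown in this snippet; the
            # mean-teacher consistency term is usually a plain mean-squared error
            # between the two probability maps, e.g.
            #   torch.mean((stu_unsup_probs - tea_unsup_probs) ** 2)
            # optionally summed per sample and divided by args.batch_size.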

            # Because requires_grad is False for the discriminators, gradients
            # accumulate only in the generator: the student network is optimized
            # so that its outputs on target-domain images become indistinguishable
            # from its outputs on source-domain images.
            stu_unsup_preds = list(student_net(tgt_images0))
            d_outs, total_adv_loss = [], 0
            for idx in range(n_discriminators):
                stu_unsup_interp_pred = stu_unsup_preds[idx]
                d_outs.append(discriminators[idx](stu_unsup_interp_pred))
                label_size = d_outs[idx].data.size()
                labels = torch.FloatTensor(label_size).fill_(source_label)
                labels = Variable(labels).cuda(args.gpu)
                adv_tgt_loss = calc_bce_loss(d_outs[idx], labels)

                total_adv_loss += lambda_adv_tgts[idx] * adv_tgt_loss
                adv_tgt_loss_vals[idx] += adv_tgt_loss.item() / args.iter_size

            total_adv_loss = total_adv_loss / args.iter_size

            # requires_grad is set back to True for the discriminators; from here
            # on, gradients accumulate only in the discriminators, which are
            # optimized to tell source predictions apart from target predictions.
            d_losses = []
            for idx in range(n_discriminators):
                discriminator = discriminators[idx]
                for param in discriminator.parameters():
                    param.requires_grad = True

                sup_preds[idx] = sup_preds[idx].detach()
                d_outs[idx] = discriminators[idx](sup_preds[idx])

                label_size = d_outs[idx].data.size()
                labels = torch.FloatTensor(label_size).fill_(source_label)
                labels = Variable(labels).cuda(args.gpu)

                d_losses.append(calc_bce_loss(d_outs[idx], labels))
                d_losses[idx] = d_losses[idx] / args.iter_size / 2
                d_losses[idx].backward()
                d_loss_vals[idx] += d_losses[idx].item()

            for idx in range(n_discriminators):
                stu_unsup_preds[idx] = stu_unsup_preds[idx].detach()
                d_outs[idx] = discriminators[idx](stu_unsup_preds[idx])

                label_size = d_outs[idx].data.size()
                labels = torch.FloatTensor(label_size).fill_(tgt_label)
                labels = Variable(labels).cuda(args.gpu)

                d_losses[idx] = calc_bce_loss(d_outs[idx], labels)
                d_losses[idx] = d_losses[idx] / args.iter_size / 2
                d_losses[idx].backward()
                d_loss_vals[idx] += d_losses[idx].item()

        for d_optimizer in d_optimizers:
            d_optimizer.step()

        # NOTE: these totals hold the values from the last sub-iteration only,
        # so with iter_size > 1 earlier sub-iterations do not contribute to
        # this generator update.
        total_loss = total_seg_loss + total_adv_loss + total_mse_loss
        total_loss.backward()
        student_optimizer.step()
        teacher_optimizer.step()

        log_str = 'iter = {0:7d}/{1:7d}'.format(i_iter, args.num_steps)
        log_str += ', total_seg_loss = {0:.3f} '.format(total_seg_loss.item())
        templ = 'seg_losses = [' + ', '.join(['%.2f'] * len(seg_loss_vals))
        log_str += templ % tuple(seg_loss_vals) + '] '
        templ = 'ens_losses = [' + ', '.join(['%.5f'] * len(unsup_loss_vals))
        log_str += templ % tuple(unsup_loss_vals) + '] '
        templ = 'adv_losses = [' + ', '.join(['%.2f'] * len(adv_tgt_loss_vals))
        log_str += templ % tuple(adv_tgt_loss_vals) + '] '
        templ = 'd_losses = [' + ', '.join(['%.2f'] * len(d_loss_vals))
        log_str += templ % tuple(d_loss_vals) + '] '

        print(log_str)
        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            filename = 'UNet' + str(
                args.num_steps_stop) + '_v18_weightedclass.pth'
            torch.save(teacher_net.cpu().state_dict(),
                       os.path.join(args.snapshot_dir, filename))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            filename = 'UNet' + str(i_iter) + '_v18_weightedclass.pth'
            torch.save(teacher_net.cpu().state_dict(),
                       os.path.join(args.snapshot_dir, filename))
            teacher_net.cuda(args.gpu)
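

def run_teacher_inference(weights_path, image_tensor, num_classes, gpu=0):
    # Sketch (not part of the original snippet): load the teacher weights saved
    # above and produce per-pixel class predictions. Assumes the same
    # UNet(3, n_classes=num_classes) constructor used in main() and that the
    # network returns either a single logit map or a list of logit maps.
    net = UNet(3, n_classes=num_classes)
    net.load_state_dict(torch.load(weights_path, map_location='cpu'))
    net.cuda(gpu).eval()
    with torch.no_grad():
        preds = net(image_tensor.cuda(gpu))
        logits = preds[-1] if isinstance(preds, (list, tuple)) else preds
        return logits.argmax(dim=1)  # (N, H, W) tensor of class indices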