Example #1
    def __init__(self, args):
        self.args = args
        self.cfg = load_config(args.cfg_name)

        self.converter = LabelConverter(chars_file=args.chars_file)

        self.tr_ds = Dataset(self.cfg, args.train_dir, args.train_gt_dir,
                             self.converter, self.cfg.batch_size)

        self.cfg.lr_boundaries = [self.tr_ds.num_batches * epoch for epoch in self.cfg.lr_decay_epochs]
        self.cfg.lr_values = [self.cfg.lr * (self.cfg.lr_decay_rate ** i) for i in
                              range(len(self.cfg.lr_boundaries) + 1)]

        if args.val_dir is None:
            self.val_ds = None
        else:
            self.val_ds = Dataset(self.cfg, args.val_dir, args.val_gt_dir,
                                  self.converter, self.cfg.batch_size, shuffle=False)

        if args.test_dir is None:
            self.test_ds = None
        else:
            # Test images often have different size, so set batch_size to 1
            self.test_ds = Dataset(self.cfg, args.test_dir, args.test_gt_dir,
                                   self.converter, shuffle=False, batch_size=1)

        self.model = ResNetV2(self.cfg, self.converter.num_classes)
        self.model.create_architecture()
        self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

        self.epoch_start_index = 0
        self.batch_start_index = 0
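
The lr_boundaries / lr_values pair above builds a piecewise-constant decay schedule (one more value than boundaries, so the final interval is open-ended), presumably consumed by something like tf.train.piecewise_constant. A standalone illustration with made-up numbers, no TensorFlow required:

num_batches = 100                 # batches per epoch (illustrative)
lr, lr_decay_rate = 0.001, 0.1
lr_decay_epochs = [40, 60]

lr_boundaries = [num_batches * epoch for epoch in lr_decay_epochs]               # [4000, 6000]
lr_values = [lr * (lr_decay_rate ** i) for i in range(len(lr_boundaries) + 1)]   # [0.001, 0.0001, 1e-05]
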
Example #2
    def __init__(self, default_params, specific_params, db_client, exchange_client=None, is_backtest=False):
        # Initialize the bot's status and settings.
        # A db_client is required when running a backtest.

        self.exchange_client = exchange_client
        self.db_client = db_client

        self.default_params = default_params
        self.extract_default_params(self.default_params)

        self.specific_params = specific_params

        self.combined_params = dict(**self.default_params, **self.specific_params)

        self.is_backtest = is_backtest
        
        if is_backtest:
            # for params table
            self.backtest_management_table_name = self.bot_name + "_backtest_management"

            # backtest configure
            self.initial_balance = 100.0  # USD
            self.account_currency = "USD"
        
            self.dataset_manipulator = Dataset(self.db_client, self.exchange_client)

            if not self.db_client.is_table_exist(self.backtest_management_table_name):
                self.create_backtest_management_table()
            if self.db_client.is_table_exist("backtest_management"):
                # delete useless template table
                drop_query = "DROP TABLE backtest_management;"
                self.db_client.exec_sql(drop_query, return_df=False)
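
A note on the dict(**a, **b) merge used for combined_params: it assumes default_params and specific_params share no keys, since a duplicate key raises TypeError; the literal-unpacking form {**a, **b} would instead let the right-hand dict win. A quick standalone check (the parameter names are illustrative):

default_params = {"timeframe": 60, "inverse_trading": False}
specific_params = {"timeframe": 15}

merged = {**default_params, **specific_params}   # {'timeframe': 15, 'inverse_trading': False}
# dict(**default_params, **specific_params) raises:
# TypeError: dict() got multiple values for keyword argument 'timeframe'
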
Example #3
def main():
    parser = argparse.ArgumentParser(description="Import dataset into a useful representation that we can use")
    parser.add_argument("dataset_file", help="the file to import")
    parser.add_argument("data_directory", help="the location where we are storing the data.")
    args = parser.parse_args()
    print("processing file: " + args.dataset_file)
    if not path.exists(args.dataset_file):
        raise Exception("no such file: " + args.dataset_file)

    dataset_file, data_directory = open_file(args.dataset_file), args.data_directory

    ts_remove_str_len = -len(".000Z")

    with Dataset(data_directory) as dataset:
        for line in tqdm(dataset_file):
            try:
                if not isinstance(line, str):
                    line = line.decode("ascii")
                if len(line) == 0:
                    continue
                _, az, insttype, os, price, timestamp_str = tuple(line.strip().split("\t"))
                price = float(price)
                timestamp = datetime.strptime(timestamp_str[:ts_remove_str_len], "%Y-%m-%dT%H:%M:%S")
                dataset.insert_data(az, insttype, time_to_epoch(timestamp), price)
            except Exception as e:
                print(e)
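
The loop above relies on a time_to_epoch helper that is not shown; a minimal sketch under the assumption that it should return Unix seconds for a UTC timestamp:

from datetime import datetime, timezone

def time_to_epoch(ts: datetime) -> int:
    # Assumption: the naive timestamps parsed above are UTC; return integer Unix seconds.
    return int(ts.replace(tzinfo=timezone.utc).timestamp())

# time_to_epoch(datetime(2020, 1, 1)) -> 1577836800
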
Example #4
def make_match_json(json_path, dst_dir, num=4):
    """
    Prepare the top `num` matches (default 4) for each id.
    Store the dataset in dst_dir and write the match results to a JSON file.
    """
    dataset = Dataset()
    dataset.make_data(json_path)
    with open(dst_dir + 'dataset.pkl', 'wb') as f:
        pickle.dump(dataset, f)

    dic = {}
    for person in dataset.people:
        for i in range(person.img_num):
            match = euclid_match_human(person, i, num)
            dic[person.imgs[i]] = match

    with open(dst_dir + 'match.json', 'w') as f:
        json.dump(dic, f)
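
euclid_match_human is defined elsewhere; as a rough reference for what a Euclidean top-k match can look like, a self-contained sketch over plain feature vectors (the helper name and feature layout are assumptions, not the project's code):

import numpy as np

def euclid_top_k(query_feat, gallery_feats, k=4):
    # Rank gallery features by Euclidean distance to the query; return the k closest indices.
    dists = np.linalg.norm(gallery_feats - query_feat, axis=1)
    return np.argsort(dists)[:k].tolist()

# euclid_top_k(np.zeros(2), np.array([[3., 0.], [1., 0.], [2., 0.]]), k=2) -> [1, 2]
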
Example #5
    def __init__(self, config, device, sess=None, graph=None):
        '''
        If game_lm is not None, the result (mesh obj and UV texture map)
        will be converted from nsh to the game.
        '''
        self.config = config
        self.name = config.name
        self.device = device
        self.sess = sess
        self.graph = graph
        self.log = logging.getLogger('x')
        self.rot_order = 'XYZ'

        self.debug = config.debug
        self.ex_idx = [4, 5, 8]

        self.inpaint_model = InpaintingModel(config,
                                             device,
                                             self.rot_order,
                                             debug=self.debug).to(device)
        # self.inpaint_model = InpaintingModel(config, device, self.debug)
        self.epoch = 0
        if config.restore:
            self.epoch = self.inpaint_model.load()
        # self.phase = config.phase

        if config.mode == 'train':
            num_test = 2048
            flist = glob(os.path.join(config.data_dir, '*_uv.png'))
            random.shuffle(flist)
            train_flist = flist[:-2 * num_test]
            val_flist = flist[-2 * num_test:-num_test]
            test_flist = flist[-num_test:]

            num_test = 300
            flist_gt = glob(os.path.join(config.data_gt_dir, '*_uv*.png'))
            random.shuffle(flist_gt)
            train_flist_gt = flist_gt[:-2 * num_test]
            val_flist_gt = flist_gt[-2 * num_test:-num_test]
            test_flist_gt = flist_gt[-num_test:]

            self.train_dataset = Dataset(config, train_flist_gt, train_flist)
            self.val_dataset = Dataset(config, val_flist_gt, val_flist)
            self.val_sample_iterator = self.val_dataset.create_iterator(
                config.batch_size)
            self.test_dataset = Dataset(config,
                                        test_flist_gt,
                                        test_flist,
                                        test=True)
            self.test_sample_iterator = self.test_dataset.create_iterator(
                config.batch_size)
            self.samples_dir = os.path.join('samples', config.name)
            os.makedirs(self.samples_dir, exist_ok=True)
        elif config.mode == 'test':
            self.test_dataset = Dataset(config, [], [], test=True)
            self.init_test()
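
The train/val/test split above carves the shuffled file list with negative slices; a tiny illustration of the same slicing on a toy list:

flist = list(range(10))
num_test = 2
train = flist[:-2 * num_test]           # [0, 1, 2, 3, 4, 5]
val = flist[-2 * num_test:-num_test]    # [6, 7]
test = flist[-num_test:]                # [8, 9]
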
Example #6
    def set_helper_libs(self):
        self.dataset_manipulator = Dataset(self.db_client,
                                           self.exchange_client,
                                           self.is_backtest)
        self.ohlcv_tradingbot = OHLCV_tradingbot(self.dataset_manipulator,
                                                 self.default_params,
                                                 self.specific_params)

        if self.is_backtest:
            self.trading_bot_backtest = TradingBotBacktest(self)
        else:
            self.trading_bot_real = TradingBotReal(self)
            self.line = LineNotification(self.db_client.config_path)
Example #7
def collect_predictions(
        model: Union[str, keras.models.Model],
        test_gen: dataset.Dataset) -> Tuple[np.ndarray, np.ndarray]:
    """ Loads a model and collects predictions.
    
    Args:
        model: a Keras model or a path to a Keras model.
        test_gen: generator for test data.
    
    Returns:
        A tuple of predictions and true values.
    """
    # Load from disk only when a path is given; an in-memory model is used as-is.
    if isinstance(model, str):
        model = keras.models.load_model(model)
    pred, true = np.empty((0, 7)), np.empty((0, 7))
    for i in tqdm.tqdm(range(1000)):
        data = test_gen.generate()
        pred = np.append(pred, model.predict(data[0]), axis=0)
        true = np.append(true, data[1], axis=0)
    return pred, true
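
np.append inside the loop reallocates and copies the accumulated array on every iteration; collecting the per-batch outputs in a list and concatenating once is the usual cheaper alternative (a standalone sketch, not a change to the function above):

import numpy as np

batches = [np.full((4, 7), i, dtype=float) for i in range(3)]  # stand-in for model.predict outputs
stacked = np.concatenate(batches, axis=0)                      # shape (12, 7), built with a single copy
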
Example #8
class Trainer(object):
    def __init__(self, args):
        self.args = args
        self.cfg = load_config(args.cfg_name)

        self.converter = LabelConverter(chars_file=args.chars_file)

        self.tr_ds = Dataset(self.cfg, args.train_dir, args.train_gt_dir,
                             self.converter, self.cfg.batch_size)

        self.cfg.lr_boundaries = [self.tr_ds.num_batches * epoch for epoch in self.cfg.lr_decay_epochs]
        self.cfg.lr_values = [self.cfg.lr * (self.cfg.lr_decay_rate ** i) for i in
                              range(len(self.cfg.lr_boundaries) + 1)]

        if args.val_dir is None:
            self.val_ds = None
        else:
            self.val_ds = Dataset(self.cfg, args.val_dir, args.val_gt_dir,
                                  self.converter, self.cfg.batch_size, shuffle=False)

        if args.test_dir is None:
            self.test_ds = None
        else:
            # Test images often have different size, so set batch_size to 1
            self.test_ds = Dataset(self.cfg, args.test_dir, args.test_gt_dir,
                                   self.converter, shuffle=False, batch_size=1)

        self.model = ResNetV2(self.cfg, self.converter.num_classes)
        self.model.create_architecture()
        self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

        self.epoch_start_index = 0
        self.batch_start_index = 0

    def train(self):
        self.sess.run(tf.global_variables_initializer())

        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=8)
        self.train_writer = tf.summary.FileWriter(self.args.log_dir, self.sess.graph)

        # if self.args.restore:
        #     self._restore()

        print('Begin training...')
        for epoch in range(self.epoch_start_index, self.cfg.epochs):
            self.sess.run(self.tr_ds.init_op)

            for batch in range(self.batch_start_index, self.tr_ds.num_batches):
                batch_start_time = time.time()
                total_cost, detect_loss, detect_cls_loss, detect_reg_loss, reco_loss, global_step, lr = self._train()

                # if batch != 0 and (batch % self.args.log_step == 0):
                #     batch_cost, global_step, lr = self._train_with_summary()
                # else:
                #     batch_cost, global_step, lr = self._train()

                print("{:.02f}s, epoch: {}, batch: {}/{}, total_loss: {:.03}, reco_loss: {:.03}, "
                      "detect_loss (total: {:.03}, cls: {:.03}, reg: {:.03}), "
                      "lr: {:.05}"
                      .format(time.time() - batch_start_time, epoch, batch, self.tr_ds.num_batches,
                              total_cost, reco_loss, detect_loss, detect_cls_loss, detect_reg_loss, lr))

                # if global_step != 0 and (global_step % self.args.val_step == 0):
                #     val_acc = self._do_val(self.val_ds, epoch, global_step, "val")
                #     test_acc = self._do_val(self.test_ds, epoch, global_step, "test")
                #     self._save_checkpoint(self.args.ckpt_dir, global_step, val_acc, test_acc)

            self.batch_start_index = 0

    # def _restore(self):
    #     utils.restore_ckpt(self.sess, self.saver, self.args.ckpt_dir)
    #
    #     step_restored = self.sess.run(self.model.global_step)
    #
    #     self.epoch_start_index = math.floor(step_restored / self.tr_ds.num_batches)
    #     self.batch_start_index = step_restored % self.tr_ds.num_batches
    #
    #     print("Restored global step: %d" % step_restored)
    #     print("Restored epoch: %d" % self.epoch_start_index)
    #     print("Restored batch_start_index: %d" % self.batch_start_index)

    def _train(self):
        imgs, score_maps, geo_maps, text_roi_count, affine_matrixs, affine_rects, labels, img_paths = \
            self.tr_ds.get_next_batch(self.sess)

        # print(imgs.shape)
        # print(score_maps.shape)
        # print(geo_maps.shape)
        # print(affine_matrixs.shape)
        # print(affine_rects.shape)
        # print(labels[0].shape)

        fetches = [
            self.model.total_loss,
            self.model.detect_loss,
            self.model.detect_cls_loss,
            self.model.detect_reg_loss,
            self.model.reco_ctc_loss,
            self.model.global_step,
            self.model.lr,
            self.model.train_op
        ]

        feed = {
            self.model.input_images: imgs,
            self.model.input_score_maps: score_maps,
            self.model.input_geo_maps: geo_maps,
            self.model.input_text_roi_count: text_roi_count,
            self.model.input_affine_matrixs: affine_matrixs,
            self.model.input_affine_rects: affine_rects,
            self.model.input_text_labels: labels,
            self.model.is_training: True
        }

        # try:
        total_loss, detect_loss, detect_cls_loss, detect_reg_loss, reco_ctc_loss, global_step, lr, _ = self.sess.run(
            fetches, feed)
        # except:
        #     print(img_paths)
        #     exit(-1)

        return total_loss, detect_loss, detect_cls_loss, detect_reg_loss, reco_ctc_loss, global_step, lr

    # def _train_with_summary(self):
    #     img_batch, label_batch, labels, _ = self.tr_ds.get_next_batch(self.sess)
    #     feed = {self.model.inputs: img_batch,
    #             self.model.labels: label_batch,
    #             self.model.is_training: True}
    #
    #     fetches = [self.model.total_loss,
    #                self.model.ctc_loss,
    #                self.model.regularization_loss,
    #                self.model.global_step,
    #                self.model.lr,
    #                self.model.merged_summay,
    #                self.model.dense_decoded,
    #                self.model.edit_distance,
    #                self.model.train_op]
    #
    #     batch_cost, _, _, global_step, lr, summary, predicts, edit_distance, _ = self.sess.run(fetches, feed)
    #     self.train_writer.add_summary(summary, global_step)
    #
    #     predicts = [self.converter.decode(p, CRNN.CTC_INVALID_INDEX) for p in predicts]
    #     accuracy, _ = infer.calculate_accuracy(predicts, labels)
    #
    #     tf_utils.add_scalar_summary(self.train_writer, "train_accuracy", accuracy, global_step)
    #     tf_utils.add_scalar_summary(self.train_writer, "train_edit_distance", edit_distance, global_step)
    #
    #     return batch_cost, global_step, lr

    # def _do_val(self, dataset, epoch, step, name):
    #     if dataset is None:
    #         return None
    #
    #     accuracy, edit_distance = infer.validation(self.sess, self.model.feeds(), self.model.fetches(),
    #                                                dataset, self.converter, self.args.result_dir, name, step)
    #
    #     tf_utils.add_scalar_summary(self.train_writer, "%s_accuracy" % name, accuracy, step)
    #     tf_utils.add_scalar_summary(self.train_writer, "%s_edit_distance" % name, edit_distance, step)
    #
    #     print("epoch: %d/%d, %s accuracy = %.3f" % (epoch, self.cfg.epochs, name, accuracy))
    #     return accuracy

    def _save_checkpoint(self, ckpt_dir, step, val_acc=None, test_acc=None):
        ckpt_name = "crnn_%d" % step
        if val_acc is not None:
            ckpt_name += '_val_%.03f' % val_acc
        if test_acc is not None:
            ckpt_name += '_test_%.03f' % test_acc

        name = os.path.join(ckpt_dir, ckpt_name)
        print("save checkpoint %s" % name)

        meta_exists, meta_file_name = self._meta_file_exist(ckpt_dir)

        self.saver.save(self.sess, name)

        # remove old meta file to save disk space
        if meta_exists:
            try:
                os.remove(os.path.join(ckpt_dir, meta_file_name))
            except OSError:
                print('Remove meta file failed: %s' % meta_file_name)

    def _meta_file_exist(self, ckpt_dir):
        fnames = os.listdir(ckpt_dir)
        meta_exists = False
        meta_file_name = ''
        for n in fnames:
            if 'meta' in n:
                meta_exists = True
                meta_file_name = n
                break

        return meta_exists, meta_file_name
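
For reference, the naming scheme in _save_checkpoint produces checkpoint names like the following (the numbers are illustrative):

step, val_acc, test_acc = 12000, 0.912, 0.905
ckpt_name = "crnn_%d" % step
ckpt_name += '_val_%.03f' % val_acc
ckpt_name += '_test_%.03f' % test_acc
# ckpt_name == 'crnn_12000_val_0.912_test_0.905'
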
Example #9
def main():
    logger = None
    output_dir = ''
    setup_default_logging()
    args = parser.parse_args()
    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
        if args.distributed and args.num_gpu > 1:
            logging.warning(
                'Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.'
            )
            args.num_gpu = 1

    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.num_gpu = 1
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        import random
        port = random.randint(0, 50000)
        torch.distributed.init_process_group(
            backend='nccl', init_method='env://'
        )  # tcp://127.0.0.1:{}'.format(port), rank=args.local_rank, world_size=8)
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
    assert args.rank >= 0

    if args.distributed:
        logging.info(
            'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
            % (args.rank, args.world_size))
    else:
        logging.info('Training with a single process on %d GPUs.' %
                     args.num_gpu)

    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    model, sta_num, size_factor = _gen_supernet(
        flops_minimum=args.flops_minimum,
        flops_maximum=args.flops_maximum,
        num_classes=args.num_classes,
        drop_rate=args.drop,
        global_pool=args.gp,
        resunit=args.resunit,
        dil_conv=args.dil_conv,
        slice=args.slice)

    if args.local_rank == 0:
        print("Model Searched Using FLOPs {}".format(size_factor * 32))

    data_config = resolve_data_config(vars(args),
                                      model=model,
                                      verbose=args.local_rank == 0)
    if args.local_rank == 0:
        output_base = args.output if args.output else './experiments'
        exp_name = '-'.join([
            datetime.now().strftime("%Y%m%d-%H%M%S"), args.model,
            str(data_config['input_size'][-1]),
            str(args.flops_minimum),
            str(args.flops_maximum)
        ])
        output_dir = get_outdir(output_base, 'search', exp_name)
        log_file = os.path.join(output_dir, "search.log")
        logger = get_logger(log_file)
    if args.local_rank == 0:
        logger.info(args)

    choice_num = 6
    if args.resunit:
        choice_num += 1
    if args.dil_conv:
        choice_num += 2

    if args.local_rank == 0:
        logger.info("Choice_num: {}".format(choice_num))

    model_est = LatencyEst(model)

    if os.path.exists(args.initial_checkpoint):
        load_checkpoint(model, args.initial_checkpoint)

    if args.local_rank == 0:
        logger.info('Model %s created, param count: %d' %
                    (args.model, sum([m.numel() for m in model.parameters()])))

    # data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0)

    # optionally resume from a checkpoint
    optimizer_state = None
    resume_epoch = None
    if args.resume:
        optimizer_state, resume_epoch = resume_checkpoint(model, args.resume)

    if args.num_gpu > 1:
        if args.amp:
            logging.warning(
                'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.'
            )
            args.amp = False
        model = nn.DataParallel(model,
                                device_ids=list(range(args.num_gpu))).cuda()
    else:
        model.cuda()

    optimizer = create_optimizer_supernet(args, model)
    if optimizer_state is not None:
        optimizer.load_state_dict(optimizer_state['optimizer'])

    use_amp = False
    if has_apex and args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        use_amp = True
    if args.local_rank == 0:
        logger.info('NVIDIA APEX {}. AMP {}.'.format(
            'installed' if has_apex else 'not installed',
            'on' if use_amp else 'off'))

    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEma(model,
                             decay=args.model_ema_decay,
                             device='cpu' if args.model_ema_force_cpu else '',
                             resume=args.resume)

    if args.distributed:
        if args.sync_bn:
            try:
                if has_apex:
                    model = convert_syncbn_model(model)
                else:
                    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                        model)
                if args.local_rank == 0:
                    logger.info(
                        'Converted model to use Synchronized BatchNorm.')
            except Exception as e:
                logging.error(
                    'Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1'
                )
        if has_apex:
            model = DDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                logger.info(
                    "Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP."
                )
            model = DDP(model, device_ids=[args.local_rank])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP

    lr_scheduler, num_epochs = create_scheduler(args, optimizer)

    start_epoch = 0
    if args.start_epoch is not None:
        # a specified start_epoch will always override the resume epoch
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
    if start_epoch > 0:
        lr_scheduler.step(start_epoch)

    if args.local_rank == 0:
        logger.info('Scheduled epochs: {}'.format(num_epochs))

    if args.tiny:
        from lib.dataset.tiny_imagenet import get_newimagenet
        [loader_train, loader_eval], [train_sampler, test_sampler] = \
            get_newimagenet(args.data, args.batch_size)
    else:
        train_dir = os.path.join(args.data, 'train')
        if not os.path.exists(train_dir):
            logger.error(
                'Training folder does not exist at: {}'.format(train_dir))
            exit(1)
        dataset_train = Dataset(train_dir)

        collate_fn = None

        loader_train = create_loader(
            dataset_train,
            input_size=data_config['input_size'],
            batch_size=args.batch_size,
            is_training=True,
            use_prefetcher=args.prefetcher,
            re_prob=args.reprob,
            re_mode=args.remode,
            color_jitter=args.color_jitter,
            interpolation='random',  # FIXME cleanly resolve this? data_config['interpolation'],
            mean=data_config['mean'],
            std=data_config['std'],
            num_workers=args.workers,
            distributed=args.distributed,
            collate_fn=collate_fn,
        )

        eval_dir = os.path.join(args.data, 'val')
        if not os.path.isdir(eval_dir):
            logger.error(
                'Validation folder does not exist at: {}'.format(eval_dir))
            exit(1)
        dataset_eval = Dataset(eval_dir)

        loader_eval = create_loader(
            dataset_eval,
            input_size=data_config['input_size'],
            batch_size=4 * args.batch_size,
            is_training=False,
            use_prefetcher=args.prefetcher,
            interpolation=data_config['interpolation'],
            mean=data_config['mean'],
            std=data_config['std'],
            num_workers=args.workers,
            distributed=args.distributed,
        )

    if args.smoothing:
        train_loss_fn = LabelSmoothingCrossEntropy(
            smoothing=args.smoothing).cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    else:
        train_loss_fn = nn.CrossEntropyLoss().cuda()
        validate_loss_fn = train_loss_fn

    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    best_children_pool = []
    if args.local_rank == 0:
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(checkpoint_dir=output_dir,
                                decreasing=decreasing)
    try:
        for epoch in range(start_epoch, num_epochs):
            if args.distributed:
                if args.tiny:
                    train_sampler.set_epoch(epoch)
                else:
                    loader_train.sampler.set_epoch(epoch)

            train_metrics, best_children_pool = train_epoch(
                epoch,
                model,
                loader_train,
                optimizer,
                train_loss_fn,
                args,
                CHOICE_NUM=choice_num,
                lr_scheduler=lr_scheduler,
                saver=saver,
                output_dir=output_dir,
                logger=logger,
                val_loader=loader_eval,
                best_children_pool=best_children_pool,
                use_amp=use_amp,
                model_ema=model_ema,
                est=model_est,
                sta_num=sta_num)

            # eval_metrics = OrderedDict([('loss', 0.0), ('prec1', 0.0), ('prec5', 0.0)])
            eval_metrics = validate(model,
                                    loader_eval,
                                    validate_loss_fn,
                                    args,
                                    CHOICE_NUM=choice_num,
                                    sta_num=sta_num)

            update_summary(epoch,
                           train_metrics,
                           eval_metrics,
                           os.path.join(output_dir, 'summary.csv'),
                           write_header=best_metric is None)

            if saver is not None:
                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(
                    model,
                    optimizer,
                    args,
                    epoch=epoch,
                    model_ema=model_ema,
                    metric=save_metric)

    except KeyboardInterrupt:
        pass
    if best_metric is not None:
        logging.info('*** Best metric: {0} (epoch {1})'.format(
            best_metric, best_epoch))
Example #10
def main():
    test_args = parse_args()

    args = joblib.load('models/%s/args.pkl' % test_args.name)

    print('Config -----')
    for arg in vars(args):
        print('%s: %s' % (arg, getattr(args, arg)))
    print('------------')

    if args.pred_type == 'classification':
        num_outputs = 5
    elif args.pred_type == 'regression':
        num_outputs = 1
    elif args.pred_type == 'multitask':
        num_outputs = 6
    else:
        raise NotImplementedError

    cudnn.benchmark = True

    test_transform = transforms.Compose([
        transforms.Resize((args.input_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # data loading code
    test_dir = preprocess('test',
                          args.img_size,
                          scale=args.scale_radius,
                          norm=args.normalize,
                          pad=args.padding,
                          remove=args.remove)
    test_df = pd.read_csv('inputs/test.csv')
    test_img_paths = test_dir + '/' + test_df['id_code'].values + '.png'
    test_labels = np.zeros(len(test_img_paths))

    test_set = Dataset(test_img_paths, test_labels, transform=test_transform)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=4)

    preds = []
    for fold in range(args.n_splits):
        print('Fold [%d/%d]' % (fold + 1, args.n_splits))

        # create model
        model_path = 'models/%s/model_%d.pth' % (args.name, fold + 1)
        if not os.path.exists(model_path):
            print('%s does not exist.' % model_path)
            continue
        model = get_model(model_name=args.arch,
                          num_outputs=num_outputs,
                          freeze_bn=args.freeze_bn,
                          dropout_p=args.dropout_p)
        model = model.cuda()
        model.load_state_dict(torch.load(model_path))

        model.eval()

        preds_fold = []
        with torch.no_grad():
            for i, (input, _) in tqdm(enumerate(test_loader),
                                      total=len(test_loader)):
                if test_args.tta:
                    outputs = []
                    for input in apply_tta(input):
                        input = input.cuda()
                        output = model(input)
                        outputs.append(output.data.cpu().numpy()[:, 0])
                    preds_fold.extend(np.mean(outputs, axis=0))
                else:
                    input = input.cuda()
                    output = model(input)

                    preds_fold.extend(output.data.cpu().numpy()[:, 0])
        preds_fold = np.array(preds_fold)
        preds.append(preds_fold)

        if not args.cv:
            break

    preds = np.mean(preds, axis=0)

    if test_args.tta:
        args.name += '_tta'

    test_df['diagnosis'] = preds
    test_df.to_csv('probs/%s.csv' % args.name, index=False)

    thrs = [0.5, 1.5, 2.5, 3.5]
    preds[preds < thrs[0]] = 0
    preds[(preds >= thrs[0]) & (preds < thrs[1])] = 1
    preds[(preds >= thrs[1]) & (preds < thrs[2])] = 2
    preds[(preds >= thrs[2]) & (preds < thrs[3])] = 3
    preds[preds >= thrs[3]] = 4
    preds = preds.astype('int')

    test_df['diagnosis'] = preds
    test_df.to_csv('submissions/%s.csv' % args.name, index=False)
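
The threshold chain at the end maps the continuous predictions onto classes 0-4; the same binning can be written in one call with np.digitize (an equivalent formulation, not what the script uses):

import numpy as np

preds = np.array([0.2, 1.7, 3.9, 2.4])
classes = np.digitize(preds, [0.5, 1.5, 2.5, 3.5])   # array([0, 2, 4, 2])
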
Example #11

# Training/Testing
if settings.phase == 'training':
    # Find patch locations or load from cache.
    patch_locations_path = join(settings.out_cache_path, 'patch_locations.pkl')
    if settings.find_patch_locations or not isfile(patch_locations_path):
        LOGGER.info('Calculating patch locations ...')
        find_and_store_patch_locations(settings)
    with open(patch_locations_path, 'rb') as handle:
        patch_locations = pickle.load(handle)
        LOGGER.info('Patch locations loaded ...')

    # Dataset iterators.
    LOGGER.info('Initializing training and validation datasets ...')
    training_dataset = Dataset(settings, patch_locations, phase='train')
    validation_dataset = Dataset(settings, patch_locations, phase='val')

    if settings.eval:
        model.restore_model('checkpoints-40000')
        validation_dataset = Dataset(settings, patch_locations, phase='val')
        error = model.run_inference_val(validation_dataset)
        LOGGER.info('Validation 3 pixel error: {}'.format(error))
    else:
        # Training.
        LOGGER.info('Starting training ...')
        model.fit(training_dataset, validation_dataset, optimizer,
                  settings.num_iterations)
        LOGGER.info('Training done ...')
elif settings.phase == 'testing':
    model.restore_model('checkpoints-40000')
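
The patch-location handling above is a compute-or-load-from-cache pattern; a generic standalone sketch of that pattern (load_or_build is a hypothetical helper, not part of this codebase):

import os
import pickle

def load_or_build(cache_path, build_fn):
    # Build and cache the object when the cache file is missing, then always load it from disk.
    if not os.path.isfile(cache_path):
        with open(cache_path, 'wb') as handle:
            pickle.dump(build_fn(), handle)
    with open(cache_path, 'rb') as handle:
        return pickle.load(handle)
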
Example #12
def main():
    args = parse_args()

    if args.name is None:
        args.name = '%s_%s' % (args.mode, args.arch)
    if not os.path.exists('models/%s' % args.name):
        os.makedirs('models/%s' % args.name)

    print('Config -----')
    for arg in vars(args):
        print('- %s: %s' % (arg, getattr(args, arg)))
    print('------------')

    with open('models/%s/args.txt' % args.name, 'w') as f:
        for arg in vars(args):
            print('- %s: %s' % (arg, getattr(args, arg)), file=f)

    joblib.dump(args, 'models/%s/args.pkl' % args.name)

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    # cudnn benchmark mode: slightly faster training at the cost of small run-to-run fluctuations
    cudnn.benchmark = True
    # cudnn deterministic mode: more reproducible, but slower
    # cudnn.deterministic = True

    img_path, img_labels, num_outputs = img_path_generator(
        dataset=args.train_dataset)
    if args.pred_type == 'regression':
        num_outputs = 1

    skf = StratifiedKFold(n_splits=args.n_splits,
                          shuffle=True,
                          random_state=30)
    img_paths = []
    labels = []
    for fold, (train_idx,
               val_idx) in enumerate(skf.split(img_path, img_labels)):
        img_paths.append((img_path[train_idx], img_path[val_idx]))
        labels.append((img_labels[train_idx], img_labels[val_idx]))

    train_transform = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        # transforms.RandomAffine(
        #     degrees=(args.rotate_min, args.rotate_max) if args.rotate else 0,
        #     translate=(args.translate_min, args.translate_max) if args.translate else None,
        #     scale=(args.rescale_min, args.rescale_max) if args.rescale else None,
        #     shear=(args.shear_min, args.shear_max) if args.shear else None,
        # ),
        transforms.RandomCrop(args.input_size, padding=4),
        transforms.RandomHorizontalFlip(),
        # transforms.RandomVerticalFlip(),
        # transforms.ColorJitter(
        #     brightness=0,
        #     contrast=args.contrast,
        #     saturation=0,
        #     hue=0),
        RandomErase(prob=args.random_erase_prob if args.random_erase else 0,
                    sl=args.random_erase_sl,
                    sh=args.random_erase_sh,
                    r=args.random_erase_r),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    val_transform = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.CenterCrop(args.input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    if args.loss == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss == 'FocalLoss':
        criterion = FocalLoss().cuda()
    elif args.loss == 'MSELoss':
        criterion = nn.MSELoss().cuda()
    elif args.loss == 'LabelSmoothingLoss':
        criterion = LabelSmoothingLoss(classes=num_outputs,
                                       smoothing=0.8).cuda()
    else:
        raise NotImplementedError

    folds = []
    best_losses = []
    best_ac_scores = []
    best_epochs = []

    for fold, ((train_img_paths, val_img_paths),
               (train_labels, val_labels)) in enumerate(zip(img_paths,
                                                            labels)):
        print('Fold [%d/%d]' % (fold + 1, len(img_paths)))

        # if os.path.exists('models/%s/model_%d.pth' % (args.name, fold+1)):
        #     log = pd.read_csv('models/%s/log_%d.csv' % (args.name, fold+1))
        #     best_loss, best_ac_score = log.loc[log['val_loss'].values.argmin(
        #     ), ['val_loss', 'val_score', 'val_ac_score']].values
        #     folds.append(str(fold + 1))
        #     best_losses.append(best_loss)
        #     best_ac_scores.append(best_ac_score)
        #     continue

        # train
        train_set = Dataset(train_img_paths,
                            train_labels,
                            transform=train_transform)

        train_loader = torch.utils.data.DataLoader(train_set,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=0,
                                                   sampler=None)

        val_set = Dataset(val_img_paths, val_labels, transform=val_transform)
        val_loader = torch.utils.data.DataLoader(val_set,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=0)

        # create model
        if args.mode == 'baseline':
            model = get_model(model_name=args.arch,
                              num_outputs=num_outputs,
                              freeze_bn=args.freeze_bn,
                              dropout_p=args.dropout_p)
        elif args.mode == 'gcn':
            model_path = 'models/%s/model_%d.pth' % ('baseline_' + args.arch,
                                                     fold + 1)
            if not os.path.exists(model_path):
                print('%s does not exist' % model_path)
                continue
            model = SoftLabelGCN(cnn_model_name=args.arch,
                                 cnn_pretrained=False,
                                 num_outputs=num_outputs)
            pretrained_dict = torch.load(model_path)
            model_dict = model.cnn.state_dict()
            pretrained_dict = {
                k: v
                for k, v in pretrained_dict.items() if k in model_dict
            }
            model_dict.update(pretrained_dict)
            model.cnn.load_state_dict(model_dict)
            for p in model.cnn.parameters():
                p.requires_grad = False
        else:
            # model = RA(cnn_model_name=args.arch, input_size=args.input_size, hidden_size=args.lstm_hidden,
            #            layer_num=args.lstm_layers, recurrent_num=args.lstm_recurrence, class_num=num_outputs, pretrain=True)
            model_path = 'models/%s/model_%d.pth' % ('baseline_' + args.arch,
                                                     fold + 1)
            if not os.path.exists(model_path):
                print('%s does not exist' % model_path)
                continue
            model = RA(cnn_model_name=args.arch,
                       input_size=args.input_size,
                       hidden_size=args.lstm_hidden,
                       layer_num=args.lstm_layers,
                       recurrent_num=args.lstm_recurrence,
                       class_num=num_outputs)
            pretrained_dict = torch.load(model_path)
            model_dict = model.cnn.state_dict()
            pretrained_dict = {
                k: v
                for k, v in pretrained_dict.items() if k in model_dict
            }
            model_dict.update(pretrained_dict)
            model.cnn.load_state_dict(model_dict)
            for p in model.cnn.parameters():
                p.requires_grad = False

        device = torch.device('cuda')
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model.to(device)
        # model = model.cuda()
        if args.pretrained_model is not None:
            model.load_state_dict(
                torch.load('models/%s/model_%d.pth' %
                           (args.pretrained_model, fold + 1)))

        # print(model)

        if args.optimizer == 'Adam':
            optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                          model.parameters()),
                                   lr=args.lr)
        elif args.optimizer == 'AdamW':
            optimizer = optim.AdamW(filter(lambda p: p.requires_grad,
                                           model.parameters()),
                                    lr=args.lr)
        elif args.optimizer == 'RAdam':
            optimizer = RAdam(filter(lambda p: p.requires_grad,
                                     model.parameters()),
                              lr=args.lr)
        elif args.optimizer == 'SGD':
            optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                         model.parameters()),
                                  lr=args.lr,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay,
                                  nesterov=args.nesterov)
            # optimizer = optim.SGD(model.get_config_optim(args.lr, args.lr * 10, args.lr * 10), lr=args.lr,
            #                       momentum=args.momentum, weight_decay=args.weight_decay, nesterov=args.nesterov)

        if args.scheduler == 'CosineAnnealingLR':
            scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                       T_max=args.epochs,
                                                       eta_min=args.min_lr)
        elif args.scheduler == 'ReduceLROnPlateau':
            scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       factor=args.factor,
                                                       patience=args.patience,
                                                       verbose=1,
                                                       min_lr=args.min_lr)

        # per-epoch metrics; converted with pd.DataFrame(log) when written to CSV below
        log = {
            'epoch': [],
            'loss': [],
            'ac_score': [],
            'val_loss': [],
            'val_ac_score': [],
        }

        best_loss = float('inf')
        best_ac_score = 0
        best_epoch = 0

        for epoch in range(args.epochs):
            print('Epoch [%d/%d]' % (epoch + 1, args.epochs))

            # train for one epoch
            train_loss, train_ac_score = train(args, train_loader, model,
                                               criterion, optimizer, epoch)

            # evaluate on validation set
            val_loss, val_ac_score = validate(args, val_loader, model,
                                              criterion)

            if args.scheduler == 'CosineAnnealingLR':
                scheduler.step()
            elif args.scheduler == 'ReduceLROnPlateau':
                scheduler.step(val_loss)

            print(
                'loss %.4f - ac_score %.4f - val_loss %.4f - val_ac_score %.4f'
                % (train_loss, train_ac_score, val_loss, val_ac_score))

            log['epoch'].append(epoch)
            log['loss'].append(train_loss)
            log['ac_score'].append(train_ac_score)
            log['val_loss'].append(val_loss)
            log['val_ac_score'].append(val_ac_score)

            pd.DataFrame(log).to_csv('models/%s/log_%d.csv' %
                                     (args.name, fold + 1),
                                     index=False)

            if val_ac_score > best_ac_score:
                if args.mode == 'baseline':
                    torch.save(
                        model.state_dict(),
                        'models/%s/model_%d.pth' % (args.name, fold + 1))
                best_loss = val_loss
                best_ac_score = val_ac_score
                best_epoch = epoch
                print("=> saved best model")

        print('val_loss:  %f' % best_loss)
        print('val_ac_score: %f' % best_ac_score)

        folds.append(str(fold + 1))
        best_losses.append(best_loss)
        best_ac_scores.append(best_ac_score)
        best_epochs.append(best_epoch)

        results = pd.DataFrame({
            'fold':
            folds + ['mean'],
            'best_loss':
            best_losses + [np.mean(best_losses)],
            'best_ac_score':
            best_ac_scores + [np.mean(best_ac_scores)],
            'best_epoch':
            best_epochs + [''],
        })

        print(results)
        results.to_csv('models/%s/results.csv' % args.name, index=False)
        torch.cuda.empty_cache()

        if not args.cv:
            break
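
Example #12 pre-collects per-fold index pairs from StratifiedKFold before training; a toy illustration of what skf.split yields (toy labels, not the project data):

import numpy as np
from sklearn.model_selection import StratifiedKFold

X = np.arange(8)
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=30)
for train_idx, val_idx in skf.split(X, y):
    print(train_idx, val_idx)   # each fold keeps the 0/1 class balance
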
Example #13
class TradingBot:
    def __init__(self, default_params, specific_params, db_client, exchange_client=None, is_backtest=False):
        # Initialize the bot's status and settings.
        # A db_client is required when running a backtest.

        self.exchange_client = exchange_client
        self.db_client = db_client

        self.default_params = default_params
        self.extract_default_params(self.default_params)

        self.specific_params = specific_params

        self.combined_params = dict(**self.default_params, **self.specific_params)

        self.is_backtest = is_backtest
        
        if is_backtest:
            # for params table
            self.backtest_management_table_name = self.bot_name + "_backtest_management"

            # backtest configure
            self.initial_balance = 100.0  # USD
            self.account_currency = "USD"
        
            self.dataset_manipulator = Dataset(self.db_client, self.exchange_client)

            if not self.db_client.is_table_exist(self.backtest_management_table_name):
                self.create_backtest_management_table()
            if self.db_client.is_table_exist("backtest_management"):
                # delete useless template table
                drop_query = "DROP TABLE backtest_management;"
                self.db_client.exec_sql(drop_query, return_df=False)


    def create_backtest_management_table(self):
        backtest_management_template = BacktestManagement
        table_def = backtest_management_template.__table__

        # add specific params columns
        table_def = self.append_specific_params_column(table_def)

        backtest_summary_id = Column("backtest_summary_id", Integer)
        table_def.relation = relationship("BacktestSummary")
        table_def.append_column(backtest_summary_id)

        table_def.name = self.backtest_management_table_name

        table_def.create(bind=self.db_client.connector)
        
        # add a foreign key constraint

        ctx = MigrationContext.configure(self.db_client.connector)
        op = Operations(ctx)

        with op.batch_alter_table(self.bot_name + "_backtest_management") as batch_op:
            batch_op.create_foreign_key("fk_management_summary", "backtest_summary", ["backtest_summary_id"], ["id"])

    def backtest_management_table(self):
        return Table(self.backtest_management_table_name, MetaData(bind=self.db_client.connector),
            autoload=True, autoload_with=self.db_client.connector)

    def append_specific_params_column(self, table_def):
        return table_def
        # Needs to be overridden; should return the (possibly extended) table definition.

    def extract_default_params(self, default_params):
        # default_params = {
        #    "bot_name" : bot_name, # used in bot name builder for log
        #    "timeframe": integer,
        #    "close_position_on_do_nothing": close_position_on_do_nothing,
        #    "inverse_trading": inverse_trading
        # }
        self.bot_name = default_params["bot_name"]
        self.timeframe = default_params["timeframe"]
        self.close_position_on_do_nothing = default_params["close_position_on_do_nothing"]
        self.inverse_trading = default_params["inverse_trading"]

    def run(self, ohlcv_df=None, backtest_start_time=None, backtest_end_time=None,
            floor_time=True):
        # datetime defaults are resolved at call time here; a datetime.now() default in the
        # signature would be evaluated only once, when the class is defined.
        if backtest_end_time is None:
            backtest_end_time = datetime.now()
        if backtest_start_time is None:
            backtest_start_time = backtest_end_time - timedelta(days=90)

        if ohlcv_df is not None:
            self.ohlcv_df = ohlcv_df
        else:
            self.ohlcv_df = self.dataset_manipulator.get_ohlcv(self.timeframe, backtest_start_time, backtest_end_time)

        self.ohlcv_with_metrics = self.calculate_metrics_for_backtest()

        if self.is_backtest:
            # for summary
            if floor_time:
                backtest_start_time = self.dataset_manipulator.floor_datetime_to_ohlcv(backtest_start_time, "up")
                backtest_end_time = self.dataset_manipulator.floor_datetime_to_ohlcv(backtest_end_time, "down")
            
            self.backtest_start_time = backtest_start_time
            self.backtest_end_time = backtest_end_time
            
            self.ohlcv_with_signals = self.calculate_signs_for_backtest().dropna()
            
            self.summary_id = self.init_summary()
            self.insert_backtest_transaction_logs()
            self.insert_params_management()
            self.update_summary()

    def bulk_insert(self):
        self.db_client.session.commit()

    def reset_backtest_result_with_params(self, default_params, specific_params):
        # for loop and serach optimal metrics value
        self.ohlcv_with_metrics = None
        self.ohlcv_with_signals = None
        self.summary_id = None
        self.closed_positions_df = None

        self.default_params = default_params
        self.extract_default_params(self.default_params)
        self.specific_params = specific_params
        self.combined_params = dict(**self.default_params, **self.specific_params)

    def insert_params_management(self):
        backtest_management = self.backtest_management_table()

        self.combined_params["backtest_summary_id"] = int(self.summary_id)
        del self.combined_params["bot_name"]

        self.db_client.connector.execute(backtest_management.insert().values(self.combined_params))

    def init_summary(self):
        summary = BacktestSummary().__table__
        init_summary = {
            "bot_name": self.bot_name,
            "initial_balance": self.initial_balance,
            "account_currency": self.account_currency
        }

        self.db_client.connector.execute(summary.insert().values(init_summary))
        # [FIXME] only valid for single-process execution; not safe to run in parallel
        return int(self.db_client.get_last_row("backtest_summary").index.array[0])
        
    def calculate_lot(self):
        return 1  # 100%
        # Override if needed; the default invests the entire available balance.

    def calculate_leverage(self):
        return 1  # 1x leverage
        # Override if needed.

    def calculate_metric(self):
        return "metric"

    def calculate_metrics_for_backtest(self):
        return "ohlcv_with_metric_dataframe"
        # Needs to be overridden.

    def calculate_sign(self):
        return "signal"
        # Needs to be overridden; should return one of "buy", "sell", "do_nothing".

    def calculate_signs_for_backtest(self):
        return "ohlcv_with_signal_data"
        # Needs to be overridden; should return a dataframe carrying "buy"/"sell"/"do_nothing" signals.

    def insert_backtest_transaction_logs(self):
        # Follow the signal column to decide entries and exits; hold at most one open position.

        position = None
        current_balance = self.initial_balance

        transaction_logs = []

        for row in self.ohlcv_with_signals.itertuples():  # self.ohlcv_with_signals is expected to be a DataFrame
            if row.signal == "buy":
                if position is not None and position.order_type == "long":
                    # inverse => close position
                    if self.inverse_trading:
                        position.close_position(row)
                        transaction_logs.append(position.generate_transaction_log(self.db_client, self.summary_id))
                        current_balance = position.current_balance
                        position = None
                    # normal => still holding
                    else:
                        pass

                elif position is not None and position.order_type == "short":
                    # inverse => still holding
                    if self.inverse_trading:
                        pass
                    # normal => close position
                    else:
                        position.close_position(row)
                        transaction_logs.append(position.generate_transaction_log(self.db_client, self.summary_id))
                        current_balance = position.current_balance
                        position = None
                else:
                    lot = self.calculate_lot()
                    leverage = self.calculate_leverage()
                    # inverse => open short position
                    if self.inverse_trading:
                        position = OrderPosition(row, "short", current_balance, lot, leverage, is_backtest=True)
                    else:
                        # normal => open long position
                        position = OrderPosition(row, "long", current_balance, lot, leverage, is_backtest=True)

            elif row.signal == "sell":
                if position is not None and position.order_type == "long":
                    # inverse => still holding
                    if self.inverse_trading:
                        pass
                    # normal => close position
                    else:
                        position.close_position(row)
                        transaction_logs.append(position.generate_transaction_log(self.db_client, self.summary_id))
                        current_balance = position.current_balance
                        position = None

                elif position is not None and position.order_type == "short":
                    # inverse => close position
                    if self.inverse_trading:
                        position.close_position(row)
                        transaction_logs.append(position.generate_transaction_log(self.db_client, self.summary_id))
                        current_balance = position.current_balance
                        position = None

                    # normal => still holding
                    else:
                        pass

                else:
                    lot = self.calculate_lot()
                    leverage = self.calculate_leverage()
                    # inverse => open long position
                    if self.inverse_trading:
                        position = OrderPosition(row, "long",current_balance, lot, leverage, is_backtest=True)
                    else:
                        # normal => open short position
                        position = OrderPosition(row, "short",current_balance, lot, leverage, is_backtest=True)

            elif row.signal == "do_nothing":
                if self.close_position_on_do_nothing:
                    # if do nothing option is true
                    # and you get do nothing from signal, then close out the position
                    if position is not None:
                        # close position
                        position.close_position(row)
                        transaction_logs.append(position.generate_transaction_log(self.db_client, self.summary_id))
                        current_balance = position.current_balance
                        position = None

        # processing time is proportional to the number of transaction logs
        self.db_client.session.bulk_insert_mappings(BacktestTransactionLog, transaction_logs)

        self.closed_positions_df = pd.DataFrame(transaction_logs)
        self.closed_positions_df["holding_time"] = self.closed_positions_df["close_time"] - \
            self.closed_positions_df["entry_time"]

    def update_summary(self):
        win_entries_condition = (self.closed_positions_df["profit_status"] == "win")
        win_row = self.closed_positions_df[(win_entries_condition)]
        lose_entries_condition = (self.closed_positions_df["profit_status"] == "lose")
        lose_row = self.closed_positions_df[(lose_entries_condition)]

        long_entries_condition = (self.closed_positions_df["order_type"] == "long")
        long_row = self.closed_positions_df[(long_entries_condition)]
        short_entries_condition = (self.closed_positions_df["order_type"] == "short")
        short_row = self.closed_positions_df[(short_entries_condition)]

        win_long_row = self.closed_positions_df[(win_entries_condition) & (long_entries_condition)]
        win_short_row = self.closed_positions_df[(win_entries_condition) & (short_entries_condition)]
        lose_long_row = self.closed_positions_df[(lose_entries_condition) & (long_entries_condition)]
        lose_short_row = self.closed_positions_df[(lose_entries_condition) & (short_entries_condition)]

        total_return = float(self.closed_positions_df.profit_size.sum())

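        # express the final balance as a percentage of the initial balance;
        # a negative final balance is reported as a loss of more than 100%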
        if self.closed_positions_df.iloc[-1].current_balance > 0:
            total_return_percentage = float(100 * (self.closed_positions_df.iloc[-1].current_balance / self.initial_balance))
        else:
            total_return_percentage = \
                float(-100 * (abs(self.closed_positions_df.iloc[-1].current_balance) + self.initial_balance) / self.initial_balance)

        win_consecutive = self.build_consecutive("win")
        lose_consecutive = self.build_consecutive("lose")
    
        drawdowns = self.build_drawdowns()

        if drawdowns["maximal_drawdown"] == 0:
            recovery_factor = 0
        else:
            recovery_factor = total_return / drawdowns["maximal_drawdown"]

        # total
        summary_dict = {
        "id": self.summary_id,
        "total_entry": len(self.closed_positions_df),

        "total_max_holding_ms": self.closed_positions_df["holding_time"].max().to_pytimedelta(),
        "total_average_holding_ms": self.closed_positions_df["holding_time"].mean().to_pytimedelta(),
        "total_min_holding_ms": self.closed_positions_df["holding_time"].min().to_pytimedelta(),

        "total_return": total_return,
        "total_return_average": float(self.closed_positions_df.profit_size.mean()),
        "total_standard_deviation": float(self.closed_positions_df.profit_size.std()),
        "total_skewness": float(self.closed_positions_df.profit_size.skew()),
        "total_kurtosis": float(self.closed_positions_df.profit_size.kurt()),
        "total_median": float(self.closed_positions_df.profit_size.median()),

        "total_return_percentage": total_return_percentage,
        "total_return_average_percentage": float(self.closed_positions_df.profit_percentage.mean()),
        "total_standard_deviation_percentage": float(self.closed_positions_df.profit_percentage.std()),
        "total_skewness_percentage": float(self.closed_positions_df.profit_percentage.skew()),
        "total_kurtosis_percentage": float(self.closed_positions_df.profit_percentage.kurt()),
        "total_median_percentage": float(self.closed_positions_df.profit_percentage.median()),

        "total_transaction_cost": float(self.closed_positions_df.transaction_cost.sum()),

        # win
        "win_entry": len(win_row),
        "win_average_holding_ms": win_row["holding_time"].mean().to_pytimedelta(),
        "win_rate": (len(win_row) / len(self.closed_positions_df)) * 100,

        "win_return": float(win_row.profit_size.sum()),
        "win_return_average": float(win_row.profit_size.mean()),
        "win_standard_deviation": float(win_row.profit_size.std()),
        "win_skewness": float(win_row.profit_size.skew()),
        "win_kurtosis": float(win_row.profit_size.kurt()),
        "win_median": float(win_row.profit_size.median()),

        "win_return_average_percentage": float(win_row.profit_percentage.mean()),
        "win_standard_deviation_percentage": float(win_row.profit_percentage.std()),
        "win_skewness_percentage": float(win_row.profit_percentage.skew()),
        "win_kurtosis_percentage": float(win_row.profit_percentage.kurt()),
        "win_median_percentage": float(win_row.profit_percentage.median()),

        "win_transaction_cost": float(win_row.transaction_cost.sum()),
            
        "win_consecutive_max_entry": win_consecutive["consecutive_max_entry"],
        "win_consecutive_average_entry": win_consecutive["consecutive_average_entry"],
        "win_consecutive_max_profit": float(win_consecutive["consecutive_df"].profit_size.sum()),

        "win_max_profit": float(win_row.profit_size.max()),
        "win_max_profit_percentage": float(win_row.profit_percentage.max()),
                
        # lose
        "lose_entry": len(lose_row),
        "lose_rate": (len(lose_row) / len(self.closed_positions_df)) * 100,
        "lose_average_holding_ms": lose_row["holding_time"].mean().to_pytimedelta(),
  
        "lose_return": float(lose_row.profit_size.sum()),
        "lose_return_average": float(lose_row.profit_size.mean()),
        "lose_standard_deviation": float(lose_row.profit_size.std()),
        "lose_skewness": float(lose_row.profit_size.skew()),
        "lose_kurtosis": float(lose_row.profit_size.kurt()),
        "lose_median": float(lose_row.profit_size.median()),

        "lose_return_average_percentage": float(lose_row.profit_percentage.mean()),
        "lose_standard_deviation_percentage": float(lose_row.profit_percentage.std()),
        "lose_skewness_percentage": float(lose_row.profit_percentage.skew()),
        "lose_kurtosis_percentage": float(lose_row.profit_percentage.kurt()),
        "lose_median_percentage": float(lose_row.profit_percentage.median()),

        "lose_transaction_cost": float(lose_row.transaction_cost.sum()),
            
        "lose_consecutive_max_entry": lose_consecutive["consecutive_max_entry"],
        "lose_consecutive_average_entry": lose_consecutive["consecutive_average_entry"],
        "lose_consecutive_max_loss": float(lose_consecutive["consecutive_df"].profit_size.sum()),

        "lose_max_loss": float(lose_row.profit_size.min()),
        "lose_max_loss_percentage": float(lose_row.profit_percentage.min()),

        # long
        "long_entry": len(long_row),
        "long_rate": (len(long_row) / len(self.closed_positions_df)) * 100,
        "long_average_holding_ms": long_row["holding_time"].mean().to_pytimedelta(),

        "long_return": float(long_row.profit_size.sum()),
        "long_return_average": float(long_row.profit_size.mean()),
        "long_standard_deviation": float(long_row.profit_size.std()),
        "long_skewness": float(long_row.profit_size.skew()),
        "long_kurtosis": float(long_row.profit_size.kurt()),
        "long_median": float(long_row.profit_size.median()),

        "long_return_average_percentage": float(long_row.profit_percentage.mean()),
        "long_standard_deviation_percentage": float(long_row.profit_percentage.std()),
        "long_skewness_percentage": float(long_row.profit_percentage.skew()),
        "long_kurtosis_percentage": float(long_row.profit_percentage.kurt()),
        "long_median_percentage": float(long_row.profit_percentage.median()),
            
        "long_max_profit": float(long_row.profit_size.max()),
        "long_max_profit_percentage": float(long_row.profit_percentage.max()),

        "long_max_loss": float(long_row.profit_size.min()),
        "long_max_loss_percentage": float(long_row.profit_percentage.min()),

        #short
        "short_entry": len(short_row),
        "short_rate": (len(short_row) / len(self.closed_positions_df)) * 100,
        "short_average_holding_ms": short_row["holding_time"].mean().to_pytimedelta(),

        "short_return": float(short_row.profit_size.sum()),
        "short_return_average": float(short_row.profit_size.mean()),
        "short_standard_deviation": float(short_row.profit_size.std()),
        "short_skewness": float(short_row.profit_size.skew()),
        "short_kurtosis": float(short_row.profit_size.kurt()),
        "short_median": float(short_row.profit_size.median()),

        "short_return_average_percentage": float(short_row.profit_percentage.mean()),
        "short_standard_deviation_percentage": float(short_row.profit_percentage.std()),
        "short_skewness_percentage": float(short_row.profit_percentage.skew()),
        "short_kurtosis_percentage": float(short_row.profit_percentage.kurt()),
        "short_median_percentage": float(short_row.profit_percentage.median()),
            
        "short_max_profit": float(short_row.profit_size.max()),
        "short_max_profit_percentage": float(short_row.profit_percentage.max()),

        "short_max_loss": float(short_row.profit_size.min()),
        "short_max_loss_percentage": float(short_row.profit_percentage.min()),

        # win long
        "win_long_entry": len(win_long_row),
        "win_long_average_holding_ms": win_long_row["holding_time"].mean().to_pytimedelta(),

        "win_long_return": float(win_long_row.profit_size.sum()),
        "win_long_return_average": float(win_long_row.profit_size.mean()),
        "win_long_standard_deviation": float(win_long_row.profit_size.std()),
        "win_long_skewness": float(win_long_row.profit_size.skew()),
        "win_long_kurtosis": float(win_long_row.profit_size.kurt()),
        "win_long_median": float(win_long_row.profit_size.median()),

        "win_long_return_average": float(win_long_row.profit_size.mean()),
        "win_long_return_average_percentage": float(win_long_row.profit_percentage.mean()),
        "win_long_standard_deviation_percentage": float(win_long_row.profit_percentage.std()),
        "win_long_skewness_percentage": float(win_long_row.profit_percentage.skew()),
        "win_long_kurtosis_percentage": float(win_long_row.profit_percentage.kurt()),
        "win_long_median_percentage": float(win_long_row.profit_percentage.median()),

        # win short
        "win_short_entry": len(win_short_row),
        "win_short_average_holding_ms": win_short_row["holding_time"].mean().to_pytimedelta(),

        "win_short_return": float(win_short_row.profit_size.sum()),
        "win_short_return_average": float(win_short_row.profit_size.mean()),
        "win_short_standard_deviation": float(win_short_row.profit_size.std()),
        "win_short_skewness": float(win_short_row.profit_size.skew()),
        "win_short_kurtosis": float(win_short_row.profit_size.kurt()),
        "win_short_median": float(win_short_row.profit_size.median()),

        "win_short_return_average_percentage": float(win_short_row.profit_percentage.mean()),
        "win_short_standard_deviation_percentage": float(win_short_row.profit_percentage.std()),
        "win_short_skewness_percentage": float(win_short_row.profit_percentage.skew()),
        "win_short_kurtosis_percentage": float(win_short_row.profit_percentage.kurt()),
        "win_short_median_percentage": float(win_short_row.profit_percentage.median()),

        # lose long
        "lose_long_entry": len(lose_long_row),
        "lose_long_average_holding_ms": lose_long_row["holding_time"].mean().to_pytimedelta(),

        "lose_long_return": float(lose_long_row.profit_size.sum()),
        "lose_long_return_average": float(lose_long_row.profit_size.mean()),
        "lose_long_standard_deviation": float(lose_long_row.profit_size.std()),
        "lose_long_skewness": float(lose_long_row.profit_size.skew()),
        "lose_long_kurtosis": float(lose_long_row.profit_size.kurt()),
        "lose_long_median": float(lose_long_row.profit_size.median()),

        "lose_long_return_average_percentage": float(lose_long_row.profit_percentage.mean()),
        "lose_long_standard_deviation_percentage": float(lose_long_row.profit_percentage.std()),
        "lose_long_skewness_percentage": float(lose_long_row.profit_percentage.skew()),
        "lose_long_kurtosis_percentage": float(lose_long_row.profit_percentage.kurt()),
        "lose_long_median_percentage": float(lose_long_row.profit_percentage.median()),

        # lose short
        "lose_short_entry": len(lose_short_row),
        "lose_short_average_holding_ms": lose_short_row["holding_time"].mean().to_pytimedelta(),

        "lose_short_return": float(lose_short_row.profit_size.sum()),
        "lose_short_return_average": float(lose_short_row.profit_size.mean()),
        "lose_short_standard_deviation": float(lose_short_row.profit_size.std()),
        "lose_short_skewness": float(lose_short_row.profit_size.skew()),
        "lose_short_kurtosis": float(lose_short_row.profit_size.kurt()),
        "lose_short_median": float(lose_short_row.profit_size.median()),

        "lose_short_return_average_percentage": float(lose_short_row.profit_percentage.mean()),
        "lose_short_standard_deviation_percentage": float(lose_short_row.profit_percentage.std()),
        "lose_short_skewness_percentage": float(lose_short_row.profit_percentage.skew()),
        "lose_short_kurtosis_percentage": float(lose_short_row.profit_percentage.kurt()),
        "lose_short_median_percentage": float(lose_short_row.profit_percentage.median()),

        # other metrics
        "backtest_start_time": self.backtest_start_time,
        "backtest_end_time": self.backtest_end_time,

        "bot_name": self.bot_name,
        "initial_balance": self.initial_balance,
        "account_currency": self.account_currency,


        "absolute_drawdown": float(drawdowns["absolute_drawdown"]),
        "maximal_drawdown": float(drawdowns["maximal_drawdown"]),
        "relative_drawdown": float(drawdowns["relative_drawdown"]),

        "profit_factor": float(win_row.profit_size.sum() / abs(lose_row.profit_size.sum())),
        "recovery_factor": recovery_factor
        }

        self.db_client.session.bulk_update_mappings(BacktestSummary, [summary_dict])

    def build_drawdowns(self):
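        # drawdown metrics over the sequence of closed positions:
        #   absolute_drawdown - gap between the initial balance and the lowest balance reached
        #   maximal_drawdown  - largest peak-to-trough decline of the balance (account currency)
        #   relative_drawdown - largest peak-to-trough decline as a percentage of that peak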
        if self.closed_positions_df.current_balance.min() < 0:
            absolute_drawdown = self.initial_balance + self.closed_positions_df.current_balance.min() 
        else:
            absolute_drawdown = self.initial_balance - self.closed_positions_df.current_balance.min() 

        current_drawdown = 0
        current_relative_drawdown = 0

        max_balance = self.initial_balance

        maximal_drawdown = 0
        relative_drawdown = 0

        for log in self.closed_positions_df.itertuples():
            if max_balance < log.current_balance:
                max_balance = log.current_balance
            else:

                if log.current_balance > 0:
                    current_drawdown = max_balance - log.current_balance
                else:
                    current_drawdown = max_balance + log.current_balance

                current_relative_drawdown = (abs(current_drawdown) / max_balance)*100

                if maximal_drawdown < current_drawdown:
                    maximal_drawdown = current_drawdown

                if relative_drawdown < current_relative_drawdown:
                    relative_drawdown = current_relative_drawdown
        
        result = {
            "absolute_drawdown" : absolute_drawdown,
            "maximal_drawdown": maximal_drawdown,
            "relative_drawdown" : relative_drawdown
        }

        return result

    def build_consecutive(self, profit_status):
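        # find the longest run of consecutive entries with the given
        # profit_status ("win" or "lose"); returns that slice of
        # closed_positions_df plus the max and average run lengths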
        current_start_index = 0
        current_end_index = 0

        max_start_index = 0
        max_consecutive = 0
        max_end_index = 0

        consecutive_flag = False

        consecutive_win_lose_entries = []

        profit_status_df = self.closed_positions_df.loc[:, ["profit_status"]]
        profit_status_np = profit_status_df.to_numpy(copy=True)

        for row_id, row in enumerate(profit_status_np):
            if row[0] == profit_status:
                if consecutive_flag is False:
                    # start counting a new consecutive run
                    current_start_index = row_id
                    consecutive_flag = True
            else:
                if consecutive_flag:
                    current_end_index = row_id

                    if max_consecutive <= current_end_index - current_start_index:
                        max_start_index = current_start_index
                        max_end_index = current_end_index - 1
                        max_consecutive = current_end_index - current_start_index

                    consecutive_win_lose_entries.append(current_end_index - current_start_index)
                    consecutive_flag = False

        consecutive_max_entry = np.max(consecutive_win_lose_entries)
        consecutive_average_entry = np.mean(consecutive_win_lose_entries)

        return_hash = {
            "consecutive_df": self.closed_positions_df.loc[max_start_index:max_end_index],
            "consecutive_max_entry": int(consecutive_max_entry),
            "consecutive_average_entry": float(consecutive_average_entry)
        }

        return return_hash


def main():
    parser = argparse.ArgumentParser(
        description=
        "Generate some useful quantile graphs and perhaps a few histograms too"
    )
    parser.add_argument("data_directory",
                        help="the location where we are storing the data.")
    parser.add_argument(
        "output_directory",
        help="the location where we will want to output the graphs.")
    args = parser.parse_args()

    def make_graph(az, insttype, data_before, data_after, interval, axes):
        print("building a graph for az: %s insttype: %s" % (az, insttype))
        print(
            "\twe have %d datapoints from before, and %d datapoints from after"
            % (len(data_before), len(data_after)))

        # create the price data models
        data_before = PriceTimeSeriesNaive(data_before)
        data_after = PriceTimeSeriesNaive(data_after)

        expanded_before = list(data_before.expand_with_interval(350))
        expanded_after = list(data_after.expand_with_interval(350))

        print("\tbefore expanded length: %d" % len(expanded_before))
        print("\tafter expanded length: %d" % len(expanded_after))

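        # group the expanded samples into fixed-width time buckets
        # (timestamp // interval) and wrap each bucket for quantile queries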
        buckets_before = [
            PriceBucketNaive(list(bucket))
            for idx, bucket in itertools.groupby(
                expanded_before, lambda x: x["timestamp"] // interval)
        ]
        buckets_after = [
            PriceBucketNaive(list(bucket))
            for idx, bucket in itertools.groupby(
                expanded_after, lambda x: x["timestamp"] // interval)
        ]
        expanded_before, expanded_after = None, None  # we null them so that their memory can be collected if need be

        print("\tbuckets before length: %d" % len(buckets_before))
        print("\tbuckets after length: %d" % len(buckets_after))

        min_len = min(len(buckets_before), len(buckets_after))
        buckets_before = buckets_before[-min_len:]
        buckets_after = buckets_after[-min_len:]

        series_avg_before = [bucket.getAverage() for bucket in buckets_before]
        series_avg_after = [bucket.getAverage() for bucket in buckets_after]
        series_p50_before = [
            bucket.getQuantile(0.5)["price"] for bucket in buckets_before
        ]
        series_p50_after = [
            bucket.getQuantile(0.5)["price"] for bucket in buckets_after
        ]
        series_p66_before = [
            bucket.getQuantile(0.66)["price"] for bucket in buckets_before
        ]
        series_p66_after = [
            bucket.getQuantile(0.66)["price"] for bucket in buckets_after
        ]
        series_p95_before = [
            bucket.getQuantile(0.95)["price"] for bucket in buckets_before
        ]
        series_p95_after = [
            bucket.getQuantile(0.95)["price"] for bucket in buckets_after
        ]

        # f = plt.figure(figsize=(5, 10))

        xvalues = list(range(-min_len, 0))
        axes[0].set_title("%s-%s averages\nduration: %d" %
                          (az, insttype, interval / 3600))
        axes[0].set_ylabel("Price $")
        axes[0].plot(xvalues, series_avg_before, label="before")
        axes[0].plot(xvalues, series_avg_after, label="after")

        axes[1].set_title("%s-%s q.5\nduration: %d" %
                          (az, insttype, interval / 3600))
        axes[1].set_ylabel("Price $")
        axes[1].plot(xvalues, series_p50_before, label="before")
        axes[1].plot(xvalues, series_p50_after, label="after")

        axes[2].set_title("%s-%s q.66\nduration: %d" %
                          (az, insttype, interval / 3600))
        axes[2].set_ylabel("Price $")
        axes[2].plot(xvalues, series_p66_before, label="before")
        axes[2].plot(xvalues, series_p66_after, label="after")

        axes[3].set_title("%s-%s q.95\nduration: %d" %
                          (az, insttype, interval / 3600))
        axes[3].set_ylabel("Price $")
        axes[3].plot(xvalues, series_p95_before, label="before")
        axes[3].plot(xvalues, series_p95_after, label="after")

        # plt.legend()
        # plt.tight_layout()
        # f.savefig(path.join(args.output_directory, "%s_%s_interval%d.png" % (az, insttype, interval / 3600)), bbox_inches="tight", dpi=150)
        # plt.close()

    def transform_query_results(results):
        return ({
            "timestamp": result[0],
            "price": result[1]
        } for result in results)

    with Dataset(args.data_directory) as dataset:
        for az, insttype in dataset.get_databases(
        ):  # misleading function name
            conn = dataset.open(az, insttype)

            # compute the most recent timestamp so we can determine a good
            # duration for the intervals we will be examining
            c = conn.cursor()
            c.execute("SELECT MAX(timestamp) AS timestamp FROM prices")
            most_recent_ts = c.fetchone()[0]
            interval_duration = most_recent_ts - AFTER_START_EPOCH

            if interval_duration < 0: continue

            # fetch the data for the before window
            c = conn.cursor()
            c.execute(
                "SELECT timestamp, price FROM prices  "
                " WHERE timestamp > %d AND timestamp < %d ORDER BY timestamp" %
                (BEFORE_END_EPOCH - interval_duration, BEFORE_END_EPOCH))
            data_before = list(transform_query_results(c.fetchall()))
            c = conn.cursor()
            c.execute("SELECT timestamp, price FROM prices " +
                      " WHERE timestamp > %d ORDER BY timestamp " %
                      (AFTER_START_EPOCH))
            data_after = list(transform_query_results(c.fetchall()))

            if len(data_before) == 0 or len(data_after) == 0: continue

            print("producing a graph for %s - %s" % (az, insttype))

            f, axes = plt.subplots(4, 4, figsize=(24, 12))
            axes = axes.flatten()

            make_graph(az, insttype, list(data_before), list(data_after), 3600,
                       axes[0::4])
            make_graph(az, insttype, list(data_before), list(data_after),
                       8 * 3600, axes[1::4])
            make_graph(az, insttype, list(data_before), list(data_after),
                       24 * 3600, axes[2::4])
            make_graph(az, insttype, list(data_before), list(data_after),
                       3 * 24 * 3600, axes[3::4])

            plt.legend()
            plt.tight_layout()
            f.savefig(path.join(args.output_directory,
                                "%s_%s.png" % (az, insttype)),
                      bbox_inches="tight",
                      dpi=150)
            plt.close()
Example #15
0
def main():
    test_args = parse_args()

    args = joblib.load('models/%s/args.pkl' % test_args.name)

    print('Config -----')
    for arg in vars(args):
        print('%s: %s' % (arg, getattr(args, arg)))
    print('------------')

    if args.pred_type == 'all':
        num_outputs = 6
    elif args.pred_type == 'except_any':
        num_outputs = 5
    else:
        raise NotImplementedError

    # create model
    model_path = 'models/%s/model.pth' % args.name
    model = get_model(model_name=args.arch,
                      num_outputs=num_outputs,
                      freeze_bn=args.freeze_bn,
                      dropout_p=args.dropout_p,
                      pooling=args.pooling,
                      lp_p=args.lp_p)
    model = model.cuda()
    model.load_state_dict(torch.load(model_path))

    model.eval()

    cudnn.benchmark = True

    test_transform = Compose([
        transforms.Resize(args.img_size, args.img_size),
        ForegroundCenterCrop(args.crop_size),
        transforms.Normalize(mean=model.mean, std=model.std),
        ToTensor(),
    ])

    # data loading code
    # if args.img_type:
    #     stage_1_test_dir = 'processed/stage_1_test_%s' %args.img_type
    # else:
    #     stage_1_test_dir = 'processed/stage_1_test'
    if args.img_type:
        stage_2_test_dir = 'processed/stage_2_test_%s' % args.img_type
    else:
        stage_2_test_dir = 'processed/stage_2_test'

    # test_df = pd.read_csv('inputs/stage_1_sample_submission.csv')
    test_df = pd.read_csv('inputs/stage_2_sample_submission.csv')
    # test_img_paths = np.array([stage_1_test_dir + '/' + '_'.join(s.split('_')[:-1]) + '.png' for s in test_df['ID']][::6])
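    # the submission file has six rows per image (one per label column),
    # so step through the IDs with a stride of 6 to get one path per image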
    test_img_paths = np.array([
        stage_2_test_dir + '/' + '_'.join(s.split('_')[:-1]) + '.png'
        for s in test_df['ID']
    ][::6])
    test_labels = np.array([
        test_df.loc[c::6, 'Label'].values for c in range(6)
    ]).T.astype('float32')

    test_set = Dataset(test_img_paths, test_labels, transform=test_transform)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.num_workers)

    preds = []
    preds_fold = []
    with torch.no_grad():
        for i, (input, _) in tqdm(enumerate(test_loader),
                                  total=len(test_loader)):
            outputs = []
            for input in apply_tta(args, input):
                input = input.cuda()
                output = model(input)
                output = torch.sigmoid(output)
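                # 'except_any' models predict only the five subtypes, so the
                # 'any' probability is approximated by the max of those outputs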
                if args.pred_type == 'except_any':
                    output = torch.cat(
                        [output, torch.max(output, 1, keepdim=True)[0]], 1)
                outputs.append(output.data.cpu().numpy())
            preds_fold.extend(np.mean(outputs, axis=0))
    preds_fold = np.vstack(preds_fold)
    preds.append(preds_fold)

    preds = np.mean(preds, axis=0)

    test_df = pd.DataFrame(preds,
                           columns=[
                               'epidural', 'intraparenchymal',
                               'intraventricular', 'subarachnoid', 'subdural',
                               'any'
                           ])
    test_df['ID'] = test_img_paths

    # Unpivot table, i.e. wide (N x 6) to long format (6N x 1)
    test_df = test_df.melt(id_vars=['ID'])

    # Combine the filename column with the variable column
    test_df['ID'] = test_df.ID.apply(lambda x: os.path.basename(x).replace(
        '.png', '')) + '_' + test_df.variable
    test_df['Label'] = test_df['value']

    if test_args.hflip:
        args.name += '_hflip'
    test_df[['ID', 'Label']].to_csv('submissions/%s.csv' % args.name,
                                    index=False)
Example #16
0
from datetime import datetime, timedelta
from pathlib import Path

from lib.dataset import Dataset

from client.db_client import DBClient
from client.exchange_client import ExchangeClient

bitmex_exchange_client = ExchangeClient("bitmex", "config.ini")
mysql_client = DBClient("mysql", "config.ini")

dataset_manager = Dataset(mysql_client, bitmex_exchange_client, True)

start_time = datetime.now() - timedelta(days=200)
end_time = datetime.now()

dataset_manager.update_ohlcv("bitmex", start_time)
Example #17
0
def main():
    parser = argparse.ArgumentParser(description="Generate some useful quantile graphs and perhaps a few histograms too")
    parser.add_argument("data_directory", help="the location where we are storing the data.")
    parser.add_argument("output_file", help="the file where we will dump the JSON result aggregate data for analysis")
    args = parser.parse_args()

    not_enough_data = []
    cost_increased_types = []
    cost_decreased_types = []

    def perform_analysis(az, insttype, data_before, data_after):
        data_before = PriceTimeSeriesNaive(data_before)
        data_after = PriceTimeSeriesNaive(data_after)

        days_required = 90
        hours_required = days_required * 24
        
        bucket_interval = 350 # bucket interval in seconds
        bucket_before = PriceBucketNaive(data_before.expand_with_interval(bucket_interval))
        bucket_after = PriceBucketNaive(data_after.expand_with_interval(bucket_interval))

        SECONDS_IN_DAY = 3600 * 24
        BUCKETS_PER_DAY = SECONDS_IN_DAY / bucket_interval
        BUCKETS_PER_HOUR = 3600 / bucket_interval
        SIZE_REQUIREMENT = int(BUCKETS_PER_DAY * days_required)
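        # require roughly `days_required` days of bucketed samples on both
        # sides of the cutoff before attempting a comparison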
        if bucket_before.size() < SIZE_REQUIREMENT or bucket_after.size() < SIZE_REQUIREMENT:
            print("\tnot enough data for analysis")
            not_enough_data.append((az, insttype))
            return 
        print("\tanalyzing!")
        # shrink down to exactly the size requirement
        bucket_before = bucket_before.shrink_to_size(SIZE_REQUIREMENT)
        bucket_after = bucket_after.shrink_to_size(SIZE_REQUIREMENT)

        mean = bucket_before.getAverage()
        median = bucket_before.getQuantile(0.5)["price"]
        
        # return the result object containing aggregates of the values we computed
        return {
            "key": (az, insttype),
            "interval_days": days_required, # how many days we are looking at
            "total_cost_before": bucket_before.getAverage() * hours_required,
            "total_cost_after": bucket_after.getAverage() * hours_required,
            "mean_price_before": bucket_before.getAverage(),
            "mean_price_after": bucket_after.getAverage(),
            "median_price_before": bucket_before.getQuantile(0.5),
            "median_price_after": bucket_after.getQuantile(0.5),
        }

    def transform_query_results(results):
        return ({"timestamp": result[0], "price": result[1]} for result in results)

    results = []

    with Dataset(args.data_directory) as dataset:
        count = 0 
        for az, insttype in dataset.get_databases(): # misleading function name            
            conn = dataset.open(az, insttype)
            
            # compute the most recent timestamp so we can determine a good
            # duration for the intervals we will be examining
            c = conn.cursor()
            c.execute("SELECT MAX(timestamp) AS timestamp FROM prices")
            most_recent_ts = c.fetchone()[0]
            interval_duration = most_recent_ts - AFTER_START_EPOCH

            if interval_duration < 0: continue 
            
            # fetch the data for the before window
            c = conn.cursor()
            c.execute("SELECT timestamp, price FROM prices  "
                " WHERE timestamp > %d AND timestamp < %d ORDER BY timestamp" 
                % (BEFORE_END_EPOCH - interval_duration, BEFORE_END_EPOCH))
            data_before = list(transform_query_results(c.fetchall()))
            c = conn.cursor()
            c.execute("SELECT timestamp, price FROM prices " +
                " WHERE timestamp > %d ORDER BY timestamp " % (AFTER_START_EPOCH))
            data_after = list(transform_query_results(c.fetchall()))

            if len(data_before) == 0 or len(data_after) == 0: continue

            print("analyzing data for %s - %s" % (az, insttype))

            results.append(
                perform_analysis(az, insttype, data_before, data_after)
            )

    results = list(filter(lambda x: x is not None, results))
    results.sort(key=lambda r: r["key"])

    with open(args.output_file, "w") as f:
        json.dump(results, f)
Example #18
0
def main():
    args = parse_args()

    if args.name is None:
        args.name = '%s_%s' % (args.arch, datetime.now().strftime('%m%d%H'))

    if not os.path.exists('models/%s' % args.name):
        os.makedirs('models/%s' % args.name)

    print('Config -----')
    for arg in vars(args):
        print('- %s: %s' % (arg, getattr(args, arg)))
    print('------------')

    with open('models/%s/args.txt' % args.name, 'w') as f:
        for arg in vars(args):
            print('- %s: %s' % (arg, getattr(args, arg)), file=f)

    joblib.dump(args, 'models/%s/args.pkl' % args.name)

    if args.loss == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss == 'FocalLoss':
        criterion = FocalLoss().cuda()
    elif args.loss == 'MSELoss':
        criterion = nn.MSELoss().cuda()
    elif args.loss == 'multitask':
        criterion = {
            'classification': nn.CrossEntropyLoss().cuda(),
            'regression': nn.MSELoss().cuda(),
        }
    else:
        raise NotImplementedError

    if args.pred_type == 'classification':
        num_outputs = 5
    elif args.pred_type == 'regression':
        num_outputs = 1
    elif args.loss == 'multitask':
        num_outputs = 6
    else:
        raise NotImplementedError

    cudnn.benchmark = True

    model = get_model(model_name=args.arch,
                      num_outputs=num_outputs,
                      freeze_bn=args.freeze_bn,
                      dropout_p=args.dropout_p)

    train_transform = []
    train_transform = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.RandomAffine(
            degrees=(args.rotate_min, args.rotate_max) if args.rotate else 0,
            translate=(args.translate_min, args.translate_max) if args.translate else None,
            scale=(args.rescale_min, args.rescale_max) if args.rescale else None,
            shear=(args.shear_min, args.shear_max) if args.shear else None,
        ),
        transforms.CenterCrop(args.input_size),
        transforms.RandomHorizontalFlip(p=0.5 if args.flip else 0),
        transforms.RandomVerticalFlip(p=0.5 if args.flip else 0),
        transforms.ColorJitter(
            brightness=0,
            contrast=args.contrast,
            saturation=0,
            hue=0),
        RandomErase(
            prob=args.random_erase_prob if args.random_erase else 0,
            sl=args.random_erase_sl,
            sh=args.random_erase_sh,
            r=args.random_erase_r),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    val_transform = transforms.Compose([
        transforms.Resize((args.img_size, args.input_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # data loading code
    if 'diabetic_retinopathy' in args.train_dataset:
        diabetic_retinopathy_dir = preprocess(
            'diabetic_retinopathy',
            args.img_size,
            scale=args.scale_radius,
            norm=args.normalize,
            pad=args.padding,
            remove=args.remove)
        diabetic_retinopathy_df = pd.read_csv('inputs/diabetic-retinopathy-resized/trainLabels.csv')
        diabetic_retinopathy_img_paths = \
            diabetic_retinopathy_dir + '/' + diabetic_retinopathy_df['image'].values + '.jpeg'
        diabetic_retinopathy_labels = diabetic_retinopathy_df['level'].values

    if 'aptos2019' in args.train_dataset:
        aptos2019_dir = preprocess(
            'aptos2019',
            args.img_size,
            scale=args.scale_radius,
            norm=args.normalize,
            pad=args.padding,
            remove=args.remove)
        aptos2019_df = pd.read_csv('inputs/train.csv')
        aptos2019_img_paths = aptos2019_dir + '/' + aptos2019_df['id_code'].values + '.png'
        aptos2019_labels = aptos2019_df['diagnosis'].values

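    # build per-fold (train, val) pairs of image paths and labels for the
    # selected training dataset combination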
    if args.train_dataset == 'aptos2019':
        skf = StratifiedKFold(n_splits=args.n_splits, shuffle=True, random_state=41)
        img_paths = []
        labels = []
        for fold, (train_idx, val_idx) in enumerate(skf.split(aptos2019_img_paths, aptos2019_labels)):
            img_paths.append((aptos2019_img_paths[train_idx], aptos2019_img_paths[val_idx]))
            labels.append((aptos2019_labels[train_idx], aptos2019_labels[val_idx]))
    elif args.train_dataset == 'diabetic_retinopathy':
        img_paths = [(diabetic_retinopathy_img_paths, aptos2019_img_paths)]
        labels = [(diabetic_retinopathy_labels, aptos2019_labels)]
    elif 'diabetic_retinopathy' in args.train_dataset and 'aptos2019' in args.train_dataset:
        skf = StratifiedKFold(n_splits=args.n_splits, shuffle=True, random_state=41)
        img_paths = []
        labels = []
        for fold, (train_idx, val_idx) in enumerate(skf.split(aptos2019_img_paths, aptos2019_labels)):
            img_paths.append((np.hstack((aptos2019_img_paths[train_idx], diabetic_retinopathy_img_paths)), aptos2019_img_paths[val_idx]))
            labels.append((np.hstack((aptos2019_labels[train_idx], diabetic_retinopathy_labels)), aptos2019_labels[val_idx]))
    # else:
    #     raise NotImplementedError

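    # pseudo-labeling: append model predictions on the test set to every
    # training fold as extra training examples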
    if args.pseudo_labels:
        test_df = pd.read_csv('probs/%s.csv' % args.pseudo_labels)
        test_dir = preprocess(
            'test',
            args.img_size,
            scale=args.scale_radius,
            norm=args.normalize,
            pad=args.padding,
            remove=args.remove)
        test_img_paths = test_dir + '/' + test_df['id_code'].values + '.png'
        test_labels = test_df['diagnosis'].values
        for fold in range(len(img_paths)):
            img_paths[fold] = (np.hstack((img_paths[fold][0], test_img_paths)), img_paths[fold][1])
            labels[fold] = (np.hstack((labels[fold][0], test_labels)), labels[fold][1])

    if 'messidor' in args.train_dataset:
        test_dir = preprocess(
            'messidor',
            args.img_size,
            scale=args.scale_radius,
            norm=args.normalize,
            pad=args.padding,
            remove=args.remove)

    folds = []
    best_losses = []
    best_scores = []

    for fold, ((train_img_paths, val_img_paths), (train_labels, val_labels)) in enumerate(zip(img_paths, labels)):
        print('Fold [%d/%d]' %(fold+1, len(img_paths)))

        if os.path.exists('models/%s/model_%d.pth' % (args.name, fold+1)):
            log = pd.read_csv('models/%s/log_%d.csv' %(args.name, fold+1))
            best_loss, best_score = log.loc[log['val_loss'].values.argmin(), ['val_loss', 'val_score']].values
            folds.append(str(fold + 1))
            best_losses.append(best_loss)
            best_scores.append(best_score)
            continue

        if args.remove_duplicate:
            md5_df = pd.read_csv('inputs/strMd5.csv')
            duplicate_img_paths = aptos2019_dir + '/' + md5_df[(md5_df.strMd5_count > 1) & (~md5_df.diagnosis.isnull())]['id_code'].values + '.png'
            print(duplicate_img_paths)
            for duplicate_img_path in duplicate_img_paths:
                train_labels = train_labels[train_img_paths != duplicate_img_path]
                train_img_paths = train_img_paths[train_img_paths != duplicate_img_path]
                val_labels = val_labels[val_img_paths != duplicate_img_path]
                val_img_paths = val_img_paths[val_img_paths != duplicate_img_path]

        # train
        train_set = Dataset(
            train_img_paths,
            train_labels,
            transform=train_transform)

        _, class_sample_counts = np.unique(train_labels, return_counts=True)
        # class-aware sampling: draw examples with inverse class-frequency
        # weights; otherwise fall back to plain shuffling
        if args.class_aware:
            weights = 1. / torch.tensor(class_sample_counts, dtype=torch.float)
            samples_weights = weights[train_labels]
            sampler = WeightedRandomSampler(
                weights=samples_weights,
                num_samples=11000,
                replacement=False)
        else:
            sampler = None
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batch_size,
            shuffle=not args.class_aware,
            num_workers=4,
            sampler=sampler)

        val_set = Dataset(
            val_img_paths,
            val_labels,
            transform=val_transform)
        val_loader = torch.utils.data.DataLoader(
            val_set,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=4)

        # create model
        model = get_model(model_name=args.arch,
                          num_outputs=num_outputs,
                          freeze_bn=args.freeze_bn,
                          dropout_p=args.dropout_p)
        model = model.cuda()
        if args.pretrained_model is not None:
            model.load_state_dict(torch.load('models/%s/model_%d.pth' % (args.pretrained_model, fold+1)))

        # print(model)

        if args.optimizer == 'Adam':
            optimizer = optim.Adam(
                filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
        elif args.optimizer == 'AdamW':
            optimizer = optim.AdamW(
                filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
        elif args.optimizer == 'RAdam':
            optimizer = RAdam(
                filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
        elif args.optimizer == 'SGD':
            optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr,
                                  momentum=args.momentum, weight_decay=args.weight_decay, nesterov=args.nesterov)

        if args.scheduler == 'CosineAnnealingLR':
            scheduler = lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=args.epochs, eta_min=args.min_lr)
        elif args.scheduler == 'ReduceLROnPlateau':
            scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=args.factor, patience=args.patience,
                                                       verbose=1, min_lr=args.min_lr)

        log = {
            'epoch': [],
            'loss': [],
            'score': [],
            'val_loss': [],
            'val_score': [],
        }

        best_loss = float('inf')
        best_score = 0
        for epoch in range(args.epochs):
            print('Epoch [%d/%d]' % (epoch + 1, args.epochs))

            # train for one epoch
            train_loss, train_score = train(
                args, train_loader, model, criterion, optimizer, epoch)
            # evaluate on validation set
            val_loss, val_score = validate(args, val_loader, model, criterion)

            if args.scheduler == 'CosineAnnealingLR':
                scheduler.step()
            elif args.scheduler == 'ReduceLROnPlateau':
                scheduler.step(val_loss)

            print('loss %.4f - score %.4f - val_loss %.4f - val_score %.4f'
                  % (train_loss, train_score, val_loss, val_score))

            log['epoch'].append(epoch)
            log['loss'].append(train_loss)
            log['score'].append(train_score)
            log['val_loss'].append(val_loss)
            log['val_score'].append(val_score)

            pd.DataFrame(log).to_csv('models/%s/log_%d.csv' % (args.name, fold+1), index=False)

            if val_loss < best_loss:
                torch.save(model.state_dict(), 'models/%s/model_%d.pth' % (args.name, fold+1))
                best_loss = val_loss
                best_score = val_score
                print("=> saved best model")

        print('val_loss:  %f' % best_loss)
        print('val_score: %f' % best_score)

        folds.append(str(fold + 1))
        best_losses.append(best_loss)
        best_scores.append(best_score)

        results = pd.DataFrame({
            'fold': folds + ['mean'],
            'best_loss': best_losses + [np.mean(best_losses)],
            'best_score': best_scores + [np.mean(best_scores)],
        })

        print(results)
        results.to_csv('models/%s/results.csv' % args.name, index=False)

        torch.cuda.empty_cache()

        if not args.cv:
            break
Example #19
0
def main():
    args, args_text = _parse_args()

    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    if args.local_rank == 0:
        print("rank:{0},word_size:{1},dist_url:{2}".format(
            local_rank, word_size, dist_url))

    assert args.model_selection in [14, 114, 470, 600, 285, 42]

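    # each model_selection value maps to one of the pre-defined child
    # architectures below (the number appears to track the model's size/FLOPs budget)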
    if args.model_selection == 470:
        arch_list = [[0], [3, 4, 3, 1], [3, 2, 3, 0], [3, 3, 3, 1],
                     [3, 3, 3, 3], [3, 3, 3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            [
                'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25',
                'ir_r1_k3_s1_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25'
            ],
            # stage 2, 56x56 in
            [
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s1_e4_c40_se0.25',
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'
            ],
            # stage 3, 28x28 in
            [
                'ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
                'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r2_k3_s1_e4_c80_se0.25'
            ],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            [
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'
            ],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 224
    elif args.model_selection == 42:
        arch_list = [[0], [3], [3, 1], [3, 1], [3, 3, 3], [3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25'],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            ['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 96
    elif args.model_selection == 14:
        arch_list = [[0], [3], [3, 3], [3, 3], [3], [3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k3_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e4_c80_se0.25'],
            # stage 4, 14x14in
            ['ir_r1_k3_s1_e6_c96_se0.25'],
            # stage 5, 14x14in
            ['ir_r1_k5_s2_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 64
    elif args.model_selection == 114:
        arch_list = [[0], [3], [3, 3], [3, 3], [3, 3, 3], [3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k3_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25'],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            ['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 160
    elif args.model_selection == 285:
        arch_list = [[0], [3], [3, 3], [3, 1, 3], [3, 3, 3, 3], [3, 3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            [
                'ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25',
                'ir_r1_k3_s2_e6_c80_se0.25'
            ],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            [
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25',
                'ir_r1_k5_s2_e6_c192_se0.25'
            ],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 224
    elif args.model_selection == 600:
        arch_list = [[0], [3, 3, 2, 3, 3], [3, 2, 3, 2, 3], [3, 2, 3, 2, 3],
                     [3, 3, 2, 2, 3, 3], [3, 3, 2, 3, 3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            [
                'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s2_e4_c24_se0.25',
                'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s2_e4_c24_se0.25',
                'ir_r1_k3_s2_e4_c24_se0.25'
            ],
            # stage 2, 56x56 in
            [
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25',
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25',
                'ir_r1_k5_s2_e4_c40_se0.25'
            ],
            # stage 3, 28x28 in
            [
                'ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
                'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
                'ir_r1_k3_s1_e4_c80_se0.25'
            ],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            [
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
                'ir_r1_k5_s1_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
                'ir_r1_k5_s1_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25'
            ],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 224

    model = _gen_childnet(arch_list,
                          arch_def,
                          num_classes=args.num_classes,
                          drop_rate=args.drop,
                          drop_path_rate=args.drop_path,
                          global_pool=args.gp,
                          bn_momentum=args.bn_momentum,
                          bn_eps=args.bn_eps,
                          pool_bn=args.pool_bn,
                          zero_gamma=args.zero_gamma)

    data_config = resolve_data_config(vars(args),
                                      model=model,
                                      verbose=args.local_rank == 0)

    if args.local_rank == 0:
        img_size = args.img_size or 224
        scope(model, input_size=(3, img_size, img_size))

    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = ''
    if args.local_rank == 0:
        output_base = args.output if args.output else './experiments/'
        exp_name = '-'.join([
            args.name,
            datetime.now().strftime("%Y%m%d-%H%M%S"), args.model,
            str(data_config['input_size'][-1])
        ])
        output_dir = get_outdir(output_base, 'test', exp_name)
        logger = get_logger(os.path.join(output_dir, 'test.log'))
        writer = SummaryWriter(os.path.join(output_dir, 'runs'))
        decreasing = True if eval_metric == 'loss' else False
        if not args.nosave:
            saver = CheckpointSaver(checkpoint_dir=output_dir,
                                    decreasing=decreasing)
        with open(os.path.join(output_dir, 'config.yaml'), 'w') as f:
            f.write(args_text)
    else:
        writer = None
        logger = None

    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
        if args.distributed and args.num_gpu > 1 and args.local_rank == 0:
            logger.warning(
                'Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.'
            )
            args.num_gpu = 1

    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    args.distributed = True
    if args.distributed:
        args.num_gpu = 1
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
    assert args.rank >= 0

    if args.local_rank == 0:
        if args.distributed:
            logger.info(
                'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
                % (args.rank, args.world_size))
        else:
            logger.info('Training with a single process on %d GPUs.' %
                        args.num_gpu)

    num_aug_splits = 0
    if args.aug_splits > 0:
        assert args.aug_splits > 1, 'A split of 1 makes no sense'
        num_aug_splits = args.aug_splits

    if args.split_bn:
        assert num_aug_splits > 1 or args.resplit
        model = convert_splitbn_model(model, max(num_aug_splits, 2))

    if os.path.exists(args.initial_checkpoint):
        load_checkpoint(model, args.initial_checkpoint)

    if args.local_rank == 0:
        logger.info('Model %s created, param count: %d' %
                    (args.model, sum([m.numel() for m in model.parameters()])))

    if args.num_gpu > 1:
        if args.amp:
            if args.local_rank == 0:
                logger.warning(
                    'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.'
                )
            args.amp = False
        model = nn.DataParallel(model,
                                device_ids=list(range(args.num_gpu))).cuda()
    else:
        model.cuda()

    optimizer = create_optimizer(args, model)

    use_amp = False
    if has_apex and args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        use_amp = True
    if args.local_rank == 0:
        logger.info('NVIDIA APEX {}. '
                    ' {}.'.format('installed' if has_apex else 'not installed',
                                  'on' if use_amp else 'off'))

    # optionally resume from a checkpoint
    resume_state = {}
    resume_epoch = None
    if args.resume:
        resume_state, resume_epoch = resume_checkpoint(model, args.resume)
    if resume_state and not args.no_resume_opt:
        if 'optimizer' in resume_state:
            if args.local_rank == 0:
                logging.info('Restoring Optimizer state from checkpoint')
            optimizer.load_state_dict(resume_state['optimizer'])
        if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__:
            if args.local_rank == 0:
                logging.info('Restoring NVIDIA AMP state from checkpoint')
            amp.load_state_dict(resume_state['amp'])
    del resume_state

    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEma(model,
                             decay=args.model_ema_decay,
                             device='cpu' if args.model_ema_force_cpu else '',
                             resume=args.resume)

    if args.distributed:
        if args.sync_bn:
            assert not args.split_bn
            try:
                if has_apex:
                    model = convert_syncbn_model(model)
                else:
                    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                        model)
                if args.local_rank == 0:
                    logger.info(
                        'Converted model to use Synchronized BatchNorm.')
            except Exception as e:
                if args.local_rank == 0:
                    logger.error(
                        'Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1'
                    )
        if has_apex:
            model = DDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                logger.info(
                    "Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP."
                )
            model = DDP(model, device_ids=[args.local_rank])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP

    eval_dir = os.path.join(args.data, 'val')
    if not os.path.exists(eval_dir):
        if args.local_rank == 0:
            logger.error(
                'Validation folder does not exist at: {}'.format(eval_dir))
        exit(1)
    dataset_eval = Dataset(eval_dir)

    loader_eval = create_loader(
        dataset_eval,
        input_size=data_config['input_size'],
        batch_size=args.validation_batch_size_multiplier * args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        crop_pct=data_config['crop_pct'],
        pin_memory=args.pin_mem,
    )

    validate_loss_fn = nn.CrossEntropyLoss().cuda()
    # model_ema is only created when args.model_ema is set, so fall back to
    # the plain model instead of dereferencing a None EMA wrapper
    eval_model = model_ema.ema if model_ema is not None else model
    validate(0,
             eval_model,
             loader_eval,
             validate_loss_fn,
             args,
             log_suffix=' (EMA)' if model_ema is not None else '',
             logger=logger,
             writer=writer)
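
The example above hands the EMA weights (`model_ema.ema`) to `validate`, relying on an exponential moving average of the model parameters. As a rough illustration of that idea only (not the actual `ModelEma` used above, which also handles device placement, buffers, and checkpoint resume), a minimal weight-EMA helper could look like this; the class name and the `decay` default are assumptions:

import copy

import torch


class SimpleEma:
    """Minimal sketch of a weight EMA; illustrative only."""

    def __init__(self, model, decay=0.9998):
        self.decay = decay
        self.ema = copy.deepcopy(model).eval()  # averaged copy used for eval
        for p in self.ema.parameters():
            p.requires_grad_(False)

    @torch.no_grad()
    def update(self, model):
        # ema_w <- decay * ema_w + (1 - decay) * current_w
        for ema_p, p in zip(self.ema.parameters(), model.parameters()):
            ema_p.mul_(self.decay).add_(p.detach(), alpha=1.0 - self.decay)
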
Example #20
from datetime import datetime, timedelta
from pathlib import Path

from client.exchange_client import ExchangeClient
from client.db_client import DBClient
from client.exchange_ws_client import WSClient
from lib.pandamex import PandaMex
from lib.time_ms import TimeMS
from lib.dataset import Dataset

from hypothesis_test.fetch_high_frequency_data_test import FetchHighFrequencyData
from hypothesis_test.h_price_move_probability import HPriceMovePlobability
from hypothesis_test.volatility_dependent_offset_test import VolatilityDependentOffsetTest

from bot.bottom_trend_follower import BottomTrendFollow

bitmex_exchange_client = ExchangeClient("bitmex",
                                        Path("tradingbot/config.ini"))

# influx_client = DBClient("influxdb")
# print(influx_client.connector.get_list_database())

mysql_client = DBClient("mysql", Path("tradingbot/config.ini"))

# update database
dataset_manager = Dataset(mysql_client, bitmex_exchange_client)
download_start_time = datetime.now() - timedelta(days=1000)
dataset_manager.update_ohlcv("bitmex", download_start_time)

bot_bot = BottomTrendFollow(db_client=mysql_client,
                            exchange_client=bitmex_exchange_client,
                            is_backtest=True)
bot_bot.run()
Example #21
class UVInpainting():
    def __init__(self, config, device, sess=None, graph=None):
        '''
        If game_lm is not None, the result (mesh obj and UV texture map)
        will be converted from nsh to the game.
        '''
        self.config = config
        self.name = config.name
        self.device = device
        self.sess = sess
        self.graph = graph
        self.log = logging.getLogger('x')
        self.rot_order = 'XYZ'

        self.debug = config.debug
        self.ex_idx = [4, 5, 8]

        self.inpaint_model = InpaintingModel(config,
                                             device,
                                             self.rot_order,
                                             debug=self.debug).to(device)
        # self.inpaint_model = InpaintingModel(config, device, self.debug)
        self.epoch = 0
        if config.restore:
            self.epoch = self.inpaint_model.load()
        # self.phase = config.phase

        if config.mode == 'train':
            num_test = 2048
            flist = glob(os.path.join(config.data_dir, '*_uv.png'))
            random.shuffle(flist)
            train_flist = flist[:-2 * num_test]
            val_flist = flist[-2 * num_test:-num_test]
            test_flist = flist[-num_test:]

            num_test = 300
            flist_gt = glob(os.path.join(config.data_gt_dir, '*_uv*.png'))
            random.shuffle(flist_gt)
            train_flist_gt = flist_gt[:-2 * num_test]
            val_flist_gt = flist_gt[-2 * num_test:-num_test]
            test_flist_gt = flist_gt[-num_test:]

            self.train_dataset = Dataset(config, train_flist_gt, train_flist)
            self.val_dataset = Dataset(config, val_flist_gt, val_flist)
            self.val_sample_iterator = self.val_dataset.create_iterator(
                config.batch_size)
            self.test_dataset = Dataset(config,
                                        test_flist_gt,
                                        test_flist,
                                        test=True)
            self.test_sample_iterator = self.test_dataset.create_iterator(
                config.batch_size)
            self.samples_dir = os.path.join('samples', config.name)
            os.makedirs(self.samples_dir, exist_ok=True)
        elif config.mode == 'test':
            self.test_dataset = Dataset(config, [], [], test=True)
            self.init_test()

    def train(self):
        if not self.train_dataset:
            self.log.info('No training data was provided!')
            return

        train_loader = DataLoader(dataset=self.train_dataset,
                                  batch_size=self.config.batch_size,
                                  num_workers=self.config.workers,
                                  drop_last=True,
                                  shuffle=True)

        writer = SummaryWriter('logs/' + self.config.name)

        while self.epoch < self.config.epochs:
            self.log.info('Training epoch: %d', self.epoch)
            self.epoch += 1

            for items in train_loader:
                self.inpaint_model.train()
                iteration = self.inpaint_model.iteration

                images, uvmaps, uvmap_gts, vertices, coeffs, rand_images, rand_uvmaps, rand_verts, rand_coeffs = self.to_device(
                    *items)

                _, gen_loss, im_dis_loss, uv_dis_loss, logs = self.inpaint_model.process(
                    images, uvmaps, uvmap_gts, vertices, coeffs)
                for k, v in logs.items():
                    writer.add_scalar(k, v, iteration)

                self.inpaint_model.backward(gen_loss=gen_loss,
                                            im_dis_loss=im_dis_loss,
                                            uv_dis_loss=uv_dis_loss)

                _, rand_gen_loss, rand_im_dis_loss, rand_uv_dis_loss, rand_logs = self.inpaint_model.process(
                    rand_images, rand_uvmaps, uvmap_gts, rand_verts,
                    rand_coeffs, False)
                self.inpaint_model.backward(gen_loss=rand_gen_loss,
                                            im_dis_loss=rand_im_dis_loss,
                                            uv_dis_loss=rand_uv_dis_loss)

                self.inpaint_model.iteration += 1

                # log model at checkpoints
                if self.config.log_interval and iteration % self.config.log_interval == 0:
                    info = 'Epoch: {} Iter:{}\n'.format(self.epoch, iteration)
                    info = create_log(logs, info)
                    self.log.info(info)

                    info = 'Epoch: {} Iter:{} RANDOM UVMAP\n'.format(
                        self.epoch, iteration)
                    info = create_log(rand_logs, info)
                    self.log.info(info)

                # sample model at checkpoints
                if self.config.sample_interval and iteration % self.config.sample_interval == 0:
                    self.val_sample()
                    self.test_sample()

                if self.config.ckpt_interval and iteration % self.config.ckpt_interval == 0:
                    self.inpaint_model.save(self.epoch)

        self.log.info('\nEnd training....')

    def val_sample(self, it=None):
        self.inpaint_model.eval()

        val_items = next(self.val_sample_iterator)
        images, uvmaps, uvmap_gts, vertices, coeffs, rand_images, rand_uvmaps, rand_verts, rand_coeffs = self.to_device(
            *val_items)

        gen_uvmaps, im_merged = self.sample(images, uvmaps, vertices, coeffs)
        rand_gen_uvmaps, rand_im_merged = self.sample(rand_images, rand_uvmaps,
                                                      rand_verts, rand_coeffs)

        iteration = self.inpaint_model.iteration
        if it is not None:
            iteration = it

        image_per_row = 2
        if self.config.batch_size <= 6:
            image_per_row = 1

        images = utils.stitch_images(
            utils.to_uint8_torch(images[:, :3]),
            utils.to_uint8_torch(uvmaps[:, :3]),
            utils.to_uint8_torch(gen_uvmaps[:, :3]),
            utils.to_uint8_torch(uvmap_gts),
            utils.to_uint8_torch(im_merged[:self.config.batch_size]),
            utils.to_uint8_torch(im_merged[self.config.batch_size:]),
            im_size=self.config.uv_size,
            img_per_row=image_per_row)
        name = os.path.join(self.samples_dir,
                            str(iteration - 1).zfill(5) + ".png")
        images.save(name)
        self.log.info('Val Sample saved to %s', name)

        images = utils.stitch_images(
            utils.to_uint8_torch(rand_images[:, :3]),
            utils.to_uint8_torch(rand_uvmaps[:, :3]),
            utils.to_uint8_torch(rand_gen_uvmaps[:, :3]),
            utils.to_uint8_torch(rand_im_merged[:self.config.batch_size]),
            utils.to_uint8_torch(rand_im_merged[self.config.batch_size:]),
            im_size=self.config.uv_size,
            img_per_row=image_per_row)
        name = os.path.join(self.samples_dir,
                            str(iteration - 1).zfill(5) + "_r.png")
        images.save(name)
        self.log.info('Val Sample saved to %s', name)

    def test_sample(self, it=None):

        self.inpaint_model.eval()

        test_items = next(self.test_sample_iterator)
        images, uvmaps, vertices, coeffs = self.to_device(*test_items)

        gen_uvmaps, im_merged = self.sample(images, uvmaps, vertices, coeffs)

        iteration = self.inpaint_model.iteration
        if it is not None:
            iteration = it

        image_per_row = 2
        if self.config.batch_size <= 6:
            image_per_row = 1

        images = utils.stitch_images(
            utils.to_uint8_torch(images[:, :3]),
            utils.to_uint8_torch(uvmaps[:, :3]),
            utils.to_uint8_torch(gen_uvmaps[:, :3]),
            utils.to_uint8_torch(im_merged[:self.config.batch_size]),
            utils.to_uint8_torch(im_merged[self.config.batch_size:]),
            im_size=self.config.uv_size,
            img_per_row=image_per_row)

        # path = os.path.join(self.samples_dir, self.name)
        name = os.path.join(self.samples_dir,
                            str(iteration - 1).zfill(5) + "_t.png")
        os.makedirs(self.samples_dir, exist_ok=True)
        images.save(name)

        self.log.info('Test sample saved to %s\n', name)

    def sample(self, images, uvmaps, vertices, coeffs):
        gen_uvmaps, renders, _ = self.inpaint_model(images[:, :3],
                                                    uvmaps,
                                                    vertices,
                                                    coeffs,
                                                    fix_uv=True)
        # io.imsave('tmp/render.png', renders[0].permute(1,2,0).cpu().detach().numpy())

        double_images = torch.cat([images, torch.flip(images, (3, ))], dim=0)
        no_l_eye = double_images[:, -1:] != self.ex_idx[0]
        no_r_eye = double_images[:, -1:] != self.ex_idx[1]
        no_mouth = double_images[:, -1:] != self.ex_idx[2]
        mask = (renders[:, 3:4] * no_l_eye.float() * no_r_eye.float() *
                no_mouth.float())
        im_merged = double_images[:, :3] * (1 - mask) + renders[:, :3] * mask
        # io.imsave('tmp/mask.png', mask[0, 0].cpu().detach().numpy())

        return gen_uvmaps.cpu(), im_merged.cpu()

    def init_test(self):
        self.segmenter = Segment(self.device)

        up_line = 100
        bt_line = 80

        self.transfers = {}
        self.uv_creators = {}
        self.nsh_face_tris = {}
        self.nsh_meshes = {}
        self.nsh_face_meshes = {}
        for face_model in ['230']:
            self.transfers[face_model] = Shape_Transfer(face_model=face_model,
                                                        device=self.device)
            self.uv_creators[face_model] = UVCreator(
                face_model=face_model,
                bfm_version=self.config.bfm_version,
                device=self.device)

            self.nsh_face_meshes[face_model] = meshio.Mesh(
                'data/mesh/{}/nsh_bfm_face.obj'.format(face_model))
            self.nsh_face_tris[face_model] = self.to_tensor(
                self.nsh_face_meshes[face_model].triangles, torch.int64)
            self.nsh_meshes[face_model] = meshio.Mesh(
                'data/mesh/{}/nsh_std.obj'.format(face_model), group=True)

        self.up_line = int(up_line * (self.config.uv_size / 1024))
        self.bt_line = int(bt_line * (self.config.uv_size / 1024))

        self.eye_lm_idx = np.loadtxt('data/mesh/eye_lm_idx.txt',
                                     dtype=np.int32)

        self.cropper = ImageCropper(self.config.im_size, use_dlib=False)
        self.reconstructor = Deep3DFace(self.sess, self.graph)

        R, T = look_at_view_transform(10, 0, 0)
        self.cameras = OpenGLPerspectiveCameras(znear=0.001,
                                                zfar=30.0,
                                                aspect_ratio=1.0,
                                                fov=12.5936,
                                                degrees=True,
                                                R=R,
                                                T=T,
                                                device=self.device)

        raster_settings = RasterizationSettings(image_size=512,
                                                blur_radius=0.0,
                                                faces_per_pixel=1,
                                                bin_size=0,
                                                cull_backfaces=True)
        self.rasterizer = MeshRasterizer(cameras=self.cameras,
                                         raster_settings=raster_settings)

    def preprocess(self, image, face_model):
        #* input image should be uint8, in RGB order

        image = utils.center_crop_resize(image, self.config.im_size)
        image = self.cropper.crop_image(image, self.config.im_size)

        image = image[:, ::-1].copy()

        images_224 = cv2.resize(image, (224, 224),
                                interpolation=cv2.INTER_AREA).astype(
                                    np.float32)[None]

        images = self.to_tensor(image[None])
        segments = self.segmenter.segment_torch(images)
        segments = center_crop(segments, images.shape[1])
        image_segment = torch.cat([images, segments[..., None]], dim=-1)
        image_segment = image_segment.permute(0, 3, 1, 2)

        coeff, bfm_vert, bfm_neu_vert = self.reconstructor.predict(
            images_224, neutral=True)
        bfm_neu_vert = self.to_tensor(bfm_neu_vert)

        #! using torch from now on -----------------------------
        bfm_vert = self.to_tensor(bfm_vert)
        nsh_vert = self.transfers[face_model].transfer_shape_torch(bfm_vert)
        nsh_neu_vert = self.transfers[face_model].transfer_shape_torch(
            bfm_neu_vert)
        nsh_face_vert = nsh_vert[self.uv_creators[face_model].
                                 nsh_face_start_idx:]

        coeff = self.to_tensor(coeff[None])
        _, _, _, angles, _, translation = utils.split_bfm09_coeff(coeff)
        # angle = (angle / 180.0 * math.pi) if degrees else angle
        transformer = Transform3d(device=self.device)
        transformer = transformer.rotate_axis_angle(angles[:, 0],
                                                    self.rot_order[0], False)
        transformer = transformer.rotate_axis_angle(angles[:, 1],
                                                    self.rot_order[1], False)
        transformer = transformer.rotate_axis_angle(angles[:, 2],
                                                    self.rot_order[2], False)
        transformer = transformer.translate(translation)

        nsh_trans_vert = transformer.transform_points(nsh_face_vert[None])

        nsh_shift_vert = nsh_trans_vert[0] - self.to_tensor([[0, 0, 10]])
        image_segment = torch.flip(image_segment, (3, )).type(torch.float32)

        nsh_trans_mesh = Meshes(nsh_trans_vert,
                                self.nsh_face_tris[face_model][None])

        fragment = self.rasterizer(nsh_trans_mesh)
        visible_face = torch.unique(
            fragment.pix_to_face)[1:]  # exclude face id -1
        visible_vert = self.nsh_face_tris[face_model][visible_face]
        visible_vert = torch.unique(visible_vert)
        vert_alpha = torch.zeros([nsh_shift_vert.shape[0], 1],
                                 device=self.device)
        vert_alpha[visible_vert] = 1
        nsh_shift_vert_alpha = torch.cat([nsh_shift_vert, vert_alpha], axis=-1)

        uvmap = self.uv_creators[face_model].create_nsh_uv_torch(
            nsh_shift_vert_alpha, image_segment, self.config.uv_size)

        uvmap[..., 3] = uvmap[..., 3] + uvmap[..., 4] * 128
        uvmap = uvmap[..., :4].cpu().numpy()
        uvmap = self.test_dataset.process_uvmap(uvmap.astype(np.uint8),
                                                dark_brow=True)

        images = images.permute(0, 3, 1, 2) / 127.5 - 1.0
        images = F.interpolate(images,
                               size=self.config.im_size,
                               mode='bilinear',
                               align_corners=False)
        segments = F.interpolate(segments[:, None],
                                 size=self.config.im_size,
                                 mode='nearest')
        images = torch.cat([images, segments], dim=1)
        uvmaps = uvmap[None].permute(0, 3, 1, 2)

        return images, uvmaps, coeff, nsh_face_vert, nsh_neu_vert

    def predict(self,
                image,
                out_dir,
                idx=None,
                deploy=False,
                face_model='230'):
        '''deploy for nsh'''
        if not deploy and idx is not None:
            idx = '{:>05d}'.format(idx)

        images, uvmaps, params, nsh_face_vert, nsh_neu_vert = self.preprocess(
            image, face_model)

        fnames = []

        gen_uvmaps = self.inpaint_model.forward(images[:, :3],
                                                uvmaps,
                                                nsh_face_vert[None],
                                                params,
                                                fix_uv=True,
                                                deploy=deploy,
                                                face_model=face_model)
        nsh_uv = F.interpolate(gen_uvmaps.detach(),
                               size=1024,
                               mode='bilinear',
                               align_corners=False)[0]

        fnames.append(os.path.join(out_dir, '{}_uv.png'.format(idx)))
        self.imsave(fnames[-1], nsh_uv, False, True)

        lm_idx = self.to_tensor(self.transfers[face_model].lm_icp_idx,
                                torch.int64)
        nsh_vert_lm = nsh_neu_vert[None, lm_idx]
        nsh_std_lm = self.to_tensor(
            self.transfers[face_model].tgt_std_vert)[None, lm_idx]
        R, T, s = corresponding_points_alignment(nsh_vert_lm,
                                                 nsh_std_lm,
                                                 estimate_scale=True)
        s = s * 0.97

        nsh_neu_vert_trans = (
            s[:, None, None] * torch.bmm(nsh_neu_vert[None], R) +
            T[:, None, :])[0]
        nsh_neu_vert = nsh_neu_vert_trans.cpu().numpy()
        nsh_neu_vert = self.transfers[face_model].normalize(nsh_neu_vert)
        fnames.append(os.path.join(out_dir, '{}_neu.obj'.format(idx)))
        meshio.write_obj(
            fnames[-1],
            nsh_neu_vert[self.uv_creators[face_model].nsh_face_start_idx:],
            self.nsh_face_meshes[face_model].triangles,
            texcoords=self.nsh_face_meshes[face_model].texcoords,
            mtllib=True,
            uv_name='{}_uv'.format(idx))

        fnames.append(os.path.join(out_dir, '{}_neu.mtl'.format(idx)))

        try:
            self.imsave(os.path.join(out_dir, '{}_input.jpg'.format(idx)),
                        images[0, :3], True)
        except Exception:
            pass

    def to_device(self, *args):
        return (item.to(self.device) for item in args)

    def to_tensor(self, array, dtype=torch.float32):
        if not isinstance(array, np.ndarray):
            array = np.array(array)
        return torch.from_numpy(array).type(dtype).to(self.device)

    def imsave(self, path, image, h_flip=False, v_flip=False):
        image = utils.to_uint8_torch(image.cpu()).numpy()
        if h_flip:
            image = image[:, ::-1]
        if v_flip:
            image = image[::-1]
        io.imsave(path, image)

    def compute_eye_param(self, vertices, eye_lm_idx, face_model):
        nsh_vert_lm = vertices[None, eye_lm_idx]
        nsh_std_lm = self.to_tensor(
            self.transfers[face_model].tgt_std_vert)[None, eye_lm_idx]
        R, T, s = corresponding_points_alignment(nsh_vert_lm,
                                                 nsh_std_lm,
                                                 estimate_scale=True)
        R = R.cpu().numpy()[0]
        T = T.cpu().numpy()[0]
        s = s.cpu().numpy()
        angle = Rotation.from_matrix(R).as_euler('xyz')
        eye_param = np.concatenate([angle, T, s])

        return eye_param
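
The `sample` method above pastes the rendered face back onto the input by alpha-compositing with a visibility mask (`image * (1 - mask) + render * mask`). The compositing step in isolation, with toy tensor shapes chosen purely for illustration:

import torch


def composite(image, render, alpha):
    """Blend a rendered RGB image over a background with an alpha mask.

    image, render: (N, 3, H, W) tensors in the same value range.
    alpha: (N, 1, H, W) tensor with values in [0, 1].
    """
    return image * (1.0 - alpha) + render * alpha


# toy usage; shapes and values are made up for the sketch
image = torch.rand(2, 3, 256, 256)
render = torch.rand(2, 3, 256, 256)
alpha = (torch.rand(2, 1, 256, 256) > 0.5).float()
merged = composite(image, render, alpha)
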
Example #22
from technical_analysis.roc import TechnicalAnalysisROC
from technical_analysis.rsi import TechnicalAnalysisRSI
from technical_analysis.so import TechnicalAnalysisSTOCH
from technical_analysis.williamsr import TechnicalAnalysisWilliamsR
from technical_analysis.wma import TechnicalAnalysisWMA

# option settings
pd.set_option("display.max_columns", 250)
pd.set_option("display.max_rows", 250)
import warnings
warnings.filterwarnings('ignore')
plt.rcParams['figure.figsize'] = (10.0, 20.0)

bitmex_exchange_client = ExchangeClient("bitmex", Path(config_ini))
mysql_client = DBClient("mysql", Path(config_ini))
dataset_manager = Dataset(mysql_client, bitmex_exchange_client, True)
dataset_manager.update_ohlcv("bitmex",
                             start_time=datetime.now() - timedelta(days=365),
                             with_ta=True)


# manually added
def get_joined_params_and_summary():
    bot = BottomTrendFollow(db_client=mysql_client,
                            exchange_client=bitmex_exchange_client,
                            is_backtest=True)
    backtest_management = bot.trading_bot_backtest.trading_bot_backtest_db.backtest_management_table(
    )
    backtest_summary = BacktestSummary()

    query_management = "SELECT * FROM " + bot.trading_bot_backtest.trading_bot_backtest_db.backtest_management_table_name + ";"
Example #23
from technical_analysis.roc import TechnicalAnalysisROC
from technical_analysis.rsi import TechnicalAnalysisRSI
from technical_analysis.so import TechnicalAnalysisSTOCH
from technical_analysis.williamsr import TechnicalAnalysisWilliamsR
from technical_analysis.wma import TechnicalAnalysisWMA

# option settings
pd.set_option("display.max_columns", 250)
pd.set_option("display.max_rows", 250)
import warnings
warnings.filterwarnings('ignore')
plt.rcParams['figure.figsize'] = (10.0, 20.0)

bitmex_exchange_client = ExchangeClient("bitmex", Path(config_ini))
mysql_client = DBClient("mysql", Path(config_ini))
dataset_manager = Dataset(mysql_client, bitmex_exchange_client)
dataset_manager.update_ohlcv("bitmex")


# manually added
def get_joined_params_and_summary():
    bot = BottomTrendFollow(db_client=mysql_client,
                            exchange_client=bitmex_exchange_client,
                            is_backtest=True)
    backtest_management = bot.backtest_management_table()
    backtest_summary = BacktestSummary()

    query_management = "SELECT * FROM " + bot.backtest_management_table_name + ";"
    query_summary = "SELECT * FROM backtest_summary;"

    backtest_management_df = mysql_client.exec_sql(query_management)
Example #24
#!/usr/bin/python3
#
# Author: Steve Landherr <*****@*****.**>
#
# Script to extract standardized features from the
# comment submissions to FCC proceedings 17-108, driven by
# the Startup Policy Lab survey results.
#
# NOTE: Customize data extraction using the extract_spec.py module.
import sys
from lib.dataset import Dataset


def usage():
    sys.stderr.write("usage: extract-dataset.py\n")
    sys.exit(0)


if __name__ == '__main__':
    if len(sys.argv) > 1:
        usage()

    sys.stderr.write('starting...\n')
    dataset = Dataset()
    dataset.pull_points_from_survey()
    dataset.add_features_from_fcc()
    dataset.filter_one_hot_features()
    dataset.print_csv()
Example #25
def main():
    args = parse_args()
    np.random.seed(args.seed)
    cudnn.benchmark = False
    cudnn.deterministic = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)

    if args.name is None:
        args.name = '%s_%s' % (args.arch, datetime.now().strftime('%m%d%H'))

    if not os.path.exists('models/%s' % args.name):
        os.makedirs('models/%s' % args.name)

    print('Config -----')
    for arg in vars(args):
        print('- %s: %s' % (arg, getattr(args, arg)))
    print('------------')

    with open('models/%s/args.txt' % args.name, 'w') as f:
        for arg in vars(args):
            print('- %s: %s' % (arg, getattr(args, arg)), file=f)

    joblib.dump(args, 'models/%s/args.pkl' % args.name)

    if args.loss == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss == 'FocalLoss':
        criterion = FocalLoss().cuda()
    elif args.loss == 'MSELoss':
        criterion = nn.MSELoss().cuda()
    elif args.loss == 'multitask':
        criterion = {
            'classification': nn.CrossEntropyLoss().cuda(),
            'regression': nn.MSELoss().cuda(),
        }
    else:
        raise NotImplementedError

    if args.pred_type == 'classification':
        num_outputs = 5
    elif args.pred_type == 'regression':
        num_outputs = 1
    elif args.loss == 'multitask':
        num_outputs = 6
    else:
        raise NotImplementedError

    train_transform = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.RandomAffine(
            degrees=(args.rotate_min, args.rotate_max) if args.rotate else 0,
            translate=(args.translate_min,
                       args.translate_max) if args.translate else None,
            scale=(args.rescale_min,
                   args.rescale_max) if args.rescale else None,
            shear=(args.shear_min, args.shear_max) if args.shear else None,
        ),
        transforms.CenterCrop(args.input_size),
        transforms.RandomHorizontalFlip(p=0.5 if args.flip else 0),
        transforms.RandomVerticalFlip(p=0.5 if args.flip else 0),
        transforms.ColorJitter(brightness=0,
                               contrast=args.contrast,
                               saturation=0,
                               hue=0),
        RandomErase(prob=args.random_erase_prob if args.random_erase else 0,
                    sl=args.random_erase_sl,
                    sh=args.random_erase_sh,
                    r=args.random_erase_r),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    val_transform = transforms.Compose([
        transforms.Resize((args.img_size, args.input_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # data loading code
    if 'diabetic_retinopathy' in args.train_dataset:
        diabetic_retinopathy_dir = preprocess('diabetic_retinopathy',
                                              args.img_size,
                                              scale=args.scale_radius,
                                              norm=args.normalize,
                                              pad=args.padding,
                                              remove=args.remove)
        diabetic_retinopathy_df = pd.read_csv(
            'inputs/diabetic-retinopathy-resized/trainLabels.csv')
        diabetic_retinopathy_img_paths = \
            diabetic_retinopathy_dir + '/' + diabetic_retinopathy_df['image'].values + '.jpeg'
        diabetic_retinopathy_labels = diabetic_retinopathy_df['level'].values

    if 'aptos2019' in args.train_dataset:
        aptos2019_dir = preprocess('aptos2019',
                                   args.img_size,
                                   scale=args.scale_radius,
                                   norm=args.normalize,
                                   pad=args.padding,
                                   remove=args.remove)
        aptos2019_df = pd.read_csv('inputs/train.csv')
        aptos2019_img_paths = aptos2019_dir + '/' + aptos2019_df[
            'id_code'].values + '.png'
        aptos2019_labels = aptos2019_df['diagnosis'].values

    if 'chestxray' in args.train_dataset:
        chestxray_dir = preprocess('chestxray',
                                   args.img_size,
                                   scale=args.scale_radius,
                                   norm=args.normalize,
                                   pad=args.padding,
                                   remove=args.remove)

        chestxray_img_paths = []
        chestxray_labels = []
        # the NORMAL/PNEUMONIA folder layout is the same for every split
        for split in ('train', 'test', 'val'):
            normal_cases = glob('chest_xray/chest_xray/%s/NORMAL/*.jpeg' % split)
            pneumonia_cases = glob('chest_xray/chest_xray/%s/PNEUMONIA/*.jpeg' % split)
            for nor in normal_cases:
                p = nor.split('/')[-1]
                chestxray_img_paths.append(chestxray_dir + '/' + p)
                chestxray_labels.append(0)
            for abn in pneumonia_cases:
                p = abn.split('/')[-1]
                chestxray_img_paths.append(chestxray_dir + '/' + p)
                chestxray_labels.append(1)

        chestxray_img_paths = np.array(chestxray_img_paths)
        chestxray_labels = np.array(chestxray_labels)

    if args.train_dataset == 'aptos2019':
        skf = StratifiedKFold(n_splits=args.n_splits,
                              shuffle=True,
                              random_state=41)
        img_paths = []
        labels = []
        for fold, (train_idx, val_idx) in enumerate(
                skf.split(aptos2019_img_paths, aptos2019_labels)):
            img_paths.append(
                (aptos2019_img_paths[train_idx], aptos2019_img_paths[val_idx]))
            labels.append(
                (aptos2019_labels[train_idx], aptos2019_labels[val_idx]))
    elif args.train_dataset == 'diabetic_retinopathy':
        img_paths = [(diabetic_retinopathy_img_paths, aptos2019_img_paths)]
        labels = [(diabetic_retinopathy_labels, aptos2019_labels)]
    elif 'diabetic_retinopathy' in args.train_dataset and 'aptos2019' in args.train_dataset:
        skf = StratifiedKFold(n_splits=args.n_splits,
                              shuffle=True,
                              random_state=41)
        img_paths = []
        labels = []
        for fold, (train_idx, val_idx) in enumerate(
                skf.split(aptos2019_img_paths, aptos2019_labels)):
            img_paths.append((np.hstack((aptos2019_img_paths[train_idx],
                                         diabetic_retinopathy_img_paths)),
                              aptos2019_img_paths[val_idx]))
            labels.append((np.hstack(
                (aptos2019_labels[train_idx], diabetic_retinopathy_labels)),
                           aptos2019_labels[val_idx]))

    # FL setting: separate data into users
    if 'diabetic_retinopathy' in args.train_dataset and 'aptos2019' in args.train_dataset:
        combined_paths = np.hstack(
            (aptos2019_img_paths, diabetic_retinopathy_img_paths))
        combined_labels = np.hstack(
            (aptos2019_labels, diabetic_retinopathy_labels))
    elif 'chestxray' in args.train_dataset:
        combined_paths = chestxray_img_paths
        combined_labels = chestxray_labels
    else:
        raise NotImplementedError
    user_ind_dict, ind_test = split_dataset(combined_labels, args.num_users,
                                            args.iid)

    model = get_model(model_name=args.arch,
                      num_outputs=num_outputs,
                      freeze_bn=args.freeze_bn,
                      dropout_p=args.dropout_p)
    model = model.cuda()
    test_set = Dataset(combined_paths[ind_test],
                       combined_labels[ind_test],
                       transform=val_transform)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=4)

    test_acc = []
    test_scores = []
    test_scores_f1 = []
    lr = args.lr
    for epoch in range(args.epochs):

        print('Epoch [%d/%d]' % (epoch + 1, args.epochs))
        weight_list = []
        selected_ind = np.random.choice(args.num_users,
                                        int(args.num_users / 10),
                                        replace=False)
        for i in selected_ind:
            print('user: %d' % (i + 1))
            train_set = Dataset(combined_paths[user_ind_dict[i]],
                                combined_labels[user_ind_dict[i]],
                                transform=train_transform)
            train_loader = torch.utils.data.DataLoader(
                train_set,
                batch_size=args.batch_size,
                shuffle=False if args.class_aware else True,
                num_workers=4,
                sampler=sampler if args.class_aware else None)

            # train for one epoch
            train_loss, train_score, ret_w = train(args, train_loader,
                                                   copy.deepcopy(model),
                                                   criterion, lr)
            weight_list.append(ret_w)
            print('loss %.4f - score %.4f' % (train_loss, train_score))
        weights = fedavg(weight_list)
        model.load_state_dict(weights)
        test_loss, test_score, test_scoref1, accuracy, confusion_matrix = test(
            args, test_loader, copy.deepcopy(model), criterion)
        print('loss %.4f - score %.4f - accuracy %.4f' %
              (test_loss, test_score, accuracy))
        test_acc.append(accuracy)
        test_scores.append(test_score)
        test_scores_f1.append(test_scoref1)
        lr *= 0.992

    np.savez('./accuracy-xray-iid' + str(args.iid) + '-' + str(args.epochs) +
             '-beta' + str(args.beta) + '-seed' + str(args.seed),
             acc=np.array(test_acc),
             score=np.array(test_scores),
             scoref1=np.array(test_scores_f1),
             confusion=confusion_matrix)
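
The federated loop above trains a fresh copy of the model for each sampled user and then merges the returned weights with `fedavg(weight_list)`, which is not shown. A minimal sketch of such a uniform-averaging helper, assuming every entry is a full `state_dict` with matching keys (a real implementation may weight clients by local dataset size):

import copy


def fedavg(weight_list):
    """Uniformly average a list of model state_dicts (FedAvg-style sketch)."""
    avg = copy.deepcopy(weight_list[0])
    for key in avg:
        if not avg[key].is_floating_point():
            # leave integer buffers (e.g. BatchNorm's num_batches_tracked) as-is
            continue
        for weights in weight_list[1:]:
            avg[key] += weights[key]
        avg[key] /= len(weight_list)
    return avg
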
Example #26
def main():
    args = parse_args()

    if args.name is None:
        args.name = '%s_%s' % (args.arch, datetime.now().strftime('%m%d%H'))

    if not os.path.exists('models/%s' % args.name):
        os.makedirs('models/%s' % args.name)

    if args.resume:
        args = joblib.load('models/%s/args.pkl' % args.name)
        args.resume = True

    print('Config -----')
    for arg in vars(args):
        print('- %s: %s' % (arg, getattr(args, arg)))
    print('------------')

    with open('models/%s/args.txt' % args.name, 'w') as f:
        for arg in vars(args):
            print('- %s: %s' % (arg, getattr(args, arg)), file=f)

    joblib.dump(args, 'models/%s/args.pkl' % args.name)

    if args.seed is not None and not args.resume:
        print('set random seed')
        random.seed(args.seed)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)

    if args.loss == 'BCEWithLogitsLoss':
        criterion = BCEWithLogitsLoss().cuda()
    elif args.loss == 'WeightedBCEWithLogitsLoss':
        criterion = BCEWithLogitsLoss(weight=torch.Tensor([1., 1., 1., 1., 1., 2.]),
                                      smooth=args.label_smooth).cuda()
    elif args.loss == 'FocalLoss':
        criterion = FocalLoss().cuda()
    elif args.loss == 'WeightedFocalLoss':
        criterion = FocalLoss(weight=torch.Tensor([1., 1., 1., 1., 1., 2.])).cuda()
    else:
        raise NotImplementedError

    if args.pred_type == 'all':
        num_outputs = 6
    elif args.pred_type == 'except_any':
        num_outputs = 5
    else:
        raise NotImplementedError

    cudnn.benchmark = True

    # create model
    model = get_model(model_name=args.arch,
                      num_outputs=num_outputs,
                      freeze_bn=args.freeze_bn,
                      dropout_p=args.dropout_p,
                      pooling=args.pooling,
                      lp_p=args.lp_p)
    model = model.cuda()

    train_transform = Compose([
        transforms.Resize(args.img_size, args.img_size),
        transforms.HorizontalFlip() if args.hflip else NoOp(),
        transforms.VerticalFlip() if args.vflip else NoOp(),
        transforms.ShiftScaleRotate(
            shift_limit=args.shift_limit,
            scale_limit=args.scale_limit,
            rotate_limit=args.rotate_limit,
            border_mode=cv2.BORDER_CONSTANT,
            value=0,
            p=args.shift_scale_rotate_p
        ) if args.shift_scale_rotate else NoOp(),
        transforms.RandomContrast(
            limit=args.contrast_limit,
            p=args.contrast_p
        ) if args.contrast else NoOp(),
        RandomErase() if args.random_erase else NoOp(),
        transforms.CenterCrop(args.crop_size, args.crop_size) if args.center_crop else NoOp(),
        ForegroundCenterCrop(args.crop_size) if args.foreground_center_crop else NoOp(),
        transforms.RandomCrop(args.crop_size, args.crop_size) if args.random_crop else NoOp(),
        transforms.Normalize(mean=model.mean, std=model.std),
        ToTensor(),
    ])

    if args.img_type:
        stage_1_train_dir = 'processed/stage_1_train_%s' %args.img_type
    else:
        stage_1_train_dir = 'processed/stage_1_train'

    df = pd.read_csv('inputs/stage_1_train.csv')
    img_paths = np.array([stage_1_train_dir + '/' + '_'.join(s.split('_')[:-1]) + '.png' for s in df['ID']][::6])
    labels = np.array([df.loc[c::6, 'Label'].values for c in range(6)]).T.astype('float32')

    df = df[::6]
    df['img_path'] = img_paths
    for c in range(6):
        df['label_%d' %c] = labels[:, c]
    df['ID'] = df['ID'].apply(lambda s: '_'.join(s.split('_')[:-1]))

    meta_df = pd.read_csv('processed/stage_1_train_meta.csv')
    meta_df['ID'] = meta_df['SOPInstanceUID']
    test_meta_df = pd.read_csv('processed/stage_1_test_meta.csv')
    df = pd.merge(df, meta_df, how='left')

    patient_ids = meta_df['PatientID'].unique()
    test_patient_ids = test_meta_df['PatientID'].unique()
    if args.remove_test_patient_ids:
        patient_ids = np.array([s for s in patient_ids if not s in test_patient_ids])

    train_img_paths = np.hstack(df[['img_path', 'PatientID']].groupby(['PatientID'])['img_path'].apply(np.array).loc[patient_ids].to_list()).astype('str')
    train_labels = []
    for c in range(6):
        train_labels.append(np.hstack(df[['label_%d' %c, 'PatientID']].groupby(['PatientID'])['label_%d' %c].apply(np.array).loc[patient_ids].to_list()))
    train_labels = np.array(train_labels).T

    if args.resume:
        checkpoint = torch.load('models/%s/checkpoint.pth.tar' % args.name)

    # train
    train_set = Dataset(
        train_img_paths,
        train_labels,
        transform=train_transform)
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        # pin_memory=True,
    )

    if args.optimizer == 'Adam':
        optimizer = optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'AdamW':
        optimizer = optim.AdamW(
            filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'RAdam':
        optimizer = RAdam(
            filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'SGD':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr,
                              momentum=args.momentum, weight_decay=args.weight_decay, nesterov=args.nesterov)
    else:
        raise NotImplementedError

    if args.apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    if args.scheduler == 'CosineAnnealingLR':
        scheduler = lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=args.epochs, eta_min=args.min_lr)
    elif args.scheduler == 'MultiStepLR':
        scheduler = lr_scheduler.MultiStepLR(optimizer,
            milestones=[int(e) for e in args.milestones.split(',')], gamma=args.gamma)
    else:
        raise NotImplementedError

    log = {
        'epoch': [],
        'loss': [],
    }

    start_epoch = 0

    if args.resume:
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        start_epoch = checkpoint['epoch']
        log = pd.read_csv('models/%s/log.csv' % args.name).to_dict(orient='list')

    for epoch in range(start_epoch, args.epochs):
        print('Epoch [%d/%d]' % (epoch + 1, args.epochs))

        # train for one epoch
        train_loss = train(args, train_loader, model, criterion, optimizer, epoch)

        # both CosineAnnealingLR and MultiStepLR expect a per-epoch step
        scheduler.step()

        print('loss %.4f' % (train_loss))

        log['epoch'].append(epoch)
        log['loss'].append(train_loss)

        pd.DataFrame(log).to_csv('models/%s/log.csv' % args.name, index=False)

        torch.save(model.state_dict(), 'models/%s/model.pth' % args.name)
        print("=> saved model")

        state = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
        }
        torch.save(state, 'models/%s/checkpoint.pth.tar' % args.name)
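
The loop above writes `model.pth` and `checkpoint.pth.tar` under `models/<name>/` every epoch. A hedged sketch of reloading the final weights for inference, reusing the repo's `get_model` helper from the script above; the run name `'example_run'` is a placeholder:

import joblib
import torch

run_args = joblib.load('models/example_run/args.pkl')
num_outputs = 6 if run_args.pred_type == 'all' else 5

model = get_model(model_name=run_args.arch,
                  num_outputs=num_outputs,
                  freeze_bn=run_args.freeze_bn,
                  dropout_p=run_args.dropout_p,
                  pooling=run_args.pooling,
                  lp_p=run_args.lp_p)
model.load_state_dict(
    torch.load('models/example_run/model.pth', map_location='cpu'))
model.eval()
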
Example #27
def main():
    parser = argparse.ArgumentParser(
        description=
        "Generate some useful quantile graphs and perhaps a few histograms too"
    )
    parser.add_argument("data_directory",
                        help="the location where we are storing the data.")
    args = parser.parse_args()

    not_enough_data = []

    def perform_analysis(az, insttype, data_before, data_after):
        data_before = PriceTimeSeriesNaive(data_before)
        data_after = PriceTimeSeriesNaive(data_after)

        days_required = 90
        bucket_interval = 350  # bucket interval in seconds
        bucket_before = PriceBucketNaive(
            data_before.expand_with_interval(bucket_interval))
        bucket_after = PriceBucketNaive(
            data_after.expand_with_interval(bucket_interval))

        if len(bucket_after.by_ts) == 0 or len(bucket_before.by_ts) == 0:
            return

        SECONDS_IN_DAY = 3600 * 24
        SIZE_REQUIREMENT = SECONDS_IN_DAY / bucket_interval * days_required
        if (bucket_before.size() < SIZE_REQUIREMENT
                or bucket_after.size() < SIZE_REQUIREMENT):
            print("\tnot enough data for region")
            not_enough_data.append((az, insttype))
            return
        bucket_before = bucket_before.shrink_to_size(SIZE_REQUIREMENT)
        bucket_after = bucket_after.shrink_to_size(SIZE_REQUIREMENT)

        mean = bucket_before.getAverage()
        median = bucket_before.getQuantile(0.5)["price"]

        print("\tprice window duration: %f" %
              ((bucket_before.by_ts[-1]["timestamp"] -
                bucket_before.by_ts[0]["timestamp"]) / (3600 * 24)))
        print("\tmean before/after: %f/%f" % (float(
            bucket_before.getAverage()), float(bucket_after.getAverage())))
        print("\tmedian before/after: %f/%f" %
              (float(bucket_after.getQuantile(0.5)["price"]),
               bucket_after.getQuantile(0.5)["price"]))

        total_cost_before = sum(r["price"] for r in bucket_before.by_ts)
        print("\ttotal/avg cost before: %f/%f" %
              (total_cost_before,
               total_cost_before / float(len(bucket_before.by_ts))))

        total_cost_after = sum(r["price"] for r in bucket_after.by_ts)
        print("\ttotal/avg cost after: %f/%f" %
              (total_cost_after,
               total_cost_after / float(len(bucket_before.by_ts))))

        print("\ttotal/avg price difference: %f/%f" %
              (total_cost_after - total_cost_before,
               (total_cost_after - total_cost_before) /
               float(len(bucket_before.by_ts))))

        # return the result object containing aggregates of the values we computed
        return {
            "key": (az, insttype),
            "'popularity'": len(data_before.data) + len(data_after.data),
            "total_cost_before": total_cost_before,
            "avg_cost_before":
            total_cost_before / float(len(bucket_before.by_ts)),
            "total_cost_after": total_cost_after,
            "avg_cost_after":
            total_cost_after / float(len(bucket_before.by_ts))
        }

    def transform_query_results(results):
        return ({
            "timestamp": result[0],
            "price": result[1]
        } for result in results)

    results = []

    with Dataset(args.data_directory) as dataset:
        count = 0
        for az, insttype in dataset.get_databases(
        ):  # misleading function name
            conn = dataset.open(az, insttype)

            # compute the most recent timestamp so we can determine a good
            # duration for the intervals we will be examining
            c = conn.cursor()
            c.execute("SELECT MAX(timestamp) AS timestamp FROM prices")
            most_recent_ts = c.fetchone()[0]
            interval_duration = most_recent_ts - AFTER_START_EPOCH

            if interval_duration < 0: continue

            # fetch the data for the before window
            c = conn.cursor()
            c.execute(
                "SELECT timestamp, price FROM prices  "
                " WHERE timestamp > %d AND timestamp < %d ORDER BY timestamp" %
                (BEFORE_END_EPOCH - interval_duration, BEFORE_END_EPOCH))
            data_before = list(transform_query_results(c.fetchall()))
            c = conn.cursor()
            c.execute("SELECT timestamp, price FROM prices " +
                      " WHERE timestamp > %d ORDER BY timestamp " %
                      (AFTER_START_EPOCH))
            data_after = list(transform_query_results(c.fetchall()))

            if len(data_before) == 0 or len(data_after) == 0: continue

            print("analyzing data for %s - %s" % (az, insttype))

            results.append(
                perform_analysis(az, insttype, data_before, data_after))
            # count += 1
            # if count > 100:
            #     break

    results = list(filter(lambda x: x is not None, results))
    results.sort(key=lambda r: r["'popularity'"])

    print("Information filtered by instance popularity")

    def aggregate_dictionary(dicts, aggfunc):
        return {
            key: aggfunc([d[key] for d in dicts])
            for key in sorted(dicts[0].keys())
            if type(dicts[0][key]) == int or type(dicts[0][key]) == float
        }

    def aggmedian(values):
        return sorted(values)[len(values) // 2]

    def aggaverage(values):
        return sum(values) / float(len(values))

    def print_results(popdict, unpopdict):
        for key in popdict:
            print("\t%s popular/unpopular: %f/%f" %
                  (key, popdict[key], unpopdict[key]))

    popular_inst_types = [
        "m1.small", "m1.large", "t1.micro", "m1.medium", "c1.xlarge",
        "c1.medium"
    ]
    results_popular = list(
        filter(lambda x: x["key"][1] in popular_inst_types, results))
    results_unpopular = list(
        filter(lambda x: x["key"][1] not in popular_inst_types, results))

    print("results (median, hard coded popular instance types)")
    print("%d popular results found" % (len(results_popular)))
    pop_results = aggregate_dictionary(results_popular, aggaverage)
    unpop_results = aggregate_dictionary(results_unpopular, aggaverage)
    print_results(pop_results, unpop_results)
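
One last note on the window queries in the loop above: they interpolate the epoch bounds with `%` string formatting. If the per-instance store opened by `dataset.open(az, insttype)` is a sqlite3 connection (an assumption, since the `Dataset` wrapper is not shown here), the same query can use parameter binding instead:

# inside the loop over dataset.get_databases(), with `conn` from dataset.open(az, insttype)
c = conn.cursor()
c.execute(
    "SELECT timestamp, price FROM prices"
    " WHERE timestamp > ? AND timestamp < ? ORDER BY timestamp",
    (BEFORE_END_EPOCH - interval_duration, BEFORE_END_EPOCH))
data_before = list(transform_query_results(c.fetchall()))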