Example #1
    def quantiles(self, *quantiles):
        """Return the estimated data value for the given quantile(s).

        The requested quantile(s) must be between 0 and 1. Note that even if a
        single quantile is input, a list is always returned.
        """
        temp = bin_sums(self.bins)
        sums = list(accumulate(temp))
        result = []
        for x in quantiles:
            target_sum = x * self.total
            if x <= 0:
                qq = self._min
            elif x >= 1:
                qq = self._max
            else:
                index = bisect_left(sums, target_sum)
                bin_i = self.bins[index]
                if index < len(sums):
                    bin_i1 = self.bins[index+1]
                else:
                    bin_i1 = self.bins[index]
                if index:
                    prev_sum = sums[index-1]
                else:
                    prev_sum = 0.0
                qq = _compute_quantile(target_sum, bin_i, bin_i1, prev_sum+1)
            result.append(qq)
        return result
Example #2
    def quantiles(self, *quantiles):
        """Return the estimated data value for the given quantile(s).

        The requested quantile(s) must be between 0 and 1. Note that even if a
        single quantile is input, a list is always returned.
        """
        temp = bin_sums(self.bins)
        sums = list(accumulate(temp))
        result = []
        for x in quantiles:
            target_sum = x * self.total
            if x <= 0:
                qq = self._min
            elif x >= 1:
                qq = self._max
            else:
                index = bisect_left(sums, target_sum)
                bin_i = self.bins[index]
                if index < len(sums):
                    bin_i1 = self.bins[index + 1]
                else:
                    bin_i1 = self.bins[index]
                if index:
                    prev_sum = sums[index - 1]
                else:
                    prev_sum = 0.0
                qq = _compute_quantile(target_sum, bin_i, bin_i1, prev_sum + 1)
            result.append(qq)
        return result
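
Neither of the two quantile examples shows `bin_sums` or `_compute_quantile`; `accumulate` here only builds the running sum of per-bin counts (the job `itertools.accumulate` does in the standard library). A minimal, self-contained sketch of the same idea, assuming hypothetical `(value, count)` bins and plain linear interpolation instead of the class's private helpers:

from bisect import bisect_left
from itertools import accumulate


def quantile_from_bins(bins, q):
    """Estimate the q-th quantile from (value, count) bins.

    Simplified illustration of the pattern above: build running counts with
    itertools.accumulate, locate the target count with bisect_left, then
    interpolate linearly between the two neighbouring bin values.
    """
    total = sum(count for _, count in bins)
    target = q * total
    sums = list(accumulate(count for _, count in bins))
    index = bisect_left(sums, target)
    if index == 0:
        return bins[0][0]
    if index >= len(bins):
        return bins[-1][0]
    prev_sum = sums[index - 1]
    value_lo, _ = bins[index - 1]
    value_hi, count_hi = bins[index]
    fraction = (target - prev_sum) / (count_hi or 1)
    return value_lo + (value_hi - value_lo) * fraction


# quantile_from_bins([(1, 10), (5, 10), (9, 10)], 0.5) -> 3.0
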
Example #3
    def generate(self, ffrom, tto):
        period_type = self.get_period_type()

        sum_type = self.config.get("sum_type", "sum")

        ret, new_ffrom, new_tto = self._reused_data(ffrom, tto)
        if new_ffrom is None and new_tto is None:  # full reuse - ret == old_data
            return ret

        query = self._get_query()

        with self.db.cursor() as cursor:
            for period_from in generate_date_series(new_ffrom, new_tto,
                                                    period_type):
                period_to = calculate_period_to(period_from, period_type)
                params = {
                    'ffrom': period_from,
                    'tto': period_to,
                    "period": period_type
                }
                params.update(self.get_params())
                try:
                    cursor.execute(query, params)
                except:
                    import ipdb
                    ipdb.set_trace()
                    raise
                rrow = cursor.fetchone()
                if not rrow:
                    ret[period_from] = 0
                else:
                    ret[period_from] = rrow["value"]

            totalized_movs = sorted(ret.items(), key=lambda k_v: k_v[0])

        accumulate(totalized_movs, sum_type)

        if [row for row in totalized_movs if row[1] is None]:
            import ipdb
            ipdb.set_trace()

        data = [{
            "label": row[0].strftime("%Y-%m-%d"),
            "data": float(row[1])
        } for row in totalized_movs]
        return data
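
In this example (and in Examples 4, 6 and 14 below) `accumulate(totalized_movs, sum_type)` is called purely for its side effect: the sorted list of `(date, value)` pairs is consumed right afterwards, so the helper presumably rewrites the values in place as running totals when the configured sum type asks for it. A plausible sketch under that assumption (the "sum" / accumulated distinction is inferred from context; the real helper is not shown):

def accumulate(totalized_movs, sum_type):
    """Rewrite (date, value) pairs in place as running totals (hypothetical).

    For a plain "sum" series nothing changes; for an accumulated series each
    value is replaced by the cumulative total up to and including its date.
    """
    if sum_type == "sum":
        return
    running = 0
    for i, (period, value) in enumerate(totalized_movs):
        running += value or 0
        totalized_movs[i] = (period, running)
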
Example #4
    def generate(self, ffrom, tto):
        period_type = self.get_period_type()

        sum_type = self.config.get("sum_type", "sum")

        ret, new_ffrom, new_tto = self._reused_data(ffrom, tto)
        if new_ffrom is None and new_tto is None:  # full reuse - ret == old_data
            return ret

        query = self._get_query()

        with self.db.cursor() as cursor:
            params = {
                'ffrom': new_ffrom,
                'tto': new_tto,
                "period": period_type
            }
            params.update(self.get_params())
            try:
                cursor.execute(query, params)
            except:
                import ipdb
                ipdb.set_trace()
                raise
            for row in cursor.fetchall():
                ret[ensure_date(row["period"])] = row["value"]
            # Fill missing dates with zeros
            missing_dates = [
                d
                for d in generate_date_series(new_ffrom, new_tto, period_type)
                if d not in ret
            ]
            for missing in missing_dates:
                ret[missing] = 0

            totalized_movs = sorted(ret.items(), key=lambda k_v: k_v[0])

        accumulate(totalized_movs, sum_type)

        data = [{
            "label": row[0].strftime("%Y-%m-%d"),
            "data": float(row[1])
        } for row in totalized_movs]
        return data
Example #5
def calculate_gae(rewards, values, gamma, lamb, normalize=True):
    td_errors = calculate_td_errors(rewards, values, gamma)
    discount_rate = gamma * lamb
    advantages = accumulate(td_errors, discount_rate)
    future_returns = calculate_future_rewards(rewards, gamma)

    if normalize:
        advantages = batch_normalize(advantages)

    return advantages, future_returns
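
Here `accumulate(td_errors, discount_rate)` has to turn per-step TD errors into GAE advantages, i.e. a discounted cumulative sum taken backwards over time (the same helper computes returns in Example 13). A sketch of such a helper, assuming the input is a list of per-step arrays shaped [max_episode_length, batch_size]; this is a hypothetical implementation written only to make the call sites concrete:

import numpy as np


def accumulate(values, discount):
    """Backward discounted cumulative sum (hypothetical).

    values: sequence of per-step arrays, shape [max_episode_length, batch_size].
    Returns a list of the same shape where out[t] = values[t] + discount * out[t + 1].
    """
    out = [None] * len(values)
    running = np.zeros_like(np.asarray(values[-1], dtype=np.float64))
    for t in reversed(range(len(values))):
        running = np.asarray(values[t], dtype=np.float64) + discount * running
        out[t] = running
    return out
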
Example #6
    def generate(self, ffrom, tto):
        account_codes = self.config["account_codes"].split(",")

        accounts_plan_id = self.config["accounts_plan_id"]

        journal = self.config["journal"]
        sum_type = self.config["sum_type"]

        sign = self.config["sign"]
        account_ids = self._get_account_ids(accounts_plan_id, account_codes)
        totalized_movs = self._sumarize_movements(ffrom, tto, account_ids,
                                                  journal)

        accumulate(totalized_movs, sum_type)

        data = [{
            "label": row[0].strftime("%Y-%m-%d"),
            "data": sign * int(row[1])
        } for row in totalized_movs]
        return data
Example #7
    def get(self):
        btc_rpc_connection = AuthServiceProxy(BTC_RPC_URL)  # todo-junying-20180325
        try:
            addr = self.get_argument("address")
            data = BTC_ListUTXO.utxo(btc_rpc_connection, addr)
            if not data:
                self.write(json.dumps(BaseHandler.error_ret_with_data("utxo no available")))
                return
            from utils import accumulate
            self.write(json.dumps(BaseHandler.success_ret_with_data(accumulate(data)), default=decimal_default))
        except Exception as e:
            self.write(json.dumps(BaseHandler.error_ret_with_data("error: %s" % e)))
            print("BTC_GetBalance error:{0} in {1}".format(e, get_linenumber()))
Example #8
    def get(self):
        btc_rpc_connection = AuthServiceProxy(RPC_URL)
        try:
            addr = self.get_argument("address")
            data = BTC_ListUTXO.utxo(btc_rpc_connection, addr)
            if not data:
                self.write(json.dumps(BaseHandler.error_ret_with_data("0")))
                return
            from utils import accumulate
            self.write(
                json.dumps(BaseHandler.success_ret_with_data('%.8f' %
                                                             accumulate(data)),
                           default=decimal_default))
        except Exception as e:
            self.write(
                json.dumps(BaseHandler.error_ret_with_data("error: %s" % e)))
            logging.error("BTC_GetBalance error:{0} in {1}".format(
                e, get_linenumber()))
Example #9
    def get(self):
        omni_rpc_connection = AuthServiceProxy(OMNI_RPC_URL)
        try:

            addr = self.get_argument("address")
            #print("addr" + str(addr))
            data = uBTC_ListUTXO.utxo(omni_rpc_connection,
                                      self.get_argument("address"), 0, 99999)
            if not data:
                self.write(
                    json.dumps(
                        BaseHandler.error_ret_with_data("utxo no available")))
                return
            from utils import accumulate
            self.write(
                json.dumps(BaseHandler.success_ret_with_data('%.8f' %
                                                             accumulate(data)),
                           default=decimal_default))
        except Exception as e:
            self.write(
                json.dumps(BaseHandler.error_ret_with_data("error: %s" % e)))
            logging.error("uBTC_GetBalance error:{0} in {1}".format(
                e, get_linenumber()))
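
Examples 7-9 import `accumulate` from a local `utils` module and feed it the UTXO list returned by the node, so in this context it simply totals the balance carried by the unspent outputs. A minimal sketch under that assumption; the "amount" field name is guessed from Bitcoin-style listunspent output and is not confirmed by the examples:

from decimal import Decimal


def accumulate(utxos):
    """Sum the amounts of a list of unspent outputs (hypothetical utils helper).

    Decimal keeps the 8-decimal precision that the '%.8f' formatting in the
    handlers expects.
    """
    return sum((Decimal(str(u.get("amount", 0))) for u in utxos), Decimal(0))
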
Example #10
def train(P, opt, models, optimizers, train_loader, logger):
    generator, discriminator, GD, g_ema = models
    opt_G, opt_D = optimizers

    losses = {
        'G_loss': [],
        'D_loss': [],
        'D_penalty': [],
        'D_real': [],
        'D_gen': [],
        'D_r1': []
    }
    metrics = {}

    metrics['image_grid'] = ImageGrid(volatile=P.no_gif)
    metrics['fixed_gen'] = FixedSampleGeneration(g_ema, volatile=P.no_gif)
    if not P.no_fid:
        metrics['fid_score'] = FIDScore(opt['dataset'], opt['fid_size'],
                                        P.n_eval_avg)

    logger.log_dirname("Steps {}".format(P.starting_step))

    for step in range(P.starting_step, opt['max_steps'] + 1):
        d_regularize = (step % P.d_reg_every == 0) and (P.lbd_r1 > 0)

        if P.use_warmup:
            _update_warmup(opt_G, step, opt["warmup"], opt["lr"])
            _update_warmup(opt_D, step, opt["warmup"], opt["lr_d"])
        if (not P.use_warmup) or step > opt["warmup"]:
            cur_lr_g = _update_lr(opt_G, step, opt["batch_size"],
                                  P.halflife_lr, opt["lr"])
            cur_lr_d = _update_lr(opt_D, step, opt["batch_size"],
                                  P.halflife_lr, opt["lr_d"])
            if cur_lr_d and cur_lr_g:
                logger.log('LR Updated: [G %.5f] [D %.5f]' %
                           (cur_lr_g, cur_lr_d))

        do_ema = (step * opt['batch_size']) > (P.ema_start_k * 1000)
        accum = P.accum if do_ema else 0
        accumulate(g_ema, generator, accum)

        generator.train()
        discriminator.train()

        images, labels = next(train_loader)
        images = images.cuda()

        set_grad(generator, True)
        set_grad(discriminator, False)

        d_gen = GD(P, images, train_G=True)
        g_loss = _loss_G_fn(d_gen)

        opt_G.zero_grad()
        g_loss.backward()
        opt_G.step()
        losses['G_loss'].append(g_loss.item())

        set_grad(generator, False)
        set_grad(discriminator, True)

        d_all, view_r, view_f = GD(P, images)
        d_loss, aux = _loss_D_fn(P, d_all, view_r, view_f)
        loss = d_loss + aux['penalty']

        if d_regularize:
            r1 = GD(P, images, return_r1_loss=True).mean()
            lazy_r1 = (0.5 * P.lbd_r1) * r1 * P.d_reg_every
            loss = loss + lazy_r1
            losses['D_r1'].append(r1.item())

        opt_D.zero_grad()
        loss.backward()
        opt_D.step()
        losses['D_loss'].append(d_loss.item())
        losses['D_real'].append(aux['d_real'].item())
        losses['D_gen'].append(aux['d_gen'].item())
        losses['D_penalty'].append(aux['penalty'].item())

        for i in range(opt['n_critic'] - 1):
            images, labels = next(train_loader)
            images = images.cuda()

            d_all, view_r, view_f = GD(P, images)
            d_loss, aux = _loss_D_fn(P, d_all, view_r, view_f)
            loss = d_loss + aux['penalty']

            opt_D.zero_grad()
            loss.backward()
            opt_D.step()

        generator.eval()
        discriminator.eval()

        if step % P.print_every == 0:
            logger.log('[Steps %7d] [G %.3f] [D %.3f]' %
                       (step, losses['G_loss'][-1], losses['D_loss'][-1]))
            for name in losses:
                values = losses[name]
                if len(values) > 0:
                    logger.scalar_summary('gan/train/' + name, values[-1],
                                          step)

        if step % P.evaluate_every == 0:
            logger.log_dirname("Steps {}".format(step + 1))
            fid_score = metrics.get('fid_score')
            fixed_gen = metrics.get('fixed_gen')
            image_grid = metrics.get('image_grid')

            if fid_score:
                fid_avg = fid_score.update(step, g_ema)
                fid_score.save(logger.logdir +
                               f'/results_fid_{P.eval_seed}.csv')
                logger.scalar_summary('gan/test/fid', fid_avg, step)
                logger.scalar_summary('gan/test/fid/best', fid_score.best,
                                      step)

            if not P.no_gif:
                _ = fixed_gen.update(step)
                imageio.mimsave(
                    logger.logdir + f'/training_progress_{P.eval_seed}.gif',
                    fixed_gen.summary())
            aug_grid = image_grid.update(step, P.augment_fn(images))
            imageio.imsave(logger.logdir + f'/real_augment_{P.eval_seed}.jpg',
                           aug_grid)

            G_state_dict = generator.state_dict()
            D_state_dict = discriminator.state_dict()
            Ge_state_dict = g_ema.state_dict()
            torch.save(G_state_dict, logger.logdir + '/gen.pt')
            torch.save(D_state_dict, logger.logdir + '/dis.pt')
            torch.save(Ge_state_dict, logger.logdir + '/gen_ema.pt')
            if fid_score and fid_score.is_best:
                torch.save(G_state_dict, logger.logdir + '/gen_best.pt')
                torch.save(D_state_dict, logger.logdir + '/dis_best.pt')
                torch.save(Ge_state_dict, logger.logdir + '/gen_ema_best.pt')
            if step % P.save_every == 0:
                torch.save(G_state_dict, logger.logdir + f'/gen_{step}.pt')
                torch.save(D_state_dict, logger.logdir + f'/dis_{step}.pt')
                torch.save(Ge_state_dict,
                           logger.logdir + f'/gen_ema_{step}.pt')
            torch.save(
                {
                    'epoch': step,
                    'optim_G': opt_G.state_dict(),
                    'optim_D': opt_D.state_dict(),
                }, logger.logdir + '/optim.pt')
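
In this training loop `accumulate(g_ema, generator, accum)` maintains `g_ema` as an exponential moving average of the generator's weights; `accum` is forced to 0 before the EMA kicks in, which makes the call a straight copy of the live weights. A common StyleGAN-style version of the helper looks roughly like the sketch below (not necessarily this repository's exact code). Example 12's `accumulate(to_model=g_running, from_model=generator.module)` call appears to rely on the same idea under different parameter names.

def accumulate(model_ema, model, decay=0.999):
    """Update model_ema's parameters as an EMA of model's parameters.

    decay=0 copies the live weights outright; decay close to 1 makes the
    averaged model drift slowly, which is what it is used for at evaluation
    and checkpoint time.
    """
    ema_params = dict(model_ema.named_parameters())
    live_params = dict(model.named_parameters())
    for name, param in ema_params.items():
        param.data.mul_(decay).add_(live_params[name].data, alpha=1 - decay)
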
Example #11
    return user


def get_password():
    pwd = os.environ.get("spark_password", None)
    if pwd is None:
        pwd = getpass("No password configured, Type password: ")
    return pwd


if __name__ == "__main__":
    username = get_username()
    password = get_password()

    sparkClient = SparkClient(username, password)
    account = sparkClient.get_account()
    balance = sparkClient.get_balance(account.key)
    holdings = sparkClient.get_holdings(account.key)

    print(account)
    print_currency(prefix="Portfolio value", value=balance.portfolio_value)
    print_currency(prefix="Holdings value",
                   value=accumulate(holdings, lambda a: a.current_value))
    print_currency(prefix="Accumulative profit",
                   value=accumulate(holdings, lambda a: a.profit),
                   text_format=red_green_color)
    print_currency(prefix="Remaining cash", value=balance.remaining_cash)

    for holding in holdings:
        print(holding)
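
Here `accumulate(holdings, lambda a: a.current_value)` reads as a mapped sum: apply the selector to every holding and total the results. A one-line sketch consistent with both call sites (hypothetical, since the helper itself is not shown):

def accumulate(items, selector):
    """Sum selector(item) over items, e.g. total value or total profit of holdings."""
    return sum(selector(item) for item in items)
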
Example #12
    def run(self):
        try:
            # setting variables and constants
            model = self.model
            generator = model.generator.train()
            g_running = model.g_running
            discriminator = model.discriminator
            n_frames_discriminator = model.n_frames_discriminator
            g_optimizer = model.g_optimizer
            d_optimizer = model.d_optimizer
            nfd_optimizer = model.nfd_optimizer
            used_samples = model.used_samples
            step = model.step
            resolution = model.resolution
            iteration = model.iteration

            n_critic = constants.N_CRITIC

            config = self.config
            code_size = config.get('code_size', constants.DEFAULT_CODE_SIZE)
            lr = config.get('lr', constants.LR)
            batch_size = config.get('batch_size', constants.BATCH_SIZE)
            init_size = config.get('init_size', constants.INIT_SIZE)
            n_gen_steps = config.get('n_gen_steps', 1)
            max_size = config['max_size']
            max_iterations = config.get('max_iterations',
                                        constants.MAX_ITERATIONS)
            samples_per_phase = config['samples_per_phase']
            loss_fn = config['loss_fn']

            n_frames_params = config.get('n_frames_params', dict())
            n_frames = n_frames_params.get('n', 1)
            n_frames_loss_coef = n_frames_params.get('loss_coef', 0)
            n_frames_final_freq = n_frames_params.get('final_freq', 0)
            n_frames_decay_duration = n_frames_params.get('decay_duration', 0)
            crop_freq = n_frames_params.get('crop_freq', 0)
            mixing = config.get('mixing', False)

            # getting data
            cur_batch_size = batch_size[resolution]
            images_dataloader = CycleLoader(self.images_dataset,
                                            cur_batch_size, resolution)

            if n_frames_loss_coef > 0:
                n_frames_dataloader = CycleLoader(self.n_frames_dataset,
                                                  cur_batch_size, resolution)
                if crop_freq > 0:
                    n_crops_dataloader = CycleLoader(self.n_crops_dataset,
                                                     cur_batch_size,
                                                     resolution)

            if iteration == 0:
                self.adjust_lr(lr, resolution)

            pbar = tqdm.trange(iteration, max_iterations, initial=iteration)

            requires_grad(generator, False)
            requires_grad(discriminator, True)

            discr_loss_val = 0
            gen_loss_val = 0
            grad_loss_val = 0

            max_step = int(math.log2(max_size)) - 2
            final_progress = False

            for iteration in pbar:
                model.iteration = iteration

                # update alpha, step and resolution
                alpha = min(1, 1 / samples_per_phase * (used_samples + 1))
                if resolution == init_size or final_progress:
                    alpha = 1
                if not final_progress and used_samples > samples_per_phase * 2:
                    LOGGER.debug(f'Used samples: {used_samples}.')
                    used_samples = 0
                    step += 1
                    if step > max_step:
                        step = max_step
                        final_progress = True
                        LOGGER.info('Final progress.')
                    else:
                        alpha = 0
                        LOGGER.info(
                            f'Changing resolution from {resolution} to {resolution * 2}.'
                        )
                    resolution = 4 * 2**step
                    model.step = step
                    model.resolution = resolution
                    model.used_samples = used_samples
                    LOGGER.debug(
                        f'Used samples on saving: {model.used_samples}.')
                    self.save_model(step=step)
                    self.adjust_lr(lr, resolution)

                    # set up loaders
                    cur_batch_size = batch_size[resolution]
                    images_dataloader = CycleLoader(self.images_dataset,
                                                    cur_batch_size, resolution)
                    if n_frames_loss_coef > 0:
                        n_frames_dataloader = CycleLoader(
                            self.n_frames_dataset, cur_batch_size, resolution)
                        if crop_freq > 0:
                            n_crops_dataloader = CycleLoader(
                                self.n_crops_dataset, cur_batch_size,
                                resolution)

                # decide if need to use n_frames on this iteration
                if final_progress or n_frames_decay_duration == 0:
                    n_frames_freq = n_frames_final_freq
                else:
                    n_frames_freq = 0.5 - min(1, used_samples / n_frames_decay_duration) *\
                        (0.5 - n_frames_final_freq)
                n_frames_iteration = random.random() < n_frames_freq
                if n_frames_iteration:
                    cur_discr = n_frames_discriminator
                    cur_dataloader = n_frames_dataloader
                    cur_n_frames = n_frames
                    cur_d_optimizer = nfd_optimizer
                else:
                    cur_discr = discriminator
                    cur_dataloader = images_dataloader
                    cur_n_frames = 1
                    cur_d_optimizer = d_optimizer

                cur_discr.zero_grad()
                real_image = next(cur_dataloader)
                LOGGER.debug(f'n_frames iteration: {n_frames_iteration}')
                LOGGER.debug(f'cur_discr: {type(cur_discr.module)}')
                LOGGER.debug(
                    f'real_image shape {real_image.shape}; resolution {resolution}'
                )

                # discriminator step
                real_predict, real_grad_loss_val = discr_backward_real(
                    cur_discr, loss_fn, real_image, step, alpha)
                if mixing and random.random() < 0.9:
                    num_latents = 2
                else:
                    num_latents = 1
                LOGGER.debug(f'Batch size: {cur_batch_size}')
                latents = get_latents(cur_batch_size, code_size,
                                      2 * num_latents)
                gen_in1 = latents[:num_latents]
                gen_in2 = latents[num_latents:]
                LOGGER.debug(f'Latents shape: {gen_in1[0].shape}')
                fake_image = generator(gen_in1,
                                       step=step,
                                       alpha=alpha,
                                       n_frames=cur_n_frames)

                crop_iteration = False
                if n_frames_iteration:
                    if random.random() < crop_freq:
                        crop_iteration = True
                        fake_image = next(n_crops_dataloader)
                discr_loss_val, fake_grad_loss_val = discr_backward_fake(
                    cur_discr, loss_fn, fake_image, real_image, real_predict,
                    step, alpha, False)
                grad_loss_val = real_grad_loss_val or fake_grad_loss_val
                cur_d_optimizer.step()

                # generator step
                if (iteration + 1) % n_critic == 0:
                    for gen_step in range(n_gen_steps):
                        generator.zero_grad()

                        requires_grad(generator, True)
                        requires_grad(cur_discr, False)

                        fake_image = generator(gen_in2,
                                               step=step,
                                               alpha=alpha,
                                               n_frames=cur_n_frames)
                        LOGGER.debug(
                            f'fake image shape when gen {fake_image.shape}')

                        predict = cur_discr(fake_image, step=step, alpha=alpha)
                        if loss_fn == 'wgan-gp':
                            loss = -predict.mean()
                        elif loss_fn == 'r1':
                            loss = F.softplus(-predict).mean()

                        if n_frames_iteration:
                            loss *= n_frames_loss_coef
                        gen_loss_val = loss.item()

                        loss.backward()
                        g_optimizer.step()
                        LOGGER.debug('generator optimizer step')
                        accumulate(to_model=g_running,
                                   from_model=generator.module)

                        requires_grad(generator, False)
                        requires_grad(cur_discr, True)

                used_samples += real_image.shape[0]
                model.used_samples = used_samples

                if (iteration + 1) % constants.SAMPLE_FREQUENCY == 0:
                    LOGGER.info(
                        f'Saving samples on {iteration + 1} iteration.')
                    save_sample(generator=g_running,
                                alpha=alpha,
                                step=step,
                                code_size=code_size,
                                resolution=resolution,
                                save_dir=os.path.join(self.sample_dir),
                                name=f'{str(iteration + 1).zfill(6)}',
                                sample_size=constants.SAMPLE_SIZE,
                                images_n_frames=n_frames,
                                video_n_frames=32)

                if (iteration + 1) % constants.SAVE_FREQUENCY == 0:
                    self.save_model(iteration=iteration + 1)

                if n_frames_iteration:
                    prefix = 'NF'
                    suffix = 'n_frames'
                else:
                    prefix = ''
                    suffix = 'loss'

                state_msg = f'Size: {resolution}; {prefix}G: {gen_loss_val:.3f}; {prefix}D: {discr_loss_val:.3f}; ' +\
                            f'{prefix}Grad: {grad_loss_val:.3f}; Alpha: {alpha:.5f}'
                pbar.set_description(state_msg)

                if iteration % constants.LOG_LOSS_FREQUENCY == 0:
                    self.summary_writer.add_scalar('size', resolution,
                                                   iteration)
                    self.summary_writer.add_scalar(f'G/{suffix}', gen_loss_val,
                                                   iteration)
                    self.summary_writer.add_scalar(f'D/{suffix}',
                                                   discr_loss_val, iteration)
                    self.summary_writer.add_scalar(f'Grad/{suffix}',
                                                   grad_loss_val, iteration)
                    self.summary_writer.add_scalar('alpha', alpha, iteration)
                    if n_frames_iteration and crop_freq > 0:
                        if crop_iteration:
                            suffix = 'crop'
                        else:
                            suffix = 'no_crop'
                        self.summary_writer.add_scalar(f'D/{suffix}',
                                                       discr_loss_val,
                                                       iteration)

        except KeyboardInterrupt:
            LOGGER.warning('Interrupted by user')
            self.save_model(iteration=iteration)
Example #13
def calculate_future_rewards(rewards, gamma):
    """rewards is list of episodes where length of list is max_episode_length.
    And each elements is rewards of each batches. So, shape of the rewards becomes [max_episode_length, batch_size]"""
    return accumulate(rewards, gamma)
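
Assuming `accumulate` is the backward discounted sum sketched after Example 5, this reduces to the usual return recursion G_t = r_t + gamma * G_{t+1}. A small worked case with illustrative numbers:

# 3-step episode, batch_size = 1, gamma = 0.9 (illustrative numbers)
rewards = [[1.0], [1.0], [1.0]]
# Working backwards from the last step:
#   G_2 = 1.0
#   G_1 = 1.0 + 0.9 * 1.0 = 1.9
#   G_0 = 1.0 + 0.9 * 1.9 = 2.71
# so calculate_future_rewards(rewards, 0.9) would give [[2.71], [1.9], [1.0]]
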
Example #14
    def generate(self, ffrom, tto):
        sucursal_id = self.config["sucursal"]
        sucursal_negated = False
        if sucursal_id == "all":
            sucursal_id = None
        elif sucursal_id.startswith("-"):
            sucursal_negated = True
        period_type = self.config["period"]

        ret, new_ffrom, new_tto = self._reused_data(ffrom, tto)
        if new_ffrom is None and new_tto is None:  # full reuse - ret == old_data
            return ret

        sum_type = self.config["sum_type"]

        if sucursal_negated:
            equal = "!="
        else:
            equal = "="

        query = """SELECT date_trunc('%s', S.real_date) AS period, SUM(S.amount) AS amount
            FROM (
                SELECT D.real_date, getBuyQuotationAt(SB.currency_id, D.real_date) * SB.amount AS amount
                FROM sale_bill SB
                    INNER JOIN document D ON D.document_id = SB.document_id
                WHERE NOT SB.cancelled AND
                    (%%(sucursal_id)s IS NULL OR D.sucursal_id %s %%(sucursal_id)s) AND
                    D.real_date BETWEEN %%(ffrom)s AND %%(tto)s
                UNION ALL
                SELECT D.real_date, getBuyQuotationAt(SCN.currency_id, D.real_date) * -SCN.amount AS amount
                FROM sale_credit_note SCN
                    INNER JOIN document D ON D.document_id = SCN.document_id
                WHERE NOT SCN.cancelled AND
                    (%%(sucursal_id)s IS NULL OR D.sucursal_id %s %%(sucursal_id)s) AND
                    D.real_date BETWEEN %%(ffrom)s AND %%(tto)s
                ) AS S
            GROUP BY period
            ORDER BY period
            """ % (period_type, equal, equal)
        with self.db.cursor() as cursor:
            cursor.execute(
                """
                SELECT generate_series AS period, 0 AS amount
                FROM generate_series(%%(ffrom)s, %%(tto)s, interval '1 %s')
                        """ % period_type, {
                    'ffrom': new_ffrom,
                    'tto': new_tto
                })
            ret.update(
                dict((ensure_date(row["period"]), row["amount"])
                     for row in cursor.fetchall()))

            cursor.execute(query, {
                "sucursal_id": sucursal_id,
                'ffrom': new_ffrom,
                'tto': new_tto
            })
            for row in cursor.fetchall():
                ret[ensure_date(row["period"])] = row["amount"]
            totalized_movs = sorted(ret.items(), key=lambda k_v: k_v[0])

        accumulate(totalized_movs, sum_type)

        data = [{
            "label": row[0].strftime("%Y-%m-%d"),
            "data": float(row[1])
        } for row in totalized_movs]
        return data
Example #15
def process_consecutive_blocks(contigs_group, soi, chr_, snp_threshold,
                               sample_list, num_of_hets, lods_cut_off,
                               writelod, maxed_as):

    #print()
    print(
        '  - Grouping the dataframe using unique "PI - phased index" values. ')
    ''' Step 02 - D: group dataframe again by "PI keys" of soi and then
       sort by minimum "POS" value for each "PI key".
       - This sorting is necessary because sometimes "haplotype blocks" are like 3-3-3-3  5-5-5  3-3-3-3
          - i.e. there are small RBphased blocks within the boundary of a larger RBphased block.
          - Not sure what is causing this (probably a sampling difference of large vs. small chunks in PE reads).
          - This problem should go away in the first round of haplotype extension.'''

    contigs_group = contigs_group. \
        assign(New=contigs_group.groupby([soi + ':PI']).
               POS.transform('min')).sort_values(['New', 'POS'])
    ''' Step 03: Now, start reading the "contigs_group" for haplotype-extension.
    A) Store the data as dictionary with 'header' values as keys. Some keys are: CHROM, POS, sample (PI, PG within sample),
       etc ... Then group the dictionary using unique "PI" values as 'keys' for grouping.
        Note: This dict-data should contain information about two adjacent haplotype blocks that needs extending.
        In this example I want to extend the haplotypes for "sample ms02g" which has two blocks 6 and 4.
        So, I read the PI and PG value for this sample. Also, data should store with some unique keys.
    B) Iterate over two consecutive Haplotype-Blocks at once.
        Note: While iterating over two blocks, initially we write the very first block of the "contig". With this
        method, now when we iterate over two consecutive blocks we can only update and write the second block.
        '''

    # convert the pandas dataframe back to a text-like file before converting it into a dictionary.
    contigs_group = pd.DataFrame.to_csv(contigs_group,
                                        sep='\t',
                                        index=False,
                                        header=True)
    ''' Step 03 - A : read the data with header as keys and groupby using that "keys" '''
    phased_dict = csv.DictReader(StringIO(contigs_group), delimiter='\t')
    phased_grouped = itertools.groupby(phased_dict,
                                       key=lambda x: x[soi + ':PI'])
    ''' Since the dictionary isn't ordered, we preserve the order using an OrderedDict '''
    # ** for future: there is room for improvement in here (memory and speed)
    grouped_data = collections.OrderedDict()
    for key, grp in phased_grouped:
        grouped_data[key] = accumulate(grp)
    ''' Clear memory '''
    del phased_dict
    del phased_grouped
    del contigs_group

    #print()
    print('  - Starting MarkovChains for contig %s' % chr_)
    ''' Step 03 - B : now pipe the data for phase extension '''
    ''' Step 03 - B : And, iterate over two consecutive Haplotype-Blocks at once. This is done to obtain all
    possible Haplotype configurations between two blocks. The (keys, values) for the first block are represented as
    k1, v1 and for the later block as k2, v2. '''
    ''' Step 03 - B (i): Before running consecutive blocks, we write data from the very first block to the file.
    Reason : Before we start computing and solving the haplotype phase state, we plan to write the
    data for very first block (k1, v1). So, after that, we can solve the relation between two consecutive..
    .. blocks but only write data from 2nd block each time - based on what relation comes out. '''
    very_first_block = [list(grouped_data.items())[0]]

    if len(list(grouped_data.items())) == 1:
        print('there is only one block, so skipping phase extension')

    # write header of the extended phase-block
    extended_haplotype = '\t'.join([
        'CHROM', 'POS', 'REF', 'all-alleles', soi + ':PI', soi + ':PG_al'
    ]) + '\n'

    if writelod == 'yes':  # add extra field if desired by user
        extended_haplotype = extended_haplotype.rstrip('\n') + '\tlog2odds\n'
        log2odds = ''

    # write data/values from very first block.
    for k1, v1 in very_first_block:
        for r1, vals in enumerate(v1[soi + ':PI']):
            new_line = '\t'.join([
                v1['CHROM'][r1], v1['POS'][r1], v1['REF'][r1],
                v1['all-alleles'][r1], v1[soi + ':PI'][r1],
                v1[soi + ':PG_al'][r1]
            ]) + '\n'
            if writelod == 'yes':
                new_line = new_line.rstrip('\n') + '\t.\n'

            extended_haplotype += new_line

        #print('very first block end\n\n')  # marker for debugging
    ''' Step 03 - B (ii):  Starting MarkovChains.
            Now, read data from two consecutive blocks at a time.
            Note: At the end of computation write the data only from each k2 block. No need to write the data
            from k1 block of each iteration because it was written in earlier loop.'''
    ''' Step 03 - B (ii - 1) Create empty "checker variables".
        Note: These checker variables (actually multi-level boolean logic) help to carry over information from an
        earlier iteration of the for-loop - i.e. identify if the values from the later block (k2, v2) were phased
        to the earlier block (k1, v1) in "parallel" vs. "alternate" configuration.
        - If two consecutive blocks are phased, k2_new is now assigned k1 from earlier block; else (if not phased)
          k2_new stays empty ('').
        - So, the role of flipped variable is to keep information if k2,v2 were phased straight vs. alternate
          compared to k1, v1 in the earlier run. These checker-variables are crucial to keep the proper phase-state
          in the output file.'''

    # start checker variables
    k2_new = ''  # updates the index of k2 for each k1,v1 ; k2,v2 run
    flipped = ''  # boolean logic to check and store if the phase state flipped during extension
    ''' Step 03 - B (ii - 2): Now, read two consecutive blocks at a time'''
    for (k1, v1), (k2,
                   v2) in zip(grouped_data.items(),
                              itertools.islice(grouped_data.items(), 1, None)):
        ''' Step 03 - B (ii - 2-A): iterate over the first Haplotype Block, i.e the k1 block.
        The nucleotides in the left of the phased SNPs are called Block01-haplotype-A,
        and similarly on the right as Block01-haplotype-B. '''

        # iterate over the first Haplotype Block, i.e the k1 block and v1 values
        hap_block1a = [x.split('|')[0] for x in v1[soi + ':PG_al']
                       ]  # the left haplotype of block01
        hap_block1b = [x.split('|')[1] for x in v1[soi + ':PG_al']]

        # iterate over the second Haplotype Block, i.e the k2 block and v2 values
        hap_block2a = [x.split('|')[0] for x in v2[soi + ':PG_al']]
        hap_block2b = [x.split('|')[1] for x in v2[soi + ':PG_al']]
        ''' Step 03 - B (ii - 2-B) : Create possible haplotype configurations for "forward markov chain".
        Possible haplotype Configurations will be, Either :

        1) Block01-haplotype-A phased with Block02-haplotype-A,
            creating -> hapb1a-hapb2a, hapb1b-hapb2b '''
        ''' First possible configuration '''
        hapb1a_hapb2a = [hap_block1a, hap_block2a]
        hapb1b_hapb2b = [hap_block1b, hap_block2b]
        ''' Or, Second Possible Configuration
        2) block01-haplotype-A phased with Block02-haplotype-B
            creating -> hapb1a-hapb2b, hapb1b-hapb2a '''
        hapb1a_hapb2b = [hap_block1a, hap_block2b]
        hapb1b_hapb2a = [hap_block1b, hap_block2a]
        ''' Step 03 - B (ii - 2-C) :
        Create possible haplotype configurations for "reverse markov chain"
        - reverse markov chains are added to increase the confidence in likelihood estimation. '''

        # switch the keys values for reverse markov chain
        k1_r = k2
        k2_r = k1
        v1_r = v2
        v2_r = v1

        # switch the haplotype positions for preparing the reverse markov chains
        hapb1a_hapb2a_r = [hapb1a_hapb2a[1], hapb1a_hapb2a[0]]
        hapb1b_hapb2b_r = [hapb1b_hapb2b[1], hapb1b_hapb2b[0]]

        hapb1a_hapb2b_r = [hapb1a_hapb2b[1], hapb1a_hapb2b[0]]
        hapb1b_hapb2a_r = [hapb1b_hapb2a[1], hapb1b_hapb2a[0]]

        ################################# - inactive for now- can be used for adding SNP phasing later on.
        ''' skip if one of the keys has no values - this is redundant ?? - keep it for just in case situation
        ** can also be used in the future if we want to phase the SNPs that have no assigned 'PI' values,
        i.e the "PI" will be "." '''
        if k1 == '.' or k2 == '.':
            for xi in range(len(v2[soi + ':PI'])):
                new_line = '\t'.join([
                    v2['CHROM'][xi], v2['POS'][xi], v2['REF'][xi],
                    v2['all-alleles'][xi], k2,
                    hapb1a_hapb2a[1][xi] + '|' + hapb1b_hapb2b[1][xi]
                ]) + '\n'
                if writelod == 'yes':
                    new_line = new_line.rstrip('\n') + '\t.\n'

                extended_haplotype += new_line

            # update the values of checker variables
            k2_new = ''
            flipped = ''

            continue  # to next consecutive blocks
        ######################################################
        ''' Step 03 - C : Set the threshold for the minimum number of SNPs required in haplotype block
        before continuing phase extension. '''
        ''' If all the data in soi, in either v1 or v2 are SNPs below a certain threshold we just write
        the data and continue. i.e say if a Haplotype-Block is composed of only 2 SNPs it will be less
        reliable to extend the phase-state.
        - So, this step can also be used to control the minimum number/size of the haplotypes that is required
        before it can be phase-extended.
        - by default the minimum number of SNPs (exclusive) in the soi haplotype is set to 3.
        - If minimum requirement isn't met just skip extending the phase and write it to a file and continue. '''
        number_of_snp_in_soi_v1 = len(
            [x for x in v1[soi + ':PG_al'] if len(x) == 3])
        number_of_snp_in_soi_v2 = len(
            [x for x in v2[soi + ':PG_al'] if len(x) == 3])

        # print('number of SNPs: ', NumSNPsInsoi_v1, NumSNPsInsoi_v2)
        if number_of_snp_in_soi_v1 < snp_threshold \
                or number_of_snp_in_soi_v2 < snp_threshold:
            for xth, vals in enumerate(v2[soi + ':PI']):
                new_line = '\t'.join([
                    v2['CHROM'][xth], v2['POS'][xth], v2['REF'][xth],
                    v2['all-alleles'][xth], k2,
                    hapb1a_hapb2a[1][xth] + '|' + hapb1b_hapb2b[1][xth]
                ]) + '\n'
                if writelod == 'yes':
                    new_line = new_line.rstrip('\n') + '\t.\n'

                extended_haplotype += new_line

            # update values of the checker variables
            # this is important, so previous k2 and flip state doesn't get carried over without purpose
            k2_new = ''
            flipped = ''

            continue  # to next consecutive blocks
        ''' Step 04: For the consecutive blocks that pass the thresholds (SNP number, PI != '.', etc.),
        pipe the data (k1, v1 ; k2, v2) to a defined function for computation of forward and reverse
        markov chain transition probabilities for these two consecutive blocks (k1, v1; k2, v2) '''

        #### for forward chain   ########
        # ** set "orientation=reversed" to compute transition ..
        # .. from the lower tip of former block with upper tip of later block
        # .. this helps in using the closest genomic position between consecutive blocks thus ..
        # .. downsizing the effects created by recombination.
        lhfc_f, lhsc_f = \
            compute_maxLh_score(soi, sample_list, k1, k2, v1, v2, num_of_hets,
                                      hapb1a_hapb2a, hapb1b_hapb2b,
                                      hapb1a_hapb2b, hapb1b_hapb2a, maxed_as, orientation=reversed)

        #### for reverse chain   ########
        # set "orientation=lambda..." just passes a null value keeping orientation as it is.
        lhfc_r, lhsc_r = compute_maxLh_score \
            (soi, sample_list, k1_r, k2_r, v1_r, v2_r, num_of_hets,
             hapb1a_hapb2a_r, hapb1b_hapb2b_r,
             hapb1a_hapb2b_r, hapb1b_hapb2a_r,maxed_as, orientation=lambda x: x)
        ''' Step 05-06 are inside the function "compute_maxLh_score()". The values
        (lhfc_f, lhsc_f, lhfc_r, lhsc_r) returned from this function is then used in Step 07. '''
        ''' Step 07 : the previous step (Step 06) returns the likelihoods and/or LOD scores for both "parallel"
        and alternate configurations (for both forward and reverse algorithm).
        - We now extend the phase states by comparing LODs score against  cutoff-values.'''
        ''' Step 07 - A(i): calculate the average of the likelihoods, the odds and then the log2 of the odds. '''
        # average of the likelihoods for the first vs. second configuration
        # (from both forward and reverse algorithm)
        # ** note: "maxed_as" variable doesn't apply here, because maxLH using forward vs. reverse ..
        # .. are just re-estimates. So, we simply take and average on both "maxSum" and "maxPd"
        avg_lhfc = Decimal(lhfc_f + lhfc_r) / 2
        avg_lhsc = Decimal(lhsc_f + lhsc_r) / 2

        # therefore, odds of first_vs_second_configuration is
        odds_fc_vs_sc = avg_lhfc / avg_lhsc
        ''' Step 07 - A(ii) : convert the likelihoods to an odds ratio and then to log2 of the odds '''
        lods2_score_1st_config = Decimal(odds_fc_vs_sc).ln() / (
            Decimal('2').ln())
        lods2_score_2nd_config = (-lods2_score_1st_config)

        #print('logOdds')  # marker for debugging
        #print(lods2_score_1st_config)
        ''' Step 07 - B : pipe the LOD scores and write the phase state between two consecutive blocks.
                - use "lods cutoff" to decide on phase extension
                - and then store, write it to files.
         ** We can also use the accumulation from this stage to run histogram building at a later stage.
            - the accumulated "extended_haplotype" can be written all at once - this is important while multiprocessing. '''
        k2_new, flipped, extended_haplotype = extend_phase_state(
            soi, k1, k2, v1, v2, k2_new, flipped, lods2_score_1st_config,
            lods_cut_off, extended_haplotype, hapb1a_hapb2a, hapb1b_hapb2b,
            writelod)
        ''' Now, go to Step 08, function "extend_phase_state" '''
        # this process updates the data in "extended_haplotype" cumulatively across the for-loop

    # finally return the extended haplotype as pandas dataframe
    phase_extend = extended_haplotype

    del extended_haplotype

    return pd.read_table(StringIO(phase_extend), sep='\t')
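
In this function `accumulate(grp)` materialises one groupby group (an iterator of `csv.DictReader` rows) into a dict of column lists, which is how the values are indexed later (`v1['CHROM'][r1]`, `v1[soi + ':PI'][r1]`, and so on). A sketch of a helper with that behaviour, hypothetical since the real implementation is not shown:

import collections


def accumulate(rows):
    """Collapse an iterable of per-row dicts into one dict of column lists.

    Each key maps to the list of values that column takes across the rows,
    preserving row order, so result['POS'][i] lines up with result['CHROM'][i].
    """
    columns = collections.OrderedDict()
    for row in rows:
        for key, value in row.items():
            columns.setdefault(key, []).append(value)
    return columns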