Пример #1
0
def main():
    """Build Pairtree-style inputs from PhyloWGS-style inputs and a hand-built tree.

    Command-line arguments:
        --discard-garbage: if given, garbage variants are removed from the
            variant set and the remaining variant IDs are renumbered via
            `make_varids_contiguous`; the written garbage list is then empty.
        --handbuilt: (required) path handed to `load_handbuilt`.
        pwgs_ssm_fn, pwgs_params_fn: input SSM and params paths.
        pairtree_ssm_fn, pairtree_params_fn: output SSM and params paths.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='LOL HI THERE',
    )
    cli.add_argument('--discard-garbage',
                     action='store_true',
                     dest='discard_garbage')
    cli.add_argument('--handbuilt', required=True, dest='handbuilt_fn')
    for positional in ('pwgs_ssm_fn', 'pwgs_params_fn',
                       'pairtree_ssm_fn', 'pairtree_params_fn'):
        cli.add_argument(positional)
    opts = cli.parse_args()

    handbuilt = load_handbuilt(opts.handbuilt_fn, 'handbuilt.xeno')
    clusters = convert_clusters(handbuilt['clusters'])
    garbage = handbuilt['garbage']
    # Original author's note: because the empty first cluster is removed, the
    # indexing on the tree structure is offset -- cluster `i` is represented
    # by `i + 1` in the structure. This is accepted as-is.
    parents = convert_adjl_to_parents(handbuilt['structure'])

    source_params = inputparser.load_params(opts.pwgs_params_fn)
    variants = load_phylowgs(opts.pwgs_ssm_fn)
    if opts.discard_garbage:
        # The return value of `remove_garbage` is ignored, so it presumably
        # edits `variants` in place -- confirm against its definition.
        remove_garbage(variants, garbage)
        variants, clusters = make_varids_contiguous(
            variants, garbage, clusters)
        garbage = []

    inputparser.write_ssms(variants, opts.pairtree_ssm_fn)
    write_pairtree_params(
        source_params['samples'],
        garbage,
        clusters,
        parents,
        opts.pairtree_params_fn,
    )
Пример #2
0
def main():
  """Write one SSM/params file pair per sample subset of the input.

  Command-line arguments:
    --counts: (required) comma-separated, distinct integers forwarded to
      `_select_samp_subsets` (presumably the subset sizes -- confirm there).
    in_ssm_fn, in_params_fn: input SSM and params paths.
    out_base: output prefix; each subset is written to
      `<out_base>_S<len(subset)>.ssm` and `<out_base>_S<len(subset)>.params.json`.
  """
  cli = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description='LOL HI THERE',
  )
  cli.add_argument('--counts', required=True)
  for positional in ('in_ssm_fn', 'in_params_fn', 'out_base'):
    cli.add_argument(positional)
  opts = cli.parse_args()

  # Fixed seed so that any random choices made downstream are reproducible.
  random.seed(1337)

  requested = list(map(int, opts.counts.split(',')))
  # Every requested count must be distinct.
  assert len(set(requested)) == len(requested)
  all_ssms = inputparser.load_ssms(opts.in_ssm_fn)
  base_params = inputparser.load_params(opts.in_params_fn)
  all_samples = base_params['samples']

  # Always include diagnosis sample, on assumption we're working with
  # SJbALL022609 from Steph for the paper congraph figure.
  for chosen in _select_samp_subsets(all_samples, requested, all_must_include=['D']):
    keep_idxs = _find_idxs(all_samples, chosen)
    subset_ssms = _filter_ssms(all_ssms, keep_idxs)
    # Shallow copy of the params with only the sample list swapped out.
    subset_params = dict(base_params)
    subset_params['samples'] = chosen

    prefix = '%s_S%s' % (opts.out_base, len(chosen))
    inputparser.write_ssms(subset_ssms, prefix + '.ssm')
    with open(prefix + '.params.json', 'w') as params_out:
      json.dump(subset_params, params_out)
Пример #3
0
def main():
  """Load an SSM file, run `_fix_omegas` over it, and write the SSMs back out.

  Command-line arguments:
    in_ssm_fn: path of the SSM file to read.
    out_ssm_fn: path the SSMs are written to afterwards.

  The value returned by `_fix_omegas` is printed as `fixed_omegas=<value>`.
  """
  cli = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description='LOL HI THERE',
  )
  for positional in ('in_ssm_fn', 'out_ssm_fn'):
    cli.add_argument(positional)
  opts = cli.parse_args()

  # Print arrays in full and make NumPy raise on divide/invalid/overflow
  # floating-point errors rather than only warning.
  np.set_printoptions(
    linewidth=400,
    precision=3,
    threshold=sys.maxsize,
    suppress=True,
  )
  np.seterr(divide='raise', invalid='raise', over='raise')

  ssms = inputparser.load_ssms(opts.in_ssm_fn)
  # The same `ssms` object is written out below, so `_fix_omegas` presumably
  # modifies it in place -- confirm against its definition.
  fixed_prop = _fix_omegas(ssms, print_bad=False)
  print('fixed_omegas=%s' % fixed_prop)
  inputparser.write_ssms(ssms, opts.out_ssm_fn)
Пример #4
0
def _process(ssmfn, jsonfn, order):
    """Reorder the samples of an SSM/params file pair, overwriting both files.

    Args:
        ssmfn: Path of the SSM file; it is loaded and then rewritten.
        jsonfn: Path of the params JSON file; it is loaded and then rewritten.
        order: Comma-separated string of sample indices giving the new sample
            order. It must be a permutation of `0 .. N-1`, where `N` is the
            number of entries in `params['samples']`.

    Raises:
        ValueError: If an entry of `order` cannot be parsed as an integer.
        AssertionError: If `order` is not an exact permutation of the sample
            indices, or if the first variant's `var_reads` does not have one
            entry per sample.
    """
    params = inputparser.load_params(jsonfn)
    ssms = inputparser.load_ssms(ssmfn)

    order = [int(idx) for idx in order.split(',')]
    N = len(params['samples'])
    # Comparing sets alone would accept repeated indices (e.g. "0,1,1" for two
    # samples) and silently duplicate samples, so require every index in
    # range(N) to appear exactly once.
    assert sorted(order) == list(range(N)), \
        'order must be a permutation of 0..%d, got %s' % (N - 1, order)

    # Check the per-sample width on one variant; with no variants there is
    # nothing to check and only the params need reordering.
    first = next(iter(ssms.values()), None)
    if first is not None:
        assert len(first['var_reads']) == N

    params['samples'] = [params['samples'][idx] for idx in order]
    for ssm in ssms.values():
        for K in ('var_reads', 'ref_reads', 'total_reads', 'vaf', 'omega_v'):
            # Indexing with a list of positions; presumably these are NumPy
            # arrays (plain lists do not support this) -- TODO confirm.
            ssm[K] = ssm[K][order]

    with open(jsonfn, 'w') as F:
        json.dump(params, F)
    inputparser.write_ssms(ssms, ssmfn)
Пример #5
0
def _process(ssmfn, jsonfn, to_remove):
  """Drop the given samples from an SSM/params file pair, overwriting both files.

  Args:
    ssmfn: Path of the SSM file; it is loaded and then rewritten.
    jsonfn: Path of the params JSON file; it is loaded and then rewritten.
    to_remove: Comma-separated string of sample indices to drop. Each must be
      a valid index into `params['samples']`, and at least one sample must
      remain afterwards.

  Raises:
    ValueError: If an entry of `to_remove` cannot be parsed as an integer.
    AssertionError: If an index is out of range or no sample would remain.
  """
  params = inputparser.load_params(jsonfn)
  ssms = inputparser.load_ssms(ssmfn)

  dropped = {int(token) for token in to_remove.split(',')}
  every_idx = set(range(len(params['samples'])))
  assert dropped <= every_idx
  # Surviving samples keep their original relative order.
  kept = sorted(every_idx.difference(dropped))
  assert kept

  params['samples'] = [params['samples'][pos] for pos in kept]
  per_sample_fields = ('var_reads', 'ref_reads', 'total_reads', 'vaf', 'omega_v')
  for ssm in ssms.values():
    for field in per_sample_fields:
      # Indexing with a list of positions; presumably NumPy arrays -- TODO confirm.
      ssm[field] = ssm[field][kept]

  with open(jsonfn, 'w') as params_out:
    json.dump(params, params_out)
  inputparser.write_ssms(ssms, ssmfn)
def main():
    """Extract clusters from an SSM file and write SSM and params outputs.

    Command-line arguments:
        --remove-small-clusters: if given, the clusters are passed through
            `remove_small_clusters`, which also yields the garbage list;
            otherwise the garbage list is empty.
        inssm_fn: input SSM path.
        outssm_fn: path the loaded variants are written to.
        outparams_fn: path handed to `write_params`.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='LOL HI THERE',
    )
    cli.add_argument('--remove-small-clusters', action='store_true')
    for positional in ('inssm_fn', 'outssm_fn', 'outparams_fn'):
        cli.add_argument(positional)
    opts = cli.parse_args()

    variants = load_ssms(opts.inssm_fn)
    clusters = sort_clusters_by_vaf(extract_clusters(variants), variants)
    # The SSMs are written after clustering/sorting and before any
    # small-cluster removal, as in the original statement order.
    inputparser.write_ssms(variants, opts.outssm_fn)

    garbage = []
    if opts.remove_small_clusters:
        clusters, garbage = remove_small_clusters(clusters)
    write_params(clusters, garbage, make_sampnames(variants),
                 opts.outparams_fn)
Пример #7
0
def main():
    """Flag variants whose var_read_prob looks wrong and write updated inputs.

    Variants and params are loaded, `_remove_bad` is run with the log Bayes
    factor threshold and the alternative var_read_prob, and the variants it
    reports are handled according to `--action`:

    * `add_to_garbage` (default): they are merged into `params['garbage']`.
    * `modify_var_read_prob`: their `omega_v` is overwritten in place with
      `--var-read-prob-alt`.

    The SSMs and params are then written out and summary statistics are
    printed, one `key=value` pair per line.

    Raises:
        Exception: If `--action` is somehow neither of the two known values.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=
        'Find variants with likely incorrect var_read_prob by comparing model with provided var_read_prob to haploid (LOH) model using Bayes factors',
    )
    cli.add_argument(
        '--logbf-threshold',
        default=10.,
        type=float,
        help=
        'Logarithm of Bayes factor threshold at which the haploid model is accepted as more likely model than the model using the provided var_read_prob',
    )
    cli.add_argument('--verbose',
                     help='Print debugging messages',
                     action='store_true')
    cli.add_argument(
        '--ignore-existing-garbage',
        help=
        'Ignore any existing garbage variants listed in in_params_fn and test all variants. If not specified, any existing garbage variants will be kept as garbage and not tested again.',
        action='store_true',
    )
    cli.add_argument('--action',
                     default='add_to_garbage',
                     choices=('add_to_garbage', 'modify_var_read_prob'))
    cli.add_argument('--var-read-prob-alt', default=1., type=float)
    cli.add_argument('in_ssm_fn', help='Input SSM file with mutations')
    cli.add_argument(
        'in_params_fn',
        help=
        'Input params file listing sample names and any existing garbage mutations',
    )
    cli.add_argument(
        'out_ssm_fn',
        help='Output SSM file with modified list of garbage mutations')
    cli.add_argument(
        'out_params_fn',
        help='Output params file with modified list of garbage mutations')
    opts = cli.parse_args()

    # Print arrays in full and make NumPy raise on divide/invalid/overflow
    # floating-point errors rather than only warning.
    np.set_printoptions(threshold=sys.maxsize,
                        suppress=True,
                        linewidth=400,
                        precision=3)
    np.seterr(divide='raise', invalid='raise', over='raise')

    # Only override `remove_garb` when existing garbage is to be ignored;
    # otherwise the loader's own default applies.
    loader_kwargs = {}
    if opts.ignore_existing_garbage:
        loader_kwargs['remove_garb'] = False
    variants, params = inputparser.load_ssms_and_params(
        opts.in_ssm_fn, opts.in_params_fn, **loader_kwargs)
    if opts.ignore_existing_garbage:
        # Start from an empty garbage list so every variant is tested.
        params['garbage'] = []

    bad_vids, bad_samp_prop = _remove_bad(variants, opts.logbf_threshold,
                                          opts.var_read_prob_alt,
                                          opts.verbose)
    bad_ssm_prop = len(bad_vids) / len(variants)

    if opts.action == 'add_to_garbage':
        merged_garbage = set(bad_vids).union(params['garbage'])
        params['garbage'] = common.sort_vids(merged_garbage)
    elif opts.action == 'modify_var_read_prob':
        for vid in bad_vids:
            # Slice assignment overwrites every entry of the existing
            # `omega_v` object rather than rebinding the key.
            variants[vid]['omega_v'][:] = opts.var_read_prob_alt
    else:
        raise Exception('Unknown action: %s' % opts.action)

    inputparser.write_ssms(variants, opts.out_ssm_fn)
    with open(opts.out_params_fn, 'w') as params_out:
        json.dump(params, params_out)

    # All values are computed before anything is printed.
    report = (
        ('num_bad_ssms', len(bad_vids)),
        ('bad_ssms', common.sort_vids(bad_vids)),
        ('bad_samp_prop', '%.3f' % bad_samp_prop),
        ('bad_ssm_prop', '%.3f' % bad_ssm_prop),
    )
    for label, value in report:
        print('%s=%s' % (label, value))