示例#1
0
def write_zenodo_files(args, baseoutdir):
    for study, dset in [v.split('/') for v in args.varvals]:
        print '%-10s  %15s   %s' % (study, dset, baseoutdir)
        metafos = heads.read_metadata(study)
        for method in args.methods:
            outdir = get_outdir(args, baseoutdir, varname='data', varval='zenodo/%s/%s/%s' % (study_translations.get(study, study), dset, method.replace('-default', '')))
            print '  %-15s' % method
            write_single_zenodo_subdir(outdir, args, study, dset, method, metafos[dset])
def get_data_plots(args, baseoutdir, methods, study, dsets):
    metafos = heads.read_metadata(study)
    assert len(set([metafos[ds]['locus'] for ds in dsets
                    ]))  # make sure everybody has the same locus
    mfo = metafos[dsets[0]]
    data_outdirs = [
        heads.get_datadir(
            study, 'processed', extra_str='gls-gen-paper-' + args.label) +
        '/' + ds for ds in dsets
    ]
    outdir = get_outdir(
        args,
        baseoutdir,
        varname='data',
        varval=study + '/' + '-vs-'.join(dsets)
    )  # for data, only the plots go here, since datascripts puts its output somewhere else
    if len(dsets) > 1 and len(methods) == 1:  # sample vs sample
        glslabels = dsets
        title = get_dset_title([metafos[ds] for ds in dsets])
        if study != 'kate-qrs':
            title += '  %s' % methstr(methods[0])
        title_color = methods[0]
        legends = get_dset_legends([metafos[ds] for ds in dsets])
        legend_title = methstr(
            methods[0]
        ) if study == 'kate-qrs' else None  # for kate-qrs we need to put the subject _and_ the isotype in the title, so there's no room for the method
        pie_chart_faces = False
        print '%s:' % utils.color('green', methods[0]),
    elif len(methods) > 1 and len(dsets) == 1:  # method vs method
        glslabels = methods
        title = get_dset_title([mfo])
        title_color = None
        legends = [methstr(m) + ' only' for m in methods]
        legend_title = None
        pie_chart_faces = True
        print '%s:' % utils.color('green', dsets[0]),
    else:
        raise Exception('one of \'em has to be length 1: %d %d' %
                        (len(methods), len(dsets)))
    print '%s' % (' %s ' % utils.color('light_blue', 'vs')).join(glslabels)
    make_gls_tree_plot(args,
                       outdir + '/' + '-vs-'.join(methods) + '/gls-gen-plots',
                       study + '-' + '-vs-'.join(dsets),
                       glsfnames=[
                           get_gls_fname(ddir,
                                         meth,
                                         locus=mfo['locus'],
                                         data=True) for ddir in data_outdirs
                           for meth in methods
                       ],
                       glslabels=glslabels,
                       locus=mfo['locus'],
                       title=title,
                       title_color=title_color,
                       legends=legends,
                       legend_title=legend_title,
                       pie_chart_faces=pie_chart_faces)
示例#3
0
def get_data_pair_plots(args, baseoutdir, method, study, dsets):
    """Make a germline-set tree plot comparing the datasets in <dsets> for one method.

    Args:
        args: parsed command line arguments; <args.label> is read here, and the
            whole object is passed on to get_outdir() and make_gls_tree_plot()
        baseoutdir: base output directory, passed to get_outdir()
        method: method name, used for the plot subdirectory and the input file names
        study: study name, used to look up metadata and data directories
        dsets: dataset names within <study>; the first two must have the same locus

    Raises:
        AssertionError: if the loci of the first two datasets differ
    """
    metafos = heads.read_metadata(study)  # read the study's metadata once and reuse it for both datasets
    mfo = metafos[dsets[0]]
    assert metafos[dsets[1]]['locus'] == mfo['locus']
    data_outdirs = [
        heads.get_datadir(
            study, 'processed', extra_str='gls-gen-paper-' + args.label) +
        '/' + ds for ds in dsets
    ]
    outdir = get_outdir(
        args,
        baseoutdir,
        varname='data',
        varval=study + '/' + '-vs-'.join(dsets)
    )  # for data, only the plots go here, since datascripts puts its output somewhere else
    make_gls_tree_plot(args,
                       outdir + '/' + method + '/gls-gen-plots',
                       study + '-' + '-vs-'.join(dsets),
                       glsfnames=[
                           get_gls_fname(dout,
                                         method,
                                         locus=mfo['locus'],
                                         data=True) for dout in data_outdirs
                       ],
                       glslabels=dsets)
示例#4
0
def get_data_plots(args, region, baseoutdir, methods, study, dsets):
    metafos = heads.read_metadata(study)
    assert len(set([metafos[ds]['locus'] for ds in dsets]))  # make sure everybody has the same locus
    mfo = metafos[dsets[0]]
    data_outdirs = [heads.get_datadir(study, 'processed', extra_str=args.label) + '/' + ds for ds in dsets]
    outdir = get_outdir(args, baseoutdir, varname='data', varval=study + '/' + '-vs-'.join(dsets))  # for data, only the plots go here, since datascripts puts its output somewhere else
    title, title_color, legends, legend_title = None, None, None, None
    pie_chart_faces = False
    if len(dsets) > 1 and len(methods) == 1:  # sample vs sample
        glslabels = dsets
        title = get_dset_title([metafos[ds] for ds in dsets])
        if study != 'kate-qrs':
            title += '  %s' % methstr(methods[0])
        title_color = methods[0]
        legends = get_dset_legends([metafos[ds] for ds in dsets])
        legend_title = methstr(methods[0]) if study == 'kate-qrs' else None  # for kate-qrs we need to put the subject _and_ the isotype in the title, so there's no room for the method
        print '%s:' % utils.color('green', methods[0]),
    elif len(methods) > 1 and len(dsets) == 1:  # method vs method
        glslabels = methods
        title = get_dset_title([mfo])
        title_color = None
        legends = [methstr(m) + ' only' for m in methods]
        legend_title = None
        pie_chart_faces = len(methods) > 2  # True
        print '%s:' % utils.color('green', dsets[0]),
    else:  # single sample plot
        glslabels = dsets
    print '%s' % (' %s ' % utils.color('light_blue', 'vs')).join(glslabels)
    plotdir = outdir + '/' + '-vs-'.join(methods) + '/gls-gen-plots'
    if args.all_regions:  # NOTE not actually checking this by running... but it's the same as the gls-gen one, so it should be ok
        plotdir += '/' + region
    param_dirs = None
    if args.add_gene_counts_to_tree_plots:  # this returns 'None' for non-partis methods, which is ok for now, but I think I do usually have the parameter dir somewhere if I've run the annotation performance stuff
        param_dirs = [get_param_dir(ddir, meth) for ddir in data_outdirs for meth in methods]
    make_gls_tree_plot(args, region, plotdir, study + '-' + '-vs-'.join(dsets),
                       glsfnames=[get_gls_fname(region, ddir, meth, locus=mfo['locus'], data=True) for ddir in data_outdirs for meth in methods],
                       glslabels=glslabels,
                       locus=mfo['locus'],
                       title=title,
                       title_color=title_color,
                       legends=legends,
                       legend_title=legend_title,
                       pie_chart_faces=pie_chart_faces,
                       param_dirs=param_dirs)
示例#5
0
def get_data_plots(args, baseoutdir, method):
    """Make one single-sample germline-set tree plot per entry of <args.varvals>.

    Each entry is split on '/' into a study and a dataset. The input file name
    comes from get_gls_fname() on that dataset's processed data directory, and
    the plots are written below the directory returned by get_outdir().
    """
    for varval in args.varvals:
        study, dset = varval.split('/')
        mfo = heads.read_metadata(study)[dset]
        processed_dir = heads.get_datadir(study, 'processed', extra_str='gls-gen-paper-' + args.label)
        data_outdir = processed_dir + '/' + dset
        # for data, only the plots go here, since datascripts puts its output somewhere else
        outdir = get_outdir(args, baseoutdir, varname='data', varval=study + '/' + dset)
        plotdir = outdir + '/' + method + '/gls-gen-plots'
        plotname = study + '-' + dset
        glsfname = get_gls_fname(data_outdir, method, locus=mfo['locus'], data=True)
        make_gls_tree_plot(args, plotdir, plotname, glsfnames=[glsfname], glslabels=['data'])
示例#6
0
    # ],
    'crotty-fna' : [
        ['RUj15_ALN-FNA_week3_groupD', 'RUj15_L-ILN-FNA_week3_groupD', 'RUj15_R-ILN-FNA_week3_groupD'],
        ['RUj15_ALN-FNA_week3_groupD', 'RUj15_L-ILN-FNA_week15_groupD', 'RUj15_R-ILN-FNA_week15_groupD'],
        ['ROp15_R-ILN-FNA_week3_groupC', 'ROp15_L-ILN-FNA_week3_groupC', 'ROp15_ALN-FNA_week3_groupC'],
        ['ROp15_R-ILN-FNA_week9_groupC', 'ROp15_L-ILN-FNA_week9_groupC', 'ROp15_ALN-FNA_week9_groupC'],
        ['ROp15_R-ILN-FNA_week15_groupC', 'ROp15_L-ILN-FNA_week15_groupC', 'ROp15_ALN-FNA_week15_groupC'],
        ['ROp15_R-ILN-FNA_week21_groupC', 'ROp15_L-ILN-FNA_week21_groupC', 'ROp15_ALN-FNA_week21_groupC'],
        ['RJk15_L-ILN-FNA_week3_groupB', 'RJk15_R-ILN-FNA_week3_groupB', 'RJk15_ALN-FNA_week3_groupB'],
        ['RJk15_L-ILN-FNA_week9_groupB', 'RJk15_R-ILN-FNA_week9_groupB'],
        ['RJk15_L-ILN-FNA_week15_groupB', 'RJk15_R-ILN-FNA_week15_groupB', 'RJk15_ALN-FNA_week15_groupB'],
        ['RJk15_L-ILN-FNA_week21_groupB', 'RJk15_R-ILN-FNA_week21_groupB', 'RJk15_ALN-FNA_week21_groupB'],
    ]

}
# Flatten default_varvals['data'] (indexed here as study -> dataset names) into a single colon-separated string of
# '<study>/<dataset>' entries, where each dataset name is first passed through heads.full_dataset() together with the study's metadata.
default_varvals['data'] = ':'.join([study + '/' + heads.full_dataset(heads.read_metadata(study), dset) for study in default_varvals['data'] for dset in default_varvals['data'][study]])
# Replace, in place, every dataset name in every group of every study with the value heads.full_dataset() returns for it.
for study in all_data_groups:
    for idp in range(len(all_data_groups[study])):
        all_data_groups[study][idp] = [heads.full_dataset(heads.read_metadata(study), ds) for ds in all_data_groups[study][idp]]
# ----------------------------------------------------------------------------------------
# Command line interface: one positional <action> restricted to the listed choices, plus optional arguments.
parser = argparse.ArgumentParser()
parser.add_argument('action', choices=['mfreq', 'nsnp', 'multi-nsnp', 'prevalence', 'n-leaves', 'weibull', 'alcluster', 'gls-gen', 'data'])
parser.add_argument('--methods', default='partis') # not using <choices> 'cause it's harder since it's a list
parser.add_argument('--method-vs-method', action='store_true')
parser.add_argument('--sample-vs-sample', action='store_true')
parser.add_argument('--v-genes', default='IGHV4-39*01')
parser.add_argument('--locus', default='igh')
parser.add_argument('--all-regions', action='store_true')  # it'd be nicer to just have an arg for which region we're running on, but i need a way to keep the directory structure for single-region plots the same as before I generalized to d and j
parser.add_argument('--varvals')
parser.add_argument('--n-event-list', default='1000:2000:4000:8000')  # NOTE modified later for multi-nsnp also NOTE not used for gen-gset
parser.add_argument('--gls-gen-events', type=int, default=50000)
示例#7
0
            'RJk15_L-ILN-FNA_week3_groupB', 'RJk15_R-ILN-FNA_week3_groupB',
            'RJk15_ALN-FNA_week3_groupB'
        ],
        ['RJk15_L-ILN-FNA_week9_groupB', 'RJk15_R-ILN-FNA_week9_groupB'],
        [
            'RJk15_L-ILN-FNA_week15_groupB', 'RJk15_R-ILN-FNA_week15_groupB',
            'RJk15_ALN-FNA_week15_groupB'
        ],
        [
            'RJk15_L-ILN-FNA_week21_groupB', 'RJk15_R-ILN-FNA_week21_groupB',
            'RJk15_ALN-FNA_week21_groupB'
        ],
    ]
}
# Flatten default_varvals['data'] (indexed here as study -> dataset names) into a single colon-separated string of
# '<study>/<dataset>' entries, where each dataset name is first passed through heads.full_dataset() together with the study's metadata.
default_varvals['data'] = ':'.join([
    study + '/' + heads.full_dataset(heads.read_metadata(study), dset)
    for study in default_varvals['data']
    for dset in default_varvals['data'][study]
])
# Replace, in place, every dataset name in every group of every study with the value heads.full_dataset() returns for it.
for study in all_data_groups:
    for idp in range(len(all_data_groups[study])):
        all_data_groups[study][idp] = [
            heads.full_dataset(heads.read_metadata(study), ds)
            for ds in all_data_groups[study][idp]
        ]
# ----------------------------------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('action',
                    choices=[
                        'mfreq', 'nsnp', 'multi-nsnp', 'prevalence',
                        'n-leaves', 'weibull', 'alcluster', 'gls-gen', 'data'
示例#8
0
import colored_traceback.always

sys.path.insert(1, './python')
import utils
import glutils
sys.path.insert(1, './datascripts')
import heads

# Color lookup: starts with a fixed set of keys, and gets one extra entry per matching 'kate-qrs' dataset below.
scolors = dict([
    ('ok', 'DarkSeaGreen'),
    ('missing', 'IndianRed'),
    ('spurious', 'IndianRed'),
    ('data', 'LightSteelBlue'),
    ('both', 'LightGrey'),
])
metafos = heads.read_metadata('kate-qrs')
# Datasets whose name contains LN1/LN2 share one color, and those containing LN3/LN4 another (LN1/LN2 is checked first).
for ds in metafos:
    if any(tag in ds for tag in ('LN1', 'LN2')):
        scolors[ds] = '#85ad98'  # green
    elif any(tag in ds for tag in ('LN4', 'LN3')):
        scolors[ds] = '#94a3d1'  # blue
# Circle faces (first argument 10) for the two keys below: white for 'missing', black for 'spurious'.
faces = dict([
    ('missing', ete3.CircleFace(10, 'white')),
    ('spurious', ete3.CircleFace(10, 'black')),
])


def get_cmdfos(cmdstr, workdir, outfname):
    """Return a one-element list holding a dict that describes a single command.

    The dict has the keys 'cmd_str', 'workdir' and 'outfname', set to the
    corresponding arguments.
    """
    cmdfo = {}
    cmdfo['cmd_str'] = cmdstr
    cmdfo['workdir'] = workdir
    cmdfo['outfname'] = outfname
    return [cmdfo]