Exemplo n.º 1
0
        }

        #Create the X reference (in the correct order)
        x_reference = {
            'X1': np.array([eval(x_ref[x]['X1']) for x in meta[:, 0]]),
            'X2': np.array([eval(x_ref[x]['X2']) for x in meta[:, 0]])
        }

        #Run all samples against the control
        print('Running JACKS inference')
        ctrldata = data[:, sample_ids.index(ctrl_sample), :]
        testdata = data[:, [
            i for i, x in enumerate(sample_ids) if ctrl_sample not in x
        ], :]
        jacks_results = infer_JACKS(gene_index,
                                    testdata,
                                    ctrldata,
                                    fixed_x=x_reference)

        #Write out the results
        print('Writing JACKS results')
        sample_ids_without_ctrl = [x for x in sample_ids if x != ctrl_sample]
        writeJacksWResults(outfile_w, jacks_results, sample_ids_without_ctrl)
        writeJacksWResults(outfile_w2,
                           jacks_results,
                           sample_ids_without_ctrl,
                           write_w2=True)
        writeJacksXResults(outfile_x, jacks_results, gene_grnas)
        pickleJacksFullResults(outfile_pickle, jacks_results,
                               sample_ids_without_ctrl, gene_grnas)
Exemplo n.º 2
0
    print 'Loading sample specification'
    sample_spec = createSampleSpec(countfile, replicatefile, rep_hdr, sample_hdr)
    
    #Load the mappings from guides to genes
    print 'Loading gene mappings'
    gene_spec = createGeneSpec(guidemappingfile, sgrna_hdr, gene_hdr)
        
    #Load the data and preprocess (or just load from pickle if we did this already)
    print 'Loading data and pre-processing'
    data, meta, sample_ids, genes, gene_index = load_data_and_preprocess(sample_spec, gene_spec)
    gene_grnas = {gene: [x for x in meta[gene_index[gene],0]] for gene in gene_index}
    
    #Run each sample against the control
    ctrldata = data[:,sample_ids.index(ctrl_sample),:]
    for sample_id in sample_ids:
        if sample_id == ctrl_sample: continue

        outfile_w = outprefix + '_' + sample_id + '_gene_JACKS_results.txt'
        outfile_x = outprefix + '_' + sample_id + '_grna_JACKS_results.txt'
        outfile_pickle = outprefix + '_' + sample_id +'_JACKS_results_full.pickle'

        print 'Running JACKS inference for', sample_id
        testdata = data[:,[sample_ids.index(sample_id)],:]
        jacks_results = infer_JACKS(gene_index, testdata, ctrldata)

        #Write out the results
        print 'Writing JACKS results for', sample_id
        writeJacksWResults( outfile_w, jacks_results, [sample_id])
        writeJacksXResults( outfile_x, jacks_results, gene_grnas )
        pickleJacksFullResults( outfile_pickle, jacks_results, [sample_id], gene_grnas )       
def run(countfile,
        replicatefile,
        guidemappingfile,
        rep_hdr,
        sample_hdr,
        ctrl_sample_or_hdr,
        sgrna_hdr,
        gene_hdr,
        outprefix,
        sgrna_reference_file=None,
        apply_w_hp=False,
        boot_strap=False):
    """Load the screen data, run JACKS against the controls and save results.

    The sample specification is built by ``createSampleSpec`` from
    ``countfile``, ``replicatefile``, ``rep_hdr``, ``sample_hdr`` and
    ``ctrl_sample_or_hdr``; the guide-to-gene mapping is built by
    ``createGeneSpec`` from ``guidemappingfile``, ``sgrna_hdr`` and
    ``gene_hdr``. The exact meaning of these arguments is defined by those
    helpers, which are not part of this block.

    Files always written (all prefixed with ``outprefix``):
        * ``_logfoldchange_means.txt`` and ``_logfoldchange_std.txt``

    Files written when ``boot_strap`` is false:
        * ``_gene_JACKS_results.txt`` and ``_genestd_JACKS_results.txt``
        * ``_grna_JACKS_results.txt``
        * ``_JACKS_results_full.pickle``

    File written when ``boot_strap`` is true and ``outprefix`` is truthy:
        * ``.boostrap.pickle`` (spelling kept as in the original output name)

    Args:
        countfile, replicatefile, rep_hdr, sample_hdr: Forwarded to
            ``createSampleSpec``.
        ctrl_sample_or_hdr: Forwarded to ``createSampleSpec``. When that
            helper reports a single shared control, this value is also used
            directly as the id of the control sample.
        guidemappingfile, sgrna_hdr, gene_hdr: Forwarded to
            ``createGeneSpec``.
        outprefix: Prefix prepended to every output file name.
        sgrna_reference_file: Optional file of reference guide values. When
            given, its ``X1``/``X2`` entries are passed to the inference as
            ``fixed_x``.
        apply_w_hp: Forwarded unchanged to ``infer_JACKS`` / ``bootstrap``.
        boot_strap: When true, call ``bootstrap`` instead of ``infer_JACKS``.

    Returns:
        The value returned by ``bootstrap`` when ``boot_strap`` is true;
        otherwise ``None`` (results are only written to disk).

    Raises:
        Exception: If a guide of the gene mapping has no entry in the sgRNA
            reference file.
    """
    # Side effect: adjusts the level of the module-wide logger.
    LOG.setLevel(logging.WARNING)

    print('Loading sample specification')
    sample_spec, ctrl_per_sample, ctrl_spec = createSampleSpec(
        countfile, replicatefile, rep_hdr, sample_hdr, ctrl_sample_or_hdr)

    outfile_w = outprefix + '_gene_JACKS_results.txt'
    outfile_w2 = outprefix + '_genestd_JACKS_results.txt'
    outfile_x = outprefix + '_grna_JACKS_results.txt'
    outfile_lfc = outprefix + '_logfoldchange_means.txt'
    outfile_lfc_std = outprefix + '_logfoldchange_std.txt'
    outfile_pickle = outprefix + '_JACKS_results_full.pickle'

    # Load the mappings from guides to genes
    print('Loading gene mappings')
    gene_spec = createGeneSpec(guidemappingfile, sgrna_hdr, gene_hdr)

    # Load the data and preprocess
    print('Loading data and pre-processing')
    data, meta, sample_ids, genes, gene_index = load_data_and_preprocess(
        sample_spec, gene_spec)
    # Guide identifiers (first column of meta) grouped per gene.
    gene_grnas = {
        gene: list(meta[gene_index[gene], 0])
        for gene in gene_index
    }
    writeFoldChanges(outfile_lfc, data, meta, sample_ids)
    writeFoldChanges(outfile_lfc_std, data, meta, sample_ids, write_std=True)

    x_reference = None
    if sgrna_reference_file:
        print('Loading sgrna reference values')
        x_ref = loadSgrnaReference(sgrna_reference_file)

        print('Checking sgrna reference identifiers against gene mappings')

        # Fail early, before inference, if any mapped guide lacks a reference.
        for guide in gene_spec:
            if guide not in x_ref:
                raise Exception('%s has no sgrna reference in %s' %
                                (guide, sgrna_reference_file))

        # Reference values are laid out in the same guide order as meta.
        # NOTE(review): eval() executes whatever expression the reference
        # file contains, so only trusted reference files must be used here.
        # If the stored values are plain numeric literals, float() or
        # ast.literal_eval() would be a safer parser - confirm file format.
        x_reference = {
            'X1': np.array([eval(x_ref[x]['X1']) for x in meta[:, 0]]),
            'X2': np.array([eval(x_ref[x]['X2']) for x in meta[:, 0]])
        }

    # Run all samples against their controls
    print('Running JACKS_ inference')
    if ctrl_per_sample:  # Different control samples specified per test sample
        # A sample that is listed as its own control is a control, not a test.
        test_sample_idxs = [
            i for i, x in enumerate(sample_ids) if ctrl_spec[x] != x
        ]
        testdata = data[:, test_sample_idxs, :]
        # One control column per test sample, aligned with testdata.
        ctrldata = data[:, [
            sample_ids.index(ctrl_spec[sample_ids[idx]])
            for idx in test_sample_idxs
        ], :]
    else:  # Same control sample for all tests
        # Scalar index here (not a list), unlike the per-sample branch above.
        ctrldata = data[:, sample_ids.index(ctrl_sample_or_hdr), :]
        test_sample_idxs = [
            i for i, x in enumerate(sample_ids) if x != ctrl_sample_or_hdr
        ]
        testdata = data[:, test_sample_idxs, :]

    if boot_strap:
        boots_x_w = bootstrap(
            gene_index,
            testdata,
            ctrldata,
            gene_grnas,
            fixed_x=x_reference,
            apply_w_hp=apply_w_hp,
        )

        if outprefix:
            import pickle
            # NOTE(review): '.boostrap' looks like a typo of '.bootstrap';
            # kept unchanged because consumers may rely on this file name.
            with open(outprefix + '.boostrap.pickle', 'wb') as f:
                pickle.dump(boots_x_w, f)
        return boots_x_w

    jacks_results = infer_JACKS(gene_index,
                                testdata,
                                ctrldata,
                                fixed_x=x_reference,
                                apply_w_hp=apply_w_hp)
    # Write out the results
    print('Writing JACKS_ results')
    sample_ids_without_ctrl = [sample_ids[idx] for idx in test_sample_idxs]
    writeJacksWResults(outfile_w, jacks_results, sample_ids_without_ctrl)
    writeJacksWResults(outfile_w2,
                       jacks_results,
                       sample_ids_without_ctrl,
                       write_w2=True)
    writeJacksXResults(outfile_x, jacks_results, gene_grnas)
    pickleJacksFullResults(outfile_pickle, jacks_results,
                           sample_ids_without_ctrl, gene_grnas)