Python LOG示例，jacks.infer.LOG Python示例

示例#1

0

显示文件

文件： preprocess.py 项目： peterpdu/JACKS

def inferMissingVariances(data, meta, sample_ids, ctrl_spec, ctrl_geneset):
    """Fill in undefined (NaN) variances of test samples, in place.

    ``data[:, s, 0]`` is read as the per-guide mean and ``data[:, s, 1]`` as
    the per-guide variance of sample ``s``; ``meta[:, 1]`` is matched against
    ``ctrl_geneset``.

    For every sample that has at least one NaN variance:

    * a control sample (``ctrl_spec[sample_id] == sample_id``) is left as is;
    * a sample with a control and a non-empty ``ctrl_geneset`` has its NaN
      variances replaced by twice the value ``calc_posterior_sd`` returns for
      the (control, sample) means, restricted to the control-gene guides;
    * otherwise a warning is logged and the NaNs are left in place.

    Returns the same ``data`` array that was passed in.
    """
    for s, sample_id in enumerate(sample_ids):
        nan_flags = np.isnan(data[:, s, 1])

        # Nothing to infer when every variance is defined. The test must be
        # "no NaN present", not "some non-NaN present", otherwise samples with
        # only some variances missing would never be filled in.
        if not nan_flags.any():
            continue

        #If this is a control replicate, ignore it, as JACKS will use the data variance for this at model time
        if sample_id in ctrl_spec and ctrl_spec[sample_id] == sample_id:
            continue

        # If this is a sample, and a ctrl_geneset is specified, use twice the variance between
        # sample and control within this set to infer the mean-variance relationship,
        # then apply to all nan variances
        if sample_id in ctrl_spec and len(ctrl_geneset) > 0:
            guideset_indexs = [
                i for i, x in enumerate(meta[:, 1]) if x in ctrl_geneset
            ]
            ctrl_data = data[:, [sample_ids.index(ctrl_spec[sample_id])], 0]
            concat_data = np.concatenate((ctrl_data, data[:, [s], 0]), axis=1)
            data[nan_flags, s, 1] = 2 * calc_posterior_sd(
                concat_data,
                guideset_indexs=guideset_indexs)[nan_flags]  #sigma_hat
        else:
            LOG.warning(
                'Undefined variances in sample %s, set --ctrl_genes input to JACKS to infer variances from control genes'
                % sample_id)

    return data

示例#2

0

显示文件

文件： preprocess.py 项目： peterpdu/JACKS

def normalizeLogCounts(logcounts, normtype='median', ctrl_guide_indexes=[]):
    """Normalise each column of a 2-D log-count array.

    Supported ``normtype`` values:

    * ``'median'``      - subtract the column NaN-median;
    * ``'zmad'``        - subtract the column NaN-median, then divide by
                          1.4826 times the NaN-median of the absolute values;
    * ``'mode'``        - subtract the centre of the highest bin of a smoothed
                          100-bin histogram of the column;
    * ``'ctrl_guides'`` - subtract the NaN-median over the rows listed in
                          ``ctrl_guide_indexes``.

    The subtraction is applied to ``logcounts`` in place; ``'zmad'``
    additionally returns a new array for the scaled result. Raises
    ``Exception`` for an unknown ``normtype`` or when ``'ctrl_guides'`` is
    requested without any guide indexes.
    """
    LOG.info('Applying %s normalisation' % normtype)
    num_guides, num_samples = logcounts.shape

    if normtype == 'median':
        # Per-column medians broadcast across the rows.
        logcounts -= np.nanmedian(logcounts, axis=0)
        return logcounts

    if normtype == 'zmad':
        logcounts -= np.nanmedian(logcounts, axis=0)
        # Adjust to median absolute deviation = 1.
        mad_scale = 1.4826 * np.nanmedian(abs(logcounts), axis=0)
        return logcounts / mad_scale

    if normtype == 'mode':
        smoothing_weights = (0.1, 0.2, 0.4, 0.2, 0.1)
        for col in range(num_samples):
            counts, edges = np.histogram(logcounts[:, col], bins=100)
            # 5-point weighted moving sum over the histogram counts.
            span = len(counts) - len(smoothing_weights) + 1
            smoothed = sum(weight * counts[k:k + span]
                           for k, weight in enumerate(smoothing_weights))
            # Centres of the bins sitting in the middle of each window.
            centres = 0.5 * edges[2:-3] + 0.5 * edges[3:-2]
            logcounts[:, col] -= centres[np.argmax(smoothed)]
        return logcounts

    if normtype == 'ctrl_guides':
        if len(ctrl_guide_indexes) == 0:
            raise Exception('No guides specified for ctrl guide normalization')
        logcounts -= np.nanmedian(logcounts[ctrl_guide_indexes, :], axis=0)
        return logcounts

    raise Exception('Unrecognised normalisation type %s' % normtype)

示例#3

0

显示文件

def resample_run_jacks(count_tab: Union[pd.DataFrame, Dict[str, pd.DataFrame]],
                       repmap_fn: Union[str, os.PathLike],
                       fractions: List[float],
                       nreps: int,
                       tabulate: bool,
                       working_dir: Union[str, os.PathLike],
                       processors: int = None,
                       do_resample=True,
                       jacks_kwargs=None):
    """Run a resampling experiment with JACKS.

    If do_resample is True, count_tab is resampled (via get_resampled_tabs)
    to the sizes given in fractions, nreps times per fraction. If do_resample
    is False, a dictionary of already resampled counts, indexed by fraction
    and then rep letter, should be supplied as count_tab.

    Each resampled table is written to ``{working_dir}/count_{k}.tsv`` and
    JACKS results are written with the prefix ``{working_dir}/jacks_{k}``.

    Args:
        count_tab: counts to resample, or pre-resampled tables.
        repmap_fn: replicate map file in the JACKS format.
        fractions: resampling fractions; also the keys of the output.
        nreps: number of resamplings per fraction.
        tabulate: when true, collect tabulate_score output for every run.
        working_dir: existing directory for intermediate and result files.
        processors: passed through to get_resampled_tabs.
        do_resample: see above.
        jacks_kwargs: extra keyword arguments for runJACKS; these override
            the defaults ctrl_sample_hdr='ctrl', gene_hdr='gene',
            sgrna_hdr='guide'.

    Returns:
        When tabulate is true, a dict of dict of DF produced by
        tabulate_score, keyed first by fraction and then rep letter;
        otherwise None.
    """
    #todo make work with other analyses
    from jacks.jacks_io import runJACKS
    from jacks.infer import LOG as jacksLOG
    jacksLOG.setLevel(logging.WARNING)

    if jacks_kwargs is None:
        jacks_kwargs = {}
    jkwgs = dict(ctrl_sample_hdr='ctrl', gene_hdr='gene', sgrna_hdr='guide')
    jkwgs.update(jacks_kwargs)

    assert os.path.isdir(working_dir), \
        'working_dir is not an existing directory: %s' % working_dir

    if do_resample:
        resamped_tabs = get_resampled_tabs(count_tab, fractions, nreps,
                                           processors)
    else:
        resamped_tabs = count_tab

    # the output
    tables = {f: {} for f in fractions}

    for frac, letter, k in iter_reps(nreps, fractions):
        tab = resamped_tabs[frac][letter]
        tabpath = f"{working_dir}/count_{k}.tsv"
        # `sep` must be given by keyword: newer pandas deprecates passing it
        # positionally.
        tab.to_csv(tabpath, sep='\t')
        respath = f"{working_dir}/jacks_{k}"
        # The count table doubles as the guide-to-gene mapping file.
        runJACKS(tabpath,
                 repmap_fn,
                 tabpath,
                 'rep',
                 'samp',
                 outprefix=respath,
                 **jkwgs)
        if tabulate:
            tables[frac][letter] = tabulate_score(respath, return_ps=True)

    return tables if tabulate else None

示例#4

0

显示文件

def set_logger(log_fn):
    """Route pipeline log output (and JACKS output, if available) to a file.

    A ``logging.FileHandler`` is opened on ``log_fn`` in ``'w'`` mode, so an
    existing file is overwritten. ``pipeLOG`` is set to INFO level and given
    the handler; the same handler is added to ``jacksLOG`` when that name
    exists.
    """
    hndlr = logging.FileHandler(log_fn, 'w')
    # hndlr.setLevel(logging.INFO)
    pipeLOG.setLevel(logging.INFO)
    pipeLOG.addHandler(hndlr)
    try:
        jacksLOG.addHandler(hndlr)
    except NameError:
        # jacksLOG is not defined (presumably JACKS could not be imported);
        # logging to the pipeline logger alone is fine.
        pass

示例#5

0

显示文件

文件： preprocess.py 项目： peterpdu/JACKS

def collateTestControlSamples(data, sample_ids, ctrl_spec):
    """Split ``data`` into matching test and control slices.

    A sample is a test sample when ``ctrl_spec`` maps it to a different
    sample id. Returns ``(testdata, ctrldata, test_sample_idxs)`` where both
    arrays are indexed along axis 1 in the same order: ``testdata`` holds the
    test samples and ``ctrldata`` the control assigned to each of them.
    """
    test_sample_idxs = []
    for position, sample_id in enumerate(sample_ids):
        if ctrl_spec[sample_id] != sample_id:
            test_sample_idxs.append(position)
    LOG.info('Collating %d samples' % len(test_sample_idxs))

    testdata = data[:, test_sample_idxs, :]

    # Column of the control paired with each test sample, in the same order.
    ctrl_sample_idxs = []
    for position in test_sample_idxs:
        control_id = ctrl_spec[sample_ids[position]]
        ctrl_sample_idxs.append(sample_ids.index(control_id))
    ctrldata = data[:, ctrl_sample_idxs, :]

    return testdata, ctrldata, test_sample_idxs

示例#6

0

显示文件

文件： jacks_io.py 项目： pleprohon/JACKS

def readControlGeneset(ctrl_genes, gene_spec):
    """Return the set of control genes described by ``ctrl_genes``.

    ``ctrl_genes`` is either the path of a file whose first whitespace
    separated field on each line is a gene name, or the name of a single
    gene. Only genes that occur among the values of ``gene_spec`` are kept.

    Raises ``Exception`` when ``ctrl_genes`` is neither an existing file nor
    a known gene.
    """
    known_genes = {gene_spec[x] for x in gene_spec}
    if os.path.isfile(ctrl_genes):
        geneset = set()
        # The context manager closes the file even if reading fails.
        with io.open(ctrl_genes) as f:
            for line in f:
                fields = line.split()
                # Blank lines have no fields; skip them instead of failing.
                if fields and fields[0] in known_genes:
                    geneset.add(fields[0])
        LOG.info('Read %d recognised control genes from %s' % (len(geneset), ctrl_genes))
    else:
        if ctrl_genes not in known_genes:
            raise Exception('Not a file or unrecognised control gene: %s' % ctrl_genes)
        geneset = set([ctrl_genes])
        LOG.info('Using %s as control gene' % (ctrl_genes))
    return geneset

示例#7

0

显示文件

文件： jacks_io.py 项目： pleprohon/JACKS

def preprocess(countfile, replicatefile, guidemappingfile,
               rep_hdr=REP_HDR_DEFAULT, sample_hdr=SAMPLE_HDR_DEFAULT, common_ctrl_sample=COMMON_CTRL_SAMPLE_DEFAULT,
               ctrl_sample_hdr=None, sgrna_hdr=SGRNA_HDR_DEFAULT, gene_hdr=GENE_HDR_DEFAULT, ignore_blank_genes=False,
               outprefix=OUTPREFIX_DEFAULT, reffile=None):
    """Build the sample, control and gene specifications for a JACKS run.

    Returns ``(sample_spec, ctrl_spec, gene_spec, x_ref)``. ``x_ref`` is the
    result of ``loadSgrnaReference(reffile)`` when ``reffile`` is given and
    ``None`` otherwise. ``outprefix`` is accepted but not used here.

    Raises ``Exception`` when a guide in the gene specification has no entry
    in the sgrna reference.
    """
    # Samples to include and the control each one is compared against.
    LOG.info('Loading sample specification')
    sample_spec, ctrl_spec, sample_num_reps = createSampleSpec(
        countfile, replicatefile, rep_hdr, sample_hdr, common_ctrl_sample, ctrl_sample_hdr)

    # Guide -> gene mappings.
    LOG.info('Loading gene mappings')
    gene_spec = createGeneSpec(guidemappingfile, sgrna_hdr, gene_hdr,
                               ignore_blank_genes=ignore_blank_genes)

    if not reffile:
        return sample_spec, ctrl_spec, gene_spec, None

    # Precomputed X values for the guides.
    LOG.info('Loading sgrna reference values')
    x_ref = loadSgrnaReference(reffile)

    # Every guide that will be loaded needs a reference value.
    LOG.info('Checking sgrna reference identifiers against gene mappings')
    for guide in gene_spec:
        if guide in x_ref:
            continue
        raise Exception('%s has no sgrna reference in %s' % (guide, reffile))

    return sample_spec, ctrl_spec, gene_spec, x_ref

示例#8

0

显示文件

文件： jacks_io.py 项目： pleprohon/JACKS

def writeJacksWResults( outprefix, jacks_results, cell_lines, write_types=[''], ctrl_geneset=set(), fdr=None, fdr_thresh_type='REGULAR', pseudo=False):
    """Write per-gene JACKS results, one tab-separated file per write type.

    For each entry of ``write_types`` a file named
    ``outprefix + '_gene%s_JACKS_results.txt' % write_type`` is written with
    a ``Gene`` column followed by one column per entry of ``cell_lines``.
    Recognised write types:

    * ``''``      - ``jacks_results[gene][4]`` (w1); with an ``fdr``
                    threshold, values of non-significant cell lines are blank;
    * ``'_std'``  - ``sqrt(w2 - w1**2)`` with w2 = ``jacks_results[gene][5]``;
    * ``'_pval'`` - p-values from ``computeW1PvalsAndFDRs``;
    * ``'_fdr'``  - FDRs from ``computeW1PvalsAndFDRs``.

    Genes are written in the order given by ``getSortedGenes``; genes whose
    name contains ``JACKS_PSEUDO_GENE`` are not written. When ``fdr`` is set,
    genes that are significant in no cell line are skipped entirely.

    Raises ``Exception`` for an unrecognised ``fdr_thresh_type`` or write
    type.

    NOTE(review): with ``fdr`` set, the threshold sets are derived from the
    p-values/FDRs, which are only computed when ``'_pval'`` or ``'_fdr'`` is
    among ``write_types`` - callers must request one of those.
    """
    #Sort genes by w1
    ordered_genes = getSortedGenes(jacks_results)

    fouts = []
    try:
        # Open inside the try so already opened files are closed on any failure.
        for write_type in write_types:
            fouts.append(io.open(outprefix + '_gene%s_JACKS_results.txt' % write_type, 'w'))
        for fout in fouts:
            fout.write(u'Gene\t%s\n' % ('\t'.join(cell_lines)))

        if '_fdr' in write_types or '_pval' in write_types:
            LOG.info('Computing P-values')
            jacks_w1_pvals, jacks_w1_fdrs = computeW1PvalsAndFDRs(
                jacks_results, cell_lines, noness_genes=ctrl_geneset,
                pseudo=pseudo, compute_fdr=('_fdr' in write_types))

        #Determine threshold sets for fdr cut-offs (blank out non-significant genes)
        if fdr is not None:
            if fdr_thresh_type == 'REGULAR':
                fdr_sets = getFDRGeneSets(jacks_w1_pvals, fdr)
            elif fdr_thresh_type == 'LOCAL_FDR':
                fdr_sets = getLocalFDRGeneSets(jacks_w1_fdrs, fdr)
            else:
                # Format the offending value into the message itself.
                raise Exception('Unrecognised FDR threshold type (expecting REGULAR or LOCAL_FDR): %s' % fdr_thresh_type)

        #Write out one line per gene (all cell lines)
        for w1_mean, gene in ordered_genes:

            #Determine whether to include the gene for each cell line (if fdr thresholded).
            #This does not depend on the write type, so compute it once per gene.
            if fdr is not None:
                sig_gene_flags = [(gene in x) for x in fdr_sets]
            else:
                sig_gene_flags = [True for x in jacks_results[gene][4]]
            if not any(sig_gene_flags):
                continue

            for write_type, fout in zip(write_types, fouts):
                #Write out the values
                if write_type == '_pval':
                    w1s = ['%5e' % x for x in jacks_w1_pvals[gene]]
                elif write_type == '_fdr':
                    w1s = ['%5e' % x for x in jacks_w1_fdrs[gene]]
                elif write_type == '_std':
                    w1s = ['%5e' % np.sqrt(w2 - w1**2.0) for (w1, w2) in zip(jacks_results[gene][4], jacks_results[gene][5])]
                elif write_type == '':
                    w1s = [('%5e' % w1) if flag else '' for (w1, flag) in zip(jacks_results[gene][4], sig_gene_flags)]
                else:
                    raise Exception('Unrecognised write type: %s' % write_type)
                w1_str = '\t'.join(w1s)
                if 'JACKS_PSEUDO_GENE' not in gene:
                    fout.write(u'%s\t%s\n' % (gene, w1_str))
    finally:
        for fout in fouts:
            fout.close()

示例#9

0

显示文件

文件： jacks_io.py 项目： pleprohon/JACKS

def load_data_and_run(sample_spec, gene_spec, ctrl_spec, sgrna_reference_file, x_ref,
                      outprefix, apply_w_hp=APPLY_W_HP_DEFAULT, norm_type=NORM_TYPE_DEFAULT, 
                      ctrl_genes=None, fdr=None, fdr_thresh_type = 'REGULAR', n_pseudo=0, count_prior=32 ):
    """Load the count data, run JACKS inference and write all result files.

    Steps, in order:

    1. read the control gene set (empty when ``ctrl_genes`` is None) and
       create the output directory if ``outprefix`` contains one;
    2. load and pre-process the data, then pair test samples with controls;
    3. with an sgrna reference, build the fixed X values from ``x_ref``;
       without one, write the log-fold-change mean and std files;
    4. run ``inferJACKS`` on all genes;
    5. if ``n_pseudo > 0`` and control genes exist, also run ``inferJACKS`` on
       pseudo genes, write them under ``outprefix + '_pseudo_noness'`` and use
       them as the non-essential set for p-values in the main gene output;
    6. write the gene, guide and pickled results.
    """
    # Load negative control genes (if any)
    if ctrl_genes is None:
        ctrl_geneset = set()
    else:
        ctrl_geneset = readControlGeneset(ctrl_genes, gene_spec)

    if '/' in outprefix:
        out_dir = os.path.dirname(outprefix)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
    outfile_x = outprefix + '_grna_JACKS_results.txt'
    outfile_lfc = outprefix + '_logfoldchange_means.txt'
    outfile_lfc_std = outprefix + '_logfoldchange_std.txt'
    outfile_pickle = outprefix + PICKLE_FILENAME

    # Load the data and preprocess
    LOG.info('Loading data and pre-processing')
    data, meta, sample_ids, genes, gene_index = loadDataAndPreprocess(
        sample_spec, gene_spec, ctrl_spec=ctrl_spec, normtype=norm_type,
        ctrl_geneset=ctrl_geneset, prior=count_prior)
    gene_grnas = {gene: list(meta[gene_index[gene], 0]) for gene in gene_index}
    testdata, ctrldata, test_sample_idxs = collateTestControlSamples(data, sample_ids, ctrl_spec)
    sample_ids_without_ctrl = [sample_ids[idx] for idx in test_sample_idxs]

    if sgrna_reference_file:
        # Create the X reference (in the correct order).
        # NOTE(review): eval() is applied to values read from the reference
        # file; only use reference files from a trusted source.
        x_reference = {'X1': np.array([eval(x_ref[x]['X1']) for x in meta[:, 0]]),
                       'X2': np.array([eval(x_ref[x]['X2']) for x in meta[:, 0]])}
    else:
        x_reference = None
        writeFoldChanges(outfile_lfc, testdata, ctrldata, meta, sample_ids_without_ctrl)
        writeFoldChanges(outfile_lfc_std, testdata, ctrldata, meta, sample_ids_without_ctrl, write_std=True)

    #Run all samples against their controls
    LOG.info('Running JACKS inference')
    jacks_results = inferJACKS(gene_index, testdata, ctrldata,
                               apply_w_hp=apply_w_hp, fixed_x=x_reference)

    #Add a set of pseudo genes, created by randomly sampling from guides targeting genes in the control set
    if n_pseudo > 0 and len(ctrl_geneset) > 0:
        LOG.info('Running JACKS inference on %d pseudogenes' % n_pseudo)
        pseudo_gene_index = createPseudoNonessGenes(gene_index, ctrl_geneset, n_pseudo)
        jacks_pseudo_results = inferJACKS(pseudo_gene_index, testdata, ctrldata, apply_w_hp=apply_w_hp)
        writeJacksWResults(outprefix + '_pseudo_noness', jacks_pseudo_results,
                           sample_ids_without_ctrl, write_types=['', '_std'])
        # From here on the pseudo results also carry every real gene.
        jacks_pseudo_results.update(jacks_results)

    # Write out the results
    LOG.info('Writing JACKS results')
    if len(ctrl_geneset) > 0 and n_pseudo > 0:
        pseudo_gene_names = {x for x in jacks_pseudo_results if 'JACKS_PSEUDO_GENE' in x}
        writeJacksWResults(outprefix, jacks_pseudo_results, sample_ids_without_ctrl,
                           ctrl_geneset=pseudo_gene_names,
                           write_types=['', '_std', '_pval'],
                           fdr=fdr, pseudo=True, fdr_thresh_type=fdr_thresh_type)
    else:
        writeJacksWResults(outprefix, jacks_results, sample_ids_without_ctrl,
                           ctrl_geneset=ctrl_geneset, write_types=['', '_std'])
    writeJacksXResults(outfile_x, jacks_results, gene_grnas)
    pickleJacksFullResults(outfile_pickle, jacks_results, sample_ids_without_ctrl, gene_grnas)

示例#10

0

显示文件

文件： run_JACKS.py 项目： singjc/crisprtools

import logging
from jacks.jacks_io import runJACKSFromArgs
from jacks.infer import LOG

# Script entry point: show INFO-level JACKS log messages, then hand over to
# runJACKSFromArgs.
if __name__ == '__main__':
    LOG.setLevel(logging.INFO)
    runJACKSFromArgs()

示例#11

0

显示文件

文件： jacks_io.py 项目： singjc/crisprtools

def runJACKS(countfile,
             replicatefile,
             guidemappingfile,
             rep_hdr=REP_HDR_DEFAULT,
             sample_hdr=SAMPLE_HDR_DEFAULT,
             common_ctrl_sample=COMMON_CTRL_SAMPLE_DEFAULT,
             ctrl_sample_hdr=None,
             sgrna_hdr=SGRNA_HDR_DEFAULT,
             gene_hdr=GENE_HDR_DEFAULT,
             outprefix=OUTPREFIX_DEFAULT,
             reffile=None,
             apply_w_hp=APPLY_W_HP_DEFAULT):
    """Run the complete JACKS pipeline and write its result files.

    Loads the sample and gene specifications, optionally validates an sgrna
    reference file against the guides, pre-processes the counts, runs
    ``inferJACKS`` for all test samples against their controls and writes the
    gene, gene-std, guide and pickled results under ``outprefix``. Without a
    reference file, the log-fold-change mean and std files are written too.

    Raises ``Exception`` when a guide has no entry in the sgrna reference.
    """
    if '/' in outprefix:
        out_dir = os.path.dirname(outprefix)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
    outfile_w = outprefix + '_gene_JACKS_results.txt'
    outfile_w2 = outprefix + '_genestd_JACKS_results.txt'
    outfile_x = outprefix + '_grna_JACKS_results.txt'
    outfile_lfc = outprefix + '_logfoldchange_means.txt'
    outfile_lfc_std = outprefix + '_logfoldchange_std.txt'
    outfile_pickle = outprefix + PICKLE_FILENAME

    # Load the specification of samples to include
    LOG.info('Loading sample specification')
    sample_spec, ctrl_spec, sample_num_reps = createSampleSpec(
        countfile, replicatefile, rep_hdr, sample_hdr, common_ctrl_sample,
        ctrl_sample_hdr)

    # Load the mappings from guides to genes
    LOG.info('Loading gene mappings')
    gene_spec = createGeneSpec(guidemappingfile, sgrna_hdr, gene_hdr)

    if reffile:
        # Load the sgrna reference (precomputed X's)
        LOG.info('Loading sgrna reference values')
        x_ref = loadSgrnaReference(reffile)
        # Check that the data to be loaded have sgrna reference values
        LOG.info('Checking sgrna reference identifiers against gene mappings')
        for guide in gene_spec:
            if guide in x_ref:
                continue
            raise Exception('%s has no sgrna reference in %s' %
                            (guide, reffile))

    # Load the data and preprocess
    LOG.info('Loading data and pre-processing')
    data, meta, sample_ids, genes, gene_index = loadDataAndPreprocess(
        sample_spec, gene_spec)
    gene_grnas = {gene: list(meta[gene_index[gene], 0]) for gene in gene_index}

    if reffile:
        # Create the X reference (in the correct order).
        # NOTE(review): eval() is applied to values read from the reference
        # file; only use reference files from a trusted source.
        # NOTE(review): x_reference is built but not passed to inferJACKS
        # below - confirm whether that is intended.
        x_reference = {
            'X1': np.array([eval(x_ref[x]['X1']) for x in meta[:, 0]]),
            'X2': np.array([eval(x_ref[x]['X2']) for x in meta[:, 0]])
        }
    else:
        x_reference = None
        writeFoldChanges(outfile_lfc, data, meta, sample_ids)
        writeFoldChanges(outfile_lfc_std, data, meta, sample_ids,
                         write_std=True)

    #Run all samples against their controls
    LOG.info('Running JACKS inference')
    testdata, ctrldata, test_sample_idxs = collateTestControlSamples(
        data, sample_ids, ctrl_spec)
    jacks_results = inferJACKS(gene_index, testdata, ctrldata,
                               apply_w_hp=apply_w_hp)

    # Write out the results
    LOG.info('Writing JACKS results')
    sample_ids_without_ctrl = [sample_ids[idx] for idx in test_sample_idxs]
    writeJacksWResults(outfile_w, jacks_results, sample_ids_without_ctrl)
    writeJacksWResults(outfile_w2, jacks_results, sample_ids_without_ctrl,
                       write_w2=True)
    writeJacksXResults(outfile_x, jacks_results, gene_grnas)
    pickleJacksFullResults(outfile_pickle, jacks_results,
                           sample_ids_without_ctrl, gene_grnas)

示例#12

0

显示文件

# Extend the standard JACKS command-line parser with two extra options.
parser = getJacksParser()
parser.add_argument("--sample_id",
                type=str,
                default=None,
                help="Sample id to run MAGeCK on")
parser.add_argument("--v10",
                type=str,
                default='',
                help="Data set label")
args = parser.parse_args()

# Directory for generated input files; created on first use.
inputs_dir = 'input_files'
if not os.path.isdir(inputs_dir): os.makedirs(inputs_dir)

# Load the specification of samples to include
LOG.info('Loading sample specification')
sample_spec, ctrl_spec, sample_num_reps = createSampleSpec(args.countfile, args.replicatefile, args.rep_hdr,
                                                            args.sample_hdr, args.common_ctrl_sample, args.ctrl_sample_hdr)
# Load the mappings from guides to genes
LOG.info('Loading gene mappings')
gene_spec = createGeneSpec(args.guidemappingfile, args.sgrna_hdr, args.gene_hdr)

# Sample not specified: re-call self for all samples
if args.sample_id is None:
    for sample_id in ctrl_spec:
        if ctrl_spec[sample_id] == sample_id: continue
        cmd = py_cmd + ' ' + ' '.join(sys.argv) + ' --sample_id="%s"' % sample_id
        os.system(cmd)

#Sample specified - run MAGeCK
else:

示例#13

0

显示文件

import sys, io, os, random, logging
import scipy.stats as ST
import numpy as np
from jacks.preprocess import subsample_and_preprocess
from jacks.jacks_io import readControlGeneset, collateTestControlSamples, createSampleSpec, createGeneSpec
from jacks.infer import LOG, inferJACKSGene

LOG.setLevel(logging.WARNING)

# The script accepts 7 or 8 arguments after the script name.
if len(sys.argv) != 8 and len(sys.argv) != 9:
    print('Usage: sample_jacks_screen.py condensed_input test_line num_replicates(-1 for all)  num_celllines(-1 for all) outfile num_samples num_guides(-1 for all) job_idx\n')
    print('where, condensed_input = countfile#replicatefile:rep_hdr:sample_hdr:ctrl_sample_or_hdr#guidemappingfile:sgrna_hdr:gene_hdr#ctrl_genes(can be blank)')
else:

    #Minimial checks on this, as this is for a script that is intended for use internally only
    # Unpack the '#'- and ':'-separated condensed input specification.
    condensed_input = sys.argv[1]
    countfile, replicatestuff, grnastuff, ctrl_genes = condensed_input.split('#')
    replicatefile, rep_hdr, sample_hdr, ctrl_sample_or_hdr = replicatestuff.split(':')
    guidemappingfile, sgrna_hdr, gene_hdr = grnastuff.split(':')
    # The field is only treated as a header name when it is literally 'Control'.
    ctrl_sample_hdr = ctrl_sample_or_hdr if ctrl_sample_or_hdr == 'Control' else None
    sample_spec, ctrl_spec, sample_num_reps = createSampleSpec(countfile, replicatefile, rep_hdr, sample_hdr, ctrl_sample_or_hdr, ctrl_sample_hdr)
    gene_spec = createGeneSpec(guidemappingfile, sgrna_hdr, gene_hdr)
    test_celllines = [sample_id for sample_id in ctrl_spec if ctrl_spec[sample_id] != sample_id]

    # Compare by value: `is not ''` tests object identity, which is not a
    # reliable emptiness check for strings.
    # NOTE(review): confirm readControlGeneset takes a single argument in the
    # JACKS version this script is run against.
    ctrl_geneset = readControlGeneset(ctrl_genes) if ctrl_genes != '' else set()
    normtype = 'median'

    test_line = sys.argv[2]
    # NOTE(review): eval() on command-line input is only acceptable because
    # this script is for internal use; these look like plain integers.
    num_replicates = eval(sys.argv[3])
    num_celllines = eval(sys.argv[4])
    outfile = sys.argv[5]

示例#14

0

显示文件

        for sample_id, colname in sample_spec[filename]:
            if sample_id == cell_line or sample_id == ctrl_spec[cell_line]:
                if filename not in new_sample_spec:
                    new_sample_spec[filename] = []
                new_sample_spec[filename].append((sample_id, colname))
    return new_sample_spec
    
def filterCtrlSpec(ctrl_spec, cell_line):
    """Return a control spec reduced to ``cell_line`` and its control.

    Both the cell line and its control map to that control in the result.
    """
    control = ctrl_spec[cell_line]
    return {
        cell_line: control,  #Sample
        control: control,    #Control
    }

if __name__ == '__main__':

    # Only show warnings and above from the JACKS logger.
    LOG.setLevel(logging.WARNING)
    # Standard JACKS options plus two script-specific ones.
    parser = getJacksParser()
    parser.add_argument("--cell_line",
                    type=str,
                    default=None,
                    help="cell line to run")
    parser.add_argument("--separate",
                    action='store_true',
                    default=False,
                    help="Run cell lines separately")
    args = parser.parse_args()

    # Create the output directory when the prefix names one that is missing.
    outprefix = args.outprefix
    if '/' in outprefix and not os.path.exists(os.path.dirname(outprefix)): os.makedirs(os.path.dirname(outprefix))

    # Load the specification of samples to include

    # Load the specification of samples to include

示例#15

0

显示文件

from argparse import Namespace
from attrdict import AttrDict, AttrMap

import yaml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from crispr_tools import qc, tools, jacks_tools
import pprint

# JACKS is an optional dependency: import it when present and quieten its
# logger; otherwise print install instructions and provide a runJACKS stub
# that raises when called.
try:
    from jacks.jacks_io import runJACKS
    from jacks.infer import LOG as jacksLOG
    jacksLOG.setLevel(logging.WARNING)
except ImportError:
    print('To run jacks you need to install JACKS,\n', 'https://github.com/felicityallen/JACKS/tree/master/jacks\n' 
          "You can still run Mageck though, if it's installed.")
    def runJACKS(*a, **k):
        # Accepts any arguments so every call site fails with the same error.
        raise ModuleNotFoundError('JACKS not installed!!!')

from crispr_tools.drugz import drugZ_analysis

from crispr_tools.tools import list_not_str

# with open(pathlib.Path(__file__).parent/'version.txt') as f:
#     __version__ = f.readline().replace('\n', '')

class ConfigurationError(Exception):
    """Raised for errors in the configuration file that would prevent the
    pipeline from running."""

示例#16

0

显示文件

from jacks.jacks_io import createGeneSpec, createSampleSpec, getJacksParser, collateTestControlSamples, writeJacksWResults
from jacks.preprocess import loadDataAndPreprocess
import scipy as SP


def infer_JACKS_meanfc(gene_index, testdata, ctrldata):
    """Compute a mean log-fold-change score per gene and sample.

    For every gene, the guide rows listed in ``gene_index[gene]`` are taken
    from index 0 of the last axis of ``testdata`` and ``ctrldata``, the
    control is subtracted from the test values, and the NaN-ignoring mean
    over guides is taken.

    Returns a dict mapping gene to a 6-tuple
    ``(y, -1.0, -1.0, -1.0, w1, -1.0)`` where ``y`` holds the per-guide
    differences and ``w1`` the per-sample means. The ``-1.0`` entries are
    placeholders; the positions presumably mirror the layout of full JACKS
    results.
    """
    # numpy.nanmean replaces scipy.nanmean, which was only a deprecated
    # re-export of the NumPy function in the top-level scipy namespace.
    import numpy as np

    results = {}
    for gene in gene_index:
        Ig = gene_index[gene]
        y = (testdata[Ig, :, 0] - ctrldata[Ig, :, 0])
        w1 = np.nanmean(y, axis=0)
        results[gene] = (y, -1.0, -1.0, -1.0, w1, -1.0)
    return results


# Only show warnings and above from the JACKS logger, then read the standard
# JACKS command-line options.
LOG.setLevel(logging.WARNING)
parser = getJacksParser()
args = parser.parse_args()

# Create the output directory when the prefix names one that is missing.
outprefix = args.outprefix
if '/' in outprefix and not os.path.exists(os.path.dirname(outprefix)):
    os.makedirs(os.path.dirname(outprefix))

# Output file names for the gene-level results.
outfile_w = outprefix + '_gene_results.txt'
outfile_w2 = outprefix + '_genestd_results.txt'

# Load the specification of samples to include
LOG.info('Loading sample specification')
sample_spec, ctrl_spec, sample_num_reps = createSampleSpec(
    args.countfile, args.replicatefile, args.rep_hdr, args.sample_hdr,
    args.common_ctrl_sample, args.ctrl_sample_hdr)