Exemplo n.º 1
0
class Preprocess(Task):
  """Decompress a downloaded FASTQ file and filter its reads.

  The gzipped input is looked up as ``<download_dir>/<accession>.fastq.gz``.
  ``unzip_seq_filter_N`` receives the barcode parameter, that input and the
  two outputs ``NoN.fastq`` and ``NoN.fastq.cnt`` (both under ``store_dir``).
  Per the original note, only the sequence information of reads consisting
  solely of A/C/G/T is kept.
  """
  mask = ['family', 'motif']
  inputs = {
      'fastq': File(
          'original',
          path=lambda x: "{}/{}.fastq.gz".format(download_dir, x['accession']),
          mask=mask,
      ),
  }
  targets = {
      'seq': File('NoN.fastq', root=store_dir, mask=mask),
      'cnt': File('NoN.fastq.cnt', root=store_dir, mask=mask),
  }
  actions = [
      (unzip_seq_filter_N, ['#barcode', '$fastq', '$seq', '$cnt']),
  ]
Exemplo n.º 2
0
class GetCounts(Task):
    """Count lines, words and characters in each input text file.

    The shell action runs ``wc`` on the input behind a
    ``line word char file`` header line, then pipes the result through
    ``sed`` to strip leading blanks and turn every run of blanks into a
    comma before writing ``counts.csv``.
    """
    inputs = {'inp': File('text', path=['a.txt', 'b.txt'])}
    targets = {'out': File('counts.csv')}
    actions = [
        # Raw string: sed needs a literal backslash before '+', and '\+' in
        # a normal string literal is an invalid escape sequence that newer
        # Python versions warn about.
        (r"(echo line word char file; wc {}) | sed 's/^ \+//;s/ \+/,/g' > {}",
         ["$inp", "$out"])
    ]
Exemplo n.º 3
0
class CombineCoverage(Task):
  """Merge the coverage CSVs and plot their density.

  The action list holds two entries: ``combine_csvs`` is given the ``cov``
  inputs and the ``combined.csv`` target; then an R/ggplot2 script, echoed
  into ``R --vanilla``, reads that CSV and draws a density of ``val``
  coloured by ``sample`` into ``pltcov.pdf`` (rooted at ``.``).
  """
  mask = ['group', 'sample']
  inputs = {'cov': GetCoverage.targets['cov']}
  targets = {'csv': File('combined.csv', mask = mask),
           'pdf': File('pltcov.pdf', mask = mask, root = '.')}
  # The shell command below is a single multi-line string literal: the
  # indentation of its continuation lines is part of the command text, and
  # the trailing backslash joins the end of the echo argument with the pipe.
  actions = [(combine_csvs, ['#cov', '#csv']), 
             ("""echo "library(ggplot2); pdf('{}')
              ggplot(read.csv('{}'), aes(x = val)) +
              geom_density(aes(color = factor(sample)))"\
              | R --vanilla""", ['$pdf','$csv'])]
Exemplo n.º 4
0
class Partition(Task):
  """Split aptamer sequences into foreground and background sets.

  Per the original note, sequences are partitioned into motif-containing
  (``fg.txt``) and motif-free (``bg.txt``) groups based on their distance
  from MOTIF.  The split itself is delegated to ``partition_aptamers``.
  """
  mask = ['family']
  inputs = {
      'seq': Preprocess.targets['seq'],
      # 'nbr': File('nbr.txt', root=store_dir)   (input disabled upstream)
  }
  targets = {
      'fg': File('fg.txt', mask=mask, root=store_dir),
      'bg': File('bg.txt', mask=mask, root=store_dir),
  }
  actions = [
      # NOTE(review): 'hahah' looks like a placeholder, possibly standing in
      # for the disabled 'nbr' input -- confirm what partition_aptamers
      # expects in this position.
      (partition_aptamers,
       [fg_type, '$seq', '#motif', 'hahah', '$fg', '$bg', '#barcode']),
  ]
Exemplo n.º 5
0
class MergeAdaptivePrecisionRecall(Task):
    """Merge the ``jf_roc_csv`` files into a single table.

    ``combine_csvs`` receives the input, the output file
    ``sim_precision_recall_multi_reg.tsv`` (under ``final_results_dir``)
    and a tab character -- presumably the field separator; confirm against
    ``combine_csvs``.
    """
    inputs = dict(inp=jf_roc_csv)
    targets = dict(
        out=File(
            'sim_precision_recall_multi_reg.tsv',
            root=final_results_dir,
        ),
    )
    actions = [
        (combine_csvs, ['#inp', '#out', '\t']),
    ]
Exemplo n.º 6
0
class MergeConstrainedPrecisionRecall(Task):
    """Merge the ``jf_const_roc_csv`` files into a single table.

    ``combine_csvs`` receives the input, the output file
    ``sim_precision_recall_single_reg.tsv`` (under ``final_results_dir``)
    and a tab character -- presumably the field separator; confirm against
    ``combine_csvs``.
    """
    inputs = dict(inp=jf_const_roc_csv)
    targets = dict(
        out=File(
            'sim_precision_recall_single_reg.tsv',
            root=final_results_dir,
        ),
    )
    actions = [
        (combine_csvs, ['#inp', '#out', '\t']),
    ]
Exemplo n.º 7
0
class GetBgSeqmers(Task):
  """Collect the seqmers found in the motif-free (background) pool.

  ``gen_bg_seqmers`` is handed the filtered sequences and their counts from
  ``Preprocess``, the background file from ``Partition``, the output
  ``seqmer.txt``, the module-level ``seqmer_len`` and the barcode parameter.
  """
  mask = ['family']
  inputs = {
      'bg': Partition.targets['bg'],
      'seq': Preprocess.targets['seq'],
      'cnt': Preprocess.targets['cnt'],
  }
  targets = {
      'out': File('seqmer.txt', mask=mask, root=store_dir),
  }
  actions = [
      (gen_bg_seqmers,
       ['$seq', '$cnt', '$bg', '$out', seqmer_len, '#barcode']),
  ]
Exemplo n.º 8
0
# Register the simulation parameters in `pdb_sim` (created outside this view).
# The first five calls pass an explicit parameter name; the last three pass
# only the values.
# NOTE(review): presumably the unnamed arguments carry their own column
# names (the middle one is a DataFrame parsed from `reg_string`) -- confirm.
pdb_sim.add_param(sim_models, 'model')
pdb_sim.add_param(thetas, 'theta')
pdb_sim.add_param(sigmasqs, 'sigmasq')
pdb_sim.add_param(gammatildes, 'gammatilde')
pdb_sim.add_param(tree_sizes, 'tree')
pdb_sim.add_param(restrict_alphas)
# Lines of `reg_string` starting with '#' are skipped by read_csv.
pdb_sim.add_param(pd.read_csv(StringIO(reg_string), comment='#'))
pdb_sim.add_param(restrict_folds)

# Separate parameter database holding only the 'regime' parameter.
pdb_regime = ParamDb("regime")
pdb_regime.add_param(sim_regs, 'regime')

# Next define JUDI files associated with task: simulate

# One regime CSV per 'regime' value, named regime_<regime>.csv under
# metadata_dir.
jf_reg = File('regime',
              param=pdb_regime,
              root=metadata_dir,
              path=lambda x: 'regime_{}.csv'.format(x['regime']))
# Copy of jf_reg with the 'regime' parameter renamed to 'modreg'.
jf_modreg = jf_reg.copy().rename({'regime': 'modreg'})

# Newick tree file: drosophila48.newick when tree == 'big', otherwise
# drosophila9.newick.
jf_tree = File(
    'newick',
    param=pdb_tree,
    root=metadata_dir,
    path=lambda x: 'drosophila{}.newick'.format(48
                                                if x['tree'] == 'big' else 9))

# ATTN.2 : We don't need to worry about the actual paths of the files;
#          JUDI will create them automatically in a hierarchy determined
#          by the sorted order of parameters, e.g.,
# sim_data_dir/sim_data_tsv/alpha~1/fold~1/gammatilde~0.25/model~OU1/modreg~global/sigmasq~1/theta~1000/tree~small/sim_data.tsv
Exemplo n.º 9
0
class AlignFastq(Task):
  """Run ``bwa aln`` with ``REF`` on the reads, writing ``aln.sai``.

  The reads file path comes from ``path_gen``; the command's standard
  output is redirected into the ``sai`` target.
  """
  inputs = {
      'reads': File('orig_fastq', path=path_gen),
  }
  targets = {
      'sai': File('aln.sai'),
  }
  actions = [
      ('bwa aln {} {} > {}', [REF, '$reads', '$sai']),
  ]
Exemplo n.º 10
0
class GetCoverage(Task):
  """Extract a one-column coverage table from the BAM file.

  The shell action pipes ``samtools rmdup`` into ``samtools mpileup`` and
  keeps the fourth field of its output, preceded by a ``val`` header line,
  in ``cov.csv``.
  """
  mask = ['group']
  inputs = {
      'bam': CreateBam.targets['bam'],
  }
  targets = {
      'cov': File('cov.csv', mask=mask),
  }
  actions = [
      ('(echo val; samtools rmdup {} - | samtools mpileup - | cut -f4) > {}',
       ['$bam', '$cov']),
  ]
Exemplo n.º 11
0
class CreateBam(Task):
  """Build ``aln.bam`` from the reads and their ``bwa aln`` output.

  The shell action runs ``bwa sampe`` with ``REF``, the ``sai`` file and
  the reads, then pipes the result through ``samtools view -Sbh`` and
  ``samtools sort`` into the ``bam`` target.
  """
  mask = ['group']
  inputs = {
      'reads': AlignFastq.inputs['reads'],
      'sai': AlignFastq.targets['sai'],
  }
  targets = {
      'bam': File('aln.bam', mask=mask),
  }
  actions = [
      ('bwa sampe {} {} {} | samtools view -Sbh - | samtools sort - > {}',
       [REF, '$sai', '$reads', '$bam']),
  ]
Exemplo n.º 12
0
class CountBgSeqmers(Task):
  """Sum the counts of identical seqmers in the motif-free pool.

  The shell action sorts the input so that equal keys (field 1) become
  adjacent, then an awk program accumulates field 2 per key and prints one
  tab-separated ``key, sum`` line each time the key changes and once more
  at the end.
  """
  mask = ['family']
  inputs = {
      'inp': GetBgSeqmers.targets['out'],
  }
  targets = {
      'out': File('seqmer.txt.cnt', mask=mask, root=store_dir),
  }
  actions = [
      # Adjacent literals are concatenated into one command template;
      # doubled braces are literal braces left for the awk program.
      ("cat {} | sort | awk '"
       'BEGIN {{OFS="\t"}} '
       '($1 == last || last == "") {{sum += $2}} '
       '($1 != last && last != "") {{print last, sum; sum = $2}} '
       '{{last = $1}} '
       'END {{print last, sum}}'
       "' > {}",
       ['$inp', '$out']),
  ]
Exemplo n.º 13
0
class CombineCounts(Task):
    """Combine the per-file count tables into ``result.csv``.

    ``combine_csvs`` is given the ``counts.csv`` outputs of ``GetCounts``
    and the single target, which is rooted at ``.`` with the ``n``
    parameter masked.
    """
    mask = ['n']
    inputs = {
        'inp': GetCounts.targets['out'],
    }
    targets = {
        'out': File('result.csv', mask=mask, root='.'),
    }
    actions = [
        (combine_csvs, ['#inp', '#out']),
    ]
Exemplo n.º 14
0
from judi import File, ParamDb, Task, combine_csvs

# Parameter database for the per-racer timings: 'racer' and 'game'.
# NOTE(review): presumably add_param crosses each new parameter with the
# ones already registered -- confirm against the JUDI documentation.
racer = ParamDb("racer")
racer.add_param(["tortoise", "hare"], "racer")
racer.add_param([1, 2], "game")

# Parameter database for the combined results: 'game' only.
game = ParamDb("game")
game.add_param([1, 2], "game")

# JUDI files bound to the two parameter databases above.
jf_racer = File("timing.csv", param=racer)
jf_game = File("combined_timing.csv", param=game)

class simulate(Task):
    """Copy ``<racer>_<game>.csv`` to the ``timing.csv`` target.

    The task is parameterised by the ``racer`` database, and the ``cp``
    command is filled with the racer and game values and the output path.
    """
    param = racer
    targets = {
        'out': jf_racer,
    }
    actions = [
        ('cp {}_{}.csv {}', ['#racer', '#game', '$out']),
    ]

class combine(Task):
    """Combine the per-racer timing files into one file per game.

    Reads the ``timing.csv`` files produced by ``simulate`` (``jf_racer``)
    and hands them to ``combine_csvs`` together with the
    ``combined_timing.csv`` target (``jf_game``).
    """
    param = game
    # The original had these two swapped: it consumed jf_game, which no
    # task produces, and wrote jf_racer, which is already the target of
    # `simulate`.
    inputs = {'inp': jf_racer}
    targets = {'out': jf_game}
    actions = [(combine_csvs, ['#inp', '#out'])]