@files([Paths.nr_refseq_list] + [Paths.genomespace_refseq_counts(sample) for sample in Paths.ALL_SAMPLES], Paths.genomespace_all_expressed_transcripts) @follows(quantitate_refseq_in_gspace) def make_list_of_expressed_transcripts(inputfiles, outputfile): inputlist = ' '.join(inputfiles) runproc(""" $ENV MINDEPTH=$GSPACE_STATS_MINIMUM_RAW_READS \ $REFSEQCNT_PICK_EXPRESSED $inputlist > $outputfile""", outputfile) @files([Paths.nr_refseq_db, Paths.genomespace_all_expressed_transcripts, Paths.genome_fasta, Paths.genomespace_read_database(Options.SNPREF_SAMPLE)], [Paths.reftranscriptome_sequences, Paths.reftranscriptome_cds_anno]) @follows(make_list_of_expressed_transcripts) def generate_SNPfixed_transcriptome(inputfiles, outputfiles): nrdb, expressed, genomeseq, gspace = inputfiles outseq, outanno = outputfiles runproc(""" $GENFASTA_MUTATED_TRANSCRIPTOME $nrdb $expressed $genomeseq $gspace \ $outseq $outanno""", outputfiles) runproc("$SAMTOOLS faidx $outseq", outputfiles) runproc("$FASIZE -detailed $outseq > $outseq.size", outputfiles) @files(Paths.reftranscriptome_sequences, Paths.reftranscriptome_gmap_index) @follows(generate_SNPfixed_transcriptome)