def Pipeline():
    testing = wga_settings['test']
    target = wga_settings['target']

    # split_ tuples
    if testing:
        intervals = ('interval', [20])
    else:
        intervals = ('interval', list(range(1, 23)) + ['X', 'Y'])

    glm = ('glm', ['SNP', 'INDEL'])

    align_to_reference = sequence_(
        apply_(
            reduce_(['sample_name', 'library', 'platform', 'platform_unit', 'chunk'],
                    pipes.AlignAndCleanALN)
        ),
    )

    if target:
        remove_dup = sequence_(
            reduce_(['sample_name'], picard.MERGE_SAMS)
        )
    else:
        remove_dup = sequence_(
            reduce_(['sample_name'], picard.MarkDuplicates)
        )

    preprocess_alignment = sequence_(
        map_(samtools.IndexBam),
        apply_(
            split_([intervals], gatk.RealignerTargetCreator)
            # if not is_capture or testing else map_(gatk.RealignerTargetCreator)
        ),
        map_(gatk.IndelRealigner),
        map_(gatk.BQSR),
        apply_(
            reduce_(['sample_name'], gatk.BQSRGatherer),
            map_(gatk.ApplyBQSR)  # TODO: BQSRGatherer is added as a parent via a hack inside ApplyBQSR.cmd
        )
    )

    call_variants = sequence_(
        apply_(
            # reduce_(['interval'], gatk.HaplotypeCaller, tag={'vcf': 'HaplotypeCaller'}),
            reduce_split_(['interval'], [glm], gatk.UnifiedGenotyper,
                          tag={'vcf': 'UnifiedGenotyper'}),
            combine=True
        ),
        reduce_(['vcf'], gatk.CombineVariants, 'Combine Into Raw VCFs'),
        split_([glm], gatk.VQSR),
    )

    return sequence_(
        align_to_reference,
        remove_dup,
        preprocess_alignment,
        call_variants
    )
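# A minimal run sketch, assuming the DAG/Workflow wiring used by the example
# scripts further down; the workflow name is a placeholder, and the
# fastq-loading stages that normally precede alignment are omitted here.
from cosmos.Workflow.models import Workflow
from cosmos.lib.ezflow.dag import DAG

dag = DAG().sequence_(Pipeline())   # attach the pipeline's stage sequence
WF = Workflow.start('WGA Example', restart=True)
dag.add_to_workflow(WF)             # translate DAG stages into workflow tasks
WF.run()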
def CteamPipeline(input_bams):
    bam_seq = None
    bam_dup = []  # to check for duplicate input files

    for b in input_bams:
        # extract genome_id from the file name, add it as a tag
        genome_id = os.path.basename(b).partition('.')[0]

        if genome_id in bam_dup:
            print('\n\nERROR: "%s" was already included in the input file list.\n' % b)
            sys.exit()
        else:
            bam_dup.append(genome_id)

        # TEMPORARILY, use genome_id as RG_ID, too
        s = sequence_(
            add_([INPUT(b, tags={'rg': genome_id})], stage_name="Load Input"))

        # append to sequence
        if bam_seq is None:
            bam_seq = s
        else:
            bam_seq = sequence_(bam_seq, s, combine=True)

    nInput = len(input_bams)
    nNodes = settings.settings['nNode']
    nSplit = min(256, 16 * max(nNodes // nInput, 1))  # floor division; min 16, up to 256 splits
    settings.settings['nSplit'] = nSplit

    chrom = ('chrom', list(range(1, 23)) + ['X', 'Y', 'MT'])
    split = ('split', list(range(1, nSplit + 1)))

    return sequence_(
        bam_seq,
        map_(pipes.CteamSortSplitBam),                  # sort bam by read name (== shuffling)
        split_([split], pipes.CteamTrimReadGroup),
        # map_(pipes.CteamBwaAln),                      # bwa aln
        map_(pipes.CteamBwaSampe),                      # bwa sampe
        reduce_(['rg'], pipes.CteamSplitByChromosome),  # merge split files and (re)split by chromosome
        split_([chrom], pipes.CteamRmDup_BuildIndex),   # samtools rmdup + index
        map_(pipes.CteamRealignTarget),                 # gatk indel realign target creator
        map_(pipes.CteamIndelRealigner),                # gatk indel realigner
        map_(pipes.CteamUnifiedGenotyper)               # gatk unifiedGenotyper
        # map_(pipes.CteamVariantFiltration)            # gatk variantFilter
    )
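# Hypothetical invocation (paths and workflow name invented): each input BAM
# becomes one "Load Input" stage tagged with its genome_id, and nSplit is
# derived from settings.settings['nNode'] before any stages are built.
#
#     dag = DAG().sequence_(CteamPipeline(['/data/NA12878.bam',
#                                          '/data/NA12891.bam']))
#     wf = Workflow.start('Cteam', restart=True)
#     dag.add_to_workflow(wf)
#     wf.run()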
def Pipeline_Somatic():
    testing = wga_settings['test']
    target = wga_settings['target']

    if testing:
        intervals = ('interval', [20])
    else:
        intervals = ('interval', list(range(1, 23)) + ['X', 'Y'])

    glm = ('glm', ['SNP', 'INDEL'])

    align_to_reference = sequence_(
        apply_(
            reduce_(['sample_name', 'library', 'platform', 'platform_unit',
                     'sample_type', 'chunk', 'rgid'],
                    pipes.AlignAndCleanMEM)
        ),
    )

    if target:
        remove_dup = sequence_(
            reduce_(['sample_name', 'sample_type', 'rgid'], picard.MERGE_SAMS)
        )
    else:
        remove_dup = sequence_(
            reduce_(['sample_name', 'sample_type', 'rgid'], picard.MarkDuplicates)
        )

    preprocess_alignment = sequence_(
        map_(samtools.IndexBam),
        apply_(
            split_([intervals], gatk.RealignerTargetCreator)
            # if not is_capture or testing else map_(gatk.RealignerTargetCreator)
        ),
        map_(gatk.IndelRealigner),
        map_(gatk.BQSR),
        apply_(
            reduce_(['sample_name', 'sample_type', 'rgid'], gatk.BQSRGatherer),
            map_(gatk.ApplyBQSR)  # TODO: BQSRGatherer is added as a parent via a hack inside ApplyBQSR.cmd
        )
    )

    somatic_call = sequence_(
        apply_(
            sequence_(
                map_(mutect.createInput),
                reduce_(['rgid', 'interval'], mutect.Somatic, tag={'vcf': 'Mutect'}),
                reduce_(['vcf'], gatk.CombineVariants, 'Combine Into Raw VCFs'),
            ),
            sequence_(
                map_(svdetect.PreProcessing),
                map_(svdetect.link2SV)
            )
        )
    )

    return sequence_(
        align_to_reference,
        remove_dup,
        preprocess_alignment,
        somatic_call
    )
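# Reading of the somatic stage above (an interpretation of the flow, not
# documented behavior): apply_ runs its two sequence_ branches side by side
# over the same preprocessed BAMs, so Mutect point-mutation calling and
# SVDetect structural-variant detection proceed in parallel.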
def Bam2Fastq(workflow, dag, settings, input_bams):
    if len(input_bams) == 0:
        raise WorkflowException('At least 1 BAM input required')

    dag.sequence_(
        sequence_(
            *[
                sequence_(
                    add_([INPUT(input_bam, tags={'input': os.path.basename(input_bam)})],
                         stage_name="Load Input Bams"),
                    split_([('rgid', _inputbam2rgids(input_bam))],
                           pipes.FilterBamByRG_To_FastQ)
                )
                for input_bam in input_bams
            ],
            combine=True
        ),
        split_([('pair', [1, 2])], genomekey_scripts.SplitFastq),
        configure(settings),
        add_run(workflow, finish=False),
    ).add_(list(_splitfastq2inputs(dag)))

    return dag
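# Reading of Bam2Fastq (an interpretation, not documented behavior): the
# add_run(workflow, finish=False) call executes the DAG built so far, which
# materializes the SplitFastq outputs; _splitfastq2inputs(dag) then re-adds
# those fastq chunks as INPUT nodes so that a downstream pipeline such as
# Pipeline() can be sequenced onto the same dag.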
def Pipeline():
    is_capture = wga_settings['capture']
    testing = wga_settings['test']

    # split_ tuples
    if testing:
        intervals = ('interval', [20])
    else:
        intervals = ('interval', list(range(1, 23)) + ['X', 'Y'])

    glm = ('glm', ['SNP', 'INDEL'])

    align_to_reference = sequence_(
        apply_(
            reduce_(['sample_name', 'library'], misc.FastqStats),
            reduce_(['sample_name', 'library', 'platform', 'platform_unit', 'chunk'],
                    pipes.AlignAndClean)
        ),
    )

    preprocess_alignment = sequence_(
        reduce_(['sample_name'], picard.MarkDuplicates),
        apply_(
            map_(picard.CollectMultipleMetrics),
            split_([intervals], gatk.RealignerTargetCreator)
            # if not is_capture or testing else map_(gatk.RealignerTargetCreator)
        ),
        map_(gatk.IndelRealigner),
        map_(gatk.BQSR),
        apply_(
            reduce_(['sample_name'], gatk.BQSRGatherer),
            map_(gatk.ApplyBQSR)  # TODO: BQSRGatherer is added as a parent via a hack inside ApplyBQSR.cmd
        )
    )

    call_variants = sequence_(
        # apply_(
        #     reduce_split_([], [intervals, glm], gatk.UnifiedGenotyper,
        #                   tag={'vcf': 'UnifiedGenotyper'}),
        #     combine=True
        # ) if is_capture
        # else
        apply_(
            # reduce_(['interval'], gatk.HaplotypeCaller, tag={'vcf': 'HaplotypeCaller'}),
            reduce_split_(['interval'], [glm], gatk.UnifiedGenotyper,
                          tag={'vcf': 'UnifiedGenotyper'}),
            combine=True
        ),
        reduce_(['vcf'], gatk.CombineVariants, 'Combine Into Raw VCFs'),
        split_([glm], gatk.VQSR),
        map_(gatk.Apply_VQSR),
        reduce_(['vcf'], gatk.CombineVariants, "Combine into Master VCFs")
    )

    if is_capture:
        return sequence_(
            align_to_reference,
            preprocess_alignment,
            call_variants,
            massive_annotation
        )
    else:
        return sequence_(
            align_to_reference,
            preprocess_alignment,
            reduce_split_(['sample_name'], [intervals], gatk.ReduceReads),
            call_variants,
            massive_annotation
        )
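# The two return branches above differ only in that the whole-genome (non-
# capture) path inserts gatk.ReduceReads, a GATK read-compression step,
# between preprocessing and variant calling; both end with the
# massive_annotation sequence defined below.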
from cosmos.Workflow.models import Workflow
from cosmos.lib.ezflow.dag import DAG, add_, split_
from tools import ECHO, CAT

####################
# Workflow
####################

dag = DAG().sequence_(
    add_([ECHO(tags={'word': 'hello'}),
          ECHO(tags={'word': 'world'})]),
    split_([('i', [1, 2])], CAT)
)
dag.create_dag_img('/tmp/ex.svg')

#################
# Run Workflow
#################

WF = Workflow.start('Example 1', restart=True)
dag.add_to_workflow(WF)
WF.run()
def pipeline(bams, test_bam=False, chromosome_only_split=False):
    # split_ tuples
    # chrom = ('chrom', list(range(1, 23)) + ['X', 'Y', 'MT'])
    chrom = ('chrom', list(range(1, 23)))
    glm = ('glm', ['SNP', 'INDEL'])
    dbnames = ('dbname', [
        'dbSNP135', 'CytoBand', 'Target_Scan', 'mirBase', 'Self_Chain',
        'Repeat_Masker', 'TFBS', 'Segmental_Duplications', 'SIFT', 'COSMIC',
        'PolyPhen2', 'Mutation_Taster', 'GERP', 'PhyloP', 'LRT', 'Mce46way',
        'Complete_Genomics_69', 'The_1000g_Febuary_all', 'The_1000g_April_all',
        'NHLBI_Exome_Project_euro', 'NHLBI_Exome_Project_aa',
        'NHLBI_Exome_Project_all', 'ENCODE_DNaseI_Hypersensitivity',
        'ENCODE_Transcription_Factor', 'UCSC_Gene', 'Refseq_Gene',
        'Ensembl_Gene', 'CCDS_Gene', 'HGMD_INDEL', 'HGMD_SNP', 'GWAS_Catalog'
    ])

    bam_seq = None
    for b in bams:
        header = _getHeaderInfo(b)
        sn = _getSeqName(header)
        rgid = [h[0] for h in header['rg']]

        # restrict output for testing
        if test_bam:
            sn = ['chr1']
            chrom = ('chrom', [1])
            glm = ('glm', ['SNP'])
            skip_VQSR = ('skip_VQSR', [True])
        else:
            skip_VQSR = ('skip_VQSR', [False])

        # if seqName is empty, assume the input is an unaligned bam;
        # use everything before the extension as part of the tag
        sample_name = os.path.splitext(os.path.basename(b))[0]

        if chromosome_only_split:
            # stop splitting by rgId
            bam_bwa_split = [('prevSn', sn), ('chromosome_only_split', [True])]
            indelrealign_reduce = ['bam']
        else:
            bam_bwa_split = [('rgId', rgid), ('prevSn', sn),
                             ('chromosome_only_split', [False])]
            indelrealign_reduce = ['bam', 'rgId']

        s = sequence_(
            add_([INPUT(b, tags={'bam': sample_name})], stage_name="Load BAMs"),
            split_(bam_bwa_split, pipes.Bam_To_BWA))

        if bam_seq is None:
            bam_seq = s
        else:
            bam_seq = sequence_(bam_seq, s, combine=True)

    # Previous pipeline (UnifiedGenotyper + annotation); built but not returned
    pr_pipeline = sequence_(
        bam_seq,
        reduce_split_(indelrealign_reduce, [chrom], pipes.IndelRealigner),
        map_(pipes.MarkDuplicates),
        reduce_(['bam', 'chrom'], pipes.BaseQualityScoreRecalibration),
        map_(pipes.ReduceReads),
        reduce_split_(['chrom'], [glm], pipes.UnifiedGenotyper),
        reduce_(['glm'], pipes.VariantQualityScoreRecalibration, tag={'vcf': 'main'}),
        reduce_(['vcf'], pipes.CombineVariants, "Merge VCF"),
        map_(pipes.Vcf2Anno_in),
        split_([dbnames], pipes.Annotate, tag={'build': 'hg19'}),
        reduce_(['vcf'], pipes.MergeAnnotations)
    )

    # HaplotypeCaller pipeline: official for GATK 3.0
    hc_pipeline = sequence_(
        bam_seq,
        reduce_split_(indelrealign_reduce, [chrom], pipes.IndelRealigner),
        map_(pipes.MarkDuplicates),
        reduce_(['bam', 'chrom'], pipes.BaseQualityScoreRecalibration),
        map_(pipes.HaplotypeCaller),
        reduce_(['chrom'], pipes.GenotypeGVCFs),
        split_([glm, skip_VQSR], pipes.VariantQualityScoreRecalibration,
               tag={'vcf': 'main'})
    )

    return hc_pipeline
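# pr_pipeline above is assembled but never used; to run the legacy
# UnifiedGenotyper + annotation flow instead of the GATK 3.0 HaplotypeCaller
# flow, return pr_pipeline instead. A hypothetical call (paths invented):
#
#     hc = pipeline(['/data/tumor.bam', '/data/normal.bam'],
#                   chromosome_only_split=True)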
from cosmos.lib.ezflow.dag import add_, map_, reduce_, split_, reduce_split_, sequence_, branch_
from subprocess import Popen, PIPE
from genomekey.tools import annovarext
from genomekey.wga_settings import wga_settings
import sys
import os


def get_db_names():
    cmd = '{0} listdbs'.format(wga_settings['annovarext_path'])
    if not os.path.exists(wga_settings['annovarext_path']):
        raise Exception('AnnovarExtensions is not installed at {0}'.format(
            wga_settings['annovarext_path']))
    dbs = Popen(cmd.split(' '), stdout=PIPE).communicate()[0]
    if len(dbs) < 10:
        raise Exception("could not list databases, command was {0}".format(cmd))
    return [db for db in dbs.split('\n') if db != '']


massive_annotation = sequence_(
    map_(annovarext.Vcf2Anno_in),
    split_([('build', ['hg19']), ('dbname', get_db_names())],
           annovarext.Annotate),
    reduce_(['vcf'], annovarext.MergeAnnotations)
)
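# Portability note: on Python 3, Popen(...).communicate()[0] returns bytes,
# so dbs would need .decode() before split('\n'); as written, get_db_names()
# assumes Python 2, matching the rest of this codebase. Note also that
# get_db_names() runs at import time, since massive_annotation is a
# module-level sequence_ consumed by the Pipeline() definitions above.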
from cosmos.Workflow.models import Workflow
from cosmos.lib.ezflow.dag import DAG, split_, add_, map_, reduce_
from tools import ECHO, CAT, WC, PASTE, Sleep

####################
# Workflow
####################

dag = DAG().sequence_(
    add_([ECHO(tags={'word': 'hello'}),
          ECHO(tags={'word': 'world'})]),
    map_(Sleep),
    split_([('i', [1, 2])], CAT),
    reduce_([], PASTE),
    map_(WC),
)
dag.create_dag_img('/tmp/ex.svg')

#################
# Run Workflow
#################

WF = Workflow.start('Example 3', restart=True, delete_intermediates=True)
dag.add_to_workflow(WF)
WF.run()
from cosmos.Workflow.models import Workflow
from cosmos.lib.ezflow.dag import DAG, split_, add_, map_, reduce_
from tools import ECHO, CAT, WC, PASTE

####################
# Workflow
####################

dag = DAG().sequence_(
    add_([ECHO(tags={'word': 'hello'}),
          ECHO(tags={'word': 'world'})]),
    split_([('i', [1, 2])], CAT),
    reduce_([], PASTE),
    map_(WC)
)
dag.create_dag_img('/tmp/ex.svg')

#################
# Run Workflow
#################

WF = Workflow.start('Example 2', restart=True, delete_intermediates=True)
dag.add_to_workflow(WF)
WF.run()