示例#1
0
def annotate(family_file, variant_file, family_type, vep, silent, phased,
             strict, cadd_raw, whole_gene, annotation_dir, cadd_file,
             cadd_1000g, cadd_exac, cadd_esp, cadd_indels, thousand_g, exac,
             outfile, split_variants, processes, dbnfsp, verbose):
    """Annotate variants in a VCF file.\n
        The main function with genmod is to annotate genetic inheritance patterns for variants in families.
        Use flag --family together with a .ped file to describe which individuals in the vcf you wish to check inheritance for in the analysis.
        Individuals that are not present in the ped file will not be considered in the analysis.\n
        It is also possible to use genmod without a family file. In this case the variants will be annotated with a variety of options seen below.
        Please see documentation on github.com/moonso/genmod or genmod/examples/readme.md for more information.
    """
    # NOTE(review): the docstring above reads like user-facing command help
    # (it refers to command line flags), so it is kept as prose and the
    # parameters are documented here instead -- confirm how it is displayed.
    #
    # Parameters, as they are used in this function:
    #   family_file    -- handed to FamilyParser; when falsy no pedigree
    #                     checks are made and no inheritance headers are added.
    #   variant_file   -- VCF to read, or '-' to read the VCF from stdin.
    #   family_type    -- passed straight to FamilyParser.
    #   vep            -- when truthy, load_annotations is skipped and no
    #                     'Annotation' header is declared.
    #   silent/outfile -- forwarded to print_headers and print_variants.
    #   phased         -- forwarded to the workers and to get_batches.
    #   strict, dbnfsp -- forwarded to the workers.
    #   whole_gene     -- forwarded to get_batches.
    #   annotation_dir -- passed to load_annotations.
    #   cadd_file, cadd_1000g, cadd_exac, cadd_esp, cadd_indels
    #                  -- CADD sources; any truthy one enables the CADD header.
    #   cadd_raw       -- additionally declare the 'CADD_raw' header.
    #   thousand_g/exac -- frequency sources; each declares its own header.
    #   split_variants -- forwarded to VCFParser.
    #   processes      -- number of VariantConsumer workers to start.
    #   verbose        -- print progress information to stderr.
    #
    # Exits with status 1 when an individual of the ped file is missing from
    # the VCF.

    ######### This is for logging the command line string #########
    # Walk the declared parameter names instead of the whole locals mapping,
    # so that internal locals of this function can never end up in the header.
    arg_names, _, _, arg_values = inspect.getargvalues(inspect.currentframe())
    argument_list = [
        name + '=' + str(arg_values[name])
        for name in arg_names if arg_values[name]
    ]

    if verbose:
        print('\nRunning GENMOD annotate version %s \n' % VERSION,
              file=sys.stderr)

    start_time_analysis = datetime.now()

    ######### Setup a variant parser #########

    if variant_file == '-':
        variant_parser = VCFParser(fsock=sys.stdin,
                                   split_variants=split_variants)
    else:
        variant_parser = VCFParser(infile=variant_file,
                                   split_variants=split_variants)

    # These are the individuals found in the vcf file
    individuals = variant_parser.individuals

    head = variant_parser.metadata

    # Update version logging
    add_metadata(head,
                 'version',
                 'genmod',
                 version=VERSION,
                 command_line_string=' '.join(argument_list))

    ######### Parse the ped file (if there is one) #########

    families = {}

    if family_file:
        family_parser = FamilyParser(family_file, family_type)
        families = family_parser.families

        # The individuals in the ped file must be present in the variant file
        if any(ind not in individuals for ind in family_parser.individuals):
            warning(
                'All individuals in ped file must be in vcf file! Aborting...'
            )
            warning('Individuals in PED file: %s' %
                    ' '.join(family_parser.individuals.keys()))
            warning('Individuals in VCF file: %s' % ' '.join(individuals))
            print('Exiting...', file=sys.stderr)
            # This is an error path, so do not report success to the caller.
            sys.exit(1)

        add_metadata(
            head,
            'info',
            'GeneticModels',
            annotation_number='.',
            entry_type='String',
            description="':'-separated list of genetic models for this variant."
        )
        add_metadata(head,
                     'info',
                     'ModelScore',
                     annotation_number='1',
                     entry_type='Integer',
                     description="PHRED score for genotype models.")
        # The pieces are joined by implicit concatenation, so every piece
        # needs its own trailing space to keep the words apart.
        add_metadata(
            head,
            'info',
            'Compounds',
            annotation_number='.',
            entry_type='String',
            description=(
                "List of compound pairs for this variant. "
                "The list is splitted on ',' family id is separated with compounds "
                "with ':'. Compounds are separated with '|'."))

    if verbose and family_file:
        print('Starting analysis of families: %s' % ','.join(families.keys()),
              file=sys.stderr)
        print('Individuals included in analysis: %s\n' %
              ','.join(family_parser.individuals.keys()),
              file=sys.stderr)

    ######### Read to the annotation data structures #########

    gene_trees = {}
    exon_trees = {}

    # If the variants are already annotated we do not need to redo the annotation
    if not vep:
        gene_trees, exon_trees = load_annotations(annotation_dir, verbose)

        add_metadata(
            head,
            'info',
            'Annotation',
            annotation_number='.',
            entry_type='String',
            description='Annotates what feature(s) this variant belongs to.')
    elif verbose:
        print('Using VEP annotation', file=sys.stderr)

    ######### Check which other annotations files that should be used in the analysis #########

    # (message, source) pairs, listed in the order they are reported.
    cadd_sources = (
        ('Cadd file! %s', cadd_file),
        ('Cadd 1000G file! %s', cadd_1000g),
        ('Cadd ESP6500 file! %s', cadd_esp),
        ('Cadd InDel file! %s', cadd_indels),
        ('Cadd ExAC file! %s', cadd_exac),
    )
    if verbose:
        for message, source in cadd_sources:
            if source:
                print(message % source, file=sys.stderr)

    # A single CADD source is enough to switch CADD annotation on.
    cadd_annotation = any(source for _message, source in cadd_sources)

    if cadd_annotation:
        add_metadata(
            head,
            'info',
            'CADD',
            annotation_number='A',
            entry_type='Float',
            description="The CADD relative score for this alternative.")
        if cadd_raw:
            add_metadata(
                head,
                'info',
                'CADD_raw',
                annotation_number='A',
                entry_type='Float',
                description="The CADD raw score(s) for this alternative(s).")

    if thousand_g:
        if verbose:
            print('1000G frequency file! %s' % thousand_g, file=sys.stderr)
        add_metadata(head,
                     'info',
                     '1000G_freq',
                     annotation_number='A',
                     entry_type='Float',
                     description="Frequency in the 1000G database.")

    if exac:
        if verbose:
            print('ExAC frequency file! %s' % exac, file=sys.stderr)
        add_metadata(head,
                     'info',
                     'ExAC_freq',
                     annotation_number='A',
                     entry_type='Float',
                     description="Frequency in the ExAC database.")

    if dbnfsp and verbose:
        print('dbNFSP file! %s' % dbnfsp, file=sys.stderr)

    ###################################################################
    ### The task queue is where all jobs(in this case batches that  ###
    ### represents variants in a region) is put. The consumers will ###
    ### then pick their jobs from this queue.                       ###
    ###################################################################

    variant_queue = JoinableQueue(maxsize=1000)
    # The consumers will put their results in the results queue
    results = Manager().Queue()

    # CADD annotation needs more workers, but the count is only raised when
    # `processes` equals min(4, cpu_count()) -- presumably the default value,
    # so an explicit choice by the user is left alone (confirm against the
    # option definition).
    num_model_checkers = processes
    if cadd_annotation and num_model_checkers == min(4, cpu_count()):
        num_model_checkers = min(8, cpu_count())

    if verbose:
        print('Number of CPU:s %s' % cpu_count(), file=sys.stderr)
        print('Number of model checkers: %s' % num_model_checkers,
              file=sys.stderr)

    # We use a temp file to store the processed variants. It is created with
    # delete=False, so it has to be removed explicitly; the try/finally below
    # makes sure that also happens when the analysis fails half way.
    temp_file = NamedTemporaryFile(delete=False)
    temp_file.close()
    temporary_variant_file = None
    try:
        # Reopen the temp file as a utf-8 text file for the printer process
        temporary_variant_file = open(temp_file.name,
                                      mode='w',
                                      encoding='utf-8',
                                      errors='replace')

        # These are the workers that do the heavy part of the analysis
        model_checkers = [
            VariantConsumer(variant_queue, results, families, phased, vep,
                            cadd_raw, cadd_file, cadd_1000g, cadd_exac,
                            cadd_esp, cadd_indels, thousand_g, exac, dbnfsp,
                            strict, verbose)
            for _ in range(num_model_checkers)
        ]
        for checker in model_checkers:
            checker.start()

        # This process prints the variants to the temporary file
        var_printer = VariantPrinter(results,
                                     temporary_variant_file,
                                     head,
                                     mode='chromosome',
                                     verbosity=verbose)
        var_printer.start()

        if verbose:
            print('Start parsing the variants ... \n', file=sys.stderr)

        # Parse the original vcf and put batches of variants in the queue
        chromosome_list = get_batches(variant_parser, variant_queue,
                                      individuals, gene_trees, exon_trees,
                                      phased, vep, whole_gene, verbose)

        # One stop sign per worker, then wait for the queue to be drained
        for _ in range(num_model_checkers):
            variant_queue.put(None)
        variant_queue.join()

        # Tell the printer that no more results will arrive
        results.put(None)
        var_printer.join()

        temporary_variant_file.close()

        if verbose:
            print('Cromosomes found in variant file: %s \n' %
                  ','.join(chromosome_list),
                  file=sys.stderr)
            print('Models checked!\n', file=sys.stderr)

        sort_variants(temp_file.name, mode='chromosome', verbose=verbose)

        print_headers(head, outfile, silent)

        print_variants(temp_file.name, outfile, mode='modified', silent=silent)
    finally:
        # Remove all temp files, no matter how we got here
        if temporary_variant_file is not None:
            temporary_variant_file.close()
        if os.path.exists(temp_file.name):
            os.remove(temp_file.name)

    if verbose:
        print('Time for whole analyis: %s' %
              str(datetime.now() - start_time_analysis),
              file=sys.stderr)
示例#2
0
文件: annotate.py 项目: gpcr/genmod
def annotate(family_file, variant_file, family_type, vep, silent, phased, strict, cadd_raw, whole_gene, 
                annotation_dir, cadd_file, cadd_1000g, cadd_exac, cadd_esp, cadd_indels, thousand_g, exac, outfile,
                split_variants, processes, dbnfsp, verbose):
    """Annotate variants in a VCF file.\n
        The main function with genmod is to annotate genetic inheritance patterns for variants in families.
        Use flag --family together with a .ped file to describe which individuals in the vcf you wish to check inheritance for in the analysis.
        Individuals that are not present in the ped file will not be considered in the analysis.\n
        It is also possible to use genmod without a family file. In this case the variants will be annotated with a variety of options seen below.
        Please see documentation on github.com/moonso/genmod or genmod/examples/readme.md for more information.
    """
    # NOTE(review): the docstring above reads like user-facing command help
    # (it refers to command line flags), so it is kept as prose and the
    # parameters are documented here instead -- confirm how it is displayed.
    #
    # Parameters, as they are used in this function:
    #   family_file    -- handed to FamilyParser; when falsy no pedigree
    #                     checks are made and no inheritance headers are added.
    #   variant_file   -- VCF to read, or '-' to read the VCF from stdin.
    #   family_type    -- passed straight to FamilyParser.
    #   vep            -- when truthy, load_annotations is skipped and no
    #                     'Annotation' header is declared.
    #   silent/outfile -- forwarded to print_headers and print_variants.
    #   phased         -- forwarded to the workers and to get_batches.
    #   strict, dbnfsp -- forwarded to the workers.
    #   whole_gene     -- forwarded to get_batches.
    #   annotation_dir -- passed to load_annotations.
    #   cadd_file, cadd_1000g, cadd_exac, cadd_esp, cadd_indels
    #                  -- CADD sources; any truthy one enables the CADD header.
    #   cadd_raw       -- additionally declare the 'CADD_raw' header.
    #   thousand_g/exac -- frequency sources; each declares its own header.
    #   split_variants -- forwarded to VCFParser.
    #   processes      -- number of VariantConsumer workers to start.
    #   verbose        -- print progress information to stderr.
    #
    # Exits with status 1 when an individual of the ped file is missing from
    # the VCF.

    ######### This is for logging the command line string #########
    # Walk the declared parameter names instead of the whole locals mapping,
    # so that internal locals of this function can never end up in the header.
    arg_names, _, _, arg_values = inspect.getargvalues(inspect.currentframe())
    argument_list = [
        name + '=' + str(arg_values[name])
        for name in arg_names if arg_values[name]
    ]

    if verbose:
        print('\nRunning GENMOD annotate version %s \n' % VERSION, file=sys.stderr)

    start_time_analysis = datetime.now()

    ######### Setup a variant parser #########

    # The parser is always created with skip_info_check=True; only the input
    # source differs between stdin and a named file.
    if variant_file == '-':
        variant_parser = VCFParser(
            fsock=sys.stdin,
            split_variants=split_variants,
            skip_info_check=True
        )
    else:
        variant_parser = VCFParser(
            infile=variant_file,
            split_variants=split_variants,
            skip_info_check=True
        )

    # These are the individuals found in the vcf file
    individuals = variant_parser.individuals

    head = variant_parser.metadata

    # Update version logging
    add_metadata(
        head,
        'version',
        'genmod',
        version=VERSION,
        command_line_string=' '.join(argument_list)
    )

    ######### Parse the ped file (if there is one) #########

    families = {}

    if family_file:
        family_parser = FamilyParser(family_file, family_type)
        families = family_parser.families

        # The individuals in the ped file must be present in the variant file
        if any(ind not in individuals for ind in family_parser.individuals):
            warning('All individuals in ped file must be in vcf file! Aborting...')
            warning('Individuals in PED file: %s' % ' '.join(family_parser.individuals.keys()))
            warning('Individuals in VCF file: %s' % ' '.join(individuals))
            print('Exiting...', file=sys.stderr)
            # This is an error path, so do not report success to the caller.
            sys.exit(1)

        add_metadata(
            head,
            'info',
            'GeneticModels',
            annotation_number='.',
            entry_type='String',
            description="':'-separated list of genetic models for this variant."
        )
        add_metadata(
            head,
            'info',
            'ModelScore',
            annotation_number='1',
            entry_type='Integer',
            description="PHRED score for genotype models."
        )
        # The pieces are joined by implicit concatenation, so every piece
        # needs its own trailing space to keep the words apart.
        add_metadata(
            head,
            'info',
            'Compounds',
            annotation_number='.',
            entry_type='String',
            description=(
                "List of compound pairs for this variant. "
                "The list is splitted on ',' family id is separated with compounds "
                "with ':'. Compounds are separated with '|'."
            )
        )

    if verbose and family_file:
        print('Starting analysis of families: %s' %
                ','.join(families.keys()), file=sys.stderr)
        print('Individuals included in analysis: %s\n' %
                ','.join(family_parser.individuals.keys()), file=sys.stderr)

    ######### Read to the annotation data structures #########

    gene_trees = {}
    exon_trees = {}

    # If the variants are already annotated we do not need to redo the annotation
    if not vep:
        gene_trees, exon_trees = load_annotations(annotation_dir, verbose)

        add_metadata(
            head,
            'info',
            'Annotation',
            annotation_number='.',
            entry_type='String',
            description='Annotates what feature(s) this variant belongs to.'
        )
    elif verbose:
        print('Using VEP annotation', file=sys.stderr)

    ######### Check which other annotations files that should be used in the analysis #########

    # (message, source) pairs, listed in the order they are reported.
    cadd_sources = (
        ('Cadd file! %s', cadd_file),
        ('Cadd 1000G file! %s', cadd_1000g),
        ('Cadd ESP6500 file! %s', cadd_esp),
        ('Cadd InDel file! %s', cadd_indels),
        ('Cadd ExAC file! %s', cadd_exac),
    )
    if verbose:
        for message, source in cadd_sources:
            if source:
                print(message % source, file=sys.stderr)

    # A single CADD source is enough to switch CADD annotation on.
    cadd_annotation = any(source for _message, source in cadd_sources)

    if cadd_annotation:
        add_metadata(
            head,
            'info',
            'CADD',
            annotation_number='A',
            entry_type='Float',
            description="The CADD relative score for this alternative."
        )
        if cadd_raw:
            add_metadata(
                head,
                'info',
                'CADD_raw',
                annotation_number='A',
                entry_type='Float',
                description="The CADD raw score(s) for this alternative(s)."
            )

    if thousand_g:
        if verbose:
            print('1000G frequency file! %s' % thousand_g, file=sys.stderr)
        add_metadata(
            head,
            'info',
            '1000G_freq',
            annotation_number='A',
            entry_type='Float',
            description="Frequency in the 1000G database."
        )

    if exac:
        if verbose:
            print('ExAC frequency file! %s' % exac, file=sys.stderr)
        add_metadata(
            head,
            'info',
            'ExAC_freq',
            annotation_number='A',
            entry_type='Float',
            description="Frequency in the ExAC database."
        )

    if dbnfsp and verbose:
        print('dbNFSP file! %s' % dbnfsp, file=sys.stderr)

    ###################################################################
    ### The task queue is where all jobs(in this case batches that  ###
    ### represents variants in a region) is put. The consumers will ###
    ### then pick their jobs from this queue.                       ###
    ###################################################################

    variant_queue = JoinableQueue(maxsize=1000)
    # The consumers will put their results in the results queue
    results = Manager().Queue()

    # CADD annotation needs more workers, but the count is only raised when
    # `processes` equals min(4, cpu_count()) -- presumably the default value,
    # so an explicit choice by the user is left alone (confirm against the
    # option definition).
    num_model_checkers = processes
    if cadd_annotation and num_model_checkers == min(4, cpu_count()):
        num_model_checkers = min(8, cpu_count())

    if verbose:
        print('Number of CPU:s %s' % cpu_count(), file=sys.stderr)
        print('Number of model checkers: %s' % num_model_checkers, file=sys.stderr)

    # We use a temp file to store the processed variants. It is created with
    # delete=False, so it has to be removed explicitly; the try/finally below
    # makes sure that also happens when the analysis fails half way.
    temp_file = NamedTemporaryFile(delete=False)
    temp_file.close()
    temporary_variant_file = None
    try:
        # Reopen the temp file as a utf-8 text file for the printer process
        temporary_variant_file = open(
            temp_file.name,
            mode='w',
            encoding='utf-8',
            errors='replace'
        )

        # These are the workers that do the heavy part of the analysis
        model_checkers = [
            VariantConsumer(
                variant_queue,
                results,
                families,
                phased,
                vep,
                cadd_raw,
                cadd_file,
                cadd_1000g,
                cadd_exac,
                cadd_esp,
                cadd_indels,
                thousand_g,
                exac,
                dbnfsp,
                strict,
                verbose
            )
            for _ in range(num_model_checkers)
        ]
        for checker in model_checkers:
            checker.start()

        # This process prints the variants to the temporary file
        var_printer = VariantPrinter(
            results,
            temporary_variant_file,
            head,
            mode='chromosome',
            verbosity=verbose
        )
        var_printer.start()

        if verbose:
            print('Start parsing the variants ... \n', file=sys.stderr)

        # Parse the original vcf and put batches of variants in the queue
        chromosome_list = get_batches(
            variant_parser,
            variant_queue,
            individuals,
            gene_trees,
            exon_trees,
            phased,
            vep,
            whole_gene,
            verbose
        )

        # One stop sign per worker, then wait for the queue to be drained
        for _ in range(num_model_checkers):
            variant_queue.put(None)
        variant_queue.join()

        # Tell the printer that no more results will arrive
        results.put(None)
        var_printer.join()

        temporary_variant_file.close()

        if verbose:
            print('Cromosomes found in variant file: %s \n' % ','.join(chromosome_list), file=sys.stderr)
            print('Models checked!\n', file=sys.stderr)

        sort_variants(temp_file.name, mode='chromosome', verbose=verbose)

        print_headers(head, outfile, silent)

        print_variants(temp_file.name, outfile, mode='modified', silent=silent)
    finally:
        # Remove all temp files, no matter how we got here
        if temporary_variant_file is not None:
            temporary_variant_file.close()
        if os.path.exists(temp_file.name):
            os.remove(temp_file.name)

    if verbose:
        print('Time for whole analyis: %s' % str(datetime.now() - start_time_analysis), file=sys.stderr)
示例#3
0
def score(family_file, variant_file, family_type, annotation_dir, vep,
          plugin_file, processes, silent, outfile, verbose):
    """
    Score variants in a vcf file using Weighted Sum Model.
    The specific scores should be defined in a config file, see examples in
    genmod/configs
    """
    # NOTE(review): the docstring above may be shown to users as command
    # help, so it is kept short and the parameters are documented here.
    #
    # Parameters, as they are used in this function:
    #   family_file    -- required; handed to get_genetic_models.
    #   variant_file   -- VCF to read, or '-' to read the VCF from stdin.
    #   family_type    -- passed straight to get_genetic_models.
    #   annotation_dir -- passed to load_annotations (only when vep is falsy).
    #   vep            -- when truthy, load_annotations is skipped.
    #   plugin_file    -- required; handed to check_plugin.
    #   processes      -- number of VariantScorer workers to start.
    #   silent/outfile -- forwarded to print_headers and print_variants.
    #   verbose        -- log progress information.
    #
    # Exits with status 1 when the family file or the plugin file is missing.

    # Build the command line string that is stored in the VCF header. Walk
    # the declared parameter names instead of the whole locals mapping, so
    # that internal locals of this function can never end up in the header.
    arg_names, _, _, arg_values = inspect.getargvalues(inspect.currentframe())
    argument_list = [
        name + '=' + str(arg_values[name])
        for name in arg_names if arg_values[name]
    ]

    start_time_analysis = datetime.now()

    # NOTE(review): `log` is assumed to be a standard logging.Logger, so the
    # values are passed as lazy %-arguments -- confirm where it is defined.
    if verbose:
        log.info('Running GENMOD score, version: %s \n', VERSION)

    ## Start by parsing the pedigree file:
    if not family_file:
        log.critical("Please provide a family file")
        # Missing input is an error, so do not exit with a success status.
        sys.exit(1)

    prefered_models, family_id = get_genetic_models(family_file, family_type)

    if verbose:
        log.info('Prefered model found in family file: %s \n',
                 prefered_models)

    if not plugin_file:
        log.critical("Please provide a plugin file")
        sys.exit(1)

    ######### Read to the annotation data structures #########

    gene_trees = {}
    exon_trees = {}

    # If the variants are already annotated we do not need to redo the annotation
    if not vep:
        gene_trees, exon_trees = load_annotations(annotation_dir, verbose)
    elif verbose:
        log.info('Using VEP annotation')

    ## Check the variants:

    if variant_file == '-':
        variant_parser = VCFParser(fsock=sys.stdin, skip_info_check=True)
    else:
        variant_parser = VCFParser(infile=variant_file, skip_info_check=True)

    head = variant_parser.metadata

    add_metadata(head,
                 'version',
                 'genmod_score',
                 version=VERSION,
                 command_line_string=' '.join(argument_list))

    add_metadata(
        head,
        'info',
        'IndividualRankScore',
        annotation_number='.',
        entry_type='String',
        description=("Individual rank score for the variant in this family. "
                     "This score is NOT corrected for compounds"))

    add_metadata(
        head,
        'info',
        'RankScore',
        annotation_number='.',
        entry_type='String',
        description=("Combined rank score for the variant in this family. "
                     "This score is corrected for compounds"))

    alt_dict, score_dict, value_dict, operation_dict = check_plugin(
        plugin_file, variant_parser, verbose)

    ####################################################################
    ### The variant queue is where all jobs(in this case batches that###
    ### represents variants in a region) is put. The consumers will  ###
    ### then pick their jobs from this queue.                        ###
    ####################################################################

    variant_queue = JoinableQueue(maxsize=1000)
    # The consumers will put their results in the results queue
    results = Manager().Queue()

    num_model_scorers = processes

    if verbose:
        # No `file=` keyword here: that belongs to print(), and a standard
        # logger method raises TypeError when it receives it.
        log.info('Number of CPU:s %s', cpu_count())
        log.info('Number of model scorers: %s', num_model_scorers)

    # The temp file is created with delete=False, so it has to be removed
    # explicitly; the try/finally below makes sure that also happens when
    # the scoring fails half way.
    temp_file = NamedTemporaryFile(delete=False)
    temp_file.close()
    temporary_variant_file = None
    try:
        # We open a variant file to print the variants before sorting:
        temporary_variant_file = open(temp_file.name,
                                      mode='w',
                                      encoding='utf-8',
                                      errors='replace')

        model_scorers = [
            VariantScorer(variant_queue, results, variant_parser.header,
                          prefered_models, family_id, alt_dict, score_dict,
                          value_dict, operation_dict, verbose)
            for _ in range(num_model_scorers)
        ]
        for scorer in model_scorers:
            scorer.start()

        # This process prints the variants to the temporary file
        var_printer = VariantPrinter(results,
                                     temporary_variant_file,
                                     head,
                                     mode='score',
                                     verbosity=verbose)
        var_printer.start()

        if verbose:
            log.info('Start parsing the variants ... \n')

        # get_batches put the variants in the queue and returns all
        # chromosomes found among the variants
        chromosome_list = get_batches(variant_parser,
                                      variant_queue,
                                      individuals=[],
                                      gene_trees=gene_trees,
                                      exon_trees=exon_trees,
                                      phased=False,
                                      vep=vep,
                                      whole_genes=True,
                                      verbosity=verbose)

        # One stop sign per worker, then wait for the queue to be drained
        for _ in range(num_model_scorers):
            variant_queue.put(None)
        variant_queue.join()

        # Tell the printer that no more results will arrive
        results.put(None)
        var_printer.join()

        temporary_variant_file.close()

        if verbose:
            log.info('Cromosomes found in variant file: %s \n',
                     ','.join(chromosome_list))
            log.info('Variants scored!\n')

        sort_variants(infile=temp_file.name, mode='rank', verbose=verbose)

        print_headers(head, outfile, silent)

        print_variants(temp_file.name, outfile, mode='modified', silent=silent)
    finally:
        # Remove the temp file, no matter how we got here
        if temporary_variant_file is not None:
            temporary_variant_file.close()
        if os.path.exists(temp_file.name):
            os.remove(temp_file.name)

    if verbose:
        log.info('Time for whole analyis: %s',
                 str(datetime.now() - start_time_analysis))
示例#4
0
def score(family_file, variant_file, family_type, annotation_dir, vep,
                       plugin_file, processes, silent, outfile, verbose):
    """
    Score variants in a vcf file using Weighted Sum Model.
    The specific scores should be defined in a config file, see examples in
    genmod/configs
    """
    # NOTE(review): the docstring above may be shown to users as command
    # help, so it is kept short and the parameters are documented here.
    #
    # Parameters, as they are used in this function:
    #   family_file    -- required; handed to get_genetic_models.
    #   variant_file   -- VCF to read, or '-' to read the VCF from stdin.
    #   family_type    -- passed straight to get_genetic_models.
    #   annotation_dir -- passed to load_annotations (only when vep is falsy).
    #   vep            -- when truthy, load_annotations is skipped.
    #   plugin_file    -- required; handed to check_plugin.
    #   processes      -- number of VariantScorer workers to start.
    #   silent/outfile -- forwarded to print_headers and print_variants.
    #   verbose        -- log progress information.
    #
    # Exits with status 1 when the family file or the plugin file is missing.

    # Build the command line string that is stored in the VCF header. Walk
    # the declared parameter names instead of the whole locals mapping, so
    # that internal locals of this function can never end up in the header.
    arg_names, _, _, arg_values = inspect.getargvalues(inspect.currentframe())
    argument_list = [
        name + '=' + str(arg_values[name])
        for name in arg_names if arg_values[name]
    ]

    start_time_analysis = datetime.now()

    # NOTE(review): `log` is assumed to be a standard logging.Logger, so the
    # values are passed as lazy %-arguments -- confirm where it is defined.
    if verbose:
        log.info('Running GENMOD score, version: %s \n', VERSION)

    ## Start by parsing the pedigree file:
    if not family_file:
        log.critical("Please provide a family file")
        # Missing input is an error, so do not exit with a success status.
        sys.exit(1)

    prefered_models, family_id = get_genetic_models(
        family_file,
        family_type
    )

    if verbose:
        log.info('Prefered model found in family file: %s \n', prefered_models)

    if not plugin_file:
        log.critical("Please provide a plugin file")
        sys.exit(1)

    ######### Read to the annotation data structures #########

    gene_trees = {}
    exon_trees = {}

    # If the variants are already annotated we do not need to redo the annotation
    if not vep:
        gene_trees, exon_trees = load_annotations(annotation_dir, verbose)
    elif verbose:
        log.info('Using VEP annotation')

    ## Check the variants:

    if variant_file == '-':
        variant_parser = VCFParser(
            fsock=sys.stdin,
            skip_info_check=True
        )
    else:
        variant_parser = VCFParser(
            infile=variant_file,
            skip_info_check=True
        )

    head = variant_parser.metadata

    add_metadata(
        head,
        'version',
        'genmod_score',
        version=VERSION,
        command_line_string=' '.join(argument_list)
    )

    add_metadata(
        head,
        'info',
        'IndividualRankScore',
        annotation_number='.',
        entry_type='String',
        description=(
            "Individual rank score for the variant in this family. "
            "This score is NOT corrected for compounds"
        )
    )

    add_metadata(
        head,
        'info',
        'RankScore',
        annotation_number='.',
        entry_type='String',
        description=(
            "Combined rank score for the variant in this family. "
            "This score is corrected for compounds"
        )
    )

    alt_dict, score_dict, value_dict, operation_dict = check_plugin(
        plugin_file,
        variant_parser,
        verbose
    )

    ####################################################################
    ### The variant queue is where all jobs(in this case batches that###
    ### represents variants in a region) is put. The consumers will  ###
    ### then pick their jobs from this queue.                        ###
    ####################################################################

    variant_queue = JoinableQueue(maxsize=1000)
    # The consumers will put their results in the results queue
    results = Manager().Queue()

    num_model_scorers = processes

    if verbose:
        # No `file=` keyword here: that belongs to print(), and a standard
        # logger method raises TypeError when it receives it.
        log.info('Number of CPU:s %s', cpu_count())
        log.info('Number of model scorers: %s', num_model_scorers)

    # The temp file is created with delete=False, so it has to be removed
    # explicitly; the try/finally below makes sure that also happens when
    # the scoring fails half way.
    temp_file = NamedTemporaryFile(delete=False)
    temp_file.close()
    temporary_variant_file = None
    try:
        # We open a variant file to print the variants before sorting:
        temporary_variant_file = open(
            temp_file.name,
            mode='w',
            encoding='utf-8',
            errors='replace'
        )

        model_scorers = [
            VariantScorer(
                variant_queue,
                results,
                variant_parser.header,
                prefered_models,
                family_id,
                alt_dict,
                score_dict,
                value_dict,
                operation_dict,
                verbose
            )
            for _ in range(num_model_scorers)
        ]
        for scorer in model_scorers:
            scorer.start()

        # This process prints the variants to the temporary file
        var_printer = VariantPrinter(
            results,
            temporary_variant_file,
            head,
            mode='score',
            verbosity=verbose
        )
        var_printer.start()

        if verbose:
            log.info('Start parsing the variants ... \n')

        # get_batches put the variants in the queue and returns all
        # chromosomes found among the variants
        chromosome_list = get_batches(
            variant_parser,
            variant_queue,
            individuals=[],
            gene_trees=gene_trees,
            exon_trees=exon_trees,
            phased=False,
            vep=vep,
            whole_genes=True,
            verbosity=verbose
        )

        # One stop sign per worker, then wait for the queue to be drained
        for _ in range(num_model_scorers):
            variant_queue.put(None)
        variant_queue.join()

        # Tell the printer that no more results will arrive
        results.put(None)
        var_printer.join()

        temporary_variant_file.close()

        if verbose:
            log.info('Cromosomes found in variant file: %s \n', ','.join(chromosome_list))
            log.info('Variants scored!\n')

        sort_variants(
            infile=temp_file.name,
            mode='rank',
            verbose=verbose
        )

        print_headers(head, outfile, silent)

        print_variants(temp_file.name, outfile, mode='modified', silent=silent)
    finally:
        # Remove the temp file, no matter how we got here
        if temporary_variant_file is not None:
            temporary_variant_file.close()
        if os.path.exists(temp_file.name):
            os.remove(temp_file.name)

    if verbose:
        log.info('Time for whole analyis: %s', str(datetime.now() - start_time_analysis))