示例#1
0
文件: views.py 项目: cchng/mutalyzer
def bed():
    """
    Render a BED track with the raw variants of a variant description.

    The description is taken from the `description` query argument and run
    through the variant checker. Every chromosomal raw variant reported by
    the checker becomes one tab-separated line below a `track` definition
    line, e.g., for use in the UCSC Genome Browser.

    Requests using the old `name` argument are permanently redirected to
    this view with a `description` argument. A missing description, or a
    check that yields no chromosomal raw variants, results in a 404.
    """
    # Backwards compatibility.
    if 'name' in request.args:
        target = url_for('.bed', description=request.args['name'])
        return redirect(target, code=301)

    description = request.args.get('description')
    if not description:
        abort(404)

    output = Output(__file__)
    variantchecker.check_variant(description, output)

    raw_variants = output.getIndexedOutput('rawVariantsChromosomal', 0)
    if not raw_variants:
        abort(404)

    # Todo: Hard-coded hg19.
    fields = {
        'name'       : 'Mutalyzer',
        'description': 'Mutalyzer track for ' + description,
        'visibility' : 'pack',
        'db'         : 'hg19',
        'url'        : url_for('.name_checker',
                               description=description,
                               _external=True),
        'color':       '255,0,0'}

    # First line: the track definition with all fields as key="value".
    track_line = ' '.join(
        ['track'] + ['%s="%s"' % item for item in fields.items()])
    lines = [track_line]

    # Items 0 and 1 end up in the first and sixth BED column (presumably
    # chromosome and strand -- to be confirmed against the variant checker).
    variants = raw_variants[2]
    first_column = raw_variants[0]
    sixth_column = raw_variants[1]

    for descr, positions in variants:
        columns = [first_column,
                   unicode(min(positions) - 1),
                   unicode(max(positions)),
                   descr,
                   '0',
                   sixth_column]
        lines.append('\t'.join(columns))

    # Every line, including the last one, is terminated by a newline.
    content = ''.join(line + '\n' for line in lines)

    response = make_response(content)
    response.headers['Content-Type'] = 'text/plain; charset=utf-8'
    return response
示例#2
0
def bed():
    """
    Render a BED track with the raw variants of a variant description.

    The description is taken from the `description` query argument and run
    through the variant checker. Every chromosomal raw variant reported by
    the checker becomes one tab-separated line below a `track` definition
    line, e.g., for use in the UCSC Genome Browser.

    Requests using the old `name` argument are permanently redirected to
    this view with a `description` argument. A missing description, or a
    check that yields no chromosomal raw variants, results in a 404.
    """
    # Backwards compatibility.
    if 'name' in request.args:
        target = url_for('.bed', description=request.args['name'])
        return redirect(target, code=301)

    description = request.args.get('description')
    if not description:
        abort(404)

    output = Output(__file__)
    variantchecker.check_variant(description, output)

    raw_variants = output.getIndexedOutput('rawVariantsChromosomal', 0)
    if not raw_variants:
        abort(404)

    # Todo: Hard-coded hg19.
    fields = {
        'name'       : 'Mutalyzer',
        'description': 'Mutalyzer track for ' + description,
        'visibility' : 'pack',
        'db'         : 'hg19',
        'url'        : url_for('.name_checker',
                               description=description,
                               _external=True),
        'color':       '255,0,0'}

    # First line: the track definition with all fields as key="value".
    track_line = ' '.join(
        ['track'] + ['%s="%s"' % item for item in fields.items()])
    lines = [track_line]

    # Items 0 and 1 end up in the first and sixth BED column (presumably
    # chromosome and strand -- to be confirmed against the variant checker).
    variants = raw_variants[2]
    first_column = raw_variants[0]
    sixth_column = raw_variants[1]

    for descr, positions in variants:
        columns = [first_column,
                   unicode(min(positions) - 1),
                   unicode(max(positions)),
                   descr,
                   '0',
                   sixth_column]
        lines.append('\t'.join(columns))

    # Every line, including the last one, is terminated by a newline.
    content = ''.join(line + '\n' for line in lines)

    response = make_response(content)
    response.headers['Content-Type'] = 'text/plain; charset=utf-8'
    return response
示例#3
0
文件: views.py 项目: raux/mutalyzer
def back_translator():
    """
    Back translator.

    Back translate the description given in the `description` query
    argument and render the result page. Without a description no back
    translation is done and the page is rendered with an empty variant
    list.
    """
    output = Output(__file__)
    received = 'Received Back Translate request from {}'.format(
        request.remote_addr)
    output.addMessage(__file__, -1, 'INFO', received)
    stats.increment_counter('back-translator/website')

    description = request.args.get('description')

    if description:
        variants = backtranslator.backtranslate(output, description)
    else:
        variants = []

    # The middle item of the summary is not used by the template.
    errors, _, summary = output.Summary()
    # The messages are taken before the end of the request is logged.
    messages = map(util.message_info, output.getMessages())

    output.addMessage(__file__, -1, 'INFO', 'Finished Back Translate request')

    context = {
        'errors': errors,
        'summary': summary,
        'description': description or '',
        'messages': messages,
        'variants': variants,
    }
    return render_template('back-translator.html', **context)
示例#4
0
文件: views.py 项目: raux/mutalyzer
def snp_converter():
    """
    SNP converter.

    Look up the HGVS description(s) for the dbSNP rs number given in the
    `rs_id` query argument and render them. Without an rs number only the
    empty form is rendered. Requests using the old `rsId` argument are
    permanently redirected to this view with an `rs_id` argument.
    """
    # Backwards compatibility.
    if 'rsId' in request.args:
        target = url_for('.snp_converter', rs_id=request.args['rsId'])
        return redirect(target, code=301)

    rs_id = request.args.get('rs_id')
    if not rs_id:
        return render_template('snp-converter.html')

    output = Output(__file__)
    received = ('Received request snpConvert(%s) from %s' %
                (rs_id, request.remote_addr))
    output.addMessage(__file__, -1, 'INFO', received)
    stats.increment_counter('snp-converter/website')

    descriptions = ncbi.rsid_to_descriptions(rs_id, output)

    # The messages are taken before the end of the request is logged.
    messages = map(util.message_info, output.getMessages())

    finished = 'Finished request snpConvert(%s)' % rs_id
    output.addMessage(__file__, -1, 'INFO', finished)

    # Only the third item of the summary is passed to the template.
    summary = output.Summary()[2]

    return render_template('snp-converter.html',
                           rs_id=rs_id,
                           descriptions=descriptions,
                           messages=messages,
                           summary=summary)
示例#5
0
def syntax_checker():
    """
    Run the grammar on a variant description and render the syntax checker
    page.

    The description is taken from the `description` query argument; without
    it only the empty form is rendered. Requests using the old `variant`
    argument are permanently redirected to this view with a `description`
    argument.
    """
    # Backwards compatibility.
    if 'variant' in request.args:
        target = url_for('.syntax_checker',
                         description=request.args['variant'])
        return redirect(target, code=301)

    description = request.args.get('description')
    if not description:
        return render_template('syntax-checker.html')

    output = Output(__file__)
    received = ('Received request syntaxCheck(%s) from %s'
                % (description, request.remote_addr))
    output.addMessage(__file__, -1, 'INFO', received)
    stats.increment_counter('syntax-checker/website')

    # The return value of the parse is not used here; the parse error is
    # read back from the output object.
    Grammar(output).parse(description)

    parse_error = output.getOutput('parseError')
    # The messages are taken before the end of the request is logged.
    messages = map(util.message_info, output.getMessages())

    finished = 'Finished request syntaxCheck(%s)' % description
    output.addMessage(__file__, -1, 'INFO', finished)

    context = {
        'description': description,
        'messages': messages,
        'parse_error': parse_error,
    }
    return render_template('syntax-checker.html', **context)
示例#6
0
    def _processSNP(self, batch_job, cmd, flags):
        """
        Process an entry from the SNP converter Batch and append the result
        to the job-file.

        The result line holds the input, the HGVS description(s) found for
        it and the batch messages, separated by tabs. If the job-file does
        not exist yet, it is created with a header line above the result.
        Exceptions are not caught here.

        Side-effect:
            - Output written to outputfile.

        @arg batch_job: The batch job of this entry, its result_id is used
                        for the name of the job-file
        @type batch_job:
        @arg cmd: The SNP converter input
        @type cmd:
        @arg flags: Flags of the current entry
        @type flags:
        """
        O = Output(__file__)
        O.addMessage(__file__, -1, "INFO",
                     "Received SNP converter batch rs" + cmd)

        stats.increment_counter('snp-converter/batch')

        #Read out the flags
        # Todo: Do something with the flags?
        skip = self.__processFlags(O, flags)

        descriptions = []
        if not skip:
            R = Retriever.Retriever(O)
            descriptions = R.snpConvert(cmd)

        # Todo: Is output ok?
        outputline = "%s\t" % cmd
        outputline += "%s\t" % "|".join(descriptions)
        outputline += "%s\t" % "|".join(O.getBatchMessages(2))

        #Output
        filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                            batch_job.result_id)

        # Opening in append mode creates the file, so whether a header is
        # needed must be decided before opening it.
        write_header = not os.path.exists(filename)

        if flags and 'C' in flags:
            separator = '\t'
        else:
            separator = '\n'

        # The with statement closes the file even if one of the writes
        # raises, so the handle can no longer leak.
        with io.open(filename, mode='a', encoding='utf-8') as handle:
            if write_header:
                # The file did not exist yet, start it with the header as
                # a tab delimited string.
                header = [
                    'Input Variant', 'HGVS description(s)',
                    'Errors and warnings'
                ]
                handle.write("%s\n" % "\t".join(header))

            handle.write("%s%s" % (outputline, separator))

        O.addMessage(__file__, -1, "INFO",
                     "Finished SNP converter batch rs%s" % cmd)
示例#7
0
文件: views.py 项目: cchng/mutalyzer
def syntax_checker():
    """
    Run the grammar on a variant description and render the syntax checker
    page.

    The description is taken from the `description` query argument; without
    it only the empty form is rendered. Requests using the old `variant`
    argument are permanently redirected to this view with a `description`
    argument.
    """
    # Backwards compatibility.
    if 'variant' in request.args:
        target = url_for('.syntax_checker',
                         description=request.args['variant'])
        return redirect(target, code=301)

    description = request.args.get('description')
    if not description:
        return render_template('syntax-checker.html')

    output = Output(__file__)
    received = ('Received request syntaxCheck(%s) from %s'
                % (description, request.remote_addr))
    output.addMessage(__file__, -1, 'INFO', received)
    stats.increment_counter('syntax-checker/website')

    # The return value of the parse is not used here; the parse error is
    # read back from the output object.
    Grammar(output).parse(description)

    parse_error = output.getOutput('parseError')
    # The messages are taken before the end of the request is logged.
    messages = map(util.message_info, output.getMessages())

    finished = 'Finished request syntaxCheck(%s)' % description
    output.addMessage(__file__, -1, 'INFO', finished)

    context = {
        'description': description,
        'messages': messages,
        'parse_error': parse_error,
    }
    return render_template('syntax-checker.html', **context)
示例#8
0
def lovd_get_gs():
    """
    LOVD bypass to get the correct GeneSymbol incl Transcript variant.

    Used by LOVD to get the correct transcript variant out of a genomic
    record. LOVD uses a genomic reference (``NC_``?) in combination with a
    gene symbol to pass variant info to mutalyzer. Mutalyzer 1.0 was only
    using the first transcript. LOVD supplies the NM of the transcript needed
    but this was ignored. This helper allows LOVD to get the requested
    transcript variant from a genomic reference.

    Parameters:

    mutationName
      The mutationname without gene symbol.
    variantRecord
      The NM reference of the variant.
    forward
      If set this forwards the request to the name checker.

    Returns: A redirect to the name checker if `forward` is set, otherwise
    the gene symbol with the variant notation as plain text. If no legend
    entry matches `variantRecord`, the plain text 'Transcript not found'.
    """
    def plain_text(body):
        # All non-redirect answers of this view are plain text.
        response = make_response(body)
        response.headers['Content-Type'] = 'text/plain; charset=utf-8'
        return response

    mutation_name = request.args['mutationName']
    variant_record = request.args['variantRecord']
    forward = request.args.get('forward')

    call_arguments = (mutation_name, variant_record, forward)

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request getGS(%s, %s, %s) from %s'
                      % (call_arguments + (request.remote_addr,)))

    variantchecker.check_variant(mutation_name, output)

    output.addMessage(__file__, -1, 'INFO',
                      'Finished request getGS(%s, %s, %s)' % call_arguments)

    # Filter the transcript from the legend.
    transcripts = [entry for entry in output.getOutput('legends')
                   if '_v' in entry[0]]

    for entry in transcripts:
        if entry[1] != variant_record:
            continue

        if not forward:
            return plain_text(entry[0])

        # Put the matching legend name between the reference and the
        # variant part of the mutation name.
        p, a = mutation_name.split(':')
        target = url_for('.name_checker',
                         description='%s(%s):%s' % (p, entry[0], a),
                         standalone=1)
        return redirect(target)

    return plain_text('Transcript not found')
示例#9
0
    def _processSyntaxCheck(self, batch_job, cmd, flags):
        """
        Process an entry from the Syntax Check and append the result to the
        job-file.

        The result line holds the input and either "OK" or the batch
        messages, separated by a tab. If the job-file does not exist yet,
        it is created with a header line above the result.

        Side-effect:
            - Output written to outputfile

        @arg batch_job: The batch job of this entry, its result_id is used
                        for the name of the job-file
        @type batch_job:
        @arg cmd:   The Syntax Checker input
        @type cmd:
        @arg flags: Flags of the current entry
        @type flags:
        """
        output = Output(__file__)
        grammar = Grammar(output)

        output.addMessage(__file__, -1, "INFO",
                          "Received SyntaxChecker batchvariant " + cmd)

        stats.increment_counter('syntax-checker/batch')

        skip = self.__processFlags(output, flags)
        #Process
        if not skip:
            parsetree = grammar.parse(cmd)
        else:
            parsetree = None

        if parsetree:
            result = "OK"
        else:
            result = "|".join(output.getBatchMessages(2))

        #Output
        filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                            batch_job.result_id)

        # Opening in append mode creates the file, so whether a header is
        # needed must be decided before opening it.
        write_header = not os.path.exists(filename)

        if flags and 'C' in flags:
            separator = '\t'
        else:
            separator = '\n'

        # The with statement closes the file even if one of the writes
        # raises, so the handle can no longer leak.
        with io.open(filename, mode='a', encoding='utf-8') as handle:
            if write_header:
                # The file did not exist yet, start it with the header as
                # a tab delimited string.
                header = ['Input', 'Status']
                handle.write("%s\n" % "\t".join(header))

            handle.write("%s\t%s%s" % (cmd, result, separator))

        output.addMessage(__file__, -1, "INFO",
                          "Finished SyntaxChecker batchvariant " + cmd)
示例#10
0
def import_from_reference(assembly, reference):
    """
    Import transcript mappings from a genomic reference.

    For every gene in the reference record that has at least one
    transcript, a transcript mapping on the `chrM` chromosome of the
    assembly is created or updated. All mappings are committed together at
    the end.

    Raises a ValueError if the molecule type of the record is not 'm'.

    .. todo: Also report how much was added/updated.

    .. note: Currently no exon locations are supported, this has only been
       tested on mtDNA.
    """
    chromosome = assembly.chromosomes.filter_by(name='chrM').one()

    retriever = Retriever.GenBankRetriever(Output(__file__))
    record = retriever.loadrecord(reference)

    if record.molType != 'm':
        raise ValueError('Only mitochondial references are supported')

    genes = record.geneList
    select_transcript = len(genes) > 1
    by_name = attrgetter('name')

    for gene in genes:
        # We support exactly one transcript per gene: the first one when
        # ordered by name. Genes without transcripts are skipped.
        try:
            transcript = sorted(gene.transcriptList, key=by_name)[0]
        except IndexError:
            continue

        # We use gene.location for now, it is always present and the same
        # for our purposes (instead of the mRNA location of the transcript).
        start, stop = gene.location

        if gene.orientation == -1:
            orientation = 'reverse'
        else:
            orientation = 'forward'

        # Not every transcript has a CDS with a location.
        try:
            cds = transcript.CDS.location
        except AttributeError:
            cds = None

        # The gene location is passed both as start/stop and as the only
        # entry of the two lists following them.
        session.add(TranscriptMapping.create_or_update(
            chromosome,
            'refseq',
            record.source_accession,
            gene.name,
            orientation,
            start,
            stop,
            [start],
            [stop],
            'reference',
            cds=cds,
            select_transcript=select_transcript,
            version=int(record.source_version)))

    session.commit()
示例#11
0
    def test_getcache(self):
        """
        Calling getCache for entries created in the last 14 days should
        give us exactly three cache entries.
        """
        two_weeks = datetime.timedelta(days=14)
        created_since = datetime.datetime.today() - two_weeks

        # Instantiated as in the original test, but not used any further.
        sync = CacheSync(Output(__file__))

        response = self._call('getCache', created_since)
        assert len(response.CacheEntry) == 3
示例#12
0
    def _processSNP(self, batch_job, cmd, flags):
        """
        Process an entry from the SNP converter Batch and append the result
        to the job-file.

        The result line holds the input, the HGVS description(s) found for
        it and the batch messages, separated by tabs. If the job-file does
        not exist yet, it is created with a header line above the result.
        Exceptions are not caught here.

        Side-effect:
            - Output written to outputfile.

        @arg batch_job: The batch job of this entry, its result_id is used
                        for the name of the job-file
        @type batch_job:
        @arg cmd: The SNP converter input
        @type cmd:
        @arg flags: Flags of the current entry
        @type flags:
        """
        O = Output(__file__)
        O.addMessage(__file__, -1, "INFO",
                     "Received SNP converter batch rs" + cmd)

        stats.increment_counter('snp-converter/batch')

        #Read out the flags
        # Todo: Do something with the flags?
        skip = self.__processFlags(O, flags)

        descriptions = []
        if not skip:
            R = Retriever.Retriever(O)
            descriptions = R.snpConvert(cmd)

        # Todo: Is output ok?
        outputline = "%s\t" % cmd
        outputline += "%s\t" % "|".join(descriptions)
        outputline += "%s\t" % "|".join(O.getBatchMessages(2))

        #Output
        filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                            batch_job.result_id)

        # Opening in append mode creates the file, so whether a header is
        # needed must be decided before opening it.
        write_header = not os.path.exists(filename)

        if flags and 'C' in flags:
            separator = '\t'
        else:
            separator = '\n'

        # The with statement closes the file even if one of the writes
        # raises, so the handle can no longer leak.
        with io.open(filename, mode='a', encoding='utf-8') as handle:
            if write_header:
                # The file did not exist yet, start it with the header as
                # a tab delimited string.
                header = ['Input Variant',
                          'HGVS description(s)',
                          'Errors and warnings']
                handle.write("%s\n" % "\t".join(header))

            handle.write("%s%s" % (outputline, separator))

        O.addMessage(__file__, -1, "INFO",
                     "Finished SNP converter batch rs%s" % cmd)
示例#13
0
文件: views.py 项目: cchng/mutalyzer
def lovd_get_gs():
    """
    LOVD bypass to get the correct GeneSymbol incl Transcript variant.

    Used by LOVD to get the correct transcript variant out of a genomic
    record. LOVD uses a genomic reference (``NC_``?) in combination with a
    gene symbol to pass variant info to mutalyzer. Mutalyzer 1.0 was only
    using the first transcript. LOVD supplies the NM of the transcript needed
    but this was ignored. This helper allows LOVD to get the requested
    transcript variant from a genomic reference.

    Parameters:

    mutationName
      The mutationname without gene symbol.
    variantRecord
      The NM reference of the variant.
    forward
      If set this forwards the request to the name checker.

    Returns: A redirect to the name checker if `forward` is set, otherwise
    the gene symbol with the variant notation as plain text. If no legend
    entry matches `variantRecord`, the plain text 'Transcript not found'.
    """
    def plain_text(body):
        # All non-redirect answers of this view are plain text.
        response = make_response(body)
        response.headers['Content-Type'] = 'text/plain; charset=utf-8'
        return response

    mutation_name = request.args['mutationName']
    variant_record = request.args['variantRecord']
    forward = request.args.get('forward')

    call_arguments = (mutation_name, variant_record, forward)

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request getGS(%s, %s, %s) from %s'
                      % (call_arguments + (request.remote_addr,)))

    variantchecker.check_variant(mutation_name, output)

    output.addMessage(__file__, -1, 'INFO',
                      'Finished request getGS(%s, %s, %s)' % call_arguments)

    # Filter the transcript from the legend.
    transcripts = [entry for entry in output.getOutput('legends')
                   if '_v' in entry[0]]

    for entry in transcripts:
        if entry[1] != variant_record:
            continue

        if not forward:
            return plain_text(entry[0])

        # Put the matching legend name between the reference and the
        # variant part of the mutation name.
        p, a = mutation_name.split(':')
        target = url_for('.name_checker',
                         description='%s(%s):%s' % (p, entry[0], a),
                         standalone=1)
        return redirect(target)

    return plain_text('Transcript not found')
示例#14
0
    def _processSyntaxCheck(self, batch_job, cmd, flags):
        """
        Process an entry from the Syntax Check and append the result to the
        job-file.

        The result line holds the input and either "OK" or the batch
        messages, separated by a tab. If the job-file does not exist yet,
        it is created with a header line above the result.

        Side-effect:
            - Output written to outputfile

        @arg batch_job: The batch job of this entry, its result_id is used
                        for the name of the job-file
        @type batch_job:
        @arg cmd:   The Syntax Checker input
        @type cmd:
        @arg flags: Flags of the current entry
        @type flags:
        """
        output = Output(__file__)
        grammar = Grammar(output)

        output.addMessage(__file__, -1, "INFO",
                          "Received SyntaxChecker batchvariant " + cmd)

        stats.increment_counter('syntax-checker/batch')

        skip = self.__processFlags(output, flags)
        #Process
        if not skip:
            parsetree = grammar.parse(cmd)
        else:
            parsetree = None

        if parsetree:
            result = "OK"
        else:
            result = "|".join(output.getBatchMessages(2))

        #Output
        filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                            batch_job.result_id)

        # Opening in append mode creates the file, so whether a header is
        # needed must be decided before opening it.
        write_header = not os.path.exists(filename)

        if flags and 'C' in flags:
            separator = '\t'
        else:
            separator = '\n'

        # The with statement closes the file even if one of the writes
        # raises, so the handle can no longer leak.
        with io.open(filename, mode='a', encoding='utf-8') as handle:
            if write_header:
                # The file did not exist yet, start it with the header as
                # a tab delimited string.
                header = ['Input', 'Status']
                handle.write("%s\n" % "\t".join(header))

            handle.write("%s\t%s%s" % (cmd, result, separator))

        output.addMessage(__file__, -1, "INFO",
                          "Finished SyntaxChecker batchvariant " + cmd)
示例#15
0
def snp_converter():
    """
    SNP converter.

    Convert a dbSNP rs number to HGVS description(s) of the SNP specified on
    the reference sequence(s) used by dbSNP.

    The rs number is taken from the `rs_id` query argument; without it only
    the empty form is rendered. Requests using the old `rsId` argument are
    permanently redirected to this view with an `rs_id` argument.

    If communicating with dbSNP fails, an EENTREZ error message is added and
    the page is rendered with an empty list of descriptions.
    """
    # Backwards compatibility.
    if 'rsId' in request.args:
        return redirect(url_for('.snp_converter',
                                rs_id=request.args['rsId']),
                        code=301)

    rs_id = request.args.get('rs_id')

    if not rs_id:
        return render_template('snp-converter.html')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request snpConvert(%s) from %s'
                      % (rs_id, request.remote_addr))
    stats.increment_counter('snp-converter/website')

    # Bind the name up front: if the service call fails there are no
    # descriptions, but the template still needs a value. Without this the
    # render_template call below raised an UnboundLocalError and the error
    # message added in the except clause was never shown.
    descriptions = []
    try:
        descriptions = ncbi.rsid_to_descriptions(rs_id)
    except ncbi.ServiceError:
        output.addMessage(__file__, 4, 'EENTREZ',
                          'An error occured while communicating with dbSNP.')

    # The messages are taken before the end of the request is logged.
    messages = map(util.message_info, output.getMessages())

    output.addMessage(__file__, -1, 'INFO',
                      'Finished request snpConvert(%s)' % rs_id)

    return render_template('snp-converter.html',
                           rs_id=rs_id,
                           descriptions=descriptions,
                           messages=messages,
                           summary=output.Summary()[2])
示例#16
0
文件: views.py 项目: cchng/mutalyzer
def snp_converter():
    """
    SNP converter.

    Look up the HGVS description(s) for the dbSNP rs number given in the
    `rs_id` query argument and render them. Without an rs number only the
    empty form is rendered. Requests using the old `rsId` argument are
    permanently redirected to this view with an `rs_id` argument.
    """
    # Backwards compatibility.
    if 'rsId' in request.args:
        target = url_for('.snp_converter', rs_id=request.args['rsId'])
        return redirect(target, code=301)

    rs_id = request.args.get('rs_id')
    if not rs_id:
        return render_template('snp-converter.html')

    output = Output(__file__)
    received = ('Received request snpConvert(%s) from %s'
                % (rs_id, request.remote_addr))
    output.addMessage(__file__, -1, 'INFO', received)
    stats.increment_counter('snp-converter/website')

    descriptions = Retriever.Retriever(output).snpConvert(rs_id)

    # The messages are taken before the end of the request is logged.
    messages = map(util.message_info, output.getMessages())

    finished = 'Finished request snpConvert(%s)' % rs_id
    output.addMessage(__file__, -1, 'INFO', finished)

    # Only the third item of the summary is passed to the template.
    summary = output.Summary()[2]

    return render_template('snp-converter.html',
                           rs_id=rs_id,
                           descriptions=descriptions,
                           messages=messages,
                           summary=summary)
示例#17
0
def back_translator():
    """
    Back translator.

    Back translate the description given in the `description` query
    argument and render the result page. Without a description no back
    translation is done and the page is rendered with an empty variant
    list.
    """
    output = Output(__file__)
    received = 'Received Back Translate request from {}'.format(
        request.remote_addr)
    output.addMessage(__file__, -1, 'INFO', received)
    stats.increment_counter('back-translator/website')

    description = request.args.get('description')

    if description:
        variants = backtranslator.backtranslate(output, description)
    else:
        variants = []

    # The middle item of the summary is not used by the template.
    errors, _, summary = output.Summary()
    # The messages are taken before the end of the request is logged.
    messages = map(util.message_info, output.getMessages())

    output.addMessage(__file__, -1, 'INFO', 'Finished Back Translate request')

    context = {
        'errors': errors,
        'summary': summary,
        'description': description or '',
        'messages': messages,
        'variants': variants,
    }
    return render_template('back-translator.html', **context)
示例#18
0
def description_extractor():
    """
    The Variant Description Extractor (experimental service).

    Describe the difference between the sequences given in the
    `reference_sequence` and `variant_sequence` query arguments and render
    the result. If either of them is missing, only the empty form is
    rendered.

    A sequence that is not DNA results in an ENODNA error message; the
    description is extracted regardless.
    """
    reference_sequence = request.args.get('reference_sequence')
    variant_sequence = request.args.get('variant_sequence')

    if not reference_sequence or not variant_sequence:
        return render_template('description-extractor.html')

    output = Output(__file__)
    received = ('Received Description Extract request from %s'
                % request.remote_addr)
    output.addMessage(__file__, -1, 'INFO', received)

    # Todo: Move this to the describe module.
    dna_checks = ((reference_sequence, 'Reference sequence is not DNA.'),
                  (variant_sequence, 'Variant sequence is not DNA.'))
    for sequence, complaint in dna_checks:
        if not util.is_dna(sequence):
            output.addMessage(__file__, 3, 'ENODNA', complaint)

    raw_vars = describe.describe(reference_sequence, variant_sequence)
    description = describe.alleleDescription(raw_vars)

    # The middle item of the summary is not used by the template.
    errors, _, summary = output.Summary()
    # The messages are taken before the end of the request is logged.
    messages = map(util.message_info, output.getMessages())

    output.addMessage(__file__, -1, 'INFO',
                      'Finished Description Extract request')

    context = {
        'reference_sequence': reference_sequence,
        'variant_sequence': variant_sequence,
        'raw_vars': raw_vars,
        'description': description,
        'errors': errors,
        'summary': summary,
        'messages': messages,
    }
    return render_template('description-extractor.html', **context)
示例#19
0
文件: fixtures.py 项目: p7k/mutalyzer
def output(settings):
    """
    Provide an `Output` instance created with the name 'test'.

    The `settings` argument is not used in the body; presumably it is only
    requested so the settings are set up first (to be confirmed).
    """
    test_output = Output('test')
    return test_output
示例#20
0
文件: views.py 项目: cchng/mutalyzer
def description_extractor_submit():
    """
    The Variant Description Extractor (experimental service).

    There multiple ways for the user to provide two sequences, corresponding to
    the values for the `reference_method` and `sample_method` fields, each
    requiring some additional fields to be defined:

    `raw_method`
      The reference and sample sequences are pasted into the form fields.

      - `reference_sequence`: The reference sequence.
      - `sample_sequence`: The sample sequence.

    `file_method`
      The reference and sample sequences are uploaded.

      - `reference_file`: The reference file.
      - `sample_file`: The sample file.

    `refseq_method`
      The reference and sample sequences are given by RefSeq accession numbers.

      - `reference_accession_number`: RefSeq accession number for the reference
        sequence.
      - `sample_accession_number`: RefSeq accession number for the sample
        sequence.

    Any other method value is handled like `raw_method`. The two methods are
    handled independently of each other, first the reference and then the
    sample.
    """
    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received Description Extract request from %s'
                      % request.remote_addr)
    stats.increment_counter('description-extractor/website')

    def load_sequence(method, accession_number, upload, pasted,
                      no_accession, no_upload, no_pasted):
        # Get one of the two sequences according to its input method and
        # return it together with the name of the uploaded file. Both are
        # the empty string if not available. If the field needed for the
        # method is empty, the corresponding EEMPTYFIELD message is added.
        if method == 'refseq_method':
            if not accession_number:
                output.addMessage(__file__, 3, 'EEMPTYFIELD', no_accession)
                return '', ''
            retriever = Retriever.GenBankRetriever(output)
            genbank_record = retriever.loadrecord(accession_number)
            if genbank_record:
                return unicode(genbank_record.seq), ''
            return '', ''

        if method == 'file_method':
            if not upload:
                output.addMessage(__file__, 3, 'EEMPTYFIELD', no_upload)
                return '', ''
            filename = upload.filename
            return util.read_dna(upload), filename

        # raw_method
        if not pasted:
            output.addMessage(__file__, 3, 'EEMPTYFIELD', no_pasted)
            return '', ''
        return util.read_dna(StringIO.StringIO(pasted)), ''

    reference_method = request.form.get('reference_method')
    sample_method = request.form.get('sample_method')
    reference_sequence = request.form.get('reference_sequence')
    sample_sequence = request.form.get('sample_sequence')
    reference_file = request.files.get('reference_file')
    sample_file = request.files.get('sample_file')
    reference_accession_number = request.form.get('reference_accession_number')
    sample_accession_number = request.form.get('sample_accession_number')

    r, reference_filename = load_sequence(
        reference_method, reference_accession_number, reference_file,
        reference_sequence,
        'Reference accession number input fields is empty.',
        'No reference file provided.',
        'Reference sequence number input fields is empty.')

    s, sample_filename = load_sequence(
        sample_method, sample_accession_number, sample_file,
        sample_sequence,
        'Sample accession number input fields is empty.',
        'No sample file provided.',
        'Sample sequence number input fields is empty.')

    # Todo: Move this to the describe module.
    if not (r and util.is_dna(r)):
        output.addMessage(__file__, 3, 'ENODNA',
                          'Reference sequence is not DNA.')
    if not (s and util.is_dna(s)):
        output.addMessage(__file__, 3, 'ENODNA',
                          'Sample sequence is not DNA.')

    # The extraction is only done if both sequences are available and
    # neither is longer than the configured maximum.
    raw_vars = None
    if r and s:
        if (len(r) > settings.EXTRACTOR_MAX_INPUT_LENGTH or
            len(s) > settings.EXTRACTOR_MAX_INPUT_LENGTH):
            output.addMessage(__file__, 3, 'EMAXSIZE',
                              'Input sequences are restricted to {:,} bp.'
                              .format(settings.EXTRACTOR_MAX_INPUT_LENGTH))
        else:
            raw_vars = extractor.describe_dna(r, s)

    # The middle item of the summary is not used by the template.
    errors, _, summary = output.Summary()
    # The messages are taken before the end of the request is logged.
    messages = map(util.message_info, output.getMessages())

    output.addMessage(__file__, -1, 'INFO',
                      'Finished Description Extract request')

    context = {
        'extractor_max_input_length': settings.EXTRACTOR_MAX_INPUT_LENGTH,
        'reference_sequence': reference_sequence or '',
        'sample_sequence': sample_sequence or '',
        'reference_accession_number': reference_accession_number or '',
        'sample_accession_number': sample_accession_number or '',
        'reference_filename': reference_filename or '',
        'sample_filename': sample_filename or '',
        'raw_vars': raw_vars,
        'errors': errors,
        'summary': summary,
        'messages': messages,
        'reference_method': reference_method,
        'sample_method': sample_method,
    }
    return render_template('description-extractor.html', **context)
示例#21
0
文件: views.py 项目: cchng/mutalyzer
def batch_jobs_submit():
    """
    Run batch jobs and render batch checker HTML form. The batch jobs are
    added to the database by the scheduler and ran by the BatchChecker
    daemon.

    The form fields `job_type`, `email` and `assembly_name_or_alias` and the
    uploaded `file` are validated first. If validation passes and the batch
    file can be parsed, the job is handed to the scheduler and, when no
    messages were recorded in the output, the client is redirected to the
    job progress page. In all other cases the form is rendered again with
    the collected messages.
    """
    job_type = request.form.get('job_type')
    email = request.form.get('email')

    # When a file was uploaded, this is always a seekable binary file object.
    batch_file = request.files.get('file')

    assemblies = Assembly.query \
        .order_by(*Assembly.order_by_criteria) \
        .all()
    assembly_name_or_alias = request.form.get('assembly_name_or_alias',
                                              settings.DEFAULT_ASSEMBLY)

    errors = []

    if not email:
        errors.append('Please provide an email address.')

    if job_type not in BATCH_JOB_TYPES:
        errors.append('Invalid batch job type.')

    # Check the uploaded file itself. Testing the builtin `file` type here
    # would always be truthy and let a missing upload slip through to the
    # batch file parser.
    if not batch_file:
        errors.append('Please select a local file for upload.')

    # Only the position converter takes an extra argument (the assembly).
    if job_type == 'position-converter':
        try:
            Assembly.by_name_or_alias(assembly_name_or_alias)
        except NoResultFound:
            errors.append('Not a valid assembly.')
        argument = assembly_name_or_alias
    else:
        argument = None

    output = Output(__file__)

    if not errors:
        stats.increment_counter('batch-job/website')

        scheduler = Scheduler.Scheduler()
        file_instance = File.File(output)
        job, columns = file_instance.parseBatchFile(batch_file)

        if job is None:
            errors.append('Could not parse input file, please check your '
                          'file format.')
        else:
            # Creates the result download URL from a job result_id.
            def create_download_url(result_id):
                return url_for('.batch_job_result',
                               result_id=result_id,
                               _external=True)

            result_id = scheduler.addJob(
                email, job, columns, job_type, argument=argument,
                create_download_url=create_download_url)

            # Todo: We now assume that the job was not scheduled if there are
            #   messages, which is probably not correct.
            if not output.getMessages():
                return redirect(url_for('.batch_job_progress',
                                        result_id=result_id))

    # Report validation and parsing errors through the regular message
    # channel so the template shows them together with parser messages.
    for error in errors:
        output.addMessage(__file__, 3, 'EBATCHJOB', error)

    messages = map(util.message_info, output.getMessages())

    return render_template('batch-jobs.html',
                           assemblies=assemblies,
                           assembly_name_or_alias=assembly_name_or_alias,
                           job_type=job_type,
                           max_file_size=settings.MAX_FILE_SIZE // 1048576,
                           messages=messages)
示例#22
0
 def setup(self):
     """
     Run the parent class setup, then create a new Output object and a
     Grammar instance constructed with it.
     """
     super(TestGrammar, self).setup()
     self.output = Output(__file__)
     self.grammar = Grammar(self.output)
示例#23
0
 def setup(self):
     """
     Run the parent class setup, then create a new Output object for the
     test to use.
     """
     super(TestConverter, self).setup()
     self.output = Output(__file__)
示例#24
0
def reference_loader_submit():
    """
    Reference sequence loader.

    There are five ways for the user to load a reference sequence,
    corresponding to values for the `method` field, each requiring some
    additional fields to be defined:

    `method=upload_method`
      The reference sequence file is uploaded from a local file.

      - `file`: Reference sequence file to upload.

    `method=url_method`
      The reference sequence file can be found at the specified URL.

      - `url`: URL of reference sequence file to load.

    `method=slice_gene_method`
      Retrieve part of the reference genome for an HGNC gene symbol.

      - `genesymbol`: Gene symbol.
      - `organism`: Organism.
      - `upstream`: Number of 5' flanking nucleotides.
      - `downstream`: Number of 3' flanking nucleotides.

    `method=slice_accession_method`
      Retrieve a range of a chromosome by accession number.

      - `accession`: Chromosome Accession Number.
      - `accession_start`: Start position (one-based, inclusive, in reference
          orientation).
      - `accession_stop`: Stop position (one-based, inclusive, in reference
          orientation).
      - `accession_orientation`: Orientation.

    `method=slice_chromosome_method`
      Retrieve a range of a chromosome by name.

      - `assembly_name_or_alias`: Genome assembly by name or by alias.
      - `chromosome`: Chromosome name.
      - `chromosome_start`: Start position (one-based, inclusive, in reference
          orientation).
      - `chromosome_stop`: Stop position (one-based, inclusive, in reference
          orientation).
      - `chromosome_orientation`: Orientation.
    """
    method = request.form.get('method')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request upload(%s) with arguments %s from %s'
                      % (method, unicode(request.form), request.remote_addr))

    assemblies = Assembly.query \
        .order_by(*Assembly.order_by_criteria) \
        .all()

    retriever = Retriever.GenBankRetriever(output)
    ud, errors = '', []

    class InputException(Exception):
        # Raised for invalid values in user-supplied form fields.
        pass

    def check_position(position, field):
        # Convert a user-supplied position to an integer, ignoring the
        # characters ',', '.' and '-'. A missing value (no `replace` method)
        # and a non-numeric value are both reported as an InputException
        # naming the offending field.
        try:
            cleaned = (position.replace(',', '')
                               .replace('.', '')
                               .replace('-', ''))
            return int(cleaned)
        except (AttributeError, ValueError):
            # The tuple is required: `except AttributeError, ValueError:`
            # would catch only AttributeError and bind it to the name
            # `ValueError`.
            raise InputException('Expected an integer in field: %s' % field)
示例#25
0
class TestConverter(MutalyzerTest):
    """
    Test the Converter class.
    """
    fixtures = (database, hg19, hg19_transcript_mappings)

    def setup(self):
        """
        Run the parent class setup and create a new Output object, which is
        passed to every Converter created by `_converter`.
        """
        super(TestConverter, self).setup()
        self.output = Output(__file__)

    def _converter(self, assembly_name_or_alias):
        """
        Create a Converter instance for a given genome assembly.
        """
        assembly = Assembly.query \
            .filter(or_(Assembly.name == assembly_name_or_alias,
                        Assembly.alias == assembly_name_or_alias)) \
            .one()
        return Converter(assembly, self.output)

    def test_converter(self):
        """
        Simple test.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274G>T')
        assert genomic == 'NC_000011.9:g.111959695G>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274G>T' in coding
        # Fix for r536: disable the -u and +d convention.
        #assert 'NR_028383.1:c.1-u2173C>A' in coding
        assert 'NR_028383.1:n.-2173C>A' in coding

    def test_converter_non_coding(self):
        """
        Test with variant on non-coding transcript.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NR_028383.1:n.-2173C>A')
        assert genomic == 'NC_000011.9:g.111959695G>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274G>T' in coding
        # Fix for r536: disable the -u and +d convention.
        #assert 'NR_028383.1:c.1-u2173C>A' in coding
        assert 'NR_028383.1:n.-2173C>A' in coding

    def test_converter_compound(self):
        """
        Test with compound variant.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.[274G>T;278A>G]')
        assert genomic == 'NC_000011.9:g.[111959695G>T;111959699A>G]'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.[274G>T;278A>G]' in coding
        assert 'NR_028383.1:n.[-2173C>A;-2177T>C]' in coding

    def test_hla_cluster(self):
        """
        Convert to primary assembly.

        Transcript NM_000500.5 is mapped to different chromosome locations,
        but we like to just see the primary assembly mapping to chromosome 6.

        See also bug #58.
        """
        # Todo: This test is bogus now that we use a fixture that has just the
        #   mapping to chromosome 6. However, I think we only get this mapping
        #   from our current source (NCBI seq_gene.md) anyway, so I'm not sure
        #   where we got the other mappings from in the past (but haven't
        #   investigated really).
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_000500.5:c.92C>T')
        assert genomic == 'NC_000006.11:g.32006291C>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_000500.5:c.92C>T' in coding

    def test_converter_del_length_reverse(self):
        """
        Position converter on deletion (denoted by length) on transcripts
        located on the reverse strand.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000022.10:g.51016285_51017117del123456789', 'list')
        # Fix for r536: disable the -u and +d convention.
        #assert 'NM_001145134.1:c.-138-u21_60del123456789' in coding
        #assert 'NR_021492.1:c.1-u5170_1-u4338del123456789' in coding
        assert 'NM_001145134.1:c.-159_60del123456789' in coding
        assert 'NR_021492.1:n.-5170_-4338del123456789' in coding

    def test_S_Venkata_Suresh_Kumar(self):
        """
        Test for correct mapping information on genes where CDS start or stop
        is exactly on the border of an exon.

        Bug reported February 24, 2012 by S Venkata Suresh Kumar.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000001.10:g.115259837_115259837delT',
                                   'list')
        assert 'NM_001007553.1:c.3863delA' not in coding
        assert 'NM_001007553.1:c.*953delA' in coding
        assert 'NM_001130523.1:c.*953delA' in coding

    def test_S_Venkata_Suresh_Kumar_more(self):
        """
        Another test for correct mapping information on genes where CDS start
        or stop is exactly on the border of an exon.

        Bug reported March 21, 2012 by S Venkata Suresh Kumar.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000001.10:g.160012314_160012329del16',
                                   'list')
        assert 'NM_002241.4:c.-27250-7_-27242del16' not in coding
        assert 'NM_002241.4:c.1-7_9del16' in coding

    def test_range_order_forward_correct(self):
        """
        Just a normal position converter call, both directions.  See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.-1_274del')
        assert genomic == 'NC_000011.9:g.111957631_111959695del'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.-1_274del' in coding

    def test_range_order_forward_incorrect_c2chrom(self):
        """
        Incorrect order of a range on the forward strand. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274_-1del')
        assert genomic is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_range_order_reverse_correct(self):
        """
        Just a normal position converter call on the reverse strand, both
        directions. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_001162505.1:c.-1_40del')
        assert genomic == 'NC_000020.10:g.48770135_48770175del'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_001162505.1:c.-1_40del' in coding

    def test_range_order_reverse_incorrect_c2chrom(self):
        """
        Incorrect order of a range on the reverse strand. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_001162505.1:c.40_-1del')
        assert genomic is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_range_order_incorrect_chrom2c(self):
        """
        Incorrect order of a chromosomal range. See Trac #95.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000011.9:g.111959695_111957631del',
                                   'list')
        assert coding is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_delins_large_ins_c2chrom(self):
        """
        Delins with multi-base insertion c. to chrom.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274delinsTAAA')
        assert genomic == 'NC_000011.9:g.111959695delinsTAAA'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_explicit_c2chrom(self):
        """
        Delins with multi-base insertion and explicit deleted sequence c. to chrom.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274delGinsTAAA')
        assert genomic == 'NC_000011.9:g.111959695delinsTAAA'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_chrom2c(self):
        """
        Delins with multi-base insertion chrom to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000011.9:g.111959695delinsTAAA', 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_explicit_chrom2c(self):
        """
        Delins with multi-base insertion and explicit deleted sequence chrom to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000011.9:g.111959695delGinsTAAA',
                                   'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_chrm_chrom2c(self):
        """
        Mitochondrial m. to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_012920.1:m.12030del', 'list')
        assert 'NC_012920.1(ND4_v001):c.1271del' in coding

    def test_chrm_name_chrom2c(self):
        """
        Mitochondrial m. (by chromosome name) to c.
        """
        converter = self._converter('hg19')
        variant = converter.correctChrVariant('chrM:m.12030del')
        coding = converter.chrom2c(variant, 'list')
        assert 'NC_012920.1(ND4_v001):c.1271del' in coding

    def test_chrm_c2chrom(self):
        """
        Mitochondrial c. to m.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NC_012920.1(ND4_v001):c.1271del')
        assert genomic == 'NC_012920.1:m.12030del'

    def test_nm_without_selector_chrom2c(self):
        """
        NM reference without transcript selection c. to g.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2:c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_nm_with_selector_chrom2c(self):
        """
        NM reference with transcript selection c. to g.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2(CHD7_v001):c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_nm_c2chrom_no_selector(self):
        """
        To NM reference should never result in transcript selection.
        """
        converter = self._converter('hg19')
        variant = converter.correctChrVariant('NC_000008.10:g.61654100A>T')
        coding = converter.chrom2c(variant, 'list')
        assert 'NM_017780.2:c.109A>T' in coding

    def test_incorrect_selector_c2chrom(self):
        """
        Incorrect selector.
        """
        converter = self._converter('hg19')
        # Only the recorded error is checked; the return value is not used.
        converter.c2chrom('NM_017780.2(CHD8):c.109A>T')
        eaccnotindb = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(eaccnotindb) == 1

    def test_incorrect_selector_version_c2chrom(self):
        """
        Incorrect selector version.
        """
        converter = self._converter('hg19')
        # Only the recorded error is checked; the return value is not used.
        converter.c2chrom('NM_017780.2(CHD7_v002):c.109A>T')
        eaccnotindb = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(eaccnotindb) == 1

    def test_no_selector_version_c2chrom(self):
        """
        Selector but no selector version.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2(CHD7):c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_incorrect_selector_no_selector_version_c2chrom(self):
        """
        Incorrect selector, no selector version.
        """
        converter = self._converter('hg19')
        # Only the recorded error is checked; the return value is not used.
        converter.c2chrom('NM_017780.2(CHD8):c.109A>T')
        eaccnotindb = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(eaccnotindb) == 1

    def test_ins_seq_chrom2c(self):
        """
        Insertion of a sequence (chrom2c).
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000011.9:g.111957482_111957483insGAT',
                                   'list')
        assert 'NM_003002.2:c.-150_-149insGAT' in coding
        assert 'NM_012459.2:c.10_11insATC' in coding

    def test_ins_seq_seq(self):
        """
        Insertion of two sequences (chrom2c).
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000011.9:g.111957482_111957483ins[GAT;AAA]', 'list')
        assert 'NM_003002.2:c.-150_-149ins[GAT;AAA]' in coding
        assert 'NM_012459.2:c.10_11ins[TTT;ATC]' in coding

    def test_ins_seq_c2chrom_reverse(self):
        """
        Insertion of a sequence on reverse strand (c2chrom).
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_012459.2:c.10_11insATC')
        assert genomic == 'NC_000011.9:g.111957482_111957483insGAT'

    def test_ins_seq_seq_c2chrom_reverse(self):
        """
        Insertion of two sequences on reverse strand (c2chrom).
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_012459.2:c.10_11ins[TTT;ATC]')
        assert genomic == 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]'
示例#26
0
class TestGrammar(MutalyzerTest):
    """
    Test the mutalyzer.grammar module.
    """
    def setup(self):
        """
        Run the parent class setup, then create a new Output object and a
        Grammar instance constructed with it.
        """
        super(TestGrammar, self).setup()
        self.output = Output(__file__)
        self.grammar = Grammar(self.output)

    def _parse(self, description):
        """
        Parse a variant description and require that the output holds no
        parse error afterwards.
        """
        self.grammar.parse(description)
        parse_errors = self.output.getOutput('parseError')
        assert parse_errors == []

    def test_some_variants(self):
        """
        Some example variants.
        """
        examples = (
            'NM_002001.2:c.[12del]',
            'NM_002001.2:c.[(12del)]',
            'NM_002001.2:c.[(12del)?]',
            'NM_002001.2:c.[(12del);(12del)]',
            'NM_002001.2:c.[(12del;12del)]',
            'NM_002001.2:c.[((12del)?;12del)?]',
        )
        for example in examples:
            self._parse(example)

    def test_compound_insertion(self):
        """
        Some compound insertions.
        """
        examples = (
            'NM_002001.2:c.15_16insA',
            'NM_002001.2:c.15_16insATC',
            'NM_002001.2:c.15_16ins[A]',
            'NM_002001.2:c.15_16ins[ATC]',
            'NM_002001.2:c.15_16ins28_39',
            'NM_002001.2:c.15_16ins[28_39]',
            'NM_002001.2:c.15_16ins[28_39;A]',
            'NM_002001.2:c.15_16ins[28_39;ATC]',
            'NM_002001.2:c.15_16ins[28_39;A;ATC]',
            'NM_002001.2:c.15_16ins28_39inv',
            'NM_002001.2:c.15_16ins[28_39inv]',
            'NM_002001.2:c.15_16ins[28_39inv;A]',
            'NM_002001.2:c.15_16ins[28_39inv;ATC]',
            'NM_002001.2:c.15_16ins[28_39inv;A;ATC]',
        )
        for example in examples:
            self._parse(example)

    def test_compound_delins(self):
        """
        Some compound deletion-insertions.
        """
        examples = (
            'NM_002001.2:c.12_17delinsA',
            'NM_002001.2:c.12_17delinsATC',
            'NM_002001.2:c.12_17delins[A]',
            'NM_002001.2:c.12_17delins[ATC]',
            'NM_002001.2:c.12_17delins28_39',
            'NM_002001.2:c.12_17delins[28_39]',
            'NM_002001.2:c.12_17delins[28_39;A]',
            'NM_002001.2:c.12_17delins[28_39;ATC]',
            'NM_002001.2:c.12_17delins[28_39;A;ATC]',
            'NM_002001.2:c.12_17delins28_39inv',
            'NM_002001.2:c.12_17delins[28_39inv]',
            'NM_002001.2:c.12_17delins[28_39inv;A]',
            'NM_002001.2:c.12_17delins[28_39inv;ATC]',
            'NM_002001.2:c.12_17delins[28_39inv;A;ATC]',
        )
        for example in examples:
            self._parse(example)

    def test_protein_variants(self):
        """
        Some protein variants.
        """
        # Entries that are commented out are descriptions that are currently
        # not exercised by this test.
        examples = (
            'NG_009105.1(OPN1LW):p.=',
            'NG_009105.1(OPN1LW):p.?',
            'NM_000076.2(CDKN1C):p.0',
            'NM_000076.2(CDKN1C):p.0?',
            'NG_009105.1(OPN1LW):p.(=)',
            'NM_000076.2(CDKN1C):p.(Ala123del)',
            'NM_000076.2(CDKN1C):p.(Ala123_Leu126del)',
            'NM_000076.2(CDKN1C):p.(Ala123_Leu126delinsVal)',
            'NM_000076.2(CDKN1C):p.Ala123del',
            'NM_000076.2(CDKN1C):p.Ala123_Leu126del',
            'NM_000076.2(CDKN1C):p.Ala123_Leu126delinsVal',
            'NM_000076.2(CDKN1C):p.Ala123_*317delinsVal',
            'NM_000076.2(CDKN1C):p.Ala123_X317delinsVal',
            'NM_000076.2(CDKN1C):p.Ala123delinsVal',
            'NM_000076.2(CDKN1C):p.Ala123delinsValPro',
            'NM_000076.2(CDKN1C):p.Ala123delinsVP',
            'NM_000076.2(CDKN1C):p.Ala123fs',
            'NM_000076.2(CDKN1C_i001):p.(Glu124Serfs*148)',
            'NM_000076.2(CDKN1C_i001):p.(Glu124SerfsX148)',
            'NM_000076.2(CDKN1C_i001):p.(E124Sfs*148)',
            'NM_000076.2(CDKN1C_i001):p.(E124SfsX148)',
            'NG_009105.1(OPN1LW):p.Met1Leu',
            'NP_064445.1(OPN1LW):p.Met1?',
            'NP_064445.1(OPN1LW):p.M1?',
            'NP_064445.1:p.Gln16del',
            'NP_064445.1:p.Gln16dup',
            'NP_064445.1:p.Gln3del',
            'NP_064445.1:p.Q16del',
            'NP_064445.1:p.Q16dup',
            'NP_064445.1:p.Q16*',
            'NP_064445.1:p.Q16X',
            'NG_009105.1:p.Gln3Leu',
            'NG_009105.1(OPN1LW):p.Gln3Leu',
            'NG_009105.1(OPN1LW_i1):p.Gln3Leu',
            'NG_009105.1(OPN1LW_v1):p.Gln3Leu',
            'NG_009105.1(OPN1LW):p.Gln3_Gln4insLeu',
            'NG_009105.1(OPN1LW):p.Gln3_Gln4insGln',
            'NG_009105.1(OPN1LW):p.Gln3_Gln4dup',
            'NG_009105.1(OPN1LW):p.Q3_Q4insQ',
            'NG_009105.1(OPN1LW):p.Q3_Q4insQQ',
            'NG_009105.1(OPN1LW):p.Q3_Q4dup',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7del',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsValLeu',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsValPro',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsGlnGlnTrpSerLeu',
            'NG_009105.1(OPN1LW):p.Q3_L7delinsGlnGlnTrpSerLeu',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsQQWSL',
            #'NG_009105.1(OPN1LW):p.Met1AlaextMet-1',
            #'NG_009105.1(OPN1LW):p.M1AextM-1',
            #'NG_009105.1(OPN1LW):p.Gln3_Leu7[3]',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7(1_6)',
            'NG_009105.1(OPN1LW):p.Gln3Leu',
            'NG_009105.1(OPN1LW):p.Gln3Leu',
            #'NM_000076.2(CDKN1C_i001):p.(*317Trpext*3)',
            'NM_000076.2(CDKN1C_i001):p.(*317TrpextX3)',
            #'NM_000076.2(CDKN1C_i001):p.(*317Cysext*1)',
            'NM_000076.2(CDKN1C_i001):p.(*317CysextX1)',
            #'NM_000076.2(CDKN1C_i001):p.(*317Cext*1)',
            'NM_000076.2(CDKN1C_i001):p.(*317CextX1)',
            #'t(X;17)(DMD:p.Met1_Val1506; SGCA:p.Val250_*387)',
        )
        for example in examples:
            self._parse(example)

    def test_minus_in_gene_symbol(self):
        """
        Gene symbol is allowed to contain a minus character.
        """
        symbol_with_minus = 'UD_132464528477(KRTAP2-4_v001):c.100del'
        self._parse(symbol_with_minus)
示例#27
0
文件: views.py 项目: cchng/mutalyzer
def name_checker():
    """
    Name checker.

    Without a `description` argument the plain form is rendered. Otherwise
    the variant checker is run on the description and everything it wrote to
    the output object is handed to the `name-checker.html` template.
    """
    query = request.args

    # Backwards compatibility: `name` is an alias for `description`, and
    # `mutationName` (older LOVD versions) additionally implies
    # `standalone=1`. Both are answered with a permanent redirect.
    if 'name' in query:
        target = url_for('.name_checker',
                         description=query['name'],
                         standalone=query.get('standalone'))
        return redirect(target, code=301)
    if 'mutationName' in query:
        target = url_for('.name_checker',
                         description=query['mutationName'],
                         standalone=1)
        return redirect(target, code=301)

    description = query.get('description')

    if not description:
        return render_template('name-checker.html')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO', 'Received variant %s from %s'
                      % (description, request.remote_addr))
    stats.increment_counter('name-checker/website')

    variantchecker.check_variant(description, output)

    errors, _warnings, summary = output.Summary()
    parse_error = output.getOutput('parseError')

    # LRG records are offered as XML, everything else as GenBank.
    record_type = output.getIndexedOutput('recordType', 0, '')
    reference = output.getIndexedOutput('reference', 0, '')
    reference_filename = None
    if reference:
        extension = '.xml' if record_type == 'LRG' else '.gb'
        reference_filename = reference + extension

    genomic_dna = output.getIndexedOutput('molType', 0) != 'n'
    genomic_description = output.getIndexedOutput('genomicDescription', 0, '')

    # Create a link to the UCSC Genome Browser, showing the region spanned by
    # all raw variants with 10 positions added on either side.
    browser_link = None
    raw_variants = output.getIndexedOutput('rawVariantsChromosomal', 0)
    if raw_variants:
        boundaries = []
        for _descr, (first, last) in raw_variants[2]:
            boundaries.extend((first, last))
        bed_url = url_for('.bed', description=description, _external=True)
        link_template = ('http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&'
                         'position={chromosome}:{start}-{stop}&hgt.customText='
                         '{bed_file}')
        browser_link = link_template.format(
            chromosome=raw_variants[0],
            start=min(boundaries) - 10,
            stop=max(boundaries) + 10,
            bed_file=urllib.quote(bed_url))

    # Experimental description extractor.
    extracted = ''
    if (output.getIndexedOutput('original', 0) and
            output.getIndexedOutput('mutated', 0)):
        allele = extractor.describe_dna(output.getIndexedOutput('original', 0),
                                        output.getIndexedOutput('mutated', 0))
        extracted = unicode(allele) if allele else '(skipped)'

    # Todo: Generate the fancy HTML views for the proteins here instead of in
    #   `mutalyzer.variantchecker`.
    arguments = dict(
        description=description,
        messages=map(util.message_info, output.getMessages()),
        summary=summary,
        parse_error=parse_error,
        errors=errors,
        genomicDescription=genomic_description,
        chromDescription=output.getIndexedOutput('genomicChromDescription',
                                                 0),
        genomicDNA=genomic_dna,
        visualisation=output.getOutput('visualisation'),
        descriptions=output.getOutput('descriptions'),
        protDescriptions=output.getOutput('protDescriptions'),
        oldProtein=output.getOutput('oldProteinFancy'),
        altStart=output.getIndexedOutput('altStart', 0),
        altProtein=output.getOutput('altProteinFancy'),
        newProtein=output.getOutput('newProteinFancy'),
        transcriptInfo=output.getIndexedOutput('hasTranscriptInfo', 0,
                                               False),
        transcriptCoding=output.getIndexedOutput('transcriptCoding', 0,
                                                 False),
        exonInfo=output.getOutput('exonInfo'),
        cdsStart_g=output.getIndexedOutput('cdsStart_g', 0),
        cdsStart_c=output.getIndexedOutput('cdsStart_c', 0),
        cdsStop_g=output.getIndexedOutput('cdsStop_g', 0),
        cdsStop_c=output.getIndexedOutput('cdsStop_c', 0),
        restrictionSites=output.getOutput('restrictionSites'),
        legends=output.getOutput('legends'),
        # Todo: Download link is not shown...
        reference_filename=reference_filename,
        browserLink=browser_link,
        extractedDescription=extracted,
        standalone=bool(query.get('standalone')))

    # The template arguments are collected before this closing log message is
    # added, so the message is never part of `messages`.
    output.addMessage(__file__, -1, 'INFO',
                      'Finished variant %s' % description)

    return render_template('name-checker.html', **arguments)
示例#28
0
 def setup(self):
     """
     Run the parent class setup, then create a new Output object for the
     test to use.
     """
     super(TestMutator, self).setup()
     self.output = Output(__file__)
示例#29
0
class TestConverter(MutalyzerTest):
    """
    Test the Converter class.

    Every test builds a Converter for the 'hg19' assembly (see
    `_converter`) that reports through `self.output`, so tests can inspect
    the messages generated during a conversion.
    """
    fixtures = (database, hg19, hg19_transcript_mappings)

    def setup(self):
        """
        Run the inherited setup and create a fresh Output for this test.
        """
        super(TestConverter, self).setup()
        self.output = Output(__file__)

    def _converter(self, assembly_name_or_alias):
        """
        Create a Converter instance for a given genome assembly.

        The assembly is looked up by name or by alias; `.one()` is used, so
        the lookup fails unless exactly one assembly matches.
        """
        assembly = Assembly.query \
            .filter(or_(Assembly.name == assembly_name_or_alias,
                        Assembly.alias == assembly_name_or_alias)) \
            .one()
        return Converter(assembly, self.output)

    def test_converter(self):
        """
        Simple test.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274G>T')
        assert genomic == 'NC_000011.9:g.111959695G>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274G>T' in coding
        # Fix for r536: disable the -u and +d convention.
        #assert 'NR_028383.1:c.1-u2173C>A' in coding
        assert 'NR_028383.1:n.-2173C>A' in coding

    def test_converter_non_coding(self):
        """
        Test with variant on non-coding transcript.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NR_028383.1:n.-2173C>A')
        assert genomic == 'NC_000011.9:g.111959695G>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274G>T' in coding
        # Fix for r536: disable the -u and +d convention.
        #assert 'NR_028383.1:c.1-u2173C>A' in coding
        assert 'NR_028383.1:n.-2173C>A' in coding

    def test_converter_compound(self):
        """
        Test with compound variant.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.[274G>T;278A>G]')
        assert genomic == 'NC_000011.9:g.[111959695G>T;111959699A>G]'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.[274G>T;278A>G]' in coding
        assert 'NR_028383.1:n.[-2173C>A;-2177T>C]' in coding

    def test_hla_cluster(self):
        """
        Convert to primary assembly.

        Transcript NM_000500.5 is mapped to different chromosome locations,
        but we like to just see the primary assembly mapping to chromosome 6.

        See also bug #58.
        """
        # Todo: This test is bogus now that we use a fixture that has just the
        #   mapping to chromosome 6. However, I think we only get this mapping
        #   from our current source (NCBI seq_gene.md) anyway, so I'm not sure
        #   where we got the other mappings from in the past (but haven't
        #   investigated really).
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_000500.5:c.92C>T')
        assert genomic == 'NC_000006.11:g.32006291C>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_000500.5:c.92C>T' in coding

    def test_converter_del_length_reverse(self):
        """
        Position converter on deletion (denoted by length) on transcripts
        located on the reverse strand.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000022.10:g.51016285_51017117del123456789', 'list')
        # Fix for r536: disable the -u and +d convention.
        #assert 'NM_001145134.1:c.-138-u21_60del123456789' in coding
        #assert 'NR_021492.1:c.1-u5170_1-u4338del123456789' in coding
        assert 'NM_001145134.1:c.-159_60del123456789' in coding
        assert 'NR_021492.1:n.-5170_-4338del123456789' in coding

    def test_S_Venkata_Suresh_Kumar(self):
        """
        Test for correct mapping information on genes where CDS start or stop
        is exactly on the border of an exon.

        Bug reported February 24, 2012 by S Venkata Suresh Kumar.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000001.10:g.115259837_115259837delT', 'list')
        assert 'NM_001007553.1:c.3863delA' not in coding
        assert 'NM_001007553.1:c.*953delA' in coding
        assert 'NM_001130523.1:c.*953delA' in coding

    def test_S_Venkata_Suresh_Kumar_more(self):
        """
        Another test for correct mapping information on genes where CDS start
        or stop is exactly on the border of an exon.

        Bug reported March 21, 2012 by S Venkata Suresh Kumar.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000001.10:g.160012314_160012329del16', 'list')
        assert 'NM_002241.4:c.-27250-7_-27242del16' not in coding
        assert 'NM_002241.4:c.1-7_9del16' in coding

    def test_range_order_forward_correct(self):
        """
        Just a normal position converter call, both directions.  See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.-1_274del')
        assert genomic == 'NC_000011.9:g.111957631_111959695del'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.-1_274del' in coding

    def test_range_order_forward_incorrect_c2chrom(self):
        """
        Incorrect order of a range on the forward strand. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274_-1del')
        # Identity check: the conversion must yield None itself.
        assert genomic is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_range_order_reverse_correct(self):
        """
        Just a normal position converter call on the reverse strand, both
        directions. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_001162505.1:c.-1_40del')
        assert genomic == 'NC_000020.10:g.48770135_48770175del'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_001162505.1:c.-1_40del' in coding

    def test_range_order_reverse_incorrect_c2chrom(self):
        """
        Incorrect order of a range on the reverse strand. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_001162505.1:c.40_-1del')
        assert genomic is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_range_order_incorrect_chrom2c(self):
        """
        Incorrect order of a chromosomal range. See Trac #95.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000011.9:g.111959695_111957631del', 'list')
        assert coding is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_delins_large_ins_c2chrom(self):
        """
        Delins with multi-base insertion c. to chrom.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274delinsTAAA')
        assert genomic == 'NC_000011.9:g.111959695delinsTAAA'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_explicit_c2chrom(self):
        """
        Delins with multi-base insertion and explicit deleted sequence c. to chrom.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274delGinsTAAA')
        assert genomic == 'NC_000011.9:g.111959695delinsTAAA'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_chrom2c(self):
        """
        Delins with multi-base insertion chrom to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000011.9:g.111959695delinsTAAA', 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_explicit_chrom2c(self):
        """
        Delins with multi-base insertion and explicit deleted sequence chrom to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000011.9:g.111959695delGinsTAAA', 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_chrm_chrom2c(self):
        """
        Mitochondrial m. to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_012920.1:m.12030del', 'list')
        assert 'NC_012920.1(ND4_v001):c.1271del' in coding

    def test_chrm_name_chrom2c(self):
        """
        Mitochondrial m. (by chromosome name) to c.
        """
        converter = self._converter('hg19')
        variant = converter.correctChrVariant('chrM:m.12030del')
        coding = converter.chrom2c(variant, 'list')
        assert 'NC_012920.1(ND4_v001):c.1271del' in coding

    def test_chrm_c2chrom(self):
        """
        Mitochondrial c. to m.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NC_012920.1(ND4_v001):c.1271del')
        assert genomic == 'NC_012920.1:m.12030del'

    def test_nm_without_selector_chrom2c(self):
        """
        NM reference without transcript selection c. to g.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2:c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_nm_with_selector_chrom2c(self):
        """
        NM reference with transcript selection c. to g.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2(CHD7_v001):c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_nm_c2chrom_no_selector(self):
        """
        To NM reference should never result in transcript selection.
        """
        converter = self._converter('hg19')
        variant = converter.correctChrVariant('NC_000008.10:g.61654100A>T')
        coding = converter.chrom2c(variant, 'list')
        assert 'NM_017780.2:c.109A>T' in coding

    def test_incorrect_selector_c2chrom(self):
        """
        Incorrect selector.
        """
        # NOTE(review): this uses the same input as
        # test_incorrect_selector_no_selector_version_c2chrom; a selector
        # *with* version may have been intended here -- confirm.
        converter = self._converter('hg19')
        # Only the generated messages matter; the return value is ignored.
        converter.c2chrom('NM_017780.2(CHD8):c.109A>T')
        not_in_db = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(not_in_db) == 1

    def test_incorrect_selector_version_c2chrom(self):
        """
        Incorrect selector version.
        """
        converter = self._converter('hg19')
        # Only the generated messages matter; the return value is ignored.
        converter.c2chrom('NM_017780.2(CHD7_v002):c.109A>T')
        not_in_db = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(not_in_db) == 1

    def test_no_selector_version_c2chrom(self):
        """
        Selector but no selector version.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2(CHD7):c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_incorrect_selector_no_selector_version_c2chrom(self):
        """
        Incorrect selector, no selector version.
        """
        converter = self._converter('hg19')
        # Only the generated messages matter; the return value is ignored.
        converter.c2chrom('NM_017780.2(CHD8):c.109A>T')
        not_in_db = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(not_in_db) == 1

    def test_ins_seq_chrom2c(self):
        """
        Insertion of a sequence (chrom2c).
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000011.9:g.111957482_111957483insGAT', 'list')
        assert 'NM_003002.2:c.-150_-149insGAT' in coding
        assert 'NM_012459.2:c.10_11insATC' in coding

    def test_ins_seq_seq(self):
        """
        Insertion of two sequences (chrom2c).
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_000011.9:g.111957482_111957483ins[GAT;AAA]', 'list')
        assert 'NM_003002.2:c.-150_-149ins[GAT;AAA]' in coding
        assert 'NM_012459.2:c.10_11ins[TTT;ATC]' in coding

    def test_ins_seq_c2chrom_reverse(self):
        """
        Insertion of a sequence on reverse strand (c2chrom).
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_012459.2:c.10_11insATC')
        assert genomic == 'NC_000011.9:g.111957482_111957483insGAT'

    def test_ins_seq_seq_c2chrom_reverse(self):
        """
        Insertion of two sequences on reverse strand (c2chrom).
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_012459.2:c.10_11ins[TTT;ATC]')
        assert genomic == 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]'
示例#30
0
    def _processConversion(self, batch_job, cmd, flags):
        """
        Process an entry from the Position Converter and append the result
        to the job-file.

        The conversion is wrapped in a try/except block so that an
        unexpected failure on one entry is recorded as an error message for
        that entry instead of stopping the batch process.

        Side-effect:
            - One line is appended to the job's result file. The file is
              created with a header line if it does not exist yet.

        @arg batch_job: The batch job this entry belongs to. Its `argument`
            is used as the assembly name or alias and its `result_id` names
            the result file.
        @arg cmd: The Position Converter input
        @type cmd: unicode
        @arg flags: Flags of the current entry
        """
        O = Output(__file__)
        variant = cmd
        gName = ""
        cNames = [""]

        O.addMessage(__file__, -1, "INFO",
            "Received PositionConverter batchvariant " + cmd)

        stats.increment_counter('position-converter/batch')

        skip = self.__processFlags(O, flags)
        if not skip:
            try:
                try:
                    assembly = Assembly.by_name_or_alias(batch_job.argument)
                except NoResultFound:
                    O.addMessage(__file__, 3, 'ENOASSEMBLY',
                                 'Not a valid assembly: ' + batch_job.argument)
                    # Re-raise to abandon this entry; the outer handler
                    # below catches it.
                    raise

                converter = Converter(assembly, O)

                # Also accept chr accNo
                variant = converter.correctChrVariant(variant)

                # TODO: Parse the variant and check for c or g. This is ugly
                if not (":c." in variant or ":n." in variant
                        or ":g." in variant):
                    # Bad name: hand it to the parser (presumably so that
                    # the problem is reported through O -- confirm).
                    grammar = Grammar(O)
                    grammar.parse(variant)

                if ":c." in variant or ":n." in variant:
                    # If we received a coding reference convert that to the
                    # genomic position variant. Use that variant as the
                    # input of the chrom2c.
                    variant = converter.c2chrom(variant)

                # If the input is a genomic variant or if we converted a
                # coding variant to a genomic variant we try to find all
                # other affected coding variants.
                if variant and ":g." in variant:
                    variants = converter.chrom2c(variant, "dict")
                    if variants:
                        gName = variant
                        # Due to the cyclic behavior of the Position Converter
                        # we know for a fact that if a correct chrom name is
                        # generated by the converter.c2chrom that we will at
                        # least find one variant with chrom2c. Collect the
                        # variants from the nested lists and store them.
                        cNames = [cName for cName2 in variants.values()
                                  for cName in cName2]
            except Exception:
                # Catch all exceptions related to the processing of cmd
                O.addMessage(__file__, 4, "EBATCHU",
                        "Unexpected error occurred, dev-team notified")

        error = "|".join(O.getBatchMessages(2))

        filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR, batch_job.result_id)

        # Opening in append mode creates the file, so decide beforehand
        # whether the header line still has to be written.
        needs_header = not os.path.exists(filename)

        if flags and 'C' in flags:
            separator = '\t'
        else:
            separator = '\n'

        # The context manager closes the file even if a write fails.
        with io.open(filename, mode='a', encoding='utf-8') as handle:
            if needs_header:
                # The header is a tab delimited line of column names.
                header = ['Input Variant',
                          'Errors',
                          'Chromosomal Variant',
                          'Coding Variant(s)']
                handle.write("%s\n" % "\t".join(header))
            handle.write("%s\t%s\t%s\t%s%s" % (cmd, error, gName, "\t".join(cNames), separator))

        O.addMessage(__file__, -1, "INFO",
            "Finisehd PositionConverter batchvariant " + cmd)
示例#31
0
 def setup(self):
     """
     Run the inherited fixture setup, then attach a fresh Output object to
     the test instance as `self.output`.
     """
     parent = super(TestConverter, self)
     parent.setup()
     self.output = Output(__file__)
示例#32
0
def batch_jobs_submit():
    """
    Run batch jobs and render batch checker HTML form. The batch jobs are
    added to the database by the scheduler and ran by the BatchChecker
    daemon.

    Reads `job_type`, `email`, `assembly_name_or_alias` and the uploaded
    `file` from the submitted form. If the job is scheduled without any
    messages, redirects to the job progress page; otherwise the form is
    rendered again with the collected messages.
    """
    job_type = request.form.get('job_type')
    email = request.form.get('email')

    # Note that this is always a seekable binary file object.
    batch_file = request.files.get('file')

    assemblies = Assembly.query \
        .order_by(*Assembly.order_by_criteria) \
        .all()
    assembly_name_or_alias = request.form.get('assembly_name_or_alias',
                                              settings.DEFAULT_ASSEMBLY)

    errors = []

    if not email:
        # No address given: derive a placeholder from the client address.
        email = '{}@website.mutalyzer'.format(request.remote_addr)

    if job_type not in BATCH_JOB_TYPES:
        errors.append('Invalid batch job type.')

    # Check the uploaded file itself. (Testing the builtin `file` type here
    # would always be truthy and never report a missing upload.)
    if not batch_file:
        errors.append('Please select a local file for upload.')

    if job_type == 'position-converter':
        # Only the position converter takes an argument: the assembly.
        try:
            Assembly.by_name_or_alias(assembly_name_or_alias)
        except NoResultFound:
            errors.append('Not a valid assembly.')
        argument = assembly_name_or_alias
    else:
        argument = None

    output = Output(__file__)

    if not errors:
        stats.increment_counter('batch-job/website')

        scheduler = Scheduler.Scheduler()
        file_instance = File.File(output)
        job, columns = file_instance.parseBatchFile(batch_file)

        if job is None:
            errors.append('Could not parse input file, please check your '
                          'file format.')
        else:
            result_id = scheduler.addJob(email, job, columns, job_type,
                                         argument=argument)

            # Todo: We now assume that the job was not scheduled if there are
            #   messages, which is probably not correct.
            if not output.getMessages():
                return redirect(url_for('.batch_job_progress',
                                        result_id=result_id))

    for error in errors:
        output.addMessage(__file__, 3, 'EBATCHJOB', error)

    messages = map(util.message_info, output.getMessages())

    return render_template('batch-jobs.html',
                           assemblies=assemblies,
                           assembly_name_or_alias=assembly_name_or_alias,
                           job_type=job_type,
                           max_file_size=settings.MAX_FILE_SIZE // 1048576,
                           messages=messages)
示例#33
0
def lovd_variant_info():
    """
    The chromosomal to coding and vice versa conversion interface for LOVD.

    Search for an NM number in the database, if the version number matches,
    get the start and end positions in a variant and translate these positions
    to chromosomal notation if the variant is in coding notation and vice
    versa.

    - If no end position is present, the start position is assumed to be the
      end position.
    - If the version number is not found in the database, an error message is
      generated and a suggestion for an other version is given.
    - If the reference sequence is not found at all, an error is returned.
    - If no variant is present, the transcription start and end and CDS end
      in coding notation is returned.
    - If the variant is not accepted by the nomenclature parser, a parse error
      will be printed.

    Get variant info and return the result as plain text.

    Parameters:

    LOVD_ver
      The version of the calling LOVD.
    build
      The human genome build (hg19 assumed).
    acc
      The accession number (NM number).
    var
      A description of the variant.

    Returns:

    start_main
      The main coordinate of the start position in I{c.} (non-star) notation.
    start_offset
      The offset coordinate of the start position in I{c.} notation (intronic
      position).
    end_main
      The main coordinate of the end position in I{c.} (non-star) notation.
    end_offset
      The offset coordinate of the end position in I{c.} notation (intronic
      position).
    start_g
      The I{g.} notation of the start position.
    end_g
      The I{g.} notation of the end position.
    type
      The mutation type.

    Returns (alternative):

    trans_start
      Transcription start in I{c.} notation.
    trans_stop
      Transcription stop in I{c.} notation.
    CDS_stop
      CDS stop in I{c.} notation.
    """
    lovd_version = request.args['LOVD_ver']
    build = request.args['build']
    accession = request.args['acc']
    description = request.args.get('var')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request variantInfo(%s:%s (LOVD_ver %s, '
                      'build %s)) from %s'
                      % (accession, description, lovd_version, build,
                         request.remote_addr))

    try:
        assembly = Assembly.by_name_or_alias(build)
    except NoResultFound:
        response = make_response('invalid build')
        response.headers['Content-Type'] = 'text/plain; charset=utf-8'
        return response

    converter = Converter(assembly, output)

    result = ''

    if description:
        ret = converter.mainMapping(accession, description)
    else:
        # If no variant is given, return transcription start, transcription
        # end and CDS stop in c. notation.
        ret = converter.giveInfo(accession)
        if ret:
            result = '%i\n%i\n%i' % ret

    # Neither transcript info nor a mapping was obtained: report an error.
    if not result and not getattr(ret, 'startmain', None):
        out = output.getOutput('LOVDERR')
        if out:
            result = out[0]
        else:
            result = 'Unknown error occured'

    output.addMessage(__file__, -1, 'INFO',
                      'Finished request variantInfo(%s:%s (LOVD_ver %s, '
                      'build %s))'
                      % (accession, description, lovd_version, build))

    if not result and getattr(ret, 'startmain', None):
        result = '%i\n%i\n%i\n%i\n%i\n%i\n%s' % (
            ret.startmain, ret.startoffset, ret.endmain, ret.endoffset,
            ret.start_g, ret.end_g, ret.mutationType)

    # Todo: Obsoleted error messages, remove soon.
    if lovd_version == '2.0-23':
        # Rewrite a leading 'Error (...):' to 'Error:'. The substitution
        # must be stored in `result`, since that is what is sent below.
        result = re.sub(r'^Error \(.*\):', 'Error:', result)

    response = make_response(result)
    response.headers['Content-Type'] = 'text/plain; charset=utf-8'
    return response
示例#34
0
    def _processConversion(self, batch_job, cmd, flags):
        """
        Process an entry from the Position Converter and append the result
        to the job-file.

        The conversion is wrapped in a try/except block so that an
        unexpected failure on one entry is recorded as an error message for
        that entry instead of stopping the batch process.

        Side-effect:
            - One line is appended to the job's result file. The file is
              created with a header line if it does not exist yet.

        @arg batch_job: The batch job this entry belongs to. Its `argument`
            is used as the assembly name or alias and its `result_id` names
            the result file.
        @arg cmd: The Position Converter input
        @type cmd: unicode
        @arg flags: Flags of the current entry
        """
        O = Output(__file__)
        variant = cmd
        gName = ""
        cNames = [""]

        O.addMessage(__file__, -1, "INFO",
                     "Received PositionConverter batchvariant " + cmd)

        stats.increment_counter('position-converter/batch')

        skip = self.__processFlags(O, flags)
        if not skip:
            try:
                try:
                    assembly = Assembly.by_name_or_alias(batch_job.argument)
                except NoResultFound:
                    O.addMessage(__file__, 3, 'ENOASSEMBLY',
                                 'Not a valid assembly: ' + batch_job.argument)
                    # Re-raise to abandon this entry; the outer handler
                    # below catches it.
                    raise

                converter = Converter(assembly, O)

                # Also accept chr accNo
                variant = converter.correctChrVariant(variant)

                # TODO: Parse the variant and check for c or g. This is ugly
                if not (":c." in variant or ":n." in variant
                        or ":g." in variant):
                    # Bad name: hand it to the parser (presumably so that
                    # the problem is reported through O -- confirm).
                    grammar = Grammar(O)
                    grammar.parse(variant)

                if ":c." in variant or ":n." in variant:
                    # If we received a coding reference convert that to the
                    # genomic position variant. Use that variant as the
                    # input of the chrom2c.
                    variant = converter.c2chrom(variant)

                # If the input is a genomic variant or if we converted a
                # coding variant to a genomic variant we try to find all
                # other affected coding variants.
                if variant and ":g." in variant:
                    variants = converter.chrom2c(variant, "dict")
                    if variants:
                        gName = variant
                        # Due to the cyclic behavior of the Position Converter
                        # we know for a fact that if a correct chrom name is
                        # generated by the converter.c2chrom that we will at
                        # least find one variant with chrom2c. Collect the
                        # variants from the nested lists and store them.
                        cNames = [cName for cName2 in variants.values()
                                  for cName in cName2]
            except Exception:
                # Catch all exceptions related to the processing of cmd
                O.addMessage(__file__, 4, "EBATCHU",
                             "Unexpected error occurred, dev-team notified")

        error = "|".join(O.getBatchMessages(2))

        filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                            batch_job.result_id)

        # Opening in append mode creates the file, so decide beforehand
        # whether the header line still has to be written.
        needs_header = not os.path.exists(filename)

        if flags and 'C' in flags:
            separator = '\t'
        else:
            separator = '\n'

        # The context manager closes the file even if a write fails.
        with io.open(filename, mode='a', encoding='utf-8') as handle:
            if needs_header:
                # The header is a tab delimited line of column names.
                header = [
                    'Input Variant', 'Errors', 'Chromosomal Variant',
                    'Coding Variant(s)'
                ]
                handle.write("%s\n" % "\t".join(header))
            handle.write("%s\t%s\t%s\t%s%s" %
                         (cmd, error, gName, "\t".join(cNames), separator))

        O.addMessage(__file__, -1, "INFO",
                     "Finisehd PositionConverter batchvariant " + cmd)
示例#35
0
def name_checker():
    """
    Name checker view.

    Without a `description` argument the empty form is rendered. Otherwise
    the variant checker is run on the description and its output is passed
    to the 'name-checker.html' template.

    Legacy query arguments are answered with a permanent (301) redirect:

    - `name` is an alias for `description`.
    - `mutationName` (older LOVD versions) is redirected to `description`
      with `standalone=1` added.
    """
    query = request.args

    if 'name' in query:
        target = url_for('.name_checker',
                         description=query['name'],
                         standalone=query.get('standalone'))
        return redirect(target, code=301)

    if 'mutationName' in query:
        target = url_for('.name_checker',
                         description=query['mutationName'],
                         standalone=1)
        return redirect(target, code=301)

    description = query.get('description')
    if not description:
        return render_template('name-checker.html')

    output = Output(__file__)
    received = 'Received variant %s from %s' % (description,
                                                request.remote_addr)
    output.addMessage(__file__, -1, 'INFO', received)
    stats.increment_counter('name-checker/website')

    variantchecker.check_variant(description, output)

    errors, _warnings, summary = output.Summary()
    parse_error = output.getOutput('parseError')

    # Name of the reference file: '.xml' for LRG records, '.gb' otherwise.
    record_type = output.getIndexedOutput('recordType', 0, '')
    reference = output.getIndexedOutput('reference', 0, '')
    reference_filename = None
    if reference:
        extension = '.xml' if record_type == 'LRG' else '.gb'
        reference_filename = reference + extension

    genomic_dna = output.getIndexedOutput('molType', 0) != 'n'
    genomic_description = output.getIndexedOutput('genomicDescription', 0, '')

    # Link to the UCSC Genome Browser, showing the raw variants with a
    # margin of 10 positions on both sides.
    browser_link = None
    raw_variants = output.getIndexedOutput('rawVariantsChromosomal', 0)
    if raw_variants:
        positions = []
        for _descr, (first, last) in raw_variants[2]:
            positions.extend((first, last))
        bed_url = url_for('.bed', description=description, _external=True)
        link_template = ('http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&'
                         'position={chromosome}:{start}-{stop}&hgt.customText='
                         '{bed_file}')
        browser_link = link_template.format(
            chromosome=raw_variants[0],
            start=min(positions) - 10,
            stop=max(positions) + 10,
            bed_file=urllib.quote(bed_url))

    # Experimental description extractor; only used when both the original
    # and the mutated sequence are available.
    extracted = ''
    if output.getIndexedOutput('original', 0):
        if output.getIndexedOutput('mutated', 0):
            allele = extractor.describe_dna(
                output.getIndexedOutput('original', 0),
                output.getIndexedOutput('mutated', 0))
            extracted = unicode(allele) if allele else '(skipped)'

    # Todo: Generate the fancy HTML views for the proteins here instead of in
    #   `mutalyzer.variantchecker`.
    template_arguments = {
        'description': description,
        'messages': map(util.message_info, output.getMessages()),
        'summary': summary,
        'parse_error': parse_error,
        'errors': errors,
        'genomicDescription': genomic_description,
        'chromDescription': output.getIndexedOutput(
            'genomicChromDescription', 0),
        'genomicDNA': genomic_dna,
        'visualisation': output.getOutput('visualisation'),
        'descriptions': output.getOutput('descriptions'),
        'protDescriptions': output.getOutput('protDescriptions'),
        'oldProtein': output.getOutput('oldProteinFancy'),
        'altStart': output.getIndexedOutput('altStart', 0),
        'altProtein': output.getOutput('altProteinFancy'),
        'newProtein': output.getOutput('newProteinFancy'),
        'transcriptInfo': output.getIndexedOutput(
            'hasTranscriptInfo', 0, False),
        'transcriptCoding': output.getIndexedOutput(
            'transcriptCoding', 0, False),
        'exonInfo': output.getOutput('exonInfo'),
        'cdsStart_g': output.getIndexedOutput('cdsStart_g', 0),
        'cdsStart_c': output.getIndexedOutput('cdsStart_c', 0),
        'cdsStop_g': output.getIndexedOutput('cdsStop_g', 0),
        'cdsStop_c': output.getIndexedOutput('cdsStop_c', 0),
        'restrictionSites': output.getOutput('restrictionSites'),
        'legends': output.getOutput('legends'),
        # Todo: Download link is not shown...
        'reference_filename': reference_filename,
        'browserLink': browser_link,
        'extractedDescription': extracted,
        'standalone': bool(query.get('standalone')),
    }

    # Logged after the template arguments (including the message list) have
    # been collected.
    output.addMessage(__file__, -1, 'INFO',
                      'Finished variant %s' % description)

    return render_template('name-checker.html', **template_arguments)
示例#36
0
文件: views.py 项目: cchng/mutalyzer
def reference_loader_submit():
    """
    Reference sequence loader.

    There are five ways for the user to load a reference sequence,
    corresponding to values for the `method` field, each requiring some
    additional fields to be defined:

    `method=upload_method`
      The reference sequence file is uploaded from a local file.

      - `file`: Reference sequence file to upload.

    `method=url_method`
      The reference sequence file can be found at the specified URL.

      - `url`: URL of reference sequence file to load.

    `method=slice_gene_method`
      Retrieve part of the reference genome for an HGNC gene symbol.

      - `genesymbol`: Gene symbol.
      - `organism`: Organism.
      - `upstream`: Number of 5' flanking nucleotides.
      - `downstream`: Number of 3' flanking nucleotides.

    `method=slice_accession_method`
      Retrieve a range of a chromosome by accession number.

      - `accession`: Chromosome Accession Number.
      - `accession_start`: Start position.
      - `accession_stop`: Stop position.
      - `accession_orientation`: Orientation.

    `method=slice_chromosome_method`
      Retrieve a range of a chromosome by name.

      - `assembly_name_or_alias`: Genome assembly by name or by alias.
      - `chromosome`: Chromosome name.
      - `chromosome_start`: Start position.
      - `chromosome_stop`: Stop position.
      - `chromosome_orientation`: Orientation.
    """
    method = request.form.get('method')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request upload(%s) with arguments %s from %s'
                      % (method, unicode(request.form), request.remote_addr))

    assemblies = Assembly.query \
        .order_by(*Assembly.order_by_criteria) \
        .all()

    retriever = Retriever.GenBankRetriever(output)
    ud, errors = '', []

    class InputException(Exception):
        """
        Raised when a submitted form field cannot be interpreted.
        """
        pass

    def check_position(position, field):
        """
        Parse a position form field as an integer.

        All ',', '.' and '-' characters are removed from `position` before
        parsing (note that this also drops a leading minus sign).

        @arg position: Raw value of the form field
        @arg field: Name of the field, only used in the error message

        @return: The parsed position
        @raise InputException: If the field is missing (not a string) or
            does not contain an integer
        """
        try:
            # A missing form field is None, so stripping the separators must
            # happen inside the try block to be reported as invalid input.
            position = (position.replace(',', '')
                                .replace('.', '')
                                .replace('-', ''))
            return int(position)
        # The exception types must be given as a tuple; the Python 2 form
        # `except AttributeError, ValueError` only catches AttributeError
        # (and binds it to the name ValueError).
        except (AttributeError, ValueError):
            raise InputException('Expected an integer in field: %s' % field)
示例#37
0
def position_converter():
    """
    Position converter.

    Reads the variant description from the `description` query argument and
    the assembly from `assembly_name_or_alias` (falling back to
    `settings.DEFAULT_ASSEMBLY`). Descriptions containing ':c.' or ':n.' are
    passed through `converter.c2chrom`; a resulting description containing
    ':g.' or ':m.' is passed through `converter.chrom2c`. The outcome and
    all collected messages are rendered with the `position-converter.html`
    template. Without a description only the form is rendered.
    """
    # Backwards compatibility: redirect the old `variant` argument.
    if 'variant' in request.args:
        legacy_target = url_for('.position_converter',
                                description=request.args['variant'])
        return redirect(legacy_target, code=301)

    assemblies = Assembly.query.order_by(*Assembly.order_by_criteria).all()

    assembly_name_or_alias = request.args.get('assembly_name_or_alias',
                                              settings.DEFAULT_ASSEMBLY)
    description = request.args.get('description')

    if not description:
        # Nothing to convert, just show the form.
        return render_template('position-converter.html',
                               assemblies=assemblies,
                               assembly_name_or_alias=assembly_name_or_alias)

    output = Output(__file__)
    received = ('Received request positionConverter(%s, %s) from %s'
                % (assembly_name_or_alias, description, request.remote_addr))
    output.addMessage(__file__, -1, 'INFO', received)
    stats.increment_counter('position-converter/website')

    chromosomal_description = None
    transcript_descriptions = None

    try:
        assembly = Assembly.by_name_or_alias(assembly_name_or_alias)
    except NoResultFound:
        output.addMessage(__file__, 3, 'ENOASSEMBLY', 'Not a valid assembly.')
    else:
        converter = Converter(assembly, output)

        # Convert chromosome name to accession number.
        corrected = converter.correctChrVariant(description)

        if corrected:
            # Now we're ready to actually do position conversion.
            on_transcript = ':c.' in corrected or ':n.' in corrected
            on_chromosome = ':g.' in corrected or ':m.' in corrected

            if not (on_transcript or on_chromosome):
                # None of the known coordinate prefixes is present; the
                # description is handed to the parser (presumably so it can
                # report what is wrong with it -- confirm).
                Grammar(output).parse(corrected)

            if on_transcript:
                corrected = converter.c2chrom(corrected)

            chromosomal_description = corrected

            # The description may have been replaced by c2chrom above, so
            # the prefixes have to be tested again on the current value.
            if corrected and (':g.' in corrected or ':m.' in corrected):
                descriptions = converter.chrom2c(corrected, 'dict')
                if descriptions is None:
                    chromosomal_description = None
                elif descriptions:
                    transcript_descriptions = []
                    for key, value in descriptions.items():
                        transcript_descriptions.append(
                            '%-10s:\t%s' % (key[:10], '\n\t\t'.join(value)))

    # Collect the messages before the final log line is added.
    messages = map(util.message_info, output.getMessages())

    finished = ('Finished request positionConverter(%s, %s)'
                % (assembly_name_or_alias, description))
    output.addMessage(__file__, -1, 'INFO', finished)

    return render_template('position-converter.html',
                           assemblies=assemblies,
                           assembly_name_or_alias=assembly_name_or_alias,
                           description=description,
                           chromosomal_description=chromosomal_description,
                           transcript_descriptions=transcript_descriptions,
                           messages=messages)
示例#38
0
    def _processNameBatch(self, batch_job, cmd, flags):
        """
        Process an entry from the Name Batch, write the results
        to the job-file. If an Exception is raised, catch and continue.

        Side-effect:
            - Output line appended to the result file of the batch job. The
              file is created with a header line if it does not exist yet.

        @arg batch_job: The batch job this entry belongs to (its `id` and
            `result_id` attributes are used)
        @type batch_job:
        @arg cmd: The NameChecker input
        @type cmd:
        @arg flags: Flags of the current entry
        @type flags:
        """
        output = Output(__file__)
        output.addMessage(__file__, -1, "INFO",
                          "Received NameChecker batchvariant " + cmd)

        stats.increment_counter('name-checker/batch')

        # Read out the flags.
        skip = self.__processFlags(output, flags)

        if not skip:
            # Run mutalyzer and get values from the Output object.
            try:
                variantchecker.check_variant(cmd, output)
            except Exception:
                # Catch all exceptions related to the processing of cmd, so
                # the error is reported in the result line instead of being
                # propagated.
                output.addMessage(
                    __file__, 4, "EBATCHU",
                    "Unexpected error occurred, dev-team notified")
                import traceback
                output.addMessage(__file__, 4, "DEBUG",
                                  unicode(repr(traceback.format_exc())))
            finally:
                # Check if we need to update the database.
                self._updateDbFlags(output, batch_job.id)

        batch_output = output.getOutput("batchDone")

        outputline = "%s\t" % cmd
        outputline += "%s\t" % "|".join(output.getBatchMessages(2))

        if batch_output:
            outputline += batch_output[0]

        filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                            batch_job.result_id)

        # This must be decided before opening the file, since opening in
        # append mode creates it.
        write_header = not os.path.exists(filename)

        # With the 'C' flag the line is terminated by a tab instead of a
        # newline.
        if flags and 'C' in flags:
            separator = '\t'
        else:
            separator = '\n'

        # The context manager makes sure the handle is closed, also when
        # writing fails.
        with io.open(filename, mode='a', encoding='utf-8') as handle:
            if write_header:
                # The file did not exist yet, so start it with the header
                # as a tab delimited line.
                header = ['Input',
                          'Errors and warnings',
                          'AccNo',
                          'Genesymbol',
                          'Variant',
                          'Reference Sequence Start Descr.',
                          'Coding DNA Descr.',
                          'Protein Descr.',
                          'GeneSymbol Coding DNA Descr.',
                          'GeneSymbol Protein Descr.',
                          'Genomic Reference',
                          'Coding Reference',
                          'Protein Reference',
                          'Affected Transcripts',
                          'Affected Proteins',
                          'Restriction Sites Created',
                          'Restriction Sites Deleted']
                handle.write("%s\n" % "\t".join(header))

            handle.write("%s%s" % (outputline, separator))

        output.addMessage(__file__, -1, "INFO",
                          "Finished NameChecker batchvariant " + cmd)
示例#39
0
def description_extractor_submit():
    """
    The Variant Description Extractor (experimental service).

    The user provides a reference and a sample sequence. How each of them is
    provided is selected by the `reference_method` and `sample_method` form
    fields respectively, each value requiring some additional fields:

    `raw_method`
      The sequence is pasted into the form field. This is also used for any
      method value not listed here.

      - `reference_sequence`: The reference sequence.
      - `sample_sequence`: The sample sequence.

    `file_method`
      The sequence is uploaded as a file.

      - `reference_file`: The reference file.
      - `sample_file`: The sample file.

    `refseq_method`
      The sequence is given by a RefSeq accession number.

      - `reference_accession_number`: RefSeq accession number for the
        reference sequence.
      - `sample_accession_number`: RefSeq accession number for the sample
        sequence.

    The result is rendered with the `description-extractor.html` template.
    """
    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received Description Extract request from %s'
                      % request.remote_addr)
    stats.increment_counter('description-extractor/website')

    # Everything the user submitted for the reference ...
    reference_method = request.form.get('reference_method')
    reference_sequence = request.form.get('reference_sequence')
    reference_file = request.files.get('reference_file')
    reference_accession_number = request.form.get('reference_accession_number')
    reference_filename = ''

    # ... and for the sample.
    sample_method = request.form.get('sample_method')
    sample_sequence = request.form.get('sample_sequence')
    sample_file = request.files.get('sample_file')
    sample_accession_number = request.form.get('sample_accession_number')
    sample_filename = ''

    # Load the reference sequence; it stays empty if loading fails.
    r = ''
    if reference_method == 'refseq_method':
        if not reference_accession_number:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                'Reference accession number input fields is empty.')
        else:
            retriever = Retriever.GenBankRetriever(output)
            genbank_record = retriever.loadrecord(reference_accession_number)
            if genbank_record:
                r = unicode(genbank_record.seq)
    elif reference_method == 'file_method':
        if not reference_file:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                'No reference file provided.')
        else:
            reference_filename = reference_file.filename
            r = util.read_dna(reference_file)
    else:
        # raw_method
        if not reference_sequence:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                'Reference sequence number input fields is empty.')
        else:
            r = util.read_dna(StringIO.StringIO(reference_sequence))

    # Load the sample sequence in the same way.
    s = ''
    if sample_method == 'refseq_method':
        if not sample_accession_number:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                'Sample accession number input fields is empty.')
        else:
            retriever = Retriever.GenBankRetriever(output)
            genbank_record = retriever.loadrecord(sample_accession_number)
            if genbank_record:
                s = unicode(genbank_record.seq)
    elif sample_method == 'file_method':
        if not sample_file:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                'No sample file provided.')
        else:
            sample_filename = sample_file.filename
            s = util.read_dna(sample_file)
    else:
        # raw_method
        if not sample_sequence:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                'Sample sequence number input fields is empty.')
        else:
            s = util.read_dna(StringIO.StringIO(sample_sequence))

    # Todo: Move this to the describe module.
    if not (r and util.is_dna(r)):
        output.addMessage(__file__, 3, 'ENODNA',
                          'Reference sequence is not DNA.')
    if not (s and util.is_dna(s)):
        output.addMessage(__file__, 3, 'ENODNA',
                          'Sample sequence is not DNA.')

    # The extractor is only run on two non-empty sequences that both fit
    # within the configured maximum length.
    raw_vars = None
    if r and s:
        max_length = settings.EXTRACTOR_MAX_INPUT_LENGTH
        if max(len(r), len(s)) > max_length:
            output.addMessage(__file__, 3, 'EMAXSIZE',
                              'Input sequences are restricted to {:,} bp.'
                              .format(max_length))
        else:
            raw_vars = extractor.describe_dna(r, s)

    # The number of warnings is not shown on the page.
    errors, _warnings, summary = output.Summary()
    messages = map(util.message_info, output.getMessages())

    output.addMessage(__file__, -1, 'INFO',
                      'Finished Description Extract request')

    template_arguments = {
        'extractor_max_input_length': settings.EXTRACTOR_MAX_INPUT_LENGTH,
        'reference_sequence': reference_sequence or '',
        'sample_sequence': sample_sequence or '',
        'reference_accession_number': reference_accession_number or '',
        'sample_accession_number': sample_accession_number or '',
        'reference_filename': reference_filename or '',
        'sample_filename': sample_filename or '',
        'raw_vars': raw_vars,
        'errors': errors,
        'summary': summary,
        'messages': messages,
        'reference_method': reference_method,
        'sample_method': sample_method}

    return render_template('description-extractor.html', **template_arguments)
示例#40
0
文件: views.py 项目: cchng/mutalyzer
def position_converter():
    """
    Position converter.

    Reads the variant description from the `description` query argument and
    the assembly from `assembly_name_or_alias` (falling back to
    `settings.DEFAULT_ASSEMBLY`). Descriptions containing ':c.' or ':n.' are
    passed through `converter.c2chrom`; a resulting description containing
    ':g.' or ':m.' is passed through `converter.chrom2c`. The outcome and
    all collected messages are rendered with the `position-converter.html`
    template. Without a description only the form is rendered.
    """
    # Backwards compatibility: redirect the old `variant` argument.
    if 'variant' in request.args:
        legacy_target = url_for('.position_converter',
                                description=request.args['variant'])
        return redirect(legacy_target, code=301)

    assemblies = Assembly.query.order_by(*Assembly.order_by_criteria).all()

    assembly_name_or_alias = request.args.get('assembly_name_or_alias',
                                              settings.DEFAULT_ASSEMBLY)
    description = request.args.get('description')

    if not description:
        # Nothing to convert, just show the form.
        return render_template('position-converter.html',
                               assemblies=assemblies,
                               assembly_name_or_alias=assembly_name_or_alias)

    output = Output(__file__)
    received = ('Received request positionConverter(%s, %s) from %s'
                % (assembly_name_or_alias, description, request.remote_addr))
    output.addMessage(__file__, -1, 'INFO', received)
    stats.increment_counter('position-converter/website')

    chromosomal_description = None
    transcript_descriptions = None

    try:
        assembly = Assembly.by_name_or_alias(assembly_name_or_alias)
    except NoResultFound:
        output.addMessage(__file__, 3, 'ENOASSEMBLY', 'Not a valid assembly.')
    else:
        converter = Converter(assembly, output)

        # Convert chromosome name to accession number.
        corrected = converter.correctChrVariant(description)

        if corrected:
            # Now we're ready to actually do position conversion.
            on_transcript = ':c.' in corrected or ':n.' in corrected
            on_chromosome = ':g.' in corrected or ':m.' in corrected

            if not (on_transcript or on_chromosome):
                # None of the known coordinate prefixes is present; the
                # description is handed to the parser (presumably so it can
                # report what is wrong with it -- confirm).
                Grammar(output).parse(corrected)

            if on_transcript:
                corrected = converter.c2chrom(corrected)

            chromosomal_description = corrected

            # The description may have been replaced by c2chrom above, so
            # the prefixes have to be tested again on the current value.
            if corrected and (':g.' in corrected or ':m.' in corrected):
                descriptions = converter.chrom2c(corrected, 'dict')
                if descriptions is None:
                    chromosomal_description = None
                elif descriptions:
                    transcript_descriptions = []
                    for key, value in descriptions.items():
                        transcript_descriptions.append(
                            '%-10s:\t%s' % (key[:10], '\n\t\t'.join(value)))

    # Collect the messages before the final log line is added.
    messages = map(util.message_info, output.getMessages())

    finished = ('Finished request positionConverter(%s, %s)'
                % (assembly_name_or_alias, description))
    output.addMessage(__file__, -1, 'INFO', finished)

    return render_template('position-converter.html',
                           assemblies=assemblies,
                           assembly_name_or_alias=assembly_name_or_alias,
                           description=description,
                           chromosomal_description=chromosomal_description,
                           transcript_descriptions=transcript_descriptions,
                           messages=messages)
示例#41
0
文件: views.py 项目: cchng/mutalyzer
def lovd_variant_info():
    """
    The chromosomal to coding and vice versa conversion interface for LOVD.

    Search for an NM number in the database, if the version number matches,
    get the start and end positions in a variant and translate these positions
    to chromosomal notation if the variant is in coding notation and vice
    versa.

    - If no end position is present, the start position is assumed to be the
      end position.
    - If the version number is not found in the database, an error message is
      generated and a suggestion for another version is given.
    - If the reference sequence is not found at all, an error is returned.
    - If no variant is present, the transcription start and end and CDS end
      in coding notation is returned.
    - If the variant is not accepted by the nomenclature parser, a parse error
      will be printed.

    Get variant info and return the result as plain text.

    Parameters:

    LOVD_ver
      The version of the calling LOVD.
    build
      The human genome build (hg19 assumed).
    acc
      The accession number (NM number).
    var
      A description of the variant.

    Returns:

    start_main
      The main coordinate of the start position in I{c.} (non-star) notation.
    start_offset
      The offset coordinate of the start position in I{c.} notation (intronic
      position).
    end_main
      The main coordinate of the end position in I{c.} (non-star) notation.
    end_offset
      The offset coordinate of the end position in I{c.} notation (intronic
      position).
    start_g
      The I{g.} notation of the start position.
    end_g
      The I{g.} notation of the end position.
    type
      The mutation type.

    Returns (alternative):

    trans_start
      Transcription start in I{c.} notation.
    trans_stop
      Transcription stop in I{c.} notation.
    CDS_stop
      CDS stop in I{c.} notation.
    """
    # Only `var` is optional, the other arguments are required.
    lovd_version = request.args['LOVD_ver']
    build = request.args['build']
    accession = request.args['acc']
    description = request.args.get('var')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request variantInfo(%s:%s (LOVD_ver %s, '
                      'build %s)) from %s'
                      % (accession, description, lovd_version, build,
                         request.remote_addr))

    try:
        assembly = Assembly.by_name_or_alias(build)
    except NoResultFound:
        response = make_response('invalid build')
        response.headers['Content-Type'] = 'text/plain; charset=utf-8'
        return response

    converter = Converter(assembly, output)

    result = ''

    # If no variant is given, return transcription start, transcription
    # end and CDS stop in c. notation.
    if description:
        ret = converter.mainMapping(accession, description)
    else:
        ret = converter.giveInfo(accession)
        if ret:
            result = '%i\n%i\n%i' % ret

    # Neither transcript info nor a mapping: report the error that was
    # stored in the output object, if any.
    if not result and not getattr(ret, 'startmain', None):
        out = output.getOutput('LOVDERR')
        if out:
            result = out[0]
        else:
            result = 'Unknown error occured'

    output.addMessage(__file__, -1, 'INFO',
                      'Finished request variantInfo(%s:%s (LOVD_ver %s, '
                      'build %s))'
                      % (accession, description, lovd_version, build))

    if not result and getattr(ret, 'startmain', None):
        result = '%i\n%i\n%i\n%i\n%i\n%i\n%s' % (
            ret.startmain, ret.startoffset, ret.endmain, ret.endoffset,
            ret.start_g, ret.end_g, ret.mutationType)

    # Todo: Obsoleted error messages, remove soon.
    if lovd_version == '2.0-23':
        # The rewritten text must be stored in `result`, since that is what
        # the response is created from below.
        result = re.sub(r'^Error \(.*\):', 'Error:', result)

    response = make_response(result)
    response.headers['Content-Type'] = 'text/plain; charset=utf-8'
    return response
示例#42
0
    def _processNameBatch(self, batch_job, cmd, flags):
        """
        Process an entry from the Name Batch, write the results
        to the job-file. If an Exception is raised, catch and continue.

        Side-effect:
            - Output line appended to the result file of the batch job. The
              file is created with a header line if it does not exist yet.

        @arg batch_job: The batch job this entry belongs to (its `id` and
            `result_id` attributes are used)
        @type batch_job:
        @arg cmd: The NameChecker input
        @type cmd:
        @arg flags: Flags of the current entry
        @type flags:
        """
        output = Output(__file__)
        output.addMessage(__file__, -1, "INFO",
                          "Received NameChecker batchvariant " + cmd)

        stats.increment_counter('name-checker/batch')

        # Read out the flags.
        skip = self.__processFlags(output, flags)

        if not skip:
            # Run mutalyzer and get values from the Output object.
            try:
                variantchecker.check_variant(cmd, output)
            except Exception:
                # Catch all exceptions related to the processing of cmd, so
                # the error is reported in the result line instead of being
                # propagated.
                output.addMessage(
                    __file__, 4, "EBATCHU",
                    "Unexpected error occurred, dev-team notified")
                import traceback
                output.addMessage(__file__, 4, "DEBUG",
                                  unicode(repr(traceback.format_exc())))
            finally:
                # Check if we need to update the database.
                self._updateDbFlags(output, batch_job.id)

        batch_output = output.getOutput("batchDone")

        outputline = "%s\t" % cmd
        outputline += "%s\t" % "|".join(output.getBatchMessages(2))

        if batch_output:
            outputline += batch_output[0]

        filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                            batch_job.result_id)

        # This must be decided before opening the file, since opening in
        # append mode creates it.
        write_header = not os.path.exists(filename)

        # With the 'C' flag the line is terminated by a tab instead of a
        # newline.
        if flags and 'C' in flags:
            separator = '\t'
        else:
            separator = '\n'

        # The context manager makes sure the handle is closed, also when
        # writing fails.
        with io.open(filename, mode='a', encoding='utf-8') as handle:
            if write_header:
                # The file did not exist yet, so start it with the header
                # as a tab delimited line.
                header = [
                    'Input', 'Errors and warnings', 'AccNo', 'Genesymbol',
                    'Variant', 'Reference Sequence Start Descr.',
                    'Coding DNA Descr.', 'Protein Descr.',
                    'GeneSymbol Coding DNA Descr.',
                    'GeneSymbol Protein Descr.',
                    'Genomic Reference', 'Coding Reference',
                    'Protein Reference',
                    'Affected Transcripts', 'Affected Proteins',
                    'Restriction Sites Created', 'Restriction Sites Deleted'
                ]
                handle.write("%s\n" % "\t".join(header))

            handle.write("%s%s" % (outputline, separator))

        output.addMessage(__file__, -1, "INFO",
                          "Finished NameChecker batchvariant " + cmd)