def embl_gb_fasta(raw, ds, path=None):
    """Extract and parse every EMBL, GenBank or FASTA record found in *raw*.

    The text is dedented and split into candidate records with a regular
    expression.  Each candidate is offered to the EMBL, GenBank and FASTA
    readers in that order; candidates that none of them accept are
    skipped.

    Args:
        raw: Text that may contain several concatenated records.
        ds: If truthy, each record is converted with
            ``_Dseqrecord.from_SeqRecord`` (or
            ``_GenbankFile.from_SeqRecord`` when *path* is truthy as well).
            Otherwise the parsed record itself is collected.
        path: Forwarded to ``_GenbankFile.from_SeqRecord`` when truthy.

    Returns:
        A list with one entry per successfully parsed record.
    """
    pattern = r"(?:>.+\n^(?:^[^>]+?)(?=\n\n|>|LOCUS|ID))|(?:(?:LOCUS|ID)(?:(?:.|\n)+?)^//)"

    chunks = _re.findall(
        pattern,
        _textwrap.dedent(raw + "\n\n"),
        flags=_re.MULTILINE,
    )

    records = []
    for chunk in chunks:
        # Topology guessed from the first line; a successful GenBank parse
        # replaces this guess below.
        is_circular = "circular" in chunk.splitlines()[0]
        record = ""
        with _io.StringIO(chunk) as stream:
            for fmt in ("embl", "genbank", "fasta"):
                stream.seek(0)
                try:
                    candidate = _SeqIO.read(
                        stream, fmt, alphabet=_IUPACAmbiguousDNA()
                    )
                    if fmt == "genbank":
                        # Parse a second time to read the residue type
                        # from the LOCUS line; fall back to the topology
                        # annotation when it does not say "circular".
                        stream.seek(0)
                        residue_type = _RecordParser().parse(stream).residue_type
                        is_circular = (
                            "circular" in residue_type
                            or candidate.annotations.get("topology") == "circular"
                        )
                except ValueError:
                    # This reader rejected the chunk; try the next format.
                    continue
                record = candidate
                break

        if not record:
            continue

        from copy import deepcopy as _deepcopy  ## TODO: clean up !
        from pydna.seqfeature import SeqFeature as _SeqFeature

        # Replace every feature with a pydna SeqFeature carrying a deep
        # copy of the original feature's attributes.
        converted = []
        for feature in record.features:
            clone = _SeqFeature()
            clone.__dict__ = _deepcopy(feature.__dict__)
            converted.append(clone)
        record.features = converted

        if not ds:
            records.append(record)
        elif path:
            records.append(
                _GenbankFile.from_SeqRecord(
                    record,
                    linear=not is_circular,
                    circular=is_circular,
                    path=path,
                )
            )
        else:
            records.append(
                _Dseqrecord.from_SeqRecord(
                    record,
                    linear=not is_circular,
                    circular=is_circular,
                )
            )

    return records
Exemplo n.º 2
0
    def embl_gb_fasta(raw, ds, path=None):
        """Parse all EMBL, GenBank or FASTA records contained in *raw*.

        Candidate records are cut out of the dedented text with a regular
        expression and offered to the EMBL, GenBank and FASTA readers in
        that order.  Candidates rejected by all three are dropped.

        A record is treated as circular when a GenBank parse reports a
        topology annotation containing "circular", or when the word
        "circular" appears as a whitespace-separated token on the first
        line of the raw record.

        Args:
            raw: Text that may hold several concatenated records.
            ds: If truthy, records are converted with
                ``_Dseqrecord.from_SeqRecord`` (or
                ``_GenbankFile.from_SeqRecord`` when *path* is truthy too);
                otherwise the parsed record itself is collected.
            path: Forwarded to ``_GenbankFile.from_SeqRecord`` when truthy.

        Returns:
            A list with one entry per successfully parsed record.
        """
        pattern = (r"(?:>.+\n^(?:^[^>]+?)(?=\n\n|>|"
                   r"LOCUS|ID))|(?:(?:LOCUS|ID)(?:(?:.|\n)+?)^//)")

        dedented = _textwrap.dedent(raw + "\n\n")
        collected = []

        for chunk in _re.findall(pattern, dedented, flags=_re.MULTILINE):
            topology_circular = False
            stream = _io.StringIO(chunk)
            for fmt in ("embl", "genbank", "fasta"):
                stream.seek(0)
                try:
                    record = _SeqIO.read(stream, fmt)
                except ValueError:
                    # Not this format; move on to the next reader.
                    continue
                if fmt == "genbank":
                    topology = str(record.annotations.get("topology"))
                    topology_circular = "circular" in topology.lower()
                break
            else:
                # No reader accepted the chunk.
                record = ""
            stream.close()

            # hack to pick up topology from malformed files
            first_line_tokens = chunk.splitlines()[0].lower().split()
            circular = topology_circular or "circular" in first_line_tokens

            if not record:
                continue

            from copy import deepcopy as _deepcopy  # TODO: clean up !
            from pydna.seqfeature import SeqFeature as _SeqFeature

            # Swap each feature for a pydna SeqFeature holding a deep copy
            # of the original feature's attributes.
            replacements = []
            for feature in record.features:
                replacement = _SeqFeature()
                replacement.__dict__ = _deepcopy(feature.__dict__)
                replacements.append(replacement)
            record.features = replacements

            if not ds:
                collected.append(record)
                continue

            topology_kwargs = {"linear": not circular, "circular": circular}
            if path:
                collected.append(
                    _GenbankFile.from_SeqRecord(record,
                                                path=path,
                                                **topology_kwargs))
            else:
                collected.append(
                    _Dseqrecord.from_SeqRecord(record, **topology_kwargs))

        return collected