Exemplo n.º 1
0
    def _get_blast_hits(self, blast_db, seqs):
        """Blast each seq in seqs against blast_db and retain good hits.

        blast_db -- BLAST database handed to blast_seqs
        seqs -- records whose first item is the sequence id; the records are
            passed unchanged to blast_seqs

        Reads self.Params["Max E value"] and
        self.Params["Min percent identity"] as the filtering thresholds.

        Returns a dict mapping every sequence id to a list of
        (subject id, e-value) tuples for the hits that pass both thresholds.
        Ids with no entry in the parsed BLAST output map to an empty list.
        Every id gets its own list object, so callers can mutate one entry
        without affecting the others.
        """
        max_evalue = self.Params["Max E value"]
        min_percent_identity = self.Params["Min percent identity"]
        # Accept the threshold as a fraction (e.g. 0.90) as well as a
        # percentage: values below 1.0 are rescaled before being compared with
        # the hit's "% IDENTITY" field.
        # NOTE(review): assumes "% IDENTITY" is on a 0-100 scale - confirm
        # against BlastResult.
        if min_percent_identity < 1.0:
            min_percent_identity *= 100.0
        seq_ids = [s[0] for s in seqs]

        blast_result = blast_seqs(
            seqs, Blastall, blast_db=blast_db, params={"-p": "blastn", "-n": "T"}, add_seq_names=False
        )

        if not blast_result["StdOut"]:
            # dict.fromkeys(seq_ids, []) would bind ONE shared list to every
            # key; build an independent empty list per id instead.
            return {seq_id: [] for seq_id in seq_ids}

        blast_result = BlastResult(list(blast_result["StdOut"]))

        result = {}
        for seq_id in seq_ids:
            # Results are looked up by the first whitespace-delimited token
            # of the full sequence id.
            blast_result_id = seq_id.split()[0]
            try:
                # Only element [0] of the per-query entry is inspected
                # (presumably the first BLAST iteration - TODO confirm).
                result[seq_id] = [
                    (e["SUBJECT ID"], float(e["E-VALUE"]))
                    for e in blast_result[blast_result_id][0]
                    if (float(e["E-VALUE"]) <= max_evalue and float(e["% IDENTITY"]) >= min_percent_identity)
                ]
            except KeyError:
                # No entry for this query (or a hit lacking an expected field).
                result[seq_id] = []

        return result
Exemplo n.º 2
0
    def _get_blast_hits(self, blast_db, seqs):
        """ blast each seq in seqs against blast_db and retain good hits

        blast_db -- BLAST database handed to blast_seqs
        seqs -- records whose first item is the sequence id; the records are
            passed unchanged to blast_seqs

        Thresholds come from self.Params['Max E value'] and
        self.Params['Min percent identity'].

        Returns a dict mapping every sequence id to a list of
        (subject id, e-value) tuples for the hits passing both thresholds;
        ids without an entry in the parsed output map to an empty list.
        Each id owns its own list object.
        """
        max_evalue = self.Params['Max E value']
        min_percent_identity = self.Params['Min percent identity']
        # A threshold below 1.0 is treated as a fraction and rescaled before
        # it is compared with each hit's '% IDENTITY' field.
        if min_percent_identity < 1.0:
            min_percent_identity *= 100.0
        seq_ids = [s[0] for s in seqs]

        blast_result = blast_seqs(
            seqs, Blastall, blast_db=blast_db,
            params={'-p': 'blastn', '-n': 'T'},
            add_seq_names=False)

        if not blast_result['StdOut']:
            # {}.fromkeys(seq_ids, []) would make every key share ONE list;
            # give each id an independent empty list instead.
            return {seq_id: [] for seq_id in seq_ids}

        blast_result = BlastResult(list(blast_result['StdOut']))

        result = {}
        for seq_id in seq_ids:
            # Results are looked up by the first whitespace-delimited token
            # of the full sequence id.
            blast_result_id = seq_id.split()[0]
            try:
                # Only element [0] of the per-query entry is inspected
                # (presumably the first BLAST iteration - TODO confirm).
                result[seq_id] = [
                    (e['SUBJECT ID'], float(e['E-VALUE']))
                    for e in blast_result[blast_result_id][0]
                    if (float(e['E-VALUE']) <= max_evalue and
                        float(e['% IDENTITY']) >= min_percent_identity)]
            except KeyError:
                # No entry for this query (or a hit lacking an expected field).
                result[seq_id] = []

        return result
Exemplo n.º 3
0
    def _get_blast_hits(self, blast_db, seqs):
        """ blast each seq in seqs against blast_db and retain good hits

        blast_db -- BLAST database handed to blast_seqs
        seqs -- records whose first item is the sequence id; the records are
            passed unchanged to blast_seqs

        Thresholds come from self.Params['Max E value'] and
        self.Params['Min percent identity'].

        Returns a dict keyed by sequence id. Each value is a list of
        (subject id, e-value) tuples for the hits whose e-value does not
        exceed the maximum and whose percent identity reaches the minimum.
        Ids with no entry in the parsed output get an empty list, and no
        two ids ever share the same list object.
        """
        max_evalue = self.Params['Max E value']
        min_percent_identity = self.Params['Min percent identity']
        # Values below 1.0 are interpreted as fractions and rescaled so they
        # can be compared with each hit's '% IDENTITY' field.
        if min_percent_identity < 1.0:
            min_percent_identity *= 100.0
        seq_ids = [s[0] for s in seqs]

        blast_result = blast_seqs(
            seqs, Blastall, blast_db=blast_db,
            params={'-p': 'blastn', '-n': 'T'},
            add_seq_names=False)

        if not blast_result['StdOut']:
            # Avoid {}.fromkeys(seq_ids, []): it stores a single list under
            # every key, so appending to one entry would change them all.
            return {seq_id: [] for seq_id in seq_ids}

        blast_result = BlastResult(list(blast_result['StdOut']))

        result = {}
        for seq_id in seq_ids:
            # The parsed output is indexed by the id's first
            # whitespace-delimited token.
            blast_result_id = seq_id.split()[0]
            try:
                # Only element [0] of the per-query entry is inspected
                # (presumably the first BLAST iteration - TODO confirm).
                result[seq_id] = [
                    (e['SUBJECT ID'], float(e['E-VALUE']))
                    for e in blast_result[blast_result_id][0]
                    if (float(e['E-VALUE']) <= max_evalue and
                        float(e['% IDENTITY']) >= min_percent_identity)]
            except KeyError:
                # No entry for this query (or a hit lacking an expected field).
                result[seq_id] = []

        return result
Exemplo n.º 4
0
def get_blast_hits(seqs,
                   blast_db,
                   max_e_value=1e-10,
                   min_pct_identity=0.75,
                   min_aligned_percent=0.50):
    """ blast each seq in seqs against blast_db and retain good hits

    seqs -- sequence of (seq_id, sequence) pairs; it is iterated more than
        once here, so it must be re-iterable
    blast_db -- BLAST database handed to blast_seqs
    max_e_value -- largest e-value a hit may have to be kept
    min_pct_identity -- smallest identity a hit may have, as a fraction
        (the hit's '% IDENTITY' field is divided by 100 before comparing)
    min_aligned_percent -- smallest alignment length a hit may have, as a
        fraction of the query sequence's length

    Returns a dict mapping every seq_id to the list of hit records that
    pass all three filters. Ids without hits map to an empty list, and each
    id owns its own list object.
    """
    seq_ids = [s[0] for s in seqs]

    blast_result = blast_seqs(
        seqs, Blastall, blast_db=blast_db,
        params={'-p': 'blastn', '-n': 'F'},
        add_seq_names=False)

    if not blast_result['StdOut']:
        # {}.fromkeys(seq_ids, []) would hand the SAME list to every key;
        # callers appending to one entry would silently change them all.
        return {seq_id: [] for seq_id in seq_ids}

    blast_result = BlastResult(list(blast_result['StdOut']))

    result = {}
    for seq_id, seq in seqs:
        # Results are looked up by the first whitespace-delimited token of
        # the full sequence id.
        blast_result_id = seq_id.split()[0]
        min_alignment_length = len(seq) * min_aligned_percent
        if blast_result_id in blast_result:
            # Only element [0] of the per-query entry is inspected
            # (presumably the first BLAST iteration - TODO confirm).
            result[seq_id] = [
                e for e in blast_result[blast_result_id][0]
                if (float(e['E-VALUE']) <= max_e_value and
                    float(e['% IDENTITY']) / 100. >= min_pct_identity and
                    int(e['ALIGNMENT LENGTH']) >= min_alignment_length)]
        else:
            result[seq_id] = []

    return result
Exemplo n.º 5
0
def get_blast_hits(seqs, blast_db, max_e_value=1e-10, min_pct_identity=0.75, min_aligned_percent=0.50):
    """Blast each seq in seqs against blast_db and retain good hits.

    seqs -- sequence of (seq_id, sequence) pairs; it is iterated more than
        once here, so it must be re-iterable
    blast_db -- BLAST database handed to blast_seqs
    max_e_value -- largest e-value a hit may have to be kept
    min_pct_identity -- smallest identity a hit may have, as a fraction
        (the hit's "% IDENTITY" field is divided by 100 before comparing)
    min_aligned_percent -- smallest alignment length a hit may have, as a
        fraction of the query sequence's length

    Returns a dict mapping every seq_id to the list of hit records that
    pass all three filters. Ids without hits map to an empty list, and each
    id owns its own list object.
    """
    seq_ids = [s[0] for s in seqs]

    blast_result = blast_seqs(
        seqs, Blastall, blast_db=blast_db, params={"-p": "blastn", "-n": "F"}, add_seq_names=False
    )

    if not blast_result["StdOut"]:
        # dict.fromkeys(seq_ids, []) binds one shared list to all keys, so a
        # caller appending to one entry would change every entry. Build a
        # separate list per id.
        return {seq_id: [] for seq_id in seq_ids}

    blast_result = BlastResult(list(blast_result["StdOut"]))

    result = {}
    for seq_id, seq in seqs:
        # The parsed output is indexed by the id's first whitespace-delimited
        # token.
        blast_result_id = seq_id.split()[0]
        min_alignment_length = len(seq) * min_aligned_percent
        if blast_result_id in blast_result:
            # Only element [0] of the per-query entry is inspected
            # (presumably the first BLAST iteration - TODO confirm).
            result[seq_id] = [
                e
                for e in blast_result[blast_result_id][0]
                if (
                    float(e["E-VALUE"]) <= max_e_value
                    and float(e["% IDENTITY"]) / 100.0 >= min_pct_identity
                    and int(e["ALIGNMENT LENGTH"]) >= min_alignment_length
                )
            ]
        else:
            result[seq_id] = []

    return result
Exemplo n.º 6
0
def blast_genome(seqs,
                 blast_db,
                 e_value,
                 max_hits,
                 word_size,
                 working_dir,
                 blast_mat_root,
                 extra_params=[],
                 DEBUG=True):
    """Run blastn on seqs against blast_db and return the raw output lines.

    seqs -- input sequences as strings
    blast_db -- path to blast database
    e_value -- expectation threshold, passed as "-e"
    max_hits -- passed as both "-b" (max seqs to show) and "-v" (max
        one-line descriptions)
    word_size -- word size for the initial BLAST screen, passed as "-W"
    working_dir -- forwarded to blast_seqs as WorkingDir
    blast_mat_root -- location of BLAST matrix files
    extra_params -- additional parameters merged over the defaults with
        dict.update, so they may override any option set here
    DEBUG -- accepted but not used by this function

    Returns a list holding each item yielded by the run's 'StdOut'.
    """
    blast_options = {
        "-M": "BLOSUM62",  # matrix
        "-a": "1",         # max procs
        "-e": e_value,     # expectation
        "-b": max_hits,    # max seqs to show
        "-W": word_size,   # word size
        "-v": max_hits,    # max one line descriptions
        "-m": "9",         # tabular output
        "-p": "blastn",    # program
    }
    # Caller-supplied options are applied last so they take precedence.
    blast_options.update(extra_params)

    blast_run = blast_seqs(
        seqs,
        Blastall,
        blast_db=blast_db,
        params=blast_options,
        WorkingDir=working_dir,
        add_seq_names=False,
        blast_mat_root=blast_mat_root,
    )

    return list(blast_run['StdOut'])
Exemplo n.º 7
0
def blast_genome(seqs, blast_db, e_value, max_hits, word_size, working_dir,
                 blast_mat_root, extra_params=[], DEBUG=True):
    """Blast sequences against a genome database and return raw output.

    seqs -- input sequences as strings
    blast_db -- path to blast database
    e_value -- e_value (float), passed as "-e"
    max_hits -- maximum sequences detected by BLAST to show; used for both
        "-b" and "-v"
    word_size -- word size for initial BLAST screen, passed as "-W"
    working_dir -- forwarded to blast_seqs as WorkingDir
    blast_mat_root -- location of BLAST matrix files
    extra_params -- additional parameters to pass to BLAST; merged last, so
        they override the defaults assembled here
    DEBUG -- accepted but not used by this function

    Returns a list holding each item yielded by the run's 'StdOut'.
    """
    # Default option table, listed in the order the options are inserted.
    default_options = [
        ("-M", "BLOSUM62"),   # matrix
        ("-a", "1"),          # max procs
        ("-e", e_value),      # expectation
        ("-b", max_hits),     # max seqs to show
        ("-W", word_size),    # word size
        ("-v", max_hits),     # max one line descriptions
        ("-m", "9"),          # tabular output
        ("-p", "blastn"),     # program
    ]
    blast_params = dict(default_options)
    blast_params.update(extra_params)

    blast_run = blast_seqs(
        seqs,
        Blastall,
        blast_db=blast_db,
        params=blast_params,
        WorkingDir=working_dir,
        add_seq_names=False,
        blast_mat_root=blast_mat_root)

    raw_output = list(blast_run['StdOut'])
    return raw_output