def align_sequences(query, reference, o):
    """Align ``query`` to ``reference`` with parasail and write a report to ``o``.

    The alignment uses ``parasail.nw_trace_striped_sat`` with gap open 3,
    gap extend 2 and an "ACGT" matrix scoring 2 / -1. The three traceback
    rows are printed to stdout, ``get_snp_locs`` is called with the
    traceback and ``o``, and the alignment is then written to ``o`` in
    60-column chunks.

    Args:
        query: Query sequence (labelled "CNS" in the report).
        reference: Reference sequence (labelled "Ref" in the report).
        o: Object with a ``write`` method that receives the report text.
    """
    scoring = parasail.matrix_create("ACGT", 2, -1)
    aligned = parasail.nw_trace_striped_sat(query, reference, 3, 2, scoring)

    # The three characters are presumably the match / similar / other
    # markers of the comparison row -- confirm against parasail.
    traceback = aligned.get_traceback('|', '.', ' ')
    print(traceback.ref)
    print(traceback.comp)
    print(traceback.query)

    # The return value is not used here; the call is kept because it
    # receives the output handle.
    get_snp_locs(traceback, o)

    width = 60
    chunks = []
    for start in range(0, len(traceback.ref), width):
        stop = start + width
        # Pad the comparison row by the width of the printed offset.
        pad = ' ' * len(str(start))
        chunks.append(f"Ref\t{start}\t{traceback.ref[start:stop]}\n")
        chunks.append(f"   \t{pad}\t{traceback.comp[start:stop]}\n")
        chunks.append(f"CNS\t{start}\t{traceback.query[start:stop]}\n")
        chunks.append("\n")

    o.write("\n*****\n\n### Alignment with best reference\n\n")
    o.write("".join(chunks) + "\n*****\n")
示例#2
0
def generate_paf_file(query_fasta, target_fasta, output_file):
    """Align each query record to the target with ``parasail.ssw``.

    One tab-separated line per successful alignment is written to
    ``output_file``; failed alignments (``ssw`` returned ``None``) are
    logged as warnings.

    Both files are read line by line: a line starting with ``>`` is a
    header, any other line is taken as a complete sequence. For the target,
    only the last header and the last non-header line are kept.

    Args:
        query_fasta: Path of the query file.
        target_fasta: Path of the target file.
        output_file: Path of the file to (over)write.
    """
    scoring = parasail.matrix_create("ACGT", 2, -2)

    with open(target_fasta, 'r') as target_handle:
        for raw in target_handle:
            stripped = raw.rstrip('\n')
            if raw.startswith('>'):
                # Keep only the first space-delimited token of the header.
                tid = stripped.split(' ')[0].lstrip('>')
            else:
                tseq = stripped

    with open(query_fasta, 'r') as query_handle, open(output_file, 'w') as paf:
        for raw in query_handle:
            stripped = raw.rstrip('\n')
            if raw.startswith('>'):
                qid = stripped.lstrip('>')
                continue

            qseq = stripped
            res = parasail.ssw(qseq, tseq, 3, 1, scoring)
            if res is None:
                logger.warning(
                    f'Error occured in the alignment of {qid} and {tid} via parasail'
                )
                continue

            fields = (qid, len(qseq), res.read_begin1, res.read_end1, tid,
                      len(tseq), res.ref_begin1, res.ref_end1)
            print("%s\t%d\t%d\t%d\t+\t%s\t%d\t%d\t%d\t*\t*\t60" % fields,
                  file=paf)
示例#3
0
def get_refined_bp_sbnd(tconsensus, fasta_file_h, tchr, tstart, tend, tdir, hout_log, margin = 200):
    """Refine a breakpoint position by aligning a reference window to a consensus.

    A window of ``tchr`` is fetched from ``fasta_file_h`` (extended by
    ``margin`` on the start side for ``'+'`` and on the end side otherwise,
    where the window is also reverse-complemented) and aligned to
    ``tconsensus`` with ``parasail.ssw``.

    Args:
        tconsensus: Consensus sequence.
        fasta_file_h: Object providing ``get_reference_length`` and ``fetch``.
        tchr: Reference sequence name.
        tstart: Region start; values below 1 are raised to 1.
        tend: Region end; clipped to the reference length.
        tdir: ``'+'`` or any other value for the opposite direction.
        hout_log: Not used by this function.
        margin: Number of extra bases fetched.

    Returns:
        ``(bp_pos_reference, tconsensus_after)`` where the second item is the
        consensus after the aligned part, or ``None`` if ``ssw`` returned
        ``None``.
    """
    # NOTE(review): this truncated consensus is never used below -- the
    # alignment runs on the full ``tconsensus``. Confirm which was intended.
    tconsensus_part = tconsensus if len(tconsensus) <= 1000 else tconsensus[:1000]

    chrom_length = fasta_file_h.get_reference_length(tchr)
    if tstart < 1:
        tstart = 1
    if chrom_length < tend:
        tend = chrom_length

    forward = tdir == '+'
    if forward:
        window_start = max(int(tstart) - margin, 0)
        window_end = int(tend)
    else:
        window_start = max(int(tstart) - 1, 0)
        window_end = int(tend) + margin

    qseq = fasta_file_h.fetch(tchr, window_start, window_end)
    if not forward:
        qseq = reverse_complement(qseq)

    scoring = parasail.matrix_create("ACGT", 1, -2)
    res = parasail.ssw(qseq, tconsensus, 3, 1, scoring)
    if res is None:
        logger.debug(f"Alignment for breakpoint localization failed for {tchr},{tstart},{tend},{tdir}")
        return None

    # Bases of the fetched window that lie beyond the end of the alignment.
    unaligned_tail = len(qseq) - res.read_end1 - 1
    if forward:
        bp_pos_reference = tend - unaligned_tail
    else:
        bp_pos_reference = tstart + unaligned_tail

    return (bp_pos_reference, tconsensus[(res.ref_end1 + 1):])
示例#4
0
    def generate_paf_file(self, query_fasta, target_fasta, output_file):
        """Align each query record to the target and write one line per hit.

        Files are read line by line: a line starting with ``>`` is a header,
        any other line is taken as a complete sequence. For the target, only
        the last header and last non-header line are kept. Query/target id
        pairs whose ``parasail.ssw`` call returned ``None`` are appended to
        ``self.parasail_error``.

        Args:
            query_fasta: Path of the query file.
            target_fasta: Path of the target file.
            output_file: Path of the file to (over)write.

        Returns:
            Number of lines written to ``output_file``.
        """
        scoring = parasail.matrix_create("ACGT", 2, -2)
        written = 0

        with open(target_fasta, 'r') as target_handle:
            for raw in target_handle:
                stripped = raw.rstrip('\n')
                if raw.startswith('>'):
                    # Keep only the first space-delimited header token.
                    tid = stripped.split(' ')[0].lstrip('>')
                else:
                    tseq = stripped

        with open(query_fasta, 'r') as query_handle, open(output_file, 'w') as paf:
            for raw in query_handle:
                if raw.startswith('>'):
                    qid = raw.rstrip('\n').lstrip('>')
                    continue

                qseq = raw.rstrip('\n')
                res = parasail.ssw(qseq, tseq, 3, 1, scoring)
                if res is None:
                    self.parasail_error.append((qid, tid))
                    continue

                record = (f"{qid}\t{len(qseq)}\t{res.read_begin1}\t{res.read_end1}\t+\t"
                          f"{tid}\t{len(tseq)}\t{res.ref_begin1}\t{res.ref_end1}\t*\t*\t60")
                print(record, file = paf)
                written += 1

        return written
示例#5
0
def parasail_alignment(query, ref):
    """Align ``query`` to ``ref`` with ``parasail.nw_trace_scan_16``.

    Uses an "ACGT" matrix scoring 2 / -20, gap open 50 and gap extend 0.

    Returns:
        The result of ``cigar_to_seq`` for the alignment's decoded CIGAR, or
        ``None`` (after printing a message) when the result is flagged as
        saturated.
    """
    scoring = parasail.matrix_create("ACGT", 2, -20)
    aligned = parasail.nw_trace_scan_16(query, ref, 50, 0, scoring)
    if not aligned.saturated:
        return cigar_to_seq(aligned.cigar.decode, query, ref)

    print("SATURATED!")
    return None
示例#6
0
def ssw_check_parasail(query, target):
    """Align every query record to every target record on both strands.

    Each query and its reverse complement are aligned to each target with
    ``parasail.ssw``; the forward hit is reported only when its score is
    strictly greater than the reverse hit's score.

    Args:
        query: Passed to ``read``, which yields ``(id, seq, qual)`` triples.
        target: Passed to ``read`` once per query record.

    Returns:
        Dict mapping target id to
        ``[score, qstart, qend, tstart, tend, strand]`` with 1-based
        coordinates. The entry for a target is overwritten for every query,
        so only the last query's values remain.
    """
    scoring = parasail.matrix_create("ACGT", 2, -2)
    alignment_info = {}

    for _, sQSeq, _ in read(query):
        rc_seq = reverse_complement(sQSeq)

        for sTId, sTSeq, _ in read(target):
            fwd = parasail.ssw(sQSeq, sTSeq, 3, 1, scoring)
            rev = parasail.ssw(rc_seq, sTSeq, 3, 1, scoring)

            if fwd.score1 > rev.score1:
                entry = [
                    fwd.score1,
                    int(fwd.read_begin1 + 1),
                    int(fwd.read_end1 + 1),
                    int(fwd.ref_begin1 + 1),
                    int(fwd.ref_end1 + 1),
                    '+',
                ]
            else:
                # Map reverse-complement query coordinates back onto the
                # original query orientation.
                entry = [
                    rev.score1,
                    int(len(sQSeq) - rev.read_end1),
                    int(len(sQSeq) - rev.read_begin1),
                    int(rev.ref_begin1 + 1),
                    int(rev.ref_end1 + 1),
                    '-',
                ]

            alignment_info[sTId] = entry

    return alignment_info
示例#7
0
def parasail_alignment(s1,
                       s2,
                       match_score=2,
                       mismatch_penalty=-2,
                       opening_penalty=3,
                       gap_ext=1):
    """Align ``s1`` to ``s2`` with parasail's ``sg_trace_scan`` routines.

    The 16-bit routine is tried first; if its result is flagged as
    saturated, the 32-bit routine is run instead.

    Args:
        s1: First sequence.
        s2: Second sequence.
        match_score: Matrix score for a match.
        mismatch_penalty: Matrix score for a mismatch.
        opening_penalty: Gap open penalty.
        gap_ext: Gap extension penalty.

    Returns:
        ``(s1_alignment, s2_alignment, cigar_string, cigar_tuples, score)``.
    """
    scoring = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    aligned = parasail.sg_trace_scan_16(s1, s2, opening_penalty, gap_ext,
                                        scoring)
    if aligned.saturated:
        print("SATURATED!", len(s1), len(s2))
        aligned = parasail.sg_trace_scan_32(s1, s2, opening_penalty, gap_ext,
                                            scoring)
        print("computed 32 bit instead")

    # The decoded CIGAR is turned into text differently on Python 2 and 3.
    raw_cigar = aligned.cigar.decode
    if sys.version_info[0] >= 3:
        cigar_string = str(raw_cigar, 'utf-8')
    else:
        cigar_string = str(raw_cigar).decode('utf-8')

    s1_alignment, s2_alignment, cigar_tuples = cigar_to_seq(
        cigar_string, s1, s2)
    return s1_alignment, s2_alignment, cigar_string, cigar_tuples, aligned.score
示例#8
0
def align_secondary_reference(primary_flank, secondary_ref):
    """Align a primary-reference slice against a whole secondary reference.

    The alignment is computed with ``parasail.sg_dx_trace_striped_sat``
    (semi-global; gaps at the beginning and end of the second sequence are
    not penalised).

    Args:
        primary_flank: Record with a ``seq`` attribute and a length.
        secondary_ref: Record with ``seq`` and ``id`` attributes.

    Returns:
        ``SeqRecord`` holding the aligned part of the secondary reference,
        with the secondary reference's id.

    Raises:
        FailedAlignmentError: If the aligned window contains a gap or its
            length differs from ``len(primary_flank)``.
    """
    scoring = parasail.matrix_create("ACGT", 2, -1)
    gap_open, gap_extend = 2, 1

    trace = parasail.sg_dx_trace_striped_sat(str(primary_flank.seq),
                                             str(secondary_ref.seq), gap_open,
                                             gap_extend, scoring)
    traceback = trace.get_traceback()

    # Window of the traceback ending at the last aligned reference position.
    window = slice(trace.end_ref - trace.end_query, trace.end_ref + 1)
    aligned_query = traceback.query[window]
    aligned_ref = traceback.ref[window]

    contains_gap = "-" in aligned_query + aligned_ref
    if contains_gap or len(primary_flank) != len(aligned_ref):
        raise FailedAlignmentError(
            "Alignment failed between primary and secondary reference.",
            reference=secondary_ref,
        )

    return SeqRecord(Seq(aligned_ref), id=secondary_ref.id)
def align_affine(query: str, target: str, match_score: int, mismatch_cost: int,
                 gap_open_cost: int, gap_extension_cost: int) -> Cigar:
    """Align ``query`` to ``target`` with ``parasail.nw_trace``.

    All four scoring arguments are given as non-negative magnitudes; the
    mismatch cost is negated when the "ACGT" matrix is built.

    Returns:
        ``Cigar`` built from the alignment's CIGAR decoded as UTF-8.
    """
    scoring_values = (match_score, mismatch_cost, gap_open_cost,
                      gap_extension_cost)
    assert min(scoring_values) >= 0, "Specify positive integers"

    substitution = parasail.matrix_create("ACGT", match_score, -mismatch_cost)
    alignment = parasail.nw_trace(query, target, gap_open_cost,
                                  gap_extension_cost, substitution)
    return Cigar(alignment.cigar.decode.decode('utf-8'))
示例#10
0
def parasail_block_alignment(s1,
                             s2,
                             k,
                             match_id,
                             x_acc="",
                             y_acc="",
                             match_score=2,
                             mismatch_penalty=-2,
                             opening_penalty=5,
                             gap_ext=1,
                             ends_discrepancy_threshold=0):
    """Align ``s1`` to ``s2`` and measure how much of it lies in matching windows.

    The sequences are aligned with parasail's ``sg_trace_scan`` routines
    (16-bit first, 32-bit if the result is flagged as saturated). A window
    of ``k`` alignment columns then slides over the alignment; a window
    counts as aligned when it holds at least ``match_id`` identical columns.

    Args:
        s1: First sequence.
        s2: Second sequence.
        k: Window size in alignment columns.
        match_id: Minimum identical columns for a window to count.
        x_acc: Not used by this function.
        y_acc: Not used by this function.
        match_score: Matrix score for a match.
        mismatch_penalty: Matrix score for a mismatch.
        opening_penalty: Gap open penalty.
        gap_ext: Gap extension penalty.
        ends_discrepancy_threshold: Not used by this function.

    Returns:
        ``(s1, s2, (s1_alignment, s2_alignment, alignment_ratio))`` where
        ``alignment_ratio`` is the number of aligned windows divided by
        ``len(s1)``.
    """
    scoring = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    aligned = parasail.sg_trace_scan_16(s1, s2, opening_penalty, gap_ext,
                                        scoring)
    if aligned.saturated:
        print("SATURATED!")
        aligned = parasail.sg_trace_scan_32(s1, s2, opening_penalty, gap_ext,
                                            scoring)

    raw_cigar = aligned.cigar.decode
    if sys.version_info[0] >= 3:
        cigar_string = str(raw_cigar, 'utf-8')
    else:
        cigar_string = str(raw_cigar).decode('utf-8')

    s1_alignment, s2_alignment = cigar_to_seq(cigar_string, s1, s2)

    # 1 where the two aligned rows agree, 0 elsewhere.
    match_vector = [int(a == b) for a, b in zip(s1_alignment, s2_alignment)]

    # Slide a k-wide window, updating the count incrementally.
    window = deque(match_vector[:k])
    running = sum(window)
    aligned_region = [1 if running >= match_id else 0]
    for incoming in match_vector[k:]:
        running += incoming - window.popleft()
        window.append(incoming)
        aligned_region.append(1 if running >= match_id else 0)

    alignment_ratio = sum(aligned_region) / float(len(s1))
    return (s1, s2, (s1_alignment, s2_alignment, alignment_ratio))
def parasail_alignment(query, ref):
    """Align ``query`` to ``ref`` with ``parasail.nw_trace_scan_32``.

    Uses an "ACGT" matrix scoring 2 / -4, gap open 12 and gap extend 1.

    Returns:
        ``(cigar_string, cigar_to_seq(cigar_string, query, ref))``, or
        ``None`` (after printing a message) when the result is flagged as
        saturated.
    """
    scoring = parasail.matrix_create("ACGT", 2, -4)
    aligned = parasail.nw_trace_scan_32(query, ref, 12, 1, scoring)
    if aligned.saturated:
        print("SATURATED!")
        return None

    raw_cigar = aligned.cigar.decode
    if sys.version_info[0] >= 3:
        cigar_string = str(raw_cigar, 'utf-8')
    else:
        cigar_string = str(raw_cigar).decode('utf-8')

    return cigar_string, cigar_to_seq(cigar_string, query, ref)
示例#12
0
    def __init__(self, elements, gap_open=3, gap_extend=3, matrix=parasail.blosum62, useIdentity=False):
        """Store the elements and the parasail alignment parameters.

        Args:
            elements: Sequence of elements, paired positionally with the
                items yielded by ``self.indices()``.
            gap_open: Gap open penalty.
            gap_extend: Gap extension penalty.
            matrix: Substitution matrix used unless ``useIdentity`` is set.
            useIdentity: When true, use a match=1 / mismatch=0 matrix over
                the alphabet 'ACDEFGHIKLMNPQRSTVWXY' instead of ``matrix``.
        """
        self.sim_cache = {}
        self.elements = elements

        # Two-way lookup between an element and the first component of its index.
        self.e2i = dict((elem, idx[0]) for elem, idx in zip(elements, self.indices()))
        self.i2e = dict((idx[0], elem) for elem, idx in zip(elements, self.indices()))

        self.matrix = matrix
        self.gap_extend = gap_extend
        self.gap_open = gap_open
        if useIdentity:
            identity = parasail.matrix_create(alphabet='ACDEFGHIKLMNPQRSTVWXY', match=1, mismatch=0)
            self.matrix = identity

        # Keyword arguments bundled for later use.
        self.paraParams = {
            'open': self.gap_open,
            'extend': self.gap_extend,
            'matrix': self.matrix,
        }
示例#13
0
def parasail_block_alignment(s1,
                             s2,
                             k,
                             match_id,
                             match_score=2,
                             mismatch_penalty=-2,
                             opening_penalty=5,
                             gap_ext=1):
    """Align ``s1`` to ``s2`` and measure how much of it lies in matching windows.

    The sequences are aligned with parasail's ``sg_trace_scan`` routines
    (16-bit first, 32-bit if the result is flagged as saturated). A window
    of ``k`` alignment columns then slides over the alignment; a window
    counts as aligned when it holds at least ``match_id`` identical columns.

    Args:
        s1: First sequence.
        s2: Second sequence.
        k: Window size in alignment columns.
        match_id: Minimum identical columns for a window to count.
        match_score: Matrix score for a match.
        mismatch_penalty: Matrix score for a mismatch.
        opening_penalty: Gap open penalty.
        gap_ext: Gap extension penalty.

    Returns:
        ``(s1, s2, (s1_alignment, s2_alignment, alignment_ratio))`` where
        ``alignment_ratio`` is the number of aligned windows divided by
        ``len(s1)``.
    """
    scoring = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    aligned = parasail.sg_trace_scan_16(s1, s2, opening_penalty, gap_ext,
                                        scoring)
    if aligned.saturated:
        print("SATURATED!", len(s1), len(s2))
        aligned = parasail.sg_trace_scan_32(s1, s2, opening_penalty, gap_ext,
                                            scoring)
        print("computed 32 bit instead")

    # The decoded CIGAR is turned into text differently on Python 2 and 3.
    raw_cigar = aligned.cigar.decode
    if sys.version_info[0] >= 3:
        cigar_string = str(raw_cigar, 'utf-8')
    else:
        cigar_string = str(raw_cigar).decode('utf-8')

    s1_alignment, s2_alignment = help_functions.cigar_to_seq(
        cigar_string, s1, s2)

    def window_flag(count):
        """Return 1 when a window holds enough identical columns, else 0."""
        return 1 if count >= match_id else 0

    # 1 where the two aligned rows agree, 0 elsewhere.
    match_vector = [int(a == b) for a, b in zip(s1_alignment, s2_alignment)]

    window = deque(match_vector[:k])
    running = sum(window)
    aligned_region = [window_flag(running)]
    for incoming in match_vector[k:]:
        outgoing = window.popleft()
        running = running - outgoing + incoming
        window.append(incoming)
        aligned_region.append(window_flag(running))

    alignment_ratio = sum(aligned_region) / float(len(s1))
    return (s1, s2, (s1_alignment, s2_alignment, alignment_ratio))
示例#14
0
def get_alignment(primer, reference):
    """Align a primer against a reference with parasail and format the hit.

    Left primers are aligned to the reference sequence, right primers to
    its reverse complement, using ``parasail.sg_dx_trace_striped_sat``.

    Args:
        primer: Object with ``direction``, ``seq`` and ``name`` attributes.
        reference: Record with ``seq``, ``id``, ``reverse_complement()``
            and a length.

    Returns:
        ``(identity, formatted_alignment)``, or ``None`` when the identity
        over the aligned window is below 0.7.
    """
    scoring = parasail.matrix_create("ACGT", 2, -1)
    gap_open = 2
    gap_extend = 1
    min_identity = 0.7

    if primer.direction == Primer.Direction.left:
        ref = reference.seq
    elif primer.direction == Primer.Direction.right:
        ref = reference.reverse_complement().seq

    # Semi-global: gaps at the beginning and end of the second sequence are
    # not penalised.
    trace = parasail.sg_dx_trace_striped_sat(str(primer.seq), str(ref),
                                             gap_open, gap_extend, scoring)
    traceback = trace.get_traceback()

    query_end = trace.end_query + 1
    ref_end = trace.end_ref + 1

    # Same window is cut from all three traceback rows.
    window = slice(ref_end - query_end, ref_end)
    aln_query = traceback.query[window]
    cigar = traceback.comp[window]
    aln_ref = traceback.ref[window]

    identity = cigar.count("|") / len(cigar)
    if identity < min_identity:
        return None

    refid = reference.id[:30]
    name = primer.name[:30]
    formatted_query = f"{name: <30} {1: >6} {aln_query} {query_end}"
    if primer.direction == Primer.Direction.left:
        formatted_ref = (
            f"{refid: <30} {ref_end - query_end + 1: >6} {aln_ref} {ref_end}")
    elif primer.direction == Primer.Direction.right:
        # Report coordinates on the original (non-reversed) reference.
        rev_start = len(reference) - ref_end
        formatted_ref = (
            f"{refid: <30} {rev_start + query_end: >6} {aln_ref} {rev_start + 1}"
        )
    formatted_cigar = f"{'': <30} {'': >6} {cigar}"

    rows = ["", formatted_query, formatted_cigar, formatted_ref]
    formatted_alignment = "\n".join(rows)

    del trace

    return (identity, formatted_alignment)
示例#15
0
def make_scoring_matrix(match_reward_coding):
    """Build a parasail matrix over "ACGTacgt*" with custom cross-case scores.

    The matrix is created with match 2 / mismatch -4 and a fourth argument
    of ``True``. The cells (i, i + 4) and (i + 4, i) for i in 0..3 are then
    set to ``match_reward_coding``; presumably these pair each upper-case
    base with its lower-case counterpart -- confirm that matrix indices
    follow the alphabet order.

    Args:
        match_reward_coding: Score written into the eight cross cells.

    Returns:
        The modified parasail matrix.
    """
    non_coding_match = 2
    mismatch = -4
    matrix = parasail.matrix_create("ACGTacgt*", non_coding_match, mismatch, True)

    half = 4
    for row in range(half):
        matrix[row, row + half] = match_reward_coding
    for col in range(half):
        matrix[col + half, col] = match_reward_coding
    return matrix
示例#16
0
def test1():
    """Exercise element assignment on built-in and user-created parasail matrices.

    The underlying matrix is printed after each scenario; for the last two
    scenarios the mapper is printed as well.
    """
    # Assign into the shared built-in blosum62 object itself.
    # NOTE(review): whether parasail permits writing to a built-in matrix is
    # not visible here -- confirm against the library.
    matrix = parasail.blosum62
    matrix[3, 4] = 100
    print(matrix.matrix)

    # Same assignment, but on a copy of the built-in matrix.
    matrix = parasail.blosum62.copy()
    matrix[3, 4] = 100
    print(matrix.matrix)

    # User-created matrix: single-index, slice and set_value() assignment.
    matrix = parasail.matrix_create("ACGT", 10, 1)
    matrix[2] = 200
    matrix[1:3, 1:3] = 300
    matrix.set_value(0, 4, 400)
    print(matrix.matrix)

    # Mixed-case alphabet without the extra fourth argument.
    matrix = parasail.matrix_create("AcgT", 10, 1)
    print(matrix.matrix)
    print(matrix.mapper)

    # Mixed-case alphabet with the fourth argument set to True
    # (presumably the case-sensitivity flag -- confirm).
    matrix = parasail.matrix_create("AcgT", 10, 1, True)
    print(matrix.matrix)
    print(matrix.mapper)
示例#17
0
def main():
    '''
    Entry point for Readucks. Reads the command-line arguments, configures
    the alignment settings and output options, then hands over to
    process_files to do the actual work.
    :return: None
    '''
    args = get_arguments()

    # 'native' unless a PCR or rapid flag is given; rapid wins if both are set.
    barcode_set = 'pcr' if args.pcr_barcodes else 'native'
    if args.rapid_barcodes:
        barcode_set = 'rapid'

    # Thresholds arrive as percentages and are stored as fractions.
    # NOTE(review): 'barcode_set' here is always "native", independent of the
    # barcode_set chosen above (which is passed to process_files separately)
    # -- confirm this is intended.
    settings = dict(
        barcode_set="native",
        single_barcode=args.single,
        threshold=args.threshold / 100.0,
        secondary_threshold=None,
    )
    if args.secondary_threshold:
        settings['secondary_threshold'] = args.secondary_threshold / 100.0

    # The last two scheme values are negated before being passed on; the
    # first two feed the "ACGT" matrix.
    scheme = args.scoring_scheme_vals
    set_alignment_settings(
        -scheme[2], -scheme[3],
        parasail.matrix_create("ACGT", scheme[0], scheme[1]))

    # Only an existing directory is used; otherwise the path stays None.
    output_path = None
    if args.output_dir and os.path.isdir(args.output_dir):
        output_path = args.output_dir.rstrip("/") + "/"

    output = dict(
        path=output_path,
        prefix=args.prefix,
        bin_barcodes=args.bin_barcodes,
        annotate_files=args.annotate_files,
        extended_info=args.extended_info,
        bin_files={},
    )

    process_files(args.input_path, output, barcode_set, args.limit_barcodes_to,
                  settings, args.verbosity, args.threads)
示例#18
0
def ssw_alignment(s1,
                  s2,
                  match_score=2,
                  mismatch_penalty=-2,
                  opening_penalty=3,
                  gap_ext=1):
    """Run ``parasail.ssw`` on ``s1``/``s2`` and dump the result for inspection.

    The result object, its type, its ``dir()`` listing and every entry of
    its ``__dict__`` are printed.

    NOTE(review): the return statement uses ``s1_alignment``,
    ``s2_alignment``, ``cigar_string`` and ``cigar_tuples``, none of which is
    assigned in this function; unless they exist as module globals the call
    ends in ``NameError``. It also reads ``result.score`` whereas other code
    in this file reads ``score1`` from ``ssw`` results -- confirm.

    Args:
        s1: First sequence.
        s2: Second sequence.
        match_score: Matrix score for a match.
        mismatch_penalty: Matrix score for a mismatch.
        opening_penalty: Gap open penalty.
        gap_ext: Gap extension penalty.
    """
    user_matrix = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    result = parasail.ssw(s1, s2, opening_penalty, gap_ext, user_matrix)
    # Debug output: show what the result object exposes.
    print(result, type(result), dir(result))
    print(dir(result))
    for attr, value in result.__dict__.items():
        print(attr, value)
    # print(result.ref_begin1, result.ref_end1, result.read_begin1, result.read_end1)
    # print()
    # NOTE(review): the first four names below are not defined in this function.
    return s1_alignment, s2_alignment, cigar_string, cigar_tuples, result.score
示例#19
0
def validate_reconstructed_seq(seq, orig):
    """Check a reconstructed sequence against the original it came from.

    Because the reconstructed sequence can be longer, deletions are not
    checked (they could be exon skipping or minor base errors). Only a large
    insertion is treated as a failure, since it would indicate an error in
    the bubble solution.

    The sequences are aligned with ``parasail.sg_qx_trace`` (gap open 3,
    gap extend 1, "ACGT" matrix scoring 2 / -5).

    Args:
        seq: The reconstructed sequence.
        orig: The original sequence.

    Returns:
        ``(is_valid, cigar)`` where ``cigar`` is the alignment's decoded
        CIGAR. ``is_valid`` is ``False`` when the score is below 90% of the
        maximum score for ``orig`` (2 per base) or when any insertion is
        longer than 5.
    """
    o1 = parasail.sg_qx_trace(seq, orig, 3, 1, parasail.matrix_create("ACGT", 2, -5))
    cigar = o1.cigar.decode

    # The original code referenced an undefined name ``l2`` here. By analogy
    # with node_is_similar(), where l2 is the length of the second sequence
    # passed to the aligner, the threshold is based on len(orig).
    min_score = len(orig) * 2 * .90
    if o1.score < min_score:
        return False, cigar

    for num, op in iter_cigar_string(cigar):
        if op == 'I' and num > 5:
            return False, cigar
    return True, cigar
示例#20
0
def pair_align(reference, query, params=DEFAULT_ALIGN_PARAMS):
    """ Perform pairwise alignment with parasail's ``sw_striped_32``.
    :param reference: Reference sequence.
    :param query: Query sequence.
    :param params: Alignment parameters in a dictionary with the keys
        'match', 'mismatch', 'gap_open' and 'gap_extend'.
    :returns: The result object returned by parasail.
    """
    scoring = parasail.matrix_create("ACGT", params['match'],
                                     params['mismatch'])
    return parasail.sw_striped_32(reference, query, params['gap_open'],
                                  params['gap_extend'], scoring)
示例#21
0
文件: config.py 项目: t-neumann/qcat
    def update_matrix(self):
        """
        Rebuild the parasail scoring matrix over the alphabet "ATGCNX".

        'N' is used as wildcard character for barcodes and has its own match
        parameter. 'X' is used as wildcard character for modified bp as in
        the 16S sequencing adapter.

        The flat offsets below presumably address the 'N' and 'X' rows and
        columns of a row-major matrix that is seven entries wide -- confirm
        against parasail's matrix layout.

        :return: None
        """
        self.matrix = parasail.matrix_create("ATGCNX", self.match,
                                             self.mismatch)

        n_offsets = (4, 11, 18, 25, 28, 29, 30, 31, 32)
        x_offsets = (5, 12, 19, 26, 33, 35, 36, 37, 38, 39, 40)

        # First the cells that receive the 'N' score, then the cells reset to 0.
        for offsets, score in ((n_offsets, self.nmatch), (x_offsets, 0)):
            for offset in offsets:
                self.matrix.pointer[0].matrix[offset] = score
示例#22
0
def create_matrix(match, mismatch, nmatch):
    """
    Build a parasail scoring matrix over the alphabet "ATGCNX".

    'N' is used as wildcard character for barcodes and has its own match
    parameter. 'X' is used as wildcard character for modified bp as in the
    16S sequencing adapter. Taken from qcat.

    The flat offsets below presumably address the 'N' and 'X' rows and
    columns of a row-major matrix that is seven entries wide -- confirm
    against parasail's matrix layout.

    :param match: Matrix score for a match.
    :param mismatch: Matrix score for a mismatch.
    :param nmatch: Score written into the 'N' cells.
    :return: parasail matrix
    """
    matrix = parasail.matrix_create("ATGCNX", match, mismatch)

    overrides = [(offset, nmatch)
                 for offset in (4, 11, 18, 25, 28, 29, 30, 31, 32)]
    overrides += [(offset, 0)
                  for offset in (5, 12, 19, 26, 33, 35, 36, 37, 38, 39, 40)]

    for offset, score in overrides:
        matrix.pointer[0].matrix[offset] = score
    return matrix
示例#23
0
    def __init__(self,
                 elements,
                 gap_open=3,
                 gap_extend=3,
                 matrix=parasail.blosum62,
                 useIdentity=False):
        """Store the elements and the parasail alignment parameters.

        Args:
            elements: Sequence of elements, paired positionally with the
                items yielded by ``self.indices()``.
            gap_open: Gap open penalty.
            gap_extend: Gap extension penalty.
            matrix: Substitution matrix used unless ``useIdentity`` is set.
            useIdentity: When true, use a match=1 / mismatch=0 matrix over
                the alphabet 'ACDEFGHIKLMNPQRSTVWXY' instead of ``matrix``.
        """
        self.sim_cache = {}
        self.elements = elements

        # Two-way lookup between an element and the first component of its index.
        self.e2i = dict(
            (elem, idx[0]) for elem, idx in zip(elements, self.indices()))
        self.i2e = dict(
            (idx[0], elem) for elem, idx in zip(elements, self.indices()))

        self.matrix = matrix
        self.gap_extend = gap_extend
        self.gap_open = gap_open
        if useIdentity:
            identity = parasail.matrix_create(
                alphabet='ACDEFGHIKLMNPQRSTVWXY', match=1, mismatch=0)
            self.matrix = identity

        # Keyword arguments bundled for later use.
        self.paraParams = {
            'open': self.gap_open,
            'extend': self.gap_extend,
            'matrix': self.matrix,
        }
示例#24
0
def node_is_similar(seq1, seq2):
    """Decide whether two node sequences are similar enough.

    Empty sequences are never similar; two sequences of length <= 2 always
    are. Otherwise the longer sequence is aligned against the shorter one
    with ``parasail.sg_qx_trace`` (gap open 3, gap extend 1, "ACGT" matrix
    scoring 2 / -5) and the score is compared with a threshold that depends
    on the module-level ``EXPECTED_ERR_RATE``.

    Args:
        seq1: First sequence.
        seq2: Second sequence.

    Returns:
        ``True`` if the sequences are considered similar, else ``False``.

    Raises:
        Exception: If ``EXPECTED_ERR_RATE`` is 2 or above.
    """
    l1 = len(seq1)
    l2 = len(seq2)
    if l1 == 0 or l2 == 0:
        return False
    if l1 <= 2 and l2 <= 2:
        return True

    # Always make seq1 (and l1) the longer one.
    if l1 < l2:
        l1, l2 = l2, l1
        seq1, seq2 = seq2, seq1

    o1 = parasail.sg_qx_trace(seq1, seq2, 3, 1, parasail.matrix_create("ACGT", 2, -5))

    if EXPECTED_ERR_RATE == 0:
        # Score must exceed the perfect-match score of the shorter sequence.
        return o1.score > l2*2*1.0
    elif EXPECTED_ERR_RATE < 2:
        # NOTE(review): this branch scales by the longer length l1 while the
        # zero-error branch uses the shorter l2 -- confirm this is intended.
        return o1.score > l1*2*0.8
    else:
        raise Exception("Expected error rate not implemented for {0}% and above".format(EXPECTED_ERR_RATE))
    # An unreachable trailing ``return res is not None`` (``res`` was never
    # defined) has been removed; every branch above returns or raises.
示例#25
0
文件: config.py 项目: t-neumann/qcat
    def __init__(self, config_path=None):
        """Set the default parameters, build the matrices, then load a config.

        Args:
            config_path: Optional path passed to ``self.read`` once all
                defaults are in place.
        """
        # Scoring defaults.
        self._match, self._nmatch, self._mismatch = 5, -1, -2
        self._gap_open = self._gap_extend = 2

        self._max_align_length = 150

        self._extracted_barcode_extension = self._barcode_context_length = 11

        # update_matrix() is called only after the defaults above exist.
        self._matrix = None
        self.update_matrix()

        self._matrix_barcode = parasail.matrix_create("ATGCN", 1, -1)

        if config_path is None:
            return
        self.read(config_path)
示例#26
0
def parasail_local(s1,
                   s2,
                   match_score=2,
                   mismatch_penalty=-2,
                   opening_penalty=3,
                   gap_ext=1):
    """Locally align ``s1`` to ``s2`` with parasail and expand the CIGAR.

    The 16-bit ``sw_trace_scan`` routine is tried first; if its result is
    flagged as saturated, the 32-bit local routine is run instead. The
    aligned rows, coordinates and CIGAR are printed to stdout.

    Args:
        s1: First sequence (printed as "read").
        s2: Second sequence (printed as "Rref").
        match_score: Matrix score for a match.
        mismatch_penalty: Matrix score for a mismatch.
        opening_penalty: Gap open penalty.
        gap_ext: Gap extension penalty.

    Returns:
        ``(s1_alignment, s2_alignment, cigar_string, cigar_tuples, score)``.
    """
    user_matrix = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    result = parasail.sw_trace_scan_16(s1, s2, opening_penalty, gap_ext,
                                       user_matrix)
    if result.saturated:
        print("SATURATED!", len(s1), len(s2))
        # Fix: the fallback previously called the semi-global
        # sg_trace_scan_32, silently switching alignment mode on saturation.
        result = parasail.sw_trace_scan_32(s1, s2, opening_penalty, gap_ext,
                                           user_matrix)
        print("computed 32 bit instead")

    # The decoded CIGAR is turned into text differently on Python 2 and 3.
    if sys.version_info[0] < 3:
        cigar_string = str(result.cigar.decode).decode('utf-8')
    else:
        cigar_string = str(result.cigar.decode, 'utf-8')

    # NOTE(review): these slices stop before index end_query / end_ref. If
    # parasail reports those as inclusive end positions, the last aligned
    # base is dropped -- confirm against parasail and cigar_to_seq.
    s1_alignment, s2_alignment, cigar_tuples = cigar_to_seq(
        cigar_string, s1[result.cigar.beg_query:result.end_query],
        s2[result.cigar.beg_ref:result.end_ref])

    print("read", s1_alignment)
    print("Rref", s2_alignment)
    print(result.cigar.beg_query, result.end_query)
    print(result.cigar.beg_ref, result.end_ref)
    print(cigar_string)

    return s1_alignment, s2_alignment, cigar_string, cigar_tuples, result.score
def parasail_alignment(read,
                       reference,
                       x_acc="",
                       y_acc="",
                       match_score=2,
                       mismatch_penalty=-2,
                       opening_penalty=2,
                       gap_ext=1,
                       ends_discrepancy_threshold=0):
    """Align ``read`` to ``reference`` with parasail's ``sg_trace_scan`` routines.

    The 16-bit routine is tried first; if its result is flagged as
    saturated, the 32-bit routine is run instead.

    Args:
        read: Read sequence.
        reference: Reference sequence.
        x_acc: Not used by this function.
        y_acc: Not used by this function.
        match_score: Matrix score for a match.
        mismatch_penalty: Matrix score for a mismatch.
        opening_penalty: Gap open penalty.
        gap_ext: Gap extension penalty.
        ends_discrepancy_threshold: Not used by this function.

    Returns:
        ``(read_alignment, ref_alignment)`` as produced by ``cigar_to_seq``.
    """
    scoring = parasail.matrix_create("ACGT", match_score, mismatch_penalty)
    aligned = parasail.sg_trace_scan_16(read, reference, opening_penalty,
                                        gap_ext, scoring)
    if aligned.saturated:
        print("SATURATED!")
        aligned = parasail.sg_trace_scan_32(read, reference, opening_penalty,
                                            gap_ext, scoring)

    raw_cigar = aligned.cigar.decode
    if sys.version_info[0] >= 3:
        cigar_string = str(raw_cigar, 'utf-8')
    else:
        cigar_string = str(raw_cigar).decode('utf-8')

    read_alignment, ref_alignment = cigar_to_seq(cigar_string, read, reference)
    return read_alignment, ref_alignment
示例#28
0
def generate_paf_file(query_fasta, target_fasta, output_file):

    user_matrix = parasail.matrix_create("ACGT", 2, -2)

    with open(target_fasta, 'r') as hin:
        for line in hin:
            if line.startswith('>'):
                tid = line.rstrip('\n').lstrip('>')
            else:
                tseq = line.rstrip('\n')

    with open(query_fasta, 'r') as hin, open(output_file, 'w') as hout:
        for line in hin:
            if line.startswith('>'):
                qid = line.rstrip('\n').lstrip('>')
            else:
                qseq = line.rstrip('\n')

                res = parasail.ssw(qseq, tseq, 3, 1, user_matrix)
                print("%s\t%d\t%d\t%d\t+\t%s\t%d\t%d\t%d\t*\t*\t60" %
                      (qid, len(qseq), res.read_begin1, res.read_end1, tid,
                       len(tseq), res.ref_begin1, res.ref_end1),
                      file=hout)
                """ 
示例#29
0
文件: demux.py 项目: qiuosier/Cancer
 def create_score_matrix(self):
     """Build the parasail matrix over "ACGTN" used for alignment.

     ``self.score`` is the match score; ``self.penalty`` is negated to give
     the mismatch score.
     """
     mismatch = -1 * self.penalty
     return parasail.matrix_create("ACGTN", self.score, mismatch)
示例#30
0
import os
import logging
import math
import dnaio
import gc
import parasail
from .fastq_file import ReadIdentifier
logger = logging.getLogger(__name__)

# Match score given to matrix_create below; the mismatch score there is -1.
match_score = 0
# Scoring matrix over "ACGTN", built once when the module is imported.
DEFAULT_SCORE_MATRIX = parasail.matrix_create("ACGTN", match_score, -1)


class ReadPair:
    def __init__(self, read1, read2):
        self.read1 = read1
        self.read2 = read2

        id_1 = ReadIdentifier(self.read1.name)
        id_2 = ReadIdentifier(self.read2.name)

        if id_1.identifier != id_2.identifier:
            raise ValueError(
                "Identifiers of the read pairs does not match each other.\n"
                "Read1: %s\n"
                "Read2: %s" % (id_1.identifier, id_2.identifier))

        self._identifier = id_1.identifier

        if id_1.pair_member and id_2.pair_member:
            if id_1.pair_member == id_2.pair_member:
示例#31
0
def test3():
    """Align "acgt" to itself with ``parasail.sw`` and expect a score of 4.

    Uses a lower-case "acgt" matrix scoring 1 / -1, gap open 10 and gap
    extend 1. The result is deleted before the matrix.
    """
    scoring = parasail.matrix_create("acgt", 1, -1)
    alignment = parasail.sw("acgt", "acgt", 10, 1, scoring)
    assert alignment.score == 4
    del alignment
    del scoring
示例#32
0
def test21():
    """Print the traceback attributes of "ACGT" vs "AcgT" using ``result.traceback``.

    The matrix covers "ACGTacgt" (scores 2 / 1) and is created with a fourth
    argument of ``True``.
    """
    scoring = parasail.matrix_create("ACGTacgt", 2, 1, True)
    alignment = parasail.sw_trace("ACGT", "AcgT", 10, 1, scoring)
    print_traceback_attributes(alignment.traceback)
示例#33
0
def test4():
    """Print the traceback attributes of "ACGT" vs "AcgT" after ``set_case_sensitive(True)``.

    The matrix covers upper-case "ACGT" only (scores 2 / 1).
    """
    parasail.set_case_sensitive(True)
    scoring = parasail.matrix_create("ACGT", 2, 1)
    alignment = parasail.sw_trace("ACGT", "AcgT", 10, 1, scoring)
    print_traceback_attributes(alignment.traceback)
示例#34
0
def test22():
    """Print the traceback attributes of "ACGT" vs "AcgT" via ``get_traceback(case_sensitive=True)``.

    The matrix covers "ACGTacgt" (scores 2 / 1) and is created with a fourth
    argument of ``True``.
    """
    scoring = parasail.matrix_create("ACGTacgt", 2, 1, True)
    alignment = parasail.sw_trace("ACGT", "AcgT", 10, 1, scoring)
    print_traceback_attributes(alignment.get_traceback(case_sensitive=True))