Python states2alignment示例，deepblast.dataset.utils.states2alignment Python示例

示例#1

0

显示文件

def alignment_text(x, y, pred, truth, stats):
    """Render the ground-truth and predicted alignments as plain text.

    Parameters
    ----------
    x : str
        Protein X
    y : str
        Protein Y
    pred : list of int
        Predicted states
    truth : list of int
        Ground truth states
    stats : list of float
        List of statistics from roc_edges; paired positionally with the
        labels tp, fp, fn, perc_id, ppv, fnr, fdr.

    Returns
    -------
    str
        A line of ``label: value`` statistics (values rounded to two
        decimals), followed by a ``# Ground truth`` section and a
        ``# Prediction`` section, each listing the first two items
        returned by ``states2alignment`` on their own indented line.
    """
    # TODO: we got the truth and prediction edges swapped somewhere earlier
    truth_rows = states2alignment(truth, x, y)
    pred_rows = states2alignment(pred, x, y)

    labels = ['tp', 'fp', 'fn', 'perc_id', 'ppv', 'fnr', 'fdr']
    rounded = [np.round(value, 2) for value in stats]
    # zip stops at the shorter input, so surplus labels or stats are dropped.
    summary = ' '.join(
        f'{label}: {value}' for label, value in zip(labels, rounded))

    sections = [
        summary,
        '# Ground truth\n'
        f'    {truth_rows[0]}\n    {truth_rows[1]}',
        '# Prediction\n'
        f'    {pred_rows[0]}\n    {pred_rows[1]}',
    ]
    return '\n'.join(sections)

示例#2

0

显示文件

 def test_states2alignment_8(self):
     """Smoke test: ``states2alignment`` must run on this input.

     Nothing is asserted about the result; the test only fails if the
     call raises.
     """
     first_seq = 'HECDDCSKQFSRNNHLAKHLRAH'
     second_seq = 'YRCHKVCPYTFVGKSDLDLHQFITAH'
     state_path = np.array([
         1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 0, 0, 1
     ])
     # Note the argument order: the second sequence is passed before the
     # first one.
     states2alignment(state_path, second_seq, first_seq)

示例#3

0

显示文件

 def test_states2alignment_10(self):
     """Smoke test: ``states2alignment`` must run on this input.

     The state path mixes all three state values (0, 1 and 2). Nothing is
     asserted about the result; the test only fails if the call raises.
     """
     seq_a = 'YACSGGCGQNFRTMSEFNEHMIRLVH'
     seq_b = 'LICPKHTRDCGKVFKRNSSLRVHEH'
     state_path = np.array([
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 1, 1, 0, 1,
         2, 0, 1, 1, 1, 1
     ])
     states2alignment(state_path, seq_a, seq_b)

示例#4

0

显示文件

 def test_states2alignment_11(self):
     """Smoke test: ``states2alignment`` must run on this input.

     The state path is a long run of 2s, two 1s, then a long run of 0s.
     Nothing is asserted about the result; the test only fails if the
     call raises.
     """
     seq_a = 'LNCKEIKKYCEMSFRNPDDIRKHRGAIH'
     seq_b = 'YTCSSCNESLRTAWCLNKHLR'
     state_path = np.array([
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0
     ])
     states2alignment(state_path, seq_a, seq_b)

示例#5

0

显示文件

    def test_decoding2(self):
        """Smoke test: a decoded traceback can be fed to ``states2alignment``.

        Loads the matrix stored in ``dm.txt``, runs the Needleman-Wunsch
        decoder's traceback on it, and passes the resulting states to
        ``states2alignment``. Nothing is asserted about the result.
        """
        X = 'HECDRKTCDESFSTKGNLRVHKLGH'
        Y = 'LKCSGCGKNFKSQYAYKRHEQTH'

        decoder = NeedlemanWunschDecoder(self.operator)
        matrix = torch.Tensor(np.loadtxt(get_data_path('dm.txt')))
        path = decoder.traceback(matrix)
        # Each traceback entry unpacks into three fields; only the third
        # (the state) is used here.
        _, _, path_states = zip(*path)
        states2alignment(np.array(path_states), X, Y)

示例#6

0

显示文件

 def test_states2alignment_3(self):
     """Smoke test: ``states2alignment`` must run on a long state path.

     Both sequences start and end with an ``X`` character, and the state
     path contains long runs of 0s around a mostly-1 middle section.
     Nothing is asserted about the result; the test only fails if the
     call raises.
     """
     # First sequence, split across several string literals.
     x = ('XSDHGDVSLPPEDRVRALSQLGSAVEVNEDIPPRRYFRSGVEIIRMA'
          'SIYSEEGNIEHAFILYNKYITLFIEKLPKHRDYKSAVIPEKKDTVK'
          'KLKEIAFPKAEELKAELLKRYTKEYTEYNEEKKKEAEELARNMAIQ'
          'QELX')
     # Second, much shorter sequence.
     y = ('XIDVLRAKAAKERAERRLQSQQDDIDFKRAELALKRAMNRLSVAEMKX')
     # State path handed to states2alignment as a numpy integer array.
     s = np.array([
         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
         0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
     ])
     # The return value is intentionally ignored.
     states2alignment(s, x, y)

示例#7

0

显示文件

 def test_states2alignment_7(self):
     """Smoke test: ``states2alignment`` must run on a long state path.

     Both sequences start and end with an ``X`` character, and the state
     path is dominated by a long run of 2s followed by a mostly-1 tail.
     Nothing is asserted about the result; the test only fails if the
     call raises.
     """
     # First, shorter sequence.
     x = ('XGSSGSSGFDENWGADEELLLIDACETLGLGNWADIADYVGNARTKEECRDHYLKTYIEX')
     # Second sequence, split across several string literals.
     y = ('XGEIRVGNRYQADITDLLKEGEEDGRDQSRLETQVWEAHNPLTDKQIDQFLVVARSVGTF'
          'ARALDSLHMSAAAASRDITLFHAMDTLHKNIYDISKAISALVPQGGPVLCRDEMEEWSAS'
          'EANLFEEALEKYGKDFTDIQQDFLPWKSLTSIIEYYYMWKTTX')
     # State path handed to states2alignment as a numpy integer array.
     s = np.array([
         1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
     ])
     # The return value is intentionally ignored.
     states2alignment(s, x, y)

示例#8

0

显示文件

 def test_states2alignment_2(self):
     """Check the gapped output when only the second sequence is padded.

     The state string ``111:::111`` is converted character by character
     with ``tmstate_f``; the expected result leaves the first sequence
     intact and pads the second with three gaps on each side.
     """
     state_path = np.array([tmstate_f(symbol) for symbol in "111:::111"])
     aligned_x, aligned_y = states2alignment(state_path, "123456789", "abc")
     self.assertEqual(aligned_x, "123456789")
     self.assertEqual(aligned_y, "---abc---")

示例#9

0

显示文件

 def test_states2alignment_1(self):
     """Check the gapped output when both sequences receive gaps.

     The state string ``111:::222`` is converted character by character
     with ``tmstate_f``; the expected result pads the first sequence with
     three trailing gaps and the second with three leading gaps.
     """
     state_path = np.array([tmstate_f(symbol) for symbol in "111:::222"])
     aligned_x, aligned_y = states2alignment(state_path, "123456", "abcdef")
     self.assertEqual(aligned_x, "123456---")
     self.assertEqual(aligned_y, "---abcdef")

示例#10

0

显示文件

def deepblast_align(
    pairings: List[Tuple[str, str]],
    query_by_id: Dict[str, str],
    target_by_id: Dict[str, str],
    model_file: str,
    device: torch.device,
    batch_size: int,
) -> List[Tuple[str, str, str, str]]:
    """Aligns the given pairings using DeepBLAST

    Returns a list of query id, target id, query aligned, target aligned

    The model on its own takes between 740MiB (Titan X, torch 1.5) and 1284MiB (RTX 8000, torch 1.7)

    Note that the batch size has much less of an impact for DeepBLAST than for the embedders

    Parameters
    ----------
    pairings : list of (query id, target id) tuples to align.
    query_by_id : maps a query id to its sequence.
    target_by_id : maps a target id to its sequence.
    model_file : path of the checkpoint handed to
        ``LightningAligner.load_from_checkpoint``.
    device : device the model and the packed sequences are moved to.
    batch_size : upper bound on the number of pairings per batch.

    Returns
    -------
    One ``(query id, target id, aligned query, aligned target)`` tuple per
    pairing, in input order. An empty ``pairings`` yields an empty list
    without loading the model.
    """
    # An empty input would make the section count below 0, which
    # numpy.array_split rejects with a ValueError. There is nothing to
    # align, so return before paying for the checkpoint load.
    if len(pairings) == 0:
        return []

    model = LightningAligner.load_from_checkpoint(model_file).to(device)
    tokenizer = UniprotTokenizer()
    alignments = []
    # Naive batching: split into the fewest near-equal chunks such that no
    # chunk exceeds batch_size.
    batches = numpy.array_split(pairings,
                                math.ceil(len(pairings) / batch_size))
    for batch in tqdm(batches):
        # noinspection PyArgumentList
        queries = [
            torch.Tensor(tokenizer(query_by_id[query].encode())).long()
            for query, _ in batch
        ]
        # noinspection PyArgumentList
        targets = [
            torch.Tensor(tokenizer(target_by_id[target].encode())).long()
            for _, target in batch
        ]
        seqs, order = pack_sequences(queries, targets)
        gen = model.aligner.traceback(seqs.to(device), order)
        for (decoded, _), (query, target) in zip(gen, batch):
            # Each decoded step has three fields; only the state is needed.
            _, _, pred_states = zip(*decoded)
            pred_alignment = "".join(map(revstate_f, pred_states))
            x_aligned, y_aligned = states2alignment(pred_alignment,
                                                    query_by_id[query],
                                                    target_by_id[target])
            alignments.append((query, target, x_aligned, y_aligned))
    return alignments

示例#11

0

显示文件

 def test_states2alignment_9(self):
     """Smoke test: ``states2alignment`` must run on a minimal input.

     Nothing is asserted about the result; the test only fails if the
     call raises.
     """
     three_residues = 'HCH'
     four_residues = 'HCAH'
     state_path = np.array([1, 1, 0, 1])
     # Note the argument order: the four-residue sequence is passed first.
     states2alignment(state_path, four_residues, three_residues)

示例#12

0

显示文件

# Parse the FASTA-style input into {record id: sequence}. Sequence lines that
# belong to the same record are concatenated.
seqs = {}
seq_id = None
with open(args.input) as handle:
    for line in handle:
        line = line.rstrip()
        if not line:
            # Blank lines (e.g. a trailing empty line) carry no data;
            # indexing line[0] on them would raise IndexError.
            continue
        if line.startswith('>'):
            seq_id = line[1:]
            continue
        if seq_id is None:
            raise ValueError(
                f"{args.input}: sequence data found before any '>' header")
        seqs[seq_id] = seqs.get(seq_id, '') + line

# Only the first two records (in file order) are aligned.
keys_list = list(seqs)
if len(keys_list) < 2:
    raise ValueError(
        f"{args.input}: expected at least two sequences, "
        f"found {len(keys_list)}")
x = seqs[keys_list[0]]
y = seqs[keys_list[1]]
pred_alignment = model.align(x, y)

x_aligned, y_aligned = states2alignment(pred_alignment, x, y)

# Write both aligned sequences in FASTA layout; the context manager closes
# the file even if the write fails.
with open(args.output, "w") as out:
    out.write(f">{keys_list[0]}\n{x_aligned}\n>{keys_list[1]}\n{y_aligned}")

print(x_aligned)
print(pred_alignment)
print(y_aligned)

# Tokenize both sequences and pack them so the aligner can score the pair.
x_ = torch.Tensor(model.tokenizer(str.encode(x))).long()
y_ = torch.Tensor(model.tokenizer(str.encode(y))).long()

seq, order = pack_sequences([x_], [y_])

score = model.aligner.score(seq, order).item()