def generateNDTAlign(tplname, tgtname, distname, observations, transitions,
                     init_alignments_path):
    """Align one template/target pair and score the result via the NDT path.

    The template is loaded from ``args.t``, the target from ``args.q`` and
    the pairwise potential from ``args.d``; each of those is joined with the
    matching file-name argument, so here they are used as directories.

    Args:
        tplname: File name of the template inside ``args.t``.
        tgtname: File name of the target inside ``args.q``.
        distname: File name of the edge-score file inside ``args.d``.
        observations: Tensor whose first dimension (``observations.size(0)``)
            is passed to ``Batchpair`` as the model count; it is later
            indexed with ``sequence.bestobs``.
        transitions: Tensor indexed with ``sequence.bestobs`` in the same way
            as ``observations``.
        init_alignments_path: Currently unused; kept so existing callers do
            not break. An earlier revision built a list of
            ``'<tpl>-<tgt>.<k>.fasta'`` paths (falling back to
            ``'<tpl>-<tgt>.fasta'``) under this directory, but that list was
            never consumed, so the filesystem probing has been removed.

    Returns:
        list: ``[tpl['name'], tgt['name'], output, alignment_out]`` where
        ``output`` comes from ``sequence.get_RNDToutput`` and
        ``alignment_out`` from ``sequence.get_alignment_output``.
    """
    tpl = load_tpl(os.path.join(args.t, tplname))

    # NOTE(review): the extension test is applied to args.q (the directory),
    # not to tgtname -- confirm that this is the intended way to pick a loader.
    tgt_path = os.path.join(args.q, tgtname)
    if args.q.endswith(('.hhm', '.hhm.pkl')):
        tgt = load_hhm(tgt_path)
    else:
        tgt = load_tgt(tgt_path)

    model_size = observations.size(0)
    sequence = Batchpair(tpl['name'], tgt['name'], tpl['sequence'],
                         tgt['sequence'], tpl, model_size)

    # Template Cb-Cb distance matrix; negative entries are replaced by 10000
    # (presumably negatives flag a missing distance -- TODO confirm).
    dis = torch.from_numpy(tpl['atomDistMatrix']['CbCb']).float()
    dis = torch.where(dis < 0, torch.full_like(dis, 10000.0), dis)
    sequence.set_dismatrix(dis)

    # The third value returned by Load_EdgeScore is not needed here.
    pair_dis, disc_method, _ = Load_EdgeScore(os.path.join(args.d, distname),
                                              tgt)
    pair_dis = torch.from_numpy(pair_dis)
    disc_method = torch.from_numpy(disc_method)

    # Build the initial alignments, score them, and let the Batchpair object
    # record the result (it exposes ``maxalign`` and ``bestobs`` afterwards).
    alignments = sequence.alignment_init(observations, transitions, args.a)
    node_score, edge_score, norm_score = sequence.compute_NDT_score(
        alignments, observations, transitions, pair_dis, disc_method)
    sequence.update4NDT(alignments, node_score, edge_score, norm_score)

    # Re-score only the selected alignment, using the selected model's
    # observation/transition slices with a leading dimension of 1 restored.
    best = sequence.bestobs
    output = sequence.get_RNDToutput(
        sequence.maxalign, observations[best].unsqueeze(0),
        transitions[best].unsqueeze(0), pair_dis, disc_method,
        'dist', 1)
    alignment_out = sequence.get_alignment_output()

    return [tpl['name'], tgt['name'], output, alignment_out]
示例#2
0
def generateAlign(tplname, tgtname, observations, transitions):
    """Align a template against the query file ``args.q`` with averaged scores.

    The observation and transition tensors are averaged over their first
    dimension (keeping that dimension with size 1) before the alignment is
    initialised, so a single alignment is produced.

    Args:
        tplname: File name of the template inside ``args.t``.
        tgtname: Not used by this function; the query is read from ``args.q``.
        observations: Tensor averaged over dimension 0.
        transitions: Tensor averaged over dimension 0.

    Returns:
        list: ``[tpl['name'], tgt['name'], alignment_output, output]`` where
        ``output`` comes from ``get_CNF_output`` and ``alignment_output``
        from ``get_alignment_output``.
    """
    template = load_tpl(os.path.join(args.t, tplname))

    # The query file's extension decides which loader reads it.
    query_is_hhm = args.q.endswith(('.hhm', '.hhm.pkl'))
    target = load_hhm(args.q) if query_is_hhm else load_tgt(args.q)

    target_seq = target['sequence']
    template_seq = template['sequence']
    pair = Batchpair(template['name'], target['name'], template_seq,
                     target_seq, template)

    mean_obs = torch.mean(observations, dim=0, keepdim=True)
    mean_trans = torch.mean(transitions, dim=0, keepdim=True)

    candidates = pair.alignment_init(mean_obs, mean_trans, args.a)
    cnf_output = pair.get_CNF_output(candidates[0], mean_obs, mean_trans,
                                     args.a)

    # get_alignment_output is called only after maxalign has been assigned.
    pair.maxalign = candidates[0]
    rendered = pair.get_alignment_output()

    return [template['name'], target['name'], rendered, cnf_output]
def generateAlign(tplname, tgtname, observations, transitions):
    """Produce one formatted alignment per model for a template/target pair.

    Args:
        tplname: File name of the template inside ``args.t``.
        tgtname: File name of the target inside ``args.q``.
        observations: Tensor whose first dimension (``observations.size(0)``)
            is passed to ``Batchpair`` as the model count.
        transitions: Passed with ``observations`` to ``alignment_init``.

    Returns:
        list: ``[tpl['name'], tgt['name'], alignments_output]`` where
        ``alignments_output`` holds one ``alignment_output(...)`` result for
        each index in ``range(sequence.batchsize)``.
    """
    template = load_tpl(os.path.join(args.t, tplname))

    # The loader is chosen from the suffix of args.q; the file itself lives
    # at args.q/tgtname.
    target_path = os.path.join(args.q, tgtname)
    loader = load_hhm if args.q.endswith(('.hhm', '.hhm.pkl')) else load_tgt
    target = loader(target_path)

    target_seq = target['sequence']
    template_seq = template['sequence']
    n_models = observations.size(0)
    pair = Batchpair(template['name'], target['name'], template_seq,
                     target_seq, template, n_models)

    initial = pair.alignment_init(observations, transitions, args.a)
    rendered = [
        alignment_output(template['name'], target['name'],
                         template['sequence'], target['sequence'],
                         initial[idx])
        for idx in range(pair.batchsize)
    ]
    return [template['name'], target['name'], rendered]
示例#4
0
def compute_alignment(tplname, observations, transitions,
                      pair_dis, disc_method, iteration,
                      edge_type="dist", Node_Weight=1):
    """Align the query ``args.q`` to one template via ``ADMM_algorithm``.

    Args:
        tplname: File name of the template inside ``args.t``.
        observations: Passed through ``ModifyObs`` (and ``add_observation``
            when ``args.extra`` is not an empty list) before the ADMM call.
        transitions: Passed unchanged to ``alignment_init`` and
            ``ADMM_algorithm``.
        pair_dis: Forwarded to ``ADMM_algorithm``.
        disc_method: Forwarded to ``template_search_space`` and
            ``ADMM_algorithm``.
        iteration: Forwarded to ``set_iter``.
        edge_type: Forwarded to ``template_search_space`` and
            ``ADMM_algorithm``.
        Node_Weight: Forwarded to ``ModifyObs`` and ``ADMM_algorithm``.

    Returns:
        list: ``[tpl['name'], tgt['name'], alignment_output, output]`` where
        ``output`` is the second value returned by ``ADMM_algorithm`` and
        ``alignment_output`` comes from ``get_alignment_output``.
    """
    # Load the query first, then the template.
    query_is_hhm = args.q.endswith(('.hhm', '.hhm.pkl'))
    target = load_hhm(args.q) if query_is_hhm else load_tgt(args.q)
    target_seq = target['sequence']
    template = load_tpl(os.path.join(args.t, tplname))
    template_seq = template['sequence']

    pair = Batchpair(template['name'], target['name'],
                     template_seq, target_seq, template, len(args.m))
    pair.set_iter(iteration)

    # Use the stored Cb-Cb matrix when the template carries one; otherwise
    # compute it from the template.
    if 'atomDistMatrix' in template:
        cbcb = template['atomDistMatrix']['CbCb']
    else:
        cbcb = Compute_CbCb_distance_matrix(template)
    cbcb = torch.from_numpy(cbcb).float()
    # Negative entries are replaced by 10000.
    large = torch.ones(cbcb.size()) * 10000
    cbcb = torch.where(cbcb < 0, large, cbcb)
    pair.set_dismatrix(cbcb)

    observations = pair.ModifyObs(observations, Node_Weight)
    pair.set_alignment(
        pair.alignment_init(observations, transitions, args.a))

    # Optional extra initial alignments also extend the observations.
    if args.extra != []:
        pair.add_init_alignment(args.extra)
        observations = pair.add_observation(observations)

    pair.set_searchspace(pair.template_search_space(disc_method, edge_type))

    # Only the second return value of ADMM_algorithm is used afterwards.
    _, admm_output = pair.ADMM_algorithm(
        observations, transitions, pair_dis,
        disc_method, edge_type, Node_Weight)
    pair.set_output(admm_output)
    rendered = pair.get_alignment_output()
    return [template['name'], target['name'], rendered, admm_output]
示例#5
0
    # ----------------------- Load data --------------------------- #
    start = time.time()
    # check and load template (.tpl) and sequence(.tgt) file
    if os.path.exists(args.t):
        tpl = load_tpl(args.t)
    else:
        print("the template is not exist")
        sys.exit(-1)
    if os.path.exists(args.q):
        if args.q.endswith('hhm') or args.q.endswith('.hhm.pkl'):
            if any(SS3FeatureModes) or any(SS8FeatureModes) \
                    or any(ACCFeatureModes):
                print("Please use TGT format file as input or use model "
                      "not using structure information")
                sys.exit(-1)
            tgt = load_hhm(args.q)
        else:
            tgt = load_tgt(args.q)
    else:
        print("the query sequence is not exist")
        sys.exit(-1)

    # check and load pairwise potential file
    if not os.path.exists(args.d):
        print("the distance potential %s is not exist" % args.d)
        sys.exit(-1)
    # load distance potential
    pair_dis, disc_method, edge_type = Load_EdgeScore(args.d, tgt)
    pair_dis = torch.from_numpy(pair_dis)
    pair_distance = pair_dis.detach().share_memory_()
    disc_method = torch.Tensor(disc_method).detach().share_memory_()