Пример #1
0
                        reg.append('U')
                    elif i == 3:
                        reg.append('G')
                    elif i == 4:
                        reg.append('C')
                    else:
                        continue
                reg = ''.join(reg)
                print reg
                print iteration
                if similar(str_struc,DOT_BRACKET) >= MIN_THRESHOLD:
                    print 'similar'
                    print str_struc
                    print DOT_BRACKET
                    print reg
                    break
        except KeyboardInterrupt:
            print 'Puzzles solved: %i/%i' % (SOLVED, (plist.index(db) + 1))
            break

    level1,m2,S1 = sbc(DOT_BRACKET,reg)
    level2,m3,S2 = dsp(DOT_BRACKET,level1)
    #print level2
    if S1 or S2:
        SOLVED += 1

    movesets.extend(m2)
    movesets.extend(m3)

print 'Puzzles solved: %i/100' % SOLVED
Пример #2
0
        reg = ''.join(reg)
        #print inputs2[0][:len_puzzle]
        print reg
        print iteration
        #print current_struc[:len(enc_struc)]
        #print target_struc[:len(enc_struc)]
        #print inputs2[1][:len(enc_struc)]
        #print format_pairmap(str_struc)
        if similar(str_struc, DOT_BRACKET) >= MIN_THRESHOLD:
            print 'similar'
            print str_struc
            print DOT_BRACKET
            print reg
            break

level1, m2, _ = sbc(DOT_BRACKET, reg)
level2, m3, _ = dsp(DOT_BRACKET, level1)
print level2

#movesets.extend(m2)
#movesets.extend(m3)
#print movesets

# mp = pickle.load(open(os.getcwd()+'/pickles/evolved-raw-ms','r'))
# bp = pickle.load(open(os.getcwd()+'/pickles/evolved-raw-bf','r'))
#
# mp.append(movesets)
# bp.append(NUCLEOTIDES)
#
# pickle.dump(mp,open(os.getcwd()+'/pickles/evolved-raw-ms','w'))
# pickle.dump(bp,open(os.getcwd()+'/pickles/evolved-raw-bf','w'))
Пример #3
0
def predict(secondary_structure, vienna_version=1, bool_print=True):
    """Run the EternaBrain pipeline on a target secondary structure.

    A location CNN and a base CNN are restored from ``./models`` (relative to
    the current working directory) and used to mutate an all-'A' starting
    sequence one base per iteration.  The loop stops as soon as the folded
    structure is at least ``MIN_THRESHOLD`` similar to the target, or after
    ``2 * len(secondary_structure)`` moves.  The resulting sequence is then
    handed to ``sbc`` and ``dsp``.

    Arguments:
        secondary_structure {String} -- Target structure in dot-bracket
            notation.  Every feature row is padded to 400 entries, so the
            structure is expected to be at most 400 characters long.
        vienna_version {int} -- Forwarded unchanged to ``dsp`` (default 1).
        bool_print {bool} -- When True, progress information is printed.

    Returns:
        ``True`` when the CNN moves alone make the encoded current structure
        equal to the encoded target; otherwise the tuple
        ``(solved, level2)`` where ``solved`` is ``solved_sap1 or
        solved_sap2`` as reported by ``sbc``/``dsp`` and ``level2`` is the
        first value returned by ``dsp``.

        NOTE(review): the two return shapes differ (bare bool vs. tuple);
        callers must handle both.  Left unchanged to stay compatible.
    """

    # Constants used throughout this function
    len_puzzle = len(secondary_structure)
    NUCLEOTIDES = 'A' * len_puzzle
    ce = 0.0
    te = 0.0

    LOCATION_FEATURES = 8
    BASE_FEATURES = 9
    NAME = 'CNN15'

    MIN_THRESHOLD = 0.6
    MAX_ITERATIONS = len_puzzle * 2
    MAX_LEN = 400
    TF_SHAPE = LOCATION_FEATURES * MAX_LEN
    BASE_SHAPE = BASE_FEATURES * MAX_LEN
    padding = [0] * (MAX_LEN - len_puzzle)

    # Numeric encodings used in the feature rows (0 is padding).
    BASE_LETTERS = {1: 'A', 2: 'U', 3: 'G', 4: 'C'}
    STRUC_SYMBOLS = {1: '.', 2: '(', 3: ')'}
    STRUC_CODES = {'.': 1, '(': 2, ')': 3}

    def decode(values, symbols):
        """Map numeric codes to characters, dropping padding/unknown codes."""
        return ''.join(symbols[v] for v in values if v in symbols)

    def to_probabilities(scores):
        """Shift scores so the minimum is 0 and scale them to sum to 1."""
        shifted = scores - min(scores)
        total = sum(shifted)
        if total == 0:
            # All scores are equal: dividing would give NaN probabilities
            # and make ``choice`` raise, so sample uniformly instead.
            return [1.0 / len(shifted)] * len(shifted)
        return shifted / total

    sessions = []

    def load_model(kind):
        """Restore the ``kind`` ('base' or 'location') model in a new session."""
        checkpoint = os.getcwd() + '/models/' + kind + '/' + kind + NAME
        with tf.Graph().as_default() as graph:
            saver = tf.train.import_meta_graph(checkpoint + '.meta')
        session = tf.Session(graph=graph)
        # Registered before restore() so it is closed even if restore fails.
        sessions.append(session)
        saver.restore(session, checkpoint)
        return (session,
                graph.get_tensor_by_name('x_placeholder:0'),
                graph.get_tensor_by_name('keep_prob_placeholder:0'),
                graph.get_tensor_by_name('op7:0'))

    base_seq = convert_to_list(NUCLEOTIDES) + padding
    current_struc = encode_struc(RNA.fold(NUCLEOTIDES)[0]) + padding
    target_struc = encode_struc(secondary_structure) + padding
    current_energy = [ce] + ([0] * (MAX_LEN - 1))
    target_energy = [te] + ([0] * (MAX_LEN - 1))
    current_pm = format_pairmap(NUCLEOTIDES) + padding
    target_pm = format_pairmap(secondary_structure) + padding
    locks = ([1] * len_puzzle) + padding

    # Row order matters: the loop below addresses rows by index
    # (0 sequence, 1 current structure, 2 target structure, 3 current
    # energy, 4 target energy, 5 current pairmap, 6 target pairmap, 7 locks).
    # Change this list when altering the number of features.
    inputs2 = np.array([
        base_seq, current_struc, target_struc, current_energy, target_energy,
        current_pm, target_pm, locks
    ])
    inputs = inputs2.reshape([-1, TF_SHAPE])

    # Start from the initial sequence so ``reg`` is always a string.
    reg = NUCLEOTIDES
    try:
        sess1, x, keep_prob, base_weights = load_model('base')
        sess2, x2, keep_prob2, location_weights = load_model('location')

        if bool_print:
            print('models loaded')

        # The target structure row is never modified, so decode it once.
        rna_struc = decode(inputs2[2], STRUC_SYMBOLS)

        location_feed_dict = {x2: inputs, keep_prob2: 1.0}
        iteration = 0
        for _ in range(MAX_ITERATIONS):
            if np.all(inputs2[1] == inputs2[2]):
                if bool_print:
                    print("Puzzle Solved")
                return True

            # 1) Pick the position to mutate from the location model.
            location_scores = sess2.run(location_weights,
                                        location_feed_dict)[0]
            location_probs = to_probabilities(location_scores[:len_puzzle])
            location_change = choice(list(range(len(location_probs))),
                                     1,
                                     p=location_probs,
                                     replace=False)[0]

            # 2) Append the one-hot location as a ninth feature row and ask
            #    the base model which nucleotide to place there.
            la = [0.0] * MAX_LEN
            la[location_change] = 1.0
            inputs = np.append(inputs, la).reshape([-1, BASE_SHAPE])
            base_scores = sess1.run(base_weights,
                                    {x: inputs, keep_prob: 1.0})[0]
            base_probs = to_probabilities(base_scores)
            base_change = choice([1, 2, 3, 4], 1, p=base_probs,
                                 replace=False)[0]

            inputs2 = inputs.reshape(
                [BASE_FEATURES, BASE_SHAPE // BASE_FEATURES])

            # 3) Apply the move and refold.
            temp = copy.deepcopy(inputs2[0])
            temp[location_change] = base_change
            str_seq = decode(temp, BASE_LETTERS)
            str_struc, current_e = RNA.fold(str_seq)
            current_pm = format_pairmap(str_struc)
            similarity = similar(str_struc, secondary_structure)

            if bool_print:
                print(str_struc)
                print(similarity)

            target_e = RNA.energy_of_structure(str_seq, rna_struc, 0)
            enc_struc = [
                STRUC_CODES[ch] for ch in str_struc if ch in STRUC_CODES
            ]

            # 4) Write the new state back into the feature rows and drop
            #    the one-hot location row again for the next location query.
            inputs2[0] = temp
            inputs2[1][:len(enc_struc)] = enc_struc
            inputs2[3][0] = current_e
            inputs2[4][0] = target_e
            inputs2[5][:len(enc_struc)] = current_pm
            inputs = inputs2[0:LOCATION_FEATURES].reshape([-1, TF_SHAPE])
            location_feed_dict = {x2: inputs, keep_prob2: 1.0}
            iteration += 1
            # Row 0 now holds ``temp``, so its decoding is ``str_seq``.
            reg = str_seq

            if bool_print:
                print(reg)
                print(iteration)

            if similarity >= MIN_THRESHOLD:
                if bool_print:
                    print('similar')
                    print(str_struc)
                    print(secondary_structure)
                    print(reg)
                break
    finally:
        # Release both TensorFlow sessions on every exit path.
        for session in sessions:
            session.close()

    level1, _, solved_sap1 = sbc(secondary_structure, reg)
    # NOTE(review): ``path`` is not defined in this function; presumably a
    # module-level name holding the ViennaRNA location -- confirm.
    level2, _, solved_sap2 = dsp(secondary_structure,
                                 level1,
                                 vienna_version=vienna_version,
                                 vienna_path=path)
    if bool_print:
        print(level2)
    return solved_sap1 or solved_sap2, level2
Пример #4
0
def design(dot_bracket,
           nucleotides,
           locks,
           ce=0,
           te=0,
           MIN_THRESHOLD=0.6,
           MAX_ITERATIONS=3):
    """
    Designs an RNA molecule for a specific target structure with the EternaBrain algorithm

    The location and base CNNs are restored from ``./models`` (relative to
    the current working directory) and used to mutate ``nucleotides`` one
    base per iteration; the result is then passed through ``sbc`` and ``dsp``.

    :param dot_bracket: The target structure in dot-bracket notation (every
                        feature row is padded to 400 entries, so at most 400
                        characters are expected)
    :param nucleotides: The current sequence of nucleotides
    :param locks: The locked bases ('x's for locked, 'o's for unlocked)
    :param ce: The current energy in kcal/mol (default is 0 kcal/mol)
    :param te: The target folded energy in kcal/mol (default is 0 kcal/mol)
    :param MIN_THRESHOLD: Minimum similarity of the CNN structure to the target structure
                          at which the CNN stage stops (default is 0.6, or 60%)
    :param MAX_ITERATIONS: Multiplier for the CNN move budget; the maximum number of moves is
                           ``len(dot_bracket) * MAX_ITERATIONS`` (default is 3)
    :return: A tuple containing the following:
                Whether the puzzle was solved (True or False)
                The solved/unsolved nucleotide sequence
                The moves the algorithm took to get to the final nucleotide sequence
    """
    solved = False

    len_puzzle = len(dot_bracket)
    min_threshold = MIN_THRESHOLD
    max_iterations = len_puzzle * MAX_ITERATIONS
    MAX_LEN = 400
    LOCATION_FEATURES = 8
    BASE_FEATURES = 9
    TF_SHAPE = LOCATION_FEATURES * MAX_LEN
    BASE_SHAPE = BASE_FEATURES * MAX_LEN
    padding = [0] * (MAX_LEN - len_puzzle)

    # Numeric encodings used in the feature rows (0 is padding).
    BASE_CODES = {'A': 1, 'U': 2, 'G': 3, 'C': 4}
    BASE_LETTERS = {1: 'A', 2: 'U', 3: 'G', 4: 'C'}
    STRUC_CODES = {'.': 1, '(': 2, ')': 3}
    STRUC_SYMBOLS = {1: '.', 2: '(', 3: ')'}
    LOCK_CODES = {'o': 1, 'x': 2}

    def similar(a, b):
        """Return the SequenceMatcher similarity ratio of two strings."""
        return SequenceMatcher(None, a, b).ratio()

    def encode(symbols, codes):
        """Map characters to numeric codes, dropping unknown characters."""
        return [codes[s] for s in symbols if s in codes]

    def decode(values, symbols):
        """Map numeric codes to characters, dropping padding/unknown codes."""
        return ''.join(symbols[v] for v in values if v in symbols)

    def to_probabilities(scores):
        """Shift scores so the minimum is 0 and scale them to sum to 1."""
        shifted = scores - min(scores)
        total = sum(shifted)
        if total == 0:
            # All scores are equal: dividing would give NaN probabilities
            # and make ``choice`` raise, so sample uniformly instead.
            return [1.0 / len(shifted)] * len(shifted)
        return shifted / total

    sessions = []

    def load_model(kind):
        """Restore the ``kind`` ('base' or 'location') model in a new session."""
        checkpoint = os.getcwd() + '/models/' + kind + '/' + kind + 'CNN15'
        with tf.Graph().as_default() as graph:
            saver = tf.train.import_meta_graph(checkpoint + '.meta')
        session = tf.Session(graph=graph)
        # Registered before restore() so it is closed even if restore fails.
        sessions.append(session)
        saver.restore(session, checkpoint)
        return (session,
                graph.get_tensor_by_name('x_placeholder:0'),
                graph.get_tensor_by_name('keep_prob_placeholder:0'),
                graph.get_tensor_by_name('op7:0'))

    base_seq = encode(nucleotides, BASE_CODES) + padding
    current_struc = encode(RNA.fold(nucleotides)[0], STRUC_CODES) + padding
    target_struc = encode(dot_bracket, STRUC_CODES) + padding
    current_energy = [ce] + ([0] * (MAX_LEN - 1))
    target_energy = [te] + ([0] * (MAX_LEN - 1))
    current_pm = format_pairmap(nucleotides) + padding
    target_pm = format_pairmap(dot_bracket) + padding
    newlocks = encode(locks, LOCK_CODES) + padding

    # Row order matters: the loop below addresses rows by index
    # (0 sequence, 1 current structure, 2 target structure, 3 current
    # energy, 4 target energy, 5 current pairmap, 6 target pairmap, 7 locks).
    inputs2 = np.array([
        base_seq, current_struc, target_struc, current_energy, target_energy,
        current_pm, target_pm, newlocks
    ])
    inputs = inputs2.reshape([-1, TF_SHAPE])

    movesets = []
    # Start from the input sequence: if the puzzle is already solved (or the
    # move budget is zero) no move is made, and the post-processing below
    # must still receive a sequence instead of hitting an unbound name.
    reg = nucleotides
    try:
        sess1, x, keep_prob, base_weights = load_model('base')
        sess2, x2, keep_prob2, location_weights = load_model('location')

        # The target structure row is never modified, so decode it once.
        rna_struc = decode(inputs2[2], STRUC_SYMBOLS)

        location_feed_dict = {x2: inputs, keep_prob2: 1.0}
        for _ in range(max_iterations):
            if np.all(inputs2[1] == inputs2[2]):
                solved = True
                break

            # 1) Pick the position to mutate from the location model.
            location_scores = sess2.run(location_weights,
                                        location_feed_dict)[0]
            location_probs = to_probabilities(location_scores[:len_puzzle])
            location_change = choice(list(range(len(location_probs))),
                                     1,
                                     p=location_probs,
                                     replace=False)[0]

            # 2) Append the one-hot location as a ninth feature row and ask
            #    the base model which nucleotide to place there.
            la = [0.0] * MAX_LEN
            la[location_change] = 1.0
            inputs = np.append(inputs, la).reshape([-1, BASE_SHAPE])
            base_scores = sess1.run(base_weights,
                                    {x: inputs, keep_prob: 1.0})[0]
            base_probs = to_probabilities(base_scores)
            base_change = choice([1, 2, 3, 4], 1, p=base_probs,
                                 replace=False)[0]

            # Floor division keeps the shape integral on Python 3 as well.
            inputs2 = inputs.reshape(
                [BASE_FEATURES, BASE_SHAPE // BASE_FEATURES])

            # 3) Apply the move and refold.
            temp = copy.deepcopy(inputs2[0])
            temp[location_change] = base_change
            movesets.append([base_change, location_change])
            str_seq = decode(temp, BASE_LETTERS)
            str_struc, current_e = RNA.fold(str_seq)
            current_pm = format_pairmap(str_struc)
            target_e = RNA.energy_of_structure(str_seq, rna_struc, 0)
            enc_struc = encode(str_struc, STRUC_CODES)

            # 4) Write the new state back into the feature rows and drop
            #    the one-hot location row again for the next location query.
            inputs2[0] = temp
            inputs2[1][:len(enc_struc)] = enc_struc
            inputs2[3][0] = current_e
            inputs2[4][0] = target_e
            inputs2[5][:len(enc_struc)] = current_pm
            inputs = inputs2[0:LOCATION_FEATURES].reshape([-1, TF_SHAPE])
            location_feed_dict = {x2: inputs, keep_prob2: 1.0}
            # Row 0 now holds ``temp``, so its decoding is ``str_seq``.
            reg = str_seq

            if similar(str_struc, dot_bracket) >= min_threshold:
                break
    finally:
        # Release both TensorFlow sessions on every exit path.
        for session in sessions:
            session.close()

    level1, m2, s1 = sbc(dot_bracket, reg)
    level2, m3, s2 = dsp(dot_bracket, level1)
    if s1 or s2:
        solved = True

    movesets.extend(m2)
    movesets.extend(m3)

    return solved, level2, movesets