Пример #1
0
def prep(pid):
    """
    Build the expert training set for one puzzle and save it as NumPy files.

    Move sets are obtained from ``experience_labs(pid, threshold)`` and run
    through the encoders.  Three files are written below ``<cwd>/npsaves``:
    ``X-exp-<pid>``, ``y-exp-base-<pid>`` and ``y-exp-loc-<pid>``.  The file
    names carry no ``.npy`` suffix because the arrays are written to
    already-open file objects.

    Relies on the module-level ``len_longest`` and on the ``npsaves``
    directory already existing (``open`` does not create it).

    :param pid: Puzzle ID
    :return: None; the training data is written to disk
    """
    # Forwarded to experience_labs; presumably an experience cutoff for
    # "expert" players -- TODO confirm against experience_labs.
    threshold = 50

    print('ready with pid %i' % pid)

    # The second element (the user list) is not needed here.
    data, _unused_users, encoded_bf, lens = experience_labs(pid, threshold)
    print('experience_labs')
    encoded = encode_movesets_style_pr(data)
    encoded_base = encode_bases(data)
    encoded_loc = encode_location(data, len_longest)
    print('encoded')

    print('encoded_bf')
    # Single pre-formatted string so the output is the same on Python 2 and 3.
    print('%i %i %i' % (len(encoded), len(encoded_bf), len(data)))
    print(lens)
    bases = base_sequence_at_current_time_pr(encoded, encoded_bf)

    X = np.array(structure_and_energy_at_current_time(bases, pid, data, len_longest))

    out_dir = os.path.join(os.getcwd(), 'npsaves')
    outputs = (
        ('X-exp-', X),
        ('y-exp-base-', encoded_base),
        ('y-exp-loc-', encoded_loc),
    )
    for prefix, payload in outputs:
        # Context manager guarantees the handle is flushed and closed.
        with open(os.path.join(out_dir, prefix + str(pid)), 'wb') as handle:
            np.save(handle, payload)
Пример #2
0
def read(pid, uidList):
    """
    Build and pickle the training data of the given players for one puzzle.

    Every move set returned by ``read_movesets_uid_pid(user, pid)`` is parsed
    with ``ast.literal_eval``; its ``'moves'`` and ``'begin_from'`` entries
    are encoded and the results are pickled below ``<cwd>/pickles`` as
    ``X-hog-loc-<pid>``, ``y-hog-base-<pid>`` and ``y-hog-loc-<pid>``.

    Relies on the module-level ``len_longest`` and on the ``pickles``
    directory already existing.

    :param pid: Puzzle ID
    :param uidList: List of user IDs
    :return: None; the training data is written to disk
    """
    # Integer code of each base letter; any other character is dropped.
    base_codes = {'A': 1, 'U': 2, 'G': 3, 'C': 4}

    move_lists = []
    begin_seqs = []
    for user in uidList:
        records = read_movesets_uid_pid(user, pid)
        if not records:
            # This user has no move sets for the puzzle.
            continue
        for record in records:
            parsed = ast.literal_eval(record)
            moves = parsed['moves']
            begin_from = parsed['begin_from']
            move_lists.append(moves)
            begin_seqs.append(begin_from)

    encoded_bf = [
        [base_codes[base] for base in sequence if base in base_codes]
        for sequence in begin_seqs
    ]

    encoded = encode_movesets_style_pr(move_lists)
    encoded_base = encode_bases(move_lists)
    encoded_loc = encode_location(move_lists, len_longest)
    bases = base_sequence_at_current_time_pr(encoded, encoded_bf)
    X = structure_and_energy_at_current_time(bases, pid)

    out_dir = os.path.join(os.getcwd(), 'pickles')
    outputs = (
        ('X-hog-loc-', X),
        ('y-hog-base-', encoded_base),
        ('y-hog-loc-', encoded_loc),
    )
    for prefix, payload in outputs:
        # Context manager guarantees the handle is flushed and closed.
        with open(os.path.join(out_dir, prefix + str(pid)), 'wb') as handle:
            pickle.dump(payload, handle)
Пример #3
0
def read_uid(uidList, pid=None):
    """
    Returns the encoded features for every move set of the given players.

    Each record from ``read_movesets_uid(user)`` is parsed with
    ``ast.literal_eval``; records that cannot be parsed or that lack the
    ``'moves'`` / ``'begin_from'`` entries are skipped.

    :param uidList: List of user IDs
    :param pid: Puzzle ID forwarded to
        ``structure_and_energy_at_current_time``.  Optional for backward
        compatibility: when omitted, the module-level ``pid`` is used, which
        is what the body previously relied on implicitly.
    :return: Result of ``structure_and_energy_at_current_time(bases, pid)``
    :raises NameError: if no puzzle ID is passed and the module defines none
    """
    if pid is None:
        try:
            pid = globals()['pid']
        except KeyError:
            raise NameError(
                "read_uid needs a puzzle ID: pass pid=... or define a "
                "module-level 'pid'")

    # Integer code of each base letter; any other character is dropped.
    base_codes = {'A': 1, 'U': 2, 'G': 3, 'C': 4}

    move_lists = []
    begin_seqs = []
    total = len(uidList)
    # enumerate gives the position directly; list.index() was O(n) per user
    # and reported the wrong position for duplicate IDs.
    for position, user in enumerate(uidList, 1):
        records = read_movesets_uid(user)
        if not records:
            continue
        for record in records:
            try:
                parsed = ast.literal_eval(record)
                moves = parsed['moves']
                begin_from = parsed['begin_from']
            except (ValueError, SyntaxError, KeyError, TypeError):
                # Best effort: skip malformed or incomplete records without
                # swallowing KeyboardInterrupt/SystemExit.
                continue
            move_lists.append(moves)
            begin_seqs.append(begin_from)
            print(moves, begin_from)
        print('Comnpleted %i/%i' % (position, total))

    encoded_bf = [
        [base_codes[base] for base in sequence if base in base_codes]
        for sequence in begin_seqs
    ]

    # Only the feature matrix is returned, so the base/location labels that
    # used to be computed and discarded here are no longer built.
    encoded = encode_movesets_style_pr(move_lists)
    bases = base_sequence_at_current_time_pr(encoded, encoded_bf)

    return structure_and_energy_at_current_time(bases, pid)
Пример #4
0
def speed(pid):
    """
    Encode and pickle the solutions of one puzzle that needed few moves.

    Rows of the module-level ``moveset_dataFrame`` whose ``pid`` matches are
    parsed with ``ast.literal_eval``; a solution is kept when its
    ``num_moves`` is at most the module-level ``max_moves``.  When the
    encoded move-set list is non-empty, three pickles are written below
    ``<cwd>/pickles``: ``X2-fast-loc-<pid>``, ``y2-fast-base-<pid>`` and
    ``y2-fast-loc-<pid>``.

    Also relies on the module-level ``len_longest`` and on the ``pickles``
    directory already existing.

    :param pid: Puzzle ID
    :return: None; the training data is written to disk
    """
    # Integer code of each base letter; any other character is dropped.
    base_codes = {'A': 1, 'U': 2, 'G': 3, 'C': 4}

    print(pid)
    puzzle_rows = moveset_dataFrame.loc[moveset_dataFrame['pid'] == pid]

    move_lists = []
    begin_seqs = []
    for record in list(puzzle_rows['move_set']):
        parsed = ast.literal_eval(record)
        if int(parsed['num_moves']) > max_moves:
            # Too many moves to count as a fast solution.
            continue
        print('fast')
        moves = parsed['moves']
        begin_from = parsed['begin_from']
        move_lists.append(moves)
        begin_seqs.append(begin_from)

    print("complete data read")
    encoded_bf = [
        [base_codes[base] for base in sequence if base in base_codes]
        for sequence in begin_seqs
    ]
    print("encoded begin_from")
    print(len(move_lists))
    encoded = encode_movesets_style_pr(move_lists)
    encoded_base = encode_bases(move_lists)
    encoded_loc = encode_location(move_lists, len_longest)
    print('encoded base and location')
    print(len(encoded), len(encoded_bf), len(move_lists))
    bases = base_sequence_at_current_time_pr(encoded, encoded_bf)
    print('encoded base seqs')
    X = structure_and_energy_at_current_time(bases, pid)
    print('encoded strucs energy and locks')
    print(len(X))

    # len() rather than truthiness: ``encoded`` may not be a plain list.
    if len(encoded) != 0:
        out_dir = os.path.join(os.getcwd(), 'pickles')
        outputs = (
            ('X2-fast-loc-', X),
            ('y2-fast-base-', encoded_base),
            ('y2-fast-loc-', encoded_loc),
        )
        for prefix, payload in outputs:
            # Context manager guarantees the handle is flushed and closed.
            with open(os.path.join(out_dir, prefix + str(pid)), 'wb') as handle:
                pickle.dump(payload, handle)
Пример #5
0
import ast
import copy
import os
import pickle

import numpy as np
import pandas as pd

from encodeRNA import encode_bases, base_sequence_at_current_time_pr, encode_movesets_style_pr, encode_location

# Puzzle to process and the length forwarded to encode_location.
pid = 6502997
len_puzzle = 80

filepath = os.path.join(os.getcwd(), 'movesets', 'moveset6-22a.txt')

# NOTE(review): read_movesets_pid is not imported in the visible part of this
# file -- confirm which module provides it.
data2, users = read_movesets_pid(filepath, pid)
data = data2  # both names existed at module level, so both are kept
encoded = encode_movesets_style_pr(data)
encoded_base = encode_bases(data)
encoded_loc = encode_location(data, len_puzzle)

# Only ``sep`` is passed: giving both ``sep`` and ``delimiter`` is rejected by
# current pandas, and the tab delimiter was the one that took effect before.
moveset_dataFrame = pd.read_csv(filepath, sep='\t', header='infer')
puzzles_pid = moveset_dataFrame.loc[moveset_dataFrame['pid'] == pid]
structure_file = os.path.join(os.getcwd(), 'movesets', 'puzzle-structure-data.txt')

# Starting sequence ('begin_from') of every recorded move set of the puzzle.
plist = list(puzzles_pid['move_set'])
bf_list = [ast.literal_eval(move_set)['begin_from'] for move_set in plist]