def prep(pid):
    """
    Select expert move sets for one puzzle and save the encoded data to disk.

    Three files are written under ``<cwd>/npsaves/``: ``X-exp-<pid>``,
    ``y-exp-base-<pid>`` and ``y-exp-loc-<pid>``. The names carry no ``.npy``
    suffix because ``np.save`` is handed an open file object, not a path.

    Relies on the module-level name ``len_longest``, which is defined outside
    this function.

    :param pid: Puzzle ID
    :return: None; the results are the files written to disk
    """
    # Passed straight to experience_labs; presumably the cut-off that
    # qualifies a player as an expert -- confirm against that helper.
    threshold = 50

    print('ready with pid %i' % pid)
    data, _users, encoded_bf, lens = experience_labs(pid, threshold)
    print('experience_labs')

    encoded = encode_movesets_style_pr(data)
    encoded_base = encode_bases(data)
    encoded_loc = encode_location(data, len_longest)
    print('encoded')

    print('encoded_bf')
    # Single-argument print calls produce the same output on Python 2 and 3.
    print('%d %d %d' % (len(encoded), len(encoded_bf), len(data)))
    print(lens)

    bases = base_sequence_at_current_time_pr(encoded, encoded_bf)
    X = np.array(
        structure_and_energy_at_current_time(bases, pid, data, len_longest))

    out_dir = os.getcwd() + '/npsaves/'
    outputs = (
        ('X-exp-', X),
        ('y-exp-base-', encoded_base),
        ('y-exp-loc-', encoded_loc),
    )
    for prefix, payload in outputs:
        # The context manager closes each handle even if np.save raises.
        with open(out_dir + prefix + str(pid), 'wb') as handle:
            np.save(handle, payload)
def read(pid, uidList):
    """
    Build training data from the given users' move sets for one puzzle and
    pickle it to disk.

    Three files are written under ``<cwd>/pickles/``: ``X-hog-loc-<pid>``,
    ``y-hog-base-<pid>`` and ``y-hog-loc-<pid>``.

    Relies on the module-level name ``len_longest``, which is defined outside
    this function.

    :param pid: Puzzle ID
    :param uidList: List of user IDs
    :return: None; the results are the pickle files written to disk
    """
    base_codes = {'A': 1, 'U': 2, 'G': 3, 'C': 4}

    final_dict = []
    bf_list = []
    for user in uidList:
        data = read_movesets_uid_pid(user, pid)
        if not data:
            # Nothing recorded for this user on this puzzle.
            continue
        for raw in data:
            parsed = ast.literal_eval(raw)
            moves, begin_from = parsed['moves'], parsed['begin_from']
            final_dict.append(moves)
            bf_list.append(begin_from)

    # Map every starting sequence to integer codes; symbols other than
    # A/U/G/C are skipped rather than encoded.
    encoded_bf = [
        [base_codes[symbol] for symbol in start if symbol in base_codes]
        for start in bf_list
    ]

    encoded = encode_movesets_style_pr(final_dict)
    encoded_base = encode_bases(final_dict)
    encoded_loc = encode_location(final_dict, len_longest)

    bases = base_sequence_at_current_time_pr(encoded, encoded_bf)
    X = structure_and_energy_at_current_time(bases, pid)

    out_dir = os.getcwd() + '/pickles/'
    outputs = (
        ('X-hog-loc-', X),
        ('y-hog-base-', encoded_base),
        ('y-hog-loc-', encoded_loc),
    )
    for prefix, payload in outputs:
        # The context manager closes each handle even if pickling fails.
        with open(out_dir + prefix + str(pid), 'wb') as handle:
            pickle.dump(payload, handle)
def read_uid(uidList):
    """
    Build training input from every move set read for the given users.

    Move-set entries that cannot be parsed, or that lack the ``moves`` or
    ``begin_from`` keys, are skipped.

    Relies on the module-level names ``pid`` and ``len_longest``, which are
    defined outside this function; ``pid`` is not a parameter here.

    :param uidList: List of user IDs
    :return: The value produced by ``structure_and_energy_at_current_time``
        for the collected move sets
    """
    base_codes = {'A': 1, 'U': 2, 'G': 3, 'C': 4}

    final_dict = []
    bf_list = []
    # enumerate gives the true position even when a uid appears twice and
    # avoids a linear uidList.index() scan on every iteration.
    for position, user in enumerate(uidList):
        data = read_movesets_uid(user)
        if not data:
            continue
        for raw in data:
            try:
                s1 = ast.literal_eval(raw)
                s2 = s1['moves']
                s3 = s1['begin_from']
                final_dict.append(s2)
                bf_list.append(s3)
                print(s2, s3)
            except Exception:
                # Best-effort: skip malformed entries, but let
                # KeyboardInterrupt and SystemExit propagate.
                continue
        print('Comnpleted %i/%i' % (position, len(uidList)))

    # Map every starting sequence to integer codes; symbols other than
    # A/U/G/C are skipped rather than encoded.
    encoded_bf = [
        [base_codes[symbol] for symbol in start if symbol in base_codes]
        for start in bf_list
    ]

    encoded = encode_movesets_style_pr(final_dict)
    # NOTE(review): these two results are not returned. The calls are kept
    # because the helpers' side effects are not visible from here.
    encoded_base = encode_bases(final_dict)
    encoded_loc = encode_location(final_dict, len_longest)

    bases = base_sequence_at_current_time_pr(encoded, encoded_bf)
    X = structure_and_energy_at_current_time(bases, pid)
    return X
def speed(pid):
    """
    Encode the solutions of one puzzle that used at most ``max_moves`` moves
    and pickle them to disk.

    When at least one solution qualifies, three files are written under
    ``<cwd>/pickles/``: ``X2-fast-loc-<pid>``, ``y2-fast-base-<pid>`` and
    ``y2-fast-loc-<pid>``. Nothing is written otherwise.

    Relies on the module-level names ``moveset_dataFrame``, ``max_moves`` and
    ``len_longest``, which are defined outside this function.

    :param pid: Puzzle ID
    :return: None; the results are the pickle files written to disk
    """
    base_codes = {'A': 1, 'U': 2, 'G': 3, 'C': 4}

    final_dict = []
    bf_list = []
    print(pid)

    puzzles_pid = moveset_dataFrame.loc[moveset_dataFrame['pid'] == pid]
    for raw in list(puzzles_pid['move_set']):
        parsed = ast.literal_eval(raw)
        # NOTE(review): the original comment said "50 moves or less", but
        # max_moves is set outside this function -- confirm its value.
        if int(parsed['num_moves']) <= max_moves:
            print('fast')
            moves, begin_from = parsed['moves'], parsed['begin_from']
            final_dict.append(moves)
            bf_list.append(begin_from)
    print("complete data read")

    # Map every starting sequence to integer codes; symbols other than
    # A/U/G/C are skipped rather than encoded.
    encoded_bf = [
        [base_codes[symbol] for symbol in start if symbol in base_codes]
        for start in bf_list
    ]
    print("encoded begin_from")
    print(len(final_dict))

    encoded = encode_movesets_style_pr(final_dict)
    encoded_base = encode_bases(final_dict)
    encoded_loc = encode_location(final_dict, len_longest)
    print('encoded base and location')
    print(len(encoded), len(encoded_bf), len(final_dict))

    bases = base_sequence_at_current_time_pr(encoded, encoded_bf)
    print('encoded base seqs')

    X = structure_and_energy_at_current_time(bases, pid)
    print('encoded strucs energy and locks')
    print(len(X))

    if len(encoded) != 0:
        out_dir = os.getcwd() + '/pickles/'
        outputs = (
            ('X2-fast-loc-', X),
            ('y2-fast-base-', encoded_base),
            ('y2-fast-loc-', encoded_loc),
        )
        for prefix, payload in outputs:
            # The context manager closes each handle even if pickling fails.
            with open(out_dir + prefix + str(pid), 'wb') as handle:
                pickle.dump(payload, handle)
from encodeRNA import encode_movesets_style, base_sequence_at_current_time, structure_and_energy_at_current_time
from encodeRNA import encode_bases, base_sequence_at_current_time_pr, encode_movesets_style_pr, encode_location
import numpy as np
import pandas as pd
import ast
import copy
# os.getcwd() is used below and no import of os is visible in this section;
# a repeated import is harmless if one already exists elsewhere.
import os
import pickle

# Module-level setup: load and encode the move sets of one hard-coded puzzle.
pid = 6502997
len_puzzle = 80

filepath = os.getcwd() + '/movesets/moveset6-22a.txt'
# NOTE(review): read_movesets_pid is not imported in this section -- confirm
# it is defined or imported elsewhere in the file.
data2, users = read_movesets_pid(filepath, pid)
data = data2

encoded = encode_movesets_style_pr(data)
encoded_base = encode_bases(data)
encoded_loc = encode_location(data, len_puzzle)

# The file is read as tab-separated. Only `sep` is passed because `delimiter`
# is an alias for it, and current pandas raises ValueError when both are
# given; older versions silently let delimiter='\t' win.
moveset_dataFrame = pd.read_csv(filepath, sep='\t', header="infer")
puzzles_pid = moveset_dataFrame.loc[moveset_dataFrame['pid'] == pid]
structure_file = os.getcwd() + '/movesets/puzzle-structure-data.txt'

plist = list(puzzles_pid['move_set'])

# Collect the starting sequence ('begin_from') of every move set for this
# puzzle.
bf_list = []
for i in plist:
    s1 = ast.literal_eval(i)
    s2 = s1['begin_from']
    bf_list.append(s2)