def getRunSeq(): """ generate run seq, a seq list of pairs of indexes of profiles for job scheduling """ # TODO needed to generalize based on input data type if os.path.isfile("ss_profiles.pickle"): ss_profiles = io.readPickle("ss_profiles.pickle") else: return False if os.path.isfile("contacts_route.pickle"): map_route = io.readPickle("contacts_route.pickle") elif os.path.isfile("pcs_route.pickle"): map_route = io.readPickle("pcs_route.pickle") elif os.path.isfile("rdc_route.pickle"): map_route = io.readPickle("rdc_route.pickle") print map_route s1, s2 = map_route[0][0], map_route[0][1] s1_list, s2_list = getPairSSProfiles(s1, s2, ss_profiles) run_seq = [] for i in range(len(s1_list)): for j in range(len(s2_list)): run_seq.append([i, j]) return run_seq
def getSSlist():
    """
    Return the two profile lists for the first entry of the route.

    Reads "ss_profiles.pickle" and the first route pickle that exists
    (contacts, then pcs, then rdc), then passes the first two items of the
    first route entry to getPairSSProfiles.

    :return: tuple (s1_list, s2_list) as produced by getPairSSProfiles
    """
    ss_profiles = io.readPickle("ss_profiles.pickle")

    # First existing route file wins; map_route stays unbound if none exists.
    for route_file in ("contacts_route.pickle",
                       "pcs_route.pickle",
                       "rdc_route.pickle"):
        if os.path.isfile(route_file):
            map_route = io.readPickle(route_file)
            break

    first_ss, second_ss = map_route[0][0], map_route[0][1]
    first_list, second_list = getPairSSProfiles(first_ss, second_ss, ss_profiles)
    return first_list, second_list
def start_top_hits(num_hits, stage, smotif_index): """ generate run seq, a seq list of pairs of indexes of profiles for job scheduling """ map_route = [] ss_profiles = io.readPickle("ss_profiles.pickle") if os.path.isfile("contacts_route.pickle"): map_route = io.readPickle("contacts_route.pickle") elif os.path.isfile("pcs_route.pickle"): map_route = io.readPickle("pcs_route.pickle") elif os.path.isfile("rdc_route.pickle"): map_route = io.readPickle("rdc_route.pickle") map_route_alt = io.readPickle("rdc_route_alt.pickle") alt_smotif_defs = map_route_alt[smotif_index] top_hits = [] top_hit_file = str(smotif_index - 1) + "_refined_tophits.gzip" if os.path.isfile(top_hit_file): top_hits = io.readGzipPickle(top_hit_file) print "loading from prevously assembled refined_tophits.pickle file" print "# hits :", len(top_hits) else: top_hit_file = str(smotif_index - 1) + "_tophits.gzip" if os.path.isfile(top_hit_file): top_hits = io.readGzipPickle(top_hit_file) print "loading from prevously assembled tophits.pickle file" print "# hits :", len(top_hits) else: print "No previous tophits file found, Generating a new one" return "exception" if not top_hits: return False, False run_seq = [] for next_smotif in alt_smotif_defs: print next_smotif direction = next_smotif[-1] if direction == 'left': next_ss_list = ss_profiles[next_smotif[0]] else: next_ss_list = ss_profiles[next_smotif[1]] for i in range(len(top_hits)): for j in range(len(next_ss_list)): run_seq.append([i, j, next_smotif]) return run_seq, smotif_index
def getPreviousSmotif(index):
    """
    Return one entry of the top hits saved for the previous Smotif index.

    The previous index is derived from the route: getNextSmotif gives the
    next index, and the hits are read from "<next_index - 1>_tophits.pickle".

    :param index: position of the wanted entry in the saved top-hits list
    :return: top_hits[index]
    """
    # First existing route file wins; map_route stays unbound if none exists.
    for route_file in ("contacts_route.pickle",
                       "pcs_route.pickle",
                       "rdc_route.pickle"):
        if os.path.isfile(route_file):
            map_route = io.readPickle(route_file)
            break

    # Only the index is needed here; the Smotif definition is ignored.
    next_index, _unused_smotif = getNextSmotif(map_route)

    # Read in previous index hits
    previous_hits_file = str(next_index - 1) + "_tophits.pickle"
    previous_hits = io.readPickle(previous_hits_file)
    return previous_hits[index]
def getRunSeq(num_hits, stage): """ generate run seq, a seq list of pairs of indexes of profiles for job scheduling """ map_route = [] ss_profiles = io.readPickle("ss_profiles.pickle") if os.path.isfile("contacts_route.pickle"): map_route = io.readPickle("contacts_route.pickle") elif os.path.isfile("pcs_route.pickle"): map_route = io.readPickle("pcs_route.pickle") elif os.path.isfile("rdc_route.pickle"): map_route = io.readPickle("rdc_route.pickle") try: next_index, next_smotif = getNextSmotif(map_route) print next_index, next_smotif except TypeError: return [999], 999 direction = next_smotif[-1] if direction == 'left': next_ss_list = ss_profiles[next_smotif[0]] else: next_ss_list = ss_profiles[next_smotif[1]] # get and make a list of top 10(n) of the previous run top_hits = makeTopPickle(next_index - 1, num_hits, stage) # send the previous Smotif index # delete two stages down pickled files check_pickle = str(next_index - 2) + str("_*_*.pickle") file_list = glob.glob(check_pickle) if len(file_list) > 10: remove = "rm " + check_pickle os.system(remove) if top_hits: run_seq = [] for i in range(len(top_hits)): for j in range(len(next_ss_list)): run_seq.append([i, j]) return run_seq, next_index
def getSS2(index):
    """
    Return one profile of the next secondary-structure element together with
    the direction of the next Smotif.

    The next Smotif is obtained from getNextSmotif on the first route pickle
    that exists (contacts, then pcs, then rdc).

    NOTE(review): a two-argument getSS2(index, next_smotif) is also visible
    in this source; if both live in the same module the later definition
    replaces this one -- confirm which is intended.

    :param index: position of the wanted profile in the selected profile list
    :return: tuple (profile, direction)
    """
    # First existing route file wins; map_route stays unbound if none exists.
    for route_file in ("contacts_route.pickle",
                       "pcs_route.pickle",
                       "rdc_route.pickle"):
        if os.path.isfile(route_file):
            map_route = io.readPickle(route_file)
            break

    ss_profiles = io.readPickle("ss_profiles.pickle")
    _next_index, next_smotif = getNextSmotif(map_route)

    # The last item is the direction; it selects which of the first two items
    # is used as the key into ss_profiles.
    direction = next_smotif[-1]
    ss_key = next_smotif[0] if direction == 'left' else next_smotif[1]
    return ss_profiles[ss_key][index], direction
def getSS2(index, next_smotif):
    """
    Return one profile of the element selected by next_smotif, along with the
    direction and the Smotif definition itself.

    NOTE(review): a one-argument getSS2(index) is also visible in this
    source; if both live in the same module only the later definition is
    callable.

    :param index: position of the wanted profile in the selected profile list
    :param next_smotif: sequence whose last item is the direction and whose
                        first two items are keys into the profiles loaded
                        from "ss_profiles.pickle"
    :return: tuple (profile, direction, next_smotif)
    """
    ss_profiles = io.readPickle("ss_profiles.pickle")

    # 'left' selects the first key, any other direction selects the second.
    direction = next_smotif[-1]
    ss_key = next_smotif[0] if direction == 'left' else next_smotif[1]
    return ss_profiles[ss_key][index], direction, next_smotif
def makeTopPickle(previous_smotif_index, num_hits, stage):
    """
    Concatenate data from all of the threads, organize, remove redundancies,
    rank and extract top hits as defined.

    Reads every file matching "<previous_smotif_index>_*_*.pickle", groups the
    hits by a score, keeps the first hit seen for each distinct seq_filter
    value, and writes the selected hits to
    "<previous_smotif_index>_tophits.pickle".

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; confirm it against version control.

    :param previous_smotif_index: index used as the prefix of both the input
                                  file pattern and the output file name
    :param num_hits: number of hits after which selection stops
    :param stage: forwarded unchanged to getNchiSum and rdcSumChi
    :return: range(count_top_hits), i.e. one index per hit written out
    """
    # Gather the hits of every matching pickle into one flat list.
    hits = []
    regex = str(previous_smotif_index) + "_*_*.pickle"  # a glob pattern, not a regex
    file_list = glob.glob(regex)
    for f in file_list:
        t_hits = io.readPickle(f)
        for t_hit in t_hits:
            hits.append(t_hit)
    """ identifiers: smotif, smotif_def, seq_filter, contacts_filter, PCS_filter, qcp_rmsd, Evofilter RDC_filter, NOE_filter """
    # Group hits by score. Each hit is a sequence of records whose first item
    # is one of the identifier tags listed above.
    new_dict = collections.defaultdict(list)
    pcs_filter = False
    contact_filter = False
    rdc_filter = False  # NOTE(review): set below but never read
    noe_filter = False
    for hit in hits:
        # thread_data contains data from each search and filter thread.
        for data_filter in hit:
            if data_filter[0] == 'PCS_filter':
                pcs_filter = True
                pcs_data = data_filter
                Nchi = getNchiSum(pcs_data, stage)
                new_dict[Nchi].append(hit)
            if data_filter[0] == 'Evofilter':
                contact_filter = True
                # The second item of the Evofilter record is used as the score.
                new_dict[data_filter[1]].append(hit)
            if data_filter[0] == 'RDC_filter':
                rdc_filter = True
                rdc_data = data_filter
                Nchi = rdcSumChi(rdc_data, stage)
                # NOTE(review): `filter` shadows the builtin of the same name.
                for filter in hit:
                    if filter[0] == 'NOE_filter':
                        noe_filter = True
                        noe_fmeasure = filter[1]
                        # Divide the RDC score by 10 ** (10 * NOE value).
                        Nchi = Nchi / math.pow(10, noe_fmeasure * 10)
                        new_dict[Nchi].append(hit)
                # NOTE(review): noe_filter is never reset, so once any hit had
                # a NOE_filter record, later RDC hits without one are not
                # stored at all -- confirm this is intended.
                if not noe_filter:
                    new_dict[Nchi].append(hit)
    # ************************************************
    # Exclude the redundant entries and rank top hits
    # ************************************************
    # Python 2 idiom: keys() returns a list that is sorted in place.
    keys = new_dict.keys()
    keys.sort()
    if contact_filter and not pcs_filter:
        # Contact filter data should be as high as possible
        keys.reverse()
    # Exclude the redundant data.
    # Walking the scores in ranked order, only the first entry seen for each
    # seq_filter value is kept.
    non_redundant = collections.defaultdict(list)
    seqs = []
    # smotif_seq and Nchi are initialised once, so an entry lacking the
    # corresponding record reuses the value left by the previous entry.
    smotif_seq = ''
    Nchi = 0.0
    for i in range(0, len(keys)):
        entries = new_dict[keys[i]]
        for entry in entries:
            for ent in entry:
                if ent[0] == 'smotif':
                    name = ent[1][0]  # NOTE(review): assigned but never used
                if ent[0] == 'seq_filter':
                    seq_filter = ent
                    smotif_seq = seq_filter[1]
                if ent[0] == 'PCS_filter':
                    pcs_data = ent
                    Nchi = getNchiSum(pcs_data, stage)
                if ent[0] == 'Evofilter':
                    Nchi = ent[1]
                if ent[0] == 'RDC_filter':
                    rdc_data = ent
                    Nchi = rdcSumChi(rdc_data, stage)
                    if noe_filter:
                        # The inner loop rebinds `ent`; the outer loop keeps
                        # its own iterator and nothing reads `ent` afterwards
                        # in this iteration.
                        for ent in entry:
                            if ent[0] == 'NOE_filter':
                                noe_fmeasure = ent[1]
                                Nchi = Nchi / math.pow(10, noe_fmeasure * 10)
                    else:
                        # Repeats the rdcSumChi call made just above.
                        Nchi = rdcSumChi(rdc_data, stage)
            if smotif_seq not in seqs:
                seqs.append(smotif_seq)
                non_redundant[Nchi].append(entry)
    # Rank top hits and dump the data
    keys = non_redundant.keys()
    keys.sort()
    if contact_filter and not pcs_filter:
        keys.reverse()
    dump_pickle = []
    print "Dumping data to disk"
    count_top_hits = 0
    # Both branches after the for loop break, so this while body runs once.
    while (True):
        for key in keys:
            if key == 999.999:
                # Do not work on these entries
                continue
            entries = non_redundant[key]
            for entry in entries:
                dump_pickle.append(entry)
                print "final sele", entry[0][1][0][0], key
                count_top_hits += 1
            # Checked once per score, so every entry sharing the last score
            # is kept and the total may exceed num_hits.
            if count_top_hits >= num_hits:
                break
        if count_top_hits >= num_hits:
            break
        else:
            print "could only extract ", count_top_hits
            break
    io.dumpPickle(str(previous_smotif_index) + "_tophits.pickle", dump_pickle)
    print "actual number in top hits ", len(dump_pickle)
    return range(count_top_hits)