Example #1
  def _score_spectrum(self,
                      precursor_mass,
                      spectrum_original,
                      state0_c,
                      state0_h,
                      candidate_list,
                      model,
                      model_output_logprob,
                      model_lstm_state,
                      session,
                      direction):
    """TODO(nh2tran): docstring."""

    #~ print("".join(["="] * 80)) # section-separating line
    #~ print("WorkerDB: _score()")

    # convert symbols into id
    candidate_list = [[deepnovo_config.vocab[x] for x in candidate] 
                      for candidate in candidate_list]

    # we shall group candidates into one minibatch:
    # a matrix of shape [minibatch_size, candidate_len],
    # one row per candidate, one column per sequence position
    minibatch_size = len(candidate_list) # number of candidates
    candidate_len = len(candidate_list[0]) # length of each candidate (all assumed equal)

    # candidates share the same state0, so repeat into [minibatch_size, 512]
    # the states will also be updated after every iteration
    state0_c = state0_c.reshape((1, -1)) # reshape to [1, 512]
    state0_h = state0_h.reshape((1, -1))
    minibatch_state_c = np.repeat(state0_c, minibatch_size, axis=0)
    minibatch_state_h = np.repeat(state0_h, minibatch_size, axis=0)

    # mass of each candidate, accumulated every time an AA is appended
    minibatch_prefix_mass = np.zeros(minibatch_size)

    # output is a list of candidate_len arrays of shape [minibatch_size, 26]
    # each row is log of probability distribution over 26 classes/symbols
    output_logprob_list = []

    # recurrent iterations
    for position in range(candidate_len):

      # gather minibatch data
      minibatch_AA_id = np.zeros(minibatch_size)
      for index, candidate in enumerate(candidate_list):
        AA = candidate[position]
        minibatch_AA_id[index] = AA
        minibatch_prefix_mass[index] += deepnovo_config.mass_ID[AA]
      # this is the most time-consuming step, ~70-75% of the running time
      minibatch_intensity = [get_candidate_intensity(spectrum_original,
                                                     precursor_mass,
                                                     prefix_mass,
                                                     direction)
                             for prefix_mass in np.nditer(minibatch_prefix_mass)]
      # final shape [minibatch_size, 26, 8, 10]
      minibatch_intensity = np.array(minibatch_intensity)

      # model feed
      input_feed = {}
      input_feed[model.input_dict["AAid"][1].name] = minibatch_AA_id
      input_feed[model.input_dict["intensity"].name] = minibatch_intensity
      input_feed[model.input_dict["lstm_state"][0].name] = minibatch_state_c
      input_feed[model.input_dict["lstm_state"][1].name] = minibatch_state_h
      # and run
      output_feed = [model_output_logprob, model_lstm_state]
      output_logprob, (minibatch_state_c, minibatch_state_h) = session.run(
          fetches=output_feed,
          feed_dict=input_feed)

      output_logprob_list.append(output_logprob)

    return output_logprob_list
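The output of _score_spectrum is a list of per-position log-probability arrays rather than one score per candidate. Below is a minimal sketch, not part of the original code, of how those arrays could be reduced to a total score per candidate; the helper name sum_candidate_scores is hypothetical, and the one-position shift assumes the model output at each position predicts the symbol at the next position, as in the feed loop above.

import numpy as np

def sum_candidate_scores(output_logprob_list, candidate_list):
    # output_logprob_list: candidate_len arrays of shape [minibatch_size, 26]
    # candidate_list: candidates as lists of vocabulary ids, all the same length
    scores = np.zeros(len(candidate_list))
    for position in range(len(output_logprob_list) - 1):
        logprob = output_logprob_list[position]
        for index, candidate in enumerate(candidate_list):
            # output at `position` is assumed to score the AA at `position + 1`
            scores[index] += logprob[index][candidate[position + 1]]
    return scores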
Example #2
    def _extend_peak(self, direction, session, model, spectrum_batch,
                     peak_batch):
        """TODO(nh2tran): docstring.
       Inputs:
         spectrum_batch: a list of spectrum, each is a dictionary
           spectrum["scan"]
           spectrum["precursor_mass"]
           spectrum["spectrum_holder"]
           spectrum["spectrum_original_forward"]
           spectrum["spectrum_original_backward"]
         peak_batch: one peak for each spectrum, each peak is a dictionary
           peak["prefix_mass"] for extension in the forward direction
           peak["sufffix_mass"] for extension in the backward direction
           peak["mass_tolerance"]
       Outputs:
         top_path_batch: for every input spectrum, the output is a list of paths,
           each path is a dictionary
             path["AAid_list"]
             path["score_list"]
             path["score_sum"]
    """

        print(
            "WorkerDenovo: _extend_peak(), direction={0:s}".format(direction))

        # test running time and tensorflow time
        test_time_decode = 0.0
        test_time_tf = 0.0
        test_time = 0.0
        start_time_decode = time.time()

        # for every input spectrum, the output is a list of paths,
        #   each path is a dictionary
        #   path["AAid_list"]
        #   path["score_list"]
        #   path["score_sum"]
        spectrum_batch_size = len(spectrum_batch)
        top_path_batch = [[] for x in range(spectrum_batch_size)]

        # forward/backward direction setting
        #   the direction determines the model, the spectrum and the peak mass
        if direction == "forward":
            model_lstm_state0 = model.output_forward["lstm_state0"]
            model_output_log_prob = model.output_forward["logprob"]
            model_lstm_state = model.output_forward["lstm_state"]
            spectrum_original_name = "spectrum_original_forward"
            peak_mass_name = "prefix_mass"
            FIRST_LABEL = self.GO_ID
            LAST_LABEL = self.EOS_ID
        elif direction == "backward":
            model_lstm_state0 = model.output_backward["lstm_state0"]
            model_output_log_prob = model.output_backward["logprob"]
            model_lstm_state = model.output_backward["lstm_state"]
            spectrum_original_name = "spectrum_original_backward"
            peak_mass_name = "suffix_mass"
            FIRST_LABEL = self.EOS_ID
            LAST_LABEL = self.GO_ID
        else:
            # guard against a silent NameError further down
            raise ValueError("direction must be 'forward' or 'backward'")

        # PEAK EXTENSION includes 4 steps:
        #   STEP 1: initialize the lstm and the active_search_list.
        #   STEP 2, 3, 4 are repeated until the active_search_list is empty.
        #     STEP 2: gather data from active search entries and group into blocks.
        #     STEP 3: run tensorflow model on data blocks to predict next AA.
        #     STEP 4: retrieve data from blocks to update the active_search_list
        #       with knapsack dynamic programming and beam search.

        start_time_tf = time.time()
        # STEP 1: initialize lstm
        spectrum_holder_array = np.array(
            [x["spectrum_holder"] for x in spectrum_batch])
        input_feed = {}
        input_feed[model.input_dict["spectrum"].name] = spectrum_holder_array
        output_feed = model_lstm_state0
        c_state0_array, h_state0_array = session.run(fetches=output_feed,
                                                     feed_dict=input_feed)
        test_time_tf += time.time() - start_time_tf

        # STEP 1: initialize the active_search_list
        # active_search_list holds the info of search entries under processing
        #   each search entry is a dictionary
        #     search_entry["spectrum_id"]
        #     search_entry["current_path_list"]
        #   each path is also a dictionary
        #     path["AAid_list"]
        #     path["prefix_mass"]
        #     path["score_list"]
        #     path["score_sum"]
        #     path["c_state"]
        #     path["h_state"]
        active_search_list = []
        for spectrum_id in range(spectrum_batch_size):
            search_entry = {}
            search_entry["spectrum_id"] = spectrum_id
            path = {}
            path["AAid_list"] = [FIRST_LABEL]
            path["prefix_mass"] = peak_batch[spectrum_id][peak_mass_name]
            path["score_list"] = [0.0]
            path["score_sum"] = 0.0
            path["c_state"] = c_state0_array[spectrum_id]
            path["h_state"] = h_state0_array[spectrum_id]
            search_entry["current_path_list"] = [path]
            active_search_list.append(search_entry)

        # repeat STEP 2, 3, 4 until the active_search_list is empty.
        while True:

            # STEP 2: gather data from active search entries and group into blocks.

            # data blocks for the input feed of tensorflow model
            block_AAid_1 = []  # nobi
            block_AAid_2 = []  # nobi
            block_c_state = []
            block_h_state = []
            block_candidate_intensity = []
            # data blocks to record the current status of search entries
            block_AAid_list = []
            block_prefix_mass = []
            block_score_list = []
            block_score_sum = []
            block_knapsack_candidate = []

            # store the number of paths of each search entry in the big blocks
            #   to retrieve the info of each search entry later in STEP 4.
            search_entry_size = [0] * len(active_search_list)

            # gather data into blocks through 2 nested loops over active_search_list
            #   and over current_path_list of each search_entry
            for entry_index, search_entry in enumerate(active_search_list):

                spectrum_id = search_entry["spectrum_id"]
                current_path_list = search_entry["current_path_list"]
                precursor_mass = spectrum_batch[spectrum_id]["precursor_mass"]
                spectrum_original = spectrum_batch[spectrum_id][
                    spectrum_original_name]
                peak_mass_tolerance = peak_batch[spectrum_id]["mass_tolerance"]

                for path in current_path_list:

                    # keep track of the AA predicted in the previous iteration
                    #   for nobi (short k-mer) model, we will need 2 previous AA
                    AAid_list = path["AAid_list"]
                    AAid_2 = AAid_list[-1]
                    if len(AAid_list) > 1:
                        AAid_1 = AAid_list[-2]
                    else:
                        AAid_1 = AAid_2  # nobi

                    # the current status of this path
                    prefix_mass = path["prefix_mass"]
                    score_list = path["score_list"]
                    score_sum = path["score_sum"]
                    c_state = path["c_state"]
                    h_state = path["h_state"]

                    # when we reach LAST_LABEL, check if the mass of predicted sequence
                    #   is close to the given precursor_mass:
                    #   if yes, send the current path to output
                    #   if not, skip the current path
                    if AAid_2 == LAST_LABEL:  # nobi
                        if (abs(prefix_mass - precursor_mass) <=
                                peak_mass_tolerance):
                            top_path_batch[spectrum_id].append(
                                {"AAid_list": AAid_list,
                                 "score_list": score_list,
                                 "score_sum": score_sum})
                        continue

                    start_time = time.time()
                    # get CANDIDATE INTENSITY to predict next AA
                    # TODO(nh2tran): change direction from 0/1 to "forward"/"backward"
                    direction_id = 0 if direction == "forward" else 1
                    candidate_intensity = get_candidate_intensity(
                        spectrum_original, precursor_mass, prefix_mass,
                        direction_id)
                    test_time += time.time() - start_time

                    # use knapsack and SUFFIX MASS to filter next AA candidate
                    suffix_mass = precursor_mass - prefix_mass - self.mass_ID[
                        LAST_LABEL]
                    knapsack_tolerance = int(
                        round(peak_mass_tolerance *
                              self.KNAPSACK_AA_RESOLUTION))
                    knapsack_candidate = self._search_knapsack(
                        suffix_mass, knapsack_tolerance)
                    # if not possible to extend, add LAST_LABEL to end the sequence
                    if not knapsack_candidate:
                        knapsack_candidate.append(LAST_LABEL)

                    # gather data blocks
                    block_AAid_1.append(AAid_1)  # nobi
                    block_AAid_2.append(AAid_2)  # nobi
                    block_c_state.append(c_state)
                    block_h_state.append(h_state)
                    block_candidate_intensity.append(candidate_intensity)

                    block_AAid_list.append(AAid_list)
                    block_prefix_mass.append(prefix_mass)
                    block_score_list.append(score_list)
                    block_score_sum.append(score_sum)
                    block_knapsack_candidate.append(knapsack_candidate)

                    # record the size of each search entry in the blocks
                    search_entry_size[entry_index] += 1

            # STEP 3: run tensorflow model on data blocks to predict next AA.
            #   output is stored in current_log_prob, current_c_state, current_h_state
            if block_AAid_1:

                start_time_tf = time.time()

                block_AAid_1 = np.array(block_AAid_1)  # nobi
                block_AAid_2 = np.array(block_AAid_2)  # nobi
                block_c_state = np.array(block_c_state)
                block_h_state = np.array(block_h_state)
                block_candidate_intensity = np.array(block_candidate_intensity)

                input_feed = {
                    model.input_dict["AAid"][0].name: block_AAid_1,  # nobi
                    model.input_dict["AAid"][1].name: block_AAid_2,  # nobi
                    model.input_dict["lstm_state"][0].name: block_c_state,
                    model.input_dict["lstm_state"][1].name: block_h_state,
                    model.input_dict["intensity"].name: block_candidate_intensity,
                }

                output_feed = [model_output_log_prob,
                               model_lstm_state]  # lstm.len_full
                #~ output_feed = model_output_log_prob # nobi

                current_log_prob, (current_c_state,
                                   current_h_state) = session.run(
                                       output_feed,
                                       input_feed)  # lstm.len_full
                #~ current_log_prob = session.run(output_feed,input_feed) # nobi

                test_time_tf += time.time() - start_time_tf

            # STEP 4: retrieve data from blocks to update the active_search_list
            #   with knapsack dynamic programming and beam search.
            block_index = 0
            for entry_index, search_entry in enumerate(active_search_list):

                # find all possible new paths within knapsack filter
                new_path_list = []
                for index in range(
                        block_index,
                        block_index + search_entry_size[entry_index]):
                    for AAid in block_knapsack_candidate[index]:
                        new_path = {}
                        new_path["AAid_list"] = block_AAid_list[index] + [AAid]
                        new_path["prefix_mass"] = block_prefix_mass[
                            index] + self.mass_ID[AAid]
                        if AAid > 2:  # do NOT add score of GO, EOS, PAD
                            new_path["score_list"] = (
                                block_score_list[index] +
                                [current_log_prob[index][AAid]])
                            new_path["score_sum"] = (
                                block_score_sum[index] +
                                current_log_prob[index][AAid])
                        else:
                            new_path["score_list"] = block_score_list[index]
                            new_path["score_sum"] = block_score_sum[index]
                        new_path["c_state"] = current_c_state[
                            index]  # lstm.len_full
                        new_path["h_state"] = current_h_state[
                            index]  # lstm.len_full
                        #~ new_path["c_state"] = block_c_state[index] # nobi
                        #~ new_path["h_state"] = block_h_state[index] # nobi
                        new_path_list.append(new_path)

                # beam search to select top candidates
                if len(new_path_list) > self.beam_size:
                    new_path_score = np.array(
                        [x["score_sum"] for x in new_path_list])
                    top_k_index = np.argpartition(-new_path_score, self.beam_size)[:self.beam_size]  # pylint: disable=line-too-long
                    search_entry["current_path_list"] = [
                        new_path_list[top_k_index[x]]
                        for x in range(self.beam_size)
                    ]
                else:
                    search_entry["current_path_list"] = new_path_list

                # update the accumulated block_index
                block_index += search_entry_size[entry_index]

            # update active_search_list by removing empty entries
            active_search_list = [
                x for x in active_search_list if x["current_path_list"]
            ]
            # STOP the extension loop if active_search_list is empty
            if not active_search_list:
                break

        test_time_decode += time.time() - start_time_decode
        print("  test_time_tf = %.2f" % (test_time_tf))
        print("  test_time_decode = %.2f" % (test_time_decode))
        print("  test_time = %.2f" % (test_time))

        return top_path_batch
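The beam pruning in STEP 4 relies on np.argpartition, which keeps the beam_size highest-scoring paths in linear time without fully sorting new_path_list. Below is a self-contained sketch of that selection step; the function name prune_to_beam is hypothetical.

import numpy as np

def prune_to_beam(new_path_list, beam_size):
    # keep only the beam_size paths with the largest accumulated score_sum
    if len(new_path_list) <= beam_size:
        return new_path_list
    scores = np.array([path["score_sum"] for path in new_path_list])
    # indices of the beam_size largest scores; their relative order is arbitrary
    top_k_index = np.argpartition(-scores, beam_size)[:beam_size]
    return [new_path_list[i] for i in top_k_index]

# usage, mirroring the loop above:
#   search_entry["current_path_list"] = prune_to_beam(new_path_list, self.beam_size)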