def shmir_from_transcript_sequence(
    transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold,
    immunostimulatory
):
    """Generating function of shmir from transcript sequence.

    Stored results are returned directly when get_results finds them.
    Otherwise candidate sequences are validated and folded through Celery
    subtasks, the best-scoring results are stored, and their JSON forms are
    returned.

    Args:
        transcript_name(str): Name of transcript.
        minimum_CG(int): Minimum number of 'C' and 'G' nucleotide in sequence.
        maximum_CG(int): Maximum number of 'C' and 'G' nucleotide in sequence.
        maximum_offtarget(int): Maximum offtarget.
        scaffold(str): Name of frame of miRNA or 'all'.
        immunostimulatory(str): One of 'yes', 'no', 'no_difference'.

    Returns:
        list of sh-miR(s): whatever get_results returned when it is not
        None, otherwise ``as_json()`` of every stored result.
    """
    # Check if results are already in the database.
    results = get_results(
        transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold,
        immunostimulatory
    )

    # Stored results are sometimes an empty list, so compare against None
    # instead of relying on truthiness.
    if results is not None:
        return results

    path = create_path_string(
        transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold,
        immunostimulatory
    )

    mRNA = ncbi_api.get_mRNA(transcript_name)
    reversed_mRNA = reverse_complement(mRNA)

    original_frames = frames_by_scaffold(scaffold)
    frames_by_name = {frame.name: frame for frame in original_frames}

    # Best patterns should be chosen first: each frame's regexp mapping is
    # kept in descending key order.
    patterns = {
        frame.name: OrderedDict(
            sorted(json.loads(frame.regexp).items(), reverse=True)
        )
        for frame in original_frames
    }

    # Validate, in parallel, every group of sequences matched by a frame's
    # patterns; each subtask is routed to the "score" queue.
    with allow_join_result():
        validated = group(
            validate_sequences.s(
                list(sequences),  # generators are not serializable
                regexp_type,
                name,
                minimum_CG,
                maximum_CG,
                maximum_offtarget,
                immunostimulatory
            ).set(queue="score")
            for name, patterns_dict in patterns.iteritems()
            for regexp_type, sequences in find_by_patterns(
                patterns_dict, reversed_mRNA
            ).iteritems()
        ).apply_async().get()

    # Each siRNA is folded only with the frame whose patterns matched it.
    results = _shmirs_for_best_sequences(
        merge_results(validated),
        lambda name: [frames_by_name[name]],
        path
    )

    if not results:
        # Nothing came out of the pattern-based search: validate every
        # candidate from all_possible_sequences(reversed_mRNA, 21) in one
        # task, with 0 and "all" in the regexp_type and name positions.
        with allow_join_result():
            validated = validate_sequences.s(
                list(all_possible_sequences(reversed_mRNA, 21)),  # not serializable
                0,
                "all",
                minimum_CG,
                maximum_CG,
                maximum_offtarget,
                immunostimulatory
            ).apply_async(queue="subtasks").get()

        # In the fallback every siRNA is tried against all frames.
        results = _shmirs_for_best_sequences(
            merge_results([validated]),
            lambda name: original_frames,
            path
        )

    # Keep only the top TRANSCRIPT_RESULT_LIMIT results by overall score.
    sorted_results = sorted(
        results,
        key=lambda result: result["score"]["all"],
        reverse=True
    )[:TRANSCRIPT_RESULT_LIMIT]

    db_results = store_results(
        transcript_name, minimum_CG, maximum_CG, maximum_offtarget, scaffold,
        immunostimulatory, sorted_results
    )

    # NOTE(review): presumably cleans up foldings under `path` that do not
    # belong to the stored task ids - confirm against remove_bad_foldings.
    remove_bad_foldings(path, [result.get_task_id() for result in db_results])

    return [result.as_json() for result in db_results]


def _shmirs_for_best_sequences(best_sequences, frames_for, path):
    """Run shmir_from_fasta for each best siRNA and flatten the results.

    Args:
        best_sequences: Value returned by merge_results; it is unpacked with
            unpack_dict_to_list into (name, siRNA) pairs.
        frames_for(callable): Called with the pair's name; returns the list
            of frames handed to shmir_from_fasta for that siRNA.
        path: Value returned by create_path_string, passed through to
            shmir_from_fasta.

    Returns:
        list: The per-subtask results chained into a single flat list.
    """
    with allow_join_result():
        per_task_results = group(
            shmir_from_fasta.s(
                siRNA["sequence"],
                siRNA["offtarget"],
                siRNA["regexp"],
                frames_for(name),
                path
            ).set(queue="score")
            for name, siRNA in unpack_dict_to_list(best_sequences)
        ).apply_async().get()

    # merge
    return list(chain.from_iterable(per_task_results))
def test_create_path_string(self):
    """create_path_string('a', 'b', 'c') must produce 'a_b_c'."""
    self.assertEqual(create_path_string('a', 'b', 'c'), 'a_b_c')