def main():
    """Script entry point: classify the passed seeds and keep representatives.

    Sequence of operations, in order:
      1. import the project modules and parse the arguments;
      2. unpack the expression mask file and discretize it into
         ``args.nbins`` bins;
      3. read the combined seeds and the combined profiles;
      4. call ``filter_CMI``, which yields a classification array and
         the number of families;
      5. call ``calculate_MIs_all_seeds`` on the profiles;
      6. call ``choose_best_reps_for_families`` to obtain the unique seeds
         and their profiles;
      7. write the unique seeds, the unique profiles and the
         classification array to the files named in the arguments.
    """
    import_modules()
    args = handler()

    # expression mask -> discretized expression profile
    mask_index, mask_values = IO.unpack_mask_file(args.exp_mask_file)
    binned_expression = MI.discretize_exp_profile(
        mask_index, mask_values, nbins=args.nbins
    )

    # seeds that passed the previous step, together with their profiles
    passed_seeds = IO.read_motif_file(args.combined_seeds_filename)
    passed_profiles = IO.unpack_profiles_file(args.combined_profiles_filename)

    family_labels, family_count = filter_CMI(
        passed_seeds,
        passed_profiles,
        binned_expression,
        mask_index,
        args.nbins,
        args.min_ratio,
        do_print=args.do_print,
    )
    mi_per_seed = calculate_MIs_all_seeds(
        passed_profiles, binned_expression, mask_index, args.nbins
    )
    unique_seeds, unique_profiles = choose_best_reps_for_families(
        passed_seeds,
        passed_profiles,
        family_labels,
        family_count,
        mi_per_seed,
        do_print=args.do_print,
    )

    # persist the results
    IO.write_list_of_seeds(unique_seeds, args.unique_seeds_filename)
    IO.write_array_of_profiles(unique_profiles, args.unique_profiles_filename)
    IO.write_classification_array(
        family_labels, args.families_classification_filename
    )
def main():
    """Script entry point: optimize one chunk of the unique seeds.

    Reads the sequences, the expression mask, the unique seeds and their
    profiles; splits seeds/profiles into chunks of ``args.size_of_chunks``;
    picks the chunk selected by ``pick_one_chunk`` from the environment
    variables returned by ``sge.get_env_variables``; runs
    ``optimize_motifs`` on it and writes the four resulting outputs
    (seeds, profiles, characteristics array, robustness array) to the
    file names produced by ``make_output_filenames``.
    """
    import_modules()
    args = handler()

    all_seqs = read_sequences(args.rna_bin_file)
    mask_index, mask_values = IO.unpack_mask_file(args.exp_mask_file)
    binned_expression = MI.discretize_exp_profile(
        mask_index, mask_values, nbins=args.nbins
    )
    initial_seeds = IO.read_motif_file(args.unique_seeds_filename)
    initial_profiles = IO.unpack_profiles_file(args.unique_profiles_filename)

    # keep only the sequences whose entry in the mask index is truthy
    selected_seqs = []
    for position in range(mask_index.shape[0]):
        if mask_index[position]:
            selected_seqs.append(all_seqs[position])

    # get the task id
    env_vars = sge.get_env_variables()

    chunked_seeds, chunked_profiles = chunk_up_input_files(
        initial_seeds, initial_profiles, args.size_of_chunks
    )
    my_seeds, my_profiles = pick_one_chunk(
        chunked_seeds, chunked_profiles, env_vars
    )
    (
        seeds_out_path,
        profiles_out_path,
        charact_out_path,
        robustness_out_path,
    ) = make_output_filenames(env_vars, args)

    (
        optimized_seeds,
        optimized_profiles,
        charact_array,
        robustness_values,
    ) = optimize_motifs(
        my_seeds,
        my_profiles,
        binned_expression,
        args.nbins,
        mask_index,
        selected_seqs,
        args,
        do_print=True,
    )

    IO.write_list_of_seeds(optimized_seeds, seeds_out_path)
    IO.write_array_of_profiles(optimized_profiles, profiles_out_path)
    IO.write_np_array(charact_array, charact_out_path)
    IO.write_np_array(robustness_values, robustness_out_path)
def main():
    """Script entry point: compute MI values for the profiles of one task.

    Resolves the input/output file names for the current task from the
    task-mapping file and the environment variables, unpacks the profiles
    and the expression mask, discretizes the expression profile, runs
    ``calculate_MI_for_seeds`` and writes the MI values. When
    ``args.print_qstat`` equals ``'y'`` it finally calls
    ``sge.print_qstat_proc``.
    """
    # Project modules are imported only when this script is executed
    # directly; the import is relative to the current working directory,
    # because otherwise the package would have to be installed for the
    # relative import to work.
    import_modules()
    args = handler()

    # mapping of task ids to input files
    task_mapping = sge.parse_task_mapping_file(args.task_mapping_file)
    # the task id
    env_vars = sge.get_env_variables()

    # names of input and output files (the third name is not used here)
    profiles_path, mi_values_path, _rna_bin_path = get_current_in_out_filenames(
        args, env_vars, task_mapping
    )

    profiles, mask_index, mask_values = IO.unpack_profiles_and_mask(
        profiles_path, args.exp_mask_file, do_print=True
    )
    binned_expression = MI.discretize_exp_profile(
        mask_index, mask_values, args.nbins
    )
    mi_values = calculate_MI_for_seeds(
        profiles,
        mask_index,
        binned_expression,
        args.nbins,
        args.min_occurences,
        do_print=True,
    )
    IO.write_MI_values(mi_values, args.nbins, mi_values_path)

    qstat_requested = args.print_qstat == 'y'
    if qstat_requested:
        sge.print_qstat_proc(env_vars, args.path_to_qstat)
def main():
    """Script entry point: find the MI threshold and write the passed seeds.

    Resolves the file names for the current task, reads the profiles, the
    expression mask, the motifs and the previously stored MI values (with
    their ``nbins``), calls ``determine_mi_threshold`` to obtain the last
    positive seed, and hands that result to ``write_seeds_passed`` and
    ``write_profiles_passed``. When ``args.print_qstat`` equals ``'y'`` it
    finally calls ``sge.print_qstat_proc``.
    """
    # Project modules are imported only when this script is executed
    # directly; the import is relative to the current working directory,
    # because otherwise the package would have to be installed for the
    # relative import to work.
    import_modules()
    args = handler()

    # mapping of task ids to input files
    task_mapping = sge.parse_task_mapping_file(args.task_mapping_file)
    # the task id
    env_vars = sge.get_env_variables()

    # names of input and output files (the RNA bin name is not used here)
    (
        profiles_path,
        mi_values_path,
        passed_seeds_path,
        passed_profiles_path,
        seeds_path,
        _rna_bin_path,
        exp_mask_path,
    ) = get_current_in_out_filenames(args, env_vars, task_mapping)

    # motifs, their profiles and MI values
    profiles, mask_index, mask_values = IO.unpack_profiles_and_mask(
        profiles_path, exp_mask_path, do_print=True
    )
    motifs = IO.read_motif_file(seeds_path)
    mi_values, nbins = IO.read_MI_values(mi_values_path)

    # find the threshold
    binned_expression = MI.discretize_exp_profile(mask_index, mask_values, nbins)
    last_positive = determine_mi_threshold(
        mi_values,
        binned_expression,
        nbins,
        profiles,
        mask_index,
        args,
        do_print=True,
    )

    write_seeds_passed(last_positive, mi_values, motifs, passed_seeds_path)
    write_profiles_passed(last_positive, mi_values, profiles, passed_profiles_path)

    qstat_requested = args.print_qstat == 'y'
    if qstat_requested:
        sge.print_qstat_proc(env_vars, args.path_to_qstat)
def main():
    """Script entry point: determine the MI threshold from stored MI values.

    Parses the arguments, unpacks the profiles and the expression mask,
    reads the precalculated MI values together with their ``nbins``,
    discretizes the expression profile and calls
    ``determine_mi_threshold``. The value returned by that call is not
    used.
    """
    args = handler()

    # occurrence profiles and expression profile
    profiles, mask_index, mask_values = IO.unpack_profiles_and_mask(
        args, do_print=False
    )

    # precalculated MI values
    mi_values, nbins = IO.read_MI_values(args.MI_values_file)

    # find the threshold
    binned_expression = MI.discretize_exp_profile(mask_index, mask_values, nbins)
    determine_mi_threshold(
        mi_values,
        binned_expression,
        profiles,
        mask_index,
        args,
        do_print=True,
    )