示例#1
0
def main():
    """Run the seed-family filtering step and write its three output files.

    The steps are executed in this order:

    1. Read the expression mask (``args.exp_mask_file``) and discretize it
       into ``args.nbins`` bins.
    2. Read the combined seeds and the combined profiles.
    3. Call ``filter_CMI``, which returns a classification array together
       with the number of families.
    4. Call ``calculate_MIs_all_seeds`` to obtain the MI values array.
    5. Call ``choose_best_reps_for_families`` to obtain the unique seeds and
       their profiles, then write the unique seeds, the unique profiles and
       the classification array to the file names given in ``args``.
    """
    import_modules()
    args = handler()

    # expression mask -> discretized expression profile
    mask_index, mask_values = IO.unpack_mask_file(args.exp_mask_file)
    binned_exp_profile = MI.discretize_exp_profile(
        mask_index, mask_values, nbins=args.nbins)

    # candidate seeds and their profiles
    seeds = IO.read_motif_file(args.combined_seeds_filename)
    profiles = IO.unpack_profiles_file(args.combined_profiles_filename)

    family_labels, family_count = filter_CMI(
        seeds,
        profiles,
        binned_exp_profile,
        mask_index,
        args.nbins,
        args.min_ratio,
        do_print=args.do_print,
    )

    mi_values = calculate_MIs_all_seeds(
        profiles, binned_exp_profile, mask_index, args.nbins)

    unique_seeds, unique_profiles = choose_best_reps_for_families(
        seeds,
        profiles,
        family_labels,
        family_count,
        mi_values,
        do_print=args.do_print,
    )

    # persist the results
    IO.write_list_of_seeds(unique_seeds, args.unique_seeds_filename)
    IO.write_array_of_profiles(unique_profiles, args.unique_profiles_filename)
    IO.write_classification_array(
        family_labels, args.families_classification_filename)
示例#2
0
def main():
    """Optimize one chunk of seeds and write the results to disk.

    Reads the sequences, the expression mask, the unique seeds and their
    profiles; keeps only the sequences whose mask entry is truthy; splits the
    seeds/profiles into chunks of ``args.size_of_chunks`` and picks the chunk
    for this task based on the SGE environment variables; runs
    ``optimize_motifs`` on that chunk; and finally writes the optimized seeds,
    their profiles, the seed characteristics array and the robustness array.
    """
    import_modules()
    args = handler()

    all_seqs = read_sequences(args.rna_bin_file)
    mask_index, mask_values = IO.unpack_mask_file(args.exp_mask_file)
    binned_exp_profile = MI.discretize_exp_profile(
        mask_index, mask_values, nbins=args.nbins)
    seeds_in = IO.read_motif_file(args.unique_seeds_filename)
    profiles_in = IO.unpack_profiles_file(args.unique_profiles_filename)

    # keep the sequences at positions where the mask entry is truthy
    selected_seqs = []
    for pos in range(mask_index.shape[0]):
        if mask_index[pos]:
            selected_seqs.append(all_seqs[pos])

    # the environment variables are used to get the task id
    env_vars = sge.get_env_variables()
    all_seed_chunks, all_profile_chunks = chunk_up_input_files(
        seeds_in, profiles_in, args.size_of_chunks)
    my_seeds, my_profiles = pick_one_chunk(
        all_seed_chunks, all_profile_chunks, env_vars)

    (seeds_out_path, profiles_out_path,
     charact_out_path, robustness_out_path) = make_output_filenames(
        env_vars, args)

    (optimized_seeds, optimized_profiles,
     charact_array, robust_array) = optimize_motifs(
        my_seeds,
        my_profiles,
        binned_exp_profile,
        args.nbins,
        mask_index,
        selected_seqs,
        args,
        do_print=True,
    )

    IO.write_list_of_seeds(optimized_seeds, seeds_out_path)
    IO.write_array_of_profiles(optimized_profiles, profiles_out_path)
    IO.write_np_array(charact_array, charact_out_path)
    IO.write_np_array(robust_array, robustness_out_path)
示例#3
0
def main():
    """Compute MI values for one task's profiles file and write them out.

    The input/output file names for the current task are resolved from the
    task mapping file and the SGE environment variables. The profiles and the
    expression mask are unpacked, the expression profile is discretized with
    ``args.nbins``, ``calculate_MI_for_seeds`` is run, and the resulting MI
    values are written. If ``args.print_qstat`` equals ``'y'``, the qstat
    information is printed at the end.
    """
    # Modules are imported only when this script itself is run: the import
    # is done relative to the current working directory, because otherwise
    # the package would have to be installed for relative imports to work.
    import_modules()

    args = handler()

    # task id -> input files mapping, and the variables that hold the task id
    task_mapping = sge.parse_task_mapping_file(args.task_mapping_file)
    env_vars = sge.get_env_variables()

    # input and output file names for this task (the third one is not used)
    profiles_path, mi_values_path, _rna_bin_path = \
        get_current_in_out_filenames(args, env_vars, task_mapping)

    profiles, mask_index, mask_values = IO.unpack_profiles_and_mask(
        profiles_path, args.exp_mask_file, do_print=True)

    binned_exp_profile = MI.discretize_exp_profile(
        mask_index, mask_values, args.nbins)

    mi_values = calculate_MI_for_seeds(
        profiles,
        mask_index,
        binned_exp_profile,
        args.nbins,
        args.min_occurences,
        do_print=True,
    )
    IO.write_MI_values(mi_values, args.nbins, mi_values_path)

    if args.print_qstat == 'y':
        sge.print_qstat_proc(env_vars, args.path_to_qstat)
def main():
    """Determine the MI threshold for one task and write the passing seeds.

    The file names for the current task are resolved from the task mapping
    file and the SGE environment variables. The profiles, expression mask,
    motifs and precalculated MI values (with their ``nbins``) are read;
    ``determine_mi_threshold`` returns the last positive seed, which is then
    passed to ``write_seeds_passed`` and ``write_profiles_passed``. If
    ``args.print_qstat`` equals ``'y'``, the qstat information is printed at
    the end.
    """
    # Modules are imported only when this script itself is run: the import
    # is done relative to the current working directory, because otherwise
    # the package would have to be installed for relative imports to work.
    import_modules()

    args = handler()

    # task id -> input files mapping, and the variables that hold the task id
    task_mapping = sge.parse_task_mapping_file(args.task_mapping_file)
    env_vars = sge.get_env_variables()

    # input and output file names for this task (the rna bin name is unused)
    (profiles_path, mi_values_path,
     passed_seeds_path, passed_profiles_path,
     seeds_path,
     _rna_bin_path, exp_mask_path) = get_current_in_out_filenames(
        args, env_vars, task_mapping)

    # motifs, their profiles and the MI values
    profiles, mask_index, mask_values = IO.unpack_profiles_and_mask(
        profiles_path, exp_mask_path, do_print=True)
    motifs = IO.read_motif_file(seeds_path)
    mi_values, nbins = IO.read_MI_values(mi_values_path)

    # threshold search
    binned_exp_profile = MI.discretize_exp_profile(
        mask_index, mask_values, nbins)
    last_passing_seed = determine_mi_threshold(
        mi_values,
        binned_exp_profile,
        nbins,
        profiles,
        mask_index,
        args,
        do_print=True,
    )

    write_seeds_passed(
        last_passing_seed, mi_values, motifs, passed_seeds_path)
    write_profiles_passed(
        last_passing_seed, mi_values, profiles, passed_profiles_path)

    if args.print_qstat == 'y':
        sge.print_qstat_proc(env_vars, args.path_to_qstat)
示例#5
0
def main():
    """Run the MI threshold search on precalculated MI values.

    Unpacks the occurrence profiles and the expression mask from ``args``,
    reads the MI values and ``nbins`` from ``args.MI_values_file``,
    discretizes the expression profile and calls ``determine_mi_threshold``
    with printing enabled. The value returned by that call is not used.
    """
    args = handler()

    # occurrence profiles and expression profile
    profiles, mask_index, mask_values = IO.unpack_profiles_and_mask(
        args, do_print=False)

    # precalculated MI values
    mi_values, nbins = IO.read_MI_values(args.MI_values_file)

    # threshold search
    binned_exp_profile = MI.discretize_exp_profile(
        mask_index, mask_values, nbins)
    determine_mi_threshold(
        mi_values,
        binned_exp_profile,
        profiles,
        mask_index,
        args,
        do_print=True,
    )