Exemplo n.º 1
0
def find_matches_in_dataset(dataset_id, input_spectrum_collection):
    """Search a dataset's clustered MGF file for matches to the input spectra.

    Args:
        dataset_id: Dataset identifier. It names the directory under
            PATH_TO_DATASET_UPLOADS and prefixes the clustered
            "<dataset_id>_specs_ms.mgf" file.
        input_spectrum_collection: Object exposing a ``spectrum_list`` of
            query spectra.

    Returns:
        A list holding the matches returned by ``search_spectrum`` for every
        query spectrum, each with its ``filename`` attribute set to the
        dataset-relative path of the clustered MGF. The list is empty when
        the clustered file is not present.
    """
    dataset_match_list = []
    path_to_clustered_mgf = os.path.join(PATH_TO_DATASET_UPLOADS, dataset_id,
                                         "clustered",
                                         dataset_id + "_specs_ms.mgf")
    # Same location without the uploads root; this is what gets reported on
    # each match.
    relative_user_path_to_clustered = os.path.join(
        dataset_id, "clustered", dataset_id + "_specs_ms.mgf")

    if not ming_fileio_library.is_path_present(path_to_clustered_mgf):
        return dataset_match_list

    # Load the dataset's clustered spectra once, then query it per spectrum.
    dataset_clustered_spectra = ming_spectrum_library.SpectrumCollection(
        path_to_clustered_mgf)
    dataset_clustered_spectra.load_from_file()

    for myspectrum in input_spectrum_collection.spectrum_list:
        # NOTE(review): positional arguments are presumably precursor
        # tolerance, fragment tolerance, minimum matched peaks and minimum
        # cosine (by analogy with find_matches_in_file); the meaning of the
        # final 1 is not visible here -- confirm against search_spectrum.
        match_list = dataset_clustered_spectra.search_spectrum(
            myspectrum, 1.0, 1.0, 6, 0.7, 1)
        for match in match_list:
            match.filename = relative_user_path_to_clustered
        dataset_match_list += match_list

    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3 (the bare print statement is a SyntaxError on Python 3).
    print("Dataset matches: " + str(len(dataset_match_list)))

    return dataset_match_list
def get_proteosafe_result_file_path(task_id, username, source_folder_name):
    """List the files in one result folder of a user's ProteoSAFe task.

    The folder inspected is
    ``/data/ccms-data/tasks/<username>/<task_id>/<source_folder_name>``.

    Args:
        task_id: Task identifier (directory name below the user folder).
        username: Owner of the task (directory name below the tasks root).
        source_folder_name: Name of the folder inside the task directory.

    Returns:
        Whatever ``ming_fileio_library.list_files_in_dir`` returns for the
        folder, or an empty list when the folder is not present.
    """
    tasks_root = "/data/ccms-data/tasks/"
    result_folder = os.path.join(tasks_root, username, task_id,
                                 source_folder_name)

    if ming_fileio_library.is_path_present(result_folder):
        return ming_fileio_library.list_files_in_dir(result_folder)

    return []
def get_proteosafe_result_file_path(task_id, username, source_folder_name):
    """Return the files found in a result folder of a user's task.

    Looks in
    ``/data/ccms-data/tasks/<username>/<task_id>/<source_folder_name>``.

    Args:
        task_id: Task identifier (directory name below the user folder).
        username: Owner of the task (directory name below the tasks root).
        source_folder_name: Name of the folder inside the task directory.

    Returns:
        The result of ``ming_fileio_library.list_files_in_dir`` for that
        folder; an empty list if the folder is not present.
    """
    folder = os.path.join("/data/ccms-data/tasks/", username, task_id,
                          source_folder_name)

    folder_exists = ming_fileio_library.is_path_present(folder)
    if folder_exists:
        return ming_fileio_library.list_files_in_dir(folder)

    return []
def get_proteosafe_backend_result_file_path(task_id, source_folder_name, site):
    """List the files in a task's result folder on the backend data volume.

    The tasks directory lives under ``/data/beta-proteomics2`` when ``site``
    is ``"proteomics2"`` and directly under ``/data/`` for any other value.

    Args:
        task_id: Task identifier (directory name below ``tasks``).
        source_folder_name: Name of the folder inside the task directory.
        site: Site name; only ``"proteomics2"`` changes the data root.

    Returns:
        Whatever ``ming_fileio_library.list_files_in_dir`` returns for the
        folder, or an empty list when the folder is not present.
    """
    data_root = "/data/beta-proteomics2" if site == "proteomics2" else "/data/"
    result_folder = os.path.join(data_root, "tasks", task_id,
                                 source_folder_name)

    if ming_fileio_library.is_path_present(result_folder):
        return ming_fileio_library.list_files_in_dir(result_folder)

    return []
def get_proteosafe_backend_result_file_path(task_id, source_folder_name, site):
    """Return the files found in a backend task's result folder.

    For ``site == "proteomics2"`` the folder is looked up below
    ``/data/beta-proteomics2/tasks``; for every other site below
    ``/data/tasks``.

    Args:
        task_id: Task identifier (directory name below ``tasks``).
        source_folder_name: Name of the folder inside the task directory.
        site: Site name; only ``"proteomics2"`` changes the data root.

    Returns:
        The result of ``ming_fileio_library.list_files_in_dir`` for that
        folder; an empty list if the folder is not present.
    """
    if site == "proteomics2":
        base_dir = "/data/" + "beta-proteomics2"
    else:
        base_dir = "/data/"

    folder = os.path.join(base_dir, "tasks", task_id, source_folder_name)
    folder_exists = ming_fileio_library.is_path_present(folder)
    if folder_exists:
        return ming_fileio_library.list_files_in_dir(folder)

    return []
def _filter_spectrum_peaks(spectrum, match_parameters):
    """Apply the peak filters enabled in ``match_parameters`` to a spectrum."""
    if match_parameters["FILTER_WINDOW"]:
        spectrum.window_filter_peaks(50, 6)
    if match_parameters["FILTER_PRECURSOR"]:
        spectrum.filter_precursor_peaks()


def find_matches_in_file(input_spectrum_collection,
                         dataset_filepath,
                         relative_dataset_filepath,
                         match_parameters,
                         top_k=1):
    """Search one dataset spectrum file for matches to the input spectra.

    Args:
        input_spectrum_collection: Object exposing a ``spectrum_list`` of
            query spectra.
        dataset_filepath: Path of the spectrum file to load and search.
        relative_dataset_filepath: Value stored under the ``"filename"`` key
            of every match that is returned.
        match_parameters: Mapping with the keys ``FILTER_WINDOW``,
            ``FILTER_PRECURSOR``, ``PM_TOLERANCE``, ``FRAGMENT_TOLERANCE``,
            ``MIN_MATCHED_PEAKS``, ``MIN_COSINE`` and ``ANALOG_SEARCH``.
        top_k: Forwarded to ``search_spectrum`` as its ``top_k`` argument.

    Returns:
        A list of the matches found for all query spectra. The list is empty
        when the file is missing or cannot be loaded. A query spectrum whose
        search raises is skipped after printing "Error in Matching".
    """
    dataset_match_list = []

    if not ming_fileio_library.is_path_present(dataset_filepath):
        print("Cant find", dataset_filepath)
        return dataset_match_list

    dataset_query_spectra = ming_spectrum_library.SpectrumCollection(
        dataset_filepath)
    try:
        dataset_query_spectra.load_from_file()
    except Exception:
        # Best effort: an unreadable file yields no matches. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate, and the failure is reported instead of being silent.
        print("Cant load", dataset_filepath)
        return dataset_match_list

    for repo_spectrum in dataset_query_spectra.spectrum_list:
        _filter_spectrum_peaks(repo_spectrum, match_parameters)

    for myspectrum in input_spectrum_collection.spectrum_list:
        # NOTE(review): the filters are called on the caller's spectrum
        # objects; if they modify peaks in place, repeated calls with the
        # same collection re-filter the same spectra -- confirm.
        _filter_spectrum_peaks(myspectrum, match_parameters)

        try:
            match_list = dataset_query_spectra.search_spectrum(
                myspectrum,
                match_parameters["PM_TOLERANCE"],
                match_parameters["FRAGMENT_TOLERANCE"],
                match_parameters["MIN_MATCHED_PEAKS"],
                match_parameters["MIN_COSINE"],
                analog_search=match_parameters["ANALOG_SEARCH"],
                top_k=top_k)
            for match in match_list:
                match["filename"] = relative_dataset_filepath
            dataset_match_list += match_list
        except Exception:
            # One failing query must not abort the remaining searches, but
            # interpreter-exit signals are no longer swallowed.
            print("Error in Matching")

    print("Dataset matches: " + str(len(dataset_match_list)))

    return dataset_match_list
def find_matches_in_dataset(dataset_id, input_spectrum_collection):
    """Search a dataset's clustered MGF file for matches to the input spectra.

    Args:
        dataset_id: Dataset identifier. It names the directory under
            PATH_TO_DATASET_UPLOADS and prefixes the clustered
            "<dataset_id>_specs_ms.mgf" file.
        input_spectrum_collection: Object exposing a ``spectrum_list`` of
            query spectra.

    Returns:
        A list holding the matches returned by ``search_spectrum`` for every
        query spectrum, each with its ``filename`` attribute set to the
        dataset-relative path of the clustered MGF. The list is empty when
        the clustered file is not present.
    """
    dataset_match_list = []
    path_to_clustered_mgf = os.path.join(PATH_TO_DATASET_UPLOADS, dataset_id, "clustered", dataset_id + "_specs_ms.mgf")
    # Same location without the uploads root; this is what gets reported on
    # each match.
    relative_user_path_to_clustered = os.path.join(dataset_id, "clustered", dataset_id + "_specs_ms.mgf")

    if not ming_fileio_library.is_path_present(path_to_clustered_mgf):
        return dataset_match_list

    # Load the dataset's clustered spectra once, then query it per spectrum.
    dataset_clustered_spectra = ming_spectrum_library.SpectrumCollection(path_to_clustered_mgf)
    dataset_clustered_spectra.load_from_file()

    for myspectrum in input_spectrum_collection.spectrum_list:
        # NOTE(review): positional arguments are presumably precursor
        # tolerance, fragment tolerance, minimum matched peaks and minimum
        # cosine (by analogy with find_matches_in_file); the meaning of the
        # final 1 is not visible here -- confirm against search_spectrum.
        match_list = dataset_clustered_spectra.search_spectrum(myspectrum, 1.0, 1.0, 6, 0.7, 1)
        for match in match_list:
            match.filename = relative_user_path_to_clustered
        dataset_match_list += match_list

    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3 (the bare print statement is a SyntaxError on Python 3).
    print("Dataset matches: " + str(len(dataset_match_list)))

    return dataset_match_list