Example #1
0
 def test_json_read_write_success(self):
     """Round-trip a dictionary through write_json and read_json."""
     target_path = os.path.join(self.temp_dir, 'ss_dis.json')
     # Writing must create a file that did not exist beforehand.
     self.assertFalse(exists(target_path))
     write_json(self.ss_dis_dic, target_path)
     self.assertTrue(exists(target_path))
     # Reading it back must reproduce the original dictionary exactly.
     round_tripped = read_json(target_path)
     self.assertEqual(self.ss_dis_dic, round_tripped)
     return None
Example #2
0
def fetch_ss_dis(dir_path):
    """Return a processed dictionary for ss_dis data.

    Superseded files in *dir_path* are archived first.  The dictionary
    is then read from an existing valid raw/JSON pair, from a JSON file
    generated for a lone raw file, or from freshly downloaded data when
    neither file exists.

    Args:
        dir_path (Unicode): The dir path where ss_dis files are located.

    Returns:
        ss_dis_data (dict): A dictionary of processed ss_dis data.

    Raises:
        RuntimeError: If a JSON file exists without its raw TXT
            companion, or if the file-state combination is unhandled.

    """
    working_path = os.path.abspath(dir_path)
    ss_dis_files = _find_existing_files(working_path)

    # Archive superseded files before choosing which current files to
    # use.  Iterating an empty list is a no-op, so no guard is needed.
    for name_to_archive in ss_dis_files['files_to_archive']:
        path_to_archive = os.path.join(working_path, name_to_archive)
        _archive_ss_data(path_to_archive)

    if ss_dis_files['valid_raw_file']:
        valid_raw_fp = os.path.join(
            working_path, ss_dis_files['valid_raw_file']
        )
    else:
        valid_raw_fp = None

    if ss_dis_files['valid_json_file']:
        valid_json_fp = os.path.join(
            working_path, ss_dis_files['valid_json_file']
        )
    else:
        valid_json_fp = None

    # If a valid raw/json pair exists, use the json file directly.
    if valid_raw_fp and valid_json_fp:
        assert os.path.isfile(valid_raw_fp)
        assert os.path.isfile(valid_json_fp)
        current_json_path = valid_json_fp

    # Generate a companion json file if a single raw file is found.
    elif valid_raw_fp:
        valid_raw_fn = os.path.basename(valid_raw_fp)
        assert not valid_json_fp
        # Reuse the raw file's timestamp so the pair stays matched.
        this_timestamp = SS_DIS_PAT.search(valid_raw_fn).group(2)
        companion_json = "{}.{}.{}".format(
            'ss_dis',
            this_timestamp,
            'json'
        )
        companion_json_path = os.path.join(working_path, companion_json)
        ss_dict = _generate_ss_dict(valid_raw_fp)
        write_json(ss_dict, companion_json_path)
        current_json_path = companion_json_path

    # Download new data and generate json file.
    elif not (valid_raw_fp or valid_json_fp):
        new_names = _new_filenames()
        new_raw_path = os.path.join(working_path, new_names.raw)
        new_json_path = os.path.join(working_path, new_names.json)

        _download_ss_data(new_raw_path)
        ss_dict = _generate_ss_dict(new_raw_path)
        write_json(ss_dict, new_json_path)
        current_json_path = new_json_path

    # A JSON file is always derived from a raw TXT file, so finding one
    # without the other indicates a corrupted directory state.
    # BUG FIX: the condition was ``valid_raw_fp and not valid_json_fp``,
    # which is unreachable (already consumed by ``elif valid_raw_fp``
    # above) and let this case fall through to "Unhandled case.".
    elif valid_json_fp and not valid_raw_fp:
        raise RuntimeError("Should not have a JSON file without a TXT file.")

    else:
        raise RuntimeError("Unhandled case.")

    # Always return the ss_dis dictionary by reading the json
    # file to ensure consistency of future runs.
    ss_dis_data = read_json(current_json_path)

    return ss_dis_data
Example #3
0
def uniprot_composite(dirs):
    """Creates final UniProt DataFrame.

    Create final UniProt DataFrame where the
    UniProt ID provides a unique key.  The result is written out as
    TSV, JSON, and YAML files in the UniProt data directory; if those
    composite files already exist, nothing is regenerated.

    Args:
        dirs (ProjectFolders): A named tuple of directory paths.

    Returns:
        None

    """
    pdb_initial_composite_fp = os.path.join(dirs.tsv_data, "pdb_initial_composite_df.tsv")
    assert os.path.isfile(pdb_initial_composite_fp)

    uni_folder_path = dirs.uni_data
    file_names = _create_composite_file_names()
    paths = _create_composite_file_paths(uni_folder_path, file_names)

    uni_composite_tsv = paths["tsv_file"]
    uni_composite_yaml = paths["yaml_file"]
    uni_composite_json = paths["json_file"]

    # Skip regeneration when output already exists; the user must delete
    # the files to force a rebuild.
    if _uni_composite_file_exists(uni_folder_path):
        # BUG FIX: the "(Note: ..." parenthetical was never closed.
        print(
            "A final uni_composite file already exists. Composite "
            "function complete. (Note: remove existing uni_composite "
            'files in the "{}" directory to have them '
            "regenerated.)".format(uni_folder_path)
        )
        return None

    # Treat "NULL"/"N/A" as missing, but keep pandas' default NA strings
    # (e.g. the literal "NA") as real values.
    pdb_df = pd.read_csv(
        pdb_initial_composite_fp, sep="\t", header=0, encoding="utf-8", keep_default_na=False, na_values=["NULL", "N/A"]
    )

    print("Creating the UniProt composite structure.")
    uni_df = create_uni_struct(pdb_df)
    print("Done creating UniProt composite structure.")

    print("Validating UniProt composite structure.")
    uni_pdb_validation(uni_df, pdb_df)
    print("Validation complete.")

    print("Assigning missing region designations.")
    uni_df = create_intervals(pdb_df, uni_df)
    print("Done assigning missing regions.")

    assert isinstance(uni_df, pd.DataFrame)
    delimiter = create_delimiter("\t")
    uni_df.to_csv(uni_composite_tsv, sep=delimiter, encoding="utf-8")
    uni_df.to_json(uni_composite_json, force_ascii=False)

    # The YAML file is derived from the JSON output so all three formats
    # stay consistent.
    json_data = read_json(uni_composite_json)
    write_yaml(json_data, uni_composite_yaml)

    print("Done writing UniProt composite files:")
    print("\t{}".format(uni_composite_tsv))
    print("\t{}".format(uni_composite_yaml))
    print("\t{}".format(uni_composite_json))
    print("This is the final UniProt ID DataFrame.")

    return None