def test_json_read_write_success(self):
    """Write a dictionary to JSON, read it back, and confirm round-trip equality."""
    ss_file_path = os.path.join(self.temp_dir, 'ss_dis.json')
    self.assertFalse(exists(ss_file_path))
    write_json(self.ss_dis_dic, ss_file_path)
    self.assertTrue(exists(ss_file_path))
    result = read_json(ss_file_path)
    self.assertEqual(self.ss_dis_dic, result)
    return None
def fetch_ss_dis(dir_path):
    """Return a processed dictionary for ss_dis data.

    Args:
        dir_path (Unicode): The dir path where ss_dis files are located.

    Returns:
        ss_dis_data (dict): A dictionary of processed ss_dis data.
    """
    working_path = os.path.abspath(dir_path)
    ss_dis_files = _find_existing_files(working_path)

    if ss_dis_files['files_to_archive']:
        for name_to_archive in ss_dis_files['files_to_archive']:
            path_to_archive = os.path.join(working_path, name_to_archive)
            _archive_ss_data(path_to_archive)

    if ss_dis_files['valid_raw_file']:
        valid_raw_fp = os.path.join(
            working_path,
            ss_dis_files['valid_raw_file']
        )
    else:
        valid_raw_fp = None

    if ss_dis_files['valid_json_file']:
        valid_json_fp = os.path.join(
            working_path,
            ss_dis_files['valid_json_file']
        )
    else:
        valid_json_fp = None

    # If a valid raw/json pair exists, use the json to return a dictionary.
    if valid_raw_fp and valid_json_fp:
        assert os.path.isfile(valid_raw_fp)
        assert os.path.isfile(valid_json_fp)
        current_json_path = valid_json_fp

    # Generate a companion json file if a single raw file is found.
    elif valid_raw_fp:
        valid_raw_fn = os.path.basename(valid_raw_fp)
        assert not valid_json_fp
        this_timestamp = SS_DIS_PAT.search(valid_raw_fn).group(2)
        companion_json = "{}.{}.{}".format('ss_dis', this_timestamp, 'json')
        companion_json_path = os.path.join(working_path, companion_json)
        ss_dict = _generate_ss_dict(valid_raw_fp)
        write_json(ss_dict, companion_json_path)
        current_json_path = companion_json_path

    # Download new data and generate a json file.
    elif not (valid_raw_fp or valid_json_fp):
        new_names = _new_filenames()
        new_raw_path = os.path.join(working_path, new_names.raw)
        new_json_path = os.path.join(working_path, new_names.json)
        _download_ss_data(new_raw_path)
        ss_dict = _generate_ss_dict(new_raw_path)
        write_json(ss_dict, new_json_path)
        current_json_path = new_json_path

    # A json file without its raw companion should never occur.
    elif valid_json_fp and not valid_raw_fp:
        raise RuntimeError("Should not have a JSON file without a TXT file.")

    else:
        raise RuntimeError("Unhandled case.")

    # Always return the ss_dis dictionary by reading the json
    # file to ensure consistency of future runs.
    ss_dis_data = read_json(current_json_path)
    return ss_dis_data
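# Usage sketch (not from the source module): fetch_ss_dis only needs a
# directory path. The 'data/ss_dis' path below is a hypothetical example,
# and the directory is assumed to exist and be writable so raw and JSON
# files can be downloaded, generated, or archived there.
if __name__ == '__main__':
    ss_data = fetch_ss_dis('data/ss_dis')
    print("Loaded ss_dis data for {} entries.".format(len(ss_data)))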
def uniprot_composite(dirs):
    """Create the final UniProt DataFrame.

    Create the final UniProt DataFrame, in which the UniProt ID provides a
    unique key.

    Args:
        dirs (ProjectFolders): A named tuple of directory paths.
    """
    pdb_initial_composite_fp = os.path.join(
        dirs.tsv_data,
        "pdb_initial_composite_df.tsv"
    )
    assert os.path.isfile(pdb_initial_composite_fp)

    uni_folder_path = dirs.uni_data
    file_names = _create_composite_file_names()
    paths = _create_composite_file_paths(uni_folder_path, file_names)
    uni_composite_tsv = paths["tsv_file"]
    uni_composite_yaml = paths["yaml_file"]
    uni_composite_json = paths["json_file"]

    if _uni_composite_file_exists(uni_folder_path):
        print(
            "A final uni_composite file already exists. Composite "
            "function complete. (Note: remove existing uni_composite "
            'files in the "{}" directory to have them '
            "regenerated.)".format(uni_folder_path)
        )
        return None

    pdb_df = pd.read_csv(
        pdb_initial_composite_fp,
        sep="\t",
        header=0,
        encoding="utf-8",
        keep_default_na=False,
        na_values=["NULL", "N/A"]
    )

    print("Creating the UniProt composite structure.")
    uni_df = create_uni_struct(pdb_df)
    print("Done creating UniProt composite structure.")

    print("Validating UniProt composite structure.")
    uni_pdb_validation(uni_df, pdb_df)
    print("Validation complete.")

    print("Assigning missing region designations.")
    uni_df = create_intervals(pdb_df, uni_df)
    print("Done assigning missing regions.")

    assert isinstance(uni_df, pd.DataFrame)

    delimiter = create_delimiter("\t")
    uni_df.to_csv(uni_composite_tsv, sep=delimiter, encoding="utf-8")
    uni_df.to_json(uni_composite_json, force_ascii=False)
    json_data = read_json(uni_composite_json)
    write_yaml(json_data, uni_composite_yaml)

    print("Done writing UniProt composite files:")
    print("\t{}".format(uni_composite_tsv))
    print("\t{}".format(uni_composite_yaml))
    print("\t{}".format(uni_composite_json))
    print("This is the final UniProt ID DataFrame.")
    return None
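# Usage sketch (assumptions: ProjectFolders is the project's named tuple of
# directory paths; only the tsv_data and uni_data fields used above are
# modeled here, and the example paths are hypothetical).
if __name__ == '__main__':
    from collections import namedtuple

    ProjectFolders = namedtuple('ProjectFolders', ['tsv_data', 'uni_data'])
    dirs = ProjectFolders(tsv_data='data/tsv', uni_data='data/uniprot')
    uniprot_composite(dirs)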