def main():
    """Set up the project folders and run every processing stage in order.

    The project lives in a ``pdb`` folder under the directory returned by
    ``find_home_dir()``, with ``uni_data``, ``tsv_data`` and ``working``
    sub-folders. All of these paths are bundled into one ``ProjectFolders``
    value that is handed to each stage.

    Returns:
        None. A success message is printed once every stage has returned.
    """
    home = find_home_dir()
    project_root = os.path.join(home, 'pdb')
    dirs = ProjectFolders(
        user_home=home,
        project_home=project_root,
        uni_data=os.path.join(project_root, 'uni_data'),
        tsv_data=os.path.join(project_root, 'tsv_data'),
        working=os.path.join(project_root, 'working'),
    )

    # Stages run strictly in this order; each one receives the same folders.
    create_folders(dirs)
    fetch_and_write_files(dirs)
    initial_filtering(dirs)
    UniProtFetcher(dirs).fetch_fasta_files()
    second_filtering(dirs)
    final_filtering(dirs)
    uniprot_composite(dirs)

    print("Processing finished successfully.")
    return None
def test_known_obsolete_pass(self):
    """Test zero length HTTP 200 UniProt download.

    After ``_download_uniprot`` is called for 'Q8NI70', the ID must be
    recorded in the fetcher's ``obs`` collection.
    """
    uni_id = 'Q8NI70'
    fetcher = UniProtFetcher(self.dirs)
    fetcher._download_uniprot(uni_id)
    # assertIn reports both the ID and the collection contents on failure,
    # unlike assertTrue(x in y), which only says "False is not true".
    self.assertIn(uni_id, fetcher.obs)
    return None
def test_404_assigned_as_obsolete(self):
    """HTTP 404 errors should be assigned as obsolete, not missing.

    After ``_download_uniprot`` is called for 'P123451', the ID must be
    recorded in the fetcher's ``obs`` collection.
    """
    uni_id = 'P123451'
    fetcher = UniProtFetcher(self.dirs)
    fetcher._download_uniprot(uni_id)
    # assertIn reports both the ID and the collection contents on failure,
    # unlike assertTrue(x in y), which only says "False is not true".
    self.assertIn(uni_id, fetcher.obs)
    return None
def test_download_uniprot_pass(self):
    """Downloading 'P12345' should return the expected FASTA text."""
    self._generate_dir_names()

    # Adjacent literals are joined by the parser into one string; each
    # source line below corresponds to one line of the FASTA record.
    expected_fasta = (
        '>sp|P12345|AATM_RABIT Aspartate aminotransferase, mitochondrial '
        'OS=Oryctolagus cuniculus GN=GOT2 PE=1 SV=2\n'
        'MALLHSARVLSGVASAFHPGLAAAASARASSWWAHVEMGPPDPILGVTEAYKRDTNSKKM\n'
        'NLGVGAYRDDNGKPYVLPSVRKAEAQIAAKGLDKEYLPIGGLAEFCRASAELALGENSEV\n'
        'VKSGRFVTVQTISGTGALRIGASFLQRFFKFSRDVFLPKPSWGNHTPIFRDAGMQLQSYR\n'
        'YYDPKTCGFDFTGALEDISKIPEQSVLLLHACAHNPTGVDPRPEQWKEIATVVKKRNLFA\n'
        'FFDMAYQGFASGDGDKDAWAVRHFIEQGINVCLCQSYAKNMGLYGERVGAFTVICKDADE\n'
        'AKRVESQLKILIRPMYSNPPIHGARIASTILTSPDLRKQWLQEVKGMADRIIGMRTQLVS\n'
        'NLKKEGSTHSWQHITDQIGMFCFTGLKPEQVERLTKEFSIYMTKDGRISVAGVTSGNVGY\n'
        'LAHAIHQVTK\n'
    )

    downloaded = UniProtFetcher(self.dirs)._download_uniprot('P12345')

    self.assertEqual(expected_fasta, downloaded)
    return None
def test_download_uniprot_with_obsolete_pass(self):
    """Check the TSV file contents after ``fetch_fasta_files`` has run.

    The fixture writers are called first, then the fetcher runs, and the
    file at ``self.tsv_test_fp`` must equal ``TsvData.pdb_seq_tsv_valid``.
    """
    # Write the initial TSV and UniProt fixtures.
    self._write_initial_tsv()
    self._write_initial_uni()

    UniProtFetcher(self.dirs).fetch_fasta_files()

    with open(self.tsv_test_fp, 'r', encoding='utf-8') as tsv_fh:
        tsv_contents = tsv_fh.read()

    self.assertEqual(tsv_contents, TsvData.pdb_seq_tsv_valid)
    return None