def full_io(url, path, remove=True):
    """Download a gzipped mmCIF file and return its chain-remapping records.

    Fetches ``url`` to ``path`` (throttled by the module-level ``semaphore`` —
    NOTE(review): free variable, presumably defined at module scope), then
    parses only the ``_pdbe_chain_remapping.`` category from the gzip text.
    When ``remove`` is true, the downloaded file is deleted after parsing.
    Returns the parsed MMCIF2DictPlus mapping.
    """
    fetched = UnsyncFetch.fetch_file(
        semaphore, 'get', dict(url=url), path, 1).result()
    with gzip_open(fetched, 'rt') as handle:
        remapping = MMCIF2DictPlus(handle, ('_pdbe_chain_remapping.', ))
    if remove:
        fetched.unlink()
    return remapping
def single_retrieve(cls, pdb, suffix: str, folder: Path, semaphore, rate: float = 1.5):
    """Schedule one download task for ``pdb`` and return the pending result.

    Builds the task via ``cls.task_unit`` and hands it to
    ``UnsyncFetch.single_task`` under the given ``semaphore`` and ``rate``.
    """
    task = cls.task_unit(pdb, suffix, folder)
    return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
def retrieve_metadata(cls, organism: str, dataset: str = 'complete', suffix: str = 'user_data/{organism}/download/{dataset}/', ret_res: bool = True):
    """Download the ``interactions.dat`` metadata file for one organism.

    ``organism`` must be a member of ``cls.organisms``. The remote path is
    derived from ``suffix`` with ``organism``/``dataset`` substituted; the
    local file is named ``{organism}_{dataset}_interactions`` with a ``.tsv``
    suffix. Returns the resolved result when ``ret_res`` is true, otherwise
    the pending unsync task.
    """
    assert organism in cls.organisms, f"Invalid organism!\nValid set:{cls.organisms}"
    remote_file = f'{suffix.format(organism=organism, dataset=dataset)}interactions.dat'
    pending = UnsyncFetch.single_task(
        cls.task_unit(cls.folder, remote_file, '.tsv',
                      f'{organism}_{dataset}_interactions'),
        cls.get_web_semaphore())
    return pending.result() if ret_res else pending
def single_retrieve(self, pdb_id: str, suffix: str, params: Dict, folder: Union[Path, str], semaphore, rate: float = 1.5):
    """Schedule one download task for ``pdb_id`` and return the pending result.

    The task is assembled by ``self.task_unit`` and submitted through
    ``UnsyncFetch.single_task`` with the given ``semaphore`` and ``rate``.
    """
    task = self.task_unit(pdb_id, suffix, params, folder)
    return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
def single_retrieve(cls, suffix: str, params: Dict, folder: Union[Path, str], semaphore, rate: float = 1.5):
    """Schedule one API download task for ``suffix`` and return the pending result.

    ``suffix`` must belong to ``cls.api_set``; ``folder`` is normalized to a
    ``Path`` before the task is built.
    """
    assert suffix in cls.api_set, f"Invalid suffix! Valid set is \n{cls.api_set}"
    target_dir = Path(folder)
    task = cls.task_unit(suffix, params, target_dir)
    return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
def single_retrieve(cls, pdb, suffix: str, folder: Path, semaphore, file_suffix: Optional[str] = None, rate: float = 1.5):
    """Schedule one download task for ``pdb`` and return the pending result.

    When ``file_suffix`` is not given, it is derived from ``suffix`` via
    ``cls.get_file_suffix``.
    """
    effective_suffix = cls.get_file_suffix(suffix) if file_suffix is None else file_suffix
    task = cls.task_unit(pdb, suffix, effective_suffix, folder)
    return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
def single_retrieve(cls, pdb: str, suffix: str, method: str, folder: Union[Path, str], semaphore, rate: float = 1.5, **kwargs):
    """Schedule one download task for ``pdb`` and return the pending result.

    Takes the first task produced by ``cls.yieldTasks`` for a one-element
    tuple. A post-processing callable may be supplied via the ``to_do_func``
    keyword; ``cls.process`` is used otherwise.
    """
    first_task = next(cls.yieldTasks((pdb, ), suffix, method, folder))
    post_process = kwargs.get('to_do_func', cls.process)
    return UnsyncFetch.single_task(
        task=first_task, semaphore=semaphore, to_do_func=post_process, rate=rate)
def single_retrieve(cls, suffix: str, identifier: str, params: Optional[Dict], folder: Union[Path, str], semaphore, rate: float = 1.5, headers: Optional[Dict] = None):
    """Schedule one API download task for ``identifier`` and return the pending result.

    ``suffix`` must belong to ``cls.api_set``; ``folder`` is normalized to a
    ``Path``. Optional ``headers`` are forwarded to the task builder.
    """
    assert suffix in cls.api_set, f"Invalid suffix! Valid set is \n{cls.api_set}"
    target_dir = Path(folder)
    task = cls.task_unit(suffix, identifier, params, target_dir, headers)
    return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
def retrieve(cls, pdbs, suffix: str, folder: Path, file_suffix: Optional[str] = None, concur_req: int = 20, rate: float = 1.5, ret_res: bool = True, **kwargs):
    """Fan out download tasks for every entry in ``pdbs``.

    Tasks come from ``cls.yieldTasks`` and run under
    ``UnsyncFetch.multi_tasks`` with at most ``concur_req`` concurrent
    requests. A shared semaphore may be injected via ``kwargs``.
    """
    tasks = cls.yieldTasks(pdbs, suffix, file_suffix, folder)
    return UnsyncFetch.multi_tasks(
        tasks,
        concur_req=concur_req,
        rate=rate,
        ret_res=ret_res,
        semaphore=kwargs.get('semaphore', None))
def retrieve(cls, suffix: str, params_collection: Iterable[Dict], folder: Union[Path, str], concur_req: int = 20, rate: float = 1.5, ret_res: bool = True, **kwargs):
    """Fan out one API download task per params dict in ``params_collection``.

    ``suffix`` must belong to ``cls.api_set``; ``folder`` is normalized to a
    ``Path``. Runs under ``UnsyncFetch.multi_tasks`` with at most
    ``concur_req`` concurrent requests; a shared semaphore may be injected
    via ``kwargs``.
    """
    assert suffix in cls.api_set, f"Invalid suffix! Valid set is \n{cls.api_set}"
    target_dir = Path(folder)
    tasks = cls.yieldTasks(suffix, params_collection, target_dir)
    return UnsyncFetch.multi_tasks(
        tasks,
        concur_req=concur_req,
        rate=rate,
        ret_res=ret_res,
        semaphore=kwargs.get('semaphore', None))
def retrieve(cls, unps, folder: Optional[Union[Path, str]] = None, params: Optional[Dict] = None, concur_req: int = 20, rate: float = 1.5, file_format: str = 'json', ret_res: bool = True, **kwargs):
    """Fan out SWISS-MODEL download tasks for every UniProt accession in ``unps``.

    ``file_format`` must be ``'json'`` or ``'pdb'``. Falls back to
    ``cls.folder`` when ``folder`` is None and to ``cls.web_semaphore`` when
    no semaphore is passed via ``kwargs``. Returns the value of
    ``UnsyncFetch.multi_tasks`` (resolved results when ``ret_res`` is true).

    FIX: the previous default ``params=dict(provider='swissmodel')`` was a
    mutable default argument shared across all calls; the dict is now built
    fresh per call, which is behaviorally backward-compatible.
    """
    if params is None:
        params = dict(provider='swissmodel')
    assert file_format in ('json', 'pdb'), "Invalid file format"
    res = UnsyncFetch.multi_tasks(
        cls.yieldTasks(unps, params, file_format,
                       cls.folder if folder is None else folder),
        cls.process,
        concur_req=concur_req,
        rate=rate,
        ret_res=ret_res,
        semaphore=kwargs.get('semaphore', cls.web_semaphore))
    return res
def single_retrieve(cls, unp: str, folder: Optional[Union[Path, str]] = None, semaphore=None, params: Optional[Dict] = None, rate: float = 1.5, file_format: str = 'json'):
    """Schedule one SWISS-MODEL download for UniProt accession ``unp``.

    ``file_format`` must be ``'json'`` or ``'pdb'``. For JSON requests, if an
    already-processed ``.tsv`` sibling of the task's output path exists, it is
    returned directly (wrapped via ``unsync_wrap``) and no download happens.

    FIXES:
    - ``params=dict(provider='swissmodel')`` was a mutable default argument
      shared across calls; the dict is now built fresh per call.
    - The cached-file lookup used ``str(path).replace('json', 'tsv')``, which
      rewrites *every* occurrence of "json" anywhere in the path (directory
      names, identifiers) — not just the extension. ``Path.with_suffix('.tsv')``
      swaps only the final suffix. (task[2] is the output path — consistent
      with the task tuples built elsewhere in this module.)
    """
    if params is None:
        params = dict(provider='swissmodel')
    assert file_format in ('json', 'pdb'), "Invalid file format"
    task = cls.task_unit(unp, params, file_format,
                         cls.folder if folder is None else folder)
    if file_format == 'json':
        candidate = Path(task[2]).with_suffix('.tsv')
        if candidate.exists():
            return unsync_wrap(candidate)
    return UnsyncFetch.single_task(
        task=task,
        semaphore=cls.web_semaphore if semaphore is None else semaphore,
        to_do_func=cls.process,
        rate=rate)
def query_sequence(cls, params: Dict, data: Dict, folder: Union[Path, str], fileName: str, semaphore, rate: float = 1.5):
    """Implement the `uniparc/sequence` endpoint via a POST task.

    The response is written to ``folder/fileName`` with the suffix reported
    by ``cls.get_file_suffix()``; the pending unsync task is returned.
    """
    out_path = Path(folder) / f'{fileName}.{cls.get_file_suffix()}'
    request_args = dict(
        url=f'{BASE_URL}uniparc/sequence',
        headers=cls.headers,
        params=params,
        data=data)
    return UnsyncFetch.single_task(
        task=('post', request_args, out_path), semaphore=semaphore, rate=rate)
def single_retrieve(cls, pdb: str, suffix: str, method: str, folder: Union[Path, str], semaphore, params=None, data_collection=None, rate: float = 1.5, filename='subset'):
    """Schedule one coordinate-subset download for ``pdb``.

    When ``params`` is missing or empty, a default of one model in CIF
    encoding is used. The assembled task runs under
    ``UnsyncFetch.single_task`` with the given ``semaphore`` and ``rate``.
    """
    if params is None or len(params) == 0:
        params = {'model_nums': 1, 'encoding': 'cif'}
    task = cls.task_unit(pdb, suffix, method, folder, data_collection, params,
                         filename=filename)
    return UnsyncFetch.single_task(task=task, semaphore=semaphore, rate=rate)
def retrieve(cls, pdbs: Union[Iterable, Iterator], suffix: str, method: str, folder: Union[str, Path], chunksize: int = 20, concur_req: int = 20, rate: float = 1.5, task_id: int = 0, ret_res: bool = True, **kwargs):
    """Fan out download tasks for every entry in ``pdbs``.

    Tasks are chunked by ``cls.yieldTasks`` (``chunksize`` ids per chunk,
    tagged with ``task_id``) and executed by ``UnsyncFetch.multi_tasks``
    with ``cls.process`` as the post-processing step. A shared semaphore may
    be injected via ``kwargs``.
    """
    tasks = cls.yieldTasks(pdbs, suffix, method, folder, chunksize, task_id)
    return UnsyncFetch.multi_tasks(
        tasks,
        cls.process,
        concur_req=concur_req,
        rate=rate,
        ret_res=ret_res,
        semaphore=kwargs.get('semaphore', None))
def retrieve_all_meta(cls, dataset: str = 'complete', suffix: str = 'user_data/{organism}/download/{dataset}/'):
    """Download the interactions metadata file for every known organism.

    Builds one task per entry in ``cls.organisms`` (same remote/local naming
    scheme as ``retrieve_metadata``) and submits them all through
    ``UnsyncFetch.multi_tasks`` under the web semaphore.
    """
    tasks = []
    for organism in cls.organisms:
        remote_file = f'{suffix.format(organism=organism, dataset=dataset)}interactions.dat'
        tasks.append(cls.task_unit(cls.folder, remote_file, '.tsv',
                                   f'{organism}_{dataset}_interactions'))
    return UnsyncFetch.multi_tasks(tasks, semaphore=cls.get_web_semaphore())
def graphql_retrieve(cls, query, folder, semaphore, to_do_func=None, rate: float = 1.5):
    """Schedule a GET request against the GraphQL endpoint for ``query``.

    The response is cached under ``folder`` in a file named by the SHA-1 of
    the query text, so identical queries map to the same local path.
    """
    digest = sha1(bytes(query, encoding="utf-8")).hexdigest()
    request_args = dict(
        url=cls.graphql_root,
        params=dict(query=query),
        headers=cls.headers)
    out_path = Path(folder) / f'{digest}.json'
    return UnsyncFetch.single_task(
        task=('get', request_args, out_path),
        semaphore=semaphore,
        to_do_func=to_do_func,
        rate=rate)
def single_retrieve(cls, identifier: str, suffix: str, folder: Union[Path, str], semaphore, to_do_func=None, rate: float = 1.5):
    """Schedule one download task for ``identifier`` and return the pending result.

    The task comes from ``cls.task_unit``; an optional post-processing
    callable is forwarded as ``to_do_func``.
    """
    task = cls.task_unit(identifier, suffix, folder)
    return UnsyncFetch.single_task(
        task=task, semaphore=semaphore, to_do_func=to_do_func, rate=rate)