def get_download_url_by_export_id(export_id):
    """Return the signed download URL for the export with *export_id*.

    Raises:
        ValueError: if no export with that ID exists.
    """
    client = API.BaseClient()
    # Fetch the whole response first: the original indexed ['signed_url']
    # immediately, so a "dbr: not found" payload raised KeyError (and the
    # 'message' membership test ran against the URL string) instead of the
    # intended ValueError.
    result = client.get(f"/scrapers/exports/{str(export_id)}/download")
    if 'message' in result and result['message'] == "dbr: not found":
        raise ValueError(f"Export with ID {str(export_id)} was not found")
    return result['signed_url']
def get_job_history(scraper_name):
    """Return the stats history of *scraper_name*'s current job.

    Raises:
        ValueError: if the scraper does not exist.
    """
    response = API.BaseClient().get(
        f"/scrapers/{scraper_name}/current_job/stats/history")
    # The backend signals a missing scraper with a sentinel message.
    missing = 'message' in response and response['message'] == "dbr: not found"
    if missing:
        raise ValueError(f"Scraper named {scraper_name} was not found")
    return response
def get_recent_jobs(scraper_name):
    """Return the recent jobs of *scraper_name*.

    Raises:
        ValueError: if the scraper does not exist.
    """
    response = API.BaseClient().get(f"/scrapers/{scraper_name}/jobs")
    # The backend signals a missing scraper with a sentinel message.
    missing = 'message' in response and response['message'] == "dbr: not found"
    if missing:
        raise ValueError(f"Scraper named {scraper_name} was not found")
    return response
def parsing_update(job_id, gid, options):
    """Push a parsing update for page *gid* of job *job_id*.

    *options* must provide 'outputs' and 'status'; 'pages' is always sent
    empty. Returns the API response from the PUT call.
    """
    payload = {
        'outputs': options['outputs'],
        'pages': [],
        'parsing_status': options['status'],
    }
    return API.BaseClient().put(
        f"/jobs/{job_id}/pages/{gid}/parsing_update", payload)
def get_current_job_stats(scraper_name):
    """Return current-job stats for *scraper_name*.

    Raises:
        ValueError: if the scraper does not exist or has no active job.
    """
    response = API.BaseClient().get(
        f"/scrapers/{scraper_name}/current_job/stats/current")
    # The backend signals a missing scraper/job with a sentinel message.
    missing = 'message' in response and response['message'] == "dbr: not found"
    if missing:
        raise ValueError(
            f"Scraper named {scraper_name} was not found or no active job is present")
    return response
def find(scraper_name, options=None):
    """Return *scraper_name*'s current job, queried with *options*.

    Args:
        scraper_name: name of the scraper to look up.
        options: optional query parameters forwarded to the GET call.

    Raises:
        ValueError: if the scraper or its current job does not exist.
    """
    client = API.BaseClient()
    # None-sentinel instead of a mutable `{}` default, which is shared
    # across calls and a classic Python pitfall.
    result = client.get(f"/scrapers/{scraper_name}/current_job",
                        {} if options is None else options)
    if 'message' in result and result['message'] == "dbr: not found":
        raise ValueError(
            f"Scraper with name {scraper_name} or current job was not found")
    return result
def get_by_id(job_id):
    """Return the job identified by *job_id*.

    Raises:
        ValueError: if no job with that ID exists.
    """
    job_key = str(job_id)
    response = API.BaseClient().get(f"/jobs/{job_key}")
    # The backend signals a missing job with a sentinel message.
    missing = 'message' in response and response['message'] == "dbr: not found"
    if missing:
        raise ValueError(f"Job with ID {job_key} was not found")
    return response
def update(scraper_name, params=None):
    """Update the scraper *scraper_name* with the given *params*.

    Args:
        scraper_name: name of the scraper to update.
        params: optional extra fields for the update payload; the 'name'
            key is always set to *scraper_name*, overriding any value in
            *params*.

    Returns:
        The API response from the PUT call.
    """
    client = API.BaseClient()
    # None-sentinel avoids the shared mutable `{}` default; dict() copies
    # idiomatically instead of a manual key-by-key loop and leaves the
    # caller's dict untouched.
    query = dict(params) if params is not None else {}
    query['name'] = scraper_name
    return client.put(f"/scrapers/{scraper_name}", params=query)
def create(scraper_name, git_repository, params=None):
    """Create a scraper named *scraper_name* backed by *git_repository*.

    Args:
        scraper_name: name for the new scraper.
        git_repository: git repository URL/path the scraper runs from.
        params: optional extra fields for the creation payload; 'name' and
            'git_repository' always override any values in *params*.

    Returns:
        The API response from the POST call.
    """
    client = API.BaseClient()
    # None-sentinel avoids the shared mutable `{}` default; dict() copies
    # idiomatically instead of a manual key-by-key loop.
    query = dict(params) if params is not None else {}
    query['name'] = scraper_name
    query['git_repository'] = git_repository
    # Plain string: the original used an f-string with no placeholders.
    return client.post("/scrapers", params=query)
def find(job_id, gid, params=None):
    """Return page *gid* of job *job_id*, queried with *params*.

    Args:
        job_id: ID of the job that owns the page.
        gid: global ID of the page.
        params: optional query parameters forwarded to the GET call.
    """
    client = API.BaseClient()
    # None-sentinel instead of a mutable `{}` default, which is shared
    # across calls and a classic Python pitfall.
    return client.get(f"/jobs/{job_id}/pages/{gid}",
                      {} if params is None else params)
def all(job_id, params):
    """List the pages of job *job_id*, queried with *params*."""
    api = API.BaseClient()
    endpoint = f"/jobs/{job_id}/pages"
    return api.get(endpoint, params)
def all(scraper_name, params):
    """List the current-job pages of scraper *scraper_name*."""
    api = API.BaseClient()
    endpoint = f"/scrapers/{scraper_name}/current_job/pages"
    return api.get(endpoint, params=params)
def find_content(scraper_name, gid):
    """Fetch the raw content of page *gid* from *scraper_name*'s current job."""
    api = API.BaseClient()
    endpoint = f"/scrapers/{scraper_name}/current_job/pages/{gid}/content"
    # get_raw=1 asks the backend for the unprocessed page body.
    return api.get(endpoint, {'get_raw': 1})
def find_failed_content(gid):
    """Fetch the failed content recorded for global page *gid*."""
    return API.BaseClient().get(f"/global_pages/{gid}/failed_content")
def get_by_name(scraper_name):
    """Fetch the scraper record for *scraper_name*."""
    return API.BaseClient().get(f"/scrapers/{scraper_name}")
def delete(scraper_name):
    """Delete the scraper named *scraper_name* and return the API response."""
    return API.BaseClient().delete(f"/scrapers/{scraper_name}")
def all(params=None):
    """List scrapers, optionally filtered by *params*.

    Args:
        params: optional query parameters forwarded to the GET call.
    """
    client = API.BaseClient()
    # None-sentinel instead of a mutable `{}` default, which is shared
    # across calls and a classic Python pitfall.
    return client.get('/scrapers', params={} if params is None else params)
def find(gid):
    """Fetch the global page identified by *gid*."""
    return API.BaseClient().get(f"/global_pages/{gid}")