def test_57_put_files_uuid_replace_locations(self):
    """Test that a file can replace with the same location."""
    self.start_server()
    token = self.get_token()
    client = RestClient(self.address, token, timeout=1, retries=1)

    # the original record and its replacement point at the same location
    original = {
        'logical_name': '/blah/data/exp/IceCube/blah.dat',
        'checksum': {'sha512': hex('foo bar')},
        'file_size': 1,
        u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
    }
    replacement = {
        'logical_name': '/blah/data/exp/IceCube/blah.dat',
        'checksum': {'sha512': hex('foo bar2')},
        'file_size': 2,
        u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
    }

    # creating the first record should succeed
    resp = client.request_seq('POST', '/api/files', original)
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('file', resp)
    file_uuid = resp['file'].rsplit('/', 1)[-1]

    # replacing it with a record holding the same location should also succeed
    resp = client.request_seq('PUT', '/api/files/' + file_uuid, replacement)
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('logical_name', resp)
def test_get(db_rc: RestClient) -> None:
    """Run some test queries."""
    # walk every database -> collection -> histogram, printing as we go
    db_listing = db_rc.request_seq("GET", "/databases/names")
    print(db_listing)
    for database in db_listing["databases"]:
        coll_listing = db_rc.request_seq(
            "GET", "/collections/names", {"database": database}
        )
        print(coll_listing)
        for collection in coll_listing["collections"]:
            coll_body = {"database": database, "collection": collection}
            histo_listing = db_rc.request_seq(
                "GET", "/collections/histograms/names", coll_body
            )
            print(histo_listing)
            for name in histo_listing["histograms"]:
                histo = db_rc.request_seq(
                    "GET",
                    "/histogram",
                    {"database": database, "collection": collection, "name": name},
                )
                print(histo)
            # also list the files recorded for this collection
            filelist = db_rc.request_seq("GET", "/files/names", coll_body)
            print(filelist)
    db_rc.close()
def test_54_patch_files_uuid_replace_logical_name(self):
    """Test that a file can be updated with the same logical_name."""
    self.start_server()
    token = self.get_token()
    client = RestClient(self.address, token, timeout=1, retries=1)

    # the record to create
    new_file = {
        'logical_name': '/blah/data/exp/IceCube/blah.dat',
        'checksum': {'sha512': hex('foo bar')},
        'file_size': 1,
        u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
    }
    # a PATCH that repeats the record's own logical_name
    same_name_patch = {
        'logical_name': '/blah/data/exp/IceCube/blah.dat',
        'checksum': {'sha512': hex('foo bar2')},
        'file_size': 2,
        u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
    }

    # creating the record should succeed
    resp = client.request_seq('POST', '/api/files', new_file)
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('file', resp)
    file_uuid = resp['file'].rsplit('/', 1)[-1]

    # patching with an unchanged logical_name should also succeed
    resp = client.request_seq('PATCH', '/api/files/' + file_uuid, same_name_patch)
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('logical_name', resp)
def test_10_files(self):
    """Create a file via POST and verify listing + disallowed collection methods."""
    self.start_server()
    token = self.get_token()
    client = RestClient(self.address, token, timeout=1, retries=1)

    new_file = {
        'logical_name': 'blah',
        'checksum': {'sha512': hex('foo bar')},
        'file_size': 1,
        u'locations': [{u'site': u'test', u'path': u'blah.dat'}]
    }
    resp = client.request_seq('POST', '/api/files', new_file)
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('file', resp)
    file_uuid = resp['file'].rsplit('/', 1)[-1]

    # the listing should contain exactly the file we just made
    listing = client.request_seq('GET', '/api/files')
    self.assertIn('_links', listing)
    self.assertIn('self', listing['_links'])
    self.assertIn('files', listing)
    self.assertEqual(len(listing['files']), 1)
    self.assertTrue(any(f['uuid'] == file_uuid for f in listing['files']))

    # the files collection endpoint rejects non-POST/GET verbs
    for method in ('PUT', 'DELETE', 'PATCH'):
        with self.assertRaises(Exception):
            client.request_seq(method, '/api/files')
def test_71_snapshot_find(self):
    """Snapshot an empty collection and verify its file list is empty."""
    self.start_server()
    token = self.get_token()
    client = RestClient(self.address, token, timeout=1, retries=1)

    # create a collection to snapshot
    resp = client.request_seq('POST', '/api/collections',
                              {'collection_name': 'blah', 'owner': 'foo'})
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('collection', resp)
    coll_uuid = resp['collection'].rsplit('/', 1)[-1]
    client.request_seq('GET', '/api/collections/' + coll_uuid)

    # take a snapshot of the (empty) collection
    resp = client.request_seq('POST', '/api/collections/{}/snapshots'.format(coll_uuid))
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('snapshot', resp)
    snap_uuid = resp['snapshot'].rsplit('/', 1)[-1]

    # the snapshot exists and holds no files
    resp = client.request_seq('GET', '/api/snapshots/' + snap_uuid)
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('files', resp)
    self.assertEqual(resp['files'], [])
def test_15_files_auth(self):
    """Test that POSTing a file requires a valid auth token.

    A client carrying a bogus token must be rejected; the client with a
    real token must succeed.
    """
    # NOTE: removed the unused local `appkey = 'secret2'` (dead code)
    self.edit_config({
        'auth': {
            'secret': 'secret',
            'expiration': 82400,
        }
    })
    self.start_server()
    token = self.get_token()
    r = RestClient(self.address, token, timeout=1, retries=1)
    metadata = {
        'logical_name': 'blah',
        'checksum': {'sha512': hex('foo bar')},
        'file_size': 1,
        u'locations': [{u'site': u'test', u'path': u'blah.dat'}]
    }
    # a client with an invalid token is rejected
    r2 = RestClient(self.address, 'blah', timeout=1, retries=1)
    with self.assertRaises(Exception):
        r2.request_seq('POST', '/api/files', metadata)
    # the authenticated client may create the file
    data = r.request_seq('POST', '/api/files', metadata)
    self.assertIn('_links', data)
    self.assertIn('self', data['_links'])
    self.assertIn('file', data)
def test_66_patch_files_uuid_locations_1xN(self):
    """Test locations uniqueness under 1xN multiplicity.

    A PATCH whose locations list includes a location already registered
    to a different file must be rejected.
    """
    self.start_server()
    token = self.get_token()
    r = RestClient(self.address, token, timeout=1, retries=1)
    # define the locations to be tested
    # NOTE: removed unused locals loc1c/locs3a/locs3b (dead code)
    loc1a = {'site': 'WIPAC', 'path': '/data/test/exp/IceCube/foo.dat'}
    loc1b = {'site': 'DESY', 'path': '/data/test/exp/IceCube/foo.dat'}
    loc1d = {'site': 'OSG', 'path': '/data/test/exp/IceCube/foo.dat'}
    locs3c = [loc1a, loc1b, loc1d]
    # define the files to be created
    metadata = {
        'logical_name': '/blah/data/exp/IceCube/blah.dat',
        'checksum': {'sha512': hex('foo bar')},
        'file_size': 1,
        u'locations': [loc1a]
    }
    metadata2 = {
        'logical_name': '/blah/data/exp/IceCube/blah2.dat',
        'checksum': {'sha512': hex('foo bar')},
        'file_size': 1,
        u'locations': [loc1b]
    }
    # this is a PATCH to metadata; steps on metadata2's location (loc1b)
    patch1 = {
        'logical_name': '/blah/data/exp/IceCube/blah.dat',
        'checksum': {'sha512': hex('foo bar2')},
        'file_size': 2,
        u'locations': locs3c
    }
    # create the first file; should be OK
    data = r.request_seq('POST', '/api/files', metadata)
    self.assertIn('_links', data)
    self.assertIn('self', data['_links'])
    self.assertIn('file', data)
    url = data['file']
    uid = url.split('/')[-1]
    # create the second file; should be OK
    data = r.request_seq('POST', '/api/files', metadata2)
    self.assertIn('_links', data)
    self.assertIn('self', data['_links'])
    self.assertIn('file', data)
    # try to update the first file with a patch; should NOT be OK
    with self.assertRaises(Exception):
        r.request_seq('PATCH', '/api/files/' + uid, patch1)
async def test_92_request_seq(requests_mock: Mock) -> None:
    """Test `request_seq()`."""
    client = RestClient("http://test", "passkey", timeout=0.1)

    def raising_handler(req: PreparedRequest, ctx: object) -> bytes:  # pylint: disable=W0613
        # simulate a server-side failure for every request
        raise Exception()

    requests_mock.post("/test", content=raising_handler)

    # the failure must propagate out of request_seq()
    with pytest.raises(Exception):
        client.request_seq("POST", "test", {})
def test_post_files(db_rc: RestClient) -> None:
    """Failure-test role authorization.

    POSTing file names with this client's role must be refused with 403.
    """
    post_body = {
        "database": "test_histograms",
        "collection": "collection_name",
        "files": ["test.txt"],
    }
    with pytest.raises(requests.exceptions.HTTPError) as e:
        db_rc.request_seq("POST", "/files/names", post_body)
    # BUG FIX: `e` is a pytest ExceptionInfo wrapper; the raised HTTPError
    # (and its .response) lives on `e.value`, not on `e` itself.
    assert e.value.response.status_code == 403  # Forbidden Error
    db_rc.close()
def test_62_post_files_locations_NxN(self):
    """Test locations uniqueness under NxN multiplicity.

    A second file sharing any location with an existing file must be
    rejected at creation time.
    """
    self.start_server()
    token = self.get_token()
    r = RestClient(self.address, token, timeout=1, retries=1)
    # define the locations to be tested
    # NOTE: removed the unused local locs3b (dead code)
    loc1a = {'site': 'WIPAC', 'path': '/data/test/exp/IceCube/foo.dat'}
    loc1b = {'site': 'DESY', 'path': '/data/test/exp/IceCube/foo.dat'}
    loc1c = {'site': 'NERSC', 'path': '/data/test/exp/IceCube/foo.dat'}
    loc1d = {'site': 'OSG', 'path': '/data/test/exp/IceCube/foo.dat'}
    locs3a = [loc1a, loc1b, loc1c]
    locs3c = [loc1a, loc1b, loc1d]  # overlaps locs3a in loc1a/loc1b
    # define the files to be created
    metadata = {
        'logical_name': '/blah/data/exp/IceCube/blah.dat',
        'checksum': {'sha512': hex('foo bar')},
        'file_size': 1,
        u'locations': locs3a
    }
    metadata2 = {
        'logical_name': '/blah/data/exp/IceCube/blah2.dat',
        'checksum': {'sha512': hex('foo bar2')},
        'file_size': 2,
        u'locations': locs3c
    }
    # create the first file; should be OK
    data = r.request_seq('POST', '/api/files', metadata)
    self.assertIn('_links', data)
    self.assertIn('self', data['_links'])
    self.assertIn('file', data)
    url = data['file']
    uid = url.split('/')[-1]
    # check that the file was created properly
    data = r.request_seq('GET', '/api/files')
    self.assertIn('_links', data)
    self.assertIn('self', data['_links'])
    self.assertIn('files', data)
    self.assertEqual(len(data['files']), 1)
    self.assertTrue(any(uid == f['uuid'] for f in data['files']))
    # check that the file was created properly, part deux
    data = r.request_seq('GET', '/api/files/' + uid)
    # create the second file; should NOT be OK
    with self.assertRaises(Exception):
        r.request_seq('POST', '/api/files', metadata2)
def test_post_histo(db_rc: RestClient) -> None:
    """Failure-test role authorization.

    POSTing a histogram with this client's role must be refused with 403.
    """
    post_body = {
        "database": "test_histograms",
        "collection": "TEST",
        "histogram": {"Anything": True},
    }
    with pytest.raises(requests.exceptions.HTTPError) as e:
        db_rc.request_seq("POST", "/histogram", post_body)
    # BUG FIX: `e` is a pytest ExceptionInfo wrapper; the raised HTTPError
    # (and its .response) lives on `e.value`, not on `e` itself.
    assert e.value.response.status_code == 403  # Forbidden Error
    db_rc.close()
def test_80_snapshot_files(self):
    """Test that a snapshot captures a collection's files at creation time.

    A snapshot taken of an empty collection stays empty even after a
    matching file is added later; a snapshot taken after the add sees it.
    """
    self.start_server()
    token = self.get_token()
    r = RestClient(self.address, token, timeout=1, retries=1)
    # create a collection to snapshot
    metadata = {
        'collection_name': 'blah',
        'owner': 'foo',
    }
    data = r.request_seq('POST', '/api/collections', metadata)
    self.assertIn('_links', data)
    self.assertIn('self', data['_links'])
    self.assertIn('collection', data)
    url = data['collection']
    uid = url.split('/')[-1]
    data = r.request_seq('GET', '/api/collections/' + uid)
    # snapshot the (still empty) collection
    data = r.request_seq('POST', '/api/collections/{}/snapshots'.format(uid))
    self.assertIn('_links', data)
    self.assertIn('self', data['_links'])
    self.assertIn('snapshot', data)
    url = data['snapshot']
    snap_uid = url.split('/')[-1]
    # the first snapshot has no files
    data = r.request_seq('GET', '/api/snapshots/{}/files'.format(snap_uid))
    self.assertEqual(data['files'], [])
    # add a file
    metadata = {
        'logical_name': 'blah',
        'checksum': {'sha512':hex('foo bar')},
        'file_size': 1,
        u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
    }
    data = r.request_seq('POST', '/api/files', metadata)
    self.assertIn('_links', data)
    self.assertIn('self', data['_links'])
    self.assertIn('file', data)
    url = data['file']
    file_uid = url.split('/')[-1]
    # old snapshot stays empty
    data = r.request_seq('GET', '/api/snapshots/{}/files'.format(snap_uid))
    self.assertEqual(data['files'], [])
    # new snapshot should have file
    data = r.request_seq('POST', '/api/collections/{}/snapshots'.format(uid))
    self.assertIn('_links', data)
    self.assertIn('self', data['_links'])
    self.assertIn('snapshot', data)
    url = data['snapshot']
    snap_uid = url.split('/')[-1]
    # restrict the returned fields with the `keys` filter
    data = r.request_seq('GET', '/api/snapshots/{}/files'.format(snap_uid),
                         {'keys':'uuid|logical_name|checksum|locations'})
    self.assertEqual(len(data['files']), 1)
    self.assertEqual(data['files'][0]['uuid'], file_uid)
    self.assertEqual(data['files'][0]['checksum'], metadata['checksum'])
def test_58_patch_files_uuid_unique_locations(self):
    """Test that locations is unique when updating a file."""
    self.start_server()
    token = self.get_token()
    r = RestClient(self.address, token, timeout=1, retries=1)
    # define the files to be created
    metadata = {
        'logical_name': '/blah/data/exp/IceCube/blah.dat',
        'checksum': {'sha512': hex('foo bar')},
        'file_size': 1,
        u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
    }
    metadata2 = {
        'logical_name': '/blah/data/exp/IceCube/blah2.dat',
        'checksum': {'sha512': hex('foo bar')},
        'file_size': 1,
        u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah2.dat'}]
    }
    # this is a PATCH to metadata; steps on metadata2's location
    patch1 = {
        'logical_name': '/blah/data/exp/IceCube/blah.dat',
        'checksum': {'sha512': hex('foo bar2')},
        'file_size': 2,
        u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah2.dat'}]
    }
    # create the first file; should be OK
    data = r.request_seq('POST', '/api/files', metadata)
    self.assertIn('_links', data)
    self.assertIn('self', data['_links'])
    self.assertIn('file', data)
    url = data['file']
    uid = url.split('/')[-1]
    # create the second file; should be OK
    data = r.request_seq('POST', '/api/files', metadata2)
    self.assertIn('_links', data)
    self.assertIn('self', data['_links'])
    self.assertIn('file', data)
    # try to update the first file with a patch; should NOT be OK
    # BUG FIX: this previously built the URL with the undefined name `uuid`
    # instead of `uid`, so assertRaises passed vacuously on a name error
    # without ever exercising the PATCH endpoint.
    with self.assertRaises(Exception):
        r.request_seq('PATCH', '/api/files/' + uid, patch1)
def test_10_collections(self):
    """Create a collection and verify it appears in the listing."""
    self.start_server()
    token = self.get_token()
    client = RestClient(self.address, token, timeout=1, retries=1)

    resp = client.request_seq('POST', '/api/collections',
                              {'collection_name': 'blah', 'owner': 'foo'})
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('collection', resp)
    coll_uuid = resp['collection'].rsplit('/', 1)[-1]

    # the new collection's uuid must appear in the listing
    listing = client.request_seq('GET', '/api/collections')
    self.assertIn('collections', listing)
    self.assertIn(coll_uuid, {row['uuid'] for row in listing['collections']})
def test_21_collection_by_name(self):
    """Create a collection and fetch it back by name."""
    self.start_server()
    token = self.get_token()
    client = RestClient(self.address, token, timeout=1, retries=1)

    new_collection = {
        'collection_name': 'blah',
        'owner': 'foo',
    }
    resp = client.request_seq('POST', '/api/collections', new_collection)
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('collection', resp)
    coll_uuid = resp['collection'].rsplit('/', 1)[-1]

    # fetch by name; every submitted field must round-trip
    fetched = client.request_seq('GET', '/api/collections/blah')
    for field, value in new_collection.items():
        self.assertIn(field, fetched)
        self.assertEqual(value, fetched[field])
def main():
    """Manually re-run IceProd i3exec for a dataset's processing/errored jobs.

    Resolves the numeric dataset to its dataset_id, optionally filters to a
    single job, then runs each pending task in task_index order.
    """
    parser = argparse.ArgumentParser(description='manually run IceProd i3exec')
    parser.add_argument('-t', '--token', help='auth token')
    parser.add_argument('-d', '--dataset', type=int, help='dataset number')
    parser.add_argument('-j', '--job', type=int, help='job number (optional)')
    args = vars(parser.parse_args())

    logging.basicConfig(level=logging.DEBUG)

    rpc = RestClient('https://iceprod2-api.icecube.wisc.edu', args['token'])

    # resolve the numeric dataset number to its dataset_id
    datasets = rpc.request_seq('GET', '/datasets', {'keys': 'dataset_id|dataset'})
    for d in datasets:
        if datasets[d]['dataset'] == args['dataset']:
            dataset_id = d
            break
    else:
        raise Exception('bad dataset num')

    dataset = rpc.request_seq('GET', f'/datasets/{dataset_id}')
    config = rpc.request_seq('GET', f'/config/{dataset_id}')

    jobs = rpc.request_seq('GET', f'/datasets/{dataset_id}/jobs',
                           {'status': 'processing|errors'})
    # BUG FIX: `if args['job']:` silently ignored job number 0 because 0 is
    # falsy; compare against None so job 0 can be selected.
    if args['job'] is not None:
        jobs = {j: jobs[j] for j in jobs if jobs[j]['job_index'] == args['job']}
    if not jobs:
        raise Exception('no jobs found')

    for job_id in jobs:
        tasks = rpc.request_seq(
            'GET', f'/datasets/{dataset_id}/tasks',
            {'job_id': job_id, 'keys': 'task_id|task_index|name|depends',
             'status': 'waiting|queued|reset|failed'})
        # run tasks in dependency (task_index) order
        for task_id in sorted(tasks, key=lambda t: tasks[t]['task_index']):
            print(f'processing {dataset["dataset"]} {jobs[job_id]["job_index"]} {tasks[task_id]["name"]}')
            write_config(config, 'config.json', dataset_id, args['dataset'], task_id)
            run(token=args['token'], config='config.json',
                jobs_submitted=dataset['jobs_submitted'],
                job=jobs[job_id]['job_index'],
                task=tasks[task_id]['name'])
def test_50_post_files_unique_logical_name(self):
    """Test that logical_name is unique when creating a new file."""
    self.start_server()
    token = self.get_token()
    client = RestClient(self.address, token, timeout=1, retries=1)

    new_file = {
        'logical_name': '/blah/data/exp/IceCube/blah.dat',
        'checksum': {'sha512': hex('foo bar')},
        'file_size': 1,
        u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
    }

    # the first POST succeeds
    resp = client.request_seq('POST', '/api/files', new_file)
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('file', resp)
    file_uuid = resp['file'].rsplit('/', 1)[-1]

    def assert_single_file():
        # exactly one file exists, and it is the one we created
        listing = client.request_seq('GET', '/api/files')
        self.assertIn('_links', listing)
        self.assertIn('self', listing['_links'])
        self.assertIn('files', listing)
        self.assertEqual(len(listing['files']), 1)
        self.assertTrue(any(f['uuid'] == file_uuid for f in listing['files']))

    assert_single_file()

    # a second POST with the same logical_name must fail ...
    with self.assertRaises(Exception):
        client.request_seq('POST', '/api/files', new_file)

    # ... and must not have created anything
    assert_single_file()
def test_30_archive(self):
    """Verify archived locations are hidden by default and query-able."""
    self.start_server()
    token = self.get_token()
    client = RestClient(self.address, token, timeout=1, retries=1)

    plain_file = {
        u'logical_name': u'blah',
        u'checksum': {u'sha512': hex('foo bar')},
        u'file_size': 1,
        u'locations': [{u'site': u'test', u'path': u'blah.dat'}]
    }
    archived_file = {
        u'logical_name': u'blah2',
        u'checksum': {u'sha512': hex('foo bar baz')},
        u'file_size': 2,
        u'locations': [{u'site': u'test', u'path': u'blah.dat', u'archive': True}]
    }
    resp = client.request_seq('POST', '/api/files', plain_file)
    plain_uuid = resp['file'].rsplit('/', 1)[-1]
    resp = client.request_seq('POST', '/api/files', archived_file)
    archived_uuid = resp['file'].rsplit('/', 1)[-1]

    # the default listing shows only the non-archived file
    listing = client.request_seq('GET', '/api/files')
    self.assertIn('_links', listing)
    self.assertIn('self', listing['_links'])
    self.assertIn('files', listing)
    self.assertEqual(len(listing['files']), 1)
    uuids = [f['uuid'] for f in listing['files']]
    self.assertIn(plain_uuid, uuids)
    self.assertNotIn(archived_uuid, uuids)

    # querying on locations.archive shows only the archived file
    listing = client.request_seq('GET', '/api/files',
                                 {'query': json_encode({'locations.archive': True})})
    self.assertIn('_links', listing)
    self.assertIn('self', listing['_links'])
    self.assertIn('files', listing)
    self.assertEqual(len(listing['files']), 1)
    uuids = [f['uuid'] for f in listing['files']]
    self.assertNotIn(plain_uuid, uuids)
    self.assertIn(archived_uuid, uuids)
def test_30_collection_files(self):
    """Verify a collection's file list reflects matching files."""
    self.start_server()
    token = self.get_token()
    client = RestClient(self.address, token, timeout=1, retries=1)

    # create an empty collection
    resp = client.request_seq('POST', '/api/collections',
                              {'collection_name': 'blah', 'owner': 'foo'})
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('collection', resp)
    coll_uuid = resp['collection'].rsplit('/', 1)[-1]

    # no files yet
    resp = client.request_seq('GET', '/api/collections/blah/files')
    self.assertEqual(resp['files'], [])

    # add a file
    new_file = {
        'logical_name': 'blah',
        'checksum': {'sha512': hex('foo bar')},
        'file_size': 1,
        u'locations': [{u'site': u'test', u'path': u'blah.dat'}]
    }
    resp = client.request_seq('POST', '/api/files', new_file)
    self.assertIn('_links', resp)
    self.assertIn('self', resp['_links'])
    self.assertIn('file', resp)
    file_uuid = resp['file'].rsplit('/', 1)[-1]

    # the file now shows up in the collection's file list
    resp = client.request_seq('GET', '/api/collections/blah/files',
                              {'keys': 'uuid|logical_name|checksum|locations'})
    self.assertEqual(len(resp['files']), 1)
    self.assertEqual(resp['files'][0]['uuid'], file_uuid)
    self.assertEqual(resp['files'][0]['checksum'], new_file['checksum'])
async def test_91_request_seq(requests_mock: Mock) -> None:
    """Test `request_seq()`."""
    expected = {"result": "the result"}
    client = RestClient("http://test", "passkey", timeout=0.1)

    def canned_handler(req: PreparedRequest, ctx: object) -> bytes:  # pylint: disable=W0613
        # the request must carry a decodable JSON body
        assert req.body is not None
        _ = json_decode(req.body)
        return json_encode(expected).encode("utf-8")

    requests_mock.post("/test", content=canned_handler)

    ret = client.request_seq("POST", "test", {})

    assert requests_mock.called
    assert ret == expected
def _check_fpaths(fpaths: List[str], token: str, thread_id: int) -> List[str]:
    """Return the subset of `fpaths` that is not indexed in the File Catalog."""
    # setup
    catalog = RestClient(
        "https://file-catalog.icecube.wisc.edu/",
        token=token,
        timeout=60 * 60,  # 1 hour
        retries=24,  # 1 day
    )

    # scan
    nonindexed_fpaths: List[str] = []
    for i, fpath in enumerate(fpaths, start=1):
        # periodic progress report
        if i % 100000 == 1:
            logging.warning(
                f"thread-{thread_id} processed total: {i} (found {len(nonindexed_fpaths)} non-indexed)"
            )
        logging.info(f"#{i}")
        logging.debug(f"Looking at {fpath}")
        result = catalog.request_seq(
            "GET",
            "/api/files",
            {
                "logical_name": fpath,
                # filepath may exist as multiple logical_names
                "query": json.dumps({"locations.path": fpath}),
            },
        )
        if not result["files"]:
            logging.info("file is *not* indexed -> appending to list")
            nonindexed_fpaths.append(fpath)
        else:
            logging.debug("file is already indexed")

    logging.warning(
        f"Thread-{thread_id} found {len(nonindexed_fpaths)} non-indexed filepaths."
    )
    return nonindexed_fpaths
def test_histo(db_rc: RestClient) -> None:  # pylint: disable=R0914
    """Run posts with updating.

    For each new histogram: POST (create), POST again (conflict), then
    POST with `update=True` (merged values) — GET-verifying after each.
    """

    def assert_get(histo: Histogram) -> None:
        # the stored histogram must match `histo` and carry a history
        get_body = {
            "database": "test_histograms",
            "collection": "TEST",
            "name": histo["name"],
        }
        get_resp = db_rc.request_seq("GET", "/histogram", get_body)
        assert get_resp["histogram"] == histo
        assert get_resp["history"]

    histograms = TestDBServerProdRole._create_new_histograms()

    # use first histogram for updating values in all histograms
    new_bin_values = histograms[0]["bin_values"]  # value will be incremented
    new_overflow = histograms[0]["overflow"]  # value will be incremented
    new_underflow = histograms[0]["underflow"]  # value will be incremented
    new_nan_count = histograms[0]["nan_count"]  # value will be incremented

    # Test!
    for original_histo in histograms:  # NOTE: fixed misspelled local name
        # 1. POST with no update flag
        post_body_1 = {
            "database": "test_histograms",
            "collection": "TEST",
            "histogram": original_histo,
        }
        post_resp_1 = db_rc.request_seq("POST", "/histogram", post_body_1)
        assert post_resp_1["history"]
        assert post_resp_1["histogram"] == original_histo
        assert not post_resp_1["updated"]
        # GET
        assert_get(original_histo)

        # 2. POST again with no update flag
        post_body_2 = {
            "database": "test_histograms",
            "collection": "TEST",
            "histogram": original_histo,
        }
        with pytest.raises(requests.exceptions.HTTPError) as e:
            _ = db_rc.request_seq("POST", "/histogram", post_body_2)
        # BUG FIX: the HTTPError lives on `e.value` (pytest ExceptionInfo),
        # not on `e` itself.
        assert e.value.response.status_code == 409  # Conflict Error
        # GET
        assert_get(original_histo)

        # 3. POST with update
        newer_histo = copy.deepcopy(original_histo)
        newer_histo["bin_values"] = new_bin_values
        newer_histo["overflow"] = new_overflow
        newer_histo["underflow"] = new_underflow
        newer_histo["nan_count"] = new_nan_count
        post_body_3 = {
            "database": "test_histograms",
            "collection": "TEST",
            "histogram": newer_histo,
            "update": True,
        }
        post_resp_3 = db_rc.request_seq("POST", "/histogram", post_body_3)
        assert post_resp_3["histogram"] == TestDBServerProdRole._get_updated_histo(
            original_histo, newer_histo
        )
        assert post_resp_3["updated"]
        assert len(post_resp_3["history"]) == 2
        # GET
        assert_get(
            TestDBServerProdRole._get_updated_histo(original_histo, newer_histo)
        )

    db_rc.close()
def test_file(db_rc: RestClient) -> None:
    """Run some test posts.

    POST file-name lists: create, conflict on re-POST, no-op update, then
    a set-union update with new names — GET-verifying after each step.
    """
    collection_name = f"TEST-{uuid.uuid4().hex}"

    def assert_get(_files: List[str]) -> None:
        # the stored file list must match `_files` and carry a history
        get_body = {"database": "test_histograms", "collection": collection_name}
        get_resp = db_rc.request_seq("GET", "/files/names", get_body)
        assert get_resp["files"] == _files
        assert get_resp["history"]

    # 1. POST with no update flag
    files = TestDBServerProdRole._create_new_files()
    post_body_1 = {
        "database": "test_histograms",
        "collection": collection_name,
        "files": files,
    }
    post_resp_1 = db_rc.request_seq("POST", "/files/names", post_body_1)
    assert post_resp_1["files"] == files
    assert post_resp_1["history"]
    # GET
    assert_get(files)

    # 2. POST again with no update flag
    post_body_2 = {
        "database": "test_histograms",
        "collection": collection_name,
        "files": files,
    }
    with pytest.raises(requests.exceptions.HTTPError) as e:
        _ = db_rc.request_seq("POST", "/files/names", post_body_2)
    # BUG FIX: the HTTPError lives on `e.value` (pytest ExceptionInfo),
    # not on `e` itself.
    assert e.value.response.status_code == 409  # Conflict Error
    # GET
    assert_get(files)

    # 3. POST with update but no new files
    post_body_3 = {
        "database": "test_histograms",
        "collection": collection_name,
        "files": files,
        "update": True,
    }
    post_resp_3 = db_rc.request_seq("POST", "/files/names", post_body_3)
    assert post_resp_3["files"] == files
    assert len(post_resp_3["history"]) == 2
    # GET
    assert_get(files)

    # 4. POST with update flag and new files
    new_files = TestDBServerProdRole._create_new_files()
    post_body_4 = {
        "database": "test_histograms",
        "collection": collection_name,
        "files": new_files,
        "update": True,
    }
    post_resp_4 = db_rc.request_seq("POST", "/files/names", post_body_4)
    assert post_resp_4["files"] == sorted(set(files) | set(new_files))
    assert len(post_resp_4["history"]) == 3
    # GET
    assert_get(sorted(set(files) | set(new_files)))  # set-add files

    db_rc.close()
class ServerComms: """ Setup JSONRPC communications with the IceProd server. Args: url (str): address to connect to passkey (str): passkey for authorization/authentication config (:py:class:`iceprod.server.exe.Config`): Config object **kwargs: passed to JSONRPC """ def __init__(self, url, passkey, config, **kwargs): self.rest = RestClient(address=url,token=passkey,**kwargs) async def download_task(self, gridspec, resources={}): """ Download new task(s) from the server. Args: gridspec (str): gridspec the pilot was submitted from resources (dict): resources available in the pilot Returns: list: list of task configs """ hostname = functions.gethostname() domain = '.'.join(hostname.split('.')[-2:]) try: ifaces = functions.getInterfaces() except Exception: ifaces = None resources = deepcopy(resources) if 'gpu' in resources and isinstance(resources['gpu'],list): resources['gpu'] = len(resources['gpu']) os_type = os.environ['OS_ARCH'] if 'OS_ARCH' in os.environ else None if os_type: resources['os'] = os_type task = await self.rest.request('POST', '/task_actions/process', {'gridspec': gridspec, 'hostname': hostname, 'domain': domain, 'ifaces': ifaces, 'requirements': resources, }) if not task: return None # get config try: config = await self.rest.request('GET', '/config/{}'.format(task['dataset_id'])) if not isinstance(config, dataclasses.Job): config = dict_to_dataclasses(config) except Exception: logging.warning('failed to get dataset config for dataset %s', task['dataset_id']) await self.task_kill(task['task_id'], dataset_id=task['dataset_id'], reason='failed to download dataset config') raise # fill in options if 'options' not in config: config['options'] = {} config['options']['task_id'] = task['task_id'] config['options']['job_id'] = task['job_id'] config['options']['dataset_id'] = task['dataset_id'] config['options']['task'] = task['task_index'] if 'requirements' in task: config['options']['resources'] = {k:task['requirements'][k] for k in Resources.defaults} try: 
job = await self.rest.request('GET', '/jobs/{}'.format(task['job_id'])) config['options']['job'] = job['job_index'] except Exception: logging.warning('failed to get job %s', task['job_id']) await self.task_kill(task['task_id'], dataset_id=task['dataset_id'], reason='failed to download job') raise try: dataset = await self.rest.request('GET', '/datasets/{}'.format(task['dataset_id'])) config['options']['dataset'] = dataset['dataset'] config['options']['jobs_submitted'] = dataset['jobs_submitted'] config['options']['tasks_submitted'] = dataset['tasks_submitted'] config['options']['debug'] = dataset['debug'] except Exception: logging.warning('failed to get dataset %s', task['dataset_id']) await self.task_kill(task['task_id'], dataset_id=task['dataset_id'], reason='failed to download dataset') raise return [config] async def task_files(self, dataset_id, task_id): """ Get the task files for a dataset and task. Args: dataset_id (str): dataset_id task_id (str): task_id Returns: list: list of :py:class:`iceprod.core.dataclasses.Data` objects """ ret = await self.rest.request('GET', '/datasets/{}/task_files/{}'.format(dataset_id, task_id)) data = [] for r in ret['files']: d = dataclasses.Data(r) if not d.valid(): raise Exception('returned Data not valid') data.append(d) return data async def processing(self, task_id): """ Tell the server that we are processing this task. Only used for single task config, not for pilots. Args: task_id (str): task_id to mark as processing """ await self.rest.request('PUT', '/tasks/{}/status'.format(task_id), {'status': 'processing'}) async def finish_task(self, task_id, dataset_id=None, stats={}, stat_filter=None, start_time=None, resources=None): """ Finish a task. 
Args: task_id (str): task_id of task dataset_id (str): (optional) dataset_id of task stats (dict): (optional) task statistics stat_filter (iterable): (optional) stat filter by keywords start_time (float): (optional) task start time in unix seconds resources (dict): (optional) task resource usage """ if stat_filter: # filter task stats stats = {k:stats[k] for k in stats if k in stat_filter} hostname = functions.gethostname() domain = '.'.join(hostname.split('.')[-2:]) if start_time: t = time.time() - start_time elif resources and 'time' in resources and resources['time']: t = int(resources['time']*3600) else: t = None iceprod_stats = { 'hostname': hostname, 'domain': domain, 'time_used': t, 'task_stats': stats, 'time': datetime.utcnow().isoformat(), } if resources: iceprod_stats['resources'] = resources if dataset_id: iceprod_stats['dataset_id'] = dataset_id await self.rest.request('POST', '/tasks/{}/task_stats'.format(task_id), iceprod_stats) data = {} if t: data['time_used'] = t await self.rest.request('POST', '/tasks/{}/task_actions/complete'.format(task_id), data) async def still_running(self, task_id): """ Check if the task should still be running according to the DB. 
Args: task_id (str): task_id of task """ ret = await self.rest.request('GET', '/tasks/{}'.format(task_id)) if (not ret) or 'status' not in ret or ret['status'] != 'processing': raise Exception('task should be stopped') async def task_error(self, task_id, dataset_id=None, stats={}, start_time=None, reason=None, resources=None): """ Tell the server about the error experienced Args: task_id (str): task_id of task dataset_id (str): (optional) dataset_id of task stats (dict): (optional) task statistics start_time (float): (optional) task start time in unix seconds reason (str): (optional) one-line summary of error resources (dict): (optional) task resource usage """ iceprod_stats = {} try: hostname = functions.gethostname() domain = '.'.join(hostname.split('.')[-2:]) if start_time: t = time.time() - start_time elif resources and 'time' in resources and resources['time']: t = int(resources['time']*3600) else: t = None iceprod_stats = { 'task_id': task_id, 'hostname': hostname, 'domain': domain, 'time_used': t, 'task_stats': json.dumps(stats), 'time': datetime.utcnow().isoformat(), 'error_summary': reason if reason else '', } if dataset_id: iceprod_stats['dataset_id'] = dataset_id if resources: iceprod_stats['resources'] = resources except Exception: logging.warning('failed to collect error info', exc_info=True) try: await self.rest.request('POST', '/tasks/{}/task_stats'.format(task_id), iceprod_stats) except Exception: logging.warning('failed to post task_stats for %r', task_id, exc_info=True) data = {} if t: data['time_used'] = t if resources: data['resources'] = resources if reason: data['reason'] = reason await self.rest.request('POST', '/tasks/{}/task_actions/reset'.format(task_id), data) async def task_kill(self, task_id, dataset_id=None, resources=None, reason=None, message=None): """ Tell the server that we killed a task. 
Args: task_id (str): task_id of task dataset_id (str): (optional) dataset_id of task resources (dict): (optional) used resources reason (str): (optional) short summary for kill message (str): (optional) long message to replace log upload """ if not reason: reason = 'killed' if not message: message = reason try: hostname = functions.gethostname() domain = '.'.join(hostname.split('.')[-2:]) iceprod_stats = { 'task_id': task_id, 'hostname': hostname, 'domain': domain, 'time': datetime.utcnow().isoformat(), 'error_summary': reason if reason else '', } if dataset_id: iceprod_stats['dataset_id'] = dataset_id if resources: iceprod_stats['resources'] = resources except Exception: logging.warning('failed to collect error info', exc_info=True) iceprod_stats = {} try: await self.rest.request('POST', '/tasks/{}/task_stats'.format(task_id), iceprod_stats) except Exception: logging.warning('failed to post task_stats for %r', task_id, exc_info=True) data = {} if resources and 'time' in resources and resources['time']: data['time_used'] = resources['time']*3600. 
if resources: data['resources'] = resources if reason: data['reason'] = reason else: data['data'] = 'task killed' await self.rest.request('POST', '/tasks/{}/task_actions/reset'.format(task_id), data) data = {'name': 'stdlog', 'task_id': task_id} if dataset_id: data['dataset_id'] = dataset_id if message: data['data'] = message elif reason: data['data'] = reason else: data['data'] = 'task killed' await self.rest.request('POST', '/logs', data) data.update({'name':'stdout', 'data': ''}) await self.rest.request('POST', '/logs', data) data.update({'name':'stderr', 'data': ''}) await self.rest.request('POST', '/logs', data) async def _upload_logfile(self, name, filename, task_id=None, dataset_id=None): """Upload a log file""" data = {'name': name} if task_id: data['task_id'] = task_id if dataset_id: data['dataset_id'] = dataset_id try: with open(filename) as f: data['data'] = f.read() except Exception as e: data['data'] = str(e) await self.rest.request('POST', '/logs', data) async def uploadLog(self, **kwargs): """Upload log file""" logging.getLogger().handlers[0].flush() await self._upload_logfile('stdlog', os.path.abspath(constants['stdlog']), **kwargs) async def uploadErr(self, filename=None, **kwargs): """Upload stderr file""" if not filename: sys.stderr.flush() filename = os.path.abspath(constants['stderr']) await self._upload_logfile('stderr', filename, **kwargs) async def uploadOut(self, filename=None, **kwargs): """Upload stdout file""" if not filename: sys.stdout.flush() filename = os.path.abspath(constants['stdout']) await self._upload_logfile('stdout', filename, **kwargs) async def create_pilot(self, **kwargs): """ Create an entry in the pilot table. Args: **kwargs: passed through to rest function Returns: str: pilot id """ ret = await self.rest.request('POST', '/pilots', kwargs) return ret['result'] async def update_pilot(self, pilot_id, **kwargs): """ Update the pilot table. 
Args: pilot_id (str): pilot id **kwargs: passed through to rest function """ await self.rest.request('PATCH', '/pilots/{}'.format(pilot_id), kwargs) async def delete_pilot(self, pilot_id, **kwargs): """ Delete the pilot. Args: pilot_id (str): pilot id """ await self.rest.request('DELETE', '/pilots/{}'.format(pilot_id)) # --- synchronous versions to be used from a signal handler # --- or other non-async code def task_kill_sync(self, task_id, dataset_id=None, resources=None, reason=None, message=None): """ Tell the server that we killed a task (synchronous version). Args: task_id (str): task_id of task dataset_id (str): (optional) dataset_id of task resources (dict): (optional) used resources reason (str): (optional) short summary for kill message (str): (optional) long message to replace log upload """ if not reason: reason = 'killed' if not message: message = reason try: hostname = functions.gethostname() domain = '.'.join(hostname.split('.')[-2:]) iceprod_stats = { 'task_id': task_id, 'hostname': hostname, 'domain': domain, 'time': datetime.utcnow().isoformat(), 'error_summary': reason if reason else '', } if dataset_id: iceprod_stats['dataset_id'] = dataset_id if resources: iceprod_stats['resources'] = resources except Exception: logging.warning('failed to collect error info', exc_info=True) iceprod_stats = {} try: self.rest.request_seq('POST', '/tasks/{}/task_stats'.format(task_id), iceprod_stats) except Exception: logging.warning('failed to post task_stats for %r', task_id, exc_info=True) data = {} if resources and 'time' in resources and resources['time']: data['time_used'] = resources['time']*3600. 
if resources: data['resources'] = resources if reason: data['reason'] = reason else: data['data'] = 'task killed' self.rest.request_seq('POST', '/tasks/{}/task_actions/reset'.format(task_id), data) data = {'name': 'stdlog', 'task_id': task_id} if dataset_id: data['dataset_id'] = dataset_id if message: data['data'] = message elif reason: data['data'] = reason else: data['data'] = 'task killed' self.rest.request_seq('POST', '/logs', data) data.update({'name':'stdout', 'data': ''}) self.rest.request_seq('POST', '/logs', data) data.update({'name':'stderr', 'data': ''}) self.rest.request_seq('POST', '/logs', data) def update_pilot_sync(self, pilot_id, **kwargs): """ Update the pilot table (synchronous version). Args: pilot_id (str): pilot id **kwargs: passed through to rpc function """ self.rest.request_seq('PATCH', '/pilots/{}'.format(pilot_id), kwargs) def delete_pilot_sync(self, pilot_id, **kwargs): """ Delete the pilot (synchronous version). Args: pilot_id (str): pilot id """ self.rest.request_seq('DELETE', '/pilots/{}'.format(pilot_id))
def test_20_file(self): self.start_server() token = self.get_token() r = RestClient(self.address, token, timeout=1, retries=1) metadata = { u'logical_name': u'blah', u'checksum': {u'sha512':hex('foo bar')}, u'file_size': 1, u'locations': [{u'site':u'test',u'path':u'blah.dat'}] } data = r.request_seq('POST', '/api/files', metadata) url = data['file'] data = r.request_seq('GET', url) data.pop('_links') data.pop('meta_modify_date') data.pop('uuid') self.assertDictEqual(metadata, data) metadata['test'] = 100 metadata_cpy = metadata.copy() metadata_cpy['uuid'] = 'something else' with self.assertRaises(Exception): data = r.request_seq('PUT', url, metadata_cpy) data = r.request_seq('PUT', url, metadata) data.pop('_links') data.pop('meta_modify_date') data.pop('uuid') self.assertDictEqual(metadata, data) data = r.request_seq('GET', url) data.pop('_links') data.pop('meta_modify_date') data.pop('uuid') self.assertDictEqual(metadata, data) metadata['test2'] = 200 data = r.request_seq('PATCH', url, {'test2':200}) data.pop('_links') data.pop('meta_modify_date') data.pop('uuid') self.assertDictEqual(metadata, data) data = r.request_seq('GET', url) data.pop('_links') data.pop('meta_modify_date') data.pop('uuid') self.assertDictEqual(metadata, data) data = r.request_seq('DELETE', url) # second delete should raise error with self.assertRaises(Exception): data = r.request_seq('DELETE', url) with self.assertRaises(Exception): data = r.request_seq('POST', url)
def test_40_simple_query(self): self.start_server() token = self.get_token() r = RestClient(self.address, token, timeout=1, retries=1) metadata = { u'logical_name': u'blah', u'checksum': {u'sha512':hex('foo bar')}, u'file_size': 1, u'locations': [{u'site':u'test',u'path':u'blah.dat'}], u'processing_level':u'level2', u'run_number':12345, u'first_event':345, u'last_event':456, u'iceprod':{ u'dataset':23453, }, u'offline':{ u'season':2017, }, } metadata2 = { u'logical_name': u'blah2', u'checksum': {u'sha512':hex('foo bar baz')}, u'file_size': 2, u'locations': [{u'site':u'test',u'path':u'blah2.dat'}], u'processing_level':u'level2', r'run_number':12356, u'first_event':578, u'last_event':698, u'iceprod':{ u'dataset':23454, }, u'offline':{ u'season':2017, }, } data = r.request_seq('POST', '/api/files', metadata) url = data['file'] uid = url.split('/')[-1] data = r.request_seq('POST', '/api/files', metadata2) url2 = data['file'] uid2 = url2.split('/')[-1] data = r.request_seq('GET', '/api/files') self.assertIn('_links', data) self.assertIn('self', data['_links']) self.assertIn('files', data) self.assertEqual(len(data['files']), 2) self.assertTrue(any(uid == f['uuid'] for f in data['files'])) self.assertTrue(any(uid2 == f['uuid'] for f in data['files'])) data = r.request_seq('GET', '/api/files', {'processing_level':'level2'}) self.assertIn('_links', data) self.assertIn('self', data['_links']) self.assertIn('files', data) self.assertEqual(len(data['files']), 2) self.assertTrue(any(uid == f['uuid'] for f in data['files'])) self.assertTrue(any(uid2 == f['uuid'] for f in data['files'])) data = r.request_seq('GET', '/api/files', {'run_number':12345}) self.assertIn('_links', data) self.assertIn('self', data['_links']) self.assertIn('files', data) self.assertEqual(len(data['files']), 1) self.assertTrue(any(uid == f['uuid'] for f in data['files'])) self.assertFalse(any(uid2 == f['uuid'] for f in data['files'])) data = r.request_seq('GET', '/api/files', {'dataset':23454}) 
self.assertIn('_links', data) self.assertIn('self', data['_links']) self.assertIn('files', data) self.assertEqual(len(data['files']), 1) self.assertFalse(any(uid == f['uuid'] for f in data['files'])) self.assertTrue(any(uid2 == f['uuid'] for f in data['files'])) data = r.request_seq('GET', '/api/files', {'event_id':400}) self.assertIn('_links', data) self.assertIn('self', data['_links']) self.assertIn('files', data) self.assertEqual(len(data['files']), 1) self.assertTrue(any(uid == f['uuid'] for f in data['files'])) self.assertFalse(any(uid2 == f['uuid'] for f in data['files'])) data = r.request_seq('GET', '/api/files', {'season':2017}) self.assertIn('_links', data) self.assertIn('self', data['_links']) self.assertIn('files', data) self.assertEqual(len(data['files']), 2) self.assertTrue(any(uid == f['uuid'] for f in data['files'])) self.assertTrue(any(uid2 == f['uuid'] for f in data['files'])) data = r.request_seq('GET', '/api/files', {'event_id':400, 'keys':'|'.join(['checksum','file_size','uuid'])}) self.assertIn('_links', data) self.assertIn('self', data['_links']) self.assertIn('files', data) self.assertEqual(len(data['files']), 1) self.assertTrue(any(uid == f['uuid'] for f in data['files'])) self.assertFalse(any(uid2 == f['uuid'] for f in data['files'])) self.assertIn('checksum', data['files'][0]) self.assertIn('file_size', data['files'][0])