Example #1
    def test_57_put_files_uuid_replace_locations(self):
        """Test that a file can replace with the same location."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the files to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }
        metadata2 = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar2')},
            'file_size': 2,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }

        # create the first file; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # try to replace the first file with the second; should be OK
        data = r.request_seq('PUT', '/api/files/'+uid, metadata2)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('logical_name', data)
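Note: these tests call a module-level hex() helper, not Python's built-in hex(); it presumably returns a SHA-512 hex digest of its string argument. A minimal sketch, assuming that behavior:

import hashlib

def hex(data):
    """Assumed test helper: SHA-512 hex digest of a string (shadows the built-in)."""
    if isinstance(data, str):
        data = data.encode('utf-8')
    return hashlib.sha512(data).hexdigest()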
Example #2
    def test_get(db_rc: RestClient) -> None:
        """Run some test queries."""
        databases = db_rc.request_seq("GET", "/databases/names")
        print(databases)

        for db in databases["databases"]:
            db_request_body = {"database": db}
            collections = db_rc.request_seq("GET", "/collections/names",
                                            db_request_body)
            print(collections)
            for coll in collections["collections"]:
                coll_request_body = {"database": db, "collection": coll}
                histograms = db_rc.request_seq(
                    "GET", "/collections/histograms/names", coll_request_body)
                print(histograms)
                for histo_name in histograms["histograms"]:
                    histo_request_body = {
                        "database": db,
                        "collection": coll,
                        "name": histo_name,
                    }
                    histo = db_rc.request_seq("GET", "/histogram",
                                              histo_request_body)
                    print(histo)
                filelist = db_rc.request_seq("GET", "/files/names",
                                             coll_request_body)
                print(filelist)

        db_rc.close()
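Here db_rc is presumably a pytest fixture that provides a configured RestClient. A minimal fixture sketch, with the server address and token as placeholders:

import pytest
from rest_tools.client import RestClient

@pytest.fixture
def db_rc():
    # placeholder address and token; point these at a running test server
    return RestClient("http://localhost:8080", token="test-token", timeout=5, retries=1)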
Example #3
    def test_54_patch_files_uuid_replace_logical_name(self):
        """Test that a file can be updated with the same logical_name."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the file to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }

        # this is a PATCH to metadata; matches the old logical_name
        patch1 = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar2')},
            'file_size': 2,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }

        # create the file; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # try to update the file with a patch; should be OK
        data = r.request_seq('PATCH', '/api/files/' + uid, patch1)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('logical_name', data)
Example #4
    def test_10_files(self):
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'logical_name': 'blah',
            'checksum': {'sha512':hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))

        for m in ('PUT','DELETE','PATCH'):
            with self.assertRaises(Exception):
                r.request_seq(m, '/api/files')
Example #5
    def test_71_snapshot_find(self):
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'collection_name': 'blah',
            'owner': 'foo',
        }
        data = r.request_seq('POST', '/api/collections', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('collection', data)
        url = data['collection']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections/' + uid)

        data = r.request_seq('POST', '/api/collections/{}/snapshots'.format(uid))
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('snapshot', data)
        url = data['snapshot']
        snap_uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/snapshots/' + snap_uid)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files',data)
        self.assertEqual(data['files'], [])
Example #6
    def test_15_files_auth(self):
        appkey = 'secret2'
        self.edit_config({
            'auth':{
                'secret': 'secret',
                'expiration': 82400,
            }
        })
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'logical_name': 'blah',
            'checksum': {'sha512':hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        r2 = RestClient(self.address, 'blah', timeout=1, retries=1)
        with self.assertRaises(Exception):
            r2.request_seq('POST', '/api/files', metadata)

        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
Example #7
    def test_66_patch_files_uuid_locations_1xN(self):
        """Test locations uniqueness under 1xN multiplicity."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the locations to be tested
        loc1a = {'site': 'WIPAC', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1b = {'site': 'DESY', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1c = {'site': 'NERSC', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1d = {'site': 'OSG', 'path': '/data/test/exp/IceCube/foo.dat'}
        locs3a = [loc1a, loc1b, loc1c]
        locs3b = [loc1b, loc1c, loc1d]
        locs3c = [loc1a, loc1b, loc1d]

        # define the files to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [loc1a]
        }
        metadata2 = {
            'logical_name': '/blah/data/exp/IceCube/blah2.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [loc1b]
        }

        # this is a PATCH to metadata; steps on metadata2's location
        patch1 = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar2')},
            'file_size': 2,
            u'locations': locs3c
        }

        # create the first file; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # create the second file; should be OK
        data = r.request_seq('POST', '/api/files', metadata2)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)

        # try to update the first file with a patch; should NOT be OK
        with self.assertRaises(Exception):
            r.request_seq('PATCH', '/api/files/' + uid, patch1)
Example #8
async def test_92_request_seq(requests_mock: Mock) -> None:
    """Test `request_seq()`."""
    rpc = RestClient("http://test", "passkey", timeout=0.1)

    def response(req: PreparedRequest, ctx: object) -> bytes:  # pylint: disable=W0613
        raise Exception()

    requests_mock.post("/test", content=response)

    with pytest.raises(Exception):
        rpc.request_seq("POST", "test", {})
Example #9
    def test_post_files(db_rc: RestClient) -> None:
        """Failure-test role authorization."""
        post_body = {
            "database": "test_histograms",
            "collection": "collection_name",
            "files": ["test.txt"],
        }
        with pytest.raises(requests.exceptions.HTTPError) as e:
            db_rc.request_seq("POST", "/files/names", post_body)
        assert e.value.response.status_code == 403  # Forbidden Error

        db_rc.close()
Example #10
    def test_62_post_files_locations_NxN(self):
        """Test locations uniqueness under NxN multiplicity."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the locations to be tested
        loc1a = {'site': 'WIPAC', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1b = {'site': 'DESY', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1c = {'site': 'NERSC', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1d = {'site': 'OSG', 'path': '/data/test/exp/IceCube/foo.dat'}
        locs3a = [loc1a, loc1b, loc1c]
        locs3b = [loc1b, loc1c, loc1d]
        locs3c = [loc1a, loc1b, loc1d]

        # define the files to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': locs3a
        }
        metadata2 = {
            'logical_name': '/blah/data/exp/IceCube/blah2.dat',
            'checksum': {'sha512': hex('foo bar2')},
            'file_size': 2,
            u'locations': locs3c
        }

        # create the first file; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # check that the file was created properly
        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))

        # check that the file was created properly, part deux
        data = r.request_seq('GET', '/api/files/' + uid)

        # create the second file; should NOT be OK
        with self.assertRaises(Exception):
            r.request_seq('POST', '/api/files', metadata2)
Example #11
    def test_post_histo(db_rc: RestClient) -> None:
        """Failure-test role authorization."""
        post_body = {
            "database": "test_histograms",
            "collection": "TEST",
            "histogram": {
                "Anything": True
            },
        }
        with pytest.raises(requests.exceptions.HTTPError) as e:
            db_rc.request_seq("POST", "/histogram", post_body)
        assert e.value.response.status_code == 403  # Forbidden Error

        db_rc.close()
Example #12
    def test_80_snapshot_files(self):
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'collection_name': 'blah',
            'owner': 'foo',
        }
        data = r.request_seq('POST', '/api/collections', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('collection', data)
        url = data['collection']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections/' + uid)

        data = r.request_seq('POST', '/api/collections/{}/snapshots'.format(uid))
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('snapshot', data)
        url = data['snapshot']
        snap_uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/snapshots/{}/files'.format(snap_uid))
        self.assertEqual(data['files'], [])

        # add a file
        metadata = {
            'logical_name': 'blah',
            'checksum': {'sha512':hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        file_uid = url.split('/')[-1]
        
        # old snapshot stays empty
        data = r.request_seq('GET', '/api/snapshots/{}/files'.format(snap_uid))
        self.assertEqual(data['files'], [])

        # new snapshot should have file
        data = r.request_seq('POST', '/api/collections/{}/snapshots'.format(uid))
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('snapshot', data)
        url = data['snapshot']
        snap_uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/snapshots/{}/files'.format(snap_uid),
                             {'keys':'uuid|logical_name|checksum|locations'})
        self.assertEqual(len(data['files']), 1)
        self.assertEqual(data['files'][0]['uuid'], file_uid)
        self.assertEqual(data['files'][0]['checksum'], metadata['checksum'])
Example #13
    def test_58_patch_files_uuid_unique_locations(self):
        """Test that locations is unique when updating a file."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the files to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }
        metadata2 = {
            'logical_name': '/blah/data/exp/IceCube/blah2.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah2.dat'}]
        }

        # this is a PATCH to metadata; steps on metadata2's location
        patch1 = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar2')},
            'file_size': 2,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah2.dat'}]
        }

        # create the first file; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # create the second file; should be OK
        data = r.request_seq('POST', '/api/files', metadata2)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)

        # try to update the first file with a patch; should NOT be OK
        with self.assertRaises(Exception):
            r.request_seq('PATCH', '/api/files/' + uid, patch1)
Example #14
    def test_10_collections(self):
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'collection_name': 'blah',
            'owner': 'foo',
        }
        data = r.request_seq('POST', '/api/collections', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('collection', data)
        url = data['collection']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections')
        self.assertIn('collections', data)
        self.assertIn(uid,{row['uuid'] for row in data['collections']})
Example #15
    def test_21_collection_by_name(self):
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'collection_name': 'blah',
            'owner': 'foo',
        }
        data = r.request_seq('POST', '/api/collections', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('collection', data)
        url = data['collection']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections/blah')
        for k in metadata:
            self.assertIn(k, data)
            self.assertEqual(metadata[k], data[k])
Example #16
def main():
    parser = argparse.ArgumentParser(description='manually run IceProd i3exec')
    parser.add_argument('-t', '--token',help='auth token')
    parser.add_argument('-d','--dataset',type=int,help='dataset number')
    parser.add_argument('-j','--job',type=int,help='job number (optional)')
    
    args = parser.parse_args()
    args = vars(args)

    logging.basicConfig(level=logging.DEBUG)
    
    rpc = RestClient('https://iceprod2-api.icecube.wisc.edu', args['token'])
    
    datasets = rpc.request_seq('GET', '/datasets', {'keys': 'dataset_id|dataset'})
    dataset_id = None
    for d in datasets:
        if datasets[d]['dataset'] == args['dataset']:
            dataset_id = d
            break
    else:
        raise Exception('bad dataset num')
    dataset = rpc.request_seq('GET', f'/datasets/{dataset_id}')
    config = rpc.request_seq('GET', f'/config/{dataset_id}')

    jobs = rpc.request_seq('GET', f'/datasets/{dataset_id}/jobs', {'status': 'processing|errors'})
    if args['job']:
        jobs = {j:jobs[j] for j in jobs if jobs[j]['job_index'] == args['job']}
    if not jobs:
        raise Exception('no jobs found')

    for job_id in jobs:
        tasks = rpc.request_seq('GET', f'/datasets/{dataset_id}/tasks',
                                {'job_id': job_id, 'keys': 'task_id|task_index|name|depends',
                                 'status': 'waiting|queued|reset|failed'})
        for task_id in sorted(tasks, key=lambda t:tasks[t]['task_index']):
            print(f'processing {dataset["dataset"]} {jobs[job_id]["job_index"]} {tasks[task_id]["name"]}')
            write_config(config, 'config.json', dataset_id, args['dataset'], task_id)
            run(token=args['token'], config='config.json',
                jobs_submitted=dataset['jobs_submitted'],
                job=jobs[job_id]['job_index'],
                task=tasks[task_id]['name'])
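As a command-line script, this module presumably ends with the usual entry-point guard:

if __name__ == '__main__':
    main()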
Example #17
    def test_50_post_files_unique_logical_name(self):
        """Test that logical_name is unique when creating a new file."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the file to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }

        # create the file the first time; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # check that the file was created properly
        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))

        # create the file the second time; should NOT be OK
        with self.assertRaises(Exception):
            data = r.request_seq('POST', '/api/files', metadata)

        # check that the second file was not created
        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
Example #18
    def test_30_archive(self):
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            u'logical_name': u'blah',
            u'checksum': {u'sha512':hex('foo bar')},
            u'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        metadata2 = {
            u'logical_name': u'blah2',
            u'checksum': {u'sha512':hex('foo bar baz')},
            u'file_size': 2,
            u'locations': [{u'site':u'test',u'path':u'blah.dat',u'archive':True}]
        }
        data = r.request_seq('POST', '/api/files', metadata)
        url = data['file']
        uid = url.split('/')[-1]
        data = r.request_seq('POST', '/api/files', metadata2)
        url2 = data['file']
        uid2 = url2.split('/')[-1]

        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertFalse(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'query':json_encode({'locations.archive':True})})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertFalse(any(uid == f['uuid'] for f in data['files']))
        self.assertTrue(any(uid2 == f['uuid'] for f in data['files']))
Example #19
    def test_30_collection_files(self):
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'collection_name': 'blah',
            'owner': 'foo',
        }
        data = r.request_seq('POST', '/api/collections', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('collection', data)
        url = data['collection']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections/blah/files')
        self.assertEqual(data['files'], [])

        # add a file
        metadata = {
            'logical_name': 'blah',
            'checksum': {'sha512':hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections/blah/files',
                             {'keys':'uuid|logical_name|checksum|locations'})
        self.assertEqual(len(data['files']), 1)
        self.assertEqual(data['files'][0]['uuid'], uid)
        self.assertEqual(data['files'][0]['checksum'], metadata['checksum'])
Example #20
async def test_91_request_seq(requests_mock: Mock) -> None:
    """Test `request_seq()`."""
    result = {"result": "the result"}
    rpc = RestClient("http://test", "passkey", timeout=0.1)

    def response(req: PreparedRequest, ctx: object) -> bytes:  # pylint: disable=W0613
        assert req.body is not None
        _ = json_decode(req.body)
        return json_encode(result).encode("utf-8")

    requests_mock.post("/test", content=response)
    ret = rpc.request_seq("POST", "test", {})

    assert requests_mock.called
    assert ret == result
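json_encode and json_decode are not shown in these excerpts; they are presumably thin wrappers around the standard json module. A minimal sketch, assuming that behavior:

import json

def json_encode(obj):
    # assumed: serialize an object to a JSON string
    return json.dumps(obj)

def json_decode(text):
    # assumed: parse JSON from str or bytes
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    return json.loads(text)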
Example #21
def _check_fpaths(fpaths: List[str], token: str, thread_id: int) -> List[str]:
    # setup
    rc = RestClient(
        "https://file-catalog.icecube.wisc.edu/",
        token=token,
        timeout=60 * 60,  # 1 hour
        retries=24,  # 1 day
    )

    # scan
    nonindexed_fpaths: List[str] = []
    for i, fpath in enumerate(fpaths, start=1):
        if i % 100000 == 1:
            logging.warning(
                f"thread-{thread_id} processed total: {i} (found {len(nonindexed_fpaths)} non-indexed)"
            )
        logging.info(f"#{i}")
        logging.debug(f"Looking at {fpath}")
        result = rc.request_seq(
            "GET",
            "/api/files",
            {
                "logical_name":
                fpath,  # filepath may exist as multiple logical_names
                "query": json.dumps({"locations.path": fpath}),
            },
        )
        if result["files"]:
            logging.debug("file is already indexed")
            continue
        logging.info("file is *not* indexed -> appending to list")
        nonindexed_fpaths.append(fpath)

    logging.warning(
        f"Thread-{thread_id} found {len(nonindexed_fpaths)} non-indexed filepaths."
    )
    return nonindexed_fpaths
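Since _check_fpaths takes a thread_id, it is presumably invoked from a thread pool. A hedged driver sketch (the function name and the interleaved chunking are assumptions):

import concurrent.futures
from typing import List

def check_all_fpaths(fpaths: List[str], token: str, n_threads: int = 4) -> List[str]:
    # split the paths into interleaved chunks and scan them in parallel
    chunks = [fpaths[i::n_threads] for i in range(n_threads)]
    nonindexed: List[str] = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=n_threads) as pool:
        futures = [
            pool.submit(_check_fpaths, chunk, token, i)
            for i, chunk in enumerate(chunks)
        ]
        for future in concurrent.futures.as_completed(futures):
            nonindexed.extend(future.result())
    return nonindexed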
Example #22
    def test_histo(db_rc: RestClient) -> None:  # pylint: disable=R0914
        """Run posts with updating."""

        def assert_get(histo: Histogram) -> None:
            get_body = {
                "database": "test_histograms",
                "collection": "TEST",
                "name": histo["name"],
            }
            get_resp = db_rc.request_seq("GET", "/histogram", get_body)
            assert get_resp["histogram"] == histo
            assert get_resp["history"]

        histograms = TestDBServerProdRole._create_new_histograms()
        # use first histogram for updating values in all histograms
        new_bin_values = histograms[0]["bin_values"]  # value will be incremented
        new_overflow = histograms[0]["overflow"]  # value will be incremented
        new_underflow = histograms[0]["underflow"]  # value will be incremented
        new_nan_count = histograms[0]["nan_count"]  # value will be incremented

        # Test!
        for original_histo in histograms:
            # 1. POST with no update flag
            post_body_1 = {
                "database": "test_histograms",
                "collection": "TEST",
                "histogram": original_histo,
            }
            post_resp_1 = db_rc.request_seq("POST", "/histogram", post_body_1)
            assert post_resp_1["history"]
            assert post_resp_1["histogram"] == original_histo
            assert not post_resp_1["updated"]

            # GET
            assert_get(original_histo)

            # 2. POST again with no update flag
            post_body_2 = {
                "database": "test_histograms",
                "collection": "TEST",
                "histogram": original_histo,
            }
            with pytest.raises(requests.exceptions.HTTPError) as e:
                _ = db_rc.request_seq("POST", "/histogram", post_body_2)
            assert e.value.response.status_code == 409  # Conflict Error

            # GET
            assert_get(original_histo)

            # 3. POST with update
            newer_histo = copy.deepcopy(original_histo)
            newer_histo["bin_values"] = new_bin_values
            newer_histo["overflow"] = new_overflow
            newer_histo["underflow"] = new_underflow
            newer_histo["nan_count"] = new_nan_count
            post_body_3 = {
                "database": "test_histograms",
                "collection": "TEST",
                "histogram": newer_histo,
                "update": True,
            }
            post_resp_3 = db_rc.request_seq("POST", "/histogram", post_body_3)
            assert post_resp_3["histogram"] == TestDBServerProdRole._get_updated_histo(
                original_histo, newer_histo
            )
            assert post_resp_3["updated"]
            assert len(post_resp_3["history"]) == 2

            # GET
            assert_get(
                TestDBServerProdRole._get_updated_histo(original_histo, newer_histo)
            )

        db_rc.close()
Example #23
    def test_file(db_rc: RestClient) -> None:
        """Run some test posts."""
        collection_name = f"TEST-{uuid.uuid4().hex}"

        def assert_get(_files: List[str]) -> None:
            get_body = {"database": "test_histograms", "collection": collection_name}
            get_resp = db_rc.request_seq("GET", "/files/names", get_body)
            assert get_resp["files"] == _files
            assert get_resp["history"]

        # 1. POST with no update flag
        files = TestDBServerProdRole._create_new_files()
        post_body_1 = {
            "database": "test_histograms",
            "collection": collection_name,
            "files": files,
        }
        post_resp_1 = db_rc.request_seq("POST", "/files/names", post_body_1)
        assert post_resp_1["files"] == files
        assert post_resp_1["history"]

        # GET
        assert_get(files)

        # 2. POST again with no update flag
        post_body_2 = {
            "database": "test_histograms",
            "collection": collection_name,
            "files": files,
        }
        with pytest.raises(requests.exceptions.HTTPError) as e:
            _ = db_rc.request_seq("POST", "/files/names", post_body_2)
        assert e.value.response.status_code == 409  # Conflict Error

        # GET
        assert_get(files)

        # 3. POST with update but no new files
        post_body_3 = {
            "database": "test_histograms",
            "collection": collection_name,
            "files": files,
            "update": True,
        }
        post_resp_3 = db_rc.request_seq("POST", "/files/names", post_body_3)
        assert post_resp_3["files"] == files
        assert len(post_resp_3["history"]) == 2

        # GET
        assert_get(files)

        # 4. POST with update flag and new files
        new_files = TestDBServerProdRole._create_new_files()
        post_body_4 = {
            "database": "test_histograms",
            "collection": collection_name,
            "files": new_files,
            "update": True,
        }
        post_resp_4 = db_rc.request_seq("POST", "/files/names", post_body_4)
        assert post_resp_4["files"] == sorted(set(files) | set(new_files))
        assert len(post_resp_4["history"]) == 3

        # GET
        assert_get(sorted(set(files) | set(new_files)))  # set-add files

        db_rc.close()
Example #24
class ServerComms:
    """
    Setup JSONRPC communications with the IceProd server.

    Args:
        url (str): address to connect to
        passkey (str): passkey for authorization/authentication
        config (:py:class:`iceprod.server.exe.Config`): Config object
        **kwargs: passed to JSONRPC
    """
    def __init__(self, url, passkey, config, **kwargs):
        self.rest = RestClient(address=url,token=passkey,**kwargs)

    async def download_task(self, gridspec, resources={}):
        """
        Download new task(s) from the server.

        Args:
            gridspec (str): gridspec the pilot was submitted from
            resources (dict): resources available in the pilot

        Returns:
            list: list of task configs
        """
        hostname = functions.gethostname()
        domain = '.'.join(hostname.split('.')[-2:])
        try:
            ifaces = functions.getInterfaces()
        except Exception:
            ifaces = None
        resources = deepcopy(resources)
        if 'gpu' in resources and isinstance(resources['gpu'],list):
            resources['gpu'] = len(resources['gpu'])
        os_type = os.environ['OS_ARCH'] if 'OS_ARCH' in os.environ else None
        if os_type:
            resources['os'] = os_type
        task = await self.rest.request('POST', '/task_actions/process',
                {'gridspec': gridspec,
                 'hostname': hostname, 
                 'domain': domain,
                 'ifaces': ifaces,
                 'requirements': resources,
                })
        if not task:
            return None

        # get config
        try:
            config = await self.rest.request('GET', '/config/{}'.format(task['dataset_id']))
            if not isinstance(config, dataclasses.Job):
                config = dict_to_dataclasses(config)
        except Exception:
            logging.warning('failed to get dataset config for dataset %s', task['dataset_id'])
            await self.task_kill(task['task_id'], dataset_id=task['dataset_id'],
                                 reason='failed to download dataset config')
            raise

        # fill in options
        if 'options' not in config:
            config['options'] = {}
        config['options']['task_id'] = task['task_id']
        config['options']['job_id'] = task['job_id']
        config['options']['dataset_id'] = task['dataset_id']
        config['options']['task'] = task['task_index']
        if 'requirements' in task:
            config['options']['resources'] = {k:task['requirements'][k] for k in Resources.defaults}
        try:
            job = await self.rest.request('GET', '/jobs/{}'.format(task['job_id']))
            config['options']['job'] = job['job_index']
        except Exception:
            logging.warning('failed to get job %s', task['job_id'])
            await self.task_kill(task['task_id'], dataset_id=task['dataset_id'],
                                 reason='failed to download job')
            raise
        try:
            dataset = await self.rest.request('GET', '/datasets/{}'.format(task['dataset_id']))
            config['options']['dataset'] = dataset['dataset']
            config['options']['jobs_submitted'] = dataset['jobs_submitted']
            config['options']['tasks_submitted'] = dataset['tasks_submitted']
            config['options']['debug'] = dataset['debug']
        except Exception:
            logging.warning('failed to get dataset %s', task['dataset_id'])
            await self.task_kill(task['task_id'], dataset_id=task['dataset_id'],
                                 reason='failed to download dataset')
            raise
        return [config]

    async def task_files(self, dataset_id, task_id):
        """
        Get the task files for a dataset and task.

        Args:
            dataset_id (str): dataset_id
            task_id (str): task_id

        Returns:
            list: list of :py:class:`iceprod.core.dataclasses.Data` objects
        """
        ret = await self.rest.request('GET', '/datasets/{}/task_files/{}'.format(dataset_id, task_id))
        data = []
        for r in ret['files']:
            d = dataclasses.Data(r)
            if not d.valid():
                raise Exception('returned Data not valid')
            data.append(d)
        return data

    async def processing(self, task_id):
        """
        Tell the server that we are processing this task.

        Only used for single task config, not for pilots.

        Args:
            task_id (str): task_id to mark as processing
        """
        await self.rest.request('PUT', '/tasks/{}/status'.format(task_id),
                              {'status': 'processing'})

    async def finish_task(self, task_id, dataset_id=None, stats={},
                          stat_filter=None, start_time=None, resources=None):
        """
        Finish a task.

        Args:
            task_id (str): task_id of task
            dataset_id (str): (optional) dataset_id of task
            stats (dict): (optional) task statistics
            stat_filter (iterable): (optional) stat filter by keywords
            start_time (float): (optional) task start time in unix seconds
            resources (dict): (optional) task resource usage
        """
        if stat_filter:
            # filter task stats
            stats = {k:stats[k] for k in stats if k in stat_filter}

        hostname = functions.gethostname()
        domain = '.'.join(hostname.split('.')[-2:])
        if start_time:
            t = time.time() - start_time
        elif resources and 'time' in resources and resources['time']:
            t = int(resources['time']*3600)
        else:
            t = None
        iceprod_stats = {
            'hostname': hostname,
            'domain': domain,
            'time_used': t,
            'task_stats': stats,
            'time': datetime.utcnow().isoformat(),
        }
        if resources:
            iceprod_stats['resources'] = resources
        if dataset_id:
            iceprod_stats['dataset_id'] = dataset_id

        await self.rest.request('POST', '/tasks/{}/task_stats'.format(task_id),
                              iceprod_stats)

        data = {}
        if t:
            data['time_used'] =  t
        await self.rest.request('POST', '/tasks/{}/task_actions/complete'.format(task_id), data)

    async def still_running(self, task_id):
        """
        Check if the task should still be running according to the DB.

        Args:
            task_id (str): task_id of task
        """
        ret = await self.rest.request('GET', '/tasks/{}'.format(task_id))
        if (not ret) or 'status' not in ret or ret['status'] != 'processing':
            raise Exception('task should be stopped')

    async def task_error(self, task_id, dataset_id=None, stats={}, start_time=None, reason=None, resources=None):
        """
        Tell the server about the error experienced.

        Args:
            task_id (str): task_id of task
            dataset_id (str): (optional) dataset_id of task
            stats (dict): (optional) task statistics
            start_time (float): (optional) task start time in unix seconds
            reason (str): (optional) one-line summary of error
            resources (dict): (optional) task resource usage
        """
        iceprod_stats = {}
        try:
            hostname = functions.gethostname()
            domain = '.'.join(hostname.split('.')[-2:])
            if start_time:
                t = time.time() - start_time
            elif resources and 'time' in resources and resources['time']:
                t = int(resources['time']*3600)
            else:
                t = None
            iceprod_stats = {
                'task_id': task_id,
                'hostname': hostname,
                'domain': domain,
                'time_used': t,
                'task_stats': json.dumps(stats),
                'time': datetime.utcnow().isoformat(),
                'error_summary': reason if reason else '',
            }
            if dataset_id:
                iceprod_stats['dataset_id'] = dataset_id
            if resources:
                iceprod_stats['resources'] = resources
        except Exception:
            logging.warning('failed to collect error info', exc_info=True)

        try:
            await self.rest.request('POST', '/tasks/{}/task_stats'.format(task_id),
                                    iceprod_stats)
        except Exception:
            logging.warning('failed to post task_stats for %r', task_id, exc_info=True)

        data = {}
        if t:
            data['time_used'] =  t
        if resources:
            data['resources'] = resources
        if reason:
            data['reason'] = reason
        await self.rest.request('POST', '/tasks/{}/task_actions/reset'.format(task_id), data)

    async def task_kill(self, task_id, dataset_id=None, resources=None, reason=None, message=None):
        """
        Tell the server that we killed a task.

        Args:
            task_id (str): task_id of task
            dataset_id (str): (optional) dataset_id of task
            resources (dict): (optional) used resources
            reason (str): (optional) short summary for kill
            message (str): (optional) long message to replace log upload
        """
        if not reason:
            reason = 'killed'
        if not message:
            message = reason
        try:
            hostname = functions.gethostname()
            domain = '.'.join(hostname.split('.')[-2:])
            iceprod_stats = {
                'task_id': task_id,
                'hostname': hostname,
                'domain': domain,
                'time': datetime.utcnow().isoformat(),
                'error_summary': reason if reason else '',
            }
            if dataset_id:
                iceprod_stats['dataset_id'] = dataset_id
            if resources:
                iceprod_stats['resources'] = resources
        except Exception:
            logging.warning('failed to collect error info', exc_info=True)
            iceprod_stats = {}
        try:
            await self.rest.request('POST', '/tasks/{}/task_stats'.format(task_id),
                                    iceprod_stats)
        except Exception:
            logging.warning('failed to post task_stats for %r', task_id, exc_info=True)

        data = {}
        if resources and 'time' in resources and resources['time']:
            data['time_used'] =  resources['time']*3600.
        if resources:
            data['resources'] = resources
        if reason:
            data['reason'] = reason
        else:
            data['data'] = 'task killed'
        await self.rest.request('POST', '/tasks/{}/task_actions/reset'.format(task_id), data)

        data = {'name': 'stdlog', 'task_id': task_id}
        if dataset_id:
            data['dataset_id'] = dataset_id
        if message:
            data['data'] = message
        elif reason:
            data['data'] = reason
        else:
            data['data'] = 'task killed'
        await self.rest.request('POST', '/logs', data)
        data.update({'name':'stdout', 'data': ''})
        await self.rest.request('POST', '/logs', data)
        data.update({'name':'stderr', 'data': ''})
        await self.rest.request('POST', '/logs', data)

    async def _upload_logfile(self, name, filename, task_id=None, dataset_id=None):
        """Upload a log file"""
        data = {'name': name}
        if task_id:
            data['task_id'] = task_id
        if dataset_id:
            data['dataset_id'] = dataset_id
        try:
            with open(filename) as f:
                data['data'] = f.read()
        except Exception as e:
            data['data'] = str(e)
        await self.rest.request('POST', '/logs', data)

    async def uploadLog(self, **kwargs):
        """Upload log file"""
        logging.getLogger().handlers[0].flush()
        await self._upload_logfile('stdlog', os.path.abspath(constants['stdlog']), **kwargs)

    async def uploadErr(self, filename=None, **kwargs):
        """Upload stderr file"""
        if not filename:
            sys.stderr.flush()
            filename = os.path.abspath(constants['stderr'])
        await self._upload_logfile('stderr', filename, **kwargs)

    async def uploadOut(self, filename=None, **kwargs):
        """Upload stdout file"""
        if not filename:
            sys.stdout.flush()
            filename = os.path.abspath(constants['stdout'])
        await self._upload_logfile('stdout', filename, **kwargs)

    async def create_pilot(self, **kwargs):
        """
        Create an entry in the pilot table.

        Args:
            **kwargs: passed through to rest function
        Returns:
            str: pilot id
        """
        ret = await self.rest.request('POST', '/pilots', kwargs)
        return ret['result']

    async def update_pilot(self, pilot_id, **kwargs):
        """
        Update the pilot table.

        Args:
            pilot_id (str): pilot id
            **kwargs: passed through to rest function
        """
        await self.rest.request('PATCH', '/pilots/{}'.format(pilot_id), kwargs)

    async def delete_pilot(self, pilot_id, **kwargs):
        """
        Delete the pilot.

        Args:
            pilot_id (str): pilot id
        """
        await self.rest.request('DELETE', '/pilots/{}'.format(pilot_id))


    # --- synchronous versions to be used from a signal handler
    # --- or other non-async code

    def task_kill_sync(self, task_id, dataset_id=None, resources=None, reason=None, message=None):
        """
        Tell the server that we killed a task (synchronous version).

        Args:
            task_id (str): task_id of task
            dataset_id (str): (optional) dataset_id of task
            resources (dict): (optional) used resources
            reason (str): (optional) short summary for kill
            message (str): (optional) long message to replace log upload
        """
        if not reason:
            reason = 'killed'
        if not message:
            message = reason
        try:
            hostname = functions.gethostname()
            domain = '.'.join(hostname.split('.')[-2:])
            iceprod_stats = {
                'task_id': task_id,
                'hostname': hostname,
                'domain': domain,
                'time': datetime.utcnow().isoformat(),
                'error_summary': reason if reason else '',
            }
            if dataset_id:
                iceprod_stats['dataset_id'] = dataset_id
            if resources:
                iceprod_stats['resources'] = resources
        except Exception:
            logging.warning('failed to collect error info', exc_info=True)
            iceprod_stats = {}
        try:
            self.rest.request_seq('POST', '/tasks/{}/task_stats'.format(task_id),
                                  iceprod_stats)
        except Exception:
            logging.warning('failed to post task_stats for %r', task_id, exc_info=True)

        data = {}
        if resources and 'time' in resources and resources['time']:
            data['time_used'] =  resources['time']*3600.
        if resources:
            data['resources'] = resources
        if reason:
            data['reason'] = reason
        else:
            data['data'] = 'task killed'
        self.rest.request_seq('POST', '/tasks/{}/task_actions/reset'.format(task_id), data)

        data = {'name': 'stdlog', 'task_id': task_id}
        if dataset_id:
            data['dataset_id'] = dataset_id
        if message:
            data['data'] = message
        elif reason:
            data['data'] = reason
        else:
            data['data'] = 'task killed'
        self.rest.request_seq('POST', '/logs', data)
        data.update({'name':'stdout', 'data': ''})
        self.rest.request_seq('POST', '/logs', data)
        data.update({'name':'stderr', 'data': ''})
        self.rest.request_seq('POST', '/logs', data)

    def update_pilot_sync(self, pilot_id, **kwargs):
        """
        Update the pilot table (synchronous version).

        Args:
            pilot_id (str): pilot id
            **kwargs: passed through to rpc function
        """
        self.rest.request_seq('PATCH', '/pilots/{}'.format(pilot_id), kwargs)

    def delete_pilot_sync(self, pilot_id, **kwargs):
        """
        Delete the pilot (synchronous version).

        Args:
            pilot_id (str): pilot id
        """
        self.rest.request_seq('DELETE', '/pilots/{}'.format(pilot_id))
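A hedged usage sketch for the class above (the URL, passkey, gridspec, and resources are placeholders):

import asyncio

async def _demo():
    comms = ServerComms('https://iceprod2-api.icecube.wisc.edu', 'PASSKEY', config=None)
    configs = await comms.download_task('some-gridspec', resources={'cpu': 1})
    if configs:
        await comms.processing(configs[0]['options']['task_id'])

asyncio.run(_demo())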
Example #25
    def test_20_file(self):
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            u'logical_name': u'blah',
            u'checksum': {u'sha512':hex('foo bar')},
            u'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        data = r.request_seq('POST', '/api/files', metadata)

        url = data['file']

        data = r.request_seq('GET', url)
        data.pop('_links')
        data.pop('meta_modify_date')
        data.pop('uuid')
        self.assertDictEqual(metadata, data)

        metadata['test'] = 100

        metadata_cpy = metadata.copy()
        metadata_cpy['uuid'] = 'something else'
        with self.assertRaises(Exception):
            data = r.request_seq('PUT', url, metadata_cpy)

        data = r.request_seq('PUT', url, metadata)
        data.pop('_links')
        data.pop('meta_modify_date')
        data.pop('uuid')
        self.assertDictEqual(metadata, data)

        data = r.request_seq('GET', url)
        data.pop('_links')
        data.pop('meta_modify_date')
        data.pop('uuid')
        self.assertDictEqual(metadata, data)

        metadata['test2'] = 200
        data = r.request_seq('PATCH', url, {'test2':200})
        data.pop('_links')
        data.pop('meta_modify_date')
        data.pop('uuid')
        self.assertDictEqual(metadata, data)

        data = r.request_seq('GET', url)
        data.pop('_links')
        data.pop('meta_modify_date')
        data.pop('uuid')
        self.assertDictEqual(metadata, data)

        data = r.request_seq('DELETE', url)

        # second delete should raise error
        with self.assertRaises(Exception):
            data = r.request_seq('DELETE', url)

        with self.assertRaises(Exception):
            data = r.request_seq('POST', url)
Example #26
    def test_40_simple_query(self):
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            u'logical_name': u'blah',
            u'checksum': {u'sha512':hex('foo bar')},
            u'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}],
            u'processing_level':u'level2',
            u'run_number':12345,
            u'first_event':345,
            u'last_event':456,
            u'iceprod':{
                u'dataset':23453,
            },
            u'offline':{
                u'season':2017,
            },
        }
        metadata2 = {
            u'logical_name': u'blah2',
            u'checksum': {u'sha512':hex('foo bar baz')},
            u'file_size': 2,
            u'locations': [{u'site':u'test',u'path':u'blah2.dat'}],
            u'processing_level':u'level2',
            u'run_number':12356,
            u'first_event':578,
            u'last_event':698,
            u'iceprod':{
                u'dataset':23454,
            },
            u'offline':{
                u'season':2017,
            },
        }
        data = r.request_seq('POST', '/api/files', metadata)
        url = data['file']
        uid = url.split('/')[-1]
        data = r.request_seq('POST', '/api/files', metadata2)
        url2 = data['file']
        uid2 = url2.split('/')[-1]

        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 2)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertTrue(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'processing_level':'level2'})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 2)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertTrue(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'run_number':12345})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertFalse(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'dataset':23454})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertFalse(any(uid == f['uuid'] for f in data['files']))
        self.assertTrue(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'event_id':400})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertFalse(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'season':2017})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 2)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertTrue(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'event_id':400, 'keys':'|'.join(['checksum','file_size','uuid'])})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertFalse(any(uid2 == f['uuid'] for f in data['files']))
        self.assertIn('checksum', data['files'][0])
        self.assertIn('file_size', data['files'][0])