Exemplo n.º 1
0
 def test_mock_from_point_query(self):
     """
     Checks the real PAIRS point query service against the mock used.

     The mocked PAIRS server is stopped while the raster and vector point
     queries meant for the real service are constructed, and started again
     before the ones meant for the mock are constructed. Afterwards, the keys
     of the first data entry of each real response are compared with those of
     the corresponding mock response.
     """
     rasterRequest = 'point-data-sample-request-raster.json'
     vectorRequest = 'point-data-sample-request-vector.json'

     def make_point_query(requestFileName):
         # load the JSON query definition with a context manager such that the
         # file handle is closed right away instead of being leaked
         with open(os.path.join(TEST_DATA_DIR, requestFileName)) as requestFile:
             queryDefinition = json.load(requestFile)
         return paw.PAIRSQuery(
             queryDefinition,
             'https://' + PAIRS_SERVER,
             auth=PAIRS_CREDENTIALS,
             baseURI=PAIRS_BASE_URI,
         )

     # get real data
     self.pairsServerMock.stop()
     try:
         testPointQueryRasterReal = make_point_query(rasterRequest)
         testPointQueryVectorReal = make_point_query(vectorRequest)
     finally:
         # start the mock again even if constructing the real queries raised,
         # such that the mock is not left stopped by this test
         self.pairsServerMock.start()
     # get mock data
     testPointQueryRasterMock = make_point_query(rasterRequest)
     testPointQueryVectorMock = make_point_query(vectorRequest)
     # compare data entry keys
     for realQuery, mockQuery in (
             (testPointQueryRasterReal, testPointQueryRasterMock),
             (testPointQueryVectorReal, testPointQueryVectorMock),
     ):
         self.assertListEqual(
             sorted(realQuery.querySubmit.json()['data'][0].keys()),
             sorted(mockQuery.querySubmit.json()['data'][0].keys()),
         )
Exemplo n.º 2
0
 def TO_BE_IMPLEMENTED_test_dataframe_generation(self):
     """
     Tests functions that massage the received data to the *unified* PAW dataframe.

     Submits the raster sample request, polls until it is finished, downloads
     the result and creates the dataframe. It then checks that a `layerName`
     column exists in the dataframe of the first layer listed in the metadata.
     """
     # query mocked data
     logging.info(
         "TEST: Generation of unified PAW dataframe for raster data.")
     # read the query definition with a context manager so the file gets closed
     requestPath = os.path.join(TEST_DATA_DIR,
                                'raster-data-sample-request.json')
     with open(requestPath) as requestFile:
         queryDefinition = json.load(requestFile)
     testRasterQuery = paw.PAIRSQuery(
         queryDefinition,
         'https://' + PAIRS_SERVER,
         auth=PAIRS_CREDENTIALS,
         baseURI=PAIRS_BASE_URI,
     )
     testRasterQuery.submit()
     testRasterQuery.poll_till_finished(printStatus=True)
     testRasterQuery.download()
     # create dataframe from raster data
     testRasterQuery.create_dataframe()
     # check that the dataset and datalayer column names have been added
     firstLayerKey = list(testRasterQuery.metadata.keys())[0]
     self.assertIn(
         'layerName', testRasterQuery.dataframe[firstLayerKey].columns)
def create_pairs_query(query, config):
    """Build a `paw.PAIRSQuery` for `query` using the settings in `config`.

    `query` is converted with `create_pairs_query_dict` and the resulting
    dictionary is printed to stdout before the query object is created.

    :param query:   input handed to `create_pairs_query_dict`
    :param config:  mapping providing the keys `server`, `username`,
                    `password` and `download_dir`
    :returns:       the (not yet submitted) `paw.PAIRSQuery` instance
    """
    query_dict = create_pairs_query_dict(query)

    print(query_dict)

    # look the settings up in the same order the constructor call consumes them
    server = config['server']
    credentials = (config['username'], config['password'])
    download_dir = config['download_dir']

    return paw.PAIRSQuery(query_dict,
                          server,
                          credentials,
                          downloadDir=download_dir)
Exemplo n.º 4
0
 def test_from_point_query_vector(self):
     """
     Test querying vector point data.

     Runs the vector point sample request through submit, poll, download and
     layer creation, then checks the resulting vector data frame (`vdf`): its
     number of rows, that its columns match the keys of the first response
     data entry, some value types, and that splitting the property string
     column only adds columns on the first call.
     """
     # query mocked data
     logging.info("TEST: Query (mocked) point data.")
     # define point query; the request file is closed as soon as it is parsed
     requestPath = os.path.join(TEST_DATA_DIR,
                                'point-data-sample-request-vector.json')
     with open(requestPath) as requestFile:
         queryDefinition = json.load(requestFile)
     testPointQuery = paw.PAIRSQuery(
         queryDefinition,
         'https://' + PAIRS_SERVER,
         auth=PAIRS_CREDENTIALS,
         baseURI=PAIRS_BASE_URI,
     )
     # submit point query
     testPointQuery.submit()
     # for compliance with general PAW query scheme, perform fake poll and download
     testPointQuery.poll_till_finished()
     testPointQuery.download()
     testPointQuery.create_layers()
     # check vector data frame
     ## number of data points is correct
     logging.info("TEST: Perform vector data frame tests.")
     self.assertEqual(2, len(testPointQuery.vdf))
     ## column names agree with data response
     self.assertListEqual(
         sorted(testPointQuery.querySubmit.json()['data'][0].keys()),
         sorted(testPointQuery.vdf.columns),
     )
     ## check (some) data types from response
     self.assertIsInstance(
         testPointQuery.vdf.timestamp[0],
         datetime.datetime,
     )
     self.assertIsInstance(
         testPointQuery.vdf.value[0],
         string_type,
     )
     # check property string column splitting
     colsBeforeSplit = len(testPointQuery.vdf.columns)
     testPointQuery.split_property_string_column()
     colsAfterSplit = len(testPointQuery.vdf.columns)
     if paw.PROPERTY_STRING_COL_NAME_POINT in testPointQuery.vdf.columns:
         self.assertLess(colsBeforeSplit, colsAfterSplit)
     else:
         self.assertEqual(colsBeforeSplit, colsAfterSplit)
     # run twice to double-check it is not increasing the number of columns
     testPointQuery.split_property_string_column()
     colsAfter2ndSplit = len(testPointQuery.vdf.columns)
     self.assertEqual(colsAfterSplit, colsAfter2ndSplit)
Exemplo n.º 5
0
 def test_from_point_query_raster(self):
     """
     Test querying raster point data.

     Runs the raster point sample request through submit, poll, download and
     layer creation, then checks the resulting vector data frame (`vdf`):
     splitting the property string column must not change the column count,
     the number of rows must be 2, the columns must equal the keys of the
     first response data entry plus the geometry column, and some values must
     have the expected types.
     """
     # query mocked data
     logging.info("TEST: Query (mocked) point data.")
     # define point query; the request file is closed as soon as it is parsed
     requestPath = os.path.join(TEST_DATA_DIR,
                                'point-data-sample-request-raster.json')
     with open(requestPath) as requestFile:
         queryDefinition = json.load(requestFile)
     testPointQuery = paw.PAIRSQuery(
         queryDefinition,
         'https://' + PAIRS_SERVER,
         auth=PAIRS_CREDENTIALS,
         baseURI=PAIRS_BASE_URI,
     )
     # submit point query
     testPointQuery.submit()
     # for compliance with general PAW query scheme, perform fake poll and download
     testPointQuery.poll_till_finished()
     testPointQuery.download()
     testPointQuery.create_layers()
     # try to split property string column (although having no effect, it should run through)
     colsBeforeSplit = len(testPointQuery.vdf.columns)
     testPointQuery.split_property_string_column()
     colsAfterSplit = len(testPointQuery.vdf.columns)
     self.assertEqual(colsBeforeSplit, colsAfterSplit)
     # check vector data frame
     ## number of data points is correct
     logging.info("TEST: Perform vector data frame tests.")
     self.assertEqual(2, len(testPointQuery.vdf))
     ## column names agree with data response (plus the geometry column)
     expectedColumns = list(
         testPointQuery.querySubmit.json()['data'][0].keys())
     expectedColumns.append(paw.PAIRS_VECTOR_GEOMETRY_COLUMN_NAME)
     self.assertListEqual(
         sorted(expectedColumns),
         sorted(testPointQuery.vdf.columns),
     )
     ## check (some) data types from response
     self.assertIsInstance(
         testPointQuery.vdf.longitude[0],
         float,
     )
     self.assertIsInstance(
         testPointQuery.vdf.timestamp[0],
         datetime.datetime,
     )
     self.assertIsInstance(
         testPointQuery.vdf.value[0],
         string_type,
     )
Exemplo n.º 6
0
def query_local(layerID):
    """Query one PAIRS layer as CSV and split its `Region` column.

    The query covers the square [-89, -179, 89, 179] and the interval
    2019-03-01 to 2030-03-10. After submit, poll, download and layer creation,
    the data of the first layer listed in the metadata is attached as `vdf`.
    Its `Region` column is split on ':' and '.' and the first three parts are
    stored in the new columns `pairs_id`, `State` and `County`.

    :param layerID: ID of the PAIRS layer to query
    :returns:       the `paw.PAIRSQuery` instance with `vdf` populated
    """
    timeWindow = {
        "start": "2019-03-01T00:00:00Z",
        "end": "2030-03-10T23:59:59Z",
    }
    coronaQueryDef = {
        "layers": [{"id": layerID}],
        "spatial": {
            "type": "square",
            "coordinates": [-89, -179, 89, 179],
        },
        "temporal": {"intervals": [timeWindow]},
        "outputType": "csv",
    }

    # create PAIRS query instance
    coronaQuery = paw.PAIRSQuery(
        coronaQueryDef,
        pairsHost='https://' + PAIRS_SERVER,
        auth=PAIRS_CREDENTIALS,
        baseURI=BASE_URI,
        inMemory=True,
    )
    # submit and download the query
    coronaQuery.submit()
    coronaQuery.poll_till_finished(printStatus=True)
    coronaQuery.download()
    coronaQuery.create_layers()

    # associate the vector data frame of the first layer
    firstLayerKey = list(coronaQuery.metadata.keys())[0]
    coronaQuery.vdf = coronaQuery.data[firstLayerKey]

    # NOTE: split_property_string_column() is deliberately not called here;
    # the `Region` column is split manually instead
    regionParts = (
        coronaQuery.vdf['Region']
        .str.replace(':', '.')
        .str.split('.', expand=True)
    )
    for partIndex, columnName in enumerate(('pairs_id', 'State', 'County')):
        coronaQuery.vdf[columnName] = regionParts[partIndex]

    return coronaQuery
Exemplo n.º 7
0
    def __init__(self,
                 queryList,
                 auth=None,
                 downloadDir='./downloads',
                 overwriteExisting=False,
                 maxConcurrent=2,
                 logEverySeconds=30):
        '''
        Sort the given queries into `queued`, `running`, `completed` and
        `failed` deques according to their current state.

        :param queryList:           iterable of paw.PAIRSQuery objects and/or raw query
                                    definitions; raw definitions are wrapped in a new
                                    paw.PAIRSQuery and queued
        :param auth:                passed on to paw.PAIRSQuery for raw query definitions
        :param downloadDir:         passed on to paw.PAIRSQuery for raw query definitions
        :param overwriteExisting:   passed on to paw.PAIRSQuery for raw query definitions
        :param maxConcurrent:       stored on the instance, must not exceed MAX_CONCURRENT
        :param logEverySeconds:     stored on the instance
        :raises Exception:          if maxConcurrent exceeds MAX_CONCURRENT or the status
                                    code of a query cannot be compared against 20
        '''

        if maxConcurrent > MAX_CONCURRENT:
            raise Exception('Maximum value for maxConcurrent is {}.'.format(
                MAX_CONCURRENT))

        self.maxConcurrent = maxConcurrent
        self.logEverySeconds = logEverySeconds

        self.queries = {
            'queued': deque(),
            'running': deque(),
            'completed': deque(),
            'failed': deque()
        }
        for q in queryList:
            if not isinstance(q, paw.PAIRSQuery):
                # raw query definition: wrap it and queue it for submission
                self.queries['queued'].append(
                    paw.PAIRSQuery(q,
                                   auth=auth,
                                   downloadDir=downloadDir,
                                   overwriteExisting=overwriteExisting))
                continue

            if q.querySubmit is None:
                # not submitted yet
                state = 'queued'
            elif q.queryStatus is None:
                # submitted, but never polled
                state = 'running'
            else:
                # parse the status response once instead of once per comparison
                statusCode = q.queryStatus.json()['statusCode']
                if statusCode < 20:
                    state = 'running'
                elif statusCode == 20:
                    state = 'completed'
                elif statusCode > 20:
                    state = 'failed'
                else:
                    raise Exception(
                        'Cannot determine status of PAIRSQuery object.')
            self.queries[state].append(q)
Exemplo n.º 8
0
 def test_dataframe_generation(self):
     """
     Tests functions that massage the received data to the *unified* PAW dataframe.

     Submits the raster point sample request and then sets the timestamp
     column and the latitude/longitude/geometry columns on the query object.
     No explicit assertions are made; the test passes if none of the calls
     raises.
     """
     # query mocked data
     logging.info(
         "TEST: Generation of unified PAW dataframe for point data.")
     # read the query definition with a context manager so the file gets closed
     requestPath = os.path.join(TEST_DATA_DIR,
                                'point-data-sample-request-raster.json')
     with open(requestPath) as requestFile:
         queryDefinition = json.load(requestFile)
     testPointQuery = paw.PAIRSQuery(
         queryDefinition,
         'https://' + PAIRS_SERVER,
         auth=PAIRS_CREDENTIALS,
         baseURI=PAIRS_BASE_URI,
     )
     # submit query
     testPointQuery.submit()
     # set timestamp column
     testPointQuery.set_timestamp_column('timestamp')
     # set point coordinate columns
     testPointQuery.set_lat_lon_columns('latitude', 'longitude', 'geometry')
Exemplo n.º 9
0
def query_PAIRS(query_json, raw_data_path, path_to_credentials='./ibmpairspass.txt'):
  """
  Sends a query to the PAIRS server, downloads the result into `raw_data_path`
  and returns the loaded layers.

  The credentials file must contain `server:user:password`. A trailing line
  terminator is ignored so that it does not become part of the password.

  :param query_json:          PAIRS query definition handed to `paw.PAIRSQuery`
  :param raw_data_path:       directory used as download directory for the query
  :param path_to_credentials: path to the `server:user:password` file
  :returns:                   list of the entries of `query.data`, ordered by
                              their keys sorted in reverse
  :raises ValueError:         if the credentials file does not split into
                              exactly three ':'-separated fields
  """
  with open(path_to_credentials, 'r') as creds_file:
    # editors usually terminate the file with a newline; without stripping it
    # the newline would be sent as part of the password
    creds = creds_file.read().rstrip('\r\n').split(':')

  # PAIRS server, and authentication
  pairs_server, user_name, password = creds
  pairs_server = 'https://' + pairs_server
  pairs_auth = (user_name, password)

  # Make request to the PAIRS server for the data described by query_json
  query = paw.PAIRSQuery(
    query_json,
    pairs_server,
    pairs_auth,
    baseURI='/',
    downloadDir=raw_data_path
  )

  # Submit query and wait until downloaded
  query.submit()
  query.poll_till_finished()
  query.download()
  query.create_layers()

  # Sort in reverse to get channels in R, G, B order (per the original author;
  # this relies on how the layer keys happen to sort).
  data_keys = sorted(query.data.keys(), reverse=True)
  images = [query.data[k] for k in data_keys]

  # Delete the zip file.
  zip_file_path = os.path.join(raw_data_path, query.zipFilePath)
  os.remove(zip_file_path)

  return images
Exemplo n.º 10
0
    def __init__(self,
                 queryList,
                 auth=None,
                 downloadDir='./downloads',
                 overwriteExisting=False,
                 maxConcurrent=2,
                 logEverySeconds=30):
        '''
        :param queryList:           list containing a mix of PAIRS query JSONs and paw.PAIRSQuery objects.
                                    For paw.PAIRSQuery objects, only those which have not been submitted
                                    yet will be submitted.
        :type queryList:            list
        :param auth:                user name and password as tuple for access to pairsHost
        :type auth:                 (str, str)
        :param overwriteExisting:   destroy locally cached data, if existing, otherwise grab the latest
                                    locally cached data, `latest` is defined by alphanumerical ordering
                                    of the PAIRS query ID
        :type overwriteExisting:    bool
        :param downloadDir:         directory where to store downloaded data
        :type downloadDir:          str
        :param maxConcurrent:       maximum number of concurrent queries. Note that the maximum number
                                    of concurrent queries might be limited server side for a particular
                                    user. There is no guarantee that a user can submit
                                    maxConcurrent queries at a given time.
        :type maxConcurrent:        int
        :param logEverySeconds:     time interval at which the class will send status messages to its
                                    logger in seconds (logging.INFO)
        :type logEverySeconds:      int
        :raises Exception:          if maxConcurrent exceeds MAX_CONCURRENT or the status code of a
                                    query cannot be compared against 20
        '''

        if maxConcurrent > MAX_CONCURRENT:
            raise Exception('Maximum value for maxConcurrent is {}.'.format(
                MAX_CONCURRENT))

        self.maxConcurrent = maxConcurrent
        self.logEverySeconds = logEverySeconds

        self.queries = {
            'queued': deque(),
            'running': deque(),
            'completed': deque(),
            'failed': deque()
        }
        for q in queryList:
            if not isinstance(q, paw.PAIRSQuery):
                # raw query JSON: wrap it and queue it for submission
                self.queries['queued'].append(
                    paw.PAIRSQuery(q,
                                   auth=auth,
                                   downloadDir=downloadDir,
                                   overwriteExisting=overwriteExisting))
                continue

            if q.querySubmit is None:
                # not submitted yet
                state = 'queued'
            elif q.queryStatus is None:
                # submitted, but never polled
                state = 'running'
            else:
                # parse the status response once instead of once per comparison
                statusCode = q.queryStatus.json()['statusCode']
                if statusCode < 20:
                    state = 'running'
                elif statusCode == 20:
                    state = 'completed'
                elif statusCode > 20:
                    state = 'failed'
                else:
                    raise Exception(
                        'Cannot determine status of PAIRSQuery object.')
            self.queries[state].append(q)
Exemplo n.º 11
0
 def test_mock_raster_query(self):
     """
     Checks the real PAIRS raster query service against the mock used.

     With the mock stopped, the raster sample request is submitted to the real
     service via `requests`, polled until its status code is at least 20, and
     the result is downloaded to a ZIP file under /tmp. The mock is then
     started again and the same request is run through `paw.PAIRSQuery`.
     Finally, every file in the mock ZIP must exist in the real ZIP with a
     file size that deviates by at most `self.REL_FILESIZE_DEV` (relative).
     """
     # local import: only needed to pause between status polls
     import time

     def load_raster_request():
         # context manager ensures the request file is closed after parsing
         with open(
                 os.path.join(TEST_DATA_DIR,
                              'raster-data-sample-request.json')) as requestFile:
             return json.load(requestFile)

     # get real data
     # prevent the responses module to complain about unused URL endpoints of the mock
     try:
         self.pairsServerMock.stop()
     except Exception as e:
         # catch not all requests called error
         logging.warning(
             'Stopping the mocked PAIRS server caused (potentially irrelevant) trouble: {}'
             .format(e))
     # check query submit
     logging.info("TEST: Perform query to real PAIRS server.")
     subResp = requests.post(
         'https://' + PAIRS_SERVER + PAIRS_BASE_URI + QUERY_ENDPOINT,
         json=load_raster_request(),
         auth=PAIRS_CREDENTIALS,
     ).json()
     self.assertIn('id', subResp.keys())
     self.assertIsInstance(subResp['id'], string_type)
     # check query poll
     while True:
         statResp = requests.get(
             'https://' + PAIRS_SERVER + PAIRS_BASE_URI + STATUS_ENDPOINT +
             subResp['id'],
             auth=PAIRS_CREDENTIALS,
         ).json()
         # unittest assertion (subset check) instead of a bare assert, which
         # would be stripped when running Python with -O
         self.assertLessEqual({'id', 'rtStatus', 'statusCode'},
                              set(statResp.keys()))
         self.assertIsInstance(statResp['statusCode'], int)
         if statResp['statusCode'] >= 20:
             break
         # do not hammer the real server while the query is still running
         time.sleep(1)
     # check query result
     downloadResp = requests.get(
         'https://' + PAIRS_SERVER + PAIRS_BASE_URI + DOWNLOAD_ENDPOINT +
         subResp['id'],
         auth=PAIRS_CREDENTIALS,
         stream=True,
     )
     pairsDataZip = '/tmp/pairs-test-raster-download-{}.zip'.format(
         subResp['id'])
     with open(pairsDataZip, 'wb') as f:
         for chunk in downloadResp.iter_content(chunk_size=1024):
             if chunk:
                 f.write(chunk)
     self.pairsServerMock.start()
     # basic test of real data
     self.assertTrue(zipfile.is_zipfile(pairsDataZip))
     # get mock data
     testRasterQuery = paw.PAIRSQuery(
         load_raster_request(),
         'https://' + PAIRS_SERVER,
         auth=PAIRS_CREDENTIALS,
         baseURI=PAIRS_BASE_URI,
     )
     testRasterQuery.submit()
     testRasterQuery.poll_till_finished(printStatus=True)
     testRasterQuery.download()
     pairsMockZip = testRasterQuery.queryDir + '.zip'
     # make sure that files in mock are available in real download
     # and that the size of the data and the mock are approximately the same
     logging.info(
         "TEST: Check that all files from the mock exist in the real data queried."
     )
     with zipfile.ZipFile(pairsMockZip, 'r') as mock, \
          zipfile.ZipFile(pairsDataZip, 'r') as real:
         # generate info dictionaries
         mockInfo = {f.filename: f.file_size for f in mock.infolist()}
         realInfo = {f.filename: f.file_size for f in real.infolist()}
         # check that files in mock are contained in real data (in terms of names)
         self.assertLessEqual(set(mockInfo.keys()), set(realInfo.keys()))
         # check that file sizes are approximately the same
         for key, mockSize in mockInfo.items():
             self.assertAlmostEqual(mockSize,
                                    realInfo[key],
                                    delta=self.REL_FILESIZE_DEV *
                                    realInfo[key])
Exemplo n.º 12
0
 def vector_query(self, useLocalZip=False):
     """
     Query vector data in various ways.

     :param useLocalZip: if true, the query is constructed from
                         `self.PAIRS_VECTOR_ZIP_PATH` instead of the JSON
                         sample request, and the checks on the submit and
                         status responses are skipped
     """
     # query mocked data
     logging.info("TEST: Query (mocked) data.")
     if useLocalZip:
         querySource = self.PAIRS_VECTOR_ZIP_PATH
     else:
         # read the query definition with a context manager so the file gets closed
         with open(
                 os.path.join(TEST_DATA_DIR,
                              'vector-data-sample-request.json')) as requestFile:
             querySource = json.load(requestFile)
     testVectorQuery = paw.PAIRSQuery(
         querySource,
         'https://' + PAIRS_SERVER,
         auth=PAIRS_CREDENTIALS,
         baseURI=PAIRS_BASE_URI,
     )
     # check that query got submitted
     testVectorQuery.submit()
     if not useLocalZip:
         self.assertTrue(testVectorQuery.querySubmit.ok)
     # poll and check that data status is finished
     testVectorQuery.poll_till_finished(printStatus=True)
     if not useLocalZip:
         self.assertTrue(testVectorQuery.queryStatus.ok)
     # check that certain files exist
     testVectorQuery.download()
     self.assertTrue(os.path.exists(testVectorQuery.zipFilePath))
     logging.info("TEST: Check files downloaded.")
     # opening the archive at least verifies that it is a readable ZIP file
     with zipfile.ZipFile(testVectorQuery.zipFilePath):
         # TODO: once implemented, test the existence of the basic meta file
         # ('output.info') in the archive's name list; disabled for now
         pass
     # load vector meta data
     logging.info("TEST: Load vector meta data.")
     testVectorQuery.list_layers()
     # check that all data are listed as type vector
     self.assertTrue(
         all('vector' == meta['layerType']
             for meta in testVectorQuery.metadata.values()))
     logging.info("TEST: Create dataframe from raster data.")
     # load the vector data into Pandas dataframes
     testVectorQuery.create_layers()
     # access the vector dataframe
     for name, meta in testVectorQuery.metadata.items():
         if meta['layerType'] != 'vector':
             continue
         self.assertIsInstance(
             testVectorQuery.data[name],
             pandas.DataFrame,
         )
         # try to split property string column (if any)
         testVectorQuery.vdf = testVectorQuery.data[name]
         # check property string column splitting
         colsBeforeSplit = len(testVectorQuery.vdf.columns)
         testVectorQuery.split_property_string_column()
         colsAfterSplit = len(testVectorQuery.vdf.columns)
         if paw.PROPERTY_STRING_COL_NAME in testVectorQuery.vdf.columns:
             self.assertLess(colsBeforeSplit, colsAfterSplit)
         else:
             self.assertEqual(colsBeforeSplit, colsAfterSplit)
         # run twice to double-check it is not increasing the number of columns
         testVectorQuery.split_property_string_column()
         colsAfter2ndSplit = len(testVectorQuery.vdf.columns)
         self.assertEqual(colsAfterSplit, colsAfter2ndSplit)
     # check that the data acknowledgement statement is not empty
     self.assertIsNotNone(testVectorQuery.dataAcknowledgeText)
Exemplo n.º 13
0
 def raster_aggregation_query(self, useLocalZip=False):
     """
     Query aggregated raster data.

     :param useLocalZip: if true, the query is constructed from
                         `self.PAIRS_AGG_RASTER_ZIP_PATH` instead of the JSON
                         sample request, and the checks on the submit and
                         status responses are skipped
     """
     # query mocked data
     logging.info("TEST: Query (mocked) aggregation data.")
     if useLocalZip:
         querySource = self.PAIRS_AGG_RASTER_ZIP_PATH
     else:
         # read the query definition with a context manager so the file gets closed
         with open(
                 os.path.join(
                     TEST_DATA_DIR,
                     'aggregation-data-sample-request.json')) as requestFile:
             querySource = json.load(requestFile)
     testRasterAggQuery = paw.PAIRSQuery(
         querySource,
         'https://' + PAIRS_SERVER,
         auth=PAIRS_CREDENTIALS,
         baseURI=PAIRS_BASE_URI,
     )
     # check that query got submitted
     testRasterAggQuery.submit()
     if not useLocalZip:
         self.assertTrue(testRasterAggQuery.querySubmit.ok)
     # poll and check that data status is finished
     testRasterAggQuery.poll_till_finished(printStatus=True)
     if not useLocalZip:
         self.assertTrue(testRasterAggQuery.queryStatus.ok)
     # check that certain files exist
     testRasterAggQuery.download()
     self.assertTrue(os.path.exists(testRasterAggQuery.zipFilePath))
     logging.info("TEST: Check files downloaded.")
     with zipfile.ZipFile(testRasterAggQuery.zipFilePath) as zf:
         # list the archive members once instead of once per check
         archiveNames = zf.namelist()
         # test the existence of the basic meta file
         for fileName in [
                 'output.info',
         ]:
             self.assertIn(fileName, archiveNames)
         # check that for each aggregated CSV file there exists a corresponding JSON meta file
         for csvFilePath in archiveNames:
             # find all CSV files
             if csvFilePath.endswith('.csv'):
                 # check a corresponding JSON file exists
                 self.assertIn(csvFilePath + '.json', archiveNames)
                 # try to temporarily open the JSON file
                 json.loads(zf.read(csvFilePath + '.json'))
     # load aggregated raster meta data (which are actually vector-type data!)
     logging.info("TEST: Load aggregated raster meta data.")
     testRasterAggQuery.list_layers()
     # check that 'details' of raster data have been successfully loaded by
     # getting the spatial reference information
     firstLayerMeta = list(testRasterAggQuery.metadata.values())[0]
     self.assertIsInstance(firstLayerMeta["details"]["spatialRef"],
                           string_type)
     # check that all data are listed as type vector
     self.assertTrue(
         all('vector' == meta['layerType']
             for meta in testRasterAggQuery.metadata.values()))
     logging.info(
         "TEST: Create Pandas dataframes from aggregated raster data.")
     # load the aggregated raster data as vector data into Pandas dataframes
     testRasterAggQuery.create_layers()
     # access the Pandas dataframes
     for name, meta in testRasterAggQuery.metadata.items():
         if meta['layerType'] == 'vector':
             self.assertIsInstance(
                 testRasterAggQuery.data[name],
                 pandas.DataFrame,
             )
     # check that the data acknowledgement statement is not empty
     self.assertIsNotNone(testRasterAggQuery.dataAcknowledgeText)