Example No. 1
def import_from_file_s2(in_csv, block_size):
    import pprint
    api = Api()
    # Walk the CSV in blocks of block_size rows and register each block with the API.
    for n in range(0, count_lines(in_csv), block_size):
        print 'Range: <%d:%d>' % (n, n + block_size)
        datasets = sentinel_harvester(in_csv, n, block_size)
        out = api.create_dataset(datasets)
        pprint.pprint(out)
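
A minimal invocation sketch, assuming Api, count_lines and sentinel_harvester are importable from the project's own modules; the import paths and CSV filename below are hypothetical:

# Hypothetical module paths; adjust to the project's actual layout.
# from api.eoss_api import Api
# from utilities import sentinel_harvester, count_lines

if __name__ == '__main__':
    # Register Sentinel-2 metadata from a local CSV in blocks of 100 rows.
    import_from_file_s2('sentinel2_catalog.csv', 100)
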
Example No. 2
def import_from_sentinel_catalog(sensor, start_date, api_url):
    import numpy
    api = Api(api_url)

    max_cloud_ratio = 1.0
    ag_season_start = dateutil.parser.parse(start_date)
    ag_season_end = ag_season_start + datetime.timedelta(days=1)

    for lon in numpy.arange(-180, 180, 9):
        for lat in numpy.arange(-90, 90, 9):
            lon_end = lon + 9
            lat_end = lat + 9

            # Build a closed 9 x 9 degree AOI polygon (NW -> NE -> SE -> SW -> NW).
            aoi_se = (lon_end, lat)
            aoi_nw = (lon, lat_end)
            aoi_ne = (aoi_se[0], aoi_nw[1])
            aoi_sw = (aoi_nw[0], aoi_se[1])
            aoi = [aoi_nw, aoi_ne, aoi_se, aoi_sw, aoi_nw]

            cat = SentinelCatalog()
            datasets = cat.find(sensor, aoi, ag_season_start, ag_season_end,
                                max_cloud_ratio)

            if datasets is not None:
                ds_found = list()
                ds_missing = list()
                for counter, ds in enumerate(datasets):
                    catalog_ds = api.get_dataset(ds.entity_id)
                    if catalog_ds is None or len(catalog_ds) == 0:
                        ds_missing.append(ds)
                    elif len(catalog_ds) == 1:
                        ds_found.append(catalog_ds)
                    else:
                        print 'More in catalog found: %s (%d)' % (
                            ds.entity_id, len(catalog_ds))
                    if (counter % 25) == 0:
                        print counter, len(datasets)
                print 'already registered: ', len(ds_found), len(datasets)
                print 'missing: ', len(ds_missing), len(datasets)

                for counter, ds_obj in enumerate(ds_missing):
                    new_ds = api.create_dataset(ds_obj)
                    if new_ds is not None:
                        print new_ds
                    if (counter % 25) == 0:
                        print counter, len(ds_missing)
            else:
                print 'No data found in catalog for %s from %s to %s' % (
                    sensor, ag_season_start.strftime("%Y-%m-%d"),
                    ag_season_end.strftime("%Y-%m-%d"))
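
A hedged usage sketch for the tile-by-tile harvester above; the sensor name and endpoint are illustrative placeholders, not values confirmed by the source:

if __name__ == '__main__':
    # Harvest one day of Sentinel scenes (starting 2016-08-05) into the catalog.
    import_from_sentinel_catalog('Sentinel-2A', '2016-08-05', 'http://api.eoss.cloud')
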
Example No. 3
class ApiTest(unittest.TestCase):
    """Tests dataset retrieval and catalog search through the Api client."""
    def setUp(self):
        self.api = Api()  # url='http://api.eoss.cloud'

    def testCreateConfig(self):
        """
        Retrieve known datasets by entity id and check the expected result counts.
        """

        ds = self.api.get_dataset('LC81920272016240LGN00')
        self.assertEqual(len(ds), 1)

        ds = self.api.get_dataset('LE71010172003151EDC00')
        self.assertEqual(len(ds), 1)

        ds = self.api.get_dataset(
            'S2A_OPER_PRD_MSIL1C_PDMC_20160806T202847_R142_V20160805T192909_20160805T192909'
        )
        self.assertEqual(len(ds), 21)

    def testCatalogSearch(self):
        aoi_nw = (-91.5175095, 16.8333384)
        aoi_se = (-91.3617268, 16.8135385)
        aoi_ne = (aoi_se[0], aoi_nw[1])
        aoi_sw = (aoi_nw[0], aoi_se[1])
        aoi = [aoi_nw, aoi_ne, aoi_se, aoi_sw, aoi_nw]

        # Object representation
        results = self.api.search_dataset(aoi,
                                          100,
                                          parse('2015-01-01'),
                                          parse('2015-03-01'),
                                          'landsat8',
                                          full_objects=True)
        self.assertEqual(len(results), 3)
        for item in results:
            self.assertTrue(type(item).__name__ == 'Catalog_Dataset')

        # JSON representation
        results = self.api.search_dataset(aoi,
                                          100,
                                          parse('2015-01-01'),
                                          parse('2015-03-01'),
                                          'landsat8',
                                          full_objects=False)
        self.assertEqual(len(results), 3)
        for item in results:
            self.assertIsInstance(item, dict)
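
The test case above can be run with the standard unittest runner, assuming the project modules providing Api and parse are importable:

if __name__ == '__main__':
    unittest.main()
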
Example No. 4
def import_from_pipe_ls(lines):
    datasets = landsat_harvester_line(lines)
    api = Api()
    skipped = list()
    registered = list()

    for c, ds in enumerate(datasets):
        try:
            out = api.create_dataset(ds)
            if 'already' not in str(out):
                registered.append(c)
            else:
                skipped.append(c)
        except Exception as e:
            print e
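
A sketch of feeding the pipe-based importer from stdin; sys is a standard-library import added here for illustration, and whether landsat_harvester_line expects a list of raw lines is an assumption:

import sys

if __name__ == '__main__':
    # e.g.  cat landsat_scene_list.txt | python import_pipe_ls.py
    import_from_pipe_ls(sys.stdin.readlines())
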
Example No. 5
def import_from_landsat_catalog(sensor, start_date, api_url):
    api = Api(api_url)

    max_cloud_ratio = 1.0
    ag_season_start = dateutil.parser.parse(start_date)
    ag_season_end = ag_season_start + datetime.timedelta(days=1)
    # Global AOI: a closed polygon covering the whole globe (NW -> NE -> SE -> SW -> NW).
    aoi_se = (180, -90)
    aoi_nw = (-180, 90)
    aoi_ne = (aoi_se[0], aoi_nw[1])
    aoi_sw = (aoi_nw[0], aoi_se[1])
    aoi = [aoi_nw, aoi_ne, aoi_se, aoi_sw, aoi_nw]

    cat = USGSCatalog()
    # "LANDSAT_8", "LANDSAT_ETM_SLC_OFF", "LANDSAT_ETM"
    datasets = cat.find(sensor, aoi, ag_season_start, ag_season_end,
                        max_cloud_ratio)

    if datasets is not None:
        ds_found = list()
        ds_missing = list()
        for counter, ds in enumerate(datasets):
            catalog_ds = api.get_dataset(ds.entity_id)
            if catalog_ds is None or len(catalog_ds) == 0:
                ds_missing.append(ds)
            elif len(catalog_ds) == 1:
                ds_found.append(catalog_ds)
            else:
                print 'More in catalog found: %s (%d)' % (ds.entity_id,
                                                          len(catalog_ds))
            if (counter % 25) == 0:
                print counter, len(datasets)
        print 'already registered: ', len(ds_found), len(datasets)
        print 'missing: ', len(ds_missing), len(datasets)

        for counter, ds_obj in enumerate(ds_missing):
            new_ds = api.create_dataset(ds_obj)
            if new_ds is not None:
                print new_ds
            if (counter % 25) == 0:
                print counter, len(ds_missing)
    else:
        print 'No data found in catalog for %s from %s to %s' % (
            sensor, ag_season_start.strftime("%Y-%m-%d"),
            ag_season_end.strftime("%Y-%m-%d"))
Example No. 6
def import_from_file_ls(in_csv):
    datasets = landsat_harvester(in_csv)

    api = Api()
    skipped = list()
    registered = list()

    for c, ds in enumerate(datasets):
        try:
            out = api.create_dataset(ds)

            if 'title' not in str(out):
                registered.append(c)
            else:
                skipped.append(c)
        except Exception as e:
            print e
        if c % 100 == 0:
            print c
            print 'skipped:', skipped
            print 'registered:', registered
            skipped = list()
            registered = list()
Example No. 7
def update_catalog(queue_name, api_endpoint):
    api = Api(api_endpoint)
    sqs = boto3.resource('sqs')

    if queue_name not in get_all_queues():
        raise Exception('Queue %s does not exist in %s' %
                        (queue_name, get_all_queues()))
    queue = sqs.get_queue_by_name(QueueName=queue_name)
    should_break = False
    counter = 1

    while not should_break:
        if int(queue.attributes.get('ApproximateNumberOfMessages')) == 0:
            time_interval = 60
        else:
            time_interval = 2
        logger.info('Getting messages from SQS: %s (%d sec. interval)' %
                    (queue_name, time_interval))

        messages_to_delete = list()
        for message_obj in queue.receive_messages(
                MaxNumberOfMessages=MAX_MESSAGES,
                WaitTimeSeconds=10,
                VisibilityTimeout=60,
        ):
            messages_to_delete = list()
            notification = ujson.loads(message_obj.body)
            message = ujson.loads(notification[u'Message'])

            if get_message_type(message) == 'landsat':
                for rec in message[u'Records']:
                    s3 = extract_s3_structure(rec)
                    s3['metadata'] = os.path.join(s3['s3_http'], s3['s3_path'],
                                                  s3['entity_id'] + '_MTL.txt')
                    s3['metadata_json'] = os.path.join(
                        s3['s3_http'], s3['s3_path'],
                        s3['entity_id'] + '_MTL.json')
                    s3['quicklook'] = os.path.join(
                        s3['s3_http'], s3['s3_path'],
                        s3['entity_id'] + '_thumb_large.jpg')
                    req = requests.get(s3['metadata_json'])

                    try:
                        obj = parse_l1_metadata_file(req.json(), s3)
                        new_ds = api.create_dataset(obj)
                        if new_ds is not None:
                            print new_ds
                        counter += 1
                        messages_to_delete.append({
                            'Id':
                            message_obj.message_id,
                            'ReceiptHandle':
                            message_obj.receipt_handle
                        })
                    except ValueError:
                        logger.exception(
                            'ERROR: metadata location structure corrupted',
                            extra={'metadata_response': req.text})
                    except Exception as e:
                        logger.exception('General error occurred',
                                         extra={'request_url': req.url})
                        should_break = True
                    finally:
                        if len(messages_to_delete) > 0:
                            messages_to_delete = remove_messages_from_queue(
                                queue, messages_to_delete)
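
A hedged invocation sketch for the SQS-driven updater; the queue name below is a placeholder, and the endpoint reuses the URL mentioned in the test example above:

if __name__ == '__main__':
    # Poll the (hypothetical) 'landsat-updates' queue and register newly announced scenes.
    update_catalog('landsat-updates', 'http://api.eoss.cloud')
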
Example No. 8
def setUp(self):
    self.api = Api()  # url='http://api.eoss.cloud'
Example No. 9
def import_from_pipe_s2(lines):
    import pprint
    api = Api()
    datasets = sentinel_harvester_line(lines)
    out = api.create_dataset(datasets)
    pprint.pprint(out)
Example No. 10
def main(sensor, start_date, days, api_endpoint):
    api = Api(api_endpoint)
    logger.info('Checking consistency for %s from %s over the next %s days' % (sensor, start_date, days))

    aoi_nw = (-180, 90)
    aoi_se = (180, -90)
    aoi_ne = (aoi_se[0], aoi_nw[1])
    aoi_sw = (aoi_nw[0], aoi_se[1])
    aoi = [aoi_nw, aoi_ne, aoi_se, aoi_sw, aoi_nw]
    wrong_urls = list()

    for delta_day in range(1, days):
        start_time = time.time()
        start_date_date = parse(start_date) + datetime.timedelta(days=delta_day)
        end_date_date = start_date_date + datetime.timedelta(days=1)
        logger.info('Checking consistency for %s between %s and %s' % (sensor, start_date_date.isoformat(), end_date_date.isoformat()))

        # JSON representation
        results = api.search_dataset(aoi, 100, start_date_date, end_date_date, sensor, full_objects=False)

        url_resources = list()
        missing_urls = list()
        missing_types = list()

        for r in results:
            if r['resources']['s3public']['zip'] is not None:
                url_resources.append(r['resources']['s3public']['zip'])
            else:
                missing_urls.append('%s:%s' % (r['tile_identifier'], r['entity_id']))
                missing_types.append('zip')
            if r['resources']['metadata'] is not None:
                url_resources.append(r['resources']['metadata'])
            else:
                missing_urls.append('%s:%s' % (r['tile_identifier'], r['entity_id']))
                missing_types.append('metadata')
            if r['resources']['quicklook'] is not None:
                url_resources.append(r['resources']['quicklook'])
            else:
                missing_urls.append('%s:%s' % (r['tile_identifier'], r['entity_id']))
                missing_types.append('quicklook')


        logger.info('total scans: %d' % len(url_resources))
        logger.info('already missed resources: %d' % len(missing_urls))

        # Flip to True to check every URL sequentially instead of in parallel batches.
        if False:
            for counter, res in enumerate(url_resources):
                req = requests.head(res)
                if req.status_code != requests.codes.ok:
                    print res, req.status_code
                    missing_urls.append(res)
                print res
                if (counter % 25) == 0:
                    print counter
        else:
            counter = 0
            for url_parts in chunks(url_resources, 500):
                counter += 1
                # HEAD-check the whole chunk of URLs in parallel.
                rs = (grequests.head(u) for u in url_parts)
                res = grequests.map(rs)
                for req in res:
                    if req is not None:
                        if req.status_code != requests.codes.ok:
                            wrong_urls.append(req)
                            missing_types.append('zip_registered')
                    else:
                        print 'No response received for one of the requested resources'

        if len(wrong_urls) > 0:
            for req in wrong_urls:
                print req.url, req.status_code
                append_data('/tmp/wrong_urls.txt', req.url)
        if len(missing_urls) > 0:
            append_data('/tmp/missing_urls.txt', missing_urls)

        if len(missing_types) > 0:
            for res_type in ['zip_registered', 'quicklook', 'metadata', 'zip']:
                logger.info('%d:%s' % (operator.countOf(missing_types, res_type), res_type))

        logger.info('wrong resources: %d' % len(wrong_urls))
        logger.info('Executed in %f secs.' % (time.time() - start_time))
    print 'Wrong URLs:', wrong_urls
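
A hedged driver sketch for the consistency check above; the sensor string and endpoint are illustrative placeholders reused from the other examples:

if __name__ == '__main__':
    # Check 30 days of catalog resources starting 2016-08-05 (illustrative values).
    main('Sentinel-2', '2016-08-05', 30, 'http://api.eoss.cloud')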