Example #1
def index_connect(local_config: Optional[LocalConfig] = None,
                  application_name: Optional[str] = None,
                  validate_connection: bool = True) -> Index:
    """
    Create a Data Cube Index that can connect to a PostgreSQL server

    It contains all the required connection parameters, but doesn't actually
    check that the server is available.

    :param application_name: A short, alphanumeric name to identify this application.
    :param local_config: Config object to use. (optional)
    :param validate_connection: Validate database connection and schema immediately
    :raises datacube.index.exceptions.IndexSetupError:
    """
    if local_config is None:
        local_config = LocalConfig.find()

    driver_name = local_config.get('index_driver', 'default')
    index_driver = index_driver_by_name(driver_name)
    if not index_driver:
        raise RuntimeError(
            "No index driver found for %r. %s available: %s" %
            (driver_name, len(index_drivers()), ', '.join(index_drivers())))

    return index_driver.connect_to_index(
        local_config,
        application_name=application_name,
        validate_connection=validate_connection)
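
A minimal usage sketch, assuming datacube is installed and a PostgreSQL index is configured; the application name is illustrative:

# A minimal sketch, assuming datacube is installed and a database is
# configured. The application name is illustrative.
from datacube.config import LocalConfig
from datacube.index import index_connect

config = LocalConfig.find()  # searches the default config locations
index = index_connect(local_config=config,
                      application_name='example-script',
                      validate_connection=True)
print(index)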
Example #2
def check(local_config: LocalConfig):
    """
    Verify & view current configuration
    """
    def echo_field(name, value):
        echo('{:<15}'.format(name + ':') + style(str(value), bold=True))

    echo_field('Version', datacube.__version__)
    echo_field('Config files', ','.join(local_config.files_loaded))
    echo_field(
        'Host', '{}:{}'.format(local_config['db_hostname'] or 'localhost',
                               local_config.get('db_port', None) or '5432'))

    echo_field('Database', local_config['db_database'])
    echo_field('User', local_config['db_username'])
    echo_field('Environment', local_config['env'])
    echo_field('Index Driver', local_config['index_driver'])

    echo()
    echo('Valid connection:\t', nl=False)
    try:
        index = index_connect(local_config=local_config)
        echo(style('YES', bold=True))
        for role, user, description in index.users.list_users():
            if user == local_config['db_username']:
                echo('You have %s privileges.' %
                     style(role.upper(), bold=True))
    except OperationalError as e:
        handle_exception('Error Connecting to Database: %s', e)
    except IndexSetupError as e:
        handle_exception('Database not initialised: %s', e)
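
In datacube itself this function backs a CLI command (`datacube system check`); a direct call might look like the sketch below, assuming `LocalConfig` and the function above are importable:

# A minimal sketch, assuming the function above is in scope.
from datacube.config import LocalConfig

local_config = LocalConfig.find()
check(local_config)  # prints version, config files, host, database and user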
Example #3
def test_search_returning(index: Index, local_config: LocalConfig,
                          pseudo_ls8_type: DatasetType,
                          pseudo_ls8_dataset: Dataset,
                          ls5_dataset_w_children) -> None:

    assert index.datasets.count() == 4, "Expected four test datasets"

    # Expect a single result: our one matching pseudo-LS8 dataset.
    results = list(
        index.datasets.search_returning(
            ('id', 'sat_path', 'sat_row'),
            platform='LANDSAT_8',
            instrument='OLI_TIRS',
        ))
    assert len(results) == 1
    id_, path_range, row_range = results[0]
    assert id_ == pseudo_ls8_dataset.id
    # TODO: output nicer types?
    assert path_range == NumericRange(Decimal('116'), Decimal('116'), '[]')
    assert row_range == NumericRange(Decimal('74'), Decimal('84'), '[]')

    results = list(
        index.datasets.search_returning(
            (
                'id',
                'metadata_doc',
            ),
            platform='LANDSAT_8',
            instrument='OLI_TIRS',
        ))
    assert len(results) == 1
    id_, document = results[0]
    assert id_ == pseudo_ls8_dataset.id
    assert document == pseudo_ls8_dataset.metadata_doc

    my_username = local_config.get('db_username', DEFAULT_DB_USER)

    # Mixture of document and native fields
    results = list(
        index.datasets.search_returning(
            ('id', 'creation_time', 'format', 'label'),
            platform='LANDSAT_8',
            indexed_by=my_username,
        ))
    assert len(results) == 1

    id_, creation_time, format_, label = results[0]

    assert id_ == pseudo_ls8_dataset.id
    assert format_ == 'PSEUDOMD'

    # It's always UTC in the document
    expected_time = creation_time.astimezone(tz.tzutc()).replace(tzinfo=None)
    assert expected_time.isoformat() == pseudo_ls8_dataset.metadata_doc['creation_dt']
    assert label == pseudo_ls8_dataset.metadata_doc['ga_label']
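
Outside the test harness, `search_returning` is used the same way; a sketch, assuming an open `index` whose product's metadata type defines the named search fields:

# A minimal sketch, assuming `index` is an open Index and the product's
# metadata type defines 'sat_path' and 'sat_row' search fields.
for id_, path_range, row_range in index.datasets.search_returning(
        ('id', 'sat_path', 'sat_row'),
        platform='LANDSAT_8'):
    print(id_, path_range, row_range)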
Example #4
class DatacubeReplicator:
    def __init__(self, config):
        self.remote_host = config['remote_host']
        self.remote_user = config['remote_user']
        self.db_password = config['db_password']
        self.remote_dir = config['remote_dir']
        self.local_dir = config['local_dir']
        self.replication_defns = config['replicated_data']

        self.client = None
        self.sftp = None
        self.tunnel = None
        self.remote_dc_config = None
        self.remote_dc = None
        self.local_index = index_connect()

    def run(self):
        self.connect()
        self.read_remote_config()
        self.connect_to_db()
        self.replicate_all()
        self.disconnect()

    def connect(self):
        client = SSHClient()
        client.load_system_host_keys()
        client.set_missing_host_key_policy(WarningPolicy())
        client.connect(hostname=self.remote_host, username=self.remote_user)

        LOG.debug(client)
        self.client = client
        self.sftp = client.open_sftp()

    def disconnect(self):
        self.client.close()
        self.tunnel.stop()

    def read_remote_config(self):
        remote_config = ConfigParser()
        remote_config.read_string(_DEFAULT_CONF)
        with self.sftp.open('.datacube.conf') as fin:
            remote_config.read_file(fin)
        self.remote_dc_config = LocalConfig(remote_config)

    def connect_to_db(self):
        self.tunnel = SSHTunnelForwarder(
            self.remote_host,
            ssh_username=self.remote_user,
            remote_bind_address=(self.remote_dc_config.get(
                'db_hostname',
                '127.0.0.1'), int(self.remote_dc_config.get('db_port', 5432))))
        self.tunnel.start()

        # pylint: disable=protected-access
        self.remote_dc_config._config['datacube']['db_hostname'] = '127.0.0.1'
        self.remote_dc_config._config['datacube']['db_port'] = str(
            self.tunnel.local_bind_port)
        self.remote_dc_config._config['datacube'][
            'db_username'] = self.remote_user
        self.remote_dc_config._config['datacube'][
            'db_password'] = self.db_password

        # This requires the password from somewhere.
        # Parsing it out of .pgpass sounds error-prone and fragile,
        # so let's keep it in the configuration for now.
        LOG.debug('Remote configuration loaded %s', self.remote_dc_config)

        self.remote_dc = Datacube(config=self.remote_dc_config)

    def replicate_all(self):

        for defn in tqdm(self.replication_defns, 'Replicating products'):
            self.replicate(defn)

    def replicate_all_products(self):
        products = self.remote_dc.index.products.get_all()
        for product in products:
            self.local_index.products.add(product)

    def replicate(self, defn):
        datasets = list(self.remote_dc.find_datasets(**defn))

        if not datasets:
            LOG.info('No remote datasets found matching %s', defn)
            return

        # TODO: use generator not list
        product = datasets[0].type
        LOG.info('Ensuring remote product is in local index. %s', product)

        self.local_index.products.add(product)

        for dataset in tqdm(datasets, 'Datasets'):
            # dataset = remote_dc.index.datasets.get(dataset.id, include_sources=True)
            # We would need to pull the parent products down too
            # TODO: Include parent source datasets + product definitions
            dataset.sources = {}

            LOG.debug('Replicating dataset %s', dataset)
            remote_path = uri_to_path(dataset.local_uri)
            local_path = self.remote_to_local(remote_path)

            # Ensure local path exists
            Path(local_path).parent.mkdir(parents=True, exist_ok=True)

            # Download file
            self.sftp.get(remote_path, local_path)

            # Add to local index
            dataset.local_uri = 'file://' + local_path
            self.local_index.datasets.add(dataset)
            LOG.debug('Downloaded to %s', local_path)

    def remote_to_local(self, remote):
        return remote.replace(self.remote_dir, self.local_dir)
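
A sketch of driving the replicator end to end; every value in the config dictionary below is illustrative, but the keys mirror those read in `__init__`:

# A minimal sketch; all values are illustrative. The keys mirror the
# configuration read in DatacubeReplicator.__init__.
config = {
    'remote_host': 'dc-host.example.com',
    'remote_user': 'ubuntu',
    'db_password': 'secret',
    'remote_dir': '/g/data/',
    'local_dir': '/home/me/datacube/',
    'replicated_data': [
        # each entry is passed to Datacube.find_datasets(**defn)
        {'product': 'ls8_nbar_albers', 'time': ('2017-01-01', '2017-02-01')},
    ],
}

replicator = DatacubeReplicator(config)
replicator.run()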