def dump_to_odc(data_stream, dc: Datacube, products: list, **kwargs) -> Tuple[int, int]:
    """Index a stream of fetched YAML documents into an ODC database.

    Args:
        data_stream: iterable of objects exposing ``.url`` and ``.data``;
            entries whose ``.data`` is ``None`` are skipped.
        dc: an opened ``Datacube`` whose index receives the datasets.
        products: product names used by ``from_yaml_doc_stream`` to resolve
            each document — presumably candidate product names; confirm against caller.
        **kwargs: forwarded unchanged to ``from_yaml_doc_stream``.

    Returns:
        ``(ds_added, ds_failed)`` — counts of datasets indexed and of
        documents that failed to resolve or to be added.
    """
    # TODO: Get right combination of flags for **kwargs in low validation/no-lineage mode
    expand_stream = ((d.url, d.data) for d in data_stream if d.data is not None)
    ds_stream = from_yaml_doc_stream(expand_stream, dc.index, products=products, **kwargs)

    ds_added = 0
    ds_failed = 0
    # Consume chained streams to DB
    for ds, err in ds_stream:
        if err is not None:
            logging.error(err)
            ds_failed += 1
            continue
        logging.info(ds)
        # TODO: Potentially wrap this in transactions and batch to DB
        # TODO: Capture UUID's from YAML and perform a bulk has
        try:
            dc.index.datasets.add(ds)
            ds_added += 1
        except Exception as e:
            # logging.exception keeps the traceback; plain error(e) dropped it
            logging.exception(e)
            ds_failed += 1

    return ds_added, ds_failed
def dump_list_to_odc(
    account_url,
    container_name,
    yaml_content_list: List[Tuple[bytes, str, str]],
    dc: Datacube,
    products: List[str],
    **kwargs,
):
    """Index a list of downloaded YAML documents into an ODC database.

    Each entry of ``yaml_content_list`` is a ``(document, blob_path, ...)``
    tuple; entries with a ``None`` document are skipped. The dataset URI is
    built from the account URL, container name, and the directory portion of
    the blob path.

    Returns:
        ``(added, failed)`` counts.
    """

    def _to_uri(path: str) -> str:
        # Keep only the directory part of the blob path (up to the last "/").
        return account_url + "/" + container_name + "/" + path[: path.rfind("/") + 1]

    uri_doc_pairs = (
        (_to_uri(path), doc)
        for doc, path, *_ in yaml_content_list
        if doc is not None
    )
    ds_stream = from_yaml_doc_stream(uri_doc_pairs, dc.index, products=products, **kwargs)

    added = 0
    failed = 0
    # Drain the resolved-dataset stream into the index, tallying outcomes.
    for ds, err in ds_stream:
        if err is not None:
            logging.error(err)
            failed += 1
            continue
        logging.info(ds)
        try:
            dc.index.datasets.add(ds)
            added += 1
        except Exception as e:
            logging.error(e)
            failed += 1

    return added, failed
def dump_list_to_odc(
    yaml_content_list: List[Tuple[bytes, str, str]],
    dc: Datacube,
    products: List[str],
    **kwargs,
):
    """Index a list of downloaded YAML documents into an ODC database.

    Each entry of ``yaml_content_list`` is a ``(document, path, ...)`` tuple;
    entries with a ``None`` document are skipped. The dataset URI is the path
    prefixed with ``https://``.

    Returns:
        ``(added, failed)`` counts.
    """
    uri_doc_pairs = (
        ("https://" + path, doc)
        for doc, path, *_ in yaml_content_list
        if doc is not None
    )
    ds_stream = from_yaml_doc_stream(
        uri_doc_pairs, dc.index, transform=None, products=products, **kwargs
    )

    added = 0
    failed = 0
    # Drain the resolved-dataset stream into the index, tallying outcomes.
    for ds, err in ds_stream:
        if err is not None:
            logging.error(err)
            failed += 1
            continue
        logging.info(ds)
        # TODO: Potentially wrap this in transactions and batch to DB
        # TODO: Capture UUID's from YAML and perform a bulk has
        try:
            dc.index.datasets.add(ds)
            added += 1
        except Exception as e:
            logging.error(e)
            failed += 1

    return added, failed
def from_tar_file(tarfname, index, mk_uri, mode, doc_transform=None, **kwargs):
    """Stream dataset documents out of a tar archive and resolve them.

    Each document name from the archive is mapped to a URI via ``mk_uri``.

    Returns:
        A sequence of tuples, each either ``(ds, None)`` on success or
        ``(None, error_message)`` on failure.
    """
    uri_doc_stream = (
        (mk_uri(name), doc) for name, doc in tar_doc_stream(tarfname, mode=mode)
    )
    return from_yaml_doc_stream(uri_doc_stream, index, transform=doc_transform, **kwargs)
def dump_to_odc(
    data_stream,
    dc: Datacube,
    products: list,
    transform=None,
    update=False,
    allow_unsafe=False,
    **kwargs,
) -> Tuple[int, int]:
    """Index a stream of fetched dataset documents into an ODC database.

    Args:
        data_stream: iterable of objects exposing ``.url`` and ``.data``;
            entries whose ``.data`` is ``None`` are skipped.
        dc: an opened ``Datacube`` whose index receives the datasets.
        products: product names passed to ``from_yaml_doc_stream``.
        transform: optional document transform forwarded to ``from_yaml_doc_stream``.
        update: if True, update existing datasets instead of adding new ones.
        allow_unsafe: with ``update``, whitelist every field for modification
            via ``changes.allow_any``; otherwise only safe changes are allowed.
        **kwargs: forwarded unchanged to ``from_yaml_doc_stream``.

    Returns:
        ``(ds_added, ds_failed)`` — note that successful *updates* are counted
        in ``ds_added`` as well.
    """
    # TODO: Get right combination of flags for **kwargs in low validation/no-lineage mode
    expand_stream = ((d.url, d.data) for d in data_stream if d.data is not None)
    ds_stream = from_yaml_doc_stream(
        expand_stream, dc.index, products=products, transform=transform, **kwargs
    )

    ds_added = 0
    ds_failed = 0
    # Consume chained streams to DB
    for ds, err in ds_stream:
        if err is not None:
            logging.error(err)
            ds_failed += 1
            continue
        logging.info(ds)
        # TODO: Potentially wrap this in transactions and batch to DB
        # TODO: Capture UUID's from dataset doc and perform a bulk has
        try:
            if update:
                # Empty offset tuple + allow_any == permit changes anywhere in the doc
                updates = {tuple(): changes.allow_any} if allow_unsafe else {}
                dc.index.datasets.update(ds, updates_allowed=updates)
            else:
                dc.index.datasets.add(ds)
            ds_added += 1
        except Exception as e:
            # logging.exception keeps the traceback; plain error(e) dropped it
            logging.exception(e)
            ds_failed += 1

    return ds_added, ds_failed