Exemplo n.º 1
0
def merge_all_items(source_catalog,
                    target_catalog,
                    move_assets=False,
                    ignore_conflicts=False):
    """Merge all items from source_catalog into target_catalog.

    Calls merge_items on any items that have the same ID between the two catalogs.
    Any items that don't exist in the target_catalog will be added to the
    target_catalog. If the target_catalog is a Collection, it will be set as the
    collection of any new items, and its extent is updated from its items
    once the merge is complete.

    Args:
        source_catalog (Catalog or Collection): The catalog or collection that items
            will be drawn from to merge into the target catalog.
            Unmatched source items are cloned before being added to the target.
        target_catalog (Catalog or Collection): The target catalog that will be
            merged into. This catalog is mutated in this operation.
        move_assets (bool): If true, move the asset files alongside the target item.
        ignore_conflicts (bool): If True, assets with the same keys will not be merged,
            and asset files that would be moved to overwrite an existing file
            will not be moved. If False, either of these situations will throw an error.

    Returns:
        Catalog or Collection: The target_catalog

    Raises:
        ValueError: If target_catalog has no self href, since the HREFs of
            newly added items are derived from it.
    """
    # Validate before touching anything so a failure leaves the target unchanged.
    target_href = target_catalog.get_self_href()
    if target_href is None:
        raise ValueError(
            f"Cannot merge items into {target_catalog} because it does not "
            "have a self href.")
    parent_dir = os.path.dirname(target_href)

    unmatched = {item.id: item for item in source_catalog.get_all_items()}

    for item in target_catalog.get_all_items():
        # Popping marks the source item as handled so it is not re-added below.
        source_item = unmatched.pop(item.id, None)
        if source_item is not None:
            merge_items(source_item,
                        item,
                        move_assets=move_assets,
                        ignore_conflicts=ignore_conflicts)

    # Process source items that did not match existing target items
    layout_strategy = BestPracticesLayoutStrategy()
    for item in unmatched.values():
        item_copy = item.clone()
        item_copy.set_self_href(
            layout_strategy.get_item_href(item_copy, parent_dir))
        target_catalog.add_item(item_copy)

        if isinstance(target_catalog, pystac.Collection):
            item_copy.set_collection(target_catalog)
        else:
            item_copy.set_collection(None)

        if move_assets:
            do_move_assets(item_copy, copy=False)

    if target_catalog.STAC_OBJECT_TYPE == pystac.STACObjectType.COLLECTION:
        target_catalog.update_extent_from_items()

    return target_catalog
Exemplo n.º 2
0
 def test_produces_fallback_layout_for_catalog(self) -> None:
     """A template strategy without a catalog template defers to its fallback."""
     parent_dir = "http://example.com"
     fallback_strategy = BestPracticesLayoutStrategy()
     template_strategy = TemplateLayoutStrategy(
         collection_template=self.TEST_COLLECTION_TEMPLATE,
         item_template=self.TEST_ITEM_TEMPLATE,
         fallback_strategy=fallback_strategy,
     )
     catalog = pystac.Catalog(id="test", description="test desc")

     actual = template_strategy.get_href(catalog, parent_dir=parent_dir)

     # The fallback's own answer for the same inputs is the expected value.
     self.assertEqual(
         actual, fallback_strategy.get_href(catalog, parent_dir=parent_dir))
Exemplo n.º 3
0
 def test_produces_fallback_layout_for_catalog(self) -> None:
     """A custom strategy without a catalog function defers to its fallback."""
     parent_dir = "http://example.com"
     fallback_strategy = BestPracticesLayoutStrategy()
     custom_strategy = CustomLayoutStrategy(
         collection_func=self.get_custom_collection_func(),
         item_func=self.get_custom_item_func(),
         fallback_strategy=fallback_strategy,
     )
     catalog = pystac.Catalog(id="test", description="test desc")

     actual = custom_strategy.get_href(catalog, parent_dir=parent_dir)

     # The fallback's own answer for the same inputs is the expected value.
     self.assertEqual(
         actual, fallback_strategy.get_href(catalog, parent_dir=parent_dir))
Exemplo n.º 4
0
 def test_produces_fallback_layout_for_item(self) -> None:
     """A template strategy without an item template defers to its fallback."""
     parent_dir = "http://example.com"
     fallback_strategy = BestPracticesLayoutStrategy()
     template_strategy = TemplateLayoutStrategy(
         catalog_template=self.TEST_CATALOG_TEMPLATE,
         collection_template=self.TEST_COLLECTION_TEMPLATE,
         fallback_strategy=fallback_strategy,
     )
     # Any item will do; take the first one the collection yields.
     first_item = next(iter(self._get_collection().get_all_items()))

     actual = template_strategy.get_href(first_item, parent_dir=parent_dir)

     self.assertEqual(
         actual,
         fallback_strategy.get_href(first_item, parent_dir=parent_dir))
Exemplo n.º 5
0
 def test_produces_fallback_layout_for_collection(self) -> None:
     """A template strategy without a collection template defers to its fallback."""
     parent_dir = "http://example.com"
     fallback_strategy = BestPracticesLayoutStrategy()
     template_strategy = TemplateLayoutStrategy(
         catalog_template=self.TEST_CATALOG_TEMPLATE,
         item_template=self.TEST_ITEM_TEMPLATE,
         fallback_strategy=fallback_strategy,
     )
     test_collection = self._get_collection()

     actual = template_strategy.get_href(test_collection,
                                         parent_dir=parent_dir)

     self.assertEqual(
         actual,
         fallback_strategy.get_href(test_collection, parent_dir=parent_dir))
Exemplo n.º 6
0
 def test_produces_fallback_layout_for_item(self) -> None:
     """A custom strategy without an item function defers to its fallback."""
     parent_dir = "http://example.com"
     fallback_strategy = BestPracticesLayoutStrategy()
     custom_strategy = CustomLayoutStrategy(
         catalog_func=self.get_custom_catalog_func(),
         collection_func=self.get_custom_collection_func(),
         fallback_strategy=fallback_strategy,
     )
     # Any item will do; take the first one the test collection yields.
     first_item = next(iter(TestCases.test_case_8().get_all_items()))

     actual = custom_strategy.get_href(first_item, parent_dir=parent_dir)

     self.assertEqual(
         actual,
         fallback_strategy.get_href(first_item, parent_dir=parent_dir))
Exemplo n.º 7
0
    def add_item(
        self,
        item: "Item_Type",
        title: Optional[str] = None,
        strategy: Optional[HrefLayoutStrategy] = None,
    ) -> None:
        """Adds a link to an :class:`~pystac.Item`.

        The item's root is set to this catalog's root and its parent to this
        catalog. If this catalog has a self href, the item's self href is
        derived from it using the layout strategy.

        Args:
            item : The item to add.
            title : Optional title to give to the :class:`~pystac.Link`
            strategy : The layout strategy to use for setting the
                self href of the item. A ``BestPracticesLayoutStrategy``
                is used when this is None.

        Raises:
            pystac.STACError: If ``item`` is a catalog rather than an item.
        """
        # Guard against add_item/add_child mix-ups.
        if isinstance(item, pystac.Catalog):
            raise pystac.STACError("Cannot add catalog as item. Use add_child instead.")

        layout = BestPracticesLayoutStrategy() if strategy is None else strategy

        item.set_root(self.get_root())
        item.set_parent(self)

        # Without a self href on this catalog there is nothing to derive
        # the item's location from, so its self href is left untouched.
        catalog_href = self.get_self_href()
        if catalog_href:
            item.set_self_href(
                layout.get_href(item, os.path.dirname(catalog_href))
            )

        self.add_link(Link.item(item, title=title))
Exemplo n.º 8
0
    def add_child(
        self,
        child: Union["Catalog", "Collection_Type"],
        title: Optional[str] = None,
        strategy: Optional[HrefLayoutStrategy] = None,
    ) -> None:
        """Adds a link to a child :class:`~pystac.Catalog` or
        :class:`~pystac.Collection`.

        The child's root is set to this catalog's root and its parent to this
        catalog. If this catalog has a self href, the child's self href is
        derived from it using the layout strategy.

        Args:
            child : The child to add.
            title : Optional title to give to the :class:`~pystac.Link`
            strategy : The layout strategy to use for setting the
                self href of the child. A ``BestPracticesLayoutStrategy``
                is used when this is None.

        Raises:
            pystac.STACError: If ``child`` is an item rather than a catalog
                or collection.
        """
        # Guard against add_item/add_child mix-ups.
        if isinstance(child, pystac.Item):
            raise pystac.STACError("Cannot add item as child. Use add_item instead.")

        layout = BestPracticesLayoutStrategy() if strategy is None else strategy

        child.set_root(self.get_root())
        child.set_parent(self)

        # Without a self href on this catalog there is nothing to derive
        # the child's location from, so its self href is left untouched.
        catalog_href = self.get_self_href()
        if catalog_href:
            child.set_self_href(
                layout.get_href(child, os.path.dirname(catalog_href))
            )

        self.add_link(Link.child(child, title=title))
Exemplo n.º 9
0
def add_item(source_item: Item,
             target_catalog: Catalog,
             move_assets: bool = False) -> None:
    """Add a copy of an item to a catalog.

    The source item is cloned; the clone receives a self href laid out under
    the target catalog's directory and is then added to the target. When the
    target is a Collection, the clone is assigned to it and the collection's
    extent is updated from its items; otherwise the clone's collection is
    cleared.

    Args:
        source_item (pystac.Item): The Item that will be added.
            It is cloned, and the clone is what gets added.
        target_catalog (pystac.Catalog): The destination catalog.
            This catalog will be mutated in this operation.
        move_assets (bool): If true, move the asset files alongside the target item.

    Raises:
        ValueError: If the target already contains an item with the same ID,
            or if the target catalog has no self href.
    """
    existing_ids = [
        existing.id for existing in target_catalog.get_all_items()
    ]
    if source_item.id in existing_ids:
        raise ValueError(
            f'An item with ID {source_item.id} already exists in the target catalog'
        )

    # The new item's location is derived from the catalog's own location.
    catalog_href = target_catalog.get_self_href()
    if not catalog_href:
        raise ValueError(
            f"Cannot add Item {source_item.id} because {target_catalog} does not have a self href."
        )

    catalog_dir = os.path.dirname(catalog_href)
    layout = BestPracticesLayoutStrategy()
    new_item = source_item.clone()
    new_item.set_self_href(layout.get_item_href(new_item, catalog_dir))
    target_catalog.add_item(new_item)

    owning_collection = (target_catalog
                         if isinstance(target_catalog, Collection) else None)
    new_item.set_collection(owning_collection)
    if owning_collection is not None:
        target_catalog.update_extent_from_items()

    if move_assets:
        do_move_assets(new_item, copy=False)
Exemplo n.º 10
0
class BestPracticesLayoutStrategyTest(unittest.TestCase):
    """Checks the HREFs that BestPracticesLayoutStrategy produces."""

    def setUp(self) -> None:
        """Create a fresh strategy for every test."""
        self.strategy = BestPracticesLayoutStrategy()

    def test_produces_layout_for_root_catalog(self) -> None:
        """A root catalog is placed directly in the parent directory."""
        root_catalog = pystac.Catalog(id="test", description="test desc")
        actual = self.strategy.get_href(root_catalog,
                                        parent_dir="http://example.com",
                                        is_root=True)
        self.assertEqual(actual, "http://example.com/catalog.json")

    def test_produces_layout_for_child_catalog(self) -> None:
        """A child catalog is placed in a subdirectory named after its id."""
        child_catalog = pystac.Catalog(id="test", description="test desc")
        actual = self.strategy.get_href(child_catalog,
                                        parent_dir="http://example.com")
        self.assertEqual(actual, "http://example.com/test/catalog.json")

    def test_produces_layout_for_root_collection(self) -> None:
        """A root collection is placed directly in the parent directory."""
        root_collection = TestCases.test_case_8()
        actual = self.strategy.get_href(root_collection,
                                        parent_dir="http://example.com",
                                        is_root=True)
        self.assertEqual(actual, "http://example.com/collection.json")

    def test_produces_layout_for_child_collection(self) -> None:
        """A child collection is placed in a subdirectory named after its id."""
        child_collection = TestCases.test_case_8()
        actual = self.strategy.get_href(child_collection,
                                        parent_dir="http://example.com")
        self.assertEqual(
            actual,
            f"http://example.com/{child_collection.id}/collection.json")

    def test_produces_layout_for_item(self) -> None:
        """An item is placed at ``<id>/<id>.json`` under the parent directory."""
        # Any item will do; take the first one the test collection yields.
        first_item = next(iter(TestCases.test_case_8().get_all_items()))
        actual = self.strategy.get_href(first_item,
                                        parent_dir="http://example.com")
        self.assertEqual(
            actual,
            f"http://example.com/{first_item.id}/{first_item.id}.json")
Exemplo n.º 11
0
    def normalize_hrefs(
        self, root_href: str, strategy: Optional[HrefLayoutStrategy] = None
    ) -> None:
        """Regenerate every self HREF in the catalog tree from ``root_href``.

        HREFs are laid out by the given strategy; by default this is the
        canonical catalog layout as specified in the STAC specification's
        best practices.

        This method mutates the entire catalog tree.

        Args:
            root_href : The HREF that all links will be normalized against.
                A relative value is made absolute against the current
                working directory.
            strategy : The layout strategy to use in setting the HREFS
                for this catalog. Defaults to
                :class:`~pystac.layout.BestPracticesLayoutStrategy`

        See:
            :stac-spec:`STAC best practices document <best-practices.md#catalog-layout>`
            for the canonical layout of a STAC.
        """
        layout: HrefLayoutStrategy = (
            BestPracticesLayoutStrategy() if strategy is None else strategy
        )

        # Normalizing requires an absolute path
        if not is_absolute_href(root_href):
            root_href = make_absolute_href(root_href, os.getcwd(), start_is_dir=True)

        def plan_item(item: "Item_Type", parent_href: str) -> Callable[[], None]:
            """Compute the item's new href now; return a setter to apply it later."""
            item.resolve_links()
            item_href = layout.get_href(item, parent_href)

            def apply_item_href() -> None:
                item.set_self_href(item_href)

            return apply_item_href

        def plan_catalog(
            cat: Catalog, parent_href: str, is_root: bool
        ) -> List[Callable[[], None]]:
            """Collect setters for a catalog's items, descendants, then itself."""
            cat.resolve_links()

            cat_href = layout.get_href(cat, parent_href, is_root)
            cat_dir = os.path.dirname(cat_href)

            pending: List[Callable[[], None]] = [
                plan_item(item, cat_dir) for item in cat.get_items()
            ]

            for child in cat.get_children():
                pending.extend(plan_catalog(child, cat_dir, is_root=False))

            def apply_catalog_href() -> None:
                cat.set_self_href(cat_href)

            # The catalog's own setter goes last, after its items and children.
            pending.append(apply_catalog_href)
            return pending

        # Collect functions that will actually mutate the objects.
        # Delay mutation as setting hrefs while walking the catalog
        # can result in bad links.
        for apply_href in plan_catalog(self, root_href, is_root=True):
            apply_href()
Exemplo n.º 12
0
def merge_all_items(source_catalog,
                    target_catalog,
                    move_assets=False,
                    ignore_conflicts=False,
                    as_child=False,
                    child_folder=None):
    """Merge all items from source_catalog into target_catalog.

    Calls merge_items on any items that have the same ID between the two catalogs.
    Any items that don't exist in the target_catalog will be added to the
    target_catalog. If the target_catalog is a Collection, it will be set as the
    collection of any new items, and its extent is updated from its items
    once the merge is complete.

    Args:
        source_catalog (Catalog or Collection): The catalog or collection that items
            will be drawn from to merge into the target catalog.
            Unmatched source items are cloned before being added to the target.
        target_catalog (Catalog or Collection): The target catalog that will be
            merged into. This catalog is mutated in this operation.
        move_assets (bool): If true, move the asset files alongside the target item.
        ignore_conflicts (bool): If True, assets with the same keys will not be merged,
            and asset files that would be moved to overwrite an existing file
            will not be moved. If False, either of these situations will throw an error.
        as_child (bool): If True, a child catalog will be added with the content of the
            source catalog. Otherwise, items will be added directly to the destination
            catalog.
        child_folder (str): name of the subfolder to use in case the as_child option is
            set to True. If None, the id of the catalog will be used as folder name.

    Returns:
        Catalog or Collection: The target_catalog

    Raises:
        ValueError: If target_catalog has no self href, since the location of
            added items (or of the child catalog) is derived from it.
    """
    # Validate before touching anything so a failure leaves the target unchanged.
    target_href = target_catalog.get_self_href()
    if target_href is None:
        raise ValueError(
            f"Cannot merge items into {target_catalog} because it does not "
            "have a self href.")
    parent_dir = os.path.dirname(target_href)

    source_items = source_catalog.get_all_items()
    ids_to_items = {item.id: item for item in source_items}

    if as_child:
        # Copy the whole source catalog into a subfolder of the target, then
        # re-read the copy and link it as a child of the target.
        child_dir = os.path.join(parent_dir, child_folder or source_catalog.id)
        copy_catalog(source_catalog, child_dir, source_catalog.catalog_type,
                     move_assets)
        child_catalog_path = os.path.join(
            child_dir, os.path.basename(source_catalog.get_self_href()))
        source_catalog = pystac.read_file(child_catalog_path)
        target_catalog.add_child(source_catalog, source_catalog.title)
    else:
        for item in target_catalog.get_all_items():
            # Popping marks the source item as handled so it is not re-added below.
            source_item = ids_to_items.pop(item.id, None)
            if source_item is not None:
                merge_items(source_item,
                            item,
                            move_assets=move_assets,
                            ignore_conflicts=ignore_conflicts)

        # Process source items that did not match existing target items
        layout_strategy = BestPracticesLayoutStrategy()
        for item in ids_to_items.values():
            item_copy = item.clone()
            item_copy.set_self_href(
                layout_strategy.get_item_href(item_copy, parent_dir))
            target_catalog.add_item(item_copy)

            if isinstance(target_catalog, pystac.Collection):
                item_copy.set_collection(target_catalog)
            else:
                item_copy.set_collection(None)

            if move_assets:
                do_move_assets(item_copy, copy=False)

    if target_catalog.STAC_OBJECT_TYPE == pystac.STACObjectType.COLLECTION:
        target_catalog.update_extent_from_items()

    return target_catalog
Exemplo n.º 13
0
    def normalize_hrefs(self, root_href, strategy=None):
        """Regenerate every self HREF in the catalog tree from ``root_href``.

        HREFs are laid out by the given strategy; by default this is the
        canonical catalog layout as specified in the STAC specification's
        best practices.

        This method mutates the entire catalog tree.

        Args:
            root_href (str): The HREF that all links will be normalized against.
                A relative value is made absolute against the current
                working directory.
            strategy (HrefLayoutStrategy): The layout strategy to use in setting the HREFS
                for this catalog. Defaults to :class:`~pystac.layout.BestPracticesLayoutStrategy`

        Returns:
            This catalog, after its tree has been updated.

        See:
            `STAC best practices document <https://github.com/radiantearth/stac-spec/blob/v0.8.1/best-practices.md#catalog-layout>`_ for the canonical layout of a STAC.
        """ # noqa E501
        layout = BestPracticesLayoutStrategy() if strategy is None else strategy

        # Normalizing requires an absolute path
        if not is_absolute_href(root_href):
            root_href = make_absolute_href(root_href,
                                           os.getcwd(),
                                           start_is_dir=True)

        def deferred_set_href(stac_object, href):
            """Return a zero-argument callable that applies ``href`` later."""
            def apply():
                stac_object.set_self_href(href)

            return apply

        def collect_setters(cat, parent_href, is_root):
            """Gather setters for a catalog's items, descendants, then itself."""
            cat.resolve_links()

            cat_href = layout.get_href(cat, parent_href, is_root)
            cat_dir = os.path.dirname(cat_href)

            pending = []
            for item in cat.get_items():
                item.resolve_links()
                pending.append(
                    deferred_set_href(item, layout.get_href(item, cat_dir)))

            for child in cat.get_children():
                pending.extend(collect_setters(child, cat_dir, is_root=False))

            # The catalog's own setter goes last, after its items and children.
            pending.append(deferred_set_href(cat, cat_href))
            return pending

        # Collect functions that will actually mutate the objects.
        # Delay mutation as setting hrefs while walking the catalog
        # can result in bad links.
        for apply_href in collect_setters(self, root_href, is_root=True):
            apply_href()

        return self
Exemplo n.º 14
0
 def setUp(self) -> None:
     """Create a fresh BestPracticesLayoutStrategy and store it on the test case."""
     self.strategy = BestPracticesLayoutStrategy()