示例#1
0
    def test_build_obj_from_classmethod_with_extra_attributes(self):
        """
        Build an ImageData through build_obj() from a dictionary that carries one
        unknown key, then check that the unknown key is not exposed as an attribute
        and that every other entry matches a manually populated reference object.

        """
        unknown_attr = 'bogus_attr'
        reference = ImageData()

        # Map each non-list attribute name to its upper-cased form
        # (obj.attribute = ATTRIBUTE) and mirror those values onto the reference.
        expected = {name: name.upper() for name in reference._list_attributes()
                    if not isinstance(name, list)}
        for name, value in expected.items():
            # The reference filename carries the image extension; the dictionary
            # entry itself is left without it.
            if name.lower() == 'filename':
                value = f"{value}.{self.IMAGE_EXT}"
            setattr(reference, name, value)

        # Only the dictionary receives the unknown entry.
        expected[unknown_attr] = unknown_attr.upper()

        # Object under test, built via the class method
        built = ImageData.build_obj(dictionary=expected)

        for name, value in expected.items():
            # The unknown key must not exist on the built object.
            if name == unknown_attr:
                assert not hasattr(built, unknown_attr)
                continue

            # Built object agrees with the reference object...
            assert_equals(getattr(reference, name), getattr(built, name))

            # ...and with the expected literal (filename gains the extension).
            if name.lower() == 'filename':
                value = f"{value}.{self.IMAGE_EXT}"
            assert_equals(getattr(built, name), value)
示例#2
0
    def test_build_obj_from_classmethod_without_id(self):
        """
        Build an ImageData through build_obj() from a dictionary that has no ID
        entry and verify the resulting object.

        The ID key is removed from the dictionary before building, so the ID
        expectation is asserted once, outside the per-attribute loop (a check placed
        inside a loop over the dictionary could never be reached for that key).

        """
        image = ImageData()

        # Get list of attributes, and set base image values to obj.attribute = ATTRIBUTE
        attr_dict = {x: x.upper() for x in image._list_attributes()
                     if not isinstance(x, list)}
        del attr_dict[ImageData.ID]
        attr_dict[ImageData.FILENAME] = self.DNE_FILENAME
        for attr, val in attr_dict.items():
            setattr(image, attr, val)

        # Build ImageData object using class method
        test_image = ImageData.build_obj(dictionary=attr_dict)

        # Verify ID is defined as the filename without the extension.
        # NOTE(review): relies on build_obj() deriving the ID from the filename
        # when no ID is supplied -- confirm against ImageData.build_obj.
        assert_equals(getattr(test_image, ImageData.ID),
                      self.DNE_FILENAME.split('.')[0])

        for attr, val in attr_dict.items():
            # Verify each attribute value matches the image object attribute value
            assert_equals(getattr(image, attr), getattr(test_image, attr))

            # Verify each attribute value matches the expected value (obj.attribute = ATTRIBUTE)
            assert_equals(getattr(test_image, attr), val)
示例#3
0
    def test_verify_locations_dne(self):
        """
        _verify_locations() should return an empty list when the object's filename
        is DNE_FILENAME and its only recorded location is DL_DIR.

        """
        image = ImageData()
        image.filename = self.DNE_FILENAME
        image.locations.append(DL_DIR)

        assert_equals(image._verify_locations(image), [])
示例#4
0
    def test_convert_to_dict(self):
        """
        to_dict() should return exactly the attribute/value pairs that were set
        on the object (each non-list attribute set to its upper-cased name).

        """
        image = ImageData()
        expected = {name: name.upper() for name in image._list_attributes()
                    if not isinstance(name, list)}
        for name, value in expected.items():
            setattr(image, name, value)

        assert_equals(expected, image.to_dict())
示例#5
0
    def test_add_obj_return_new_using_combine(self):
        """
        combine(..., use_self=False) with a default ImageData should yield an object
        whose id_, filename, description, dl_status and image_name equal those of
        the object combine() was called on.

        """
        source = ImageData(image_id=self.DNE_FILENAME.split('.')[0])
        other = ImageData()

        merged = source.combine(other=other, use_self=False)

        for attr in ('id_', 'filename', 'description', 'dl_status',
                     'image_name'):
            assert_equals(getattr(merged, attr), getattr(source, attr))
示例#6
0
    def test_add_obj_return_new(self):
        """
        Adding two default ImageData objects with ``+`` should yield an object
        whose filename equals the right operand's and whose id_, description,
        dl_status and image_name equal the left operand's.

        """
        left = ImageData()
        right = ImageData()

        combined = left + right

        # filename is compared against the right-hand operand...
        assert_equals(combined.filename, right.filename)

        # ...everything else against the left-hand operand.
        for attr in ('id_', 'description', 'dl_status', 'image_name'):
            assert_equals(getattr(combined, attr), getattr(left, attr))
示例#7
0
    def __init__(self,
                 image_url: str,
                 dl_dir: str,
                 url_split_token: str = None,
                 image_info: ImageData = None,
                 use_wget: bool = False,
                 test: bool = False) -> None:
        """
        Set up a DownloadPX instance and parse the image info.

        :param image_url: (str) URL of image to download
        :param dl_dir: (str) Local location where to store downloaded image
        :param url_split_token: (str) Key to determining name of image in URL;
            falls back to the class's URL_KEY when not provided (or falsy)
        :param image_info: (ImageData) Object used to track image metadata;
            a new ImageData is created when not provided (or falsy)
        :param use_wget: (bool) Use wget instead of requests. Default = False
        :param test: (bool) Testing flag, stored as self.test. Note that
            parse_image_info() is still invoked by this constructor.

        """
        super(DownloadPX, self).__init__(image_url=image_url, dl_dir=dl_dir)

        # Caller-supplied options, with defaults for the optional ones.
        self.url_split_token = (
            url_split_token if url_split_token else self.URL_KEY)
        self.image_info = image_info if image_info else ImageData()
        self.use_wget = use_wget
        self.test = test

        # Placeholders; nothing is known about the image at this point.
        self.id_ = None
        self.image_name = None
        self.dl_file_spec = None
        self._status = Status.NOT_SET

        # Must run last: every attribute above is in place before parsing.
        self.parse_image_info()
示例#8
0
    def _build_inventory_dict(
            self, content_list: List[dict]) -> Dict[str, ImageData]:
        """
        Assemble the inventory dictionary from the parsed JSON file contents.

        Each blob maps an image name to its info dictionary. For every entry an
        ImageData object is built; a name ending in 'jpg' is reduced to the part
        before its first '.'. The entry then goes to one of two places:

          * inventory (key = image_name, value = ImageData) - when the name is not
            yet in the inventory AND its download status is DOWNLOADED
          * duplicates (via _add_to_dups) - in every other case, i.e. the name is
            already inventoried, or the image was not downloaded (it may have
            already existed and been included in the JSON)

        Finally _update_info() is given both collections so inventory metadata can
        be filled in from the duplicates; its result is returned.

        :param content_list: List of JSON blobs (1 blob per file)
        :return: Dictionary of Images in inventory (key=image_name, value=ImageData of image)

        """
        inventory = {}
        duplicates = {}

        for blob in content_list:
            for name, info in blob.items():
                candidate = ImageData.build_obj(info)
                if name.endswith('jpg'):
                    name = name.partition(".")[0]

                # The status lookup only happens for names not yet inventoried
                # (short-circuit), exactly one lookup per such entry.
                if (name not in inventory and
                        info[ImageData.DL_STATUS] == DownloadStatus.DOWNLOADED):
                    inventory[name] = candidate
                    continue

                # Either already inventoried or not downloaded: track as a DUP.
                self._add_to_dups(duplicates, candidate)
                LOG.debug(f"Download Status for '{candidate.image_name}': "
                          f"{candidate.dl_status}")
                LOG.debug(
                    f"Duplicate in the inventory? {candidate.image_name in inventory}"
                )

        # Update the metadata of the inventory, if needed, and return the inventory dict.
        return self._update_info(inventory, duplicates)
示例#9
0
    def _add_imagedata_object(self, file_id: str) -> None:
        """
        Register file_id in _inventory with a new ImageData object, unless an entry
        for it already exists. The new object's download status is set to
        DOWNLOADED and its filename to file_id followed by INV_FILE_EXT.

        :param file_id: Image id to add to _inventory

        :return: None

        """
        # Existing entries are left untouched.
        if file_id in self._inventory:
            return

        entry = self._inventory[file_id] = ImageData()
        setattr(entry, ImageData.DL_STATUS, DownloadStatus.DOWNLOADED)
        setattr(entry, ImageData.FILENAME, f"{file_id}{self.INV_FILE_EXT}")
示例#10
0
    def test_build_obj_from_classmethod(self):
        """
        Build an ImageData through build_obj() from a dictionary of
        attribute -> ATTRIBUTE values and compare it, attribute by attribute, with
        a manually populated reference object and with the expected literals.

        """
        reference = ImageData()

        # Map each non-list attribute name to its upper-cased form and copy the
        # values onto the reference object.
        expected = {name: name.upper() for name in reference._list_attributes()
                    if not isinstance(name, list)}
        for name, value in expected.items():
            setattr(reference, name, value)
            # The reference filename additionally carries the image extension.
            if name.lower() == 'filename':
                reference.filename = f"{reference.filename}.{self.IMAGE_EXT}"

        # Object under test, built via the class method
        built = ImageData.build_obj(dictionary=expected)

        for name, value in expected.items():
            # Built object agrees with the reference object
            assert_equals(getattr(reference, name), getattr(built, name))

            # Code automatically appends image extension to filename if missing.
            if name.lower() == 'filename':
                value = f"{value}.{self.IMAGE_EXT}"

            # Built object agrees with the expected value (obj.attribute = ATTRIBUTE)
            assert_equals(getattr(built, name), value)
示例#11
0
def build_data_element(index: int) -> ImageData:
    """
    Create a mock ImageData object whose field values are derived from index.

    Two fields are random: 'classification_metadata' is a sorted, de-duplicated
    random sample (possibly empty) of category_1..category_6, and 'dl_status'
    is a random pick from DownloadStatus.get_statuses().

    :param index: Value to identify the object in the data set

    :return: Instantiated ImageData Object

    """
    LOG.debug(f"Building dataset #:{index}")
    categories = [f"category_{num}" for num in range(1, 7)]

    # Random draws are taken in a fixed order: sample size, the sample itself,
    # then the download status.
    sample_size = randint(0, len(categories))
    classification = sorted(set(choices(population=categories, k=sample_size)))
    dl_status = choice(DownloadStatus.get_statuses())

    # The index repeated eight times forms the tail of both mock URLs.
    url_token = str(index) * 8

    payload = {
        'image_name': f"image_{index}",
        'description': f"Mock Image Data - {index}",
        'page_url': 'http://foo.com/page/{0:<16}'.format(url_token).strip(),
        'image_url': 'http://foo.com/image/{0:<16}'.format(url_token).strip(),
        'author': f"Picasso{index}",
        'filename': 'test_data_{0}.jpg'.format(index),
        'image_date': "01/{0:02d}/19".format(index % 31),
        'resolution': "1600x7{0:02d}".format(index),
        'downloaded_on': "08/{0:02d}/19".format(index % 30),
        'classification_metadata': classification,
        'download_duration': index,
        'locations': '/tmp/pdl/images',
        'dl_status': dl_status,
        'mod_status': ModStatus.NEW,
        'error_info': None,
    }
    return ImageData.build_obj(payload)
示例#12
0
from random import choice

from PDL.engine.images.image_info import ImageData
from PDL.engine.images.status import DownloadStatus as Status
from PDL.reporting.summary import ReportingSummary

# Size of the shared fixture and the status names to draw from.
num_data = 100
statuses = Status.get_statuses()

# Shared fixture: ImageData objects, each given a randomly chosen status
# (the status name is resolved to its value on the Status class).
data = []
for _ in range(num_data):
    dummy_image = ImageData()
    dummy_image.dl_status = getattr(Status, choice(statuses))
    data += [dummy_image]


class TestReportingSummary(object):
    """Checks ReportingSummary tallies against a tally computed in the test."""

    def test_reporting_summary_values(self):
        """
        tally_status_results() should report, for every status, the same count
        as a manual count over the module-level fixture data.

        """
        summary = ReportingSummary(image_data=data)
        reported = summary.tally_status_results()

        # Independent count, starting from the summary's own empty tally.
        expected = summary.init_status_dict_()
        for image in data:
            expected[image.dl_status] += 1

        for status_name in statuses:
            status_value = getattr(Status, status_name)
            assert reported[status_value] == expected[status_value]
示例#13
0
    def _build_test_objs(description: str = "description_1",
                         status: str = Status.DOWNLOADED,
                         name: str = 'obj_1',
                         filename: str = DNE_FILENAME):
        """
        Create two ImageData objects that share a filename and id.

        The first object receives the given description, status and name; the
        second receives the fixed author 'author_2' and image name 'obj_2'. Both
        get the given filename, and an id_ equal to the part of the filename
        before its first '.'.

        :param description: Description for the first object
        :param status: Download status for the first object
        :param name: Image name for the first object
        :param filename: Filename for both objects

        :return: Tuple of (first object, second object)

        """
        obj_1 = ImageData()
        obj_2 = ImageData()
        shared_id = filename.split('.')[0]

        # Attributes are applied in a fixed order: filename before id_.
        first_values = (
            ('description', description),
            ('dl_status', status),
            ('image_name', name),
            ('filename', filename),
            ('id_', shared_id),
        )
        for attr, value in first_values:
            setattr(obj_1, attr, value)

        second_values = (
            ('filename', filename),
            ('author', 'author_2'),
            ('image_name', 'obj_2'),
            ('id_', shared_id),
        )
        for attr, value in second_values:
            setattr(obj_2, attr, value)

        return obj_1, obj_2
示例#14
0
 def __init__(self, page_url: str) -> None:
     """
     Initialise the parser for a single display page.

     :param page_url: URL of the page, passed through to the parent class
     """
     super(ParseDisplayPage, self).__init__(page_url=page_url)

     # Empty metadata container for the image.
     self.image_info = ImageData()

     # Nothing has been populated yet.
     self.source_list = self._metadata = None