def test_build_obj_from_classmethod_with_extra_attributes(self):
    """
    Verify ImageData.build_obj() ignores a dictionary key that is not one of
    the attributes reported by ImageData._list_attributes(), while every
    reported attribute is still populated from the dictionary.
    """
    bogus_name = 'bogus_attr'
    reference = ImageData()

    # Map each reported attribute name to its upper-cased name
    # (obj.attribute = ATTRIBUTE).
    attr_dict = {
        name: name.upper()
        for name in reference._list_attributes()
        if not isinstance(name, list)
    }

    def expected_value(name, value):
        # The code under test appends the image extension to the filename
        # if it is missing, so the expected filename carries it as well.
        if name.lower() == 'filename':
            return value + f".{self.IMAGE_EXT}"
        return value

    # Populate the reference object by hand.
    for name, value in attr_dict.items():
        setattr(reference, name, expected_value(name, value))

    # Add the unknown key only after the reference object has been populated.
    attr_dict[bogus_name] = bogus_name.upper()

    # Build ImageData object using class method
    built = ImageData.build_obj(dictionary=attr_dict)

    for name, value in attr_dict.items():
        # This attribute should not exist/be defined
        if name == bogus_name:
            assert not hasattr(built, bogus_name)
            continue

        # The built object must match the hand-populated reference object...
        assert_equals(getattr(reference, name), getattr(built, name))

        # ...and the expected literal value (obj.attribute = ATTRIBUTE).
        assert_equals(getattr(built, name), expected_value(name, value))
def test_build_obj_from_classmethod_without_id(self):
    """
    Verify ImageData.build_obj() when the source dictionary has no id entry:
    the id is expected to be the filename without its extension, and every
    attribute present in the dictionary is copied onto the built object.
    """
    image = ImageData()

    # Get list of attributes, and set base image values to obj.attribute = ATTRIBUTE
    attr_dict = {
        x: x.upper()
        for x in image._list_attributes()
        if not isinstance(x, list)
    }

    # Remove the id so build_obj() receives no id, and use the known filename.
    del attr_dict[ImageData.ID]
    attr_dict[ImageData.FILENAME] = self.DNE_FILENAME

    for attr, val in attr_dict.items():
        setattr(image, attr, val)

    # Build ImageData object using class method
    test_image = ImageData.build_obj(dictionary=attr_dict)

    # Verify ID is defined as the filename without the extension.
    # This check lives outside the loop: the id key was deleted from
    # attr_dict above, so a check keyed on it inside the loop never runs.
    # The extension separator is '.', not ','.
    assert_equals(getattr(test_image, ImageData.ID),
                  self.DNE_FILENAME.split('.')[0])

    for attr, val in attr_dict.items():
        # Verify each attribute value matches the image object attribute value
        assert_equals(getattr(image, attr), getattr(test_image, attr))

        # Verify each attribute value matches the expected value (obj.attribute = ATTRIBUTE)
        assert_equals(getattr(test_image, attr), val)
def test_verify_locations_dne(self):
    """
    Verify _verify_locations() returns an empty list for an object whose
    filename is DNE_FILENAME and whose only location is DL_DIR.
    """
    candidate = ImageData()
    candidate.filename = self.DNE_FILENAME
    candidate.locations.append(DL_DIR)

    # The object is passed explicitly as the argument, as in the original call.
    assert_equals(candidate._verify_locations(candidate), [])
def test_convert_to_dict(self):
    """
    Verify to_dict() returns exactly the attribute values that were set on
    the object (each attribute set to its own upper-cased name).
    """
    image = ImageData()
    expected = {
        name: name.upper()
        for name in image._list_attributes()
        if not isinstance(name, list)
    }

    for name, value in expected.items():
        setattr(image, name, value)

    assert_equals(expected, image.to_dict())
def test_add_obj_return_new_using_combine(self):
    """
    Verify combine(use_self=False) with a default second object yields an
    object whose compared fields equal those of the first object.
    """
    primary = ImageData(image_id=self.DNE_FILENAME.split('.')[0])
    secondary = ImageData()

    combined = primary.combine(other=secondary, use_self=False)

    # Same fields, in the same order, as the individual assertions.
    for field in ('id_', 'filename', 'description', 'dl_status', 'image_name'):
        assert_equals(getattr(combined, field), getattr(primary, field))
def test_add_obj_return_new(self):
    """
    Verify the + operator on two default ImageData objects: the filename is
    compared against the right operand, the remaining fields against the left.
    """
    left = ImageData()
    right = ImageData()

    total = left + right

    assert_equals(total.filename, right.filename)
    for field in ('id_', 'description', 'dl_status', 'image_name'):
        assert_equals(getattr(total, field), getattr(left, field))
def __init__(self, image_url: str, dl_dir: str, url_split_token: str = None,
             image_info: ImageData = None, use_wget: bool = False,
             test: bool = False) -> None:
    """
    Set up a DownloadPX instance and parse the image info.

    :param image_url: (str) URL of image to download
    :param dl_dir: (str) Local location where to store downloaded image
    :param url_split_token: (str) Key to determining name of image in URL.
        Falls back to self.URL_KEY when not provided (or falsy).
    :param image_info: (ImageData) Object used to track image metadata.
        A new ImageData() is created when not provided (or falsy).
    :param use_wget: (bool) Use wget instead of requests. Default = False
    :param test: (bool) Testing flag, stored as self.test.
        NOTE(review): parse_image_info() is still called unconditionally
        below; presumably it checks self.test itself -- confirm.

    """
    super(DownloadPX, self).__init__(image_url=image_url, dl_dir=dl_dir)

    # Caller-supplied configuration, with defaults for falsy values.
    self.url_split_token = url_split_token if url_split_token else self.URL_KEY
    self.image_info = image_info if image_info else ImageData()
    self.use_wget = use_wget
    self.test = test

    # State that is not known at construction time.
    self.id_ = None
    self.image_name = None
    self.dl_file_spec = None
    self._status = Status.NOT_SET

    self.parse_image_info()
def _build_inventory_dict(
        self, content_list: List[dict]) -> Dict[str, ImageData]:
    """
    Build the inventory dictionary from the contents of the JSON files.

    Every image entry is turned into an ImageData object. An entry goes into
    the inventory (key = image_name, value = ImageData object) only when its
    name is not in the inventory yet AND its download status is DOWNLOADED.
    Every other entry is recorded as a duplicate via self._add_to_dups().

    Once all content has been processed, the inventory and the duplicates
    are handed to self._update_info(), whose result is returned.

    :param content_list: List of JSON blobs (1 blob per file)

    :return: Dictionary of Images in inventory
        (key=image_name, value=ImageData of image)

    """
    inventory = {}
    duplicates = {}

    for blob in content_list:
        for image_name, image_info in blob.items():
            image_obj = ImageData.build_obj(image_info)

            # Names ending in 'jpg' are keyed by the part before the first '.'
            if image_name.endswith('jpg'):
                image_name = image_name.split(".")[0]

            # The status lookup is only evaluated for names not yet in the
            # inventory (short-circuit).
            is_first_download = (
                image_name not in inventory
                and image_info[ImageData.DL_STATUS] == DownloadStatus.DOWNLOADED
            )
            if is_first_download:
                inventory[image_name] = image_obj
                continue

            # Either the name is already in the inventory, or the image was
            # not downloaded (it may have already existed and been included
            # in the JSON): classify as a DUP.
            self._add_to_dups(duplicates, image_obj)
            LOG.debug(
                f"Download Status for '{image_obj.image_name}': "
                f"{image_obj.dl_status}")
            LOG.debug(
                f"Duplicate in the inventory? {image_obj.image_name in inventory}"
            )

    # Update the metadata of the inventory, if needed, and return the inventory dict.
    return self._update_info(inventory, duplicates)
def _add_imagedata_object(self, file_id: str) -> None:
    """
    Create the _inventory entry for file_id, unless one already exists.

    A new entry is an ImageData object with its download status set to
    DownloadStatus.DOWNLOADED and its filename set to file_id followed by
    self.INV_FILE_EXT.

    :param file_id: Image id to add to _inventory

    :return: None

    """
    # Existing entries are left untouched.
    if file_id in self._inventory:
        return

    self._inventory[file_id] = ImageData()
    entry = self._inventory[file_id]
    setattr(entry, ImageData.DL_STATUS, DownloadStatus.DOWNLOADED)
    setattr(entry, ImageData.FILENAME, f"{file_id}{self.INV_FILE_EXT}")
def test_build_obj_from_classmethod(self):
    """
    Verify ImageData.build_obj() populates every attribute reported by
    _list_attributes() from the dictionary, with the image extension
    appended to the filename.
    """
    reference = ImageData()

    # Map each reported attribute name to its upper-cased name
    # (obj.attribute = ATTRIBUTE).
    attr_dict = {
        name: name.upper()
        for name in reference._list_attributes()
        if not isinstance(name, list)
    }

    # Populate the reference object by hand; its filename carries the extension.
    for name, value in attr_dict.items():
        setattr(reference, name, value)
        if name.lower() == 'filename':
            reference.filename += f".{self.IMAGE_EXT}"

    # Build ImageData object using class method
    built = ImageData.build_obj(dictionary=attr_dict)

    for name, value in attr_dict.items():
        # The built object must match the hand-populated reference object.
        assert_equals(getattr(reference, name), getattr(built, name))

        # Code automatically appends image extension to filename if missing.
        is_filename = name.lower() == 'filename'
        expected = value + f".{self.IMAGE_EXT}" if is_filename else value

        # The built object must match the expected value (obj.attribute = ATTRIBUTE).
        assert_equals(getattr(built, name), expected)
def build_data_element(index: int) -> ImageData:
    """
    Create an artificial ImageData object whose field values derive from index.

    The classification metadata (a sorted, de-duplicated random sample of
    category names) and the download status are chosen at random.

    :param index: Value to identify the object in the data set

    :return: Instantiated ImageData Object

    """
    LOG.debug(f"Building dataset #:{index}")

    metadata = [f"category_{x}" for x in range(1, 7)]
    url_suffix = str(index) * 8

    # Random draws are made in the same order as the dictionary entries that
    # use them: classification metadata first, download status second.
    sample_size = randint(0, len(metadata))
    classification = sorted(set(choices(population=metadata, k=sample_size)))
    dl_status = choice(DownloadStatus.get_statuses())

    element = {
        'image_name': f"image_{index}",
        'description': f"Mock Image Data - {index}",
        'page_url': 'http://foo.com/page/{0:<16}'.format(url_suffix).strip(),
        'image_url': 'http://foo.com/image/{0:<16}'.format(url_suffix).strip(),
        'author': f"Picasso{index}",
        'filename': 'test_data_{0}.jpg'.format(index),
        'image_date': "01/{0:02d}/19".format(index % 31),
        'resolution': "1600x7{0:02d}".format(index),
        'downloaded_on': "08/{0:02d}/19".format(index % 30),
        'classification_metadata': classification,
        'download_duration': index,
        'locations': '/tmp/pdl/images',
        'dl_status': dl_status,
        'mod_status': ModStatus.NEW,
        'error_info': None,
    }
    return ImageData.build_obj(element)
from random import choice

from PDL.engine.images.image_info import ImageData
from PDL.engine.images.status import DownloadStatus as Status
from PDL.reporting.summary import ReportingSummary

# Module-level fixture: num_data ImageData objects, each given a randomly
# chosen download status (looked up on Status by name).
data = []
num_data = 100
statuses = Status.get_statuses()

for _ in range(num_data):
    dummy_image = ImageData()
    dummy_image.dl_status = getattr(Status, choice(statuses))
    data.append(dummy_image)


class TestReportingSummary(object):
    """Checks ReportingSummary's status tally against an independent count."""

    def test_reporting_summary_values(self):
        """
        The per-status counts from tally_status_results() must equal the
        counts computed here directly from the fixture data.
        """
        summary = ReportingSummary(image_data=data)
        reported = summary.tally_status_results()

        # Start from the summary's own initial status dictionary and count
        # each fixture image under its status.
        expected = summary.init_status_dict_()
        for image in data:
            expected[image.dl_status] += 1

        for status_name in statuses:
            key = getattr(Status, status_name)
            assert reported[key] == expected[key]
def _build_test_objs(description: str = "description_1",
                     status: str = Status.DOWNLOADED,
                     name: str = 'obj_1',
                     filename: str = DNE_FILENAME):
    """
    Create a pair of ImageData objects that share a filename and id.

    The first object carries the given description, download status and
    image name; the second has author 'author_2' and image name 'obj_2'.
    Both ids are the part of filename before its first '.'.

    :param description: Description set on the first object
    :param status: Download status set on the first object
    :param name: Image name set on the first object
    :param filename: Filename set on both objects

    :return: Tuple of (first object, second object)

    """
    first = ImageData()
    second = ImageData()
    shared_id = filename.split('.')[0]

    # Attribute assignment order per object matches the original sequence.
    first.description = description
    first.dl_status = status
    first.image_name = name
    first.filename = filename
    first.id_ = shared_id

    second.filename = filename
    second.author = 'author_2'
    second.image_name = 'obj_2'
    second.id_ = shared_id

    return first, second
def __init__(self, page_url: str) -> None:
    """
    Set up a ParseDisplayPage instance for the given page.

    :param page_url: URL of the page; forwarded to the parent initializer

    """
    super(ParseDisplayPage, self).__init__(page_url=page_url)

    # Fresh metadata container for this page.
    self.image_info = ImageData()

    # Not populated at construction time.
    self.source_list = self._metadata = None