示例#1
0
def execute(file_filter_include, file_filter_exclude):
    """
    Walk the cached raw local library and make sure every image that
    passes the filters is still present on disk.

    Arguments:
        file_filter_include: when truthy, only images whose path
            contains this substring are looked at.
        file_filter_exclude: when truthy, images whose name contains
            this substring are skipped.

    Terminates the process through sys.exit() with a message on the
    first filtered image whose path does not exist.  Returns None
    otherwise.
    """
    LocalLibrary.load_library('raw')

    # Accumulators described as {"acronym": [list of image paths]};
    # nothing in this function fills them.
    result = {}
    albums_seen = {}

    library = LocalLibrary.cache_raw()
    albums = library['albums']
    images = library['images']

    for entry in images:
        name = entry['name']
        path = entry['path']

        # Apply the name based exclusion and the path based inclusion
        if file_filter_exclude and file_filter_exclude in name:
            continue
        if file_filter_include and file_filter_include not in path:
            continue

        if os.path.exists(path):
            continue

        # A cached path missing on disk means the cache is stale
        sys.exit(
            "Local library not updated.  Please rerun download_local_library again"
        )
def find(et, album_path_filter, file_filter_include, file_filter_exclude):
    """
    Collect the distinct values of the "Model" tag across the cached
    raw local library and save them as JSON in the gphoto cache folder.

    Arguments:
        et: object whose get_tag(tag_name, path) is used to read the tag.
        album_path_filter: when truthy, only images whose path starts
            with this prefix are considered.  Its base name is added
            to the output file name.
        file_filter_include: when truthy, only images whose path
            contains this substring are considered.  When not None it
            is also appended to the output file name.
        file_filter_exclude: when truthy, images whose name contains
            this substring are skipped.

    The saved structure is a dict whose keys are the tag values seen
    and whose values are all None.  A failed tag read is recorded
    under the None key.
    """
    LocalLibrary.load_library('raw')

    leaf = os.path.basename(album_path_filter) if album_path_filter else None

    library = LocalLibrary.cache_raw()
    images = library['images']
    albums = library['albums']

    models = {}
    for entry in images:
        name = entry['name']
        path = entry['path']

        if file_filter_exclude and file_filter_exclude in name:
            continue
        if file_filter_include and file_filter_include not in path:
            continue
        if album_path_filter and not path.startswith(album_path_filter):
            continue

        # Best effort: any failure reading the tag counts as "no model"
        try:
            model = et.get_tag("Model", path)
        except Exception:
            model = None

        models[model] = None

    # Build the output file name from the filters in effect
    name_parts = ["get_unique_models"]
    if leaf:
        name_parts.append('_d' + leaf)
    if file_filter_include is not None:
        name_parts.append('_' + file_filter_include)
    name_parts.append('.json')

    saveto = os.path.join(gphoto.cache_dir(), ''.join(name_parts))
    print(f"Saving to: '{saveto}'")

    with open(saveto, "w") as out_file:
        json.dump(models, out_file, indent=2)
def execute(album_path_filter):
    """
    Collect the short words (1 to 3 characters) that appear in album
    names of the cached raw local library and save them as JSON in the
    gphoto cache folder.

    Arguments:
        album_path_filter: when truthy, only albums whose path starts
            with this prefix are considered.  Its base name is added
            to the output file name.

    The saved structure is of the form
        {
            "Word": null,
                ...
        }
    where each key is a short word passed through str.capitalize().
    """
    LocalLibrary.load_library('raw')

    album_path_filter_leaf = None
    if album_path_filter:
        album_path_filter_leaf = os.path.basename(album_path_filter)

    result = {}

    cache = LocalLibrary.cache_raw()
    albums = cache['albums']

    for album in albums:
        album_name = album['name']
        album_path = album['path']

        # filter out albums
        if album_path_filter and not album_path.startswith(album_path_filter):
            continue

        # split(' ') yields an empty string for every pair of adjacent
        # spaces; those are not words and must not be reported.
        for word in album_name.split(' '):
            if 0 < len(word) <= 3:
                result[word.capitalize()] = None

    saveto_filename = "get_small_words_in_album_names"
    if album_path_filter_leaf:
        saveto_filename += '_d' + album_path_filter_leaf

    saveto_filename += '.json'
    saveto = os.path.join(gphoto.cache_dir(), saveto_filename)
    print(f"Saving to: '{saveto}'")

    with open(saveto, "w") as cache_file:
        json.dump(result, cache_file, indent=2)
示例#4
0
  def find():
    """
    Rebuild and save the cache of the local raw pics folder, then
    report every image name that occurs under more than one path.

    Returns a list of dicts, one per duplicated name:
        {'name': image name, 'paths': [image paths]}
    """
    gphoto.init()
    LocalLibrary.cache_raw_library("p:\\pics")
    LocalLibrary.save_library('raw')

    library = LocalLibrary.cache_raw()
    all_images = library['images']
    all_image_ids = library['image_ids']  # looked up but not used below

    # Group image paths under their image name
    paths_by_name = {}
    for entry in all_images:
      name = entry['name']
      path = entry['path']
      paths_by_name.setdefault(name, []).append(path)

    # Keep only the names that map to more than one path
    return [
      {'name': name, 'paths': list(paths)}
      for name, paths in paths_by_name.items()
      if len(paths) > 1
    ]
def do_work(et, google_image_filter, album_folder_path, list_only):
    """
    Copy captions from Google Photos images onto the files of one
    local album.

    The local album's image paths are sorted, the matching Google
    images are sorted by file name, and the two lists are paired by
    position: the i-th local file receives the description of the
    i-th Google image.

    Arguments:
        et: handle passed through to ImageUtils.set_caption().
        google_image_filter: substring a Google image file name must
            contain to be selected.
        album_folder_path: key used to look the album up in the
            'album_paths' index of the local library cache.  A failed
            lookup propagates to the caller.
        list_only: when truthy, only print both lists and return
            without setting any caption.

    Returns None.  When the two lists differ in length an error is
    printed and no caption is written.
    """
    # Find folder album in the database
    LocalLibrary.load_library('raw')
    local_library_cache = LocalLibrary.cache_raw()
    images = local_library_cache['images']
    albums = local_library_cache['albums']
    album_paths = local_library_cache['album_paths']

    album = albums[album_paths[album_folder_path]]
    local_album_path = album['path']

    print(f"[INFO]: Found album '{local_album_path}'")

    # Collect the sorted list of local album files.  The sort result
    # must be kept: the pairing below relies on positional order.
    local_files_results = sorted(
        images[image_idx]['path'] for image_idx in album['images'])
    util.pprint(local_files_results)
    print(f"[INFO] Local files count '{len(local_files_results)}'")

    # Collect (filename, description) of the matching google images
    gphoto.init()
    GoogleImages.load_images()
    google_images = GoogleImages.cache()['list']

    google_images_results = []
    for google_image in google_images:
        image_name = google_image['filename']
        if image_name.find(google_image_filter) < 0:
            continue
        google_images_results.append((image_name, google_image['description']))

    google_images_results.sort(key=lambda record: record[0])
    util.pprint(google_images_results)
    print(f"[INFO] Google files count '{len(google_images_results)}'")

    # Positional pairing is meaningless when the counts differ, so
    # stop here instead of writing misaligned captions.
    if len(local_files_results) != len(google_images_results):
        print(
            f"[ERROR]: Count mismatch local: '{len(local_files_results)}', google: '{len(google_images_results)}'.  Aborting"
        )
        return

    if list_only:
        return

    # Set each local file's caption from its google counterpart
    for local_image_path, (_, desc) in zip(local_files_results,
                                           google_images_results):
        # Get image extension and identify it as an image or video
        image_name = os.path.basename(local_image_path)
        image_ext = ImageUtils.get_file_extension(image_name)
        is_video = ImageUtils.is_ext_video(image_ext)

        ImageUtils.set_caption(et, local_image_path, desc, is_video)
def main_with_exiftool(et, file_filter_pattern):
    """
    Fill in a missing date shot from the file name.

    For every image of the cached raw local library whose date tag is
    missing and whose name looks like "2015-02-17 19.30.28.jpg", the
    date and time are parsed from the name and written to the file by
    running the exiftool command line.

    Arguments:
        et: object whose get_tag(tag_name, path) is used to read the
            existing date tag.
        file_filter_pattern: when truthy, only images whose path
            contains this substring are processed.

    Images use "Exif:DateTimeOriginal"; files whose extension is in
    gphoto.core.VIDEO_EXTENSIONS use "QuickTime:CreateDate".
    Returns None.  The exiftool return code is printed, not checked.
    """
    LocalLibrary.load_library('raw')

    cache = LocalLibrary.cache_raw()
    images = cache['images']

    for image in images:
        image_name = image['name']
        image_path = image['path']
        image_ext = ImageUtils.get_file_extension(image_name)

        # if filter is specified and does not match to the file path
        # then ignore the file
        if file_filter_pattern and image_path.find(file_filter_pattern) < 0:
            continue

        if not os.path.exists(image_path):
            continue

        is_video = image_ext in gphoto.core.VIDEO_EXTENSIONS

        # If the file already has a date shot then leave it alone.
        # The method is get_tag, as used by the other scripts here.
        if is_video:
            tag = et.get_tag("QuickTime:CreateDate", image_path)
        else:
            tag = et.get_tag("Exif:DateTimeOriginal", image_path)
        if tag is not None:
            continue

        # at this point dateshot is missing
        # parse the file and check for format as "2015-02-17 19.30.28.jpg"
        splits = image_name.split(' ')
        if len(splits) < 2:
            continue

        file_date_splits = splits[0].split('-')
        if len(file_date_splits) < 3:
            continue

        # Expect hour, minute, second and the extension
        file_time_splits = splits[1].split('.')
        if len(file_time_splits) < 4:
            continue

        dateshot = ':'.join(file_date_splits) + ' ' + ':'.join(
            file_time_splits[0:3])

        if not is_video:
            ret = subprocess.run([
                "exiftool", f"-EXIF:DateTimeOriginal={dateshot}",
                "-overwrite_original", image_path
            ])
            print(f"Image Date Set: {image_path}")
        else:
            ret = subprocess.run([
                "exiftool", f"-QuickTime:CreateDate={dateshot}",
                "-overwrite_original", "-ext", "mov", "-ext", "mp4", image_path
            ])
            print(f"Video Date Set: {image_path}")

        print(f"retcode: {ret.returncode}, {dateshot}, {image_path}")
def execute(file_filter_include, file_filter_exclude):
    """
    Find file name acronyms that are used in more than one album of
    the cached raw local library and save them as JSON in the gphoto
    cache folder.

    A file name is considered when it is at least
    _FILEPREFIX_PATTERN_LEN long and its base name splits on '_' into
    a date part (8+ characters), a time part (6+ characters) and a
    remainder, which is the acronym.  Only the first image of each
    (album name, acronym) pair is recorded.

    Arguments:
        file_filter_include: when truthy, only images whose path
            contains this substring are considered.  When not None it
            is also appended to the output file name.
        file_filter_exclude: when truthy, images whose name contains
            this substring are skipped.

    The saved structure is
        {
            "acronym": [list of image paths],
                ...
        }
    restricted to acronyms with more than one recorded path.
    Terminates through sys.exit() when a filtered image path is
    missing on disk.
    """
    LocalLibrary.load_library('raw')

    library = LocalLibrary.cache_raw()
    albums = library['albums']
    images = library['images']

    paths_by_acronym = {}
    seen_album_acronyms = {}

    for entry in images:
        name = entry['name']
        path = entry['path']

        if file_filter_exclude and file_filter_exclude in name:
            continue
        if file_filter_include and file_filter_include not in path:
            continue

        # A cached path missing on disk means the cache is stale
        if not os.path.exists(path):
            sys.exit(
                "Local library not updated.  Please rerun download_local_library again"
            )

        # The name has to look like yyyymmdd_hhmmss_XXXX
        if len(name) < _FILEPREFIX_PATTERN_LEN:
            continue
        pieces = os.path.splitext(name)[0].split('_')
        if len(pieces) < 3:
            continue
        date_part, time_part = pieces[0], pieces[1]
        acronym = '_'.join(pieces[2:])
        if len(date_part) < 8 or len(time_part) < 6:
            continue

        # Look up the album that owns this image
        parent_album = albums[entry['parent']]
        album_name = parent_album['name']
        album_path = parent_album['path']  # read but not used below

        # One image per (album, acronym) pair is enough
        seen_key = album_name + '__' + acronym
        if seen_key in seen_album_acronyms:
            continue
        seen_album_acronyms[seen_key] = None

        paths_by_acronym.setdefault(acronym, []).append(path)

    # Keep only the acronyms recorded more than once
    duplicates = {
        acronym: paths
        for acronym, paths in paths_by_acronym.items()
        if len(paths) > 1
    }

    name_parts = ["test_dup_file_acronym"]
    if file_filter_include is not None:
        name_parts.append('_' + file_filter_include)
    name_parts.append('.json')

    saveto = os.path.join(gphoto.cache_dir(), ''.join(name_parts))
    print(f"Saving to: '{saveto}'")

    with open(saveto, "w") as out_file:
        json.dump(duplicates, out_file, indent=2)
示例#8
0
def _record_test_results(result, album_path, image_path, test_results):
    """Append one image's failed tests to result[reason][album_path]."""
    for test_result in test_results:
        if isinstance(test_result, tuple):
            # ("reason", "extra info") is stored as (extra info, path)
            mismatch_reason = test_result[0]
            entry = (test_result[1], image_path)
        else:
            mismatch_reason = test_result
            entry = image_path

        reason_result = result.setdefault(mismatch_reason, {})
        reason_result.setdefault(album_path, []).append(entry)


def check_album_readiness(et, album_path_filter_year, file_filter_include,
                          file_filter_exclude, test_missing_date_shot,
                          test_bad_date_shot, test_filename_FMT,
                          test_Tag_mismatch, test_missing_caption,
                          test_unique_caption, test_missing_caption_year,
                          test_missing_geotags):
    """
    Run the selected readiness tests over the albums of the cached raw
    local library and save the failures as JSON in the gphoto cache
    folder.

    Image names are expected to look like YYYYMMDD_HHmmSS...; a name
    that does not is taken as a sign that it does not match the date
    shot.

    Arguments:
        et: object whose get_tag()/get_tags() read metadata; it is
            also passed to ImageUtils.get_caption().
        album_path_filter_year: when truthy, only albums whose path
            contains "\\<year>\\" are checked; otherwise all albums.
        file_filter_include: when truthy, only images whose path
            contains this substring are checked.
        file_filter_exclude: when truthy, images whose name contains
            this substring are skipped.
        test_missing_date_shot: report images with no date tag.
        test_bad_date_shot: report date tags that do not split into a
            date and a time (needs test_missing_date_shot).
        test_filename_FMT: report names shorter than
            _IMAGE_PATTERN_LEN or without a '_' separator.
        test_Tag_mismatch: report names whose date/time prefix differs
            from the date tag.  It runs only for images where the
            filename and date tests above produced parsed values.
        test_missing_caption: report images without a caption.
        test_unique_caption: with test_missing_caption, collect
            captions that do not start with four digits.
        test_missing_caption_year: report captions that do not start
            with four digits.
        test_missing_geotags: report non-video images lacking any of
            the four GPS tags.

    The saved result is of the form
    {
        "reason value": {
            "album_path": [image path or [extra info, image path], ...],
                ...
        },
        "non-unique-captions": {"album_path": [captions]},
        "mismatch-album-image-captions": {
            "album_path": {"album_caption": ..., "image_caption": ...}
        }
    }

    Terminates through sys.exit() when an image path is missing on
    disk.
    """
    print("-------------------- args --------------------------")
    print(f"album_path_filter_year = {album_path_filter_year}")
    print(f"file_filter_include = {file_filter_include}")
    print(f"file_filter_exclude = {file_filter_exclude}")
    print(f"test_missing_date_shot = {test_missing_date_shot}")
    print(f"test_bad_date_shot = {test_bad_date_shot}")
    print(f"test_filename_FMT = {test_filename_FMT}")
    print(f"test_Tag_mismatch = {test_Tag_mismatch}")
    print(f"test_missing_caption = {test_missing_caption}")
    print(f"test_unique_caption = {test_unique_caption}")
    print(f"test_missing_caption_year = {test_missing_caption_year}")
    print(f"test_missing_geotags = {test_missing_geotags}")
    print("----------------------------------------------------")

    unique_caption_reason = "non-unique-captions"
    mismatch_album_image_caption_reason = "mismatch-album-image-captions"
    missing_geotags_reason = "missing-geotags"

    LocalLibrary.load_library('raw')

    result = {}

    # Build the pattern only when a year was given; formatting None
    # would produce "\None\" and silently exclude every album.
    album_path_filter_pattern = None
    if album_path_filter_year:
        album_path_filter_pattern = f"\\{album_path_filter_year}\\"
    print(f"album_path_filter_pattern = {album_path_filter_pattern}")

    cache = LocalLibrary.cache_raw()
    images = cache.get('images')
    albums = cache.get('albums')

    for album in albums:

        album_name = album['name']
        album_path = album['path']

        if album_path_filter_pattern and album_path.find(
                album_path_filter_pattern) < 0:
            continue

        # Album caption is the year of the leading date plus the rest
        # of the album name
        album_splits = album_name.split(' ')
        album_year = album_splits[0].split('-')[0]
        album_caption = album_year + ' ' + ' '.join(album_splits[1:])

        # Album level table of captions collected from its images
        unique_caption_dict = {}

        for image_idx in album['images']:
            image = images[image_idx]
            image_name = image['name']
            image_path = image['path']

            if file_filter_exclude and image_name.find(
                    file_filter_exclude) > -1:
                continue
            if file_filter_include and image_path.find(
                    file_filter_include) < 0:
                continue

            image_ext = ImageUtils.get_file_extension(image_name)
            is_video = ImageUtils.is_ext_video(image_ext)

            # Need to rerun local library caching
            if not os.path.exists(image_path):
                msg = "Local library not updated.  Please rerun download_local_library again"
                print(msg)
                sys.exit(msg)

            # Each failed test appends either "reason" or a tuple
            # ("reason", "extra info") to test_results
            mismatched = False
            test_results = []

            # Date shot: first non-empty value among the date tags
            tag = None
            if test_missing_date_shot:
                for tag_name in ("Exif:DateTimeOriginal", "Exif:CreateDate",
                                 "QuickTime:CreateDate"):
                    tag = et.get_tag(tag_name, image_path)
                    if tag is not None and len(tag) > 0:
                        break
                else:
                    mismatched = True
                    test_results.append("missing-date-shot")

            tagsplit = None
            if test_missing_date_shot and test_bad_date_shot and not mismatched:
                tagsplit = tag.split(' ')
                if len(tagsplit) < 2:
                    mismatched = True
                    test_results.append(("bad-date-shot", tag))

            # File name format
            mismatched_filename_format = False
            if test_filename_FMT:
                if len(image_name) < _IMAGE_PATTERN_LEN:
                    mismatched = True
                    mismatched_filename_format = True
                    test_results.append("filename-FMT")

            filedatetime = None
            if test_filename_FMT and not mismatched_filename_format:
                filedatetime = image_name.split('_')
                if len(filedatetime) < 2:
                    mismatched = True
                    mismatched_filename_format = True
                    test_results.append("filename-FMT")

            # Compare name prefix with the date tag.  Both parsed
            # values come from the tests above; without them (test
            # disabled, tag missing or malformed) there is nothing to
            # compare, so the test is skipped instead of crashing.
            if (test_Tag_mismatch and not mismatched_filename_format
                    and filedatetime is not None and tagsplit is not None
                    and len(tagsplit) >= 2):
                file_date = filedatetime[0]
                file_time = filedatetime[1][0:3]
                tag_date = ''.join(tagsplit[0].split(':'))
                tag_time = ''.join(tagsplit[1].split(':'))[0:3]

                if tag_date != file_date or tag_time != file_time:
                    mismatched = True
                    test_results.append(("tag-mismatch", tag))

            # Check missing Caption
            caption = None
            if test_missing_caption:
                caption = ImageUtils.get_caption(et, image_path, is_video)
                if caption is None or len(caption) <= 0:
                    mismatched = True
                    test_results.append("missing-caption")
                elif test_unique_caption:
                    if len(caption) > 4 and not caption[0:4].isdecimal():
                        unique_caption_dict[caption] = None

            # Check missing Caption year.  When the caption test did
            # not run, the caption is fetched here and a missing or
            # too short caption is reported as "missing-caption".
            if test_missing_caption_year:
                year_checkable = caption is not None
                if not test_missing_caption:
                    caption = ImageUtils.get_caption(et, image_path, is_video)
                    year_checkable = caption is not None and len(caption) >= 5
                    if not year_checkable:
                        mismatched = True
                        test_results.append("missing-caption")

                if year_checkable and not caption[0:4].isdecimal():
                    mismatched = True
                    test_results.append(("missing-caption-year", caption))

            # If caption has full date (YYYY-MM-...) then report it
            if caption is not None and len(caption) > 11:
                caption_year = caption[0:4]
                if caption_year.isdecimal(
                ) and caption[4] == '-' and caption[7] == '-':
                    mismatched = True
                    test_results.append(("full-date-prefix", caption))

            # If caption different from album then remember it
            if caption is not None and caption != album_caption:
                unique_caption_dict[caption] = None

            # Test missing geotags
            if test_missing_geotags and not is_video:
                try:
                    geotags = et.get_tags([
                        "GPSLatitude", "GPSLongitude", "GPSLatitudeRef",
                        "GPSLongitudeRef"
                    ], image_path)
                except Exception:
                    # Best effort: a failed read counts as missing
                    geotags = None

                if geotags is None or len(geotags) < 4:
                    mismatched = True
                    test_results.append(missing_geotags_reason)

            if mismatched:
                _record_test_results(result, album_path, image_path,
                                     test_results)

        # More than one collected caption in the album
        if len(unique_caption_dict) > 1:
            unique_caption_result = result.setdefault(unique_caption_reason,
                                                      {})
            unique_caption_result[album_path] = list(
                unique_caption_dict.keys())

        # If caption is same for all images but diff from album then report it
        if len(unique_caption_dict) == 1:
            image_caption = str(next(iter(unique_caption_dict)))

            # Strip the month and day from the album name
            splits = album_name.split(' ')
            album_year = splits[0][0:4]
            album_caption = album_year + ' ' + ' '.join(splits[1:])

            if album_caption != image_caption:
                caption_mismatch_result = result.setdefault(
                    mismatch_album_image_caption_reason, {})
                caption_mismatch_result[album_path] = {
                    'album_caption': album_caption,
                    'image_caption': image_caption
                }

    saveto_filename = "check_album_readiness"
    if album_path_filter_year:
        saveto_filename += '_d' + album_path_filter_year
    if file_filter_include is not None:
        saveto_filename += '_' + file_filter_include

    if test_missing_date_shot or test_bad_date_shot:
        saveto_filename += "_dtshot"
    if test_filename_FMT:
        saveto_filename += "_ffmt"
    if test_Tag_mismatch:
        saveto_filename += "_Tagmm"
    if test_missing_caption:
        saveto_filename += "_miscap"
    if test_unique_caption:
        saveto_filename += "_dupcap"

    saveto_filename += '.json'
    saveto = os.path.join(gphoto.cache_dir(), saveto_filename)
    print(f"Saving to: '{saveto}'")

    with open(saveto, "w") as cache_file:
        json.dump(result, cache_file, indent=2)