def main(argv):
    """Build an image list file from an lmdb database of photo records.

    Every record value is loaded with ``yaml.load``; its ``photo`` field is
    parsed as XML to read the photo ``id`` and its ``focal_in35`` field is
    converted to ``int``. Records with a focal length outside 5..200 are
    skipped. For each remaining record the line
    ``<absolute image path> <focal_in35>`` is written to the list file,
    provided ``<image path>/<id>.jpg`` exists (or, with ``--check``, provided
    ``Image.open`` succeeds on it).

    Args:
        argv: Command-line arguments as accepted by ``getopt.getopt``
            (presumably ``sys.argv[1:]`` -- confirm against the caller).

    Exits with status 2 on bad or missing options, a missing image path, or
    an output/db file that cannot be opened. ``-h`` prints the help text and
    exits with status 0.
    """
    db_file = None
    list_file = None
    img_path = None
    ext = ".jpg"
    if_check = False
    help_msg = "dataset_create_imagelist.py -i <lmdb> -p <image path> -o <list> \
--check\n\
-i <lmdb>           The input lmdb database file\n\
-o <list>           The output image list file\n\
-p <image path>     The path which store the downloaded images\n\
--check [optional]  Force to check if the jpg image can be loaded.\n\
                    Which will slow down the process. Default False"
    try:
        opts, _ = getopt.getopt(argv, "hi:p:o:", ["check"])
    except getopt.GetoptError:
        print(help_msg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            print(help_msg)
            sys.exit()
        elif opt == "-i":
            db_file = arg
        elif opt == "-o":
            list_file = arg
        elif opt == "-p":
            img_path = arg
        elif opt == "--check":
            if_check = True
        else:
            print(help_msg)
            sys.exit(2)
    # -i, -o and -p are all mandatory
    if db_file is None or list_file is None or img_path is None:
        print(help_msg)
        sys.exit(2)

    # Check if the image path exists
    log.info("Check image path %s" % img_path)
    if not os.path.exists(img_path):
        log.fatal("Can not locate the image path %s" % img_path)
        sys.exit(2)
    # Directory prefix with exactly one trailing slash; loop-invariant
    img_dir = img_path if img_path.endswith("/") else img_path + "/"

    # Create the text list file
    log.info("Open the image list file %s" % list_file)
    try:
        fp = open(list_file, "w")
    except IOError:
        log.fatal("Can not open %s for writing" % list_file)
        sys.exit(2)

    counter = 0
    err_counter = 0
    db = None
    # try/finally so the list file and the db are released on any failure
    try:
        log.info("Open db file %s" % db_file)
        db = lt.open_db_ro(db_file)
        # NOTE(review): open_db_ro presumably returns None on failure --
        # confirm against its implementation.
        if db is None:
            log.fatal("Can not open %s" % db_file)
            sys.exit(2)
        db_stat = db.stat()
        log.info("Total Entries: %d" % db_stat["entries"])
        bar = eb.EasyProgressBar()
        bar.set_end(db_stat["entries"])
        bar.start()
        # Iterate over the whole database
        with db.begin(write=False) as txn:
            with txn.cursor() as cur:
                for _key, val in cur:
                    counter += 1
                    # Advance the bar for every record, skipped ones included
                    bar.update(counter)
                    try:
                        # NOTE(review): yaml.load can construct arbitrary
                        # objects; only run this on databases you trust.
                        val_dic = yaml.load(val)
                        photo = myxml.parse_xml_to_etree(val_dic["photo"])
                        photo_id = photo.get("id")
                        focal_in35 = int(val_dic["focal_in35"])
                    except Exception:
                        # Not a bare except: Ctrl-C and sys.exit still work
                        err_counter += 1
                        continue
                    # Filter out implausible focal lengths
                    if focal_in35 < 5 or focal_in35 > 200:
                        continue
                    # A record without an id cannot be mapped to a file
                    if photo_id is None:
                        err_counter += 1
                        continue
                    img_name = os.path.abspath(img_dir + photo_id + ext)
                    if if_check:
                        # NOTE(review): if Image is PIL's module, open() only
                        # reads the header lazily -- confirm this is a
                        # sufficient "can be loaded" check.
                        try:
                            Image.open(img_name)
                        except Exception:
                            err_counter += 1
                            continue
                    elif not os.path.exists(img_name):
                        err_counter += 1
                        continue

                    # One "<path> <focal>" line per usable image
                    fp.write(img_name + " %d\n" % focal_in35)
    finally:
        if db is not None:
            db.close()
        fp.close()
    bar.finish()
    log.info("Finished. errors: %d" % err_counter)
Пример #2
0
def main(argv):
    """Download the photo referenced by every record of an lmdb database.

    Each record value is loaded with ``yaml.load``; its ``photo`` field is
    parsed as XML and passed with the record's ``urls`` field to
    ``tb.get_url``. The resulting url is handed to
    ``tb.download_url_and_save`` together with the record key, the overwrite
    flag and the data path. Records that fail to parse or download are
    logged and skipped.

    Args:
        argv: Command-line arguments as accepted by ``getopt.getopt``
            (presumably ``sys.argv[1:]`` -- confirm against the caller).

    Exits with status 2 on bad or missing options (including a non-integer
    ``--skip`` value) or when the database cannot be opened. ``-h`` prints
    the help text and exits with status 0.
    """
    db_file = None
    skip_num = None
    data_path = '../data'
    overwrite = False
    help_msg = 'download_image.py -i <lmdbfile> -o[optional] <datapath> \
--overwrite[optional] --skip <num>\n\
-i <lmdbfile>       The input lmdb file contains the exif of photos\n\
-o <datapath>       The path where to store the downloaded photos\n\
--overwrite         If set, overwrite the exists photos, default not\n\
--skip <num>        Skip the first XX photos'

    try:
        opts, _ = getopt.getopt(argv, 'hi:o:', ['overwrite', 'skip='])
    except getopt.GetoptError:
        print(help_msg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(help_msg)
            sys.exit()
        elif opt == '-i':
            db_file = arg
        elif opt == '-o':
            data_path = arg
        elif opt == '--overwrite':
            overwrite = True
        elif opt == '--skip':
            # A non-numeric value is a usage error, not a traceback
            try:
                skip_num = int(arg)
            except ValueError:
                print(help_msg)
                sys.exit(2)
        else:
            print(help_msg)
            sys.exit(2)

    if db_file is None:
        print(help_msg)
        sys.exit(2)

    # Try to open the database file
    db = lt.open_db_ro(db_file)
    if db is None:
        log.fatal('\033[0;31mCan not open %s\033[0m' % db_file)
        sys.exit(2)

    # try/finally so the database is closed even if the loop raises
    try:
        # Total number of records, used only for progress logging
        entries = db.stat()['entries']
        counter = 0
        # NOTE(review): check_path presumably creates the directory when it
        # is missing and returns False in that case -- confirm.
        if not tb.check_path(data_path):
            log.info('Create new dir %s' % data_path)
        if skip_num is not None:
            log.info('Skipping the first %d entries...' % skip_num)
        with db.begin(write=False) as txn:
            with txn.cursor() as cur:
                for key, val in cur:
                    counter += 1
                    # counter is 1-based, so "<=" skips exactly the first
                    # skip_num records ("<" skipped one too few)
                    if skip_num is not None and counter <= skip_num:
                        continue
                    try:
                        # NOTE(review): yaml.load can construct arbitrary
                        # objects; only run this on databases you trust.
                        val_dic = yaml.load(val)
                        photo = myxml.parse_xml_to_etree(val_dic['photo'])
                        url = tb.get_url(photo, val_dic['urls'], True)
                    except Exception:
                        # One malformed record must not abort the whole run
                        log.error(
                            '\033[0;31mFailed to parse entry %s [%d/%d]\033[0m'
                            % (key, counter, entries))
                        continue
                    log.info('Download %s from %s [%d/%d]' %
                             (key, url, counter, entries))
                    try:
                        tb.download_url_and_save(url, key, overwrite,
                                                 data_path)
                    except Exception:
                        # Not a bare except: Ctrl-C still stops the run
                        log.error(
                            '\033[0;31mFailed to download %s from %s\033[0m'
                            % (key, url))
                        continue
    finally:
        db.close()
Пример #3
0
def main(argv):
    """Count and plot the ``focal_in35`` values stored in an lmdb database.

    Each record value is loaded with ``yaml.load`` and its ``focal_in35``
    field is converted to ``int``. Values outside 5..200 are ignored. The
    per-value counts are printed as a dict and a 50-bin histogram of the
    accepted values is shown with matplotlib.

    Args:
        argv: Command-line arguments as accepted by ``getopt.getopt``
            (presumably ``sys.argv[1:]`` -- confirm against the caller).

    Exits with status 2 on bad or missing options or when the database
    cannot be opened. ``-h`` prints the help text and exits with status 0.
    """
    db_file = None
    helpmsg = 'analyze_lmdb_info.py -i <lmdbfile>'
    # focal length -> number of photos with that focal length
    rst = {}
    # Accepted focal lengths in record order; feeds the histogram. A plain
    # list avoids pre-allocating one slot per record and filtering zeros.
    focals = []

    try:
        opts, _ = getopt.getopt(argv, 'hi:')
    except getopt.GetoptError:
        print(helpmsg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(helpmsg)
            sys.exit()
        elif opt == '-i':
            db_file = arg

    # The input database is mandatory
    if db_file is None:
        print(helpmsg)
        sys.exit(2)

    log.info('Open the lmdb file %s' % db_file)
    db = lt.open_db_ro(db_file)
    # NOTE(review): open_db_ro presumably returns None on failure -- confirm
    # against its implementation.
    if db is None:
        log.fatal('Can not open %s' % db_file)
        sys.exit(2)

    counter = 0
    err_counter = 0
    # try/finally so the database is closed even if the loop raises
    try:
        db_stat = db.stat()
        log.info('Total Entries: %d' % db_stat['entries'])
        bar = eb.EasyProgressBar()
        bar.set_end(db_stat['entries'])
        bar.start()
        # Iterate over the whole database
        with db.begin(write=False) as txn:
            with txn.cursor() as cur:
                for _key, val in cur:
                    counter += 1
                    # Advance the bar for every record, skipped ones included
                    bar.update(counter)
                    try:
                        # NOTE(review): yaml.load can construct arbitrary
                        # objects; only run this on databases you trust.
                        val_dic = yaml.load(val)
                        focal_in35 = int(val_dic['focal_in35'])
                    except Exception:
                        # Not a bare except: Ctrl-C and sys.exit still work
                        err_counter += 1
                        continue
                    # Filter out implausible focal lengths
                    if focal_in35 < 5 or focal_in35 > 200:
                        continue
                    focals.append(focal_in35)
                    rst[focal_in35] = rst.get(focal_in35, 0) + 1
    finally:
        db.close()
    bar.finish()
    log.info('Finished, errors: %d, The result:' % err_counter)
    print(str(rst))
    # Draw the plot
    plt.hist(focals, bins=50)
    plt.xlabel('Focal')
    plt.ylabel('Photos')
    plt.title('Photo count in each focal')
    plt.show()
def main(argv):
    """Write a list of downloaded images with their 35mm focal lengths.

    Walks the lmdb database given by ``-i``. For each record the value is
    loaded with ``yaml.load``, the photo ``id`` is read from the XML in its
    ``photo`` field and ``focal_in35`` is converted to ``int``. Focal
    lengths outside 5..200 are dropped. When ``<image path>/<id>.jpg``
    exists (or, with ``--check``, when ``Image.open`` succeeds on it), the
    line ``<absolute image path> <focal_in35>`` is appended to the list file
    given by ``-o``.

    Args:
        argv: Command-line arguments as accepted by ``getopt.getopt``
            (presumably ``sys.argv[1:]`` -- confirm against the caller).

    Exits with status 2 on bad or missing options, a missing image path, or
    an output/db file that cannot be opened. ``-h`` prints the help text and
    exits with status 0.
    """
    db_file = None
    list_file = None
    img_path = None
    ext = '.jpg'
    if_check = False
    help_msg = 'dataset_create_imagelist.py -i <lmdb> -p <image path> -o <list> \
--check\n\
-i <lmdb>           The input lmdb database file\n\
-o <list>           The output image list file\n\
-p <image path>     The path which store the downloaded images\n\
--check [optional]  Force to check if the jpg image can be loaded.\n\
                    Which will slow down the process. Default False'

    try:
        opts, _ = getopt.getopt(argv, 'hi:p:o:', ['check'])
    except getopt.GetoptError:
        print(help_msg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(help_msg)
            sys.exit()
        elif opt == '-i':
            db_file = arg
        elif opt == '-o':
            list_file = arg
        elif opt == '-p':
            img_path = arg
        elif opt == '--check':
            if_check = True
        else:
            print(help_msg)
            sys.exit(2)
    # The three path options have no defaults, so all must be supplied
    if db_file is None or list_file is None or img_path is None:
        print(help_msg)
        sys.exit(2)

    # Check if the image path exists
    log.info('Check image path %s' % img_path)
    if not os.path.exists(img_path):
        log.fatal('Can not locate the image path %s' % img_path)
        sys.exit(2)
    # Normalise the directory prefix once instead of on every record
    img_dir = img_path if img_path.endswith('/') else img_path + '/'

    # Create the text list file
    log.info('Open the image list file %s' % list_file)
    try:
        fp = open(list_file, 'w')
    except IOError:
        log.fatal('Can not open %s for writing' % list_file)
        sys.exit(2)

    counter = 0
    err_counter = 0
    db = None
    # Everything below holds open resources; release them on any exit path
    try:
        log.info('Open db file %s' % db_file)
        db = lt.open_db_ro(db_file)
        # NOTE(review): open_db_ro presumably returns None on failure --
        # confirm against its implementation.
        if db is None:
            log.fatal('Can not open %s' % db_file)
            sys.exit(2)
        db_stat = db.stat()
        log.info('Total Entries: %d' % db_stat['entries'])
        bar = eb.EasyProgressBar()
        bar.set_end(db_stat['entries'])
        bar.start()
        with db.begin(write=False) as txn:
            with txn.cursor() as cur:
                for _key, val in cur:
                    counter += 1
                    # Keep the bar moving even when the record is skipped
                    bar.update(counter)
                    try:
                        # NOTE(review): yaml.load can construct arbitrary
                        # objects; only run this on databases you trust.
                        val_dic = yaml.load(val)
                        photo = myxml.parse_xml_to_etree(val_dic['photo'])
                        photo_id = photo.get('id')
                        focal_in35 = int(val_dic['focal_in35'])
                    except Exception:
                        # Exception (not bare) lets Ctrl-C/sys.exit through
                        err_counter += 1
                        continue
                    # Filter out implausible focal lengths
                    if focal_in35 < 5 or focal_in35 > 200:
                        continue
                    # Without an id there is no file name to look for
                    if photo_id is None:
                        err_counter += 1
                        continue
                    img_name = os.path.abspath(img_dir + photo_id + ext)
                    if if_check:
                        # NOTE(review): if Image is PIL's module, open() only
                        # reads the header lazily -- confirm this is a
                        # sufficient "can be loaded" check.
                        try:
                            Image.open(img_name)
                        except Exception:
                            err_counter += 1
                            continue
                    elif not os.path.exists(img_name):
                        err_counter += 1
                        continue

                    # write(), not writelines(): this is a single string
                    fp.write(img_name + ' %d\n' % focal_in35)
    finally:
        if db is not None:
            db.close()
        fp.close()
    bar.finish()
    log.info('Finished. errors: %d' % err_counter)
Пример #5
0
def main(argv):
    """Fetch and store the photo for each record of an lmdb database.

    For every record (after the optional ``--skip`` prefix) the value is
    loaded with ``yaml.load``, its ``photo`` field is parsed as XML, and
    ``tb.get_url`` is called with that tree and the record's ``urls`` field.
    ``tb.download_url_and_save`` then receives the url, the record key, the
    overwrite flag and the data path. Parse and download failures are logged
    and the loop moves on to the next record.

    Args:
        argv: Command-line arguments as accepted by ``getopt.getopt``
            (presumably ``sys.argv[1:]`` -- confirm against the caller).

    Exits with status 2 on bad or missing options (including a non-integer
    ``--skip`` value) or when the database cannot be opened. ``-h`` prints
    the help text and exits with status 0.
    """
    db_file = None
    skip_num = None
    data_path = '../data'
    overwrite = False
    help_msg = 'download_image.py -i <lmdbfile> -o[optional] <datapath> \
--overwrite[optional] --skip <num>\n\
-i <lmdbfile>       The input lmdb file contains the exif of photos\n\
-o <datapath>       The path where to store the downloaded photos\n\
--overwrite         If set, overwrite the exists photos, default not\n\
--skip <num>        Skip the first XX photos'

    try:
        opts, _ = getopt.getopt(argv, 'hi:o:', ['overwrite', 'skip='])
    except getopt.GetoptError:
        print(help_msg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(help_msg)
            sys.exit()
        elif opt == '-i':
            db_file = arg
        elif opt == '-o':
            data_path = arg
        elif opt == '--overwrite':
            overwrite = True
        elif opt == '--skip':
            # Reject "--skip foo" with the usage text instead of crashing
            try:
                skip_num = int(arg)
            except ValueError:
                print(help_msg)
                sys.exit(2)
        else:
            print(help_msg)
            sys.exit(2)

    if db_file is None:
        print(help_msg)
        sys.exit(2)

    # Try to open the database file
    db = lt.open_db_ro(db_file)
    if db is None:
        log.fatal('\033[0;31mCan not open %s\033[0m' % db_file)
        sys.exit(2)

    # Guarantee db.close() regardless of how the loop ends
    try:
        # Record count, reported in the "[n/total]" progress messages
        entries = db.stat()['entries']
        counter = 0
        # NOTE(review): check_path presumably creates the directory when it
        # is missing and returns False in that case -- confirm.
        if not tb.check_path(data_path):
            log.info('Create new dir %s' % data_path)
        if skip_num is not None:
            log.info('Skipping the first %d entries...' % skip_num)
        with db.begin(write=False) as txn:
            with txn.cursor() as cur:
                for key, val in cur:
                    counter += 1
                    # The first record has counter == 1, so skipping the
                    # first skip_num records needs "<=" rather than "<"
                    if skip_num is not None and counter <= skip_num:
                        continue
                    try:
                        # NOTE(review): yaml.load can construct arbitrary
                        # objects; only run this on databases you trust.
                        val_dic = yaml.load(val)
                        photo = myxml.parse_xml_to_etree(val_dic['photo'])
                        url = tb.get_url(photo, val_dic['urls'], True)
                    except Exception:
                        # Skip a broken record rather than ending the run
                        log.error(
                            '\033[0;31mFailed to parse entry %s [%d/%d]\033[0m'
                            % (key, counter, entries))
                        continue
                    log.info('Download %s from %s [%d/%d]' %
                             (key, url, counter, entries))
                    try:
                        tb.download_url_and_save(url, key, overwrite,
                                                 data_path)
                    except Exception:
                        # Exception (not bare) so Ctrl-C can interrupt
                        log.error(
                            '\033[0;31mFailed to download %s from %s\033[0m' %
                            (key, url))
                        continue
    finally:
        db.close()
Пример #6
0
def main(argv):
    """Tally ``focal_in35`` across an lmdb database and plot a histogram.

    Every record value is loaded with ``yaml.load`` and its ``focal_in35``
    field converted to ``int``; records where that fails are counted as
    errors. Values outside 5..200 are ignored. The dict of per-value counts
    is printed, then a 50-bin histogram of the accepted values is displayed
    with matplotlib.

    Args:
        argv: Command-line arguments as accepted by ``getopt.getopt``
            (presumably ``sys.argv[1:]`` -- confirm against the caller).

    Exits with status 2 on bad or missing options or when the database
    cannot be opened. ``-h`` prints the help text and exits with status 0.
    """
    db_file = None
    helpmsg = "analyze_lmdb_info.py -i <lmdbfile>"
    # focal length -> number of photos with that focal length
    rst = {}
    # Accepted focal lengths, collected directly so no zero-filled array
    # has to be allocated and filtered afterwards
    focals = []

    try:
        opts, _ = getopt.getopt(argv, "hi:")
    except getopt.GetoptError:
        print(helpmsg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            print(helpmsg)
            sys.exit()
        elif opt == "-i":
            db_file = arg

    # Nothing to analyse without an input database
    if db_file is None:
        print(helpmsg)
        sys.exit(2)

    log.info("Open the lmdb file %s" % db_file)
    db = lt.open_db_ro(db_file)
    # NOTE(review): open_db_ro presumably returns None on failure -- confirm
    # against its implementation.
    if db is None:
        log.fatal("Can not open %s" % db_file)
        sys.exit(2)

    counter = 0
    err_counter = 0
    # Guarantee db.close() regardless of how the loop ends
    try:
        db_stat = db.stat()
        log.info("Total Entries: %d" % db_stat["entries"])
        bar = eb.EasyProgressBar()
        bar.set_end(db_stat["entries"])
        bar.start()
        with db.begin(write=False) as txn:
            with txn.cursor() as cur:
                for _key, val in cur:
                    counter += 1
                    # Keep the bar moving even when the record is skipped
                    bar.update(counter)
                    try:
                        # NOTE(review): yaml.load can construct arbitrary
                        # objects; only run this on databases you trust.
                        val_dic = yaml.load(val)
                        focal_in35 = int(val_dic["focal_in35"])
                    except Exception:
                        # Exception (not bare) lets Ctrl-C/sys.exit through
                        err_counter += 1
                        continue
                    # Filter out implausible focal lengths
                    if focal_in35 < 5 or focal_in35 > 200:
                        continue
                    focals.append(focal_in35)
                    rst[focal_in35] = rst.get(focal_in35, 0) + 1
    finally:
        db.close()
    bar.finish()
    log.info("Finished, errors: %d, The result:" % err_counter)
    print(str(rst))
    # Draw the plot
    plt.hist(focals, bins=50)
    plt.xlabel("Focal")
    plt.ylabel("Photos")
    plt.title("Photo count in each focal")
    plt.show()