def main(argv):
    """Write a list of downloaded images with their 35mm focal length.

    Iterates over every record of a read-only lmdb database, parses the
    record, and for each record whose image file is found under the image
    path writes one line ``<absolute image path> <focal_in35>`` to the list
    file.  Records whose ``focal_in35`` is outside 5..200 are skipped;
    records that cannot be parsed, have no photo id, or whose image is
    missing are counted as errors and skipped.

    Args:
        argv: Command-line options without the program name: ``-i <lmdb>``,
            ``-o <list>`` and ``-p <image path>`` (all required), plus the
            optional ``--check`` and ``-h``.

    Raises:
        SystemExit: With status 2 on bad or missing arguments, a missing
            image path, an unwritable list file or an unopenable database;
            with the default status after printing the help for ``-h``.
    """
    db_file = None
    list_file = None
    img_path = None
    ext = ".jpg"
    if_check = False
    help_msg = (
        "dataset_create_imagelist.py -i <lmdb> -p <image path> -o <list>"
        " --check\n"
        " -i <lmdb> The input lmdb database file\n"
        " -o <list> The output image list file\n"
        " -p <image path> The path which store the downloaded images\n"
        " --check [optional] Force to check if the jpg image can be loaded.\n"
        " Which will slow down the process. Default False"
    )

    try:
        opts, _ = getopt.getopt(argv, "hi:p:o:", ["check"])
    except getopt.GetoptError:
        print(help_msg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            print(help_msg)
            sys.exit()
        elif opt == "-i":
            db_file = arg
        elif opt == "-o":
            list_file = arg
        elif opt == "-p":
            img_path = arg
        elif opt == "--check":
            if_check = True
        else:
            print(help_msg)
            sys.exit(2)

    # All three paths are mandatory.
    if db_file is None or list_file is None or img_path is None:
        print(help_msg)
        sys.exit(2)

    log.info("Check image path %s" % img_path)
    if not os.path.exists(img_path):
        log.fatal("Can not locate the image path %s" % img_path)
        sys.exit(2)

    log.info("Open the image list file %s" % list_file)
    try:
        fp = open(list_file, "w")
    except IOError:
        log.fatal("Can not open %s for writing" % list_file)
        sys.exit(2)

    counter = 0
    err_counter = 0
    # try/finally so the list file and the database are released even when
    # the scan is interrupted or a helper raises.
    try:
        log.info("Open db file %s" % db_file)
        db = lt.open_db_ro(db_file)
        # NOTE(review): assumes open_db_ro signals failure by returning
        # None -- confirm against the helper.
        if db is None:
            log.fatal("Can not open %s" % db_file)
            sys.exit(2)
        try:
            db_stat = db.stat()
            log.info("Total Entries: %d" % db_stat["entries"])
            bar = eb.EasyProgressBar()
            bar.set_end(db_stat["entries"])
            bar.start()

            with db.begin(write=False) as txn:
                with txn.cursor() as cur:
                    for key, val in cur:
                        counter += 1
                        try:
                            # NOTE(review): yaml.load can construct
                            # arbitrary objects; prefer yaml.safe_load if
                            # the database content is not fully trusted.
                            val_dic = yaml.load(val)
                            photo = myxml.parse_xml_to_etree(
                                val_dic["photo"])
                            photo_id = photo.get("id")
                            focal_in35 = int(val_dic["focal_in35"])
                        except Exception:
                            # Exception (not a bare except) so Ctrl-C and
                            # sys.exit still stop the run.
                            err_counter += 1
                            continue

                        # Drop implausible focal lengths without counting
                        # them as errors.
                        if focal_in35 < 5 or focal_in35 > 200:
                            continue

                        # A record without an id cannot be mapped to a file.
                        if photo_id is None:
                            err_counter += 1
                            continue

                        img_name = os.path.abspath(
                            os.path.join(img_path, photo_id + ext))

                        if if_check:
                            # NOTE(review): Image.open is presumably lazy
                            # (header only); call .load()/.verify() if a
                            # full decode check is wanted -- confirm.
                            try:
                                Image.open(img_name)
                            except Exception:
                                err_counter += 1
                                continue
                        elif not os.path.exists(img_name):
                            err_counter += 1
                            continue

                        fp.write("%s %d\n" % (img_name, focal_in35))
                        bar.update(counter)
        finally:
            db.close()
    finally:
        fp.close()

    bar.finish()
    log.info("Finished. errors: %d" % err_counter)
def main(argv):
    """Download the photo referenced by each record of an lmdb database.

    Iterates over a read-only lmdb database, resolves a download URL for
    every record and saves the photo under the data path.  Records that
    cannot be parsed or downloaded are logged and skipped so one bad entry
    does not abort the whole run.

    Args:
        argv: Command-line options without the program name: ``-i
            <lmdbfile>`` (required), ``-o <datapath>`` (default
            ``../data``), ``--overwrite``, ``--skip <num>`` and ``-h``.

    Raises:
        SystemExit: With status 2 on bad or missing arguments (including a
            non-integer ``--skip``) or an unopenable database; with the
            default status after printing the help for ``-h``.
    """
    db_file = None
    skip_num = None
    data_path = '../data'
    overwrite = False
    help_msg = (
        'download_image.py -i <lmdbfile> -o[optional] <datapath>'
        ' --overwrite[optional] --skip <num>\n'
        ' -i <lmdbfile> The input lmdb file contains the exif of photos\n'
        ' -o <datapath> The path where to store the downloaded photos\n'
        ' --overwrite If set, overwrite the exists photos, default not\n'
        ' --skip <num> Skip the first XX photos'
    )

    try:
        opts, _ = getopt.getopt(argv, 'hi:o:', ['overwrite', 'skip='])
    except getopt.GetoptError:
        print(help_msg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(help_msg)
            sys.exit()
        elif opt == '-i':
            db_file = arg
        elif opt == '-o':
            data_path = arg
        elif opt == '--overwrite':
            overwrite = True
        elif opt == '--skip':
            # A non-numeric value is a usage error, not a traceback.
            try:
                skip_num = int(arg)
            except ValueError:
                print(help_msg)
                sys.exit(2)
        else:
            print(help_msg)
            sys.exit(2)

    if db_file is None:
        print(help_msg)
        sys.exit(2)

    db = lt.open_db_ro(db_file)
    if db is None:
        log.fatal('\033[0;31mCan not open %s\033[0m' % db_file)
        sys.exit(2)

    # try/finally so the database is closed even if the run is interrupted.
    try:
        entries = db.stat()['entries']
        counter = 0

        # NOTE(review): presumably check_path also creates the directory
        # when it is missing -- confirm against the helper.
        if not tb.check_path(data_path):
            log.info('Create new dir %s' % data_path)

        if skip_num is not None:
            log.info('Skipping the first %d entries...' % skip_num)

        with db.begin(write=False) as txn:
            with txn.cursor() as cur:
                for key, val in cur:
                    counter += 1
                    # counter is 1-based, so "<=" skips exactly the first
                    # skip_num entries as the help text promises.
                    if skip_num is not None and counter <= skip_num:
                        continue

                    try:
                        # NOTE(review): yaml.load can construct arbitrary
                        # objects; prefer yaml.safe_load if the database
                        # content is not fully trusted.
                        val_dic = yaml.load(val)
                        photo = myxml.parse_xml_to_etree(val_dic['photo'])
                        url = tb.get_url(photo, val_dic['urls'], True)
                    except Exception:
                        log.error(
                            '\033[0;31mFailed to parse entry %s\033[0m'
                            % key)
                        continue

                    log.info('Download %s from %s [%d/%d]' %
                             (key, url, counter, entries))
                    try:
                        tb.download_url_and_save(url, key, overwrite,
                                                 data_path)
                    except Exception:
                        # Exception (not a bare except) so Ctrl-C still
                        # stops the download loop.
                        log.error(
                            '\033[0;31mFailed to download %s from %s\033[0m'
                            % (key, url))
                        continue
    finally:
        db.close()
def main(argv):
    """Report and plot the distribution of ``focal_in35`` in an lmdb file.

    Iterates over every record of a read-only lmdb database, reads its
    ``focal_in35`` value, counts how often each value in 5..200 occurs,
    prints the count table and shows a 50-bin histogram.  Records that
    cannot be parsed are counted as errors; out-of-range values are
    skipped silently.

    Args:
        argv: Command-line options without the program name: ``-i
            <lmdbfile>`` (required) and ``-h``.

    Raises:
        SystemExit: With status 2 on bad or missing arguments or an
            unopenable database; with the default status after printing
            the help for ``-h``.
    """
    db_file = None
    helpmsg = 'analyze_lmdb_info.py -i <lmdbfile>'
    rst = {}      # focal length -> number of photos
    focals = []   # one entry per accepted photo, fed to the histogram

    try:
        opts, _ = getopt.getopt(argv, 'hi:')
    except getopt.GetoptError:
        print(helpmsg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(helpmsg)
            sys.exit()
        elif opt == '-i':
            db_file = arg

    if db_file is None:
        print(helpmsg)
        sys.exit(2)

    log.info('Open the lmdb file %s' % db_file)
    db = lt.open_db_ro(db_file)
    # NOTE(review): assumes open_db_ro signals failure by returning None --
    # confirm against the helper.
    if db is None:
        log.fatal('Can not open %s' % db_file)
        sys.exit(2)

    counter = 0
    err_counter = 0
    # try/finally so the database is closed even if the scan is interrupted.
    try:
        db_stat = db.stat()
        log.info('Total Entries: %d' % db_stat['entries'])
        bar = eb.EasyProgressBar()
        bar.set_end(db_stat['entries'])
        bar.start()

        with db.begin(write=False) as txn:
            with txn.cursor() as cur:
                for key, val in cur:
                    counter += 1
                    try:
                        # NOTE(review): yaml.load can construct arbitrary
                        # objects; prefer yaml.safe_load if the database
                        # content is not fully trusted.
                        val_dic = yaml.load(val)
                        focal_in35 = int(val_dic['focal_in35'])
                    except Exception:
                        # Exception (not a bare except) so Ctrl-C still
                        # stops the scan.
                        err_counter += 1
                        continue

                    # Drop implausible focal lengths.
                    if focal_in35 < 5 or focal_in35 > 200:
                        continue

                    focals.append(focal_in35)
                    rst[focal_in35] = rst.get(focal_in35, 0) + 1
                    bar.update(counter)
    finally:
        db.close()

    bar.finish()
    log.info('Finished, errors: %d, The result:' % err_counter)
    print(str(rst))

    # Draw the plot
    plt.hist(focals, bins=50)
    plt.xlabel('Focal')
    plt.ylabel('Photos')
    plt.title('Photo count in each focal')
    plt.show()
def main(argv):
    """Write a list of downloaded images with their 35mm focal length.

    Iterates over every record of a read-only lmdb database, parses the
    record, and for each record whose image file is found under the image
    path writes one line ``<absolute image path> <focal_in35>`` to the list
    file.  Records whose ``focal_in35`` is outside 5..200 are skipped;
    records that cannot be parsed, have no photo id, or whose image is
    missing are counted as errors and skipped.

    Args:
        argv: Command-line options without the program name: ``-i <lmdb>``,
            ``-o <list>`` and ``-p <image path>`` (all required), plus the
            optional ``--check`` and ``-h``.

    Raises:
        SystemExit: With status 2 on bad or missing arguments, a missing
            image path, an unwritable list file or an unopenable database;
            with the default status after printing the help for ``-h``.
    """
    db_file = None
    list_file = None
    img_path = None
    ext = '.jpg'
    if_check = False
    help_msg = (
        'dataset_create_imagelist.py -i <lmdb> -p <image path> -o <list>'
        ' --check\n'
        ' -i <lmdb> The input lmdb database file\n'
        ' -o <list> The output image list file\n'
        ' -p <image path> The path which store the downloaded images\n'
        ' --check [optional] Force to check if the jpg image can be loaded.\n'
        ' Which will slow down the process. Default False'
    )

    try:
        opts, _ = getopt.getopt(argv, 'hi:p:o:', ['check'])
    except getopt.GetoptError:
        print(help_msg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(help_msg)
            sys.exit()
        elif opt == '-i':
            db_file = arg
        elif opt == '-o':
            list_file = arg
        elif opt == '-p':
            img_path = arg
        elif opt == '--check':
            if_check = True
        else:
            print(help_msg)
            sys.exit(2)

    # All three paths are mandatory.
    if db_file is None or list_file is None or img_path is None:
        print(help_msg)
        sys.exit(2)

    log.info('Check image path %s' % img_path)
    if not os.path.exists(img_path):
        log.fatal('Can not locate the image path %s' % img_path)
        sys.exit(2)

    log.info('Open the image list file %s' % list_file)
    try:
        fp = open(list_file, 'w')
    except IOError:
        log.fatal('Can not open %s for writing' % list_file)
        sys.exit(2)

    counter = 0
    err_counter = 0
    # try/finally so the list file and the database are released even when
    # the scan is interrupted or a helper raises.
    try:
        log.info('Open db file %s' % db_file)
        db = lt.open_db_ro(db_file)
        # NOTE(review): assumes open_db_ro signals failure by returning
        # None -- confirm against the helper.
        if db is None:
            log.fatal('Can not open %s' % db_file)
            sys.exit(2)
        try:
            db_stat = db.stat()
            log.info('Total Entries: %d' % db_stat['entries'])
            bar = eb.EasyProgressBar()
            bar.set_end(db_stat['entries'])
            bar.start()

            with db.begin(write=False) as txn:
                with txn.cursor() as cur:
                    for key, val in cur:
                        counter += 1
                        try:
                            # NOTE(review): yaml.load can construct
                            # arbitrary objects; prefer yaml.safe_load if
                            # the database content is not fully trusted.
                            val_dic = yaml.load(val)
                            photo = myxml.parse_xml_to_etree(
                                val_dic['photo'])
                            photo_id = photo.get('id')
                            focal_in35 = int(val_dic['focal_in35'])
                        except Exception:
                            # Exception (not a bare except) so Ctrl-C and
                            # sys.exit still stop the run.
                            err_counter += 1
                            continue

                        # Drop implausible focal lengths without counting
                        # them as errors.
                        if focal_in35 < 5 or focal_in35 > 200:
                            continue

                        # A record without an id cannot be mapped to a file.
                        if photo_id is None:
                            err_counter += 1
                            continue

                        img_name = os.path.abspath(
                            os.path.join(img_path, photo_id + ext))

                        if if_check:
                            # NOTE(review): Image.open is presumably lazy
                            # (header only); call .load()/.verify() if a
                            # full decode check is wanted -- confirm.
                            try:
                                Image.open(img_name)
                            except Exception:
                                err_counter += 1
                                continue
                        elif not os.path.exists(img_name):
                            err_counter += 1
                            continue

                        fp.write('%s %d\n' % (img_name, focal_in35))
                        bar.update(counter)
        finally:
            db.close()
    finally:
        fp.close()

    bar.finish()
    log.info('Finished. errors: %d' % err_counter)
def main(argv):
    """Download the photo referenced by each record of an lmdb database.

    Iterates over a read-only lmdb database, resolves a download URL for
    every record and saves the photo under the data path.  Records that
    cannot be parsed or downloaded are logged and skipped so one bad entry
    does not abort the whole run.

    Args:
        argv: Command-line options without the program name: ``-i
            <lmdbfile>`` (required), ``-o <datapath>`` (default
            ``../data``), ``--overwrite``, ``--skip <num>`` and ``-h``.

    Raises:
        SystemExit: With status 2 on bad or missing arguments (including a
            non-integer ``--skip``) or an unopenable database; with the
            default status after printing the help for ``-h``.
    """
    db_file = None
    skip_num = None
    data_path = '../data'
    overwrite = False
    help_msg = (
        'download_image.py -i <lmdbfile> -o[optional] <datapath>'
        ' --overwrite[optional] --skip <num>\n'
        ' -i <lmdbfile> The input lmdb file contains the exif of photos\n'
        ' -o <datapath> The path where to store the downloaded photos\n'
        ' --overwrite If set, overwrite the exists photos, default not\n'
        ' --skip <num> Skip the first XX photos'
    )

    try:
        opts, _ = getopt.getopt(argv, 'hi:o:', ['overwrite', 'skip='])
    except getopt.GetoptError:
        print(help_msg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(help_msg)
            sys.exit()
        elif opt == '-i':
            db_file = arg
        elif opt == '-o':
            data_path = arg
        elif opt == '--overwrite':
            overwrite = True
        elif opt == '--skip':
            # A non-numeric value is a usage error, not a traceback.
            try:
                skip_num = int(arg)
            except ValueError:
                print(help_msg)
                sys.exit(2)
        else:
            print(help_msg)
            sys.exit(2)

    if db_file is None:
        print(help_msg)
        sys.exit(2)

    db = lt.open_db_ro(db_file)
    if db is None:
        log.fatal('\033[0;31mCan not open %s\033[0m' % db_file)
        sys.exit(2)

    # try/finally so the database is closed even if the run is interrupted.
    try:
        entries = db.stat()['entries']
        counter = 0

        # NOTE(review): presumably check_path also creates the directory
        # when it is missing -- confirm against the helper.
        if not tb.check_path(data_path):
            log.info('Create new dir %s' % data_path)

        if skip_num is not None:
            log.info('Skipping the first %d entries...' % skip_num)

        with db.begin(write=False) as txn:
            with txn.cursor() as cur:
                for key, val in cur:
                    counter += 1
                    # counter is 1-based, so "<=" skips exactly the first
                    # skip_num entries as the help text promises.
                    if skip_num is not None and counter <= skip_num:
                        continue

                    try:
                        # NOTE(review): yaml.load can construct arbitrary
                        # objects; prefer yaml.safe_load if the database
                        # content is not fully trusted.
                        val_dic = yaml.load(val)
                        photo = myxml.parse_xml_to_etree(val_dic['photo'])
                        url = tb.get_url(photo, val_dic['urls'], True)
                    except Exception:
                        log.error(
                            '\033[0;31mFailed to parse entry %s\033[0m'
                            % key)
                        continue

                    log.info('Download %s from %s [%d/%d]' %
                             (key, url, counter, entries))
                    try:
                        tb.download_url_and_save(url, key, overwrite,
                                                 data_path)
                    except Exception:
                        # Exception (not a bare except) so Ctrl-C still
                        # stops the download loop.
                        log.error(
                            '\033[0;31mFailed to download %s from %s\033[0m'
                            % (key, url))
                        continue
    finally:
        db.close()
def main(argv):
    """Report and plot the distribution of ``focal_in35`` in an lmdb file.

    Iterates over every record of a read-only lmdb database, reads its
    ``focal_in35`` value, counts how often each value in 5..200 occurs,
    prints the count table and shows a 50-bin histogram.  Records that
    cannot be parsed are counted as errors; out-of-range values are
    skipped silently.

    Args:
        argv: Command-line options without the program name: ``-i
            <lmdbfile>`` (required) and ``-h``.

    Raises:
        SystemExit: With status 2 on bad or missing arguments or an
            unopenable database; with the default status after printing
            the help for ``-h``.
    """
    db_file = None
    helpmsg = "analyze_lmdb_info.py -i <lmdbfile>"
    rst = {}      # focal length -> number of photos
    focals = []   # one entry per accepted photo, fed to the histogram

    try:
        opts, _ = getopt.getopt(argv, "hi:")
    except getopt.GetoptError:
        print(helpmsg)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            print(helpmsg)
            sys.exit()
        elif opt == "-i":
            db_file = arg

    if db_file is None:
        print(helpmsg)
        sys.exit(2)

    log.info("Open the lmdb file %s" % db_file)
    db = lt.open_db_ro(db_file)
    # NOTE(review): assumes open_db_ro signals failure by returning None --
    # confirm against the helper.
    if db is None:
        log.fatal("Can not open %s" % db_file)
        sys.exit(2)

    counter = 0
    err_counter = 0
    # try/finally so the database is closed even if the scan is interrupted.
    try:
        db_stat = db.stat()
        log.info("Total Entries: %d" % db_stat["entries"])
        bar = eb.EasyProgressBar()
        bar.set_end(db_stat["entries"])
        bar.start()

        with db.begin(write=False) as txn:
            with txn.cursor() as cur:
                for key, val in cur:
                    counter += 1
                    try:
                        # NOTE(review): yaml.load can construct arbitrary
                        # objects; prefer yaml.safe_load if the database
                        # content is not fully trusted.
                        val_dic = yaml.load(val)
                        focal_in35 = int(val_dic["focal_in35"])
                    except Exception:
                        # Exception (not a bare except) so Ctrl-C still
                        # stops the scan.
                        err_counter += 1
                        continue

                    # Drop implausible focal lengths.
                    if focal_in35 < 5 or focal_in35 > 200:
                        continue

                    focals.append(focal_in35)
                    rst[focal_in35] = rst.get(focal_in35, 0) + 1
                    bar.update(counter)
    finally:
        db.close()

    bar.finish()
    log.info("Finished, errors: %d, The result:" % err_counter)
    print(str(rst))

    # Draw the plot
    plt.hist(focals, bins=50)
    plt.xlabel("Focal")
    plt.ylabel("Photos")
    plt.title("Photo count in each focal")
    plt.show()