def sync_structure(self):
    """Create remote nodes for new local albums and register their photos.

    Scans the local media, keeps only files that are not in the DB yet and
    stores the result on ``self.media_data``. For each album path under
    ``'albums'`` the remote node chain is resolved (or created) from the
    root node down; intermediate components are requested as ``'Folder'``
    and the last one as ``'Album'``. The album URI is stored in the
    bundle's ``'album_uri'`` entry. The new files are then inserted into
    the ``Photo`` table as ``'pending'`` rows and tagged with that URI.
    """
    self.media_data = sync_files_with_db(find_media_files())
    albums = self.media_data.get('albums')
    if not albums:
        logger.info("No new photos to upload")
        return

    root_node_uri = self.api.get_root_node()

    # Pass 1: resolve the remote node chain for every album path.
    for album_path, bundle in albums.items():
        parts = album_path.strip('/').split('/')
        last_index = len(parts) - 1
        parent_uri = root_node_uri
        for index, part in enumerate(parts):
            is_leaf = index == last_index
            node_uri = self.api.get_or_create_node(
                parent_uri, part,
                node_type='Album' if is_leaf else 'Folder'
            )
            if is_leaf:
                bundle['album_uri'] = node_uri
            parent_uri = node_uri

    # Pass 2: one transaction per album - insert the new rows, then attach
    # the album URI to them. Both steps go in chunks of 300 paths.
    for bundle in albums.values():
        with db.atomic():
            rows = [
                {
                    'local_path': file_path,
                    'local_md5': get_md5(file_path),
                    'status': 'pending'
                }
                for file_path in bundle['files']
            ]
            if rows:
                logger.info("\tInserting to DB: %d", len(rows))
                for rows_chunk in chunks(rows, 300):
                    Photo.insert_many(rows_chunk).execute()

            for paths_chunk in chunks(bundle['files'], 300):
                tag_query = Photo.update(ext_album_key=bundle['album_uri'])
                tag_query.where(Photo.local_path << paths_chunk).execute()
def update_keywords():
    """Send keywords to the remote API for every photo marked 'uploaded'.

    Fifty worker greenlets consume ``(ext_key, keywords)`` pairs from a
    bounded queue and call ``SmugmugAPI.update_image_keywords`` for each.
    The function returns once every queued item has been processed.
    """
    sm_api = SmugmugAPI()
    q = JoinableQueue(maxsize=100)

    def worker():
        logger.info('[Worker started]')
        while True:
            item = q.get()
            try:
                sm_api.update_image_keywords(*item)
            except Exception:
                # A failed call must not end the worker loop: with every
                # worker gone the bounded queue fills up and the producer
                # below can never finish.
                logger.exception(
                    "Failed to update keywords for image: %s", item[0])
            finally:
                q.task_done()

    for _ in range(50):
        gevent.spawn(worker)

    photos = list(
        Photo.select(Photo.local_path, Photo.ext_key).where(
            (Photo.status == 'uploaded')))
    print("Total photos to update:", len(photos))

    for cnt, p in enumerate(photos, start=1):
        print(cnt)
        keywords = get_keywords(p.local_path)
        q.put((p.ext_key, keywords))

    q.join()
def upload_photos_in_pending(with_failed=True):
    """Upload every photo whose status is 'pending' (and optionally 'failed').

    Args:
        with_failed: when true, photos with status ``'failed'`` are
            uploaded as well as the ``'pending'`` ones.

    ``UPLOADING_WORKERS_COUNT`` worker greenlets take
    ``(local_path, ext_album_key)`` pairs from a bounded queue and pass
    them to ``upload_photo``. Returns once the queue has been drained.
    """
    q_filter = ['pending']
    if with_failed:
        q_filter.append('failed')

    photos = list(
        Photo.select(Photo.local_path, Photo.ext_album_key).where(
            (Photo.status << q_filter)))

    q = JoinableQueue(maxsize=10)

    def worker():
        logger.info('[New worker started]')
        while True:
            item = q.get()
            try:
                upload_photo(item)
            except Exception:
                # Keep the worker alive when upload_photo raises; otherwise
                # the pool can shrink to zero while the producer is still
                # blocked on the bounded queue.
                logger.exception(
                    "Unexpected error while uploading: %s", item[0])
            finally:
                q.task_done()

    for _ in range(UPLOADING_WORKERS_COUNT):
        gevent.spawn(worker)

    for p in photos:
        q.put((p.local_path, p.ext_album_key))

    q.join()
def clean_hidden_files_from_db():
    """Delete Photo rows whose file name starts with a dot.

    Every stored ``local_path`` is inspected; the matching paths are
    collected first and then removed in chunks of 300 inside a single
    transaction. Totals and per-chunk delete counts are logged.
    """
    all_photos = Photo.select(Photo.local_path)
    logger.info("Total photos in DB: %d", all_photos.count())

    hidden_paths = []
    for record in all_photos:
        base_name = os.path.basename(record.local_path)
        if not base_name.startswith('.'):
            continue
        logger.info("Hidden file found: %s", base_name)
        hidden_paths.append(record.local_path)

    logger.info("Total hidden files count: %d", len(hidden_paths))
    if not hidden_paths:
        return

    with db.atomic():
        for paths_chunk in chunks(hidden_paths, 300):
            delete_query = Photo.delete().where(
                Photo.local_path << paths_chunk)
            removed = delete_query.execute()
            logger.info("Removed: %d", removed)
def show_stat_by_md5(self):
    """Compare remote images with local rows by MD5 and log the totals.

    For each remote image its ``ArchivedMD5`` is looked up in
    ``Photo.local_md5``. When the first matching row has the same file
    name as the remote ``FileName``, the MD5 is saved to the row's
    ``ext_md5`` and counted as a success. A missing MD5 or a file name
    mismatch counts as failed; an MD5 with no row counts as not found.
    """
    matched = 0
    failed = 0
    missing = 0

    for remote_batch in self.api.get_remote_images():
        logger.info("[INFO] Total photos in API response: %d",
                    len(remote_batch))
        for remote in remote_batch:
            remote_md5 = remote.get('ArchivedMD5')
            if not remote_md5:
                failed += 1
                logger.info("[ERROR] ArchivedMD5 is NULL")
                continue

            candidates = Photo.select().where(Photo.local_md5 == remote_md5)
            if not candidates:
                logger.info("[ERROR] MD5 %s not found in DB for file: %s",
                            remote_md5, remote.get('FileName'))
                missing += 1
                continue

            # Only the first row with this MD5 is considered.
            local = candidates[0]
            if remote.get('FileName') != os.path.basename(local.local_path):
                logger.info("[ERROR] MD5 not equal. local: %s remote: %s",
                            local.local_path, remote.get('FileName'))
                failed += 1
                continue

            local.ext_md5 = remote_md5
            local.save()
            matched += 1

    logger.info(
        "\n\nSuccess: %d\nFailed: %d\nNot found: %d\nTotal: %d",
        matched, failed, missing,
        failed + missing + matched)
def sync_files_with_db(files_tree):
    """Return the local files that have no row in the Photo table yet.

    Args:
        files_tree: mapping ``root_path -> {folder -> bundle}`` where each
            bundle has a ``'files'`` list of local paths.

    Returns:
        A dict with the same ``root_path`` keys. Each value maps a folder
        to ``{'files': [new paths], 'album_uri': None}``; folders whose
        files are all in the DB already are left out.
    """
    photos_to_upload = {root_path: {} for root_path in files_tree}
    total_new_photos = 0

    for root_path, folders in files_tree.items():
        for folder, files_bundle in folders.items():
            files = files_bundle['files']
            logger.info('Album: %s', folder)
            logger.info("\tTotal photos: %d", len(files))

            # Start from an empty set for each album so the logged count
            # describes this album only, and collect plain path strings
            # rather than model instances.
            db_photos = set()
            for paths_chunk in chunks(files, 300):
                known = Photo.select(Photo.local_path).where(
                    (Photo.local_path << paths_chunk))
                db_photos.update(row.local_path for row in known)
            logger.info("\tPhotos exist in DB: %d", len(db_photos))

            new_photos = set(files) - db_photos
            if new_photos:
                photos_to_upload[root_path][folder] = {
                    'files': list(new_photos),
                    'album_uri': None,
                }
                total_new_photos += len(new_photos)
            else:
                logger.info(
                    "All photos already exist in DB. Upload skipped\n\n")

    if total_new_photos:
        logger.info("Total new photos to upload: %d", total_new_photos)
    return photos_to_upload
def upload_photo(photo_item):
    """Upload one local image to its remote album, retrying on failure.

    Args:
        photo_item: ``(img_path, album_uri)`` pair - the local file to
            send and the album URI passed in ``X-Smug-AlbumUri``.

    Up to five attempts are made. On a response with ``stat == 'ok'`` the
    Photo row is set to ``'uploaded'`` with the returned image URI and the
    function stops. Any other response, or any exception, sets the row to
    ``'failed'`` and leads to another attempt after a 5 second pause.
    Exceptions raised inside an attempt are logged and not re-raised.
    """
    api = SmugmugAPI()
    attempts = 5
    img_path, album_uri = photo_item

    while attempts:
        try:
            file_name = os.path.basename(img_path)
            headers = {
                'User-Agent': 'Safari',
                'X-Smug-ResponseType': 'JSON',
                'X-Smug-Version': 'v2',
                'Content-Type': guess_type(file_name)[0],
                'X-Smug-AlbumUri': album_uri,
                'X-Smug-FileName': file_name,
                'Content-Length': str(path.getsize(img_path)),
                'Content-MD5': get_md5(img_path),
                'X-Smug-Keywords': get_keywords(img_path),
            }
            if DEBUG:
                logger.debug(["Uploading:", img_path, 'to:', album_uri])

            with open(img_path, "rb") as f:
                data = f.read()
            response = api.r.post('http://upload.smugmug.com/',
                                  headers=headers,
                                  data=data,
                                  header_auth=True)
            # Drop the in-memory copy of the file as soon as it was sent.
            del data
            r = json.loads(response.content) or {}

            if r.get('stat') == 'ok':
                logger.info('\t\tPhoto uploaded: %s', file_name)
                field_data = {
                    "status": "uploaded",
                    "ext_key": r.get('Image').get('ImageUri'),
                    "ext_album_key": album_uri,
                }
                with db.atomic():
                    Photo.update(**field_data).where(
                        (Photo.local_path == img_path)).execute()
                break

            # No exception is active here, so use error() and not
            # exception(), which would attach an empty traceback.
            logger.error("Something goes wrong while uploading image")
            try:
                log = "\n".join([
                    str(response.content),
                    str(headers),
                    str(response.headers),
                    str(response.status_code),
                    str(response.reason)
                ])
            except Exception:
                logger.exception(
                    "Something goes wrong while uploading image")
                log = None
            field_data = {"status": "failed", "log": log}
            with db.atomic():
                Photo.update(**field_data).where(
                    (Photo.local_path == img_path)).execute()
            # Hand over to the retry handler below.
            raise Exception("stat is not OK")
        except Exception:
            attempts -= 1
            # Record why the attempt failed instead of dropping it silently.
            logger.exception(
                "Upload attempt failed for %s (%d attempts left)",
                img_path, attempts)
            field_data = {
                "status": "failed",
            }
            with db.atomic():
                Photo.update(**field_data).where(
                    (Photo.local_path == img_path)).execute()
            # Pause only when another attempt will follow.
            if attempts:
                sleep(5)
def get_photos_without_md5():
    """Select the Photo rows that have no ``local_md5`` and log their count.

    Returns:
        The peewee select query for those rows.
    """
    # The ORM builds the condition from the overloaded ``==`` operator, so
    # this comparison must not be rewritten as ``is None``.
    md5_missing = Photo.local_md5 == None  # noqa: E711
    query = Photo.select().where(md5_missing)
    logger.info("Total photos without MD5: %d", len(query))
    return query