class Archive:
    """Read and write editable archives stored in DynamoDB.

    Archive records live in the ``editableArchive`` table; the caption
    records they reference by id live in the ``caption`` table.
    """

    dynamodb = session.resource('dynamodb', region_name='ap-northeast-2')
    editableArchiveTable = dynamodb.Table('editableArchive')
    captionTable = dynamodb.Table('caption')

    def getArchive(self, id):
        """Return the archive ``id`` with its caption ids replaced by records.

        The ids stored under ``items`` are sorted and each one is fetched
        from the caption table. When the archive's ``noSubtitle`` value is
        truthy, every caption's ``url`` is overwritten with its
        ``noSubtitleUrl``.

        Raises:
            KeyError: if the archive or one of its captions is not found
                (the DynamoDB response then has no ``Item`` entry).
        """
        print(f'getArchive {id}')
        lookup = self.editableArchiveTable.get_item(Key={'id': id})
        archive = lookup['Item']
        orderedIds = sorted(archive['items'])
        archive['items'] = []

        # TODO : Need to optimize by requesting with list
        resolved = []
        for captionId in orderedIds:
            record = self.captionTable.get_item(Key={'id': captionId})['Item']
            if archive['noSubtitle']:
                record['url'] = record['noSubtitleUrl']
            resolved.append(record)

        archive['items'] = resolved
        print(f'archive from dynamo : {archive}')
        return archive

    def setArchive(self, id, title, thumbnailUrl, items, noSubtitle):
        """Write (or overwrite) the archive record and return ``{'id': id}``."""
        print(f'setArchive {id} {title} {thumbnailUrl} {items}')
        record = {
            'id': id,
            'title': title,
            'thumbnailUrl': thumbnailUrl,
            'items': items,
            'noSubtitle': noSubtitle,
        }
        self.editableArchiveTable.put_item(Item=record)
        return {'id': id}
class Capture:
    """Run the capture script for a video and persist its results.

    Captured images are uploaded to S3; archive and capture-item metadata
    are written to the ``archive`` and ``captureItem`` DynamoDB tables.
    """

    RESULT_DIR = "results"
    S3_BUCKET = "captube.captures"
    S3_PREFIX = "https://s3.ap-northeast-2.amazonaws.com/captube.captures/"
    dynamodb = session.resource('dynamodb', region_name='ap-northeast-2')
    archiveTable = dynamodb.Table('archive')
    captureItemTable = dynamodb.Table('captureItem')
    s3_client = session.client('s3')

    def capture(self, url, language, numberToCapture, startTimeStamp,
                endTimeStamp):
        """Capture ``url``, store the results and return the stored items.

        A fresh UUID is used both as the archive id and as the name handed
        to the capture script. The local working directory for that name is
        removed afterwards whether or not the capture succeeded.

        Returns:
            dict with ``title``, ``thumbnailUrl``, ``id`` and
            ``captureItems`` (whose ``url`` values point at S3).
        """
        print(
            f'capture, {url}, {language}, {numberToCapture}, {startTimeStamp}, {endTimeStamp}'
        )
        captureId = str(uuid.uuid4())
        try:
            video_info = self._executeCaptureScript(
                url, language, numberToCapture, startTimeStamp, endTimeStamp,
                captureId)
            captureItems = self._convertToCaptureItems(video_info, captureId)
            self._store(captureItems)
        finally:
            self._clearLocalTemporary(captureId)
        return captureItems

    def _executeCaptureScript(self, url, language, numberToCapture,
                              startTimeStamp, endTimeStamp, name):
        """Build the video info for ``url`` and run the subtitle capture."""
        print(
            f'execute capture script, {url}, {language}, {numberToCapture}, {startTimeStamp}, {endTimeStamp}, {name}'
        )
        video_info = run.make_youtube_info(url, name, language)
        video_info.save_json()
        capture.capture_by_subs(video_info)
        print(f'video_info : {video_info}')
        return video_info

    def _convertToCaptureItems(self, captureItemsByScript, id):
        """Map the script's result onto the structure stored by this class.

        Each frame becomes one capture item whose id is
        ``<archive id>_<frame_num>``; ``url`` still holds the local image
        path at this point.
        """
        convertedItems = {
            "title": captureItemsByScript["title"],
            "thumbnailUrl": captureItemsByScript["thumbnail"],
            "id": id,
            "captureItems": []
        }
        for frame_info in captureItemsByScript["frame_infos"]:
            convertedItems["captureItems"].append({
                "id": f'{convertedItems["id"]}_{frame_info["frame_num"]}',
                "url": frame_info["img_path"],
                # The script result exposes a single time value per frame,
                # so start and end are the same.
                "startTime": frame_info["time_info"],
                "endTime": frame_info["time_info"],
                "subtitle": frame_info["script"]
            })
        return convertedItems

    def _store(self, convertedItems):
        """Upload the images, then write the metadata."""
        print(f'store, {convertedItems}')
        urlAdjustedItems = self._storeImages(convertedItems)
        self._storeMetadata(urlAdjustedItems)
        return

    def _storeImages(self, convertedItems):
        """Upload every capture image to S3 and rewrite its ``url`` in place.

        Returns the same ``convertedItems`` object, now carrying S3 URLs.
        """
        # TODO : Need exception handling logic, such as removing failed item.
        for captureItem in convertedItems["captureItems"]:
            captureFilePath = captureItem["url"]
            captureFileName = f'{convertedItems["id"]}_{os.path.basename(captureItem["url"])}'
            captureItem["url"] = self._convertAsS3Url(captureFileName)
            self.s3_client.upload_file(
                captureFilePath,
                self.S3_BUCKET,
                captureFileName,
                ExtraArgs={'ContentType': 'image/jpeg'})
        return convertedItems

    def _convertAsS3Url(self, fileName):
        """Return the public S3 URL for ``fileName``."""
        return f'{self.S3_PREFIX}{fileName}'

    def _storeMetadata(self, urlAdjustedItems):
        """Write the archive record and one record per capture item."""
        # TODO : Need exception handling logic, such as removing failed item.
        response = self.archiveTable.put_item(
            Item={
                'id': urlAdjustedItems['id'],
                'title': urlAdjustedItems['title'],
                'thumbnailUrl': urlAdjustedItems['thumbnailUrl']
            })
        print(f'Succeed to store Archive {urlAdjustedItems["id"]}')
        print(json.dumps(response, indent=4))
        for captureItem in urlAdjustedItems["captureItems"]:
            # Round-trip through JSON so floats are parsed as Decimal
            # before the item is handed to put_item.
            response = self.captureItemTable.put_item(Item=json.loads(
                json.dumps({
                    "id": captureItem["id"],
                    "archiveId": urlAdjustedItems["id"],
                    "startTime": captureItem["startTime"],
                    "endTime": captureItem["endTime"],
                    "subtitle": captureItem["subtitle"],
                    "url": captureItem["url"]
                }),
                parse_float=Decimal))
            print(f'Succeed to store captureItem {captureItem["id"]}')
            print(json.dumps(response, indent=4))
        return

    def _clearLocalTemporary(self, id):
        """Remove the local working directory ``RESULT_DIR/<id>``.

        This runs from a ``finally`` clause. If the capture failed before
        the directory was created, a FileNotFoundError raised here would
        replace the original error, so a missing directory is logged and
        ignored instead.
        """
        try:
            shutil.rmtree(f'{self.RESULT_DIR}/{id}')
        except FileNotFoundError:
            print(f'No local temporary directory to clear for {id}')
        return
class CaptureSaver:
    """Persist capture results: images to S3, metadata to DynamoDB.

    Video metadata goes to the ``video`` table and one record per caption
    to the ``caption`` table. Captions whose id is already stored are
    skipped.
    """

    S3_BUCKET = "captube.captures"
    S3_PREFIX = "https://s3.ap-northeast-2.amazonaws.com/captube.captures/"
    dynamodb = session.resource('dynamodb', region_name='ap-northeast-2')
    videoTable = dynamodb.Table('video')
    captionTable = dynamodb.Table('caption')
    s3_client = session.client('s3')

    def save(self, captureInformation):
        """Store the video metadata, then the not-yet-stored captures."""
        print(f'save, {captureInformation}')
        self._storeVideoMetadata(captureInformation)
        self._storeImages(captureInformation)
        return

    def _storeImages(self, captureInformation):
        """Upload both image variants of each new capture and record it."""
        # TODO : Need exception handling logic, such as removing failed item.
        toSaveCaptures = self._getToSaveCaptures(captureInformation)
        jpeg = {'ContentType': 'image/jpeg'}
        for captureItem in toSaveCaptures:
            self.s3_client.upload_file(captureItem['localFilePath'],
                                       self.S3_BUCKET,
                                       captureItem['saveFileName'],
                                       ExtraArgs=jpeg)
            self.s3_client.upload_file(
                captureItem['localNoSubtitleFilePath'],
                self.S3_BUCKET,
                captureItem['noSubtitleSaveFileName'],
                ExtraArgs=jpeg)
            # Round-trip through JSON so floats are parsed as Decimal
            # before the item is handed to put_item.
            response = self.captionTable.put_item(
                Item=json.loads(json.dumps({
                    "id": captureItem["id"],
                    "videoId": captureInformation["id"],
                    "timeStamp": captureItem["timeStamp"],
                    "subtitle": captureItem["subtitle"],
                    "url": captureItem["url"],
                    "noSubtitleUrl": captureItem["noSubtitleUrl"]
                }), parse_float=Decimal))
            print(f'Succeed to store captureItem {captureItem["id"]}')
            print(json.dumps(response, indent=4))

    def _getToSaveCaptures(self, captureInformation):
        """Return the capture items whose id is not stored yet.

        Stored captions are looked up for the video between the timestamps
        of the first and last capture item. An empty ``captureItems`` list,
        or a first timestamp greater than the last, yields an empty result.
        """
        capturedItems = captureInformation['captureItems']
        if not capturedItems:
            # Nothing was captured, so there is nothing to compare or save.
            return []

        startTime = capturedItems[0]['timeStamp']
        endTime = capturedItems[-1]['timeStamp']
        if startTime > endTime:
            return []

        storedCaptions = self._getCaptions(captureInformation['id'],
                                           startTime, endTime)
        # A set makes each membership test O(1) instead of rescanning the
        # stored captions for every captured item.
        storedIds = {storedCaption['id'] for storedCaption in storedCaptions}
        return [item for item in capturedItems if item['id'] not in storedIds]

    def _getCaptions(self, videoId, startTime, endTime):
        """Return every stored caption of ``videoId`` in the time range.

        A single scan call may return only part of the matches and signal
        that with ``LastEvaluatedKey``; the scan is therefore repeated with
        ``ExclusiveStartKey`` until no such key is returned.
        """
        scanArguments = {
            'FilterExpression':
            Attr('videoId').eq(videoId)
            & Attr('timeStamp').gte(Decimal(startTime))
            & Attr('timeStamp').lte(Decimal(endTime))
        }
        captions = []
        while True:
            page = self.captionTable.scan(**scanArguments)
            captions.extend(page['Items'])
            lastKey = page.get('LastEvaluatedKey')
            if not lastKey:
                break
            scanArguments['ExclusiveStartKey'] = lastKey
        print(f'captions from dynamo : {len(captions)} for {videoId} between {startTime} and {endTime}')
        return captions

    def _storeVideoMetadata(self, captureInformation):
        """Write the video record unless one with the same id exists."""
        # TODO : Need exception handling logic, such as removing failed item.
        if self._needSaveVideoMetadata(captureInformation['id']):
            response = self.videoTable.put_item(
                Item={
                    'id': captureInformation['id'],
                    'title': captureInformation['title'],
                    'thumbnailUrl': captureInformation['thumbnailUrl']
                })
            print(f'Succeed to store Archive {captureInformation["id"]}')
            print(json.dumps(response, indent=4))

    def _needSaveVideoMetadata(self, id):
        """Return True when no video record with ``id`` is stored."""
        return self._getVideo(id) is None

    def _getVideo(self, id):
        """Return the stored video record for ``id``, or None if absent."""
        response = self.videoTable.get_item(Key={'id': id})
        video = response.get('Item')
        print(f'video from dynamo : {video} for {id}')
        return video
class Capture:
    """Entry point for language lookup and capture of a video.

    Capturing is delegated to ``CaptureRunner`` and persistence to
    ``CaptureSaver``; this class builds the capture information passed
    between them and cleans up the local working directory.
    """

    RESULT_DIR = "results"
    S3_BUCKET = "captube.captures"
    S3_PREFIX = "https://s3.ap-northeast-2.amazonaws.com/captube.captures/"
    dynamodb = session.resource('dynamodb', region_name='ap-northeast-2')
    # TODO : Need DI
    youtubeIdParser = YoutubeIdParser()
    captureRunner = CaptureRunner()
    captureSaver = CaptureSaver()
    s3_client = session.client('s3')
    # The youtube object is held here because core.youtube.youtube cannot
    # be injected: its constructor requires the url as a parameter.
    _youtube = None

    def getAvailableLanguage(self, url):
        """Return ``{"languages": [...]}`` for the captions of ``url``.

        Any exception raised while loading the video or its captions is
        logged and re-raised.
        """
        try:
            self._youtube = youtube(url)
            # FIXME: pytube.exceptions.VideoUnavailable: fTTGALaRZoc is unavailable
            caption = self._youtube.get_captions()
        except Exception as e:
            print(f'Exception occurred during get languages {e}')
            raise
        return {"languages": self._youtube.get_available_langs(caption)}

    def capture(self, url, language, numberToCapture, startTimeStamp,
                endTimeStamp):
        """Capture ``url`` in ``language``, save the result and return it.

        The capture id is ``<parsed video id>_<language>``. The runner works
        in a directory named by a fresh UUID, which is removed afterwards
        whether or not the capture succeeded. Exceptions are logged and
        re-raised.
        """
        print(
            f'capture, {url}, {language}, {numberToCapture}, {startTimeStamp}, {endTimeStamp}'
        )
        captureId = f'{self.youtubeIdParser.parse(url)}_{language}'
        workingPath = str(uuid.uuid4())
        try:
            videoInformation = self.captureRunner.capture(
                url, language, numberToCapture, startTimeStamp, endTimeStamp,
                workingPath)
            captureInformation = self._asCaptureInformation(
                videoInformation, captureId)
            self.captureSaver.save(captureInformation)
        except Exception as e:
            print(f'Exception occurred during capture {e}')
            raise
        finally:
            self._clearLocalTemporary(workingPath)
        return captureInformation

    def _asCaptureInformation(self, captureResultByScript, id):
        """Convert the runner's result into the structure the saver stores.

        Every frame yields one capture item carrying its local file paths,
        the S3 file names to save under, and the resulting S3 URLs, for
        both the regular image and its ``nosub`` counterpart.
        """
        result = {
            "title": captureResultByScript["title"],
            "thumbnailUrl": captureResultByScript["thumbnail"],
            "id": id,
            "captureItems": []
        }
        for frame_info in captureResultByScript["frame_infos"]:
            frameNumber = frame_info["frame_num"]
            path = frame_info["img_path"]
            noSubtitlePath = self._getNoSubtitleImagePath(path)
            fileName = f'{id}_{os.path.basename(path)}'
            noSubtitleFileName = f'noSub_{id}_{os.path.basename(noSubtitlePath)}'
            result["captureItems"].append({
                "id": f'{id}_{frameNumber}',
                "frameNumber": frameNumber,
                "url": self._convertAsS3Url(fileName),
                "noSubtitleUrl": self._convertAsS3Url(noSubtitleFileName),
                "localFilePath": path,
                "localNoSubtitleFilePath": noSubtitlePath,
                "saveFileName": fileName,
                "noSubtitleSaveFileName": noSubtitleFileName,
                # TODO : video information should provide time stamp
                # "timeStamp": frame_info["time_info"],
                "timeStamp": 0,
                "subtitle": frame_info["script"]
            })
        return result

    def _convertAsS3Url(self, fileName):
        """Return the public S3 URL for ``fileName``."""
        return f'{self.S3_PREFIX}{fileName}'

    def _getNoSubtitleImagePath(self, imagePath):
        """Return the path of the same file inside the ``nosub`` subfolder."""
        return f'{os.path.dirname(imagePath)}/nosub/{os.path.basename(imagePath)}'

    def _clearLocalTemporary(self, id):
        """Remove the local working directory ``RESULT_DIR/<id>``.

        This runs from a ``finally`` clause. If the capture failed before
        the directory was created, a FileNotFoundError raised here would
        replace the original error, so a missing directory is logged and
        ignored instead.
        """
        try:
            shutil.rmtree(f'{self.RESULT_DIR}/{id}')
        except FileNotFoundError:
            print(f'No local temporary directory to clear for {id}')
        return
from boto3.dynamodb.conditions import Attr from deprecated import deprecated from business import session dynamodb = session.resource('dynamodb', region_name='ap-northeast-2') archiveTable = dynamodb.Table('archive') captureItemTable = dynamodb.Table('captureItem') @deprecated class Archive: _DEFAULT_PAGE_SIZE = 25 def getPagedArchive(self, pageKey, pageSize): print(f'getPagedArchive, pageKey : {pageKey} pageSize :{pageSize}') limit = self._DEFAULT_PAGE_SIZE if pageSize is None else pageSize if pageKey is None: print(f'getPagedArchive - query only with limit {limit}') queryResult = archiveTable.scan(Limit=int(limit)) else: print(f'getPagedArchive - query with limit {limit} and ExclusiveStartKey {pageKey}') queryResult = archiveTable.scan( Limit=int(limit), ExclusiveStartKey=pageKey) print(f'paged archives from dynamo : {queryResult}') response = {'archives': queryResult['Items'], 'nextPageKey': queryResult['LastEvaluatedKey']} \ if 'LastEvaluatedKey' in queryResult else \ {'archives': queryResult['Items']}