示例#1
0
class Archive:
    """Read and write editable archives stored in DynamoDB.

    An editable archive row carries ``id``, ``title``, ``thumbnailUrl``,
    ``noSubtitle`` and ``items``; ``items`` holds ids of rows in the
    ``caption`` table.
    """

    # DynamoDB handles are created once, when the class body is executed.
    dynamodb = session.resource('dynamodb', region_name='ap-northeast-2')
    editableArchiveTable = dynamodb.Table('editableArchive')
    captionTable = dynamodb.Table('caption')

    def getArchive(self, id):
        """Return the archive ``id`` with its caption ids expanded to captions.

        The stored ``items`` (caption ids) are sorted, each caption row is
        fetched, and ``archive['items']`` is replaced by the list of caption
        rows. When the archive's ``noSubtitle`` flag is truthy, every
        caption's ``url`` is overwritten with its ``noSubtitleUrl``.

        Args:
            id: primary key of the row in the ``editableArchive`` table.

        Returns:
            The archive row (dict) with ``items`` holding caption rows.

        Raises:
            KeyError: if the archive or one of its captions does not exist,
                or a caption lacks ``noSubtitleUrl`` while it is required.
        """
        print(f'getArchive {id}')

        response = self.editableArchiveTable.get_item(Key={'id': id})
        # GetItem omits 'Item' when no row matches the key.
        if 'Item' not in response:
            raise KeyError(f'archive {id} not found')
        archive = response['Item']

        # The flag is per archive, so read it once instead of once per caption.
        # A row without the attribute is treated as "subtitles enabled".
        useNoSubtitleUrl = archive.get('noSubtitle', False)

        captions = []
        # TODO : Need to optimize by requesting with list
        for captionId in sorted(archive['items']):
            captionResponse = self.captionTable.get_item(Key={'id': captionId})
            if 'Item' not in captionResponse:
                raise KeyError(f'caption {captionId} not found')
            caption = captionResponse['Item']

            if useNoSubtitleUrl:
                caption['url'] = caption['noSubtitleUrl']

            captions.append(caption)

        archive['items'] = captions

        print(f'archive from dynamo : {archive}')

        return archive

    def setArchive(self, id, title, thumbnailUrl, items, noSubtitle):
        """Create or overwrite the editable archive ``id``.

        Args:
            id: primary key of the archive row.
            title: archive title.
            thumbnailUrl: URL of the archive thumbnail.
            items: caption ids belonging to the archive.
            noSubtitle: flag read back by ``getArchive`` to pick caption URLs.

        Returns:
            ``{'id': id}``.
        """
        print(f'setArchive {id} {title} {thumbnailUrl} {items}')

        self.editableArchiveTable.put_item(
            Item={
                'id': id,
                'title': title,
                'thumbnailUrl': thumbnailUrl,
                'items': items,
                'noSubtitle': noSubtitle
            })

        return {'id': id}
示例#2
0
class Capture:
    """Capture video frames, upload them to S3 and index them in DynamoDB."""

    RESULT_DIR = "results"
    S3_BUCKET = "captube.captures"
    S3_PREFIX = "https://s3.ap-northeast-2.amazonaws.com/captube.captures/"
    # AWS handles are created once, when the class body is executed.
    dynamodb = session.resource('dynamodb', region_name='ap-northeast-2')
    archiveTable = dynamodb.Table('archive')
    captureItemTable = dynamodb.Table('captureItem')

    s3_client = session.client('s3')

    def capture(self, url, language, numberToCapture, startTimeStamp,
                endTimeStamp):
        """Run a capture for ``url`` and persist the result.

        A fresh UUID is used both as the archive id and as the name handed to
        the capture script. The local temporary directory for that id is
        removed afterwards, whether or not the capture succeeded.

        Returns:
            dict with ``title``, ``thumbnailUrl``, ``id`` and ``captureItems``;
            each capture item's ``url`` points at the uploaded S3 object.

        Raises:
            Exception: whatever the capture script, S3 or DynamoDB raised.
        """
        print(
            f'capture, {url}, {language}, {numberToCapture}, {startTimeStamp}, {endTimeStamp}'
        )
        id = str(uuid.uuid4())

        try:
            video_info = self._executeCaptureScript(url, language,
                                                    numberToCapture,
                                                    startTimeStamp,
                                                    endTimeStamp, id)
            captureItems = self._convertToCaptureItems(video_info, id)
            self._store(captureItems)
        finally:
            self._clearLocalTemporary(id)

        return captureItems

    def _executeCaptureScript(self, url, language, numberToCapture,
                              startTimeStamp, endTimeStamp, name):
        """Build the video info for ``url`` and capture frames by subtitles.

        ``numberToCapture``, ``startTimeStamp`` and ``endTimeStamp`` are only
        logged here; they are not forwarded to the capture script.
        """
        print(
            f'execute capture script, {url}, {language}, {numberToCapture}, {startTimeStamp}, {endTimeStamp}, {name}'
        )
        video_info = run.make_youtube_info(url, name, language)
        video_info.save_json()
        capture.capture_by_subs(video_info)
        print(f'video_info : {video_info}')
        return video_info

    def _convertToCaptureItems(self, captureItemsByScript, id):
        """Map the script's video info onto the service's capture structure.

        Each frame becomes one capture item whose id is
        ``<archive id>_<frame_num>``. ``startTime`` and ``endTime`` are both
        filled from the frame's ``time_info``; ``url`` still holds the local
        image path at this point.
        """
        captureItems = [
            {
                "id": f'{id}_{frame_info["frame_num"]}',
                "url": frame_info["img_path"],
                "startTime": frame_info["time_info"],
                "endTime": frame_info["time_info"],
                "subtitle": frame_info["script"]
            }
            for frame_info in captureItemsByScript["frame_infos"]
        ]

        return {
            "title": captureItemsByScript["title"],
            "thumbnailUrl": captureItemsByScript["thumbnail"],
            "id": id,
            "captureItems": captureItems
        }

    def _store(self, convertedItems):
        """Upload the images, then write archive and capture item metadata."""
        print(f'store, {convertedItems}')
        urlAdjustedItems = self._storeImages(convertedItems)
        self._storeMetadata(urlAdjustedItems)

    def _storeImages(self, convertedItems):
        """Upload every capture image to S3 and rewrite its ``url`` in place.

        The S3 key is ``<archive id>_<local file name>``.

        Returns:
            The same ``convertedItems`` object, with S3 URLs in ``url``.
        """
        # TODO : Need exception handling logic, such as removing failed item.
        for captureItem in convertedItems["captureItems"]:
            captureFilePath = captureItem["url"]
            captureFileName = f'{convertedItems["id"]}_{os.path.basename(captureFilePath)}'
            captureItem["url"] = self._convertAsS3Url(captureFileName)
            self.s3_client.upload_file(
                captureFilePath,
                self.S3_BUCKET,
                captureFileName,
                ExtraArgs={'ContentType': 'image/jpeg'})

        return convertedItems

    def _convertAsS3Url(self, fileName):
        """Return the public S3 URL for the object key ``fileName``."""
        return f'{self.S3_PREFIX}{fileName}'

    def _storeMetadata(self, urlAdjustedItems):
        """Write the archive row and one row per capture item to DynamoDB."""
        # TODO : Need exception handling logic, such as removing failed item.
        response = self.archiveTable.put_item(
            Item={
                'id': urlAdjustedItems['id'],
                'title': urlAdjustedItems['title'],
                'thumbnailUrl': urlAdjustedItems['thumbnailUrl']
            })

        print(f'Succeed to store Archive {urlAdjustedItems["id"]}')
        print(json.dumps(response, indent=4))

        for captureItem in urlAdjustedItems["captureItems"]:
            # The JSON round trip turns floats into Decimal before put_item.
            response = self.captureItemTable.put_item(Item=json.loads(
                json.dumps({
                    "id": captureItem["id"],
                    "archiveId": urlAdjustedItems["id"],
                    "startTime": captureItem["startTime"],
                    "endTime": captureItem["endTime"],
                    "subtitle": captureItem["subtitle"],
                    "url": captureItem["url"]
                }),
                parse_float=Decimal))

            print(f'Succeed to store captureItem {captureItem["id"]}')
            print(json.dumps(response, indent=4))

    def _clearLocalTemporary(self, id):
        """Remove the local temporary directory ``RESULT_DIR/<id>``.

        This runs from a ``finally`` block, so a missing directory (for
        example when the capture failed before creating it) must not raise
        and hide the original error.
        """
        try:
            shutil.rmtree(f'{self.RESULT_DIR}/{id}')
        except FileNotFoundError:
            print(f'No local temporary directory to clear for {id}')
示例#3
0
class CaptureSaver:
    """Persist capture results: video metadata, images and caption rows."""

    S3_BUCKET = "captube.captures"
    S3_PREFIX = "https://s3.ap-northeast-2.amazonaws.com/captube.captures/"
    # AWS handles are created once, when the class body is executed.
    dynamodb = session.resource('dynamodb', region_name='ap-northeast-2')
    videoTable = dynamodb.Table('video')
    captionTable = dynamodb.Table('caption')

    s3_client = session.client('s3')

    def save(self, captureInformation):
        """Store the video row if it is new, then the not-yet-stored captures.

        Args:
            captureInformation: dict with ``id``, ``title``, ``thumbnailUrl``
                and ``captureItems``.
        """
        print(f'save, {captureInformation}')
        self._storeVideoMetadata(captureInformation)
        self._storeImages(captureInformation)

    def _storeImages(self, captureInformation):
        """Upload both images of every new capture and write its caption row.

        Captures whose id is already present in the ``caption`` table are
        skipped (see ``_getToSaveCaptures``).
        """
        # TODO : Need exception handling logic, such as removing failed item.
        for captureItem in self._getToSaveCaptures(captureInformation):
            self.s3_client.upload_file(
                captureItem['localFilePath'],
                self.S3_BUCKET,
                captureItem['saveFileName'],
                ExtraArgs={'ContentType': 'image/jpeg'})
            self.s3_client.upload_file(
                captureItem['localNoSubtitleFilePath'],
                self.S3_BUCKET,
                captureItem['noSubtitleSaveFileName'],
                ExtraArgs={'ContentType': 'image/jpeg'})

            # The JSON round trip turns floats into Decimal before put_item.
            response = self.captionTable.put_item(
                Item=json.loads(json.dumps({
                    "id": captureItem["id"],
                    "videoId": captureInformation["id"],
                    "timeStamp": captureItem["timeStamp"],
                    "subtitle": captureItem["subtitle"],
                    "url": captureItem["url"],
                    "noSubtitleUrl": captureItem["noSubtitleUrl"]
                }), parse_float=Decimal))

            print(f'Succeed to store captureItem {captureItem["id"]}')
            print(json.dumps(response, indent=4))

    def _getToSaveCaptures(self, captureInformation):
        """Return the capture items that have no caption row yet.

        The first and last item's ``timeStamp`` bound the lookup of stored
        captions. An empty item list, or a first time stamp greater than the
        last one, yields an empty result.
        """
        capturedItems = captureInformation['captureItems']
        # Nothing captured: nothing to save (and no [0] / [-1] to read).
        if not capturedItems:
            return []

        startTime = capturedItems[0]['timeStamp']
        endTime = capturedItems[-1]['timeStamp']
        if startTime > endTime:
            return []

        storedCaptions = self._getCaptions(captureInformation['id'],
                                           startTime, endTime)
        # A set gives constant-time membership instead of a nested loop.
        storedIds = {storedCaption['id'] for storedCaption in storedCaptions}

        return [
            captureItem for captureItem in capturedItems
            if captureItem['id'] not in storedIds
        ]

    def _getCaptions(self, videoId, startTime, endTime):
        """Return every stored caption of ``videoId`` within the time range.

        Both bounds are inclusive. The scan follows ``LastEvaluatedKey`` so
        matches beyond the first result page are included as well.
        """
        # Decimal(str(x)) keeps the short decimal form of a float, matching
        # how values are written via json.loads(..., parse_float=Decimal);
        # Decimal(float) would produce the exact binary expansion instead.
        filterExpression = (
            Attr('videoId').eq(videoId)
            & Attr('timeStamp').gte(Decimal(str(startTime)))
            & Attr('timeStamp').lte(Decimal(str(endTime))))

        captions = []
        scanArguments = {'FilterExpression': filterExpression}
        while True:
            page = self.captionTable.scan(**scanArguments)
            captions.extend(page['Items'])
            lastEvaluatedKey = page.get('LastEvaluatedKey')
            if lastEvaluatedKey is None:
                break
            scanArguments['ExclusiveStartKey'] = lastEvaluatedKey

        print(f'captions from dynamo : {len(captions)} for {videoId} between {startTime} and {endTime}')

        return captions

    def _storeVideoMetadata(self, captureInformation):
        """Write the video row unless one with the same id already exists."""
        # TODO : Need exception handling logic, such as removing failed item.
        if self._needSaveVideoMetadata(captureInformation['id']):
            response = self.videoTable.put_item(
                Item={
                    'id': captureInformation['id'],
                    'title': captureInformation['title'],
                    'thumbnailUrl': captureInformation['thumbnailUrl']
                })

            print(f'Succeed to store Archive {captureInformation["id"]}')
            print(json.dumps(response, indent=4))

    def _needSaveVideoMetadata(self, id):
        """Return True when no video row exists for ``id``."""
        return self._getVideo(id) is None

    def _getVideo(self, id):
        """Return the video row for ``id``, or None when there is none."""
        response = self.videoTable.get_item(Key={'id': id})
        # GetItem omits 'Item' when no row matches the key.
        video = response.get('Item')

        print(f'video from dynamo : {video} for {id}')

        return video
示例#4
0
class Capture:
    """Capture subtitled frames of a YouTube video and hand them to the saver."""

    RESULT_DIR = "results"
    S3_BUCKET = "captube.captures"
    S3_PREFIX = "https://s3.ap-northeast-2.amazonaws.com/captube.captures/"
    dynamodb = session.resource('dynamodb', region_name='ap-northeast-2')
    # TODO : Need DI
    youtubeIdParser = YoutubeIdParser()
    captureRunner = CaptureRunner()
    captureSaver = CaptureSaver()

    s3_client = session.client('s3')

    # youtubeObject is need, because we cannot inject core.youtube.youtube.
    # core.youtube.youtube constructor requires url as parameter
    _youtube = None

    def getAvailableLanguage(self, url):
        """Return the caption languages available for the video at ``url``.

        Returns:
            ``{"languages": ...}`` as reported by the youtube helper.

        Raises:
            Exception: whatever the youtube helper raised; it is logged and
                re-raised unchanged.
        """
        try:
            self._youtube = youtube(url)
            # FIXME: pytube.exceptions.VideoUnavailable: fTTGALaRZoc is unavailable
            caption = self._youtube.get_captions()
        except Exception as e:
            print(f'Exception occurred during get languages {e}')
            raise

        return {"languages": self._youtube.get_available_langs(caption)}

    def capture(self, url, language, numberToCapture, startTimeStamp,
                endTimeStamp):
        """Capture frames of ``url`` for ``language`` and save the result.

        The capture id is ``<parsed video id>_<language>``; the runner works
        in a directory named by a fresh UUID, which is removed afterwards
        whether or not the capture succeeded.

        Returns:
            The capture information dict built by ``_asCaptureInformation``.

        Raises:
            Exception: whatever the runner or saver raised; it is logged and
                re-raised unchanged.
        """
        print(
            f'capture, {url}, {language}, {numberToCapture}, {startTimeStamp}, {endTimeStamp}'
        )
        id = f'{self.youtubeIdParser.parse(url)}_{language}'
        workingPath = str(uuid.uuid4())

        try:
            videoInformation = self.captureRunner.capture(
                url, language, numberToCapture, startTimeStamp, endTimeStamp,
                workingPath)
            captureInformation = self._asCaptureInformation(
                videoInformation, id)
            self.captureSaver.save(captureInformation)
        except Exception as e:
            print(f'Exception occurred during capture {e}')
            raise
        finally:
            self._clearLocalTemporary(workingPath)

        return captureInformation

    def _asCaptureInformation(self, captureResultByScript, id):
        """Map the runner's result onto the structure the saver expects.

        Every frame yields one capture item carrying its id
        (``<id>_<frame_num>``), the local paths of the subtitled and
        subtitle-free images, the S3 object names they are saved under and
        the matching S3 URLs.
        """
        result = {
            "title": captureResultByScript["title"],
            "thumbnailUrl": captureResultByScript["thumbnail"],
            "id": id,
            "captureItems": []
        }

        for frame_info in captureResultByScript["frame_infos"]:
            frameNumber = frame_info["frame_num"]
            captureItemId = f'{id}_{frameNumber}'
            path = frame_info["img_path"]
            noSubtitlePath = self._getNoSubtitleImagePath(path)
            fileName = f'{id}_{os.path.basename(path)}'
            noSubtitleFileName = f'noSub_{id}_{os.path.basename(noSubtitlePath)}'

            result["captureItems"].append({
                "id": captureItemId,
                "frameNumber": frameNumber,
                "url": self._convertAsS3Url(fileName),
                "noSubtitleUrl": self._convertAsS3Url(noSubtitleFileName),
                "localFilePath": path,
                "localNoSubtitleFilePath": noSubtitlePath,
                "saveFileName": fileName,
                "noSubtitleSaveFileName": noSubtitleFileName,
                # TODO : video information should provide time stamp
                # "timeStamp": frame_info["time_info"],
                "timeStamp": 0,
                "subtitle": frame_info["script"]
            })

        return result

    def _convertAsS3Url(self, fileName):
        """Return the public S3 URL for the object key ``fileName``."""
        return f'{self.S3_PREFIX}{fileName}'

    def _getNoSubtitleImagePath(self, imagePath):
        """Return the path of the subtitle-free twin of ``imagePath``.

        The twin has the same file name inside a ``nosub`` subdirectory next
        to the original image.
        """
        return f'{os.path.dirname(imagePath)}/nosub/{os.path.basename(imagePath)}'

    def _clearLocalTemporary(self, id):
        """Remove the local temporary directory ``RESULT_DIR/<id>``.

        This runs from a ``finally`` block, so a missing directory (for
        example when the capture failed before creating it) must not raise
        and hide the original error.
        """
        try:
            shutil.rmtree(f'{self.RESULT_DIR}/{id}')
        except FileNotFoundError:
            print(f'No local temporary directory to clear for {id}')
示例#5
0
from boto3.dynamodb.conditions import Attr
from deprecated import deprecated

from business import session

# Module-level DynamoDB resource for the ap-northeast-2 region, built from the
# project's `session` object when this module is imported.
dynamodb = session.resource('dynamodb', region_name='ap-northeast-2')
# Table handle scanned by Archive.getPagedArchive.
archiveTable = dynamodb.Table('archive')
# Table handle for 'captureItem'; not referenced in the visible part of this file.
captureItemTable = dynamodb.Table('captureItem')


@deprecated
class Archive:
    _DEFAULT_PAGE_SIZE = 25

    def getPagedArchive(self, pageKey, pageSize):
        print(f'getPagedArchive, pageKey : {pageKey} pageSize :{pageSize}')
        limit = self._DEFAULT_PAGE_SIZE if pageSize is None else pageSize
        if pageKey is None:
            print(f'getPagedArchive - query only with limit {limit}')
            queryResult = archiveTable.scan(Limit=int(limit))
        else:
            print(f'getPagedArchive - query with limit {limit} and ExclusiveStartKey {pageKey}')
            queryResult = archiveTable.scan(
                Limit=int(limit),
                ExclusiveStartKey=pageKey)

        print(f'paged archives from dynamo : {queryResult}')

        response = {'archives': queryResult['Items'], 'nextPageKey': queryResult['LastEvaluatedKey']} \
            if 'LastEvaluatedKey' in queryResult else \
            {'archives': queryResult['Items']}