Exemplo n.º 1
0
def parse_playlist_metadata(item):
    '''
    Flatten a raw playlist resource into a dictionary of selected fields.

    The result contains playlist_name, playlist_id, playlist_publish_date,
    playlist_n_videos, channel_id, channel_name and collection_date.

    :param item: json document
    :type item: dict

    :returns: parsed dictionary, or an empty dictionary when ``item`` is not a dict
    :rtype: dict

    :raises KeyError: if ``item`` lacks ``snippet``, ``id`` or ``contentDetails``
    '''
    if not isinstance(item, dict):
        return {}

    snippet = item['snippet']

    parsed = {}
    parsed["playlist_name"] = snippet.get('title')
    parsed["playlist_id"] = item['id']
    parsed["playlist_publish_date"] = parse_yt_datetime(snippet.get('publishedAt'))
    parsed["playlist_n_videos"] = item['contentDetails'].get('itemCount')
    parsed["channel_id"] = snippet.get('channelId')
    parsed["channel_name"] = snippet.get('channelTitle')
    # Timestamp of when this record was parsed, not of the playlist itself.
    parsed["collection_date"] = datetime.datetime.now()

    return parsed
Exemplo n.º 2
0
def parse_rec_video_metadata(item):
    '''
    Flatten a raw search-result style video resource into a dictionary.

    The result contains video_id, channel_title, channel_id,
    video_publish_date, video_title, video_description, video_category,
    video_thumbnail and collection_date.

    :param item: json document
    :type item: dict

    :returns: parsed dictionary, or an empty dictionary when ``item`` is not a dict
    :rtype: dict

    :raises KeyError: if ``id``, ``snippet`` or the high-resolution thumbnail
        url is missing from ``item``
    '''
    if not isinstance(item, dict):
        return {}

    # Here ``id`` is itself a mapping that holds the video id.
    video_id = item['id'].get('videoId')
    snippet = item["snippet"]

    parsed = {"video_id": video_id}
    parsed["channel_title"] = snippet.get("channelTitle")
    parsed["channel_id"] = snippet.get("channelId")
    parsed["video_publish_date"] = parse_yt_datetime(snippet.get("publishedAt"))
    parsed["video_title"] = snippet.get("title")
    parsed["video_description"] = snippet.get("description")
    parsed["video_category"] = snippet.get("categoryId")
    parsed["video_thumbnail"] = snippet["thumbnails"]["high"]["url"]
    parsed["collection_date"] = datetime.datetime.now()

    return parsed
Exemplo n.º 3
0
def parse_subscription_descriptive(item):
    '''
    Flatten a raw subscription resource into a dictionary.

    The result contains subscription_title, subscription_channel_id,
    subscription_kind, subscription_publish_date and collection_date.

    :param item: json document
    :type item: dict

    :returns: parsed dictionary, or an empty dictionary when ``item`` is not a dict
    :rtype: dict

    :raises KeyError: if ``snippet``, its ``title`` or its ``resourceId`` is missing
    '''
    if not isinstance(item, dict):
        return {}

    snippet = item['snippet']
    title = snippet['title']
    # ``resourceId`` identifies what the subscription points at.
    resource = snippet['resourceId']

    return {
        "subscription_title": title,
        "subscription_channel_id": resource.get('channelId'),
        "subscription_kind": resource.get('kind'),
        "subscription_publish_date": parse_yt_datetime(snippet.get('publishedAt')),
        "collection_date": datetime.datetime.now(),
    }
Exemplo n.º 4
0
def parse_channel_metadata(item):
    '''
    Parses and processes raw output and returns channel_id, title, account_creation_date, keywords, description, view_count, video_count, subscription_count, playlist_id_likes, playlist_id_uploads, topic_ids, country, collection_date.

    ``topic_ids`` is the ``topicCategories`` list of ``topicDetails`` joined
    with ``|``. It is ``None`` when ``topicDetails`` is absent or carries no
    ``topicCategories`` entry.

    :param item: json document
    :type item: dict

    :returns: parsed dictionary, or an empty dictionary when ``item`` is not a dict
    :rtype: dict

    :raises KeyError: if ``id``, ``snippet``, ``brandingSettings.channel``,
        ``statistics`` or ``contentDetails.relatedPlaylists`` is missing
    '''
    if not isinstance(item, dict):
        return dict()

    topic = item.get('topicDetails')
    if topic:
        # topicDetails may be present without topicCategories; joining None
        # would raise TypeError, so fall back to None in that case.
        categories = topic.get('topicCategories')
        topic = '|'.join(categories) if categories is not None else None

    channel_meta = {
        "channel_id" : item['id'],
        "title" : item["snippet"].get("title"),
        "account_creation_date" : parse_yt_datetime(item["snippet"].get("publishedAt")),
        "keywords" : item['brandingSettings']['channel'].get('keywords'),
        "description" : item["snippet"].get("description"),
        "view_count" : item["statistics"].get("viewCount"),
        "video_count" : item["statistics"].get("videoCount"),
        "subscription_count" : item["statistics"].get("subscriberCount"),
        "playlist_id_likes" : item['contentDetails']['relatedPlaylists'].get('likes'),
        "playlist_id_uploads" : item['contentDetails']['relatedPlaylists'].get('uploads'),
        "topic_ids" : topic,
        "country" : item['snippet'].get('country'),
        "collection_date" : datetime.datetime.now()
    }

    return channel_meta
Exemplo n.º 5
0
def parse_video_metadata(item):
    '''
    Flatten a raw video resource into an ordered dictionary.

    The result contains video_id, channel_title, channel_id,
    video_publish_date, video_title, video_description, video_category,
    video_view_count, video_comment_count, video_like_count,
    video_dislike_count, video_thumbnail, video_tags and collection_date,
    in that order.

    :param item: json document

    :returns: parsed dictionary
    '''
    snippet = item["snippet"]

    # Tags are collapsed to a single "|"-separated string; absent tags become ''.
    raw_tags = snippet.get('tags')
    video_tags = '|'.join(raw_tags) if isinstance(raw_tags, Iterable) else ''

    parsed = OrderedDict()
    parsed['video_id'] = item['id']
    parsed['channel_title'] = snippet.get("channelTitle")
    parsed['channel_id'] = snippet.get("channelId")
    parsed['video_publish_date'] = parse_yt_datetime(snippet.get("publishedAt"))
    parsed['video_title'] = snippet.get("title")
    parsed['video_description'] = snippet.get("description")
    parsed['video_category'] = snippet.get("categoryId")

    stats = item["statistics"]
    parsed['video_view_count'] = stats.get("viewCount")
    parsed['video_comment_count'] = stats.get("commentCount")
    parsed['video_like_count'] = stats.get("likeCount")
    parsed['video_dislike_count'] = stats.get("dislikeCount")

    parsed['video_thumbnail'] = snippet["thumbnails"]["high"]["url"]
    parsed['video_tags'] = video_tags
    parsed['collection_date'] = datetime.datetime.now()

    return parsed
Exemplo n.º 6
0
def parse_channel_metadata(item):
    '''
    Flatten a raw channel resource into an ordered dictionary.

    The result contains id, title, publish_date, keywords, description,
    view_count, video_count, subscription_count, playlist_id_likes,
    playlist_id_uploads, topic_ids and collection_date.

    ``topic_ids`` is the JSON-encoded ``topicIds`` entry of ``topicDetails``;
    it is the string ``'null'`` when either is absent.

    :param item: json document

    :returns: parsed dictionary
    '''

    topic = item.get('topicDetails')
    if topic:
        # topicIds is nested inside topicDetails, not at the resource root.
        topic = topic.get('topicIds')

    channel_meta = OrderedDict(
        id=item['id'],
        title=item["snippet"].get("title"),
        publish_date=parse_yt_datetime(item["snippet"].get("publishedAt")),
        keywords=item['brandingSettings']['channel'].get('keywords'),
        description=item["snippet"].get("description"),
        view_count=item["statistics"].get("viewCount"),
        video_count=item["statistics"].get("videoCount"),
        subscription_count=item["statistics"].get("subscriberCount"),
        playlist_id_likes=item['contentDetails']['relatedPlaylists'].get(
            'likes'),
        playlist_id_uploads=item['contentDetails']['relatedPlaylists'].get(
            'uploads'),
        topic_ids=json.dumps(topic),
        collection_date=datetime.datetime.now())

    return channel_meta
Exemplo n.º 7
0
def parse_comment_metadata(item):
    '''
    Flatten a raw comment or comment-thread resource into an ordered dictionary.

    The result contains commenter_channel_url,
    commenter_channel_display_name, comment_id, comment_like_count,
    comment_publish_date, text, video_id, commenter_rating,
    comment_parent_id, collection_date and reply_count.

    When ``item['snippet']`` has a ``topLevelComment``, the fields are read
    from that nested comment and ``reply_count`` is read from the enclosing
    snippet; otherwise everything is read from ``item`` itself.

    :param item: json document

    :returns: parsed dictionary
    '''

    # Remember the outer snippet explicitly instead of relying on an
    # unbound local plus a bare ``except`` to detect the non-thread case.
    thread_snippet = None
    if item['snippet'].get('topLevelComment'):
        thread_snippet = item['snippet']
        item = thread_snippet['topLevelComment']

    snippet = item["snippet"]
    comment_meta = OrderedDict(
        commenter_channel_url=snippet.get("authorChannelUrl"),
        commenter_channel_display_name=snippet.get('authorDisplayName'),
        comment_id=item.get("id"),
        comment_like_count=snippet.get("likeCount"),
        comment_publish_date=parse_yt_datetime(snippet.get("publishedAt")),
        text=snippet.get("textDisplay"),
        video_id=snippet.get("videoId"),
        commenter_rating=snippet.get("viewerRating"),
        comment_parent_id=snippet.get("parentId"),
        collection_date=datetime.datetime.now())

    reply_source = item if thread_snippet is None else thread_snippet
    comment_meta['reply_count'] = reply_source.get('totalReplyCount')

    return comment_meta
Exemplo n.º 8
0
    def get_videos_from_playlist_id(self, playlist_id, next_page_token=None,
                                    published_after=datetime.datetime(1990,1,1),
                                    parser=P.parse_video_url, part=['snippet'], **kwargs):
        '''
        Given a `playlist_id`, returns `video_ids` associated with that playlist.

        Note that user uploads for any given channel are from a playlist named "upload playlist id". You can get this value using :meth:`youtube_api.youtube_api.get_channel_metadata` or :meth:`youtube_api.youtube_api_utils.get_upload_playlist_id`. The playlist ID for uploads is always the channel_id with "UU" subbed for "UC".

        Pages of up to 50 items are requested until there is no next page
        token, a page comes back without items, or an item is found whose
        publish date is not later than ``published_after``. The publish date
        is read from ``item['snippet']``, so ``part`` must include ``snippet``.

        Read the docs: https://developers.google.com/youtube/v3/docs/playlistItems

        :param playlist_id: the playlist_id, e.g. "UUaLfMkkHhSA_LaCta0BzyhQ"
        :type playlist_id: str
        :param next_page_token: a token to continue from a previously stopped query, e.g. "CDIQAA"
        :type next_page_token: str
        :param published_after: the minimum publish date; collection stops at the first item published on or before it.
        :type published_after: datetime
        :param parser: the function to parse the json document; a falsy value selects ``P.raw_json``
        :type parser: :mod:`youtube_api.parsers module`
        :param part: The part parameter specifies a comma-separated list of one or more resource properties that the API response will include. Different parameters cost different quota costs from the API.
        :type part: list
        :param kwargs: extra query parameters appended verbatim (not URL-encoded) to the request URL

        :returns: video ids associated with ``playlist_id``.
        :rtype: list of dict
        '''
        parser = parser if parser else P.raw_json
        part = ','.join(part)

        # Everything except the page token is identical for every page, so
        # build that part of the URL once.
        base_endpoint = ("https://www.googleapis.com/youtube/v{}/playlistItems"
                         "?part={}&playlistId={}&maxResults=50&key={}".format(
                             self.api_version, part, playlist_id, self.key))
        for k, v in kwargs.items():
            base_endpoint += '&{}={}'.format(k, v)

        videos = []
        while True:
            http_endpoint = base_endpoint
            if next_page_token:
                http_endpoint += "&pageToken={}".format(next_page_token)

            response_json = self._http_request(http_endpoint,
                                               timeout_in_n_seconds=20)
            items = response_json.get('items')
            if not items:
                break

            for item in items:
                publish_date = parse_yt_datetime(item['snippet'].get('publishedAt'))
                if publish_date <= published_after:
                    # Cutoff reached: this item and everything after it is dropped.
                    return videos
                videos.append(parser(item))

            next_page_token = response_json.get('nextPageToken')
            if not next_page_token:
                break

        return videos
Exemplo n.º 9
0
    def handle(self, *args, **options):
        '''
        Create an ImagoInfoVideo for each recently published playlist item.

        For every ImagoInfoContent of type ``tvshow`` with a ``source_id``,
        the playlist with that id is listed through the YouTube API. Each
        item published within the last day is saved as an ImagoInfoVideo,
        with its duration (in seconds) fetched through a second API call.

        NOTE(review): only the first page of playlist items is requested, and
        iteration stops at the first item older than the cutoff -- this
        presumably assumes the API returns items newest-first; confirm.

        :param args: unused positional arguments
        :param options: unused keyword options
        '''
        api_key = settings.YOUTUBE_API_KEY

        youtube = googleapiclient.discovery.build('youtube',
                                                  'v3',
                                                  developerKey=api_key)

        # Only consider content from yesterday on; computed once so that every
        # playlist is checked against the same cutoff.
        published_after = datetime.now() - timedelta(days=1)

        # Lookup all contents
        for content in ImagoInfoContent.objects.filter(
                type='tvshow', source_id__isnull=False):
            playlist_request = youtube.playlistItems().list(
                part="snippet,contentDetails",
                fields="items(id,snippet(title,publishedAt),contentDetails)",
                playlistId=content.source_id)
            playlist_response = playlist_request.execute()

            # Create new video for each result; a response without "items"
            # is treated as an empty playlist.
            for item in playlist_response.get('items') or []:
                published_at = parse_yt_datetime(
                    item['snippet'].get('publishedAt'))
                if published_at <= published_after:
                    break
                youtube_video_id = item['contentDetails']['videoId']
                video = ImagoInfoVideo(publication_date=published_at,
                                       title=item['snippet']['title'],
                                       content_id=content.content_id,
                                       thumbnail='youtube',
                                       hosting='youtube',
                                       youtube_id=youtube_video_id,
                                       start_time=0,
                                       end_time=0,
                                       type='tvshow')

                # Grab video duration through another API call. Distinct
                # names keep the playlist request/response from being
                # overwritten while its items are still being iterated.
                details_request = youtube.videos().list(
                    part="contentDetails",
                    id=youtube_video_id,
                    fields="items(contentDetails(duration))")
                details_response = details_request.execute()

                details = details_response.get('items')
                if not details:
                    # The lookup returned nothing for this id, so no duration
                    # is available; skip the video instead of crashing the
                    # whole run with an IndexError.
                    continue

                duration = details[0]['contentDetails']['duration']
                duration_in_seconds = isodate.parse_duration(
                    duration).total_seconds()
                video.duration = duration_in_seconds

                video.save()
0
def parse_video_url(item):
    '''
    Extract the identifying fields of a raw playlist-item style document.

    The result contains publish_date, video_id, channel_id and
    collection_date, in that order.

    :param item: json document

    :returns: parsed dictionary
    '''
    snippet = item['snippet']
    published = parse_yt_datetime(snippet.get('publishedAt'))

    parsed = OrderedDict()
    parsed['publish_date'] = published
    # The video id sits under the snippet's resourceId mapping.
    parsed['video_id'] = snippet['resourceId'].get('videoId')
    parsed['channel_id'] = snippet.get('channelId')
    parsed['collection_date'] = datetime.datetime.now()
    return parsed
Exemplo n.º 11
0
def parse_subscription_descriptive(item):
    '''
    Flatten a raw subscription resource into an ordered dictionary.

    The result contains subscription_title, subscription_channel_id,
    subscription_kind, subscription_publish_date and collection_date,
    in that order.

    :param item: json document

    :returns: parsed dictionary
    '''
    snippet = item['snippet']

    parsed = OrderedDict()
    parsed['subscription_title'] = snippet['title']

    # ``resourceId`` identifies what the subscription points at.
    resource = snippet['resourceId']
    parsed['subscription_channel_id'] = resource.get('channelId')
    parsed['subscription_kind'] = resource.get('kind')

    parsed['subscription_publish_date'] = parse_yt_datetime(
        snippet.get('publishedAt'))
    parsed['collection_date'] = datetime.datetime.now()

    return parsed
Exemplo n.º 12
0
def parse_comment_metadata(item):
    '''
    Parses and processes raw output and returns video_id, commenter_channel_url, commenter_channel_id, commenter_channel_display_name, comment_id, comment_like_count, comment_publish_date, text, commenter_rating, comment_parent_id, collection_date, reply_count.

    When ``item['snippet']`` has a ``topLevelComment``, the fields are read
    from that nested comment and ``reply_count`` is read from the enclosing
    snippet; otherwise everything is read from ``item`` itself.
    ``commenter_channel_id`` is ``None`` when the comment carries no
    ``authorChannelId``.

    :param item: json document
    :type item: dict

    :returns: parsed dictionary, or an empty dictionary when ``item`` is not a dict
    :rtype: dict

    :raises KeyError: if ``item`` (or the nested top-level comment) has no ``snippet``
    '''
    if not isinstance(item, dict):
        return dict()

    # Remember the outer snippet explicitly instead of relying on an
    # unbound local plus a bare ``except`` to detect the non-thread case.
    thread_snippet = None
    if item['snippet'].get('topLevelComment'):
        thread_snippet = item['snippet']
        item = thread_snippet['topLevelComment']

    snippet = item["snippet"]

    # authorChannelId can be missing; chaining .get() on None would raise
    # AttributeError, so fall back to an empty mapping.
    author_channel = snippet.get('authorChannelId') or {}

    comment_meta = {
        "video_id":
        snippet.get("videoId"),
        "commenter_channel_url":
        snippet.get("authorChannelUrl"),
        "commenter_channel_id":
        author_channel.get('value', None),
        "commenter_channel_display_name":
        snippet.get('authorDisplayName'),
        "comment_id":
        item.get("id"),
        "comment_like_count":
        snippet.get("likeCount"),
        "comment_publish_date":
        parse_yt_datetime(snippet.get("publishedAt")),
        "text":
        snippet.get("textDisplay"),
        "commenter_rating":
        snippet.get("viewerRating"),
        "comment_parent_id":
        snippet.get("parentId"),
        "collection_date":
        datetime.datetime.now()
    }

    reply_source = item if thread_snippet is None else thread_snippet
    comment_meta['reply_count'] = reply_source.get('totalReplyCount')

    return comment_meta
Exemplo n.º 13
0
def parse_playlist_metadata(item):
    '''
    Flatten a raw playlist resource into an ordered dictionary.

    The result contains playlist_name, playlist_id, playlist_publish_date,
    playlist_n_videos, channel_id, channel_name and collection_date,
    in that order.

    :param item: json document

    :returns: parsed dictionary
    '''
    snippet = item['snippet']

    parsed = OrderedDict()
    parsed['playlist_name'] = snippet.get('title')
    parsed['playlist_id'] = item['id']
    parsed['playlist_publish_date'] = parse_yt_datetime(
        snippet.get('publishedAt'))
    parsed['playlist_n_videos'] = item['contentDetails'].get('itemCount')
    parsed['channel_id'] = snippet.get('channelId')
    parsed['channel_name'] = snippet.get('channelTitle')
    # Timestamp of when this record was parsed, not of the playlist itself.
    parsed['collection_date'] = datetime.datetime.now()

    return parsed
Exemplo n.º 14
0
def parse_rec_video_metadata(item):
    '''
    Flatten a raw search-result style video resource into an ordered dictionary.

    The result contains video_id, channel_title, channel_id,
    video_publish_date, video_title, video_description, video_category,
    video_thumbnail and collection_date, in that order.

    :param item: json document

    :returns: parsed dictionary
    '''
    parsed = OrderedDict()
    # Here ``id`` is itself a mapping that holds the video id.
    parsed['video_id'] = item['id'].get('videoId')

    snippet = item["snippet"]
    parsed['channel_title'] = snippet.get("channelTitle")
    parsed['channel_id'] = snippet.get("channelId")
    parsed['video_publish_date'] = parse_yt_datetime(
        snippet.get("publishedAt"))
    parsed['video_title'] = snippet.get("title")
    parsed['video_description'] = snippet.get("description")
    parsed['video_category'] = snippet.get("categoryId")
    parsed['video_thumbnail'] = snippet["thumbnails"]["high"]["url"]
    parsed['collection_date'] = datetime.datetime.now()

    return parsed
Exemplo n.º 15
0
def parse_video_metadata(item):
    '''
    Flatten a raw video resource into a dictionary of selected fields.

    The result contains video_id, channel_title, channel_id,
    video_publish_date, video_title, video_description, video_category,
    video_view_count, video_comment_count, video_like_count,
    video_dislike_count, duration, video_thumbnail, video_tags and
    collection_date.

    :param item: json document
    :type item: dict

    :returns: parsed dictionary, or an empty dictionary when ``item`` is not a dict
    :rtype: dict

    :raises KeyError: if ``snippet``, ``id``, ``statistics``,
        ``contentDetails.duration`` or the high-resolution thumbnail url
        is missing
    '''
    if not isinstance(item, dict):
        return {}

    snippet = item["snippet"]

    # Tags are collapsed to a single "|"-separated string; absent tags become ''.
    raw_tags = snippet.get('tags')
    video_tags = '|'.join(raw_tags) if isinstance(raw_tags, Iterable) else ''

    parsed = {"video_id": item['id']}
    parsed["channel_title"] = snippet.get("channelTitle")
    parsed["channel_id"] = snippet.get("channelId")
    parsed["video_publish_date"] = parse_yt_datetime(snippet.get("publishedAt"))
    parsed["video_title"] = snippet.get("title")
    parsed["video_description"] = snippet.get("description")
    parsed["video_category"] = snippet.get("categoryId")

    stats = item["statistics"]
    parsed["video_view_count"] = stats.get("viewCount")
    parsed["video_comment_count"] = stats.get("commentCount")
    parsed["video_like_count"] = stats.get("likeCount")
    parsed["video_dislike_count"] = stats.get("dislikeCount")

    parsed["duration"] = item["contentDetails"]["duration"]
    parsed["video_thumbnail"] = snippet["thumbnails"]["high"]["url"]
    parsed["video_tags"] = video_tags
    parsed["collection_date"] = datetime.datetime.now()

    return parsed
Exemplo n.º 16
0
def parse_video_url(item):
    '''
    Extract the identifying fields of a raw playlist-item style document.

    The result contains video_id, channel_id, publish_date and
    collection_date.

    :param item: json document
    :type item: dict

    :returns: parsed dictionary, or an empty dictionary when ``item`` is not a dict
    :rtype: dict

    :raises KeyError: if ``snippet`` or its ``resourceId`` is missing
    '''
    if not isinstance(item, dict):
        return {}

    snippet = item['snippet']
    published = parse_yt_datetime(snippet.get('publishedAt'))

    # The video id sits under the snippet's resourceId mapping.
    parsed = {"video_id": snippet['resourceId'].get('videoId')}
    parsed["channel_id"] = snippet.get('channelId')
    parsed["publish_date"] = published
    parsed["collection_date"] = datetime.datetime.now()
    return parsed
Exemplo n.º 17
0
 def test_parse_yt_datetime(self):
     '''
     Check that utils.parse_yt_datetime converts self.date to self.datetime_date.

     Verified by Megan Brown on 11/30/2018.
     '''
     parsed = utils.parse_yt_datetime(self.date)
     expected = self.datetime_date
     self.assertEqual(parsed, expected)
Exemplo n.º 18
0
 def test_parse_yt_datetime(self):
     '''Check that utils.parse_yt_datetime converts self.date to self.datetime_date.'''
     parsed = utils.parse_yt_datetime(self.date)
     expected = self.datetime_date
     self.assertEqual(parsed, expected)