Example #1
    def save(self, update_userlog=True, *args, **kwargs):
        # To deal with backwards compatibility,
        #   check video_id, whether imported or not.
        if not self.video_id:
            assert kwargs.get("imported", False), "video_id better be set by internal code."
            assert self.youtube_id, "If not video_id, you better have set youtube_id!"
            self.video_id = i18n.get_video_id(self.youtube_id) or self.youtube_id  # for unknown videos, default to the youtube_id

        if not kwargs.get("imported", False):
            self.full_clean()

            # Compute learner status
            already_complete = self.complete
            self.complete = (self.points >= VideoLog.POINTS_PER_VIDEO)
            if not already_complete and self.complete:
                self.completion_timestamp = datetime.now()

            # Tell logins that they are still active (ignoring validation failures).
            #   TODO(bcipolli): Could log video information in the future.
            if update_userlog:
                try:
                    UserLog.update_user_activity(self.user, activity_type="login", update_datetime=(self.completion_timestamp or datetime.now()), language=self.language)
                except ValidationError as e:
                    logging.error("Failed to update userlog during video: %s" % e)

        super(VideoLog, self).save(*args, **kwargs)
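A minimal usage sketch of the two code paths above (assuming, as in KA Lite, that the `imported` kwarg is consumed by the syncing base class rather than by Django's `Model.save()`; `some_user` is a placeholder):

# Normal path: video_id should already be set (otherwise the asserts fire);
#   full_clean() runs, `complete` is recomputed, and the UserLog is touched.
yid = "aNqG4ChKShI"
log = VideoLog(user=some_user, video_id=i18n.get_video_id(yid) or yid, youtube_id=yid, points=100)
log.save()

# Import path: video_id may be omitted; save() backfills it from youtube_id
#   and skips validation and the userlog update.
VideoLog(user=some_user, youtube_id=yid).save(imported=True)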
Example #2
    def delete_objects_for_incomplete_videos():
        # Delete VideoFile objects that are not marked as in progress, but are
        #   neither 0% nor 100% done; they're broken.
        video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
        # Collect the ids first; after delete(), the queryset would re-evaluate to empty.
        deleted_video_ids = [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_to_delete]
        video_files_to_delete.delete()
        if deleted_video_ids:
            self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % len(deleted_video_ids))
        return deleted_video_ids
Example #3
        def update_objects_to_be_complete(youtube_ids_in_filesystem):
            # Files that exist and are in the DB, but have percent_complete=0 and
            #   download_in_progress=False: mark them as complete, since the files exist.
            updated_video_ids = []
            for chunk in break_into_chunks(youtube_ids_in_filesystem):
                qs = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
                # Snapshot the matching rows before updating; once percent_complete
                #   is 100, re-evaluating the same lazy filter would match nothing.
                video_files_needing_model_update = list(qs)
                qs.update(percent_complete=100, flagged_for_download=False)

                updated_video_ids += [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_needing_model_update]

            if updated_video_ids:
                caching.invalidate_all_caches()
                self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % len(updated_video_ids))
            return updated_video_ids
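`break_into_chunks` is not included in this result; a hypothetical minimal version consistent with the call site above (chunking keeps the `youtube_id__in` parameter list bounded; the chunk size is an assumption):

def break_into_chunks(ids, chunk_size=500):
    # Yield successive chunk_size-sized slices; list() guards against
    #   sets, which do not support slicing.
    ids = list(ids)
    for i in range(0, len(ids), chunk_size):
        yield ids[i:i + chunk_size]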
Example #4
        def add_missing_objects_to_db(youtube_ids_in_filesystem, videos_marked_at_all):
            # Files that exist, but are not in the DB, should be assumed to be good videos,
            #   and just needing to be added to the DB.  Add them to the DB in this way,
            #   so that these files also trigger the update code below (and trigger cache invalidation)
            youtube_ids_needing_model_creation = list(youtube_ids_in_filesystem - videos_marked_at_all)
            new_video_files = []
            if youtube_ids_needing_model_creation:
                for lang_code, youtube_ids in divide_videos_by_language(youtube_ids_needing_model_creation).iteritems():
                    # OK to use bulk_create (which skips per-object save() signals);
                    #   caches are invalidated explicitly below.
                    lang_video_files = [VideoFile(youtube_id=yid, percent_complete=100, download_in_progress=False, language=lang_code) for yid in youtube_ids]
                    VideoFile.objects.bulk_create(lang_video_files)
                    new_video_files += lang_video_files
                    caching.invalidate_all_caches()  # Do this within the loop, to update users ASAP
                self.stdout.write("Created %d VideoFile models (and marked them as complete, since the files exist)\n" % len(new_video_files))

            return [i18n.get_video_id(video_file.youtube_id) for video_file in new_video_files]
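`divide_videos_by_language` is likewise not shown; judging from the call site it returns a dict mapping each language code to its youtube_ids. A hypothetical minimal version (the per-video language lookup is an assumption, named after the `get_video_language` helper used in Example #8):

def divide_videos_by_language(youtube_ids):
    # Group youtube_ids into {lang_code: [youtube_id, ...]}.
    videos_by_lang = {}
    for yid in youtube_ids:
        videos_by_lang.setdefault(i18n.get_video_language(yid), []).append(yid)
    return videos_by_lang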
Example #5
    def handle(self, *args, **options):
        self.video = None

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=settings.LOG)

        try:
            while True: # loop until the method is aborted
                # Grab any video that hasn't been tried yet
                videos = VideoFile.objects \
                    .filter(flagged_for_download=True, download_in_progress=False) \
                    .exclude(youtube_id__in=failed_youtube_ids)
                video_count = videos.count()
                if video_count == 0:
                    self.stdout.write(_("Nothing to download; exiting.") + "\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = videos[0]
                self.video = video  # track the current video; referenced for stage reporting below
                video.download_in_progress = True
                video.percent_complete = 0
                video.save()
                self.stdout.write((_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": video.youtube_id})

                # Update the progress logging
                self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=video.youtube_id)

                # Initiate the download process
                try:
                    ensure_dir(settings.CONTENT_ROOT)

                    progress_callback = partial(self.download_progress_callback, video)
                    try:
                        # Download via urllib
                        download_video(video.youtube_id, callback=progress_callback)

                    except URLNotFound:
                        # Video was not found on amazon cloud service,
                        #   either due to a KA mistake, or due to the fact
                        #   that it's a dubbed video.
                        #
                        # We can use youtube-dl to get that video!!
                        logging.debug(_("Retrieving youtube video %(youtube_id)s via youtube-dl") % {"youtube_id": video.youtube_id})

                        def youtube_dl_cb(stats, progress_callback, *args, **kwargs):
                            if stats['status'] == "finished":
                                percent = 100.
                            elif stats['status'] == "downloading":
                                percent = 100. * stats['downloaded_bytes'] / stats['total_bytes']
                            else:
                                percent = 0.
                            progress_callback(percent=percent)
                        scrape_video(video.youtube_id, quiet=not settings.DEBUG, callback=partial(youtube_dl_cb, progress_callback=progress_callback))

                    # If we got here, we downloaded ... somehow :)
                    handled_youtube_ids.append(video.youtube_id)
                    self.stdout.write(_("Download is complete!") + "\n")

                except DownloadCancelled:
                    # Cancellation event
                    video.percent_complete = 0
                    video.flagged_for_download = False
                    video.download_in_progress = False
                    video.save()
                    failed_youtube_ids.append(video.youtube_id)

                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    msg = _("Error in downloading %(youtube_id)s: %(error_msg)s") % {"youtube_id": video.youtube_id, "error_msg": unicode(e)}
                    self.stderr.write("%s\n" % msg)

                    # If a connection error, we should retry.
                    if isinstance(e, DownloadError):
                        connection_error = "[Errno 8]" in e.message
                    elif isinstance(e, IOError) and hasattr(e, "errno"):
                        connection_error = e.errno == 8
                    else:
                        connection_error = False

                    video.download_in_progress = False
                    video.flagged_for_download = connection_error  # Any error other than a connection error is fatal.
                    video.save()

                    # Rather than getting stuck on one video, continue to the next video.
                    self.update_stage(stage_status="error", notes=_("%(error_msg)s; continuing to next video.") % {"error_msg": msg})
                    failed_youtube_ids.append(video.youtube_id)
                    continue

            # Regenerating cached pages can take a long time with no progress feedback,
            #   so only do it when requested and when something was actually downloaded.
            if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
                self.update_stage(stage_name=self.video.youtube_id, stage_percent=0, notes=_("Generating all pages related to videos."))
                caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

            # Update
            self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
                "num_handled_videos": len(handled_youtube_ids),
                "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
            })

        except Exception as e:
            self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
            raise
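For reference, the percent math in `youtube_dl_cb` above, applied to a sample youtube-dl progress dict (youtube-dl passes additional keys, omitted here):

stats = {"status": "downloading", "downloaded_bytes": 512000, "total_bytes": 2048000}
percent = 100. * stats['downloaded_bytes'] / stats['total_bytes']  # == 25.0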
Example #6
def get_video_by_youtube_id(youtube_id):
    # TODO(bcipolli): will need to change for dubbed videos
    video_id = i18n.get_video_id(youtube_id=youtube_id)
    return get_node_cache("Video").get(video_id, [None])[0]
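A usage sketch (the youtube_id is borrowed from the tests below; `get_node_cache("Video")` maps each video_id to a list of topic-tree nodes, so a miss yields None):

video_node = get_video_by_youtube_id("aNqG4ChKShI")
if video_node is not None:
    print(video_node.get("title"))  # assuming dict-like topic-tree nodes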
Example #7
class TestSaveVideoLog(KALiteTestCase):

    ORIGINAL_POINTS = 84
    ORIGINAL_SECONDS_WATCHED = 32
    NEW_POINTS = 32
    NEW_SECONDS_WATCHED = 15
    YOUTUBE_ID = "aNqG4ChKShI"
    VIDEO_ID = i18n.get_video_id(YOUTUBE_ID) or "dummy"
    YOUTUBE_ID2 = "b22tMEc6Kko"
    VIDEO_ID2 = i18n.get_video_id(YOUTUBE_ID2) or "dummy2"
    USERNAME = "******"
    PASSWORD = "******"

    def setUp(self):
        super(TestSaveVideoLog, self).setUp()
        # create a facility and user that can be referred to in models across tests
        self.facility = Facility(name="Test Facility")
        self.facility.save()
        self.user = FacilityUser(username=self.USERNAME,
                                 facility=self.facility)
        self.user.set_password(self.PASSWORD)
        self.user.save()

        # create an initial VideoLog instance so we have something to update later
        self.original_videolog = VideoLog(video_id=self.VIDEO_ID,
                                          youtube_id=self.YOUTUBE_ID,
                                          user=self.user)
        self.original_videolog.points = self.ORIGINAL_POINTS
        self.original_videolog.total_seconds_watched = self.ORIGINAL_SECONDS_WATCHED
        self.original_videolog.save()

    def test_new_videolog(self):

        # make sure the target video log does not already exist
        videologs = VideoLog.objects.filter(video_id=self.VIDEO_ID2,
                                            user__username=self.USERNAME)
        self.assertEqual(
            videologs.count(), 0,
            "The target video log to be newly created already exists")

        c = KALiteClient()

        # login
        success = c.login(username=self.USERNAME,
                          password=self.PASSWORD,
                          facility=self.facility.id)
        self.assertTrue(success, "Was not able to log in as the test user")

        # save a new video log
        result = c.save_video_log(
            video_id=self.VIDEO_ID2,
            youtube_id=self.YOUTUBE_ID2,
            total_seconds_watched=self.ORIGINAL_SECONDS_WATCHED,
            points=self.NEW_POINTS,
        )
        self.assertEqual(
            result.status_code, 200,
            "An error (%d) was thrown while saving the video log." %
            result.status_code)

        # get a reference to the newly created VideoLog
        videolog = VideoLog.objects.get(video_id=self.VIDEO_ID2,
                                        user__username=self.USERNAME)

        # make sure the VideoLog was properly created
        self.assertEqual(videolog.points, self.NEW_POINTS,
                         "The VideoLog's points were not saved correctly.")
        self.assertEqual(
            videolog.total_seconds_watched, self.ORIGINAL_SECONDS_WATCHED,
            "The VideoLog's seconds watched was not saved correctly.")

    def test_update_videolog(self):

        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog hasn't already been changed
        self.assertEqual(videolog.points, self.ORIGINAL_POINTS,
                         "The VideoLog's points have already changed.")
        self.assertEqual(videolog.total_seconds_watched,
                         self.ORIGINAL_SECONDS_WATCHED,
                         "The VideoLog's seconds watched already changed.")

        c = KALiteClient()

        # login
        success = c.login(username=self.USERNAME,
                          password=self.PASSWORD,
                          facility=self.facility.id)
        self.assertTrue(success, "Was not able to log in as the test user")

        # save a new record onto the video log, with a correct answer (increasing the points and streak)
        result = c.save_video_log(
            video_id=self.VIDEO_ID,
            youtube_id=self.YOUTUBE_ID,
            total_seconds_watched=self.ORIGINAL_SECONDS_WATCHED +
            self.NEW_SECONDS_WATCHED,
            points=self.ORIGINAL_POINTS + self.NEW_POINTS,
        )
        self.assertEqual(
            result.status_code, 200,
            "An error (%d) was thrown while saving the video log." %
            result.status_code)

        # get a reference to the updated VideoLog
        videolog = VideoLog.objects.get(video_id=self.VIDEO_ID,
                                        user__username=self.USERNAME)

        # make sure the VideoLog was properly updated
        self.assertEqual(videolog.points,
                         self.ORIGINAL_POINTS + self.NEW_POINTS,
                         "The VideoLog's points were not updated correctly.")
        self.assertEqual(
            videolog.total_seconds_watched,
            self.ORIGINAL_SECONDS_WATCHED + self.NEW_SECONDS_WATCHED,
            "The VideoLog's seconds watched was not updated correctly.")
Example #8
File: views.py Project: oguya/provision
def show_logs(request, ndays=None):
    """Show file-based logging info for video downloads, language packs, and subtitles"""
    ndays = ndays or int(request.GET.get("days", 7))

    def get_logger_filename(logger_type):
        return stats_logger(logger_type).handlers[0].baseFilename

    def parse_data(logger_type, data_fields, windowsize=128, ndays=None):
        parsed_data = {}
        nparts = len(data_fields)
        summary_data = dict([(fld, {}) for fld in (data_fields + ["date"])])

        filepath = get_logger_filename(logger_type)
        if not os.path.exists(filepath):
            return (parsed_data, summary_data)

        # Group by ip, date, and youtube_id
        old_data = ""
        first_loop = True
        last_loop = False
        with open(filepath, "r") as fp:
            fp.seek(0, 2)  # go to the end of the stream
            while True:
                # Read the next chunk of data
                try:
                    # Get the data
                    try:
                        if first_loop:
                            fp.seek(-windowsize, 1)  # back up one window from the end
                            first_loop = False
                        else:
                            fp.seek(-2 * windowsize, 1)  # back up past the window just read, plus one more

                        cur_data = fp.read(windowsize) + old_data
                    except IOError:  # seeking before the start of the file
                        if last_loop and not old_data:
                            raise
                        elif last_loop:
                            cur_data = old_data
                            old_data = ""
                        else:
                            last_loop = True
                            fp.seek(0)
                            cur_data = fp.read(windowsize) + old_data  # could be some overlap...

                    if not cur_data:
                        break
                except IOError:
                    break

                # Parse the data
                lines = cur_data.split("\n")
                old_data = lines[0] if len(lines) > 1 else ""
                new_data = lines[1:] if len(lines) > 1 else lines
                for l in new_data:
                    if not l:
                        continue

                    # All start with a date
                    parts = l.split(" - ", 1)  # timestamp, then payload (which may itself contain " - ")
                    if len(parts) != 2:
                        continue
                    tim = parts[0]
                    dat = tim.split(" ")[0]

                    # Validate that this date is within the accepted range
                    parsed_date = datetime.datetime.strptime(dat, "%Y-%m-%d")
                    logging.debug("%s %s" % (parsed_date, (datetime.datetime.now() - timedelta(days=ndays))))
                    if ndays is not None and datetime.datetime.now() - timedelta(days=ndays) > parsed_date:
                        last_loop = True
                        old_data = ""
                        break

                    # The rest is semicolon-delimited
                    parts = parts[1].split(";")  # vd;127.0.0.1;xvnpSRO9IDM

                    # Now save things off
                    parsed_data[tim] = dict([(data_fields[idx], parts[idx]) for idx in range(nparts)])
                    summary_data["date"][dat] = 1 + summary_data["date"].get(dat, 0)
                    for idx in range(nparts):
                        summary_data[data_fields[idx]][parts[idx]] = 1 + summary_data[data_fields[idx]].get(
                            parts[idx], 0
                        )

        for key, val in summary_data.iteritems():
            summary_data[key] = sorted_dict(val, key=lambda t: t[0])

        return (parsed_data, summary_data)

    (video_raw_data, video_summary_data) = parse_data("videos", ["task_id", "ip_address", "youtube_id"], ndays=ndays)
    (lp_raw_data, lp_summary_data) = parse_data(
        "language_packs", ["task_id", "ip_address", "lang_code", "version"], ndays=ndays
    )
    (srt_raw_data, srt_summary_data) = parse_data(
        "subtitles", ["task_id", "ip_address", "lang_code", "youtube_id"], ndays=ndays
    )

    return {
        "ndays": ndays,
        "videos": {
            "raw": video_raw_data,
            "dates": video_summary_data["date"],
            "ips": video_summary_data["ip_address"],
            "slugs": sum_counter(
                video_summary_data["youtube_id"], fn=lambda yid: get_id2slug_map().get(get_video_id(yid))
            ),
            "lang_codes": sum_counter(video_summary_data["youtube_id"], fn=lambda yid: get_video_language(yid)),
        },
        "language_packs": {
            "raw": lp_raw_data,
            "dates": lp_summary_data["date"],
            "ips": lp_summary_data["ip_address"],
            "lang_codes": lp_summary_data["lang_code"],
            "versions": lp_summary_data["version"],
        },
        "subtitles": {
            "raw": srt_raw_data,
            "dates": srt_summary_data["date"],
            "ips": srt_summary_data["ip_address"],
            "lang_codes": srt_summary_data["lang_code"],
        },
    }
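For reference, a worked example of the line format `parse_data()` expects, reconstructed from the inline comment (`vd;127.0.0.1;xvnpSRO9IDM`) and the `strptime` format; treat the exact layout as an assumption:

line = "2014-01-02 03:04:05 - vd;127.0.0.1;xvnpSRO9IDM"
parts = line.split(" - ", 1)   # ["2014-01-02 03:04:05", "vd;127.0.0.1;xvnpSRO9IDM"]
tim = parts[0]                 # full timestamp; key into parsed_data
dat = tim.split(" ")[0]        # "2014-01-02"; key into summary_data["date"]
fields = parts[1].split(";")   # ["vd", "127.0.0.1", "xvnpSRO9IDM"]
# With data_fields = ["task_id", "ip_address", "youtube_id"], this yields:
#   parsed_data[tim] == {"task_id": "vd", "ip_address": "127.0.0.1", "youtube_id": "xvnpSRO9IDM"}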
Example #9
class TestVideoLogs(KALiteTestCase):

    ORIGINAL_POINTS = 37
    ORIGINAL_SECONDS_WATCHED = 3
    NEW_POINTS = 22
    NEW_SECONDS_WATCHED = 5
    YOUTUBE_ID = "aNqG4ChKShI"
    VIDEO_ID = i18n.get_video_id(YOUTUBE_ID) or "dummy"

    def setUp(self):
        super(TestVideoLogs, self).setUp()
        # create a facility and user that can be referred to in models across tests
        self.facility = Facility(name="Test Facility")
        self.facility.save()
        self.user = FacilityUser(username="******", facility=self.facility)
        self.user.set_password("dumber")
        self.user.save()

        # create an initial VideoLog instance so we have something to collide with later
        self.original_videolog = VideoLog(video_id=self.VIDEO_ID,
                                          youtube_id=self.YOUTUBE_ID,
                                          user=self.user)
        self.original_videolog.points = self.ORIGINAL_POINTS
        self.original_videolog.total_seconds_watched = self.ORIGINAL_SECONDS_WATCHED
        self.original_videolog.save()

        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog was created correctly
        self.assertEqual(videolog.points, self.ORIGINAL_POINTS,
                         "The VideoLog's points have already changed.")
        self.assertEqual(
            videolog.total_seconds_watched, self.ORIGINAL_SECONDS_WATCHED,
            "The VideoLog's total seconds watched have already changed.")

    def test_videolog_update(self):

        # get a new reference to the existing VideoLog
        videolog = VideoLog.objects.get(id=self.original_videolog.id)

        # update the VideoLog
        videolog.points = self.NEW_POINTS
        videolog.total_seconds_watched = self.NEW_SECONDS_WATCHED
        videolog.save()

        # get a new reference to the existing VideoLog
        videolog2 = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog was updated
        self.assertEqual(videolog2.points, self.NEW_POINTS,
                         "The VideoLog's points were not updated.")
        self.assertEqual(
            videolog2.total_seconds_watched, self.NEW_SECONDS_WATCHED,
            "The VideoLog's total seconds watched were not updated.")

    @unittest.skip("Auto-merging is not yet automatic, so skip this")
    def test_videolog_collision(self):

        # create a new video log with the same youtube_id and user, but different points/total seconds watched
        videolog = VideoLog(video_id=self.VIDEO_ID,
                            youtube_id=self.YOUTUBE_ID,
                            user=self.user)
        videolog.points = self.NEW_POINTS
        videolog.total_seconds_watched = self.NEW_SECONDS_WATCHED

        # try saving the new VideoLog: this is where the collision will happen, hopefully leading to a merge
        videolog.save()

        # get a new reference to the existing VideoLog
        videolog2 = VideoLog.objects.get(id=self.original_videolog.id)

        # make sure the VideoLog has been properly merged
        self.assertEqual(videolog2.points,
                         max(self.ORIGINAL_POINTS, self.NEW_POINTS),
                         "The VideoLog's points were not properly merged.")
        self.assertEqual(
            videolog2.total_seconds_watched,
            max(self.ORIGINAL_SECONDS_WATCHED, self.NEW_SECONDS_WATCHED),
            "The VideoLog's total seconds watched were not properly merged.")
Example #10
    def forwards(self, orm):
        # Backfill video_id on every VideoLog, falling back to the youtube_id
        #   when the mapping is unknown (mirroring VideoLog.save() in Example #1).
        for vlog in orm["main.VideoLog"].objects.all():
            vlog.video_id = i18n.get_video_id(vlog.youtube_id) or vlog.youtube_id
            vlog.save()
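The `orm[...]` frozen-model lookup suggests this is a South data migration. A hedged sketch of the matching `backwards()` (assuming `video_id` may be blanked when reversing):

    def backwards(self, orm):
        # Reverse step: video_id was derived from youtube_id above, so it can
        #   simply be cleared again.
        orm["main.VideoLog"].objects.all().update(video_id="")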