Exemplo n.º 1
0
    def test_simple(self):
        """Check that online updates order the two stored scores as expected.

        Creates one user, two videos with a per-user rating each, and one
        expert comparison between them.  An online update is then applied
        with rating value -1 and with rating value 1; after each update the
        stored ``reliability`` scores of the two videos are compared.
        """
        load_gin_config('backend/ml_model/config/featureless_config.gin')

        # fixtures: a user with preferences and two rated videos
        django_user = DjangoUser.objects.create(username='******')
        prefs = UserPreferences.objects.create(user=django_user)
        first = Video.objects.create(video_id="online-1", reliability=0)
        second = Video.objects.create(video_id="online-2", reliability=1.0)
        for video, score in ((first, 0), (second, 1.0)):
            VideoRating.objects.create(video=video,
                                       user=prefs,
                                       reliability=score)

        comparison = ExpertRating.objects.create(
            user=prefs, video_1=first, video_2=second, reliability=100)
        context = OnlineRatingUpdateContext(comparison, 'reliability')

        def stored_score(video):
            """Read the user's current ``reliability`` for ``video`` from the DB."""
            return VideoRating.objects.get(video=video, user=prefs).reliability

        def get_scores_online(rating):
            """Apply an online update for ``rating`` and return both scores."""
            update = compute_online_update(
                rating_value=rating,
                idx_set=context.idx_set,
                model_tensor_orig=context.model_tensor,
                mb_np_orig=context.mb_np)
            context.write_updates_to_db(update['new_model_tensor'])
            return stored_score(first), stored_score(second)

        # rating value -1: the first video must end up with the higher score
        score_first, score_second = get_scores_online(-1)
        assert score_first > score_second

        # rating value 1: the ordering must flip
        score_first, score_second = get_scores_online(1)
        assert score_first < score_second
Exemplo n.º 2
0
 def test_gin_files(self):
     """Load every ``.gin`` file found under ``BASE_DIR``.

     The gin configuration is cleared after each file is loaded, so every
     file is loaded on its own.
     """
     for folder, _subdirs, names in os.walk(BASE_DIR):
         for name in names:
             if not name.endswith('.gin'):
                 continue
             candidate = os.path.join(folder, name)
             if not os.path.isfile(candidate):
                 continue
             load_gin_config(candidate)
             gin.clear_config()
Exemplo n.º 3
0
    def setUp(self):
        """Create two videos, one user with a verified e-mail, and one rating.

        Both videos get a random embedding of length ``Video.EMBEDDING_LEN``;
        the single expert rating gets a random value in [0, 100) for every
        field in ``VIDEO_FIELDS``.
        """
        for config_path in ("backend/ml_model/config/common.gin",
                            "backend/ml_model/config/featureless_config.gin"):
            load_gin_config(config_path)

        self.v1 = Video.objects.create(video_id="a")
        self.v2 = Video.objects.create(video_id="b")
        self.videos = [self.v1, self.v2]
        for video in self.videos:
            embedding = np.random.randn(Video.EMBEDDING_LEN)
            video.set_embedding(embedding)
            video.save()

        self.djangouser = DjangoUser.objects.create_user(
            username="******", password="******")
        self.prefs = UserPreferences.objects.create(user=self.djangouser)

        # mark the user as verified: an e-mail on an accepted domain with
        # is_verified=True
        self.ui = UserInformation.objects.create(user=self.djangouser)
        domain = create_accepted_domain()
        self.vemail = VerifiableEmail.objects.create(
            user=self.ui, email=f"{uuid1()}{domain}", is_verified=True)

        # one random value per rated field
        field_values = {}
        for field_name in VIDEO_FIELDS:
            field_values[field_name] = np.random.rand() * 100
        self.rating = ExpertRating.objects.create(
            video_1=self.v1, video_2=self.v2, user=self.prefs, **field_values)
Exemplo n.º 4
0
def cached_class_instance(rating_pk, field, timeout_sec=100):
    """Get a cached ``OnlineRatingUpdateContext`` or create one.

    Entries of the cache returned by ``current_cache()`` are keyed by
    ``(rating_pk, field)`` and stored as ``(creation_time, context)``.

    Args:
        rating_pk: primary key of the ``ExpertRating`` the context is for.
        field: feature name, passed to the context as ``feature``.
        timeout_sec: entries at least this many seconds old are evicted
            before the lookup.

    Returns:
        A dict with ``'cache'`` (the context object) and ``'cache_created'``
        (the ``time()`` value stored when the entry was created).

    Raises:
        ValueError: if a new entry is required but, after eviction, the
            cache still holds ``MAX_CACHE_PER_THREAD`` or more entries.
    """
    cache = current_cache()
    current_time = time()

    # key to search for in the cache
    key = (rating_pk, field)

    # Evict expired entries.  The keys to drop are collected first so the
    # cache is never mutated while it is being iterated; no deep copy of the
    # keys is needed for that.
    expired_keys = [
        existing_key for existing_key in cache.keys()
        if current_time - cache[existing_key][0] >= timeout_sec
    ]
    for existing_key in expired_keys:
        del cache[existing_key]

    # Every entry that survived eviction is fresh, so a present key never
    # needs to be re-created: only a missing key leads to a new entry.
    if key not in cache:
        # need to add a new element, and there are too many already...
        if len(cache) >= MAX_CACHE_PER_THREAD:
            raise ValueError(
                "Too many elements in cache, please wait and try again...")

        load_gin_config('backend/ml_model/config/featureless_config.gin')
        cache[key] = (current_time,
                      OnlineRatingUpdateContext(
                          expert_rating=ExpertRating.objects.get(pk=rating_pk),
                          feature=field))

    entry = cache[key]
    return {'cache': entry[1], 'cache_created': entry[0]}
Exemplo n.º 5
0
    def handle(self, **options):
        """Run the default video import, or an interactive console.

        Reads ``options['config']`` (gin config to load),
        ``options['only_download']`` (passed to ``VideoManager``) and
        ``options['console']``.

        Without ``console`` the default import sequence is run.  With
        ``console`` the public callables of the ``VideoManager`` instance are
        offered as commands: typing a command name prints its signature and
        calls it without arguments; ``exit`` or end-of-input stops the loop.

        Returns:
            None in every case.
        """
        load_gin_config(options['config'])
        try:
            vm = VideoManager(only_download=options['only_download'])
        except Exception as e:
            # command-level boundary: report the problem instead of a traceback
            print("Arguments are wrong, initialization failed")
            print(e)
            return None

        if not options['console']:
            print("Doing default import")
            vm.fill_info()
            vm.add_videos_in_folder_to_db()
            vm.clear_info()
            return None

        # public (non-underscore) callables of the manager are the commands
        commands = [
            x for x in dir(vm)
            if not x.startswith('_') and callable(getattr(vm, x))
        ]
        print("Available commands: ", commands)
        print("Type exit to stop")
        while True:
            print("> ", end="")
            try:
                inp = input().strip()
            except EOFError:
                # stdin was closed (Ctrl-D, or piped input ran out): leave the
                # console the same way as an explicit "exit"
                return None

            if inp in commands:
                f = getattr(vm, inp)
                print(signature(f))
                f()
            elif inp == 'exit':
                return None
            else:
                print("Unknown command")
                print("Available commands: ", commands)
                print("Type exit to stop")
Exemplo n.º 6
0
    def handle(self, **options):
        """Train the learner, or run hyperparameter tuning if ``tune`` is set.

        Reads ``options['features']`` (``None`` means all of
        ``VIDEO_FIELDS``), ``options['config']`` (an iterable of gin configs,
        loaded in order), ``options['tune']``, ``options['tune_resume']`` and
        ``options['epochs_override']``.  Memory usage is reported through
        ``print_memory`` at several stages.
        """
        print_memory(stage="Command init")

        requested = options["features"]
        features = VIDEO_FIELDS if requested is None else requested

        for f in features:
            assert f in VIDEO_FIELDS, f"Feature {f} not recognized, {VIDEO_FIELDS}"

        print(f"Using features {', '.join(features)}")

        for config in options["config"]:
            print("Loading config", config)
            load_gin_config(config)

        if not options["tune"]:
            # regular training
            print_memory(stage="pre-learner init")
            learner_factory = learner()
            learner_obj = learner_factory(features=features)
            print_memory(stage="learner created")

            learner_obj.fit(epochs=options["epochs_override"])
            print_memory(stage="post train")

            learner_obj.update_features()
            print_memory(stage="post update")
            return

        # running parallel hparam tuning with Ray
        def pre_parse():
            """Load django before reading configuration (otherwise have import error)."""
            import os

            os.environ.setdefault("DJANGO_SETTINGS_MODULE",
                                  "django_react.settings")

            import django

            django.setup()

        if options["tune_resume"]:
            gin.bind_parameter('tune_run.resume', True)

        tune_gin(experiment, pre_parse=pre_parse)
Exemplo n.º 7
0
    def setUp(self):
        """Create two videos and two raters who disagree on the first field.

        The first rater's e-mail is unverified and the second rater's is
        verified (``self.verify``).  Both raters compare video0 with video1:
        every field except ``VIDEO_FIELDS[0]`` is set to 50, while
        ``VIDEO_FIELDS[0]`` is 0 for the first rater and 100 for the second.
        """
        load_gin_config("backend/ml_model/config/featureless_config.gin")
        gin.bind_parameter(
            "FeaturelessMedianPreferenceAverageRegularizationAggregator.epochs",
            1000)

        # creating videos
        videos = []
        for i in tqdm(range(2)):
            videos.append(Video.objects.create(video_id=f"video{i}"))
        self.videos = videos

        # creating users
        django_users = []
        for i in tqdm(range(2)):
            django_users.append(
                DjangoUser.objects.create_user(username=f"rater{i}",
                                               password=f"1234{i}"))
        self.djangousers = django_users

        prefs = []
        for user in self.djangousers:
            prefs.append(UserPreferences.objects.create(user=user))
        self.userprefs = prefs

        # user information records, then one e-mail per user with the
        # verification flag taken from self.verify
        infos = []
        for user in self.djangousers:
            infos.append(UserInformation.objects.create(user=user))
        self.userinfos = infos

        self.verify = [False, True]
        accepted_domain = create_accepted_domain()
        emails = []
        for info, is_verified in zip(self.userinfos, self.verify):
            emails.append(
                VerifiableEmail.objects.create(
                    user=info,
                    email=f"{uuid1()}{accepted_domain}",
                    is_verified=is_verified))
        self.vemails = emails

        data_rest = {k: 50 for k in VIDEO_FIELDS[1:]}
        self.f = VIDEO_FIELDS[0]

        # rater0 likes video0, rater1 likes video1
        video_a, video_b = self.videos[0], self.videos[1]
        for rater_index, value in ((0, 0), (1, 100)):
            ExpertRating.objects.create(user=self.userprefs[rater_index],
                                        video_1=video_a,
                                        video_2=video_b,
                                        **data_rest,
                                        **{self.f: value})
Exemplo n.º 8
0
    def setUp(self):
        """Create 200 videos, 10 raters with verified e-mails, and random ratings.

        1000 ``ExpertRating`` objects are built from randomly chosen videos
        and raters with a random value in [0, 100) per field, then inserted
        with ``bulk_create(..., ignore_conflicts=True)``.
        """
        load_gin_config("backend/ml_model/config/featureless_config.gin")
        gin.bind_parameter(
            "FeaturelessMedianPreferenceAverageRegularizationAggregator.epochs",
            1000)

        # creating videos
        videos = []
        for i in tqdm(range(200)):
            videos.append(Video.objects.create(video_id=f"video{i}"))
        self.videos = videos

        # creating users
        django_users = []
        for i in tqdm(range(10)):
            django_users.append(
                DjangoUser.objects.create_user(username=f"rater{i}",
                                               password=f"1234{i}"))
        self.djangousers = django_users

        prefs = []
        for user in self.djangousers:
            prefs.append(UserPreferences.objects.create(user=user))
        self.userprefs = prefs

        # making the users verified
        infos = []
        for user in self.djangousers:
            infos.append(UserInformation.objects.create(user=user))
        self.userinfos = infos

        accepted_domain = create_accepted_domain()
        emails = []
        for info in self.userinfos:
            emails.append(
                VerifiableEmail.objects.create(
                    user=info,
                    email=f"{uuid1()}{accepted_domain}",
                    is_verified=True))
        self.vemails = emails

        def random_rating():
            """Build (without saving) one rating with random videos, rater, values."""
            video_1 = np.random.choice(self.videos)
            video_2 = np.random.choice(self.videos)
            rater = np.random.choice(self.userprefs)
            values = {k: np.random.rand() * 100 for k in VIDEO_FIELDS}
            return ExpertRating(video_1=video_1,
                                video_2=video_2,
                                user=rater,
                                **values)

        # creating expert ratings
        ratings = [random_rating() for _ in tqdm(range(1000))]
        ExpertRating.objects.bulk_create(ratings, ignore_conflicts=True)
Exemplo n.º 9
0
def load_videos_thread(config):
    """Download metadata for a few videos.

    Takes a subsample of the videos returned by ``qs_videos_to_download()``.
    If the subsample is empty, prints a message and does nothing else;
    otherwise loads the gin ``config`` and runs a ``VideoManager`` restricted
    to the subsampled video IDs.
    """
    pending = qs_videos_to_download()
    subsample = qs_subsample_to_download(pending)

    ids = []
    for video in subsample:
        ids.append(video.video_id)

    if ids:
        print(f"Total to-download: {pending.count()} subsampled: {len(ids)}")

        load_gin_config(config)
        manager = VideoManager(only_download=ids)
        manager.fill_info()
        manager.add_videos_in_folder_to_db()
        manager.clear_info()
    else:
        print("No videos, exiting!")
Exemplo n.º 10
0
    def test_download_metadata(self):
        """Download metadata for four sample video IDs and check the stored rows.

        Each video ID is paired with a dict of queryset lookups.  After the
        download, exactly one ``Video`` row must exist for the ID and that
        row must match every lookup; the lookups that fail are collected and
        reported together in the assertion message.
        """
        for field_class in (CharField, TextField):
            field_class.register_lookup(Length, 'length')
        load_gin_config('backend/add_videos.gin')

        expectations = [
            # unlisted video
            ('dHFYikxUatY', {
                'name__length__gt': 5,
                'is_unlisted': True,
                'description__length__gt': 1,
                'publication_date__isnull': False,
                'language__isnull': False,
                'views__isnull': False,
                'metadata_timestamp__isnull': False,
                'uploader__isnull': False,
                'duration__isnull': False,
                'last_download_time__isnull': False,
                'download_attempts__gt': 0,
                'add_time__isnull': False,
                'download_failed': False,
            }),
            # correct video
            ('9bZkp7q19f0', {
                'name__length__gt': 5,
                'is_unlisted': False,
                'views__isnull': False,
                'description__length__gt': 1,
                'publication_date__isnull': False,
                'uploader__isnull': False,
                'duration__isnull': False,
                'language__isnull': False,
                'metadata_timestamp__isnull': False,
                'wrong_url': False,
                'last_download_time__isnull': False,
                'download_attempts__gt': 0,
                'add_time__isnull': False,
                'download_failed': False,
            }),
            # video ID that makes a wrong URL
            ('w$url', {
                'name': "",
                'is_unlisted': False,
                'metadata_timestamp__isnull': True,
                'wrong_url': True,
                'last_download_time__isnull': False,
                'download_attempts__gt': 0,
                'add_time__isnull': False,
                'download_failed': True,
            }),
            # video that is not found
            ('notfoundvid', {
                'name': "",
                'is_unlisted': False,
                'metadata_timestamp__isnull': True,
                'wrong_url': False,
                'last_download_time__isnull': False,
                'download_attempts__gt': 0,
                'add_time__isnull': False,
                'download_failed': True,
            }),
        ]

        for video_id, lookups in expectations:
            # start from a clean slate for this ID, then download it
            Video.objects.filter(video_id=video_id).delete()
            manager = VideoManager(only_download=[video_id])
            manager.fill_info()
            manager.add_videos_in_folder_to_db()

            rows = Video.objects.filter(video_id=video_id)
            assert rows.count() == 1

            # format: key, val -- every lookup the single row fails to match
            errors = [(key, val) for key, val in lookups.items()
                      if rows.filter(**{key: val}).count() != 1]

            assert not errors, (video_id, lookups, errors)

            print(rows.get())