Example #1
    def _probe_settings_yaml(self):
        yaml_filename = self.customized("nbhosting.yaml")
        logger.debug(f"yaml filename {yaml_filename}")
        if not yaml_filename:
            return False
        try:
            with open(yaml_filename) as feed:
                yaml_config = yaml.safe_load(feed.read())
            #
            if 'static-mappings' not in yaml_config:
                self.static_mappings = StaticMapping.defaults()
            else:
                logger.debug("populating static-mappings from yaml")
                self.static_mappings = [
                    StaticMapping(D['source'], D['destination'])
                    for D in yaml_config['static-mappings']
                ]
            #
            if 'builds' not in yaml_config:
                self.builds = []
            else:
                self.builds = [Build(D) for D in yaml_config['builds']]
            self._yaml_config = yaml_config
        except Exception:
            logger.exception(
                f"could not load yaml file {yaml_filename} - ignoring")
            return False
        return True
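A minimal sketch of the nbhosting.yaml layout this method consumes; the 'static-mappings', 'source', 'destination' and 'builds' key names come from the code above, while the values and the shape of a build entry are made-up placeholders.

# illustrative sketch only -- key names taken from _probe_settings_yaml above,
# values and the builds entry layout are assumptions
import yaml

sample = """
static-mappings:
  - source: data
    destination: /home/jovyan/work/data
builds:
  - name: handouts        # hypothetical: Build() receives this whole mapping
"""
config = yaml.safe_load(sample)
mappings = [(D['source'], D['destination']) for D in config['static-mappings']]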
Example #2
def notebooks_by_pattern(coursedir, pattern):
    """
    return a sorted list of all notebooks (relative paths)
    matching some pattern from coursedir
    """
    logger.debug(f"notebooks_by_pattern in {coursedir} with {pattern}")
    root = Path(coursedir.notebooks_dir).absolute()
    absolutes = root.glob(pattern)
    probed = [path.relative_to(root) for path in absolutes]
    notebooks = [Notebook(coursedir, path) for path in probed]
    notebooks.sort(key=lambda n: n.path)
    return notebooks
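A usage sketch, assuming coursedir is a CourseDir-like object exposing notebooks_dir; the pattern is illustrative.

# usage sketch (hypothetical coursedir and pattern); results are Notebook objects
# whose .path is relative to coursedir.notebooks_dir, sorted by that path
for nb in notebooks_by_pattern(coursedir, "w1/*.ipynb"):
    print(nb.path)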
Example #3
    def _fetch_course_custom_tracks(self):
        """
        locate and load <course>/nbhosting/tracks.py

        the objective is to make this customizable, so that some
        notebooks in the repo can be ignored
        and the others organized along different viewpoints

        the tracks() function will receive self as its single parameter
        it is expected to return a dictionary
           track_name -> Track instance
        see flotpython/nbhosting/tracks.py for a realistic example

        the keys in this dictionary are used in the web interface
        to present the list of available tracks

        absence of tracks.py, or inability to run it, triggers
        the default policy (per directory) implemented in model_track.py
        """

        course_tracks_py = self.customized("tracks.py")

        if course_tracks_py:
            modulename = (f"{self.coursename}_tracks".replace("-", "_"))
            try:
                logger.debug(f"{self} loading module {course_tracks_py}")
                spec = spec_from_file_location(
                    modulename,
                    course_tracks_py,
                )
                module = module_from_spec(spec)
                spec.loader.exec_module(module)
                tracks_fun = module.tracks
                logger.debug(f"triggerring {tracks_fun.__qualname__}()")
                tracks = tracks_fun(self)
                if self._check_tracks(tracks):
                    return tracks
            except Exception:
                logger.exception(f"{self} could not do load custom tracks")
            finally:
                # make sure to reload the python code next time
                # we will need it, in case the course has published an update
                if modulename in sys.modules:
                    del sys.modules[modulename]
        else:
            logger.info(f"{self} no tracks.py hook found")
        logger.warning(f"{self} resorting to generic filesystem-based track")
        return [generic_track(self)]
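A minimal sketch of what a course-provided nbhosting/tracks.py hook could look like, following the docstring above; the glob pattern, the track name and the reuse of notebooks_by_pattern / track_by_directory (Examples 2 and 5) are assumptions, not taken from a real course.

# hypothetical <course>/nbhosting/tracks.py -- a sketch only
# assumes notebooks_by_pattern and track_by_directory (Examples 2 and 5) are importable
def tracks(coursedir):
    notebooks = notebooks_by_pattern(coursedir, "notebooks/*.ipynb")
    return {
        "course": track_by_directory(
            coursedir, name="course",
            description="all notebooks, grouped by directory",
            notebooks=notebooks),
    }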
Example #4
def notebooks_by_patterns(coursedir, patterns):
    """
    return the concatenation of notebooks_by_pattern applied to
    each pattern in turn
    each per-pattern batch is sorted internally, but the concatenation
    preserves the order in which the patterns are given
    Returns:
       list of all notebooks (relative paths)
    """
    logger.debug(f"notebooks_by_patterns in {coursedir} with")
    for pattern in patterns:
        logger.debug(f"  pattern {pattern}")
    result = []
    for pattern in patterns:
        result.extend(notebooks_by_pattern(coursedir, pattern))
    return result
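A usage sketch (hypothetical patterns) showing that each batch is sorted on its own while the patterns' order is preserved in the result.

# all w1 notebooks come first, sorted, then all w2 notebooks, sorted
notebooks = notebooks_by_patterns(coursedir, ["w1/*.ipynb", "w2/*.ipynb"])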
Example #5
def track_by_directory(coursedir,
                       *,
                       name="",
                       description,
                       notebooks,
                       directory_labels=None):
    """
    from a list of relative paths, builds one Section object
    per directory and returns them wrapped in a Track

    the optional directory_labels argument lets you provide a mapping
    "dirname" -> "displayed name"
    """
    def mapped_name(dirname):
        dirname = str(dirname)
        if not directory_labels:
            return dirname
        return directory_labels.get(dirname, dirname)

    logger.debug(f"track_by_directory in {coursedir}")
    root = coursedir.notebooks_dir

    hash_per_dir = defaultdict(list)

    for notebook in notebooks:
        hash_per_dir[notebook.absolute().parent].append(notebook)

    result = []

    for absolute, notebooks_per_dir in hash_per_dir.items():
        result.append(
            Section(name=absolute.relative_to(root),
                    coursedir=coursedir,
                    notebooks=notebooks_per_dir))

    # sort *before* applying the name mapping
    result.sort(key=lambda s: s.name)
    for section in result:
        section.name = mapped_name(section.name)
        section.notebooks.sort(key=lambda n: n.path)
    return Track(coursedir, result, name=name, description=description)
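A usage sketch with a hypothetical directory_labels mapping; sections are sorted on their original directory names first, then renamed for display.

# hypothetical directory names and labels
track = track_by_directory(
    coursedir,
    name="by-week",
    description="notebooks grouped by directory",
    notebooks=notebooks_by_pattern(coursedir, "*/*.ipynb"),
    directory_labels={"w1": "Week 1", "w2": "Week 2"})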
Example #6
    def tracks(self):
        """
        returns a list of known tracks

        does this efficiently: first the in-memory cache,
        then the disk cache in courses/<coursename>/.tracks.json,
        and only then the yaml config or the course-specific tracks.py if provided
        """
        self.probe()
        # in memory ?
        if self._tracks is not None:
            return self._tracks
        # in cache ?
        tracks_path = self.notebooks_dir / ".tracks.json"
        if tracks_path.exists():
            logger.debug(f"{tracks_path} found")
            tracks = read_tracks(self, tracks_path)
            self._tracks = tracks
            return tracks
        # compute from yaml config
        if self._yaml_config and 'tracks' in self._yaml_config:
            logger.debug(f"computing tracks from yaml")
            tracks = tracks_from_yaml_config(self, self._yaml_config['tracks'])
        else:
            # compute from course
            logger.debug(f"{tracks_path} not found - recomputing")
            tracks = self._fetch_course_custom_tracks()
        tracks = sanitize_tracks(tracks)
        self._tracks = tracks
        write_tracks(tracks, tracks_path)
        return tracks
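A usage sketch of the three cache layers, assuming coursedir is a CourseDir instance (the variable name is made up).

# first call in a process: reads .tracks.json if present, otherwise recomputes and writes it
tracks = coursedir.tracks()
# subsequent calls in the same process are served from the in-memory self._tracks
tracks = coursedir.tracks()
# removing courses/<coursename>/.tracks.json forces a recompute in a fresh process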
Example #7
    def material_usage(self):
        """
        read the events file and produce data about relations
        between notebooks and students
        remember we cannot serialize a set, plus a sorted result is better
        'nbnotebooks' : how many distinct notebooks show up in the events file
        'nbstudents' : how many students are considered (staff and test/artefact users are removed)
        'nbstudents_per_notebook' : a sorted list of tuples (notebook, nb_students)
                                  how many students have read this notebook
        'nbstudents_per_notebook_animated' : same but animated over time
        'nbstudents_per_nbnotebooks' : a sorted list of tuples (nb_notebooks, nb_students)
                                  how many students have read exactly that number of notebooks
        'heatmap' : a complete matrix notebook x student ready to feed to a plotly heatmap
                    comes with 'x', 'y', 'z', 'zmin' and 'zmax' keys
        """

        events_path = self.notebook_events_path()
        # a dict notebook -> set of students
        set_by_notebook = defaultdict(set)
        nbstudents_per_notebook_buckets = TimeBuckets(grain=timedelta(hours=6),
                                                      time_format=time_format)
        # a dict student -> set of notebooks
        set_by_student = defaultdict(set)
        # a dict hashed on a tuple (notebook, student) -> number of visits
        raw_counts = defaultdict(int)
        #
        staff_names = {username for username in
                       CourseDir.objects.get(coursename=self.coursename).staff_usernames.split()}
        try:
            with events_path.open() as f:
                for _lineno, line in enumerate(f, 1):
                    date, _, student, notebook, action, *_ = line.split()
                    # action 'killing' needs to be ignored
                    if action in ('killing',):
                        continue
                    # ignore staff or other artefact users
                    if student in staff_names or artefact_user(student):
                        logger.debug(f"ignoring staff or artefact student {student}")
                        continue
                    # animated data must be taken care of before anything else
                    previous, next_, changed = nbstudents_per_notebook_buckets.prepare(date)
                    if changed:
                        nspn = [(notebook, len(set_by_notebook[notebook]))
                                for notebook in sorted(set_by_notebook)]
                        nbstudents_per_notebook_buckets.record_data(nspn, previous, next_)
                    notebook = canonicalize(notebook)
                    set_by_notebook[notebook].add(student)
                    set_by_student[student].add(notebook)
                    raw_counts[notebook, student] += 1
        except Exception as _exc:
            logger.exception(f"could not read {events_path} to count students per notebook")

        finally:
            nbstudents_per_notebook = [
                (notebook, len(set_by_notebook[notebook]))
                for notebook in sorted(set_by_notebook)
            ]
            nb_by_student = { student: len(s) for (student, s) in set_by_student.items() }

            nbstudents_per_notebook_animated = nbstudents_per_notebook_buckets.wrap(nbstudents_per_notebook)

            # counting in the other direction is surprisingly tedious
            nbstudents_per_nbnotebooks = [
                (number, iter_len(v))
                for (number, v) in itertools.groupby(sorted(nb_by_student.values()))
            ]
            # the heatmap
            heatmap_notebooks = sorted(set_by_notebook.keys())
            heatmap_students = sorted(set_by_student.keys())
            # a first attempt at showing the number of times a given notebook was opened
            # by a given student resulted in a poor outcome
            # problem being mostly with the colorscale, we'd need to have '0' stick out
            # as transparent or something, so None is sent instead of 0
            heatmap_z = [
                [raw_counts.get( (notebook, student,), None) for notebook in heatmap_notebooks]
                for student in heatmap_students
            ]
            # sort students on total number of opened notebooks,
            # keeping the y labels aligned with the reordered rows
            pairs = sorted(zip(heatmap_students, heatmap_z),
                           key=lambda pair: sum(x for x in pair[1] if x))
            heatmap_students = [student for student, _ in pairs]
            heatmap_z = [line for _, line in pairs]

            zmax = max((max(x for x in line if x) for line in heatmap_z),
                       default=0)
            zmin = min((min(x for x in line if x) for line in heatmap_z),
                       default=0)

            return {
                'nbnotebooks' : len(set_by_notebook),
                'nbstudents' : len(set_by_student),
                'nbstudents_per_notebook' : nbstudents_per_notebook,
                'nbstudents_per_notebook_animated' : nbstudents_per_notebook_animated,
                'nbstudents_per_nbnotebooks' : nbstudents_per_nbnotebooks,
                'heatmap' : {'x' : heatmap_notebooks, 'y' : heatmap_students,
                             'z' : heatmap_z,
                             'zmin' : zmin, 'zmax' : zmax,
                },
            }
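The 'heatmap' entry is shaped for plotly; a minimal rendering sketch follows (only the x/y/z/zmin/zmax keys come from the code above, the rest is illustrative and coursedir is a hypothetical CourseDir instance).

import plotly.graph_objects as go

usage = coursedir.material_usage()
hm = usage['heatmap']
fig = go.Figure(go.Heatmap(x=hm['x'], y=hm['y'], z=hm['z'],
                           zmin=hm['zmin'], zmax=hm['zmax']))
fig.show()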