def _probe_settings_yaml(self):
    """
    Look for a customized nbhosting.yaml and, when found, populate
    static_mappings and builds from it; the raw parsed config is
    kept in self._yaml_config for later use.

    Returns True when the yaml file was found and loaded, False
    otherwise (missing file, or any parsing/processing error).
    """
    yaml_filename = self.customized("nbhosting.yaml")
    logger.debug(f"yaml filename {yaml_filename}")
    if not yaml_filename:
        return False
    try:
        with open(yaml_filename) as feed:
            yaml_config = yaml.safe_load(feed.read())
        # static mappings: explicit entries, or project defaults
        if 'static-mappings' in yaml_config:
            logger.debug(f"populating static-mappings from yaml")
            self.static_mappings = [
                StaticMapping(entry['source'], entry['destination'])
                for entry in yaml_config['static-mappings']
            ]
        else:
            self.static_mappings = StaticMapping.defaults()
        # builds: explicit entries, or none at all
        if 'builds' in yaml_config:
            self.builds = [Build(entry) for entry in yaml_config['builds']]
        else:
            self.builds = []
        self._yaml_config = yaml_config
    except Exception:
        logger.exception(
            f"could not load yaml file {yaml_filename} - ignoring")
        return False
    return True
def notebooks_by_pattern(coursedir, pattern):
    """
    Return a sorted list of all notebooks (relative paths)
    matching some pattern from coursedir.
    """
    logger.debug(f"notebooks_by_pattern in {coursedir} with {pattern}")
    root = Path(coursedir.notebooks_dir).absolute()
    # glob below root, then re-express each hit relatively to root
    found = [
        Notebook(coursedir, hit.relative_to(root))
        for hit in root.glob(pattern)
    ]
    return sorted(found, key=lambda notebook: notebook.path)
def _fetch_course_custom_tracks(self):
    """
    locate and load <course>/nbhosting/tracks.py

    objective is to make this customizable so that some
    notebooks in the repo can be ignored, and the others
    organized along different view points

    the tracks() function will receive self as its single parameter;
    it is expected to return a dictionary track_name -> Track instance
    see flotpython/nbhosting/tracks.py for a realistic example

    the keys in this dictionary are used in the web interface
    to propose the list of available tracks

    absence of tracks.py, or inability to run it, triggers
    the default policy (per directory) implemented in model_track.py
    """
    course_tracks_py = self.customized("tracks.py")
    if course_tracks_py:
        # one module name per course, so concurrent courses
        # do not clash in sys.modules
        modulename = (f"{self.coursename}_tracks".replace("-", "_"))
        try:
            logger.debug(f"{self} loading module {course_tracks_py}")
            spec = spec_from_file_location(
                modulename,
                course_tracks_py,
            )
            module = module_from_spec(spec)
            spec.loader.exec_module(module)
            tracks_fun = module.tracks
            # fixed typo: message used to read "triggerring"
            logger.debug(f"triggering {tracks_fun.__qualname__}()")
            tracks = tracks_fun(self)
            if self._check_tracks(tracks):
                return tracks
        except Exception:
            # fixed grammar: message used to read "could not do load"
            logger.exception(f"{self} could not load custom tracks")
        finally:
            # make sure to reload the python code next time
            # we will need it, in case the course has published an update
            if modulename in sys.modules:
                del sys.modules[modulename]
    else:
        logger.info(f"{self} no tracks.py hook found")
    logger.warning(f"{self} resorting to generic filesystem-based track")
    return [generic_track(self)]
def notebooks_by_patterns(coursedir, patterns):
    """
    Return a concatenation of notebooks_by_pattern on all patterns.

    Each bunch is sorted internally, but the concatenation
    remains in the order specified in patterns.

    Returns:
        list of all notebooks (relative paths)
    """
    logger.debug(f"notebooks_by_patterns in {coursedir} with")
    for pattern in patterns:
        logger.debug(f" pattern {pattern}")
    # one sorted bunch per pattern, concatenated in patterns order
    return [
        notebook
        for pattern in patterns
        for notebook in notebooks_by_pattern(coursedir, pattern)
    ]
def track_by_directory(coursedir, *,
                       name="", description,
                       notebooks, directory_labels=None):
    """
    From a list of relative paths, build a Track whose Sections
    correspond to the directories containing the notebooks.

    Optional directory_labels allows to provide a mapping
    "dirname" -> "displayed name".
    """
    def mapped_name(dirname):
        # resolve the displayed name, defaulting to the raw dirname
        dirname = str(dirname)
        if not directory_labels:
            return dirname
        return directory_labels.get(dirname, dirname)

    logger.debug(f"track_by_directory in {coursedir}")
    root = coursedir.notebooks_dir
    # bucket the notebooks by their (absolute) parent directory
    per_directory = defaultdict(list)
    for notebook in notebooks:
        per_directory[notebook.absolute().parent].append(notebook)
    sections = [
        Section(name=folder.relative_to(root),
                coursedir=coursedir,
                notebooks=contents)
        for folder, contents in per_directory.items()
    ]
    # sort *before* applying the name mapping
    sections.sort(key=lambda section: section.name)
    for section in sections:
        section.name = mapped_name(section.name)
        section.notebooks.sort(key=lambda notebook: notebook.path)
    return Track(coursedir, sections, name=name, description=description)
def tracks(self):
    """
    Return the list of known tracks.

    Lookup is layered for speed: the in-memory cache first,
    then the disk cache in courses/<coursename>/.tracks.json,
    and only then a recompute from the yaml config or the
    course-specific tracks.py hook.
    """
    self.probe()
    # in memory ?
    if self._tracks is not None:
        return self._tracks
    # in cache ?
    tracks_path = self.notebooks_dir / ".tracks.json"
    if tracks_path.exists():
        logger.debug(f"{tracks_path} found")
        self._tracks = read_tracks(self, tracks_path)
        return self._tracks
    # recompute: prefer the yaml config when it defines tracks
    if self._yaml_config and 'tracks' in self._yaml_config:
        logger.debug(f"computing tracks from yaml")
        computed = tracks_from_yaml_config(self, self._yaml_config['tracks'])
    else:
        logger.debug(f"{tracks_path} not found - recomputing")
        computed = self._fetch_course_custom_tracks()
    computed = sanitize_tracks(computed)
    self._tracks = computed
    # refresh the disk cache for next time
    write_tracks(computed, tracks_path)
    return computed
def material_usage(self):
    """
    read the events file and produce data about
    relations between notebooks and students

    remember we cannot serialize a set, plus a sorted result is better

    'nbstudents' : how many students are considered
       (test students are removed..)
    'nbstudents_per_notebook' : a sorted list of tuples
       (notebook, nb_students)
       how many students have read this notebook
    'nbstudents_per_notebook_animated' : same but animated over time
    'nbstudents_per_nbnotebooks' : a sorted list of tuples
       (nb_notebooks, nb_students)
       how many students have read exactly that number of notebooks
    'heatmap' : a complete matrix notebook x student
       ready to feed to plotly.heatmap
       comes with 'x', 'y' and 'z' keys
    """
    events_path = self.notebook_events_path()
    # a dict notebook -> set of students
    set_by_notebook = defaultdict(set)
    # accumulates snapshots of per-notebook audiences over time,
    # bucketed in 6-hour slices
    nbstudents_per_notebook_buckets = TimeBuckets(
        grain=timedelta(hours=6), time_format=time_format)
    # a dict student -> set of notebooks
    set_by_student = defaultdict(set)
    # a dict hashed on a tuple (notebook, student) -> number of visits
    raw_counts = defaultdict(int)
    #
    # staff accounts are excluded from all counts below
    staff_names = {
        username for username in CourseDir.objects.get(
            coursename=self.coursename).staff_usernames.split()}
    try:
        with events_path.open() as f:
            # one whitespace-separated event per line:
            # date _ student notebook action ...
            for _lineno, line in enumerate(f, 1):
                date, _, student, notebook, action, *_ = line.split()
                # action 'killing' needs to be ignored
                if action in ('killing',):
                    continue
                # ignore staff or other artefact users
                if student in staff_names or artefact_user(student):
                    logger.debug(
                        f"ignoring staff or artefact student {student}")
                    continue
                # animated data must be taken care of before anything else
                # NOTE(review): 'next' shadows the builtin here
                previous, next, changed = \
                    nbstudents_per_notebook_buckets.prepare(date)
                if changed:
                    # a time bucket boundary was crossed: snapshot the
                    # current audience of every notebook seen so far
                    nspn = [
                        (notebook, len(set_by_notebook[notebook]))
                        for notebook in sorted(set_by_notebook)]
                    nbstudents_per_notebook_buckets.record_data(
                        nspn, previous, next)
                notebook = canonicalize(notebook)
                set_by_notebook[notebook].add(student)
                set_by_student[student].add(notebook)
                raw_counts[notebook,
                           student] += 1
    except Exception as _exc:
        # best-effort: a broken events file yields partial (possibly
        # empty) statistics rather than a failure
        logger.exception(
            f"could not read {events_path} to count students per notebook")
    finally:
        # per-notebook audience, sorted by notebook name
        nbstudents_per_notebook = [
            (notebook, len(set_by_notebook[notebook]))
            for notebook in sorted(set_by_notebook)
        ]
        # student -> how many distinct notebooks they opened
        nb_by_student = {
            student: len(s)
            for (student, s) in set_by_student.items()
        }
        nbstudents_per_notebook_animated = \
            nbstudents_per_notebook_buckets.wrap(nbstudents_per_notebook)
        # counting in the other direction is surprisingly tedious
        nbstudents_per_nbnotebooks = [
            (number, iter_len(v))
            for (number, v) in itertools.groupby(
                sorted(nb_by_student.values()))
        ]
        # the heatmap
        heatmap_notebooks = sorted(set_by_notebook.keys())
        heatmap_students = sorted(set_by_student.keys())
        # a first attempt at showing the number of times a given notebook
        # was open by a given student resulted in poor outcome
        # problem being mostly with colorscale, we'd need to have '0'
        # stick out as transparent or something, but OTOH sending None
        # instead or 0
        heatmap_z = [
            [raw_counts.get((notebook, student,), None)
             for notebook in heatmap_notebooks]
            for student in heatmap_students
        ]
        # sort students on total number of opened notebooks
        heatmap_z.sort(
            key = lambda student_line: sum(x for x in student_line if x))
        # color range bounds, ignoring None/0 cells
        zmax = max((max(x for x in line if x)
                    for line in heatmap_z), default=0)
        zmin = min((min(x for x in line if x)
                    for line in heatmap_z), default=0)
    return {
        'nbnotebooks' : len(set_by_notebook),
        'nbstudents' : len(set_by_student),
        'nbstudents_per_notebook' : nbstudents_per_notebook,
        'nbstudents_per_notebook_animated' : nbstudents_per_notebook_animated,
        'nbstudents_per_nbnotebooks' : nbstudents_per_nbnotebooks,
        'heatmap' : {'x' : heatmap_notebooks,
                     'y' : heatmap_students,
                     'z' : heatmap_z,
                     'zmin' : zmin,
                     'zmax' : zmax,
                     },
    }