def get_speaking_language(nb_id):
    """Return the language detected for a notebook's markdown cells.

    Args:
        nb_id: notebook identifier, passed through to data.get_md_cells.

    Returns:
        Whatever speaking_language returns for the markdown cells, or None
        when the notebook has no markdown cells.
    """
    cells = data.get_md_cells(nb_id)
    # nothing to analyse without markdown cells
    return speaking_language(cells) if len(cells) > 0 else None
def frequency(nb_id):
    """Return the proportion of cell-to-cell transitions that are switches.

    The numerator is count_switches(nb_id) (described by the original
    comment as the number of code <-> markdown switches); the denominator
    is the number of adjacent cell pairs, i.e. len(cells) - 1.

    Args:
        nb_id: notebook identifier, passed through to the data helpers.

    Returns:
        The ratio as a float, or None when the notebook has no markdown
        cells or has fewer than two cells (no transitions to measure).
    """
    # check if markdown cells exist
    if len(data.get_md_cells(nb_id)) == 0:
        return None

    # a notebook with n cells has n - 1 adjacent-cell transitions; with a
    # single cell there are none, and dividing would raise ZeroDivisionError
    total_switches = len(data.get_cells(nb_id)) - 1
    if total_switches <= 0:
        return None

    cm_switches = count_switches(nb_id)

    # calculate proportion
    return float(cm_switches) / float(total_switches)
def num_headers(nb_id):
    """Return the total header count across a notebook's markdown cells.

    Args:
        nb_id: notebook identifier, passed through to data.get_md_cells.

    Returns:
        The sum of count_headers(cell) over every markdown cell; 0 when
        there are no markdown cells.
    """
    return sum(count_headers(cell) for cell in data.get_md_cells(nb_id))
def md_formatting(nb_id):
    """Report whether any markdown cell passes has_extra_formatting.

    Args:
        nb_id: notebook identifier, passed through to data.get_md_cells.

    Returns:
        None when the notebook has no markdown cells; otherwise True if
        has_extra_formatting is truthy for at least one cell, else False.
    """
    cells = data.get_md_cells(nb_id)

    # no markdown cells: the question does not apply
    if len(cells) == 0:
        return None

    # any() stops at the first cell with special formatting
    return any(has_extra_formatting(cell) for cell in cells)
def has_author(nb_id):
    """Return True if a known author or contributor name appears in the notebook.

    Candidate names are get_contributors(nb_id) plus the values returned by
    get_author(nb_id) and get_username(nb_id). They are searched for, as
    plain substrings, in:

    * every line of each markdown cell's 'source' (after removing all
      matches of regex.link from the line), and
    * every entry of data.get_comments(nb_id), when that is not None.

    Args:
        nb_id: notebook identifier, passed through to the helpers above.

    Returns:
        True on the first match, False if no name is found anywhere.
    """
    # get all the markdown cells and author names
    md_cells = data.get_md_cells(nb_id)
    author_name = get_author(nb_id)
    author_login = get_username(nb_id)
    candidates = get_contributors(nb_id) + [author_name, author_login]

    # Drop missing names once, up front. Empty strings must go too: "" is a
    # substring of every string, so it would make every line look authored.
    all_names = [name for name in candidates if name]

    # check each cell for an author
    for cell in md_cells:
        if 'source' not in cell:
            continue

        # scan all the lines in the cell
        for line in cell['source']:
            # filter out links before searching for author; repeat until no
            # match remains because a removal can expose a new match
            link = re.search(regex.link, line)
            while link is not None:
                line = line.replace(link.group(0), "")
                link = re.search(regex.link, line)

            # check all possible authors
            if any(name in line for name in all_names):
                return True

    # gather comments
    comments = data.get_comments(nb_id)
    if comments is not None:
        # search each comment for the author names
        # NOTE(review): assumes each comment supports `name in comment`
        # (e.g. is a string) - confirm against data.get_comments
        for comment in comments:
            if any(name in comment for name in all_names):
                return True

    # no instances of authors found
    return False
def has_equations(nb_id):
    """Report whether any markdown line matches regex.equation.

    Args:
        nb_id: notebook identifier, passed through to data.get_md_cells.

    Returns:
        None when the notebook has no markdown cells; otherwise True if
        any line of any cell's 'source' matches regex.equation, else False.
    """
    cells = data.get_md_cells(nb_id)

    # no markdown cells: the question does not apply
    if len(cells) == 0:
        return None

    for cell in cells:
        # only cells that actually carry a source field can be searched
        if 'source' in cell:
            if any(re.search(regex.equation, text) for text in cell['source']):
                return True

    return False
def is_education(nb_id):
    """Return True if an education keyword appears in the notebook or its path.

    Every pattern in regex.education is searched, case-insensitively (the
    text is lower-cased first), in each line of every markdown cell's
    'source' and then in data.get_path(nb_id).

    Args:
        nb_id: notebook identifier, passed through to the data helpers.

    Returns:
        True on the first match, otherwise False.
    """
    def matches_keyword(text):
        # lower-case once per text instead of once per pattern
        lowered = text.lower()
        return any(re.search(pattern, lowered) for pattern in regex.education)

    # go through the markdown cells and search for the keywords
    for cell in data.get_md_cells(nb_id):
        if 'source' not in cell:
            continue
        if any(matches_keyword(line) for line in cell['source']):
            return True

    # check the file path (including the file name) for the keywords as well
    return matches_keyword(data.get_path(nb_id))
def markdown_prop(nb_id):
    """Return the fraction of a notebook's cells that are markdown cells.

    Args:
        nb_id: notebook identifier, passed through to the data helpers.

    Returns:
        len(markdown cells) / len(all cells) as a float, or None when the
        notebook has no cells at all (the proportion is undefined).
    """
    num_cells = len(data.get_cells(nb_id))

    # an empty notebook would otherwise raise ZeroDivisionError
    if num_cells == 0:
        return None

    num_md = len(data.get_md_cells(nb_id))
    return float(num_md) / float(num_cells)