示例#1
0
def extract_units(url, headers, file_formats):
    """
    Download a page and return the units its extractor finds in it.

    The page body is fetched with ``get_page_contents(url, headers)``, the
    extractor is chosen with ``get_page_extractor(url)``, and the result of
    the extractor's ``extract_units_from_html`` is returned unchanged.

    :param url: address of the page to download; also used to select the
        page extractor.
    :param headers: passed through to ``get_page_contents``.
    :param file_formats: passed through to ``extract_units_from_html``.
    :return: whatever ``extract_units_from_html`` returns (per the original
        docstring, resources such as video and subtitle URLs).
    """
    html = get_page_contents(url, headers)
    extractor = get_page_extractor(url)
    return extractor.extract_units_from_html(html, BASE_URL, file_formats)
示例#2
0
def get_available_sections(url, headers):
    """
    Extract the sections and subsections from a given url.

    The page body is fetched with ``get_page_contents(url, headers)``, the
    extractor is chosen with ``get_page_extractor(url)``, and the result of
    the extractor's ``extract_sections_from_html`` is returned unchanged.

    :param url: address of the page to download; also used to select the
        page extractor.
    :param headers: passed through to ``get_page_contents``.
    :return: whatever ``extract_sections_from_html`` returns.
    """
    # Pass values as logging arguments instead of concatenating: the message
    # is only formatted when DEBUG is enabled, and a non-str ``url`` no
    # longer raises TypeError while building the log line.
    logging.debug("Extracting sections for :%s", url)

    page = get_page_contents(url, headers)
    page_extractor = get_page_extractor(url)
    sections = page_extractor.extract_sections_from_html(page, BASE_URL)

    logging.debug("Extracted sections: %s", sections)
    return sections
示例#3
0
def get_courses_info(url, headers):
    """
    Download the dashboard page and return the courses found in it.

    The page body is fetched with ``get_page_contents(url, headers)``, the
    extractor is chosen with ``get_page_extractor(url)``, and the result of
    the extractor's ``extract_courses_from_html`` is logged at DEBUG level
    and returned unchanged.

    :param url: address of the dashboard page; also used to select the
        page extractor.
    :param headers: passed through to ``get_page_contents``.
    :return: whatever ``extract_courses_from_html`` returns.
    """
    logging.info('Extracting course information from dashboard.')

    dashboard_html = get_page_contents(url, headers)
    extractor = get_page_extractor(url)
    course_data = extractor.extract_courses_from_html(dashboard_html, BASE_URL)

    logging.debug('Data extracted: %s', course_data)
    return course_data