Пример #1
0
# -*- coding: utf-8 -*-
"""
Parsing and extraction functions
"""
import re
import json

from datetime import timedelta, datetime

from six.moves import html_parser
from bs4 import BeautifulSoup as BeautifulSoup_

from .common import Course, Section, SubSection, Unit, Video

# Force use of bs4 with html.parser
def BeautifulSoup(page):
    """
    Parse 'page' with bs4, always selecting the 'html.parser' parser
    """
    # A real function (rather than a lambda bound to a name) carries its own
    # name in tracebacks and can hold this docstring.
    return BeautifulSoup_(page, 'html.parser')


def edx_json2srt(o):
    """
    Transform the dict 'o' into the srt subtitles format
    """
    if o == {}:
        return ''

    base_time = datetime(1, 1, 1)
    output = []

    for i, (s, e, t) in enumerate(zip(o['start'], o['end'], o['text'])):
        if t == '':
            continue
Пример #2
0
def beautiful_soup(page):
    """
    Parse 'page' into a bs4 soup using the 'html.parser' parser
    """
    # bs4 is imported when the function is called, not when the module loads
    import bs4

    return bs4.BeautifulSoup(page, 'html.parser')
Пример #3
0
import StringIO
import subprocess
import sys
import tempfile
import time
import urllib
import urllib2

try:
    # First choice: the standalone BeautifulSoup package
    from BeautifulSoup import BeautifulSoup
except ImportError:
    # Fall back to bs4 when the standalone package cannot be imported
    from bs4 import BeautifulSoup as BeautifulSoup_
    # Use html5lib for parsing if available
    try:
        import html5lib
    except ImportError:
        # No html5lib: expose the bs4 class itself, with no parser forced
        BeautifulSoup = BeautifulSoup_
    else:
        def BeautifulSoup(page):
            """
            Parse 'page' with bs4 using the 'html5lib' parser
            """
            return BeautifulSoup_(page, 'html5lib')

# Login endpoint on coursera.org
AUTH_URL = 'https://www.coursera.org/maestro/api/user/login'

# Module-level strings that start out empty; presumably filled in once a
# login has happened -- confirm against the code that assigns them
session = ''
csrftoken = ''


class ClassNotFound(Exception):
    """
    Raised when a course is not found in Coursera's site.

    Derives from Exception (itself a BaseException subclass), so handlers
    written as 'except Exception' see it, while existing
    'except ClassNotFound' and 'except BaseException' handlers keep working.
    """
Пример #4
0
def BeautifulSoup(page):
    """
    Build a bs4 soup from 'page', always selecting the 'html.parser' parser
    """
    parser = 'html.parser'
    return BeautifulSoup_(page, parser)