Example #1
# `con` (authenticated session wrapper) and `helper` come from this
# downloader's own modules.
from bs4 import BeautifulSoup


def get_course_id_and_title(course_url):
    """Fetch a course page and return (course_id, title), or None on failure."""
    page = con.session.get(helper.embbed_link(course_url))
    soup = BeautifulSoup(page.text, 'html.parser')
    try:
        # Page titles look like "Course Name | DataCamp"; keep the first part.
        title = soup.find('title').getText().split('|')[0].strip()
    except AttributeError:
        # No <title> tag on the page; nothing useful to return.
        return None
    course_id = soup.find('input', {'name': 'course_id'})['value']
    return course_id, title
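
A minimal usage sketch, assuming the project's `con.session` is already authenticated; the course URL below is hypothetical:

course = get_course_id_and_title('https://www.datacamp.com/courses/intro-to-python')  # hypothetical URL
if course is not None:
    course_id, title = course
    print(f'Found course {course_id}: {title}')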
Example #2
import os
import re
import sys

from bs4 import BeautifulSoup


def download_track(track, folder, videos_download):
    """Download every course in a DataCamp track into its own subfolder."""
    page = con.session.get(helper.embbed_link(track.link))
    soup = BeautifulSoup(page.text, 'html.parser')
    # Course links on a track page look like <a class="course..." href="/courses/...">.
    all_courses = soup.findAll('a', {
        'href': re.compile('^/courses/'),
        'class': re.compile('^course')
    })
    track_title = soup.find('title').getText().split('|')[0].strip()
    folder = os.path.join(folder, track_title)

    all_links = ['https://www.datacamp.com' + x['href'] for x in all_courses]
    sys.stdout.write(f'{bcolors.BKBLUE}  {track_title}  {bcolors.BKENDC}\n')
    for i, link in enumerate(all_links):
        download_course(link, folder, videos_download, i + 1)
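
A usage sketch: `download_track` only reads `track.link`, so any object with a `link` attribute works. The `Track` namedtuple and URL below are hypothetical stand-ins for the project's own track objects:

from collections import namedtuple

Track = namedtuple('Track', ['link'])  # hypothetical stand-in for the project's track type
track = Track(link='https://www.datacamp.com/tracks/data-scientist-with-python')  # hypothetical URL
download_track(track, '/tmp/datacamp', videos_download=True)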
Example #3
import os
import re
import sys

from bs4 import BeautifulSoup


def download_course_data(conn, link, path):
    """Scrape a course page for dataset links and download each file.

    Note: the `conn` parameter appears unused in the original; the
    module-level `con` session performs the actual requests.
    """
    page = con.session.get(helper.embbed_link(link))
    soup = BeautifulSoup(page.text, 'html.parser')

    # Dataset files are absolute ("https...") links styled "link-borderless".
    dataset = soup.findAll('a', {
        'href': re.compile('^https'),
        'class': re.compile('^link-borderless')
    })

    titles = [x.text.strip() for x in dataset]
    all_links = [x['href'] for x in dataset]
    sys.stdout.write(f'{bcolors.BOLD}Downloading dataset...{bcolors.ENDC}\n')
    # Create path/Dataset, including any missing parents.
    os.makedirs(os.path.join(path, 'Dataset'), exist_ok=True)
    for link, title in zip(all_links, titles):
        # Keep the original file extension from the download URL.
        dest = os.path.join(path, 'Dataset', title) + '.' + link.split('.')[-1]
        download_file(con, link, dest)
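
A usage sketch with hypothetical arguments; `con` is the project's session object, passed through unchanged:

download_course_data(con, 'https://www.datacamp.com/courses/intro-to-python', '/tmp/datacamp/Intro to Python')  # hypothetical URL and path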