示例#1
0
def menu_item(str_item, mode):
    """
    Print a single menu entry after a short pause

    :param str_item: Text of the menu entry
    :param mode: 1 prints the text as-is behind an 8-space indent; any other
        value prints a blank line followed by the text behind a 4-space
        indent, passed through style.string_color with "blue"
    """
    sleep(style.line_sleep)

    if mode == 1:
        print("        " + str_item)
        return

    highlighted = style.string_color("    " + str_item, "blue")
    print('\n' + highlighted)
示例#2
0
def get_user_input(options):
    """
    Prompt for an option until the user enters one of the valid choices

    Implemented as a loop instead of a recursive retry so the accepted
    answer is always returned, even after one or more invalid attempts.

    :param options: collection of valid options; the raw input string is
        checked against it with ``in``
    :return: validated user input, ready to use
    """
    while True:
        sleep(style.line_sleep)
        option = input(style.string_color("\n    => Option number: ", "cyan"))

        if option in options:
            return option

        # Invalid answer: report it, pause, and ask again.
        print(
            style.string_color((' ' * 7) + f"Option {option} not valid",
                               "red"))
        sleep(style.end_line_sleep)
示例#3
0
def init():
    """
    Run the scraper setup: ask for the base URL, save a preview and scrape

    The value returned by url_tool.get_base_url() is indexed as a pair:
    item 0 is checked for "/clases/" and later passed on as the course URL,
    item 1 is the fetched page object whose ``content`` is saved to
    "000 - Preview.html" and handed to the scraping functions.

    The function returns early when tools.continue_dialog() or
    tools.exit_dialog() returns a truthy value.
    """
    header("  scraper setup", "", "cyan", "left", 1, "-", "green", 0)

    print(style.string_color("  Base URL:", "blue"))

    while True:
        try:
            response = url_tool.get_base_url()
            if "/clases/" in response[0]:
                # presumably switches the module-level ``mode`` to
                # single-course scraping -- confirm against update_mode
                update_mode(1)

            # Decode before opening the file so a decoding failure does not
            # leave an empty preview behind, and write the text back as
            # UTF-8 instead of the platform default encoding.
            page_text = response[1].content.decode("UTF-8")
            with open("000 - Preview.html", "w", encoding="utf-8") as page:
                page.write(page_text)

            if tools.continue_dialog():
                return

            break

        except (KeyboardInterrupt, EOFError):
            if tools.exit_dialog():
                return

    try:
        if mode == 2:
            scrape_courses(response[1])
        else:
            scrape_course("", response[1], response[0])
    except (KeyboardInterrupt, EOFError):
        # Nothing is left to run after this point, so the answer does not
        # change the control flow; the dialog is still shown to the user.
        tools.exit_dialog()
示例#4
0
def download_page(url, target_path, name):
    """
    Save the page at ``url`` as ``target_path/name`` unless it already exists

    :param url: Address of the page to download
    :param target_path: Directory that receives the file
    :param name: Name of the file written inside target_path
    """
    tools.check_path(target_path)
    p_print.line_char("-", "yellow", 2)
    print(style.string_color("    Step 1: Downloading Webpage\n", "blue"))

    file_path = f"{target_path}/{name}"
    if os.path.exists(file_path):
        # Check before requesting: an existing file needs no network call.
        print_dialog("\n", "File already exists, skipping", 2)
        return

    r = make_requests(url)

    # Decode first so a decoding error cannot leave an empty file that a
    # later run would treat as "already exists", then write the text back
    # as UTF-8 instead of the platform default encoding.
    page_text = r.content.decode("UTF-8")
    with open(file_path, "w", encoding="utf-8") as page:
        page.write(page_text)

    print_dialog("\n", "Webpage downloaded", 3)
示例#5
0
def print_dialog(pre_char, message, m_type):
    """
    Print a colored program dialog: a heading line, then the indented message

    :param pre_char: Text placed at the beginning, before the heading
    :param message: Text to show in the dialog
    :param m_type: 1=error dialog, 2=warning dialog, any other value=success
        dialog
    :return:
    """
    body = "        " + message

    # Select heading and color once, then emit a single print call.
    if m_type == 1:
        heading, color = "    Error: ", "red"
    elif m_type == 2:
        heading, color = "    Warning: ", "yellow"
    else:
        heading, color = "    Success", "green"

    print(style.string_color(pre_char + heading + "\n" + body, color))
示例#6
0
def line_char(char, color, spaces):
    """
    Print one colored line made of a repeated character

    The line width is taken from TERMINAL_COLS, read right after
    update_terminal_cols() has been called.

    :param char: Char to fill the line with
    :param color: Color handed to style.string_color
    :param spaces: Number of blanks kept at the beginning and at the end;
        0 fills the whole width
    """
    update_terminal_cols()

    if spaces == 0:
        line = char * TERMINAL_COLS
    else:
        filler = char * (TERMINAL_COLS - (spaces * 2))
        margin = ' ' * spaces
        line = margin + filler + margin

    print(style.string_color(line, color))
示例#7
0
def header(title, extra, color, pos, str_case, decoration, d_color, d_spaces):
    """
    Print a special header for current task

    The title is letter-spaced: every character is preceded by a blank and
    every blank in the title is doubled. It is printed between two border
    lines, followed by the optional extra information.

    :param title: Task title
    :param extra: Extra information about task; a string is printed as one
        line, any other non-empty iterable is printed item by item
    :param color: Color title
    :param pos: Title position, passed to style.string_position
    :param str_case: 1 upper case, all other is lower case
    :param decoration: Character to fill borders
    :param d_color: Borders color
    :param d_spaces: Spaces at begin and end of border
    :return:
    """

    # Build the title for both case modes; without the lower-case branch any
    # str_case other than 1 would print an empty title.
    upper = str_case == 1
    pieces = []
    for char in title:
        if char == " ":
            pieces.append(char * 2)
        else:
            pieces.append(' ' + (char.upper() if upper else char.lower()))

    # Drop the single leading blank introduced by the letter-spacing.
    str_title = "".join(pieces)[1:]

    str_title = style.string_position(str_title, pos, 0)
    str_title = style.string_color(str_title, color)

    line_char(decoration, d_color, d_spaces)
    print(str_title)

    if extra:
        # isinstance also treats str subclasses as a single line of text.
        if isinstance(extra, str):
            print(extra)
        else:
            for line in extra:
                print(line)
    line_char(decoration, d_color, d_spaces)
示例#8
0
def scrape_courses(r_object):
    """
    List the courses found in a page, let the user exclude some, scrape the rest

    Course links are read from the ``RoutesList-items`` blocks of the page.
    The user may type the indexes of courses to exclude, separated by
    whitespace; tokens that are not integers or that fall outside the list
    are ignored. Every remaining course is requested and handed to
    scrape_course with a three-digit running number.

    The function returns early when tools.exit_dialog() or
    tools.continue_dialog() returns a truthy value.

    :param r_object: Object whose ``content`` holds the HTML of the page
        that lists the courses
    """
    tools.clear_screen()
    em = "    Load and select courses to process"
    header("  scraper setup", em, "cyan", "left", 1, "-", "green", 0)
    print(style.string_color("  Course list:", "blue"))

    target_page = html.fromstring(r_object.content)
    course_list = []

    for course in target_page.xpath('//div[@class="RoutesList-items"]'):
        for link in course.xpath('a[@class="RoutesList-item"]/@href'):
            name = style.format_name_string(
                course.xpath('a[@href="' + link + '"]/h4/text()')[0])

            course_list.append({
                "name": name,
                "url": url_tool.base_url + link,
                # position in course_list, shown to the user for exclusion
                "index": len(course_list),
                "active": True
            })

    # print info about data loaded from r_object
    sleep(0.2)
    print(string_color(f"\n    Total courses: {len(course_list)}", "cyan"))

    print()
    p_print.line_char("-", "cyan", 2)
    for data in course_list:
        print(f"    Name: {string_color(data['name'], 'green')}")
        sleep(style.line_sleep)
        print(f"    URL: {string_color(data['url'], 'green')}")
        sleep(style.line_sleep)
        print(f"    Index: {string_color(data['index'], 'cyan')}")
        sleep(style.line_sleep)
        print()

    while True:
        try:
            exclude_list = input("\n  Courses to exclude (index) => ")
            if len(exclude_list) > 0:
                for index in _parse_exclusions(exclude_list,
                                               len(course_list)):
                    course_list[index]["active"] = False

                print(
                    string_color("\n  Courses excluded from list: \n", "blue"))
                p_print.line_char("-", "red", 2)
                for i in course_list:
                    if not i["active"]:
                        print(f"    Name: {string_color(i['name'], 'red')}")
                        sleep(style.line_sleep)
                        print(f"    URL: {string_color(i['url'], 'red')}")
                        sleep(style.line_sleep)
                        print(f"    Index: {string_color(i['index'], 'red')}")
                        sleep(style.line_sleep)
                        print()
            break
        except (KeyboardInterrupt, EOFError):
            if tools.exit_dialog():
                return

    if tools.continue_dialog():
        return

    course_count = 1
    for i in course_list:
        if not i["active"]:
            continue

        while True:
            r = url_tool.make_requests(i["url"])
            if r.status_code == 200:
                # zero-padded to three digits: 001, 010, 100
                scrape_course(f"{course_count:03d}", r, i["url"])
                course_count += 1
                break

            if not tools.retry_dialog():
                break


def _parse_exclusions(raw, total):
    """Return the indexes in ``raw`` that are integers within 0..total-1."""
    indexes = []
    # split() without arguments tolerates repeated or surrounding blanks
    for token in raw.split():
        try:
            index = int(token)
        except ValueError:
            continue

        # Negative values are rejected so they cannot address courses from
        # the end of the list.
        if 0 <= index < total:
            indexes.append(index)

    return indexes
示例#9
0
def scrape_course(numeration, r_object, course_url):
    """
    Download the pages and every unlocked lesson of a single course

    The course title is read from the page; when the module-level ``mode``
    is 2 it is prefixed with ``numeration``. The course page and its
    "/cursos/" variant are saved through helper.download_page, then each
    section (``Material-concept``) and each lesson inside it is numbered
    with three digits and passed to helper.download_lesson as a dict with
    the keys "path", "name", "url" and "type"; video lessons also carry
    "extra_path" and "webpage".

    :param numeration: Prefix for the course title, used when mode is 2
    :param r_object: Object whose ``content`` holds the course page HTML
    :param course_url: URL of the course page
    """
    tools.clear_screen()

    target_page = html.fromstring(r_object.content)

    course_title = style.format_name_string(
        target_page.xpath('//h1[@class="CourseDetail-left-title"]/text()')[0])

    if mode == 2:
        course_title = f"{numeration} - {course_title}"

    em = style.format_string(f"    Processing: {course_title}", 16)
    header("  downloading", em, "cyan", "left", 1, "-", "green", 0)

    print(style.string_color("  Course information: \n", "blue"))
    sleep(style.line_sleep)
    print(style.format_string(f"    Course: {course_title}", 12))

    # Download course page
    helper.download_page(course_url, course_title, "000 - Preview.html")
    course_url = course_url.replace("/clases/", "/cursos/")
    helper.download_page(course_url, course_title, "Course Details.html")

    # Lessons matching this path are skipped below.
    # NOTE(review): presumably the icon marks a locked lesson -- confirm.
    lock_element = 'div/div[@class="MaterialItem-copy"]' \
                   '/div[@class="MaterialItem-copy-actions"]/div[' \
                   '@class="MaterialItem-copy-actions-anchor"]/i/@class '

    sections = target_page.xpath('//div[@class="Material-concept"]')

    for s_count, s in enumerate(sections, start=1):
        s_title = s.xpath('div[@class="Material-concept-edit"]'
                          '/h3[@class="Material-title"]/text()')[0]

        s_title = f"{s_count:03d} - {style.format_name_string(s_title)}"

        lessons = s.xpath('div[@class="MaterialItem-content"]')

        # Skipped lessons still consume a number, so the numbering follows
        # the lesson's position inside the section.
        for l_count, lesson in enumerate(lessons, start=1):
            if lesson.xpath(lock_element):
                continue

            if lesson.xpath('div/div[@class="MaterialItem-video"]'):
                l_type = "video"
            else:
                l_type = "material"

            l_number = f"{l_count:03d}"

            l_title = lesson.xpath('div/div[@class="MaterialItem-copy"]'
                                   '/p[@class="MaterialItem-copy-title"]'
                                   '/text()')
            l_title = f"{l_number} - {style.format_name_string(l_title[0])}"

            l_url = lesson.xpath('a[@class="MaterialItem-anchor"]'
                                 '/@href')[0]
            l_url = url_tool.base_url + l_url

            # A fresh dict per lesson keeps the video-only keys of one
            # lesson from leaking into the next one.
            data = {
                "path": course_title + '/' + s_title,
                "name": l_title,
                "url": l_url,
                "type": l_type,
            }
            if l_type == "video":
                data["extra_path"] = (
                    f"{course_title}/{s_title}/{l_number} - extra_files")
                data["webpage"] = f"{l_number} - webpage.html"

            helper.download_lesson(data)
            tools.clear_screen()

    input("\n\n    Press enter to continue")