示例#1
0
    def download_file_or_doc(self, unit_name, file_url, unit, path, file_dict, text_file):
        """Download the file or document unit_name into path under the local
        default folder and record its metadata in file_dict.

        Args:
            unit_name: Link text of the item; passed through check_folder_name
                to form the local file name.
            file_url: URL the item is fetched from.
            unit: Web element the link came from (not used by this method).
            path: Folder, relative to self.default_folder, to save into.
            file_dict: Metadata dict for the item; must already hold "heading"
                and "description". "format" and "path" are always set here,
                and "body" is set when the file is actually downloaded.
            text_file: Heading/description text accumulated so far for the
                folder being crawled.

        Returns:
            A (text_file, delete_file_dict) tuple. text_file gains this item's
            heading and description when the item was downloaded and its
            heading is not already present in the text. delete_file_dict is
            True when self.need_to_update reported that no download is
            needed, in which case the caller should discard file_dict.
        """

        credentials = (self.username, self.password)
        session = requests.session()
        response = None
        try:
            # Chalk always returns an error page on the first request; access
            # is obtained only after the second attempt, so the first
            # response is deliberately ignored.
            session.get(file_url, auth=credentials)
            response = session.get(file_url, stream=True, auth=credentials)

            file_dict["format"] = response.headers.get("content-type")
            destination = "{:}/{:}/{:}".format(self.default_folder, path, check_folder_name(unit_name))

            # Deleting apostrophes to prevent unterminated quote strings
            file_dict["path"] = os.path.abspath(destination).replace("'", "")

            if not self.need_to_update(response, file_dict):
                # Nothing to download; the caller drops file_dict
                print("{:} already up to date".format(unit_name))
                return text_file, True

            print("downloading {:}".format(unit_name))
            make_dirs(self.course_material_dict, self.default_folder)
            with open(file_dict["path"], "wb") as f:
                response.raw.decode_content = True
                f.write(response.content)

            # The header may be absent (None); treat that as "unknown format"
            # instead of failing on the substring tests below.
            content_type = file_dict["format"] or ""

            # Obtain body of file depending on file format
            if "pdf" in content_type:
                try:
                    file_dict["body"] = convert_pdf(file_dict["path"])
                except Exception:
                    # Text extraction is best effort: keep the downloaded
                    # file and store an empty body.
                    print("could not extract text from {:}".format(unit_name))
                    file_dict["body"] = ""
            # NOTE(review): plain-text files are normally served as
            # "text/plain", which this test does not match - confirm intent.
            elif "txt" in content_type:
                file_dict["body"] = response.content
            else:
                file_dict["body"] = ""

            if file_dict["heading"] not in text_file:
                # Adding heading and description of each file
                text_file = text_file + file_dict["heading"] + "\n" + file_dict["description"] + "\n\n"

            return text_file, False
        finally:
            # Release the streamed connection and the session on every path,
            # including the "already up to date" one where the body is
            # never read.
            if response is not None:
                response.close()
            session.close()
示例#2
0
    def gen_folder(self, unit, path, folder_dict, course):
        """Open the folder behind unit, process every item listed in it, then
        navigate the browser back one page.

        Sub-folders are registered in folder_dict and crawled recursively.
        Files and documents are handed to download_file_or_doc, and their
        metadata dicts are appended to self.file_list unless that call
        reports them as already present. Any heading/description text
        collected for the folder is written out with download_text.
        Always returns None.
        """

        unit.find_element_by_tag_name("a").click()

        if self.check_id_exists("content_listContainer"):
            item_count = len(self.browser.find_element_by_id("content_listContainer").find_elements_by_tag_name("li"))
            context_text = ""

            for position in range(item_count):
                # Look the list up again on every pass: crawling a sub-folder
                # leaves this page and comes back to it.
                entries = self.browser.find_element_by_id("content_listContainer").find_elements_by_tag_name("li")
                entry = entries[position]

                if not self.check_tag_exists_in_web_element(entry, "img"):
                    continue
                icon = entry.find_element_by_tag_name("img")
                if icon.get_attribute("class") != "item_icon":
                    continue
                icon_src = icon.get_attribute("src")

                if "folder_on" in icon_src:
                    # Folder icon: register it and descend into it
                    folder_name = check_folder_name(entry.find_element_by_tag_name("a").text)
                    folder_dict[folder_name] = {}
                    make_dirs(self.course_material_dict, self.default_folder)
                    self.gen_folder(entry, path + "/{:}".format(folder_name), folder_dict[folder_name], course)

                elif "file_on" in icon_src:
                    # File icon: a single link, no description
                    link = entry.find_element_by_tag_name("a")
                    link_text = link.text
                    link_url = link.get_attribute("href")
                    info = {
                        "course": course,
                        "heading": entry.find_element_by_tag_name("h3").text,
                        "description": "",
                    }
                    context_text, already_present = self.download_file_or_doc(
                        link_text, link_url, entry, path, info, context_text
                    )
                    # Only newly downloaded files are recorded
                    if not already_present:
                        self.file_list.append(info)

                elif "document_on" in icon_src:
                    # Document icon: possibly several links sharing one
                    # heading and description
                    if not self.check_tag_exists_in_web_element(entry, "a"):
                        continue
                    for link in entry.find_elements_by_tag_name("a"):
                        link_text = link.text
                        link_url = link.get_attribute("href")
                        title = entry.find_element_by_tag_name("h3").text
                        summary = "".join(p.text + "\n" for p in entry.find_elements_by_tag_name("p"))
                        info = {"course": course, "heading": title, "description": summary}

                        context_text, already_present = self.download_file_or_doc(
                            link_text, link_url, link, path, info, context_text
                        )
                        # Only newly downloaded files are recorded
                        if not already_present:
                            self.file_list.append(info)

            # Write the collected headings and descriptions, if there are any
            if context_text != "":
                self.download_text("Chalk context for files", context_text, path)

        self.browser.execute_script("window.history.go(-1)")

        return None
示例#3
0
    def build_course_dict(self, course_info, material_dict, prof_list, course, course_list):
        """Crawl one course in Chalk, download its materials into the matching
        path in the local directory, and compile a dictionary per file.

        Args:
            course_info: Not read by this method; the roster gathered from
                "Send Email" is appended to self.course_info instead.
            material_dict: Dict that receives one entry per crawled menu item
                (and nested entries per folder). Mutated in place.
            prof_list: Professor names as "First Last" strings; used to keep
                professors out of the student list.
            course: Course name; used as partial link text on the home page
                and, via check_folder_name, as the local folder name.
            course_list: List that receives the TA list and then the student
                list when the course menu has a "Send Email" item.

        Returns:
            material_dict, after all menu items have been processed.
        """

        # Menu items that carry no downloadable course material
        skipped_items = (
            "Home",
            "Announcements",
            "Send Email",
            "My Grades",
            "Discussion Board",
            "Discussions",
            "Library Course Reserves",
            "Tools",
            "Groups",
            "Calendar",
        )

        # Click course link on Chalk home page
        self.browser.find_element_by_partial_link_text(course).click()

        menu_size = len(
            self.browser.find_element_by_id("courseMenuPalette_contents").find_elements_by_tag_name("li")
        )
        for item_index in range(menu_size):
            # For each item on the left panel (i.e. Announcements, Syllabus...).
            # The element is looked up again on every pass because the
            # previous pass navigated to another page.
            item = self.browser.find_element_by_id("courseMenuPalette_contents").find_elements_by_tag_name("li")[
                item_index
            ]
            item_name = item.text

            if item_name == "Announcements":
                self._collect_announcements(item, material_dict, course)
            elif item_name == "Send Email":
                self._collect_roster(item, prof_list, course_list)
            elif item_name not in skipped_items:
                self._collect_content_area(item, item_name, material_dict, course)

        make_dirs(self.course_material_dict, self.default_folder)

        # Go back to Chalk Home Page
        self.browser.find_element_by_id("My Chalk").find_element_by_tag_name("a").click()

        return material_dict

    def _collect_announcements(self, item, material_dict, course):
        """Open the Announcements page and save its text, if there is any."""

        material_dict["Announcements"] = {}
        # Generate the Announcements folder
        make_dirs(self.course_material_dict, self.default_folder)

        item.find_element_by_tag_name("a").click()

        if self.check_id_exists("content_listContainer"):
            content_list_container = self.browser.find_element_by_id("content_listContainer")
            # Text of every list entry, each followed by a blank line
            announcement_text = "".join(
                unit.text + "\n\n" for unit in content_list_container.find_elements_by_tag_name("li")
            )
        else:  # if no container exists
            content = self.browser.find_element_by_id("content")
            if self.check_id_exists("announcementList"):
                # announcements are a plain list of text
                announcement_text = content.find_element_by_id("announcementList").text
            else:
                announcement_text = ""  # No announcements

        if announcement_text != "":
            self.download_text(
                "Announcements", announcement_text, "{:}/Announcements".format(str(check_folder_name(course)))
            )

    def _collect_roster(self, item, prof_list, course_list):
        """Read the TA and student lists from the Send Email pages and append
        them to course_list, then append course_list to self.course_info."""

        list_of_tas = []
        list_of_students = []
        item.find_element_by_tag_name("a").click()

        self.browser.find_element_by_link_text("All Teaching Assistant Users").click()

        # If TA's present and available
        if not self.check_id_exists("inlineReceipt_bad"):
            list_of_tas = (
                self.browser.find_element_by_id("stepcontent1")
                .find_elements_by_tag_name("li")[0]
                .text[3:]
                .split("; ")
            )

        course_list.append(list_of_tas)
        # Navigate browser back one page
        self.browser.execute_script("window.history.go(-1)")

        if self.check_link_text_exists("Select Users"):
            self.browser.find_element_by_link_text("Select Users").click()

            list_of_students_web_elements = (
                self.browser.find_element_by_id("stepcontent1")
                .find_element_by_name("USERS_AVAIL")
                .find_elements_by_tag_name("option")
            )

            # The comparison below uses "Last, First", so flip each
            # professor name into that order.
            compare_profs = []
            for professor in prof_list:
                name_parts = professor.split(" ")
                if len(name_parts) < 2:
                    # Single-word name: nothing to flip, compare it as is
                    compare_profs.append(professor)
                else:
                    compare_profs.append(name_parts[1] + ", " + name_parts[0])

            for student_web_element in list_of_students_web_elements:
                candidate = student_web_element.text
                # excluding profs and TA's from list of students
                if (
                    candidate not in compare_profs
                    and candidate not in list_of_tas
                    and "PreviewUser" not in candidate
                ):
                    list_of_students.append(candidate)
            # Navigate browser back one page
            self.browser.execute_script("window.history.go(-1)")

        course_list.append(list_of_students)
        self.course_info.append(course_list)

    def _collect_content_area(self, item, item_name, material_dict, course):
        """Crawl one content menu item: register its folders, download its
        files and documents, and drop its entry again if nothing was found."""

        component = check_folder_name(item_name)
        material_dict[component] = {}
        # Generates item_name folder in folder path
        make_dirs(self.course_material_dict, self.default_folder)
        item.find_element_by_tag_name("a").click()

        # Matches a div carrying both class tokens. The previous expression,
        # '//*div[@class = "noItemscontainer-empty"]' after literal
        # concatenation, was not valid XPath and could never match.
        empty_marker = '//div[contains(@class, "noItems") and contains(@class, "container-empty")]'
        if self.check_xpath_exists(empty_marker) or not self.check_id_exists("content_listContainer"):
            # Nothing to crawl: deletes folder if empty
            del material_dict[component]
            return

        num_of_items = len(self.browser.find_element_by_id("content_listContainer").find_elements_by_tag_name("li"))
        section_path = check_folder_name(course) + "/" + component
        folder_empty = True
        text_file = ""

        for unit_index in range(num_of_items):
            time.sleep(1)  # Wait for element to be found

            # each unit on the content panel; looked up again on every pass
            # because crawling a folder leaves this page and comes back
            unit = self.browser.find_element_by_id("content_listContainer").find_elements_by_tag_name("li")[
                unit_index
            ]

            if not self.check_tag_exists_in_web_element(unit, "img"):
                continue
            img = unit.find_element_by_tag_name("img")
            if img.get_attribute("class") != "item_icon":
                continue
            icon_src = img.get_attribute("src")

            # if icon is a folder
            if "folder_on" in icon_src:
                folder_empty = False
                folder_name = check_folder_name(unit.find_element_by_tag_name("a").text)

                material_dict[component][folder_name] = {}
                # Generate new folder
                make_dirs(self.course_material_dict, self.default_folder)

                self.gen_folder(
                    unit,
                    "{:}/{:}/{:}".format(check_folder_name(course), component, folder_name),
                    material_dict[component][folder_name],
                    course,
                )

            # if icon is a file
            elif "file_on" in icon_src:
                folder_empty = False
                link = unit.find_element_by_tag_name("a")
                unit_name = link.text
                file_url = link.get_attribute("href")
                heading = unit.find_element_by_tag_name("h3").text

                file_dict = {"course": course, "heading": heading, "description": ""}
                text_file = self._download_and_record(unit_name, file_url, unit, section_path, file_dict, text_file)

            # if icon is a document with download links
            elif "document_on" in icon_src:
                folder_empty = False
                if not self.check_tag_exists_in_web_element(unit, "a"):
                    continue  # no download links present

                # Heading and description belong to the unit, so they are
                # the same for every link inside it.
                heading = unit.find_element_by_tag_name("h3").text
                description = "".join(
                    paragraph.text + "\n" for paragraph in unit.find_elements_by_tag_name("p")
                )

                for download_file in unit.find_elements_by_tag_name("a"):
                    unit_name = download_file.text
                    file_url = download_file.get_attribute("href")
                    file_dict = {
                        "course": course,
                        "heading": heading,
                        "description": description,
                    }
                    text_file = self._download_and_record(
                        unit_name, file_url, download_file, section_path, file_dict, text_file
                    )

        # downloads text for describing each icon
        if text_file != "":
            self.download_text(
                "Chalk context for files",
                text_file,
                "{:}/{:}/".format(str(check_folder_name(course)), str(check_folder_name(item_name))),
            )

        # deletes folder if empty
        if folder_empty:
            del material_dict[component]

    def _download_and_record(self, unit_name, file_url, element, path, file_dict, text_file):
        """Run download_file_or_doc and append file_dict to self.file_list
        unless the file was reported as already present; return the updated
        text_file."""

        text_file, delete_file_dict = self.download_file_or_doc(
            unit_name, file_url, element, path, file_dict, text_file
        )
        if not delete_file_dict:
            self.file_list.append(file_dict)
        return text_file