def download_url_report(self, report_url, temp_folder_name):
    """
    Downloads a MealTime report.

    Args:
        report_url (string): Information pertaining to the path and query
            string for the report whose access is desired. Any filtering
            that can be done with a stateful URL should be included.
        temp_folder_name (string): The name of the folder in which this
            specific report's download files should be stored.

    Returns:
        A Pandas DataFrame of the report contents.

    Raises:
        ValueError: If the downloaded report contains no rows.
    """
    csv_download_folder_path = self.temp_folder_path + '/' + temp_folder_name
    full_report_url = interpret_report_url(self.base_url, report_url)

    # set up the driver for execution
    self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                             self.headless)
    try:
        self._login()

        # get the report url
        self.driver.get(full_report_url)

        # select the download format and execute; CSV is preferred, and the
        # header-less Excel export is the fallback when CSV is not offered
        export_format_select = Select(self.driver.find_element_by_id(
            'ctl00_ctl00_MainContent_reportViewer_ctl01_ctl05_ctl00'))
        try:
            export_format_select.select_by_value('CSV')
            dl_type = 'csv'
        except NoSuchElementException:
            export_format_select.select_by_value('EXCELNoHeader')
            dl_type = 'xls'
        self.driver.find_element_by_id(
            'ctl00_ctl00_MainContent_reportViewer_ctl01_ctl05_ctl01').click()

        # wait until file has downloaded to close the browser. We can do
        # this because we delete the file before we return it, so the temp
        # dir should always be empty when this command is run
        # TODO add a try/except block here
        wait_for_any_file_in_folder(csv_download_folder_path, dl_type)

        try:
            downloaded_file = get_most_recent_file_in_dir(
                csv_download_folder_path)
            # `header` points pandas at the real column row, which skips the
            # report's leading header rows
            if dl_type == 'csv':
                report_df = pd.read_csv(downloaded_file, header=2)
            else:
                report_df = pd.read_excel(downloaded_file, header=3)
        finally:
            # delete any files in the mealtime temp folder even when parsing
            # fails; a leftover file would satisfy the "any file" wait on the
            # next run and be read as if it were the fresh download.
            # TODO: move this out of this function. It should happen as
            # cleanup once the whole DAG has completed
            delete_folder_contents(csv_download_folder_path)
    finally:
        # always release the browser, including on login/navigation errors
        self.driver.close()

    # if the dataframe is empty (the report had no data), raise an error
    if report_df.shape[0] == 0:
        raise ValueError('No data in report for user {} at url: {}'.format(
            self.username, full_report_url))

    return report_df
def __request_district_export(self, report_type, period_start_date=None,
                              period_end_date=None, write_to_disk=None):
    """
    Logs into Lexia and submits the request to generate a district export

    :param report_type: The text from one of 'Report type' options listed
        in the myLexia 'District Exports' modal.
    :param period_start_date: The start date for the report request (unsure
        if this actually affects the data returned if it is different from
        the school year start date set for your Lexia instance). Must
        support ``strftime``; required despite the ``None`` default.
    :param period_end_date: The end date for the report request (unsure if
        this actually affects the data returned if it is different from the
        day on which the request is made). Must support ``strftime``;
        required despite the ``None`` default.
    :param write_to_disk: Directory handed to the browser driver as its
        download location. Falls back to ``self.temp_folder_path``.
    :return: Boolean. Whether or not the export request was successful.
    :raises ValueError: If either date is missing.
    """
    # Fail fast with a clear message: the payload below formats both dates,
    # so a missing date would otherwise surface as an AttributeError only
    # after a browser had been launched and logged in.
    if period_start_date is None or period_end_date is None:
        raise ValueError(
            'period_start_date and period_end_date are both required to '
            'request a district export.')

    csv_download_folder_path = write_to_disk or self.temp_folder_path
    self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                             self.headless)
    self._login()
    # NOTE(review): the driver is left open when this method returns;
    # presumably the caller keeps using or closes it - confirm.

    # use requests to PUT the export request, reusing the browser's
    # authenticated session cookies
    with requests.Session() as s:
        for cookie in self.driver.get_cookies():
            s.cookies.set(cookie['name'], cookie['value'])

        payload = {
            "districtID": self.district_id,
            "type": report_type,
            "email": self.district_export_email_address,
            "startDate": period_start_date.strftime("%Y-%m-%d"),
            "endDate": period_end_date.strftime("%Y-%m-%d")
        }
        self.log.info('{}: Export request payload: {}'.format(
            self.district_id, payload))

        download_response = s.put(self.base_url + '/exportData/progress',
                                  data=payload)

        if download_response.ok:
            self.log.info(
                '{}: Export request for {} succeeded for user: {}'.format(
                    self.district_id, report_type, self.username))
            j_data = json.loads(download_response.content.decode())
            self.log.info(j_data)
            return True

        self.log.info(
            '{}: Export request for {} FAILED for user: {}'.format(
                self.district_id, report_type, self.username))
        self.log.info(download_response.content)
        return False
def download_url_report(self, report_url, write_to_disk=None, **kwargs):
    """
    Downloads a Summit Learning report at a URL that triggers a CSV download

    Args:
        report_url (string): Information pertaining to the path and query
            string for the report whose access is desired. Any filtering
            that can be done with a stateful URL should be included.
        write_to_disk (string): The path for a directory to store the
            downloaded file. If nothing is provided, the file will be stored
            in a temporary directory and deleted at the end of this function.
        **kwargs: additional arguments to pass to Pandas read_csv

    Returns:
        A Pandas DataFrame of the report contents.

    Raises:
        NoDataError: If the downloaded report contains no rows.
    """
    report_download_url = interpret_report_url(self.base_url, report_url)

    if write_to_disk:
        csv_download_folder_path = write_to_disk
    else:
        csv_download_folder_path = mkdtemp()

    self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                             self.headless)
    try:
        self._login()

        self.log.debug(
            'Getting report page at: {}'.format(report_download_url))
        self.driver.get(report_download_url)

        self.log.debug(
            'Starting download of: {}'.format(report_download_url))
        wait_for_any_file_in_folder(csv_download_folder_path, "csv")
        self.log.debug('Download Finished.')

        df_report = pd.read_csv(
            get_most_recent_file_in_dir(csv_download_folder_path), **kwargs)
    finally:
        # release the browser and the temp dir on every exit path, not only
        # on success
        self.driver.close()
        if not write_to_disk:
            shutil.rmtree(csv_download_folder_path)

    # if the dataframe is empty (the report had no data), raise an error
    if df_report.shape[0] == 0:
        raise NoDataError(
            'No data in report for user {} at url: {}'.format(
                self.username, report_download_url))

    return df_report
def test_set_dl_academic_year_invalid_year(self):
    """Selecting a configured invalid academic year raises NoSuchElementException."""
    summit_config = CONFIG['SummitLearning']
    invalid_year = summit_config[
        'test_set_dl_academic_year_invalid_year__academic_year']

    run_headless = CONFIG.getboolean('SummitLearning', 'headless')
    self.sl.driver = DriverBuilder().get_driver(headless=run_headless)
    self.sl._login()

    # open the site's data downloads page, where the year selector lives
    downloads_page = "{base_url}/sites/{site_id}/data_downloads/".format(
        base_url=self.sl.base_url,
        site_id=summit_config['site_id'])
    self.sl.driver.get(downloads_page)

    with self.assertRaises(NoSuchElementException):
        self.sl._set_dl_academic_year(invalid_year)
def __navigate_to_custom_report(self, report_name, school_year,
                                download_folder_path=None):
    """
    Navigate to the page of the custom report tool that has the custom
    report on it.

    Args:
        report_name (string): Text matched (via XPath ``contains``) against
            the cells of the custom reports table rows.
        school_year: The school year passed to ``self._set_year``.
        download_folder_path (string): Download location for the browser
            driver. Defaults to ``self.temp_folder_path``.

    Returns:
        The index (counted from 0) of the pagination page on which a row
        containing ``report_name`` was found.

    Raises:
        ReportNotFound: If no page contains a matching row.
    """
    target_folder = (download_folder_path if download_folder_path
                     else self.temp_folder_path)
    self.driver = DriverBuilder().get_driver(
        download_location=target_folder, headless=self.headless)
    self._login()
    self._set_year(school_year, self.driver)

    # get the custom reports page
    self.driver.get(
        interpret_report_url(self.base_url, 'report/customReports'))
    self.__remove_walk_me_and_support()

    # wait for the page to load; the last pagination item's data-page
    # attribute plus one is used as the number of pages
    last_page_item = WebDriverWait(self.driver, self.wait_time).until(
        EC.presence_of_element_located((
            By.XPATH,
            '//*[@id="content"]//*[@class="pagination "]/li[@data-page][last()]')))
    num_pages = int(last_page_item.get_attribute("data-page")) + 1

    report_row_xpath = "//tr[td//text()[contains(., '{}')]]".format(
        report_name)

    for page_index in range(num_pages):
        try:
            self.driver.find_element_by_xpath(report_row_xpath)
        except NoSuchElementException:
            upcoming_page = page_index + 1
            if upcoming_page < num_pages:
                pager_link_xpath = '//*[@id="content"]//*[@class="pagination "]/li[@data-page={}]/a'.format(
                    upcoming_page)
                self.driver.find_element_by_xpath(pager_link_xpath).click()
                # scroll back to the top of the page, prevents selenium
                # clicking errors
                self.driver.execute_script("window.scrollTo(0, 0);")
        else:
            return page_index

    raise ReportNotFound
def test_set_dl_academic_year(self):
    """Setting the configured academic year succeeds and is reflected on the page."""
    summit_config = CONFIG['SummitLearning']
    target_year = summit_config['test_set_dl_academic_year__academic_year']

    run_headless = CONFIG.getboolean('SummitLearning', 'headless')
    self.sl.driver = DriverBuilder().get_driver(headless=run_headless)
    self.sl._login()

    # open the site's data downloads page, where the year selector lives
    downloads_page = "{base_url}/sites/{site_id}/data_downloads/".format(
        base_url=self.sl.base_url,
        site_id=summit_config['site_id'])
    self.sl.driver.get(downloads_page)

    was_set = self.sl._set_dl_academic_year(academic_year=target_year)
    self.assertTrue(was_set)

    is_selected = self.sl.check_dl_academic_year(academic_year=target_year)
    self.assertTrue(is_selected)
def download_url_report(self, report_url, write_to_disk=None, **kwargs):
    """
    Downloads a Lexia report at a URL for a page with an 'export' button.

    Args:
        report_url (string): Information pertaining to the path and query
            string for the report whose access is desired. Any filtering
            that can be done with a stateful URL should be included.
        write_to_disk (string): The path for a directory to store the
            downloaded file. If nothing is provided, the file will be stored
            in a temporary directory and deleted at the end of this function.
        **kwargs: additional arguments to pass to Pandas read_excel (or
            forwarded to ``download_manage_tab_report`` for 'Manage' tab
            URLs)

    Returns:
        A Pandas DataFrame of the report contents.

    Raises:
        ValueError: If the downloaded report contains no rows.
    """
    report_download_url = interpret_report_url(self.base_url, report_url)

    # if user is trying to download a manage tab report (for convenience)
    if '/mylexiaweb/app/index.html#/groups/' in report_download_url:
        return self.download_manage_tab_report(report_url, write_to_disk,
                                               **kwargs)

    if write_to_disk:
        csv_download_folder_path = write_to_disk
    else:
        csv_download_folder_path = mkdtemp()

    self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                             self.headless)
    try:
        self._login()

        self.log.debug(
            'Getting report page at: {}'.format(report_download_url))
        self.driver.get(report_download_url)

        # find and click the download button
        elem = WebDriverWait(self.driver, self.wait_time).until(
            EC.presence_of_element_located(
                (By.XPATH, "//button[contains(text(), 'Export')]")))
        self.log.debug(
            'Starting download of: {}'.format(report_download_url))
        elem.click()

        wait_for_any_file_in_folder(csv_download_folder_path, "xlsx")
        self.log.debug('Download Finished.')

        df_report = pd.read_excel(
            get_most_recent_file_in_dir(csv_download_folder_path), **kwargs)
    finally:
        # release the browser and the temp dir on every exit path, not only
        # on success
        self.driver.close()
        if not write_to_disk:
            shutil.rmtree(csv_download_folder_path)

    # if the dataframe is empty (the report had no data), raise an error
    if df_report.shape[0] == 0:
        raise ValueError('No data in report for user {} at url: {}'.format(
            self.username, report_download_url))

    return df_report
def download_manage_tab_report(self, report_url, write_to_disk=None,
                               **kwargs):
    """
    Downloads a Lexia report from the 'Manage' tab.

    Args:
        report_url (string): Information pertaining to the path and query
            string for the report whose access is desired. Any filtering
            that can be done with a stateful URL should be included.
        write_to_disk (string): The path for a directory to store the
            downloaded file. If nothing is provided, the file will be stored
            in a temporary directory and deleted at the end of this function.
        **kwargs: additional arguments to pass to Pandas read_csv

    Returns:
        A Pandas DataFrame of the report contents.

    Raises:
        ValueError: If the downloaded report contains no rows.
    """
    if write_to_disk:
        csv_download_folder_path = write_to_disk
    else:
        csv_download_folder_path = mkdtemp()

    report_download_url = interpret_report_url(self.base_url, report_url)

    def check_for_export_button_enabled(driver, elem_select_all_locator,
                                        elem_export_locator):
        """Select all users; return the export button once it is usable, else False."""
        elem_select_all = driver.find_element(*elem_select_all_locator)
        if not elem_select_all.is_enabled():
            return False
        # This runs once per poll. Clicking unconditionally would toggle an
        # already-selected control back off on the next poll, so only click
        # while it is not selected.
        if not elem_select_all.is_selected():
            elem_select_all.click()
            if not elem_select_all.is_selected():
                return False
        elem_export = driver.find_element(*elem_export_locator)
        if elem_export.is_enabled() and elem_export.is_displayed():
            return elem_export
        return False

    self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                             self.headless)
    try:
        self._login()

        self.log.debug(
            'Getting report page at: {}'.format(report_download_url))
        self.driver.get(report_download_url)

        # select all users and find the download button; `until` expects a
        # callable and keeps polling while it returns a falsy value
        elem_export = WebDriverWait(self.driver, self.wait_time).until(
            lambda x: check_for_export_button_enabled(
                self.driver,
                (By.NAME, "lexia-select-all"),
                (By.XPATH, "//button[contains(text(), 'Export')]")))

        self.log.debug(
            'Starting download of: {}'.format(report_download_url))
        elem_export.click()

        wait_for_any_file_in_folder(csv_download_folder_path, "xls")
        self.log.debug('Download Finished.')

        # the export is awaited with an "xls" extension but is parsed here
        # as tab-separated text
        df_report = pd.read_csv(
            get_most_recent_file_in_dir(csv_download_folder_path),
            sep='\t', **kwargs)
    finally:
        # release the browser and the temp dir on every exit path, not only
        # on success
        self.driver.close()
        if not write_to_disk:
            shutil.rmtree(csv_download_folder_path)

    # if the dataframe is empty (the report had no data), raise an error
    if df_report.shape[0] == 0:
        raise ValueError('No data in report for user {} at url: {}'.format(
            self.username, report_download_url))

    return df_report
def download_url_report(self, report_url, school_year, temp_folder_name=None,
                        pandas_read_csv_kwargs=None):
    """
    Downloads a SchoolMint data-stream-table report.

    Args:
        report_url (string): Information pertaining to the path and query
            string for the report whose access is desired. Any filtering
            that can be done with a stateful URL should be included.
        school_year (string): The SchoolMint school year to download from
            (e.g. '2018-2019')
        temp_folder_name (string): The name for a sub-directory (of
            ``self.temp_folder_path``) in which the files from the browser
            will be temporarily stored. If omitted, a fresh temporary
            directory is created there instead.
            NOTE: This sub-directory will be deleted, with its contents,
            before this function returns or raises.
        pandas_read_csv_kwargs (dict): additional arguments to pass to
            Pandas read_csv

    Returns:
        A Pandas DataFrame of the report contents.

    Raises:
        ReportNotFound: If the page is not showing ``school_year``.
        TimeoutError: If the report data summary keeps changing for longer
            than roughly ``self.wait_time`` seconds.
        ValueError: If the downloaded report contains no rows.
    """
    # avoid a shared mutable default argument
    if pandas_read_csv_kwargs is None:
        pandas_read_csv_kwargs = {}

    if temp_folder_name:
        csv_download_folder_path = (
            self.temp_folder_path + '/' + temp_folder_name)
    else:
        csv_download_folder_path = mkdtemp(dir=self.temp_folder_path)

    full_report_url = interpret_report_url(self.base_url, report_url)

    # set up the driver for execution
    self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                             self.headless)
    try:
        self._login()
        self._set_year(school_year, self.driver)

        # get the report url
        self.driver.get(full_report_url)
        self.__remove_walk_me_and_support()

        # wait until we have rows in the stream data table before starting
        # to look for results
        WebDriverWait(self.driver, self.wait_time).until(
            EC.presence_of_element_located(
                (By.XPATH, "//*[@id='stream-table']/tbody/tr[1]/td[1]")))

        if not self.check_school_year(school_year):
            raise ReportNotFound(
                "Wrong school detected prior to clicking generate.")

        self.log.debug('Waiting for report-data-summary to load')
        # wait until the stream table is fully loaded before downloading:
        # the summary text must stay unchanged across two consecutive reads
        prev_data_summary_elem = self.driver.find_element_by_id(
            'report-data-summary').text
        time.sleep(1)

        # we use the following count as a proxy for time elapsed, so we can
        # use the class's wait_time as the number of retries
        count = 0
        while True:
            report_data_summary_elem = self.driver.find_element_by_id(
                'report-data-summary').text
            # if it matches, wait a little longer and double check that it
            # hasn't changed
            if prev_data_summary_elem == report_data_summary_elem:
                time.sleep(3)
                count += 3
                report_data_summary_elem = self.driver.find_element_by_id(
                    'report-data-summary').text
                if prev_data_summary_elem == report_data_summary_elem:
                    break

            prev_data_summary_elem = report_data_summary_elem
            time.sleep(1)
            count += 1
            if count >= self.wait_time:
                raise TimeoutError(
                    'SchoolMint Report Data did not fully load within '
                    '%d seconds' % self.wait_time)

        # click the button to download the report
        self.log.debug('Starting download...')
        self.driver.find_element_by_class_name("export-table").click()

        # wait until file has downloaded to close the browser. We can do
        # this because we delete the folder before we return, so the temp
        # dir should always be empty when this command is run
        wait_for_any_file_in_folder(csv_download_folder_path, "csv")
        self.log.debug('Download finished.')

        report_df = pd.read_csv(
            get_most_recent_file_in_dir(csv_download_folder_path),
            encoding=SCHOOLMINT_DEFAULT_EXPORT_ENCODING,
            **pandas_read_csv_kwargs)
    finally:
        # close the driver for this task, then remove the download folder
        # exactly once. ignore_errors keeps a missing folder (e.g. a failure
        # before anything was downloaded) from masking the real exception.
        # TODO: move the folder cleanup out of this function. It should
        # happen as cleanup once the whole DAG has completed
        self.driver.close()
        shutil.rmtree(csv_download_folder_path, ignore_errors=True)

    # if the dataframe is empty (the report had no data), raise an error
    if report_df.shape[0] == 0:
        raise ValueError('No data in report for user {} at url: {}'.format(
            self.username, full_report_url))

    return report_df
def test_login(self):
    """Logging in with a default driver completes without raising."""
    self.sl.driver = DriverBuilder().get_driver()
    try:
        self.sl._login()
    finally:
        # close the browser even when the login fails, so a failing test
        # does not leave a browser process behind
        self.sl.driver.close()
def test_login(self):
    """Logging in with the configured headless setting completes without raising."""
    self.sl.driver = DriverBuilder().get_driver(
        headless=CONFIG.getboolean('SummitLearning', 'headless'))
    try:
        self.sl._login()
    finally:
        # close the browser even when the login fails, so a failing test
        # does not leave a browser process behind
        self.sl.driver.close()
def download_site_data_download(
        self, dl_heading, site_id, academic_year,
        report_generation_wait=REPORT_GENERATION_WAIT,
        write_to_disk=None, **kwargs):
    """
    Downloads one of the CSVs listed on a site's 'data downloads' page.

    Args:
        dl_heading (string): Text contained in the ``h3`` heading of the
            download section whose CSV is wanted.
        site_id: The site identifier used to build the data downloads URL.
        academic_year: The academic year to select on the page before
            downloading.
        report_generation_wait: Timeout passed to ``WebDriverWait`` while
            waiting for the 'Download' link to appear. The wait is attempted
            twice, with a page refresh in between.
        write_to_disk (string): The path for a directory to store the
            downloaded file. If nothing is provided, the file will be stored
            in a temporary directory and deleted at the end of this function.
        **kwargs: additional arguments to pass to Pandas read_csv

    Returns:
        A Pandas DataFrame of the report contents.

    Raises:
        ValueError: If the academic year could not be confirmed as set.
        NoDataError: If the downloaded report contains no rows.
    """
    if write_to_disk:
        csv_download_folder_path = write_to_disk
    else:
        csv_download_folder_path = mkdtemp()

    self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                             self.headless)
    try:
        self._login()

        dl_page_url = "{base_url}/sites/{site_id}/data_downloads/".format(
            base_url=self.base_url, site_id=site_id)
        self.driver.get(dl_page_url)

        self._set_dl_academic_year(academic_year)
        if not self.check_dl_academic_year(academic_year):
            raise ValueError("Academic Year not correctly set")

        # start the CSV generation process
        download_button_xpath = "//h3[contains(text(), '{dl_heading}')]/parent::div/parent::div//a[contains(text(), '{button_text}')]"

        # try to find the "Download CSV" button - old version of the
        # interface
        old_interface = False
        try:
            elem = self.driver.find_element_by_xpath(
                download_button_xpath.format(dl_heading=dl_heading,
                                             button_text='Download CSV'))
            old_interface = True
            self.log.info("'Download CSV' interface detected.")
            elem.click()
        except NoSuchElementException:
            # deliberate: absence of the old link just means the page uses
            # the newer interface, which is handled below
            pass

        # try to find the "Generate CSV" button - new version of the
        # interface
        if not old_interface:
            gen_button_xpath = "//h3[contains(text(), '{dl_heading}')]/parent::div/parent::div//button[contains(text(), '{button_text}')]"
            try:
                elem = self.driver.find_element_by_xpath(
                    gen_button_xpath.format(dl_heading=dl_heading,
                                            button_text='Generate CSV'))
                self.log.info("'Generate CSV' interface detected.")
                elem.click()
            except NoSuchElementException:
                # if it's not there, it may have changed to a "Download" or
                # a "Refresh" button; only "Refresh" needs clicking here
                try:
                    elem = self.driver.find_element_by_xpath(
                        gen_button_xpath.format(dl_heading=dl_heading,
                                                button_text='Download'))
                except NoSuchElementException:
                    elem = self.driver.find_element_by_xpath(
                        gen_button_xpath.format(dl_heading=dl_heading,
                                                button_text='Refresh'))
                    elem.click()

        # wait for the generate/refresh command to be issued
        time.sleep(1)

        # wait for the report to be available and download it
        self.log.info(
            'Starting download of report "{}" for site_id "{}"'.format(
                dl_heading, site_id))
        dl_button_xpath = "//h3[contains(text(), '{dl_heading}')]/parent::div/parent::div//a[contains(text(), 'Download')]"
        dl_button_locator = (By.XPATH,
                             dl_button_xpath.format(dl_heading=dl_heading))
        try:
            elem = WebDriverWait(self.driver, report_generation_wait).until(
                EC.presence_of_element_located(dl_button_locator))
            elem.click()
        # if the download is not ready, refresh the page and try one more
        # time
        except TimeoutException:
            self.driver.refresh()
            elem = WebDriverWait(self.driver, report_generation_wait).until(
                EC.presence_of_element_located(dl_button_locator))
            elem.click()

        wait_for_any_file_in_folder(csv_download_folder_path, "csv")
        self.log.debug('Download Finished.')

        df_report = pd.read_csv(
            get_most_recent_file_in_dir(csv_download_folder_path), **kwargs)
    finally:
        # release the browser and the temp dir on every exit path, not only
        # on success
        self.driver.close()
        if not write_to_disk:
            shutil.rmtree(csv_download_folder_path)

    # if the dataframe is empty (the report had no data), raise an error
    if df_report.shape[0] == 0:
        raise NoDataError('No data in report "{}" for site_id "{}"'.format(
            dl_heading, site_id))

    return df_report
def download_data_shared_with_application(self, application_page_url,
                                          collection, write_to_disk=None,
                                          **kwargs):
    """
    Downloads the students shared with a particular application through
    Clever.

    :param application_page_url: The url for the main Clever management page
        for a particular application. For example, for My Lexia, this would
        be https://schools.clever.com/applications/lexia-mylexia
    :param collection: A string of 'schools', 'students', 'sections',
        'teachers', 'schooladmins' that indicates which shared data to
        download. Case and spaces are ignored.
    :param write_to_disk: A path to a directory where the downloaded CSV
        should be saved. If nothing is passed, it will not be saved and only
        a Pandas DataFrame will be returned.
    :param kwargs: Additional keyword arguments to be passed to the Pandas
        read_csv function.
    :return: A Pandas DataFrame of the indicated collection download.
    :raises ReportNotFound: If ``collection`` is not one of the supported
        values.
    :raises ValueError: If the download has no rows, for any collection
        other than 'schooladmins' (which only emits a warning).
    """
    valid_collections = ('schools', 'students', 'sections', 'teachers',
                         'schooladmins')
    collection = collection.lower().replace(' ', '')
    if collection not in valid_collections:
        raise ReportNotFound(
            (
                "Argument for collection '{collection}' is not a valid. Please choose from: "
                "'schools', 'students', 'sections', 'teachers', 'schooladmins'."
            ).format(collection=collection)
        )

    report_access_page_url = interpret_report_url(self.base_url,
                                                  application_page_url)

    if write_to_disk:
        csv_download_folder_path = write_to_disk
    else:
        csv_download_folder_path = mkdtemp()

    self.driver = DriverBuilder().get_driver(csv_download_folder_path,
                                             self.headless)
    try:
        self._login()

        self.log.debug('Getting report access page at: {}'.format(
            report_access_page_url))
        self.driver.get(report_access_page_url)

        # find and click the download button based on the collection desired
        elem = WebDriverWait(self.driver, self.wait_time).until(
            EC.presence_of_element_located(
                (By.XPATH, "//a[contains(@href, '{collection}.csv')]".format(
                    collection=collection))
            )
        )
        self.log.info('Starting download of: {} - {}'.format(
            report_access_page_url, collection))
        elem.click()

        wait_for_any_file_in_folder(csv_download_folder_path, "csv")
        self.log.info('Download Finished.')

        df_report = pd.read_csv(
            get_most_recent_file_in_dir(csv_download_folder_path), **kwargs)
    finally:
        # release the browser and the temp dir on every exit path, not only
        # on success
        self.driver.close()
        if not write_to_disk:
            shutil.rmtree(csv_download_folder_path)

    # if the dataframe is empty (the report had no data), raise an error;
    # an empty 'schooladmins' collection is tolerated with a warning
    if df_report.shape[0] == 0:
        if collection != 'schooladmins':
            raise ValueError(
                'No data in report for user {} at url: {}'.format(
                    self.username, report_access_page_url))
        warnings.warn("The 'schooladmins' collection has no data. Ensure that no school admins are shared.")

    return df_report