def upload_csv_to_report_store(rows, csv_name, course_id, timestamp):
    """
    Upload data as a CSV using ReportStore.

    Arguments:
        rows: CSV data in the following format (first row may be a header):
            [
                [row1_column1, row1_column2, ...],
                ...
            ]
        csv_name: Name of the resulting CSV
        course_id: ID of the course
        timestamp: datetime used to stamp the report filename
    """
    report_store = ReportStore.from_config()
    report_store.store_rows(
        course_id,
        u"{course_prefix}_{csv_name}_{timestamp_str}.csv".format(
            course_prefix=course_filename_prefix_generator(course_id),
            csv_name=csv_name,
            timestamp_str=timestamp.strftime("%Y-%m-%d-%H%M")
        ),
        rows
    )
    tracker.emit(REPORT_REQUESTED_EVENT_NAME, {"report_type": csv_name})
def upload_csv_to_report_store(rows, csv_name, course_id, timestamp, config_name='GRADES_DOWNLOAD'):
    """
    Upload data as a CSV using ReportStore.

    Arguments:
        rows: CSV data in the following format (first row may be a header):
            [
                [row1_column1, row1_column2, ...],
                ...
            ]
        csv_name: Name of the resulting CSV
        course_id: ID of the course
        timestamp: datetime used to stamp the report filename
        config_name: ReportStore configuration to use (defaults to 'GRADES_DOWNLOAD')
    """
    report_store = ReportStore.from_config(config_name)
    report_store.store_rows(
        course_id,
        u"{course_prefix}_{csv_name}_{timestamp_str}.csv".format(
            course_prefix=course_filename_prefix_generator(course_id),
            csv_name=csv_name,
            timestamp_str=timestamp.strftime("%Y-%m-%d-%H%M")
        ),
        rows
    )
    tracker.emit(REPORT_REQUESTED_EVENT_NAME, {"report_type": csv_name})
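# Usage sketch for upload_csv_to_report_store (illustrative only: the sample
# rows, the `course_id` variable, and the pytz import are assumptions, not
# taken from the snippets above).
from datetime import datetime

from pytz import UTC

sample_rows = [
    ["id", "username", "grade"],  # header row
    [42, "alice", 0.95],
]
# Stores e.g. "<course_prefix>_student_grades_2015-01-30-1205.csv" in the
# GRADES_DOWNLOAD report store and emits the report-requested event:
upload_csv_to_report_store(sample_rows, "student_grades", course_id, datetime.now(UTC))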
def push_student_responses_to_s3(_xmodule_instance_args, _entry_id, course_id, _task_input, action_name):
    """
    For a given `course_id`, generate a responses CSV file for students that
    have submitted problem responses, and store using a `ReportStore`. Once
    created, the files can be accessed by instantiating another `ReportStore`
    (via `ReportStore.from_config()`) and calling `links_for()` on it.

    Writes are buffered, so we'll never write part of a CSV file to S3 -- i.e.
    any files that are visible in ReportStore will be complete ones.
    """
    start_time = datetime.now(UTC)

    try:
        course = get_course_by_id(course_id)
    except ValueError as e:
        TASK_LOG.error(e.message)
        return "failed"

    rows = student_response_rows(course)

    # Generate parts of the file name
    timestamp_str = start_time.strftime("%Y-%m-%d-%H%M")
    course_id_prefix = urllib.quote(course_id.to_deprecated_string().replace("/", "_"))

    # Perform the actual upload
    report_store = ReportStore.from_config()
    report_store.store_rows(
        course_id,
        u"{}_responses_report_{}.csv".format(course_id_prefix, timestamp_str),
        rows
    )

    return "succeeded"
def test_grading_failure(self, mock_iterate_grades_for, _mock_current_task):
    """
    Test that any grading errors are properly reported in the progress
    dict and uploaded to the report store.
    """
    # mock an error response from `iterate_grades_for`
    student = self.create_student(u'username', u'*****@*****.**')
    error_message = u'Cannöt grade student'
    mock_iterate_grades_for.return_value = [(student, {}, error_message)]
    result = upload_problem_grade_report(None, None, self.course.id, None, 'graded')
    self.assertDictContainsSubset(
        {'attempted': 1, 'succeeded': 0, 'failed': 1}, result
    )

    report_store = ReportStore.from_config()
    self.assertTrue(any(
        'grade_report_err' in item[0]
        for item in report_store.links_for(self.course.id)
    ))
    self.verify_rows_in_csv([{
        u'Student ID': unicode(student.id),
        u'Email': student.email,
        u'Username': student.username,
        u'error_msg': error_message
    }])
def verify_rows_in_csv(self, expected_rows, file_index=0, verify_order=True, ignore_other_columns=False):
    """
    Verify that the last ReportStore CSV contains the expected content.

    Arguments:
        expected_rows (iterable): An iterable of dictionaries, where each
            dict represents a row of data in the last ReportStore CSV.
            Each dict maps keys from the CSV header to values in that
            row's corresponding cell.
        file_index (int): Describes which report store file to open.
            Files are ordered by last modified date, and 0 corresponds
            to the most recently modified file.
        verify_order (boolean): When True (the default), we verify that both
            the content and order of `expected_rows` matches the actual csv
            rows. When False, we only verify that the content matches.
        ignore_other_columns (boolean): When True, we verify that
            `expected_rows` contains data which is a subset of the actual
            csv rows.
    """
    report_store = ReportStore.from_config(config_name='GRADES_DOWNLOAD')
    report_csv_filename = report_store.links_for(self.course.id)[file_index][0]
    report_path = report_store.path_to(self.course.id, report_csv_filename)
    with report_store.storage.open(report_path) as csv_file:
        # Expand the dict reader generator so we don't lose its content
        csv_rows = [row for row in unicodecsv.DictReader(csv_file)]

        if ignore_other_columns:
            csv_rows = [
                {key: row.get(key) for key in expected_rows[index].keys()}
                for index, row in enumerate(csv_rows)
            ]

        if verify_order:
            self.assertEqual(csv_rows, expected_rows)
        else:
            self.assertItemsEqual(csv_rows, expected_rows)
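# Usage sketch inside a test case (the expected values shown are assumptions):
self.verify_rows_in_csv(
    [{u'Student ID': u'42', u'Email': u'student@example.com'}],
    file_index=0,               # open the most recently modified report
    verify_order=False,         # compare content while ignoring row order
    ignore_other_columns=True   # only check the columns named in expected_rows
)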
def test_grading_failure(self, error_message, mock_iterate_grades_for, _mock_current_task):
    """
    Test that any grading errors are properly reported in the progress
    dict and uploaded to the report store.
    """
    # mock an error response from `iterate_grades_for`
    student = self.create_student(u'username', u'*****@*****.**')
    mock_iterate_grades_for.return_value = [
        (student, {}, error_message)
    ]
    result = upload_problem_grade_report(None, None, self.course.id, None, 'graded')
    self.assertDictContainsSubset(
        {'attempted': 1, 'succeeded': 0, 'failed': 1}, result
    )

    report_store = ReportStore.from_config(config_name='GRADES_DOWNLOAD')
    self.assertTrue(any(
        'grade_report_err' in item[0]
        for item in report_store.links_for(self.course.id)
    ))
    self.verify_rows_in_csv([
        {
            u'Student ID': unicode(student.id),
            u'Email': student.email,
            u'Username': student.username,
            u'error_msg': error_message if error_message else "Unknown error"
        }
    ])
def verify_rows_in_csv(self, expected_rows, file_index=0, verify_order=True, ignore_other_columns=False):
    """
    Verify that the last ReportStore CSV contains the expected content.

    Arguments:
        expected_rows (iterable): An iterable of dictionaries, where each
            dict represents a row of data in the last ReportStore CSV.
            Each dict maps keys from the CSV header to values in that
            row's corresponding cell.
        file_index (int): Describes which report store file to open.
            Files are ordered by last modified date, and 0 corresponds
            to the most recently modified file.
        verify_order (boolean): When True (the default), we verify that both
            the content and order of `expected_rows` matches the actual csv
            rows. When False, we only verify that the content matches.
        ignore_other_columns (boolean): When True, we verify that
            `expected_rows` contains data which is a subset of the actual
            csv rows.
    """
    report_store = ReportStore.from_config(config_name="GRADES_DOWNLOAD")
    report_csv_filename = report_store.links_for(self.course.id)[file_index][0]
    report_path = report_store.path_to(self.course.id, report_csv_filename)
    with report_store.storage.open(report_path) as csv_file:
        # Expand the dict reader generator so we don't lose its content
        csv_rows = [row for row in unicodecsv.DictReader(csv_file)]

        if ignore_other_columns:
            csv_rows = [
                {key: row.get(key) for key in expected_rows[index].keys()}
                for index, row in enumerate(csv_rows)
            ]

        if verify_order:
            self.assertEqual(csv_rows, expected_rows)
        else:
            self.assertItemsEqual(csv_rows, expected_rows)
def create_report_store(self):
    """
    Create and return a DjangoStorageReportStore using the old
    S3ReportStore configuration.
    """
    connection = boto.connect_s3()
    connection.create_bucket(settings.GRADES_DOWNLOAD['BUCKET'])
    return ReportStore.from_config(config_name='GRADES_DOWNLOAD')
def create_report_store(self):
    """
    Create and return a DjangoStorageReportStore configured to use the
    local filesystem for storage.
    """
    test_settings = copy.deepcopy(settings.GRADES_DOWNLOAD)
    test_settings['STORAGE_KWARGS'] = {'location': settings.GRADES_DOWNLOAD['ROOT_PATH']}
    with override_settings(GRADES_DOWNLOAD=test_settings):
        return ReportStore.from_config(config_name='GRADES_DOWNLOAD')
def tearDown(self):
    report_store = ReportStore.from_config(config_name='GRADES_DOWNLOAD')
    try:
        reports_download_path = report_store.storage.path('')
    except NotImplementedError:
        pass  # storage backend does not use the local filesystem
    else:
        if os.path.exists(reports_download_path):
            shutil.rmtree(reports_download_path)
def create_report_store(self):
    """
    Create and return a DjangoStorageReportStore configured to use S3
    for storage.
    """
    connection = boto.connect_s3()
    connection.create_bucket(settings.GRADES_DOWNLOAD['STORAGE_KWARGS']['bucket'])
    return ReportStore.from_config(config_name='GRADES_DOWNLOAD')
def get_csv_row_with_headers(self):
    """
    Helper function to return a list with the column names from the CSV
    file (the first row).
    """
    report_store = ReportStore.from_config(config_name='GRADES_DOWNLOAD')
    report_csv_filename = report_store.links_for(self.course.id)[0][0]
    with open(report_store.path_to(self.course.id, report_csv_filename)) as csv_file:
        rows = unicodecsv.reader(csv_file, encoding='utf-8')
        return rows.next()
def download_url_for_last_report(self):
    """ Get the URL for the last report, if any """
    # Unfortunately this is a bit inefficient due to the ReportStore API
    if not self.last_export_result or self.last_export_result['error'] is not None:
        return None
    from instructor_task.models import ReportStore
    report_store = ReportStore.from_config(config_name='GRADES_DOWNLOAD')
    course_key = getattr(self.scope_ids.usage_id, 'course_key', None)
    return dict(report_store.links_for(course_key)).get(self.last_export_result['report_filename'])
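# `links_for()` returns (filename, url) pairs, so wrapping it in dict() gives
# a filename -> url mapping. A minimal sketch (the values are assumptions):
#
#   report_store.links_for(course_key)
#   # => [("report_2015-01-30-1205.csv", "https://bucket.s3.amazonaws.com/...")]
#
#   dict(report_store.links_for(course_key)).get("report_2015-01-30-1205.csv")
#   # => "https://bucket.s3.amazonaws.com/..."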
def get_csv_row_with_headers(self):
    """
    Helper function to return a list with the column names from the CSV
    file (the first row).
    """
    report_store = ReportStore.from_config(config_name='GRADES_DOWNLOAD')
    report_csv_filename = report_store.links_for(self.course.id)[0][0]
    report_path = report_store.path_to(self.course.id, report_csv_filename)
    with report_store.storage.open(report_path) as csv_file:
        rows = unicodecsv.reader(csv_file, encoding='utf-8')
        return rows.next()
def test_success(self):
    self.create_student('student', '*****@*****.**')
    task_input = {'features': []}
    with patch('instructor_task.tasks_helper._get_current_task'):
        result = upload_students_csv(None, None, self.course.id, task_input, 'calculated')
    report_store = ReportStore.from_config()
    links = report_store.links_for(self.course.id)

    self.assertEquals(len(links), 1)
    self.assertDictContainsSubset({'attempted': 1, 'succeeded': 1, 'failed': 0}, result)
def report_list(request, course, ccx, **kwargs):
    """ Return the list of generated reports for this CCX as JSON. """
    report_store = ReportStore.from_config('GRADES_DOWNLOAD')
    reports = report_store.links_for(str(ccx.ccx_course_id))
    data = []
    for report in reports:
        data.append({
            'filename': report[0],
            'url': report[1],
        })
    return HttpResponse(json.dumps(data), content_type='application/json')
def test_success(self):
    task_input = {'features': []}
    with patch('instructor_task.tasks_helper._get_current_task'):
        result = push_students_csv_to_s3(None, None, self.course.id, task_input, 'calculated')
    report_store = ReportStore.from_config()
    links = report_store.links_for(self.course.id)

    self.assertEquals(len(links), 1)
    self.assertEquals(result, UPDATE_STATUS_SUCCEEDED)
def export_data(course_id, source_block_id_str):
    """
    Exports all answers to all questions by all students to a CSV file.
    """
    start_timestamp = time.time()
    response = {}

    logger.debug("Beginning data export")
    try:
        course_key = CourseKey.from_string(course_id)
        block = modulestore().get_items(
            course_key, qualifiers={'name': source_block_id_str}, depth=0)[0]
    except IndexError:
        raise ValueError("Could not find the specified Block ID.")
    course_key_str = unicode(course_key)

    # Define the header row of our CSV:
    rows = []
    header = ["Course ID", "Block ID", "Student ID", "Quiz Title", "Final Result"]
    for order in range(len(block.questions)):
        header.append("Question {}".format(order + 1))
        header.append("Answer")
    rows.append(header)

    results = _extract_data(course_key_str, block)
    rows += results

    # Generate the CSV:
    try:
        from instructor_task.models import ReportStore
        filename = u"diagnostic-data-export-{}.csv".format(
            time.strftime("%Y-%m-%d-%H%M%S", time.gmtime(start_timestamp)))
        report_store = ReportStore.from_config(config_name='GRADES_DOWNLOAD')
        report_store.store_rows(course_key, filename, rows)
        generation_time_s = time.time() - start_timestamp
        logger.debug("Done data export - took {} seconds".format(generation_time_s))

        response = {
            "error": None,
            "report_filename": filename,
            "start_timestamp": start_timestamp,
            "generation_time_s": generation_time_s,
            "display_data": [] if len(rows) == 1 else rows
        }
    except Exception:
        # Storage failures are swallowed; an empty dict is returned instead.
        pass

    return response
def _verify_cell_data_in_csv(self, username, column_header, expected_cell_content):
    """
    Verify that the last ReportStore CSV contains the expected content.
    """
    report_store = ReportStore.from_config(config_name='FINANCIAL_REPORTS')
    report_csv_filename = report_store.links_for(self.course.id)[0][0]
    with open(report_store.path_to(self.course.id, report_csv_filename)) as csv_file:
        # Expand the dict reader generator so we don't lose its content
        for row in unicodecsv.DictReader(csv_file):
            if row.get('Username') == username:
                self.assertEqual(row[column_header], expected_cell_content)
def create_report_store(self):
    """
    Create and return a DjangoStorageReportStore configured to use the
    local filesystem for storage.
    """
    test_settings = copy.deepcopy(settings.GRADES_DOWNLOAD)
    test_settings['STORAGE_KWARGS'] = {
        'location': settings.GRADES_DOWNLOAD['ROOT_PATH']
    }
    with override_settings(GRADES_DOWNLOAD=test_settings):
        return ReportStore.from_config(config_name='GRADES_DOWNLOAD')
def download_url_for_last_report(self):
    """ Get the URL for the last report, if any """
    # Unfortunately this is a bit inefficient due to the ReportStore API
    if not self.last_export_result or self.last_export_result['error'] is not None:
        return None
    try:
        from instructor_task.models import ReportStore
        report_store = ReportStore.from_config(config_name='GRADES_DOWNLOAD')
        course_key = getattr(self.scope_ids.usage_id, 'course_key', None)
        return dict(report_store.links_for(course_key)).get(self.last_export_result['report_filename'])
    except Exception:
        pass
def _verify_cell_data_for_user(self, username, course_id, column_header, expected_cell_content):
    """
    Verify cell data in the grades CSV for a particular user.
    """
    with patch('instructor_task.tasks_helper._get_current_task'):
        result = upload_grades_csv(None, None, course_id, None, 'graded')
    self.assertDictContainsSubset({'attempted': 2, 'succeeded': 2, 'failed': 0}, result)

    report_store = ReportStore.from_config()
    report_csv_filename = report_store.links_for(course_id)[0][0]
    with open(report_store.path_to(course_id, report_csv_filename)) as csv_file:
        for row in unicodecsv.DictReader(csv_file):
            if row.get('username') == username:
                self.assertEqual(row[column_header], expected_cell_content)
def list_report_downloads(_request, course_id):
    """
    List grade CSV files that are available for download for this course.
    """
    report_store = ReportStore.from_config()
    response_payload = {
        'downloads': [
            dict(name=name, url=url, link='<a href="{}">{}</a>'.format(url, name))
            for name, url in report_store.links_for(course_id)
        ]
    }
    return JsonResponse(response_payload)
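# Illustrative shape of the JSON payload returned by list_report_downloads
# (the filename and URL values below are assumptions):
#
# {
#     "downloads": [
#         {
#             "name": "course_grade_report_2015-01-30-1205.csv",
#             "url": "https://bucket.s3.amazonaws.com/course_grade_report_2015-01-30-1205.csv",
#             "link": "<a href=\"https://...\">course_grade_report_2015-01-30-1205.csv</a>"
#         }
#     ]
# }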
def _verify_csv_data(self, username, expected_data):
    """
    Verify grade report data.
    """
    with patch('instructor_task.tasks_helper._get_current_task'):
        upload_grades_csv(None, None, self.course.id, None, 'graded')
    report_store = ReportStore.from_config()
    report_csv_filename = report_store.links_for(self.course.id)[0][0]
    with open(report_store.path_to(self.course.id, report_csv_filename)) as csv_file:
        for row in unicodecsv.DictReader(csv_file):
            if row.get('username') == username:
                csv_row_data = [row[column] for column in self.columns_to_check]
                self.assertEqual(csv_row_data, expected_data)
def export_data(course_id, source_block_id_str):
    """
    Exports all answers to all questions by all students to a CSV file.
    """
    start_timestamp = time.time()
    response = {}

    logger.debug("Beginning data export")
    try:
        course_key = CourseKey.from_string(course_id)
        block = modulestore().get_items(course_key, qualifiers={'name': source_block_id_str}, depth=0)[0]
    except IndexError:
        raise ValueError("Could not find the specified Block ID.")
    course_key_str = unicode(course_key)

    # Define the header row of our CSV:
    rows = []
    header = ["Course ID", "Block ID", "Student ID", "Quiz Title", "Final Result"]
    for order in range(len(block.questions)):
        header.append("Question {}".format(order + 1))
        header.append("Answer")
    rows.append(header)

    results = _extract_data(course_key_str, block)
    rows += results

    # Generate the CSV:
    try:
        from instructor_task.models import ReportStore
        filename = u"diagnostic-data-export-{}.csv".format(
            time.strftime("%Y-%m-%d-%H%M%S", time.gmtime(start_timestamp)))
        report_store = ReportStore.from_config(config_name='GRADES_DOWNLOAD')
        report_store.store_rows(course_key, filename, rows)
        generation_time_s = time.time() - start_timestamp
        logger.debug("Done data export - took {} seconds".format(generation_time_s))

        response = {
            "error": None,
            "report_filename": filename,
            "start_timestamp": start_timestamp,
            "generation_time_s": generation_time_s,
            "display_data": [] if len(rows) == 1 else rows
        }
    except Exception:
        pass

    return response
def test_grading_failure(self, mock_iterate_grades_for, _mock_current_task):
    """
    Test that any grading errors are properly reported in the progress
    dict and uploaded to the report store.
    """
    # mock an error response from `iterate_grades_for`
    mock_iterate_grades_for.return_value = [
        (self.create_student('username', '*****@*****.**'), {}, 'Cannot grade student')
    ]
    result = upload_grades_csv(None, None, self.course.id, None, 'graded')
    self.assertDictContainsSubset({'attempted': 1, 'succeeded': 0, 'failed': 1}, result)

    report_store = ReportStore.from_config()
    self.assertTrue(any(
        'grade_report_err' in item[0]
        for item in report_store.links_for(self.course.id)
    ))
def test_financial_report_overrides(self):
    """
    Test that the CUSTOM_DOMAIN from FINANCIAL_REPORTS is used to construct
    the file url, instead of the domain defined via the AWS_S3_CUSTOM_DOMAIN
    setting.
    """
    with override_settings(FINANCIAL_REPORTS={
        'STORAGE_TYPE': 's3',
        'BUCKET': 'edx-financial-reports',
        'CUSTOM_DOMAIN': 'edx-financial-reports.s3.amazonaws.com',
        'ROOT_PATH': 'production',
    }):
        report_store = ReportStore.from_config(config_name="FINANCIAL_REPORTS")
        # Make sure the CUSTOM_DOMAIN from FINANCIAL_REPORTS is used to construct the file url
        self.assertIn("edx-financial-reports.s3.amazonaws.com", report_store.storage.url(""))
def list_report_downloads(_request, course_id):
    """
    List grade CSV files that are available for download for this course.
    """
    course_id = SlashSeparatedCourseKey.from_deprecated_string(course_id)
    report_store = ReportStore.from_config()
    response_payload = {
        'downloads': [
            dict(name=name, url=url, link='<a href="{}">{}</a>'.format(url, name))
            for name, url in report_store.links_for(course_id)
        ]
    }
    return JsonResponse(response_payload)
def create_report_store(self):
    """
    Create and return a DjangoStorageReportStore configured to use S3
    for storage.
    """
    test_settings = copy.deepcopy(settings.GRADES_DOWNLOAD)
    test_settings['STORAGE_CLASS'] = 'openedx.core.storage.S3ReportStorage'
    test_settings['STORAGE_KWARGS'] = {
        'bucket': settings.GRADES_DOWNLOAD['BUCKET'],
        'location': settings.GRADES_DOWNLOAD['ROOT_PATH'],
    }
    with override_settings(GRADES_DOWNLOAD=test_settings):
        connection = boto.connect_s3()
        connection.create_bucket(settings.GRADES_DOWNLOAD['STORAGE_KWARGS']['bucket'])
        return ReportStore.from_config(config_name='GRADES_DOWNLOAD')
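# A minimal sketch of the GRADES_DOWNLOAD setting that ReportStore.from_config
# reads, assembled from the snippets above. The exact values are assumptions;
# only the key names STORAGE_TYPE, BUCKET, ROOT_PATH, STORAGE_CLASS, and
# STORAGE_KWARGS appear in this section.
GRADES_DOWNLOAD = {
    'STORAGE_TYPE': 's3',
    'BUCKET': 'edx-grades',
    'ROOT_PATH': 'production',
    'STORAGE_CLASS': 'openedx.core.storage.S3ReportStorage',
    'STORAGE_KWARGS': {
        'bucket': 'edx-grades',
        'location': 'production',
    },
}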
def _verify_cohort_data(self, course_id, expected_cohort_groups):
    """
    Verify cohort data.
    """
    cohort_groups_in_csv = []
    with patch('instructor_task.tasks_helper._get_current_task'):
        result = upload_grades_csv(None, None, course_id, None, 'graded')
    self.assertDictContainsSubset({'attempted': 2, 'succeeded': 2, 'failed': 0}, result)

    report_store = ReportStore.from_config()
    report_csv_filename = report_store.links_for(course_id)[0][0]
    with open(report_store.path_to(course_id, report_csv_filename)) as csv_file:
        for row in unicodecsv.DictReader(csv_file):
            cohort_groups_in_csv.append(row['Cohort Name'])
    self.assertEqual(cohort_groups_in_csv, expected_cohort_groups)
def _verify_cohort_data(self, course_id, expected_cohort_groups):
    """
    Verify cohort data.
    """
    cohort_groups_in_csv = []
    with patch('instructor_task.tasks_helper._get_current_task'):
        result = upload_grades_csv(None, None, course_id, None, 'graded')
    self.assertDictContainsSubset({'attempted': 2, 'succeeded': 2, 'failed': 0}, result)

    report_store = ReportStore.from_config()
    report_csv_filename = report_store.links_for(course_id)[0][0]
    with open(report_store.path_to(course_id, report_csv_filename)) as csv_file:
        for row in unicodecsv.DictReader(csv_file):
            cohort_groups_in_csv.append(row['Cohort Group Name'])
    self.assertEqual(cohort_groups_in_csv, expected_cohort_groups)
def create_report_store(self):
    """
    Create and return a DjangoStorageReportStore configured to use S3
    for storage.
    """
    test_settings = copy.deepcopy(settings.GRADES_DOWNLOAD)
    test_settings['STORAGE_CLASS'] = 'openedx.core.storage.S3ReportStorage'
    test_settings['STORAGE_KWARGS'] = {
        'bucket': settings.GRADES_DOWNLOAD['BUCKET'],
        'location': settings.GRADES_DOWNLOAD['ROOT_PATH'],
    }
    with override_settings(GRADES_DOWNLOAD=test_settings):
        connection = boto.connect_s3()
        connection.create_bucket(settings.GRADES_DOWNLOAD['STORAGE_KWARGS']['bucket'])
        return ReportStore.from_config(config_name='GRADES_DOWNLOAD')
def verify_rows_in_csv(self, expected_rows):
    """
    Verify that the grades CSV contains the expected content.

    Arguments:
        expected_rows (iterable): An iterable of dictionaries, where each
            dict represents a row of data in the grades report CSV. Each
            dict maps keys from the CSV header to values in that row's
            corresponding cell.
    """
    report_store = ReportStore.from_config()
    report_csv_filename = report_store.links_for(self.course.id)[0][0]
    with open(report_store.path_to(self.course.id, report_csv_filename)) as csv_file:
        # Expand the dict reader generator so we don't lose its content
        csv_rows = [row for row in csv.DictReader(csv_file)]
        self.assertEqual(csv_rows, expected_rows)
def list_report_downloads(_request, course_id):
    """
    List grade CSV files that are available for download for this course.
    Remove "file:///tmp/edx-s3" from the url to permit download.
    """
    course_id = SlashSeparatedCourseKey.from_deprecated_string(course_id)
    report_store = ReportStore.from_config()
    response_payload = {
        'downloads': [
            dict(
                name=name,
                url=url.replace('file:///tmp/edx-s3', ""),
                link='<a href="{}">{}</a>'.format(url.replace('file:///tmp/edx-s3', ""), name)
            )
            for name, url in report_store.links_for(course_id)
        ]
    }
    return JsonResponse(response_payload)
def test_delete_report(self):
    report_store = ReportStore.from_config()
    task_input = {'features': []}

    links = report_store.links_for(self.course.id)
    self.assertEquals(len(links), 0)

    with patch('instructor_task.tasks_helper._get_current_task'):
        upload_students_csv(None, None, self.course.id, task_input, 'calculated')
    links = report_store.links_for(self.course.id)
    self.assertEquals(len(links), 1)

    filename = links[0][0]
    report_store.delete_file(self.course.id, filename)

    links = report_store.links_for(self.course.id)
    self.assertEquals(len(links), 0)
def test_financial_report_overrides(self):
    """
    Test that the CUSTOM_DOMAIN from FINANCIAL_REPORTS is used to construct
    the file url, instead of the domain defined via the AWS_S3_CUSTOM_DOMAIN
    setting.
    """
    with override_settings(
        FINANCIAL_REPORTS={
            'STORAGE_TYPE': 's3',
            'BUCKET': 'edx-financial-reports',
            'CUSTOM_DOMAIN': 'edx-financial-reports.s3.amazonaws.com',
            'ROOT_PATH': 'production',
        }):
        report_store = ReportStore.from_config(config_name="FINANCIAL_REPORTS")
        # Make sure the CUSTOM_DOMAIN from FINANCIAL_REPORTS is used to construct the file url
        self.assertIn("edx-financial-reports.s3.amazonaws.com", report_store.storage.url(""))
def list_report_downloads(_request, course_id):
    """
    List grade CSV files that are available for download for this course.
    Each download's `url` is rewritten to the local /get-grades endpoint to
    permit download.
    """
    course_id = SlashSeparatedCourseKey.from_deprecated_string(course_id)
    report_store = ReportStore.from_config(config_name='GRADES_DOWNLOAD')
    response_payload = {
        'downloads': [
            dict(
                name=name,
                url='/get-grades/{}/{}'.format(course_id.to_deprecated_string(), name),
                link='<a href="{}">{}</a>'.format(url, name)
            )
            for name, url in report_store.links_for(course_id)
        ]
    }
    return JsonResponse(response_payload)
def upload_csv_to_report_store(rows, csv_name, course_id, timestamp):
    """
    Upload data as a CSV using ReportStore.

    Arguments:
        rows: CSV data in the following format (first row may be a header):
            [
                [row1_column1, row1_column2, ...],
                ...
            ]
        csv_name: Name of the resulting CSV
        course_id: ID of the course
        timestamp: datetime used to stamp the report filename
    """
    report_store = ReportStore.from_config()
    report_store.store_rows(
        course_id,
        u"{course_prefix}_{csv_name}_{timestamp_str}.csv".format(
            course_prefix=course_filename_prefix_generator(course_id),
            csv_name=csv_name,
            timestamp_str=timestamp.strftime("%Y-%m-%d-%H%M")
        ),
        rows
    )
def verify_rows_in_csv(self, expected_rows, verify_order=True):
    """
    Verify that the last ReportStore CSV contains the expected content.

    Arguments:
        expected_rows (iterable): An iterable of dictionaries, where each
            dict represents a row of data in the last ReportStore CSV.
            Each dict maps keys from the CSV header to values in that
            row's corresponding cell.
        verify_order (boolean): When True (the default), we verify that both
            the content and order of `expected_rows` matches the actual csv
            rows. When False, we only verify that the content matches.
    """
    report_store = ReportStore.from_config()
    report_csv_filename = report_store.links_for(self.course.id)[0][0]
    with open(report_store.path_to(self.course.id, report_csv_filename)) as csv_file:
        # Expand the dict reader generator so we don't lose its content
        csv_rows = [row for row in unicodecsv.DictReader(csv_file)]
        if verify_order:
            self.assertEqual(csv_rows, expected_rows)
        else:
            self.assertItemsEqual(csv_rows, expected_rows)
def upload_csv_to_report_store(rows, csv_name, course_id, timestamp):
    """
    Upload data as a CSV using ReportStore.

    Arguments:
        rows: CSV data in the following format (first row may be a header):
            [
                [row1_column1, row1_column2, ...],
                ...
            ]
        csv_name: Name of the resulting CSV
        course_id: ID of the course
        timestamp: datetime used to stamp the report filename
    """
    report_store = ReportStore.from_config()
    report_store.store_rows(
        course_id,
        u"{course_prefix}_{csv_name}_{timestamp_str}.csv".format(
            course_prefix=urllib.quote(unicode(course_id).replace("/", "_")),
            csv_name=csv_name,
            timestamp_str=timestamp.strftime("%Y-%m-%d-%H%M")
        ),
        rows
    )
def create_report_store(self):
    """
    Create and return a DjangoStorageReportStore using the old
    LocalFSReportStore configuration.
    """
    return ReportStore.from_config(config_name='GRADES_DOWNLOAD')
def export_data(course_id, source_block_id_str, block_types, user_ids, match_string):
    """
    Exports student answers to all MCQ questions to a CSV file.
    """
    start_timestamp = time.time()

    logger.debug("Beginning data export")
    try:
        course_key = CourseKey.from_string(course_id)
        src_block = modulestore().get_items(
            course_key, qualifiers={'name': source_block_id_str}, depth=0)[0]
    except IndexError:
        raise ValueError("Could not find the specified Block ID.")
    course_key_str = unicode(course_key)

    type_map = {cls.__name__: cls for cls in [MCQBlock, RatingBlock, AnswerBlock]}

    if not block_types:
        block_types = tuple(type_map.values())
    else:
        block_types = tuple(type_map[class_name] for class_name in block_types)

    # Build an ordered list of blocks to include in the export
    blocks_to_include = []

    def scan_for_blocks(block):
        """ Recursively scan the course tree for blocks of interest """
        if isinstance(block, block_types):
            blocks_to_include.append(block)
        elif block.has_children:
            for child_id in block.children:
                try:
                    scan_for_blocks(block.runtime.get_block(child_id))
                except ItemNotFoundError:
                    # Blocks may refer to missing children. Don't break in this case.
                    pass

    scan_for_blocks(src_block)

    # Define the header row of our CSV:
    rows = []
    rows.append(["Section", "Subsection", "Unit", "Type", "Question", "Answer", "Username"])

    # Collect results for each block in blocks_to_include
    for block in blocks_to_include:
        if not user_ids:
            results = _extract_data(course_key_str, block, None, match_string)
            rows += results
        else:
            for user_id in user_ids:
                results = _extract_data(course_key_str, block, user_id, match_string)
                rows += results

    # Generate the CSV:
    filename = u"pb-data-export-{}.csv".format(
        time.strftime("%Y-%m-%d-%H%M%S", time.gmtime(start_timestamp)))
    report_store = ReportStore.from_config(config_name='GRADES_DOWNLOAD')
    report_store.store_rows(course_key, filename, rows)
    generation_time_s = time.time() - start_timestamp
    logger.debug("Done data export - took {} seconds".format(generation_time_s))

    return {
        "error": None,
        "report_filename": filename,
        "start_timestamp": start_timestamp,
        "generation_time_s": generation_time_s,
        "display_data": [] if len(rows) == 1 else rows[1:1001]  # Limit to preview of 1000 items
    }
def export_data(course_id, source_block_id_str, block_types, user_ids, match_string):
    """
    Exports student answers to all MCQ questions to a CSV file.
    """
    start_timestamp = time.time()

    logger.debug("Beginning data export")
    try:
        course_key = CourseKey.from_string(course_id)
        src_block = modulestore().get_items(course_key, qualifiers={"name": source_block_id_str}, depth=0)[0]
    except IndexError:
        raise ValueError("Could not find the specified Block ID.")
    course_key_str = unicode(course_key)

    type_map = {cls.__name__: cls for cls in [MCQBlock, RatingBlock, AnswerBlock]}

    if not block_types:
        block_types = tuple(type_map.values())
    else:
        block_types = tuple(type_map[class_name] for class_name in block_types)

    # Build an ordered list of blocks to include in the export
    blocks_to_include = []

    def scan_for_blocks(block):
        """ Recursively scan the course tree for blocks of interest """
        if isinstance(block, block_types):
            blocks_to_include.append(block)
        elif block.has_children:
            for child_id in block.children:
                try:
                    scan_for_blocks(block.runtime.get_block(child_id))
                except ItemNotFoundError:
                    # Blocks may refer to missing children. Don't break in this case.
                    pass

    scan_for_blocks(src_block)

    # Define the header row of our CSV:
    rows = []
    rows.append(["Section", "Subsection", "Unit", "Type", "Question", "Answer", "Username"])

    # Collect results for each block in blocks_to_include
    for block in blocks_to_include:
        if not user_ids:
            results = _extract_data(course_key_str, block, None, match_string)
            rows += results
        else:
            for user_id in user_ids:
                results = _extract_data(course_key_str, block, user_id, match_string)
                rows += results

    # Generate the CSV:
    filename = u"pb-data-export-{}.csv".format(time.strftime("%Y-%m-%d-%H%M%S", time.gmtime(start_timestamp)))
    report_store = ReportStore.from_config(config_name="GRADES_DOWNLOAD")
    report_store.store_rows(course_key, filename, rows)
    generation_time_s = time.time() - start_timestamp
    logger.debug("Done data export - took {} seconds".format(generation_time_s))

    return {
        "error": None,
        "report_filename": filename,
        "start_timestamp": start_timestamp,
        "generation_time_s": generation_time_s,
        "display_data": [] if len(rows) == 1 else rows[1:1001],  # Limit to preview of 1000 items
    }
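# Usage sketch for export_data (the course id, block name, and filter values
# below are assumptions, not taken from the snippets above):
result = export_data(
    "course-v1:OrgX+PB101+2015",   # course_id
    "my_problem_builder_block",    # source_block_id_str
    ["MCQBlock"],                  # block_types: restrict the export to MCQ answers
    None,                          # user_ids: include all users
    None,                          # match_string: no answer substring filter
)
# result["report_filename"] names the CSV stored in the GRADES_DOWNLOAD store;
# result["display_data"] holds a preview of at most 1000 rows.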
def push_grades_to_s3(_xmodule_instance_args, _entry_id, course_id, _task_input, action_name):
    """
    For a given `course_id`, generate a grades CSV file for all students that
    are enrolled, and store using a `ReportStore`. Once created, the files can
    be accessed by instantiating another `ReportStore` (via
    `ReportStore.from_config()`) and calling `links_for()` on it. Writes are
    buffered, so we'll never write part of a CSV file to S3 -- i.e. any files
    that are visible in ReportStore will be complete ones.

    As we start to add more CSV downloads, it will probably be worthwhile to
    make a more general CSVDoc class instead of building out the rows like we
    do here.
    """
    start_time = datetime.now(UTC)
    status_interval = 100

    enrolled_students = CourseEnrollment.users_enrolled_in(course_id)
    num_total = enrolled_students.count()
    num_attempted = 0
    num_succeeded = 0
    num_failed = 0
    curr_step = "Calculating Grades"

    def update_task_progress():
        """Return a dict containing info about current task"""
        current_time = datetime.now(UTC)
        progress = {
            'action_name': action_name,
            'attempted': num_attempted,
            'succeeded': num_succeeded,
            'failed': num_failed,
            'total': num_total,
            'duration_ms': int((current_time - start_time).total_seconds() * 1000),
            'step': curr_step,
        }
        _get_current_task().update_state(state=PROGRESS, meta=progress)

        return progress

    # Loop over all our students and build our CSV lists in memory
    header = None
    rows = []
    err_rows = [["id", "username", "error_msg"]]
    for student, gradeset, err_msg in iterate_grades_for(course_id, enrolled_students):
        # Periodically update task status (this is a cache write)
        if num_attempted % status_interval == 0:
            update_task_progress()
        num_attempted += 1

        if gradeset:
            # We were able to successfully grade this student for this course.
            num_succeeded += 1
            if not header:
                # Encode the header row in utf-8 encoding in case there are unicode characters
                header = [section['label'].encode('utf-8') for section in gradeset[u'section_breakdown']]
                rows.append(["id", "email", "username", "grade"] + header)

            percents = {
                section['label']: section.get('percent', 0.0)
                for section in gradeset[u'section_breakdown']
                if 'label' in section
            }

            # Not everybody has the same gradable items. If the item is not
            # found in the user's gradeset, just assume it's a 0. The aggregated
            # grades for their sections and overall course will be calculated
            # without regard for the item they didn't have access to, so it's
            # possible for a student to have a 0.0 show up in their row but
            # still have 100% for the course.
            row_percents = [percents.get(label, 0.0) for label in header]
            rows.append([student.id, student.email, student.username, gradeset['percent']] + row_percents)
        else:
            # An empty gradeset means we failed to grade a student.
            num_failed += 1
            err_rows.append([student.id, student.username, err_msg])

    # By this point, we've got the rows we're going to stuff into our CSV files.
    curr_step = "Uploading CSVs"
    update_task_progress()

    # Generate parts of the file name
    timestamp_str = start_time.strftime("%Y-%m-%d-%H%M")
    course_id_prefix = urllib.quote(course_id.replace("/", "_"))

    # Perform the actual upload
    report_store = ReportStore.from_config()
    report_store.store_rows(
        course_id,
        u"{}_grade_report_{}.csv".format(course_id_prefix, timestamp_str),
        rows
    )

    # If there are any error rows (don't count the header), write them out as well
    if len(err_rows) > 1:
        report_store.store_rows(
            course_id,
            u"{}_grade_report_{}_err.csv".format(course_id_prefix, timestamp_str),
            err_rows
        )

    # One last update before we close out...
    return update_task_progress()
def get(self, request):  # pylint: disable=unused-argument
    """Retrieve all the reports from S3."""
    report_store = ReportStore.from_config('GRADES_DOWNLOAD')
    return Response(
        data={'exports': report_store.links_for('affiliates')},
        status=200,
        content_type='application/json'
    )
def create_report_store(self):
    """
    Create and return a DjangoStorageReportStore configured to use the
    local filesystem for storage.
    """
    return ReportStore.from_config(config_name='GRADES_DOWNLOAD')
def push_grades_to_s3(_xmodule_instance_args, _entry_id, course_id, _task_input, action_name):
    """
    For a given `course_id`, generate a grades CSV file for all students that
    are enrolled, and store using a `ReportStore`. Once created, the files can
    be accessed by instantiating another `ReportStore` (via
    `ReportStore.from_config()`) and calling `links_for()` on it. Writes are
    buffered, so we'll never write part of a CSV file to S3 -- i.e. any files
    that are visible in ReportStore will be complete ones.

    As we start to add more CSV downloads, it will probably be worthwhile to
    make a more general CSVDoc class instead of building out the rows like we
    do here.
    """
    start_time = datetime.now(UTC)
    status_interval = 100

    enrolled_students = CourseEnrollment.users_enrolled_in(course_id)
    num_total = enrolled_students.count()
    num_attempted = 0
    num_succeeded = 0
    num_failed = 0
    curr_step = "Calculating Grades"

    def update_task_progress():
        """Return a dict containing info about current task"""
        current_time = datetime.now(UTC)
        progress = {
            'action_name': action_name,
            'attempted': num_attempted,
            'succeeded': num_succeeded,
            'failed': num_failed,
            'total': num_total,
            'duration_ms': int(
                (current_time - start_time).total_seconds() * 1000),
            'step': curr_step,
        }
        _get_current_task().update_state(state=PROGRESS, meta=progress)

        return progress

    # Loop over all our students and build our CSV lists in memory
    header = None
    rows = []
    err_rows = [["id", "username", "error_msg"]]
    for student, gradeset, err_msg in iterate_grades_for(course_id, enrolled_students):
        # Periodically update task status (this is a cache write)
        if num_attempted % status_interval == 0:
            update_task_progress()
        num_attempted += 1

        if gradeset:
            # We were able to successfully grade this student for this course.
            num_succeeded += 1
            if not header:
                # Encode the header row in utf-8 encoding in case there are unicode characters
                header = [
                    section['label'].encode('utf-8')
                    for section in gradeset[u'section_breakdown']
                ]
                rows.append(["id", "email", "username", "grade"] + header)

            percents = {
                section['label']: section.get('percent', 0.0)
                for section in gradeset[u'section_breakdown']
                if 'label' in section
            }

            # Not everybody has the same gradable items. If the item is not
            # found in the user's gradeset, just assume it's a 0. The aggregated
            # grades for their sections and overall course will be calculated
            # without regard for the item they didn't have access to, so it's
            # possible for a student to have a 0.0 show up in their row but
            # still have 100% for the course.
            row_percents = [percents.get(label, 0.0) for label in header]
            rows.append([
                student.id, student.email.encode('utf-8'), student.username,
                gradeset['percent']
            ] + row_percents)
        else:
            # An empty gradeset means we failed to grade a student.
            num_failed += 1
            err_rows.append([student.id, student.username, err_msg])

    # By this point, we've got the rows we're going to stuff into our CSV files.
    curr_step = "Uploading CSVs"
    update_task_progress()

    # Generate parts of the file name
    timestamp_str = start_time.strftime("%Y-%m-%d-%H%M")
    course_id_prefix = urllib.quote(course_id.to_deprecated_string().replace("/", "_"))

    # Perform the actual upload
    report_store = ReportStore.from_config()
    report_store.store_rows(
        course_id,
        u"{}_grade_report_{}.csv".format(course_id_prefix, timestamp_str),
        rows
    )

    # If there are any error rows (don't count the header), write them out as well
    if len(err_rows) > 1:
        report_store.store_rows(
            course_id,
            u"{}_grade_report_{}_err.csv".format(course_id_prefix, timestamp_str),
            err_rows
        )

    # One last update before we close out...
    return update_task_progress()