def test_get_valid_legacy_org_id(self):
    """Check that both valid legacy course ID fixtures resolve to the org "org".

    Covers VALID_LEGACY_COURSE_ID and VALID_NONASCII_LEGACY_COURSE_ID.
    """
    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(
        opaque_key_util.get_org_id_for_course(VALID_LEGACY_COURSE_ID), "org")
    self.assertEqual(
        opaque_key_util.get_org_id_for_course(VALID_NONASCII_LEGACY_COURSE_ID), "org")
def save_output(self, results, statuses, output_file):
    """Write the report to ``output_file`` as CSV, one row per course.

    ``results`` is transposed first, so each row produced by ``iterrows()``
    is keyed by a course_id.  Every output row holds the course's status,
    its course_id, its org_id ('-' when none is found) and one column per
    entry of the transposed frame's ``columns``.
    """
    per_course = results.transpose()

    # Column order for the report.
    columns = ['status', 'course_id', 'org_id']
    columns.extend(per_course.columns)

    writer = csv.DictWriter(output_file, columns)
    # Header row: every column name maps to itself.
    writer.writerow({name: name for name in columns})

    def format_counts(counts_dict):
        # NaN counts are rendered as '-'; all others are converted with int().
        for key, count in counts_dict.iteritems():
            if numpy.isnan(count):
                yield key, '-'
            else:
                yield key, int(count)

    for course_id, series in per_course.iterrows():
        # course_id travels through these reports as a utf8-encoded str,
        # so decode it to unicode locally before extracting the org.
        org_id = get_org_id_for_course(course_id.decode('utf-8'))
        row = {
            'course_id': course_id,
            'status': self.get_status_for_course(course_id, statuses),
            'org_id': org_id or '-',
        }
        row.update(format_counts(series.to_dict()))
        writer.writerow(row)
def _synthetic_event(self, timestamp, event_type, mode, reason, after=None, before=None):
    """Build a synthetic enrollment event.

    Returns a ``(datestamp, json_string)`` pair: the datestamp is derived
    from ``timestamp`` and the JSON string is the serialized event dict.
    Truthy ``after`` / ``before`` values are stored in the event's
    'synthesized' section as 'after_time' / 'before_time'.
    """
    # Payload specific to course enrollment events.
    payload = dict(course_id=self.course_id, user_id=self.user_id, mode=mode)

    event = self.factory.create_event_dict(
        payload,
        # main properties:
        time=timestamp,
        event_type=event_type,
        # stuff for context:
        user_id=self.user_id,
        course_id=self.course_id,
        org_id=opaque_key_util.get_org_id_for_course(self.course_id),
        # stuff for synthesized:
        reason=reason
    )

    synthesized = event['synthesized']
    for key, bound in (('after_time', after), ('before_time', before)):
        if bound:
            synthesized[key] = bound

    return eventlog.timestamp_to_datestamp(timestamp), json.dumps(event)
def _synthetic_event(self, timestamp, event_type, mode, reason, after=None, before=None):
    """Create a synthetic event and return it as ``(datestamp, json_string)``.

    ``after`` and ``before`` are optional; when truthy they are written into
    the event's "synthesized" dict as "after_time" and "before_time".
    """
    # Data specific to course enrollment events.
    enrollment_data = {
        "course_id": self.course_id,
        "user_id": self.user_id,
        "mode": mode,
    }

    properties = {}
    # main properties:
    properties["time"] = timestamp
    properties["event_type"] = event_type
    # stuff for context:
    properties["user_id"] = self.user_id
    properties["course_id"] = self.course_id
    properties["org_id"] = opaque_key_util.get_org_id_for_course(self.course_id)
    # stuff for synthesized:
    properties["reason"] = reason

    event = self.factory.create_event_dict(enrollment_data, **properties)

    synthesized = event["synthesized"]
    if after:
        synthesized["after_time"] = after
    if before:
        synthesized["before_time"] = before

    serialized = json.dumps(event)
    return eventlog.timestamp_to_datestamp(timestamp), serialized
def process_course_run(self, course_run, output_file):
    """Write ``course_run`` to ``output_file`` as one tab-separated CourseRecord line.

    'key' and 'course' are required keys of ``course_run``; every other
    field is read with ``get()`` and so may be absent.
    """
    def parse_time(key):
        # A fresh DateTimeField is used for every timestamp field.
        return DateTimeField().deserialize_from_string(course_run.get(key))

    record = CourseRecord(
        course_id=course_run['key'],
        catalog_course=course_run['course'],
        catalog_course_title=course_run.get('title'),
        start_time=parse_time('start'),
        end_time=parse_time('end'),
        enrollment_start_time=parse_time('enrollment_start'),
        enrollment_end_time=parse_time('enrollment_end'),
        content_language=course_run.get('content_language'),
        pacing_type=course_run.get('pacing_type'),
        level_type=course_run.get('level_type'),
        availability=course_run.get('availability'),
        org_id=get_org_id_for_course(course_run['key']),
        partner_short_code=course_run.get('partner_short_code'),
        marketing_url=course_run.get('marketing_url'),
        min_effort=course_run.get('min_effort'),
        max_effort=course_run.get('max_effort'),
        announcement_time=parse_time('announcement'),
        reporting_type=course_run.get('reporting_type'),
    )

    row = record.to_separated_values(sep=u'\t')
    output_file.write(row)
    output_file.write('\n')
def format_transaction_table_output(self, audit_code, transaction, orderitem,
                                    transaction_amount_per_item=None, transaction_fee_per_item=None):
    """Generate an output row from an orderitem and transaction.

    Args:
        audit_code: indexable value; its first three items lead the row.
        transaction: transaction object, or a falsy value when there is none.
        orderitem: order item object, or a falsy value when there is none.
        transaction_amount_per_item: portion of the transaction amount that
            applies to this order item; defaults to ``transaction.amount``
            when a transaction is given.
        transaction_fee_per_item: portion of the transaction fee that applies
            to this order item; defaults to ``transaction.transaction_fee``
            when a transaction is given.

    Returns:
        A one-element tuple holding the TSV string produced by
        ``OrderTransactionRecord(*result).to_tsv()``.
    """
    # Handle cases where per-item values are defaulted.
    if transaction:
        if transaction_amount_per_item is None:
            transaction_amount_per_item = transaction.amount
        if transaction_fee_per_item is None:
            transaction_fee_per_item = transaction.transaction_fee

    # The org can only be derived when there is an order item to read a course_id from.
    org_id = None
    if orderitem:
        org_id = get_org_id_for_course(orderitem.course_id)

    # The list is unpacked positionally into OrderTransactionRecord, so the
    # order of the entries below is significant.
    result = [
        audit_code[0],
        audit_code[1],
        audit_code[2],
        # NOTE(review): transaction.payment_ref_id is read whenever orderitem is
        # falsy -- presumably callers always supply at least one of the two; confirm.
        orderitem.payment_ref_id if orderitem else transaction.payment_ref_id,
        orderitem.order_id if orderitem else None,
        encode_id(orderitem.order_processor, "order_id", orderitem.order_id) if orderitem else None,
        orderitem.date_placed if orderitem else None,
        # transaction information
        transaction.date if transaction else None,
        transaction.transaction_id if transaction else None,
        encode_id(transaction.payment_gateway_id, "transaction_id", transaction.transaction_id) if transaction else None,
        transaction.payment_gateway_id if transaction else None,
        transaction.payment_gateway_account_id if transaction else None,
        transaction.transaction_type if transaction else None,
        transaction.payment_method if transaction else None,
        transaction.amount if transaction else None,
        transaction.iso_currency_code if transaction else None,
        transaction.transaction_fee if transaction else None,
        # mapping information: part of transaction that applies to this orderitem
        str(transaction_amount_per_item) if transaction_amount_per_item is not None else None,
        str(transaction_fee_per_item) if transaction_fee_per_item is not None else None,
        # orderitem information
        orderitem.line_item_id if orderitem else None,
        encode_id(orderitem.order_processor, "line_item_id", orderitem.line_item_id) if orderitem else None,
        orderitem.line_item_product_id if orderitem else None,
        orderitem.line_item_price if orderitem else None,
        orderitem.line_item_unit_price if orderitem else None,
        orderitem.line_item_quantity if orderitem else None,
        orderitem.refunded_amount if orderitem else None,
        orderitem.refunded_quantity if orderitem else None,
        orderitem.user_id if orderitem else None,
        orderitem.username if orderitem else None,
        orderitem.user_email if orderitem else None,
        orderitem.product_class if orderitem else None,
        orderitem.product_detail if orderitem else None,
        orderitem.course_id if orderitem else None,
        # This conditional evaluates to org_id itself in both branches.
        org_id if org_id is not None else None,
        orderitem.order_processor if orderitem else None,
    ]
    return (OrderTransactionRecord(*result).to_tsv(),)
def process_course_run(self, course_run, output_file):
    """Write one tab-separated ProgramCourseRecord line per program of ``course_run``.

    A course run without a 'programs' entry produces no output.
    """
    programs = course_run.get('programs', [])
    for program in programs:
        # Course-level keys are read inside the loop so that a run with no
        # programs never touches them.
        fields = dict(
            program_id=program['uuid'],
            program_type=program['type'],
            program_title=program.get('title'),
            catalog_course=course_run['course'],
            catalog_course_title=course_run.get('title'),
            course_id=course_run['key'],
            org_id=get_org_id_for_course(course_run['key']),
            partner_short_code=course_run.get('partner_short_code')
        )
        record = ProgramCourseRecord(**fields)
        row = record.to_separated_values(sep=u'\t')
        output_file.write(row)
        output_file.write('\n')
def process_course_run(self, course_run, output_file):
    """Emit a tab-separated ProgramCourseRecord line for each program of ``course_run``.

    Nothing is written when ``course_run`` has no "programs" entry.
    """
    for program in course_run.get("programs", []):
        program_id = program["uuid"]
        program_type = program["type"]
        program_title = program.get("title")
        catalog_course = course_run["course"]
        catalog_course_title = course_run.get("title")
        course_id = course_run["key"]
        org_id = get_org_id_for_course(course_run["key"])
        partner_short_code = course_run.get("partner_short_code")

        record = ProgramCourseRecord(
            program_id=program_id,
            program_type=program_type,
            program_title=program_title,
            catalog_course=catalog_course,
            catalog_course_title=catalog_course_title,
            course_id=course_id,
            org_id=org_id,
            partner_short_code=partner_short_code,
        )

        output_file.write(record.to_separated_values(sep=u"\t"))
        output_file.write("\n")
def process_course_run(self, course_run, output_file):
    """Write one tab-separated ProgramCourseRecord line per program of ``course_run``.

    ``program_slot_number`` is always passed as None here.  A course run
    without a 'programs' entry produces no output.
    """
    programs = course_run.get('programs', [])
    for program in programs:
        fields = dict(
            program_id=program['uuid'],
            program_type=program['type'],
            program_title=program.get('title'),
            catalog_course=course_run['course'],
            catalog_course_title=course_run.get('title'),
            course_id=course_run['key'],
            org_id=get_org_id_for_course(course_run['key']),
            partner_short_code=course_run.get('partner_short_code'),
            program_slot_number=None
        )
        record = ProgramCourseRecord(**fields)
        row = record.to_separated_values(sep=u'\t')
        output_file.write(row)
        output_file.write('\n')
def mapper(self, line):
    """Turn one '\\x01'-delimited enrollment row into a (course_id, event) pair.

    Rows that do not split into exactly six fields are logged and skipped.
    The yielded key is the course_id exactly as it appeared in the input.
    """
    columns = line.split('\x01')
    if len(columns) != 6:
        log.error("Encountered bad input: %s", line)
        return

    _db_id, raw_user_id, encoded_course_id, raw_created, raw_is_active, mode = columns

    # `created` comes out of mysql in the form '2012-07-25 12:26:22.0';
    # convert it to isoformat.
    created = mysql_datetime_to_isoformat(raw_created)

    # `is_active` should be a boolean and `user_id` is an int.
    is_active = raw_is_active == "true"
    user_id = int(raw_user_id)

    # Several standard properties one might expect in such an event are
    # not available here: username, host, session_id, agent.  The factory
    # stubs those values as empty strings.
    course_id = encoded_course_id.decode('utf-8')

    # Data for this particular type of event.
    payload = dict(
        course_id=course_id,
        user_id=user_id,
        mode=mode,
        is_active=is_active,
        created=created,
        dump_start=self.dump_start_time,
        dump_end=self.dump_end_time
    )

    # Stuff for context is passed as keyword properties.
    event = self.factory.create_event(
        payload,
        user_id=user_id,
        course_id=course_id,
        org_id=opaque_key_util.get_org_id_for_course(course_id)
    )

    # Use the original utf-8 version of the course_id as the key.
    # (If everything should be zipped into a single file, a single dummy
    # key could be passed instead of breaking the output out by course_id.)
    yield encoded_course_id, event
def test_database_export(self):
    """Run the export task, then the legacy exporter per course, and validate each output."""
    # An S3 bucket to store the output in.
    assert 'exporter_output_bucket' in self.config

    self.load_data_from_file()
    self.run_export_task()

    filename_template = '{safe_course_id}-{table}-{suffix}-analytics.sql'
    for course_id in (self.COURSE_ID2, self.COURSE_ID):
        # The org is lower-cased before being handed to the exporter and validator.
        org_id = get_org_id_for_course(course_id).lower()
        self.run_legacy_exporter(org_id, course_id)

        safe_course_id = get_filename_safe_course_id(course_id, '-')
        exported_filename = filename_template.format(
            safe_course_id=safe_course_id,
            table=self.TABLE,
            suffix=self.ENVIRONMENT,
        )
        self.validate_exporter_output(org_id, exported_filename)
def test_database_export(self):
    """Export the database, run the legacy exporter for both courses, and check the files."""
    # An S3 bucket to store the output in.
    has_bucket = 'exporter_output_bucket' in self.config
    assert has_bucket

    self.load_data_from_file()
    self.run_export_task()

    course_ids = [self.COURSE_ID2, self.COURSE_ID]
    for course_id in course_ids:
        # The org is lower-cased before being handed to the exporter and validator.
        org_id = get_org_id_for_course(course_id).lower()
        self.run_legacy_exporter(org_id, course_id)

        name_parts = {
            'safe_course_id': get_filename_safe_course_id(course_id, '-'),
            'table': self.TABLE,
            'suffix': self.ENVIRONMENT,
        }
        exported_filename = '{safe_course_id}-{table}-{suffix}-analytics.sql'.format(**name_parts)
        self.validate_exporter_output(org_id, exported_filename)
def mapper(self, line):
    """
    Emit tagging data for server-side 'problem_check' events.

    Lines that cannot be parsed, events of any other type or source, and
    events lacking a timestamp, course_id, event data or problem_id are
    skipped without output.

    Args:
        line: text line from a tracking event log.

    Yields:
        (course_id, org_id, problem_id), (timestamp, saved_tags, is_correct)
    """
    value = self.get_event_and_date_string(line)
    if value is None:
        return
    event, _ = value

    if event.get('event_type') != 'problem_check' or event.get('event_source') != 'server':
        return

    timestamp = eventlog.get_event_time_string(event)
    if timestamp is None:
        return

    course_id = eventlog.get_course_id(event)
    if not course_id:
        return

    org_id = opaque_key_util.get_org_id_for_course(course_id)

    event_data = eventlog.get_event_data(event)
    if event_data is None:
        return

    problem_id = event_data.get('problem_id')
    if not problem_id:
        return

    is_correct = event_data.get('success') == 'correct'

    # 'context' (or any level beneath it) may be missing or null in an event;
    # fall back to an empty dict at each step rather than calling .get() on None.
    context = event.get('context') or {}
    asides = context.get('asides') or {}
    tagging_aside = asides.get('tagging_aside') or {}
    saved_tags = tagging_aside.get('saved_tags', {})

    yield (course_id, org_id, problem_id), (timestamp, saved_tags, is_correct)
def extract_program_mapping(course_run, output_file):
    """
    Write one tab-separated program mapping line per program of a course run.

    Arguments:
        course_run (dict): Representation of the course run.  Its optional
            'programs' entry lists the programs to map; without it nothing
            is written.
        output_file (file-like): Destination handle; must implement write(str).
    """
    programs = course_run.get('programs', [])
    for program in programs:
        # Course-level keys are read inside the loop so that a run with no
        # programs never touches them.
        fields = dict(
            program_id=program['uuid'],
            program_type=program['type'],
            program_title=program.get('title'),
            catalog_course=course_run['course'],
            catalog_course_title=course_run.get('title'),
            course_id=course_run['key'],
            org_id=get_org_id_for_course(course_run['key']),
            partner_short_code=course_run.get('partner_short_code')
        )
        record = ProgramCourseRecord(**fields)
        row = record.to_separated_values(sep=u'\t')
        output_file.write(row)
        output_file.write('\n')
def process_course_run(self, course_run, output_file):
    """Write ``course_run`` to ``output_file`` as one tab-separated CourseRecord line.

    "key" and "course" are required keys of ``course_run``; every other
    field is read with ``get()`` and so may be absent.
    """
    def parse_time(key):
        # A fresh DateTimeField is used for every timestamp field.
        return DateTimeField().deserialize_from_string(course_run.get(key))

    record = CourseRecord(
        course_id=course_run["key"],
        catalog_course=course_run["course"],
        catalog_course_title=course_run.get("title"),
        start_time=parse_time("start"),
        end_time=parse_time("end"),
        enrollment_start_time=parse_time("enrollment_start"),
        enrollment_end_time=parse_time("enrollment_end"),
        content_language=course_run.get("content_language"),
        pacing_type=course_run.get("pacing_type"),
        level_type=course_run.get("level_type"),
        availability=course_run.get("availability"),
        org_id=get_org_id_for_course(course_run["key"]),
        partner_short_code=course_run.get("partner_short_code"),
        marketing_url=course_run.get("marketing_url"),
        min_effort=course_run.get("min_effort"),
        max_effort=course_run.get("max_effort"),
    )

    row = record.to_separated_values(sep=u"\t")
    output_file.write(row)
    output_file.write("\n")
def mapper(self, line):
    """
    Emit tagging data for server-side 'problem_check' events.

    Lines that cannot be parsed, events of any other type or source, and
    events lacking a timestamp, course_id, event data or problem_id are
    skipped without output.

    Args:
        line: text line from a tracking event log.

    Yields:
        (course_id, org_id, problem_id), (timestamp, saved_tags, is_correct)
    """
    value = self.get_event_and_date_string(line)
    if value is None:
        return
    event, _ = value

    if event.get('event_type') != 'problem_check' or event.get('event_source') != 'server':
        return

    timestamp = eventlog.get_event_time_string(event)
    if timestamp is None:
        return

    course_id = eventlog.get_course_id(event)
    if not course_id:
        return

    org_id = opaque_key_util.get_org_id_for_course(course_id)

    event_data = eventlog.get_event_data(event)
    if event_data is None:
        return

    problem_id = event_data.get('problem_id')
    if not problem_id:
        return

    is_correct = event_data.get('success') == 'correct'

    # 'context' (or any level beneath it) may be missing or null in an event;
    # fall back to an empty dict at each step rather than calling .get() on None.
    context = event.get('context') or {}
    asides = context.get('asides') or {}
    tagging_aside = asides.get('tagging_aside') or {}
    saved_tags = tagging_aside.get('saved_tags', {})

    yield (course_id, org_id, problem_id), (timestamp, saved_tags, is_correct)
def process_course_run(self, course_run, output_file):
    """Serialize ``course_run`` as a single tab-separated CourseRecord line.

    'key' and 'course' must be present in ``course_run``; all remaining
    fields are optional and read with ``get()``.
    """
    def timestamp_field(key):
        # Each timestamp is deserialized with its own DateTimeField instance.
        raw_value = course_run.get(key)
        return DateTimeField().deserialize_from_string(raw_value)

    record = CourseRecord(
        course_id=course_run['key'],
        catalog_course=course_run['course'],
        catalog_course_title=course_run.get('title'),
        start_time=timestamp_field('start'),
        end_time=timestamp_field('end'),
        enrollment_start_time=timestamp_field('enrollment_start'),
        enrollment_end_time=timestamp_field('enrollment_end'),
        content_language=course_run.get('content_language'),
        pacing_type=course_run.get('pacing_type'),
        level_type=course_run.get('level_type'),
        availability=course_run.get('availability'),
        org_id=get_org_id_for_course(course_run['key']),
        partner_short_code=course_run.get('partner_short_code'),
        marketing_url=course_run.get('marketing_url'),
        min_effort=course_run.get('min_effort'),
        max_effort=course_run.get('max_effort'),
        announcement_time=timestamp_field('announcement'),
        reporting_type=course_run.get('reporting_type'),
    )

    serialized = record.to_separated_values(sep=u'\t')
    output_file.write(serialized)
    output_file.write('\n')
def _get_partner(self, course_id):
    """Heuristically pick the partner short code for a ShoppingCart order item.

    Looks the course's org up in ``shoppingcart_partners_dict``; a missing
    or falsy entry falls back to ``default_partner_short_code``.
    """
    partner = self.shoppingcart_partners_dict.get(get_org_id_for_course(course_id))
    if partner:
        return partner
    return self.default_partner_short_code
def test_get_invalid_org_id(self, course_id):
    """get_org_id_for_course must return None for the supplied course_id."""
    org_id = opaque_key_util.get_org_id_for_course(course_id)
    self.assertIsNone(org_id)
def test_get_valid_org_id(self, course_id):
    """get_org_id_for_course must return "org" for the supplied course_id."""
    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(opaque_key_util.get_org_id_for_course(course_id), "org")
def format_transaction_table_output(self, audit_code, transaction, orderitem,
                                    transaction_amount_per_item=None, transaction_fee_per_item=None):
    """Generate an output row from an orderitem and transaction.

    Args:
        audit_code: indexable value; its first three items lead the row.
        transaction: transaction object, or a falsy value when there is none.
        orderitem: order item object, or a falsy value when there is none.
        transaction_amount_per_item: portion of the transaction amount that
            applies to this order item; defaults to ``transaction.amount``
            when a transaction is given.
        transaction_fee_per_item: portion of the transaction fee that applies
            to this order item; defaults to ``transaction.transaction_fee``
            when a transaction is given.

    Returns:
        A one-element tuple holding the TSV string produced by
        ``OrderTransactionRecord(*result).to_tsv()``.
    """
    # Handle cases where per-item values are defaulted.
    if transaction:
        if transaction_amount_per_item is None:
            transaction_amount_per_item = transaction.amount
        if transaction_fee_per_item is None:
            transaction_fee_per_item = transaction.transaction_fee

    # The org can only be derived when there is an order item to read a course_id from.
    org_id = None
    if orderitem:
        org_id = get_org_id_for_course(orderitem.course_id)

    # The list is unpacked positionally into OrderTransactionRecord, so the
    # order of the entries below is significant.
    result = [
        audit_code[0],
        audit_code[1],
        audit_code[2],
        # Without an order item the partner falls back to the default short code.
        orderitem.partner_short_code if orderitem else self.default_partner_short_code,
        # NOTE(review): transaction.payment_ref_id is read whenever orderitem is
        # falsy -- presumably callers always supply at least one of the two; confirm.
        orderitem.payment_ref_id if orderitem else transaction.payment_ref_id,
        orderitem.order_id if orderitem else None,
        encode_id(orderitem.order_processor, "order_id", orderitem.order_id) if orderitem else None,
        orderitem.date_placed if orderitem else None,
        # transaction information
        transaction.date if transaction else None,
        transaction.transaction_id if transaction else None,
        encode_id(transaction.payment_gateway_id, "transaction_id", transaction.transaction_id) if transaction else None,
        transaction.payment_gateway_id if transaction else None,
        transaction.payment_gateway_account_id if transaction else None,
        transaction.transaction_type if transaction else None,
        transaction.payment_method if transaction else None,
        transaction.amount if transaction else None,
        transaction.iso_currency_code if transaction else None,
        transaction.transaction_fee if transaction else None,
        # mapping information: part of transaction that applies to this orderitem
        str(transaction_amount_per_item) if transaction_amount_per_item is not None else None,
        str(transaction_fee_per_item) if transaction_fee_per_item is not None else None,
        # orderitem information
        orderitem.line_item_id if orderitem else None,
        encode_id(orderitem.order_processor, "line_item_id", orderitem.line_item_id) if orderitem else None,
        orderitem.line_item_product_id if orderitem else None,
        orderitem.line_item_price if orderitem else None,
        orderitem.line_item_unit_price if orderitem else None,
        orderitem.line_item_quantity if orderitem else None,
        orderitem.coupon_id if orderitem else None,
        orderitem.discount_amount if orderitem else None,
        orderitem.voucher_id if orderitem else None,
        orderitem.voucher_code if orderitem else None,
        orderitem.refunded_amount if orderitem else None,
        orderitem.refunded_quantity if orderitem else None,
        orderitem.user_id if orderitem else None,
        orderitem.username if orderitem else None,
        orderitem.user_email if orderitem else None,
        orderitem.product_class if orderitem else None,
        orderitem.product_detail if orderitem else None,
        orderitem.course_id if orderitem else None,
        # This conditional evaluates to org_id itself in both branches.
        org_id if org_id is not None else None,
        orderitem.order_processor if orderitem else None,
    ]
    return (OrderTransactionRecord(*result).to_tsv(),)
def test_get_invalid_legacy_org_id(self):
    """Neither INVALID_LEGACY_COURSE_ID nor NONASCII_LEGACY_COURSE_ID yields an org ID."""
    for course_id in (INVALID_LEGACY_COURSE_ID, NONASCII_LEGACY_COURSE_ID):
        org_id = opaque_key_util.get_org_id_for_course(course_id)
        self.assertIsNone(org_id)
def test_get_valid_org_id(self):
    """get_org_id_for_course must return "org" for VALID_COURSE_ID."""
    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(opaque_key_util.get_org_id_for_course(VALID_COURSE_ID), "org")
def test_get_invalid_legacy_org_id(self):
    """Neither invalid legacy course ID fixture (ASCII or non-ASCII) yields an org ID."""
    invalid_ids = (INVALID_LEGACY_COURSE_ID, INVALID_NONASCII_LEGACY_COURSE_ID)
    for course_id in invalid_ids:
        org_id = opaque_key_util.get_org_id_for_course(course_id)
        self.assertIsNone(org_id)
def test_get_valid_legacy_org_id(self):
    """get_org_id_for_course must return "org" for VALID_LEGACY_COURSE_ID."""
    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(opaque_key_util.get_org_id_for_course(VALID_LEGACY_COURSE_ID), "org")