def validate_summary(self):
        """Ensure the summary is correct.

        Reads the ``course_enrollment_summary`` partition ``dt=2014-08-07``
        from the warehouse, loads it into a DataFrame whose column names are
        the field names of ``EnrollmentSummaryRecord``, and compares it with
        the expected fixture ``acceptance_expected_d_user_course.csv`` found
        under ``<data_dir>/output``.

        Both frames are ordered by ``first_enrollment_time`` and re-indexed
        before the comparison so the check does not depend on row order.
        Any mismatch is reported by ``self.assert_data_frames_equal``.
        """
        data_path = url_path_join(self.warehouse_path, 'course_enrollment_summary', 'dt=2014-08-07')
        raw_output = self.read_dfs_directory(data_path)
        # Blank out every "\N" that directly follows a tab (presumably the
        # warehouse NULL marker -- confirm) so pandas sees an empty cell there.
        output = StringIO(raw_output.replace('\t\\N', '\t'))
        # Materialize the keys so pandas always receives a plain list of names,
        # regardless of whether keys() returns a list or a view.
        columns = list(EnrollmentSummaryRecord.get_fields().keys())
        data = pandas.read_table(output, header=None, names=columns, parse_dates=True)

        expected_output_csv = os.path.join(self.data_dir, 'output', 'acceptance_expected_d_user_course.csv')
        expected = pandas.read_csv(expected_output_csv, parse_dates=True)

        sort_column = 'first_enrollment_time'
        for frame in (data, expected):
            # DataFrame.sort() was removed in pandas 0.20; sort_values() is its
            # replacement (available since 0.17).  Fall back to sort() only on
            # pandas installations that predate sort_values().
            if hasattr(frame, 'sort_values'):
                frame.sort_values(sort_column, inplace=True, ascending=True)
            else:
                frame.sort(sort_column, inplace=True, ascending=True)
            frame.reset_index(drop=True, inplace=True)

        self.assert_data_frames_equal(data, expected)
 def schema(self):
     """Return the BigQuery schema provided by ``EnrollmentSummaryRecord``."""
     bigquery_schema = EnrollmentSummaryRecord.get_bigquery_schema()
     return bigquery_schema
 def columns(self):
     """Return the SQL schema provided by ``EnrollmentSummaryRecord``."""
     sql_schema = EnrollmentSummaryRecord.get_sql_schema()
     return sql_schema
Пример #4
0
 def columns(self):
     """Return the column definitions taken from the record's SQL schema."""
     record_columns = EnrollmentSummaryRecord.get_sql_schema()
     return record_columns
Пример #5
0
 def columns(self):
     """Return the record's SQL schema after ``convert_datetime_to_timestamp_tz``.

     The schema from ``EnrollmentSummaryRecord.get_sql_schema()`` is passed
     through ``convert_datetime_to_timestamp_tz`` and the result is returned.
     NOTE(review): presumably this rewrites datetime column types to a
     timezone-aware timestamp type -- confirm against the helper.
     """
     sql_schema = EnrollmentSummaryRecord.get_sql_schema()
     converted_schema = convert_datetime_to_timestamp_tz(sql_schema)
     return converted_schema
 def schema(self):
     """Return the schema obtained from the record's ``get_bigquery_schema()``."""
     record_schema = EnrollmentSummaryRecord.get_bigquery_schema()
     return record_schema