Пример #1
0
    def test_query_multiple_partitions(self):
        """Query with list-valued partition filters and check row counts.

        Each case widens the company, tester or test_name list while the
        time window stays the same, and asserts the number of rows returned.
        """
        window_end = ts[5] + timedelta(seconds=0.5)
        # (companies, testers, test names, expected row count)
        cases = [
            (['Company-1'], ['Station-1'], ['Test-1'], 2),
            (['Company-1', 'Company-2'], ['Station-1'], ['Test-1'], 3),
            (['Company-1'], ['Station-1', 'Station-3'], ['Test-1'], 3),
            (['Company-1', 'Company-2'], ['Station-1', 'Station-3'],
             ['Test-1'], 4),
            (['Company-1', 'Company-2'], ['Station-1', 'Station-3'],
             ['Test-1', 'Test-3'], 5),
        ]
        for companies, testers, test_names, expected_rows in cases:
            partitions = {
                'company': companies,
                'site': ['Site-1'],
                'device_group': ['1000'],
                'tester': testers,
                'test_name': test_names,
            }
            df = ec.query_measurements_original(partitions, ts[0], window_end)
            self.assertEqual(expected_rows, df.count())
Пример #2
0
 def test_query_array(self):
     """Query Test-5 and verify each field of the single returned row."""
     partitions = {
         'company': 'Company-1',
         'site': 'Site-1',
         'device_group': '1000',
         'tester': 'Station-1',
         'test_name': 'Test-5',
     }
     window_end = ts[5] + timedelta(seconds=0.5)
     df = ec.query_measurements_original(partitions, ts[0], window_end)
     self.assertEqual(1, df.count())

     row = df.head()
     # Partition and identification fields.
     self.assertEqual('Company-1', row.company)
     self.assertEqual('Site-1', row.site)
     self.assertEqual('1000', row.device_group)
     self.assertEqual('Station-1', row.tester)
     self.assertEqual(ts[4], row.ts)
     self.assertEqual('101001', row.device_name)
     self.assertEqual('Test-5', row.test_name)
     self.assertEqual('Meas-2', row.meas_name)

     # The measurement value is an array, so compare element-wise.
     expected_values = numpy.array([0.1111, 0.2222, 0.3333, 0.4444])
     self.assertTrue(numpy.array_equal(expected_values, row.meas_value))

     # Remaining measurement metadata and status fields.
     self.assertEqual('V', row.meas_unit)
     self.assertEqual('PASS', row.meas_status)
     self.assertIsNone(row.meas_lower_limit)
     self.assertIsNone(row.meas_upper_limit)
     self.assertEqual('Description', row.meas_description)
     self.assertEqual('PASS', row.device_status)
     self.assertEqual('PASS', row.test_status)
Пример #3
0
 def test_query_double(self):
     """Query Meas-1 over a half-second window and compare the whole row."""
     partitions = {
         'company': 'Company-1',
         'site': 'Site-1',
         'station': 'Station-1',
         'sensor': 'Sensor-1',
         'meas_name': 'Meas-1',
     }
     window_end = ts[0] + timedelta(seconds=0.5)
     df = ec.query_measurements_original(partitions, ts[0], window_end)
     self.assertEqual(1, df.count())

     # The expected row carries a float value (45.7) with 40.0/90.0 limits.
     expected = SensorMeasurement(
         'Company-1', 'Site-1', 'Station-1', 'Sensor-1', ts[0], 'Event-1',
         'Meas-1', None, 45.7, 'degree C', 'PASS', 40.0, 90.0,
         'Description')
     self.assertEqualRows(expected, df.head())
Пример #4
0
 def test_query_binary(self):
     """Query Meas-5 and verify each field of the single returned row."""
     partitions = {
         'company': 'Company-1',
         'site': 'Site-1',
         'station': 'Station-1',
         'sensor': 'Sensor-1',
         'meas_name': 'Meas-5',
     }
     window_end = ts[4] + timedelta(seconds=0.5)
     df = ec.query_measurements_original(partitions, ts[0], window_end)
     self.assertEqual(1, df.count())

     row = df.head()
     # Partition and identification fields.
     self.assertEqual('Company-1', row.company)
     self.assertEqual('Site-1', row.site)
     self.assertEqual('Station-1', row.station)
     self.assertEqual('Sensor-1', row.sensor)
     self.assertEqual(ts[4], row.ts)
     self.assertEqual('Event-1', row.event)
     self.assertEqual('Meas-5', row.meas_name)

     # The measurement value is an array, so compare element-wise.
     expected_values = numpy.array(
         [0.5555, 0.6666, 0.7777, 0.8888, 0.9999])
     self.assertTrue(numpy.array_equal(expected_values, row.meas_value))

     # Remaining measurement metadata.
     self.assertEqual('V', row.meas_unit)
     self.assertEqual('PASS', row.meas_status)
     self.assertIsNone(row.meas_lower_limit)
     self.assertIsNone(row.meas_upper_limit)
     self.assertEqual('Description', row.meas_description)
Пример #5
0
 def test_query_string(self):
     """Query Meas-4 and compare the whole row, whose value is a string."""
     partitions = {
         'company': 'Company-1',
         'site': 'Site-1',
         'station': 'Station-1',
         'sensor': 'Sensor-1',
         'meas_name': 'Meas-4',
     }
     window_end = ts[4] + timedelta(seconds=0.5)
     df = ec.query_measurements_original(partitions, ts[0], window_end)
     self.assertEqual(1, df.count())

     # The expected row has no unit and no limits.
     expected = SensorMeasurement(
         'Company-1', 'Site-1', 'Station-1', 'Sensor-1', ts[3], 'Event-1',
         'Meas-4', None, 'POWER ON', None, 'PASS', None, None,
         'Description')
     self.assertEqualRows(expected, df.head())
Пример #6
0
    def test_quartile_empty(self):
        """Calling the 'quartile' outlier method on an empty frame gives 0 rows."""
        partitions = {
            'company': 'Company-1',
            'site': 'Site-1',
            'device_group': '1000',
            # No data for Station-NONE, so the query result is empty.
            'tester': 'Station-NONE',
        }
        window_end = ts[14] + timedelta(seconds=0.5)
        empty = ec.query_measurements_original(
            partitions, ts[0], window_end).toPandas()

        flagged = outliers(empty, 'meas_value', 'quartile')
        self.assertEqual(0, flagged.shape[0])
Пример #7
0
 def test_query_double(self):
     """Query Test-1 over a half-second window and compare the whole row."""
     partitions = {
         'company': 'Company-1',
         'site': 'Site-1',
         'device_group': '1000',
         'tester': 'Station-1',
         'test_name': 'Test-1',
     }
     window_end = ts[0] + timedelta(seconds=0.5)
     df = ec.query_measurements_original(partitions, ts[0], window_end)
     self.assertEqual(1, df.count())

     # The expected row carries a float value (45.7) with 40.0/90.0 limits.
     expected = AutomatedTest(
         'Company-1', 'Site-1', '1000', 'Station-1', ts[0], '100001',
         'Test-1', 'Meas-1', None, 45.7, 'degree C', 'PASS', 40.0, 90.0,
         'Description', 'PASS', 'PASS')
     self.assertEqualRows(expected, df.head())
Пример #8
0
 def test_query_string(self):
     """Query Test-4 and compare the whole row, whose value is a string."""
     partitions = {
         'company': 'Company-1',
         'site': 'Site-1',
         'device_group': '1000',
         'tester': 'Station-1',
         'test_name': 'Test-4',
     }
     window_end = ts[5] + timedelta(seconds=0.5)
     df = ec.query_measurements_original(partitions, ts[0], window_end)
     self.assertEqual(1, df.count())

     # The expected row has no unit and no limits.
     expected = AutomatedTest(
         'Company-1', 'Site-1', '1000', 'Station-1', ts[3], '101001',
         'Test-4', 'Meas-2', None, 'POWER ON', None, 'PASS', None, None,
         'Description', 'PASS', 'PASS')
     self.assertEqualRows(expected, df.head())
Пример #9
0
 def test_query_int(self):
     """Query Test-3 and compare the whole row, whose value is a large int."""
     partitions = {
         'company': 'Company-1',
         'site': 'Site-1',
         'device_group': '1000',
         'tester': 'Station-1',
         'test_name': 'Test-3',
     }
     window_end = ts[5] + timedelta(seconds=0.5)
     df = ec.query_measurements_original(partitions, ts[0], window_end)
     self.assertEqual(1, df.count())

     # The limits sit one below and one above the measured value.
     big_value = 3448388841
     expected = AutomatedTest(
         'Company-1', 'Site-1', '1000', 'Station-1', ts[2], '101001',
         'Test-3', 'Meas-2', None, big_value, 'ns', 'PASS',
         big_value - 1, big_value + 1, 'Description', 'PASS', 'PASS')
     self.assertEqualRows(expected, df.head())
Пример #10
0
 def test_query_waveform(self):
     """Query Test-6 and compare the whole row, whose value is a Waveform."""
     partitions = {
         'company': 'Company-1',
         'site': 'Site-1',
         'device_group': '1000',
         'tester': 'Station-1',
         'test_name': 'Test-6',
     }
     window_end = ts[5] + timedelta(seconds=0.5)
     df = ec.query_measurements_original(partitions, ts[0], window_end)
     self.assertEqual(1, df.count())

     expected_waveform = Waveform(
         ts[5], 0.1234, numpy.array([0.5678, 0.9012, 0.3456]))
     expected = AutomatedTest(
         'Company-1', 'Site-1', '1000', 'Station-1', ts[5], '101001',
         'Test-6', 'Meas-2', None, expected_waveform, 'V', 'PASS', None,
         None, 'Description', 'PASS', 'PASS')
     self.assertEqualRows(expected, df.head())
Пример #11
0
    def test_insufficient_meas_dist(self):
        """IMR over all names raises when the window cuts off Meas-2 data."""
        partitions = {
            'company': 'Company-1',
            'site': 'Site-1',
            'device_group': '1000',
            'tester': 'Station-1',
        }
        # End the window at ts[3] so the second Meas-2 measurement is
        # omitted.
        window_end = ts[3] + timedelta(seconds=0.5)
        df = ec.query_measurements_original(partitions, ts[0], window_end)

        # With the second Meas-2 measurement omitted there won't be enough
        # measurements to perform IMR.
        with self.assertRaises(Py4JJavaError):
            IMR(df).retrieve()

        # IMR on the first measurement only is fine.
        IMR(df, 'Meas-1').retrieve()
Пример #12
0
 def test_selected_meas_dist(self):
     """IMR's measurement-name argument matches pre-filtering the frame."""
     partitions = {
         'company': 'Company-1',
         'site': 'Site-1',
         'device_group': '1000',
         'tester': 'Station-1',
     }
     window_end = ts[4] + timedelta(seconds=0.5)
     df = ec.query_measurements_original(partitions, ts[0], window_end)

     # Filtering by meas_name before IMR and within IMR are equivalent.
     filtered_for_list = df.filter(df.meas_name == 'Meas-1')
     self.assertEqualDataFrames(IMR(filtered_for_list),
                                IMR(df, ['Meas-1']))

     # Filtering that matches all the meas_names is the same as no
     # filtering.
     self.assertEqualDataFrames(IMR(df),
                                IMR(df, ['Meas-1', 'Meas-2']))

     # Filtering with a single name is also supported.
     filtered_for_name = df.filter(df.meas_name == 'Meas-1')
     self.assertEqualDataFrames(IMR(filtered_for_name),
                                IMR(df, 'Meas-1'))
Пример #13
0
 def test_all_meas_dist(self):
     """Run IMR over every measurement name and compare the rendered table.

     The IMR result is converted to pandas and compared, as text, against
     the expected rows for Meas-1 and Meas-2.
     """
     df = ec.query_measurements_original({'company': 'Company-1',
                                          'site': 'Site-1',
                                          'device_group': '1000',
                                          'tester': 'Station-1'},
                                         ts[0],
                                         ts[4] + timedelta(seconds=0.5))
     # Compare without 'ts' column due to time representation inconsistencies
     # between systems.  The axis is passed by keyword: newer pandas
     # releases no longer accept it as a positional argument to drop().
     imr = IMR(df).toPandas().drop('ts', axis=1)
     self.assertEqual(
         '     company    site device_group     tester device_name test_name meas_name meas_datatype  meas_value meas_unit meas_status  meas_lower_limit  meas_upper_limit meas_description device_status test_status     I     I_mean       I_LCL    I_UCL    MR  MR_mean  MR_LCL    MR_UCL\n'
         '0  Company-1  Site-1         1000  Station-1      100001    Test-1    Meas-1          None        45.7  degree C        PASS              40.0              90.0      Description          PASS        PASS  45.7  47.866667  -79.458667  175.192   NaN     1.85     0.0   7.89395\n'
         '1  Company-1  Site-1         1000  Station-1      101001    Test-1    Meas-1          None        49.1  degree C        PASS              40.0              90.0      Description          PASS        PASS  49.1  47.866667  -79.458667  175.192   3.4     1.85     0.0   7.89395\n'
         '2  Company-1  Site-1         1000  Station-1      101001    Test-1    Meas-1          None        48.8  degree C        PASS              40.0              90.0      Description          PASS        PASS  48.8  47.866667  -79.458667  175.192   0.3     1.85     0.0   7.89395\n'
         '3  Company-1  Site-1         1000  Station-1      101001    Test-1    Meas-2          None        88.8  degree C        PASS              40.0              90.0      Description          PASS        PASS  88.8  83.200000 -138.112000  304.512   NaN    11.20     0.0  47.79040\n'
         '4  Company-1  Site-1         1000  Station-1      101001    Test-1    Meas-2          None        77.6  degree C        PASS              40.0              90.0      Description          PASS        PASS  77.6  83.200000 -138.112000  304.512  11.2    11.20     0.0  47.79040',
         imr.to_string())
Пример #14
0
    def test_single_meas_dist(self):
        """Check the I and MR columns IMR produces for Meas-1.

        Expected values are computed by hand from the three Meas-1 samples
        and compared with the corresponding result columns.
        """
        df = ec.query_measurements_original({'company': 'Company-1',
                                             'site': 'Site-1',
                                             'device_group': '1000',
                                             'tester': 'Station-1',
                                             'meas_name': 'Meas-1'},
                                            ts[0],
                                            ts[4])
        # Convert to pandas once: every assertion below reads a column of
        # the same result, so repeating the conversion per assertion is
        # wasted work.
        imr = IMR(df).toPandas()

        def column(name):
            # Values of one result column as a plain Python list.
            return imr.loc[:, name].values.tolist()

        self.assertEqual([45.7, 49.1, 48.8], column('meas_value'))

        # Individuals (I) chart columns.
        self.assertEqual([45.7, 49.1, 48.8], column('I'))
        i_mean = (45.7 + 49.1 + 48.8) / 3.0
        self.assertEqual([i_mean] * 3, column('I_mean'))
        i_lcl = i_mean - 2.66 * i_mean
        self.assertEqual([i_lcl] * 3, column('I_LCL'))
        i_ucl = i_mean + 2.66 * i_mean
        self.assertEqual([i_ucl] * 3, column('I_UCL'))

        # Moving range (MR) chart columns.  The first MR entry is skipped
        # because the first sample has no predecessor to difference against.
        self.assertEqual([49.1 - 45.7, 49.1 - 48.8], column('MR')[1:])
        mr_mean = (49.1 - 45.7 + 49.1 - 48.8) / 2.0
        self.assertEqual([mr_mean] * 3, column('MR_mean'))
        mr_lcl = 0.0
        self.assertEqual([mr_lcl] * 3, column('MR_LCL'))
        mr_ucl = mr_mean + 3.267 * mr_mean
        self.assertEqual([mr_ucl] * 3, column('MR_UCL'))
Пример #15
0
    def test_quartile(self):
        """The 'quartile' outlier method flags the expected Station-2 rows.

        The expected frame is built from the queried rows labelled 10 and
        up, with the flag and method columns added, and is compared as text
        with the output of ``outliers``.
        """
        df = ec.query_measurements_original(
            {
                'company': 'Company-1',
                'site': 'Site-1',
                'device_group': '1000',
                'tester': 'Station-2'},
            ts[0],
            ts[14] + timedelta(seconds=0.5)).toPandas()

        # Label-based slice on the default integer index.  .loc replaces the
        # removed .ix indexer, which was also label-based for an integer
        # index.  Copy the slice so the column additions below do not
        # operate on a slice of df.
        expected = df.loc[10:14, :].copy()
        expected.loc[:, 'meas_flag'] = ['mild', 'extreme', 'mild', 'extreme']
        expected.loc[:, 'meas_method'] = ['quartile']

        self.assertEqual(expected.to_string(),
                         outliers(df, 'meas_value', 'quartile').to_string())
Пример #16
0
class DataFrameTests(Base):
    """Tests for the DataFrame returned by ``ec.query_measurements_original``.

    All tests share one query result, ``df``, covering Company-1 / Site-1 /
    device group 1000 / Station-1 between ``start_time`` and ``end_time``.
    """

    start_time = datetime.fromtimestamp(1428004316.123)
    end_time = datetime.fromtimestamp(1428005326.163)
    # Built once, when the class body is executed, and shared by every test.
    df = ec.query_measurements_original(
        {
            'company': 'Company-1',
            'site': 'Site-1',
            'device_group': '1000',
            'tester': 'Station-1'
        }, start_time, end_time)

    def test_cache(self):
        """cache() returns a frame equal to the original."""
        # Not testing caching, just the return value.
        self.assertEqualDataFrames(self.df, self.df.cache())

    def test_count(self):
        """The shared query returns six rows."""
        self.assertEqual(6, self.df.count())

    def test_describe(self):
        """describe() renders the expected summary, unfiltered and filtered."""
        self.assertEqual(
            '       meas_value  meas_lower_limit  meas_upper_limit\n'
            'count         6.0      3.000000e+00      3.000000e+00\n'
            'mean          NaN      1.149463e+09      1.149463e+09\n'
            'std           NaN      1.990928e+09      1.990928e+09\n'
            'min           NaN      4.000000e+01      9.000000e+01\n'
            'max           NaN      3.448389e+09      3.448389e+09',
            self.df.describe().to_string())
        self.assertEqual(
            '       meas_value  meas_lower_limit  meas_upper_limit\n'
            'count    2.000000               2.0               2.0\n'
            'mean    47.400000              40.0              90.0\n'
            'std      2.404163               0.0               0.0\n'
            'min     45.700000              40.0              90.0\n'
            'max     49.100000              40.0              90.0',
            self.df.filter(
                self.df.test_name == 'Test-1').describe().to_string())

    def test_describe_empty(self):
        """describe() on a frame with no rows reports zero counts and NaNs."""
        empty_df = self.df.filter(self.df.meas_name == 'MISSING_NAME')
        self.assertEqual(
            '       meas_value  meas_lower_limit  meas_upper_limit\n'
            'count         0.0               0.0               0.0\n'
            'mean          NaN               NaN               NaN\n'
            'std           NaN               NaN               NaN\n'
            'min           NaN               NaN               NaN\n'
            'max           NaN               NaN               NaN',
            empty_df.describe().to_string())

    def test_describe_by_group(self):
        """describe_by_group() renders one column set per test_name."""
        self.assertEqual(
            '       Test-1.meas_value  Test-1.meas_lower_limit  Test-1.meas_upper_limit Test-3.meas_value Test-3.meas_lower_limit Test-3.meas_upper_limit  Test-4.meas_value  Test-4.meas_lower_limit  Test-4.meas_upper_limit  Test-5.meas_value  Test-5.meas_lower_limit  Test-5.meas_upper_limit  Test-6.meas_value  Test-6.meas_lower_limit  Test-6.meas_upper_limit\n'
            'count           2.000000                      2.0                      2.0                 1                       1                       1                1.0                      0.0                      0.0                1.0                      0.0                      0.0                1.0                      0.0                      0.0\n'
            'mean           47.400000                     40.0                     90.0       3.44839e+09             3.44839e+09             3.44839e+09                NaN                      NaN                      NaN                NaN                      NaN                      NaN                NaN                      NaN                      NaN\n'
            'std             2.404163                      0.0                      0.0               NaN                     NaN                     NaN                NaN                      NaN                      NaN                NaN                      NaN                      NaN                NaN                      NaN                      NaN\n'
            'min            45.700000                     40.0                     90.0        3448388841              3448388840              3448388842                NaN                      NaN                      NaN                NaN                      NaN                      NaN                NaN                      NaN                      NaN\n'
            'max            49.100000                     40.0                     90.0        3448388841              3448388840              3448388842                NaN                      NaN                      NaN                NaN                      NaN                      NaN                NaN                      NaN                      NaN',
            self.df.describe_by_group(
                'test_name',
                ['meas_value', 'meas_lower_limit', 'meas_upper_limit'
                 ]).to_string())

    def test_describe_by_group_empty(self):
        """describe_by_group() on a frame with no rows has no columns."""
        empty_df = self.df.filter(self.df.meas_name == 'MISSING_NAME')
        self.assertEqual(
            'Empty DataFrame\nColumns: []\nIndex: [count, mean, std, min, max]',
            empty_df.describe_by_group(
                'test_name',
                ['meas_value', 'meas_lower_limit', 'meas_upper_limit'
                 ]).to_string())

    def test_distinct(self):
        """distinct() on test_name yields the five test names."""
        distinct_names = self.df.select('test_name').distinct().toPandas()
        # Positional access to the only column; .iloc replaces the removed
        # .ix indexer, which was positional here because the column labels
        # are strings.
        self.assertEqual(
            set(['Test-1', 'Test-3', 'Test-4', 'Test-5', 'Test-6']),
            set(distinct_names.iloc[:, 0].tolist()))

    def test_filter(self):
        """filter() accepts column expressions, indexing and SQL strings."""
        self.assertEqual(2,
                         self.df.filter(self.df.test_name == 'Test-1').count())
        self.assertEqualDataFrames(
            self.df.filter(self.df.test_name == 'Test-1'),
            self.df[self.df.test_name == 'Test-1'])
        self.assertEqualDataFrames(
            self.df.filter(self.df.test_name == 'Test-1'),
            self.df.filter("test_name = 'Test-1'"))
        self.assertEqual(1,
                         self.df.filter(self.df.test_name == 'Test-5').count())
        self.assertEqual(5,
                         self.df.filter(self.df.test_name != 'Test-5').count())

    def test_get_device_data(self):
        """get_device_data() matches filtering by device and sorting by ts."""
        # NOTE(review): DataFrame.sort was removed in pandas 0.20; switch to
        # sort_values('ts') when the pandas version in use allows it.
        self.assertEqual(
            self.df.filter(self.df.device_name == '101001').toPandas().sort(
                'ts').to_string(),
            self.df.get_device_data('101001').to_string())

    def test_get_device_data_without_ts(self):
        """get_device_data() works when there is no 'ts' column."""
        # Test without a 'ts' timestamp field to sort by.
        df_device = self.df.select('device_name')
        self.assertEqual(
            df_device.filter(
                df_device.device_name == '101001').toPandas().to_string(),
            df_device.get_device_data('101001').to_string())

    def test_get_device_data_missing(self):
        """get_device_data() raises KeyError without a device_name column."""
        # Test calling get_device_data on a DataFrame missing a device_name
        # field.
        df = self.df.select('meas_value')
        with self.assertRaises(KeyError):
            df.get_device_data('101001')

    def test_get_meas_data(self):
        """get_meas_data() matches filtering by meas_name and sorting by ts."""
        # NOTE(review): DataFrame.sort was removed in pandas 0.20; switch to
        # sort_values('ts') when the pandas version in use allows it.
        self.assertEqual(
            self.df.filter(self.df.meas_name == 'Meas-2').toPandas().sort(
                'ts').to_string(),
            self.df.get_meas_data('Meas-2').to_string())

    def test_get_meas_data_without_ts(self):
        """get_meas_data() works when there is no 'ts' column."""
        # Test without a 'ts' timestamp field to sort by.
        df_meas = self.df.select('meas_name')
        self.assertEqual(
            df_meas.filter(
                df_meas.meas_name == 'Meas-2').toPandas().to_string(),
            df_meas.get_meas_data('Meas-2').to_string())

    def test_get_meas_data_missing(self):
        """get_meas_data() raises KeyError without a meas_name column."""
        # Test calling get_meas_data on a DataFrame missing a meas_name field.
        df = self.df.select('meas_value')
        with self.assertRaises(KeyError):
            df.get_meas_data('Meas-2')

    def test_head(self):
        """head() returns the first row; head(n) returns n rows."""
        self.assertEqualRows(
            AutomatedTest('Company-1', 'Site-1', '1000', 'Station-1', ts[0],
                          '100001', 'Test-1', 'Meas-1', None, 45.7, 'degree C',
                          'PASS', 40.0, 90.0, 'Description', 'PASS', 'PASS'),
            self.df.head())
        self.assertEqualRows(
            AutomatedTest('Company-1', 'Site-1', '1000', 'Station-1', ts[1],
                          '101001', 'Test-1', 'Meas-2', None, 49.1, 'degree C',
                          'PASS', 40.0, 90.0, 'Description', 'PASS', 'PASS'),
            self.df.head(2)[1])
        self.assertEqual(3, len(self.df.head(3)))

    def test_limit(self):
        """limit(n) yields the same leading rows as head(n)."""
        self.assertEqualRows(self.df.head(), self.df.limit(1).retrieve()[0])
        self.assertEqual(3, self.df.limit(3).count())
        for i, j in zip(self.df.head(3), self.df.limit(3).retrieve()):
            self.assertEqualRows(i, j)

    def test_select(self):
        """select() and list indexing both project the named columns."""
        self.assertEqualRows(Row(meas_description='Description'),
                             self.df.select('meas_description').head())
        self.assertEqualRows(Row(meas_description='Description'),
                             self.df[['meas_description']].head())
        self.assertEqualRows(
            Row(meas_description='Description', meas_status='PASS'),
            self.df.select('meas_description', 'meas_status').head())
        self.assertEqualRows(
            Row(meas_description='Description', meas_status='PASS'),
            self.df[['meas_description', 'meas_status']].head())

    def test_sort(self):
        """Sorting by ts descending puts the ts[5] row first."""
        self.assertEqual(ts[5], self.df.sort(self.df.ts.desc()).head().ts)

    def test_show(self):
        """show() runs without raising; its output is not checked."""
        self.df.show()

    def test_toPandas(self):
        """toPandas() keeps all six rows and each row's meas_value."""
        self.assertEqual(6, len(self.df.toPandas().index))
        values = self.df.toPandas()['meas_value'].tolist()
        self.assertEqual(45.7, values[0])
        self.assertEqual(49.1, values[1])
        self.assertEqual(3448388841, values[2])
        self.assertEqual('POWER ON', values[3])
        self.assertTrue(
            numpy.array_equal(numpy.array([0.1111, 0.2222, 0.3333, 0.4444]),
                              values[4]))
        self.assertEqual(
            Waveform(ts[5], 0.1234, numpy.array([0.5678, 0.9012, 0.3456])),
            values[5])

    def test_union(self):
        """union() combines rows from both frames in either order."""
        test1 = self.df.filter(self.df.test_name == 'Test-1')
        test5 = self.df.filter(self.df.test_name == 'Test-5')
        union = test1.union(test5)
        union2 = test5.union(test1)
        self.assertEqual(3, union.count())
        self.assertEqual(3, union2.count())
        # Positional access to the only column; .iloc replaces the removed
        # .ix indexer, which was positional here because the column labels
        # are strings.
        union_names = union[['test_name']].distinct().toPandas()
        self.assertEqual(
            set(['Test-1', 'Test-5']),
            set(union_names.iloc[:, 0].tolist()))
        union2_names = union2[['test_name']].distinct().toPandas()
        self.assertEqual(
            set(['Test-1', 'Test-5']),
            set(union2_names.iloc[:, 0].tolist()))