Example #1
 def setUp(self):
     self.runContext = SuiteContext('unittest')
     self.db_context = self.runContext.getDBContext(tag='unittest')
     clear_all(self.db_context)
     self.reader = SafeExcelReader(self.runContext)
     self.reader.db_context = self.db_context
     self.testDataDir = self.runContext.tests_safe_dir
Example #2
 def test_valid_mergespec_file_output_format(self):
     self._log.info(
         'test_valid_mergespec_file_output_format() - testing...')
     valid_ds_input_file = os.path.join(self._run_context.tests_safe_dir,
                                        DS_INPUT_FILE_VALID)
     input_tables = {FLAT_TABLE_KEY_NAME: VALID_DS_INPUT_TABLE_NAME}
     dbutilities.drop_table_if_exists(
         db_context=self._run_context.getDBContext(),
         table=VALID_DS_INPUT_TABLE_NAME)
     input_table_reader = SafeExcelReader(
         run_context=self._run_context,
         filename=valid_ds_input_file,
         sheet_name=DS_INPUT_FILE_SHEET_VALID,
         db_context=self._run_context.getDBContext(),
         output_table=VALID_DS_INPUT_TABLE_NAME)
     input_table_reader.createTable()
     mergespec_file = 'C:/{}'.format(MERGESPEC_OUTPUT_FILE)
     if os.path.exists(mergespec_file):
         os.remove(mergespec_file)
     cmpmrg = ComplementaryMerge(run_context=self._run_context)
     cmpmrg.create_mergespec_file(input_table_names=input_tables,
                                  new_mergespec_file=mergespec_file)
     filename1 = mergespec_file
     filename2 = os.path.join(self._run_context.tests_safe_dir,
                              COMPARE_FILE_MERGESPEC_OUTPUT)
     assert filecmp.cmp(
         filename1,
         filename2), 'Python test output does not match expected output.'
     self._log.info('test_valid_mergespec_file_output_format() - tested.')
Example #3
    def test_10(self):
        data_file = os.path.join(self.data_dir, _XLS_FILE)
        reader = SafeExcelReader(self.run_context,
                                 data_file,
                                 _XLS_SHEET,
                                 'class_info',
                                 self.db_context,
                                 scan_all=True)
        reader.createTable()
        ttester = TTest('class_info', self.db_context,
                        os.path.join(self.data_dir, _AGG_FILE), 0, True)
        ttester.readAggData()
        ttester.execute()

        answer_dir = os.path.join(self.answer_dir, 'test_10')
        if not os.path.exists(answer_dir):
            os.makedirs(answer_dir)
        answer_file = os.path.join(answer_dir, 'comparison.log')
        specimen_dir = os.path.join(self.specimen_dir, 'ttest_test_10')
        compare_function = lambda row: (int(row.tcrxid), int(row.comp))
        result = compare_tables(answer_file,
                                table="ttest_class",
                                specimen_name=os.path.join(
                                    specimen_dir, 'testresult.XLS'),
                                columns=COLUMNS,
                                table_key_function=compare_function,
                                specimen_key_function=compare_function,
                                db_context=self.db_context)

        self.assertTrue(result, "TTest Test 10 FAILED")
Example #4
    def setUp(self):
        
        self.run_context = SuiteContext( "unittest" )
        self.db_context = self.run_context.getDBContext( "unittest" )
        self.static_context = self.run_context.getDBContext( "static" )

        self.answer_dir = os.path.join( self.run_context.logs_dir, 'ttest_level_tests' )
        if not os.path.exists( self.answer_dir ):
            os.makedirs( self.answer_dir )
        
        self.data_dir = os.path.join( self.run_context.tests_safe_dir,
                                      'ttest', 'input_data' )
        
        self.specimen_dir = os.path.join( self.run_context.tests_safe_dir,
                                      'ttest', 'sas_outputs' )

        # libname ttest "H:\share\CSSC Folder\Score Report Group\Test Data\lib_TTestLevel";
        # %let agg_file = &cvsroot.\ScoreReportMacros\UnitTested\lib_TTestLevel\test\HI Spring 2008 Aggregations_Melissa.xls;
        # %let sheet=ttestlevel;
        # %SafeExcelRead(filename=&agg_file., sheetname =&sheet., DS_out =aggds);
        self.run_context.debug( "Reading data for ttest_level tests" )
        agg_file = os.path.join( self.data_dir, _AGG_FILE)
        reader = SafeExcelReader( self.run_context,
                agg_file, "ttestlevel", scan_all = True )
        self.agg_ds = [ row for row in reader.getRows() ]
        
        # Import the input datasets
        reader.db_context = self.static_context
        for ( grade, filename, table_name, sheet_name ) in _GRADE_FILES:
            if not table_exists( table_name, self.static_context ):
                self.run_context.debug( "Reading data for grade {}".format( grade ) )
                reader.filename = os.path.join( self.data_dir, filename )
                reader.outputTable = table_name
                reader.sheetName = sheet_name
                reader.createTable()
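
The _GRADE_FILES constant consumed above is assumed to be a sequence of (grade, filename, table_name, sheet_name) tuples; a hedged illustration with hypothetical file and table names:

    _GRADE_FILES = [
        ( 3, 'g3_input.xls', 'ttest_input_g3', 'Sheet1' ),  # hypothetical entries
        ( 4, 'g4_input.xls', 'ttest_input_g4', 'Sheet1' ),
    ]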
Example #5
def excel_to_vars( run_context, filename, sheetname ):
    if not run_context:
        raise ValueError( 'run_context is null or empty.' )
    if not filename:
        raise ValueError( 'filename is null or empty.' )
    if not sheetname:
        raise ValueError( 'sheetname is null or empty.' )
    reader = SafeExcelReader( run_context=run_context, filename=filename, sheet_name=sheetname )
    rows = [ row for row in reader.getRows( ) ]  # copy the row values
    return rows
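
A minimal usage sketch, assuming a 'unittest' suite configuration; the workbook path and sheet name are hypothetical:

    run_context = SuiteContext( 'unittest' )
    rows = excel_to_vars( run_context, 'C:/temp/input.xls', 'Data1' )  # hypothetical file and sheet
    print len( rows )  # each row exposes the sheet's columns as attributes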
Example #6
 def setUp(self):
     self.runContext = SuiteContext('unittest')
     self.db_context = self.runContext.getDBContext(tag='unittest')
     clear_all(self.db_context)
     self.reader = SafeExcelReader(self.runContext)
     self.reader.db_context = self.db_context
     self.testDataDir = self.runContext.tests_safe_dir
     self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
     self.reader.sheetName = "Data1"
     self.reader.outputTable = "Data1"
     self.reader.createTable()
Example #7
    def test_merge_nonfuzzy(self):
        self._log.info('test_merge_nonfuzzy - testing...')
        mergespec_file = os.path.join(self._run_context.tests_safe_dir,
                                      MERGESPEC_FILE_VALID_NONFUZZY)
        valid_ds_input_file = os.path.join(self._run_context.tests_safe_dir,
                                           DS_INPUT_FILE_VALID)

        input_tables = {FLAT_TABLE_KEY_NAME: VALID_DS_INPUT_TABLE_NAME}
        output_tables = {
            FLAT_TABLE_KEY_NAME: VALID_DS_OUTPUT_TABLE_NAME_NONFUZZY
        }

        dbutilities.drop_table_if_exists(
            db_context=self._run_context.getDBContext(),
            table=VALID_DS_INPUT_TABLE_NAME)
        dbutilities.drop_table_if_exists(
            db_context=self._run_context.getDBContext(),
            table=VALID_DS_OUTPUT_TABLE_NAME_NONFUZZY)
        for subject in output_tables:
            table_name = VALID_DS_OUTPUT_TABLE_NAME_NONFUZZY + '_' + subject
            self._log.info('dropping output table [{}]'.format(table_name))
            dbutilities.drop_table_if_exists(
                db_context=self._run_context.getDBContext(), table=table_name)

        input_table_reader = SafeExcelReader(
            run_context=self._run_context,
            filename=valid_ds_input_file,
            sheet_name=DS_INPUT_FILE_SHEET_VALID,
            db_context=self._run_context.getDBContext(),
            output_table=VALID_DS_INPUT_TABLE_NAME)
        input_table_reader.createTable()

        cmpmrg = ComplementaryMerge(
            run_context=self._run_context,
            flat_table_identity_field_name='[import_order]')
        cmpmrg.complementary_merge(mergespec_file=mergespec_file,
                                   input_table_names=input_tables,
                                   output_table_names=output_tables,
                                   force_one_only=True)

        columns_str = (
            'Barcode, ID, LastName, FirstName, Score1, Race, Old, Score2, Score3, Score4, variable_priority,'
            ' record_priority, Attempt1, Attempt2, Attempt3, Attempt4, iep')
        filename1 = self._dump_table(VALID_DS_OUTPUT_TABLE_NAME_NONFUZZY,
                                     columns_str=columns_str,
                                     sort_column_nbrs=[1, 6, 0])
        filename2 = os.path.join(self._run_context.tests_safe_dir,
                                 COMPARE_FILE_NONFUZZY)
        assert filecmp.cmp(
            filename1,
            filename2), 'Python test output does not match SAS test output.'

        self._log.info('test_merge_nonfuzzy - tested.')
Example #8
 def read_oat_agg_sheet(self):
     print "Reading aggregation sheet"
     table = dbutilities.get_temp_table(self.db_context)
     data_file = os.path.join(self.data_dir, _AGG_FILE_OAT)
     reader = SafeExcelReader(self.run_context,
                              data_file,
                              _XLS_SHEET,
                              table,
                              self.db_context,
                              scan_all=True)
     reader.createTable()
     table.populate_from_connection()
     return table
Example #9
    def testConstructor(self):
        reader = SafeExcelReader(self.runContext,
                                 filename=os.path.join(self.testDataDir,
                                                       _XLS_FILE),
                                 sheet_name='Data1',
                                 db_context=self.runContext.getDBContext(),
                                 output_table='Temp1',
                                 get_names=True,
                                 delimiter=',',
                                 import_order='import_order')
        reader.createTable()

        for name in get_table_names(self.db_context):
            self.assertEqual(
                '[temp1]', name,
                "Found name '{name}' instead of '[temp1]'".format(name=name))
Example #10
 def process(self):
     """This is the main process that controls the flow of this module"""
     SE = SafeExcelReader(self.RC,
                          self.filename,
                          self.sheet_name,
                          self.output_table,
                          db_context=self.dbcontext,
                          get_names=True,
                          delimiter=',',
                          import_order='import_order')
     SE.createTable()
     try:
         self._checktypes()
         self._checkemptyrows()
         self._checkinvalidvariables()
         self._checkvalidranges()
         self._recodings_and_create_dicts()
     except Exception as error:
         print 'Module:LAYOUT_CHECK Error=', error
     return self.layoutdict, self.maxmindict, self.recodingsdict
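
A hedged construction sketch for the class this method belongs to; the class name and constructor signature are hypothetical, inferred from the attributes used above:

    checker = LayoutCheck( RC, 'layout.xls', 'Sheet1', 'layout_table', dbcontext )  # hypothetical class name and signature
    layoutdict, maxmindict, recodingsdict = checker.process()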
Example #11
    def test_mergespec_file_invalid_format(self):
        self._log.info('test_mergespec_file_invalid_format() - testing...')
        valid_ds_input_file = os.path.join(self._run_context.tests_safe_dir,
                                           DS_INPUT_FILE_VALID)
        dbutilities.drop_table_if_exists(
            db_context=self._run_context.getDBContext(),
            table=VALID_DS_INPUT_TABLE_NAME)
        dbutilities.drop_table_if_exists(
            db_context=self._run_context.getDBContext(),
            table=VALID_DS_OUTPUT_TABLE_NAME)
        input_table_reader = SafeExcelReader(
            run_context=self._run_context,
            filename=valid_ds_input_file,
            sheet_name=DS_INPUT_FILE_SHEET_VALID,
            db_context=self._run_context.getDBContext(),
            output_table=VALID_DS_INPUT_TABLE_NAME)
        input_table_reader.createTable()
        mergespec_file = os.path.join(self._run_context.tests_safe_dir,
                                      MERGESPEC_FILE_MISSING_COLUMN)
        input_table_names = {FLAT_TABLE_KEY_NAME: VALID_DS_INPUT_TABLE_NAME}
        output_table_names = {FLAT_TABLE_KEY_NAME: VALID_DS_OUTPUT_TABLE_NAME}
        cmpmrg1 = ComplementaryMerge(run_context=self._run_context)
        with self.assertRaises(EOFError):
            cmpmrg1.complementary_merge(mergespec_file=mergespec_file,
                                        input_table_names=input_table_names,
                                        output_table_names=output_table_names,
                                        force_one_only=True)

        mergespec_file = os.path.join(self._run_context.tests_safe_dir,
                                      MERGESPEC_FILE_INVALID_COLUMN)
        cmpmrg2 = ComplementaryMerge(run_context=self._run_context)
        with self.assertRaises(ValueError):
            cmpmrg2.complementary_merge(mergespec_file=mergespec_file,
                                        input_table_names=input_table_names,
                                        output_table_names=output_table_names,
                                        force_one_only=True)

        mergespec_file = os.path.join(self._run_context.tests_safe_dir,
                                      MERGESPEC_FILE_NO_VARIABLE_PRIORITY)
        cmpmrg2 = ComplementaryMerge(run_context=self._run_context)
        with self.assertRaises(ValueError):
            cmpmrg2.complementary_merge(mergespec_file=mergespec_file,
                                        input_table_names=input_table_names,
                                        output_table_names=output_table_names,
                                        force_one_only=True)
        self._log.info('test_mergespec_file_invalid_format() - tested.')
Example #12
    def read_g3(self):
        print "Reading data"
        table = dbutilities.get_temp_table(self.db_context)
        data_file = os.path.join(self.data_dir, _XLS_FILE_G3)
        reader = SafeExcelReader(self.run_context,
                                 data_file,
                                 _XLS_SHEET,
                                 table,
                                 self.db_context,
                                 scan_all=False)
        reader.createTable()

        print "Tweaking data"
        self.db_context.executeNoResults(
            "UPDATE {g3} SET [state_inc_flag] = CASE WHEN ( [schtype] IN ('N','D','H') ) THEN 0 ELSE 1 END"
            .format(g3=table))
        self.db_context.executeNoResults(
            "UPDATE {g3} SET [Rclass_missing_flag] = CASE WHEN ( [Rclass_id] IS NULL OR [Rclass_id]='' ) THEN 1 ELSE 0 END"
            .format(g3=table))
        table.populate_from_connection()
        return table
Example #13
 def test_ds_output_already_exists(self):
     self._log.info('test_ds_output_already_exists() - testing...')
     mergespec_file = os.path.join(self._run_context.tests_safe_dir,
                                   MERGESPEC_FILE_VALID)
     valid_ds_input_file = os.path.join(self._run_context.tests_safe_dir,
                                        DS_INPUT_FILE_VALID)
     dbutilities.drop_table_if_exists(
         db_context=self._run_context.getDBContext(),
         table=VALID_DS_INPUT_TABLE_NAME)
     input_table_reader = SafeExcelReader(
         run_context=self._run_context,
         filename=valid_ds_input_file,
         sheet_name=DS_INPUT_FILE_SHEET_VALID,
         db_context=self._run_context.getDBContext(),
         output_table=VALID_DS_INPUT_TABLE_NAME)
     input_table_reader.createTable()
     input_table_names = {FLAT_TABLE_KEY_NAME: VALID_DS_INPUT_TABLE_NAME}
     output_table_names = {FLAT_TABLE_KEY_NAME: VALID_DS_INPUT_TABLE_NAME}
     cmpmrg = ComplementaryMerge(run_context=self._run_context)
     with self.assertRaises(IOError):
         cmpmrg.complementary_merge(mergespec_file=mergespec_file,
                                    input_table_names=input_table_names,
                                    output_table_names=output_table_names)
     self._log.info('test_ds_output_already_exists() - tested.')
Example #14
    def setUp(self):
        self.run_context = SuiteContext("unittest")
        self.db_context = self.run_context.getDBContext("unittest")
        self.static_context = self.run_context.getDBContext("static")
        self.source_data_dir = self.run_context.getConfigFile(
            "TESTS", "id_generator_test_source_data_dir",
            "%(tests_safe_dir)s/id_generator_test/source_data")
        if not table_exists('student_g3', self.static_context):
            source_file = os.path.join(self.source_data_dir, _XLS_FILE)
            reader = SafeExcelReader(self.run_context,
                                     source_file,
                                     "Sheet1",
                                     'student_g3',
                                     self.static_context,
                                     scan_all=True)
            reader.createTable()

        self.answer_dir = os.path.join(self.run_context.logs_dir,
                                       'id_generator_test')
        if not os.path.exists(self.answer_dir):
            os.makedirs(self.answer_dir)

        self.specimen_dir = os.path.join(self.run_context.tests_safe_dir,
                                         'id_generator_test', 'sas_outputs')
Example #15
    def setUpData(self):
        clear_all(self.db_context)
        reader = SafeExcelReader(run_context=self.run_context,
                                 db_context=self.db_context,
                                 filename=os.path.join(
                                     self.run_context.tests_safe_dir,
                                     XLS_FILE),
                                 scan_all=True)
        reader.outputTable = reader.sheetName = "Data1"
        reader.createTable()
        reader.outputTable = reader.sheetName = "Data2"
        reader.createTable()

        self.data1 = self.db_context.getTableSpec("Data1")
        self.data2 = self.db_context.getTableSpec("Data2")
Example #16
def read_spec_file(filename, merge_def):
    """Populate the field definitions of a :class:`MergeDef`
    
    Reads field definitions from a spec file and creates the appropriate
    :class:`MergeFieldSpec` items in the merge_def. The left_table, right_table
    and db_context properties of the :class:`MergeDef` must already be set before
    calling this method.
    
    Parameters
    ----------
    filename : str
        Name of the merge spec file.
        
    merge_def : :class:`MergeDef`
        The merge definition object into which the field definitions will be
        read.
        
    Returns
    -------
    None
    """
    messages = []
    db_context = merge_def.db_context
    run_context = db_context.runContext
    if not merge_def.get_actual_tables(messages):
        for message in messages:
            run_context.error(message)
        raise ValueError("Input tables not properly specified in MergeDef")

    reader = SafeExcelReader(run_context,
                             filename,
                             sheet_name=0,
                             get_names=True)
    del merge_def[:]

    # Checklists to confirm that all variables from each input table appear exactly once
    n_occurred_left = {}
    for field in merge_def.left_input_table.iterkeys():
        n_occurred_left[field] = 0
    n_occurred_right = {}
    for field in merge_def.right_input_table.iterkeys():
        n_occurred_right[field] = 0

    # Read the left side of the merge first
    required_keys = merge_def.required_merge_keys = []
    optional_keys = merge_def.optional_merge_keys = []
    fuzzy_keys = merge_def.fuzzy_merge_keys = []
    for row in reader.getRows():
        field_name, input_name, priority, key_assignment, data_type, data_length = \
                _extract_field_properties( row )
        if input_name not in merge_def.left_input_table:
            raise ValueError('Did not find column named {} in table {}'.format(
                input_name, merge_def.left_input_table))
        left_field = merge_def.left_input_table[input_name]
        n_occurred_left[left_field.field_name] += 1
        if field_name != '<drop>':
            if field_name in merge_def:
                raise ValueError(
                    'Attempting to create two merge fields with the same name')
            merge_field = MergeFieldSpec(left_field, None, None)
            merge_field.field_name = field_name
            merge_def.add(merge_field)
            _write_type(merge_field, data_type, data_length)
            if priority == 'first':
                merge_field.priority_field = PRIORITY_LEFT
            elif priority == 'firstnonmissing':
                merge_field.priority_field = PRIORITY_LEFT_NONMISSING
            elif priority == 'second':
                # Note, this may later get changed to PRIORITY_RIGHT_NONMISSING
                merge_field.priority_field = PRIORITY_RIGHT
            elif priority is None or priority == '':
                merge_field.priority_field = PRIORITY_LEFT_ONLY
            else:
                raise ValueError(
                    "Found priority {}; must be one of \"FIRST\", \"SECOND\", or \"FIRST NON-MISSING\""
                )

        if key_assignment is not None and key_assignment != "":
            key_field = MergeFieldSpec(left_field, None, None)
            key_field.field_name = key_assignment
            match = _KEY_ASSIGNMENT_RE.match(key_assignment)
            if match is None:
                raise ValueError(
                    "Did not know how to interpret key expression {}".format(
                        key_assignment))
            key_type, key_nbr = match.group(1, 2)
            if key_type in ("primary", "required"):
                _add_key(required_keys, key_field, key_nbr, key_assignment)
            elif key_type in ("secondary", "optional"):
                _add_key(optional_keys, key_field, key_nbr, key_assignment)
            elif key_type == "fuzzy":
                _add_fuzzy_key(fuzzy_keys, key_field, key_nbr, key_assignment)
            else:
                raise AssertionError("This error really should not happen")

    # Now read the right side of the merge
    reader.sheetName = 1
    for row in reader.getRows():
        field_name, input_name, priority, key_assignment, data_type, data_length = \
                _extract_field_properties( row )
        if input_name not in merge_def.right_input_table:
            raise ValueError('Did not find column named {} in table {}'.format(
                input_name, merge_def.right_input_table))
        right_field = merge_def.right_input_table[input_name]
        n_occurred_right[right_field.field_name] += 1
        if field_name != '<drop>':
            if field_name not in merge_def:
                merge_field = MergeFieldSpec(None, right_field,
                                             PRIORITY_RIGHT_ONLY)
                merge_field.field_name = field_name
                merge_def.add(merge_field)
                _write_type(merge_field, data_type, data_length)
                if not (priority is None or priority == ''):
                    raise ValueError(
                        "Field exists only on right side of merge: must have a blank priority"
                    )

            else:
                merge_field = merge_def[field_name]
                merge_field.right_field = copy.copy(right_field)

                # Check for compatible type assignment
                if merge_field.basic_type != data_type:
                    raise ValueError(
                        "Incompatible data types for field {}: {} on left to {} on right"
                        .format(merge_field, merge_field.basic_type,
                                data_type))
                if merge_field.is_charish and merge_field.data_length != data_length:
                    raise ValueError(
                        "Data lengths differ for field {}: {} on left to {} on right"
                        .format(merge_field, merge_field.data_length,
                                data_length))

                # Check for consistent priority assignment
                if merge_field.priority_field in (  PRIORITY_LEFT, PRIORITY_LEFT_NONMISSING ) and \
                        priority != 'second':
                    raise ValueError(
                        "Inconsistent priority designations for field {}".
                        format(merge_field))
                if merge_field.priority_field == PRIORITY_RIGHT and \
                        priority == 'second':
                    raise ValueError(
                        "Inconsistent priority designations for field {}".
                        format(merge_field))
                if merge_field.priority_field == PRIORITY_LEFT_ONLY:
                    raise ValueError(
                        "Blank priority designation not permitted when field appears on both sides of merge"
                    )
                if priority == 'firstnonmissing':
                    merge_field.priority_field = PRIORITY_RIGHT_NONMISSING

            # Do key assignment
            if key_assignment is not None and key_assignment != "":
                match = _KEY_ASSIGNMENT_RE.match(key_assignment)
                if match is None:
                    raise ValueError(
                        "Did not know how to interpret key expression {}".
                        format(key_assignment))
                key_type, key_nbr = match.group(1, 2)
                key_nbr = int(key_nbr) - 1
                key_field = None
                if key_type in ("primary", "required"):
                    if key_nbr < len(required_keys):
                        key_field = required_keys[key_nbr]
                elif key_type in ("secondary", "optional"):
                    if key_nbr < len(optional_keys):
                        key_field = optional_keys[key_nbr]
                elif key_type == "fuzzy":
                    if key_nbr < len(fuzzy_keys):
                        for key_field in fuzzy_keys[key_nbr]:
                            if key_field.right_field is None:
                                break
                else:
                    raise AssertionError("This error really should not happen")
                if key_field is None:
                    raise ValueError(
                        "Key {} was specified only on right side of merge".
                        format(key_assignment))
                if key_field.right_field is not None:
                    raise ValueError(
                        "Too many keys defined on right side of merge with key designation {}"
                        .format(key_assignment))
                key_field.right_field = right_field

    # Confirm that each input variable was processed exactly once.
    succeed = True
    for k, v in n_occurred_left.items():
        if v != 1:
            succeed = False
            run_context.error(
                "Each variable from left input table must occur exactly once on spec sheet. {} appeared {} times"
                .format(k, v))
    for k, v in n_occurred_right.items():
        if v != 1:
            succeed = False
            run_context.error(
                "Each variable from right input table must occur exactly once on spec sheet. {} appeared {} times"
                .format(k, v))
    if not succeed:
        raise ValueError("Missing or duplicate variables on merge spec sheet")
Example #17
class Test(unittest.TestCase):
    def setUp(self):
        self.runContext = SuiteContext('unittest')
        self.db_context = self.runContext.getDBContext(tag='unittest')
        clear_all(self.db_context)
        self.reader = SafeExcelReader(self.runContext)
        self.reader.db_context = self.db_context
        self.testDataDir = self.runContext.tests_safe_dir
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        self.reader.outputTable = "Data1"
        self.reader.createTable()

    def test_n_obs(self):
        n = n_obs('data1', self.db_context)
        self.assertEqual(
            300, n,
            "Got wrong number of observations: {} instead of 300".format(n))

    def test_get_tablespec_1(self):
        '''Does a tablespec get returned as is
        '''
        t = self.db_context.getTableSpec("Data1")
        t2 = get_table_spec(t)
        self.assertTrue(t is t2, "Did not return the same tablespec")

    def test_get_tablespec_2(self):
        '''Does it work to specify table name and db_context?
        '''
        t = get_table_spec('some_table', self.db_context)
        self.assertTrue(isinstance(t, TableSpec),
                        "Did not return a TableSpec object")
        self.assertEquals(t.table_name, '[some_table]',
                          "Did not return the right name")
        self.assertEquals(t.table_schema, self.db_context.schema,
                          "Did not return the correct schema")

    def test_get_tablespec_3(self):
        '''Can we override the schema?
        '''
        t = get_table_spec('some_table', self.db_context, 'another_schema')
        self.assertTrue(isinstance(t, TableSpec),
                        "Did not return a TableSpec object")
        self.assertEquals(t.table_name, '[some_table]',
                          "Did not return the right name")
        self.assertEquals(t.table_schema, '[another_schema]',
                          "Did not return the correct schema")

    def test_get_tablespec_4(self):
        '''Throw error if db_context conflicts
        '''
        another_context = self.runContext.getDBContext()
        t1 = self.db_context.getTableSpec("Data1")
        try:
            t2 = get_table_spec(t1, another_context)
        except ValueError as e:
            return
        self.fail("Did not throw expected error on conflicting db_context")

    def test_get_tablespec_5(self):
        '''Throw error if schema conflicts
        '''
        t1 = self.db_context.getTableSpec("Data1")
        try:
            t2 = get_table_spec(t1, self.db_context, 'another_schema')
        except ValueError as e:
            return
        self.fail("Did not throw expected error on conflicting schema")

    def test_get_tablespec_6(self):
        '''Correctly normalizing case
        '''
        t = get_table_spec('some_TABLE', self.db_context, 'another_SCHEMA')
        self.assertTrue(isinstance(t, TableSpec),
                        "Did not return a TableSpec object")
        self.assertEquals(t.table_name, '[some_table]',
                          "Did not return the right name")
        self.assertEquals(t.table_schema, '[another_schema]',
                          "Did not return the correct schema")

    def test_get_tablespec_7(self):
        '''Throws error if no db_context specified
        '''
        try:
            t = get_table_spec('some_table')
        except ValueError as e:
            return
        self.fail("Did not throw expected error on missing db_context")

    def test_get_tablespec_8(self):
        '''No error as long as db context matches
        '''
        t = self.db_context.getTableSpec("Data1")
        t2 = get_table_spec(t)
        self.assertTrue(t is t2, "Did not return the same tablespec")

    def test_get_tablespec_9(self):
        '''No error as long as schema matches
        '''
        t = self.db_context.getTableSpec("Data1",
                                         table_schema=self.db_context.schema)
        t2 = get_table_spec(t)
        self.assertTrue(t is t2, "Did not return the same tablespec")

    def test_get_column_names(self):
        cols = get_column_names('data1', self.db_context)
        self.assertListEqual(cols, [
            '[barcode_num]', '[barcode_char]', '[studentid]', '[gender]',
            '[ethnicity]', '[studentlnm]', '[studentfnm]', '[num_1]',
            '[num_2]', '[char_1]', '[char_2]', '[n1]', '[n2]', '[import_order]'
        ], "Did not return correct column names")

    def test_get_table_names1(self):
        tables = get_table_names(self.db_context)
        self.assertListEqual(tables, ['[data1]'],
                             "Did not return correct table names")

    def test_get_table_names2(self):
        '''Should not return any tables from another schema
        '''
        tables = get_table_names(self.db_context,
                                 table_schema="another_schema")
        self.assertListEqual(tables, [], "Did not return correct table names")

    def test_get_table_names3(self):
        '''Should work with quoted schema name
        '''
        tables = get_table_names(self.db_context, table_schema="[dbo]")
        self.assertListEqual(tables, ['[data1]'],
                             "Did not return correct table names")

    def test_get_table_names4(self):
        '''Should work with unquoted schema name
        '''
        tables = get_table_names(self.db_context, table_schema="dbo")
        self.assertListEqual(tables, ['[data1]'],
                             "Did not return correct table names")

    def test_clear_all(self):
        # Create a table with a foreign key constraint
        query = "CREATE TABLE my_table( key1 bigint, var1 VARCHAR(17), FOREIGN KEY( key1 ) REFERENCES data1( import_order ) )"
        self.db_context.executeNoResults(query)
        tables = get_table_names(self.db_context)
        self.assertTrue('[data1]' in tables,
                        "Did not return correct table names")
        self.assertTrue('[my_table]' in tables,
                        "Did not return correct table names")
        self.assertEqual(len(tables), 2, "Did not return correct table names")
        clear_all(self.db_context)
        tables = get_table_names(self.db_context)
        self.assertListEqual(tables, [], "Did not return correct table names")

    def test_table_exists(self):
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that exists")
        self.assertFalse(table_exists('zaxxiz', self.db_context),
                         "Found table that does not exist")

    def test_drop_table_if_exists1(self):
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that was supposed to exist")
        t = self.db_context.getTableSpec('data1')
        drop_table_if_exists(t)
        self.assertFalse(
            table_exists(t),
            "Table was supposed to be dropped, but its still there")

    def test_drop_table_if_exists2(self):
        '''Different schema
        '''
        t = self.db_context.getTableSpec('data1')
        t.table_schema = 'not_my_schema'
        drop_table_if_exists(t)
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that was supposed to exist")

    def test_drop_table_if_exists3(self):
        '''Different name
        '''
        t = self.db_context.getTableSpec('data2')
        drop_table_if_exists(t)
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that was supposed to exist")

    def test_assembly_exists(self):
        '''Assumes that the assembly has been created by running the prep_sqlserver
        script
        '''
        self.assertTrue(assembly_exists('ToProperCase', self.db_context))
        self.assertFalse(assembly_exists('asdfasdf', self.db_context))
Example #18
def peer_compare(excel='Y',
                 agg_table='',
                 agg_file='',
                 agg_sheet='',
                 indata='',
                 outdata='',
                 pc_data='',
                 crit_val=1.96,
                 db_context=None,
                 odbcconn=''):
    """ Compares Peer group Scores for OAT with a TTEST and computes the percentage 
        of people in each level for the peer group.
        
        Parameters
        ----------------
        excel : String ('Y' or 'N')
            Required. This should be 'Y' if your aggregate information is an Excel file, or 'N' if it is a SQL table.
        
        agg_table : String
            If excel='N' this should be the table name that holds the aggregate information. If excel='Y'
            this is not used.
            
        agg_file : String
            If excel='Y' this should be the path to the aggregate file. If excel='N' this is not used.
            
        agg_sheet : String
            If excel='Y' this should be the sheet name in the aggregate file. If excel='N' this is not used.
            
        indata : String
            Required. This should be the SQL table name that holds the input data.
            
        outdata : String
            Required. This will be the name of the SQL table that will hold the output data.
            
        pc_data : String
            Required. This should be the name of the SQL table that holds the peer districts information.
            
        crit_val : float
            Required. This will be the cutoff point for calculations. It must be between 1.28 and 3.89.
            
        db_context : DBContext object
            This is the DBContext within which all processing will be done. This specifies the DB connection.
            
        odbcconn : String
            This is the name of the ODBC connection you must set up to your database for R. This can be done in
            Control Panel -> Administrative Tools -> Data Sources.
        
        Notes
        ---------------
        I had to add ISNULL(NULLIF()) and CASE conditions because the query is too complex for SQL Server. It was not subsetting
        the data until after doing the SELECT, which would pick up incorrect values that broke the query planner. Changing all values to
        safe values handles this, and the values we changed are then thrown out once the query subsets to the data we want to use.
        Also note that for the final query we add .0000000001 to the number before rounding. This is an attempt to compensate for the
        "smart" rounding in SAS, which allows for a small margin of difference (e.g. 4.4999999999999 rounding to 5).
    """
    # defining variables we will need throughout the run
    yes_list = ['Y', 'YES']
    no_list = ['N', 'NO']
    tmp_aggtable_name = 'peercompare_tmp_agg'
    tmp_peertable_t_name = 'peercompare_tmp_t'
    tmp_peertable_mean_name = 'peercompare_tmp_mean'
    tmp_similar_districts_table = 'peercompare_tmp_dist'
    tmp_tables_list = []
    # error checking on params
    if db_context is None:
        raise ValueError("ERROR: You must pass a DBContext")
    if odbcconn.strip() == '':
        raise ValueError("ERROR: You must pass an odbc connection")
    if pc_data.strip() == '':
        raise ValueError("ERROR: You must pass a table name for pc_data")
    run_context = db_context.runContext
    if excel.upper() in yes_list:
        drop_table_if_exists(db_context=db_context, table=tmp_aggtable_name)

        # create a DB table with the information from the aggregate file,
        # then we will add columns "type","rdvalue",and "variable_name" to it so
        # it is a valid aggregate file for the means script.
        reader = SafeExcelReader(run_context,
                                 db_context=db_context,
                                 filename=agg_file,
                                 sheet_name=agg_sheet,
                                 output_table=tmp_aggtable_name)
        reader.createTable()
    elif excel.upper() in no_list:
        if agg_table.strip() == '':
            raise ValueError(
                "ERROR: You must specify an aggregate table name when excel='N'"
            )
        drop_table_if_exists(db_context=db_context, table=tmp_aggtable_name)

        # first we copy the aggregate information to a temp table
        db_context.executeNoResults(
            _COPY_TABLE_QUERY.format(tmp_aggtable_name, agg_table))
    else:
        raise Exception(
            "ERROR: parameter 'excel' must have a value of either 'Y' or 'N'")
    tmptab_tablespec = db_context.getTableSpec(tmp_aggtable_name)
    tmp_tables_list.append(tmp_aggtable_name)
    # now we add the 'levelvar' and 'rdvalue' columns to the table we created. These columns
    # are needed for creating a meansspec from the table.
    # adding levelvar column
    db_context.executeNoResults(
        _ADD_COLUMN_QUERY.format(tmp_aggtable_name, 'levelvar', 'varchar(10)'))
    # adding rdvalue column
    db_context.executeNoResults(
        _ADD_COLUMN_QUERY.format(tmp_aggtable_name, 'rdvalue', 'varchar(10)'))

    # add the column variable_name and copy the value from outputvar into it. This column
    # is needed for creating a meansspec from the table.
    length = tmptab_tablespec[db_identifier_quote('inputvar')].data_length + 4
    db_context.executeNoResults(
        _ADD_COLUMN_QUERY.format(tmp_aggtable_name, 'variable_name',
                                 'VARCHAR(' + str(length) + ")"))

    # now update the values for the columns we just added

    # first we change the values for those we are getting Means for
    db_context.executeNoResults(
        _UPDATE_MEAN_INPUT_QUERY.format(tmp_aggtable_name, 'MEAN', "inputvar",
                                        'Mean'))
    # next we update those we are getting T-Tests for
    db_context.executeNoResults(
        _UPDATE_MEAN_INPUT_QUERY.format(tmp_aggtable_name, 'STD',
                                        "RTRIM(LTRIM(inputvar)) + '_seh'",
                                        'TTEST'))

    # get a list of 3-tuples of (subject,wherevar,wherevalue)
    tuples_list = [(x, y, z) for x, y, z in db_context.executeBuffered(
        _SELECT_SUBJECTS_WHEREVAR_QUERY.format(tmp_aggtable_name))]

    cols = [
        '[variable_name]', '[levelvar]', '[wherevar]', '[wherevalue]',
        '[rdvalue]', '[type]', '[subject]'
    ]
    _insert_query = _INSERT_INTO_TABLE_QUERY.format(tmp_aggtable_name,
                                                    ",".join(cols))

    for row in tuples_list:
        subject = row[0]
        wherevar = row[1]
        wherevalue = row[2]
        # hardcoding variable_name to pweight_{subject}
        values = [
            'pweight_' + subject, 'dcrxid', wherevar, wherevalue, "", 'N',
            subject
        ]
        db_context.executeNoResults(_insert_query, values)

    # now the aggregate table is ready for means, so run means on it
    means_class = Means(excel='N',
                        agg_ds=tmp_aggtable_name,
                        db_context=db_context,
                        inputds=indata,
                        odbcconn=odbcconn,
                        overwrite='Y')
    means_class.execute()

    if len(means_class.created_tables_list) != 1:
        raise ValueError(
            "Error: Means call returned more than one table. It should only return 'Mean_DCRXID' but it returned "
            + ",".join(means_class.created_tables_list))

    # setup the Peerttest dataset in SAS - This holds the variables we will loop through
    drop_table_if_exists(tmp_peertable_t_name, db_context)
    db_context.executeNoResults(
        _SELECT_PEER_TTEST_QUERY.format(table=tmp_peertable_t_name,
                                        aggtable=tmp_aggtable_name))
    tmp_tables_list.append(tmp_peertable_t_name)

    # setup the Peermean dataset in SAS - This holds the variables we will loop through
    drop_table_if_exists(tmp_peertable_mean_name, db_context)
    db_context.executeNoResults(
        _SELECT_PEER_MEAN_QUERY.format(table=tmp_peertable_mean_name,
                                       aggtable=tmp_aggtable_name))
    tmp_tables_list.append(tmp_peertable_mean_name)

    # getting similar districts into table tmp_similar_districts_table
    drop_table_if_exists(tmp_similar_districts_table, db_context)
    db_context.executeNoResults(
        _SELECT_SIMILAR_DISTRICTS_QUERY.format(
            tmptab=tmp_similar_districts_table,
            disttable=pc_data,
            table=indata))
    tmp_tables_list.append(tmp_similar_districts_table)
    # list of distinct subjects that appear in tmp_aggtable_name
    subj_list = list(set(tup[0] for tup in tuples_list))

    # creating 2 tables, one for means calculations and one for ttest calculations
    vars_list_means = [
        x for x in db_context.executeBuffered(
            _SELECT_PEERMEAN_QUERY.format(table=tmp_peertable_mean_name,
                                          where=''))
    ]
    vars_list_ttest = [
        x for x in db_context.executeBuffered(
            _SELECT_PEERTTEST_QUERY.format(table=tmp_peertable_t_name,
                                           where=''))
    ]
    means_cols = ["F1"] + ["pcount_" + x for x in subj_list]
    ttest_cols = ["F1"]
    for outvar, invar in vars_list_means:
        if outvar is not None and outvar.strip() not in means_cols:
            means_cols.append(outvar.strip())
        if invar is not None and invar.strip() + "_total" not in means_cols:
            means_cols.append(invar.strip() + "_total")
    for predvar, outvar, invar in vars_list_ttest:
        if invar is not None and invar.strip() + "_se" not in ttest_cols:
            ttest_cols.append(invar.strip() + "_se")
    # create temporary table to hold all results from calculations for means, and one for ttest
    tmp_mean_calculated_table = "peermean_tmp_calc_mean"
    tmp_ttest_calculated_table = "peermean_tmp_calc_ttest"
    # first means
    query = "CREATE TABLE " + tmp_mean_calculated_table + " ( [" + """] float,
                        [""".join(means_cols) + "] float )"
    drop_table_if_exists(tmp_mean_calculated_table, db_context)
    db_context.executeNoResults(query)
    tmp_tables_list.append(tmp_mean_calculated_table)
    # now ttest
    query = "CREATE TABLE " + tmp_ttest_calculated_table + " ( [" + """] float,
                        [""".join(ttest_cols) + "] float )"
    drop_table_if_exists(tmp_ttest_calculated_table, db_context)
    db_context.executeNoResults(query)
    tmp_tables_list.append(tmp_ttest_calculated_table)
    # cleanup
    del vars_list_ttest
    del query
    del means_cols
    del ttest_cols

    # define where clause for the peermean and peerttest queries
    whereclause = "where subject = '{subj}'"

    # get list of distinct F1 values from our similar districts table
    f1_list = [
        x[0] for x in db_context.executeBuffered(
            _SELECT_F1_QUERY.format(table=tmp_similar_districts_table))
    ]

    # Go through the districts and insert them into the final table one at a time, inserting null for any values not calculated
    for f1 in f1_list:
        # list to hold all the select queries we create. One for the means table and one for the ttest table
        select_list_means = ["A.F1"]
        select_list_ttest = ["A.F1"]
        cols_means = ["F1"]
        cols_ttest = ["F1"]
        select_table_query = _MERGE_SIMILARDISTRICTS.format(
            table=means_class.created_tables_list[0],
            disttable=tmp_similar_districts_table,
            f1=f1)
        ttest_groupby = ["A.F1"]
        for subject in subj_list:
            # means_output_input_list is of the form  [ (outvar,invar) ]
            means_output_input_list = [
                x for x in db_context.executeBuffered(
                    _SELECT_PEERMEAN_QUERY.format(
                        table=tmp_peertable_mean_name,
                        where=whereclause.format(subj=subject)))
            ]
            # ttest_prev_input__list is of the form [ (predvar,outputvar,inputvar) ]
            ttest_prev_input__list = [
                x for x in db_context.executeBuffered(
                    _SELECT_PEERTTEST_QUERY.format(table=tmp_peertable_t_name,
                                                   where=whereclause.format(
                                                       subj=subject)))
            ]
            _count = _SUM_QUERY.format(subject=subject)
            for outvar, invar in means_output_input_list:
                total_query = "SUM( ISNULL([pweight_{subj}],0) * ISNULL([{invar}],0) )".format(
                    subj=subject, invar=invar)
                select_list_means.append(total_query + " as [" + invar +
                                         "_total]")
                select_list_means.append(
                    _MEAN_CASE_QUERY.format(count=_count,
                                            total=total_query,
                                            alias=db_identifier_quote(outvar)))
                cols_means.append("[" + invar + "_total]")
                cols_means.append(db_identifier_quote(outvar))
                # adding count variable
                if _count + " as [pcount_{subj}]".format(
                        subj=subject) not in select_list_means:
                    select_list_means.append(_count +
                                             " as [pcount_{subj}]".format(
                                                 subj=subject))
                    cols_means.append("[pcount_{subj}]".format(subj=subject))
            for predvar, outvar, inputvar in ttest_prev_input__list:
                se_query = _SE_QUERY.format(subj=subject, inputvar=inputvar)
                select_list_ttest.append(
                    _COUNT_CASE_QUERY.format(count="pcount_" + subject,
                                             se=se_query,
                                             pred=predvar,
                                             alias="[" + inputvar + "_se]"))
                cols_ttest.append("[" + inputvar + "_se]")
                ttest_groupby.append(predvar)
            if "pcount_" + subject not in ttest_groupby:
                ttest_groupby.append("pcount_" + subject)
        # now insert into means table
        insert_query = _INSERT_MEANS_CALCS_QUERY.format(
            table=tmp_mean_calculated_table,
            cols="""
                                                ,""".join(cols_means),
            selects="""
                                                ,""".join(select_list_means),
            froms=select_table_query)
        db_context.executeNoResults(insert_query)
        # now insert into ttest table
        insert_query = _INSERT_TTEST_CALCS_QUERY.format(
            table=tmp_ttest_calculated_table,
            cols="""
                                                ,""".join(cols_ttest),
            selects="""
                                                ,""".join(select_list_ttest),
            froms=select_table_query,
            meanstab=tmp_mean_calculated_table,
            groupby=",".join(ttest_groupby))
        db_context.executeNoResults(insert_query)

    # now we go through these output tables and get the t-values
    # setup initial select query list
    select_query_list = ["C.dcrxid"] + list(
        set([
            "ROUND(" + x + " + .0000000001,0) as [" + x + "]"
            for x, y in vars_list_means
        ]))
    for predvar, outvar, invar, subject in [
            x for x in db_context.executeBuffered(
                _SELECT_PEERTTEST_QUERY2.format(table=tmp_peertable_t_name,
                                                where=''))
    ]:
        select_query_list.append(
            _TTEST_CASE_QUERY.format(invar=invar,
                                     pred=predvar,
                                     critval=crit_val,
                                     alias=outvar,
                                     subj=subject))
    final_query = _TTEST_CALCS_QUERY.format(
        sels=",".join(list(set(select_query_list))),
        table=outdata,
        meanstab=tmp_mean_calculated_table,
        ttesttab=tmp_ttest_calculated_table,
        dcrxidtable=means_class.created_tables_list[0])
    drop_table_if_exists(outdata, db_context)
    db_context.executeNoResults(final_query)

    # clean up temporary tables
    for table in tmp_tables_list:
        drop_table_if_exists(table, db_context)
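
A hedged invocation sketch; every table name, the file path, and the ODBC DSN below are hypothetical:

    peer_compare( excel='Y',
                  agg_file='C:/temp/oat_aggregations.xls',  # hypothetical path
                  agg_sheet='peerlevel',                    # hypothetical sheet
                  indata='student_scores',                  # hypothetical input table
                  outdata='peer_compare_out',
                  pc_data='peer_districts',
                  crit_val=1.96,
                  db_context=run_context.getDBContext(),
                  odbcconn='oat_dsn' )                      # hypothetical DSN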
Example #19
    def readAggData(self):
        
        # Validate the input file
        self.ds_in = get_table_spec( self.ds_in, self.db_context )
        self.ds_in.populate_from_connection()
        self.db_context = self.ds_in.db_context
        self.run_context = self.db_context.runContext
        self.run_context.debug( "processing control file" )
        if not table_exists( self.ds_in ):
            raise ValueError( "Input dataset {} does not exist".format( self.ds_in ) )
        
        # Read the control file
        # SAS 3-9
        if self.use_excel:
            reader = SafeExcelReader( self.run_context, self.agg_ds, self.agg_sheet )
            self.agg_data = [ row for row in reader.getRows() ]
        else:
            self.agg_ds = get_table_spec( self.agg_ds, self.db_context )
            self.agg_data = dump( self.agg_ds )
        
        # Validate the control file columns
        # SAS 10-28
        missing_vars = set()
        for var_name in [ 'outputvar', 'inputvar', 'targetlevel', 'targetid', 'wheret', 'wheret_value', 'parentlevel',
                          'parentid', 'wherep', 'wherep_value', 'critval' ]:
            if var_name not in self.agg_data[0]:
                missing_vars.add( var_name )
        if missing_vars:
            raise ValueError( "TTest control sheet lacks required columns: {:', '}".format( Joiner( missing_vars ) ) )
        
        # Validate existence of requested columns
        # SAS 29-86
        for row in self.agg_data:
            if row.wheret is None:
                row.wheret = []
            else:
                row.wheret = [ x.strip().lower() for x in row.wheret.strip().split( '*' )]
                
            if row.wherep is None:
                row.wherep = []
            else:
                row.wherep = [ x.strip().lower() for x in row.wherep.strip().split( '*' )]
                
            if row.wheret_value is None:
                row.wheret_value = []
            else:
                row.wheret_value = [ x.strip() for x in row.wheret_value.strip().split( ' ' )]
                
            if row.wherep_value is None:
                row.wherep_value = []
            else:
                row.wherep_value = [ x.strip() for x in row.wherep_value.strip().split( ' ' )]
                
            row.inputvar = row.inputvar.lower().strip()
            row.targetid = row.targetid.lower().strip()
            row.parentid = row.parentid.lower().strip()
            row.targetlevel = row.targetlevel.lower().strip()
            row.parentlevel = row.parentlevel.lower().strip()

            for var_name in ( row.wheret
                              + row.wherep
                              + [ row.inputvar, row.targetid, row.parentid ] ):
                if var_name != '' and var_name not in self.ds_in:
                    missing_vars.add( var_name )
        if missing_vars:
            raise ValueError( "TTest input data lacks required variables: {:', '}".format( Joiner( missing_vars ) ) )
        
        # Sort control data
        #SAS 87-90
        self.agg_data.sort( key=lambda row : ( row.targetlevel, row.parentlevel ) )
        
        # Check for consistency across "target" and "parent" variables.
        #SAS 91-222
        last_targetlevel = _NONE_LEVEL
        last_parentlevel = _NONE_LEVEL
        self.target_levels = []
        messages = []
        
        for row in self.agg_data:
            wheret = tuple( row.wheret )
            wheret_value = tuple ( row.wheret_value )
            if len( wheret ) != len( wheret_value ):
                messages.append( 'Number of wheret_value items must match number of wheret items ("{0}" vs "{1}")'.format(
                        row.wheret, row.wheret_value ) )
            if row.targetlevel != last_targetlevel.level:
                last_targetlevel = LevelData( row.targetlevel, row.targetid, wheret, wheret_value )
                self.target_levels.append( last_targetlevel )
                last_parentlevel = _NONE_LEVEL
                
                # Create an output table in which to accumulate the results
                table_name = 'ttest_' + row.targetlevel
                last_targetlevel.output_table = TableSpec( self.db_context, table_name )
                last_targetlevel.output_table.add( self.ds_in[ row.targetid ].clone() )
            else:
                last_targetlevel.check( row.targetid, wheret, messages )
            
            wherep = tuple( row.wherep )
            wherep_value = tuple ( row.wherep_value )
            if len( wherep ) != len( wherep_value ):
                messages.append( 'Number of wherep_value items must match number of wherep items ("{0}" vs "{1}")'.format(
                        row.wherep, row.wherep_value ) )
            if row.parentlevel != last_parentlevel.level:
                last_parentlevel = LevelData( row.parentlevel, row.parentid, wherep, wherep_value )
                last_targetlevel.contents.append( last_parentlevel )
            else:
                last_parentlevel.check( row.parentid, wherep, messages )
                
            last_parentlevel.contents.append( row )
            last_targetlevel.output_table.add( FieldSpec( row.outputvar, 'TINYINT' ) )
            
            try:
                row.critval = float( row.critval )
                if not MIN_CRITVAL <= row.critval <= MAX_CRITVAL:
                    messages.append( "Bad critical value {} is not between {} and {}".format(
                            row.critval, MIN_CRITVAL, MAX_CRITVAL ) )
            except ValueError:
                messages.append( "Critical value {} is not a float".format( row.critval ) )
                
            try:
                row.outputvar = db_identifier_quote( row.outputvar )
            except ValueError:
                messages.append( "Output variable name {} is not a valid database identifier".format( row.outputvar ) )
                
            try:
                row.targetlevel = db_identifier_quote( row.targetlevel )
            except ValueError:
                messages.append( "Target level name {} is not a valid database identifier".format( row.targetlevel ) )
                
        for message in messages:
            self.run_context.error( message )
        
        if messages:
            raise ValueError( "Invalid inputs to ttest macro. See log for details" )
Example #20
class Test(unittest.TestCase):
    def setUp(self):
        self.runContext = SuiteContext('unittest')
        self.db_context = self.runContext.getDBContext(tag='unittest')
        clear_all(self.db_context)
        self.reader = SafeExcelReader(self.runContext)
        self.reader.db_context = self.db_context
        self.testDataDir = self.runContext.tests_safe_dir

    def testXLS(self):
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        self.reader.outputTable = "Data1"
        self.reader.createTable()

        table_spec = self.db_context.getTableSpec('Data1')
        primary_key = table_spec.primary_key
        self.assertEquals(len(primary_key), 1)
        self.assertEquals(primary_key[0].field_name, '[import_order]')

    def testConstructor(self):
        reader = SafeExcelReader(self.runContext,
                                 filename=os.path.join(self.testDataDir,
                                                       _XLS_FILE),
                                 sheet_name='Data1',
                                 db_context=self.runContext.getDBContext(),
                                 output_table='Temp1',
                                 get_names=True,
                                 delimiter=',',
                                 import_order='import_order')
        reader.createTable()

        for name in get_table_names(self.db_context):
            self.assertEqual(
                '[temp1]', name,
                "Found name '{name}' instead of '[temp1]'".format(name=name))

    def testIntoPython(self):
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        rows = [row for row in self.reader.getRows()]
        self.assertEqual(300, len(rows),
                         'Expected 300 rows, found %d' % len(rows))

    def testCSVIntoDB(self):
        self.reader.filename = os.path.join(self.testDataDir, _CSV_FILE)
        self.reader.outputTable = "CSV1"
        self.reader.scan_all = True
        self.reader.getNames = True
        self.reader.delimiter = "|"
        self.reader.skip = 0
        self.reader.range = (0, 0, 100, 1024)
        self.reader.createTable()
        table_spec = self.db_context.getTableSpec('CSV1')
        for col_name in _CSV_COLUMNS:
            self.assertTrue(col_name in table_spec,
                            "Missing column {}".format(col_name))

        self.assertEquals(n_obs(table_spec), 100,
                          "Wrong number of rows in imported data")

    def test_xlcol(self):
        letters = map(lambda x: self.reader._xlcol(x), [0, 3, 25])
        self.assertTrue(letters == ['A', 'D', 'Z'],
                        'xlcol - translating numbers to letters - failed')

    def test_xlcolnumber(self):
        numbers = map(lambda x: self.reader._xlcolnumber(x), ['A', 'D', 'Z'])
        self.assertTrue(
            numbers == [0, 3, 25],
            'xlcolnumber - translating letters to numbers - failed')
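
The last two tests exercise SafeExcelReader's private column-letter helpers. A standalone sketch of the same 0-based conversion, offered only as a reference implementation (the reader's actual code may differ):

    def xlcol( n ):
        """Convert a 0-based column index to Excel letters: 0 -> 'A', 25 -> 'Z', 26 -> 'AA'."""
        letters = ''
        n += 1  # shift to 1-based so the divmod arithmetic below carries correctly
        while n > 0:
            n, rem = divmod( n - 1, 26 )
            letters = chr( ord('A') + rem ) + letters
        return letters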