def test_schema_hints(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            schemaHints = OrderedDict()
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        #print schema
        #print map(ColumnSpec.getType, schema)
        self.assertEqual(['VARCHAR(40)', 'VARCHAR(40)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'DATETIME', 'TEXT', 'DATETIME', 'TEXT', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'INT', 'INT', 'INT', 'INT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'TEXT', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)'],
                         map(ColumnSpec.getType, schema))

        self.stringSource = InURI(os.path.join(os.path.dirname(__file__),"data/twoJSONRecords.json"))
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            schemaHints = OrderedDict({'chunkSize' : ColDataType.INT,
                                                           'length' : ColDataType.INT})
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        self.assertEqual(['VARCHAR(40)', 'VARCHAR(40)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'DATETIME', 'TEXT', 'DATETIME', 'TEXT', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'INT', 'INT', 'INT', 'INT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'TEXT', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)'],
                         map(ColumnSpec.getType, schema))
示例#2
0
    def test_schema_hints(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            schemaHints = OrderedDict()
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        #print schema
        #print map(ColumnSpec.getType, schema)
        self.assertEqual(['VARCHAR(40)', 'VARCHAR(40)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'DATETIME', 'TEXT', 'DATETIME', 'TEXT', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'INT', 'INT', 'INT', 'INT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'TEXT', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)'],
                         map(ColumnSpec.getType, schema))

        self.stringSource = InURI(os.path.join(os.path.dirname(__file__),"data/twoJSONRecords.json"))
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            schemaHints = OrderedDict({'chunkSize' : ColDataType.INT,
                                                           'length' : ColDataType.INT})
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        self.assertEqual(['VARCHAR(40)', 'VARCHAR(40)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'DATETIME', 'TEXT', 'DATETIME', 'TEXT', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'INT', 'INT', 'INT', 'INT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'TEXT', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)'],
                         map(ColumnSpec.getType, schema))
示例#3
0
 def test_edX_tracking_import(self):
     source = InURI(os.path.join(os.path.dirname(__file__),"data/edxTrackLogSample.json"))
     self.fileConverter = JSONToRelation(source, 
                                         OutputFile("testEdXImport.csv", OutputDisposition.OutputFormat.CSV),
                                         mainTableName='EdxTrackEvent'                                            )
     edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
     self.fileConverter.jsonParserInstance = edxJsonToRelParser
     self.fileConverter.convert(prependColHeader=True)
示例#4
0
 def test_embedded_json_strings_comma_escaping(self):
     source = InURI(os.path.join(os.path.dirname(__file__),"data/tinyEdXTrackLog.json"))
     self.fileConverter = JSONToRelation(source, 
                                         OutputFile("testTinyEdXImport.csv", OutputDisposition.OutputFormat.CSV),
                                         mainTableName='EdxTrackEvent'
                                         )
     edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
     self.fileConverter.jsonParserInstance = edxJsonToRelParser
     self.fileConverter.convert(prependColHeader=True)
示例#5
0
 def test_arrays(self):
     source = InURI(os.path.join(os.path.dirname(__file__),"data/jsonArray.json"))
     self.fileConverter = JSONToRelation(source, 
                                         OutputFile("testArrays.csv", OutputDisposition.OutputFormat.CSV),
                                         mainTableName='EdxTrackEvent'
                                         )
     edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
     edxJsonToRelParser.unittesting = True
     self.fileConverter.jsonParserInstance = edxJsonToRelParser
     self.fileConverter.convert(prependColHeader=True)
示例#6
0
    def test_edX_stress_import(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/tracking.log-20130609.gz"))

        print("Stress test: importing lots...")
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testEdXStressImport.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            progressEvery=10
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)
        print("Stress test done")
 def test_arrays(self):
     source = InURI(os.path.join(os.path.dirname(__file__),"data/jsonArray.json"))
     self.fileConverter = JSONToRelation(source, 
                                         OutputFile("testArrays.csv"),
                                         outputFormat = OutputDisposition.OutputFormat.CSV,
                                         )
     self.fileConverter.convert(prependColHeader=True)
 def setUp(self):
     super(TestJSONToRelation, self).setUp()
     self.tmpLogFile = tempfile.NamedTemporaryFile()
     self.stringSource = InURI(os.path.join(os.path.dirname(__file__),"data/twoJSONRecords.json"))
     self.fileConverter = JSONToRelation(self.stringSource, 
                                         OutputPipe(OutputDisposition.OutputFormat.CSV),
                                         logFile=self.tmpLogFile.name
                                         )
     # Remove various test output files if it exists:
     try:
         os.remove("testOutput.csv")
     except:
         pass
     try:
         os.remove("testOutputWithHeader.csv")
     except:
         pass
     try:
         os.remove("testArrays.csv")
     except:
         pass
     try:
         os.remove("testTinyEdXImport.csv")
     except:
         pass
     try:
         os.remove("testEdXImport.csv")
     except:
         pass
     try:
         os.remove("testEdXStressImport.csv")
     except:
         pass
 def test_edX_tracking_import(self):
     source = InURI(os.path.join(os.path.dirname(__file__),"data/edxTrackLogSample.json"))
     self.fileConverter = JSONToRelation(source, 
                                         OutputFile("testEdXImport.csv"),
                                         outputFormat = OutputDisposition.OutputFormat.CSV,
                                         )
     self.fileConverter.convert(prependColHeader=True)
 def test_embedded_json_strings_comma_escaping(self):
     source = InURI(os.path.join(os.path.dirname(__file__),"data/tinyEdXTrackLog.json"))
     self.fileConverter = JSONToRelation(source, 
                                         OutputFile("testTinyEdXImport.csv"),
                                         outputFormat = OutputDisposition.OutputFormat.CSV,
                                         )
     self.fileConverter.convert(prependColHeader=True)
 def test_edX_tracking_import(self):
     source = InURI(os.path.join(os.path.dirname(__file__),"data/edxTrackLogSample.json"))
     self.fileConverter = JSONToRelation(source, 
                                         OutputFile("testEdXImport.csv", OutputDisposition.OutputFormat.CSV),
                                         mainTableName='EdxTrackEvent'                                            )
     edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
     self.fileConverter.jsonParserInstance = edxJsonToRelParser
     self.fileConverter.convert(prependColHeader=True)
 def test_embedded_json_strings_comma_escaping(self):
     source = InURI(os.path.join(os.path.dirname(__file__),"data/tinyEdXTrackLog.json"))
     self.fileConverter = JSONToRelation(source, 
                                         OutputFile("testTinyEdXImport.csv", OutputDisposition.OutputFormat.CSV),
                                         mainTableName='EdxTrackEvent'
                                         )
     edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
     self.fileConverter.jsonParserInstance = edxJsonToRelParser
     self.fileConverter.convert(prependColHeader=True)
 def test_arrays(self):
     source = InURI(os.path.join(os.path.dirname(__file__), "data/jsonArray.json"))
     self.fileConverter = JSONToRelation(
         source, OutputFile("testArrays.csv", OutputDisposition.OutputFormat.CSV), mainTableName="EdxTrackEvent"
     )
     edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
     edxJsonToRelParser.unittesting = True
     self.fileConverter.jsonParserInstance = edxJsonToRelParser
     self.fileConverter.convert(prependColHeader=True)
示例#14
0
    def test_simple_json_to_file(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV)
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        with open(os.path.join(self.currDir, 'data/simpleJsonToFileTruth.txt'), 'r') as fd:
            expected = fd.read()
#         expected = "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
#                     "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
#                     "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
#                     "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
        if UPDATE_TRUTH:
            self.updateTruthFromFile(os.path.join(self.curDir,'testOutput.csv'), 
                                     os.path.join(self.curDir, 'data/rescueJSONTruth1.txt'))
        else:
            self.assertFileContentEquals(expected, "testOutput.csv")
 def test_simple_json_to_file(self):
     self.fileConverter = JSONToRelation(self.stringSource, 
                                         OutputFile("testOutput.csv"),
                                         outputFormat = OutputDisposition.OutputFormat.CSV,
                                         )
     self.fileConverter.convert()
     expected = "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
                 "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
                 "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
                 "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
     self.assertFileContentEquals(expected, "testOutput.csv")
    def test_edX_stress_import(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/tracking.log-20130609.gz"))

        print("Stress test: importing lots...")
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testEdXStressImport.csv"),
                                            outputFormat = OutputDisposition.OutputFormat.CSV,
                                            progressEvery=10
                                            )
        self.fileConverter.convert(prependColHeader=True)
        print("Stress test done")
 def test_json_to_file_with_col_header(self):
     self.fileConverter = JSONToRelation(self.stringSource, 
                                         OutputFile("testOutputWithHeader.csv"),
                                         outputFormat = OutputDisposition.OutputFormat.CSV,
                                         )
     self.fileConverter.convert(prependColHeader=True)
     expected = '"""_id.category""","""_id.name""","""_id.course""","""_id.tag""","""_id.org""","""_id.revision""",contentType,' +\
                'displayname,chunkSize,filename,length,import_path,"""uploadDate.$date""",thumbnail_location,md5\n' +\
                 "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
                 "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
                 "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
                 "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
     self.assertFileContentEquals(expected, "testOutputWithHeader.csv")
    def test_edX_stress_import(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/tracking.log-20130609.gz"))

        print("Stress test: importing lots...")
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testEdXStressImport.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            progressEvery=10
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)
        print("Stress test done")
示例#19
0
 def setUp(self):
     super(TestJSONToRelation, self).setUp()
     self.currDir = os.path.dirname(__file__)
     self.tmpLogFile = tempfile.NamedTemporaryFile()
     self.stringSource = InURI(os.path.join(os.path.dirname(__file__),"data/twoJSONRecords.json"))
     self.fileConverter = JSONToRelation(self.stringSource, 
                                         OutputPipe(OutputDisposition.OutputFormat.CSV),
                                         mainTableName='EdxTrackEvent',
                                         logFile=self.tmpLogFile.name
                                         )
     edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
     self.fileConverter.jsonParserInstance = edxJsonToRelParser
     # Remove various test output files if it exists:
     try:
         os.remove("testOutput.csv")
     except:
         pass
     try:
         os.remove("testOutputWithHeader.csv")
     except:
         pass
     try:
         os.remove("testArrays.csv")
     except:
         pass
     try:
         os.remove("testTinyEdXImport.csv")
     except:
         pass
     try:
         os.remove("testEdXImport.csv")
     except:
         pass
     try:
         os.remove("testEdXStressImport.csv")
     except:
         pass
    def test_schema_hints(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv"),
                                            outputFormat = OutputDisposition.OutputFormat.CSV,
                                            schemaHints = OrderedDict()
                                            )
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        #print schema
        #print map(ColumnSpec.getType, schema)
        self.assertEqual(['TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'DOUBLE', 'TEXT', 'DOUBLE', 'TEXT', 'TEXT', 'TEXT', 'TEXT'],
                         map(ColumnSpec.getType, schema))

        self.stringSource = InURI(os.path.join(os.path.dirname(__file__),"data/twoJSONRecords.json"))
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv"),
                                            outputFormat = OutputDisposition.OutputFormat.CSV,
                                            schemaHints = OrderedDict({'chunkSize' : ColDataType.INT,
                                                           'length' : ColDataType.INT})
                                            )
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        self.assertEqual(['TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'INT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'TEXT', 'TEXT'],
                         map(ColumnSpec.getType, schema))
def convert(inFilePath, destDir, targetFormat, dropTables=False):
    # Output file is name of input file with the
    # .json extension replaced by .sql
    outFullPath = buildOutputFileName(inFilePath, destDir)

    # Log file will go to <destDir>/../TransformLogs, the file being named j2s_<inputFileName>.log:
    logDir = os.path.join(destDir, '..', 'TransformLogs')
    if not os.access(logDir, os.W_OK):
        try:
            os.makedirs(logDir)
        except OSError:
            # Log dir already exists:
            pass

    logFile = os.path.join(logDir, 'j2s_%s.log' % os.path.basename(inFilePath))

    # Create an instance of JSONToRelation, taking input from the given file:
    # and pumping output to the given output path:
    if targetFormat == 'csv':
        outputFormat = OutputDisposition.OutputFormat.CSV
    elif targetFormat == 'sql_dump':
        outputFormat = OutputDisposition.OutputFormat.SQL_INSERT_STATEMENTS
    else:
        outputFormat = OutputDisposition.OutputFormat.SQL_INSERTS_AND_CSV

    outSQLFile = OutputFile(outFullPath, outputFormat,
                            options='wb')  # overwrite any existing sql file
    jsonConverter = JSONToRelation(InURI(inFilePath),
                                   outSQLFile,
                                   mainTableName='EdxTrackEvent',
                                   logFile=logFile,
                                   progressEvery=10000)
    try:
        jsonConverter.setParser(
            EdXTrackLogJSONParser(jsonConverter,
                                  'EdxTrackEvent',
                                  replaceTables=dropTables,
                                  dbName='Edx',
                                  progressEvery=10000))
    except Exception as e:
        with open(logFile, 'w') as fd:
            fd.write(
                "In json2sql: could not create EdXTrackLogJSONParser: %s" %
                ` e `)
        # Try to delete the .sql file that was created when
        # the OutputFile instance was made in the JSONToRelation
        # instantiation statement above:
        try:
            outSQLFile.remove()
        except Exception as e:
            pass
        sys.exit(1)

    jsonConverter.convert()
    def test_simple_json_to_file(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV)
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        with open(os.path.join(self.currDir, 'data/simpleJsonToFileTruth.txt'), 'r') as fd:
            expected = fd.read()
#         expected = "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
#                     "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
#                     "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
#                     "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
        if UPDATE_TRUTH:
            self.updateTruthFromFile(os.path.join(self.curDir,'testOutput.csv'), 
                                     os.path.join(self.curDir, 'data/rescueJSONTruth1.txt'))
        else:
            self.assertFileContentEquals(expected, "testOutput.csv")
 def setUp(self):
     super(TestJSONToRelation, self).setUp()
     self.currDir = os.path.dirname(__file__)
     self.tmpLogFile = tempfile.NamedTemporaryFile()
     self.stringSource = InURI(os.path.join(os.path.dirname(__file__), "data/twoJSONRecords.json"))
     self.fileConverter = JSONToRelation(
         self.stringSource,
         OutputPipe(OutputDisposition.OutputFormat.CSV),
         mainTableName="EdxTrackEvent",
         logFile=self.tmpLogFile.name,
     )
     edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
     self.fileConverter.jsonParserInstance = edxJsonToRelParser
     # Remove various test output files if it exists:
     try:
         os.remove("testOutput.csv")
     except:
         pass
     try:
         os.remove("testOutputWithHeader.csv")
     except:
         pass
     try:
         os.remove("testArrays.csv")
     except:
         pass
     try:
         os.remove("testTinyEdXImport.csv")
     except:
         pass
     try:
         os.remove("testEdXImport.csv")
     except:
         pass
     try:
         os.remove("testEdXStressImport.csv")
     except:
         pass
示例#24
0
class TestJSONToRelation(unittest.TestCase):
    
    def setUp(self):
        super(TestJSONToRelation, self).setUp()
        self.currDir = os.path.dirname(__file__)
        self.tmpLogFile = tempfile.NamedTemporaryFile()
        self.stringSource = InURI(os.path.join(os.path.dirname(__file__),"data/twoJSONRecords.json"))
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputPipe(OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            logFile=self.tmpLogFile.name
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        # Remove various test output files if it exists:
        try:
            os.remove("testOutput.csv")
        except:
            pass
        try:
            os.remove("testOutputWithHeader.csv")
        except:
            pass
        try:
            os.remove("testArrays.csv")
        except:
            pass
        try:
            os.remove("testTinyEdXImport.csv")
        except:
            pass
        try:
            os.remove("testEdXImport.csv")
        except:
            pass
        try:
            os.remove("testEdXStressImport.csv")
        except:
            pass
        

        
    def tearDown(self):
        super(TestJSONToRelation, self).tearDown()
        self.tmpLogFile.close()
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_ensure_mysql_identifier_legal(self):

        # Vanilla, legal name
        newName = self.fileConverter.ensureLegalIdentifierChars("foo")
        self.assertEqual("foo", newName)

        # Comma in name requires quoting
        newName = self.fileConverter.ensureLegalIdentifierChars("foo,")
        self.assertEqual('"foo,"', newName)
        
        # Embedded single quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars("fo'o")
        self.assertEqual('"fo\'o"', newName)

        # Embedded double quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars('fo"o')
        self.assertEqual("'fo\"o'", newName)

        # Embedded double and single quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars('fo"o\'bar')
        self.assertEqual("'fo\"o\'\'bar'", newName)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")        
    def test_simple_json(self):
        # Prints output to display, which we can't catch without
        # fuzzing with stdout. So just ensure no error happens: 
        self.fileConverter.convert()

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_simple_json_to_file(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV)
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        with open(os.path.join(self.currDir, 'data/simpleJsonToFileTruth.txt'), 'r') as fd:
            expected = fd.read()
#         expected = "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
#                     "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
#                     "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
#                     "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
        if UPDATE_TRUTH:
            self.updateTruthFromFile(os.path.join(self.curDir,'testOutput.csv'), 
                                     os.path.join(self.curDir, 'data/rescueJSONTruth1.txt'))
        else:
            self.assertFileContentEquals(expected, "testOutput.csv")

#     @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
#     def test_json_to_file_with_col_header(self):
#         self.fileConverter = JSONToRelation(self.stringSource, 
#                                             OutputFile("testOutputWithHeader.csv", OutputDisposition.OutputFormat.CSV),
#                                             mainTableName='EdxTrackEvent'
#                                             )
#         edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
#         edxJsonToRelParser.unittesting = True
#         self.fileConverter.jsonParserInstance = edxJsonToRelParser
#         self.fileConverter.convert(prependColHeader=True)
#         expected = '"""_id.category""","""_id.name""","""_id.course""","""_id.tag""","""_id.org""","""_id.revision""",contentType,' +\
#                    'displayname,chunkSize,filename,length,import_path,"""uploadDate.$date""",thumbnail_location,md5\n' +\
#                     "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
#                     "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
#                     "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
#                     "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
#         *******self.assertFileContentEquals(expected, "testOutputWithHeader.csv")


    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_arrays(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/jsonArray.json"))
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testArrays.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent'
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        edxJsonToRelParser.unittesting = True
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)


    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_embedded_json_strings_comma_escaping(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/tinyEdXTrackLog.json"))
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testTinyEdXImport.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent'
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)
    
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_edX_tracking_import(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/edxTrackLogSample.json"))
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testEdXImport.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent'                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_edX_stress_import(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/tracking.log-20130609.gz"))

        print("Stress test: importing lots...")
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testEdXStressImport.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            progressEvery=10
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)
        print("Stress test done")
        

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_schema_hints(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            schemaHints = OrderedDict()
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        #print schema
        #print map(ColumnSpec.getType, schema)
        self.assertEqual(['VARCHAR(40)', 'VARCHAR(40)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'DATETIME', 'TEXT', 'DATETIME', 'TEXT', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'INT', 'INT', 'INT', 'INT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'TEXT', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)'],
                         map(ColumnSpec.getType, schema))

        self.stringSource = InURI(os.path.join(os.path.dirname(__file__),"data/twoJSONRecords.json"))
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            schemaHints = OrderedDict({'chunkSize' : ColDataType.INT,
                                                           'length' : ColDataType.INT})
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        self.assertEqual(['VARCHAR(40)', 'VARCHAR(40)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'DATETIME', 'TEXT', 'DATETIME', 'TEXT', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'INT', 'INT', 'INT', 'INT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'TEXT', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)'],
                         map(ColumnSpec.getType, schema))


    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testInsertStatementConstruction(self):
        
        # No value array in hold-back buffer:
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'TestTable'
        self.fileConverter.currValsArray = []
        self.assertIsNone(self.fileConverter.finalizeInsertStatement())

        # One value array in hold-back buffer:
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'MyTable'
        self.fileConverter.currValsArray = [['foo', 10]]
        res = self.fileConverter.finalizeInsertStatement()
        #print res
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    ('foo',10);", res)
        
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'MyTable'
        self.fileConverter.currValsArray = [['foo', 10], ['bar', 20]]
        res = self.fileConverter.finalizeInsertStatement()
        #print res
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    ('foo',10),\n    ('bar',20);", res)
        
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testPrepareMySQLRow(self):
        
        # Pretend to be the edx parser, sending one insert's worth of
        # info. This will fit into the hold-back buffer, and return None:
        insertInfo = ('MyTable', 'col1, col2', [['foo', 10],['bar',20]])
        #print(res)
        self.assertEqual('LoadInfo', self.fileConverter.currOutTable)
        self.assertEqual('load_info_id,load_date_time,load_file', self.fileConverter.currInsertSig)
        currValsArr = self.fileConverter.currValsArray
        self.assertEqual('d4e622ff221b1eec00405f1b69893ff544ac5d75', currValsArr[0][0])
        self.assertEqual('file:///home/paepcke/EclipseWorkspaces/json_to_relation/json_to_relation/test/data/twoJSONRecords.json', currValsArr[0][2])
        
        # Lower the allowed MySQL packet size to force immediate creation of INSERT statement:
        self.fileConverter.__class__.MAX_ALLOWED_PACKET_SIZE = 3
        self.fileConverter.currInsertSig = None
        self.fileConverter.currOutTable = None
        self.fileConverter.currValsArray = []
        res = self.fileConverter.prepareMySQLRow(insertInfo)
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    ('foo',10),\n    ('bar',20);", res)

        # Set the allowed MySQL packet size to allow a first INSERT statement to be 
        # held back, but a second call must trigger sending of the held back
        # values, holding back the newly submitted values: 
        
        # First call:
        self.fileConverter.__class__.MAX_ALLOWED_PACKET_SIZE = 211
        self.fileConverter.currInsertSig = None
        self.fileConverter.currOutTable = None
        self.fileConverter.currValsArray = []
        res = self.fileConverter.prepareMySQLRow(insertInfo)
        self.assertIsNone(res)
        self.assertEqual('MyTable', self.fileConverter.currOutTable)
        self.assertEqual('col1, col2', self.fileConverter.currInsertSig)
        self.assertEqual([[['foo', 10], ['bar', 20]]], self.fileConverter.currValsArray)

        # Second call:
        insertMoreInfo = ('MyTable', 'col1, col2', [['blue', 30.1],['green',40.99]])
        res = self.fileConverter.prepareMySQLRow(insertMoreInfo)
        # Should have insert statement for the prior submission...
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    (['foo', 10],['bar', 20]);", res)
        # ... and the hold-back buffer should have the new submission:
        self.assertEqual('MyTable', self.fileConverter.currOutTable)
        self.assertEqual('col1, col2', self.fileConverter.currInsertSig)
        self.assertEqual([[['blue', 30.1], ['green', 40.99]]], self.fileConverter.currValsArray)
         
        # Call FLUSH to get the held-back values:
        res = self.fileConverter.prepareMySQLRow('FLUSH')
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    (['blue', 30.1],['green', 40.99]);", res)
        self.assertIsNone(self.fileConverter.currOutTable)
        self.assertIsNone(self.fileConverter.currInsertSig)
        self.assertEqual([], self.fileConverter.currValsArray)
        
#--------------------------------------------------------------------------------------------------    
    def assertFileContentEquals(self, expected, filePath):
        strFile = StringIO.StringIO(expected)
        with open(filePath, 'r') as fd:
            for fileLine in fd:
                expectedLine = strFile.readline()
                self.assertEqual(expectedLine.strip(), fileLine.strip())
            
            if strFile.readline() != "":
                # expected is longer than what's in the file:
                self.fail("Expected string is longer than content of output file %s" % filePath)
        
    def updateTruthFromFile(self, referenceFileName, actualTruthFileName):
        '''
        Copy actualTruthFileName content to referenceFileName. 
        ReferenceFileName is a ground truth file to which test run
        results are compared. When the code changes so much that editing
        that ground truth file to match the new correct output of the
        test, then accept the test output as the future ground truth.
        :param referenceFileName: File to be overwritten
        :type referenceFileName: String
        :param actualTruthFileName: Source file
        :type actualTruthFileName: String
        '''
        shutil.copyfile(actualTruthFileName, referenceFileName)
class TestJSONToRelation(unittest.TestCase):
    
    def setUp(self):
        super(TestJSONToRelation, self).setUp()
        self.tmpLogFile = tempfile.NamedTemporaryFile()
        self.stringSource = InURI(os.path.join(os.path.dirname(__file__),"data/twoJSONRecords.json"))
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputPipe(OutputDisposition.OutputFormat.CSV),
                                            logFile=self.tmpLogFile.name
                                            )
        # Remove various test output files if it exists:
        try:
            os.remove("testOutput.csv")
        except:
            pass
        try:
            os.remove("testOutputWithHeader.csv")
        except:
            pass
        try:
            os.remove("testArrays.csv")
        except:
            pass
        try:
            os.remove("testTinyEdXImport.csv")
        except:
            pass
        try:
            os.remove("testEdXImport.csv")
        except:
            pass
        try:
            os.remove("testEdXStressImport.csv")
        except:
            pass
        

        
    def tearDown(self):
        super(TestJSONToRelation, self).tearDown()
        self.tmpLogFile.close()
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_ensure_mysql_identifier_legal(self):

        # Vanilla, legal name
        newName = self.fileConverter.ensureLegalIdentifierChars("foo")
        self.assertEqual("foo", newName)

        # Comma in name requires quoting
        newName = self.fileConverter.ensureLegalIdentifierChars("foo,")
        self.assertEqual('"foo,"', newName)
        
        # Embedded single quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars("fo'o")
        self.assertEqual('"fo\'o"', newName)

        # Embedded double quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars('fo"o')
        self.assertEqual("'fo\"o'", newName)

        # Embedded double and single quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars('fo"o\'bar')
        self.assertEqual("'fo\"o\'\'bar'", newName)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")        
    def test_simple_json(self):
        # Prints output to display, which we can't catch without
        # fuzzing with stdout. So just ensure no error happens: 
        self.fileConverter.convert()

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_simple_json_to_file(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv"),
                                            outputFormat = OutputDisposition.OutputFormat.CSV,
                                            )
        self.fileConverter.convert()
        expected = "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
                    "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
                    "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
                    "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
        self.assertFileContentEquals(expected, "testOutput.csv")

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_json_to_file_with_col_header(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutputWithHeader.csv"),
                                            outputFormat = OutputDisposition.OutputFormat.CSV,
                                            )
        self.fileConverter.convert(prependColHeader=True)
        expected = '"""_id.category""","""_id.name""","""_id.course""","""_id.tag""","""_id.org""","""_id.revision""",contentType,' +\
                   'displayname,chunkSize,filename,length,import_path,"""uploadDate.$date""",thumbnail_location,md5\n' +\
                    "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
                    "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
                    "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
                    "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
        self.assertFileContentEquals(expected, "testOutputWithHeader.csv")


    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_arrays(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/jsonArray.json"))
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testArrays.csv"),
                                            outputFormat = OutputDisposition.OutputFormat.CSV,
                                            )
        self.fileConverter.convert(prependColHeader=True)


    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_embedded_json_strings_comma_escaping(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/tinyEdXTrackLog.json"))
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testTinyEdXImport.csv"),
                                            outputFormat = OutputDisposition.OutputFormat.CSV,
                                            )
        self.fileConverter.convert(prependColHeader=True)
    
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_edX_tracking_import(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/edxTrackLogSample.json"))
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testEdXImport.csv"),
                                            outputFormat = OutputDisposition.OutputFormat.CSV,
                                            )
        self.fileConverter.convert(prependColHeader=True)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_edX_stress_import(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/tracking.log-20130609.gz"))

        print("Stress test: importing lots...")
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testEdXStressImport.csv"),
                                            outputFormat = OutputDisposition.OutputFormat.CSV,
                                            progressEvery=10
                                            )
        self.fileConverter.convert(prependColHeader=True)
        print("Stress test done")
        

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_schema_hints(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv"),
                                            outputFormat = OutputDisposition.OutputFormat.CSV,
                                            schemaHints = OrderedDict()
                                            )
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        #print schema
        #print map(ColumnSpec.getType, schema)
        self.assertEqual(['TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'DOUBLE', 'TEXT', 'DOUBLE', 'TEXT', 'TEXT', 'TEXT', 'TEXT'],
                         map(ColumnSpec.getType, schema))

        self.stringSource = InURI(os.path.join(os.path.dirname(__file__),"data/twoJSONRecords.json"))
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv"),
                                            outputFormat = OutputDisposition.OutputFormat.CSV,
                                            schemaHints = OrderedDict({'chunkSize' : ColDataType.INT,
                                                           'length' : ColDataType.INT})
                                            )
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        self.assertEqual(['TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'INT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'TEXT', 'TEXT'],
                         map(ColumnSpec.getType, schema))


    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testInsertStatementConstruction(self):
        
        # No value array in hold-back buffer:
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'TestTable'
        self.fileConverter.currValsArray = []
        try:
            self.fileConverter.finalizeInsertStatement()
            self.fail("Expected ValueError for empty hold-back buffer")
        except ValueError:
            pass

        # One value array in hold-back buffer:
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'MyTable'
        self.fileConverter.currValsArray = [['foo', 10]]
        res = self.fileConverter.finalizeInsertStatement()
        #print res
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    ('foo',10);", res)
        
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'MyTable'
        self.fileConverter.currValsArray = [['foo', 10], ['bar', 20]]
        res = self.fileConverter.finalizeInsertStatement()
        #print res
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    ('foo',10),\n    ('bar',20);", res)
        
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testPrepareMySQLRow(self):
        
        # Pretend to be the edx parser, sending one insert's worth of
        # info. This will fit into the hold-back buffer, and return None:
        insertInfo = ('MyTable', 'col1, col2', [['foo', 10],['bar',20]])
        res = self.fileConverter.prepareMySQLRow(insertInfo)
        self.assertIsNone(res)
        self.assertEqual('MyTable', self.fileConverter.currOutTable)
        self.assertEqual('col1, col2', self.fileConverter.currInsertSig)
        self.assertEqual([['foo', 10], ['bar', 20]], self.fileConverter.currValsArray)
        
        # Lower the allowed MySQL packet size to force immediate creation of INSERT statement:
        self.fileConverter.__class__.MAX_ALLOWED_PACKET_SIZE = 3
        self.fileConverter.currInsertSig = None
        self.fileConverter.currOutTable = None
        self.fileConverter.currValsArray = []
        res = self.fileConverter.prepareMySQLRow(insertInfo)
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    ('foo',10),\n    ('bar',20);", res)

        # Set the allowed MySQL packet size to allow a first INSERT statement to be 
        # held back, but a second call must trigger sending of the held back
        # values, holding back the newly submitted values: 
        
        # First call:
        self.fileConverter.__class__.MAX_ALLOWED_PACKET_SIZE = 60
        self.fileConverter.currInsertSig = None
        self.fileConverter.currOutTable = None
        self.fileConverter.currValsArray = []
        res = self.fileConverter.prepareMySQLRow(insertInfo)
        self.assertIsNone(res)
        self.assertEqual('MyTable', self.fileConverter.currOutTable)
        self.assertEqual('col1, col2', self.fileConverter.currInsertSig)
        self.assertEqual([['foo', 10], ['bar', 20]], self.fileConverter.currValsArray)

        # Second call:
        insertMoreInfo = ('MyTable', 'col1, col2', [['blue', 30.1],['green',40.99]])
        res = self.fileConverter.prepareMySQLRow(insertMoreInfo)
        # Should have insert statement for the prior submission...
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    ('foo',10),\n    ('bar',20);", res)
        # ... and the hold-back buffer should have the new submission:
        self.assertEqual('MyTable', self.fileConverter.currOutTable)
        self.assertEqual('col1, col2', self.fileConverter.currInsertSig)
        self.assertEqual([['blue', 30.1], ['green', 40.99]], self.fileConverter.currValsArray)
         
        # Call FLUSH to get the held-back values:
        res = self.fileConverter.prepareMySQLRow('FLUSH')
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    ('blue',30.1),\n    ('green',40.99);", res)
        self.assertIsNone(self.fileConverter.currOutTable)
        self.assertIsNone(self.fileConverter.currInsertSig)
        self.assertEqual([], self.fileConverter.currValsArray)
        
#--------------------------------------------------------------------------------------------------    
    def assertFileContentEquals(self, expected, filePath):
        strFile = StringIO.StringIO(expected)
        with open(filePath, 'r') as fd:
            for fileLine in fd:
                expectedLine = strFile.readline()
                self.assertEqual(expectedLine.strip(), fileLine.strip())
            
            if strFile.readline() != "":
                # expected is longer than what's in the file:
                self.fail("Expected string is longer than content of output file %s" % filePath)
class TestJSONToRelation(unittest.TestCase):
    
    def setUp(self):
        super(TestJSONToRelation, self).setUp()
        self.currDir = os.path.dirname(__file__)
        self.tmpLogFile = tempfile.NamedTemporaryFile()
        self.stringSource = InURI(os.path.join(os.path.dirname(__file__),"data/twoJSONRecords.json"))
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputPipe(OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            logFile=self.tmpLogFile.name
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        # Remove various test output files if it exists:
        try:
            os.remove("testOutput.csv")
        except:
            pass
        try:
            os.remove("testOutputWithHeader.csv")
        except:
            pass
        try:
            os.remove("testArrays.csv")
        except:
            pass
        try:
            os.remove("testTinyEdXImport.csv")
        except:
            pass
        try:
            os.remove("testEdXImport.csv")
        except:
            pass
        try:
            os.remove("testEdXStressImport.csv")
        except:
            pass
        

        
    def tearDown(self):
        super(TestJSONToRelation, self).tearDown()
        self.tmpLogFile.close()
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_ensure_mysql_identifier_legal(self):

        # Vanilla, legal name
        newName = self.fileConverter.ensureLegalIdentifierChars("foo")
        self.assertEqual("foo", newName)

        # Comma in name requires quoting
        newName = self.fileConverter.ensureLegalIdentifierChars("foo,")
        self.assertEqual('"foo,"', newName)
        
        # Embedded single quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars("fo'o")
        self.assertEqual('"fo\'o"', newName)

        # Embedded double quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars('fo"o')
        self.assertEqual("'fo\"o'", newName)

        # Embedded double and single quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars('fo"o\'bar')
        self.assertEqual("'fo\"o\'\'bar'", newName)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")        
    def test_simple_json(self):
        # Prints output to display, which we can't catch without
        # fuzzing with stdout. So just ensure no error happens: 
        self.fileConverter.convert()

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_simple_json_to_file(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV)
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        with open(os.path.join(self.currDir, 'data/simpleJsonToFileTruth.txt'), 'r') as fd:
            expected = fd.read()
#         expected = "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
#                     "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
#                     "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
#                     "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
        if UPDATE_TRUTH:
            self.updateTruthFromFile(os.path.join(self.curDir,'testOutput.csv'), 
                                     os.path.join(self.curDir, 'data/rescueJSONTruth1.txt'))
        else:
            self.assertFileContentEquals(expected, "testOutput.csv")

#     @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
#     def test_json_to_file_with_col_header(self):
#         self.fileConverter = JSONToRelation(self.stringSource, 
#                                             OutputFile("testOutputWithHeader.csv", OutputDisposition.OutputFormat.CSV),
#                                             mainTableName='EdxTrackEvent'
#                                             )
#         edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
#         edxJsonToRelParser.unittesting = True
#         self.fileConverter.jsonParserInstance = edxJsonToRelParser
#         self.fileConverter.convert(prependColHeader=True)
#         expected = '"""_id.category""","""_id.name""","""_id.course""","""_id.tag""","""_id.org""","""_id.revision""",contentType,' +\
#                    'displayname,chunkSize,filename,length,import_path,"""uploadDate.$date""",thumbnail_location,md5\n' +\
#                     "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
#                     "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
#                     "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
#                     "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
#         *******self.assertFileContentEquals(expected, "testOutputWithHeader.csv")


    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_arrays(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/jsonArray.json"))
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testArrays.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent'
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        edxJsonToRelParser.unittesting = True
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)


    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_embedded_json_strings_comma_escaping(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/tinyEdXTrackLog.json"))
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testTinyEdXImport.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent'
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)
    
    
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_edX_tracking_import(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/edxTrackLogSample.json"))
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testEdXImport.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent'                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_edX_stress_import(self):
        source = InURI(os.path.join(os.path.dirname(__file__),"data/tracking.log-20130609.gz"))

        print("Stress test: importing lots...")
        self.fileConverter = JSONToRelation(source, 
                                            OutputFile("testEdXStressImport.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            progressEvery=10
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)
        print("Stress test done")
        

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_schema_hints(self):
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            schemaHints = OrderedDict()
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        #print schema
        #print map(ColumnSpec.getType, schema)
        self.assertEqual(['VARCHAR(40)', 'VARCHAR(40)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'DATETIME', 'TEXT', 'DATETIME', 'TEXT', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'INT', 'INT', 'INT', 'INT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'TEXT', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)'],
                         map(ColumnSpec.getType, schema))

        self.stringSource = InURI(os.path.join(os.path.dirname(__file__),"data/twoJSONRecords.json"))
        self.fileConverter = JSONToRelation(self.stringSource, 
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            schemaHints = OrderedDict({'chunkSize' : ColDataType.INT,
                                                           'length' : ColDataType.INT})
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        self.assertEqual(['VARCHAR(40)', 'VARCHAR(40)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'DATETIME', 'TEXT', 'DATETIME', 'TEXT', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'TEXT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT', 'INT', 'INT', 'INT', 'INT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'TEXT', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)'],
                         map(ColumnSpec.getType, schema))


    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testInsertStatementConstruction(self):
        
        # No value array in hold-back buffer:
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'TestTable'
        self.fileConverter.currValsArray = []
        self.assertIsNone(self.fileConverter.finalizeInsertStatement())

        # One value array in hold-back buffer:
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'MyTable'
        self.fileConverter.currValsArray = [['foo', 10]]
        res = self.fileConverter.finalizeInsertStatement()
        #print res
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    ('foo',10);", res)
        
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'MyTable'
        self.fileConverter.currValsArray = [['foo', 10], ['bar', 20]]
        res = self.fileConverter.finalizeInsertStatement()
        #print res
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    ('foo',10),\n    ('bar',20);", res)
        
    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testPrepareMySQLRow(self):
        
        # Pretend to be the edx parser, sending one insert's worth of
        # info. This will fit into the hold-back buffer, and return None:
        insertInfo = ('MyTable', 'col1, col2', [['foo', 10],['bar',20]])
        #print(res)
        self.assertEqual('LoadInfo', self.fileConverter.currOutTable)
        self.assertEqual('load_info_id,load_date_time,load_file', self.fileConverter.currInsertSig)
        currValsArr = self.fileConverter.currValsArray
        self.assertEqual('d4e622ff221b1eec00405f1b69893ff544ac5d75', currValsArr[0][0])
        self.assertEqual('file:///home/paepcke/EclipseWorkspaces/json_to_relation/json_to_relation/test/data/twoJSONRecords.json', currValsArr[0][2])
        
        # Lower the allowed MySQL packet size to force immediate creation of INSERT statement:
        self.fileConverter.__class__.MAX_ALLOWED_PACKET_SIZE = 3
        self.fileConverter.currInsertSig = None
        self.fileConverter.currOutTable = None
        self.fileConverter.currValsArray = []
        res = self.fileConverter.prepareMySQLRow(insertInfo)
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    ('foo',10),\n    ('bar',20);", res)

        # Set the allowed MySQL packet size to allow a first INSERT statement to be 
        # held back, but a second call must trigger sending of the held back
        # values, holding back the newly submitted values: 
        
        # First call:
        self.fileConverter.__class__.MAX_ALLOWED_PACKET_SIZE = 211
        self.fileConverter.currInsertSig = None
        self.fileConverter.currOutTable = None
        self.fileConverter.currValsArray = []
        res = self.fileConverter.prepareMySQLRow(insertInfo)
        self.assertIsNone(res)
        self.assertEqual('MyTable', self.fileConverter.currOutTable)
        self.assertEqual('col1, col2', self.fileConverter.currInsertSig)
        self.assertEqual([[['foo', 10], ['bar', 20]]], self.fileConverter.currValsArray)

        # Second call:
        insertMoreInfo = ('MyTable', 'col1, col2', [['blue', 30.1],['green',40.99]])
        res = self.fileConverter.prepareMySQLRow(insertMoreInfo)
        # Should have insert statement for the prior submission...
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    (['foo', 10],['bar', 20]);", res)
        # ... and the hold-back buffer should have the new submission:
        self.assertEqual('MyTable', self.fileConverter.currOutTable)
        self.assertEqual('col1, col2', self.fileConverter.currInsertSig)
        self.assertEqual([[['blue', 30.1], ['green', 40.99]]], self.fileConverter.currValsArray)
         
        # Call FLUSH to get the held-back values:
        res = self.fileConverter.prepareMySQLRow('FLUSH')
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n    (['blue', 30.1],['green', 40.99]);", res)
        self.assertIsNone(self.fileConverter.currOutTable)
        self.assertIsNone(self.fileConverter.currInsertSig)
        self.assertEqual([], self.fileConverter.currValsArray)
        
#--------------------------------------------------------------------------------------------------    
    def assertFileContentEquals(self, expected, filePath):
        strFile = StringIO.StringIO(expected)
        with open(filePath, 'r') as fd:
            for fileLine in fd:
                expectedLine = strFile.readline()
                self.assertEqual(expectedLine.strip(), fileLine.strip())
            
            if strFile.readline() != "":
                # expected is longer than what's in the file:
                self.fail("Expected string is longer than content of output file %s" % filePath)
        
    def updateTruthFromFile(self, referenceFileName, actualTruthFileName):
        '''
        Copy actualTruthFileName content to referenceFileName. 
        ReferenceFileName is a ground truth file to which test run
        results are compared. When the code changes so much that editing
        that ground truth file to match the new correct output of the
        test, then accept the test output as the future ground truth.
        :param referenceFileName: File to be overwritten
        :type referenceFileName: String
        :param actualTruthFileName: Source file
        :type actualTruthFileName: String
        '''
        shutil.copyfile(actualTruthFileName, referenceFileName)