# Imports used by the code below. (This excerpt begins mid-file; the
# project-level imports -- JSONToRelation, OutputDisposition, OutputFile,
# OutputPipe, InURI, EdXTrackLogJSONParser, ColumnSpec, ColDataType,
# buildOutputFileName, and the TEST_ALL/UPDATE_TRUTH flags -- come from the
# json_to_relation package's own modules and are not reproduced here.)
import os
import shutil
import StringIO
import sys
import tempfile
import unittest
from collections import OrderedDict

def convert(inFilePath, destDir, targetFormat, dropTables=False):
    # Output file is the name of the input file, with the
    # .json extension replaced by .sql:
    outFullPath = buildOutputFileName(inFilePath, destDir)
    # Log file will go to <destDir>/../TransformLogs, the file being
    # named j2s_<inputFileName>.log:
    logDir = os.path.join(destDir, '..', 'TransformLogs')
    if not os.access(logDir, os.W_OK):
        try:
            os.makedirs(logDir)
        except OSError:
            # Log dir already exists:
            pass
    logFile = os.path.join(logDir, 'j2s_%s.log' % os.path.basename(inFilePath))
    # Create an instance of JSONToRelation, taking input from the given file
    # and pumping output to the given output path:
    if targetFormat == 'csv':
        outputFormat = OutputDisposition.OutputFormat.CSV
    elif targetFormat == 'sql_dump':
        outputFormat = OutputDisposition.OutputFormat.SQL_INSERT_STATEMENTS
    else:
        outputFormat = OutputDisposition.OutputFormat.SQL_INSERTS_AND_CSV
    # Overwrite any existing .sql file:
    outSQLFile = OutputFile(outFullPath, outputFormat, options='wb')
    jsonConverter = JSONToRelation(InURI(inFilePath),
                                   outSQLFile,
                                   mainTableName='EdxTrackEvent',
                                   logFile=logFile,
                                   progressEvery=10000)
    try:
        jsonConverter.setParser(EdXTrackLogJSONParser(jsonConverter,
                                                      'EdxTrackEvent',
                                                      replaceTables=dropTables,
                                                      dbName='Edx',
                                                      progressEvery=10000))
    except Exception as e:
        with open(logFile, 'w') as fd:
            fd.write("In json2sql: could not create EdXTrackLogJSONParser: %s" % str(e))
        # Try to delete the .sql file that was created when
        # the OutputFile instance was made in the JSONToRelation
        # instantiation statement above:
        try:
            outSQLFile.remove()
        except Exception:
            pass
        sys.exit(1)
    jsonConverter.convert()
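# Illustrative invocation of convert() with hypothetical paths; in json2sql
# the actual arguments come from command-line parsing, which is outside this
# excerpt:
#
#   convert('/var/log/tracking/tracking.log-20130609.gz',
#           '/tmp/transforms',
#           'sql_dump',
#           dropTables=True)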
class TestJSONToRelation(unittest.TestCase):

    def setUp(self):
        super(TestJSONToRelation, self).setUp()
        self.currDir = os.path.dirname(__file__)
        self.tmpLogFile = tempfile.NamedTemporaryFile()
        self.stringSource = InURI(os.path.join(os.path.dirname(__file__), "data/twoJSONRecords.json"))
        self.fileConverter = JSONToRelation(self.stringSource,
                                            OutputPipe(OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            logFile=self.tmpLogFile.name
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        # Remove various test output files if they exist:
        for oldOutFile in ("testOutput.csv",
                           "testOutputWithHeader.csv",
                           "testArrays.csv",
                           "testTinyEdXImport.csv",
                           "testEdXImport.csv",
                           "testEdXStressImport.csv"):
            try:
                os.remove(oldOutFile)
            except OSError:
                pass

    def tearDown(self):
        super(TestJSONToRelation, self).tearDown()
        self.tmpLogFile.close()

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_ensure_mysql_identifier_legal(self):
        # Vanilla, legal name:
        newName = self.fileConverter.ensureLegalIdentifierChars("foo")
        self.assertEqual("foo", newName)
        # Comma in name requires quoting:
        newName = self.fileConverter.ensureLegalIdentifierChars("foo,")
        self.assertEqual('"foo,"', newName)
        # Embedded single quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars("fo'o")
        self.assertEqual('"fo\'o"', newName)
        # Embedded double quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars('fo"o')
        self.assertEqual("'fo\"o'", newName)
        # Embedded double and single quotes:
        newName = self.fileConverter.ensureLegalIdentifierChars('fo"o\'bar')
        self.assertEqual("'fo\"o\'\'bar'", newName)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_simple_json(self):
        # Prints output to the display, which we can't capture without
        # messing with stdout. So just ensure no error happens:
        self.fileConverter.convert()

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_simple_json_to_file(self):
        self.fileConverter = JSONToRelation(self.stringSource,
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV)
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        with open(os.path.join(self.currDir, 'data/simpleJsonToFileTruth.txt'), 'r') as fd:
            expected = fd.read()
        # expected = "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
        #            "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
        #            "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
        #            "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
        if UPDATE_TRUTH:
            self.updateTruthFromFile(os.path.join(self.currDir, 'testOutput.csv'),
                                     os.path.join(self.currDir, 'data/rescueJSONTruth1.txt'))
        else:
            self.assertFileContentEquals(expected, "testOutput.csv")

    # @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    # def test_json_to_file_with_col_header(self):
    #     self.fileConverter = JSONToRelation(self.stringSource,
    #                                         OutputFile("testOutputWithHeader.csv", OutputDisposition.OutputFormat.CSV),
    #                                         mainTableName='EdxTrackEvent'
    #                                         )
    #     edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
    #     edxJsonToRelParser.unittesting = True
    #     self.fileConverter.jsonParserInstance = edxJsonToRelParser
    #     self.fileConverter.convert(prependColHeader=True)
    #     expected = '"""_id.category""","""_id.name""","""_id.course""","""_id.tag""","""_id.org""","""_id.revision""",contentType,' +\
    #                'displayname,chunkSize,filename,length,import_path,"""uploadDate.$date""",thumbnail_location,md5\n' +\
    #                "asset,sainani.jpg,HRP258,c4x,Medicine,,image/jpeg,sainani.jpg,262144,/c4x/Medicine/HRP258/asset/sainani.jpg," +\
    #                "22333,,2013-05-08T22:47:09.762Z,,ebcb2a60b0d6b7475c4e9a102b82637b\n" +\
    #                "asset,medstats.png,HRP258,c4x,Medicine,,image/png,medstats.png,262144,/c4x/Medicine/HRP258/asset/medstats.png," +\
    #                "86597,,2013-05-08T22:48:38.174Z,,db47f263ac3532874b8f442ad8937d02"
    #     self.assertFileContentEquals(expected, "testOutputWithHeader.csv")

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_arrays(self):
        source = InURI(os.path.join(os.path.dirname(__file__), "data/jsonArray.json"))
        self.fileConverter = JSONToRelation(source,
                                            OutputFile("testArrays.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent'
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        edxJsonToRelParser.unittesting = True
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_embedded_json_strings_comma_escaping(self):
        source = InURI(os.path.join(os.path.dirname(__file__), "data/tinyEdXTrackLog.json"))
        self.fileConverter = JSONToRelation(source,
                                            OutputFile("testTinyEdXImport.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent'
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_edX_tracking_import(self):
        source = InURI(os.path.join(os.path.dirname(__file__), "data/edxTrackLogSample.json"))
        self.fileConverter = JSONToRelation(source,
                                            OutputFile("testEdXImport.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent'
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_edX_stress_import(self):
        source = InURI(os.path.join(os.path.dirname(__file__), "data/tracking.log-20130609.gz"))
        print("Stress test: importing lots...")
        self.fileConverter = JSONToRelation(source,
                                            OutputFile("testEdXStressImport.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            progressEvery=10
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert(prependColHeader=True)
        print("Stress test done")

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def test_schema_hints(self):
        self.fileConverter = JSONToRelation(self.stringSource,
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            schemaHints=OrderedDict()
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        #print schema
        #print map(ColumnSpec.getType, schema)
        self.assertEqual(['VARCHAR(40)', 'VARCHAR(40)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'TEXT',
                          'TEXT', 'DATETIME', 'TEXT', 'DATETIME', 'TEXT', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)',
                          'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT',
                          'TEXT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT',
                          'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)',
                          'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)',
                          'VARCHAR(255)', 'VARCHAR(255)', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT',
                          'TEXT', 'INT', 'INT', 'INT', 'INT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)',
                          'INT', 'TEXT', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)'],
                         map(ColumnSpec.getType, schema))
        self.stringSource = InURI(os.path.join(os.path.dirname(__file__), "data/twoJSONRecords.json"))
        self.fileConverter = JSONToRelation(self.stringSource,
                                            OutputFile("testOutput.csv", OutputDisposition.OutputFormat.CSV),
                                            mainTableName='EdxTrackEvent',
                                            schemaHints=OrderedDict({'chunkSize': ColDataType.INT,
                                                                     'length': ColDataType.INT})
                                            )
        edxJsonToRelParser = EdXTrackLogJSONParser(self.fileConverter, "EdxTrackEvent", useDisplayNameCache=True)
        self.fileConverter.jsonParserInstance = edxJsonToRelParser
        self.fileConverter.convert()
        schema = self.fileConverter.getSchema()
        self.assertEqual(['VARCHAR(40)', 'VARCHAR(40)', 'TEXT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'TEXT',
                          'TEXT', 'DATETIME', 'TEXT', 'DATETIME', 'TEXT', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)',
                          'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT',
                          'TEXT', 'TEXT', 'INT', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT', 'TEXT',
                          'VARCHAR(255)', 'INT', 'INT', 'VARCHAR(255)', 'TEXT', 'VARCHAR(255)', 'VARCHAR(255)',
                          'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)',
                          'VARCHAR(255)', 'VARCHAR(255)', 'TEXT', 'TEXT', 'VARCHAR(255)', 'TEXT', 'TINYINT',
                          'TEXT', 'INT', 'INT', 'INT', 'INT', 'VARCHAR(255)', 'VARCHAR(255)', 'VARCHAR(255)',
                          'INT', 'TEXT', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)', 'VARCHAR(40)'],
                         map(ColumnSpec.getType, schema))

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testInsertStatementConstruction(self):
        # No value array in hold-back buffer:
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'TestTable'
        self.fileConverter.currValsArray = []
        self.assertIsNone(self.fileConverter.finalizeInsertStatement())

        # One value array in hold-back buffer:
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'MyTable'
        self.fileConverter.currValsArray = [['foo', 10]]
        res = self.fileConverter.finalizeInsertStatement()
        #print res
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n ('foo',10);", res)

        # Two value arrays in hold-back buffer:
        self.fileConverter.currInsertSig = 'col1, col2'
        self.fileConverter.currOutTable = 'MyTable'
        self.fileConverter.currValsArray = [['foo', 10], ['bar', 20]]
        res = self.fileConverter.finalizeInsertStatement()
        #print res
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n ('foo',10),\n ('bar',20);", res)

    @unittest.skipIf(not TEST_ALL, "Temporarily disabled")
    def testPrepareMySQLRow(self):
        # Pretend to be the edx parser, sending one insert's worth of
        # info. This will fit into the hold-back buffer, and return None:
        insertInfo = ('MyTable', 'col1, col2', [['foo', 10], ['bar', 20]])
        #print(res)
        self.assertEqual('LoadInfo', self.fileConverter.currOutTable)
        self.assertEqual('load_info_id,load_date_time,load_file', self.fileConverter.currInsertSig)
        currValsArr = self.fileConverter.currValsArray
        self.assertEqual('d4e622ff221b1eec00405f1b69893ff544ac5d75', currValsArr[0][0])
        self.assertEqual('file:///home/paepcke/EclipseWorkspaces/json_to_relation/json_to_relation/test/data/twoJSONRecords.json',
                         currValsArr[0][2])

        # Lower the allowed MySQL packet size to force immediate creation of INSERT statement:
        self.fileConverter.__class__.MAX_ALLOWED_PACKET_SIZE = 3
        self.fileConverter.currInsertSig = None
        self.fileConverter.currOutTable = None
        self.fileConverter.currValsArray = []
        res = self.fileConverter.prepareMySQLRow(insertInfo)
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n ('foo',10),\n ('bar',20);", res)

        # Set the allowed MySQL packet size to allow a first INSERT statement to be
        # held back; a second call must then trigger sending of the held-back
        # values, while holding back the newly submitted values.
        # First call:
        self.fileConverter.__class__.MAX_ALLOWED_PACKET_SIZE = 211
        self.fileConverter.currInsertSig = None
        self.fileConverter.currOutTable = None
        self.fileConverter.currValsArray = []
        res = self.fileConverter.prepareMySQLRow(insertInfo)
        self.assertIsNone(res)
        self.assertEqual('MyTable', self.fileConverter.currOutTable)
        self.assertEqual('col1, col2', self.fileConverter.currInsertSig)
        self.assertEqual([[['foo', 10], ['bar', 20]]], self.fileConverter.currValsArray)

        # Second call:
        insertMoreInfo = ('MyTable', 'col1, col2', [['blue', 30.1], ['green', 40.99]])
        res = self.fileConverter.prepareMySQLRow(insertMoreInfo)
        # Should have insert statement for the prior submission...
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n (['foo', 10],['bar', 20]);", res)
        # ... and the hold-back buffer should have the new submission:
        self.assertEqual('MyTable', self.fileConverter.currOutTable)
        self.assertEqual('col1, col2', self.fileConverter.currInsertSig)
        self.assertEqual([[['blue', 30.1], ['green', 40.99]]], self.fileConverter.currValsArray)

        # Call FLUSH to get the held-back values:
        res = self.fileConverter.prepareMySQLRow('FLUSH')
        self.assertEqual("INSERT INTO MyTable (col1, col2) VALUES \n (['blue', 30.1],['green', 40.99]);", res)
        self.assertIsNone(self.fileConverter.currOutTable)
        self.assertIsNone(self.fileConverter.currInsertSig)
        self.assertEqual([], self.fileConverter.currValsArray)

    #--------------------------------------------------------------------------------------------------

    def assertFileContentEquals(self, expected, filePath):
        strFile = StringIO.StringIO(expected)
        with open(filePath, 'r') as fd:
            for fileLine in fd:
                expectedLine = strFile.readline()
                self.assertEqual(expectedLine.strip(), fileLine.strip())
        if strFile.readline() != "":
            # Expected string is longer than what's in the file:
            self.fail("Expected string is longer than content of output file %s" % filePath)

    def updateTruthFromFile(self, referenceFileName, actualTruthFileName):
        '''
        Copy actualTruthFileName content to referenceFileName. The reference file
        is a ground-truth file to which test run results are compared. When the
        code changes so much that hand-editing that ground-truth file to match
        the new correct output is impractical, accept the test output as the
        future ground truth.
        :param referenceFileName: file to be overwritten
        :type referenceFileName: String
        :param actualTruthFileName: source file
        :type actualTruthFileName: String
        '''
        shutil.copyfile(actualTruthFileName, referenceFileName)
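# Standard unittest entry point (assumed here; it lets this test module be
# run directly from the command line):
if __name__ == '__main__':
    unittest.main()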