Пример #1
0
def parse(out, infile, modulestore):
    """Convert a JSON track-log input into relational output under ``out``.

    Creates (or truncates) ``out + '/transform.log'``, then runs a
    JSONToRelation conversion that reads from ``infile`` and writes to
    ``out + '/data'``.

    Args:
        out: Directory path prefix; '/transform.log' and '/data' are
            appended to it.
        infile: Input location, wrapped in an InURI.
        modulestore: Passed through to EdXTrackLogJSONParser as its
            ``moduleStore`` keyword argument.
    """
    # Nothing in this function writes to the log file; it is only
    # created/truncated. Close the handle right away instead of leaving
    # it open (and unreferenced) for the whole conversion.
    with open(out + '/transform.log', 'w'):
        pass

    # NOTE(review): `format` is not bound anywhere in this function; unless
    # the module defines its own global, it resolves to the builtin
    # format(). Confirm which output-format value is intended here.
    outfile = OutputFile(out + '/data', format, options='wb')

    parser = JSONToRelation(InURI(infile), outfile, mainTableName='EdxTrackEvent')
    # The track-log parser is constructed with the converter it will serve.
    parser.setParser(
        EdXTrackLogJSONParser(parser,
                              'EdxTrackEvent',
                              dbName='Edx',
                              moduleStore=modulestore))
    parser.convert()
Пример #2
0
def parse(out, infile, modulestore):
    """Run a JSONToRelation conversion of ``infile`` into ``out + '/data'``.

    Also creates (or truncates) ``out + '/transform.log'`` before the
    conversion starts.

    Args:
        out: Directory path prefix; '/transform.log' and '/data' are
            appended to it.
        infile: Input location, wrapped in an InURI.
        modulestore: Forwarded to EdXTrackLogJSONParser as the
            ``moduleStore`` keyword argument.
    """
    # The original kept this handle open without ever writing to it or
    # closing it. Only the create/truncate side effect is preserved; the
    # handle is released immediately.
    with open(out + '/transform.log', 'w'):
        pass

    # NOTE(review): `format` is not bound in this function, so it resolves
    # to a module global if one exists, otherwise to the builtin format().
    # Confirm which output-format value is intended here.
    outfile = OutputFile(out + '/data', format, options='wb')

    parser = JSONToRelation(InURI(infile),
                            outfile,
                            mainTableName='EdxTrackEvent')
    # The track-log parser takes the converter itself as its first argument.
    trackLogParser = EdXTrackLogJSONParser(parser,
                                           'EdxTrackEvent',
                                           dbName='Edx',
                                           moduleStore=modulestore)
    parser.setParser(trackLogParser)
    parser.convert()
Пример #3
0
    # Create an instance of JSONToRelation, taking input from the given file
    # and pumping output to the given output path:

    if args.targetFormat == 'csv':
        outputFormat = OutputDisposition.OutputFormat.CSV
    elif args.targetFormat == 'sql_dump':
        outputFormat = OutputDisposition.OutputFormat.SQL_INSERT_STATEMENTS
    else:
        outputFormat = OutputDisposition.OutputFormat.SQL_INSERTS_AND_CSV

    outSQLFile = OutputFile(
        outFullPath, outputFormat,
        options='wb')  # overwrite any sql file that's there
    jsonConverter = JSONToRelation(InURI(args.inFilePath),
                                   outSQLFile,
                                   mainTableName='EdxTrackEvent',
                                   logFile=logFile)
    try:
        # Setting useDisplayNameCache to True prevents guaranteed
        # pulling of Modulestore from the backup---an expensive
        # operation. Note that cronRefreshModulestore.sh will
        # cause the cache to be refreshed:

        jsonConverter.setParser(
            EdXTrackLogJSONParser(jsonConverter,
                                  'EdxTrackEvent',
                                  replaceTables=args.dropTables,
                                  dbName='Edx',
                                  useDisplayNameCache=True))
    except Exception as e:
        with open(logFile, 'w') as fd:
Пример #4
0
#    print('logFile: %s' % logFile)

    # Create an instance of JSONToRelation, taking input from the given file
    # and pumping output to the given output path:

    if args.targetFormat == 'csv':
        outputFormat = OutputDisposition.OutputFormat.CSV
    elif args.targetFormat == 'sql_dump':
        outputFormat = OutputDisposition.OutputFormat.SQL_INSERT_STATEMENTS
    else:
        outputFormat = OutputDisposition.OutputFormat.SQL_INSERTS_AND_CSV

    outSQLFile = OutputFile(outFullPath, outputFormat, options='wb')  # overwrite any sql file that's there
    jsonConverter = JSONToRelation(InURI(args.inFilePath),
                                   outSQLFile,
                                   mainTableName='EdxTrackEvent',
    				               logFile=logFile
                                   )
    try:
        jsonConverter.setParser(EdXTrackLogJSONParser(jsonConverter, 
        						  'EdxTrackEvent', 
        						  replaceTables=args.dropTables, 
        						  dbName='Edx'
        						  ))
    except Exception as e:
        with open(logFile, 'w') as fd:
            fd.write("In json2sql: could not create EdXTrackLogJSONParser: %s" % `e`)
        # Try to delete the .sql file that was created when 
        # the OutputFile instance was made in the JSONToRelation
        # instantiation statement above:
        try:
Пример #5
0
#!/usr/bin/env python

import sys
import os

# Build a new module search path whose first entry is the sibling
# json_to_relation directory (relative to this script), followed by the
# existing entries, and install it as sys.path.
source_dir = [
    os.path.join(os.path.dirname(os.path.abspath(__file__)),
                 "../json_to_relation/")
] + list(sys.path)
sys.path = source_dir

from json_to_relation import JSONToRelation
from output_disposition import OutputPipe, OutputDisposition
from input_source import InPipe
from edxTrackLogJSONParser import EdXTrackLogJSONParser

if __name__ == "__main__":

    # Wire stdin to stdout: input arrives through an InPipe and leaves
    # through an OutputPipe configured for SQL insert statements.
    stdinSource = InPipe()
    sqlSink = OutputPipe(OutputDisposition.OutputFormat.SQL_INSERT_STATEMENTS)
    jsonConverter = JSONToRelation(stdinSource,
                                   sqlSink,
                                   mainTableName='EdxTrackEvent',
                                   logFile='/tmp/j2s.log')

    # The parser is constructed with the converter itself, so it can only
    # be attached once the converter exists.
    trackLogParser = EdXTrackLogJSONParser(jsonConverter,
                                           'EdxTrackEvent',
                                           replaceTables=True,
                                           dbName='test')
    jsonConverter.setParser(trackLogParser)
    jsonConverter.convert()
Пример #6
0
    if not os.access(logDir, os.W_OK):
        os.makedirs(logDir)

    logFile = os.path.join(
        logDir,
        'j2s_%s_%s.log' % (os.path.basename(args.inFilePath), fileStamp))

    #    print('xpunge: %s' % args.dropTables)
    #    print('verbose: %s' % args.verbose)
    #    print('destDir: %s' % args.destDir)
    #    print('in=FilePath: %s' % args.inFilePath)
    #    print('outFullPath: %s' % outFullPath)
    #    print('logFile: %s' % logFile)

    # Create an instance of JSONToRelation, taking input from the given file
    # and pumping output to the given output path:

    jsonConverter = JSONToRelation(
        InURI(args.inFilePath),
        OutputFile(outFullPath,
                   OutputDisposition.OutputFormat.SQL_INSERT_STATEMENTS,
                   options='wb'),  # overwrite any sql file that's there
        mainTableName='EdxTrackEvent',
        logFile=logFile)
    jsonConverter.setParser(
        EdXTrackLogJSONParser(jsonConverter,
                              'EdxTrackEvent',
                              replaceTables=args.dropTables,
                              dbName='Edx'))
    jsonConverter.convert()
Пример #7
0
import sys
import os

# Prepend the sibling json_to_relation directory (resolved relative to
# this script's absolute location) to the module search path, keeping all
# existing entries after it.
source_dir = [os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           "../json_to_relation/")] + list(sys.path)
sys.path = source_dir

from json_to_relation import JSONToRelation
from output_disposition import OutputPipe, OutputDisposition
from input_source import InPipe
from edxTrackLogJSONParser import EdXTrackLogJSONParser

if __name__ == "__main__":

    # Input is taken from stdin via InPipe; output goes to stdout via an
    # OutputPipe set to emit SQL insert statements.
    inputPipe = InPipe()
    outputPipe = OutputPipe(
        OutputDisposition.OutputFormat.SQL_INSERT_STATEMENTS)
    jsonConverter = JSONToRelation(inputPipe,
                                   outputPipe,
                                   mainTableName='EdxTrackEvent',
                                   logFile='/tmp/j2s.log')

    # Build the track-log parser around the converter, register it, and
    # run the conversion.
    edxParser = EdXTrackLogJSONParser(jsonConverter,
                                      'EdxTrackEvent',
                                      replaceTables=True,
                                      dbName='test')
    jsonConverter.setParser(edxParser)
    jsonConverter.convert()