import os
import re

import csvAnalyzer  # project-local CSV parsing module


def loadData(fpath, dbFunc):
    if not os.path.exists(fpath):
        raise FileNotFoundError('Path does not exist: %s' % fpath)
    # Skip files that were already uploaded successfully on a previous run
    allDone = getAllDone()
    if fpath in allDone:
        print('Already done!')
        return
    print('Loading file "%s"...' % os.path.basename(fpath))
    subjCont = csvAnalyzer.analyzeFile(fpath)
    print('Inserting to DB...')
    succ = dbFunc(subjCont)
    if succ:
        # Record the file as done so the next run skips it
        logFile(fpath)
        print('Done!')
    else:
        print('Error, please see logs...')
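# getAllDone() and logFile() are referenced above but not defined in this snippet.
# A minimal sketch of what they might look like, assuming progress is tracked as
# one completed path per line in a plain-text log (the log file name
# 'done_files.log' is an assumption, not part of the original code):

DONE_LOG = 'done_files.log'  # assumed log of successfully uploaded files


def getAllDone():
    """Return the list of file paths already uploaded successfully."""
    if not os.path.exists(DONE_LOG):
        return []
    with open(DONE_LOG) as f:
        return [line.strip() for line in f if line.strip()]


def logFile(fpath):
    """Append a successfully uploaded file path to the done log."""
    with open(DONE_LOG, 'a') as f:
        f.write(fpath + '\n')

# With these helpers in place, a call might look like:
#   loadData('/data/Test_001.txt', db.insert_bulk)  # hypothetical dbFunc returning True on success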
def __init__(self):
    # 1. Target
    self.db_name = 'postgres'
    self.user_name = 'postgres'
    self.password = '******'
    self.schema_name = 'DB_Schema_Name'
    self.tableName = 'DB_Table_Name'  # Target table name; created from the source files' columns if it does not exist

    # 2. Source
    # Folder with files to upload to the DB. It is searched recursively for files
    # that satisfy self.filesFormatFunc. Multiple files can be loaded into the same
    # table; if a run fails, the next run skips the files already uploaded successfully.
    self.dataDir = '~/Downloads/Kaggle/Data'
    self.filesFormatFunc = lambda f: re.match(r'.*Test_.*\.txt', f) is not None  # Filter for which files in dataDir to upload

    # 3. Other tunings and configuration of data parsing and manipulation:
    # Bulk size for uploading to the DB (the same number is also used for reading
    # the file). Set to None to disable bulking and load the whole file as one bulk.
    self.upload_bulk_size = 1000
    self.dataAnalyzerFunLazy = lambda f, h: csvAnalyzer.analyzeFile(
        f,
        header=h,
        delimiter=',',                  # CSV delimiter
        manipulationFunc=None,          # Dict of per-field manipulation functions; see example 1 below
        additionalConstFields=None,     # Constant fields to add as extra table columns; see example 2 below
        topN=self.upload_bulk_size)     # File reading bulk size
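# The manipulationFunc and additionalConstFields parameters are easiest to see by
# example. The shapes below are an assumption inferred from the comments above;
# the column name 'price' and field name 'load_batch' are hypothetical.

# Example 1 (assumed shape): a dict mapping a CSV column name to a function that
# is applied to every value in that column before upload.
manipulation_example = {
    'price': lambda v: float(v.replace('$', '')),  # strip the currency sign, cast to float
}

# Example 2 (assumed shape): constant name/value pairs appended as extra columns
# to every uploaded row.
additional_const_fields_example = {
    'load_batch': 'batch_2015_01',
}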