def getDeltas(fileOld, fileNew, cfg, directory):
    """Compare two tweet files and write out what was added, removed, updated.

    The new file is loaded with ``loadTweets``; its smallest ``created_at``
    value is used as a cutoff so that only old records at or after that
    time take part in the comparison.  When ``cfg['OneTimeDump']`` is truthy
    the old file is not loaded at all and every new record counts as added.

    Records are bucketed by key:
      * added   -- key only in the new data
      * removed -- key only in the (filtered) old data
      * updated -- key in both, but ``makeKey(record, updateKeys)`` differs

    Each non-empty bucket is tagged through ``addExtra``, written with
    ``writeCSV`` and its location appended to the list handed to
    ``GISpy.zipData`` together with ``fileNew``, the word-weight file and
    the meta file.

    Args:
        fileOld: location of the previous tweet file (ignored for a
            one-time dump).
        fileNew: location of the current tweet file.
        cfg: mapping read for 'OneTimeDump', 'OutDir' and 'Method', and
            passed through to the helper functions.
        directory: appended to 'dbFiles/' for the zip destination.

    Returns:
        dict with keys 'wordWeight', 'meta', 'added', 'removed', 'updated'.
        The last three hold the string 'null' when the bucket was empty.

    Raises:
        ValueError: from ``min`` when the new file yields no records.
    """
    freshRecords = loadTweets(fileNew, cfg)
    earliest = min([record['created_at'] for record in freshRecords.values()])

    # Old records older than the new file's earliest entry are out of scope.
    priorRecords = dict()
    if not cfg['OneTimeDump']:
        for recordKey, record in loadTweets(fileOld, cfg).iteritems():
            if record['created_at'] >= earliest:
                priorRecords[recordKey] = record

    # New records win on key clashes; old ones are deep-copied first.
    combined = deepcopy(priorRecords)
    combined.update(freshRecords)

    freshIds = set(freshRecords.keys())
    priorIds = set(priorRecords.keys())
    addedIds = freshIds.difference(priorIds)
    removedIds = priorIds.difference(freshIds)
    changedIds = set()
    for recordKey in freshIds.intersection(priorIds):
        newSignature = makeKey(freshRecords[recordKey], updateKeys)
        oldSignature = makeKey(priorRecords[recordKey], updateKeys)
        if newSignature != oldSignature:
            changedIds.add(recordKey)

    outDir = 'studies/' + cfg['OutDir'] + cfg['Method'] + '/'
    stamp = GISpy.outTime(datetime.datetime.now())['db']
    wordWeight = getWordWeights(freshRecords, 5, outDir, stamp)
    meta = getMeta(cfg, outDir, stamp)
    bundle = [fileNew, wordWeight, meta]

    def emit(wantedIds, operation, descriptor):
        # Pick the wanted rows out of the combined view, tag them, write
        # them to CSV and register the resulting location for zipping.
        rows = {}
        for recordKey, record in combined.iteritems():
            if recordKey in wantedIds:
                rows[recordKey] = record
        addExtra(rows, {'operation': operation, 'operationTime': stamp})
        location = writeCSV(rows, outDir, descriptor, '')
        bundle.append(location)
        return location

    addedLoc = 'null'
    removedLoc = 'null'
    updatedLoc = 'null'

    if addedIds:
        if cfg['OneTimeDump']:
            addedLoc = emit(addedIds, 'dump', 'Dumped')
        else:
            addedLoc = emit(addedIds, 'add', 'Added')
    if removedIds:
        removedLoc = emit(removedIds, 'remove', "Removed")
    if changedIds:
        updatedLoc = emit(changedIds, 'updated', "Updated")

    GISpy.zipData(bundle, 'dbFiles/' + directory, 'DBFeed ', stamp, cfg)
    return {
        'wordWeight': wordWeight,
        'meta': meta,
        'added': addedLoc,
        'removed': removedLoc,
        'updated': updatedLoc,
    }
def getDeltas(fileOld, fileNew, cfg, directory):
    """Diff the old and new tweet files and export the differences.

    Loads ``fileNew`` via ``loadTweets`` and, unless ``cfg['OneTimeDump']``
    is truthy, loads ``fileOld`` as well, keeping only old records whose
    ``created_at`` is not earlier than the earliest ``created_at`` in the
    new data.  Keys are then split into added (new only), removed (old
    only) and updated (present in both with differing
    ``makeKey(record, updateKeys)`` results).

    For every non-empty group the matching records are passed to
    ``addExtra`` with an operation label and the run timestamp, written by
    ``writeCSV``, and the returned location is queued for
    ``GISpy.zipData`` alongside ``fileNew``, the word-weight output and the
    meta output.

    Args:
        fileOld: previous tweet file; not read when doing a one-time dump.
        fileNew: current tweet file.
        cfg: configuration mapping ('OneTimeDump', 'OutDir', 'Method' are
            read here; the whole mapping is forwarded to helpers).
        directory: sub-path appended to 'dbFiles/' for the zip call.

    Returns:
        dict mapping 'wordWeight', 'meta', 'added', 'removed' and 'updated'
        to the corresponding locations; groups with no records map to the
        string 'null'.

    Raises:
        ValueError: from ``min`` if the new file contains no records.
    """
    current = loadTweets(fileNew, cfg)
    cutoff = min([rec['created_at'] for rec in current.values()])

    if cfg['OneTimeDump']:
        previous = dict()
    else:
        previous = {
            k: rec
            for k, rec in loadTweets(fileOld, cfg).iteritems()
            if rec['created_at'] >= cutoff
        }

    # Deep-copied old records overlaid with the new ones (new wins on clash).
    union = deepcopy(previous)
    union.update(current)

    currentKeys = set(current.keys())
    previousKeys = set(previous.keys())
    arrivedKeys = currentKeys - previousKeys
    goneKeys = previousKeys - currentKeys
    sharedKeys = currentKeys & previousKeys
    alteredKeys = set([
        k for k in sharedKeys
        if makeKey(current[k], updateKeys) != makeKey(previous[k], updateKeys)
    ])

    expDir = 'studies/' + cfg['OutDir'] + cfg['Method'] + '/'
    runStamp = GISpy.outTime(datetime.datetime.now())['db']
    wordWeight = getWordWeights(current, 5, expDir, runStamp)
    meta = getMeta(cfg, expDir, runStamp)

    toZip = [fileNew, wordWeight, meta]
    result = {
        'wordWeight': wordWeight,
        'meta': meta,
        'added': 'null',
        'removed': 'null',
        'updated': 'null',
    }

    if len(arrivedKeys) > 0:
        # A one-time dump labels its rows differently from an incremental add.
        descriptor, operation = (
            ('Dumped', 'dump') if cfg['OneTimeDump'] else ('Added', 'add')
        )
        arrivedRows = dict(
            (k, rec) for k, rec in union.iteritems() if k in arrivedKeys
        )
        addExtra(arrivedRows, {
            'operation': operation,
            'operationTime': runStamp
        })
        result['added'] = writeCSV(arrivedRows, expDir, descriptor, '')
        toZip.append(result['added'])

    if len(goneKeys) > 0:
        goneRows = dict(
            (k, rec) for k, rec in union.iteritems() if k in goneKeys
        )
        addExtra(goneRows, {
            'operation': 'remove',
            'operationTime': runStamp
        })
        result['removed'] = writeCSV(goneRows, expDir, "Removed", '')
        toZip.append(result['removed'])

    if len(alteredKeys) > 0:
        alteredRows = dict(
            (k, rec) for k, rec in union.iteritems() if k in alteredKeys
        )
        addExtra(alteredRows, {
            'operation': 'updated',
            'operationTime': runStamp
        })
        result['updated'] = writeCSV(alteredRows, expDir, "Updated", '')
        toZip.append(result['updated'])

    GISpy.zipData(toZip, 'dbFiles/' + directory, 'DBFeed ', runStamp, cfg)
    return result