Пример #1
0
def getDeltas(fileOld, fileNew, cfg, directory):
    """Diff two tweet files and export the added/removed/updated entries.

    The tweets from ``fileNew`` are compared by key against the tweets from
    ``fileOld`` whose ``created_at`` is not earlier than the earliest
    ``created_at`` found in ``fileNew``.  Each non-empty category is tagged
    via ``addExtra``, written with ``writeCSV`` and, together with the
    word-weight and meta files, handed to ``GISpy.zipData``.

    Args:
        fileOld: Passed to ``loadTweets`` for the previous snapshot; not
            loaded at all when ``cfg['OneTimeDump']`` is truthy.
        fileNew: Passed to ``loadTweets`` for the current snapshot; also
            included in the list of files given to ``GISpy.zipData``.
        cfg: Mapping read for ``'OneTimeDump'``, ``'OutDir'`` and
            ``'Method'``; also forwarded to ``loadTweets``, ``getMeta``
            and ``GISpy.zipData``.
        directory: Appended to ``'dbFiles/'`` and passed to
            ``GISpy.zipData``.

    Returns:
        dict with keys ``'wordWeight'``, ``'meta'``, ``'added'``,
        ``'removed'`` and ``'updated'``.  The last three hold the value
        returned by ``writeCSV`` or the string ``'null'`` when that
        category has no entries.

    Raises:
        ValueError: If ``fileNew`` yields no tweets (``min()`` of an empty
            sequence).
    """
    loadedNew = loadTweets(fileNew, cfg)
    minTime = min(entry['created_at'] for entry in loadedNew.values())

    oneTimeDump = cfg['OneTimeDump']
    if oneTimeDump:
        # A one-time dump has no previous snapshot to compare against.
        loadedOld = {}
    else:
        # .items() instead of .iteritems(): same result on Python 2 and
        # still available on Python 3.
        loadedOld = {
            key: item
            for key, item in loadTweets(fileOld, cfg).items()
            if item['created_at'] >= minTime
        }

    # Old entries are copied so that tagging them later does not touch
    # loadedOld; entries from loadedNew win on key collisions.
    merged = deepcopy(loadedOld)
    merged.update(loadedNew)

    newKeys = set(loadedNew)
    oldKeys = set(loadedOld)
    addedKeys = newKeys - oldKeys
    removedKeys = oldKeys - newKeys
    # NOTE: `updateKeys` is a module-level name defined outside this function.
    updatedKeys = {
        key for key in newKeys & oldKeys
        if makeKey(loadedNew[key], updateKeys)
        != makeKey(loadedOld[key], updateKeys)
    }

    expDir = 'studies/' + cfg['OutDir'] + cfg['Method'] + '/'
    timeStamp = GISpy.outTime(datetime.datetime.now())['db']
    wordWeight = getWordWeights(loadedNew, 5, expDir, timeStamp)
    meta = getMeta(cfg, expDir, timeStamp)
    fileLocs = [fileNew, wordWeight, meta]

    if oneTimeDump:
        addedDescriptor, addedOperation = 'Dumped', 'dump'
    else:
        addedDescriptor, addedOperation = 'Added', 'add'

    # (result key, selected keys, CSV descriptor, operation tag)
    categories = (
        ('added', addedKeys, addedDescriptor, addedOperation),
        ('removed', removedKeys, 'Removed', 'remove'),
        ('updated', updatedKeys, 'Updated', 'updated'),
    )

    result = {
        'wordWeight': wordWeight,
        'meta': meta,
        'added': 'null',
        'removed': 'null',
        'updated': 'null',
    }

    for name, keys, descriptor, operation in categories:
        if not keys:
            continue
        # Iterate `merged` (not `keys`) so the row order seen by writeCSV
        # follows the merged mapping, as before.
        data = {key: value for key, value in merged.items() if key in keys}
        addExtra(data, {'operation': operation, 'operationTime': timeStamp})
        result[name] = writeCSV(data, expDir, descriptor, '')
        fileLocs.append(result[name])

    GISpy.zipData(fileLocs, 'dbFiles/' + directory, 'DBFeed ', timeStamp, cfg)
    return result
Пример #2
0
def _writeDelta(merged, keys, expDir, descriptor, operation, timeStamp):
    """Tag the entries of ``merged`` selected by ``keys`` and write them out.

    Returns whatever ``writeCSV`` returns for the selected entries.
    """
    # Iterating `merged` keeps the entry order of the merged mapping;
    # .items() works on both Python 2 and Python 3 (unlike .iteritems()).
    selected = {key: value for key, value in merged.items() if key in keys}
    addExtra(selected, {'operation': operation, 'operationTime': timeStamp})
    return writeCSV(selected, expDir, descriptor, '')


def getDeltas(fileOld, fileNew, cfg, directory):
    """Diff two tweet files and export the added/removed/updated entries.

    Tweets loaded from ``fileNew`` are compared by key with the tweets from
    ``fileOld`` whose ``created_at`` is at least the smallest ``created_at``
    in ``fileNew``.  Every non-empty category is written with ``writeCSV``
    and all produced files are passed to ``GISpy.zipData``.

    Args:
        fileOld: Passed to ``loadTweets`` for the previous snapshot; skipped
            entirely when ``cfg['OneTimeDump']`` is truthy.
        fileNew: Passed to ``loadTweets`` for the current snapshot; also
            the first entry of the file list given to ``GISpy.zipData``.
        cfg: Mapping read for ``'OneTimeDump'``, ``'OutDir'`` and
            ``'Method'``; also forwarded to ``loadTweets``, ``getMeta``
            and ``GISpy.zipData``.
        directory: Appended to ``'dbFiles/'`` and passed to
            ``GISpy.zipData``.

    Returns:
        dict with keys ``'wordWeight'``, ``'meta'``, ``'added'``,
        ``'removed'`` and ``'updated'``; the last three are the string
        ``'null'`` when the corresponding category is empty.

    Raises:
        ValueError: If ``fileNew`` yields no tweets (``min()`` of an empty
            sequence).
    """
    loadedNew = loadTweets(fileNew, cfg)
    minTime = min([entry['created_at'] for entry in loadedNew.values()])

    if not cfg['OneTimeDump']:
        previous = loadTweets(fileOld, cfg)
        loadedOld = {
            key: item
            for key, item in previous.items()
            if item['created_at'] >= minTime
        }
    else:
        # Nothing to compare against in one-time-dump mode.
        loadedOld = {}

    # Copy the old entries before overlaying the new ones, so later tagging
    # of merged entries never reaches back into loadedOld.
    merged = deepcopy(loadedOld)
    merged.update(loadedNew)

    newKeys = set(loadedNew.keys())
    oldKeys = set(loadedOld.keys())

    addedKeys = newKeys.difference(oldKeys)
    removedKeys = oldKeys.difference(newKeys)
    sameKeys = newKeys.intersection(oldKeys)
    # NOTE: `updateKeys` is a module-level name defined outside this function.
    updatedKeys = {
        entry for entry in sameKeys
        if makeKey(loadedNew[entry], updateKeys)
        != makeKey(loadedOld[entry], updateKeys)
    }

    expDir = 'studies/' + cfg['OutDir'] + cfg['Method'] + '/'
    timeStamp = GISpy.outTime(datetime.datetime.now())['db']
    wordWeight = getWordWeights(loadedNew, 5, expDir, timeStamp)
    meta = getMeta(cfg, expDir, timeStamp)
    fileLocs = [fileNew, wordWeight, meta]

    # 'null' marks a category for which no file was written.
    addedLoc = removedLoc = updatedLoc = 'null'

    if addedKeys:
        if cfg['OneTimeDump']:
            descriptor, operation = 'Dumped', 'dump'
        else:
            descriptor, operation = 'Added', 'add'
        addedLoc = _writeDelta(merged, addedKeys, expDir, descriptor,
                               operation, timeStamp)
        fileLocs.append(addedLoc)

    if removedKeys:
        removedLoc = _writeDelta(merged, removedKeys, expDir, "Removed",
                                 'remove', timeStamp)
        fileLocs.append(removedLoc)

    if updatedKeys:
        updatedLoc = _writeDelta(merged, updatedKeys, expDir, "Updated",
                                 'updated', timeStamp)
        fileLocs.append(updatedLoc)

    GISpy.zipData(fileLocs, 'dbFiles/' + directory, 'DBFeed ', timeStamp, cfg)
    return {
        'wordWeight': wordWeight,
        'meta': meta,
        'added': addedLoc,
        'removed': removedLoc,
        'updated': updatedLoc
    }