Пример #1
0
def processFile(filep):
    """Scan a line-delimited JSON file and collect the matching records.

    Every line of the file is parsed with ``ujson.loads``. Records for which
    ``settings.CheckCondition(rec, settings.keywords)`` is truthy are passed
    to ``settings.DoSomething(rec, times)`` together with the shared ``times``
    dict. When reading is finished (or has failed), ``times`` is handed to
    ``helpers.write_day_wise_to_file``.

    Errors raised while opening/reading the file or while writing the results
    are logged and swallowed, so this function is best-effort by design.

    Args:
        filep: Path of the input file. It is opened with ``gzip`` when the
            path contains ``'.gz'`` and as a plain file otherwise.

    Returns:
        The ``times`` dict; it may be only partially filled if reading
        failed part-way through.
    """
    import log
    import helpers
    import settings
    import os
    from ujson import loads
    import gzip

    times = {}
    logger = log.logger('olympics_samples_' + os.path.basename(filep))

    try:
        if '.gz' in filep:
            f = gzip.open(filep)
        else:
            f = open(filep)

        # Remember the name up front so it is still available for the final
        # stats after the handle has been closed.
        fname = f.name
        tot_lines = 0
        loc_lines = 0
        try:
            logger.log('finding all records with location for: ' + fname)
            for line in f:
                rec = loads(line)
                tot_lines += 1
                if settings.CheckCondition(rec, settings.keywords):
                    settings.DoSomething(rec, times)
                    loc_lines += 1
                    # Periodic progress report with a sample of the data.
                    if loc_lines % 1000 == 0:
                        logger.log('Count:' + str(loc_lines) + '/' + str(tot_lines))
                        logger.log('Last sample : %s' % (rec['text']))
        finally:
            # Always release the file handle, even when a record fails to
            # parse or a callback raises.
            f.close()

        ret = {'fname': fname, 'tot_lines': tot_lines, 'loc_lines': loc_lines}
        logger.send_final_stats(ret)
    except Exception as e:
        # Best-effort: log the failure and still try to write what we have.
        logger.log('Error log: ' + str(e))

    # write results to file
    logger.log('Sending to files now..')
    try:
        helpers.write_day_wise_to_file([times], settings.OUTPUT_DIR)
    except Exception as e:
        logger.log('Error log: ' + str(e))
    logger.log('Done!')
    return times
Пример #2
0
def processFile(filep):
    """Scan a gzip-compressed, line-delimited JSON file for matching records.

    Every line is parsed with ``ujson.loads``. Records for which
    ``parallels.bdCheckCondition(rec)`` is truthy are passed to
    ``parallels.bdDoSomethingMemory(rec, locs)`` together with the shared
    ``locs`` dict. After the scan, the counters are reported through
    ``logger.send_final_stats`` and ``locs`` is handed to
    ``helpers.write_day_wise_to_file``.

    Args:
        filep: Path of the gzip file to read.

    Returns:
        The ``locs`` dict.

    Raises:
        Any exception raised while opening, reading or parsing the file
        propagates to the caller; only a failure while writing the results
        is logged and swallowed.
    """
    import log
    import parallels
    import helpers
    import settings
    import os
    from ujson import loads
    import gzip

    locs = {}
    logger = log.logger('test/AllLocsBigData_' + os.path.basename(filep))

    f = gzip.open(filep)
    # Remember the name up front so it is still available for the final
    # stats after the handle has been closed.
    fname = f.name
    tot_lines = 0
    loc_lines = 0
    try:
        logger.log('finding all records with location for: ' + fname)
        for line in f:
            rec = loads(line)
            tot_lines += 1
            if parallels.bdCheckCondition(rec):
                parallels.bdDoSomethingMemory(rec, locs)
                loc_lines += 1
                # Periodic progress report.
                if loc_lines % 10000 == 0:
                    logger.log('Count:' + str(loc_lines) + '/' + str(tot_lines))
    finally:
        # Always release the file handle, even when a record fails to parse
        # or a callback raises.
        f.close()

    ret = {'fname': fname, 'tot_lines': tot_lines, 'loc_lines': loc_lines}
    logger.send_final_stats(ret)
    try:
        # Debug output kept from the original; the parenthesised form prints
        # the same text on Python 2 and Python 3.
        print('just trying')
    except Exception as e:
        logger.log('Error log: ' + str(e))
    # write results to file
    logger.log('Sending to files now..')
    try:
        helpers.write_day_wise_to_file([locs], settings.OUTPUT_DIR)
    except Exception as e:
        logger.log('Error log: ' + str(e))
    logger.log('Done!')
    return locs
Пример #3
0
def processFile(filep):
    """Scan a gzip-compressed, line-delimited JSON file for matching records.

    Every line is parsed with ``ujson.loads``. Records for which
    ``parallels.bdCheckCondition(rec)`` is truthy are passed to
    ``parallels.bdDoSomethingMemory(rec, locs)`` together with the shared
    ``locs`` dict. After the scan, the counters are reported through
    ``logger.send_final_stats`` and ``locs`` is handed to
    ``helpers.write_day_wise_to_file``.

    Args:
        filep: Path of the gzip file to read.

    Returns:
        The ``locs`` dict.

    Raises:
        Any exception raised while opening, reading or parsing the file
        propagates to the caller; only a failure while writing the results
        is logged and swallowed.
    """
    import log
    import parallels
    import helpers
    import settings
    import os
    from ujson import loads
    import gzip

    locs = {}
    logger = log.logger('test/AllLocsBigData_' + os.path.basename(filep))

    f = gzip.open(filep)
    # Remember the name up front so it is still available for the final
    # stats after the handle has been closed.
    fname = f.name
    tot_lines = 0
    loc_lines = 0
    try:
        logger.log('finding all records with location for: ' + fname)
        for line in f:
            rec = loads(line)
            tot_lines += 1
            if parallels.bdCheckCondition(rec):
                parallels.bdDoSomethingMemory(rec, locs)
                loc_lines += 1
                # Periodic progress report.
                if loc_lines % 10000 == 0:
                    logger.log('Count:' + str(loc_lines) + '/' + str(tot_lines))
    finally:
        # Always release the file handle, even when a record fails to parse
        # or a callback raises.
        f.close()

    ret = {'fname': fname, 'tot_lines': tot_lines, 'loc_lines': loc_lines}
    logger.send_final_stats(ret)
    try:
        # Debug output kept from the original; the parenthesised form prints
        # the same text on Python 2 and Python 3.
        print('just trying')
    except Exception as e:
        logger.log('Error log: ' + str(e))
    # write results to file
    logger.log('Sending to files now..')
    try:
        helpers.write_day_wise_to_file([locs], settings.OUTPUT_DIR)
    except Exception as e:
        logger.log('Error log: ' + str(e))
    logger.log('Done!')
    return locs