def split(fpath, outputDir):
    """Split a request log into per-week output files.

    Every input line that starts with ``2010/0`` begins a record.  A record
    is written as a ``-----`` separator line, the matching line itself and
    the three lines that follow it in the input.

    The week of a record is the ISO week number of its date, except that a
    Monday entry whose hour field is ``<= "06"`` is counted with the previous
    week.  Whenever that week differs from the global ``lastWeek`` the
    current output file is closed and ``request.log.<YYYYMMDD>`` (named after
    the record's date) is opened in ``outputDir``.

    State is kept in the module globals ``outputFile``, ``fileCount`` and
    ``lastWeek``; the last output file is left open on return so that a
    following call can keep appending to it.

    :param fpath: path of the log to read; read through ``GzipFile`` when the
        name ends in ``.gz``.
    :param outputDir: directory that receives the weekly files.
    """
    global outputFile, fileCount, lastWeek

    print("Splitting data for %s" % (fpath,))
    f = GzipFile(fpath) if fpath.endswith(".gz") else open(fpath)
    try:
        for line in f:
            if not line.startswith("2010/0"):
                continue

            date = line[:10].replace("/", "")
            hours = line[11:13]
            dt = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8]))

            currentWeek = dt.isocalendar()[1]
            # Early Monday entries still belong to the previous week.
            if dt.weekday() == 0 and hours <= "06":
                currentWeek -= 1

            if lastWeek != currentWeek:
                if outputFile:
                    outputFile.close()
                outputFile = open(
                    os.path.join(outputDir, "request.log.%s" % (date,)), "w")
                fileCount += 1
                lastWeek = currentWeek
                print("Changed to week of %s" % (date,))

            output = ["-----\n", line]
            try:
                # Pull the three continuation lines from the same iterator.
                output.append(next(f))
                output.append(next(f))
                output.append(next(f))
            except StopIteration:
                # Input ended in the middle of a record: drop the partial one.
                break
            outputFile.write("".join(output))
    finally:
        # Release the input handle even when parsing or writing fails.
        f.close()
示例#2
0
def main(path, verify=False, format='json'):
    """Print the ``.PKGINFO`` metadata stored in a package archive.

    ``path`` must end in ``.pkg.tar.gz`` or ``.pkg.tar.xz``.  The archive is
    scanned member by member until ``.PKGINFO`` is found.  Blank lines and
    lines starting with ``#`` are ignored.  With ``format='json'`` every
    remaining line is split on its first ``=`` into a key and a value, values
    of repeated keys are collected into a list, and the resulting mapping is
    printed with ``ujson.dumps``.  With any other ``format`` the remaining
    lines are printed as they are.

    :param path: path of the package file.
    :param verify: when true, additionally read the whole archive; an
        ``IOError`` while doing so is reported and changes the exit code to 2
        (the metadata is still printed).
    :param format: ``'json'`` for JSON output, anything else for raw lines.
    :returns: 0 on success, 1 when the file is not a readable package or has
        no ``.PKGINFO``, 2 when verification failed.
    """
    code = 0
    raw = None
    tar = None

    try:
        if path.endswith('.pkg.tar.gz'):
            raw = GzipFile(path)
        elif path.endswith('.pkg.tar.xz'):
            raw = LZMAFile(path)
        else:
            sys.stderr.write(
                '%s does not look like a package file.\n' % (path,))
            return 1

        tar = TarFile(fileobj=raw)

        # TarFile.next() returns None once the archive is exhausted, so the
        # scan has to stop on that instead of dereferencing ``info.name``.
        info = tar.next()
        while info is not None and info.name != '.PKGINFO':
            info = tar.next()
        if info is None:
            sys.stderr.write('%s does not contain .PKGINFO\n' % (path,))
            return 1

        if verify:
            try:
                # Reads the rest of the archive; a damaged file raises here.
                tar._load()
            except IOError:
                sys.stderr.write('failed to verify %s\n' % (path,))
                code = 2

        ret = {}
        for line in tar.extractfile(info).readlines():
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            if format in ('json',):
                key, value = map(str.strip, line.split('=', 1))
                if key in ret:
                    if isinstance(ret[key], list):
                        ret[key].append(value)
                    else:
                        # Second occurrence: promote the scalar to a list.
                        ret[key] = [ret[key], value]
                else:
                    ret[key] = value
            else:
                print(line)

        if format in ('json',):
            print(ujson.dumps(ret))
    except IOError:
        sys.stderr.write('%s is not a valid package file.\n' % (path,))
        return 1
    else:
        return code
    finally:
        # The tar object does not own the stream passed via ``fileobj``, so
        # both handles are closed explicitly.
        for handle in (tar, raw):
            if handle is not None:
                handle.close()
示例#3
0
def split(fpath, outputDir):
    """Split a request log into per-week output files, skipping Memory lines.

    Every input line that starts with ``2010/0`` begins a record.  A record
    is written as a ``-----`` separator line, the matching line and three
    following lines; if the second following line starts with ``Memory`` it
    is discarded and the line after it is used in its place.

    The week of a record is the ISO week number of its date, except that a
    Monday entry whose hour field is ``<= "06"`` is counted with the previous
    week.  Whenever that week differs from the global ``lastWeek`` the
    current output file is closed and ``request.log.<YYYYMMDD>`` (named after
    the record's date) is opened in ``outputDir``.

    State is kept in the module globals ``outputFile``, ``fileCount`` and
    ``lastWeek``; the last output file is left open on return.

    :param fpath: path of the log to read; read through ``GzipFile`` when the
        name ends in ``.gz``.
    :param outputDir: directory that receives the weekly files.
    """
    global outputFile, fileCount, lastWeek

    print("Splitting data for %s" % (fpath, ))
    f = GzipFile(fpath) if fpath.endswith(".gz") else open(fpath)
    try:
        for line in f:
            if not line.startswith("2010/0"):
                continue

            date = line[:10].replace("/", "")
            hours = line[11:13]
            dt = datetime.date(int(date[0:4]), int(date[4:6]), int(date[6:8]))

            currentWeek = dt.isocalendar()[1]
            # Early Monday entries still belong to the previous week.
            if dt.weekday() == 0 and hours <= "06":
                currentWeek -= 1

            if lastWeek != currentWeek:
                if outputFile:
                    outputFile.close()
                outputFile = open(
                    os.path.join(outputDir, "request.log.%s" % (date, )), "w")
                fileCount += 1
                lastWeek = currentWeek
                print("Changed to week of %s" % (date, ))

            output = ["-----\n", line]
            try:
                output.append(next(f))
                follow = next(f)
                if follow.startswith("Memory"):
                    # Drop the Memory line and take the one after it.
                    follow = next(f)
                output.append(follow)
                output.append(next(f))
            except StopIteration:
                # Input ended in the middle of a record: drop the partial one.
                break
            outputFile.write("".join(output))
    finally:
        # Release the input handle even when parsing or writing fails.
        f.close()
示例#4
0
def parseStats(logFilePath, donormlize=True, verbose=False):
    """Parse a ``|``-separated statement log and report on its entries.

    Lines up to and including the first one starting with ``---`` are
    skipped.  Each later line is split on ``|``; rows with more than
    ``COLUMN_query`` fields are processed as follows:

    * while the ``COLUMN_query`` field ends with ``+``, the ``+`` is dropped
      and the same field of the next line is appended (continuation lines);
    * everything from ``BEGIN:VCALENDAR`` onwards is cut off;
    * when ``donormlize`` is true the field is stripped and passed through
      ``sqlnormalize``;
    * rows whose field is exactly ``BEGIN``, ``COMMIT`` or ``ROLLBACK``, or
      contains ``pg_catalog``, are discarded.

    The surviving rows (all fields stripped) are handed to
    ``sqlStatementsReport``.

    :param logFilePath: log path; ``~`` is expanded and names ending in
        ``.gz`` are read through ``GzipFile``.
    :param donormlize: normalise the query field before filtering.
    :param verbose: print a progress line every 1000 entries and a final
        count.
    """
    fpath = os.path.expanduser(logFilePath)
    if fpath.endswith(".gz"):
        f = GzipFile(fpath)
    else:
        f = open(fpath)

    entries = []
    try:
        # Punt past data
        for line in f:
            if line.startswith("---"):
                break

        # The file must stay open here: the rows are read from the same
        # handle, continuing right after the separator line.
        for line in f:
            bits = line.split("|")
            if len(bits) <= COLUMN_query:
                continue

            while bits[COLUMN_query].endswith("+"):
                newbits = next(f).split("|")
                bits[COLUMN_query] = (
                    bits[COLUMN_query][:-1] + newbits[COLUMN_query])

            pos = bits[COLUMN_query].find("BEGIN:VCALENDAR")
            if pos != -1:
                bits[COLUMN_query] = bits[COLUMN_query][:pos]

            if donormlize:
                bits[COLUMN_query] = sqlnormalize(bits[COLUMN_query].strip())

            if bits[COLUMN_query] not in (
                    "BEGIN",
                    "COMMIT",
                    "ROLLBACK",
            ) and bits[COLUMN_query].find("pg_catalog") == -1:
                bits = [bit.strip() for bit in bits]
                entries.append(bits)
                if verbose and divmod(len(entries), 1000)[1] == 0:
                    print("%d entries" % (len(entries), ))
    finally:
        f.close()

    if verbose:
        print("Read %d entries" % (len(entries), ))

    sqlStatementsReport(entries)
def parseStats(logFilePath, donormlize=True, verbose=False):
    """Read a ``|``-delimited statement log and pass its rows to the report.

    Everything up to and including the first line that starts with ``---``
    is ignored.  Later lines are split on ``|`` and kept only when they have
    more than ``COLUMN_query`` fields.  For each kept row the
    ``COLUMN_query`` field is:

    * joined with the same field of following lines for as long as it ends
      with ``+`` (the trailing ``+`` is removed before joining);
    * truncated at the first ``BEGIN:VCALENDAR``;
    * stripped and run through ``sqlnormalize`` when ``donormlize`` is true.

    Rows whose field equals ``BEGIN``, ``COMMIT`` or ``ROLLBACK``, or
    contains ``pg_catalog``, are dropped.  The rest, with every field
    stripped, are given to ``sqlStatementsReport``.

    :param logFilePath: path of the log; ``~`` is expanded, and a ``.gz``
        suffix selects ``GzipFile`` for reading.
    :param donormlize: whether to normalise the query field.
    :param verbose: whether to print progress every 1000 entries and the
        total at the end.
    """
    fpath = os.path.expanduser(logFilePath)
    if fpath.endswith(".gz"):
        f = GzipFile(fpath)
    else:
        f = open(fpath)

    entries = []
    # try/finally so the input handle is released once reading is done,
    # including when a malformed row raises.
    try:
        # Punt past data
        for line in f:
            if line.startswith("---"):
                break

        for line in f:
            bits = line.split("|")
            if len(bits) > COLUMN_query:
                while bits[COLUMN_query].endswith("+"):
                    line = next(f)
                    newbits = line.split("|")
                    bits[COLUMN_query] = bits[COLUMN_query][:-1] + newbits[COLUMN_query]

                pos = bits[COLUMN_query].find("BEGIN:VCALENDAR")
                if pos != -1:
                    bits[COLUMN_query] = bits[COLUMN_query][:pos]

                if donormlize:
                    bits[COLUMN_query] = sqlnormalize(bits[COLUMN_query].strip())

                if bits[COLUMN_query] not in (
                    "BEGIN",
                    "COMMIT",
                    "ROLLBACK",
                ) and bits[COLUMN_query].find("pg_catalog") == -1:
                    bits = [bit.strip() for bit in bits]
                    entries.append(bits)
                    if verbose and divmod(len(entries), 1000)[1] == 0:
                        print("%d entries" % (len(entries),))
    finally:
        f.close()

    if verbose:
        print("Read %d entries" % (len(entries),))

    sqlStatementsReport(entries)
示例#6
0
    #exit()


    #for log_path in glob(input_dir + '/*.*'):
    #if 0:    
        base, ext = splitext(key.name)

        if ext == '.gz':
            fp = GzipFile(output_dir + key.name, 'r')
            name = base
        else: print 'file is not gzipped...'
        
        #print name

        fp.next()
        fp.next()
        input_rows = csv.reader(fp, dialect=csv.excel_tab)
        
        for row in input_rows:

            #print row

            try:
                uri = row[7].split('/')

                
                basemap = uri[1]
                x = int(uri[3])
                image_file = uri[4].split('.')
                y = int(image_file[0])