示例#1
0
def reformat(path, day, month, year, time_offset, contents, n_contents, out_file):
    """Convert one day's merged access log into trace records.

    Looks in ``path`` for the file named
    ``'%02d%02d%02d-merged.log' % (year, month, day)``. For every GET request
    with a status code below 300 whose content ID can be determined, one
    ``'<time>,0,<content_id>'`` line is written to ``out_file`` and the weight
    of that content is recorded in ``contents``.

    Args:
        path: Directory that is searched for the log file.
        day, month, year: Date components used to build the log file name.
        time_offset: Number of seconds added to every request's time of day.
        contents: Dict mapping content ID to weight (2 for priority content,
            1 otherwise); updated in place.
        n_contents: Not used by this function; returned unchanged.
        out_file: Writable file object that receives the trace lines.

    Returns:
        Tuple ``(day_existed, contents, n_contents)`` where ``day_existed``
        tells whether the log file for the requested day was found.
    """
    log_name = '%02d%02d%02d-merged.log' % (year, month, day)
    if log_name not in os.listdir(path):
        return False, contents, n_contents

    print('reading %s' % log_name)
    # Open the file inside ``path``: the bare name would only resolve when the
    # current working directory happens to be ``path``.
    with open(os.path.join(path, log_name), 'rt') as in_file:
        for line in in_file:
            request = parse_line(line)
            if request['http_request_name'] != 'GET' or request['code'] >= 300:
                continue

            try:
                _, content_id = determine_format_and_content_id(request)
            except Exception:
                # Best effort: a request whose content cannot be identified is
                # skipped instead of aborting the whole day.
                content_id = None

            # only requests with a known content ID go into the trace
            if content_id is None:
                continue

            # Time of day: the part after 'T' and before the last '-'.
            # NOTE(review): presumably timestamps look like
            # 'YYYY-MM-DDTHH:MM:SS-HH:MM'; a '+' UTC offset would not be
            # handled by this split -- confirm against parse_line.
            clock = request['time'].rpartition('T')[2].rpartition('-')[0]
            hours, _, rest = clock.partition(':')
            minutes, _, seconds = rest.partition(':')
            trace_time = (time_offset + int(hours) * 3600
                          + int(minutes) * 60 + int(seconds))

            # currently the receiver is constant (one cache scenario), set to 0
            out_file.write('%d,%d,%d\n' % (trace_time, 0, content_id))

            # A content keeps weight 2 once assigned; contents that are new or
            # still at weight 1 are re-checked against the priority types.
            if contents.get(content_id, 1) == 1:
                is_priority_content = any(
                    priority_content_type in line
                    for priority_content_type in priority_content_types)
                contents[content_id] = 2 if is_priority_content else 1

    return True, contents, n_contents
示例#2
0
def analyze(path, day, month, year, data):
    """Accumulate request statistics for one day's merged access log.

    Looks in ``path`` for the file named
    ``'%02d%02d%02d-merged.log' % (year, month, day)``. If it exists, every
    line is parsed and the counters in ``data`` are updated, then the result
    is printed with ``print_data_dict_compact``.

    Keys of ``data`` that are updated: ``request_names``, ``zero_bytes``,
    ``ip_24_ranges``, ``ip_16_ranges``, ``ip_8_ranges``, ``body_bytes_sizes``,
    ``content_type``, ``content_type_hits`` and ``min_hit_size``.
    NOTE(review): the nested counters are incremented with ``+= 1`` on keys
    that are not initialised here, so they presumably are defaultdict/Counter
    instances -- confirm at the call site.

    Args:
        path: Directory that is searched for the log file.
        day, month, year: Date components used to build the log file name.
        data: Dict of statistics; updated in place.

    Returns:
        The same ``data`` dict.
    """
    log_name = '%02d%02d%02d-merged.log' % (year, month, day)
    if log_name not in os.listdir(path):
        return data

    print('reading %s' % log_name)
    # Open the file inside ``path``: the bare name would only resolve when the
    # current working directory happens to be ``path``.
    with open(os.path.join(path, log_name), 'rt') as in_file:
        for line in in_file:
            request = parse_line(line)

            data['request_names'][request['http_request_name']] += 1

            # the remaining statistics only cover GET requests with code < 300
            if request['http_request_name'] != 'GET' or request['code'] >= 300:
                continue

            body_bytes = request['body_bytes_sent']
            if body_bytes == 0:
                data['zero_bytes'] += 1

            # Strip one trailing dot-separated component per step to get the
            # /24, /16 and /8 prefixes of the client address.
            ip_24 = request['ip'].rpartition('.')[0]
            ip_16 = ip_24.rpartition('.')[0]
            ip_8 = ip_16.rpartition('.')[0]

            data['ip_24_ranges'][ip_24] += 1
            data['ip_16_ranges'][ip_16] += 1
            data['ip_8_ranges'][ip_8] += 1

            # Count decimal digits with floor division so the loop behaves the
            # same on Python 2 and 3 ('/' is true division on Python 3), and
            # stop on non-positive values so it always terminates.
            # NOTE(review): the counter starts at 1, so e.g. a 5-byte response
            # is filed under '10^1 < b < 10^2'. The labels are left unchanged
            # because existing consumers of ``data`` may depend on these keys.
            remaining = body_bytes
            order_of_magnitude = 1
            while remaining > 0:
                remaining //= 10
                order_of_magnitude += 1
            size_label = '10^%d < b < 10^%d' % (order_of_magnitude - 1,
                                                 order_of_magnitude)
            data['body_bytes_sizes'][size_label] += 1

            content_format, _ = determine_format_and_content_id(request)
            if content_format is None:
                continue

            data['content_type'][content_format] += 1
            if request['cache_hit_or_miss'] == 'HIT':
                data['content_type_hits'][content_format] = True
                # track the smallest response that was served as a cache hit
                if body_bytes < data['min_hit_size']:
                    data['min_hit_size'] = body_bytes

    print_data_dict_compact(data)

    return data