Example #1
def do(file_name, output_dir='output', debug=True):
    dir_fb2 = os.path.basename(file_name)
    dir_im = os.path.join(output_dir, dir_fb2)
    os.makedirs(dir_im, exist_ok=True)
    debug and print(dir_im + ':')

    total_image_size = 0

    with open(file_name, encoding='utf8') as fb2:
        pattern = re.compile(
            '<binary ((content-type=".+?") (id=".+?")'
            '|(id=".+?") (content-type=".+?")) *?>(.+?)</binary>', re.DOTALL)

        find_content_type = re.compile('content-type="(.+?)"')
        find_id = re.compile('id="(.+?)"')

        for i, binary in enumerate(pattern.findall(fb2.read()), 1):
            try:
                im_id, content_type, im_base64 = None, None, None

                for part in binary:
                    if not part:
                        continue

                    match_id = find_id.search(part)
                    if im_id is None and match_id is not None:
                        im_id = match_id.group(1)

                    match_content_type = find_content_type.search(part)
                    if content_type is None and match_content_type is not None:
                        content_type = match_content_type.group(1)

                    if match_id is None and match_content_type is None:
                        im_base64 = part

                im_file_name = get_file_name_from_binary(im_id, content_type)
                im_file_name = os.path.join(dir_im, im_file_name)

                im_data = base64.b64decode(im_base64.encode())

                count_bytes = len(im_data)
                total_image_size += count_bytes

                with open(im_file_name, mode='wb') as f:
                    f.write(im_data)

                im = Image.open(io.BytesIO(im_data))
                debug and print('    {}. {} {} format={} size={}'.format(
                    i, im_id, sizeof_fmt(count_bytes), im.format, im.size))

            except Exception:
                import traceback
                traceback.print_exc()

        file_size = os.path.getsize(file_name)
        debug and print()
        debug and print('fb2 file size =', sizeof_fmt(file_size))
        debug and print('total image size = {} ({:.2f}%)'.format(
            sizeof_fmt(total_image_size), total_image_size / file_size * 100))
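Every example in this collection calls sizeof_fmt() to turn a byte count into a human-readable size, but the helper itself is never shown. A minimal sketch, assuming the conventional binary-units implementation (Example #21 below also passes suffix='', so the parameter is kept):

def sizeof_fmt(num, suffix='B'):
    # Walk up the binary units until the value fits below 1024
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return '%3.1f%s%s' % (num, unit, suffix)
        num /= 1024.0
    return '%.1f%s%s' % (num, 'Yi', suffix)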
Example #2
def do(file_name, output_dir='output', debug=True):
    dir_fb2 = os.path.basename(file_name)
    dir_im = os.path.join(output_dir, dir_fb2)
    if not os.path.exists(dir_im):
        os.makedirs(dir_im)
    debug and print(dir_im + ':')

    total_image_size = 0
    number = 1

    tree = ET.parse(file_name)
    root = tree.getroot()

    for child in root:
        tag = child.tag
        if "}" in tag:
            tag = tag[tag.index('}') + 1:]

        if tag != 'binary':
            continue

        try:
            im_id = child.attrib['id']
            content_type = child.attrib['content-type']

            im_file_name = get_file_name_from_binary(im_id, content_type)
            im_file_name = os.path.join(dir_im, im_file_name)

            im_data = base64.b64decode(child.text.encode())

            count_bytes = len(im_data)
            total_image_size += count_bytes

            with open(im_file_name, mode='wb') as f:
                f.write(im_data)

            im = Image.open(io.BytesIO(im_data))
            debug and print('    {}. {} {} format={} size={}'.format(
                number, im_id, sizeof_fmt(count_bytes), im.format, im.size))

            number += 1

        except Exception:
            import traceback
            traceback.print_exc()

    file_size = os.path.getsize(file_name)
    debug and print()
    debug and print('fb2 file size =', sizeof_fmt(file_size))
    debug and print('total image size = {} ({:.2f}%)'.format(
        sizeof_fmt(total_image_size), total_image_size / file_size * 100))
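The FB2 extractors also rely on get_file_name_from_binary(), which never appears in the listing. A plausible sketch, assuming it derives a file name from the binary's id attribute and MIME content-type (the extension map is an assumption):

def get_file_name_from_binary(im_id, content_type):
    # FB2 ids often already carry an extension, e.g. "cover.jpg"
    if '.' in im_id:
        return im_id

    # Otherwise map the MIME type to an extension
    ext = {
        'image/jpeg': '.jpg',
        'image/png': '.png',
        'image/gif': '.gif',
    }.get(content_type, '.bin')
    return im_id + ext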
Example #3
        def endElement(self, name):
            if name != 'binary':
                return

            data = PARSE_DATA['last_tag_data']

            try:
                im_id = PARSE_DATA['last_tag_attrs']['id']
                content_type = PARSE_DATA['last_tag_attrs']['content-type']

                im_file_name = get_file_name_from_binary(im_id, content_type)
                im_file_name = os.path.join(dir_im, im_file_name)

                im_data = base64.b64decode(data.encode())

                count_bytes = len(im_data)
                PARSE_DATA['total_image_size'] += count_bytes

                with open(im_file_name, mode='wb') as f:
                    f.write(im_data)

                im = Image.open(io.BytesIO(im_data))
                debug and print('    {}. {} {} format={} size={}'.format(
                    PARSE_DATA['number'], im_id, sizeof_fmt(count_bytes),
                    im.format, im.size))

                PARSE_DATA['number'] += 1

            except Exception:
                import traceback
                traceback.print_exc()
Example #4
def get_dir_total_size(dir_name: str) -> (int, str):
    total_size = 0

    for root, dirs, files in os.walk(dir_name):
        total_size += sum(getsize(join(root, name)) for name in files)

    return total_size, sizeof_fmt(total_size)
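A quick usage sketch for this os.walk() variant (the directory is arbitrary):

total_bytes, total_text = get_dir_total_size('.')
print('Total: {} ({} bytes)'.format(total_text, total_bytes))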
Example #5
def main(argv):
    args = read_parameter(argv)
    
    if args.info:
        logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
    elif args.debug:
        logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(message)s")
    else:
        logging.basicConfig(level=logging.WARNING, format="%(message)s")
    
    logging.info('Input folder: %s' % args.folder)
    
    tar_file = create_tar(args.folder, args.compress)
    logging.info('Created TAR file: %s' % tar_file)
    
    size = get_file_size(tar_file)
    logging.info('File size: %s (%i bytes)' % (sizeof_fmt(size), size))
    
    tree_hash_hex = get_tree_hash_of_file(tar_file)
    logging.info('Hash (SHA-256 treehash): %s' % tree_hash_hex)
    
    description = "files from %s" % re.sub(r'[^\x00-\x7F]+','', args.folder)    # remove non ASCII chars
    archive_id = upload_to_glacier(tar_file, size, description, args.vault, tree_hash_hex)
    
    delete_temp_file(tar_file)
    logging.info('Removed temporary file')
    
    now = datetime.datetime.now()
    print "%s\t%s\t%s\t%s\t%s" % (now, args.folder, args.vault, archive_id, tree_hash_hex)
Example #6
def get_dir_total_size(dir_name: str,
                       ignore_permission_error=True) -> (int, str):
    def _get_sub_size(root_path: str) -> int:
        if isfile(root_path):
            return getsize(root_path)

        total_size = 0

        try:
            for path in listdir(root_path):
                abs_path = join(root_path, path)

                if isfile(abs_path):
                    size = getsize(abs_path)
                else:
                    size = _get_sub_size(abs_path)

                total_size += size

        except Exception as e:
            if type(e) is PermissionError:
                if not ignore_permission_error:
                    print('Error: "{}"'.format(e))
            else:
                print('Path: "{}", error: "{}"'.format(root_path, e))

        return total_size

    total_size = _get_sub_size(dir_name)
    return total_size, sizeof_fmt(total_size)
Example #7
def do(file_name, output_dir='output', debug=True):
    dir_fb2 = os.path.basename(file_name)
    dir_im = os.path.join(output_dir, dir_fb2)
    if not os.path.exists(dir_im):
        os.makedirs(dir_im)
    debug and print(dir_im + ':')

    total_image_size = 0

    with open(file_name, 'rb') as fb2:
        root = BeautifulSoup(fb2, 'html.parser')

        binaries = root.select("binary")
        for i, binary in enumerate(binaries, 1):
            try:
                im_id = binary.attrs['id']
                content_type = binary.attrs['content-type']

                im_file_name = get_file_name_from_binary(im_id, content_type)
                im_file_name = os.path.join(dir_im, im_file_name)

                im_data = base64.b64decode(binary.text.encode())

                count_bytes = len(im_data)
                total_image_size += count_bytes

                with open(im_file_name, mode='wb') as f:
                    f.write(im_data)

                im = Image.open(io.BytesIO(im_data))
                debug and print('    {}. {} {} format={} size={}'.format(
                    i, im_id, sizeof_fmt(count_bytes), im.format, im.size
                ))

            except Exception:
                import traceback
                traceback.print_exc()

    file_size = os.path.getsize(file_name)
    debug and print()
    debug and print('fb2 file size =', sizeof_fmt(file_size))
    debug and print('total image size = {} ({:.2f}%)'.format(
        sizeof_fmt(total_image_size), total_image_size / file_size * 100
    ))
Example #8
def print_inventory(inventory):
    print('Date:', inventory['InventoryDate'])
    for archive in inventory['ArchiveList']:
        print('-')
        print('ID:            ', archive['ArchiveId'])
        print('Description:   ', archive['ArchiveDescription'])
        print('Creation date: ', archive['CreationDate'])
        print('Size:          ', sizeof_fmt(archive['Size']),
              '(%i)' % archive['Size'])
        print('SHA256TreeHash:', archive['SHA256TreeHash'])
Example #9
def do(file_name, output_dir='output', debug=True):
    dir_fb2 = os.path.basename(file_name)
    dir_im = os.path.join(output_dir, dir_fb2)
    os.makedirs(dir_im, exist_ok=True)
    debug and print(dir_im + ':')

    total_image_size = 0

    with open(file_name, 'rb') as fb2:
        tree = etree.XML(fb2.read())

        binaries = tree.xpath("//*[local-name()='binary']")
        for i, binary in enumerate(binaries, 1):
            try:
                im_id = binary.attrib['id']
                content_type = binary.attrib['content-type']

                im_file_name = get_file_name_from_binary(im_id, content_type)
                im_file_name = os.path.join(dir_im, im_file_name)

                im_data = base64.b64decode(binary.text.encode())

                count_bytes = len(im_data)
                total_image_size += count_bytes

                with open(im_file_name, mode='wb') as f:
                    f.write(im_data)

                im = Image.open(io.BytesIO(im_data))
                debug and print('    {}. {} {} format={} size={}'.format(
                    i, im_id, sizeof_fmt(count_bytes), im.format, im.size
                ))

            except Exception:
                import traceback
                traceback.print_exc()

    file_size = os.path.getsize(file_name)
    debug and print()
    debug and print('fb2 file size =', sizeof_fmt(file_size))
    debug and print('total image size = {} ({:.2f}%)'.format(
        sizeof_fmt(total_image_size), total_image_size / file_size * 100
    ))
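All of the do() variants share the same entry point; a usage sketch (the file path is hypothetical):

do('books/example.fb2', output_dir='output', debug=True)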
Example #10
def get_dir_total_size(dir_name: str) -> (int, str):
    total_size = 0

    # for file_name in pathlib.Path(dir_name).rglob('*'):
    # OR:
    for file_name in pathlib.Path(dir_name).glob('**/*'):
        if file_name.is_file():
            total_size += file_name.stat().st_size

    return total_size, sizeof_fmt(total_size)
Example #11
def get_dir_total_size(dir_name: str) -> (int, str):
    total_size = 0

    for file_name in iglob(escape(dir_name) + '/**', recursive=True):
        try:
            if os.path.isfile(file_name):
                total_size += os.path.getsize(file_name)

        except Exception as e:
            print('File: "{}", error: "{}"'.format(file_name, e))

    return total_size, sizeof_fmt(total_size)
Example #12
def get_image_info(file_name__or__bytes__or__bytes_io, pretty_json_str=False):
    data = file_name__or__bytes__or__bytes_io
    type_data = type(data)

    # File name
    if type_data == str:
        with open(data, mode='rb') as f:
            data = f.read()

    if type(data) == bytes:
        import io
        data = io.BytesIO(data)

    length = len(data.getvalue())
    exif = get_exif_tags(data)

    from PIL import Image
    img = Image.open(data)

    # Save order
    from collections import OrderedDict
    info = OrderedDict()
    info['length'] = OrderedDict()
    info['length']['value'] = length
    info['length']['text'] = sizeof_fmt(length)

    info['format'] = img.format
    info['mode'] = img.mode
    info['channels'] = len(img.getbands())
    info['bit_color'] = {
        '1': 1,
        'L': 8,
        'P': 8,
        'RGB': 24,
        'RGBA': 32,
        'CMYK': 32,
        'YCbCr': 24,
        'I': 32,
        'F': 32
    }[img.mode]

    info['size'] = OrderedDict()
    info['size']['width'] = img.width
    info['size']['height'] = img.height

    info['exif'] = exif

    if pretty_json_str:
        import json
        info = json.dumps(info, indent=4, ensure_ascii=False)

    return info
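get_image_info() delegates EXIF extraction to get_exif_tags(), which is not shown. A minimal sketch built on Pillow's Image.getexif(); the tag-name decoding and the stream rewind are assumptions about what the caller expects:

def get_exif_tags(data):
    from PIL import Image, ExifTags

    img = Image.open(data)
    exif = img.getexif()
    data.seek(0)  # rewind so get_image_info() can reopen the same stream

    # Map numeric tag ids to readable names where known
    return {ExifTags.TAGS.get(tag_id, tag_id): str(value)
            for tag_id, value in exif.items()}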
Example #13
def main(argv):
    args = read_parameter(argv)
    
    if args.info:
        logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
    elif args.debug:
        logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(message)s")
    else:
        logging.basicConfig(level=logging.WARNING, format="%(message)s")
    
    logging.info('Archive ID: %s' % args.archive_id)
    
    glacier_client = boto3.client('glacier')
    job_id, tree_hash_hex = start_retrieval_job(glacier_client, args.vault, args.archive_id)
    ## 190 MB job
    #job_id = 'p1Uv1IN7BQaveZLr843hheHQ1rrF47k37HscMl038m_TPtKMlyWFuXGTVc4A_o2Gknqp0wqCFA0sfLjaLHineXHkkBgb'
    #tree_hash_hex = '37436496846009fd0c40fac72db10ff61457074ee4af730ef5a3abb6a06a367b'
    ## 3 MB job
    # job_id = 'hwz0r-BfGXJ69ks5iC6PPA4AnAEVpd222G1mGOUMaG1OuP0qO8oy_iqq7mQxwOzL7Qpz5FlPhd6eVF4Tx0MMdtM4X5be'
    #tree_hash_hex = '3b0a8b676f33708ef577838a68cf1a1c591c981af1f9d03a50a568af79c4965d'
    logging.info('Job is created: %s' % job_id)    
    
    size = monitor_job(glacier_client, args.vault, job_id)
    logging.info('Job is completed, size: %s (%i)' % (sizeof_fmt(size), size))
    
    tar_file_name = download_archive(glacier_client, args.vault, job_id, size, tree_hash_hex)
    logging.info('Downloaded TAR file: %s (%s)' % (tar_file_name, sizeof_fmt(size)))
    
    unpack_tar_file(tar_file_name, args.folder)
    logging.info('Unpacked TAR file to: %s', args.folder)
    
    delete_temp_file(tar_file_name)
    logging.info('Removed temporary TAR file')
    
    now = datetime.datetime.now()
    print "%s\t%s\t%s\t%s\t%s" % (now, args.folder, args.vault, archive_id, tree_hash_hex)
Example #14
def get_file_size():
    print(request.files)

    # check if the post request has the file part
    if 'file' not in request.files:
        return redirect('/')

    length = 0

    file = request.files['file']
    if file:
        data = file.stream.read()
        length = len(data)

    return jsonify({'length': length, 'length_human': sizeof_fmt(length)})
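This handler only makes sense inside a Flask app; the wiring below is a hypothetical sketch (the URL rule and the app setup are assumptions, not part of the original):

from flask import Flask, request, jsonify, redirect

app = Flask(__name__)

@app.route('/file-size', methods=['POST'])  # hypothetical route
def get_file_size():
    ...  # body as in the example above

if __name__ == '__main__':
    app.run(debug=True)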
Example #15
def steamUsers(pbar=False):
    logging = common.setupLogging()
    try:
        logging.info("Running Steam Users Online")

        client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)

        db = client['steam']
        collection = db['steamusers']

        collection.create_index("epochint", unique=True)
        collection.create_index("date", unique=True)

        # pull Steam online users over the last 24 hours
        # https://store.steampowered.com/stats/

        r = requests.get("https://store.steampowered.com/stats/userdata.json")
        if (r.ok):
            data = r.json()[0]['data']

            if (pbar):
                bar = progressbar.ProgressBar(max_value=len(data)).start()

            for i, users in enumerate(data):
                if (pbar):
                    bar.update(i + 1)
                # convert Epoch seconds to UTC time
                # https://stackoverflow.com/questions/1697815/how-do-you-convert-a-python-time-struct-time-object-into-a-datetime-object
                conv_time = datetime.datetime.fromtimestamp(
                    time.mktime(time.gmtime(int(users[0]) / 1000)))
                #update_one will keep whatever information already exists
                collection.update_one({'epochint': int(users[0])}, {
                    '$set': {
                        'numberonlineusers': int(users[1]),
                        'date': conv_time
                    }
                },
                                      upsert=True)
            if (pbar):
                bar.finish()
            logging.info("Finished downloading Steam users online.")
            logging.info("Downloaded: " + common.sizeof_fmt(len(r.content)))
            common.writeBandwidth(db, len(r.content))
        else:
            logging.error("status code: " + str(r.status_code))
    except Exception as e:
        logging.error(str(e))
        time.sleep(1)
Example #16
def downloadAllAppIDs(pbar=False):
	logging = common.setupLogging()
	try:
		logging.info("Downloading All AppIDs")

		# downloads a list of every appid and name from the API
		# and stores in MongoDB collection

		client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
		db = client['steam']
		collection = db['apps']
	
		r = requests.get("https://api.steampowered.com/ISteamApps/GetAppList/v0002/")

		if (r.ok):
			data = r.json()

			# create an index for appid, this vastly improves performance
			collection.create_index("appid", unique=True)

			if (pbar):
				bar = progressbar.ProgressBar(max_value=len(data['applist']['apps'])).start()

			requests_list = []
			for i,app in enumerate(data['applist']['apps']):
				if (pbar):
					bar.update(i+1)
				#UpdateOne will keep whatever information already exists
				requests_list.append(UpdateOne({'appid': int(app['appid'])}, {'$set': app}, upsert=True))
				# do bulk writes in batches, instead of one at a time
				if (i % 1000 == 0 or i+1 == len(data['applist']['apps'])):
					try:
						collection.bulk_write(requests_list)
						requests_list = []
					except BulkWriteError as bwe:
						logging.error(bwe)
			if (pbar):
				bar.finish()
			logging.info("Finished downloading AppIDs.")
			logging.info("Downloaded: " + common.sizeof_fmt(len(r.content)))
			common.writeBandwidth(db, len(r.content))
		else:
			logging.error("status code: " + str(r.status_code))
	except Exception as e:
		logging.error(str(e))
		time.sleep(1)
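The Steam jobs (Examples #15, #16, #23, #27, #28, #30) share a local common module whose logging and bandwidth helpers never appear in the listing. A plausible sketch, assuming a MongoDB 'bandwidth' collection (all names here are assumptions):

import datetime
import logging

def setupLogging():
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s: %(message)s')
    return logging

def writeBandwidth(db, num_bytes):
    # Keep a running record of how much each job downloaded
    db['bandwidth'].insert_one({'date': datetime.datetime.utcnow(),
                                'bytes': num_bytes})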
Example #17
def run():
    total_size = 0

    start = time.time()
    last = start
    last_window = start

    index = 0

    socket = setup()

    while True:
        try:
            start_recv = time.time()
            data = socket.recv()
            print('recv took %rs' % (time.time() - start_recv))
            size = sys.getsizeof(data)
            nparray = np.frombuffer(data)
            print('nparray shape', nparray.shape)
            # del data
            total_size += size
            socket.send(b"client message to server %d" % index)
        except zmq.error.Again:
            print('Waiting for server')
            socket = setup(socket)

        index += 1
        now = time.time()

        if now - last > 1:
            print("bandwidth last 5 seconds is %dMB/s" %
                  (total_size / (now - start) // 10**6))
            print("bandwidth last 5 seconds is %s/s" %
                  common.sizeof_fmt(total_size / (now - start)))
            if now - last_window > 5:
                total_size = 0
                start = now
                last_window = now
            last = now
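run() depends on a setup() helper that is not shown. One plausible sketch, assuming a ZeroMQ DEALER socket with a receive timeout so recv() raises zmq.error.Again when the server goes quiet (the endpoint is an assumption):

import zmq

def setup(old_socket=None):
    if old_socket is not None:
        old_socket.close(linger=0)

    context = zmq.Context.instance()
    sock = context.socket(zmq.DEALER)
    sock.setsockopt(zmq.RCVTIMEO, 1000)  # recv() raises zmq.error.Again after 1s
    sock.connect('tcp://localhost:5556')
    return sock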
Example #18
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'


# SOURCE: https://github.com/qbittorrent/qbittorrent/wiki/WebUI-API-Documentation#get-torrent-list


from common import get_client, sizeof_fmt


qb = get_client()
torrents = qb.torrents(filter='downloading')
total_size = 0

for i, torrent in enumerate(torrents, 1):
    torrent_size = torrent['total_size']
    total_size += torrent_size

    print('{:<3} {} ({})'.format(i, torrent['name'], sizeof_fmt(torrent_size)))

print()
print('Total torrents: {}, total size: {} ({} bytes)'.format(len(torrents), sizeof_fmt(total_size), total_size))
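get_client() comes from the same local common module; a hypothetical sketch built on the python-qbittorrent package (the WebUI address and credentials are assumptions):

from qbittorrent import Client

def get_client():
    qb = Client('http://127.0.0.1:8080/')  # qBittorrent WebUI address
    qb.login('admin', 'adminadmin')        # default WebUI credentials
    return qb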
Example #19
import sys
import time

import zmq

import common

# default port, optionally overridden from the command line
port = "5556"
if len(sys.argv) > 1:
    port = sys.argv[1]
if len(sys.argv) > 2:
    port1 = sys.argv[2]

context = zmq.Context()
socket = context.socket(zmq.SUB)

print("Collecting updates from weather server...")
socket.connect("tcp://localhost:%s" % port)

if len(sys.argv) > 2:
    socket.connect("tcp://localhost:%s" % port1)

# Subscribe to zipcode, default is NYC, 10001
topicfilter = b"0"
socket.setsockopt(zmq.SUBSCRIBE, topicfilter)

total_size = 0
start = time.time()
last = start
for update_nbr in range(10):
    string = socket.recv()
    topic, index, data = string.split()
    size = sys.getsizeof(data)
    total_size += size
    print('received data size of %r index %r' % (size, index))
    now = time.time()
    del data
    del string

    if now - last > 1:
        print("bandwidth is %dMB/s" % (total_size / (now - start) // 10**6))
        print("bandwidth is %s/s" % common.sizeof_fmt(total_size /
                                                      (now - start)))
        last = now
Example #20
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'


from common import sizeof_fmt, get_client


qb = get_client()

torrents_max_top5 = qb.torrents(sort='size', reverse='true', limit='5')
torrents_min_top5 = qb.torrents(sort='size', limit='5')

print('Max top5:')
for i, torrent in enumerate(torrents_max_top5, 1):
    print('    {}. {} ({})'.format(i, torrent['name'], sizeof_fmt(torrent['total_size'])))

print()

print('Min top5:')
for i, torrent in enumerate(torrents_min_top5, 1):
    print('    {}. {} ({})'.format(i, torrent['name'], sizeof_fmt(torrent['total_size'])))
Example #21
def plot(data=None, output=None):
  WORKDIR = os.getcwd()
  RESULTSDIR = data
  RESULTEXT = '.csv'
  IMAGESTAT = 'imagestats'
  IMAGE_SIZE_KEY = 'image_size'
  NUMSYMS_KEY = 'number_symbols'
  GROUP_BAR_WIDTH = .8
  DEFAULT = '_'

  files = []
  apps = []
  stats = {}
  throughput_max = 0 # maximum observed rx mpps
  bar_colors = {
    'linux-dpdk-vhost-user': '******',
    'linux-dpdk-vhost-net': '#000000',
    'unikraft-vhost-user': '******',
    'unikraft-vhost-net': '#8000CA'
  }
  markers = {
    'linux-dpdk-vhost-user': '******',
    'linux-dpdk-vhost-net': ',',
    'unikraft-vhost-user': '******',
    'unikraft-vhost-net': '4'
  }

  labels = {
    'linux-dpdk-vhost-user': '******',
    'linux-dpdk-vhost-net': 'Linux DPDK with vhost-net',
    'unikraft-vhost-user': '******',
    'unikraft-vhost-net': 'Rhea with vhost-net'
  }

  for f in os.listdir(RESULTSDIR):
    if f.endswith(RESULTEXT):
      index = f.replace(RESULTEXT,'')
      files.append(f)

      unikernel = index

      with open(os.path.join(RESULTSDIR, f), 'r') as csvfile:
        csvdata = csv.reader(csvfile, delimiter="\t")
        
        next(csvdata) # skip header

        for row in csvdata:
          if unikernel not in stats:
            stats[unikernel] = {}
          
          throughput = float(row[1]) * KBYTES * KBYTES
          stats[unikernel][str(row[0])] = throughput

          if throughput > throughput_max:
            throughput_max = throughput

  # General style
  common_style(plt)

  throughput_max += KBYTES * KBYTES * 1 # add "margin" above tallest bar

  # Setup matplotlib axis
  fig = plt.figure(figsize=(8, 4))
  renderer = fig.canvas.get_renderer()

  # image size axis
  ax1 = fig.add_subplot(1,1,1)
  ax1.set_ylabel("Throughout (Mp/s)")
  ax1.set_xlabel("Packet Size (Bytes)")
  ax1.grid(which='major', axis='y', linestyle=':', alpha=0.5, zorder=0)
  ax1_yticks = np.arange(0, throughput_max, step=KBYTES * KBYTES * 2)
  ax1.set_yticks(ax1_yticks, minor=False)
  ax1.set_yticklabels([sizeof_fmt(ytick, suffix='') for ytick in ax1_yticks])
  ax1.set_ylim(0, throughput_max)

  # Plot coordinates
  xlabels = list(stats[list(stats.keys())[0]].keys())

  # Adjust margining
  fig.subplots_adjust(bottom=.15) #, top=1)

  for unikernel in stats.keys():
    ax1.plot(list(stats[unikernel].keys()), list(stats[unikernel].values()),
      marker=markers[unikernel],
      label=labels[unikernel],
      zorder=3,
      linewidth=3,
      markersize=9,
      markeredgewidth=4,
      color=bar_colors[unikernel],
    )

  # set up x-axis labels
  xticks = range(0, len(xlabels))
  ax1.set_xticks(xticks)

  ax1.margins(x=.05)

  # Create a unique legend
  handles, labels = plt.gca().get_legend_handles_labels()
  by_label = dict(zip(labels, handles))
  leg = plt.legend(by_label.values(), by_label.keys(), fontsize=LARGE_SIZE, loc='upper right', ncol=1)
  leg.get_frame().set_linewidth(0.0)

  # Save to file
  fig.tight_layout()
  fig.savefig(output) #, bbox_extra_artists=(ax1,), bbox_inches='tight')
Example #22
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

from common import get_client, sizeof_fmt

qb = get_client()


def get_torrents(qb, search_name='', **filters) -> list:
    def match(name: str) -> bool:
        return search_name.lower() in name.lower()

    return [
        torrent for torrent in qb.torrents(**filters) if match(torrent['name'])
    ]


torrents = get_torrents(qb, search_name='.mkv')
total_size = 0

for i, torrent in enumerate(torrents, 1):
    torrent_size = torrent['total_size']
    total_size += torrent_size

    print('{:<3} {} ({})'.format(i, torrent['name'], sizeof_fmt(torrent_size)))

print()
print('Total torrents: {}, total size: {} ({} bytes)'.format(
    len(torrents), sizeof_fmt(total_size), total_size))
Example #23
def updateOpenCritic(refresh_type="OLDEST", pbar=False):
    logging = common.setupLogging()
    try:
        logging.info("Updating OpenCritic games via " + refresh_type)

        client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
        db = client['steam']
        collection_oc = db['opencritic']

        # create an index for id, this vastly improves performance
        collection_oc.create_index("id", unique=True)
        collection_oc.create_index("date")
        collection_oc.create_index("steamId")

        if (refresh_type == "OLDEST"):
            # find a sampling of OpenCritic IDs to work on ordered by date
            # will run on the oldest entries first
            names_cur = collection_oc.aggregate([
                {
                    "$match": {}
                },
                {
                    "$sort": {
                        "date": 1
                    }
                },  # oldest first
                {
                    "$limit": 25
                },
                {
                    "$project": {
                        "id": 1,
                        "_id": 0
                    }
                }
            ])
            # convert cursor to Python list
            to_update = []
            for item in names_cur:
                to_update.append(item['id'])

        if (pbar):
            bar = progressbar.ProgressBar(max_value=len(to_update)).start()

        bytes_downloaded = 0
        for i, oc_id in enumerate(to_update):
            if (pbar):
                bar.update(i + 1)

            try:
                # OpenCritic Game API e.g.
                # https://opencritic.com/api/game/7592
                r = requests.get(
                    requests.Request(
                        'GET', "https://opencritic.com/api/game/" +
                        str(oc_id)).prepare().url)
                if (r.ok):
                    data = r.json()
                    bytes_downloaded = bytes_downloaded + len(r.content)

                    oc = data
                    # add current datetimestamp
                    oc['date'] = datetime.datetime.utcnow()
                    #update_one will keep whatever information already exists
                    collection_oc.update_one({'id': int(oc['id'])},
                                             {'$set': oc},
                                             upsert=True)
                else:
                    logging.error("status code: " + str(r.status_code))
                    logging.error("opencritic game id: " + str(oc_id))

                # sleep for a bit, there's no information on API throttling
                time.sleep(2)  #seconds

                # grab review information which is a separate URL
                # e.g. https://opencritic.com/api/review/game/7592

                r = requests.get(
                    requests.Request(
                        'GET', "https://opencritic.com/api/review/game/" +
                        str(oc_id)).prepare().url)
                if (r.ok):
                    data = r.json()
                    bytes_downloaded = bytes_downloaded + len(r.content)

                    oc['Reviews'] = data
                    #update_one will keep whatever information already exists
                    collection_oc.update_one({'id': int(oc['id'])},
                                             {'$set': oc},
                                             upsert=True)
                else:
                    logging.error("status code: " + str(r.status_code))
                    logging.error("opencritic game id: " + str(oc_id))
            except Exception as e:
                logging.error(str(e) + " - id: " + str(oc_id))

            # sleep for a bit, there's no information on API throttling
            time.sleep(2)  #seconds

        if (pbar):
            bar.finish()
        logging.info("Finished updating OpenCritic games via " + refresh_type)
        logging.info("Downloaded: " + common.sizeof_fmt(bytes_downloaded))
        common.writeBandwidth(db, bytes_downloaded)
    except Exception as e:
        logging.error(str(e))
        time.sleep(1)
Example #24
def plot(data=None, output=None):
  WORKDIR = os.getcwd()
  RESULTSDIR = data
  RESULTEXT = '.csv'
  IMAGESTAT = 'imagestats'
  IMAGE_SIZE_KEY = 'image_size'
  NUMSYMS_KEY = 'number_symbols'
  GROUP_BAR_WIDTH = .8
  DEFAULT = '_'

  files = []
  labels = []
  apps = []
  imagestats = {}
  imagesize_max = 0 # maximum observed image size
  number_symbols_max = 0 # maximum observed symbol count
  total_apps = 0
  bar_colors = {
    'nginx': '#0C8828',
    'redis': '#CE1216',
    'hello': 'dimgray',
    'sqlite': '#4BA3E1'
  }

  labels = {
    'hermitux': 'Hermitux',
    'linuxuser': '******',
    'lupine': 'Lupine',
    'osv': 'OSv',
    'rump': 'Rumprun',
    'unikraft': 'Unikraft',
    'mirage': 'Mirage'
  }

  # Prepare matplotlib data by parsing the individual .csv files.  This process
  # goes through all image sizes and number of symbols and populates a dictionary
  # of unikernels and the application "image stats" based on the framework.
  for f in os.listdir(RESULTSDIR):
    if f.endswith(RESULTEXT):
      index = f.replace(RESULTEXT,'')
      files.append(f)

      result = index.split('-')

      unikernel = result[0]
      app = result[1]

      if unikernel not in imagestats:
        imagestats[unikernel] = {}
      
      if app not in imagestats[unikernel]:
        total_apps += 1
        imagestats[unikernel][app] = 0

      if app not in apps:
        apps.append(app)

      with open(os.path.join(RESULTSDIR, f), 'r') as csvfile:
        size = int(csvfile.readline())
        imagestats[unikernel][app] = size

        # track the tallest bar so the y-axis is scaled to fit it
        if size > imagesize_max:
          imagesize_max = size

  # General style
  common_style(plt)

  imagesize_max += KBYTES * KBYTES * 12 # add MB "margin"
  number_symbols_max += 2000

  # Setup matplotlib axis
  fig = plt.figure(figsize=(8, 5))
  renderer = fig.canvas.get_renderer()

  # image size axis
  ax1 = fig.add_subplot(1,1,1)
  ax1.set_ylabel("Image size")
  ax1.grid(which='major', axis='y', linestyle=':', alpha=0.5, zorder=0)
  ax1_yticks = np.arange(0, imagesize_max, step=KBYTES*KBYTES*2)
  ax1.set_yticks(ax1_yticks, minor=False)
  ax1.set_yticklabels([sizeof_fmt(ytick) for ytick in ax1_yticks])
  ax1.set_ylim(0, imagesize_max)

  # Plot coordinates
  scale = 1. / len(labels.keys())
  xlabels = []

  # Adjust margining
  fig.subplots_adjust(bottom=.15) #, top=1)

  i = 0
  line_offset = 0
  for unikernel in [
      'unikraft',
      'hermitux',
      'linuxuser',
      'lupine',
      'mirage',
      'osv',
      'rump'
    ]:
    xlabels.append(labels[unikernel])
    apps = imagestats[unikernel]

    # Plot a line between unikernel applications
    if i > 0:
      line = plt.Line2D([i * scale, i * scale], [-.02, 1],
          transform=ax1.transAxes, color='black',
          linewidth=1)
      line.set_clip_on(False)
      ax1.add_line(line)

    j = 0
    bar_width = GROUP_BAR_WIDTH / len(apps.keys())
    bar_offset = (bar_width / 2) - (GROUP_BAR_WIDTH / 2)

    # Plot each application
    for app_label in sorted(apps):
      app = imagestats[unikernel][app_label]

      print(unikernel, app_label, app)

      bar = ax1.bar([i + 1 + bar_offset], app,
        label=app_label,
        align='center',
        zorder=3,
        width=bar_width,
        color=bar_colors[app_label],
        linewidth=.5
      )
      
      ax1.text(i + 1 + bar_offset, app + 500000, sizeof_fmt(app),
        ha='center',
        va='bottom',
        fontsize=LARGE_SIZE,
        linespacing=0,
        zorder=2,
        bbox=dict(pad=0, facecolor='white', linewidth=0),
        rotation='vertical'
      )

      bar_offset += bar_width
      j += 1

    i += 1

  # sys.exit(1)

  # set up x-axis labels
  xticks = range(1, len(xlabels) + 1)
  ax1.set_xticks(xticks)
  ax1.set_xticklabels(xlabels, fontsize=LARGE_SIZE, rotation=40, ha='right', rotation_mode='anchor')
  # ax1.set_xticklabels(xlabels, fontsize=LARGE_SIZE, fontweight='bold')
  ax1.set_xlim(.5, len(xlabels) + .5)
  ax1.yaxis.grid(True, zorder=0, linestyle=':')
  ax1.tick_params(axis='both', which='both', length=0)

  # Create a unique legend
  handles, labels = plt.gca().get_legend_handles_labels()
  by_label = dict(zip(labels, handles))
  leg = plt.legend(by_label.values(), by_label.keys(),
    loc='upper left',
    ncol=2,
    fontsize=LARGE_SIZE,
  )
  leg.get_frame().set_linewidth(0.0)

  plt.setp(ax1.lines, linewidth=.5)

  # Save to file
  fig.tight_layout()
  fig.savefig(output) #, bbox_extra_artists=(ax1,), bbox_inches='tight')
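Both plotting examples (#21 and #24) lean on module-level helpers that are not part of the snippets. Plausible definitions, assuming binary units and a shared rcParams-based style (everything here is an assumption):

import matplotlib.pyplot as plt

KBYTES = 1024      # binary unit used for size and throughput ticks
LARGE_SIZE = 16    # font size for labels and legends

def common_style(plt):
    # Apply the shared look used across the plots
    plt.rcParams['font.size'] = LARGE_SIZE
    plt.rcParams['figure.autolayout'] = False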
Example #25
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

from common import sizeof_fmt, get_client

qb = get_client()

torrents = qb.torrents()

torrents_max_top5 = sorted(torrents,
                           key=lambda x: x['total_size'],
                           reverse=True)[:5]
torrents_min_top5 = sorted(torrents, key=lambda x: x['total_size'])[:5]

print('Max top5:')
for i, torrent in enumerate(torrents_max_top5, 1):
    print('    {}. {} ({})'.format(i, torrent['name'],
                                   sizeof_fmt(torrent['total_size'])))

print()

print('Min top5:')
for i, torrent in enumerate(torrents_min_top5, 1):
    print('    {}. {} ({})'.format(i, torrent['name'],
                                   sizeof_fmt(torrent['total_size'])))
Example #26
def do(file_name, output_dir='output', debug=True):
    dir_fb2 = os.path.basename(file_name)
    dir_im = os.path.join(output_dir, dir_fb2)
    if not os.path.exists(dir_im):
        os.makedirs(dir_im)
    debug and print(dir_im + ':')

    # Analogous to fb2_pictures__using_xml_expat.py

    PARSE_DATA = {
        'last_start_tag': None,
        'last_tag_attrs': None,
        'last_tag_data': '',
        'total_image_size': 0,
        'number': 1,
    }

    class BinaryHandler(xml.sax.ContentHandler):
        def startElement(self, name, attrs):
            PARSE_DATA['last_start_tag'] = name
            PARSE_DATA['last_tag_attrs'] = attrs
            PARSE_DATA['last_tag_data'] = ''

        def characters(self, content):
            if PARSE_DATA['last_start_tag'] != 'binary':
                return

            PARSE_DATA['last_tag_data'] += content

        def endElement(self, name):
            if name != 'binary':
                return

            data = PARSE_DATA['last_tag_data']

            try:
                im_id = PARSE_DATA['last_tag_attrs']['id']
                content_type = PARSE_DATA['last_tag_attrs']['content-type']

                im_file_name = get_file_name_from_binary(im_id, content_type)
                im_file_name = os.path.join(dir_im, im_file_name)

                im_data = base64.b64decode(data.encode())

                count_bytes = len(im_data)
                PARSE_DATA['total_image_size'] += count_bytes

                with open(im_file_name, mode='wb') as f:
                    f.write(im_data)

                im = Image.open(io.BytesIO(im_data))
                debug and print('    {}. {} {} format={} size={}'.format(
                    PARSE_DATA['number'], im_id, sizeof_fmt(count_bytes),
                    im.format, im.size))

                PARSE_DATA['number'] += 1

            except Exception:
                import traceback
                traceback.print_exc()

    parser = xml.sax.make_parser()
    parser.setContentHandler(BinaryHandler())
    parser.parse(file_name)

    file_size = os.path.getsize(file_name)
    debug and print()
    debug and print('fb2 file size =', sizeof_fmt(file_size))
    debug and print('total image size = {} ({:.2f}%)'.format(
        sizeof_fmt(PARSE_DATA['total_image_size']),
        PARSE_DATA['total_image_size'] / file_size * 100))
Example #27
def updatePriceHistory(refresh_type="FULL", pbar=False):
    logging = common.setupLogging()
    try:
        logging.info("Updating Price History via " + refresh_type)

        client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
        db = client['steam']
        collection_hist = db['pricehistory']
        collection_apps = db['apps']

        # create an index for appid, this vastly improves performance
        collection_hist.create_index("appid")
        collection_hist.create_index("date")

        # e.g.: CS Source
        # https://store.steampowered.com/api/appdetails?appids=240&cc=us&l=en

        # https://wiki.teamfortress.com/wiki/User:RJackson/StorefrontAPI#Known_methods
        # https://stackoverflow.com/questions/13784059/how-to-get-the-price-of-an-app-in-steam-webapi

        # find prices for all games and dlc
        to_update = collection_apps.distinct(
            "appid", {
                "updated_date": {
                    "$exists": True
                },
                "type": {
                    "$in": ["game", "dlc"]
                },
                "is_free": False,
                "price_overview": {
                    "$exists": True
                },
                "failureCount": {
                    "$exists": False
                }
            })

        if (refresh_type == "PARTIAL"):
            # sort by newest to oldest updated in pricehistory
            appid_dict = collection_hist.aggregate([
                {
                    "$group": {
                        "_id": "$appid",
                        "maxDate": {
                            "$max": "$date"
                        }
                    }
                },
                {
                    "$sort": {
                        "maxDate": -1
                    }
                }  # newest first
            ])
            for item in appid_dict:
                if len(to_update) == 1200:
                    break
                else:
                    if item['_id'] in to_update:
                        # remove this fairly "new" appid from our list items to run on and refresh
                        to_update.remove(item['_id'])

        if (pbar):
            bar = progressbar.ProgressBar(max_value=len(to_update)).start()

        if (refresh_type == "FULL"):
            # shuffle the appids so we hit new ones each time
            random.shuffle(to_update)  #in-place

        bytes_downloaded = 0
        appids = []
        for i, appid in enumerate(to_update):
            appids.append(appid)
            if (pbar):
                bar.update(i + 1)
            # run 20 or so at a time
            if ((i + 1) % 20 == 0 or (i + 1) == len(to_update)):
                try:
                    # create a comma-delimited string of appids
                    appids_str = ','.join(map(str, appids))
                    # https://github.com/BrakeValve/dataflow/issues/5
                    # e.g.
                    # https://store.steampowered.com/api/appdetails?appids=662400,833310,317832,39150,830810,224540,931720,261900,431290,914410,812110,216464,826503,509681,71115,24679,231474,202452,863900,457100&cc=us&l=en&filters=price_overview
                    r = requests.get(
                        "https://store.steampowered.com/api/appdetails?appids="
                        + appids_str + "&cc=us&l=en&filters=price_overview")
                    if (r.ok):
                        data = r.json()
                        bytes_downloaded = bytes_downloaded + len(r.content)

                        for k, value in data.items():
                            if (value["success"] is True):
                                if (value['data']):
                                    price_hist = value['data'][
                                        'price_overview']
                                    # set the appid based on the key
                                    price_hist['appid'] = int(k)
                                    # add current datetimestamp
                                    price_hist[
                                        'date'] = datetime.datetime.utcnow()
                                    # remove formatted values, not needed
                                    # if they ever get added to the database, this will remove them
                                    # db.getCollection('pricehistory').update({},{"$unset": {"initial_formatted":1, "final_formatted":1, "currency":1}}, {multi: true})
                                    # and to validate that it worked, this should return nothing:
                                    # db.getCollection('pricehistory').find({"$or": [{"initial_formatted":{"$exists":true}}, {"final_formatted":{"$exists":true}}, {"currency":{"$exists":true}} ]})
                                    price_hist.pop('initial_formatted', None)
                                    price_hist.pop('final_formatted', None)
                                    price_hist.pop('currency', None)
                                    collection_hist.insert_one(price_hist)
                                else:
                                    # No price_overview information returned, remove it from the entry
                                    # to prevent future unnecessary calls.  This is also an indicator
                                    # of stale app information.
                                    collection_apps.update_one(
                                        {'appid': int(k)},
                                        {"$unset": {
                                            "price_overview": ""
                                        }})
                                    logging.info(
                                        "No price information returned for appid: "
                                        + str(k) +
                                        " - clearing app price info.")
                    else:
                        logging.error("status code: " + str(r.status_code))
                        logging.error("price history appids: " + appids_str)
                except Exception as e:
                    logging.error(
                        str(e) + " - appids: " + str(appids_str) +
                        " - data: " + str(value))

                appids = []

                # sleep for a bit, the API is throttled
                # limited to 200 requests every 5 minutes or so...
                # 10 requests every 10 seconds
                # 100,000 requests per day
                time.sleep(1.75)  #seconds

        if (pbar):
            bar.finish()
        logging.info("Finished updating price history via " + refresh_type)
        logging.info("Downloaded: " + common.sizeof_fmt(bytes_downloaded))
        common.writeBandwidth(db, bytes_downloaded)
    except Exception as e:
        logging.error(str(e))
        time.sleep(1)
Example #28
def updateTwitchTopGames(refresh_type="TOP", pbar=False):
	logging = common.setupLogging()
	try:
		logging.info("Updating Twitch top games via " + refresh_type)

		client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
		db = client['steam']
		collection_twitchhistorical = db['twitchhistorical']
		collection_apps = db['apps']

		# create an index for id, this vastly improves performance
		collection_twitchhistorical.create_index("id")
		collection_twitchhistorical.create_index("date")
		collection_twitchhistorical.create_index("steamId")

		# API page w/examples
		# https://dev.twitch.tv/docs/api/

		# grab the top X number of games on Twitch
		top_x = 100
		# number of results to return in each top games request
		first_x = 50
		# number of streams to return for each game, max 100
		num_streams = 100

		access_token = getTwitchToken(logging)

		if (pbar):
			bar = progressbar.ProgressBar(max_value=int(top_x * num_streams)).start()

		bytes_downloaded = 0
		game_rank = 1 # for game rank/order returned via Twitch
		i = 1 # for progress bar
		while (i < top_x * num_streams):
			try:
				# Twitch Top Games
				# https://dev.twitch.tv/docs/api/reference/#get-top-games
				params = {'first':first_x}
				if i != 1:
					params = {'first':first_x, 'after':pagination}
				r = requests.get("https://api.twitch.tv/helix/games/top", headers={'Client-ID':config.twitch_client_id, 'Authorization':"Bearer "+access_token}, params=params)
				if (r.ok):
					if (int(r.headers['Ratelimit-Remaining']) < 4):
						logging.info("rate limit: " + r.headers['Ratelimit-Limit'])
						logging.info("rate limit remaining: " + r.headers['Ratelimit-Remaining'])
					data = r.json()
					bytes_downloaded = bytes_downloaded + len(r.content)
					if (data['pagination']['cursor']):
						pagination = data['pagination']['cursor']
					else:
						logging.error("Unable to find pagination cursor")
						break # out of while loop

					for value in data['data']:
						# add to our historical listing
						# https://dev.twitch.tv/docs/api/reference/#get-streams
						r_g = requests.get("https://api.twitch.tv/helix/streams", headers={'Client-ID': config.twitch_client_id, 'Authorization':"Bearer "+access_token}, params={'first':num_streams, 'game_id':int(value['id'])})
						if (r_g.ok):
							if (int(r_g.headers['Ratelimit-Remaining']) < 4):
								logging.info("rate limit: " + r_g.headers['Ratelimit-Limit'])
								logging.info("rate limit remaining: " + r_g.headers['Ratelimit-Remaining'])
							data_g = r_g.json()
							for v in data_g['data']:
								v['date'] = datetime.datetime.utcnow()
								v.pop('thumbnail_url', None)
								v['name'] = value['name'] # pull the game name from our top games listing
								v['gamerank'] = game_rank
								appid = getSteamId(value['name'], collection_apps)
								if (appid):
									v['steamId'] = appid
								collection_twitchhistorical.insert_one(v)
								if (pbar):
									bar.update(i)
								i = i + 1
						else:
							logging.error("status code: " + str(r.status_code))
							# check OAuth and tokens
							if (r_g.status_code == 401):
								sys.exit(1)

						game_rank = game_rank + 1
						# https://dev.twitch.tv/docs/api/guide/#rate-limits
						time.sleep(2) #seconds
				else:
					logging.error("status code: " + str(r.status_code))
					# check OAuth and tokens
					if (r.status_code == 401):
						sys.exit(1)

				# sleep for a bit
				# https://dev.twitch.tv/docs/api/guide/#rate-limits
				time.sleep(2) #seconds
				# in some cases, there aren't the max number of streams for a game, thus we can jump ahead
				i = int(game_rank * num_streams)
			except Exception as e:
				logging.error(str(e))
				time.sleep(1)

		if (pbar):
			bar.finish()
		
		logging.info("Finished updating Twitch top games via " + refresh_type)
		logging.info("Downloaded: " + common.sizeof_fmt(bytes_downloaded))
		common.writeBandwidth(db, bytes_downloaded)
	except Exception as e:
		logging.error(str(e))
		time.sleep(1)
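Example #28 also calls two helpers that never appear: getTwitchToken() and getSteamId(). Hypothetical sketches; the OAuth client-credentials flow matches Twitch's documented API, while the name lookup is a guess at the apps collection schema:

import re
import requests

def getTwitchToken(logging):
    # App access token via the OAuth client-credentials flow
    r = requests.post('https://id.twitch.tv/oauth2/token', params={
        'client_id': config.twitch_client_id,
        'client_secret': config.twitch_client_secret,  # assumed config field
        'grant_type': 'client_credentials',
    })
    r.raise_for_status()
    return r.json()['access_token']

def getSteamId(name, collection_apps):
    # Case-insensitive exact-name match against the apps collection
    doc = collection_apps.find_one(
        {'name': {'$regex': '^' + re.escape(name) + '$', '$options': 'i'}})
    return doc['appid'] if doc else None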
Example #29
    async def handle_request(self, reader: StreamReader, writer: StreamWriter):
        while True:
            request = await common.recv_json(reader)

            print(request)

            if request is None:
                break

            if not request["method"]:
                print("Invalid Request: missing method field.")

            if request["method"].upper().startswith("LIST"):
                files = [f for f in os.listdir(".") if os.path.isfile(f)]
                file_sizes = [
                    common.sizeof_fmt(os.path.getsize(f))
                    for f in os.listdir(".") if os.path.isfile(f)
                ]

                files = list(zip(files, file_sizes))

                files = filter(filter_files, files)

                files = list(files)

                await common.send_json(writer, {
                    "files": files,
                })

            elif request["method"].upper().startswith("RETRIEVE"):
                filename = request["filename"]

                if not os.path.exists(filename):
                    await common.send_json(writer,
                                           {"error": "file does not exist"})
                    return

                with open(filename, "rb") as infile:
                    contents = infile.read()

                    # base64 encode the binary file
                    contents = base64.b64encode(contents).decode("utf-8")

                    await common.send_json(writer, {
                        "filename": filename,
                        "content": contents
                    })

            elif request["method"].upper().startswith("STORE"):
                filename = request["filename"]

                with open(filename, "wb") as outfile:
                    # base64 decode from the request body
                    contents = base64.b64decode(request["content"])
                    outfile.write(contents)

                # threaded_print("-> Store Complete")

            elif request["method"].upper().startswith("QUIT"):
                # threaded_print("-> Client disconnected via QUIT")
                pass

            elif request["method"].upper().startswith("DELETE"):

                filename = request["filename"]

                if not os.path.exists(filename):
                    await common.send_json(writer,
                                           {"error": "file does not exist"})
                else:
                    os.remove(filename)
                    await common.send_json(writer, {"success": "file removed"})

            else:
                await common.send_json(writer,
                                       {"error": "Unsupported command"})

        writer.close()
        await writer.wait_closed()
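The file server assumes common.send_json()/recv_json() framing helpers. A minimal sketch using newline-delimited JSON (the framing itself is an assumption; the original may length-prefix instead):

import json
from asyncio import StreamReader, StreamWriter

async def send_json(writer: StreamWriter, obj) -> None:
    # One JSON document per line
    writer.write(json.dumps(obj).encode('utf-8') + b'\n')
    await writer.drain()

async def recv_json(reader: StreamReader):
    line = await reader.readline()
    if not line:  # connection closed
        return None
    return json.loads(line)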
Example #30
def steamReviews(pbar=False):
    logging = common.setupLogging()
    try:
        logging.info("Running Steam Reviews")

        client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)

        db = client['steam']
        collection = db['apps']

        to_update = collection.aggregate([
            {
                "$match": {
                    "type": {
                        "$in": ["game", "dlc"]
                    }
                }
            },
            {
                "$sort": {
                    "reviews.last_updated": 1
                }
            },  # oldest first
            {
                "$limit": 50
            },
            {
                "$project": {
                    "appid": 1,
                    "_id": 0
                }
            }
        ])

        to_update = ([item['appid'] for item in to_update])

        if (pbar):
            bar = progressbar.ProgressBar(max_value=len(to_update)).start()

        bytes_downloaded = 0
        for i, appid in enumerate(to_update):
            if (pbar):
                bar.update(i + 1)

            #logging.info("Running on appid: " + str(appid))
            r = requests.get(
                "https://store.steampowered.com/appreviewhistogram/" +
                str(appid) + "?l=english&review_score_preference=0")
            if (r.ok):
                bytes_downloaded = bytes_downloaded + len(r.content)

                data = r.json()['results']

                # add current datetimestamp
                data['last_updated'] = datetime.datetime.utcnow()

                # convert Epoch seconds to UTC time
                # https://stackoverflow.com/questions/1697815/how-do-you-convert-a-python-time-struct-time-object-into-a-datetime-object
                if ('start_date' in data and data['start_date']):
                    data['start_date'] = datetime.datetime.fromtimestamp(
                        time.mktime(
                            time.gmtime(round(float(data['start_date'])))))
                if ('end_date' in data and data['end_date']):
                    data['end_date'] = datetime.datetime.fromtimestamp(
                        time.mktime(time.gmtime(round(float(
                            data['end_date'])))))

                if ('recent_events' in data):
                    for k, event in enumerate(data['recent_events']):
                        if (event['start_date']):
                            data['recent_events'][k][
                                'start_date'] = datetime.datetime.fromtimestamp(
                                    time.mktime(
                                        time.gmtime(
                                            round(float(
                                                event['start_date'])))))
                            data['recent_events'][k][
                                'end_date'] = datetime.datetime.fromtimestamp(
                                    time.mktime(
                                        time.gmtime(
                                            round(float(event['end_date'])))))

                if ('rollups' in data):
                    for k, event in enumerate(data['rollups']):
                        if (event['date']):
                            data['rollups'][k][
                                'date'] = datetime.datetime.fromtimestamp(
                                    time.mktime(
                                        time.gmtime(round(float(
                                            event['date'])))))

                if ('recent' in data):
                    for k, event in enumerate(data['recent']):
                        if (event['date']):
                            data['recent'][k][
                                'date'] = datetime.datetime.fromtimestamp(
                                    time.mktime(
                                        time.gmtime(round(float(
                                            event['date'])))))

                #update_one will keep whatever information already exists
                collection.update_one({'appid': int(appid)},
                                      {'$set': {
                                          'reviews': data
                                      }},
                                      upsert=True)
            else:
                logging.error("status code: " + str(r.status_code))

            if (pbar):
                bar.update(i + 1)

            time.sleep(1)

        if (pbar):
            bar.finish()

        logging.info("Finished downloading Steam reviews.")
        logging.info("Downloaded: " + common.sizeof_fmt(bytes_downloaded))
        common.writeBandwidth(db, bytes_downloaded)

    except Exception as e:
        logging.error(str(e))
        time.sleep(3)