def assert_extra_slave_data(mesos_state, humanize_output=False):
    if not slaves_registered(mesos_state):
        return ('  No mesos slaves registered on this cluster!', False)
    extra_slave_data = get_extra_mesos_slave_data(mesos_state)
    rows = [('Hostname', 'CPU (free/total)', 'RAM (free/total)', 'Disk (free/total)')]
    for slave in extra_slave_data:
        if humanize_output:
            formatted_line = (
                slave['hostname'],
                '%.2f/%.2f' % (slave['free_resources']['cpus'], slave['total_resources']['cpus']),
                '%s/%s' % (naturalsize(slave['free_resources']['mem'] * 1024 * 1024, gnu=True),
                           naturalsize(slave['total_resources']['mem'] * 1024 * 1024, gnu=True)),
                '%s/%s' % (naturalsize(slave['free_resources']['disk'] * 1024 * 1024, gnu=True),
                           naturalsize(slave['total_resources']['disk'] * 1024 * 1024, gnu=True)),
            )
        else:
            formatted_line = (
                slave['hostname'],
                '%.2f/%.2f' % (slave['free_resources']['cpus'], slave['total_resources']['cpus']),
                '%.2f/%.2f' % (slave['free_resources']['mem'], slave['total_resources']['mem']),
                '%.2f/%.2f' % (slave['free_resources']['disk'], slave['total_resources']['disk']),
            )
        rows.append(formatted_line)
    result = ('\n'.join('  %s' % row for row in format_table(rows))[2:], True)
    return result
def download(self, destination, progressbar):
    r = requests.get(self.url, headers={'referer': self.referer}, stream=True)
    if not r.ok:
        progressbar.error("Download failed with code %d" % r.status_code)
        return
    size = int(r.headers.get("content-length", 0))
    if not size:
        progressbar.no_progress("Downloading (Unknown Size)")
    else:
        progressbar.state = "Starting downloading (%s)" % humanize.naturalsize(size)
    progress = 0
    # open in binary mode: r.iter_content() yields bytes
    with open(os.path.join(destination, "%s.mp3" % self.track), "wb") as f:
        for chunk in r.iter_content(self.CHUNK_SIZE):
            f.write(chunk)
            progress += len(chunk)
            if size:
                progressbar.progress = progress / float(size)
                progressbar.state = (
                    "Downloading (%s / %s)"
                    % (humanize.naturalsize(progress), humanize.naturalsize(size))
                )
    progressbar.done("Download complete (%s)" % humanize.naturalsize(progress))
def format_data(self, psutil_data):
    data = []
    #
    # Load average
    #
    load = ' '.join(str(l) for l in psutil_data['Load average'])
    data.append(('Load average', load))
    #
    # Network
    #
    bytes_recv = psutil_data['Network']['eth0']['bytes_recv']
    bytes_sent = psutil_data['Network']['eth0']['bytes_sent']
    bytes_recv = humanize.naturalsize(bytes_recv, gnu=True)
    bytes_sent = humanize.naturalsize(bytes_sent, gnu=True)
    data.append(('Network', (('Bytes sent', bytes_sent),
                             ('Bytes received', bytes_recv))))
    #
    # Swap memory
    #
    swap_perc = psutil_data['Swap memory']['percent']
    sin = psutil_data['Swap memory']['sin']
    sout = psutil_data['Swap memory']['sout']
    # psutil's `sin` is swap-in and `sout` is swap-out; the labels were swapped
    swap = (('% used (best: 0.0)', swap_perc),
            ('Pages per second (in) (best: 0)', sin),
            ('Pages per second (out) (best: 0)', sout))
    data.append(('Swap memory', swap))
    return data
def cb(tx_bytes, total_bytes):
    total_time = datetime.now() - start_time
    total_time = total_time.total_seconds()
    total_time_s = floor(total_time)
    if (total_time_s % LOG_INTERVAL) != 0:
        return
    nsize_tx = naturalsize(tx_bytes, binary=True, format='%.2f')
    nsize_total = naturalsize(total_bytes, binary=True, format='%.2f')
    speed_in_s = tx_bytes / total_time
    speed_in_s = naturalsize(speed_in_s, binary=True, format='%.2f')
    # the format string has four placeholders; the stray fifth argument
    # (total_time_s) was unused and has been dropped
    _log.info('Downloaded {} / {} in {} ({}/s)'.format(
        nsize_tx, nsize_total,
        naturaldelta(datetime.now() - start_time),
        speed_in_s))
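# The log line above, shown standalone with made-up byte counts and duration
# (assumes only the humanize package; binary=True formats in KiB/MiB):
from datetime import timedelta
from humanize import naturaldelta, naturalsize

print('Downloaded {} / {} in {} ({}/s)'.format(
    naturalsize(52428800, binary=True, format='%.2f'),
    naturalsize(104857600, binary=True, format='%.2f'),
    naturaldelta(timedelta(seconds=95)),
    naturalsize(551882, binary=True, format='%.2f')))
# -> Downloaded 50.00 MiB / 100.00 MiB in a minute (538.95 KiB/s)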
def parse_json(self, json_data):
    log.info('parsing response')
    try:
        bean = json_data['beans'][0]
        space_used_pc = bean['PercentUsed']
        # the way below is more informative
        #assert type(space_used_pc) == float
        if re.search(r'e-\d+$', str(space_used_pc)):
            space_used_pc = 0
        if not isFloat(space_used_pc):
            raise UnknownError("non-float returned for PercentUsed by namenode '{0}:{1}'"
                               .format(self.host, self.port))
        assert space_used_pc >= 0
        stats = {}
        for stat in ('Total', 'TotalBlocks', 'TotalFiles', 'Used'):
            stats[stat] = bean[stat]
            if not isInt(stats[stat]):
                raise UnknownError("non-integer returned for {0} by namenode '{1}:{2}'"
                                   .format(stat, self.host, self.port))
            stats[stat] = int(stats[stat])
        self.ok()
        self.msg = 'HDFS space used = {0:.2f}% ({1}/{2})'\
                   .format(space_used_pc,
                           humanize.naturalsize(stats['Used']),
                           humanize.naturalsize(stats['Total']))
        self.check_thresholds(space_used_pc)
        self.msg += ", in {0:d} files spread across {1:d} blocks".format(stats['TotalFiles'],
                                                                         stats['TotalBlocks'])
        self.msg += " | 'HDFS % space used'={0:f}%{1}".format(space_used_pc, self.get_perf_thresholds())
        self.msg += " 'HDFS space used'={0:d}b".format(stats['Used'])
        self.msg += " 'HDFS file count'={0:d}".format(stats['TotalFiles'])
        self.msg += " 'HDFS block count'={0:d}".format(stats['TotalBlocks'])
    except KeyError as _:
        raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"
                           .format(self.host, self.port, _, support_msg_api()))
    except ValueError as _:
        raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"
                           .format(self.host, self.port, _))
def progressHook(num_blocks, block_size, total_size):
    percent = (num_blocks * block_size / (1.0 * total_size)) * 100.0
    sizeH = humanize.naturalsize(num_blocks * block_size, gnu=True)
    total_sizeH = humanize.naturalsize(total_size, gnu=True)
    sys.stdout.write('\r%s / %s \t\t%.2f %%' % (sizeH, total_sizeH, percent))
    sys.stdout.flush()
def capacity(self):
    """Datastore Capacity Information"""
    self.dialog.infobox(
        title=self.title,
        text='Retrieving information ...'
    )
    uncommitted = self.obj.summary.uncommitted if self.obj.summary.uncommitted else 0
    elements = [
        pvc.widget.form.FormElement(
            label='Capacity',
            item=humanize.naturalsize(self.obj.summary.capacity, binary=True)
        ),
        pvc.widget.form.FormElement(
            label='Free Space',
            item=humanize.naturalsize(self.obj.summary.freeSpace, binary=True)
        ),
        pvc.widget.form.FormElement(
            label='Uncommitted Space',
            item=humanize.naturalsize(uncommitted, binary=True)
        ),
    ]
    form = pvc.widget.form.Form(
        dialog=self.dialog,
        form_elements=elements,
        title=self.title,
        text='Datastore capacity information'
    )
    return form.display()
def calculate_summary(self):
    class ImageStat:
        def __init__(self):
            self.total_size = 0
            self.linked_size = 0

        def add_blob(self, size, linked):
            self.total_size += size
            if linked:
                self.linked_size += size

    counter_total = collections.defaultdict(ImageStat)
    linked_blobs = set(self.linked_blobs.keys())
    for (blob, name) in self.all_blobs.items():
        size = self._request_blob_size(name, blob)
        if size is not None and size > 0:
            if blob not in linked_blobs:
                self.detached_blobs[blob] = name
            counter_total[name].add_blob(size, blob in linked_blobs)
    # sort images by total size, largest first; the original used Python 2-only
    # tuple-parameter lambdas (`lambda (key, stat): ...`), rewritten here so the
    # code also runs on Python 3
    for (key, stat) in sorted(counter_total.items(),
                              key=lambda item: item[1].total_size,
                              reverse=True):
        logging.info("[%s] : %s, linked: %s, detached: %s, size share: %0.2f%%" %
                     (key,
                      humanize.naturalsize(stat.total_size, gnu=True),
                      humanize.naturalsize(stat.linked_size, gnu=True),
                      humanize.naturalsize(stat.total_size - stat.linked_size, gnu=True),
                      100.0 * stat.linked_size / stat.total_size))
    logging.info("Total size: %s" % humanize.naturalsize(
        sum(stat.total_size for stat in counter_total.values()), gnu=True))
def run(self, args, config, storage, remotes):
    table_lines = [('<b>NAME</b>', '<b>TYPE</b>', '<b>LAST</b>', '<b>NEXT</b>', '<b>LAST SIZE</b>')]
    for remote in sorted(remotes.list(), key=lambda x: x.name):
        latest_ref = '%s/latest' % remote.name
        latest_backup = storage.get_backup(latest_ref)
        latest_date_text = '-'
        next_date_text = '-'
        size = '-'
        if latest_backup is None:
            if remote.scheduler is not None:
                next_date_text = '<color fg=yellow>now</color>'
        else:
            size_total = sum(latest_backup.stats.get(x, 0) for x in STATS_TOTAL)
            size_new = sum(latest_backup.stats.get(x, 0) for x in STATS_NEW)
            size = '%s (+%s)' % (humanize.naturalsize(size_total, binary=True),
                                 humanize.naturalsize(size_new, binary=True))
            latest_date_text = latest_backup.start_date.humanize()
            if remote.scheduler is not None and remote.scheduler['enabled']:
                next_date = latest_backup.start_date + datetime.timedelta(
                    seconds=remote.scheduler['interval'] * 60)
                if next_date > arrow.now():
                    next_date_text = '<color fg=green>%s</color>' % next_date.humanize()
                else:
                    next_date_text = '<color fg=red>%s</color>' % next_date.humanize()
        table_lines.append((remote.name, remote.type, latest_date_text, next_date_text, size))
    printer.table(table_lines)
def train_to_disk(markovify_class, f, data, state_size):
    """Train a Markov model, convert it to a disk-readable format, and save it.

    This function uses markovify machinery (or markovify-compatible
    machinery) to train the model in the first place. This has the advantage
    that lots of finicky natural-language code is already written for it,
    e.g. part-of-speech tagging with nltk and retries to avoid regenerating
    the source material.

    Args:
    - markovify_class: class to use, e.g. markovify.text.Text
    - f: file handle of the output file to which the model will be written
    - data: input data, with which the provided class will be trained, and
      which will also be saved in the model file
    - state_size: how many tokens to keep in order to predict the next token.
    """
    logging.info("training %d-gram model from %s of data..",
                 state_size, humanize.naturalsize(len(data)))
    text = markovify_class(data, state_size)
    logging.info("model trained in memory, building to disk..")
    logging.info("consuming %s RAM", humanize.naturalsize(get_ram_usage()))
    build_disk_model(text.chain, f, data=data, model_class=markovify_class)
    f.flush()
    size = os.fstat(f.fileno()).st_size
    logging.info("model built and written to disk (%s)", humanize.naturalsize(size))
def load_from_disk(markovify_class, f, cache_levels, cache_ratio):
    """Load a Markov model from a saved file on disk and wrap it in a class.

    Args:
    - markovify_class: markovify-compatible class (or callable), e.g.
      markovify.text.Text
    - f: file handle of model file, which needs to be kept open while using
      the markov model
    - cache_levels: maximum number of levels of the tree to cache
    - cache_ratio: ratio of nodes that are cached at each level (e.g. the 20%
      most frequent nodes at each eligible level).
    """
    f.seek(0)
    logging.info("reading training data..")
    data = read_model_training_data(f)
    logging.info("read %s of training data", humanize.naturalsize(len(data)))
    size = os.fstat(f.fileno()).st_size
    f.seek(0)
    logging.info("reading model of %s..", humanize.naturalsize(size))
    model = read_model(f, size)
    logging.info("%d-gram model read, total RAM consumption %s",
                 model.header.state_size, humanize.naturalsize(get_ram_usage()))
    if cache_levels > 0 and cache_ratio > 0:
        logging.info("caching up to %d levels of the %0.2lf%% most frequent nodes",
                     cache_levels, cache_ratio * 100)
        model.enable_caching(cache_levels, cache_ratio)
        logging.info("caching enabled, total RAM consumption %s",
                     humanize.naturalsize(get_ram_usage()))
    chain_like = MarkovifyInterface(model)
    return markovify_class(
        data,
        state_size=model.header.state_size,
        chain=chain_like,
    )
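# A hypothetical round-trip through train_to_disk()/load_from_disk() above.
# markovify.Text, the file names, corpus_text, and the cache settings are
# illustrative assumptions, not part of the original module.
import markovify

corpus_text = open("corpus.txt").read()      # hypothetical training text
with open("model.bin", "w+b") as f:
    train_to_disk(markovify.Text, f, corpus_text, state_size=2)
    model = load_from_disk(markovify.Text, f, cache_levels=2, cache_ratio=0.2)
    print(model.make_sentence())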
def format_table_column_for_healthcheck_resource_utilization_pair(healthcheck_utilization_pair, humanize):
    """Given a tuple of (HealthCheckResult, ResourceUtilization), return a
    string representation of the ResourceUtilization such that it is formatted
    according to the value of HealthCheckResult.healthy.

    Further, humanize the string according to the humanize boolean parameter
    and the metric - be sure to *not* try and humanize if the
    ResourceUtilization metric is cpus (because we don't want to try and show
    that as some other unit).

    :param healthcheck_utilization_pair: a tuple of (HealthCheckResult, ResourceUtilization)
    :param humanize: a boolean indicating if the string should be humanized
    :returns: a string representing the ResourceUtilization.
    """
    color_func = PaastaColors.green if healthcheck_utilization_pair[0].healthy else PaastaColors.red
    utilization = float(healthcheck_utilization_pair[1].total - healthcheck_utilization_pair[1].free)
    if int(healthcheck_utilization_pair[1].total) == 0:
        utilization_perc = 100
    else:
        utilization_perc = utilization / float(healthcheck_utilization_pair[1].total) * 100
    if humanize and healthcheck_utilization_pair[1].metric != 'cpus':
        return color_func('%s/%s (%.2f%%)' % (
            naturalsize(healthcheck_utilization_pair[1].free * 1024 * 1024, gnu=True),
            naturalsize(healthcheck_utilization_pair[1].total * 1024 * 1024, gnu=True),
            utilization_perc,
        ))
    else:
        return color_func('%s/%s (%.2f%%)' % (
            healthcheck_utilization_pair[1].free,
            healthcheck_utilization_pair[1].total,
            utilization_perc,
        ))
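# The humanization rule above in isolation: mem/disk arrive in MiB, so they
# are scaled by 1024 * 1024 before naturalsize(); gnu=True yields the compact
# single-letter suffix. Values here are made up.
import humanize

free_mib, total_mib = 512, 2048
print('%s/%s' % (humanize.naturalsize(free_mib * 1024 * 1024, gnu=True),
                 humanize.naturalsize(total_mib * 1024 * 1024, gnu=True)))
# -> 512.0M/2.0G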
def print_server_session(session):
    total_time = str(datetime.now() - session['connected_since'])[:-7]
    bytes_recv = session['bytes_recv']
    bytes_sent = session['bytes_sent']
    print('<td>{0!s}</td>'.format(session['username']))
    print('<td>{0!s}</td>'.format(session['local_ip']))
    print('<td>{0!s}</td>'.format(session['remote_ip']))
    print('<td>{0!s}</td>'.format(session['port']))
    if 'city' in session and 'country_name' in session:
        country = session['country_name']
        city = session['city']
        if city:
            full_location = '{0!s}, {1!s}'.format(city, country)
        else:
            full_location = country
        flag = 'flags/{0!s}.png'.format(session['location'].lower())
        print('<td><img src="{0!s}" title="{1!s}" alt="{1!s}" /> '.format(flag, full_location))
        print('{0!s}</td>'.format(full_location))
    else:
        print('<td>{0!s}</td>'.format(session['location']))
    print('<td>{0!s} ({1!s})</td>'.format(bytes_recv, naturalsize(bytes_recv, binary=True)))
    print('<td>{0!s} ({1!s})</td>'.format(bytes_sent, naturalsize(bytes_sent, binary=True)))
    print('<td>{0!s}</td>'.format(
        session['connected_since'].strftime('%d/%m/%Y %H:%M:%S')))
    if 'last_seen' in session:
        print('<td>{0!s}</td>'.format(
            session['last_seen'].strftime('%d/%m/%Y %H:%M:%S')))
    else:
        print('<td>ERROR</td>')
    print('<td>{0!s}</td>'.format(total_time))
def printlayerinfo(args, layers, outfile=stdout):
    if args.quiet:
        for l in layers:
            print(l[':short_id'], file=outfile)
        return
    fields_fmt = '\t'.join(['{:<23}'] + ['{:<15}'] * 5)
    print(fields_fmt.format('REPO TAG', 'IMAGE ID', 'PARENT ID', 'CREATED',
                            'LAYER SIZE', 'VIRTUAL SIZE'), file=outfile)
    total_size = sum(l['Size'] for l in layers)
    for layer in layers:
        try:
            image_tag = layer[':repo_tags'][0]
        except IndexError:
            image_tag = '-'
        image_id = layer[':short_id']
        parent_id = layer[':parent_id'][:12].lower()
        if not parent_id:
            parent_id = '-'
        created = naturaltime(layer[':created_dt'])
        layer_size = naturalsize(layer['Size'])
        virt_size = naturalsize(total_size)
        print(fields_fmt.format(image_tag, image_id, parent_id, created,
                                layer_size, virt_size), file=outfile)
        total_size -= layer['Size']
def upload_file_to_s3_in_parts(self, source_filename, destination_path):
    source_file = open(source_filename, 'rb')
    destination_file = self.s3_location(destination_path)
    source = os.path.abspath(source_file.name)
    source_size = os.stat(source).st_size
    log.info("Uploading from %s (%s) to %s in parts",
             source, humanize.naturalsize(source_size), destination_file.full)
    bucket = self.get_bucket(destination_file.bucket)
    try:
        with self.a_multipart_upload(bucket, destination_file.key) as mp:
            for chunk, offset, length in self.determine_chunks(source_size, min_chunk=5242881):
                with FileChunkIO(source, 'r', offset=offset, bytes=length) as fp:
                    log.info("Uploading chunk %s (%s)", chunk + 1, humanize.naturalsize(length))
                    mp.upload_part_from_file(fp, part_num=chunk + 1)
    except boto.exception.S3ResponseError as error:
        if error.status == 403:
            log.error("Seems you are unable to edit this location :(")
            sys.exit(1)
        else:
            raise
    log.info("Finished uploading")
def handle_media(content):
    with tempfile.NamedTemporaryFile() as f:
        f.write(content)
        media = MediaInfo.parse(f.name)
    duration = timedelta(seconds=media.tracks[0].duration // 1000)
    num_tracks = len(media.tracks) - 1
    first_video_track = next((track for track in media.tracks
                              if track.track_type == 'Video'), None)
    first_audio_track = next((track for track in media.tracks
                              if track.track_type == 'Audio'), None)
    info = "\x02Media Info:\x02 {n} track{s}, {duration}, {size}".format(
        size=humanize.naturalsize(media.tracks[0].file_size),
        n=num_tracks,
        s='s' if num_tracks != 1 else '',
        duration=duration
    )
    if first_video_track:
        info += "; {w} x {h} {codec}, {bitrate}bps, {framerate}fps".format(
            codec=first_video_track.format,
            bitrate=humanize.naturalsize(first_video_track.bit_rate, gnu=True).lower(),
            framerate=first_video_track.frame_rate,
            w=first_video_track.width,
            h=first_video_track.height
        )
    if first_audio_track:
        info += "; {ch}ch {codec}, {sr}kHz".format(
            codec=first_audio_track.format,
            ch=first_audio_track.channel_s,
            sr=first_audio_track.sampling_rate // 100 / 10
        )
    return info
def print_aggregate_data(data):
    """Print aggregate data in a human-readable format"""
    collection, values = data
    print 'Collection: %s' % collection
    print 'Ops Count: %s' % values['ops_cnt']
    print 'Size: %s (in) / %s (out)' % (
        humanize.naturalsize(values['size_in']),
        humanize.naturalsize(values['size_out'])
    )
def print_running_transfers():
    li = []
    for tr in sdfiledao.get_files(status=sdconst.TRANSFER_STATUS_RUNNING):
        current_size = os.path.getsize(tr.get_full_local_path()) if os.path.isfile(tr.get_full_local_path()) else 0
        li.append([humanize.naturalsize(current_size, gnu=False),
                   humanize.naturalsize(tr.size, gnu=False),
                   tr.start_date,
                   tr.filename])
    if len(li) > 0:
        print tabulate(li, headers=['Current size', 'Total size', 'Download start date', 'Filename'], tablefmt="plain")
    else:
        print 'No current download'
def print_op(op):
    """Print an op in a human-readable format"""
    print 'Collection: %s' % op['collection']
    print 'Size: %s (in) / %s (out)' % (
        humanize.naturalsize(op['size_in']),
        humanize.naturalsize(op['size_out'])
    )
    print 'Client: %s' % op['client']
    print 'Reply: %s' % op['data_out'][0].strip()[len('reply '):]
    print 'Query: %s' % ' '.join(d.strip() for d in op['data_in'])
def print_client_session(session):
    tuntap_r = session['tuntap_read']
    tuntap_w = session['tuntap_write']
    tcpudp_r = session['tcpudp_read']
    tcpudp_w = session['tcpudp_write']
    auth_r = session['auth_read']
    output('<td>{0!s} ({1!s})</td>'.format(tuntap_r, naturalsize(tuntap_r, binary=True)))
    output('<td>{0!s} ({1!s})</td>'.format(tuntap_w, naturalsize(tuntap_w, binary=True)))
    output('<td>{0!s} ({1!s})</td>'.format(tcpudp_r, naturalsize(tcpudp_r, binary=True)))
    output('<td>{0!s} ({1!s})</td>'.format(tcpudp_w, naturalsize(tcpudp_w, binary=True)))
    output('<td>{0!s} ({1!s})</td>'.format(auth_r, naturalsize(auth_r, binary=True)))
def naturalsize(number, type=None):
    """
    Return a humanized (and translated) file size.
    """
    with humanize.i18n.django_language():
        if type is None:
            return humanize.naturalsize(number)
        if type in ('bin', 'binary'):
            return humanize.naturalsize(number, binary=True)
        if type in ('gnu', 'GNU'):
            return humanize.naturalsize(number, gnu=True)
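# What the wrapper above returns for each `type` value (sample size 3000000;
# assumes humanize's Django i18n integration is available, as the wrapper
# requires it; note the wrapper implicitly returns None for any other type):
for t in (None, 'binary', 'gnu'):
    print(naturalsize(3000000, type=t))
# -> 3.0 MB
# -> 2.9 MiB
# -> 2.9M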
def print_vpn(self, vpn_id, vpn):
    if vpn['state']['success'] == 'SUCCESS':
        pingable = 'Yes'
    else:
        pingable = 'No'
    connection = vpn['state']['connected']
    nclients = vpn['stats']['nclients']
    bytesin = vpn['stats']['bytesin']
    bytesout = vpn['stats']['bytesout']
    vpn_mode = vpn['state']['mode']
    vpn_sessions = vpn['sessions']
    local_ip = vpn['state']['local_ip']
    remote_ip = vpn['state']['remote_ip']
    up_since = vpn['state']['up_since']
    show_disconnect = vpn['show_disconnect']
    anchor = vpn['name'].lower().replace(' ', '_')
    output('<div class="panel panel-success" id="{0!s}">'.format(anchor))
    output('<div class="panel-heading"><h3 class="panel-title">{0!s}</h3>'.format(
        vpn['name']))
    output('</div><div class="panel-body">')
    output('<div class="table-responsive">')
    output('<table class="table table-condensed table-responsive">')
    output('<thead><tr><th>VPN Mode</th><th>Status</th><th>Pingable</th>')
    output('<th>Clients</th><th>Total Bytes In</th><th>Total Bytes Out</th>')
    output('<th>Up Since</th><th>Local IP Address</th>')
    if vpn_mode == 'Client':
        output('<th>Remote IP Address</th>')
    output('</tr></thead><tbody>')
    output('<tr><td>{0!s}</td>'.format(vpn_mode))
    output('<td>{0!s}</td>'.format(connection))
    output('<td>{0!s}</td>'.format(pingable))
    output('<td>{0!s}</td>'.format(nclients))
    output('<td>{0!s} ({1!s})</td>'.format(bytesin, naturalsize(bytesin, binary=True)))
    output('<td>{0!s} ({1!s})</td>'.format(bytesout, naturalsize(bytesout, binary=True)))
    output('<td>{0!s}</td>'.format(up_since.strftime(self.datetime_format)))
    output('<td>{0!s}</td>'.format(local_ip))
    if vpn_mode == 'Client':
        output('<td>{0!s}</td>'.format(remote_ip))
    output('</tr></tbody></table></div>')
    if vpn_mode == 'Client' or nclients > 0:
        self.print_session_table_headers(vpn_mode, show_disconnect)
        self.print_session_table(vpn_id, vpn_mode, vpn_sessions, show_disconnect)
        self.print_session_table_footer()
    output('</div>')
    output('<div class="panel-footer panel-custom">')
    output('{0!s}'.format(vpn['release']))
    output('</div>')
    output('</div>')
def bitrate(stream):
    if not isinstance(stream, int):
        raise TypeError("Argument must be an integer.")
    try:
        if isinstance(jsondata["streams"][stream], dict):
            if ("tags" in jsondata["streams"][stream]
                    and "bit_rate" not in jsondata["streams"][stream]
                    and "BPS" in jsondata["streams"][stream]["tags"]):
                return naturalsize(jsondata["streams"][stream]["tags"]["BPS"]).replace(" MB", "Mbps").replace(" kB", "Kbps")
            elif "bit_rate" in jsondata["streams"][stream]:
                return naturalsize(jsondata["streams"][stream]["bit_rate"]).replace(" MB", "Mbps").replace(" kB", "Kbps")
            else:
                return None
    except (KeyError, IndexError):
        return None
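# The suffix-rewrite trick above in isolation: format bits-per-second as if it
# were a byte size, then rename the unit (sample values are made up):
from humanize import naturalsize

for bps in (128000, 5000000):
    print(naturalsize(bps).replace(" MB", "Mbps").replace(" kB", "Kbps"))
# -> 128.0Kbps
# -> 5.0Mbps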
def accountInfos(self):
    """ Return user account information """
    search = requests.get(self.url + '/users/profile/' + self.uid,
                          headers={'Authorization': self.token},
                          verify=True)
    content = search.json()
    self.username = content['username']
    self.gender = content['gender']
    self.age = content['age']
    self.data_uploaded = humanize.naturalsize(content['uploaded'], binary=True)
    self.data_downloaded = humanize.naturalsize(content['downloaded'], binary=True)
    self.data_ratio = str(round(float(content['uploaded']) / float(content['downloaded'])))
def get_storage_info(self, human=False):
    """
    Get storage info

    :param bool human: whether to return human-readable sizes
    :return: total and used storage
    :rtype: dict
    """
    res = self._req_get_storage_info()
    if human:
        res['total'] = humanize.naturalsize(res['total'], binary=True)
        res['used'] = humanize.naturalsize(res['used'], binary=True)
    return res
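# Hypothetical use of get_storage_info() above; `client` and the field values
# are assumptions for illustration only:
info = client.get_storage_info(human=True)
print(info)   # e.g. {'total': '100.0 GiB', 'used': '23.4 GiB'}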
def _internal(progress):
    if progress == 'finished':
        click.echo('')
        return
    progress = json.loads(progress)
    if 'progressDetail' not in progress or 'status' not in progress:
        return
    if not progress['progressDetail']:
        if progress['id'] not in ids:
            ids[progress['id']] = {'current': 0, 'total': 0}
    else:
        ids[progress['id']] = progress['progressDetail']
    if progress['status'] == 'Already exists':
        ids[progress['id']] = {'current': 100, 'total': 100}
    done = sum([1 for p in ids.values() if 0 < p['total'] == p['current']])
    current = sum([p['current'] for p in ids.values()])
    total = sum([p['total'] for p in ids.values()])
    if total > 0:
        pc_done = int(40.0 * current / total)
    else:
        pc_done = 0
    if pc_done < 40:
        pbar = ('=' * (pc_done - 1)) + '>' + (' ' * (40 - pc_done))
    else:
        pbar = '=' * 40
    click.echo(
        '\r\033[K{0}/{1} layers [{2}] {3}/{4}'.format(
            done, len(ids.keys()), pbar,
            humanize.naturalsize(current), humanize.naturalsize(total)
        ),
        nl=False
    )
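# The 40-column bar arithmetic above, in isolation (made-up totals):
current, total = 300, 1000
pc_done = int(40.0 * current / total)        # 12 of 40 columns filled
if pc_done < 40:
    pbar = ('=' * (pc_done - 1)) + '>' + (' ' * (40 - pc_done))
else:
    pbar = '=' * 40
print('[{}]'.format(pbar))
# -> [===========>                            ]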
def GetItem(self, url, grandid=0, parent='', trytime=1):
    app_url = GetAppUrl()
    token = GetToken()
    print(u'getting files from url {}'.format(url))
    header = {'Authorization': 'Bearer {}'.format(token)}
    try:
        r = requests.get(url, headers=header)
        data = json.loads(r.content)
        values = data.get('value')
        if len(values) > 0:
            for value in values:
                item = {}
                if value.get('folder'):
                    item['type'] = 'folder'
                    item['order'] = 0
                    item['name'] = convert2unicode(value['name'])
                    item['id'] = convert2unicode(value['id'])
                    item['size'] = humanize.naturalsize(value['size'], gnu=True)
                    item['lastModtime'] = date_to_char(parse(value['lastModifiedDateTime']))
                    item['grandid'] = grandid
                    item['parent'] = parent
                    subfolder = items.insert_one(item)
                    if value.get('folder').get('childCount') == 0:
                        continue
                    else:
                        url = (app_url + 'v1.0/me' + value.get('parentReference').get('path')
                               + '/' + value.get('name') + ':/children?expand=thumbnails')
                        self.queue.put(dict(url=url, grandid=grandid + 1, parent=item['id'], trytime=1))
                else:
                    item['type'] = GetExt(value['name'])
                    if GetExt(value['name']) in ['bmp', 'jpg', 'jpeg', 'png', 'gif']:
                        item['order'] = 3
                    elif value['name'] == '.password':
                        item['order'] = 1
                    else:
                        item['order'] = 2
                    item['name'] = convert2unicode(value['name'])
                    item['id'] = convert2unicode(value['id'])
                    item['size'] = humanize.naturalsize(value['size'], gnu=True)
                    item['lastModtime'] = date_to_char(parse(value['lastModifiedDateTime']))
                    item['grandid'] = grandid
                    item['parent'] = parent
                    items.insert_one(item)
        if data.get('@odata.nextLink'):
            self.queue.put(dict(url=data.get('@odata.nextLink'), grandid=grandid,
                                parent=parent, trytime=1))
    except Exception as e:
        trytime += 1
        print(u'error to operate GetItem("{}","{}","{}"), try times: {}, reason: {}'
              .format(url, grandid, parent, trytime, e))
        if trytime <= 3:
            self.queue.put(dict(url=url, grandid=grandid, parent=parent, trytime=trytime))
def print_server_session(self, vpn_id, session, show_disconnect):
    total_time = str(datetime.now() - session['connected_since'])[:-7]
    bytes_recv = session['bytes_recv']
    bytes_sent = session['bytes_sent']
    output('<td>{0!s}</td>'.format(session['username']))
    output('<td>{0!s}</td>'.format(session['local_ip']))
    output('<td>{0!s}</td>'.format(session['remote_ip']))
    if 'location' in session and session['location'] is not None:
        if session['location'] == 'RFC1918':
            output('<td>RFC1918</td>')
        else:
            flag = 'images/flags/{0!s}.png'.format(session['location'].lower())
            if 'country' in session and session['country'] is not None:
                country = session['country']
                full_location = country
                if 'region' in session and session['region'] is not None:
                    region = session['region']
                    full_location = '{0!s}, {1!s}'.format(region, full_location)
                if 'city' in session and session['city'] is not None:
                    city = session['city']
                    full_location = '{0!s}, {1!s}'.format(city, full_location)
                output('<td><img src="{0!s}" title="{1!s}" alt="{1!s}" /> '.format(flag, full_location))
                output('{0!s}</td>'.format(full_location))
    else:
        output('<td>Unknown</td>')
    output('<td>{0!s} ({1!s})</td>'.format(bytes_recv, naturalsize(bytes_recv, binary=True)))
    output('<td>{0!s} ({1!s})</td>'.format(bytes_sent, naturalsize(bytes_sent, binary=True)))
    output('<td>{0!s}</td>'.format(
        session['connected_since'].strftime(self.datetime_format)))
    if 'last_seen' in session:
        output('<td>{0!s}</td>'.format(
            session['last_seen'].strftime(self.datetime_format)))
    else:
        output('<td>ERROR</td>')
    output('<td>{0!s}</td>'.format(total_time))
    if show_disconnect:
        output('<td><form method="post">')
        output('<input type="hidden" name="vpn_id" value="{0!s}">'.format(vpn_id))
        if 'port' in session:
            output('<input type="hidden" name="ip" value="{0!s}">'.format(session['remote_ip']))
            output('<input type="hidden" name="port" value="{0!s}">'.format(session['port']))
        if 'client_id' in session:
            output('<input type="hidden" name="client_id" value="{0!s}">'.format(session['client_id']))
        output('<button type="submit" class="btn btn-xs btn-danger">')
        output('<span class="glyphicon glyphicon-remove"></span> ')
        output('Disconnect</button></form></td>')
def resources_info(self):
    """Resources usage information"""
    self.dialog.infobox(
        title=self.title,
        text='Retrieving information ...'
    )
    provisioned_storage = self.obj.summary.storage.committed + \
        self.obj.summary.storage.uncommitted
    elements = [
        pvc.widget.form.FormElement(
            label='Consumed Host CPU',
            item='{} MHz'.format(self.obj.summary.quickStats.overallCpuUsage)
        ),
        pvc.widget.form.FormElement(
            label='Consumed Host Memory',
            item='{} MB'.format(self.obj.summary.quickStats.hostMemoryUsage)
        ),
        pvc.widget.form.FormElement(
            label='Active Guest Memory',
            item='{} MB'.format(self.obj.summary.quickStats.guestMemoryUsage)
        ),
        pvc.widget.form.FormElement(
            label='Provisioned Storage',
            item=humanize.naturalsize(provisioned_storage, binary=True)
        ),
        pvc.widget.form.FormElement(
            label='Non-shared Storage',
            item=humanize.naturalsize(self.obj.summary.storage.unshared, binary=True)
        ),
        pvc.widget.form.FormElement(
            label='Used Storage',
            item=humanize.naturalsize(self.obj.summary.storage.committed, binary=True)
        ),
    ]
    form = pvc.widget.form.Form(
        dialog=self.dialog,
        form_elements=elements,
        title=self.title,
        text='Virtual Machine resource usage'
    )
    return form.display()
def download_track(client, track, output_dir):
    title = normalize(track['title'])
    audio_track = os.path.join(output_dir, title) + '.' + track['original_format']
    if os.path.exists(audio_track):
        print u'Track {} already exists'.format(track['id'])
        return
    stream_url = track['stream_url']
    request = client.request(stream_url, stream=True)
    downloaded_track = u'{}.part'.format(audio_track)
    bytes_downloaded = os.stat(downloaded_track).st_size if os.path.exists(downloaded_track) else 0
    content_length = int(request.headers['content-length']) + bytes_downloaded
    CHUNK_SIZE = 100 * 1024
    time_before = time.time()
    with open(downloaded_track, 'wb') as f:
        for i, chunk in enumerate(request.iter_content(CHUNK_SIZE)):
            f.write(chunk)
            f.flush()
            now = time.time()
            try:
                download_speed = CHUNK_SIZE / (now - time_before)
            except ZeroDivisionError:
                pass
            time_before = now
            if i % 2 == 0:
                print u'\rDownloading track id={}, {:.1f}%, {}/s     '.format(
                    track['id'],
                    f.tell() * 100 / content_length,
                    humanize.naturalsize(download_speed)),
    os.rename(downloaded_track, audio_track)
                application.__version__)

import os
import os.path
import db
import config
from humanize import naturalsize

dir = config.config.storage['media_dir']
if os.path.isdir(dir):
    application.library_size = sum(
        os.path.getsize(os.path.join(dir, x))
        for x in os.listdir(dir)
        if os.path.isfile(os.path.join(dir, x)))
else:
    application.library_size = 0
logging.info('Library size is %s.', naturalsize(application.library_size))
logging.info('Working out of directory: %s.', config.config_dir)
db.Base.metadata.create_all()

from gui.main_frame import MainFrame
application.frame = MainFrame(None)
application.frame.Show(True)
application.frame.Maximize(True)

from threading import Thread
from server.base import app
app.port = args.server_port

from twisted.web.server import Site
from twisted.internet import reactor, endpoints

endpoint_description = "tcp:port={0}:interface={1}".format(
    args.server_port, args.server_host)
endpoint = endpoints.serverFromString(reactor, endpoint_description)
endpoint.listen(Site(
def _naturalsize(self, x):
    if self.unit == 'B':
        return humanize.naturalsize(x)
    else:
        return '%s%s' % (self._naturalfloat(x), self.unit or '')
def process_changes_with_callback(self, callback, callback2):
    c = self.conn.cursor()
    res = c.execute(
        'SELECT * FROM ajxp_changes WHERE md5="directory" AND location="local" '
        'AND type="create" ORDER BY source,target')
    mkdirs = []
    ids = []
    for row in res:
        r = self.sqlite_row_to_dict(row, load_node=False)
        ids.append(str(r['row_id']))
        mkdirs.append(r['target'])
    splitsize = 10
    for i in range(0, int(math.ceil(float(len(mkdirs)) / float(splitsize)))):
        callback({
            'type': 'bulk_mkdirs',
            'location': 'local',
            'pathes': mkdirs[i * splitsize:(i + 1) * splitsize]
        })
        ids_list = str(','.join(ids[i * splitsize:(i + 1) * splitsize]))
        self.conn.execute('DELETE FROM ajxp_changes WHERE row_id IN (' + ids_list + ')')
        self.conn.commit()
    res = c.execute(
        'SELECT * FROM ajxp_changes WHERE md5="directory" ORDER BY source,target')
    for row in res:
        try:
            output = callback(self.sqlite_row_to_dict(row, load_node=True))
            if output:
                self.conn.execute('DELETE FROM ajxp_changes WHERE row_id=?',
                                  (row['row_id'],))
        except InterruptException as e:
            break
    self.conn.commit()
    # now go to the rest
    res = c.execute('SELECT * FROM ajxp_changes ORDER BY seq_id ASC')
    rows_to_process = []
    try:
        for row in res:
            rows_to_process.append(self.sqlite_row_to_dict(row, load_node=True))
    except Exception as e:
        logging.exception(e)
        logging.info("Failed to decode " + str(row))
        raise SystemExit

    import threading

    class Processor_callback(Thread):
        def __init__(self, change):
            threading.Thread.__init__(self)
            self.change = change
            self.status = ""

        def run(self):
            #logging.info("Running change " + str(threading.current_thread()) + " " + str(self.change))
            ts = time.time()
            try:
                if not callback2(self.change):
                    self.status = "FAILED"
                    logging.info("An error occurred processing " + str(self.change))
                else:
                    self.status = "SUCCESS"
            except InterruptException:
                self.status = "FAILED"  # silent fail (network)
            """except Exception as e:
                self.status = "FAILED"
                self.error = e
                if not hasattr(e, "code"):
                    logging.exception(e)"""
            #logging.info("DONE change " + str(threading.current_thread()) + " in " + str(time.time()-ts))

        def stop(self):
            pass
    # end of Processor_callback

    def lerunnable(change):
        p = Processor_callback(change)
        p.start()
        return p

    def processonechange(iter):
        try:
            change = next(iter)
            #logging.info('PROCESSING CHANGE %s' % change)
            proc = lerunnable(change)
            return proc
        except StopIteration:
            return False

    it = iter(rows_to_process)
    logging.info("To be processed " + str(it.__length_hint__()))
    pool = []
    ts = time.time()
    schedule_exit = False  # indicates that the last change was scheduled
    self.change_history.LOCKED = True
    while True:
        try:
            for i in pool:
                if not i.isAlive():
                    if i.status == "SUCCESS" or (
                            hasattr(i, "error") and hasattr(i.error, "code")
                            and i.error.code == 1404):
                        # file download impossible -> assume deleted from server
                        self.conn.execute('DELETE FROM ajxp_changes WHERE row_id=?',
                                          (i.change['row_id'],))
                        #logging.info("DELETE CHANGE %s" % i.change)
                        if i.change is not None and hasattr(i.change, 'status'):
                            i.change.status = "FAILED"
                        self.change_history.insert_change(i)
                    elif i.status == "FAILED":
                        self.conn.execute('DELETE FROM ajxp_changes WHERE row_id=?',
                                          (i.change['row_id'],))
                        self.change_history.insert_change(i)
                        """ Because of consolidation this is no longer useful
                        class Failchange:
                            pass
                        if i.change['row_id'] not in self.failingchanges:
                            self.failingchanges[i.change['row_id']] = Failchange()
                            self.failingchanges[i.change['row_id']].change = i.change
                            self.failingchanges[i.change['row_id']].fail = 1
                        else:
                            if "fail" in self.failingchanges[i.change['row_id']]:
                                if self.failingchanges[i.change['row_id']].fail > 5:
                                    # Try 5 times then delete it and move on. Is this ever reached?
                                    self.conn.execute('DELETE FROM ajxp_changes WHERE row_id=?',
                                                      (i.change['row_id'],))
                                    if i.change is not None:
                                        self.change_history.insert_change(i)
                                    del self.failingchanges[i.change['row_id']]
                                else:
                                    self.failingchanges[i.change['row_id']].fail += 1
                        """
                    pool.remove(i)
                    i.join()
                    #logging.info("Change done " + str(i))
                    yield str(i)
            if schedule_exit and len(pool) == 0:
                break
            if len(pool) >= self.maxpoolsize:
                time.sleep(.2)
                continue
            else:
                output = processonechange(it)
                time.sleep(.01)
                if not output and not schedule_exit:
                    for op in self.pendingoperations:
                        self.buffer_real_operation(op.location, op.type, op.source, op.target)
                    try:
                        humanize
                        logging.info(" @@@ TOOK " + humanize.naturaltime(
                            time.time() - ts).replace(' ago', '') + " to process changes.")
                        logging.info(" Fails : " + str(len(self.failingchanges)))
                    except NameError:
                        pass  # NOP if humanize lib is missing
                    schedule_exit = True
                    continue
                else:
                    # waiting for changes to be processed
                    time.sleep(.02)
                if output and output.isAlive():
                    pool.append(output)
                try:
                    humanize
                    current_change = ""
                    if len(pool) == 1:
                        try:
                            current_change = pool[0].change
                            more = ""
                            if current_change:
                                if current_change['node']:
                                    if 'node_path' in current_change['node']:
                                        more = current_change['node']['node_path']
                                elif 'source' in current_change:
                                    more = current_change['source']
                                elif 'target' in current_change:
                                    more = current_change['target']
                            logging.info(" Poolsize " + str(len(pool))
                                         + ' Memory usage: %s' % humanize.naturalsize(
                                             resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
                                         + " " + more)
                        except Exception as e:
                            logging.exception(e)
                            logging.info(str(type(pool[0].change)) + " " + str(pool[0].change))
                except NameError:
                    pass
                """if hasattr(output, "done"):
                    pool.append(output)"""
        except InterruptException as e:
            logging.info("@@@@@@@@@@@ Interrupted @@@@@@@@@@")
        except Exception as e:
            logging.exception(e)
            time.sleep(1)
    try:
        self.conn.commit()
    except Exception as e:
        logging.exception(e)
    while True:
        try:
            self.change_history.conn.commit()
            break
        except sqlite3.OperationalError:
            pass
    self.change_history.LOCKED = False
    try:
        self.change_history.consolidate()
    except Exception as e:
        logging.info("TODO: handle")
        logging.exception(e)
def total_size(self):
    return humanize.naturalsize(self.max, gnu=True)
def download_speed(self):
    if self.avg == 0.0:
        return "..."
    return "{0}/s".format(humanize.naturalsize(1 / self.avg))
def print_entry(offset, size, name, human=False):
    """Print a binwalk entry"""
    if human:
        size = humanize.naturalsize(size)
    print("0x{:<10x} {:<15} {}".format(offset, size, name))
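# Example call for print_entry() above (offset, size, and name are made up):
print_entry(0x100, 4096, 'gzip compressed data', human=True)
# -> 0x100        4.1 kB          gzip compressed data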
async def uptobox(request, url: str) -> str:
    """Uptobox direct links generator"""
    try:
        link = re.findall(r"\bhttps?://.*uptobox\.com\S+", url)[0]
    except IndexError:
        await request.edit("`No uptobox links found.`")
        return
    if USR_TOKEN is None:
        await request.edit("`Set USR_TOKEN_UPTOBOX first!`")
        return
    if link.endswith("/"):
        index = -2
    else:
        index = -1
    FILE_CODE = link.split("/")[index]
    origin = "https://uptobox.com/api/link"

    """ Retrieve file information """
    uri = f"{origin}/info?fileCodes={FILE_CODE}"
    await request.edit("`Retrieving file information...`")
    async with aiohttp.ClientSession() as session:
        async with session.get(uri) as response:
            result = json.loads(await response.text())
    data = result.get("data").get("list")[0]
    if "error" in data:
        await request.edit(
            "`[ERROR]`\n"
            f"`statusCode`: **{data.get('error').get('code')}**\n"
            f"`reason`: **{data.get('error').get('message')}**")
        return
    file_name = data.get("file_name")
    file_size = naturalsize(data.get("file_size"))

    """ Get waiting token and direct download link """
    uri = f"{origin}?token={USR_TOKEN}&file_code={FILE_CODE}"
    async with aiohttp.ClientSession() as session:
        async with session.get(uri) as response:
            result = json.loads(await response.text())
        status = result.get("message")
        if status == "Waiting needed":
            wait = result.get("data").get("waiting")
            waitingToken = result.get("data").get("waitingToken")
            await request.edit(f"`Waiting for about {time_formatter(wait)}.`")
            # for some reason it doesn't go as i planned
            # so make it 1 minute just to be safe enough
            await asyncio.sleep(wait + 60)
            uri += f"&waitingToken={waitingToken}"
            async with session.get(uri) as response:
                await request.edit("`Generating direct download link...`")
                result = json.loads(await response.text())
                status = result.get("message")
                if status == "Success":
                    webLink = result.get("data").get("dlLink")
                    await request.edit(f"[{file_name} ({file_size})]({webLink})")
                    return
                else:
                    await request.edit(
                        "`[ERROR]`\n"
                        f"`statusCode`: **{result.get('statusCode')}**\n"
                        f"`reason`: **{result.get('data')}**\n"
                        f"`status`: **{status}**")
                    return
        elif status == "Success":
            webLink = result.get("data").get("dlLink")
            await request.edit(f"[{file_name} ({file_size})]({webLink})")
            return
        else:
            await request.edit(
                "`[ERROR]`\n"
                f"`statusCode`: **{result.get('statusCode')}**\n"
                f"`reason`: **{result.get('data')}**\n"
                f"`status`: **{status}**")
            return
def get_evolution_summary(arguments, input_nodes, output_nodes,
                          memory_consumption_probe=100):
    output = [
        str.format('# Basic summary'),
        str.format('Input nodes: {}', input_nodes),
        str.format('Output nodes: {}', output_nodes),
        str.format('Genotype: {}', genotype_lookup[arguments.genotype]),
        str.format('Hidden layer nodes: {}', arguments.hidden_layer_nodes),
        str.format('Weight lower threshold: {}', arguments.weight_lower_threshold),
        str.format('Weight upper threshold: {}', arguments.weight_upper_threshold),
        str.format('Population size: {}', arguments.population_size),
        str.format('Tournament size: {}', arguments.tournament_size),
        str.format('Duration (hours): {}', arguments.duration),
        str.format('Use bias: {}', arguments.bias),
        str.format('# Genotype specific summary')
    ]
    if arguments.genotype == simple_genotype_choice:
        output.append(str.format('Mutation mean: {}', arguments.mutation_mean))
        output.append(str.format('Mutation standard deviation: {}',
                                 arguments.mutation_standard_deviation))
    if (arguments.genotype == uncorrelated_one_step_size_genotype_choice
            or arguments.genotype == uncorrelated_n_step_size_genotype_choice):
        output.append(str.format('Mutation step size lower threshold: {}',
                                 arguments.mutation_step_size_lower_threshold))
        output.append(str.format('Mutation step size upper threshold: {}',
                                 arguments.mutation_step_size_upper_threshold))
        output.append(str.format('Tau 1: {}', arguments.tau1))
    if arguments.genotype == uncorrelated_n_step_size_genotype_choice:
        output.append(str.format('Tau 2: {}', arguments.tau2))
    output.append(str.format('# Calculated summary'))
    number_of_nn_weights = get_number_of_nn_weights(
        input_nodes, arguments.hidden_layer_nodes, output_nodes)
    output.append(str.format('Number of neural network weights: {}',
                             number_of_nn_weights))
    # Calculating memory consumption
    demo_genotype_iterator = range(memory_consumption_probe)
    if arguments.genotype == simple_genotype_choice:
        demo_genotypes = map(
            lambda index: SimpleGenotype.get_random_genotype(
                number_of_nn_weights,
                arguments.weight_lower_threshold,
                arguments.weight_upper_threshold),
            demo_genotype_iterator)
        demo_genotype_sizes = list(
            map(lambda demo_genotype: getsizeof(demo_genotype.weights),
                demo_genotypes))
    if arguments.genotype == uncorrelated_one_step_size_genotype_choice:
        demo_genotypes = map(
            lambda index: UncorrelatedOneStepSizeGenotype.get_random_genotype(
                number_of_nn_weights,
                arguments.weight_lower_threshold,
                arguments.weight_upper_threshold,
                arguments.mutation_step_size_lower_threshold,
                arguments.mutation_step_size_upper_threshold),
            demo_genotype_iterator)
        demo_genotype_sizes = list(
            map(lambda demo_genotype: getsizeof(demo_genotype.weights)
                + getsizeof([demo_genotype.mutation_step_size]),
                demo_genotypes))
    if arguments.genotype == uncorrelated_n_step_size_genotype_choice:
        demo_genotypes = map(
            lambda index: UncorrelatedNStepSizeGenotype.get_random_genotype(
                number_of_nn_weights,
                arguments.weight_lower_threshold,
                arguments.weight_upper_threshold,
                arguments.mutation_step_size_lower_threshold,
                arguments.mutation_step_size_upper_threshold),
            demo_genotype_iterator)
        demo_genotype_sizes = list(
            map(lambda demo_genotype: getsizeof(demo_genotype.weights)
                + getsizeof(demo_genotype.mutation_step_sizes),
                demo_genotypes))
    mean_demo_genotype_size = mean(demo_genotype_sizes) * arguments.population_size
    output.append(str.format('Calculated memory consumption (Python list): {}',
                             naturalsize(mean_demo_genotype_size)))
    output.append(str.format('Approximate end time: {}',
                             get_end_datetime(arguments.duration).isoformat(sep=' ')))
    output.append(str.format('# Utils summary'))
    output.append(str.format('Epoch summary: {}', arguments.epoch_summary))
    if arguments.epoch_summary:
        output.append(str.format('Epoch summary features: {}',
                                 arguments.epoch_summary_features))
        output.append(str.format('Epoch summary interval: {}',
                                 arguments.epoch_summary_interval))
    output.append(str.format('Population backup summary: {}',
                             arguments.population_backup))
    if arguments.population_backup:
        output.append(str.format('Population backup directory: {}',
                                 arguments.population_backup_directory))
        output.append(str.format('Population backup interval: {}',
                                 arguments.population_backup_interval))
        output.append(str.format('Population backup file extension: .{}',
                                 arguments.population_backup_file_extension))
    if arguments.initial_population_directory:
        output.append(str.format('Initial population directory: {}',
                                 arguments.initial_population_directory))
        output.append(str.format('Initial population file extension: {}',
                                 arguments.initial_population_file_extension))
    return '\n'.join(output)
def getDesc(self):
    size = humanize.naturalsize(self._size)
    return "{}, {}x{}px, {}".format(self._format.upper(), self._width, self._height, size)
def AddResource(data):
    # Check whether the parent folder is already in the database; if not, fetch and add it
    grand_path = data.get('parentReference').get('path').replace('/drive/root:', '')
    if grand_path == '':
        parent_id = ''
        grandid = 0
    else:
        g = GetItemThread(Queue())
        parent_id = data.get('parentReference').get('id')
        grandid = len(data.get('parentReference').get('path')
                      .replace('/drive/root:', '').split('/')) - 1
        grand_path = grand_path[1:]
        parent_path = ''
        pid = ''
        for idx, p in enumerate(grand_path.split('/')):
            parent = items.find_one({'name': p, 'grandid': idx, 'parent': pid})
            if parent is not None:
                pid = parent['id']
                parent_path = '/'.join([parent_path, parent['name']])
            else:
                parent_path = '/'.join([parent_path, p])
                fdata = g.GetItemByPath(parent_path)
                item = {}
                item['type'] = 'folder'
                item['name'] = fdata.get('name')
                item['id'] = fdata.get('id')
                item['size'] = humanize.naturalsize(fdata.get('size'), gnu=True)
                item['size_order'] = fdata.get('size')
                item['lastModtime'] = date_to_char(parse(fdata['lastModifiedDateTime']))
                item['grandid'] = idx
                item['parent'] = pid
                items.insert_one(item)
                pid = fdata.get('id')
    # Insert the file record
    item = {}
    item['type'] = 'file'
    item['name'] = data.get('name')
    item['id'] = data.get('id')
    item['size'] = humanize.naturalsize(data.get('size'), gnu=True)
    item['size_order'] = data.get('size')
    item['lastModtime'] = date_to_char(parse(data.get('lastModifiedDateTime')))
    item['grandid'] = grandid
    item['parent'] = parent_id
    if grand_path == '':
        path = convert2unicode(data['name'])
    else:
        path = grand_path.replace(self.share_path, '', 1) + '/' + convert2unicode(data['name'])
        if path.startswith('/') and path != '/':
            path = path[1:]
        if path == '':
            path = convert2unicode(data['name'])
    item['path'] = path
    if GetExt(data['name']) in ['bmp', 'jpg', 'jpeg', 'png', 'gif']:
        item['order'] = 3
        key1 = 'name:{}'.format(data['id'])
        key2 = 'path:{}'.format(data['id'])
        rd.set(key1, data['name'])
        rd.set(key2, path)
    elif data['name'] == '.password':
        item['order'] = 1
    else:
        item['order'] = 2
    items.insert_one(item)
def GetItem(self, url, grandid=0, parent='', trytime=1):
    app_url = GetAppUrl()
    token = GetToken()
    print(u'getting files from url {}'.format(url))
    header = {'Authorization': 'Bearer {}'.format(token)}
    try:
        r = requests.get(url, headers=header)
        data = json.loads(r.content)
        if data.get('error'):
            print('error:{}! waiting 180s'.format(data.get('error').get('message')))
            time.sleep(180)
            self.queue.put(dict(url=url, grandid=grandid, parent=parent, trytime=trytime))
            return
        values = data.get('value')
        if len(values) > 0:
            for value in values:
                item = {}
                if value.get('folder'):
                    folder = items.find_one({'id': value['id']})
                    if folder is not None:
                        if folder['size_order'] == value['size']:
                            # folder size unchanged, no update needed
                            print(u'path:{},origin size:{},current size:{}'.format(
                                value['name'], folder['size_order'], value['size']))
                        else:
                            items.delete_one({'id': value['id']})
                    item['type'] = 'folder'
                    item['order'] = 0
                    item['name'] = convert2unicode(value['name'])
                    item['id'] = convert2unicode(value['id'])
                    item['size'] = humanize.naturalsize(value['size'], gnu=True)
                    item['size_order'] = int(value['size'])
                    item['lastModtime'] = date_to_char(parse(value['lastModifiedDateTime']))
                    item['grandid'] = grandid
                    item['parent'] = parent
                    grand_path = value.get('parentReference').get('path').replace('/drive/root:', '')
                    if grand_path == '':
                        path = convert2unicode(value['name'])
                    else:
                        path = grand_path.replace(self.share_path, '', 1) + '/' + convert2unicode(value['name'])
                        if path.startswith('/') and path != '/':
                            path = path[1:]
                        if path == '':
                            path = convert2unicode(value['name'])
                    item['path'] = path
                    subfolder = items.insert_one(item)
                    if value.get('folder').get('childCount') == 0:
                        continue
                    else:
                        url = (app_url + 'v1.0/me' + value.get('parentReference').get('path')
                               + '/' + value.get('name') + ':/children?expand=thumbnails')
                        self.queue.put(dict(url=url, grandid=grandid + 1,
                                            parent=item['id'], trytime=1))
                else:
                    if items.find_one({'id': value['id']}) is not None:
                        # file already exists, skip it
                        continue
                    else:
                        item['type'] = GetExt(value['name'])
                        grand_path = value.get('parentReference').get('path').replace('/drive/root:', '')
                        if grand_path == '':
                            path = convert2unicode(value['name'])
                        else:
                            path = grand_path.replace(self.share_path, '', 1) + '/' + convert2unicode(value['name'])
                            if path.startswith('/') and path != '/':
                                path = path[1:]
                            if path == '':
                                path = convert2unicode(value['name'])
                        item['path'] = path
                        item['name'] = convert2unicode(value['name'])
                        item['id'] = convert2unicode(value['id'])
                        item['size'] = humanize.naturalsize(value['size'], gnu=True)
                        item['size_order'] = int(value['size'])
                        item['lastModtime'] = date_to_char(parse(value['lastModifiedDateTime']))
                        item['grandid'] = grandid
                        item['parent'] = parent
                        if GetExt(value['name']) in ['bmp', 'jpg', 'jpeg', 'png', 'gif']:
                            item['order'] = 3
                            key1 = 'name:{}'.format(value['id'])
                            key2 = 'path:{}'.format(value['id'])
                            rd.set(key1, value['name'])
                            rd.set(key2, path)
                        elif value['name'] == '.password':
                            item['order'] = 1
                        else:
                            item['order'] = 2
                        items.insert_one(item)
        if data.get('@odata.nextLink'):
            self.queue.put(dict(url=data.get('@odata.nextLink'), grandid=grandid,
                                parent=parent, trytime=1))
    except Exception as e:
        trytime += 1
        print(u'error to operate GetItem("{}","{}","{}"), try times: {}, reason: {}'
              .format(url, grandid, parent, trytime, e))
        if trytime <= 3:
            self.queue.put(dict(url=url, grandid=grandid, parent=parent, trytime=trytime))
with open(file_path) as alice_in:
    pass

raw_mod_date = os.path.getmtime(file_path)
print("raw mod date:", raw_mod_date)
mod_date = datetime.fromtimestamp(raw_mod_date)
print("mod date:", mod_date)
print("mod month/year: {}/{}".format(mod_date.month, mod_date.year))

file_size = os.path.getsize(file_path)
print("file size:", file_size)
print("file size:", naturalsize(file_size))

stat_info = os.stat(file_path)
print("stat info:", stat_info, '\n')

print(os.getuid())
print(os.getcwd())
print(os.getpid(), '\n')

print(file_path)
print(os.path.basename(file_path))
print(os.path.dirname(file_path))
print(os.path.abspath(file_path), '\n')

# absolute path on Windows
# C:\....
def columns(df, columns, buckets=40, infer=False, relative_error=1):
    """
    Return statistical information about a specific column in json format

    :param df: Dataframe to be processed
    :param columns: Columns that you want to profile
    :param buckets: Create buckets divided by range. Each bin is equal.
    :param infer: whether to infer the column data types
    :param relative_error: relative error used when percentiles are calculated.
        0 is exact but slow; 1 allows more error but is faster.
    :return: json object with the profile
    """
    columns = parse_columns(df, columns)

    # Get just a sample to infer the column data type
    # sample_size_number = sample_size(rows_count, 95.0, 2.0)
    # fraction = sample_size_number / rows_count
    # sample = df.sample(False, fraction, seed=1)

    # Initialize Objects
    columns_info = {}
    columns_info['columns'] = {}

    rows_count = df.count()
    columns_info['rows_count'] = humanize.intword(rows_count)
    count_dtypes = Profiler.count_data_types(df, columns, infer)
    columns_info["count_types"] = count_dtypes["count_types"]
    columns_info['size'] = humanize.naturalsize(df.size())

    # Cast columns to the data type inferred by count_data_types()
    df = Profiler.cast_columns(df, columns, count_dtypes).cache()

    # Calculate stats
    stats = Profiler.general_stats(df, columns)

    for col_name in columns:
        col_info = {}
        logger.print("------------------------------")
        logger.print("Processing column '" + col_name + "'...")
        columns_info['columns'][col_name] = {}

        col_info["stats"] = stats[col_name]
        col_info.update(Profiler.frequency(df, col_name, buckets))
        col_info.update(Profiler.stats_by_column(col_name, stats, count_dtypes, rows_count))

        col_info['column_dtype'] = count_dtypes["columns"][col_name]['dtype']
        col_info["dtypes_stats"] = count_dtypes["columns"][col_name]['details']

        column_type = count_dtypes["columns"][col_name]['type']

        if column_type == "numeric":
            col_info["stats"].update(
                Profiler.extra_numeric_stats(df, col_name, stats, relative_error))
            col_info["hist"] = df.cols.hist(col_name,
                                            stats[col_name]["min"],
                                            stats[col_name]["max"],
                                            buckets)
        if column_type == "categorical" or column_type == "array":
            col_info["hist"] = Profiler.hist_string(df, col_name, buckets)
        if column_type == "date":
            col_info["hist"] = Profiler.hist_date(df, col_name)

        columns_info['columns'][col_name] = col_info

    return columns_info
def printm():
    process = psutil.Process(os.getpid())
    print("Gen RAM Free: " + humanize.naturalsize(psutil.virtual_memory().available),
          " | Proc size: " + humanize.naturalsize(process.memory_info().rss))
    print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(
        gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil * 100, gpu.memoryTotal))
    host_arr = numpy.concatenate((host_arr, img_arr.reshape(-1)))

host_arr = host_arr.astype(numpy.uint8)
print dim
new_dim = (len(img_names), dim[0], dim[1], dim[2])
print "new dimensions are", new_dim

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=host_arr)
luma_values_array = numpy.zeros((len(img_names),), dtype=numpy.long)
dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, luma_values_array.nbytes)
print "[%d] Takes " % len(img_names), naturalsize(luma_values_array.nbytes)

kernel_code = open("calc_luma.cl").read() % (new_dim[1], new_dim[2], new_dim[3])
prg1 = cl.Program(ctx, kernel_code).build()
stime = time.time()
prg1.calc_luma(queue, (len(img_names),), None, a_buf, dest_buf)
etime = time.time()
print "[%d] GPU takes " % len(img_names), naturaltime(etime - stime)
cl.enqueue_copy(queue, luma_values_array, dest_buf)

# result is an array of all images combined
_index = 0
def gnu_size(cls, size: int) -> str:
    return humanize.naturalsize(size, gnu=True)
def size_human(self):
    # type: () -> str
    return naturalsize(self.size)
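# The two small helpers above, exercised directly (sample value 1536 bytes):
import humanize
print(humanize.naturalsize(1536, gnu=True))   # -> 1.5K   (base 1024, terse suffix)
print(humanize.naturalsize(1536))             # -> 1.5 kB (base 1000, full suffix)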
#files = []
#for filename in glob.glob('*.pdf'):
#    files.append(filename)

myDir = "."
files = []
for root, dirnames, filenames in os.walk(myDir):
    files.extend(glob.glob(root + "/*.pdf"))

#dirList=os.listdir(file_path)  # list all the files in the directories

# writing no. of pages into a csv file
file_write = open('pdf_stats_' + timestamp + '.csv', 'w')
file_write.write("No~FileName~PageCount~Size~")
file_write.write("\n")
file_write.write("\n")
print "\n\n"
counter = 1
for fname in files:
    # giving file path with the name of the file
    data_find = str(counter) + '~' + fname + '~' + str(pypdftk.get_num_pages(fname)) \
        + '~' + humanize.naturalsize(os.path.getsize(fname)) + '~'
    print data_find  # test with printing the data
    file_write.write(str(data_find))
    file_write.write("\n")
    counter = counter + 1
file_write.close()
print "\nWrote the PDF stats to the file " + 'pdf_stats_' + timestamp + '.csv' + '\n\n'
def get_size(self, obj):
    return humanize.naturalsize(obj.file.size)
def fs_traverse(path, repo, parent=None, render=True, recursive=False,
                json=None, basepath=None):
    """Traverse path through its nodes and returns a dictionary of relevant
    attributes attached to each node

    Parameters
    ----------
    path: str
        Path to the directory to be traversed
    repo: AnnexRepo or GitRepo
        Repo object the directory belongs to
    parent: dict
        Extracted info about parent directory
    recursive: bool
        Recurse into subdirectories (note that subdatasets are not traversed)
    render: bool
        To render from within function or not. Set to false if results are to
        be manipulated before final render

    Returns
    -------
    list of dict
        extracts and returns a (recursive) list of directory info at path;
        does not traverse into annex, git or hidden directories
    """
    fs = fs_extract(path, repo, basepath=basepath or path)
    if isdir(path):  # if node is a directory
        # store its info in its children dict too
        # (Yarik is not sure why, but I guess for .?)
        # ATM seems some pieces still rely on having this duplication, so left as is
        # TODO: strip away
        children = [fs.copy()]
        for node in listdir(path):
            nodepath = opj(path, node)
            # TODO: it might be a subdir which is non-initialized submodule!
            # if not ignored, append child node info to current node's dictionary
            if not ignored(nodepath):
                # if recursive, create info dictionary (within) each child node too
                if recursive:
                    subdir = fs_traverse(nodepath,
                                         repo,
                                         parent=None,  # children[0],
                                         recursive=recursive,
                                         json=json,
                                         basepath=basepath or path)
                    subdir.pop('nodes', None)
                else:
                    # read child metadata from its metadata file if it exists
                    subdir_json = metadata_locator(path=node, ds_path=basepath or path)
                    if exists(subdir_json):
                        with open(subdir_json) as data_file:
                            subdir = js.load(data_file)
                            subdir.pop('nodes', None)
                    # else extract whatever information you can about the child
                    else:
                        # Yarik: this one is way too lean...
                        subdir = fs_extract(nodepath, repo, basepath=basepath or path)
                # append child metadata to list
                children.extend([subdir])
        # sum sizes of all 1st level children
        children_size = {}
        for node in children[1:]:
            for size_type, child_size in node['size'].items():
                children_size[size_type] = children_size.get(size_type, 0) + machinesize(child_size)
        # update current node sizes to the humanized aggregate children size
        fs['size'] = children[0]['size'] = \
            {size_type: humanize.naturalsize(child_size)
             for size_type, child_size in children_size.items()}
        # replace current node name with '.' to emulate unix syntax
        children[0]['name'] = '.'
        if parent:
            # replace parent node name with '..' to emulate unix syntax
            parent['name'] = '..'
            # insert parent info after current node info in children dict
            children.insert(1, parent)
        # add children info to main fs dictionary
        fs['nodes'] = children
        if render:  # render directory node at location(path)
            fs_render(fs, json=json, ds_path=basepath or path)
            lgr.info('Directory: %s' % path)
    return fs
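# The child-size aggregation step above, in isolation: sizes arrive humanized,
# are parsed back to bytes, summed per size type, then re-humanized for the
# parent. machinesize() here is a simplified stand-in for datalad's helper so
# the sketch runs on its own; the child dicts are made up.
import humanize

def machinesize(s):  # stand-in parser, decimal units only
    num, unit = s.split()
    factor = {'Bytes': 1, 'kB': 1000, 'MB': 1000 ** 2, 'GB': 1000 ** 3}[unit]
    return float(num) * factor

children = [{'size': {'total': '1.5 MB'}}, {'size': {'total': '500.0 kB'}}]
children_size = {}
for node in children:
    for size_type, child_size in node['size'].items():
        children_size[size_type] = (children_size.get(size_type, 0)
                                    + machinesize(child_size))
print({k: humanize.naturalsize(v) for k, v in children_size.items()})
# -> {'total': '2.0 MB'}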
def downloaded(self): return humanize.naturalsize(self.index)
def current_size(self): return humanize.naturalsize(self.index, binary=True)
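The two properties above differ only in the binary flag. For reference, a small sanity check of how humanize.naturalsize renders the same byte count in its three modes:

import humanize

n = 1_000_000
print(humanize.naturalsize(n))               # '1.0 MB'    (decimal, base 1000)
print(humanize.naturalsize(n, binary=True))  # '976.6 KiB' (base 1024, IEC units)
print(humanize.naturalsize(n, gnu=True))     # '976.6K'    (base 1024, ls -sh style)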
def _get_context_snap_details(snap_name):
    try:
        details = api.get_snap_details(snap_name)
    except ApiTimeoutError as api_timeout_error:
        flask.abort(504, str(api_timeout_error))
    except ApiResponseDecodeError as api_response_decode_error:
        flask.abort(502, str(api_response_decode_error))
    except ApiResponseErrorList as api_response_error_list:
        if api_response_error_list.status_code == 404:
            flask.abort(404, "No snap named {}".format(snap_name))
        else:
            if api_response_error_list.errors:
                # NOTE: assumes each error entry is a dict with a "message" key
                error_messages = ", ".join(
                    error.get("message", "")
                    for error in api_response_error_list.errors)
            else:
                error_messages = "An error occurred."
            flask.abort(502, error_messages)
    except ApiResponseError as api_response_error:
        flask.abort(502, str(api_response_error))
    except ApiCircuitBreaker:
        flask.abort(503)
    except ApiError as api_error:
        flask.abort(502, str(api_error))

    # When removing all the channel maps of an existing snap, the API
    # responds that the snap still exists, with data.
    # Return a 404 if there are no channel maps, to avoid an error.
    # For example: mir-kiosk-browser
    if not details.get("channel-map"):
        flask.abort(404, "No snap named {}".format(snap_name))

    clean_description = bleach.clean(details["snap"]["description"], tags=[])
    formatted_description = parse_markdown_description(clean_description)

    channel_maps_list = logic.convert_channel_maps(details.get("channel-map"))
    latest_channel = logic.get_last_updated_version(details.get("channel-map"))
    last_updated = latest_channel["created-at"]
    binary_filesize = latest_channel["download"]["size"]

    # filter out banner and banner-icon images from screenshots
    screenshots = logic.filter_screenshots(details["snap"]["media"])
    icons = logic.get_icon(details["snap"]["media"])
    videos = logic.get_videos(details["snap"]["media"])

    # until default tracks are supported by the API we special-case node
    # to use 10, rather than latest
    default_track = helpers.get_default_track(details["name"])

    lowest_risk_available = logic.get_lowest_available_risk(
        channel_maps_list, default_track)
    confinement = logic.get_confinement(
        channel_maps_list, default_track, lowest_risk_available)
    last_version = logic.get_version(
        channel_maps_list, default_track, lowest_risk_available)

    is_users_snap = False
    if flask.session and "openid" in flask.session:
        if (flask.session.get("openid").get("nickname")
                == details["snap"]["publisher"]["username"]):
            is_users_snap = True

    # build list of categories of a snap
    categories = logic.get_snap_categories(details["snap"]["categories"])

    context = {
        "snap-id": details.get("snap-id"),
        # Data direct from details API
        "snap_title": details["snap"]["title"],
        "package_name": details["name"],
        "categories": categories,
        "icon_url": icons[0] if icons else None,
        "version": last_version,
        "license": details["snap"]["license"],
        "publisher": details["snap"]["publisher"]["display-name"],
        "username": details["snap"]["publisher"]["username"],
        "screenshots": screenshots,
        "videos": videos,
        "prices": details["snap"]["prices"],
        "contact": details["snap"].get("contact"),
        "website": details["snap"].get("website"),
        "summary": details["snap"]["summary"],
        "description": formatted_description,
        "channel_map": channel_maps_list,
        "has_stable": logic.has_stable(channel_maps_list),
        "developer_validation": details["snap"]["publisher"]["validation"],
        "default_track": default_track,
        "lowest_risk_available": lowest_risk_available,
        "confinement": confinement,
        # Transformed API data
        "filesize": humanize.naturalsize(binary_filesize),
        "last_updated": logic.convert_date(last_updated),
        "last_updated_raw": last_updated,
        "is_users_snap": is_users_snap,
    }

    return context
def print_size(num, humanize=False): if humanize: return _humanize.naturalsize(num, gnu=True) else: return str(num)
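A short usage sketch of the wrapper above (the example values are assumptions for illustration): the gnu=True form yields compact single-letter suffixes, handy for fixed-width listings.

print(print_size(1536))                 # '1536'
print(print_size(1536, humanize=True))  # '1.5K'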
def train(self, epochs: int, loss: Loss, optimizer: Optimizer,
          x_train, y_train, x_test, y_test,
          x_validation=None, y_validation=None,
          batch_size: int = 1, shuffle: bool = True,
          validation_pct=None, validation_method='cross-validation',
          callbacks=None):
    """
    :param epochs: number of epochs to run
    :param loss: a loss function
    :param optimizer: the optimizer to use
    :param x_train: a 2D array of shape (rows, features)
    :param y_train: a 2D array of shape (rows, output_features), where
        output_features is the number of values we want to predict
    :param x_test: testing data inputs
    :param y_test: testing data true values
    :param validation_method: which validation method to use:
        'holdout' or 'cross-validation'
    :return: a dict of batch/validation/testing losses

    For example, our input might be:

    x_train = [
        [0,1,1],
        [0,2,1],
        [1,2,1],
        [0,3,4],
    ]

    That is 4 rows with 3 features each; we might do a binary
    classification on this:

    y_train = [
        [0,1],
        [0,1],
        [1,0],
        [0,1]
    ]

    That is, each training input maps to one of these.
    All of this will be copied to the device.

    Validation methods are:
    1. Specify x_validation, y_validation and the same provided dataset
       will be used to validate every epoch
    2. Specify validation_pct to determine how much of the training set
       will be set aside as validation. Specify validation_method to
       determine which method to use:
       * holdout: the same subset of x_train is used to validate each epoch
       * cross-validation: at the start of each epoch a random sample of
         x_train/y_train is set aside
    """
    callbacks = callbacks or []  # avoid a shared mutable default argument
    if validation_pct is not None and x_validation is not None and y_validation is not None:
        raise ValueError(
            "Please set either validation_pct or (x_validation, y_validation)")
    # setting exactly one of the pair is an error
    if (x_validation is None) != (y_validation is None):
        raise ValueError("Please set both (x_validation and y_validation)")
    x_train = x_train.astype(dtype)
    y_train = y_train.astype(dtype)
    if validation_pct:
        # slice off the last validation_pct of x_train, y_train
        if 0 <= validation_pct < 1:
            training_samples = int(x_train.shape[0] * (1 - validation_pct))
            validation_samples = int(x_train.shape[0] * validation_pct)
            if validation_method == 'holdout':
                print(f"Holding out last {validation_samples} samples "
                      f"of training data for validation")
                # take the validation slice *before* truncating the training
                # set, otherwise the validation arrays come out empty
                x_validation = x_train[training_samples:]
                y_validation = y_train[training_samples:]
                x_train = x_train[:training_samples]
                y_train = y_train[:training_samples]
                x_val_gpu = array.to_device(self.queue, x_validation)
                y_val_gpu = array.to_device(self.queue, y_validation)
            elif validation_method == 'cross-validation':
                print(f"Using cross-validation on last {validation_samples}")
            else:
                raise ValueError("Invalid validation method")
            validation_user = False
        else:
            raise ValueError("validation_pct must be in range 0 <= val% < 1")
    elif x_validation is not None and y_validation is not None:
        print("User provided validation")
        x_validation = x_validation.astype(dtype)
        y_validation = y_validation.astype(dtype)
        x_val_gpu = array.to_device(self.queue, x_validation)
        y_val_gpu = array.to_device(self.queue, y_validation)
        validation_samples = len(x_validation)
        training_samples = x_train.shape[0]
        validation_user = True
    else:
        training_samples = x_train.shape[0]
    if len(x_train) != len(y_train):
        raise ValueError("X and Y for test/train must be same length")
    if training_samples % batch_size != 0:
        raise ValueError("Training dataset must have rows divisible by batch size")
    input_features = cltypes.uint(x_train.shape[1])
    output_features = cltypes.uint(y_train.shape[1])
    if input_features != self.layers[0].input_width:
        raise ValueError(
            f"Input features (provided={input_features}) must be the same as "
            f"layer_0 input width (required={self.layers[0].input_width})")
    # Just copy all training and all testing data to the device
    for dn, ds in (("x_train", x_train), ("y_train", y_train),
                   ("x_validation", x_validation), ("y_validation", y_validation)):
        try:
            print("{}\n\tsize={}\n\tshape={}".format(
                dn, humanize.naturalsize(ds.nbytes), ds.shape))
        except AttributeError:
            pass  # validation arrays may be None
    x_train_gpu = array.to_device(self.queue, x_train)
    y_train_gpu = array.to_device(self.queue, y_train)
    # should probably check that our data won't exceed available device memory,
    # transparently queue up more data once it's been used
    losses = {'batch': [], 'validation': [], 'testing': []}
    for i in tqdm(range(epochs), desc='Epoch: ', position=0):
        if shuffle:  # shuffle the rows
            self.shuffle(x_train_gpu.data, y_train_gpu.data,
                         training_samples, input_features, output_features)
        for idx in tqdm(range(training_samples // batch_size),
                        desc='Batch: ', position=1, unit=' batch'):
            idx = cltypes.uint(idx)  # idx here is the batch number
            batch_x_gpu = x_train_gpu[idx * batch_size:idx * batch_size + batch_size]
            batch_y_gpu = y_train_gpu[idx * batch_size:idx * batch_size + batch_size]
            output = self.forward(batch_x_gpu, verbose=False)
            loss_val = loss.cpu(batch_y_gpu, output)
            losses['batch'].append(loss_val)
            optimizer(loss, self, batch_x_gpu, batch_y_gpu)
        # run the network and get error for the validation set;
        # this should be a single batch of size validation_samples and
        # will need to allocate specific validation arrays
        # if validation_user:
        #     # validate with user-supplied validation data
        #     output = self.forward(x_val_gpu, 0)  # probably a single batch
        #     val_loss = loss(y_val_gpu, output, 0)
        # else:
        #     # idx is the index of the validation set start position
        #     idx = len(x_train) - validation_samples
        #     output = self.forward(x_train_gpu, idx)
        #     val_loss = loss(y_train_gpu, output, idx)
        # losses['validation'].append(val_loss)
        # # collect metrics for the testing set
        # output = self.forward(x_test, 0)
        # test_loss = loss(y_test, output, 0)
        # losses['testing'].append(test_loss)
        for c in callbacks:
            c(losses)
    return losses
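For the size report in the loop above, a minimal standalone check (numpy arrays standing in for the training data):

import humanize
import numpy as np

x_train = np.zeros((1000, 64), dtype=np.float32)
print(humanize.naturalsize(x_train.nbytes))  # '256.0 kB' (1000 * 64 * 4 bytes)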
def writeReports(data, reportsPath, targetName):
    subPath = os.path.join(reportsPath, 'subdirs')
    if not os.path.isdir(subPath):
        os.mkdir(subPath)

    def getSize(item):
        return item[1]['size']

    def writeBreakdown(handle, title, items, showMax=False):
        # one "name: count (size)[, MAX maxsize]" line per category or
        # extension, largest total size first
        handle.write("\n" + title + ":\n")
        for name, info in sorted(items.items(), key=getSize, reverse=True):
            line = (name + ": " + "{:,}".format(info['count']) + " (" +
                    humanize.naturalsize(info['size'], gnu=True) + ")")
            if showMax:
                line += ", MAX " + humanize.naturalsize(max(info['sizes']), gnu=True)
            handle.write(line + "\n")

    summaryFile = open(os.path.join(reportsPath, targetName) + '.txt', 'w')
    summaryFile.write('TOTALS for ' + targetName + ":\n\n")
    summaryFile.write('TOTAL FILES: ' + "{:,}".format(data['count']) + "\n")
    summaryFile.write('SPACE USED: ' +
                      humanize.naturalsize(data['size'], gnu=True) + "\n")
    writeBreakdown(summaryFile, "FILE TYPES", data['cats'], showMax=True)
    writeBreakdown(summaryFile, "FILE EXTENSIONS", data['types'], showMax=True)

    for folder in data['dirs']:
        if folder == '_root':
            summaryFile.write("\nFILES IN ROOT FOLDER ONLY\n\n")
        else:
            summaryFile.write("\nSUB-FOLDER " + folder + "\n\n")
        folderInfo = data['dirs'][folder]
        subFile = folder.split('/')[-1]
        folderFile = open(os.path.join(subPath, subFile) + ".txt", "w")

        summaryFile.write('TOTAL FILES: ' +
                          "{:,}".format(folderInfo['count']) + "\n")
        summaryFile.write('SPACE USED: ' +
                          humanize.naturalsize(folderInfo['size'], gnu=True) + "\n")
        writeBreakdown(summaryFile, "FILE TYPES", folderInfo['cats'])
        writeBreakdown(summaryFile, "FILE EXTENSIONS", folderInfo['types'])

        folderFile.write("SUB-FOLDER " + folder + "\n\n")
        folderFile.write('TOTAL FILES: ' +
                         "{:,}".format(folderInfo['count']) + "\n")
        folderFile.write('SPACE USED: ' +
                         humanize.naturalsize(folderInfo['size'], gnu=True) + "\n")
        writeBreakdown(folderFile, "FILE TYPES", folderInfo['cats'])
        writeBreakdown(folderFile, "FILE EXTENSIONS", folderInfo['types'])
        folderFile.close()

    # Include actual file sizes (for building histograms) by type and extension.
    # Can use matplotlib's histogram function:
    # https://stackoverflow.com/questions/5328556/histogram-matplotlib#5328669
    summaryFile.write("\nALL SIZES BY FILE TYPE:\n")
    for fileCat, info in sorted(data['cats'].items(), key=getSize, reverse=True):
        summaryFile.write(fileCat + ": " + " ".join(
            map(str, sorted(info['sizes'], reverse=True))) + "\n")
    summaryFile.write("\nALL SIZES BY FILE EXTENSION:\n")
    for fileType, info in sorted(data['types'].items(), key=getSize, reverse=True):
        summaryFile.write(fileType + ": " + " ".join(
            map(str, sorted(info['sizes'], reverse=True))) + "\n")
    summaryFile.close()
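Following the comment above, a hedged sketch of turning one of the per-type 'sizes' lists into a histogram with matplotlib (the 'Images' category name is hypothetical; only the data layout used by the report is assumed):

import matplotlib.pyplot as plt

sizes = data['cats']['Images']['sizes']  # hypothetical category name
plt.hist(sizes, bins=50)
plt.xlabel('File size (bytes)')
plt.ylabel('Count')
plt.title('Image file size distribution')
plt.savefig('image_sizes.png')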
write_log("Data Frame: " + d.name, output) layers = arcpy.mapping.ListLayers(mxd, "", d) for lyr in layers: try: lname = lyr.name datasource = lyr.dataSource wspath = lyr.workspacePath write_log("Layer Name: " + lname, output) write_log("Data Source: " + datasource, output) strwspath = str(wspath) wspatha = wspath.replace("\\", '/') shpsizels = [] shapefiles = glob.glob(os.path.join(wspatha, "{0}*").format(lname)) for shapefile in shapefiles: strshpfile = str(shapefile) stripshp = strshpfile.replace("\\", '/') try: size = os.stat(stripshp).st_size print strshpfile + "size of file is " + str(size) shpsizels.append(str(size)) except: pass print "shp size ls has the following values: " + sum(shpsizels) except: pass print "XXXX" + str(shpsizels) total_size = sum(shpsizels) humansize = humanize.naturalsize(total_size) print " the total size for " + lname + " is " + humansize
def ds_traverse(rootds, parent=None, json=None, recursive=False,
                all_=False, long_=False):
    """Hierarchical dataset traverser

    Parameters
    ----------
    rootds: Dataset
      Root dataset to be traversed
    parent: Dataset
      Parent dataset of the current rootds
    recursive: bool
      Recurse into subdirectories of the current dataset
    all_: bool
      Recurse into subdatasets of the root dataset

    Returns
    -------
    dict
      extracts and returns a (recursive) dictionary of dataset(s) info at path
    """
    # extract parent info to pass to traverser
    fsparent = fs_extract(parent.path, parent.repo, basepath=rootds.path) \
        if parent else None

    # (recursively) traverse file tree of current dataset
    fs = fs_traverse(rootds.path, rootds.repo,
                     render=False, parent=fsparent,
                     recursive=all_, json=json)
    size_list = [fs['size']]

    # (recursively) traverse each subdataset
    children = []
    # yoh: was in return results branch returning full datasets:
    # for subds in rootds.subdatasets(result_xfm='datasets'):
    # but since rpath is needed/used, decided to return relpaths
    for subds_rpath in rootds.subdatasets(result_xfm='relpaths'):

        subds_path = opj(rootds.path, subds_rpath)
        subds = Dataset(subds_path)
        subds_json = metadata_locator(path='.', ds_path=subds_path)

        def handle_not_installed():
            # for now just traverse as fs
            lgr.warning("%s is either not installed or lacks meta-data", subds)
            subfs = fs_extract(subds_path, rootds, basepath=rootds.path)
            # but add a custom type that it is a not installed subds
            subfs['type'] = 'uninitialized'
            # we need to kick it out from 'children'
            # TODO: this is inefficient and cruel -- "ignored" should be made
            # smarter to ignore submodules for the repo
            if fs['nodes']:
                fs['nodes'] = [c for c in fs['nodes'] if c['path'] != subds_rpath]
            return subfs

        if not subds.is_installed():
            subfs = handle_not_installed()
        elif recursive:
            subfs = ds_traverse(subds,
                                json=json,
                                recursive=recursive,
                                all_=all_,
                                parent=rootds)
            subfs.pop('nodes', None)
            size_list.append(subfs['size'])
        # else just pick the data from the metadata file of each subdataset
        else:
            lgr.info(subds.path)
            if exists(subds_json):
                with open(subds_json) as data_file:
                    subfs = js.load(data_file)
                    subfs.pop('nodes', None)     # remove children
                    subfs['path'] = subds_rpath  # reassign the path
                    size_list.append(subfs['size'])
            else:
                # the same drill as if not installed
                lgr.warning("%s is installed but has no meta-data yet", subds)
                subfs = handle_not_installed()

        children.extend([subfs])

    # sum sizes of all 1st-level children datasets
    children_size = {}
    for subdataset_size in size_list:
        for size_type, subds_size in subdataset_size.items():
            children_size[size_type] = \
                children_size.get(size_type, 0) + machinesize(subds_size)

    # update current dataset sizes to the humanized aggregate subdataset sizes
    fs['size'] = {size_type: humanize.naturalsize(size)
                  for size_type, size in children_size.items()}
    # update self's size in the nodes sublist too!
    fs['nodes'][0]['size'] = fs['size']

    # add dataset-specific entries to its dict
    rootds_model = GitModel(rootds.repo)
    fs['tags'] = rootds_model.describe
    fs['branch'] = rootds_model.branch
    index_file = opj(rootds.path, '.git', 'index')
    fs['index-mtime'] = time.strftime(
        u"%Y-%m-%d %H:%M:%S",
        time.localtime(getmtime(index_file))) if exists(index_file) else ''

    # append children datasets info to current dataset
    fs['nodes'].extend(children)

    # render current dataset
    lgr.info('Dataset: %s' % rootds.path)
    fs_render(fs, json=json, ds_path=rootds.path)
    return fs
def size(cls, size: int) -> str: parts = humanize.naturalsize(size).split() assert len(parts) == 2 number = Format.bold(parts[0]) unit = parts[1] return Format.green(f'{number} {unit}')
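The assert above relies on naturalsize's default output always having the form '<number> <unit>', which splits into exactly two tokens:

import humanize

for n in (300, 4096, 10**9):
    print(humanize.naturalsize(n).split())
# ['300', 'Bytes']
# ['4.1', 'kB']
# ['1.0', 'GB']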
def total_size(self): return humanize.naturalsize(self.max, binary=True)