def mk_table(datasets):
    values = []
    total_weight = sum([x[1] * x[0].size() for x in datasets])
    train_chars = 1.2e12

    for dataset, weight in datasets:
        size = dataset.size()
        relative_weight = size * weight / total_weight
        values.append([
            dataset.name(),
            size,
            '{:.2%}'.format(relative_weight),
            train_chars / size * relative_weight,
            humanbytes(size / dataset.num_docs())
        ])

    values.sort(key=lambda x: -x[1])
    values.append([
        '**Total**',
        sum([x[1] for x in values]),
        "",
        "",
        humanbytes(
            sum([x[1] for x in values]) / sum(x[0].num_docs() for x in datasets))
    ])
    values = [[x[0], humanbytes(x[1]), x[2], x[3], x[4]] for x in values]

    writer = MarkdownTableWriter()
    writer.table_name = "The Pile™"
    writer.headers = [
        "Component",
        "Size",
        "Weight",
        "Epochs (@1.2TB)",
        "Mean Document Size"
    ]
    writer.value_matrix = values
    return writer.dumps()
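# Every snippet in this file calls a `humanbytes` helper (sometimes as
# `utils.humanbytes`) that is defined elsewhere in the respective projects.
# Below is a minimal sketch of what such a helper might look like; the name
# `humanbytes_sketch`, the binary (1024-based) unit thresholds, and the output
# format are assumptions for illustration, not the original implementations.
def humanbytes_sketch(num_bytes: float) -> str:
    """Format a byte count as a human-readable string (B, KiB, MiB, ...)."""
    for unit in ("B", "KiB", "MiB", "GiB", "TiB", "PiB"):
        if abs(num_bytes) < 1024.0 or unit == "PiB":
            return "{:.2f} {}".format(num_bytes, unit)
        num_bytes /= 1024.0

# Example: humanbytes_sketch(1_500_000) -> '1.43 MiB'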
def main():
    #
    # ---> Check for 'restart' argument
    #
    arguments = utils.check_arguments(sys.argv)
    if ("restart" in arguments):
        restart_clean(db)

    #
    # ---> Catch the exit signal to commit the database with last checkpoint
    #
    signal.signal(signal.SIGINT, exit_handler)

    #
    # --> Marking files for deletion
    #
    nb, size = find_for_deletion(db)
    print(FMT_STR_MARKED_FILES.format(nb, utils.humanbytes(size)))

    #
    # --> Deleting/Trashing files
    #
    nb_trash, nb_fail, size = move_files(db)
    print(FMT_STR_TRASHED_FILES.format(nb_trash, trash, utils.humanbytes(size)))

    return
async def send_to_transfersh_async(file):
    size = os.path.getsize(file)
    size_of_file = humanbytes(size)
    final_date = get_date_in_two_weeks()
    file_name = os.path.basename(file)

    print("\nUploading file: {} (size of the file: {})".format(file_name, size_of_file))

    url = 'https://transfer.sh/'
    with open(file, 'rb') as f:
        async with aiohttp.ClientSession() as session:
            async with session.post(url, data={str(file): f}) as response:
                download_link = await response.text()

    print("Link to download file (will be saved till {}):\n{}".format(final_date, download_link))
    return download_link, final_date, size_of_file
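# A hedged usage sketch for the uploader above: it simply drives the coroutine
# to completion with asyncio.run (Python 3.7+). The file path "backup.tar.gz"
# is a hypothetical example, not taken from the original project.
import asyncio

def upload_example():
    # send_to_transfersh_async returns (download_link, expiry_date, human_size)
    link, expires, size = asyncio.run(send_to_transfersh_async("backup.tar.gz"))
    print("Uploaded {} -> {} (valid until {})".format(size, link, expires))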
def generateRandomSamplesDVR(generator : GroundTruthDatasetGenerator,
                             descriptor_file : str,
                             tfDirectory : str,              # folder with transfer functions
                             output_file : str,
                             importance_network_path : str,
                             numImages = 20,                 # the number of images to create per dataset
                             inputMipmapLevel = 0,           # mipmap level, see Volume::getLevel()
                             inputMipmapFilter = "average",  # mipmap filter, see Volume::getLevel()
                             save_config_file = None,        # if a string, save camera positions and so on to that csv file
                             restore_config_file = None,     # if a string, restore the camera positions from that csv file
                             downsampling_factor = 1,
                             step_size = 0.1,
                             opacity_scaling = [40, 80],     # min+max
                             camera_distance = [0.5, 1.0],
                             resolution = 512,
                             dset_name = 'gt',):
    SIZE = resolution  # / downsampling_factor
    T = 10

    # load importance network
    extra_files = torch._C.ExtraFilesMap()
    extra_files['settings.json'] = ''
    importance_network = torch.jit.load(importance_network_path, _extra_files=extra_files)
    importance_network.to(device=torch.device("cuda"))
    settingsJson = json.loads(extra_files['settings.json'])
    network_upscale = int(settingsJson["networkUpscale"])
    post_upscale = downsampling_factor // network_upscale  # int(settingsJson["postUpscale"])
    print("Network loaded, network upscale:", network_upscale, ", post upscale:", post_upscale)

    if save_config_file is not None and restore_config_file is not None:
        raise ValueError("either 'save_config_file' or 'restore_config_file' can be non None, not both")

    with ExitStack() as stack:
        f = stack.enter_context(h5py.File(output_file, "a"))

        if save_config_file is not None:
            config_file = open(save_config_file, "w")
            config_file.write(
                "valid\tupX\tupY\tupZ\t" +
                "originStartX\toriginStartY\toriginStartZ\t" +
                "originEndX\toriginEndY\toriginEndZ\t" +
                "lookAtStartX\tlookAtStartY\tlookAtStartZ\t" +
                "lookAtEndX\tlookAtEndY\tlookAtEndZ\t" +
                "useShading\tspecularExponent\t" +
                "lightDirX\tlightDirY\tlightDirZ\t" +
                "tfIndex\topacityScaling\tvalueScaling\n")
        if restore_config_file is not None:
            config = np.loadtxt(restore_config_file, skiprows=1)
            if config.shape[1] != 24:
                raise ValueError("Config file must contain 24 columns, but it has %d" % config.shape[1])
            print(config.shape[0], "sample configurations restored")

        if dset_name in f.keys():
            del f[dset_name]
        dset = f.create_dataset(
            dset_name,
            (1, T, 10, SIZE, SIZE),
            dtype=np.float32,
            chunks=(1, 1, 10, SIZE, SIZE),
            maxshape=(None, T, 10, SIZE, SIZE))
        dset.attrs["Mode"] = "DVR"

        settings = generator.getSettingsDict()
        # common configuration
        settings[GroundTruthDatasetGenerator.STEPSIZE] = step_size
        settings[GroundTruthDatasetGenerator.INTERPOLATION] = 1
        settings[GroundTruthDatasetGenerator.TIMESTEPS] = T
        settings[GroundTruthDatasetGenerator.RESOLUTION] = [SIZE, SIZE]
        settings[GroundTruthDatasetGenerator.MIPMAP_LEVEL] = inputMipmapLevel
        settings[GroundTruthDatasetGenerator.RENDER_MODE] = 2

        maxDist = 0.3
        propOnlyCamera = 0.8
        propOnlyIso = 1.0

        # list all datasets
        dataset_info = np.genfromtxt(descriptor_file, skip_header=1, dtype=None)
        num_files = dataset_info.shape[0]
        print('Datasets:')
        for j in range(num_files):
            name = str(dataset_info[j][0].decode('ascii'))
            min_iso = float(dataset_info[j][1])
            max_iso = float(dataset_info[j][2])
            print(name, " iso=[%f,%f]" % (min_iso, max_iso))
        expected_filesize = SIZE * SIZE * 10 * T * num_files * numImages * 4
        print("Shape: B=%d, T=%d, C=%d, H=%d, W=%d" % (num_files * numImages, T, 10, SIZE, SIZE))
        print("Expected filesize:", humanbytes(expected_filesize))

        sample_index = 0
        sample_index2 = 0
        for j in range(num_files):
            name = str(dataset_info[j][0].decode('ascii'))
            inputFile = os.path.abspath(os.path.join(os.path.dirname(descriptor_file), name))
            print("Process", inputFile)
            if not generator.loadVolume(inputFile):
                print("Unable to load volume")
                continue
            if inputMipmapLevel > 0:
                print("Create mipmap level")
                torch.ops.renderer.create_mipmap_level(inputMipmapLevel, inputMipmapFilter)

            # load transfer functions
            transfer_functions = glob.glob(tfDirectory + "/" + name[name.rfind('/') + 1:name.rfind('.')] + "/*.tf")
            if len(transfer_functions) == 0:
                print("There is no TF created for this volume.")
                exit(1)

            pg = ProgressBar(numImages, 'Render', length=50)
            i = 0
            numAttempts = 0
            while i < numImages:
                pg.print_progress_bar(i)
                numAttempts += 1
                if numAttempts > 5 * numImages:
                    print("Failed to sample enough images for the current volume, running out of attempts")
                    if save_config_file is not None:
                        # pad the config file with invalid placeholder rows for the missing images
                        for _ in range(numImages - i):
                            config_file.write(
                                "%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%d\t%f\t%f\n" % (
                                    False,
                                    0, 0, 0,
                                    0, 0, 0,
                                    0, 0, 0,
                                    0, 0, 0,
                                    0, 0, 0,
                                    0, 0,
                                    0, 0, 0,
                                    0, 0,
                                    0
                                ))
                    break

                if restore_config_file is None:
                    originStart = GroundTruthDatasetGenerator.randomPointOnSphere() * \
                        GroundTruthDatasetGenerator.randomFloat(camera_distance[0], camera_distance[1])
                    lookAtStart = GroundTruthDatasetGenerator.randomPointOnSphere() * 0.1
                    while True:
                        originEnd = GroundTruthDatasetGenerator.randomPointOnSphere() * \
                            GroundTruthDatasetGenerator.randomFloat(camera_distance[0], camera_distance[1])
                        if np.linalg.norm(originEnd - originStart) < maxDist:
                            break
                    lookAtEnd = GroundTruthDatasetGenerator.randomPointOnSphere() * 0.1
                    up = GroundTruthDatasetGenerator.randomPointOnSphere()
                    settings[GroundTruthDatasetGenerator.CAM_UP] = list(up)
                    settings[GroundTruthDatasetGenerator.CAM_ORIGIN_START] = list(originStart)
                    settings[GroundTruthDatasetGenerator.CAM_ORIGIN_END] = list(originEnd)
                    settings[GroundTruthDatasetGenerator.CAM_LOOKAT_START] = list(lookAtStart)
                    settings[GroundTruthDatasetGenerator.CAM_LOOKAT_END] = list(lookAtEnd)

                    chosen_tf_idx = np.random.randint(len(transfer_functions))
                    tf_json = load_tf_v2(transfer_functions[chosen_tf_idx])
                    settings[GroundTruthDatasetGenerator.DENSITY_AXIS_OPACITY] = tf_json["densityAxisOpacity"]
                    settings[GroundTruthDatasetGenerator.OPACITY_AXIS] = tf_json["opacityAxis"]
                    settings[GroundTruthDatasetGenerator.DENSITY_AXIS_COLOR] = tf_json["densityAxisColor"]
                    settings[GroundTruthDatasetGenerator.COLOR_AXIS] = tf_json["colorAxis"]
                    settings[GroundTruthDatasetGenerator.MIN_DENSITY] = tf_json["minDensity"]
                    settings[GroundTruthDatasetGenerator.MAX_DENSITY] = tf_json["maxDensity"]
                    opacity = opacity_scaling[0] + \
                        np.random.rand() * (opacity_scaling[1] - opacity_scaling[0])
                    settings[GroundTruthDatasetGenerator.OPACITY_SCALING] = opacity

                    useShading = np.random.randint(2)
                    specularExponent = int(2 ** np.random.randint(2, 5))
                    lightDirection = np.array([
                        np.random.rand() * 2 - 1,
                        np.random.rand() * 2 - 1,
                        1])
                    lightDirection = list(lightDirection / np.linalg.norm(lightDirection))
                    settings[GroundTruthDatasetGenerator.DVR_USE_SHADING] = useShading
                    settings[GroundTruthDatasetGenerator.SPECULAR_EXPONENT] = specularExponent
                    settings[GroundTruthDatasetGenerator.LIGHT_DIRECTION] = lightDirection
                    settings[GroundTruthDatasetGenerator.VALUE_SCALING] = None

                    output = generator.render(settings, importance_network, network_upscale, post_upscale)
                    if output is not None:
                        dset.resize(sample_index + 1, axis=0)
                        dset[sample_index, ...] = output
                        sample_index += 1
                        i += 1
                        if save_config_file is not None:
                            valueScaling = settings[GroundTruthDatasetGenerator.VALUE_SCALING]
                            config_file.write(
                                "%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%d\t%f\t%f\n" % (
                                    True,
                                    up[0], up[1], up[2],
                                    originStart[0], originStart[1], originStart[2],
                                    originEnd[0], originEnd[1], originEnd[2],
                                    lookAtStart[0], lookAtStart[1], lookAtStart[2],
                                    lookAtEnd[0], lookAtEnd[1], lookAtEnd[2],
                                    useShading, specularExponent,
                                    lightDirection[0], lightDirection[1], lightDirection[2],
                                    chosen_tf_idx, opacity, valueScaling
                                ))
                else:
                    # use configuration from the settings file
                    cfg = config[sample_index2, :]
                    valid = cfg[0]
                    cfg = cfg[1:]
                    if not valid:
                        sample_index2 += 1
                        i += 1
                        continue
                    settings[GroundTruthDatasetGenerator.CAM_UP] = [cfg[0], cfg[1], cfg[2]]
                    settings[GroundTruthDatasetGenerator.CAM_ORIGIN_START] = [cfg[3], cfg[4], cfg[5]]
                    settings[GroundTruthDatasetGenerator.CAM_ORIGIN_END] = [cfg[6], cfg[7], cfg[8]]
                    settings[GroundTruthDatasetGenerator.CAM_LOOKAT_START] = [cfg[9], cfg[10], cfg[11]]
                    settings[GroundTruthDatasetGenerator.CAM_LOOKAT_END] = [cfg[12], cfg[13], cfg[14]]
                    settings[GroundTruthDatasetGenerator.DVR_USE_SHADING] = int(cfg[15])
                    settings[GroundTruthDatasetGenerator.SPECULAR_EXPONENT] = int(cfg[16])
                    settings[GroundTruthDatasetGenerator.LIGHT_DIRECTION] = [cfg[17], cfg[18], cfg[19]]
                    settings[GroundTruthDatasetGenerator.OPACITY_SCALING] = int(cfg[21])
                    settings[GroundTruthDatasetGenerator.VALUE_SCALING] = int(cfg[22])
                    chosen_tf_idx = int(cfg[20])
                    tf_json = load_tf_v2(transfer_functions[chosen_tf_idx])
                    settings[GroundTruthDatasetGenerator.DENSITY_AXIS_OPACITY] = tf_json["densityAxisOpacity"]
                    settings[GroundTruthDatasetGenerator.OPACITY_AXIS] = tf_json["opacityAxis"]
                    settings[GroundTruthDatasetGenerator.DENSITY_AXIS_COLOR] = tf_json["densityAxisColor"]
                    settings[GroundTruthDatasetGenerator.COLOR_AXIS] = tf_json["colorAxis"]
                    settings[GroundTruthDatasetGenerator.MIN_DENSITY] = tf_json["minDensity"]
                    settings[GroundTruthDatasetGenerator.MAX_DENSITY] = tf_json["maxDensity"]

                    output = generator.render(settings, importance_network, network_upscale, post_upscale)
                    assert output is not None
                    dset.resize(sample_index + 1, axis=0)
                    dset[sample_index, ...] = output
                    sample_index += 1
                    sample_index2 += 1
                    i += 1
            pg.print_progress_bar(numImages)
async def download_coroutine(session, url, file_name, event, start, bot):
    CHUNK_SIZE = 1024 * 6  # 2341
    downloaded = 0
    display_message = ""
    async with session.get(url) as response:
        total_length = int(response.headers["Content-Length"])
        content_type = response.headers["Content-Type"]
        if "text" in content_type and total_length < 500:
            return await response.release()
        await event.edit(
            """**Initiating Download**
**URL:** {}
**File Name:** {}
**File Size:** {}""".format(
                url,
                os.path.basename(file_name).replace("%20", " "),
                humanbytes(total_length),
            ),
            parse_mode="md",
        )
        with open(file_name, "wb") as f_handle:
            while True:
                chunk = await response.content.read(CHUNK_SIZE)
                if not chunk:
                    break
                f_handle.write(chunk)
                # count the bytes actually received, not the nominal chunk size
                downloaded += len(chunk)
                now = time.time()
                diff = now - start
                if round(diff % 10.00) == 0:  # downloaded == total_length:
                    percentage = downloaded * 100 / total_length
                    speed = downloaded / diff
                    elapsed_time = round(diff) * 1000
                    time_to_completion = round((total_length - downloaded) / speed) * 1000
                    estimated_total_time = elapsed_time + time_to_completion
                    try:
                        if total_length < downloaded:
                            total_length = downloaded
                        current_message = """Downloading : {}%
URL: {}
File Name: {}
File Size: {}
Downloaded: {}
ETA: {}""".format(
                            "%.2f" % (percentage),
                            url,
                            file_name.split("/")[-1],
                            humanbytes(total_length),
                            humanbytes(downloaded),
                            time_formatter(estimated_total_time))
                        if (current_message != display_message and current_message != "empty"):
                            print(current_message)
                            await event.edit(current_message, parse_mode="html")
                            display_message = current_message
                    except Exception as e:
                        print("Error", e)
                        # logger.info(str(e))
        return await response.release()
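# The coroutine above calls a `time_formatter` helper (defined elsewhere in its
# project) on a value in milliseconds. A minimal sketch under that assumption;
# the name `time_formatter_sketch` and the exact output format are illustrative,
# not the original implementation.
def time_formatter_sketch(milliseconds: int) -> str:
    """Convert a millisecond duration into a 'Xd, Xh, Xm, Xs' style string."""
    seconds, _ = divmod(int(milliseconds), 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
    parts = []
    if days:
        parts.append("{}d".format(days))
    if hours:
        parts.append("{}h".format(hours))
    if minutes:
        parts.append("{}m".format(minutes))
    parts.append("{}s".format(seconds))
    return ", ".join(parts)

# Example: time_formatter_sketch(3_723_000) -> '1h, 2m, 3s'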
def main():
    # Colorama init
    init()

    #
    # ---> Check for 'restart' argument
    #
    arguments = utils.check_arguments(sys.argv)
    if ("restart" in arguments):
        restart = True
    else:
        restart = False

    #
    # ---> Catch the exit signal to commit the database with last checkpoint
    #
    signal.signal(signal.SIGINT, exit_handler)

    #
    # ---> Read the directory files list
    #
    with open(filelist, "r") as f:
        basepath = f.readlines()
    #print(basepath)

    print("Default blocksize for this system is {} bytes.".format(
        io.DEFAULT_BUFFER_SIZE))

    #
    # ---> DB connection
    #
    cnx = db_connect(db, restart)
    last_step, last_id = get_status(cnx)
    print("Last step: {}, last ID: {}".format(last_step, last_id))
    next_step = False

    # Looking for files
    # ---
    if (last_step == None) | ((last_step == "directory_lookup") & (last_id == "in progress")):
        t, nb = directories_lookup(cnx, basepath)
        print("Files lookup duration: {:.2f} sec for {} files.".format(t, nb))
        next_step = True
    else:
        print("Files lookup already done.")

    # Calculating pre hash (quick hash on first bytes)
    # ---
    if (next_step
            | ((last_step == "directory_lookup") & (last_id == "all"))
            | ((last_step == "filelist_pre_hash") & (last_id != "all"))):
        t = filelist_pre_hash(cnx, 'md5')
        print("Pre-hash calculation duration: {:.2f} sec.".format(t))
        next_step = True
    else:
        print("Pre-hash calculation already done.")

    # Calculate size of all files
    # ---
    res = cnx.execute("select sum(size) FROM filelist")
    size = res.fetchone()[0]
    print("Size of all files: {}".format(utils.humanbytes(size)))

    # Recomputing hashes for duplicates candidates
    # ---
    if (next_step
            | ((last_step == "filelist_pre_hash") & (last_id == "all"))
            | ((last_step == "pre_duplicates_rehash") & (last_id != "all"))):
        t, nb = pre_duplicates_rehash(cnx)
        print("Pre-duplicates rehashing duration: {:.2f} sec. for {} records.".format(t, nb))
        next_step = True
    else:
        print("Pre-duplicates rehashing already done.")

    # Dealing with duplicates
    # ---
    if (next_step | (last_step == "pre_duplicates_rehash")):
        t, nb_dup, size_dup = duplicates_update(cnx)
    else:
        nb_dup, size_dup = duplicates_select(cnx)

    # Result summary
    # ---
    print("{} files have duplicates, total size of duplicate files is {}.".format(
        nb_dup, utils.humanbytes(size_dup)))

    # Closing database
    # ---
    cnx.close()
    return
        raise ValueError(
            # "It is not possible to purchase data packages for this line at the moment."
            'Não é possível realizar contratações de pacotes para esta linha neste momento.'
        )
    else:
        bytes = 419430400  # 400 MiB granted per purchased package
        pacotes = 1
        while json_response['isContratado']:
            try:
                response = client.post('contrataPacote.do', data=data, cookies=cookies)
            except Exception:
                pass
            finally:
                json_response = response.json()
                if json_response['isContratado']:
                    bytes = bytes + 419430400
                    pacotes = pacotes + 1
                json_dump = json.dumps(json_response, indent=4, sort_keys=True)
                print(highlight(json_dump, JsonLexer(), TerminalFormatter()))
        total = humanbytes(bytes)
        # "Packages purchased: ... / Total data: ... / Validity: 7 days."
        print(
            f'Pacotes contratados: {pacotes}\nTotal de internet: {total}\nValidade: 7 dias.'
        )
        os.remove(cookies_path)
        quit()