Example No. 1
def mk_table(datasets):
    # Summarize each (dataset, weight) pair: name, size, effective sampling
    # weight, epochs seen at 1.2T training characters, and mean document size.
    values = []

    total_weight = sum([x[1] * x[0].size() for x in datasets])

    train_chars = 1.2e12

    for dataset, weight in datasets:
        size = dataset.size()
        relative_weight = size * weight / total_weight
        values.append([
            dataset.name(), size, '{:.2%}'.format(relative_weight),
            train_chars / size * relative_weight,
            humanbytes(size / dataset.num_docs())
        ])

    values.sort(key=lambda x: -x[1])
    values.append([
        '**Total**',
        sum([x[1] for x in values]), "", "",
        humanbytes(
            sum([x[1] for x in values]) / sum(x[0].num_docs()
                                              for x in datasets))
    ])
    values = [[x[0], humanbytes(x[1]), x[2], x[3], x[4]] for x in values]

    writer = MarkdownTableWriter()
    writer.table_name = "The Pile™"
    writer.headers = [
        "Component", "Size", "Weight", "Epochs (@1.2TB)", "Mean Document Size"
    ]
    writer.value_matrix = values
    return writer.dumps()
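Every example on this page relies on a humanbytes helper that is not shown here. Below is a minimal sketch of what such a helper might look like, assuming conventional 1024-based formatting; the function body is an illustration inferred from the calls above, not the original implementation:

def humanbytes(num_bytes):
    """Illustrative sketch: format a byte count, e.g. 1536 -> '1.50 KB'."""
    for unit in ("B", "KB", "MB", "GB", "TB", "PB"):
        if num_bytes < 1024 or unit == "PB":
            return "{:.2f} {}".format(num_bytes, unit)
        num_bytes /= 1024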
Example No. 2
def main():

    #
    # ---> Check for 'restart' argument
    #

    arguments = utils.check_arguments(sys.argv)

    if ("restart" in arguments):
        restart_clean(db)

    #
    # ---> Catch the exit signal to commit the database with last checkpoint
    #

    signal.signal(signal.SIGINT, exit_handler)

    #
    # --> Marking files for deletion
    #

    nb, size = find_for_deletion(db)

    print(FMT_STR_MARKED_FILES.format(nb, utils.humanbytes(size)))

    #
    # --> Deleting/Trashing files
    #

    nb_trash, nb_fail, size = move_files(db)

    print(FMT_STR_TRASHED_FILES.format(nb_trash, nb_fail,
                                       utils.humanbytes(size)))

    return
Example No. 3
async def send_to_transfersh_async(file):
    
    size = os.path.getsize(file)
    size_of_file = humanbytes(size)
    final_date = get_date_in_two_weeks()
    file_name = os.path.basename(file)

    print("\nUploading file: {} (size of the file: {})".format(file_name, size_of_file))
    url = 'https://transfer.sh/'
    
    with open(file, 'rb') as f:
        async with aiohttp.ClientSession() as session:
            async with session.post(url, data={str(file): f}) as response:
                download_link = await response.text()
                    
    print("Link to download file(will be saved till {}):\n{}".format(final_date, download_link))
    return download_link, final_date, size_of_file
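send_to_transfersh_async is a coroutine, so it has to be driven by an event loop. A small usage sketch, assuming Python 3.7+; the file path is a placeholder:

import asyncio

# "example.txt" is a hypothetical path used only for illustration.
link, expires, size = asyncio.run(send_to_transfersh_async("example.txt"))
print(link, expires, size)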
Example No. 4
def generateRandomSamplesDVR(generator : GroundTruthDatasetGenerator, 
                          descriptor_file : str,
                          tfDirectory : str, # folder with transfer functions
                          output_file : str,
                          importance_network_path : str,
                          numImages = 20, # the number of images to create per dataset
                          inputMipmapLevel = 0, # mipmap level, see Volume::getLevel()
                          inputMipmapFilter = "average", # filter used when building the mipmap level
                          save_config_file = None, # if a string, save camera positions and so on to that csv file
                          restore_config_file = None, # if a string, restore the camera positions from that csv file
                          downsampling_factor = 1,
                          step_size = 0.1,
                          opacity_scaling = [40, 80],  #min+max
                          camera_distance = [0.5, 1.0],
                          resolution = 512,
                          dset_name = 'gt',):
    SIZE = resolution  # / downsampling_factor
    T = 10

    # load importance network
    extra_files = torch._C.ExtraFilesMap()
    extra_files['settings.json'] = ''
    importance_network = torch.jit.load(importance_network_path, _extra_files=extra_files)
    importance_network.to(device=torch.device("cuda"))
    settingsJson = json.loads(extra_files['settings.json'])
    network_upscale = int(settingsJson["networkUpscale"])
    post_upscale = downsampling_factor//network_upscale # int(settingsJson["postUpscale"])
    print("Network loaded, network upscale:", network_upscale, ", post upscale:", post_upscale)

    if save_config_file is not None and restore_config_file is not None:
        raise ValueError("'save_config_file' and 'restore_config_file' cannot both be set")

    with ExitStack() as stack:
        f = stack.enter_context(h5py.File(output_file, "a"))
        if save_config_file is not None:
            config_file = open(save_config_file, "w")
            config_file.write(
                "valid\tupX\tupY\tupZ\t" +
                "originStartX\toriginStartY\toriginStartZ\t" +
                "originEndX\toriginEndY\toriginEndZ\t" +
                "lookAtStartX\tlookAtStartY\tlookAtStartZ\t" + 
                "lookAtEndX\tlookAtEndY\tlookAtEndZ\t" + 
                "useShading\tspecularExponent\t" + 
                "lightDirX\tlightDirY\tlightDirZ\t" + 
                "tfIndex\topacityScaling\tvalueScaling\n")
        if restore_config_file is not None:
            config = np.loadtxt(restore_config_file, skiprows=1)
            if config.shape[1] != 24:
                raise ValueError("Config file must contain 24 columns, but it has only %d" % config.shape[1])
            print(config.shape[0], "sample configurations restored")

        if dset_name in f.keys():
            del f[dset_name]
        dset = f.create_dataset(
            dset_name,
            (1, T, 10, SIZE, SIZE),
            dtype=np.float32,
            chunks = (1, 1, 10, SIZE, SIZE),
            maxshape = (None, T, 10, SIZE, SIZE))
        dset.attrs["Mode"] = "DVR"

        settings = generator.getSettingsDict()

        # common configuration
        settings[GroundTruthDatasetGenerator.STEPSIZE] = step_size
        settings[GroundTruthDatasetGenerator.INTERPOLATION] = 1
        settings[GroundTruthDatasetGenerator.TIMESTEPS] = T
        settings[GroundTruthDatasetGenerator.RESOLUTION] = [SIZE, SIZE]
        settings[GroundTruthDatasetGenerator.MIPMAP_LEVEL] = inputMipmapLevel
        settings[GroundTruthDatasetGenerator.RENDER_MODE] = 2
        maxDist = 0.3

        propOnlyCamera = 0.8
        propOnlyIso = 1.0

        #list all datasets
        dataset_info = np.genfromtxt(descriptor_file, skip_header=1, dtype=None)
        num_files = dataset_info.shape[0]
        print('Datasets:')
        for j in range(num_files):
            name = str(dataset_info[j][0].decode('ascii'))
            min_iso = float(dataset_info[j][1])
            max_iso = float(dataset_info[j][2])
            print(name,"  iso=[%f,%f]"%(min_iso, max_iso))

        expected_filesize = SIZE * SIZE * 10 * T * num_files * numImages * 4
        print("Shape: B=%d, T=%d, C=%d, H=%d, W=%d"%(num_files*numImages, T, 10, SIZE, SIZE))
        print("Expected filesize:", humanbytes(expected_filesize))

        sample_index = 0
        sample_index2 = 0
        for j in range(num_files):
            name = str(dataset_info[j][0].decode('ascii'))

            inputFile = os.path.abspath(os.path.join(os.path.dirname(descriptor_file), name))
            print("Process", inputFile)
            if not generator.loadVolume(inputFile):
                print("Unable to load volume")
                continue
            if inputMipmapLevel>0:
                print("Create mipmap level")
                torch.ops.renderer.create_mipmap_level(inputMipmapLevel, inputMipmapFilter)
                
            #load transfer functions
            transfer_functions = glob.glob(tfDirectory + "/" + name[name.rfind('/') + 1:name.rfind('.')] + "/*.tf")
        
            if len(transfer_functions) == 0:
                print("No transfer function found for this volume.")
                exit(1)

            pg = ProgressBar(numImages, 'Render', length=50)
            i = 0
            numAttempts = 0
            while i < numImages:
                pg.print_progress_bar(i)
                numAttempts += 1
                if numAttempts > 5 * numImages:
                    print("Failed to sample enough images for the current volume, running out of attempts")
                    if save_config_file is not None:
                        for i in range(numImages - i):
                            config_file.write(
                                "%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%d\t%f\t%f\n"%(
                                    False, 0, 0, 0,
                                    0,0,0, 0,0,0, 0,0,0, 0,0,0,
                                    0,0, 0,0,0, 0,0, 0
                                    ))
                    break

                if restore_config_file is None:
                    originStart = GroundTruthDatasetGenerator.randomPointOnSphere() * \
                        GroundTruthDatasetGenerator.randomFloat(camera_distance[0], camera_distance[1])
                    lookAtStart = GroundTruthDatasetGenerator.randomPointOnSphere() * 0.1
                    while True:
                        originEnd = GroundTruthDatasetGenerator.randomPointOnSphere() * \
                            GroundTruthDatasetGenerator.randomFloat(camera_distance[0], camera_distance[1])
                        if np.linalg.norm(originEnd - originStart) < maxDist:
                            break
                    lookAtEnd = GroundTruthDatasetGenerator.randomPointOnSphere() * 0.1
                    up = GroundTruthDatasetGenerator.randomPointOnSphere()

                    settings[GroundTruthDatasetGenerator.CAM_UP] = list(up)
                    settings[GroundTruthDatasetGenerator.CAM_ORIGIN_START] = list(originStart)
                    settings[GroundTruthDatasetGenerator.CAM_ORIGIN_END] = list(originEnd)
                    settings[GroundTruthDatasetGenerator.CAM_LOOKAT_START] = list(lookAtStart)
                    settings[GroundTruthDatasetGenerator.CAM_LOOKAT_END] = list(lookAtEnd)

                    chosen_tf_idx = np.random.randint(len(transfer_functions))
                    tf_json = load_tf_v2(transfer_functions[chosen_tf_idx])
                    settings[GroundTruthDatasetGenerator.DENSITY_AXIS_OPACITY] = tf_json["densityAxisOpacity"]
                    settings[GroundTruthDatasetGenerator.OPACITY_AXIS] = tf_json["opacityAxis"]
                    settings[GroundTruthDatasetGenerator.DENSITY_AXIS_COLOR] = tf_json["densityAxisColor"]
                    settings[GroundTruthDatasetGenerator.COLOR_AXIS] = tf_json["colorAxis"]
                    settings[GroundTruthDatasetGenerator.MIN_DENSITY] = tf_json["minDensity"]
                    settings[GroundTruthDatasetGenerator.MAX_DENSITY] = tf_json["maxDensity"]
                    
                    opacity = opacity_scaling[0] + \
                        np.random.rand()*(opacity_scaling[1]-opacity_scaling[0])
                    settings[GroundTruthDatasetGenerator.OPACITY_SCALING] = opacity

                    useShading = np.random.randint(2)
                    specularExponent = int(2**np.random.randint(2,5))
                    lightDirection = np.array([
                        np.random.rand()*2-1,
                        np.random.rand()*2-1,
                        1])
                    lightDirection = list(lightDirection / np.linalg.norm(lightDirection))
                    settings[GroundTruthDatasetGenerator.DVR_USE_SHADING] = useShading
                    settings[GroundTruthDatasetGenerator.SPECULAR_EXPONENT] = specularExponent
                    settings[GroundTruthDatasetGenerator.LIGHT_DIRECTION] = lightDirection
                    settings[GroundTruthDatasetGenerator.VALUE_SCALING] = None

                    output = generator.render(settings, importance_network, network_upscale, post_upscale)
                    if output is not None:
                        dset.resize(sample_index+1, axis=0)
                        dset[sample_index,...] = output
                        sample_index += 1
                        i += 1

                        if save_config_file is not None:
                            valueScaling = settings[GroundTruthDatasetGenerator.VALUE_SCALING]
                            config_file.write(
                                "%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%d\t%f\t%f\n"%(
                                    True, up[0], up[1], up[2],
                                    originStart[0], originStart[1], originStart[2],
                                    originEnd[0], originEnd[1], originEnd[2],
                                    lookAtStart[0], lookAtStart[1], lookAtStart[2],
                                    lookAtEnd[0], lookAtEnd[1], lookAtEnd[2],
                                    useShading, specularExponent,
                                    lightDirection[0], lightDirection[1], lightDirection[2],
                                    chosen_tf_idx, opacity, valueScaling
                                    ))

                else:
                    # use configuration from the settings file
                    cfg = config[sample_index2,:]
                    valid = cfg[0]; cfg = cfg[1:]
                    if not valid:
                        sample_index2 += 1
                        i += 1
                        continue
                    settings[GroundTruthDatasetGenerator.CAM_UP] = [cfg[0], cfg[1], cfg[2]]
                    settings[GroundTruthDatasetGenerator.CAM_ORIGIN_START] = [cfg[3], cfg[4], cfg[5]]
                    settings[GroundTruthDatasetGenerator.CAM_ORIGIN_END] = [cfg[6], cfg[7], cfg[8]]
                    settings[GroundTruthDatasetGenerator.CAM_LOOKAT_START] = [cfg[9], cfg[10], cfg[11]]
                    settings[GroundTruthDatasetGenerator.CAM_LOOKAT_END] = [cfg[12], cfg[13], cfg[14]]
                    settings[GroundTruthDatasetGenerator.DVR_USE_SHADING] = int(cfg[15])
                    settings[GroundTruthDatasetGenerator.SPECULAR_EXPONENT] = int(cfg[16])
                    settings[GroundTruthDatasetGenerator.LIGHT_DIRECTION] = [cfg[17], cfg[18], cfg[19]]
                    settings[GroundTruthDatasetGenerator.OPACITY_SCALING] = int(cfg[21])
                    settings[GroundTruthDatasetGenerator.VALUE_SCALING] = int(cfg[22])

                    chosen_tf_idx = int(cfg[20])
                    tf_json = load_tf_v2(transfer_functions[chosen_tf_idx])
                    settings[GroundTruthDatasetGenerator.DENSITY_AXIS_OPACITY] = tf_json["densityAxisOpacity"]
                    settings[GroundTruthDatasetGenerator.OPACITY_AXIS] = tf_json["opacityAxis"]
                    settings[GroundTruthDatasetGenerator.DENSITY_AXIS_COLOR] = tf_json["densityAxisColor"]
                    settings[GroundTruthDatasetGenerator.COLOR_AXIS] = tf_json["colorAxis"]
                    settings[GroundTruthDatasetGenerator.MIN_DENSITY] = tf_json["minDensity"]
                    settings[GroundTruthDatasetGenerator.MAX_DENSITY] = tf_json["maxDensity"]

                    output = generator.render(settings, importance_network, network_upscale, post_upscale)
                    assert output is not None
                    dset.resize(sample_index+1, axis=0)
                    dset[sample_index,...] = output
                    sample_index += 1
                    sample_index2 += 1
                    i += 1

            pg.print_progress_bar(numImages)
Example No. 5
async def download_coroutine(session, url, file_name, event, start, bot):

    CHUNK_SIZE = 1024 * 6  # 2341
    downloaded = 0
    display_message = ""
    async with session.get(url) as response:
        total_length = int(response.headers["Content-Length"])
        content_type = response.headers["Content-Type"]
        if "text" in content_type and total_length < 500:
            return await response.release()
        await event.edit(
            """**Initiating Download**

**URL:** {}

**File Name:** {}

**File Size:** {}""".format(
                url,
                os.path.basename(file_name).replace("%20", " "),
                humanbytes(total_length),
            ),
            parse_mode="md",
        )
        with open(file_name, "wb") as f_handle:
            while True:
                chunk = await response.content.read(CHUNK_SIZE)
                if not chunk:
                    break
                f_handle.write(chunk)
                downloaded += len(chunk)  # count the bytes actually received; the last chunk may be short
                now = time.time()
                diff = now - start
                if round(diff % 10.00) == 0:  # update the progress message only near each 10-second mark
                    percentage = downloaded * 100 / total_length
                    speed = downloaded / diff
                    elapsed_time = round(diff) * 1000
                    time_to_completion = (round(
                        (total_length - downloaded) / speed) * 1000)
                    estimated_total_time = elapsed_time + time_to_completion
                    try:
                        if total_length < downloaded:
                            total_length = downloaded
                        current_message = """Downloading : {}%

URL: {}

File Name: {}

File Size: {}
Downloaded: {}
ETA: {}""".format("%.2f" % (percentage), url,
                        file_name.split("/")[-1], humanbytes(total_length),
                        humanbytes(downloaded), time_formatter(estimated_total_time))
                        if (current_message != display_message
                                and current_message != "empty"):
                            print(current_message)
                            await event.edit(current_message,
                                             parse_mode="html")

                            display_message = current_message
                    except Exception as e:
                        print("Error", e)
                        # logger.info(str(e))
        return await response.release()
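The progress message above also assumes a time_formatter helper that receives a duration in milliseconds (both elapsed_time and time_to_completion are multiplied by 1000). That helper is not shown on this page; the following is only a plausible sketch of it:

def time_formatter(milliseconds):
    # Plausible sketch only: render a millisecond count as e.g. "1h, 3m, 20s".
    seconds, _ = divmod(int(milliseconds), 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    parts = []
    if hours:
        parts.append("{}h".format(hours))
    if minutes:
        parts.append("{}m".format(minutes))
    parts.append("{}s".format(seconds))
    return ", ".join(parts)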
Example No. 6
def main():

    # Colorama init

    init()

    #
    # ---> Check for 'restart' argument
    #

    arguments = utils.check_arguments(sys.argv)

    if ("restart" in arguments):
        restart = True
    else:
        restart = False

    #
    # ---> Catch the exit signal to commit the database with last checkpoint
    #

    signal.signal(signal.SIGINT, exit_handler)

    #
    # ---> Read the directory files list
    #

    with open(filelist, "r") as f:
        basepath = f.readlines()

    #print(basepath)
    print("Default blocksize for this system is {} bytes.".format(
        io.DEFAULT_BUFFER_SIZE))

    #
    # ---> DB connection
    #

    cnx = db_connect(db, restart)

    last_step, last_id = get_status(cnx)
    print("Last step: {}, last ID: {}".format(last_step, last_id))
    next_step = False

    # Looking for files
    # ---

    if (last_step is None) | ((last_step == "directory_lookup") &
                              (last_id == "in progress")):

        t, nb = directories_lookup(cnx, basepath)
        print("Files lookup duration: {:.2f} sec for {} files.".format(t, nb))
        next_step = True

    else:

        print("Files lookup already done.")

    # Calculating pre hash (quick hash on first bytes)
    # ---

    if (next_step | ((last_step == "directory_lookup") & (last_id == "all")) |
        ((last_step == "filelist_pre_hash") & (last_id != "all"))):

        t = filelist_pre_hash(cnx, 'md5')
        print("Pre-hash calculation duration: {:.2f} sec.                  ".
              format(t))
        next_step = True

    else:

        print("Pre-hash calculation already done.")

    # Calculate size of all files
    # ---

    res = cnx.execute("select sum(size) FROM filelist")
    size = res.fetchone()[0]

    print("Size of all files: {}".format(utils.humanbytes(size)))

    # Recomputing hashes for duplicates candidates
    # ---

    if (next_step | ((last_step == "filelist_pre_hash") & (last_id == "all")) |
        ((last_step == "pre_duplicates_rehash") & (last_id != "all"))):

        t, nb = pre_duplicates_rehash(cnx)
        print("Pre-duplicates rehashing duration: {:.2f} sec. for {} records.".
              format(t, nb))
        next_step = True

    else:

        print("Pre-duplicates rehashing already done.")

    # Dealing with duplicates
    # ---

    if (next_step | (last_step == "pre_duplicates_rehash")):

        t, nb_dup, size_dup = duplicates_update(cnx)

    else:

        nb_dup, size_dup = duplicates_select(cnx)

    # Result summary
    # ---
    print("{} files have duplicates, total size of duplicate files is {}.".
          format(nb_dup, utils.humanbytes(size_dup)))

    # Closing database
    # ---

    cnx.close()

    return
Example No. 7
    raise ValueError(
        'Não é possível realizar contratações de pacotes para esta linha neste momento.'
    )
else:
    bytes = 419430400
    pacotes = 1

while json_response['isContratado']:

    try:
        response = client.post('contrataPacote.do', data=data, cookies=cookies)
    except Exception:
        pass
    finally:
        json_response = response.json()
        if json_response['isContratado']:
            bytes = bytes + 419430400
            pacotes = pacotes + 1
            json_dump = json.dumps(json_response, indent=4, sort_keys=True)
            print(highlight(json_dump, JsonLexer(), TerminalFormatter()))

total = humanbytes(bytes)

print(
    f'Pacotes contratados: {pacotes}\nTotal de internet: {total}\nValidade: 7 dias.'
)

os.remove(cookies_path)

quit()