async def retrieve_data():
    print("retrieving data...")
    PORTS = node_ports.ports()

    inputs_path = Path("~/home").expanduser()
    inputs_path.mkdir(exist_ok=True)

    values = {}
    for node_input in PORTS.inputs:
        if not node_input or node_input.value is None:
            continue
        print("getting data from port '{}' with value '{}'...".format(
            node_input.key, node_input.value))
        value = await node_input.get()
        values[node_input.key] = {"type": node_input.type, "value": value}

        if "data:" in node_input.type:
            dest = inputs_path / node_input.key
            dest.mkdir(exist_ok=True, parents=True)
            dest = dest / Path(value).name
            shutil.move(value, dest)
            values[node_input.key] = {
                "type": node_input.type,
                "value": str(dest)
            }

    values_file = inputs_path / "values.json"
    with values_file.open('w') as fp:
        json.dump(values, fp)
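
# A minimal driver sketch (an addition, not part of the example above), assuming
# an asyncio event loop and an already configured node_ports environment: how a
# synchronous entry point might run the retrieve_data() coroutine.
import asyncio

def main():
    asyncio.get_event_loop().run_until_complete(retrieve_data())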
Example #2
async def upload_data():
    logger.info("uploading data to simcore...")
    PORTS = node_ports.ports()
    outputs_path = Path(_OUTPUTS_FOLDER).expanduser()
    for port in PORTS.outputs:
        logger.debug("uploading data to port '%s' with value '%s'...", port.key, port.value)
        if _FILE_TYPE_PREFIX in port.type:
            src_folder = outputs_path / port.key
            list_files = list(src_folder.glob("*"))
            if len(list_files) == 1:
                # special case, direct upload
                await port.set(list_files[0])
                continue
            # generic case: create an archive with all the files
            if len(list_files) > 1:
                temp_file = tempfile.NamedTemporaryFile(suffix=".tgz")
                temp_file.close()
                with tarfile.open(temp_file.name, mode='w:gz') as tar_ptr:
                    for file_path in list_files:
                        tar_ptr.add(str(file_path), arcname=file_path.name, recursive=False)
                try:
                    await port.set(temp_file.name)
                finally:
                    #clean up
                    Path(temp_file.name).unlink()
        else:
            values_file = outputs_path / _KEY_VALUE_FILE_NAME
            if values_file.exists():
                values = json.loads(values_file.read_text())
                if port.key in values and values[port.key] is not None:
                    await port.set(values[port.key])

    logger.info("all data uploaded to simcore")
Example #3
async def test_port_file_accessors(special_configuration, storage,
                                   filemanager_cfg, s3_simcore_location,
                                   bucket, item_type, item_value, item_pytype,
                                   config_value):  # pylint: disable=W0613, W0621
    config_dict, project_id, node_uuid = special_configuration(
        inputs=[("in_1", item_type, config_value)],
        outputs=[("out_34", item_type, None)])
    PORTS = node_ports.ports()
    check_config_valid(PORTS, config_dict)
    assert await PORTS.outputs["out_34"].get() is None  # check emptiness
    # with pytest.raises(exceptions.S3InvalidPathError, message="Expecting S3InvalidPathError"):
    #     await PORTS.inputs["in_1"].get()

    # this triggers an upload to S3 + configuration change
    await PORTS.outputs["out_34"].set(item_value)
    # this is the link to S3 storage
    assert PORTS.outputs["out_34"].value == {
        "store":
        s3_simcore_location,
        "path":
        Path(str(project_id), str(node_uuid),
             Path(item_value).name).as_posix()
    }
    # this triggers a download from S3 to a location in /tempdir/simcorefiles/item_key
    assert isinstance(await PORTS.outputs["out_34"].get(), item_pytype)
    assert (await PORTS.outputs["out_34"].get()).exists()
    assert str(await PORTS.outputs["out_34"].get()).startswith(
        str(Path(tempfile.gettempdir(), "simcorefiles", "out_34")))
    filecmp.clear_cache()
    assert filecmp.cmp(item_value, await PORTS.outputs["out_34"].get())
Example #4
async def retrieve_data():
    # get all files in the local system and copy them to the input folder
    start_time = time.time()
    PORTS = node_ports.ports()
    download_tasks = []
    for node_input in PORTS.inputs:
        if not node_input or node_input.value is None:
            continue

        # collect coroutines
        download_tasks.append(node_input.get())
    if download_tasks:
        downloaded_files = await asyncio.gather(*download_tasks)
        print("downloaded {} files /tmp <br>".format(len(download_tasks)))
        for local_path in downloaded_files:
            if local_path is None:
                continue
            # log.debug("Completed download of %s in local path %s", node_input.value, local_path)
            if local_path.exists():
                if zipfile.is_zipfile(str(local_path)):
                    zip_ref = zipfile.ZipFile(str(local_path), 'r')
                    zip_ref.extractall(str(_INPUT_PATH))
                    zip_ref.close()
                    log.debug("Unzipped")
                    print("unzipped {file} to {path}<br>".format(file=str(local_path), path=str(_INPUT_PATH)))
                else:
                    log.debug("Start moving %s to input path %s", local_path, _INPUT_PATH)
                    shutil.move(str(local_path), str(_INPUT_PATH / local_path.name))
                    log.debug("Move completed")
                    print("moved {file} to {path}<br>".format(file=str(local_path), path=str(_INPUT_PATH)))
        end_time = time.time()
        print("time to download: {} seconds".format(end_time - start_time))
Example #5
    def _process_task_output(self):
        # pylint: disable=too-many-branches

        """ There will be some files in the /output

                - Maybe a output.json (should contain key value for simple things)
                - other files: should be named by the key in the output port

            Files will be pushed to S3 with reference in db. output.json will be parsed
            and the db updated
        """
        PORTS = node_ports.ports()
        directory = self._executor.out_dir
        if not os.path.exists(directory):
            return
        try:
            for root, _dirs, files in os.walk(directory):
                for name in files:
                    filepath = os.path.join(root, name)
                    # the name should match what is in the db!
                    if name == 'output.json':
                        log.debug("POSTRO FOUND output.json")
                        # parse and compare/update with the tasks output ports from db
                        output_ports = dict()
                        with open(filepath) as f:
                            output_ports = json.load(f)
                            task_outputs = PORTS.outputs
                            for to in task_outputs:
                                if to.key in output_ports.keys():
                                    wrap_async_call(to.set(output_ports[to.key]))
                    else:
                        wrap_async_call(PORTS.set_file_by_keymap(Path(filepath)))

        except (OSError, IOError) as _e:
            logging.exception("Could not process output")
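
# A hedged sketch of what a computational service might write into its output
# folder so that _process_task_output() above can pick it up: simple values go
# into output.json keyed by the output port keys, while larger results are
# written as files that set_file_by_keymap() can match by name. The /output
# path and the keys "out_1"/"out_2" are illustrative assumptions only.
import json
from pathlib import Path

out_dir = Path("/output")
out_dir.mkdir(parents=True, exist_ok=True)
(out_dir / "output.json").write_text(json.dumps({"out_1": 42, "out_2": "done"}))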
Example #6
def test_adding_new_ports(special_configuration, session):
    config_dict, project_id, node_uuid = special_configuration()
    PORTS = node_ports.ports()
    check_config_valid(PORTS, config_dict)
    # check empty configuration
    assert not PORTS.inputs
    assert not PORTS.outputs

    # replace the configuration now, add an input
    config_dict["schema"]["inputs"].update({
        "in_15": {
            "label": "additional data",
            "description": "here some additional data",
            "displayOrder": 2,
            "type": "integer"
        }
    })
    config_dict["inputs"].update({"in_15": 15})
    helpers.update_configuration(session, project_id, node_uuid, config_dict)  #pylint: disable=E1101
    check_config_valid(PORTS, config_dict)

    # replace the configuration now, add an output
    config_dict["schema"]["outputs"].update({
        "out_15": {
            "label": "output data",
            "description": "a cool output",
            "displayOrder": 2,
            "type": "boolean"
        }
    })
    helpers.update_configuration(session, project_id, node_uuid, config_dict)  #pylint: disable=E1101
    check_config_valid(PORTS, config_dict)
Example #7
    def _process_task_inputs(self):
        """ Writes input key-value pairs into a dictionary

            if the value of any port starts with 'link.' the corresponding
            output ports a fetched or files dowloaded --> @ jsonld

            The dictionary is dumped to input.json, files are dumped
            as port['key']. Both end up in /input/ of the container
        """
        log.debug('Input parsing for %s and node %s from container', self._task.project_id, self._task.internal_id)

        input_ports = dict()
        PORTS = node_ports.ports()
        for port in PORTS.inputs:
            log.debug(port)
            self._process_task_input(port, input_ports)

        log.debug('DUMPING json')
        #dump json file
        if input_ports:
            file_name = os.path.join(self._executor.in_dir, 'input.json')
            with open(file_name, 'w') as f:
                json.dump(input_ports, f)

        log.debug('DUMPING DONE')
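
# A hedged sketch of the consumer side described in the docstring above: inside
# the container, a service could read the key/value pairs back from
# /input/input.json and find file-type inputs next to it, dumped as port['key'].
# The /input path and the port key "in_1" are assumptions for illustration.
import json
from pathlib import Path

in_dir = Path("/input")
input_values = json.loads((in_dir / "input.json").read_text())
in_1_file = in_dir / "in_1"  # a file-type input, if that port carries one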
Example #8
async def upload_data():
    logger.info("uploading data to simcore...")
    PORTS = node_ports.ports()
    outputs_path = Path(_OUTPUTS_FOLDER).expanduser()
    for port in PORTS.outputs:
        logger.debug("uploading data to port '%s' with value '%s'...",
                     port.key, port.value)
        src_folder = outputs_path / port.key
        list_files = list(src_folder.glob("*"))
        if len(list_files) == 1:
            # special case, direct upload
            await port.set(list_files[0])
            continue
        # generic case: create an archive with all the files
        if len(list_files) > 1:
            temp_file = tempfile.NamedTemporaryFile(suffix=".tgz")
            temp_file.close()
            with tarfile.open(temp_file.name, mode='w:gz') as tar_ptr:
                for file_path in list_files:
                    tar_ptr.add(str(file_path),
                                arcname=file_path.name,
                                recursive=False)
            try:
                await port.set(temp_file.name)
            finally:
                #clean up
                Path(temp_file.name).unlink()

    logger.info("all data uploaded to simcore")
Example #9
async def download_data():
    logger.info("retrieving data from simcore...")
    print("retrieving data from simcore...")

    # get all files in the local system and copy them to the input folder
    PORTS = node_ports.ports()
    for port in PORTS.inputs:
        if not port or port.value is None:
            continue

        local_path = await port.get()
        dest_path = _INPUTS_FOLDER / port.key
        dest_path.mkdir(exist_ok=True, parents=True)

        # clean up destination directory
        for path in dest_path.iterdir():
            if path.is_file():
                path.unlink()
            elif path.is_dir():
                shutil.rmtree(path)
        # check if local_path is a compressed file
        if tarfile.is_tarfile(local_path):
            with tarfile.open(local_path) as tar_file:
                tar_file.extractall(dest_path,
                                    members=_no_relative_path_tar(tar_file))
        elif zipfile.is_zipfile(local_path):
            with zipfile.ZipFile(local_path) as zip_file:
                zip_file.extractall(dest_path,
                                    members=_no_relative_path_zip(zip_file))
        else:
            dest_path_name = _INPUTS_FOLDER / (port.key + ":" +
                                               Path(local_path).name)
            shutil.move(local_path, dest_path_name)
            shutil.rmtree(Path(local_path).parents[0])
Example #10
def pandas_dataframe_to_output_data(data_frame, title, header=False, port_number=0):
    title = title.replace(" ", "_") + ".csv"
    dummy_file_path = Path(title)
    data_frame.to_csv(dummy_file_path, sep=',', header=header, index=False, encoding='utf-8')

    ports = node_ports.ports()
    task = ports.outputs[port_number].set(dummy_file_path)
    asyncio.get_event_loop().run_until_complete( task )
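
# A small usage sketch for the helper above, assuming pandas is available; the
# column names, title and port number are illustrative only.
import pandas as pd

df = pd.DataFrame({"time": [0.0, 0.1, 0.2], "voltage": [1.0, 0.8, 0.5]})
pandas_dataframe_to_output_data(df, "membrane potential", header=True, port_number=0)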
Example #11
async def retrieve_data(ports: List[str], cache: Dict) -> int:
    # get all files in the local system and copy them to the input folder
    start_time = time.perf_counter()
    PORTS = node_ports.ports()
    download_tasks = []
    for node_input in PORTS.inputs:
        # if ports contains some keys only download them
        log.info("Checking node %s", node_input.key)
        if ports and node_input.key not in ports:
            continue
        # delete the corresponding file(s) if applicable
        if node_input.key in cache:
            log.info("Deleting files from %s: %s", node_input.key,
                     cache[node_input.key])
            for file_path in cache[node_input.key]:
                Path(file_path).unlink()
            del cache[node_input.key]
        if not node_input or node_input.value is None:
            continue
        # collect coroutines
        download_tasks.append(task(node_input.key, node_input.get))
    log.info("retrieving %s data", len(download_tasks))

    transfer_bytes = 0
    if download_tasks:
        download_results = await asyncio.gather(*download_tasks)
        log.info(
            "completed download, extracting/moving data to final folder...")
        for node_key, local_path in download_results:
            if local_path is None:
                continue

            if not local_path.exists():
                continue
            transfer_bytes = transfer_bytes + local_path.stat().st_size
            if zipfile.is_zipfile(str(local_path)):
                log.info("extracting %s to %s", local_path, input_path())
                zip_ref = zipfile.ZipFile(str(local_path), 'r')
                zip_ref.extractall(str(input_path()))
                cache[node_key] = \
                    [str(input_path() / zipped_file) for zipped_file in zip_ref.namelist()]
                zip_ref.close()
                log.info("extraction completed")
            else:
                log.info("moving %s to input path %s", local_path,
                         input_path())
                dest_path = input_path() / local_path.name
                shutil.move(str(local_path), str(dest_path))
                cache[node_key] = [str(dest_path)]
                log.info("move completed")
        end_time = time.perf_counter()
        log.info("retrieval complete: took %.2f seconds for %s bytes",
                 end_time - start_time, transfer_bytes)
    return transfer_bytes
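
# The example above gathers coroutines built by a task() helper that is not
# shown. A plausible sketch, purely an assumption inferred from the results
# being unpacked as (node_key, local_path) pairs:
async def task(node_key, get_fct):
    # download the port value and pair it with its port key
    local_path = await get_fct()
    return node_key, local_path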
Example #12
async def test_get_value_from_previous_node(special_2nodes_configuration,
                                            node_link, item_type, item_value,
                                            item_pytype):
    config_dict, _, _ = special_2nodes_configuration(
        prev_node_outputs=[("output_123", item_type, item_value)],
        inputs=[("in_15", item_type, node_link("output_123"))])
    PORTS = node_ports.ports()

    check_config_valid(PORTS, config_dict)
    input_value = await PORTS.inputs["in_15"].get()
    assert isinstance(input_value, item_pytype)
    assert await PORTS.inputs["in_15"].get() == item_value
Example #13
async def upload_data(port_keys: List[str]) -> int:  #pylint: disable=too-many-branches
    logger.info("uploading data to simcore...")
    start_time = time.perf_counter()
    PORTS = node_ports.ports()
    outputs_path = Path(_OUTPUTS_FOLDER).expanduser()

    # let's gather the tasks
    temp_files = []
    upload_tasks = []
    transfer_bytes = 0
    for port in PORTS.outputs:
        logger.info("Checking port %s", port.key)
        if port_keys and port.key not in port_keys:
            continue
        logger.debug("uploading data to port '%s' with value '%s'...",
                     port.key, port.value)
        if _FILE_TYPE_PREFIX in port.type:
            src_folder = outputs_path / port.key
            list_files = list(src_folder.glob("*"))
            if len(list_files) == 1:
                # special case, direct upload
                upload_tasks.append(set_time_wrapped(port, list_files[0]))
                continue
            # generic case let's create an archive
            if len(list_files) > 1:
                temp_file = tempfile.NamedTemporaryFile(suffix=".zip")
                temp_file.close()
                with zipfile.ZipFile(temp_file.name, mode="w") as zip_ptr:
                    for file_path in list_files:
                        zip_ptr.write(str(file_path), arcname=file_path.name)

                temp_files.append(temp_file.name)
                upload_tasks.append(set_time_wrapped(port, temp_file.name))
        else:
            data_file = outputs_path / _KEY_VALUE_FILE_NAME
            if data_file.exists():
                data = json.loads(data_file.read_text())
                if port.key in data and data[port.key] is not None:
                    upload_tasks.append(set_time_wrapped(port, data[port.key]))
    if upload_tasks:
        try:
            results = await asyncio.gather(*upload_tasks)
            transfer_bytes = sum(results)
        finally:
            # clean up possible compressed files
            for file_path in temp_files:
                Path(file_path).unlink()

    stop_time = time.perf_counter()
    logger.info("all data uploaded to simcore in %sseconds",
                stop_time - start_time)
    return transfer_bytes
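
# set_time_wrapped() is gathered above but not shown. A hypothetical sketch,
# inferred only from the fact that the gathered results are summed into
# transfer_bytes; Path and sys are assumed to be imported in that module.
async def set_time_wrapped(port, value):
    # upload the value and report an approximate transferred size
    await port.set(value)
    if isinstance(value, (str, Path)) and Path(value).exists():
        return Path(value).stat().st_size
    return sys.getsizeof(value)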
Example #14
async def pandas_dataframe_to_csv(data_frame,
                                  title,
                                  header=False,
                                  port_number=0):
    title = title.replace(" ", "_") + ".csv"
    dummy_file_path = Path(title)
    data_frame.to_csv(dummy_file_path,
                      sep=',',
                      header=header,
                      index=False,
                      encoding='utf-8')

    PORTS = node_ports.ports()
    await PORTS.outputs[port_number].set(dummy_file_path)
Example #15
async def _initialise_platform(port_configuration_path: Path, file_generator,
                               delete_file):

    with port_configuration_path.open() as file_pointer:
        configuration = json.load(file_pointer)

    if not all(k in configuration for k in ("schema", "inputs", "outputs")):
        raise Exception("invalid port configuration in {}, {}!".format(
            str(port_configuration_path), configuration))

    # init s3 to ensure we have a bucket
    init_s3()
    # set up db
    db = init_db()

    # create a new pipeline
    new_Pipeline = ComputationalPipeline(project_id=str(uuid.uuid4()))
    db.session.add(new_Pipeline)
    db.session.commit()

    # create a new node
    node_uuid = str(uuid.uuid4())
    # now create the node in the db with links to S3
    new_Node = ComputationalTask(project_id=new_Pipeline.project_id,
                                 node_id=node_uuid,
                                 schema=configuration["schema"],
                                 inputs=configuration["inputs"],
                                 outputs=configuration["outputs"])
    db.session.add(new_Node)
    db.session.commit()

    # set up node_ports
    node_ports.node_config.NODE_UUID = node_uuid
    PORTS = node_ports.ports()
    # push the file to the S3 for each input item
    file_index = 0
    for key, input_item in configuration["schema"]["inputs"].items():
        if str(input_item["type"]).startswith("data:"):
            file_to_upload = file_generator(file_index, input_item["type"])
            if file_to_upload is not None:
                # upload to S3
                await PORTS.inputs[key].set(Path(file_to_upload))
                file_index += 1
                if delete_file:
                    Path(file_to_upload).unlink()

    # print the node uuid so that it can be set as env variable from outside
    print("{pipelineid},{nodeuuid}".format(pipelineid=str(new_Node.project_id),
                                           nodeuuid=node_uuid))
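
# file_generator is passed into _initialise_platform() but not defined in this
# example. A hypothetical stand-in showing the expected contract: it receives a
# running index and the port type and returns a path to a file to upload (or
# None). The name and file contents below are illustrative only.
import tempfile

def example_file_generator(file_index, file_type):
    file_path = Path(tempfile.gettempdir()) / "input_file_{}".format(file_index)
    file_path.write_text("dummy content for a port of type {}".format(file_type))
    return file_path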
Example #16
async def retrieve_data():
    print("retrieving data...")
    PORTS = node_ports.ports()

    values = {}
    for node_input in PORTS.inputs:
        if not node_input or node_input.value is None:
            continue
        print("getting data from port '{}' with value '{}'...".format(
            node_input.key, node_input.value))
        value = await node_input.get()
        values[node_input.key] = {"type": node_input.type, "value": value}

    print("json={}".format(json.dumps(values)))
    sys.stdout.flush()
Example #17
def test_invalid_ports(special_configuration):
    config_dict, _, _ = special_configuration()
    PORTS = node_ports.ports()
    check_config_valid(PORTS, config_dict)

    assert not PORTS.inputs
    assert not PORTS.outputs

    with pytest.raises(exceptions.UnboundPortError,
                       message="Expecting UnboundPortError"):
        PORTS.inputs[0]

    with pytest.raises(exceptions.UnboundPortError,
                       message="Expecting UnboundPortError"):
        PORTS.outputs[0]
Example #18
def test_removing_ports(special_configuration, session):
    config_dict, project_id, node_uuid = special_configuration(
        inputs=[("in_14", "integer", 15), ("in_17", "boolean", False)],
        outputs=[("out_123", "string", "blahblah"),
                 ("out_2", "number", -12.3)])  #pylint: disable=W0612
    PORTS = node_ports.ports()
    check_config_valid(PORTS, config_dict)
    # let's remove the first input
    del config_dict["schema"]["inputs"]["in_14"]
    del config_dict["inputs"]["in_14"]
    helpers.update_configuration(session, project_id, node_uuid, config_dict)  #pylint: disable=E1101
    check_config_valid(PORTS, config_dict)
    # let's do the same for the second output
    del config_dict["schema"]["outputs"]["out_2"]
    del config_dict["outputs"]["out_2"]
    helpers.update_configuration(session, project_id, node_uuid, config_dict)  #pylint: disable=E1101
    check_config_valid(PORTS, config_dict)
Example #19
async def test_get_file_from_previous_node(special_2nodes_configuration,
                                           project_id, node_uuid,
                                           filemanager_cfg, node_link,
                                           store_link, item_type, item_value,
                                           item_pytype):
    config_dict, _, _ = special_2nodes_configuration(
        prev_node_outputs=[("output_123", item_type,
                            store_link(item_value, project_id, node_uuid))],
        inputs=[("in_15", item_type, node_link("output_123"))],
        project_id=project_id,
        previous_node_id=node_uuid)
    PORTS = node_ports.ports()
    check_config_valid(PORTS, config_dict)
    file_path = await PORTS.inputs["in_15"].get()
    assert isinstance(file_path, item_pytype)
    assert file_path == Path(tempfile.gettempdir(), "simcorefiles", "in_15",
                             Path(item_value).name)
    assert file_path.exists()
    filecmp.clear_cache()
    assert filecmp.cmp(file_path, item_value)
Example #20
def push_output_data():
    input_path = OUTPUT_DIR / 'input.csv'
    cv_path = OUTPUT_DIR / 'CV_plot.csv'
    t_path = OUTPUT_DIR / 't_plot.csv'
    ist_path = OUTPUT_DIR / 'Ist_plot.csv'
    tst_path = OUTPUT_DIR / 'tst_plot.csv'
    qst_path = OUTPUT_DIR / 'CAP_plot.csv'
    vpred_path = OUTPUT_DIR / 'V_pred_plot.csv'
    lpred_path = OUTPUT_DIR / 'Lpred_plot.csv'
    output_files = [
        input_path, cv_path, t_path, ist_path, tst_path, qst_path, vpred_path,
        lpred_path
    ]
    for p in output_files:
        logger.info('file %s', str(p))
        logger.info('exists %s', p.exists())
    ports = node_ports.ports()
    for idx, path in enumerate(output_files):
        if path.exists():
            task = ports.outputs[idx].set(path)
            asyncio.get_event_loop().run_until_complete(task)
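
# The loop above re-enters the event loop once per output file. A hedged
# alternative sketch that collects all the set() coroutines first and runs them
# concurrently in a single run_until_complete call, mirroring the
# asyncio.gather pattern used elsewhere in this collection:
def push_output_data_concurrently(output_files):
    ports = node_ports.ports()
    tasks = [ports.outputs[idx].set(path)
             for idx, path in enumerate(output_files) if path.exists()]
    if tasks:
        asyncio.get_event_loop().run_until_complete(asyncio.gather(*tasks))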
Example #21
async def test_port_value_accessors(special_configuration, item_type,
                                    item_value, item_pytype):  # pylint: disable=W0613, W0621
    item_key = "some key"
    config_dict, _, _ = special_configuration(inputs=[(item_key, item_type,
                                                       item_value)],
                                              outputs=[(item_key, item_type,
                                                        None)])
    PORTS = node_ports.ports()
    check_config_valid(PORTS, config_dict)

    assert isinstance(await PORTS.inputs[item_key].get(), item_pytype)
    assert await PORTS.inputs[item_key].get() == item_value
    assert await PORTS.outputs[item_key].get() is None

    assert isinstance(await PORTS.get(item_key), item_pytype)
    assert await PORTS.get(item_key) == item_value

    await PORTS.outputs[item_key].set(item_value)
    assert PORTS.outputs[item_key].value == item_value
    assert isinstance(await PORTS.outputs[item_key].get(), item_pytype)
    assert await PORTS.outputs[item_key].get() == item_value
Example #22
async def test_file_mapping(special_configuration, project_id, node_uuid,
                            filemanager_cfg, s3_simcore_location, bucket,
                            store_link, session, item_type, item_value,
                            item_alias, item_pytype):
    config_dict, project_id, node_uuid = special_configuration(
        inputs=[("in_1", item_type,
                 store_link(item_value, project_id, node_uuid))],
        outputs=[("out_1", item_type, None)],
        project_id=project_id,
        node_id=node_uuid)
    PORTS = node_ports.ports()
    check_config_valid(PORTS, config_dict)
    # add a filetokeymap
    config_dict["schema"]["inputs"]["in_1"]["fileToKeyMap"] = {
        item_alias: "in_1"
    }
    config_dict["schema"]["outputs"]["out_1"]["fileToKeyMap"] = {
        item_alias: "out_1"
    }
    helpers.update_configuration(session, project_id, node_uuid, config_dict)  #pylint: disable=E1101
    check_config_valid(PORTS, config_dict)
    file_path = await PORTS.inputs["in_1"].get()
    assert isinstance(file_path, item_pytype)
    assert file_path == Path(tempfile.gettempdir(), "simcorefiles", "in_1",
                             item_alias)

    invalid_alias = Path("invalid_alias.fjfj")
    with pytest.raises(exceptions.PortNotFound,
                       message="Expecting PortNotFound"):
        await PORTS.set_file_by_keymap(invalid_alias)

    await PORTS.set_file_by_keymap(file_path)
    file_id = helpers.file_uuid(file_path, project_id, node_uuid)
    assert PORTS.outputs["out_1"].value == {
        "store": s3_simcore_location,
        "path": file_id
    }
Example #23
async def download_data():
    logger.info("retrieving data from simcore...")
    PORTS = node_ports.ports()
    inputs_path = Path(_INPUTS_FOLDER).expanduser()
    values = {}
    for port in PORTS.inputs:
        if not port or port.value is None:
            continue
        logger.debug("downloading data from port '%s' with value '%s'...", port.key, port.value)
        value = await port.get()
        values[port.key] = {"key": port.key, "value": value}

        if _FILE_TYPE_PREFIX in port.type:
            dest_path = inputs_path / port.key
            dest_path.mkdir(exist_ok=True, parents=True)
            values[port.key] = {"key": port.key, "value": str(dest_path)}

            # clean up destination directory
            for path in dest_path.iterdir():
                if path.is_file():
                    path.unlink()
                elif path.is_dir():
                    shutil.rmtree(path)
            # check if value is a compressed file
            if tarfile.is_tarfile(value):
                with tarfile.open(value) as tar_file:
                    tar_file.extractall(dest_path, members=list(_no_relative_path_tar(tar_file)))
            elif zipfile.is_zipfile(value):
                with zipfile.ZipFile(value) as zip_file:
                    zip_file.extractall(dest_path, members=_no_relative_path_zip(zip_file))
            else:
                dest_path = dest_path / Path(value).name
                shutil.move(value, dest_path)

    values_file = inputs_path / _KEY_VALUE_FILE_NAME
    values_file.write_text(json.dumps(values))
    logger.info("all data retrieved from simcore: %s", values)
Example #24
def download_all_inputs(n_inputs = 1):
    ports = node_ports.ports()
    tasks = asyncio.gather(*[ports.inputs[n].get() for n in range(n_inputs)])
    paths_to_inputs = asyncio.get_event_loop().run_until_complete( tasks )
    return paths_to_inputs
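
# Usage sketch for the helper above; the number of inputs is an assumption.
paths_to_inputs = download_all_inputs(n_inputs=2)
print("downloaded inputs to: {}".format(paths_to_inputs))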
Example #25
def test_default_configuration(default_configuration):  # pylint: disable=W0613, W0621
    config_dict = default_configuration
    check_config_valid(node_ports.ports(), config_dict)
Example #26
    def initialize(self): #pylint: disable=no-self-use
        PORTS = node_ports.ports()
        _create_ports_sub_folders(PORTS.inputs, Path(_INPUTS_FOLDER).expanduser())
        _create_ports_sub_folders(PORTS.outputs, Path(_OUTPUTS_FOLDER).expanduser())
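
# _create_ports_sub_folders() is referenced above but not shown. A plausible
# sketch, assuming it merely creates one sub-folder per port key:
def _create_ports_sub_folders(ports, parent_path):
    parent_path.mkdir(parents=True, exist_ok=True)
    for port in ports:
        (parent_path / port.key).mkdir(exist_ok=True)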
Example #27
async def download_data(port_keys: List[str]) -> int:
    logger.info("retrieving data from simcore...")
    start_time = time.perf_counter()
    PORTS = node_ports.ports()
    inputs_path = Path(_INPUTS_FOLDER).expanduser()
    data = {}

    # let's gather all the data
    download_tasks = []
    for node_input in PORTS.inputs:
        # if port_keys contains some keys only download them
        logger.info("Checking node %s", node_input.key)
        if port_keys and node_input.key not in port_keys:
            continue
        # collect coroutines
        download_tasks.append(get_time_wrapped(node_input))
    logger.info("retrieving %s data", len(download_tasks))

    transfer_bytes = 0
    if download_tasks:
        results = await asyncio.gather(*download_tasks)
        logger.info("completed download %s", results)
        for port, value in results:
            data[port.key] = {"key": port.key, "value": value}

            if _FILE_TYPE_PREFIX in port.type:
                # if there are files, move them to the final destination
                downloaded_file = value
                dest_path = inputs_path / port.key
                # first cleanup
                if dest_path.exists():
                    logger.info("removing %s", dest_path)
                    shutil.rmtree(dest_path)
                if not downloaded_file or not downloaded_file.exists():
                    # the link may be empty
                    continue
                transfer_bytes = transfer_bytes + downloaded_file.stat().st_size
                # in case of valid file, it is either uncompressed and/or moved to the final directory
                logger.info("creating directory %s", dest_path)
                dest_path.mkdir(exist_ok=True, parents=True)
                data[port.key] = {"key": port.key, "value": str(dest_path)}

                if zipfile.is_zipfile(downloaded_file):
                    logger.info("unzipping %s", downloaded_file)
                    with zipfile.ZipFile(downloaded_file) as zip_file:
                        zip_file.extractall(
                            dest_path, members=_no_relative_path_zip(zip_file))
                    logger.info("all unzipped in %s", dest_path)
                else:
                    logger.info("moving %s", downloaded_file)
                    dest_path = dest_path / Path(downloaded_file).name
                    shutil.move(downloaded_file, dest_path)
                    logger.info("all moved to %s", dest_path)
            else:
                transfer_bytes = transfer_bytes + sys.getsizeof(value)
    # create/update the json file with the new values
    if data:
        data_file = inputs_path / _KEY_VALUE_FILE_NAME
        if data_file.exists():
            current_data = json.loads(data_file.read_text())
            # merge data
            data = {**current_data, **data}
        data_file.write_text(json.dumps(data))
    stop_time = time.perf_counter()
    logger.info("all data retrieved from simcore in %sseconds: %s",
                stop_time - start_time, data)
    return transfer_bytes
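
# get_time_wrapped() is gathered above but not shown. A hypothetical sketch,
# inferred only from the results being unpacked as (port, value) pairs:
async def get_time_wrapped(node_input):
    # download the port value and return it together with its port
    value = await node_input.get()
    return node_input, value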