Example #1
def save_as_dt(self, data_path='test_data', save_path='outputs'):
    import os
    import pandas as pd
    from azureml.studio.modulehost.handler.port_io_handler import OutputHandler
    from azureml.studio.common.datatypes import DataTypes
    from azureml.studio.common.datatable.data_table import DataTable
    os.makedirs(save_path, exist_ok=True)
    # Load the input dataset, apply this module's run() transformation, and save the result
    input_df = pd.read_parquet(os.path.join(data_path, 'data.dataset.parquet'), engine='pyarrow')
    df = self.run(input_df)
    dt = DataTable(df)
    OutputHandler.handle_output(data=dt, file_path=save_path,
                                file_name='data.dataset.parquet', data_type=DataTypes.DATASET)
Example #2
def save_parquet1(df, output_path, writeCsv=False):
    from azureml.studio.modulehost.handler.port_io_handler import OutputHandler
    from azureml.studio.common.datatypes import DataTypes
    from azureml.studio.common.datatable.data_table import DataTable
    ensure_folder_exists(output_path)
    # requires alghost 70
    OutputHandler.handle_output(DataTable(df), output_path, 'data.dataset.parquet', DataTypes.DATASET)
    save_datatype(output_path)
    logger.info(f"saved parquet to {output_path}, columns {df.columns}")
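A minimal invocation sketch (assuming the azureml-studio packages are installed and that `ensure_folder_exists`, `save_datatype`, and `logger` are defined elsewhere in the module):

import pandas as pd

df = pd.DataFrame({"id": [1, 2], "score": [0.9, 0.1]})  # illustrative data
save_parquet1(df, "outputs")  # writes outputs/data.dataset.parquet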
Example #3
def save_dataframe(df, output_path, writeCsv=False):
    from azureml.studio.common.datatable.data_table import DataTable
    from azureml.studio.common.io.data_frame_directory import save_data_frame_to_directory
    from azureml.studio.common.io.visualizer import JsonVisualizer
    from azureml.studio.modulehost.handler.sidecar_files import DataTableVisualizer
    ensure_folder_exists(output_path)
    df = transform_ndarraycol_to_list(df)
    # Use the DataTable visualizer to generate the visualization sidecar data
    datatable = DataTable(df)
    visualizer = DataTableVisualizer(datatable)
    visualization_data = visualizer.dump_to_dict()
    save_data_frame_to_directory(
        output_path,
        data=df,
        visualization=[JsonVisualizer("Visualization", visualization_data)])
    logger.info(f"saved data to {output_path}, columns {df.columns}")
Example #4
def run(data):
    import json
    from collections import defaultdict
    # model, schema_data, decode_nan and create_dfd_from_dict are expected to be
    # set up by the scoring script's init step (not shown in this snippet).
    data = json.loads(data)
    input_entry = defaultdict(list)
    for row in data:
        for key, val in row.items():
            input_entry[key].append(decode_nan(val))

    data_frame_directory = create_dfd_from_dict(input_entry, schema_data)
    score_module = ScoreModelModule()
    result, = score_module.run(
        learner=model,
        test_data=DataTable.from_dfd(data_frame_directory),
        append_or_result_only=True)
    return json.dumps({"result": result.data_frame.values.tolist()})
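The entry point expects a JSON array of row objects. A hypothetical request (the column names are purely illustrative and depend on the model's schema) might look like:

import json

payload = json.dumps([
    {"feature_a": 1.0, "feature_b": 2.0},  # hypothetical feature columns
    {"feature_a": 3.5, "feature_b": 0.7},
])
response = run(payload)  # -> '{"result": [[...], [...]]}'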
Example #5
def entrance(data_path='script/test_data', save_path='script/outputs'):
    import os
    import base64
    import json
    import pandas as pd
    from azureml.studio.modulehost.handler.port_io_handler import OutputHandler
    from azureml.studio.common.datatypes import DataTypes
    from azureml.studio.common.datatable.data_table import DataTable
    my_list = []
    image_list = os.listdir(data_path)
    post_list = ['jfif', 'png', 'jpg', 'jpeg']  # accepted image file extensions
    for file_name in image_list:
        parts = file_name.split('.')
        if parts[-1] not in post_list:
            continue
        file_path = os.path.join(data_path, file_name)
        with open(file_path, 'rb') as f:
            s = base64.b64encode(f.read())
        input_data = s.decode('ascii')
        # Note: the data URI always claims image/png, even for jpg/jfif inputs
        input_data = 'data:image/png;base64,' + input_data
        my_list.append([input_data])
    df = pd.DataFrame(my_list, columns=['image_string'])
    os.makedirs(save_path, exist_ok=True)
    # df.to_parquet(fname=os.path.join(save_path, 'data.dataset.parquet'), engine='pyarrow')
    dt = DataTable(df)
    OutputHandler.handle_output(data=dt, file_path=save_path,
                                file_name='data.dataset.parquet', data_type=DataTypes.DATASET)

    # Dump data_type.json as a workaround until SMT deploys
    dct = {
        'Id': 'Dataset',
        'Name': 'Dataset .NET file',
        'ShortName': 'Dataset',
        'Description': 'A serialized DataTable supporting partial reads and writes',
        'IsDirectory': False,
        'Owner': 'Microsoft Corporation',
        'FileExtension': 'dataset.parquet',
        'ContentType': 'application/octet-stream',
        'AllowUpload': False,
        'AllowPromotion': True,
        'AllowModelPromotion': False,
        'AuxiliaryFileExtension': None,
        'AuxiliaryContentType': None
    }
    with open(os.path.join(save_path, 'data_type.json'), 'w') as f:
        json.dump(dct, f)

    print('This experiment has been completed.')
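To sanity-check the output, the saved dataset can be read back with pandas, mirroring the read path used in Example #1 (a sketch, assuming pyarrow is installed):

import os
import pandas as pd

df = pd.read_parquet(os.path.join('script/outputs', 'data.dataset.parquet'), engine='pyarrow')
print(df['image_string'].str[:30])  # each row should start with the data-URI prefix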
Example #6
def save_dataframe(df, output_path, writeCsv=False):
    import os
    from azureml.studio.common.datatypes import DataTypes
    from azureml.studio.common.datatable.data_table import DataTable
    from azureml.studio.common.io.data_frame_directory import save_data_frame_to_directory
    from azureml.studio.common.io.visualizer import JsonVisualizer
    from azureml.studio.modulehost.handler.sidecar_files import DataTableVisualizer
    ensure_folder_exists(output_path)
    df = transform_ndarraycol_to_list(df)
    # Use the DataTable saver to get visualization data
    datatable = DataTable(df)
    visualizer = DataTableVisualizer(datatable)
    visualization_data = visualizer.dump_to_dict()
    save_data_frame_to_directory(
        output_path,
        data=df,
        visualization=[JsonVisualizer("Visualization", visualization_data)])
    # save_data_frame_to_directory writes the data to _data.parquet instead of
    # the conventional data.dataset.parquet, so manually save another copy
    df.to_parquet(os.path.join(output_path, "data.dataset.parquet"),
                  engine="pyarrow")
    logger.info("saved an additional copy to data.dataset.parquet")
    logger.info(f"saved data to {output_path}, columns {df.columns}")
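A usage sketch for this variant (illustrative data; `ensure_folder_exists`, `transform_ndarraycol_to_list`, and `logger` are assumed to be defined in the surrounding module):

import numpy as np
import pandas as pd

df = pd.DataFrame({"id": [1, 2], "emb": [np.zeros(3), np.ones(3)]})  # illustrative
save_dataframe(df, "outputs")  # writes _data.parquet plus a data.dataset.parquet copy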
Example #7
def evaluate(self, data_path='test_data', save_path='outputs'):
    import os
    from azureml.studio.modulehost.handler.port_io_handler import OutputHandler
    from azureml.studio.common.datatypes import DataTypes
    from azureml.studio.common.datatable.data_table import DataTable
    os.makedirs(save_path, exist_ok=True)
    # Evaluate against labeled data and persist the result as a dataset
    df = self._evaluate_with_label(data_path)
    dt = DataTable(df)
    OutputHandler.handle_output(data=dt, file_path=save_path,
                                file_name='data.dataset.parquet', data_type=DataTypes.DATASET)