def add_run(self, run: Run):
    """Register a Run in this dataset.

    The run's ``md_uri`` is assigned by the metadata service when the
    run is attached to this processed dataset.

    Parameters
    ----------
    run
        Run to add
    """
    created_uri = self.service.add_run_processeddataset(run.metadata, self.md_uri)
    run.md_uri = created_uri
def test_write_run(self):
    """Writing run metadata must reproduce the reference file byte-for-byte."""
    run = Run(self.tst_run_file)
    run.metadata = create_run_metadata()
    run.write()
    files_identical = filecmp.cmp(
        self.tst_run_file, self.ref_run_file, shallow=False
    )
    self.assertTrue(files_identical)
def test_read_run(self):
    """Reading the reference run file must yield the expected metadata."""
    loaded = Run(self.ref_run_file)
    expected = create_run_metadata()
    self.assertEqual(loaded.metadata.serialize(), expected.serialize())
def run_merged(self):
    """Run the process that merges txt number inputs.

    This is the main function that runs the process on the experiment
    data: every input data list is merged into one CSV file per input
    (saved in the processed dataset directory), and the process is then
    executed once on the merged files.

    Raises
    ------
    RunnerExecError
        If any input data is not of the "numbercsv" format.
    """
    for observer in self._observers:
        observer.notify({'progress': 0, 'message': 'start'})

    # 1- Query all the input data and verify that the sizes
    # are equal; if not, an exception is raised
    input_data, data_count = self._query_inputs()

    # 2- Create the ProcessedDataSet
    processed_dataset = self.experiment.create_processed_dataset(
        self._output_dataset
    )

    # 3- Create the run metadata and register it in the dataset
    run = Run()
    run.metadata.process_name = self.process.metadata.fullname()
    run.metadata.process_uri = self.process.uri
    for t in range(len(self._inputs_names)):
        run.metadata.inputs.append(
            RunInputContainer(
                self._inputs_names[t],
                self._inputs_datasets[t],
                self._inputs_query[t],
                self._inputs_origin_output_name[t],
            )
        )
    # _process_params is a flat [name, value, name, value, ...] list
    for i in range(0, len(self._process_params), 2):
        run.metadata.parameters.append(
            RunParameterContainer(
                self._process_params[i], self._process_params[i + 1]
            )
        )
    processed_dataset.add_run(run)

    # 4- merge Inputs: read each number file into a per-input value list
    inputs_values = [[] for _ in range(len(self._inputs_names))]
    for n in range(len(self._inputs_names)):
        for i in range(data_count):
            # NOTE(review): run_sequence passes input_data[n][i].uri()
            # to RawData; here the entry is passed directly — confirm
            # _query_inputs returns URIs for the merge case
            data_info = RawData(input_data[n][i])
            if data_info.metadata.format == "numbercsv":
                # strip newlines/spaces so the value is a bare number string
                with open(data_info.metadata.uri, 'r') as file:
                    value = file.read().replace('\n', '').replace(' ', '')
                inputs_values[n].append(value)
            else:
                raise RunnerExecError(
                    'run merge can use only number datatype')

    # 5- save data in tmp files in the processed dataset dir
    tmp_inputs_files = [None] * len(self._inputs_names)
    processed_data_dir = processed_dataset.md_uri.replace(
        "processeddataset.md.json", ""
    )
    for n in range(len(self._inputs_names)):
        tmp_inputs_files[n] = os.path.join(
            processed_data_dir, self._inputs_names[n] + '.csv'
        )
        # 'with' guarantees the file is closed even if a write fails;
        # join produces the same comma-separated line (no trailing comma)
        # as the original element-by-element write loop
        with open(tmp_inputs_files[n], 'w') as f:
            f.write(",".join(str(value) for value in inputs_values[n]))

    # 6- create input metadata for output .md.json
    inputs_metadata = []
    for n in range(len(tmp_inputs_files)):
        inp_metadata = ProcessedDataInputContainer()
        inp_metadata.name = self._inputs_names[n]
        inp_metadata.uri = tmp_inputs_files[n]
        inp_metadata.type = 'txt'
        inputs_metadata.append(inp_metadata)

    # 7- run process on generated files
    args = []
    # 7.1- inputs
    for n in range(len(self._inputs_names)):
        args.append(self._inputs_names[n])
        args.append(tmp_inputs_files[n])
    # 7.2- params
    for param in self._process_params:
        args.append(param)
    # 7.3- outputs
    for output in self.process.metadata.outputs:
        extension = '.' + FormatsAccess.instance().get(output.type).extension
        # args
        args.append(output.name)
        output_file_name = output.name
        args.append(os.path.join(processed_data_dir,
                                 output_file_name + extension))
        # output metadata
        processed_data = ProcessedData()
        processed_data.metadata.name = output.name
        processed_data.metadata.author = \
            ConfigAccess.instance().get('user')['name']
        processed_data.metadata.date = format_date('now')
        processed_data.metadata.format = output.type
        processed_data.metadata.run_uri = run.md_uri
        processed_data.metadata.inputs = inputs_metadata
        processed_data.metadata.output = {
            'name': output.name,
            'label': output.description,
        }
        # save the metadata and create its md_uri and uri
        processed_dataset.create_data(processed_data)

    # 8- exec
    runner = Runner(self.process)
    runner.exec(*args)

    # notify observers
    for observer in self._observers:
        observer.notify({'progress': 100, 'message': 'done'})
def run_sequence(self):
    """Run the process in a sequence.

    This is the main function that runs the process on the experiment
    data, executing it once per data item.

    Raises
    ------
    RunnerExecError
    """
    # 1- Query all the input data and verify that the sizes
    # are equal; if not, an exception is raised
    input_data, data_count = self._query_inputs()

    # 2- Create the ProcessedDataSet
    processed_dataset = self.experiment.create_processed_dataset(
        self._output_dataset
    )

    # 3- Create the run metadata and register it in the dataset
    run = Run()
    run.metadata.process_name = self.process.metadata.fullname()
    run.metadata.process_uri = self.process.uri
    for idx, input_name in enumerate(self._inputs_names):
        run.metadata.inputs.append(
            RunInputContainer(
                input_name,
                self._inputs_datasets[idx],
                self._inputs_query[idx],
                self._inputs_origin_output_name[idx],
            )
        )
    # _process_params is a flat [name, value, name, value, ...] list
    for pos in range(0, len(self._process_params), 2):
        run.metadata.parameters.append(
            RunParameterContainer(
                self._process_params[pos], self._process_params[pos + 1]
            )
        )
    processed_dataset.add_run(run)

    # 4- loop over the input data
    for i in range(data_count):
        first_input = RawData(input_data[0][i].uri())

        # 4.0- notify observers of per-item progress
        for observer in self._observers:
            observer.notify({
                'progress': int(100 * i / data_count),
                'message': "Process " + first_input.metadata.name,
            })

        # 4.1- Parse IO
        args = []
        inputs_metadata = []
        for idx, input_name in enumerate(self._inputs_names):
            args.append(input_name)
            data_info = RawData(input_data[idx][i].uri())
            # input data can be a processedData but
            # we only read the common metadata
            args.append(data_info.metadata.uri)
            inp_metadata = ProcessedDataInputContainer()
            inp_metadata.name = input_name
            inp_metadata.uri = input_data[idx][i].uri()
            inp_metadata.type = data_info.metadata.type
            inputs_metadata.append(inp_metadata)

        # append the params arguments
        args.extend(self._process_params)

        # setup outputs
        for output in self.process.metadata.outputs:
            # output metadata
            processed_data = ProcessedData()
            processed_data.metadata.name = (
                first_input.metadata.name + "_" + output.name
            )
            processed_data.metadata.author = \
                ConfigAccess.instance().get('user')['name']
            processed_data.metadata.date = format_date('now')
            processed_data.metadata.format = output.type
            processed_data.metadata.run_uri = run.md_uri
            processed_data.metadata.inputs = inputs_metadata
            processed_data.metadata.output = {
                'name': output.name,
                'label': output.description,
            }
            # save the metadata and create its md_uri and uri
            processed_dataset.create_data(processed_data)
            # args
            args.append(output.name)
            args.append(processed_data.metadata.uri)

        # 4.2- exec
        runner = Runner(self.process)
        runner.exec(*args)

    # notify observers that the sequence is done
    for observer in self._observers:
        observer.notify({'progress': 100, 'message': 'done'})