def post(self, dataset_id: int):
    """Create a new data batch for a dataset and wake the scheduler.

    Parses the JSON request body for the batch description, validates the
    target dataset, persists a ``DataBatch`` row in state NEW, and notifies
    the scheduler so the batch gets picked up.

    Args:
        dataset_id: Primary key of the dataset the batch belongs to.

    Returns:
        ``{'data': <batch dict>}`` describing the freshly created batch.

    Raises:
        NotFoundException: no dataset with ``dataset_id`` exists.
        InvalidArgumentException: ``event_time`` is missing for a
            STREAMING dataset, where it is mandatory.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('event_time', type=int)
    parser.add_argument('files',
                        required=True,
                        type=list,
                        location='json',
                        help=_FORMAT_ERROR_MESSAGE.format('files'))
    # BUGFIX: parse_args() fills absent arguments with None, so the old
    # `body.get('move', False)` still yielded None when the client omitted
    # the flag. Declaring default=False guarantees a real boolean.
    parser.add_argument('move', type=bool, default=False)
    parser.add_argument('comment', type=str)
    body = parser.parse_args()
    event_time = body.get('event_time')
    files = body.get('files')
    move = body.get('move')
    comment = body.get('comment')

    dataset = Dataset.query.filter_by(id=dataset_id).first()
    if dataset is None:
        raise NotFoundException()
    # event_time is only mandatory for streaming datasets; PSI batches
    # fall back to "now" below.
    if event_time is None and dataset.type == DatasetType.STREAMING:
        raise InvalidArgumentException(
            details='data_batch.event_time is empty')
    # TODO: PSI dataset should not allow multi batches
    # Create batch
    batch = DataBatch(
        dataset_id=dataset.id,
        # Use current timestamp to fill when type is PSI
        event_time=datetime.datetime.fromtimestamp(
            event_time or datetime.datetime.now().timestamp()),
        comment=comment,
        state=BatchState.NEW,
        move=move,
    )
    # Build the serialized batch details: one entry per input file, each
    # mapped to its destination under the storage root, keyed by the
    # batch's event-time folder.
    batch_details = dataset_pb2.DataBatch()
    root_dir = current_app.config.get('STORAGE_ROOT')
    batch_folder_name = batch.event_time.strftime('%Y%m%d%H%M%S')
    for file_path in files:
        file = batch_details.files.add()
        file.source_path = file_path
        # NOTE(review): assumes POSIX-style paths; the basename is taken
        # by splitting on '/'.
        file_name = file_path.split('/')[-1]
        file.destination_path = f'{root_dir}/dataset/{dataset.id}' \
                                f'/batch/{batch_folder_name}/{file_name}'
    batch.set_details(batch_details)
    db.session.add(batch)
    db.session.commit()
    # Refresh so auto-generated columns (e.g. the primary key) are loaded
    # before we hand the id to the scheduler and serialize the response.
    db.session.refresh(batch)
    scheduler.wakeup(data_batch_ids=[batch.id])
    return {'data': batch.to_dict()}
def get_details(self):
    """Deserialize the stored ``details`` blob into a DataBatch proto.

    Returns:
        A ``dataset_pb2.DataBatch`` parsed from ``self.details``, or
        ``None`` when no details have been stored yet.
    """
    raw = self.details
    if raw is None:
        return None
    proto = dataset_pb2.DataBatch()
    proto.ParseFromString(raw)
    return proto