def setUpClass(cls):
    cls.test_client = Client(key=os.environ["NEXOSIS_API_TESTKEY"],
                             uri=os.environ["NEXOSIS_API_TESTURI"])
    metadata = {
        'sales': ColumnMetadata({'dataType': 'numeric', 'role': 'target'}),
        'transactions': ColumnMetadata({'dataType': 'numeric', 'role': 'none'}),
        'timeStamp': ColumnMetadata({'dataType': 'date', 'role': 'timestamp'})
    }
    cls.import_response = cls.test_client.imports.import_from_s3(
        'test-python-import', 'nexosis-sample-data', 'LocationA.csv',
        'us-east-1', metadata)
    # give the import time to run
    time.sleep(10)
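# Note: the fixed time.sleep(10) above only hopes the S3 import has finished.
# A minimal sketch of polling instead, under stated assumptions: that the imports
# client exposes a get(import_id) lookup, that the response exposes a `status`
# property, and that the Status enum has `completed`/`failed` members -- none of
# these names are confirmed by the snippets in this file.
def wait_for_import(client, import_id, timeout_seconds=120):
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        response = client.imports.get(import_id)  # assumed lookup method
        if response.status in (Status.completed, Status.failed):  # assumed members
            return response
        time.sleep(5)
    raise TimeoutError('import %s did not finish in time' % import_id)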
def __init__(self, data_dict):
    self._name = data_dict['dataSetName']
    cols = data_dict.get('columns') or {}
    self._column_metadata = {key: ColumnMetadata(value) for key, value in cols.items()}
def test_create_with_measure_type(self):
    metadata = {
        'observed': ColumnMetadata({'dataType': 'numericMeasure', 'role': 'target'}),
        'timestamp': ColumnMetadata({'dataType': 'date', 'role': 'timestamp'})
    }
    result = self.test_client.datasets.create(self.ds_name, self.data, metadata)
    self.assertEqual(Imputation.mean, result.column_metadata['observed'].imputation)
    self.assertEqual(Aggregation.mean, result.column_metadata['observed'].aggregation)
def test_create_with_metadata(self):
    metadata = {
        'observed': ColumnMetadata({'dataType': 'string', 'role': 'none'}),
        'timestamp': ColumnMetadata({'dataType': 'date', 'role': 'timestamp'})
    }
    result = self.test_client.datasets.create(self.ds_name, self.data, metadata)
    self.assertEqual(self.ds_name, result.name)
    self.assertEqual(metadata['observed'].data_type, result.column_metadata['observed'].data_type)
    self.assertEqual(metadata['observed'].role, result.column_metadata['observed'].role)
    self.assertEqual(metadata['timestamp'].data_type, result.column_metadata['timestamp'].data_type)
    self.assertEqual(metadata['timestamp'].role, result.column_metadata['timestamp'].role)
def __init__(self, data_dict=None):
    if data_dict is None:
        data_dict = {}
    cols = data_dict.get('columns') or {}
    joins = data_dict.get('joins') or []
    self._view_name = data_dict['viewName']
    self._dataset_name = data_dict['dataSetName']
    self._column_metadata = {key: ColumnMetadata(value) for (key, value) in cols.items()}
    self._joins = [Join(j) for j in joins]
def __init__(self, data_dict=None): """ A Dataset is the representation of your data as stored by the Nexosis API :arg dict data_dict: the dictionary containing the data for this object """ if data_dict is None: data_dict = {} self._data = data_dict.get('data') self._metadata = { key: ColumnMetadata(value) for (key, value) in data_dict.get('columns', {}).items() } self._links = data_dict.get('links')
def __init__(self, data_dict=None): """ A Dataset is the representation of your data as stored by the Nexosis API :arg dict data_dict: the dictionary containing the data for this object """ if data_dict is None: data_dict = {} self._data = data_dict.get('data') self._metadata = {key: ColumnMetadata(value) for (key, value) in data_dict.get('columns', {}).items()} self._links = data_dict.get('links') self._page_number = data_dict['pageNumber'] if 'pageNumber' in data_dict else 0 self._total_pages = data_dict['totalPages'] if 'totalPages' in data_dict else 0 self._page_size = data_dict['pageSize'] if 'pageSize' in data_dict else 50 self._item_total = data_dict['totalCount'] if 'totalCount' in data_dict else 0
def test_create_assign_imputation_aggregation(self):
    metadata = {
        'observed': ColumnMetadata({'dataType': 'numeric', 'role': 'target',
                                    'imputation': 'mode', 'aggregation': 'median'}),
        'timestamp': ColumnMetadata({'dataType': 'date', 'role': 'timestamp'})
    }
    result = self.test_client.datasets.create(self.ds_name, self.data, metadata)
    self.assertEqual(self.ds_name, result.name)
    self.assertEqual(Aggregation.median, result.column_metadata['observed'].aggregation)
    self.assertEqual(Imputation.mode, result.column_metadata['observed'].imputation)
def __init__(self, data_dict=None):
    if data_dict is None:
        data_dict = {}
    self._model_id = data_dict.get('modelId')
    self._prediction_domain = data_dict.get('predictionDomain')
    self._datasource_name = data_dict.get('dataSourceName')
    self._created_on = data_dict.get('createdDate')
    self._algorithm = Algorithm(data_dict.get('algorithm'))
    cols = data_dict.get('columns') or {}
    self._column_metadata = {key: ColumnMetadata(value) for key, value in cols.items()}
    self._metrics = data_dict.get('metrics')
def test_create_regression_model(self):
    columns = {
        'R.D.Spend': ColumnMetadata({'dataType': 'numeric', 'role': 'feature',
                                     'imputation': 'mode', 'aggregation': 'median'}),
        'Administration': ColumnMetadata({'dataType': 'numeric', 'role': 'feature',
                                          'imputation': 'mode', 'aggregation': 'median'}),
        'Marketing.Spend': ColumnMetadata({'dataType': 'numeric', 'role': 'feature',
                                           'imputation': 'mode', 'aggregation': 'median'}),
        'Profit': ColumnMetadata({'dataType': 'numeric', 'role': 'target'}),
        'ny': ColumnMetadata({'dataType': 'logical', 'role': 'feature'}),
        'florida': ColumnMetadata({'dataType': 'logical', 'role': 'feature'}),
        'cali': ColumnMetadata({'dataType': 'logical', 'role': 'feature'}),
    }
    results = self.test_client.sessions.train_regression_model(
        self.regression_ds_name, 'profit', columns)
def __init__(self, data_dict=None):
    if data_dict is None:
        data_dict = {}
    self._import_id = data_dict['importId']
    self._type = ImportType[data_dict['type']]
    self._status = Status[data_dict['status']]
    self._dataset_name = data_dict['dataSetName']
    self._requested_date = dateutil.parser.parse(data_dict['requestedDate'])
    self._status_history = data_dict['statusHistory']
    self._links = data_dict['links']
    self._parameters = data_dict['parameters']
    self._messages = data_dict['messages']
    md = data_dict.get('metadata') or {}
    self._column_metadata = {key: ColumnMetadata(value) for (key, value) in md.items()}
def __init__(self, data_dict=None):
    if data_dict is None:
        data_dict = {}
    self._session_id = data_dict['sessionId']
    self._type = SessionType[data_dict['type']]
    self._status = Status[data_dict['status']]
    self._status_history = data_dict['statusHistory']
    self._dataset_name = data_dict['dataSetName']
    self._target_column = data_dict['targetColumn']
    self._start_date = dateutil.parser.parse(data_dict['startDate'])
    self._end_date = dateutil.parser.parse(data_dict['endDate'])
    self._requested_date = dateutil.parser.parse(data_dict['requestedDate'])
    self._links = data_dict['links']
    self._is_estimate = bool(data_dict['isEstimate'])
    self._extra_parameters = data_dict['extraParameters']
    # fall back to a daily interval when the response omits or nulls 'resultInterval'
    self._result_interval = TimeInterval[data_dict['resultInterval']] \
        if data_dict.get('resultInterval') else TimeInterval.day
    md = data_dict.get('metadata') or {}
    self._column_metadata = {key: ColumnMetadata(value) for (key, value) in md.items()}
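# For illustration only: a sketch of the minimal response dict this constructor
# accepts, showing the TimeInterval.day fallback when 'resultInterval' is missing.
# The class name SessionResponse, the placeholder field values, and the enum
# member names implied by 'forecast'/'completed' are all assumptions, not
# confirmed by the snippets above; the required keys mirror the lookups in __init__.
session = SessionResponse({
    'sessionId': 'example-session-id',
    'type': 'forecast',            # assumed SessionType member name
    'status': 'completed',         # assumed Status member name
    'statusHistory': [],
    'dataSetName': 'LocationA',
    'targetColumn': 'sales',
    'startDate': '2017-01-01T00:00:00Z',
    'endDate': '2017-01-31T00:00:00Z',
    'requestedDate': '2017-01-01T00:00:00Z',
    'links': [],
    'isEstimate': False,
    'extraParameters': {},
    # 'resultInterval' deliberately omitted
})
assert session._result_interval == TimeInterval.day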