def remove_collection():
    '''

    This router function removes a collection, with respect to a database
    type.

    @collection, indicates a nosql implementation
    @entity, indicates a sql database

    '''

    if request.method == 'POST':
        # local variables
        response = None
        entity = Entity()
        collection = Collection()

        # programmatic-interface
        if request.get_json():
            r = request.get_json()
            uid = r['uid']
            dtype = r['type']  # 'dtype' avoids shadowing the builtin 'type'
            cname = r['collection']

            if cname and dtype == 'collection':
                payload = {'properties.uid': uid}
                response = collection.query(cname, 'drop_collection', payload)

            elif dtype == 'entity':
                response = entity.remove_entity(uid, cname)

        # lastrowid returned must be greater than 0
        if response and response['result']:
            return json.dumps({'response': response['result']})
        else:
            return json.dumps({'response': response})
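
# A minimal client sketch for the handler above. The endpoint path and host
# are assumptions, not taken from the source; the JSON keys ('uid', 'type',
# 'collection') match what the handler reads from 'request.get_json()'.
import requests

payload = {'uid': 1, 'type': 'collection', 'collection': 'iris_dataset'}
response = requests.post(
    'http://localhost:5000/remove-collection',  # hypothetical route
    json=payload,
)
print(response.json())  # e.g. {'response': ...}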

def document_count():
    '''

    This router function retrieves the number of documents in a specified
    collection.

    '''

    if request.method == 'POST':
        # local variables
        count = None
        collection = Collection()

        # programmatic-interface
        if request.get_json():
            r = request.get_json()
            cname = r['collection']
            count = collection.query(cname, 'count_documents')

        # note: 'long' was removed in python 3, so 'int' suffices here
        if (
            count and
            count['status'] and
            isinstance(count['result'], int)
        ):
            return json.dumps({'count': count['result']})
        else:
            return json.dumps({'count': -1})
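
# A minimal sketch of the 'Collection.query' contract the above handlers
# assume: an operation name is dispatched to the underlying pymongo
# collection, and the outcome is wrapped in a uniform dict with 'status',
# 'result', and 'error' keys. The pymongo wiring here is illustrative, not
# the project's actual implementation; other operations (e.g. 'aggregate')
# would dispatch similarly.
from pymongo import MongoClient


class Collection(object):
    def __init__(self, host='localhost', port=27017, db='dataset'):
        self.db = MongoClient(host, port)[db]

    def query(self, cname, operation, payload=None):
        try:
            if operation == 'count_documents':
                result = self.db[cname].count_documents(payload or {})
            elif operation == 'insert_one':
                result = self.db[cname].insert_one(payload).inserted_id
            elif operation == 'drop_collection':
                result = self.db.drop_collection(cname)
            else:
                return {'status': False, 'result': None, 'error': 'bad op'}
            return {'status': True, 'result': result, 'error': None}
        except Exception as error:
            return {'status': False, 'result': None, 'error': str(error)}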

def save_premodel_dataset(self):
    '''

    This method saves the entire dataset collection, as a json document,
    into the nosql implementation.

    '''

    # save dataset
    collection = self.premodel_data['properties']['collection']
    collection_adjusted = collection.lower().replace(' ', '_')
    cursor = Collection()
    document = {
        'properties': self.premodel_data['properties'],
        'dataset': self.dataset
    }
    response = cursor.query(collection_adjusted, 'insert_one', document)

    # return result
    if response and response['error']:
        self.list_error.append(response['error'])
        return {'result': None, 'error': response['error']}
    elif response and response['result']:
        return {'result': response['result'], 'error': None}
    else:
        return {'result': None, 'error': 'no dataset provided'}

def save_premodel_dataset(self):
    '''

    This method saves the entire dataset collection, as a json document,
    into the nosql implementation.

    '''

    # local variables
    entity = Entity()
    cursor = Collection()
    collection = self.premodel_data['properties']['collection']
    collection_adjusted = collection.lower().replace(' ', '_')
    collection_count = entity.get_collection_count(self.uid)
    document_count = cursor.query(collection_adjusted, 'count_documents')

    # enforce collection limit: the oldest collection name is obtained from
    # the sql database. Then, the corresponding collection (i.e. target) is
    # removed from the nosql database.
    if (
        not self.uid and
        collection_count and
        collection_count['result'] >= self.max_collection and
        collection_adjusted
    ):
        target = entity.get_collections(self.uid)['result'][0]
        cursor.query(target, 'drop_collection')
        entity.remove_entity(self.uid, target)

        collection_count = entity.get_collection_count(self.uid)
        document_count = cursor.query(collection_adjusted, 'count_documents')

    # save dataset
    if (
        collection_adjusted and
        collection_count and
        collection_count['result'] < self.max_collection and
        document_count and
        document_count['result'] < self.max_document
    ):
        current_utc = datetime.datetime.utcnow().strftime(
            '%Y-%m-%dT%H:%M:%S')
        self.premodel_data['properties']['datetime_saved'] = current_utc
        self.premodel_data['properties']['uid'] = self.uid
        document = {
            'properties': self.premodel_data['properties'],
            'dataset': self.dataset
        }
        response = cursor.query(collection_adjusted, 'insert_one', document)

    else:
        response = None

    # return result
    if response and response['error']:
        self.list_error.append(response['error'])
        return {'result': None, 'error': response['error']}
    elif response and response['result']:
        return {'result': response['result'], 'error': None}
    else:
        return {'result': None, 'error': 'no dataset provided'}
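
# Illustrative shape of the document written by 'save_premodel_dataset'. The
# top-level keys, and the 'datetime_saved' / 'uid' properties, come from the
# method above; the observation fields mirror those read later by
# 'generate()'; the concrete values are made up for the example.
document = {
    'properties': {
        'collection': 'Iris Dataset',
        'uid': 1,
        'datetime_saved': '2016-01-01T00:00:00',
    },
    'dataset': [{
        'dependent-variable': 'setosa',
        'independent-variables': [
            {'petal-length': '1.4', 'petal-width': '0.2'},
        ],
    }],
}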

def save_entity(self, session_type, session_id):
    '''

    This method overrides the identical method from the inherited
    superclass, 'BaseData'. Specifically, this method updates an existing
    entity within the corresponding database table, 'tbl_dataset_entity'.

    @session_id, is synonymous to 'entity_id', and provides context to
        update 'modified_xx' columns within the 'tbl_dataset_entity'
        database table.

    '''

    # local variables
    db_return = None
    entity = Entity()
    cursor = Collection()
    premodel_settings = self.premodel_data['properties']
    collection = premodel_settings['collection']
    collection_adjusted = collection.lower().replace(' ', '_')
    collection_count = entity.get_collection_count(self.uid)
    document_count = cursor.query(collection_adjusted, 'count_documents')

    # define entity properties
    premodel_entity = {
        'title': premodel_settings.get('session_name', None),
        'uid': self.uid,
        'id_entity': session_id,
    }

    # store entity values in database
    if (
        collection_adjusted and
        collection_count and
        collection_count['result'] < self.max_collection and
        document_count and
        document_count['result'] < self.max_document
    ):
        db_save = Entity(premodel_entity, session_type)
        db_return = db_save.save()

        if db_return and db_return['status']:
            return {'status': True, 'error': None}
        else:
            # guard against 'save' returning nothing
            error = db_return['error'] if db_return else 'entity not saved'
            self.list_error.append(error)
            return {'status': False, 'error': self.list_error}

    else:
        return {'status': True, 'error': None}

def generate(model, kernel_type, collection, payload, list_error):
    '''

    This method generates an sv (i.e. svm, or svr) model using feature
    data, retrieved from the database. The generated model is then stored
    within the NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.

    @encoded_labels, observation labels (dependent variable labels),
        encoded into a unique integer representation.

    '''

    # local variables
    sorted_labels = False
    label_encoder = preprocessing.LabelEncoder()
    list_model_type = current_app.config.get('MODEL_TYPE')
    collection_adjusted = collection.lower().replace(' ', '_')
    cursor = Collection()

    # get datasets
    datasets = cursor.query(collection_adjusted, 'aggregate', payload)

    # restructure dataset into arrays
    observation_labels = []
    grouped_features = []

    for dataset in datasets['result']:
        for observation in dataset['dataset']:
            indep_variables = observation['independent-variables']

            for features in indep_variables:
                # svm case
                if model == list_model_type[0]:
                    observation_labels.append(
                        observation['dependent-variable'])
                    sorted_features = [v for k, v in sorted(features.items())]

                # svr case
                elif model == list_model_type[1]:
                    observation_labels.append(
                        float(observation['dependent-variable']))
                    sorted_features = [
                        float(v) for k, v in sorted(features.items())
                    ]

                grouped_features.append(sorted_features)

                if not sorted_labels:
                    sorted_labels = [k for k, v in sorted(features.items())]

    # generate svm model
    if model == list_model_type[0]:
        # convert observation labels to a unique integer representation
        label_encoder.fit(observation_labels)
        encoded_labels = label_encoder.transform(observation_labels)

        # create model
        clf = svm.SVC(kernel=kernel_type, probability=True)

        # cache encoded labels
        Model(label_encoder).cache(model + '_labels', collection_adjusted)

        # fit model
        clf.fit(grouped_features, encoded_labels)

    # generate svr model
    elif model == list_model_type[1]:
        # create model
        clf = svm.SVR(kernel=kernel_type)

        # fit model
        clf.fit(grouped_features, observation_labels)

        # compute, and cache coefficient of determination
        r2 = clf.score(grouped_features, observation_labels)
        Hset().cache(model + '_r2', collection_adjusted, r2)

    # cache model
    Model(clf).cache(model + '_model', collection_adjusted)

    # cache feature labels, with respect to given collection
    Hset().cache(
        model + '_feature_labels',
        collection,
        json.dumps(sorted_labels)
    )

    # return error(s) if exists
    return {'error': list_error}
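
# Why 'generate' sorts each feature dict by key: plain python dicts (before
# python 3.7) do not guarantee key order, so sorting yields the same column
# order for every observation, and keeps 'sorted_labels' aligned with the
# feature matrix. A quick, self-contained illustration (feature names are
# made up):
features_a = {'petal-width': 0.2, 'petal-length': 1.4}
features_b = {'petal-length': 4.7, 'petal-width': 1.4}

row_a = [v for k, v in sorted(features_a.items())]  # [1.4, 0.2]
row_b = [v for k, v in sorted(features_b.items())]  # [4.7, 1.4]
labels = [k for k, v in sorted(features_a.items())]
# labels == ['petal-length', 'petal-width'], the same order for both rows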

def save_entity(self, session_type, id_entity=None):
    '''

    This method overrides the identical method from the inherited
    superclass, 'BaseData'. Specifically, this method updates an existing
    entity within the corresponding database table, 'tbl_dataset_entity'.

    @id_entity, is synonymous to 'entity_id', and provides context to
        update 'modified_xx' columns within the 'tbl_dataset_entity'
        database table.

    @numeric_model_type, list indices begin at 0, and need to be corrected
        by adding 1. This allows the numeric representation of the
        'model_type' to relate to another database table, which maps
        integer values to the corresponding 'model_type' name. The integer
        column of the mapping table begins at 1.

    '''

    # local variables
    db_return = None
    entity = Entity()
    cursor = Collection()
    premodel_settings = self.premodel_data['properties']
    collection = premodel_settings['collection']
    collection_adjusted = collection.lower().replace(' ', '_')
    collection_count = entity.get_collection_count(self.uid)
    document_count = cursor.query(collection_adjusted, 'count_documents')

    # assign numerical representation
    numeric_model_type = self.list_model_type.index(self.model_type) + 1

    # define entity properties
    premodel_entity = {
        'title': premodel_settings.get('session_name', None),
        'collection': collection,
        'model_type': numeric_model_type,
        'uid': self.uid,
    }

    # store entity values in database
    if (
        collection_adjusted and
        collection_count and
        collection_count['result'] < self.max_collection and
        document_count and
        document_count['result'] < self.max_document
    ):
        entity = Entity(premodel_entity, session_type)
        db_return = entity.save()

    # return
    if db_return and db_return['error']:
        self.list_error.append(db_return['error'])
        return {'status': False, 'error': self.list_error}
    elif db_return and db_return['status']:
        return {'status': True, 'error': None, 'id': db_return['id']}
    else:
        return {'status': True, 'error': 'Entity was not saved', 'id': None}
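
# Illustration of the index-to-id correction described in the docstring
# above. The model names are assumptions (the real list comes from
# 'self.list_model_type'); only the +1 offset matters, since the mapping
# table's integer column begins at 1, while list indices begin at 0.
list_model_type = ['svm', 'svr']
numeric_model_type = list_model_type.index('svr') + 1  # yields 2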