def _setup(samples: bool = True): try: collection = MongoClient(getenv('MONGO_URI'))['todos']['main'] collection.delete_many({}) except Exception as e: print("There was an error Setting Up:", e) if samples: todos = [ { 'task': 'Finish This', 'when': datetime.timestamp(datetime.now()), 'by': 'me', 'done': False }, { 'task': 'Goto the Supermarket', 'when': datetime.timestamp(datetime.now()), 'by': 'me', 'done': True }, { 'task': 'Implement from Database', 'when': datetime.timestamp(datetime.now()), 'by': 'someone', 'done': False }, ] r = collection.insert_many(todos) return collection
class DatabaseCache:
    """MongoDB-backed binary cache with optional TTL-based expiry."""

    def __init__(self, database_name="Matsuo", collection_name="ImageCache", expiration_time=datetime.timedelta(minutes=10)):
        """Open the backing collection and ensure its indexes exist.

        :param database_name: Mongo database holding the cache.
        :param collection_name: collection used as the cache store.
        :param expiration_time: timedelta after which entries expire,
            or None to skip creating the TTL index.
        """
        self.client = MongoClient()[database_name][collection_name]
        self.expiration_delta = expiration_time
        if self.expiration_delta is not None:
            # TTL index: Mongo removes a document once the timestamp stored
            # in the expiration field has passed (expireAfterSeconds=0).
            self.client.create_index(
                [(DatabaseCacheItem.expiration_index, pymongo.ASCENDING)],
                expireAfterSeconds=0)
        self.client.create_index([(DatabaseCacheItem.expiration_index, pymongo.TEXT)])

    def add_item(self, key, data):
        """Upsert a binary payload under `key`, stamped with its expiry."""
        expires_at = datetime.datetime.now() + self.expiration_delta
        payload = bson.binary.Binary(data.read())
        entry = DatabaseCacheItem(key, payload, expires_at)
        self.client.update_one(entry.get_id(), update=entry.get_update_form(), upsert=True)

    def get_item(self, key):
        """Return the cached bytes for `key`, or None on a miss."""
        found = self.client.find_one({DatabaseCacheItem.filename_key: key})
        if not found:
            return None
        return found[DatabaseCacheItem.data_key]

    def remove_item(self, key):
        """Drop every cached entry stored under `key`."""
        self.client.delete_many({DatabaseCacheItem.filename_key: key})
def setup():
    """Empty the ``todos.test`` collection ahead of a test run."""
    load_dotenv()
    try:
        MongoClient(getenv('MONGO_URI'))['todos']['test'].delete_many({})
    except Exception as e:
        print("There was an error Setting Up:", e)
def teardown():
    """Empty the ``todos.test`` collection after a test run."""
    load_dotenv()
    try:
        MongoClient(getenv('MONGO_URI'))['todos']['test'].delete_many({})
    except Exception as e:
        print("There was an error Tearing Down:", e)
class DB:
    """Thin wrapper around the local ``connections`` MongoDB collection."""

    def __init__(self):
        self.collection = MongoClient().local.connections

    def REMOVEALL(self):
        """Delete every stored connection document."""
        self.collection.delete_many({})

    def remove(self, connection, field):
        """Delete documents matching `connection` by ip or hostname.

        :param connection: object exposing ``.ip`` and ``.hostname``.
        :param field: selector matched by substring ('ip' or 'hostname').
        :return: True if a delete was issued, False for an unknown field.
        """
        if ("ip" in field):
            self.collection.delete_many({"ip": connection.ip})
        elif ("hostname" in field):
            self.collection.delete_many({"hostname": connection.hostname})
        else:
            return False
        return True

    def insert(self, connection):
        """Persist the connection's identifying fields.

        Bug fix: the original computed ``json.dumps(connection.socket, -1)``
        into an unused local; socket objects are not JSON-serializable, so
        that dead line raised TypeError before the insert could run.
        """
        self.collection.insert_one({
            "ip": connection.ip,
            "hostname": connection.hostname,
            "uniq": str(connection.unique)
        })

    def getAllConnectionsPrint(self):
        """Return every document in the collection as a list."""
        return list(self.collection.find())

    def getCollection(self):
        """Expose the underlying pymongo collection."""
        return self.collection
class diaDb(object):
    """Dialogue log store backed by the MongoDB ``diadb.diaset`` collection."""

    def __init__(self, address=None, port=None):
        """Connect to the diaset collection.

        Defaults are resolved from ``config`` at call time (late binding)
        rather than at class-definition time, so the class can be imported
        even when ``config`` is unavailable. Callers passing explicit
        values are unaffected.
        """
        from pymongo import MongoClient
        if address is None:
            address = config.databaseIp
        if port is None:
            port = config.databasePort
        self.__diaset = MongoClient(address, port).diadb.diaset
        print('diadb init done')

    def write(self, user, word, reply):
        """Append one dialogue record; returns 1 unconditionally."""
        import datetime
        self.__diaset.insert_one({
            "time": datetime.datetime.now(),
            "user": user,
            "word": word,
            "reply": reply
        })
        return 1

    def read(self, startTime, endTime):
        """Return (1, cursor) for records in the time window, else (0, [])."""
        res = self.__diaset.find({"time": {"$gt": startTime, "$lt": endTime}})
        if res.count():
            return 1, res
        else:
            return 0, []

    def delete(self, user):
        """Delete log entries.

        Bug fix: both branches previously ran ``delete_many({})``, so a
        non-empty `user` wiped the whole collection instead of only that
        user's rows.

        :param user: '' to clear everything, otherwise only that user's rows.
        :return: number of documents removed.
        """
        if user == '':
            deleteObj = self.__diaset.delete_many({})
        else:
            deleteObj = self.__diaset.delete_many({"user": user})
        return deleteObj.deleted_count

    def all(self):
        """Return (1, cursor) over every record, or (0, []) when empty."""
        res = self.__diaset.find({})
        if res.count():
            return 1, res
        else:
            return 0, []
segments_37[line[0]] = line[1:] # Generate new cnv collections by copying orignals and updating coordinates cnv_37 = [] for sample in collection_in.find({},{'_id':0}): key = sample['id'] if key in segments_37: sample['start'] = int(segments_37[key][1]) sample['end'] = int(segments_37[key][2]) sample['variantset_id'] = 'AM_VS_GRCH37' cnv_37.append(sample) # write to db collection_out = MongoClient()['arraymap_ga4gh']['variants_cnv_grch37'] collection_out.delete_many({}) collection_out.insert_many(cnv_37) ########################################## ##### Read in lifted file of grch38 ###### ########################################## segments_38 = {} with open('/Users/bogao/DataFiles/tmp/segments_38.txt', 'r') as fi: next(fi) for line in fi: line = line.strip().split('\t') segments_38[line[0]] = line[1:] # Generate new cnv collections by copying orignals and updating coordinates
class Server(object):
    """Semantic-typing service facade over a single MongoDB collection.

    All document kinds (semantic types, columns, bulk-add models) live in
    ``data.service`` and are discriminated by the DATA_TYPE field. Methods
    return ``(body, http_status)`` tuples.
    """

    def __init__(self):
        self.db = MongoClient().data.service
        # Classifier is trained on an empty set at startup; columns are fed
        # to it later through _predict_column.
        self.classifier = MyRandomForest({}, {}, DATA_MODEL_PATH)
        self.classifier.train([])

    ################ Stuff for use in this file ################

    def _create_column(self, column, type_id, column_name, source_name, model, force=False):
        """ Create a column in a semantic type and return the column's id if it
        was created successfully.

        Notes: If the column already exists and force is not set to true, a 409
        will be returned and no data will be modified.

        :param column: Column object whose JSON form is merged into the document
        :param type_id: Id of the semantic type this column belongs to
        :param column_name: Name of the column to be created
        :param source_name: Name of the source of the column to be created
        :param model: Model of the column to be created
        :param force: Force create the column; if true and the column exists the
            old column will be deleted (with all of its data) before creation
        :return: The id of the new column and a response code of 201 if the
            creation was successful, otherwise an error message with the
            appropriate error code
        """
        column_id = get_column_id(type_id, column_name, source_name, model)
        db_body = {
            ID: column_id,
            DATA_TYPE: DATA_TYPE_COLUMN,
            TYPE_ID: type_id,
            COLUMN_NAME: column_name,
            SOURCE_NAME: source_name,
            MODEL: model
        }
        if self.db.find_one(db_body):
            if force:
                self.db.delete_many(db_body)
            else:
                return "Column already exists", 409
        # Merge the column's serialized data into the identifying document.
        db_body.update(column.to_json())
        self.db.insert_one(db_body)
        return column_id, 201

    def _predict_column(self, column_name, source_names, data):
        """ Predicts the semantic type of a column.

        :param column_name: Name of the column
        :param source_names: List of source names
        :param data: The data to predict based upon
        :return: A list of dictionaries which each contain the semantic type
            and confidence score
        """
        att = Column(column_name, source_names[0])
        # print(data)
        for value in data:
            att.add_value(value)
        att.semantic_type = "to_predict"
        att.prepare_data()
        return att.predict_type(
            searcher.search_types_data(INDEX_NAME, source_names),
            searcher.search_similar_text_data(INDEX_NAME, att.value_text, source_names),
            self.classifier)

    def _update_bulk_add_model(self, model, column_model):
        """ Updates the bulk add model in the db and also returns it.

        :param model: The current bulk add model
        :param column_model: The model of the columns which are being updated
            against
        :return: The updated bulk add model
        """
        for n in model[BAC_GRAPH][BAC_NODES]:
            if n.get(BAC_COLUMN_NAME):
                # The synthetic "file name" column carries no real data.
                if n[BAC_COLUMN_NAME] == BAC_COLUMN_NAME_FILE_NAME:
                    continue
                # NOTE(review): only the first user semantic type of the node
                # is consulted here — confirm nodes never carry more than one.
                column_id = get_column_id(
                    get_type_id(
                        n[BAC_USER_SEMANTIC_TYPES][0][BAC_CLASS][BAC_URI],
                        n[BAC_USER_SEMANTIC_TYPES][0][BAC_PROPERTY][BAC_URI]),
                    n[BAC_COLUMN_NAME], model[BAC_NAME], column_model)
                prediction = self._predict_column(
                    n[BAC_COLUMN_NAME], [model[BAC_NAME]],
                    self.db.find_one({
                        DATA_TYPE: DATA_TYPE_COLUMN,
                        ID: column_id
                    })[DATA])
                n[BAC_LEARNED_SEMANTIC_TYPES] = []
                for t in prediction:
                    type_info = decode_type_id(t[SL_SEMANTIC_TYPE])
                    od = collections.OrderedDict()
                    od[BAC_CLASS] = {BAC_URI: type_info[0]}
                    od[BAC_PROPERTY] = {BAC_URI: type_info[1]}
                    od[BAC_CONFIDENCE_SCORE] = t[SL_CONFIDENCE_SCORE]
                    n[BAC_LEARNED_SEMANTIC_TYPES].append(od)
        # Persist the refreshed learned types back onto the stored model.
        self.db.update_one({
            DATA_TYPE: DATA_TYPE_MODEL,
            ID: model[BAC_ID]
        }, {"$set": {
            BULK_ADD_MODEL_DATA: model
        }})
        return model

    ################ Predict ################

    def predict_post(self, data, namespaces=None, column_names=None, source_names=None, models=None):
        """ Predicts the semantic type of the given data.

        :param namespaces: List of allowed namespaces
        :param column_names: List of allowed column names
        :param source_names: List of allowed source names
        :param models: List of allowed column models
        :param data: List of the data values to predict.
        :return: A return message (if it is successful this will be a list of
            the predicted types) and a return code
        """
        data = [x.strip() for x in data]
        data = [x for x in data if x]
        if not data:
            # NOTE(review): 500 for empty client input looks like it should
            # be a 400 — confirm intended status.
            return "Predicting data cannot be empty", 500
        if source_names is None:
            # If no source names are given just use all of the source names in the db
            source_names = set()
            for col in self.db.find({DATA_TYPE: DATA_TYPE_COLUMN}):
                source_names.add(col[SOURCE_NAME])
            source_names = list(source_names)
        if len(source_names) < 1:
            return "You must have columns to be able to predict", 400
        #### Predict the types
        ## Do the actual predicting using the semantic labeler
        predictions = self._predict_column(column_names[0], source_names, data)
        if len(predictions) < 1:
            return "No matches found", 404
        ## Filter the results
        allowed_ids_namespaces = None
        allowed_ids_models = None
        all_allowed_ids = None
        if namespaces is not None:
            allowed_ids_namespaces = set()
            current_allowed_types = list(
                self.db.find({
                    DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE,
                    NAMESPACE: {
                        "$in": namespaces
                    }
                }))
            for prediction in current_allowed_types:
                allowed_ids_namespaces.add(prediction[ID])
        if models:
            allowed_ids_models = set()
            current_allowed_types = list(
                self.db.find({
                    DATA_TYPE: DATA_TYPE_COLUMN,
                    MODEL: {
                        "$in": models
                    }
                }))
            for c in current_allowed_types:
                allowed_ids_models.add(c[TYPE_ID])
        # Intersect the namespace and model filters when both are present.
        if allowed_ids_namespaces is not None and allowed_ids_models is not None:
            all_allowed_ids = allowed_ids_namespaces & allowed_ids_models
        elif allowed_ids_namespaces is not None and allowed_ids_models is None:
            all_allowed_ids = allowed_ids_namespaces
        elif allowed_ids_namespaces is None and allowed_ids_models is not None:
            all_allowed_ids = allowed_ids_models
        return_body = []
        for prediction in predictions:
            print(prediction)
            for type_id, exact_score in prediction[1]:
                if all_allowed_ids is not None:
                    if prediction[SL_SEMANTIC_TYPE] not in all_allowed_ids:
                        continue
                obj_dict = {TYPE_ID_PATH: type_id, SCORE: exact_score}
                type_class_property = decode_type_id(type_id)
                obj_dict[CLASS] = type_class_property[0]
                obj_dict[PROPERTY] = type_class_property[1]
                return_body.append(obj_dict)
        return_body.sort(key=lambda x: x[SCORE], reverse=True)
        return json_response(return_body, 200)

    ################ SemanticTypes ################

    def semantic_types_get(self, class_=None, property_=None, namespaces=None, source_names=None, column_names=None, column_ids=None, models=None, return_columns=False, return_column_data=False):
        """ Returns all of the semantic types (and optionally their columns and
        columns' data) filtered by the given parameters.

        :param class_: The class of the semantic types to get
        :param property_: The property of the semantic types to get
        :param namespaces: The possible namespaces of the semantic types to get
        :param source_names: The possible source names of at least one column of
            a semantic type must have
        :param column_names: The possible column names of at least one column of
            a semantic type must have
        :param column_ids: The possible column ids of at least one column of a
            semantic type must have
        :param models: The possible column model of at least one column of a
            semantic type must have
        :param return_columns: True if all of the columns (but not the data in
            the columns) should be returned with the semantic types
        :param return_column_data: True if all of the columns and their data
            should be returned with the semantic types
        :return: All of the semantic types which fit the given parameters
        """
        # Find all of the type ids that satisfy the class, property, and namespaces
        db_body = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if class_ is not None:
            db_body[CLASS] = class_
        if property_ is not None:
            db_body[PROPERTY] = property_
        if namespaces is not None:
            db_body[NAMESPACE] = {"$in": namespaces}
        possible_result = list(self.db.find(db_body))
        possible_type_ids = set()
        for t in possible_result:
            possible_type_ids.add(t[ID])
        # Find all of the type ids from the columns which satisfy the other parameters
        if source_names or column_names or column_ids or models:
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
            if source_names is not None:
                db_body[SOURCE_NAME] = {"$in": source_names}
            if column_names is not None:
                db_body[COLUMN_NAME] = {"$in": column_names}
            if column_ids is not None:
                db_body[ID] = {"$in": column_ids}
            if models is not None:
                db_body[MODEL] = {"$in": models}
            other_possible_ids = set()
            for col in self.db.find(db_body):
                other_possible_ids.add(col[TYPE_ID])
            possible_type_ids = possible_type_ids & other_possible_ids
        # Construct the return body
        return_body = []
        for t in possible_result:
            if t[ID] in possible_type_ids:
                o = collections.OrderedDict()
                o[TYPE_ID_PATH] = t[ID]
                o[CLASS] = t[CLASS]
                o[PROPERTY] = t[PROPERTY]
                o[NAMESPACE] = t[NAMESPACE]
                return_body.append(o)
        # Add the column data if requested
        # NOTE(review): only return_columns is checked here; return_column_data
        # alone has no effect — confirm whether that is intended.
        if return_columns:
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
            for type_ in return_body:
                db_body[TYPE_ID] = type_[TYPE_ID_PATH]
                type_[COLUMNS] = clean_columns_output(self.db.find(db_body), return_column_data)
        if len(return_body) < 1:
            return "No Semantic types matching the given parameters were found", 404
        return json_response(return_body, 200)

    def semantic_types_post_put(self, class_, property_, force=False):
        """ Creates a semantic type and returns the id if it was successful.

        Notes: If the type already exists and force is not set to true a 409
        will be returned and no data will be modified

        :param class_: The class of the semantic type, note that this must be a
            valid URL
        :param property_: The property of the semantic type
        :param force: Force create the semantic type; if true and the type
            already exists the existing type (and all of its columns and data)
            will be deleted before creation
        :return: The id of the new semantic type and a response code of 201 if
            the creation was successful, otherwise an error message with the
            appropriate error code
        """
        class_ = class_.rstrip("/")
        property_ = property_.rstrip("/")
        ## Verify that class is a valid uri and namespace is a valid uri
        # Namespace is everything before the final path (or fragment) segment.
        namespace = "/".join(class_.replace("#", "/").split("/")[:-1])
        ## Actually add the type
        type_id = get_type_id(class_, property_)
        db_body = {
            ID: type_id,
            DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE,
            CLASS: class_,
            PROPERTY: property_,
            NAMESPACE: namespace
        }
        if self.db.find_one(db_body):
            if force:
                # Remove the type's columns first, then the type itself.
                self.db.delete_many({
                    DATA_TYPE: DATA_TYPE_COLUMN,
                    TYPE_ID: type_id
                })
                self.db.delete_many(db_body)
            else:
                return type_id, 409
        self.db.insert_one(db_body)
        return type_id, 201

    def semantic_types_delete(self, class_=None, property_=None, type_ids=None, namespaces=None, source_names=None, column_names=None, column_ids=None, models=None, delete_all=False):
        """ Deletes all of the semantic types (and all of their columns/data)
        that fit the given parameters.

        :param class_: The class of the semantic types to delete
        :param property_: The property of the semantic types to delete
        :param type_ids: The possible ids of the semantic types to delete
        :param namespaces: The possible namespaces of the semantic types to delete
        :param source_names: The possible source names of at least one column of
            a semantic type must have
        :param column_names: The possible column names of at least one column of
            a semantic type must have
        :param column_ids: The possible column ids of at least one column of a
            semantic type must have
        :param models: The possible column model of at least one column of a
            semantic type must have
        :param delete_all: Set this to true if all semantic types should be deleted
        :return: The amount of semantic types deleted and a 200 if it worked,
            otherwise an error message with the appropriate code
        """
        if class_ is None and property_ is None and type_ids is None and namespaces is None and source_names is None and column_names is None and column_ids is None and models is None and not delete_all:
            return "To delete all semantic types give deleteAll as true", 400
        # NOTE(review): this return is UNCONDITIONAL — every call that passes
        # the guard above deletes ALL semantic types and columns, and all of
        # the filtered-delete logic below is unreachable dead code. It looks
        # like this return should be guarded by `if delete_all:`. Confirm.
        return "All " + str(
            self.db.delete_many({
                DATA_TYPE: {
                    "$in": [DATA_TYPE_SEMANTIC_TYPE, DATA_TYPE_COLUMN]
                }
            }).deleted_count
        ) + " semantic types and their data were deleted", 200
        # Python-2 debug print (this file predates py3); unreachable.
        print str(class_) + " " + str(property_) + " " + str(
            type_ids) + " " + str(namespaces) + " " + str(
            source_names) + " " + str(column_names) + " " + str(
            column_ids) + " " + str(models) + " " + str(delete_all)
        # Find the parent semantic types and everything below them of everything which meets column requirements
        type_ids_to_delete = []
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
        db_body_id = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if type_ids is not None:
            db_body[TYPE_ID] = {"$in": type_ids}
            db_body_id[ID] = {"$in": type_ids}
        if source_names is not None:
            db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None:
            db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None:
            db_body[COLUMN_ID_PATH] = {"$in": column_ids}
        if models is not None:
            db_body[MODEL] = {"$in": models}
        for col in self.db.find(db_body):
            print "col[TYPE_ID] = " + str(col[TYPE_ID])
            if col[TYPE_ID] not in type_ids_to_delete:
                type_ids_to_delete.append(col[TYPE_ID])
        for col in self.db.find(db_body_id):
            print "col[ID] = " + str(col[ID])
            if col[ID] not in type_ids_to_delete:
                type_ids_to_delete.append(col[ID])
        # Find the semantic types which meet the other requirements and delete all types which need to be
        possible_types = []
        db_body = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if class_ is not None:
            db_body[CLASS] = class_
        if property_ is not None:
            db_body[PROPERTY] = property_
        if namespaces is not None:
            db_body[NAMESPACE] = {"$in": namespaces}
        if type_ids is None and source_names is None and column_names is None and column_ids is None and models is None:
            deleted = self.db.delete_many(db_body).deleted_count
        else:
            for t in self.db.find(db_body):
                if t[ID] not in possible_types:
                    possible_types.append(t[ID])
            for t in self.db.find(db_body_id):
                if t[ID] not in possible_types:
                    possible_types.append(t[ID])
            # NOTE(review): removing from type_ids_to_delete while iterating
            # it skips elements — classic mutate-while-iterating bug.
            for id_ in type_ids_to_delete:
                if id_ not in possible_types:
                    type_ids_to_delete.remove(id_)
            db_body = {
                DATA_TYPE: DATA_TYPE_COLUMN,
                TYPE_ID: {
                    "$in": type_ids_to_delete
                }
            }
            self.db.delete_many(db_body)
            deleted = self.db.delete_many({
                DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE,
                ID: {
                    "$in": type_ids_to_delete
                }
            }).deleted_count
        if deleted < 1:
            return "No semantic types with the given parameters were found", 404
        return str(
            deleted
        ) + " semantic types matched parameters and were deleted", 200

    ################ SemanticTypesColumns ################

    def semantic_types_columns_get(self, type_id, column_ids=None, column_names=None, source_names=None, models=None, return_column_data=False):
        """ Returns all of the columns in a semantic type that fit the given
        parameters.

        :param type_id: The id of the semantic type
        :param column_ids: The possible ids of the columns to be returned
        :param column_names: The possible names of the columns to be returned
        :param source_names: The possible source names of the columns to be returned
        :param models: The possible models of the columns to be returned
        :param return_column_data: True if all of the data in the column should
            be returned with the columns
        :return: All of the columns in the semantic type that fit the given
            parameters
        """
        print(type_id)
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        if source_names is not None:
            db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None:
            db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None:
            db_body[ID] = {"$in": column_ids}
        if models is not None:
            db_body[MODEL] = {"$in": models}
        result = list(self.db.find(db_body))
        if len(result) < 1:
            return "No columns matching the given parameters were found", 404
        return json_response(clean_columns_output(result, return_column_data), 200)

    def semantic_types_columns_post_put(self, type_id, column_name, source_name, model, data=[], force=False):
        """ Create a column in a semantic type, optionally with data.

        :param type_id: Id of the semantic type to create the column in
        :param column_name: The name of the column to be created
        :param source_name: The name of the source of the column to be created
        :param model: The model of the column to be created
        :param data: The (optional) list of data to put into the column on
            creation. NOTE(review): mutable default argument — safe only
            because `data` is rebound, never mutated in place, but fragile.
        :param force: True if the column should be replaced if it already exists
        :return: The id of the newly created column with a 201 if it was
            successful, otherwise an error message with the appropriate error
            code
        """
        column = Column(column_name, source_name)
        column.semantic_type = type_id
        # If the size of the training data is MORE than a threshold value,
        # then sample the threshold values randomly.
        if (len(data) > SAMPLE_SIZE):
            data = random.sample(data, SAMPLE_SIZE)
        for value in data:
            column.add_value(value)
        result = self._create_column(column, type_id, column_name, source_name, model, force)
        return result

    def semantic_types_columns_delete(self, type_id, column_ids=None, column_names=None, source_names=None, models=None):
        """ Delete all of the columns in a semantic type that match the given
        parameters.

        :param type_id: The id of the semantic type to delete the columns from
        :param column_ids: The possible ids of the columns to delete
        :param source_names: The possible names of the columns to delete
        :param column_names: The possible source names of the columns to delete
        :param models: The possible models of the columns to delete
        :return: The number of columns deleted with a 200 if successful,
            otherwise an error message with an appropriate error code
        """
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        if source_names is not None:
            db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None:
            db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None:
            db_body[ID] = {"$in": column_ids}
        if models is not None:
            db_body[MODEL] = {"$in": models}
        found_columns = list(self.db.find(db_body))
        if len(found_columns) < 1:
            return "No columns were found with the given parameters", 404
        return str(self.db.delete_many(
            db_body).deleted_count) + " columns deleted successfully", 200

    ################ SemanticTypesColumnData ################

    def semantic_types_column_data_get(self, column_id):
        """ Returns all of the data in the column

        :param column_id: Id of the column to get the data from
        :return: The column and all of its info
        """
        result = list(
            self.db.find({
                DATA_TYPE: DATA_TYPE_COLUMN,
                ID: column_id
            }))
        if len(result) < 1:
            return "No column with that id was found", 404
        if len(result) > 1:
            return "More than one column was found with that id", 500
        return json_response(clean_column_output(result[0]), 200)

    def semantic_types_column_data_post_put(self, column_id, body, force=False):
        """ Add or replace data on an existing column

        Notes: If the column does not exist a 404 will be returned

        :param column_id: Id of the column to add/replace the data of
        :param body: An array of the new data
        :param force: True if the current data in the column should be replaced,
            false if the new data should just be appended
        :return: A confirmation with a 201 if it was added successfully or an
            error message with an appropriate error code if it was not
            successful
        """
        column_data = self.db.find_one({
            DATA_TYPE: DATA_TYPE_COLUMN,
            ID: column_id
        })
        # NOTE(review): find_one returns a document (dict) or None, neither of
        # which has .matched_count — these two checks raise AttributeError (or
        # on a miss should test `column_data is None`). Confirm and fix.
        if column_data.matched_count < 1:
            return "No column with that id was found", 404
        if column_data.matched_count > 1:
            return "More than one column was found with that id", 500
        column = Column(column_data[COLUMN_NAME], column_data[SOURCE_NAME], get_type_from_column_id(column_id))
        if not force:
            # Append mode: preload the existing values before adding new ones.
            column.read_json_to_column(column_data)
        for value in body:
            column.add_value(value)
        data = column.to_json()
        # NOTE(review): update_many is called with only one argument — pymongo
        # requires (filter, update); as written this raises TypeError.
        self.db.update_many(data)
        return "Column data updated", 201

    def semantic_types_column_data_delete(self, column_id):
        """ Delete the data from the column with the given id

        :param column_id: Id of the column to delete the data from
        :return: A deletion confirmation with a 200 if successful, otherwise an
            error message with an appropriate error code
        """
        result = self.db.update_many(
            {
                DATA_TYPE: DATA_TYPE_COLUMN,
                ID: column_id
            }, {"$set": {
                DATA: []
            }})
        if result.matched_count < 1:
            return "No column with that id was found", 404
        if result.matched_count > 1:
            return "More than one column was found with that id", 500
        # NOTE(review): `column` is fetched but never used, and the two
        # delete_one calls below remove whole column documents (the first by
        # TYPE_ID may hit an arbitrary sibling column) even though this method
        # claims to delete only the column's data. Confirm intent.
        column = self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})
        self.db.delete_one({
            DATA_TYPE: DATA_TYPE_COLUMN,
            TYPE_ID: get_type_from_column_id(column_id)
        })
        self.db.delete_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})
        return "Column data deleted", 200

    ################ BulkAddModels ################

    def bulk_add_models_get(self, model_ids=None, model_names=None, model_desc=None, show_all=False, crunch_data=True):
        """ Returns the current state of all of the bulk add models.

        :param model_ids: The possible ids of the models to get
        :param model_names: The possible names of the models to get
        :param model_desc: The possible descriptions of the models to get
        :param show_all: True if the whole model should be returned
        :param crunch_data: False if learnedSemanticTypes should not be
            generated and the version in the db should be used instead; note
            that the data in the db is updated every time a get is run with
            crunch_data=true
        :return: All of the models that fit the given parameters
        """
        db_body = {DATA_TYPE: DATA_TYPE_MODEL}
        if model_ids is not None:
            db_body[ID] = {"$in": model_ids}
        if model_names is not None:
            db_body[NAME] = {"$in": model_names}
        if model_desc is not None:
            db_body[MODEL_DESC] = model_desc
        db_result = list(self.db.find(db_body))
        if len(db_result) < 1:
            return "No models were found with the given parameters", 404
        # Construct the return body
        return_body = []
        for mod in db_result:
            o = collections.OrderedDict()
            o[MODEL_ID] = mod[ID]
            o[NAME] = mod[NAME]
            o[DESC] = mod[DESC]
            if show_all:
                o[MODEL] = self._update_bulk_add_model(
                    mod[BULK_ADD_MODEL_DATA],
                    mod[MODEL]) if crunch_data else mod[BULK_ADD_MODEL_DATA]
            return_body.append(o)
        return json_response(return_body, 200)

    def bulk_add_models_post(self, model, column_model=DEFAULT_BULK_MODEL):
        """ Add a bulk add model.

        :param column_model: The model that all of the created columns should have
        :param model: A dictionary of the model
        :return: Stats of the data added
        """
        #### Assert the required elements exist
        if BAC_ID not in model:
            return "The given model must have an id", 400
        if BAC_NAME not in model:
            return "The given model must have a name", 400
        if BAC_DESC not in model:
            return "The given model must have a description", 400
        if BAC_GRAPH not in model:
            return "The given model must have a graph", 400
        if BAC_NODES not in model[BAC_GRAPH]:
            return "The given model must have nodes within the graph", 400
        if len(list(self.db.find({ID: model[BAC_ID]}))) > 0:
            return "Model id already exists", 409
        #### Parse and add the model
        # Try to add the given semantic types and columns
        new_type_count = 0
        new_column_count = 0
        existed_type_count = 0
        existed_column_count = 0
        for n in model[BAC_GRAPH][BAC_NODES]:
            if n.get(BAC_USER_SEMANTIC_TYPES):
                for ust in n[BAC_USER_SEMANTIC_TYPES]:
                    semantic_status = self.semantic_types_post_put(
                        ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI],
                        False)
                    if semantic_status[1] == 201:
                        new_type_count += 1
                    elif semantic_status[1] == 409:
                        existed_type_count += 1
                    elif semantic_status[1] == 400:
                        return semantic_status
                    else:
                        return "Error occurred while adding semantic type: " + str(
                            ust), 500
                    # NOTE(review): _create_column's signature is (column,
                    # type_id, column_name, source_name, model, force) — this
                    # call omits the `column` argument, so every argument is
                    # shifted by one position. Confirm and fix.
                    column_status = self._create_column(
                        get_type_id(ust[BAC_CLASS][BAC_URI],
                                    ust[BAC_PROPERTY][BAC_URI]),
                        n[BAC_COLUMN_NAME], model[BAC_NAME], column_model)
                    if column_status[1] == 201:
                        new_column_count += 1
                    elif column_status[1] == 409:
                        existed_column_count += 1
                    elif column_status[1] == 400:
                        return column_status
                    else:
                        return "Error occurred while adding column for semantic type: " + str(
                            ust), 500
        # Nothing bad happened when creating the semantic types and columns, so add the model to the DB
        self.db.insert_one({
            DATA_TYPE: DATA_TYPE_MODEL,
            ID: model["id"],
            NAME: model[BAC_NAME],
            DESC: model["description"],
            MODEL: column_model,
            BULK_ADD_MODEL_DATA: model
        })
        return "Model and columns added, " + str(new_type_count) + " semantic types created, " + \
               str(existed_type_count) + " semantic types already existed, " + \
               str(new_column_count) + " columns created, and " + \
               str(existed_column_count) + " columns already existed.", 201

    def bulk_add_models_delete(self, model_ids=None, model_names=None, model_desc=None):
        """ Delete all of the bulk add models which fit the given parameters

        :param model_ids: The possible ids of the models to delete
        :param model_names: The possible names of the models to delete
        :param model_desc: The possible descriptions of the models to delete
        :return: The amount of models deleted with a 200 if successful,
            otherwise an error message with the appropriate code
        """
        db_body = {DATA_TYPE: DATA_TYPE_MODEL}
        if model_ids is not None:
            db_body[ID] = {"$in": model_ids}
        if model_names is not None:
            db_body[NAME] = {"$in": model_names}
        if model_desc is not None:
            db_body[MODEL_DESC] = model_desc
        deleted_count = self.db.delete_many(db_body).deleted_count
        if deleted_count < 1:
            return "No models were found with the given parameters", 404
        return str(deleted_count) + " models deleted successfully", 200

    ################ BulkAddModelData ################

    def bulk_add_model_data_get(self, model_id, crunch_data):
        """ Returns the current state of the bulk add model

        :param model_id: The id of the model to get
        :param crunch_data: False if learnedSemanticTypes should not be
            generated and the version in the db should be used instead; note
            that the data in the db is updated every time a get is run with
            crunch_data=true
        :return: The current state of the bulk add model
        """
        db_result = list(
            self.db.find({
                DATA_TYPE: DATA_TYPE_MODEL,
                ID: model_id
            }))
        if len(db_result) < 1:
            return "A model was not found with the given id", 404
        if len(db_result) > 1:
            return "More than one model was found with the given id", 500
        db_result = db_result[0]
        return json_response(
            self._update_bulk_add_model(db_result[BULK_ADD_MODEL_DATA],
                                        db_result[MODEL]) if crunch_data else
            db_result[BULK_ADD_MODEL_DATA], 200)

    def bulk_add_model_data_post(self, model_id, column_model, data):
        """ Add data to the service with a bulk add model

        :param model_id: The id of the model to add off of
        :param column_model: The model of the columns being used with that model
        :param data: The list of dictionaries with all of the data to add
        :return: A confirmation message with a 201 if it was successful,
            otherwise an error message with the appropriate code
        """
        # Get the model and parse the json lines
        model = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id}))
        if len(model) < 1:
            return "The given model was not found", 404
        if len(model) > 1:
            return "More than one model was found with the id", 500
        model = model[0][BULK_ADD_MODEL_DATA]
        # Get all of the data in each column
        for n in model[BAC_GRAPH][BAC_NODES]:
            column_data = []
            for line in data:
                if n.get(BAC_COLUMN_NAME):
                    column_data.append(line[n[BAC_COLUMN_NAME]])
            # Add it to the db
            if n.get(BAC_USER_SEMANTIC_TYPES):
                for ust in n[BAC_USER_SEMANTIC_TYPES]:
                    result = self.semantic_types_column_data_post_put(
                        get_column_id(
                            get_type_id(ust[BAC_CLASS][BAC_URI],
                                        ust[BAC_PROPERTY][BAC_URI]),
                            n[BAC_COLUMN_NAME], model[BAC_NAME], column_model),
                        column_data, False)[1]
                    if result == 201:
                        continue
                    elif result == 404:
                        return "A required column was not found", 404
                    else:
                        return "Error occurred while adding data to the column", 500
        return "Data successfully added to columns", 201
    def post(self, file_id):
        # Create or replace the stored text for this file id (upsert).
        new_text = request.get_json()['data']
        files_collection.update_one(
            {'_id': ObjectId(file_id)},
            {'$set': {'file_text': new_text}},
            upsert=True
        )
        # 204: success, no response body.
        return '', 204


api.add_resource(FileServer, '/<string:file_id>')

if __name__ == '__main__':
    if len(sys.argv) == 3:
        # Register with the directory server only from the Werkzeug reloader
        # child so the registration is not doubled in debug mode.
        if os.environ.get('WERKZEUG_RUN_MAIN') == 'true':
            print('Initing node')
            requests.post(
                server_util.url_builder(DS_ADDR[0], DS_ADDR[1], 'config'),
                json={'ip': sys.argv[1], 'port': sys.argv[2]}
            )
        # NOTE(review): app.run placement relative to the WERKZEUG guard was
        # reconstructed from mangled formatting — confirm against the original.
        app.run(debug=True, host=sys.argv[1], port=int(sys.argv[2]))
        # Runs after app.run returns (server shut down): deregister the node.
        requests.delete(
            server_util.url_builder(DS_ADDR[0], DS_ADDR[1], 'config'),
            json={'ip': sys.argv[1], 'port': sys.argv[2]}
        )
        # Catastrophic delete EVERYTHING if node goes down! (Purposeful)
        files_collection.delete_many({})
    else:
        print('Supply an IP and Port')
from scrapy.spiders import Spider
from scrapy.selector import Selector
from basic_crawler.items import BasicCrawlerItem
from scrapy.http import Request
import re
from pymongo import MongoClient

global db
db = MongoClient().db.links

# FIX: the original read `if not db:` -- the sense was inverted ("Connected"
# printed on a falsy handle) and PyMongo Collection objects deliberately do
# not implement truth testing (bool() raises NotImplementedError); the
# documented idiom is an explicit `is not None` comparison.
if db is not None:
    print('Connected to DB')

# Start every crawl from an empty collection.
res = db.delete_many({})
if res.acknowledged:
    print('Clean successfull')
else:
    print('Clean unsuccessfull')

# URLs already crawled in this run (shared by all parse calls).
global visited_links
visited_links = []


class MySpider(Spider):
    name = "basic_crawler"
    allowed_domains = ['math.hmc.edu']
    start_urls = ["https://www.math.hmc.edu/funfacts/"]

    def parse(self, response):
        # NOTE(review): this method is truncated in the visible chunk;
        # preserved exactly as far as it is visible.
        global db
        global visited_links
        hxs = Selector(response)
        url = response.url
class TestDeviceDatabaseMongoDB(unittest.TestCase):
    """Integration tests for the device-database wrapper against a live MongoDB collection."""

    def setUp(self):
        # Wrapper under test plus a direct handle to the same collection for
        # fixture insertion and independent verification.
        self._database = tests_util.get_mongo_database()
        self._direct_database = MongoClient()["Hestia"]["testing"]

    def tearDown(self):
        # Empty the collection so tests stay independent of each other.
        self._direct_database.delete_many({})

    def test_get_all_devices(self):
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())
        self._direct_database.insert_one(device_data)
        retrieved_devices = self._database.get_all_devices()
        self.assertEqual(1, len(retrieved_devices))
        device_data["_id"] = str(ObjectId())
        self._direct_database.insert_one(device_data)
        retrieved_devices = self._database.get_all_devices()
        self.assertEqual(2, len(retrieved_devices))

    def test_get_device(self):
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())
        self._direct_database.insert_one(device_data)
        device = self._database.get_device(device_data["_id"])
        self.assertEqual(device_data["name"], device.name)
        self.assertIsInstance(device, Device)

    def test_add_device(self):
        device_data = self._get_device_data()
        # FIX: Collection.count() was deprecated in PyMongo 3.7 and removed in
        # 4.0; count_documents({}) is the supported equivalent.
        initial_count = self._direct_database.count_documents({})
        self._database.add_device(device_data)
        self.assertEqual(initial_count + 1, self._direct_database.count_documents({}))

    def test_delete_device(self):
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())
        self._direct_database.insert_one(device_data)
        # FIX: count() -> count_documents({}) (see test_add_device).
        initial_count = self._direct_database.count_documents({})
        self._database.delete_device(device_data["_id"])
        self.assertEqual(initial_count - 1, self._direct_database.count_documents({}))

    def test_update_field(self):
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())
        self._direct_database.insert_one(device_data)
        new_name = "Hestia"
        self._database.update_field(device_data["_id"], "name", new_name)
        device = self._direct_database.find_one({"_id": device_data["_id"]})
        self.assertEqual(device["name"], new_name)

    def test_get_field(self):
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())
        self._direct_database.insert_one(device_data)
        name = self._database.get_field(device_data["_id"], "name")
        self.assertEqual(device_data["name"], name)

    def test_get_activator_field(self):
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())
        self._direct_database.insert_one(device_data)
        activators = self._direct_database.find_one(
            {"_id": device_data["_id"]})["activators"]
        act_id = list(activators.keys())[0]
        activator_name = self._database.get_activator_field(
            device_data["_id"], act_id, "name")
        real_name = device_data["activators"][act_id]["name"]
        self.assertEqual(real_name, activator_name)

    def test_update_activator_field(self):
        device_data = self._get_device_data()
        device_data["_id"] = str(ObjectId())
        self._direct_database.insert_one(device_data)
        activators = self._direct_database.find_one(
            {"_id": device_data["_id"]})["activators"]
        act_id = list(activators.keys())[0]
        new_name = "new_name"
        self._database.update_activator_field(device_data["_id"], act_id,
                                              "name", new_name)
        device_in_db = self._direct_database.find_one(
            {"_id": device_data["_id"]})
        activator_name_in_db = device_in_db["activators"][act_id]["name"]
        self.assertEqual(activator_name_in_db, new_name)

    def _get_device_data(self):
        """Build one mock device fixture, re-keying its activator list by fresh ObjectIds."""
        device_data = {
            "module": "plugins.mock.devices.lock.Lock",
            "class": "Lock",
            "type": "Lock",
            "name": "TestDevice",
            "options": {
                "bridge_ip": "127.0.2.1",
                "bridge_port": 90
            },
            "activators": [{
                "module": "plugins.mock.activators.ActivateLock",
                "rank": 0,
                "class": "ActivateLock",
                "name": "Activate",
                "type": "bool",
                "state": True
            }]
        }
        # Convert the activator list into the id-keyed mapping the database stores.
        activators = device_data.pop("activators", None)
        device_data["activators"] = {}
        for activator in activators:
            _id = str(ObjectId())
            device_data["activators"][_id] = activator
        return device_data
class DataBase:
    """Wrapper around one MongoDB 'questionnaire' collection of questions and their answers.

    Documents have the shape {'_id': int, 'question': str, 'answers': list[str]}.
    """

    def __init__(self, name: str, auth_data: dict):
        db_name = auth_data['questionnaire'][name]
        user = auth_data['user']
        password = auth_data['password']
        appeal = f'mongodb+srv://{user}:{password}@cluster0.sonqc.mongodb.net/{db_name}?retryWrites=true&w=majority'
        self.db = MongoClient(appeal)['questionnaire'][db_name]
        self.__lambda_fun()

    def __lambda_fun(self):
        # Bind the small query helpers as attributes (kept lambda-style to
        # preserve the original public surface).
        # FIX: _get_last_id previously used len(all docs) - 1; after
        # remove_questions() that count no longer matches the highest _id and
        # add() could raise DuplicateKeyError. Use the max existing _id instead
        # (-1 for an empty collection, so the first question still gets _id 0).
        self._get_last_id = lambda: max(self.get_questions_ids(), default=-1)
        self.get_list_data = lambda: list(self.db.find())
        self.get_questions_ids = lambda: [el['_id'] for el in self.get_list_data()]
        self.remove_all_data = lambda: self.db.delete_many({})
        self.remove_questions = lambda *ids: [self.db.delete_one({'_id': id_}) for id_ in ids]
        # Remove a single answer value from a question's answers array.
        self._remove_arg = lambda question_id, arg: self.db.update_one({
            '_id': question_id
        }, {
            '$pull': {'answers': arg}
        })

    def add(self, data: str, question_id: int = None):
        """Add a new question (question_id None) or append an answer to an existing one."""
        if question_id is None:
            last_id = self._get_last_id()
            post = {'_id': last_id + 1, 'question': data, 'answers': []}
            self.db.insert_one(post)
            return
        self.db.update_one({'_id': question_id}, {'$push': {'answers': data}})

    def show_all(self, file_name: str):
        """Dump every question document to <file_name>.csv (semicolon-delimited)."""
        rows = self.get_list_data()
        columns = [*rows[0].keys()] if rows else []
        with open(f'{file_name}.csv', 'w', newline='') as file:
            writer = csv.DictWriter(file, delimiter=';', fieldnames=columns)
            writer.writeheader()
            writer.writerows(rows)

    def show_ans(self, question_id: int):
        """Dump one question's answers (with positional ids) to <question_id>.csv."""
        file_name = str(question_id)
        columns = ['answer_id', 'answer']
        rows = enumerate(self.db.find_one({'_id': question_id})['answers'])
        with open(f'{file_name}.csv', 'w', newline='') as file:
            writer = csv.writer(file, delimiter=';')
            writer.writerow(columns)
            writer.writerows(rows)

    def remove_answers(self, question_id: int, *ids: int):
        """Remove the answers at the given positional indices from a question."""
        questions = self.get_list_data()
        for question in questions:
            if question['_id'] == question_id:
                answers = question['answers']
                break
        else:
            # Unknown question id: nothing to remove.
            return
        for id_ in ids:
            answer = answers[id_]
            # NOTE(review): $pull removes every occurrence of the value, so
            # duplicate answer strings are all removed together -- confirm intent.
            self._remove_arg(question_id, answer)
charset='utf8') cursor = cnx.cursor(dictionary=True) # shop_ids = [12988, 12382, 11077, 12823, 10377, 15081, 2397] shop_ids = [17065, 17066] sql = 'select s.*, o.tel ' \ 'from f_shop.shop s ' \ 'inner join f_shop.shop_owner o on s.shop_owner_id = o.id ' \ 'where s.id in (%s)' % ','.join(['%s'] * len(shop_ids)) print(sql) cursor.execute(sql, shop_ids) mc = MongoClient(host='123.56.117.75', port=27017)['profile']['shop'] # 清理旧数据 mc.delete_many({'deprecated.id': {'$in': shop_ids}}) new_shop_records = [{ 'tel': record['tel'], 'password': record['password'], 'name': record['shop_name'], 'avatar': record['avatar'], 'status': 'STATUS_INIT', 'create_time': record['create_time'], 'accounts': [], 'loc': { 'province': record['province'], 'province_code': record['province_code'], 'district': record['district'], 'city': record['city'], 'address': ifnull(record['address']), 'street_code': record['street_code'],
class Server(object): def __init__(self): self.db = MongoClient().data.service self.classifier = MyRandomForest({}, {}, DATA_MODEL_PATH) self.classifier.train([]) ################ Stuff for use in this file ################ def _create_column(self, column, type_id, column_name, source_name, model, force=False): """ Create a column in a semantic type and return the column's id if it was created successfully. Notes: If the column already exists and force is not set to true, a 409 will be returned and no data will be modified. :param type_id: Id of the semantic type this column belongs to :param column_name: Name of the column to be created :param source_name: Name of the source of the column to be created :param model: Model of the column to be created :param data: Data which will be added to the column on creation :param force: Force create the column, if this is true and the column exists the old column will be deleted (with all of its data) before creation :return: The id of the new column and a response code of 201 if the creation was successful, otherwise it will be an error message with the appropriate error code """ column_id = get_column_id(type_id, column_name, source_name, model) db_body = {ID: column_id, DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id, COLUMN_NAME: column_name, SOURCE_NAME: source_name, MODEL: model} if self.db.find_one(db_body): if force: self.db.delete_many(db_body) else: return "Column already exists", 409 db_body.update(column.to_json()) self.db.insert_one(db_body) return column_id, 201 def _predict_column(self, column_name, source_names, data): """ Predicts the semantic type of a column. 
:param column_name: Name of the column :param source_names: List of source names :param data: The data to predict based opon :return: A list of dictionaries which each contain the semantic type and confidence score """ att = Column(column_name, source_names[0]) # print(data) for value in data: att.add_value(value) att.semantic_type = "to_predict" att.prepare_data() return att.predict_type(searcher.search_types_data(INDEX_NAME, source_names), searcher.search_similar_text_data(INDEX_NAME, att.value_text, source_names), self.classifier) def _update_bulk_add_model(self, model, column_model): """ Updates the bulk add model in the db and also returns it. :param model: The current bulk add model :param column_model: The model of the columns which are being updated against :return: The updated bulk add model """ for n in model[BAC_GRAPH][BAC_NODES]: if n.get(BAC_COLUMN_NAME): if n[BAC_COLUMN_NAME] == BAC_COLUMN_NAME_FILE_NAME: continue column_id = get_column_id(get_type_id(n[BAC_USER_SEMANTIC_TYPES][0][BAC_CLASS][BAC_URI], n[BAC_USER_SEMANTIC_TYPES][0][BAC_PROPERTY][BAC_URI]), n[BAC_COLUMN_NAME], model[BAC_NAME], column_model) prediction = self._predict_column(n[BAC_COLUMN_NAME], [model[BAC_NAME]], self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})[DATA]) n[BAC_LEARNED_SEMANTIC_TYPES] = [] for t in prediction: type_info = decode_type_id(t[SL_SEMANTIC_TYPE]) od = collections.OrderedDict() od[BAC_CLASS] = {BAC_URI: type_info[0]} od[BAC_PROPERTY] = {BAC_URI: type_info[1]} od[BAC_CONFIDENCE_SCORE] = t[SL_CONFIDENCE_SCORE] n[BAC_LEARNED_SEMANTIC_TYPES].append(od) self.db.update_one({DATA_TYPE: DATA_TYPE_MODEL, ID: model[BAC_ID]}, {"$set": {BULK_ADD_MODEL_DATA: model}}) return model ################ Predict ################ def predict_post(self, data, namespaces=None, column_names=None, source_names=None, models=None): """ Predicts the semantic type of the given data. 
:param namespaces: List of allowed namespaces :param column_names: List of allowed column names :param source_names: List of allowed source names :param models: List of allowed column models :param data: List of the data values to predict. :return: A return message (if it is successful this will be a list of the predicted types) and a return code """ data = [x.strip() for x in data] data = [x for x in data if x] if not data: return "Predicting data cannot be empty", 500 if source_names is None: # If no source names are given just use all of the source names in the db source_names = set() for col in self.db.find({DATA_TYPE: DATA_TYPE_COLUMN}): source_names.add(col[SOURCE_NAME]) source_names = list(source_names) if len(source_names) < 1: return "You must have columns to be able to predict", 400 #### Predict the types ## Do the actual predicting using the semantic labeler predictions = self._predict_column(column_names[0], source_names, data) if len(predictions) < 1: return "No matches found", 404 ## Filter the results allowed_ids_namespaces = None allowed_ids_models = None all_allowed_ids = None if namespaces is not None: allowed_ids_namespaces = set() current_allowed_types = list( self.db.find({DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, NAMESPACE: {"$in": namespaces}})) for prediction in current_allowed_types: allowed_ids_namespaces.add(prediction[ID]) if models: allowed_ids_models = set() current_allowed_types = list(self.db.find({DATA_TYPE: DATA_TYPE_COLUMN, MODEL: {"$in": models}})) for c in current_allowed_types: allowed_ids_models.add(c[TYPE_ID]) if allowed_ids_namespaces is not None and allowed_ids_models is not None: all_allowed_ids = allowed_ids_namespaces & allowed_ids_models elif allowed_ids_namespaces is not None and allowed_ids_models is None: all_allowed_ids = allowed_ids_namespaces elif allowed_ids_namespaces is None and allowed_ids_models is not None: all_allowed_ids = allowed_ids_models return_body = [] for prediction in predictions: print(prediction) for 
type_id, exact_score in prediction[1]: if all_allowed_ids is not None: if prediction[SL_SEMANTIC_TYPE] not in all_allowed_ids: continue obj_dict = {TYPE_ID_PATH: type_id, SCORE: exact_score} type_class_property = decode_type_id(type_id) obj_dict[CLASS] = type_class_property[0] obj_dict[PROPERTY] = type_class_property[1] return_body.append(obj_dict) return_body.sort(key=lambda x: x[SCORE], reverse=True) return json_response(return_body, 200) ################ SemanticTypes ################ def semantic_types_get(self, class_=None, property_=None, namespaces=None, source_names=None, column_names=None, column_ids=None, models=None, return_columns=False, return_column_data=False): """ Returns all of the semantic types (and optionally their columns and columns' data) filtered by the given parameters. :param class_: The class of the semantic types to get :param property_: The property of the semantic types to get :param namespaces: The possible namespaces of the semantic types to get :param source_names: The possible source names of at least one column of a semantic type must have :param column_names: The possible column names of at least one column of a semantic type must have :param column_ids: The possible column ids of at least one column of a semantic type must have :param models: The possible column model of at least one column of a semantic type must have :param return_columns: True if all of the columns (but not the data in the columns) should be returned with the semantic types :param return_column_data: True if all of the columns and their data should be returned with the semantic types :return: All of the semantic types which fit the following parameters """ # Find all of the type ids that satisfy the class, property, and namespaces db_body = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE} if class_ is not None: db_body[CLASS] = class_ if property_ is not None: db_body[PROPERTY] = property_ if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces} 
possible_result = list(self.db.find(db_body)) possible_type_ids = set() for t in possible_result: possible_type_ids.add(t[ID]) # Find all of the type ids from the columns which satisfy the other parameters if source_names or column_names or column_ids or models: db_body = {DATA_TYPE: DATA_TYPE_COLUMN} if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names} if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names} if column_ids is not None: db_body[ID] = {"$in": column_ids} if models is not None: db_body[MODEL] = {"$in": models} other_possible_ids = set() for col in self.db.find(db_body): other_possible_ids.add(col[TYPE_ID]) possible_type_ids = possible_type_ids & other_possible_ids # Construct the return body return_body = [] for t in possible_result: if t[ID] in possible_type_ids: o = collections.OrderedDict() o[TYPE_ID_PATH] = t[ID] o[CLASS] = t[CLASS] o[PROPERTY] = t[PROPERTY] o[NAMESPACE] = t[NAMESPACE] return_body.append(o) # Add the column data if requested if return_columns: db_body = {DATA_TYPE: DATA_TYPE_COLUMN} for type_ in return_body: db_body[TYPE_ID] = type_[TYPE_ID_PATH] type_[COLUMNS] = clean_columns_output(self.db.find(db_body), return_column_data) if len(return_body) < 1: return "No Semantic types matching the given parameters were found", 404 return json_response(return_body, 200) def semantic_types_post_put(self, class_, property_, force=False): """ Creates a semantic type and returns the id if it was successful. 
Notes: If the type already exists and force is not set to true a 409 will be returned and no data will be modified :param class_: The class of the semantic type, note that this must be a valid URL :param property_: The property of the semantic type :param force: Force create the semantic type, if this is true and the type already exists the existing type (and all of its columns and data) will be deleted before creation :return: The id of the new semantic type and a response code of 201 if the creation was successful, otherwise it will be an error message with the appropriate error code """ class_ = class_.rstrip("/") property_ = property_.rstrip("/") ## Verify that class is a valid uri and namespace is a valid uri namespace = "/".join(class_.replace("#", "/").split("/")[:-1]) ## Actually add the type type_id = get_type_id(class_, property_) db_body = {ID: type_id, DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, CLASS: class_, PROPERTY: property_, NAMESPACE: namespace} if self.db.find_one(db_body): if force: self.db.delete_many({DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}) self.db.delete_many(db_body) else: return type_id, 409 self.db.insert_one(db_body) return type_id, 201 def semantic_types_delete(self, class_=None, property_=None, type_ids=None, namespaces=None, source_names=None, column_names=None, column_ids=None, models=None, delete_all=False): """ Deletes all of the semantic types (and all of their columns/data) that fit the given parameters. 
:param class_: The class of the semantic types to delete :param property_: The property of the semantic types to delete :param type_ids: The possible ids of the semantic types to delete :param namespaces: The possible namespaces of the semantic types to delete :param source_names: The possible source names of at least one column of a semantic type must have :param column_names: The possible column names of at least one column of a semantic type must have :param column_ids: The possible column ids of at least one column of a semantic type must have :param models: The possible column model of at least one column of a semantic type must have :param delete_all: Set this to true if all semantic types should be deleted :return: The amount of semantic types deleted and a 200 if it worked, otherwise and error message with the appropriate code """ if class_ is None and property_ is None and type_ids is None and namespaces is None and source_names is None and column_names is None and column_ids is None and models is None and not delete_all: return "To delete all semantic types give deleteAll as true", 400 return "All " + str(self.db.delete_many({DATA_TYPE: {"$in": [DATA_TYPE_SEMANTIC_TYPE, DATA_TYPE_COLUMN]}}).deleted_count) + " semantic types and their data were deleted", 200 print str(class_)+" "+str(property_)+" "+str(type_ids)+" "+str(namespaces)+" "+str(source_names)+" "+str(column_names)+" "+str(column_ids)+" "+str(models)+" "+str(delete_all) # Find the parent semantic types and everything below them of everything which meets column requirements type_ids_to_delete = [] db_body = {DATA_TYPE: DATA_TYPE_COLUMN} db_body_id = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE} if type_ids is not None: db_body[TYPE_ID] = {"$in": type_ids} db_body_id[ID] = {"$in": type_ids} if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names} if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names} if column_ids is not None: db_body[COLUMN_ID_PATH] = {"$in": column_ids} 
if models is not None: db_body[MODEL] = {"$in": models} for col in self.db.find(db_body): print "col[TYPE_ID] = "+str(col[TYPE_ID]) if col[TYPE_ID] not in type_ids_to_delete: type_ids_to_delete.append(col[TYPE_ID]) for col in self.db.find(db_body_id): print "col[ID] = "+str(col[ID]) if col[ID] not in type_ids_to_delete: type_ids_to_delete.append(col[ID]) # Find the semantic types which meet the other requirements and delete all types which need to be possible_types = [] db_body = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE} if class_ is not None: db_body[CLASS] = class_ if property_ is not None: db_body[PROPERTY] = property_ if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces} if type_ids is None and source_names is None and column_names is None and column_ids is None and models is None: deleted = self.db.delete_many(db_body).deleted_count else: for t in self.db.find(db_body): if t[ID] not in possible_types: possible_types.append(t[ID]) for t in self.db.find(db_body_id): if t[ID] not in possible_types: possible_types.append(t[ID]) for id_ in type_ids_to_delete: if id_ not in possible_types: type_ids_to_delete.remove(id_) db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: {"$in": type_ids_to_delete}} self.db.delete_many(db_body) deleted = self.db.delete_many( {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, ID: {"$in": type_ids_to_delete}}).deleted_count if deleted < 1: return "No semantic types with the given parameters were found", 404 return str(deleted) + " semantic types matched parameters and were deleted", 200 ################ SemanticTypesColumns ################ def semantic_types_columns_get(self, type_id, column_ids=None, column_names=None, source_names=None, models=None, return_column_data=False): """ Returns all of the columns in a semantic type that fit the given parameters. 
:param type_id: The id of the semantic type :param column_ids: The possible ids of the columns to be returned :param column_names: The possible names of the columns to be returned :param source_names: The possible source names of the columns to be returned :param models: The possible models of the columns to be returned :param return_column_data: True if all of the data in the column should be returned with the columns :return: All of the columns in the semantic type that fit the given parameters """ print(type_id) db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id} if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names} if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names} if column_ids is not None: db_body[ID] = {"$in": column_ids} if models is not None: db_body[MODEL] = {"$in": models} result = list(self.db.find(db_body)) if len(result) < 1: return "No columns matching the given parameters were found", 404 return json_response(clean_columns_output(result, return_column_data), 200) def semantic_types_columns_post_put(self, type_id, column_name, source_name, model, data=[], force=False): """ Create a column in a semantic type, optionally with data. 
:param type_id: Id of the semantic type to create the column in :param column_name: The name of the column to be created :param source_name: The name of the source of the column to be created :param model: The model of the column to be created :param data: The (optional) list of data to put into the column on creation :param force: True if the column should be replaced if it already exists :return: The id of the newly created with a 201 if it was successful, otherwise an error message with the appropriate error code """ column = Column(column_name, source_name) column.semantic_type = type_id #if the size of the training data is MORE than a threshold value, then sample the threshold values randomly if(len(data)>SAMPLE_SIZE): data = random.sample(data, SAMPLE_SIZE) for value in data: column.add_value(value) result = self._create_column(column, type_id, column_name, source_name, model, force) return result def semantic_types_columns_delete(self, type_id, column_ids=None, column_names=None, source_names=None, models=None): """ Delete all of the columns in a semantic type that match the given parameters. 
:param type_id: The id of the semantic type to delete the columns from :param column_ids: The possible ids of the columns to delete :param source_names: The possible names of the columns to delete :param column_names: The possible source names of the columns to delete :param models: The possible models of the columns to delete :return: The number of columns deteled with a 200 if successful, otherwise an error message with an appropriate error code """ db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id} if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names} if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names} if column_ids is not None: db_body[ID] = {"$in": column_ids} if models is not None: db_body[MODEL] = {"$in": models} found_columns = list(self.db.find(db_body)) if len(found_columns) < 1: return "No columns were found with the given parameters", 404 return str(self.db.delete_many(db_body).deleted_count) + " columns deleted successfully", 200 ################ SemanticTypesColumnData ################ def semantic_types_column_data_get(self, column_id): """ Returns all of the data in the column :param column_id: Id of the column to get the data from :return: The column and all of its info """ result = list(self.db.find({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})) if len(result) < 1: return "No column with that id was found", 404 if len(result) > 1: return "More than one column was found with that id", 500 return json_response(clean_column_output(result[0]), 200) def semantic_types_column_data_post_put(self, column_id, body, force=False): """ Add or replace data on an existing column Notes: If the column does not exist a 404 will be returned :param column_id: Id of the column to add/replace the data of :param body: An array of the new data :param force: True if the current data in the column should be replaced, false if the new data should just be appended :return: A conformation with a 201 if it was added 
successfully or an error message with an appropriate error code if it was not successful """ column_data = self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}) if column_data.matched_count < 1: return "No column with that id was found", 404 if column_data.matched_count > 1: return "More than one column was found with that id", 500 column = Column(column_data[COLUMN_NAME], column_data[SOURCE_NAME], get_type_from_column_id(column_id)) if not force: column.read_json_to_column(column_data) for value in body: column.add_value(value) data = column.to_json() self.db.update_many(data) return "Column data updated", 201 def semantic_types_column_data_delete(self, column_id): """ Delete the data from the column with the given id :param column_id: Id of the column to delete the data from :return: A deletion conformation with a 200 if successful, otherwise an error message with an appropriate error code """ result = self.db.update_many({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}, {"$set": {DATA: []}}) if result.matched_count < 1: return "No column with that id was found", 404 if result.matched_count > 1: return "More than one column was found with that id", 500 column = self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}) self.db.delete_one({DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: get_type_from_column_id(column_id)}) self.db.delete_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}) return "Column data deleted", 200 ################ BulkAddModels ################ def bulk_add_models_get(self, model_ids=None, model_names=None, model_desc=None, show_all=False, crunch_data=True): """ Returns the current state of all of the bulk add models. 
:param model_ids: The possible ids of the models to get :param model_names: The possible names of the models to get :param model_desc: The possible descriptions of the models to get :param show_all: True if the whole model should be returned :param crunch_data: False if learnedSemanticTypes should not be generated and the version in the db should be used instead, note that the data in the db is updated every time a get is run with crunch_data=true :return: All of the models that fit the given parameters """ db_body = {DATA_TYPE: DATA_TYPE_MODEL} if model_ids is not None: db_body[ID] = {"$in": model_ids} if model_names is not None: db_body[NAME] = {"$in": model_names} if model_desc is not None: db_body[MODEL_DESC] = model_desc db_result = list(self.db.find(db_body)) if len(db_result) < 1: return "No models were found with the given parameters", 404 # Construct the return body return_body = [] for mod in db_result: o = collections.OrderedDict() o[MODEL_ID] = mod[ID] o[NAME] = mod[NAME] o[DESC] = mod[DESC] if show_all: o[MODEL] = self._update_bulk_add_model(mod[BULK_ADD_MODEL_DATA], mod[MODEL]) if crunch_data else mod[BULK_ADD_MODEL_DATA] return_body.append(o) return json_response(return_body, 200) def bulk_add_models_post(self, model, column_model=DEFAULT_BULK_MODEL): """ Add a bulk add model. 
:param column_model: The model that all of the created columns should have :param model: A dictionary of the model :return: Stats of the data added """ #### Assert the required elements exist if BAC_ID not in model: return "The given model must have an id", 400 if BAC_NAME not in model: return "The given model must have a name", 400 if BAC_DESC not in model: return "The given model must have a description", 400 if BAC_GRAPH not in model: return "The given model must have a graph", 400 if BAC_NODES not in model[BAC_GRAPH]: return "The given model must have nodes within the graph", 400 if len(list(self.db.find({ID: model[BAC_ID]}))) > 0: return "Model id already exists", 409 #### Parse and add the model # Try to add of the given semantic types and columns new_type_count = 0 new_column_count = 0 existed_type_count = 0 existed_column_count = 0 for n in model[BAC_GRAPH][BAC_NODES]: if n.get(BAC_USER_SEMANTIC_TYPES): for ust in n[BAC_USER_SEMANTIC_TYPES]: semantic_status = self.semantic_types_post_put(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI], False) if semantic_status[1] == 201: new_type_count += 1 elif semantic_status[1] == 409: existed_type_count += 1 elif semantic_status[1] == 400: return semantic_status else: return "Error occurred while adding semantic type: " + str(ust), 500 column_status = self._create_column( get_type_id(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI]), n[BAC_COLUMN_NAME], model[BAC_NAME], column_model) if column_status[1] == 201: new_column_count += 1 elif column_status[1] == 409: existed_column_count += 1 elif column_status[1] == 400: return column_status else: return "Error occurred while adding column for semantic type: " + str(ust), 500 # Nothing bad happened when creating the semantic types and columns, so add the model to the DB self.db.insert_one( {DATA_TYPE: DATA_TYPE_MODEL, ID: model["id"], NAME: model[BAC_NAME], DESC: model["description"], MODEL: column_model, BULK_ADD_MODEL_DATA: model}) return "Model and columns added, 
" + str(new_type_count) + " semantic types created, " + \ str(existed_type_count) + " semantic types already existed, " + \ str(new_column_count) + " columns created, and " + \ str(existed_column_count) + " columns already existed.", 201 def bulk_add_models_delete(self, model_ids=None, model_names=None, model_desc=None): """ Delete all of the bulk add models which fit the given parameters :param model_ids: The possible ids of the models to delete :param model_names: The possible names of the models to delete :param model_desc: The possible descriptions of the models to delete :return: The amount of models deleted with a 200 if successful, otherwise an error message with the appropriate code """ db_body = {DATA_TYPE: DATA_TYPE_MODEL} if model_ids is not None: db_body[ID] = {"$in": model_ids} if model_names is not None: db_body[NAME] = {"$in": model_names} if model_desc is not None: db_body[MODEL_DESC] = model_desc deleted_count = self.db.delete_many(db_body).deleted_count if deleted_count < 1: return "No models were found with the given parameters", 404 return str(deleted_count) + " models deleted successfully", 200 ################ BulkAddModelData ################ def bulk_add_model_data_get(self, model_id, crunch_data): """ Returns the current state of the bulk add model :param model_id: The id of the model to get :param crunch_data: False if learnedSemanticTypes should not be generated and the version in the db should be used instead, note that the data in the db is updated every time a get is run with crunch_data=true :return: The current state of the bulk add model """ db_result = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id})) if len(db_result) < 1: return "A model was not found with the given id", 404 if len(db_result) > 1: return "More than one model was found with the given id", 500 db_result = db_result[0] return json_response( self._update_bulk_add_model(db_result[BULK_ADD_MODEL_DATA], db_result[MODEL]) if crunch_data else db_result[ 
BULK_ADD_MODEL_DATA], 200) def bulk_add_model_data_post(self, model_id, column_model, data): """ Add data to the service with a bulk add model :param model_id: The id of the model to add off of :param column_model: The model of the columns being used with that model :param data: The list of dictionaries with all of the data to add :return: A conformation message with a 201 if it was successful, otherwise an error message with the appropriate code """ # Get the model and parse the json lines model = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id})) if len(model) < 1: return "The given model was not found", 404 if len(model) > 1: return "More than one model was found with the id", 500 model = model[0][BULK_ADD_MODEL_DATA] # Get all of the data in each column for n in model[BAC_GRAPH][BAC_NODES]: column_data = [] for line in data: if n.get(BAC_COLUMN_NAME): column_data.append(line[n[BAC_COLUMN_NAME]]) # Add it to the db if n.get(BAC_USER_SEMANTIC_TYPES): for ust in n[BAC_USER_SEMANTIC_TYPES]: result = self.semantic_types_column_data_post_put( get_column_id(get_type_id(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI]), n[BAC_COLUMN_NAME], model[BAC_NAME], column_model), column_data, False)[1] if result == 201: continue elif result == 404: return "A required column was not found", 404 else: return "Error occurred while adding data to the column", 500 return "Data successfully added to columns", 201
from client import TradeClient, Client
import time
import sys
sys.path.append('../')
import config

# Poll the wtracker.orders collection for recent order snapshots, pruning
# anything older than the lookback window on every pass.
# NOTE(review): MongoClient is used below but not imported in this visible
# chunk -- presumably imported elsewhere in the original file; confirm.
bfxclient = Client()
order = MongoClient().wtracker.orders
lookback = 1  # window size in seconds

while (1):
    # Consider only documents newer than (now - lookback)...
    t = int(time.time()) - lookback
    cur = order.find({'ts': {'$gt': t}})
    # ...and discard everything that has aged past twice the window.
    order.delete_many({'ts': {'$lt': t - lookback}})
    # look for most expensive in asks.
    # look for cheapest in bids.
    group = {}          # presumably per-pair aggregation -- loop body is truncated here, TODO confirm
    disqualified = []
    count = 0
    base_cur = {}
    for x in cur:
        count += 1
        # NOTE(review): at most 2 recent snapshots are inspected per pass.
        if count > 2:
            break
        _id = x['ts']
        pair = x['pair']
class ConfigurationIO:
    """MongoDB-backed store for the tokenizer's text libraries and tasks.

    Collections (database "tokenizer" on localhost:20000):
      * TextLibrary -- untrained sentences awaiting processing
      * TextTrained -- sentences that have already been trained
      * Tasks       -- metadata describing each imported database
    """

    def __init__(self):
        # A single client suffices: all three collections live in the same
        # database on the same server (the original opened three connections).
        database = MongoClient('localhost', 20000).get_database("tokenizer")
        self.config_db = database.get_collection('TextLibrary')
        self.train_db = database.get_collection('TextTrained')
        # self.label_db = database.get_collection('Labels')
        self.task_db = database.get_collection('Tasks')
        print('configuration initialization done')

    def _max_existing_id(self):
        """Return the largest ``_id`` in either text collection, or 0 if both are empty."""
        max_ids = []
        for collection in (self.config_db, self.train_db):
            # count_documents({}) replaces the deprecated cursor.count(),
            # which was removed in PyMongo 4.
            if collection.count_documents({}) > 0:
                max_ids.append(collection.find_one(sort=[("_id", -1)])["_id"])
        return max(max_ids, default=0)

    def insertTextIntoDatabase(self, sentences, database):
        """Append *sentences* to TextLibrary, each tagged with *database*.

        New ``_id`` values continue after the largest id already used by
        either the untrained or the trained collection, keeping ids unique
        across both collections.

        :param sentences: iterable of sentence strings to store
        :param database: name of the source database the sentences belong to
        """
        max_id = self._max_existing_id()
        sentence_state = [{"_id": index + 1 + max_id, "text": s, "database": database}
                          for index, s in enumerate(sentences)]
        saveJsonObj = json.dumps(sentence_state, ensure_ascii=False)
        print(saveJsonObj)
        # insert_many replaces the deprecated Collection.insert(), removed in
        # PyMongo 4; the json round-trip mirrors the original behaviour.
        self.config_db.insert_many(json.loads(saveJsonObj))

    def insertTask(self, databaseName, type, tags, description):
        """Record task metadata for *databaseName*.

        :param databaseName: database the task refers to
        :param type: task category (parameter name shadows the builtin; kept
                     for backward compatibility with existing callers)
        :param tags: list of tags attached to the task
        :param description: free-text description of the task
        """
        task = {
            'database': databaseName,
            'category': type,
            'description': description,
            'tags': tags,
            'timeAdded': strftime("%Y-%m-%d %H:%M:%S", gmtime()),
        }
        # insert_one replaces the deprecated Collection.insert().
        self.task_db.insert_one(task)

    def getCategoryOfDatabase(self, database):
        """Return the stored category for *database*.

        NOTE(review): raises TypeError if no task document exists for the
        given database (find_one returns None) -- confirm callers guarantee
        the task was inserted first.
        """
        print(database)
        cursor = self.task_db.find_one({'database': database}, {'category': 1})
        print(cursor)
        return cursor['category']

    def deleteDb(self, dbName):
        """Remove every untrained sentence belonging to *dbName*."""
        self.config_db.delete_many({'database': dbName})

    @staticmethod
    def _distinct_databases(collection):
        """Distinct 'database' values from *collection* in first-seen order (ids > 0 only)."""
        names = []
        for item in collection.find({'_id': {'$gt': 0}}, {'database': 1}):
            if item['database'] not in names:
                names.append(item['database'])
        return names

    def getTrainedDatabases(self):
        """Return the distinct database names present in TextTrained."""
        return self._distinct_databases(self.train_db)

    def getUntrainedDatabases(self):
        """Return the distinct database names present in TextLibrary."""
        return self._distinct_databases(self.config_db)

    def getUntrainedDatabasesGroupByCategory(self):
        """Return unique ``{"name", "category"}`` dicts from TextLibrary.

        NOTE(review): insertTextIntoDatabase does not write a 'category'
        field, so this raises KeyError unless documents gain one elsewhere
        -- verify against the writers of this collection.
        """
        groups = []
        for item in self.config_db.find({'_id': {'$gt': 0}},
                                        {'database': 1, 'category': 1}):
            doc = {"name": item['database'], "category": item['category']}
            if doc not in groups:
                groups.append(doc)
        return groups

    def getSubmittedSentencesFromDatabase(self, database):
        """Return all trained documents for *database*, excluding the
        'database' and 'id' fields from each document."""
        trainCursor = self.train_db.find({"database": database},
                                         {"database": 0, "id": 0})
        return [doc for doc in trainCursor]
def test_mongo_dump_and_restore(docker_container, tmp_path):
    """End-to-end test of mongo_utils.mongo_dump / mongo_restore against a
    dockerized MongoDB: dumps a seeded collection, then restores it into a
    fresh container exercising duplicate handling and the drop option.

    :param docker_container: fixture yielding a running docker container
    :param tmp_path: pytest tmp directory used for the dump file and appdir
    """
    # Dummy data insertion
    docs = [
        {'name': 'col1doc1'},
        {'name': 'col1doc2'},
        {'name': 'col1doc3'},
    ]
    inserted_doc_ids = None
    port = 27020
    client = MongoClient(f'mongodb://localhost:{port}')
    # The uri is what the mongo tools use *inside* the container, hence no port.
    uri = 'mongodb://localhost/tmpdb'
    dump_path = str(tmp_path / 'dump1.tgz')
    with docker_container('mongo:4.0', ports={'27017/tcp': str(port)}, appdir=str(tmp_path)) as container:  # noqa: E501
        wait_for_mongo_to_be_up(container)
        cmd_prefix = f'docker exec -i {container.id} '
        inserted_doc_ids = client.db1['col1'].insert_many(docs).inserted_ids
        # Get a dump after inserting the documents
        with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix, uri=uri, collection='col1', db='db1') as (stream, stats):  # noqa: E501
            with open(dump_path, 'wb') as fp:
                fp.write(stream.read())
        assert stats.num_docs == 3
        # Doesn't count the number of docs if requested not to
        with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix, uri=uri, collection='col1', db='db1', count=False) as (_, stats):  # noqa: E501
            pass
        assert not stats.num_docs
        # Test if a dummy falsey command throws
        with pytest.raises(Exception) as exc:
            with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix + ' false ', uri=uri, collection='col1', db='db1') as _:  # noqa: E501
                pass
        assert re.search('exited with error code', str(exc))
    # Second, fresh container: restore the dump into a different db/collection.
    with docker_container('mongo:4.0', ports={'27017/tcp': str(port)}, appdir=str(tmp_path)) as container:  # noqa: E501
        wait_for_mongo_to_be_up(container)

        def restore_dump(**kwargs):
            # Re-open the dump each time so every restore sees the full stream.
            with open(dump_path, 'rb') as fp:
                return mongo_utils.mongo_restore(
                    stream=fp,
                    cmd_prefix=f'docker exec -i {container.id} ',
                    uri=uri,
                    collection='col2',
                    db='db2',
                    **kwargs,
                )

        # Insert one document and check if it wasn't overwritten
        col = MongoClient(f'mongodb://localhost:{port}').db2['col2']
        col.insert_one({
            '_id': inserted_doc_ids[0],
            'name': 'test',
        })
        stats = restore_dump()
        # The pre-existing _id wins: only the 2 non-colliding docs restore.
        assert {d['name'] for d in col.find()} == {
            'test', 'col1doc2', 'col1doc3',
        }
        assert stats.num_docs == 2
        # Checking if duplicated docs are properly returned
        col.drop()
        col.insert_one({
            '_id': inserted_doc_ids[0],
            'name': 'new doc 1',
        })
        col.insert_one({'name': 'new doc 2'})
        stats = restore_dump()
        assert stats.duplicated_ids == [inserted_doc_ids[0]]
        r = col.delete_many({'_id': {'$in': stats.duplicated_ids}})
        LOGGER.warning(r.raw_result)
        # After removing the colliding doc, restoring again brings back
        # col1doc1; the two docs restored previously are now the duplicates.
        stats = restore_dump()
        assert stats.num_docs == 1
        assert set(stats.duplicated_ids) == set(inserted_doc_ids[1:])
        assert {d['name'] for d in col.find()} == {
            'col1doc1', 'new doc 2', 'col1doc2', 'col1doc3',
        }
        # Now drop the collection
        col.insert_one({'name': 'new doc'})
        stats = restore_dump(drop=True)
        # drop=True discards everything present before the restore.
        assert {d['name'] for d in col.find()} == {
            'col1doc1', 'col1doc2', 'col1doc3',
        }
        assert stats.num_docs == 3
class MongoDatabase(Database):
    """ This class implements the abstract class Database and communicates with
    the MongoDB database. It has several methods for this communication. """

    def __init__(self, collection):
        # All device documents live in the "Hestia" database.
        self._devices = MongoClient()["Hestia"][collection]

    def get_all_devices(self):
        """Instantiates all devices in database"""
        # Each stored document names the module/class to instantiate.
        return [
            self._get_class(data["module"], data["class"])(self, data["_id"])
            for data in self._devices.find()
        ]

    def get_device(self, device_id):
        """Instantiates the device with the given device_id"""
        data = self.__get_device_data(device_id)
        device = self._get_class(data["module"], data["class"])
        return device(self, device_id)

    def add_device(self, plugin):
        """Adds the given plugin info as a new device"""
        # Store the id as a string so lookups by string id work uniformly.
        plugin["_id"] = str(ObjectId())
        self._devices.insert_one(plugin)

    def delete_device(self, device_id):
        """Removes the device with the given device_id (no-op if absent)."""
        self._devices.delete_one({"_id": device_id})

    def update_field(self, device_id, field, new_value):
        """Sets a single top-level field of the device document."""
        self._devices.find_one_and_update({"_id": device_id},
                                          {"$set": {
                                              field: new_value
                                          }})

    def get_field(self, device_id, field):
        """Returns one field of the device document; raises NotFoundException
        if the device does not exist, KeyError if the field is missing."""
        data = self.__get_device_data(device_id)
        return data[field]

    def get_activator_field(self, device_id, activator_id, field):
        """Returns one field of a device's activator sub-document."""
        data = self.__get_device_data(device_id)
        activator = self.__get_activator(data, activator_id)
        return activator[field]

    def update_activator_field(self, device_id, activator_id, field, new_value):
        """Sets one field inside the device's nested activators mapping."""
        self._devices.find_one_and_update(
            {"_id": device_id},
            {"$set": {
                "activators." + activator_id + "." + field: new_value
            }})

    def delete_all_devices(self):
        """Removes every device document from the collection."""
        self._devices.delete_many({})

    def __get_device_data(self, device_id):
        """Get data of device based on its id"""
        # Explicit _id filter (equivalent to passing the raw id to find_one).
        data = self._devices.find_one({"_id": device_id})
        if data is None:
            raise NotFoundException("device")
        else:
            return data

    @staticmethod
    def __get_activator(data, activator_id):
        """Returns the activator sub-document, raising NotFoundException if absent."""
        try:
            return data["activators"][activator_id]
        except KeyError as exception:
            # Chain the KeyError so the original cause survives in tracebacks
            # (the original bound `exception` but discarded it).
            raise NotFoundException("activator") from exception