def delete_currents():
    """Remove the current buyer and seller from the accounts collection and
    reset both state files to empty JSON objects.

    Reads ``currentbuyer.json`` and ``currentseller.json``, best-effort
    deletes the matching account document by ``public_key``, then truncates
    each file to ``{}``.
    """
    # NOTE(review): 'accountso' looks like a typo for 'accounts' — confirm
    # against the rest of the project before renaming.
    collection = MongoClient().pmes.accountso
    for path in ("currentbuyer.json", "currentseller.json"):
        with open(path) as file:
            party = json.load(file)
        try:
            collection.delete_one({"public_key": party["public_key"]})
        except Exception:
            # Best-effort: a missing key or an unreachable DB must not stop
            # the reset of the state files (original used a bare `except`).
            pass
        with open(path, "w") as file:
            file.write(json.dumps({}))
    print("Done")
class Stashbin(object):
    """Store and retrieve Items in MongoDB, with optional username/password
    protection per item and a secret delete key for unprotected items."""

    def __init__(self):
        conn_str = 'mongodb://{}:{}@{}/{}'.format(
            MONGO_DB_USERNAME, MONGO_DB_PASSWORD, MONGO_DB_HOST, MONGO_DB_NAME)
        self._itemsCollection = MongoClient(conn_str)[MONGO_DB_NAME]['items']

    def _hash(self, string):
        """Return the hex SHA-1 digest of *string* (str or bytes).

        Bug fix: hashlib requires bytes; the original raised TypeError for
        str input.  NOTE(review): SHA-1 is a weak password hash — consider
        hashlib.pbkdf2_hmac for real credential storage.
        """
        if isinstance(string, str):
            string = string.encode('utf-8')
        return hashlib.sha1(string).hexdigest()

    def _find_doc(self, identifier):
        # Shared lookup used by get() and delete().
        doc = self._itemsCollection.find_one({'identifier': identifier})
        if doc is None:
            raise ItemNotFoundError
        return doc

    def _check_auth(self, doc, username, password):
        """Raise AuthError unless the supplied credentials satisfy the doc's.

        Bug fix: a missing password (None) now raises AuthError instead of
        crashing inside the hash function.
        """
        u = doc.get('username', '')
        p = doc.get('password', '')
        if u and username != u:
            raise AuthError
        if p and (password is None or self._hash(password) != p):
            raise AuthError

    def get(self, identifier, username=None, password=None):
        """Return the Item stored under *identifier*.

        :raises ItemNotFoundError: no such identifier
        :raises AuthError: wrong username/password for a protected item
        """
        doc = self._find_doc(identifier)
        self._check_auth(doc, username, password)
        return Item.factory(doc['type']).build(doc)

    def stash(self, item, username=None, password=None):
        """Upload *item*, store its document (with optional credentials),
        and return the generated delete key."""
        item.upload()
        document = item.getDocument()
        if username:
            document['username'] = username
        if password:
            document['password'] = self._hash(password)
        key = uuid.uuid1().hex
        document['key'] = key
        self._itemsCollection.insert_one(document)
        return key

    def delete(self, identifier, username=None, password=None, key=None):
        """Delete the item stored under *identifier*.

        Credentialed items require the matching username/password; otherwise
        the delete key returned by stash() is required.
        """
        doc = self._find_doc(identifier)
        if doc.get('username', '') or doc.get('password', ''):
            self._check_auth(doc, username, password)
        elif doc.get('key') != key:
            raise DeleteKeyError
        item = Item.factory(doc['type']).build(doc)
        self._itemsCollection.delete_one({"identifier": identifier})
        item.remove()
class MongoStore(Store):
    """Store backed by a MongoDB collection, documents keyed by '_id'."""

    def __init__(self, db, collection, url='mongodb://localhost'):
        self.collection = MongoClient(url)[db][collection]

    def fetch(self, oid):
        """Return the document with _id == oid, or None."""
        return self.collection.find_one({'_id': oid})

    def fetch_all(self):
        """Return a cursor over every stored document."""
        return self.collection.find()

    def iter_ids(self):
        """Yield the _id of every stored document."""
        for obj in self.collection.find({}, {'_id': True}):
            yield obj['_id']

    def save(self, obj):
        """Insert or replace *obj* (upsert on _id).

        Bug fix: Collection.save() was removed in pymongo 3/4; emulate its
        insert-or-replace semantics explicitly.
        """
        if '_id' in obj:
            self.collection.replace_one({'_id': obj['_id']}, obj, upsert=True)
        else:
            self.collection.insert_one(obj)

    def save_many(self, obj_iter):
        """Bulk-insert documents (Collection.insert() was removed in pymongo 3/4)."""
        self.collection.insert_many(obj_iter)

    def flush(self):
        """Drop the entire collection."""
        self.collection.drop()

    def delete(self, oid):
        """Delete the document with _id == oid, if present."""
        self.collection.delete_one({'_id': oid})
def delete_token(token):
    """
    Remove used token for scaling actions to be deleted

    :param token: token used, optionally prefixed with 'Bearer '
    :return: None
    """
    bare_token = token.split('Bearer ')[-1]
    tokens = MongoClient('mongo', 27017)['osm']['tokens']
    tokens.delete_one({'id': bare_token})
def delete_noise_question():
    """Delete 'noise' questions — those whose title has fewer than three
    space-separated words — and print how many were removed."""
    questions = MongoClient().zhihu_network.questions
    ids_to_delete = [q['_id'] for q in questions.find()
                     if len(q['title'].split(' ')) < 3]
    print(len(ids_to_delete))
    if ids_to_delete:
        # One round trip instead of a delete_one call per document.
        questions.delete_many({'_id': {'$in': ids_to_delete}})
class PluginXmlManager():
    """CRUD helper for plugin/system XML documents stored in the 'beat'
    MongoDB database (collections Plugin and System)."""

    def __init__(self):
        self.pluginPath = MongoClient('localhost', 27017).beat.Plugin
        self.systemPath = MongoClient('localhost', 27017).beat.System

    def uploadSystemOnSave(self, xml):
        """Parse the system XML string and insert it as a document."""
        my_dict = xmltodict.parse(xml)
        self.systemPath.insert_one(my_dict)

    def deleteSystem(self):
        """Drop every stored system document."""
        self.systemPath.drop()

    def getListOfPlugins(self):
        """Return the names of all stored plugins."""
        return [item['Plugin']['Plugin_name']['#text']
                for item in self.pluginPath.find()]

    def deleteSelectedPlugin(self, nameofplugin):
        """Delete the plugin document with the given name."""
        self.pluginPath.delete_one({"Plugin.Plugin_name.#text": nameofplugin})

    def getSelectedPlugin(self, plugin_name):
        """Return the plugin document with the given name, or None.

        Bug fix: removed a copy-pasted loop over an undefined
        'projectsList' that raised NameError whenever no plugin matched.
        """
        for item in self.pluginPath.find():
            if item['Plugin']['Plugin_name']['#text'] == plugin_name:
                return item
        return None

    def uploadPlugin(self, xml):
        """Parse the plugin XML string and insert it as a document."""
        my_dict = xmltodict.parse(xml)
        self.pluginPath.insert_one(my_dict)

    def pluginExists(self, new_plugin_name):
        """True if a plugin with this name is already stored."""
        for item in self.pluginPath.find():
            if item['Plugin']['Plugin_name']['#text'] == new_plugin_name:
                return True
        return False

    def updatePluginDescription(self, old_description, new_description):
        """Replace a plugin's description, matched by its old description."""
        myquery = {"Plugin.Plugin_Desc.#text": old_description}
        new_values = {"$set": {"Plugin.Plugin_Desc.#text": new_description}}
        self.pluginPath.update_one(myquery, new_values)

    # holder: element name under which xml2's root is placed
    def xmlMerger(self, holder, xml, xml2):
        """Append the root of *xml2* under every *holder* element of *xml*
        and return the merged ElementTree."""
        tree1 = ET.parse(xml)
        root2 = ET.parse(xml2).getroot()
        for element1 in tree1.findall(holder):
            element1.append(root2)
        return tree1
class Stat:
    """Collect docker stats snapshots and persist them to a size-capped
    MongoDB collection (collection name read from a JSON config file)."""

    def __init__(self, config_file):
        self.docker = dockerGuest(config_file)
        self.config_file = config_file
        self.collection = MongoClient(
            os.environ['DB_PORT_27017_TCP_ADDR'], 27017)['test'][self.get_collection()]

    def get_collection(self):
        """Read the target collection name from the config JSON next to this file."""
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_path = os.path.join(dir_path, self.config_file)
        with open(file_path) as f:  # original leaked the file handle
            return json.load(f)["collection"]

    def get_config_file(self):
        return self.config_file

    @staticmethod
    def get_time():
        """Current wall-clock time as a datetime (stored as the doc's 'time')."""
        return datetime.now()

    def data_to_save(self):
        """Build the snapshot document, or None when no stats are available."""
        stats = self.docker.get_stats()
        if stats is None:  # was '== None'
            return None
        return {"time": Stat.get_time(), "stats": stats}

    def save(self, cap=60):
        """Insert a snapshot, evicting the oldest document once the cap is hit.

        Bug fix: use '>=' instead of '=='; with '==', a collection that ever
        exceeded *cap* (e.g. after lowering cap) would never evict again.
        Also: Collection.count() was removed in pymongo 4.
        """
        new_data = self.data_to_save()
        if self.collection.count_documents({}) >= cap:
            oldest = self.collection.find_one(sort=[('time', 1)])
            self.collection.delete_one({'_id': oldest['_id']})
        self.collection.insert_one(new_data)
        logger.info("Saved in DB...")

    def save_to_db(self):
        """Persist a snapshot if stats are available, making space first."""
        data = self.data_to_save()
        if data is not None:
            if self.is_db_full():
                self.make_space_db()
            logger.info('DB Save')
            self.collection.insert_one(data)

    def make_space_db(self):
        """Drop the first stored document to make room for a new one."""
        logger.info('Making space')
        first = self.collection.find_one()
        self.collection.delete_one({'_id': first['_id']})

    def is_db_full(self):
        # Cursor.count() was removed in pymongo 4; '>=' so an over-full
        # collection still reports full.
        return self.collection.count_documents({}) >= 60
def remove_redundant(d='xmrc', c='job'):
    """Delete documents in db *d*, collection *c* that lack their required
    reference field ('company_id' for jobs, 'job' otherwise); print the count.

    :param d: database name
    :param c: collection name
    """
    collection = MongoClient()[d][c]
    key = 'company_id' if c == 'job' else 'job'
    removed = 0
    for doc in collection.find():
        if not doc.get(key):
            # Bug fix: delete by the unique _id.  The original matched on
            # {'id': doc.get('id')}; when 'id' was missing that query became
            # {'id': None} and could delete an unrelated document.
            collection.delete_one({'_id': doc['_id']})
            removed += 1
    print(removed)
def remove_arguments_from_db(self, p2pworkerarguments):
    """Delete the stored document for the given worker arguments.

    :param p2pworkerarguments: arguments object whose identifier pair
        locates the document to remove
    """
    remove_values_from_doc(p2pworkerarguments.object2doc())
    filter_ = {
        "identifier": p2pworkerarguments.p2parguments.args_identifier,
        'remote_identifier': p2pworkerarguments.remote_args_identifier,
    }
    # Named 'collection' (not 'client'): the chained indexing yields a
    # pymongo Collection, not a client.  Removed a redundant trailing 'pass'.
    collection = MongoClient(host=MONGO_HOST, port=MONGO_PORT)[
        self.p2pfunction.db_name][self.p2pfunction.db_collection]
    collection.delete_one(filter_)
def remove_morphologically_abnormal_verbs():
    """Delete verb entries whose form does not end in 'ω', 'ώ' or 'αι',
    then print how many were removed."""
    abnormal_count = 0
    coll = MongoClient(LOCALHOST, PORT)[DB_NAME][VERBS]
    # Collection.count() was removed in pymongo 4.
    count = counter(coll.count_documents({}))
    for entry in coll.find():
        next(count)
        verbs = entry[VERB]
        if isinstance(verbs, str):
            verbs = [verbs]
        for verb in verbs:
            # str.endswith accepts a tuple of suffixes — one call, not three.
            if not verb.endswith(('ω', 'ώ', 'αι')):
                coll.delete_one({VERB: verb})
                abnormal_count += 1
    print("\nRemoved {} abnormal verbs".format(abnormal_count))
class DatabaseWrapper:
    """Thin wrapper over one collection of the 'todos' MongoDB database,
    returning JSON-serializable documents and printing errors on failure."""

    def error_msg(self, motive):
        # Was a lambda bound to a class attribute (PEP 8 E731); the call
        # signature inst.error_msg(motive) is unchanged.  Typo fixed:
        # 'plausable' -> 'plausible'.
        return f'There was an error on {motive}, is the {motive} plausible?'

    def __init__(self, collection: str):
        try:
            self.collection = MongoClient(
                getenv('MONGO_URI'))['todos'][collection]
        except Exception as e:
            print('There is an error connecting to the database', e)

    @staticmethod
    def encode(o: List[dict]) -> List[Dict[str, Any]]:
        """Stringify each document's '_id' in place so it is JSON-serializable."""
        for u in o:
            if '_id' in u.keys():
                u['_id'] = JSONEncoderDocument().encode(u['_id']).replace(
                    '"', '')
        return o

    def search(self, query) -> List[Dict[str, Any]]:
        """Return all documents matching *query*; [] when the query fails.

        Bug fix: 'result' was unbound when find() raised, so the return
        statement itself crashed with NameError.
        """
        result = []
        try:
            result = list(self.collection.find(query))
        except Exception as e:
            print(self.error_msg('query'), e)
        return self.encode(result)

    def insert(self, todo) -> str:
        """Insert *todo* and return its id as a string ('' on failure).

        Bug fix: 'result' was unbound when insert_one() raised.
        """
        result = ''
        try:
            result = self.collection.insert_one(todo).inserted_id
        except Exception as e:
            print(self.error_msg('insert'), e)
        return str(result)

    def update(self, query, todo):
        """Apply *todo* as a $set update to the first document matching *query*."""
        try:
            self.collection.update_one(query, {"$set": todo})
        except Exception as e:
            print(self.error_msg('update'), e)

    def delete(self, query):
        """Delete the first document matching *query*."""
        try:
            self.collection.delete_one(query)
        except Exception as e:
            print(self.error_msg('delete'), e)
class ProjectXmlManager(): def __init__(self): self.projectPath = MongoClient('localhost', 27017).beat.Project def uploadProject(self, xml): my_dict = xmltodict.parse( xml) # parse XML into dictionary compatible with MongoDB self.projectPath.insert_one(my_dict) def getListOfProjects(self): projectsList = self.projectPath.find() list_of_projects = [] for item in projectsList: try: list_of_projects.append( item['Project']['Project_name']['#text']) except: continue return list_of_projects def getSelectedProject(self, project_name): projectsList = self.projectPath.find() for item in projectsList: if item['Project']['Project_name']['#text'] == project_name: return item def deleteSelectedProject(self, nameofProject): myquery = {"Project.Project_name.#text": nameofProject} self.projectPath.delete_one(myquery) def updateProjectDescription(self, old_description, new_description): myquery = {"Project.projectDescription.#text": old_description} new_values = { "$set": { "Project.projectDescription.#text": new_description } } self.projectPath.update_one(myquery, new_values) def projectExists(self, new_project_name): projectsList = self.projectPath.find() for item in projectsList: if item['Project']['Project_name']['#text'] == new_project_name: return True return False
class Book(object):
    """Minimal CRUD wrapper around the 'book' collection."""

    def __init__(self, db='pycon2015'):
        self.collection = MongoClient()[db].book

    def create(self, book):
        """Insert a new book document."""
        self.collection.insert_one(book)

    def read(self, name):
        """Fetch one book by name, or None when absent."""
        query = {'name': name}
        return self.collection.find_one(query)

    def update(self, name, new_name):
        """Rename the first book matching *name* to *new_name*."""
        selector = {'name': name}
        change = {'$set': {'name': new_name}}
        self.collection.update_one(selector, change)

    def delete(self, name):
        """Remove one book by name."""
        self.collection.delete_one({'name': name})
class NotationIO:
    """Move annotated sentence documents from the test collection
    ('sentence4test') to the train collection ('sentence4train')."""

    def __init__(self):
        db = MongoClient("172.19.241.248", 20000).get_database("tokenizer_qiao")
        self.test_db = db.get_collection('sentence4test')
        # Cursor.count() was removed in pymongo 4; count documents directly.
        self.test_size = self.test_db.count_documents({})
        # NOTE: this cursor is single-use — get_raw_randomly can only be
        # consumed once per instance.
        self.test_cursor = self.test_db.find()
        self.train_db = db.get_collection('sentence4train')

    def get_raw_randomly(self):
        """Yield roughly 70% of the test documents, chosen at random."""
        for doc in self.test_cursor:
            if random.random() > 0.3:
                yield doc

    def move_to_train(self, noted_doc):
        """Insert the annotated doc into train and remove it from test by _id."""
        self.train_db.insert_one(noted_doc)
        self.test_db.delete_one({"_id": noted_doc["_id"]})
class SolutionManager:
    """ Manage solutions. The form of a solution
        name: human readable name of a solution
        alg: algorithm used; Ex.) logistic regression
        dataset: training/testing data as 2-d array shaped as [# of samples, # of features]
        situations: a list of situation labels for each sample in the dataset
        serialized_obj: serialized created model instance
        desc: description of a solution
        accuracy: accuracy determined after verifying a model with 70% training dataset and 30% testing dataset
        domain: domain of a solution
        owner: id of a user who create the solution
        visibility: public or private
        auth_users: users who are accessible to the solution even though the visibility is private
    """

    def __init__(self):
        self.db = MongoClient().infaas.solutions

    def add_solution(self, solution):
        """Insert a new solution; True when acknowledged, False when invalid."""
        # if the entered solution is not valid, return False.
        if not self._verify_solution(solution):
            return False
        res = self.db.insert_one(solution)
        if res.acknowledged:
            return True
        return False

    def get_solutions(self, name=None):
        """Return a cursor over solutions, optionally filtered by name."""
        query = {}
        if name:
            query.update({"name": name})
        solutions = self.db.find(query)
        return solutions

    def update_solution(self, solution):
        """Replace a solution's allowed attributes; False if invalid/no change."""
        # if the entered solution is not valid, return False.
        if not self._verify_solution(solution):
            return False
        query = {"_id": solution.get("_id")}
        new = {k: v for k, v in solution.items() if k in ATTRIBUTES}
        res = self.db.replace_one(query, new)
        return True if res.modified_count > 0 else False

    def delete_solution(self, solution):
        """Delete by _id; True when a document was removed."""
        query = {"_id": solution.get("_id")}
        res = self.db.delete_one(query)
        return True if res.deleted_count > 0 else False

    @staticmethod
    def _verify_solution(solution):
        """A solution is valid when it is non-empty.

        Bug fix: the original returned False on BOTH paths, so every
        add_solution/update_solution call was rejected (compare
        DomainManager._verify_domain, which returns True).
        """
        if not solution:
            return False
        return True
def main() -> int:
    """Expand station counts into date documents.

    Repeatedly takes a station document that has not yet been expanded
    (no 'date' field), regroups its per-report date counts into one update
    per calendar date, bulk-writes those updates, then deletes the original
    document.

    :return: process exit code (0 on success)
    """
    coll = MongoClient(environ["MONGO_URI"]).counter.station
    while item := coll.find_one({"date": {"$exists": False}}):
        icao = item.pop("_id")
        print(icao)
        counts = {}
        for report, dates in item.items():
            for date_str, count in dates.items():
                # Parse once and use the datetime as the grouping key so
                # equal dates from different report types merge.  (The
                # original rebound 'date' from str to datetime in place.)
                date = datetime.strptime(date_str, r"%Y-%m-%d")
                counts.setdefault(date, {})[report] = count
        updates = [make_update(icao, date, count) for date, count in counts.items()]
        print(icao, len(updates))
        coll.bulk_write(updates, ordered=False)
        print("Deleting")
        coll.delete_one({"_id": icao})
    # Bug fix: the function is annotated '-> int' but previously fell off
    # the end and returned None.
    return 0
class DomainManager:
    """ Manage domains. The form of a domain
        name: human readable name of a domain; it should be unique.
        desc: description of a domain
        features: a list of features; [{'name', 'desc'}, ...]
        situations: a list of possible situations; possible inference results; [{'name', 'desc'}, ...]
    """

    def __init__(self):
        self.db = MongoClient().INFaaS.domains

    def add_domain(self, domain):
        """Insert a new domain document; True when acknowledged.

        Raises when the domain is empty or is missing required attributes.
        """
        if not (self._verify_domain(domain) and set(domain) >= ATTRIBUTES):
            raise Exception(constants.MSG_INVALID_PARAMS)
        if 'owner' in domain:
            domain['owner'] = ObjectId(domain.get('owner'))
        return bool(self.db.insert_one(domain).acknowledged)

    def get_domains(self, name=None):
        """Return a cursor over domains, optionally filtered by name."""
        query = {'name': name} if name else {}
        return self.db.find(query)

    def update_domain(self, domain):
        """Replace an existing domain's allowed attributes.

        Returns False when the domain is invalid or nothing was modified.
        """
        if not self._verify_domain(domain):
            return False
        replacement = {k: v for k, v in domain.items() if k in ATTRIBUTES}
        outcome = self.db.replace_one({'_id': domain.get('_id')}, replacement)
        return outcome.modified_count > 0

    def delete_domain(self, domain):
        """Delete by _id; True when a document was removed."""
        outcome = self.db.delete_one({'_id': domain.get('_id')})
        return outcome.deleted_count > 0

    @staticmethod
    def _verify_domain(domain):
        """A domain is valid when it is non-empty."""
        return bool(domain)
class MongoStore(Store):
    """Store backed by a MongoDB collection, documents keyed by '_id'."""

    def __init__(self, db, collection, uri='mongodb://localhost'):
        self.collection = MongoClient(uri)[db][collection]

    @classmethod
    def get_protocol(cls):
        """Protocol string used to select this store implementation."""
        return 'mongodb'

    def iter_ids(self):
        """Yield the _id of every stored document."""
        for obj in self.collection.find({}, {'_id': True}):
            yield obj['_id']

    def flush(self):
        """Drop the entire collection."""
        self.collection.drop()

    def exists(self, oid):
        """True iff exactly one document has _id == oid.

        Bug fix: Cursor.count() was removed in pymongo 4.
        """
        return self.collection.count_documents({'_id': oid}) == 1

    def delete(self, oid):
        """Delete the document with _id == oid, if present."""
        self.collection.delete_one({'_id': oid})

    def fetch(self, oid):
        """Return the document with _id == oid, or None."""
        return self.collection.find_one({'_id': oid})

    def fetch_all(self):
        """Return a cursor over every stored document."""
        return self.collection.find()

    def save(self, obj):
        """Insert or replace *obj* (upsert on _id).

        Bug fix: Collection.save() was removed in pymongo 3/4.
        """
        if '_id' in obj:
            self.collection.replace_one({'_id': obj['_id']}, obj, upsert=True)
        else:
            self.collection.insert_one(obj)

    def save_many(self, obj_iter):
        """Bulk-insert documents (Collection.insert() was removed in pymongo 3/4)."""
        self.collection.insert_many(obj_iter)

    @classmethod
    def Get(cls, store_id, uri='mongodb://localhost', **kwargs):
        """Build a store from a 'db:collection' identifier."""
        db, collection = store_id.split(':')
        return cls(db, collection, uri)
class NotationIO:
    """Move annotated sentences from the 'TextLibrary' collection to
    'TextTrained', preserving each sentence's source-database tag."""

    def __init__(self):
        db = MongoClient('localhost', 20000).get_database("tokenizer")
        self.test_db = db.get_collection('TextLibrary')
        # Cursor.count() was removed in pymongo 4; count documents directly.
        self.test_size = self.test_db.count_documents({})
        # NOTE: single-use cursor — get_raw_randomly can only be consumed once.
        self.test_cursor = self.test_db.find()
        self.train_db = db.get_collection('TextTrained')

    def get_raw_randomly(self):
        """Yield roughly 70% of the library documents, chosen at random."""
        for doc in self.test_cursor:
            if random.random() > 0.3:
                yield doc

    def get_raw_randomly_fromDatabase(self, database):
        """Yield roughly 20% of the documents tagged with *database*."""
        cursor = self.test_db.find({'database': database})
        for doc in cursor:
            if random.random() < 0.2:
                yield doc

    def get_size_of_database(self, database):
        """Number of documents tagged with *database* (pymongo-4-safe count)."""
        return self.test_db.count_documents({'database': database})

    def move_to_train(self, noted_doc):
        """Copy the doc's source-database tag, store it in train, drop it
        from the library by _id."""
        doc = noted_doc
        former_doc = self.test_db.find_one({"_id": noted_doc["_id"]})
        doc['database'] = former_doc['database']
        self.train_db.insert_one(doc)
        self.test_db.delete_one({"_id": noted_doc["_id"]})

    def getSubmittedSentences(self):
        """Return all training documents as a list."""
        return [doc for doc in self.train_db.find({})]
def update_token(project_id):
    """
    NBI Token generator to provide authorization access

    :param project_id: OSM Project identification
    :return: token to perform requests in an authorized way
    """
    tokens = MongoClient('mongo', 27017)['osm']['tokens']
    # Bug fix: uuid.uuid4() returns a UUID object; without str() the later
    # "Bearer " + token concatenation (and the string id fields stored in
    # Mongo) raised TypeError whenever the env var was unset.
    token = os.environ.get('NBI-Token', str(uuid.uuid4()))
    date = datetime.datetime.utcnow().timestamp()
    # Most recently expiring token record for this project is used as the
    # template for the refreshed token document.
    token_data = tokens.find_one({'project_id': project_id},
                                 sort=[('expires', DESCENDING)])
    token_data['_id'] = token
    token_data['id'] = token
    token_data['issued_at'] = date
    token_data['expires'] = date + 5
    try:
        tokens.delete_one({'id': token})
    except Exception as e:
        logger.debug(e)
    tokens.insert_one(token_data)
    return "Bearer " + token
class RepositoryMongo(object):
    """Generic CRUD repository over a single MongoDB collection."""

    def __init__(self, mongo_url, db_selector, collection_selector):
        self.collection = MongoClient(
            mongo_url)[db_selector][collection_selector]

    def create(self, value):
        """Insert *value*; returns pymongo's InsertOneResult."""
        return self.collection.insert_one(value)

    def read(self, selector):
        """Return the first document matching *selector*, or None."""
        return self.collection.find_one(selector)

    def update(self, selector, value):
        """Upsert-replace the document matching *selector*; return the
        modified count.

        Bug fix: Collection.update() was removed in pymongo (and previously
        returned a plain dict with no .modified_count attribute, so this
        method raised AttributeError); replace_one keeps the whole-document
        replacement + upsert semantics.
        """
        return self.collection.replace_one(
            selector, value, upsert=True).modified_count

    def delete(self, selector):
        """Delete one document matching *selector*; return the deleted count."""
        return self.collection.delete_one(selector).deleted_count
class Server(object): def __init__(self): self.db = MongoClient().data.service self.classifier = MyRandomForest({}, {}, DATA_MODEL_PATH) self.classifier.train([]) ################ Stuff for use in this file ################ def _create_column(self, column, type_id, column_name, source_name, model, force=False): """ Create a column in a semantic type and return the column's id if it was created successfully. Notes: If the column already exists and force is not set to true, a 409 will be returned and no data will be modified. :param type_id: Id of the semantic type this column belongs to :param column_name: Name of the column to be created :param source_name: Name of the source of the column to be created :param model: Model of the column to be created :param data: Data which will be added to the column on creation :param force: Force create the column, if this is true and the column exists the old column will be deleted (with all of its data) before creation :return: The id of the new column and a response code of 201 if the creation was successful, otherwise it will be an error message with the appropriate error code """ column_id = get_column_id(type_id, column_name, source_name, model) db_body = {ID: column_id, DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id, COLUMN_NAME: column_name, SOURCE_NAME: source_name, MODEL: model} if self.db.find_one(db_body): if force: self.db.delete_many(db_body) else: return "Column already exists", 409 db_body.update(column.to_json()) self.db.insert_one(db_body) return column_id, 201 def _predict_column(self, column_name, source_names, data): """ Predicts the semantic type of a column. 
:param column_name: Name of the column :param source_names: List of source names :param data: The data to predict based opon :return: A list of dictionaries which each contain the semantic type and confidence score """ att = Column(column_name, source_names[0]) # print(data) for value in data: att.add_value(value) att.semantic_type = "to_predict" att.prepare_data() return att.predict_type(searcher.search_types_data(INDEX_NAME, source_names), searcher.search_similar_text_data(INDEX_NAME, att.value_text, source_names), self.classifier) def _update_bulk_add_model(self, model, column_model): """ Updates the bulk add model in the db and also returns it. :param model: The current bulk add model :param column_model: The model of the columns which are being updated against :return: The updated bulk add model """ for n in model[BAC_GRAPH][BAC_NODES]: if n.get(BAC_COLUMN_NAME): if n[BAC_COLUMN_NAME] == BAC_COLUMN_NAME_FILE_NAME: continue column_id = get_column_id(get_type_id(n[BAC_USER_SEMANTIC_TYPES][0][BAC_CLASS][BAC_URI], n[BAC_USER_SEMANTIC_TYPES][0][BAC_PROPERTY][BAC_URI]), n[BAC_COLUMN_NAME], model[BAC_NAME], column_model) prediction = self._predict_column(n[BAC_COLUMN_NAME], [model[BAC_NAME]], self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})[DATA]) n[BAC_LEARNED_SEMANTIC_TYPES] = [] for t in prediction: type_info = decode_type_id(t[SL_SEMANTIC_TYPE]) od = collections.OrderedDict() od[BAC_CLASS] = {BAC_URI: type_info[0]} od[BAC_PROPERTY] = {BAC_URI: type_info[1]} od[BAC_CONFIDENCE_SCORE] = t[SL_CONFIDENCE_SCORE] n[BAC_LEARNED_SEMANTIC_TYPES].append(od) self.db.update_one({DATA_TYPE: DATA_TYPE_MODEL, ID: model[BAC_ID]}, {"$set": {BULK_ADD_MODEL_DATA: model}}) return model ################ Predict ################ def predict_post(self, data, namespaces=None, column_names=None, source_names=None, models=None): """ Predicts the semantic type of the given data. 
:param namespaces: List of allowed namespaces :param column_names: List of allowed column names :param source_names: List of allowed source names :param models: List of allowed column models :param data: List of the data values to predict. :return: A return message (if it is successful this will be a list of the predicted types) and a return code """ data = [x.strip() for x in data] data = [x for x in data if x] if not data: return "Predicting data cannot be empty", 500 if source_names is None: # If no source names are given just use all of the source names in the db source_names = set() for col in self.db.find({DATA_TYPE: DATA_TYPE_COLUMN}): source_names.add(col[SOURCE_NAME]) source_names = list(source_names) if len(source_names) < 1: return "You must have columns to be able to predict", 400 #### Predict the types ## Do the actual predicting using the semantic labeler predictions = self._predict_column(column_names[0], source_names, data) if len(predictions) < 1: return "No matches found", 404 ## Filter the results allowed_ids_namespaces = None allowed_ids_models = None all_allowed_ids = None if namespaces is not None: allowed_ids_namespaces = set() current_allowed_types = list( self.db.find({DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, NAMESPACE: {"$in": namespaces}})) for prediction in current_allowed_types: allowed_ids_namespaces.add(prediction[ID]) if models: allowed_ids_models = set() current_allowed_types = list(self.db.find({DATA_TYPE: DATA_TYPE_COLUMN, MODEL: {"$in": models}})) for c in current_allowed_types: allowed_ids_models.add(c[TYPE_ID]) if allowed_ids_namespaces is not None and allowed_ids_models is not None: all_allowed_ids = allowed_ids_namespaces & allowed_ids_models elif allowed_ids_namespaces is not None and allowed_ids_models is None: all_allowed_ids = allowed_ids_namespaces elif allowed_ids_namespaces is None and allowed_ids_models is not None: all_allowed_ids = allowed_ids_models return_body = [] for prediction in predictions: print(prediction) for 
type_id, exact_score in prediction[1]: if all_allowed_ids is not None: if prediction[SL_SEMANTIC_TYPE] not in all_allowed_ids: continue obj_dict = {TYPE_ID_PATH: type_id, SCORE: exact_score} type_class_property = decode_type_id(type_id) obj_dict[CLASS] = type_class_property[0] obj_dict[PROPERTY] = type_class_property[1] return_body.append(obj_dict) return_body.sort(key=lambda x: x[SCORE], reverse=True) return json_response(return_body, 200) ################ SemanticTypes ################ def semantic_types_get(self, class_=None, property_=None, namespaces=None, source_names=None, column_names=None, column_ids=None, models=None, return_columns=False, return_column_data=False): """ Returns all of the semantic types (and optionally their columns and columns' data) filtered by the given parameters. :param class_: The class of the semantic types to get :param property_: The property of the semantic types to get :param namespaces: The possible namespaces of the semantic types to get :param source_names: The possible source names of at least one column of a semantic type must have :param column_names: The possible column names of at least one column of a semantic type must have :param column_ids: The possible column ids of at least one column of a semantic type must have :param models: The possible column model of at least one column of a semantic type must have :param return_columns: True if all of the columns (but not the data in the columns) should be returned with the semantic types :param return_column_data: True if all of the columns and their data should be returned with the semantic types :return: All of the semantic types which fit the following parameters """ # Find all of the type ids that satisfy the class, property, and namespaces db_body = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE} if class_ is not None: db_body[CLASS] = class_ if property_ is not None: db_body[PROPERTY] = property_ if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces} 
possible_result = list(self.db.find(db_body)) possible_type_ids = set() for t in possible_result: possible_type_ids.add(t[ID]) # Find all of the type ids from the columns which satisfy the other parameters if source_names or column_names or column_ids or models: db_body = {DATA_TYPE: DATA_TYPE_COLUMN} if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names} if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names} if column_ids is not None: db_body[ID] = {"$in": column_ids} if models is not None: db_body[MODEL] = {"$in": models} other_possible_ids = set() for col in self.db.find(db_body): other_possible_ids.add(col[TYPE_ID]) possible_type_ids = possible_type_ids & other_possible_ids # Construct the return body return_body = [] for t in possible_result: if t[ID] in possible_type_ids: o = collections.OrderedDict() o[TYPE_ID_PATH] = t[ID] o[CLASS] = t[CLASS] o[PROPERTY] = t[PROPERTY] o[NAMESPACE] = t[NAMESPACE] return_body.append(o) # Add the column data if requested if return_columns: db_body = {DATA_TYPE: DATA_TYPE_COLUMN} for type_ in return_body: db_body[TYPE_ID] = type_[TYPE_ID_PATH] type_[COLUMNS] = clean_columns_output(self.db.find(db_body), return_column_data) if len(return_body) < 1: return "No Semantic types matching the given parameters were found", 404 return json_response(return_body, 200) def semantic_types_post_put(self, class_, property_, force=False): """ Creates a semantic type and returns the id if it was successful. 
Notes: If the type already exists and force is not set to true a 409 will be returned and no data will be modified :param class_: The class of the semantic type, note that this must be a valid URL :param property_: The property of the semantic type :param force: Force create the semantic type, if this is true and the type already exists the existing type (and all of its columns and data) will be deleted before creation :return: The id of the new semantic type and a response code of 201 if the creation was successful, otherwise it will be an error message with the appropriate error code """ class_ = class_.rstrip("/") property_ = property_.rstrip("/") ## Verify that class is a valid uri and namespace is a valid uri namespace = "/".join(class_.replace("#", "/").split("/")[:-1]) ## Actually add the type type_id = get_type_id(class_, property_) db_body = {ID: type_id, DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, CLASS: class_, PROPERTY: property_, NAMESPACE: namespace} if self.db.find_one(db_body): if force: self.db.delete_many({DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}) self.db.delete_many(db_body) else: return type_id, 409 self.db.insert_one(db_body) return type_id, 201 def semantic_types_delete(self, class_=None, property_=None, type_ids=None, namespaces=None, source_names=None, column_names=None, column_ids=None, models=None, delete_all=False): """ Deletes all of the semantic types (and all of their columns/data) that fit the given parameters. 
:param class_: The class of the semantic types to delete :param property_: The property of the semantic types to delete :param type_ids: The possible ids of the semantic types to delete :param namespaces: The possible namespaces of the semantic types to delete :param source_names: The possible source names of at least one column of a semantic type must have :param column_names: The possible column names of at least one column of a semantic type must have :param column_ids: The possible column ids of at least one column of a semantic type must have :param models: The possible column model of at least one column of a semantic type must have :param delete_all: Set this to true if all semantic types should be deleted :return: The amount of semantic types deleted and a 200 if it worked, otherwise and error message with the appropriate code """ if class_ is None and property_ is None and type_ids is None and namespaces is None and source_names is None and column_names is None and column_ids is None and models is None and not delete_all: return "To delete all semantic types give deleteAll as true", 400 return "All " + str(self.db.delete_many({DATA_TYPE: {"$in": [DATA_TYPE_SEMANTIC_TYPE, DATA_TYPE_COLUMN]}}).deleted_count) + " semantic types and their data were deleted", 200 print str(class_)+" "+str(property_)+" "+str(type_ids)+" "+str(namespaces)+" "+str(source_names)+" "+str(column_names)+" "+str(column_ids)+" "+str(models)+" "+str(delete_all) # Find the parent semantic types and everything below them of everything which meets column requirements type_ids_to_delete = [] db_body = {DATA_TYPE: DATA_TYPE_COLUMN} db_body_id = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE} if type_ids is not None: db_body[TYPE_ID] = {"$in": type_ids} db_body_id[ID] = {"$in": type_ids} if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names} if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names} if column_ids is not None: db_body[COLUMN_ID_PATH] = {"$in": column_ids} 
        if models is not None: db_body[MODEL] = {"$in": models}
        for col in self.db.find(db_body):
            print "col[TYPE_ID] = "+str(col[TYPE_ID])
            if col[TYPE_ID] not in type_ids_to_delete: type_ids_to_delete.append(col[TYPE_ID])
        for col in self.db.find(db_body_id):
            print "col[ID] = "+str(col[ID])
            if col[ID] not in type_ids_to_delete: type_ids_to_delete.append(col[ID])
        # Find the semantic types which meet the other requirements and delete all types which need to be
        possible_types = []
        db_body = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if class_ is not None: db_body[CLASS] = class_
        if property_ is not None: db_body[PROPERTY] = property_
        if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces}
        if type_ids is None and source_names is None and column_names is None and column_ids is None and models is None:
            deleted = self.db.delete_many(db_body).deleted_count
        else:
            for t in self.db.find(db_body):
                if t[ID] not in possible_types: possible_types.append(t[ID])
            for t in self.db.find(db_body_id):
                if t[ID] not in possible_types: possible_types.append(t[ID])
            # NOTE(review): removing from type_ids_to_delete while iterating it
            # skips the element after each removal — iterate a copy instead.
            for id_ in type_ids_to_delete:
                if id_ not in possible_types: type_ids_to_delete.remove(id_)
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: {"$in": type_ids_to_delete}}
            self.db.delete_many(db_body)
            deleted = self.db.delete_many({DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, ID: {"$in": type_ids_to_delete}}).deleted_count
        if deleted < 1: return "No semantic types with the given parameters were found", 404
        return str(deleted) + " semantic types matched parameters and were deleted", 200

    ################ SemanticTypesColumns ################

    def semantic_types_columns_get(self, type_id, column_ids=None, column_names=None, source_names=None, models=None, return_column_data=False):
        """ Returns all of the columns in a semantic type that fit the given parameters.

        :param type_id: The id of the semantic type
        :param column_ids: The possible ids of the columns to be returned
        :param column_names: The possible names of the columns to be returned
        :param source_names: The possible source names of the columns to be returned
        :param models: The possible models of the columns to be returned
        :param return_column_data: True if all of the data in the column should be returned with the columns
        :return: All of the columns in the semantic type that fit the given parameters
        """
        print(type_id)
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None: db_body[ID] = {"$in": column_ids}
        if models is not None: db_body[MODEL] = {"$in": models}
        result = list(self.db.find(db_body))
        if len(result) < 1: return "No columns matching the given parameters were found", 404
        return json_response(clean_columns_output(result, return_column_data), 200)

    def semantic_types_columns_post_put(self, type_id, column_name, source_name, model, data=[], force=False):
        """ Create a column in a semantic type, optionally with data.

        :param type_id: Id of the semantic type to create the column in
        :param column_name: The name of the column to be created
        :param source_name: The name of the source of the column to be created
        :param model: The model of the column to be created
        :param data: The (optional) list of data to put into the column on creation
        :param force: True if the column should be replaced if it already exists
        :return: The id of the newly created with a 201 if it was successful, otherwise an error message with the appropriate error code
        """
        # NOTE(review): data=[] is a shared mutable default; it is rebound (not
        # mutated) below, so it is safe today, but `data=None` would be safer.
        column = Column(column_name, source_name)
        column.semantic_type = type_id
        # if the size of the training data is MORE than a threshold value, then sample the threshold values randomly
        if(len(data)>SAMPLE_SIZE):
            data = random.sample(data, SAMPLE_SIZE)
        for value in data:
            column.add_value(value)
        result = self._create_column(column, type_id, column_name, source_name, model, force)
        return result

    def semantic_types_columns_delete(self, type_id, column_ids=None, column_names=None, source_names=None, models=None):
        """ Delete all of the columns in a semantic type that match the given parameters.

        :param type_id: The id of the semantic type to delete the columns from
        :param column_ids: The possible ids of the columns to delete
        :param source_names: The possible names of the columns to delete
        :param column_names: The possible source names of the columns to delete
        :param models: The possible models of the columns to delete
        :return: The number of columns deleted with a 200 if successful, otherwise an error message with an appropriate error code
        """
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None: db_body[ID] = {"$in": column_ids}
        if models is not None: db_body[MODEL] = {"$in": models}
        found_columns = list(self.db.find(db_body))
        if len(found_columns) < 1: return "No columns were found with the given parameters", 404
        return str(self.db.delete_many(db_body).deleted_count) + " columns deleted successfully", 200

    ################ SemanticTypesColumnData ################

    def semantic_types_column_data_get(self, column_id):
        """ Returns all of the data in the column

        :param column_id: Id of the column to get the data from
        :return: The column and all of its info
        """
        result = list(self.db.find({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}))
        if len(result) < 1: return "No column with that id was found", 404
        if len(result) > 1: return "More than one column was found with that id", 500
        return json_response(clean_column_output(result[0]), 200)

    def semantic_types_column_data_post_put(self, column_id, body, force=False):
        """ Add or replace data on an existing column
        Notes: If the column does not exist a 404 will be returned

        :param column_id: Id of the column to add/replace the data of
        :param body: An array of the new data
        :param force: True if the current data in the column should be replaced, false if the new data should just be appended
        :return: A conformation with a 201 if it was added successfully or an error message with an appropriate error code if it was not successful
        """
        column_data = self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})
        # NOTE(review): find_one returns a dict (or None), not an UpdateResult —
        # it has no `matched_count` attribute, so both checks below raise
        # AttributeError at runtime. Should probably test `column_data is None`.
        if column_data.matched_count < 1: return "No column with that id was found", 404
        if column_data.matched_count > 1: return "More than one column was found with that id", 500
        column = Column(column_data[COLUMN_NAME], column_data[SOURCE_NAME], get_type_from_column_id(column_id))
        if not force: column.read_json_to_column(column_data)
        for value in body: column.add_value(value)
        data = column.to_json()
        # NOTE(review): update_many requires (filter, update); a single positional
        # argument is a TypeError. Presumably this should be
        # update_many({ID: column_id}, {"$set": data}) — confirm.
        self.db.update_many(data)
        return "Column data updated", 201

    def semantic_types_column_data_delete(self, column_id):
        """ Delete the data from the column with the given id

        :param column_id: Id of the column to delete the data from
        :return: A deletion conformation with a 200 if successful, otherwise an error message with an appropriate error code
        """
        result = self.db.update_many({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}, {"$set": {DATA: []}})
        if result.matched_count < 1: return "No column with that id was found", 404
        if result.matched_count > 1: return "More than one column was found with that id", 500
        column = self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})  # NOTE(review): fetched but never used
        # NOTE(review): the docstring says only the column's data is deleted, yet
        # the two delete_one calls below remove column documents themselves; the
        # first matches ANY column with the same type id — verify intent.
        self.db.delete_one({DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: get_type_from_column_id(column_id)})
        self.db.delete_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})
        return "Column data deleted", 200

    ################ BulkAddModels ################

    def bulk_add_models_get(self, model_ids=None, model_names=None, model_desc=None, show_all=False, crunch_data=True):
        """ Returns the current state of all of the bulk add models.

        :param model_ids: The possible ids of the models to get
        :param model_names: The possible names of the models to get
        :param model_desc: The possible descriptions of the models to get
        :param show_all: True if the whole model should be returned
        :param crunch_data: False if learnedSemanticTypes should not be generated and the version in the db should be used instead,
            note that the data in the db is updated every time a get is run with crunch_data=true
        :return: All of the models that fit the given parameters
        """
        db_body = {DATA_TYPE: DATA_TYPE_MODEL}
        if model_ids is not None: db_body[ID] = {"$in": model_ids}
        if model_names is not None: db_body[NAME] = {"$in": model_names}
        if model_desc is not None: db_body[MODEL_DESC] = model_desc
        db_result = list(self.db.find(db_body))
        if len(db_result) < 1: return "No models were found with the given parameters", 404
        # Construct the return body
        return_body = []
        for mod in db_result:
            o = collections.OrderedDict()
            o[MODEL_ID] = mod[ID]
            o[NAME] = mod[NAME]
            o[DESC] = mod[DESC]
            if show_all:
                o[MODEL] = self._update_bulk_add_model(mod[BULK_ADD_MODEL_DATA], mod[MODEL]) if crunch_data else mod[BULK_ADD_MODEL_DATA]
            return_body.append(o)
        return json_response(return_body, 200)

    def bulk_add_models_post(self, model, column_model=DEFAULT_BULK_MODEL):
        """ Add a bulk add model.

        :param column_model: The model that all of the created columns should have
        :param model: A dictionary of the model
        :return: Stats of the data added
        """
        #### Assert the required elements exist
        if BAC_ID not in model: return "The given model must have an id", 400
        if BAC_NAME not in model: return "The given model must have a name", 400
        if BAC_DESC not in model: return "The given model must have a description", 400
        if BAC_GRAPH not in model: return "The given model must have a graph", 400
        if BAC_NODES not in model[BAC_GRAPH]: return "The given model must have nodes within the graph", 400
        if len(list(self.db.find({ID: model[BAC_ID]}))) > 0: return "Model id already exists", 409
        #### Parse and add the model
        # Try to add of the given semantic types and columns
        new_type_count = 0
        new_column_count = 0
        existed_type_count = 0
        existed_column_count = 0
        for n in model[BAC_GRAPH][BAC_NODES]:
            if n.get(BAC_USER_SEMANTIC_TYPES):
                for ust in n[BAC_USER_SEMANTIC_TYPES]:
                    semantic_status = self.semantic_types_post_put(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI], False)
                    if semantic_status[1] == 201: new_type_count += 1
                    elif semantic_status[1] == 409: existed_type_count += 1
                    elif semantic_status[1] == 400: return semantic_status
                    else: return "Error occurred while adding semantic type: " + str(ust), 500
                    # NOTE(review): _create_column is defined as
                    # (column, type_id, column_name, source_name, model, force=...);
                    # this call passes only four arguments and no Column instance —
                    # TypeError at runtime. Confirm against the Server definition.
                    column_status = self._create_column(get_type_id(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI]), n[BAC_COLUMN_NAME], model[BAC_NAME], column_model)
                    if column_status[1] == 201: new_column_count += 1
                    elif column_status[1] == 409: existed_column_count += 1
                    elif column_status[1] == 400: return column_status
                    else: return "Error occurred while adding column for semantic type: " + str(ust), 500
        # Nothing bad happened when creating the semantic types and columns, so add the model to the DB
        # NOTE(review): uses literal keys "id"/"description" here but BAC_ID/BAC_DESC
        # in the validation above — verify the constants equal these literals.
        self.db.insert_one({DATA_TYPE: DATA_TYPE_MODEL, ID: model["id"], NAME: model[BAC_NAME], DESC: model["description"], MODEL: column_model, BULK_ADD_MODEL_DATA: model})
        return "Model and columns added, " + str(new_type_count) + " semantic types created, " + \
               str(existed_type_count) + " semantic types already existed, " + \
               str(new_column_count) + " columns created, and " + \
               str(existed_column_count) + " columns already existed.", 201

    def bulk_add_models_delete(self, model_ids=None, model_names=None, model_desc=None):
        """ Delete all of the bulk add models which fit the given parameters

        :param model_ids: The possible ids of the models to delete
        :param model_names: The possible names of the models to delete
        :param model_desc: The possible descriptions of the models to delete
        :return: The amount of models deleted with a 200 if successful, otherwise an error message with the appropriate code
        """
        db_body = {DATA_TYPE: DATA_TYPE_MODEL}
        if model_ids is not None: db_body[ID] = {"$in": model_ids}
        if model_names is not None: db_body[NAME] = {"$in": model_names}
        if model_desc is not None: db_body[MODEL_DESC] = model_desc
        deleted_count = self.db.delete_many(db_body).deleted_count
        if deleted_count < 1: return "No models were found with the given parameters", 404
        return str(deleted_count) + " models deleted successfully", 200

    ################ BulkAddModelData ################

    def bulk_add_model_data_get(self, model_id, crunch_data):
        """ Returns the current state of the bulk add model

        :param model_id: The id of the model to get
        :param crunch_data: False if learnedSemanticTypes should not be generated and the version in the db should be used instead,
            note that the data in the db is updated every time a get is run with crunch_data=true
        :return: The current state of the bulk add model
        """
        db_result = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id}))
        if len(db_result) < 1: return "A model was not found with the given id", 404
        if len(db_result) > 1: return "More than one model was found with the given id", 500
        db_result = db_result[0]
        return json_response(self._update_bulk_add_model(db_result[BULK_ADD_MODEL_DATA], db_result[MODEL]) if crunch_data else db_result[BULK_ADD_MODEL_DATA], 200)

    def bulk_add_model_data_post(self, model_id, column_model, data):
        """ Add data to the service with a bulk add model

        :param model_id: The id of the model to add off of
        :param column_model: The model of the columns being used with that model
        :param data: The list of dictionaries with all of the data to add
        :return: A conformation message with a 201 if it was successful, otherwise an error message with the appropriate code
        """
        # Get the model and parse the json lines
        model = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id}))
        if len(model) < 1: return "The given model was not found", 404
        if len(model) > 1: return "More than one model was found with the id", 500
        model = model[0][BULK_ADD_MODEL_DATA]
        # Get all of the data in each column
        for n in model[BAC_GRAPH][BAC_NODES]:
            column_data = []
            for line in data:
                if n.get(BAC_COLUMN_NAME):
                    column_data.append(line[n[BAC_COLUMN_NAME]])
            # Add it to the db
            if n.get(BAC_USER_SEMANTIC_TYPES):
                for ust in n[BAC_USER_SEMANTIC_TYPES]:
                    # [1] extracts the HTTP status code from the (message, code) pair
                    result = self.semantic_types_column_data_post_put(get_column_id(get_type_id(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI]), n[BAC_COLUMN_NAME], model[BAC_NAME], column_model), column_data, False)[1]
                    if result == 201: continue
                    elif result == 404: return "A required column was not found", 404
                    else: return "Error occurred while adding data to the column", 500
        return "Data successfully added to columns", 201
class Model:
    """Phonebook data layer backed by the ``phonebook.contacts`` Mongo collection.

    All public methods return either the requested data or a human-readable
    status string, which the calling layer displays directly.
    """

    STR_FORMAT = 'Name: {} {} Phone: {}'
    STR_ID_FORMAT = 'ID - {} Name: {} {} Phone: {}'

    def __init__(self, _data_manager=None):
        self.data_manager = _data_manager
        # document_class=OrderedDict keeps field order stable for the CSV/TXT exports.
        self.database_conn = MongoClient(
            document_class=OrderedDict).phonebook.contacts

    def create(self, first_name, last_name, phone_number):
        """Insert a new contact; return a status message.

        Validation and duplicate checks each run once (the previous version
        called them twice per path).
        """
        error = self.check_fields(first_name, last_name, phone_number)
        if error:
            return error
        duplicate = self.contact_check(first_name, last_name, phone_number)
        if duplicate:
            return duplicate
        # insert_one replaces the deprecated Collection.insert (removed in pymongo 4).
        self.database_conn.insert_one(
            OrderedDict([('_id', self.create_id()),
                         ('first_name', first_name),
                         ('last_name', last_name),
                         ('phone_number', phone_number)]))
        return 'Contact was successfully created.'

    def read(self, first_name='', last_name='', phone_number=''):
        """Return contacts matching any given field, or all contacts when no
        filter is supplied; 'Nothing found.' when there are no matches."""
        if not (first_name or last_name or phone_number):
            cursor = self.database_conn.find()
        else:
            cursor = self.database_conn.find({
                '$or': [{'first_name': first_name},
                        {'last_name': last_name},
                        {'phone_number': phone_number}]
            })
        # Materialize once; Cursor.count() was deprecated and removed in pymongo 4.
        results = list(cursor)
        return results if results else 'Nothing found.'

    def update(self, _id, first_name, last_name, phone_number):
        """Overwrite all fields of the contact with the given integer id."""
        if not isinstance(_id, int) or self.database_conn.count_documents(
                {'_id': _id}) == 0:
            return 'Wrong ID.'
        error = self.check_fields(first_name, last_name, phone_number)
        if error:
            return error
        self.database_conn.update_one({'_id': int(_id)}, {
            '$set': {
                'first_name': first_name,
                'last_name': last_name,
                'phone_number': phone_number
            }
        })
        return 'Contact was successfully updated.'

    def delete(self, _id):
        """Delete the contact with the given integer id."""
        if not isinstance(_id, int) or self.database_conn.count_documents(
                {'_id': _id}) == 0:
            return 'Wrong ID.'
        self.database_conn.delete_one({'_id': int(_id)})
        return "Contact was successfully deleted."

    def contact_check(self, first_name, last_name, phone_number):
        """Return a warning string when an identical contact exists, else None."""
        if self.database_conn.count_documents({
                'first_name': first_name,
                'last_name': last_name,
                'phone_number': phone_number}) > 0:
            return 'This contact is already in Phone Book.'

    @staticmethod
    def check_fields(first_name, last_name, phone_number):
        """Validate raw user input; return an error string, or None when valid."""
        if not (first_name and last_name and phone_number):
            return 'All fields must be filled.'
        if not first_name.isalpha() or not last_name.isalpha():
            return 'Name must be a string.'
        if not phone_number.isdigit():
            return 'Phone number must be an integer.'

    def create_id(self):
        """Return the next free id.

        Uses max() over the existing ids — the previous version popped the
        *last* id, which is wrong once ids are not in ascending order.
        """
        ids = [doc['_id'] for doc in self.database_conn.find({}, {'_id': True})]
        return max(ids) + 1 if ids else 1

    @staticmethod
    def select_id(selected_id, search_res):
        """Return selected_id as int when present in search_res, else an error string."""
        found_ids = [doc['_id'] for doc in search_res]
        if int(selected_id) in found_ids:
            return int(selected_id)
        return 'ID "{}" is not in the search result.'.format(selected_id)

    def get_csv_str_format(self):
        """Render all contacts as semicolon-separated CSV with a header row."""
        rows = ['First name;Last name;Phone number\n']
        for contact in self.database_conn.find({}, {'_id': False}):
            rows.append('{}\n'.format(';'.join(contact.values())))
        return ''.join(rows)

    def get_txt_str_format(self):
        """Render all contacts as space-separated plain-text lines."""
        rows = []
        for contact in self.database_conn.find({}, {'_id': False}):
            rows.append('{}\n'.format(' '.join(contact.values())))
        return ''.join(rows)
class MongodbUtil(object):
    """Thin convenience wrapper around a single MongoDB collection.

    NOTE(review): the original (Korean) docstring said "MYSQL_PASSWD must be
    set in .bashrc or .bash_profile" — presumably copied from a MySQL helper;
    verify whether any environment variable is actually required here.
    """

    def __init__(self, mongo_url, db_name, collection_name, auto_connect=False):
        """
        :param mongo_url: host, port, username, password, auth db
        :param db_name: database name
        :param collection_name: collection name
        :param auto_connect: default do not connect for multiprocessing
            (http://api.mongodb.com/python/current/faq.html#using-pymongo-with-multiprocessing)
        """
        self.mongo_url = mongo_url
        self.db_name = db_name
        self.collection_name = collection_name
        self.auto_connect = auto_connect
        # socketKeepAlive is deprecated in newer pymongo releases; kept for
        # compatibility with the driver version this project pins.
        self.collection = MongoClient(mongo_url, socketKeepAlive=True,
                                      connect=auto_connect)[db_name][collection_name]

    def __repr__(self):
        return '%s (db_name:%s, collection_name:%s, auto_connect:%s)' % (
            StringUtil.mask_passwd_in_url(self.mongo_url), self.db_name,
            self.collection_name, self.auto_connect)

    def __str__(self):
        return self.__repr__()

    def find(self, query=None, sort=None, limit=0):
        """Yield documents matching `query`, sorted (default: _id asc).

        :param limit: 0 means no limit (pymongo convention)
        """
        if query is None:
            query = {}
        if sort is None:
            sort = [('_id', ASCENDING)]
        for row in self.collection.find(query, no_cursor_timeout=True).sort(sort).limit(limit):
            yield row

    def count(self, query=None):
        """Return the number of documents matching `query`.

        Uses count_documents(); the old Collection.count() is deprecated/removed
        and never accepted a no_cursor_timeout option.
        """
        if query is None:
            query = {}
        return self.collection.count_documents(query)

    def find_one(self, query: dict, limit=0) -> dict:
        """Return a single matching document, or None.

        :param limit: ignored — find_one returns at most one document. (Kept
            for backward signature compatibility; the previous implementation
            chained .limit() onto the returned dict, raising AttributeError on
            every call.)
        """
        return self.collection.find_one(query)

    def create_index(self, field_list=None, unique=False):
        """Create a background ascending index for each field in field_list."""
        if field_list is None:
            field_list = []
        for field in field_list:
            self.collection.create_index([(field, ASCENDING)], background=True, unique=unique)
        return

    def insert(self, row: dict):
        """Insert a single document; return the pymongo InsertOneResult."""
        return self.collection.insert_one(row)

    def update_one(self, where_query: dict, update_content: dict, upsert=False):
        """Update the first document matching where_query."""
        return self.collection.update_one(where_query, update_content, upsert=upsert)

    def update(self, where_query: dict, update_content: dict, upsert=False):
        """Update every document matching where_query."""
        return self.collection.update_many(where_query, update_content, upsert=upsert)

    def save(self, row):
        """Upsert `row` and return its _id.

        Emulates the legacy Collection.save() (removed in pymongo 4): replace
        by _id when present, otherwise plain insert.
        """
        if '_id' in row:
            self.collection.replace_one({'_id': row['_id']}, row, upsert=True)
            return row['_id']
        return self.collection.insert_one(row).inserted_id

    def delete(self, where_query: dict):
        """Delete at most one matching document; return the deleted count."""
        result = self.collection.delete_one(where_query)
        if result:
            return result.deleted_count
        return 0

    def drop(self):
        """Drop the whole collection."""
        return self.collection.drop()
from pymongo import MongoClient import pprint from pymongo import ASCENDING db = MongoClient().get_database("DATA").get_collection("Twitter_Breixt_9month") cur = db.aggregate([{ "$group": { "_id": { "id": "$id" }, "uniqueIds": { "$addToSet": "$_id" }, "count": { "$sum": 1 } } }, { "$match": { "count": { "$gt": 1 } } }], allowDiskUse=True) duplicateIds = list(cur) pprint.pprint(duplicateIds) raw_input("Any button to remove") for doc in duplicateIds: index = 1 print doc["uniqueIds"] while index < doc["uniqueIds"].length: db.delete_one(doc["uniqueIds"][index]) index += 1 print index print print db.createIndex({"id":ASCENDING},unique=True) print "Done"
class Server(object):
    def __init__(self):
        # Service-wide Mongo collection; holds semantic types, columns, and models.
        self.db = MongoClient().data.service
        self.classifier = MyRandomForest({}, {}, DATA_MODEL_PATH)
        self.classifier.train([])

    ################ Stuff for use in this file ################

    def _create_column(self, column, type_id, column_name, source_name, model, force=False):
        """ Create a column in a semantic type and return the column's id if it was created successfully.
        Notes: If the column already exists and force is not set to true, a 409 will be returned and no data will be modified.

        :param type_id: Id of the semantic type this column belongs to
        :param column_name: Name of the column to be created
        :param source_name: Name of the source of the column to be created
        :param model: Model of the column to be created
        :param force: Force create the column, if this is true and the column exists the old column will be deleted (with all of its data) before creation
        :return: The id of the new column and a response code of 201 if the creation was successful, otherwise it will be an error message with the appropriate error code
        """
        column_id = get_column_id(type_id, column_name, source_name, model)
        db_body = {ID: column_id, DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id, COLUMN_NAME: column_name, SOURCE_NAME: source_name, MODEL: model}
        if self.db.find_one(db_body):
            if force:
                self.db.delete_many(db_body)
            else:
                return "Column already exists", 409
        db_body.update(column.to_json())
        self.db.insert_one(db_body)
        return column_id, 201

    def _predict_column(self, column_name, source_names, data):
        """ Predicts the semantic type of a column.

        :param column_name: Name of the column
        :param source_names: List of source names
        :param data: The data to predict based opon
        :return: A list of dictionaries which each contain the semantic type and confidence score
        """
        att = Column(column_name, source_names[0])
        # print(data)
        for value in data:
            att.add_value(value)
        att.semantic_type = "to_predict"
        att.prepare_data()
        return att.predict_type(
            searcher.search_types_data(INDEX_NAME, source_names),
            searcher.search_similar_text_data(INDEX_NAME, att.value_text, source_names),
            self.classifier)

    def _update_bulk_add_model(self, model, column_model):
        """ Updates the bulk add model in the db and also returns it.

        :param model: The current bulk add model
        :param column_model: The model of the columns which are being updated against
        :return: The updated bulk add model
        """
        for n in model[BAC_GRAPH][BAC_NODES]:
            if n.get(BAC_COLUMN_NAME):
                if n[BAC_COLUMN_NAME] == BAC_COLUMN_NAME_FILE_NAME:
                    continue
                column_id = get_column_id(
                    get_type_id(n[BAC_USER_SEMANTIC_TYPES][0][BAC_CLASS][BAC_URI],
                                n[BAC_USER_SEMANTIC_TYPES][0][BAC_PROPERTY][BAC_URI]),
                    n[BAC_COLUMN_NAME], model[BAC_NAME], column_model)
                prediction = self._predict_column(
                    n[BAC_COLUMN_NAME], [model[BAC_NAME]],
                    self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id})[DATA])
                n[BAC_LEARNED_SEMANTIC_TYPES] = []
                for t in prediction:
                    type_info = decode_type_id(t[SL_SEMANTIC_TYPE])
                    od = collections.OrderedDict()
                    od[BAC_CLASS] = {BAC_URI: type_info[0]}
                    od[BAC_PROPERTY] = {BAC_URI: type_info[1]}
                    od[BAC_CONFIDENCE_SCORE] = t[SL_CONFIDENCE_SCORE]
                    n[BAC_LEARNED_SEMANTIC_TYPES].append(od)
        self.db.update_one({DATA_TYPE: DATA_TYPE_MODEL, ID: model[BAC_ID]},
                           {"$set": {BULK_ADD_MODEL_DATA: model}})
        return model

    ################ Predict ################

    def predict_post(self, data, namespaces=None, column_names=None, source_names=None, models=None):
        """ Predicts the semantic type of the given data.

        :param namespaces: List of allowed namespaces
        :param column_names: List of allowed column names
        :param source_names: List of allowed source names
        :param models: List of allowed column models
        :param data: List of the data values to predict.
        :return: A return message (if it is successful this will be a list of the predicted types) and a return code
        """
        data = [x.strip() for x in data]
        data = [x for x in data if x]
        if not data:
            return "Predicting data cannot be empty", 500
        if source_names is None:
            # If no source names are given just use all of the source names in the db
            source_names = set()
            for col in self.db.find({DATA_TYPE: DATA_TYPE_COLUMN}):
                source_names.add(col[SOURCE_NAME])
            source_names = list(source_names)
            if len(source_names) < 1:
                return "You must have columns to be able to predict", 400
        #### Predict the types
        ## Do the actual predicting using the semantic labeler
        # NOTE(review): column_names defaults to None but is indexed here — a call
        # without column_names raises TypeError before any prediction happens.
        predictions = self._predict_column(column_names[0], source_names, data)
        if len(predictions) < 1:
            return "No matches found", 404
        ## Filter the results
        allowed_ids_namespaces = None
        allowed_ids_models = None
        all_allowed_ids = None
        if namespaces is not None:
            allowed_ids_namespaces = set()
            current_allowed_types = list(self.db.find({DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, NAMESPACE: {"$in": namespaces}}))
            for prediction in current_allowed_types:
                allowed_ids_namespaces.add(prediction[ID])
        if models:
            allowed_ids_models = set()
            current_allowed_types = list(self.db.find({DATA_TYPE: DATA_TYPE_COLUMN, MODEL: {"$in": models}}))
            for c in current_allowed_types:
                allowed_ids_models.add(c[TYPE_ID])
        if allowed_ids_namespaces is not None and allowed_ids_models is not None:
            all_allowed_ids = allowed_ids_namespaces & allowed_ids_models
        elif allowed_ids_namespaces is not None and allowed_ids_models is None:
            all_allowed_ids = allowed_ids_namespaces
        elif allowed_ids_namespaces is None and allowed_ids_models is not None:
            all_allowed_ids = allowed_ids_models
        return_body = []
        for prediction in predictions:
            print(prediction)
            for type_id, exact_score in prediction[1]:
                if all_allowed_ids is not None:
                    # NOTE(review): this tests prediction[SL_SEMANTIC_TYPE] while
                    # iterating (type_id, exact_score) pairs — presumably it should
                    # test type_id itself; confirm.
                    if prediction[SL_SEMANTIC_TYPE] not in all_allowed_ids:
                        continue
                obj_dict = {TYPE_ID_PATH: type_id, SCORE: exact_score}
                type_class_property = decode_type_id(type_id)
                obj_dict[CLASS] = type_class_property[0]
                obj_dict[PROPERTY] = type_class_property[1]
                return_body.append(obj_dict)
        return_body.sort(key=lambda x: x[SCORE], reverse=True)
        return json_response(return_body, 200)

    ################ SemanticTypes ################

    def semantic_types_get(self, class_=None, property_=None, namespaces=None, source_names=None, column_names=None, column_ids=None, models=None, return_columns=False, return_column_data=False):
        """ Returns all of the semantic types (and optionally their columns and columns' data) filtered by the given parameters.

        :param class_: The class of the semantic types to get
        :param property_: The property of the semantic types to get
        :param namespaces: The possible namespaces of the semantic types to get
        :param source_names: The possible source names of at least one column of a semantic type must have
        :param column_names: The possible column names of at least one column of a semantic type must have
        :param column_ids: The possible column ids of at least one column of a semantic type must have
        :param models: The possible column model of at least one column of a semantic type must have
        :param return_columns: True if all of the columns (but not the data in the columns) should be returned with the semantic types
        :param return_column_data: True if all of the columns and their data should be returned with the semantic types
        :return: All of the semantic types which fit the following parameters
        """
        # Find all of the type ids that satisfy the class, property, and namespaces
        db_body = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if class_ is not None: db_body[CLASS] = class_
        if property_ is not None: db_body[PROPERTY] = property_
        if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces}
        possible_result = list(self.db.find(db_body))
        possible_type_ids = set()
        for t in possible_result:
            possible_type_ids.add(t[ID])
        # Find all of the type ids from the columns which satisfy the other parameters
        if source_names or column_names or column_ids or models:
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
            if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
            if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
            if column_ids is not None: db_body[ID] = {"$in": column_ids}
            if models is not None: db_body[MODEL] = {"$in": models}
            other_possible_ids = set()
            for col in self.db.find(db_body):
                other_possible_ids.add(col[TYPE_ID])
            possible_type_ids = possible_type_ids & other_possible_ids
        # Construct the return body
        return_body = []
        for t in possible_result:
            if t[ID] in possible_type_ids:
                o = collections.OrderedDict()
                o[TYPE_ID_PATH] = t[ID]
                o[CLASS] = t[CLASS]
                o[PROPERTY] = t[PROPERTY]
                o[NAMESPACE] = t[NAMESPACE]
                return_body.append(o)
        # Add the column data if requested
        if return_columns:
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
            for type_ in return_body:
                db_body[TYPE_ID] = type_[TYPE_ID_PATH]
                type_[COLUMNS] = clean_columns_output(self.db.find(db_body), return_column_data)
        if len(return_body) < 1:
            return "No Semantic types matching the given parameters were found", 404
        return json_response(return_body, 200)

    def semantic_types_post_put(self, class_, property_, force=False):
        """ Creates a semantic type and returns the id if it was successful.
        Notes: If the type already exists and force is not set to true a 409 will be returned and no data will be modified

        :param class_: The class of the semantic type, note that this must be a valid URL
        :param property_: The property of the semantic type
        :param force: Force create the semantic type, if this is true and the type already exists the existing type (and all of its columns and data) will be deleted before creation
        :return: The id of the new semantic type and a response code of 201 if the creation was successful, otherwise it will be an error message with the appropriate error code
        """
        class_ = class_.rstrip("/")
        property_ = property_.rstrip("/")
        ## Verify that class is a valid uri and namespace is a valid uri
        namespace = "/".join(class_.replace("#", "/").split("/")[:-1])
        ## Actually add the type
        type_id = get_type_id(class_, property_)
        db_body = {ID: type_id, DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE, CLASS: class_, PROPERTY: property_, NAMESPACE: namespace}
        if self.db.find_one(db_body):
            if force:
                self.db.delete_many({DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id})
                self.db.delete_many(db_body)
            else:
                return type_id, 409
        self.db.insert_one(db_body)
        return type_id, 201

    def semantic_types_delete(self, class_=None, property_=None, type_ids=None, namespaces=None, source_names=None, column_names=None, column_ids=None, models=None, delete_all=False):
        """ Deletes all of the semantic types (and all of their columns/data) that fit the given parameters.

        :param class_: The class of the semantic types to delete
        :param property_: The property of the semantic types to delete
        :param type_ids: The possible ids of the semantic types to delete
        :param namespaces: The possible namespaces of the semantic types to delete
        :param source_names: The possible source names of at least one column of a semantic type must have
        :param column_names: The possible column names of at least one column of a semantic type must have
        :param column_ids: The possible column ids of at least one column of a semantic type must have
        :param models: The possible column model of at least one column of a semantic type must have
        :param delete_all: Set this to true if all semantic types should be deleted
        :return: The amount of semantic types deleted and a 200 if it worked, otherwise and error message with the appropriate code
        """
        if class_ is None and property_ is None and type_ids is None and namespaces is None and source_names is None and column_names is None and column_ids is None and models is None and not delete_all:
            return "To delete all semantic types give deleteAll as true", 400
        # NOTE(review): this return is unconditional — every call that passes the
        # guard above deletes ALL semantic types and columns, and the filtered
        # delete logic below is unreachable dead code. Presumably the blanket
        # delete should only run when delete_all is true; confirm and fix.
        return "All " + str(
            self.db.delete_many({
                DATA_TYPE: {"$in": [DATA_TYPE_SEMANTIC_TYPE, DATA_TYPE_COLUMN]}
            }).deleted_count) + " semantic types and their data were deleted", 200
        print str(class_) + " " + str(property_) + " " + str(type_ids) + " " + str(namespaces) + " " + str(source_names) + " " + str(column_names) + " " + str(column_ids) + " " + str(models) + " " + str(delete_all)
        # Find the parent semantic types and everything below them of everything which meets column requirements
        type_ids_to_delete = []
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN}
        db_body_id = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if type_ids is not None:
            db_body[TYPE_ID] = {"$in": type_ids}
            db_body_id[ID] = {"$in": type_ids}
        if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None: db_body[COLUMN_ID_PATH] = {"$in": column_ids}
        if models is not None: db_body[MODEL] = {"$in": models}
        for col in self.db.find(db_body):
            print "col[TYPE_ID] = " + str(col[TYPE_ID])
            if col[TYPE_ID] not in type_ids_to_delete:
                type_ids_to_delete.append(col[TYPE_ID])
        for col in self.db.find(db_body_id):
            print "col[ID] = " + str(col[ID])
            if col[ID] not in type_ids_to_delete:
                type_ids_to_delete.append(col[ID])
        # Find the semantic types which meet the other requirements and delete all types which need to be
        possible_types = []
        db_body = {DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE}
        if class_ is not None: db_body[CLASS] = class_
        if property_ is not None: db_body[PROPERTY] = property_
        if namespaces is not None: db_body[NAMESPACE] = {"$in": namespaces}
        if type_ids is None and source_names is None and column_names is None and column_ids is None and models is None:
            deleted = self.db.delete_many(db_body).deleted_count
        else:
            for t in self.db.find(db_body):
                if t[ID] not in possible_types: possible_types.append(t[ID])
            for t in self.db.find(db_body_id):
                if t[ID] not in possible_types: possible_types.append(t[ID])
            # NOTE(review): removing from type_ids_to_delete while iterating it
            # skips the element after each removal — iterate a copy instead.
            for id_ in type_ids_to_delete:
                if id_ not in possible_types:
                    type_ids_to_delete.remove(id_)
            db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: {"$in": type_ids_to_delete}}
            self.db.delete_many(db_body)
            deleted = self.db.delete_many({
                DATA_TYPE: DATA_TYPE_SEMANTIC_TYPE,
                ID: {"$in": type_ids_to_delete}
            }).deleted_count
        if deleted < 1:
            return "No semantic types with the given parameters were found", 404
        return str(deleted) + " semantic types matched parameters and were deleted", 200

    ################ SemanticTypesColumns ################

    def semantic_types_columns_get(self, type_id, column_ids=None, column_names=None, source_names=None, models=None, return_column_data=False):
        """ Returns all of the columns in a semantic type that fit the given parameters.

        :param type_id: The id of the semantic type
        :param column_ids: The possible ids of the columns to be returned
        :param column_names: The possible names of the columns to be returned
        :param source_names: The possible source names of the columns to be returned
        :param models: The possible models of the columns to be returned
        :param return_column_data: True if all of the data in the column should be returned with the columns
        :return: All of the columns in the semantic type that fit the given parameters
        """
        print(type_id)
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None: db_body[ID] = {"$in": column_ids}
        if models is not None: db_body[MODEL] = {"$in": models}
        result = list(self.db.find(db_body))
        if len(result) < 1:
            return "No columns matching the given parameters were found", 404
        return json_response(clean_columns_output(result, return_column_data), 200)

    def semantic_types_columns_post_put(self, type_id, column_name, source_name, model, data=[], force=False):
        """ Create a column in a semantic type, optionally with data.

        :param type_id: Id of the semantic type to create the column in
        :param column_name: The name of the column to be created
        :param source_name: The name of the source of the column to be created
        :param model: The model of the column to be created
        :param data: The (optional) list of data to put into the column on creation
        :param force: True if the column should be replaced if it already exists
        :return: The id of the newly created with a 201 if it was successful, otherwise an error message with the appropriate error code
        """
        # NOTE(review): data=[] is a shared mutable default; it is rebound (not
        # mutated) below, so it is safe today, but `data=None` would be safer.
        column = Column(column_name, source_name)
        column.semantic_type = type_id
        # if the size of the training data is MORE than a threshold value, then sample the threshold values randomly
        if (len(data) > SAMPLE_SIZE):
            data = random.sample(data, SAMPLE_SIZE)
        for value in data:
            column.add_value(value)
        result = self._create_column(column, type_id, column_name, source_name, model, force)
        return result

    def semantic_types_columns_delete(self, type_id, column_ids=None, column_names=None, source_names=None, models=None):
        """ Delete all of the columns in a semantic type that match the given parameters.

        :param type_id: The id of the semantic type to delete the columns from
        :param column_ids: The possible ids of the columns to delete
        :param source_names: The possible names of the columns to delete
        :param column_names: The possible source names of the columns to delete
        :param models: The possible models of the columns to delete
        :return: The number of columns deleted with a 200 if successful, otherwise an error message with an appropriate error code
        """
        db_body = {DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: type_id}
        if source_names is not None: db_body[SOURCE_NAME] = {"$in": source_names}
        if column_names is not None: db_body[COLUMN_NAME] = {"$in": column_names}
        if column_ids is not None: db_body[ID] = {"$in": column_ids}
        if models is not None: db_body[MODEL] = {"$in": models}
        found_columns = list(self.db.find(db_body))
        if len(found_columns) < 1:
            return "No columns were found with the given parameters", 404
        return str(self.db.delete_many(db_body).deleted_count) + " columns deleted successfully", 200

    ################ SemanticTypesColumnData ################

    def semantic_types_column_data_get(self, column_id):
        """ Returns all of the data in the column

        :param column_id: Id of the column to get the data from
        :return: The column and all of its info
        """
        result = list(self.db.find({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}))
        if len(result) < 1:
            return "No column with that id was found", 404
        if len(result) > 1:
            return "More than one column was found with that id", 500
        return json_response(clean_column_output(result[0]), 200)

    def semantic_types_column_data_post_put(self, column_id, body, force=False):
        """ Add or replace data on an existing column Notes: If the column does not exist a 404 will be returned :param column_id: Id of the column to add/replace the data of :param body: An array of the new data :param force: True if the current data in the column should be replaced, false if the new data should just be appended :return: A conformation with a 201 if it was added
successfully or an error message with an appropriate error code if it was not successful """ column_data = self.db.find_one({ DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id }) if column_data.matched_count < 1: return "No column with that id was found", 404 if column_data.matched_count > 1: return "More than one column was found with that id", 500 column = Column(column_data[COLUMN_NAME], column_data[SOURCE_NAME], get_type_from_column_id(column_id)) if not force: column.read_json_to_column(column_data) for value in body: column.add_value(value) data = column.to_json() self.db.update_many(data) return "Column data updated", 201 def semantic_types_column_data_delete(self, column_id): """ Delete the data from the column with the given id :param column_id: Id of the column to delete the data from :return: A deletion conformation with a 200 if successful, otherwise an error message with an appropriate error code """ result = self.db.update_many( { DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id }, {"$set": { DATA: [] }}) if result.matched_count < 1: return "No column with that id was found", 404 if result.matched_count > 1: return "More than one column was found with that id", 500 column = self.db.find_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}) self.db.delete_one({ DATA_TYPE: DATA_TYPE_COLUMN, TYPE_ID: get_type_from_column_id(column_id) }) self.db.delete_one({DATA_TYPE: DATA_TYPE_COLUMN, ID: column_id}) return "Column data deleted", 200 ################ BulkAddModels ################ def bulk_add_models_get(self, model_ids=None, model_names=None, model_desc=None, show_all=False, crunch_data=True): """ Returns the current state of all of the bulk add models. 
:param model_ids: The possible ids of the models to get :param model_names: The possible names of the models to get :param model_desc: The possible descriptions of the models to get :param show_all: True if the whole model should be returned :param crunch_data: False if learnedSemanticTypes should not be generated and the version in the db should be used instead, note that the data in the db is updated every time a get is run with crunch_data=true :return: All of the models that fit the given parameters """ db_body = {DATA_TYPE: DATA_TYPE_MODEL} if model_ids is not None: db_body[ID] = {"$in": model_ids} if model_names is not None: db_body[NAME] = {"$in": model_names} if model_desc is not None: db_body[MODEL_DESC] = model_desc db_result = list(self.db.find(db_body)) if len(db_result) < 1: return "No models were found with the given parameters", 404 # Construct the return body return_body = [] for mod in db_result: o = collections.OrderedDict() o[MODEL_ID] = mod[ID] o[NAME] = mod[NAME] o[DESC] = mod[DESC] if show_all: o[MODEL] = self._update_bulk_add_model( mod[BULK_ADD_MODEL_DATA], mod[MODEL]) if crunch_data else mod[BULK_ADD_MODEL_DATA] return_body.append(o) return json_response(return_body, 200) def bulk_add_models_post(self, model, column_model=DEFAULT_BULK_MODEL): """ Add a bulk add model. 
:param column_model: The model that all of the created columns should have :param model: A dictionary of the model :return: Stats of the data added """ #### Assert the required elements exist if BAC_ID not in model: return "The given model must have an id", 400 if BAC_NAME not in model: return "The given model must have a name", 400 if BAC_DESC not in model: return "The given model must have a description", 400 if BAC_GRAPH not in model: return "The given model must have a graph", 400 if BAC_NODES not in model[BAC_GRAPH]: return "The given model must have nodes within the graph", 400 if len(list(self.db.find({ID: model[BAC_ID]}))) > 0: return "Model id already exists", 409 #### Parse and add the model # Try to add of the given semantic types and columns new_type_count = 0 new_column_count = 0 existed_type_count = 0 existed_column_count = 0 for n in model[BAC_GRAPH][BAC_NODES]: if n.get(BAC_USER_SEMANTIC_TYPES): for ust in n[BAC_USER_SEMANTIC_TYPES]: semantic_status = self.semantic_types_post_put( ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI], False) if semantic_status[1] == 201: new_type_count += 1 elif semantic_status[1] == 409: existed_type_count += 1 elif semantic_status[1] == 400: return semantic_status else: return "Error occurred while adding semantic type: " + str( ust), 500 column_status = self._create_column( get_type_id(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI]), n[BAC_COLUMN_NAME], model[BAC_NAME], column_model) if column_status[1] == 201: new_column_count += 1 elif column_status[1] == 409: existed_column_count += 1 elif column_status[1] == 400: return column_status else: return "Error occurred while adding column for semantic type: " + str( ust), 500 # Nothing bad happened when creating the semantic types and columns, so add the model to the DB self.db.insert_one({ DATA_TYPE: DATA_TYPE_MODEL, ID: model["id"], NAME: model[BAC_NAME], DESC: model["description"], MODEL: column_model, BULK_ADD_MODEL_DATA: model }) return "Model and columns 
added, " + str(new_type_count) + " semantic types created, " + \ str(existed_type_count) + " semantic types already existed, " + \ str(new_column_count) + " columns created, and " + \ str(existed_column_count) + " columns already existed.", 201 def bulk_add_models_delete(self, model_ids=None, model_names=None, model_desc=None): """ Delete all of the bulk add models which fit the given parameters :param model_ids: The possible ids of the models to delete :param model_names: The possible names of the models to delete :param model_desc: The possible descriptions of the models to delete :return: The amount of models deleted with a 200 if successful, otherwise an error message with the appropriate code """ db_body = {DATA_TYPE: DATA_TYPE_MODEL} if model_ids is not None: db_body[ID] = {"$in": model_ids} if model_names is not None: db_body[NAME] = {"$in": model_names} if model_desc is not None: db_body[MODEL_DESC] = model_desc deleted_count = self.db.delete_many(db_body).deleted_count if deleted_count < 1: return "No models were found with the given parameters", 404 return str(deleted_count) + " models deleted successfully", 200 ################ BulkAddModelData ################ def bulk_add_model_data_get(self, model_id, crunch_data): """ Returns the current state of the bulk add model :param model_id: The id of the model to get :param crunch_data: False if learnedSemanticTypes should not be generated and the version in the db should be used instead, note that the data in the db is updated every time a get is run with crunch_data=true :return: The current state of the bulk add model """ db_result = list( self.db.find({ DATA_TYPE: DATA_TYPE_MODEL, ID: model_id })) if len(db_result) < 1: return "A model was not found with the given id", 404 if len(db_result) > 1: return "More than one model was found with the given id", 500 db_result = db_result[0] return json_response( self._update_bulk_add_model(db_result[BULK_ADD_MODEL_DATA], db_result[MODEL]) if crunch_data else 
db_result[BULK_ADD_MODEL_DATA], 200) def bulk_add_model_data_post(self, model_id, column_model, data): """ Add data to the service with a bulk add model :param model_id: The id of the model to add off of :param column_model: The model of the columns being used with that model :param data: The list of dictionaries with all of the data to add :return: A conformation message with a 201 if it was successful, otherwise an error message with the appropriate code """ # Get the model and parse the json lines model = list(self.db.find({DATA_TYPE: DATA_TYPE_MODEL, ID: model_id})) if len(model) < 1: return "The given model was not found", 404 if len(model) > 1: return "More than one model was found with the id", 500 model = model[0][BULK_ADD_MODEL_DATA] # Get all of the data in each column for n in model[BAC_GRAPH][BAC_NODES]: column_data = [] for line in data: if n.get(BAC_COLUMN_NAME): column_data.append(line[n[BAC_COLUMN_NAME]]) # Add it to the db if n.get(BAC_USER_SEMANTIC_TYPES): for ust in n[BAC_USER_SEMANTIC_TYPES]: result = self.semantic_types_column_data_post_put( get_column_id( get_type_id(ust[BAC_CLASS][BAC_URI], ust[BAC_PROPERTY][BAC_URI]), n[BAC_COLUMN_NAME], model[BAC_NAME], column_model), column_data, False)[1] if result == 201: continue elif result == 404: return "A required column was not found", 404 else: return "Error occurred while adding data to the column", 500 return "Data successfully added to columns", 201
class DataBase: def __init__(self, name: str, auth_data: dict): db_name = auth_data['questionnaire'][name] user = auth_data['user'] password = auth_data['password'] appeal = f'mongodb+srv://{user}:{password}@cluster0.sonqc.mongodb.net/{db_name}?retryWrites=true&w=majority' self.db = MongoClient(appeal)['questionnaire'][db_name] self.__lambda_fun() def __lambda_fun(self): self._get_last_id = lambda: len(list(self.db.find())) - 1 self.get_list_data = lambda: list(self.db.find()) self.get_questions_ids = lambda: [el['_id'] for el in self.get_list_data()] self.remove_all_data = lambda: self.db.delete_many({}) self.remove_questions = lambda *ids: [self.db.delete_one({'_id': id_}) for id_ in ids] self._remove_arg = lambda question_id, arg: self.db.update_one({ '_id': question_id }, { '$pull': {'answers': arg} }) def add(self, data: str, question_id: int = None): if question_id is None: last_id = self._get_last_id() post = {'_id': last_id + 1, 'question': data, 'answers': []} self.db.insert_one(post) return self.db.update_one({'_id': question_id}, {'$push': {'answers': data}}) def show_all(self, file_name: str): rows = self.get_list_data() columns = [*rows[0].keys()] if rows else [] with open(f'{file_name}.csv', 'w', newline='') as file: writer = csv.DictWriter(file, delimiter=';', fieldnames=columns) writer.writeheader() writer.writerows(rows) def show_ans(self, question_id: int): file_name = str(question_id) columns = ['answer_id', 'answer'] rows = enumerate(self.db.find_one({'_id': question_id})['answers']) with open(f'{file_name}.csv', 'w', newline='') as file: writer = csv.writer(file, delimiter=';') writer.writerow(columns) writer.writerows(rows) def remove_answers(self, question_id: int, *ids: int): questions = self.get_list_data() for question in questions: if question['_id'] == question_id: answers = question['answers'] break else: return for id_ in ids: answer = answers[id_] self._remove_arg(question_id, answer)
class Stat:
    '''
    This class is used for
    1. Putting stat_list corresponding to a specific timestamp
    2. Saving in mongoDB
    '''

    def __init__(self, config_file):
        self.docker = dockerGuest(config_file)
        self.config_file = config_file
        # connecting to mongodb; collection name comes from the config file
        self.collection = MongoClient()['test'][self.get_collection()]

    def get_collection(self):
        '''
        Getting mongodb collection(table) name from config file
        (This method might be removed later...)
        '''
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_path = os.path.join(dir_path, self.config_file)
        return json.load(open(file_path))["collection"]

    def get_config_file(self):
        return self.config_file

    @staticmethod
    def get_time():
        '''Getting current timestamp.

        Storing python datetime object in mongodb. This might be changed later.
        '''
        time = datetime.now()
        return time

    def data_to_save(self):
        '''
        Formats data to be saved in mongodb.
        JSON: { time: timestamp, stats: [{container1 stat}, {container2 stat} ...] }
        '''
        stats = self.docker.get_stats()
        if stats == None:
            # No stats means there are no running containers.
            return None
        global_stat_dict = {}
        global_stat_dict["time"] = Stat.get_time()
        global_stat_dict["stats"] = stats
        return global_stat_dict

    def save(self, cap=60):
        ''' Saving in DB'''
        new_doc = self.data_to_save()
        # NOTE(review): collection.count() is deprecated in modern pymongo
        # (count_documents({}) is the replacement) — confirm driver version.
        if self.collection.count() == cap:
            ''' If there are 60 items in db we delete the oldest timestamp
            data from db and insert the new item. We cannot simply delete an
            item because insertion in mongodb collection is found out to be
            random. '''
            # Oldest timestamp. Simple if datetime objects are stored.
            top_doc_time = min(doc['time'] for doc in self.collection.find())
            self.collection.delete_one({'time': top_doc_time})  # delete oldest timestamp
            logger.info("Deleted timestamp is...{}".format(top_doc_time))
        # Insert new data. NOTE(review): reconstructed placement — the insert
        # appears to run on every call (eviction only frees a slot); confirm
        # against the original file's indentation.
        self.collection.insert_one(new_doc)
        logger.info("Saved in DB...{}".format(new_doc["time"]))

    def save_data(self):
        ''' method not used '''
        # NOTE(review): is_db_full/make_space_db are not defined on this class;
        # this dead method would raise AttributeError if ever called.
        data = self.data_to_save()
        if data != None:
            if self.is_db_full():
                self.make_space_db()
            self.collection.insert_one(data)
            logger.info('Saved in DB...')
from pymongo import MongoClient from mymodules import cleanings coll = MongoClient().exams['minable'] for doc in coll.find(): cleaned = False old_doc = doc for key in list(doc.keys()): if doc[key] == 'n.c.': del doc[key] cleaned = True if cleaned: coll.delete_one(old_doc) coll.insert_one(doc) cleanings.FinalCleaner(coll).clean([], [], ['Insegnamento']) MongoClient().exams['rawStudentsPr1013'].drop() MongoClient().exams['teachEval_aggr'].drop() MongoClient().exams['sprod'].drop()
class Database:
    """CRUD facade over one MongoDB collection of named config documents.

    Every method returns ``[ok: bool, message: str, *extra]`` instead of
    raising, so callers can branch on ``result[0]``.
    """

    def __init__(self):
        self.connect_url = "mongodb://{}:{}@{}:{}/".format(
            USERNAME, PASSWORD, MONGODB_HOST, MONGODB_PORT)
        self.client = None

    def connect(self):
        """Select the configured database/collection.

        NOTE(review): pymongo connects lazily, so some of these errors may
        only surface on the first real operation, not here.
        """
        try:
            self.client = MongoClient(self.connect_url)
            self.client = self.client[DATABASE_NAME]    # Selecting DB
            self.client = self.client[COLLECTION_NAME]  # Selecting Collection
            return [True, "Success"]
        except errors.ServerSelectionTimeoutError:
            return [False, "Failed to Connect DB"]
        except errors.ConfigurationError:
            # Typo fix: was "Configurarion Error".
            return [False, "Configuration Error"]
        except errors.ConnectionFailure:
            return [False, "Connection Failure"]

    def list_documents(self):
        """Return a cursor over every document, with _id suppressed."""
        try:
            cursor = self.client.find({}, {'_id': False})
            return [True, "Success", cursor]
        except Exception:
            return [False, "Internal Error"]

    def create_document(self, document):
        """Insert one document; duplicate keys are reported, not raised."""
        try:
            self.client.insert_one(document)
            return [True, "Success"]
        except errors.DuplicateKeyError:
            return [False, "The config name is already exist"]

    def get_document(self, doc):
        """Fetch the document whose 'name' equals doc (without _id)."""
        try:
            cursor = self.client.find_one({"name": doc}, {'_id': False})
            if cursor is not None:
                return [True, "Success", cursor]
            else:
                return [False, "No document found"]
        except Exception as e:
            return [False, "Internal Error"]

    def update_document(self, config_name, data):
        """Replace the named document's payload wholesale."""
        try:
            result = self.client.replace_one({"name": config_name}, {
                "name": config_name,
                "data": data
            })
            if result.acknowledged:
                return [True, "Success", result]
            else:
                raise Exception
        except Exception:
            return [False, "Internal Error"]

    def purge_document(self, document):
        """Delete the named document; returns the deleted count."""
        try:
            cursor = self.client.delete_one({"name": document})
            return [True, "Success", cursor.deleted_count]
        except Exception:
            return [False, "Internal Error"]

    def query(self, key, val, config_name=None):
        """Find documents whose unwound 'data' array has data.<key> == val.

        BUG FIX: the original built the aggregation pipeline by formatting the
        values into a JSON string and re-parsing it, which broke (json.JSONDecodeError)
        whenever key/val/config_name contained quotes or backslashes. The
        pipeline is now built directly as Python dicts; values are passed
        through str() to preserve the old string-only matching semantics.
        """
        match_stage = {} if config_name is None else {"name": str(config_name)}
        pipeline = [
            {"$match": match_stage},
            {"$unwind": "$data"},
            {"$match": {"data." + str(key): str(val)}},
        ]
        try:
            cursor = self.client.aggregate(pipeline)
            if cursor is not None:
                return [True, "Success", cursor]
            else:
                return [False, "No document found"]
        except Exception as e:
            return [False, str(e)]
class betCog(commands.Cog):
    """Discord cog implementing a simple betting game backed by MongoDB.

    Each bet is one document: {'betid': str, 'bets': {user_id: (value, outcome),
    'default': [allowed outcomes]}}.

    Fix vs. original: Collection.update(..., multi=False) was deprecated and
    removed in PyMongo 4 — replaced with the equivalent update_one(). The
    unused `global db` statement was dropped.
    """

    def __init__(self, client):
        self.client = client
        self.mclient = os.environ.get('mclient')
        self.db = MongoClient(self.mclient).Aeee.game

    @commands.command(name='betStart')
    async def betStart(self, ctx):
        """Open a new bet (restricted to the two privileged roles)."""
        Vrole = discord.utils.get(ctx.message.guild.roles, name="Власть")
        Prole = discord.utils.get(ctx.message.guild.roles, name="парламентъ")
        if Vrole in ctx.message.author.roles or Prole in ctx.message.author.roles:
            # Command shape: betStart <betID> <subject> <outcome1/outcome2/...>
            betID = ctx.message.content.split(' ')[1]
            betSubj = ctx.message.content.split(' ')[2]
            betResults = ctx.message.content.split(' ')[3]
            splittedResult = betResults.split('/')
            dbInsBody = {'betid': betID, 'bets': {'default': splittedResult}}
            self.db.insert_one(dbInsBody)
            await ctx.send(
                f'Принимаются ставки на {betSubj}, по исходам: {betResults}, айди: {betID}.\n*для участия пропишите $bet {betID} %ставка% %исход%.*'
            )

    @commands.command(name='betEnd')
    async def betEnd(self, ctx):
        """Close a bet, paying winners 1.5x and debiting losers 1.5x."""
        Vrole = discord.utils.get(ctx.message.guild.roles, name="Власть")
        Prole = discord.utils.get(ctx.message.guild.roles, name="парламентъ")
        if Vrole in ctx.message.author.roles or Prole in ctx.message.author.roles:
            betID = ctx.message.content.split(' ')[1]
            betResults = ctx.message.content.split(' ')[2]
            bets = self.db.find_one({'betid': betID})['bets']
            wValue, lValue = 0, 0
            for bet in bets:
                if betResults in bets[bet] and bet != 'default':
                    # Winner: credit 1.5x the wagered amount.
                    uI = dataBase.UI(bet)
                    dataBase.addExp(uI[1], round(int(bets[bet][0]) * 1.5), bet)
                    wValue += int(bets[bet][0])
                elif bet != 'default':
                    # Loser: debit 1.5x and drop level if experience fell below it.
                    uID, uExp, Ulvl, uAlias = dataBase.UI(bet)
                    dataBase.addExp(uExp, round(-int(bets[bet][0]) * 1.5), bet)
                    lvl = int(uExp**(1 / 4))
                    if lvl < Ulvl:
                        dataBase.removeLvl(uID, lvl)
                    lValue += int(bets[bet][0])
            self.db.delete_one({'betid': betID})
            await ctx.send(
                f'победа исхода: {betResults} id: {betID}\n*выиграно {wValue}, проиграно {lValue}*'
            )

    @commands.command(name='bet')
    async def bet(self, ctx):
        """Place a bet: refuses duplicates and unknown outcomes."""
        betID = ctx.message.content.split(' ')[1]
        betValue = ctx.message.content.split(' ')[2]
        betResult = ctx.message.content.split(' ')[3]
        if str(ctx.message.author.id) not in self.db.find_one({
                'betid': betID
        })['bets'] and betResult.lower() in self.db.find_one(
                {'betid': betID})['bets']['default']:
            # FIX: Collection.update() no longer exists in PyMongo 4;
            # update_one() is the exact single-document ($set, multi=False)
            # equivalent.
            self.db.update_one({'betid': betID}, {
                '$set': {
                    f'bets.{ctx.message.author.id}': (betValue, betResult)
                }
            })
            await ctx.send(f'вы поставили {betValue} на {betResult}')
        else:
            await ctx.send(
                f'произошла ошибка.\nвозможно вы уже сделали ставку или некорректен исход'
            )
class FSMongoClient(object):
    """
    This class manages the communications with the remote database, which
    stores the file system.

    Object Properties:
        self.fs_collection: stores a reference to the MongoDB collection in
            which all the files are stored
        self.root_id: stores the ObjectId of the document representing the root
    """

    def __init__(self, url, port):
        # Retrieve the collection FUSEPY_FS from the FS_DB database.
        self.fs_collection = MongoClient(url, port).FS_DB.FUSEPY_FS
        fs_root = self.fs_collection.find_one({"name": '/'})
        if fs_root:
            # File system root exists already.
            print 'Root exists. loading existing root...'
            self.root_id = fs_root['_id']
        else:
            # No root is defined yet, so insert it (directory with st_nlink=2,
            # mode 0755 — this file is Python 2).
            print 'No root exists. Creating a new root...'
            now = time()
            meta_data = dict(st_mode=(S_IFDIR | 0755), st_ctime=now,
                             st_mtime=now, st_atime=now, st_nlink=2)
            fs_root = dict(name='/', type='dir', meta=meta_data, data={})
            self.root_id = self.fs_collection.insert_one(fs_root).inserted_id

    @staticmethod
    def _encode_dict(data_dict):
        # Used to overcome MongoDB's inability to store keys that contain '.'
        # or start with '$': every key is rewritten as '_'-joined hex codes of
        # its characters (see _decode_dict for the inverse).
        encoded_dict = {}
        for name, _id in data_dict.items():
            encoded_name = '_'.join([str(format(ord(x), 'x')) for x in name])
            encoded_dict[encoded_name] = _id
        return encoded_dict

    @staticmethod
    def _decode_dict(data_dict):
        # Inverse of _encode_dict: '_'-separated hex codes back to characters.
        decoded_dict = {}
        for name, _id in data_dict.items():
            decoded_name = ''.join([chr(int(x, 16)) for x in name.split('_')])
            decoded_dict[decoded_name] = _id
        return decoded_dict

    def id_lookup(self, file_id):
        # Retrieve a file from the DB using its _id.
        # The _id must be an object of type ObjectId.
        assert type(file_id) == ObjectId
        file_dict = self.fs_collection.find_one({'_id': file_id})
        if file_dict['type'] == 'dir':
            # Directory entries are stored hex-encoded; decode for callers.
            file_dict['data'] = self._decode_dict(file_dict['data'])
        return file_dict

    def insert_file(self, new_file_dict):
        # Insert a file to the DB. All the file contents should be in
        # new_file_dict. This method will modify the new_file_dict provided to
        # include a new property '_id' since the dict now represents a file
        # that was inserted into the database.
        assert {'name', 'type', 'meta', 'data'} == set(new_file_dict.keys())
        self.fs_collection.insert_one(new_file_dict)

    def update_file(self, file_id, field_to_update, field_content):
        # Update a certain file's property. The property must be one of:
        # name, type, meta, data. field_content is the new value.
        assert field_to_update in ['name', 'type', 'meta', 'data']
        if field_to_update == 'data' and type(field_content) == dict:
            # A dict payload means we have a dir: hex-encode its entry names.
            field_content = self._encode_dict(field_content)
        elif field_to_update == 'name':
            # Names get the same hex encoding as directory-entry keys.
            field_content = '_'.join([str(format(ord(x), 'x')) for x in field_content])
        self.fs_collection.update_one({"_id": file_id},
                                      {"$set": {field_to_update: field_content}})

    def remove_file(self, file_id):
        # Remove a file from the DB by supplying its _id.
        assert type(file_id) == ObjectId
        self.fs_collection.delete_one({'_id': file_id})

    def print_db(self):
        # Used for debugging: dump every stored document to stdout.
        print "DATABASE CONTENT\n\t BEGIN DB LIST:"
        for index, document in enumerate(self.fs_collection.find()):
            print "------------ {0} -----------".format(index)
            print "_id: ", document['_id']
            print "name: ", document['name']
            print "type: ", document['type']
            print "data: ", document['data']
        print "\t END DB LIST"
label = db.find_one({"entities.name": argv[2]}, { "label": 1, "_id": 0 })['label'] delete_entity(argv[2]) if not list( db.aggregate([{ '$match': { 'label': label } }, { '$project': { 'label': 1, 'size': { '$size': '$entities' } } }]))[0]['size']: db.delete_one({'label': label}) print(argv[2] + " deleted.") elif argv[1] in ["--add-note", "-an"]: ensure_argc(4) ensure_internet_connection() ensure_entity(argv[2]) db.update_one({"entities.name": argv[2]}, {'$set': { 'entities.$.note': argv[3] }}) print("Note added.")
class mongodb():
    """Lightweight wrapper over one pymongo collection.

    Keyword arguments use ``__`` as a dotted-path separator, e.g.
    ``push(query, tags__0="x")`` issues
    ``update_one(query, {"$push": {"tags.0": "x"}})``.

    Supported operations: push / pull (arrays), unset (documents), pop, set,
    set_all, push_all, add (insert_one), get (find_one), filter (find),
    search ($text), delete, search_index (create_index).

    Fixes vs. original:
    * push/pull/unset/pop/set iterated ``for i, j in kwargs`` — iterating the
      dict yields keys, so tuple-unpacking them was broken; all paths now go
      through ``_dotted`` which uses ``.items()``.
    * ``query={}`` / ``value={}`` mutable defaults were mutated via
      ``.update()``, leaking state between calls; replaced with None sentinels
      (defaults behave identically, caller dicts are no longer mutated).
    """

    def __init__(self, collection="raw", use=False):
        self.collection_name = collection
        self.use = use
        if self.use:
            self.credential = MongoCredential.objects.get(status=True, use=self.use)
        elif self.collection_name:
            self.credential = MongoCredential.objects.get(
                status=True, collection=self.collection_name)
        else:
            # Only reachable when both collection and use are falsy.
            print("Provide atleast collection name or use for mongo operation")
            self.credential = MongoCredential.objects.find(status=True)[0]
        self.collection = MongoClient(self.credential.uri)[self.credential.db][
            self.credential.collection]

    @staticmethod
    def _dotted(kwargs):
        # Translate foo__bar keyword names into MongoDB "foo.bar" paths.
        return {key.replace("__", "."): val for key, val in kwargs.items()}

    def push(self, query, **kwargs):
        """$push the given dotted-path values onto arrays."""
        self.collection.update_one(query, {"$push": self._dotted(kwargs)})

    def pull(self, query, **kwargs):
        """$pull the given dotted-path values from arrays."""
        self.collection.update_one(query, {"$pull": self._dotted(kwargs)})

    def unset(self, query, **kwargs):
        """$unset the given dotted-path fields."""
        self.collection.update_one(query, {"$unset": self._dotted(kwargs)})

    def pop(self, query, **kwargs):
        """$pop from arrays (value 1 = last element, -1 = first)."""
        self.collection.update_one(query, {"$pop": self._dotted(kwargs)})

    def set(self, query, **kwargs):
        """$set the given dotted-path fields."""
        self.collection.update_one(query, {"$set": self._dotted(kwargs)})

    def push_all(self, query, value=None, **kwargs):
        """$push a pre-built document merged with dotted-path kwargs."""
        update = dict(value) if value else {}
        update.update(self._dotted(kwargs))
        self.collection.update_one(query, {"$push": update})

    def set_all(self, query, value=None, **kwargs):
        """$set a pre-built document merged with dotted-path kwargs."""
        update = dict(value) if value else {}
        update.update(self._dotted(kwargs))
        self.collection.update_one(query, {"$set": update})

    def add(self, value=None, **kwargs):
        """insert_one the given document merged with dotted-path kwargs."""
        document = dict(value) if value else {}
        document.update(self._dotted(kwargs))
        self.collection.insert_one(document)

    def delete(self, query=None, **kwargs):
        """delete_one matching the query merged with dotted-path kwargs."""
        filter_ = dict(query) if query else {}
        filter_.update(self._dotted(kwargs))
        self.collection.delete_one(filter_)

    def get(self, query=None, sort=None, **kwargs):
        """find_one matching the query merged with dotted-path kwargs."""
        filter_ = dict(query) if query else {}
        filter_.update(self._dotted(kwargs))
        return self.collection.find_one(filter_, sort=[] if sort is None else sort)

    def filter(self, query=None, sort=None, **kwargs):
        """find (cursor) matching the query merged with dotted-path kwargs."""
        filter_ = dict(query) if query else {}
        filter_.update(self._dotted(kwargs))
        return self.collection.find(filter_, sort=[] if sort is None else sort)

    def search(self, query, limit=10):
        """Full-text $text search, capped at `limit` results."""
        return self.collection.find({"$text": {"$search": query}}).limit(limit)

    def search_index(self, search_index=[]):
        """
        e.g. : search_index = [("name","text"),("description","text")]
        """
        self.collection.create_index(search_index)
class MongoDatabase(Database): """ This class implements the abstract class Database and communicates with the MongoDB database. It has several methods for this communication. """ def __init__(self, collection): self._devices = MongoClient()["Hestia"][collection] def get_all_devices(self): """Instantiates all devices in database""" devices = [] for data in self._devices.find(): _id = data["_id"] device = self._get_class(data["module"], data["class"])(self, _id) devices.append(device) return devices def get_device(self, device_id): """Instantiates the device with the given device_id""" data = self.__get_device_data(device_id) device = self._get_class(data["module"], data["class"]) return device(self, device_id) def add_device(self, plugin): """Adds the given plugin info as a new device""" plugin["_id"] = str(ObjectId()) self._devices.insert_one(plugin) def delete_device(self, device_id): self._devices.delete_one({"_id": device_id}) def update_field(self, device_id, field, new_value): self._devices.find_one_and_update({"_id": device_id}, {"$set": { field: new_value }}) def get_field(self, device_id, field): data = self.__get_device_data(device_id) return data[field] def get_activator_field(self, device_id, activator_id, field): data = self.__get_device_data(device_id) activator = self.__get_activator(data, activator_id) return activator[field] def update_activator_field(self, device_id, activator_id, field, new_value): self._devices.find_one_and_update( {"_id": device_id}, {"$set": { "activators." + activator_id + "." + field: new_value }}) def delete_all_devices(self): self._devices.delete_many({}) def __get_device_data(self, device_id): """Get data of device based on its id""" data = self._devices.find_one(device_id) if data is None: raise NotFoundException("device") else: return data @staticmethod def __get_activator(data, activator_id): try: return data["activators"][activator_id] except KeyError as exception: raise NotFoundException("activator")