def clean_feature(self):
    """
    Remove the stored Feature / FTransform records and pickles matching a node.

    An ``FNode`` is built from names taken from the enclosing scope
    (``core_docs``, ``init_fnodes[key]``, ``SimpleDummy()``, ``init_lnode``)
    and looked up through ``FConnector.collect_doc``.  When a document is
    found, the ``Feature`` document (by ``_id``) and the ``FTransform``
    document (by ``essentials.f_transform``) are deleted from the
    ``bucket`` / ``project`` database, and every ``"home"`` entry of the
    document's ``filepaths`` has its two ``.pkl`` files removed from disk.

    :return: the ``FNode`` that was built, whether or not a document existed.
    :raises NotImplementedError: if a filepath entry has a ``"bucket"`` key
        (and no ``"home"`` key).
    :raises ValueError: if a filepath entry has neither key.
    """
    a_node = FNode(core_docs, [init_fnodes[key]], SimpleDummy(), init_lnode)
    fc = FConnector(matched=[])
    a_doc = fc.collect_doc(a_node)
    if not a_doc:
        # Nothing stored for this node, so there is nothing to clean.
        return a_node

    f_id = a_doc["_id"]
    ft_id = a_doc["essentials"]["f_transform"]

    f_collection = connect_collection(bucket, project, "Feature")
    f_collection.delete_one({"_id": f_id})
    ft_collection = connect_collection(bucket, project, "FTransform")
    ft_collection.delete_one({"_id": ft_id})

    # ``filepaths`` may be empty/None; treat that as "no files to remove".
    for path in a_doc["filepaths"] or ():
        if "home" in path:
            # Each entry is a mapping whose values, in order, form the
            # directory prefix of the pickles.
            prefix = list(path.values())
            fpath = "/".join(prefix + ["Feature", str(f_id) + ".pkl"])
            ftpath = "/".join(prefix + ["FTransform", str(ft_id) + ".pkl"])
            os.remove(fpath)
            os.remove(ftpath)
        elif "bucket" in path:
            # Removing pickles stored in a bucket is not supported yet.
            raise NotImplementedError
        else:
            raise ValueError("Path unknown")
    return a_node
def insert_tag(obj, doc):
    """
    Set the fields of ``doc`` on the database document that tracks ``obj``.

    The collection is obtained from ``connect_collection`` using
    ``obj.db["host"]``, ``obj.db["project"]`` and ``obj.decide_element()``.
    The document is matched on ``_id == obj.obj_id`` and updated with
    ``$set``; the update is sent with ``upsert=True``.

    :param obj: tracked object exposing ``obj_id``, ``db`` and
        ``decide_element()``.
    :param doc: dict of field/value pairs to set.  It is deep-copied before
        being placed in the update query.
    :raises TypeError: if ``doc`` is not a dict.
    :raises AttributeError: if ``obj`` has no (or a falsy) ``obj_id``, no
        ``db`` attribute, or no ``decide_element`` method.
    """
    if not isinstance(doc, dict):
        raise TypeError(
            "The new tag(s) should be encoded into a dictionary.")

    # getattr with a default: a missing attribute gets the same explanatory
    # message as an empty one instead of Python's generic AttributeError.
    obj_id = getattr(obj, "obj_id", None)
    if not obj_id:
        raise AttributeError(
            "The obj passed has no obj_id attribute, can't find the document."
        )

    try:
        db_location = obj.db
    except AttributeError as err:
        raise AttributeError(
            "The obj passed has no db attribute, can't find the location of the document."
        ) from err

    # Only the attribute lookup is guarded, so an AttributeError raised
    # *inside* decide_element() is not mislabelled as a missing method.
    try:
        decide_element = obj.decide_element
    except AttributeError as err:
        msg = "The object passed has no decide_element method. Is this object originally designed to be tracked?"
        raise AttributeError(msg) from err
    element = decide_element()

    target_db = connect_collection(db_location["host"],
                                   db_location["project"], element)
    qry = {"$set": deepcopy(doc)}
    target_db.update_one({"_id": obj_id}, qry, upsert=True)
def insert_subdoc(obj, field, subdoc):
    """
    Push ``subdoc`` onto the ``field`` array of the document tracking ``obj``.

    The collection is obtained from ``connect_collection`` using
    ``obj.db["host"]``, ``obj.db["project"]`` and ``obj.decide_element()``.
    The document is matched on ``_id == obj.obj_id`` and updated with
    ``$push``; the update is sent with ``upsert=True``.

    :param obj: tracked object exposing ``obj_id``, ``db`` and
        ``decide_element()``.
    :param field: name of the field to push onto; must be a ``str``.
    :param subdoc: dict to append.  It is deep-copied before being placed in
        the update query.
    :raises TypeError: if ``subdoc`` is not a dict.
    :raises AttributeError: if ``obj`` has no (or a falsy) ``obj_id``, no
        ``db`` attribute, or no ``decide_element`` method.
    :raises ValueError: if ``field`` is not a string.
    """
    if not isinstance(subdoc, dict):
        raise TypeError(
            "The new updating query should be encoded into a dictionary.")

    # getattr with a default: a missing attribute gets the same explanatory
    # message as an empty one instead of Python's generic AttributeError.
    obj_id = getattr(obj, "obj_id", None)
    if not obj_id:
        raise AttributeError(
            "The obj passed has no obj_id attribute, can't find the document."
        )

    try:
        db_location = obj.db
    except AttributeError as err:
        raise AttributeError(
            "The obj passed has no db attribute, can't find the location of the document."
        ) from err

    # Only the attribute lookup is guarded, so an AttributeError raised
    # *inside* decide_element() is not mislabelled as a missing method.
    try:
        decide_element = obj.decide_element
    except AttributeError as err:
        msg = "The object passed has no decide_element method. Is this object originally designed to be tracked?"
        raise AttributeError(msg) from err
    element = decide_element()

    if not isinstance(field, str):
        raise ValueError("A field has to be a string")

    target_db = connect_collection(db_location["host"],
                                   db_location["project"], element)
    qry = {"$push": {field: deepcopy(subdoc)}}
    target_db.update_one({"_id": obj_id}, qry, upsert=True)
def init_doc(self, obj):
    """
    Insert a fresh document describing ``obj`` and return its id.

    The stored document has three keys: ``"essentials"`` (the result of
    ``self.mongo_doc_generator(obj.essentials)``, later used to identify the
    object in the db), ``"datetime"`` (``datetime.now()`` at call time) and
    ``"filepaths"`` (``obj.filepaths``).  The collection is resolved from
    ``obj.db["host"]``, ``obj.db["project"]`` and ``obj.decide_element()``.

    :param obj: object to register; needs ``essentials``, ``filepaths``,
        ``db`` and ``decide_element()``.
    :return: the ``inserted_id`` reported by ``insert_one``.
    :raises AttributeError: if ``obj`` has no ``essentials`` attribute or it
        is ``None``.
    """
    if not hasattr(obj, "essentials"):
        raise AttributeError(
            "An object to be saved in db is supposed to have the essentials attribute"
        )
    if obj.essentials is None:
        raise AttributeError(
            "An object to be saved in db should not have NoneType as its essentials"
        )

    announcement = "Saving this object into db: {}".format(type(obj))
    print(announcement)

    created_at = datetime.now()
    record = {
        "essentials": self.mongo_doc_generator(obj.essentials),
        "datetime": created_at,
        "filepaths": obj.filepaths,
    }

    location = obj.db
    element = obj.decide_element()
    collection = connect_collection(location["host"], location["project"],
                                    element)
    return collection.insert_one(record).inserted_id
def delete_by_lst_obj_id(lst_obj_id, element, db):
    """
    Delete every document whose ``_id`` is listed in ``lst_obj_id``.

    :param lst_obj_id: ids to delete; used as the operand of ``$in``.
    :param element: name of the collection to delete from.
    :param db: mapping with the ``"host"`` and ``"project"`` of the database.
    """
    host, project = db["host"], db["project"]
    collection = connect_collection(host=host,
                                    database=project,
                                    collection=element)
    id_filter = {"_id": {"$in": lst_obj_id}}
    collection.delete_many(id_filter)
def search_obj_by_tag(tag, element, db):
    """
    Look up a single document by its ``tag`` field.

    :param tag: value the document's ``"tag"`` field must equal.
    :param element: name of the collection to search.
    :param db: mapping with the ``"host"`` and ``"project"`` of the database.
    :return: whatever ``find_one`` returns for the query ``{"tag": tag}``.
    """
    host, project = db["host"], db["project"]
    collection = connect_collection(host=host,
                                    database=project,
                                    collection=element)
    return collection.find_one({"tag": tag})
def search_by_obj_id(obj_id, element, db):
    """
    Look up a single document by its ``_id``.

    :param obj_id: value the document's ``"_id"`` must equal.
    :param element: name of the collection to search.
    :param db: mapping with the ``"host"`` and ``"project"`` of the database.
    :return: whatever ``find_one`` returns for the query ``{"_id": obj_id}``.
    """
    host, project = db["host"], db["project"]
    collection = connect_collection(host=host,
                                    database=project,
                                    collection=element)
    return collection.find_one({"_id": obj_id})
def insert_subdoc_by_id(obj_id, element, db, field, subdoc):
    """
    Push ``subdoc`` onto the ``field`` array of the document with ``obj_id``.

    The update uses ``$push`` and is sent with ``upsert=True``.

    :param obj_id: ``_id`` of the document to update.
    :param element: name of the collection holding the document.
    :param db: mapping with the ``"host"`` and ``"project"`` of the database.
    :param field: name of the field to push onto; must be a ``str``.
    :param subdoc: dict to append; a deep copy is what gets sent.
    :raises TypeError: if ``subdoc`` is not a dict.
    :raises ValueError: if ``field`` is not a string.
    """
    # Validate both inputs before touching the database.
    if type(subdoc) is not dict and not isinstance(subdoc, dict):
        raise TypeError(
            "The new updating query should be encoded into a dictionary.")
    if not isinstance(field, str):
        raise ValueError("A field has to be a string")

    collection = connect_collection(db["host"], db["project"], element)
    payload = deepcopy(subdoc)
    collection.update_one(
        {"_id": obj_id},
        {"$push": {field: payload}},
        upsert=True,
    )
def search_by_essentials(self, obj, db):
    """
    Find all stored documents whose essentials match those of ``obj``.

    ``obj.essentials`` is deep-copied, passed through
    ``self.mongo_doc_generator`` and turned into one ``essentials.<key>``
    equality condition per key.

    :param obj: object exposing ``essentials`` and ``decide_element()``.
    :param db: mapping with the ``"host"`` and ``"project"`` of the database.
    :return: list of the documents returned by ``find``.
    """
    host, project = db["host"], db["project"]
    element = obj.decide_element()
    collection = connect_collection(host=host,
                                    database=project,
                                    collection=element)

    wanted = self.mongo_doc_generator(deepcopy(obj.essentials))
    qry = {"essentials.{}".format(name): wanted[name] for name in wanted}
    # TODO: a stored document may carry more essentials keys than `wanted`
    # and still match; a filter such as the following might rule that out:
    # $where: function() { return Object.keys(this.essentials).length === len(essen) }
    return list(collection.find(qry))
def init_doc(self, obj, update_dict=True):
    """
    Insert a fresh document describing ``obj`` and return its id.

    The stored document has three keys: ``"essentials"`` (the result of
    ``self.mongo_doc_generator(obj.essentials)``, later used to identify the
    object in the db), ``"datetime"`` (``datetime.now()`` at call time) and
    ``"filepaths"`` (``obj.filepaths``).

    :param obj: object to register; needs ``essentials``, ``filepaths``,
        ``db`` and ``decide_element()``.
    :param update_dict: bool. Meant to let users decide whether locally
        pickled training documents are updated in this call; this function
        body does not read it.
    :return: the ``inserted_id`` reported by ``insert_one``.
    :raises AttributeError: if ``obj`` has no ``essentials`` attribute or it
        is ``None``.
    """
    if not hasattr(obj, "essentials"):
        raise AttributeError(
            "An object to be saved in db is supposed to have the essentials attribute"
        )
    if obj.essentials is None:
        raise AttributeError(
            "An object to be saved in db should not have NoneType as its essentials"
        )

    print("Saving this object into db: {}".format(type(obj)))

    document = {}
    timestamp = datetime.now()
    document["essentials"] = self.mongo_doc_generator(obj.essentials)
    document["datetime"] = timestamp
    document["filepaths"] = obj.filepaths

    where = obj.db
    element = obj.decide_element()
    collection = connect_collection(where["host"], where["project"], element)
    outcome = collection.insert_one(document)
    return outcome.inserted_id