def remove(self):
    """Delete the summary-docs file, the vocab file, and the metadata file."""
    utility.File(self.__getSummaryDocsPath()).remove()
    utility.File(self.__getVocabPath()).remove()
    self.metaFile.remove()
    return
def test():
    """CLI smoke test: python ObQuery.py employee.json /personalRecord [clearance]."""
    # Build the JSON object tree from the content file given on the CLI.
    obj_tree = PyObjTree(
        utl.LoadJSON(str=utl.File(sys.argv[1]).read()).get_json()).get_root()
    # Attach security labels to the tree from the path-label policy file.
    obj_tree = Policy.NodeLabeling(
        obj_tree,
        label_str=utl.File("path_label_policy.json").read()).appy_labels()
    path = sys.argv[2]
    # Label hierarchy: private dominates public, protected dominates private.
    nh = NodeHierarchy()
    nh.insert("private", "public")
    nh.insert("protected", "private")
    oq = ObQuery(obj_tree)
    qry = [path]
    for q in qry:
        if len(sys.argv) >= 4:
            # A clearance label was supplied: run an access-controlled query.
            res = oq.ac_query(q, nh, sys.argv[3])
        else:
            res = oq.query(q)
        # We need to iterate through res: there can be more than one result.
        for r in res:
            if type(r) is dict:
                # NOTE(review): items()[0] assumes a Python 2 dict; under
                # Python 3 this raises TypeError -- confirm target runtime.
                (k, v) = r.items()[0]
                print(utl.pretty_print(v.print_json()))
            elif isinstance(r, PyJSOb):
                print(utl.pretty_print(r.print_json()))
            else:
                print(r)
def read(self, fileName):
    """Read a dataset item and merge in its companion <identifier>.json metadata.

    Returns the dict from super().read() augmented with 'url'/'categories'
    and, when present in the metadata, 'title'/'text'/'date'/'timestamp';
    returns the base result unchanged (falsy) when the base read fails.
    """
    import time  # local: portable epoch conversion below

    # Strip the trailing 8 characters of the file name to get the identifier
    # (presumably an extension/suffix -- TODO confirm against callers).
    identifier = fileName[:-8]
    details = super().read(fileName)
    if not details:
        return details
    filePath = os.path.join(self.datasetPath, (identifier + '.json'))
    otherDetails = utility.File(filePath).read()
    if otherDetails:
        otherDetails = json.loads(otherDetails)
        details['url'] = otherDetails['Url']
        details['categories'] = otherDetails['Categories']
        if 'Title' in otherDetails:
            details['title'] = self._clean(otherDetails['Title'])
            # Prepend the cleaned title to the text body.
            details['text'] = self._clean(
                otherDetails['Title']) + '. ' + details['text']
        if 'Date' in otherDetails:
            details['date'] = otherDetails['Date']
            # time.mktime is portable, unlike strftime("%s") (a glibc-only
            # extension); both give the local-time epoch for that date.
            details['timestamp'] = int(
                time.mktime(datetime.datetime.strptime(
                    otherDetails['Date'], '%Y-%m-%d').timetuple()))
    return details
def _loadNumpy(self, fileName, path):
    """Load the numpy archive saved under *fileName*, or None if absent."""
    target = self._getFilePath(fileName, path)
    if not utility.File(target).exists():
        return None
    return np.load(target)
def read(self, fileName):
    """Read <fileName>.txt from the dataset directory.

    Returns a dict with the bare file name, the cleaned text, and the
    current epoch timestamp.
    """
    import time  # local: portable replacement for strftime("%s")

    filePath = os.path.join(self.datasetPath, (fileName + '.txt'))
    fileHandler = utility.File(filePath)
    return {
        'filename': fileName,
        'text': self._clean(fileHandler.read()),
        # int(time.time()) is portable; strftime("%s") is glibc-only.
        'timestamp': int(time.time()),
    }
def remove(self):
    """Delete the base files, the doc-LCS file, cwr_gc.csv, and the meta file."""
    super().remove()
    utility.File(self._getDocLcsPath()).remove()
    self.getFile('cwr_gc.csv').remove()
    self.metaFile.remove()
    return
def test():
    """Manual driver: python ObQuery.py employee.json /personalRecord [label]."""
    # Build the JSON object tree from the content file on the command line.
    root = PyObjTree(
        utl.LoadJSON(str=utl.File(sys.argv[1]).read()).get_json()).get_root()
    # Attach labels from the path-label policy file.
    root = Policy.NodeLabeling(
        root,
        label_str=utl.File("path_label_policy.json").read()).appy_labels()
    # Label hierarchy: private dominates public, protected dominates private.
    hierarchy = NodeHierarchy()
    hierarchy.insert("private", "public")
    hierarchy.insert("protected", "private")
    querier = ObQuery(root)
    for json_path in [sys.argv[2]]:
        if len(sys.argv) >= 4:
            # A clearance label was supplied: access-controlled query.
            results = querier.ac_query(json_path, hierarchy, sys.argv[3])
        else:
            results = querier.query(json_path)
        # A single query may produce several matches.
        for match in results:
            if type(match) is dict:
                (key, node) = match.items()[0]
                print(utl.pretty_print(node.print_json()))
            elif isinstance(match, PyJSOb):
                print(utl.pretty_print(match.print_json()))
            else:
                print(match)
def __saveSparseCsr(self, vectors):
    """Persist a scipy CSR matrix as word_cooccurence.npz (overwrites)."""
    target = self._getFilePath('word_cooccurence.npz')
    utility.File(target).remove()
    np.savez(target,
             data=vectors.data,
             indices=vectors.indices,
             indptr=vectors.indptr,
             shape=vectors.shape)
    return
def generate_with_template(template):
    """Build one big JSON object holding NO_OF_ITER-1 copies of *template*.

    Each copy is keyed "employee_<i>" for i in 1..NO_OF_ITER-1. The template
    file is read once and reused for every entry (it was previously re-read
    on every loop iteration), and join() replaces the quadratic '+=' build.
    """
    body = utl.File(template).read()
    entries = ('"employee_{}":{}'.format(i, body)
               for i in range(1, NO_OF_ITER))
    return "{" + ",".join(entries) + "}"
def __loadSparseCsr(self):
    """Load word_cooccurence.npz back into a csr_matrix.

    Returns None when the file is missing or the stored shape is not
    square.
    """
    target = self._getFilePath('word_cooccurence.npz')
    if not utility.File(target).exists():
        return None
    archive = np.load(target)
    shape = archive['shape']
    if shape[0] != shape[1]:
        return None
    return csr_matrix(
        (archive['data'], archive['indices'], archive['indptr']),
        shape=shape)
def apply(self):
    """Query the labeled JSON tree and return the first result, pretty-printed.

    Builds the object tree from content_file or content_str, labels it from
    policy_file or policy_str, then runs either an access-controlled query
    (when user_clearance is set) or a plain JSONPath query.
    """
    # ---- Build JSON tree -------#
    if self.content_file:
        obj_tree = PyObjTree(
            utl.LoadJSON(path=self.content_file).get_json()).get_root()
    elif self.content_str:
        obj_tree = PyObjTree(
            utl.LoadJSON(str=self.content_str).get_json()).get_root()
    # ------- Label JSON tree with Policy -------#
    if self.policy_file:
        obj_tree = Policy.NodeLabeling(
            obj_tree,
            label_str=utl.File(self.policy_file).read()).appy_labels()
    elif self.policy_str:
        obj_tree = Policy.NodeLabeling(
            obj_tree, label_str=self.policy_str).appy_labels()
    # ---- get user hierarchy ------#
    hierarchy = UserHierarchy().get_hierarchy()
    # --- Work with Query class for Querying --- #
    oq = ObQuery(obj_tree)
    # Default to the root path when no JSONPath was supplied.
    self.JSONPath = self.JSONPath if self.JSONPath else '/'
    qry = [self.JSONPath]
    for q in qry:
        if self.user_clearance:
            # if user_clearance is given, the user hierarchy is used.
            res = oq.ac_query(q, hierarchy, self.user_clearance)
        else:
            # if user clearance is not given just use the JSONPath to query.
            res = oq.query(q)
        # we need to iterate through the res. there can be more than one result.
        # NOTE(review): the returns below exit on the FIRST result, so any
        # further matches are silently dropped -- confirm that is intended.
        for r in res:
            if type(r) is dict:
                # NOTE(review): items()[0] is Python-2-only indexing; under
                # Python 3 dict_items does not support subscripting.
                (k, v) = r.items()[0]
                return utl.pretty_print(v.print_json())
                pass
            elif isinstance(r, PyJSOb):
                #print r.print_json()
                return utl.pretty_print(r.print_json())
            else:
                return r
def test_cbac(): user_labels = '[{"dominates": ["employee"], "name": "manager"}, {"dominates": ["stuff"], "name": "employee"}]' object_labels = '[{"dominates": ["public"], "name": "protected"}, {"dominates": ["public"], "name": "private"}]' json_policy = '{"read": [{"user_label": "manager", "object_label": "protected"}, {"user_label": "employee", "object_label": "private"}]}' object_labelling = '[{"target": "/", "label": "protected"}, {"target": "/personalabelRecord", "label": "public"}]' cbac_policy = {} cbac_policy['user_labels'] = json.loads(user_labels) cbac_policy['object_labels'] = json.loads(object_labels) cbac_policy['policy'] = json.loads(json_policy) user_clearance = ['employee'] jsonpath = "/" if True: if json_policy and user_clearance and jsonpath and object_labelling: filtered_content = ContentFilter(content_str= utl.File('employee.json').read(),\ labeling_policy_str=object_labelling, \ user_clearance=user_clearance, query=jsonpath, \ cbac_policy=cbac_policy).apply() print ">>", filtered_content
# Top-level script: load parameters, slice the dataset, compute context
# points for one word, and render them to a PNG plot.
params = scriptParams.get()
# Persist the run parameters alongside the data for reproducibility.
scriptParams.save(params.data_directory)

logging.info("# 2. Preprocessing data")
logging.info("# ================================")
dataset = Dataset(params.dataset_name, params.data_directory)
# Take the requested slice of the dataset (percentage of items, capped).
datasetToProcess = dataset.get(float(params.dataset_percentage),
                               int(params.total_items))
if not datasetToProcess:
    logging.error('No dataset found')
    sys.exit()

logging.info("# 3. Generating points")
logging.info("# ================================")
# Compute co-occurrence context points for the requested word.
wordProcessor = corpus.RelativeCWR(datasetToProcess, params)
points = wordProcessor.getContext(params.word)
print(points)

logging.info("# 4. Display points")
logging.info("# ================================")
imageDirectory = os.path.join(params.plot_directory, params.dataset_name)
plotter = corpus.Plotter(points)
filePath = os.path.join(imageDirectory, params.word + '_cwr_gc_plot.png')
# Remove any stale plot before writing the new one.
file = utility.File(filePath)
file.remove()
plotter.displayPlot(filePath)
print('Finished')
def getFile(self, filename, writeHeader = True):
    """Return a utility.File for *filename* inside this object's path."""
    return utility.File(os.path.join(self.path, filename), writeHeader)
def _saveNumpy(self, fileName, data, path):
    """Overwrite *fileName* under *path* with *data* via np.savez."""
    target = self._getFilePath(fileName, path)
    utility.File(target).remove()
    # Saved positionally, so np.load exposes it under the key 'arr_0'.
    np.savez(target, data)
    return
def getFile(self, prefix=''):
    """Return a utility.File for '<prefix>vocab.csv' in this object's path."""
    return utility.File(os.path.join(self.path, prefix + 'vocab.csv'))
def apply(self):
    """Query the labeled JSON tree under a CBAC policy; return the first result.

    Builds the object tree from content_file/content_str, labels it from
    policy_file/policy_str, converts cbac_policy into the JSON form CBAC
    expects, then runs an access-controlled query when user_clearance is set
    (a plain JSONPath query otherwise). Leftover "flgN" debug print
    statements were removed -- they were Python-2-only and polluted stdout.
    """
    # ---- Build JSON tree -------#
    if self.content_file:
        obj_tree = PyObjTree(
            utl.LoadJSON(path=self.content_file).get_json()).get_root()
    elif self.content_str:
        obj_tree = PyObjTree(
            utl.LoadJSON(str=self.content_str).get_json()).get_root()
    # ------- Label JSON tree with object labels -------#
    if self.policy_file:
        obj_tree = Policy.NodeLabeling(
            obj_tree,
            label_str=utl.File(self.policy_file).read()).appy_labels()
    elif self.policy_str:
        obj_tree = Policy.NodeLabeling(
            obj_tree, label_str=self.policy_str).appy_labels()
    # cbac_policy arrives as a dict; round-trip it through json.dumps/loads
    # to normalize it into plain JSON data before handing it to CBAC.
    self.cbac_policy = json.dumps(self.cbac_policy)
    self.cbac_json = json.loads(self.cbac_policy)
    cbac = CBAC.get(file_in_json=self.cbac_json)
    # --- Work with Query class for Querying --- #
    oq = ObQuery(obj_tree)
    # Default to the root path when no JSONPath was supplied.
    self.JSONPath = self.JSONPath if self.JSONPath else '/'
    for q in [self.JSONPath]:
        if self.user_clearance:
            # With a clearance, run the access-controlled query against CBAC.
            res = oq.ac_query(q, cbac, self.user_clearance)
        else:
            # Without a clearance, just use the JSONPath to query.
            res = oq.query(q)
        # NOTE(review): only the FIRST result is returned even though a
        # query can match several nodes -- confirm that is intended.
        for r in res:
            if type(r) is dict:
                # NOTE(review): items()[0] is Python-2-only indexing.
                (k, v) = r.items()[0]
                return utl.pretty_print(v.print_json())
            elif isinstance(r, PyJSOb):
                return utl.pretty_print(r.print_json())
            else:
                return r
def remove(self):
    """Delete the file at this object's path."""
    utility.File(self._getPath()).remove()
    return
def remove(self):
    """Delete the points file and the '<prefix>cwr_gc.csv' file."""
    pointsFile = utility.File(self._getPointsPath())
    pointsFile.remove()
    csvFile = self.getFile(self.prefix + 'cwr_gc.csv')
    # BUG FIX: the original called file.remove() a second time here, so the
    # CSV was never deleted (compare the sibling remove() that deletes
    # cwr_gc.csv via csvFile.remove()).
    csvFile.remove()
    return
def _saveInPickel(self, filePath, model):
    """Pickle *model* to *filePath*, replacing any existing file."""
    file = utility.File(filePath)
    if file.exists():
        file.remove()
    # Context manager guarantees the handle is flushed and closed; the
    # original passed a bare open() and leaked the file handle.
    with open(filePath, 'wb') as handle:
        pickle.dump(model, handle)
    return
def remove(self):
    """Delete the embedding vector and metadata TSV files."""
    for name in ('embedding_vecs.tsv', 'embedding_meta.tsv'):
        utility.File(os.path.join(self.path, name)).remove()
    return
def _getFromPickel(self, filePath):
    """Unpickle and return the object stored at *filePath*, or None if absent.

    NOTE(review): pickle.load is unsafe on untrusted input -- only use this
    on files the application wrote itself.
    """
    if not utility.File(filePath).exists():
        return None
    # Context manager guarantees the handle is closed; the original passed
    # a bare open() and leaked the file handle.
    with open(filePath, 'rb') as handle:
        return pickle.load(handle)