Пример #1
0
 def remove(self):
     """Remove the summary-docs file, the vocab file and the meta file.

     The two paths returned by the private path getters are each wrapped in
     a ``utility.File`` whose ``remove()`` is called, then
     ``self.metaFile.remove()`` is invoked. Returns ``None``.
     """
     utility.File(self.__getSummaryDocsPath()).remove()
     utility.File(self.__getVocabPath()).remove()
     self.metaFile.remove()
Пример #2
0
def test():
    """Command-line smoke test: query a labelled JSON object tree and print the hits.

    Run with, for example::

        python ObQuery.py employee.json /personalRecord

    ``sys.argv[1]`` is the JSON content file and ``sys.argv[2]`` the path to
    query. When a further argument is present it is passed to
    ``ObQuery.ac_query`` as its third argument; otherwise the plain
    ``ObQuery.query`` is used. Labels are read from
    ``path_label_policy.json`` (relative to the current directory).

    The ``print(...)`` calls use a single argument so the function behaves
    the same under Python 2 and Python 3.
    """
    # Build the JSON object tree from the content file.
    obj_tree = PyObjTree(
        utl.LoadJSON(str=utl.File(sys.argv[1]).read()).get_json()).get_root()

    # Apply the labels to the object tree.
    obj_tree = Policy.NodeLabeling(
        obj_tree,
        label_str=utl.File("path_label_policy.json").read()).appy_labels()

    path = sys.argv[2]

    nh = NodeHierarchy()
    nh.insert("private", "public")
    nh.insert("protected", "private")

    oq = ObQuery(obj_tree)

    for q in [path]:
        if len(sys.argv) >= 4:
            res = oq.ac_query(q, nh, sys.argv[3])
        else:
            res = oq.query(q)

        # A query can yield more than one result; print each of them.
        for r in res:
            if type(r) is dict:
                # Only the first value of the mapping is printed. list(...)
                # is needed because Python 3 dict views are not indexable.
                first_value = list(r.values())[0]
                print(utl.pretty_print(first_value.print_json()))
            elif isinstance(r, PyJSOb):
                print(utl.pretty_print(r.print_json()))
            else:
                print(r)
Пример #3
0
    def read(self, fileName):
        """Read a document and enrich it with fields from its JSON companion file.

        The parent class ``read`` supplies the initial ``details``; a falsy
        result is returned unchanged. Otherwise ``<identifier>.json`` inside
        ``self.datasetPath`` is read, where ``identifier`` is ``fileName``
        without its last 8 characters. If that file has content it is parsed
        as JSON and used to set:

        * ``url`` and ``categories`` (from ``Url`` / ``Categories``, which
          are required keys: a missing one raises ``KeyError``);
        * ``title`` (cleaned) and a ``text`` prefixed with the cleaned title,
          when ``Title`` is present;
        * ``date`` and an integer ``timestamp``, when ``Date`` is present
          (expected in ``%Y-%m-%d`` format).

        Returns the ``details`` object from the parent, updated in place.
        """
        # NOTE(review): the 8-character suffix is presumably a fixed-length
        # ending shared by all dataset file names - confirm against the data.
        identifier = fileName[:-8]
        details = super().read(fileName)
        if not details:
            return details

        filePath = os.path.join(self.datasetPath, identifier + '.json')
        otherDetails = utility.File(filePath).read()
        if not otherDetails:
            return details

        otherDetails = json.loads(otherDetails)
        details['url'] = otherDetails['Url']
        details['categories'] = otherDetails['Categories']

        if 'Title' in otherDetails:
            # Clean the title once and reuse it for both fields.
            title = self._clean(otherDetails['Title'])
            details['title'] = title
            details['text'] = title + '. ' + details['text']

        if 'Date' in otherDetails:
            details['date'] = otherDetails['Date']
            # datetime.timestamp() replaces strftime("%s"), which is a
            # platform-specific directive that is not available everywhere.
            # For a naive datetime both interpret the value as local time.
            parsedDate = datetime.datetime.strptime(otherDetails['Date'],
                                                    '%Y-%m-%d')
            details['timestamp'] = int(parsedDate.timestamp())

        return details
Пример #4
0
    def _loadNumpy(self, fileName, path):
        """Load a NumPy file, or return ``None`` when it does not exist.

        The location is resolved with ``self._getFilePath(fileName, path)``;
        existence is checked through ``utility.File`` before handing the
        path to ``np.load``.
        """
        target = self._getFilePath(fileName, path)
        if utility.File(target).exists():
            return np.load(target)
        return None
Пример #5
0
 def read(self, fileName):
     """Read ``<fileName>.txt`` from the dataset directory.

     Returns a dict with:

     * ``filename``  - the ``fileName`` argument as given;
     * ``text``      - the file content passed through ``self._clean``;
     * ``timestamp`` - the current time as integer seconds since the epoch.
     """
     filePath = os.path.join(self.datasetPath, fileName + '.txt')
     fileHandler = utility.File(filePath)
     return {
         'filename': fileName,
         'text': self._clean(fileHandler.read()),
         # datetime.timestamp() replaces strftime("%s"), which is a
         # platform-specific directive that is not available everywhere.
         'timestamp': int(datetime.datetime.now().timestamp()),
     }
Пример #6
0
 def remove(self):
     """Remove the files handled by the parent class plus this class's own.

     After delegating to ``super().remove()``, removes the file at
     ``self._getDocLcsPath()``, the ``cwr_gc.csv`` file obtained through
     ``self.getFile`` and finally ``self.metaFile``. Returns ``None``.
     """
     super().remove()
     utility.File(self._getDocLcsPath()).remove()
     self.getFile('cwr_gc.csv').remove()
     self.metaFile.remove()
Пример #7
0
def test():
    """Command-line smoke test: query a labelled JSON object tree and print the hits.

    Run with, for example::

        python ObQuery.py employee.json /personalRecord

    ``sys.argv[1]`` is the JSON content file and ``sys.argv[2]`` the path to
    query. When a further argument is present it is passed to
    ``ObQuery.ac_query`` as its third argument; otherwise the plain
    ``ObQuery.query`` is used. Labels are read from
    ``path_label_policy.json`` (relative to the current directory).

    The ``print(...)`` calls use a single argument so the function behaves
    the same under Python 2 and Python 3.
    """
    # Build the JSON object tree from the content file.
    obj_tree = PyObjTree(
        utl.LoadJSON(str=utl.File(sys.argv[1]).read()).get_json()).get_root()

    # Apply the labels to the object tree.
    obj_tree = Policy.NodeLabeling(
        obj_tree,
        label_str=utl.File("path_label_policy.json").read()).appy_labels()

    path = sys.argv[2]

    nh = NodeHierarchy()
    nh.insert("private", "public")
    nh.insert("protected", "private")

    oq = ObQuery(obj_tree)

    for q in [path]:
        if len(sys.argv) >= 4:
            res = oq.ac_query(q, nh, sys.argv[3])
        else:
            res = oq.query(q)

        # A query can yield more than one result; print each of them.
        for r in res:
            if type(r) is dict:
                # Only the first value of the mapping is printed. list(...)
                # is needed because Python 3 dict views are not indexable.
                first_value = list(r.values())[0]
                print(utl.pretty_print(first_value.print_json()))
            elif isinstance(r, PyJSOb):
                print(utl.pretty_print(r.print_json()))
            else:
                print(r)
Пример #8
0
 def __saveSparseCsr(self, vectors):
     """Store the components of ``vectors`` in ``word_cooccurence.npz``.

     ``vectors`` must expose ``data``, ``indices``, ``indptr`` and ``shape``
     attributes (presumably a SciPy CSR matrix - confirm with the caller).
     Any existing file at the target path is removed first, then the four
     components are written with ``np.savez`` under those same names.
     """
     target = self._getFilePath('word_cooccurence.npz')
     # Drop a previous archive before writing the new one.
     utility.File(target).remove()
     components = {
         'data': vectors.data,
         'indices': vectors.indices,
         'indptr': vectors.indptr,
         'shape': vectors.shape,
     }
     np.savez(target, **components)
Пример #9
0
def generate_with_template(template):
    """Build one large JSON object string by repeating a template file.

    The content of the file at ``template`` becomes the value of the keys
    ``employee_1`` ... ``employee_<NO_OF_ITER - 1>``; the entries are joined
    with commas and wrapped in braces.

    Args:
        template: path of the template file, read through ``utl.File``.

    Returns:
        The assembled string, e.g. ``{"employee_1":<content>,...}``.
    """
    # The template is identical for every entry, so read it only once
    # instead of hitting the file on each iteration.
    content = utl.File(template).read()
    entries = [
        "\"employee_{}\":{}".format(i, content)
        for i in range(1, NO_OF_ITER)
    ]
    # join() avoids the quadratic cost of repeated string concatenation.
    return "{" + ",".join(entries) + "}"
Пример #10
0
    def __loadSparseCsr(self):
        """Rebuild a CSR matrix from ``word_cooccurence.npz``.

        Returns:
            A ``csr_matrix`` built from the stored ``data``, ``indices``,
            ``indptr`` and ``shape`` arrays, or ``None`` when the file does
            not exist or the stored shape is not square.
        """
        filePath = self._getFilePath('word_cooccurence.npz')
        if not utility.File(filePath).exists():
            return None

        # np.load on an .npz archive keeps the underlying file open until
        # the returned object is closed; the with-block guarantees that.
        with np.load(filePath) as loader:
            shape = loader['shape']
            if shape[0] != shape[1]:
                return None

            return csr_matrix(
                (loader['data'], loader['indices'], loader['indptr']),
                shape=shape)
Пример #11
0
    def apply(self):
        """Build the labelled object tree, run the query and return the first result.

        Steps:

        1. Build the object tree from ``self.content_file`` or, failing
           that, ``self.content_str``.
        2. Label it using ``self.policy_file`` or ``self.policy_str`` when
           one of them is set.
        3. Query ``self.JSONPath`` (defaulting to ``'/'``, which is also
           stored back on the instance). ``ObQuery.ac_query`` is used with
           the user hierarchy when ``self.user_clearance`` is set,
           ``ObQuery.query`` otherwise.

        Returns:
            For the first query result only: the pretty-printed JSON of the
            first value when the result is a plain ``dict``, the
            pretty-printed JSON of a ``PyJSOb`` result, or the result itself
            for anything else. ``None`` when the query yields nothing.

        Raises:
            ValueError: if neither ``content_file`` nor ``content_str`` is set.
        """
        # ---- Build JSON tree ---- #
        if self.content_file:
            obj_tree = PyObjTree(
                utl.LoadJSON(path=self.content_file).get_json()).get_root()
        elif self.content_str:
            obj_tree = PyObjTree(
                utl.LoadJSON(str=self.content_str).get_json()).get_root()
        else:
            # Without this guard the first use of obj_tree below fails with
            # an opaque UnboundLocalError.
            raise ValueError(
                "either content_file or content_str must be provided")

        # ---- Label JSON tree with policy ---- #
        if self.policy_file:
            obj_tree = Policy.NodeLabeling(
                obj_tree,
                label_str=utl.File(self.policy_file).read()).appy_labels()
        elif self.policy_str:
            obj_tree = Policy.NodeLabeling(
                obj_tree, label_str=self.policy_str).appy_labels()

        # ---- Get user hierarchy ---- #
        hierarchy = UserHierarchy().get_hierarchy()

        # ---- Query ---- #
        oq = ObQuery(obj_tree)
        self.JSONPath = self.JSONPath or '/'

        if self.user_clearance:
            # With a user clearance the hierarchy takes part in the query.
            res = oq.ac_query(self.JSONPath, hierarchy, self.user_clearance)
        else:
            res = oq.query(self.JSONPath)

        # There can be more than one result, but only the first is returned.
        for r in res:
            if type(r) is dict:
                # list(...) because Python 3 dict views are not indexable.
                first_value = list(r.values())[0]
                return utl.pretty_print(first_value.print_json())
            if isinstance(r, PyJSOb):
                return utl.pretty_print(r.print_json())
            return r
        return None
Пример #12
0
def test_cbac():
    """Smoke test: filter ``employee.json`` with an inline CBAC policy and print the result.

    Builds the ``cbac_policy`` dict from three JSON literals (user labels,
    object labels and the read policy), then runs ``ContentFilter.apply()``
    for the clearance ``['employee']`` on the path ``"/"`` and prints the
    returned value prefixed with ``>>``.
    """
    user_labels = '[{"dominates": ["employee"], "name": "manager"}, {"dominates": ["stuff"], "name": "employee"}]'
    object_labels = '[{"dominates": ["public"], "name": "protected"}, {"dominates": ["public"], "name": "private"}]'
    json_policy = '{"read": [{"user_label": "manager", "object_label": "protected"}, {"user_label": "employee", "object_label": "private"}]}'
    # NOTE(review): "/personalabelRecord" may be a typo for "/personalRecord"
    # - confirm against employee.json before changing it.
    object_labelling = '[{"target": "/", "label": "protected"}, {"target": "/personalabelRecord", "label": "public"}]'

    cbac_policy = {
        'user_labels': json.loads(user_labels),
        'object_labels': json.loads(object_labels),
        'policy': json.loads(json_policy),
    }

    user_clearance = ['employee']
    jsonpath = "/"

    if json_policy and user_clearance and jsonpath and object_labelling:
        filtered_content = ContentFilter(
            content_str=utl.File('employee.json').read(),
            labeling_policy_str=object_labelling,
            user_clearance=user_clearance,
            query=jsonpath,
            cbac_policy=cbac_policy).apply()

        # Single-argument print so the output is identical on Python 2 and 3.
        print(">> %s" % (filtered_content,))
Пример #13
0
# Fetch the script parameters and save them (passing the data directory).
params = scriptParams.get()
scriptParams.save(params.data_directory)

logging.info("# 2. Preprocessing data")
logging.info("# ================================")

dataset = Dataset(params.dataset_name, params.data_directory)
datasetToProcess = dataset.get(float(params.dataset_percentage), int(params.total_items))

if not datasetToProcess:
    logging.error('No dataset found')
    # Exit with a non-zero status so the calling shell or pipeline can
    # detect the failure; a bare sys.exit() would report success.
    sys.exit(1)

logging.info("# 3. Generating points")
logging.info("# ================================")
wordProcessor = corpus.RelativeCWR(datasetToProcess, params)
points = wordProcessor.getContext(params.word)
print(points)

logging.info("# 4. Display points")
logging.info("# ================================")
imageDirectory = os.path.join(params.plot_directory, params.dataset_name)
plotter = corpus.Plotter(points)
filePath = os.path.join(imageDirectory, params.word + '_cwr_gc_plot.png')
# Remove a plot left over from a previous run before displayPlot is called
# with the same path.
file = utility.File(filePath)
file.remove()
plotter.displayPlot(filePath)
print('Finished')


Пример #14
0
 def getFile(self, filename, writeHeader=True):
     """Return a ``utility.File`` for ``filename`` inside ``self.path``.

     ``writeHeader`` is forwarded unchanged as the second argument of
     ``utility.File``.
     """
     return utility.File(os.path.join(self.path, filename), writeHeader)
Пример #15
0
 def _saveNumpy(self, fileName, data, path):
     """Write ``data`` with ``np.savez`` to the resolved file path.

     The location comes from ``self._getFilePath(fileName, path)``; a file
     already present there is removed before saving. Returns ``None``.
     """
     target = self._getFilePath(fileName, path)
     # Clear the previous file before writing the new one.
     utility.File(target).remove()
     np.savez(target, data)
Пример #16
0
 def getFile(self, prefix=''):
     """Return a ``utility.File`` for ``<prefix>vocab.csv`` inside ``self.path``."""
     vocabName = prefix + 'vocab.csv'
     return utility.File(os.path.join(self.path, vocabName))
Пример #17
0
    def apply(self):
        """Label the JSON content, evaluate the query and return the first result.

        Steps:

        1. Build the object tree from ``self.content_file`` or, failing
           that, ``self.content_str``.
        2. Label it using ``self.policy_file`` or ``self.policy_str`` when
           one of them is set.
        3. Round-trip ``self.cbac_policy`` through JSON text, store the
           result in ``self.cbac_json`` and hand it to ``CBAC.get``.
        4. Query ``self.JSONPath`` (defaulting to ``'/'``, which is also
           stored back on the instance). ``ObQuery.ac_query`` is used with
           the CBAC object when ``self.user_clearance`` is set,
           ``ObQuery.query`` otherwise.

        Progress markers and the policy values are printed to stdout along
        the way (debug traces kept from the original implementation).

        Returns:
            For the first query result only: the pretty-printed JSON of the
            first value when the result is a plain ``dict``, the
            pretty-printed JSON of a ``PyJSOb`` result, or the result itself
            for anything else. ``None`` when the query yields nothing.

        Raises:
            ValueError: if neither ``content_file`` nor ``content_str`` is set.
        """
        # ---- Build JSON tree ---- #
        print("flg0")
        if self.content_file:
            obj_tree = PyObjTree(
                utl.LoadJSON(path=self.content_file).get_json()).get_root()
        elif self.content_str:
            obj_tree = PyObjTree(
                utl.LoadJSON(str=self.content_str).get_json()).get_root()
        else:
            # Without this guard the first use of obj_tree below fails with
            # an opaque UnboundLocalError.
            raise ValueError(
                "either content_file or content_str must be provided")

        # ---- Label JSON tree with object labels ---- #
        if self.policy_file:
            obj_tree = Policy.NodeLabeling(
                obj_tree,
                label_str=utl.File(self.policy_file).read()).appy_labels()
        elif self.policy_str:
            print(self.policy_str)
            obj_tree = Policy.NodeLabeling(
                obj_tree, label_str=self.policy_str).appy_labels()

        print("flg1")
        print("1.2")
        print(self.cbac_policy)

        # cbac_policy is a dictionary; it is serialised with json.dumps and
        # parsed back with json.loads. self.cbac_policy itself is left
        # untouched: overwriting it with the serialised string made a second
        # apply() call double-encode the policy and pass a str to CBAC.get.
        self.cbac_json = json.loads(json.dumps(self.cbac_policy))

        print(self.cbac_json)

        cbac = CBAC.get(file_in_json=self.cbac_json)

        print("flg1.5")

        # ---- Query ---- #
        oq = ObQuery(obj_tree)

        print("flg2")

        self.JSONPath = self.JSONPath or '/'

        if self.user_clearance:
            # With a user clearance the CBAC object takes part in the query.
            res = oq.ac_query(self.JSONPath, cbac, self.user_clearance)
        else:
            res = oq.query(self.JSONPath)

        # There can be more than one result, but only the first is returned.
        for r in res:
            if type(r) is dict:
                # list(...) because Python 3 dict views are not indexable.
                first_value = list(r.values())[0]
                return utl.pretty_print(first_value.print_json())
            if isinstance(r, PyJSOb):
                return utl.pretty_print(r.print_json())
            return r
        return None
Пример #18
0
 def remove(self):
     """Remove the file located at ``self._getPath()``. Returns ``None``."""
     utility.File(self._getPath()).remove()
Пример #19
0
 def remove(self):
     """Remove the points file and the prefixed ``cwr_gc.csv`` file.

     The points file is located through ``self._getPointsPath()``; the CSV
     file is obtained from ``self.getFile`` using ``self.prefix``.
     Returns ``None``.
     """
     utility.File(self._getPointsPath()).remove()
     # Remove the CSV file itself; previously the points file was removed a
     # second time and the CSV file was left behind.
     self.getFile(self.prefix + 'cwr_gc.csv').remove()
Пример #20
0
 def _saveInPickel(self, filePath, model):
     """Pickle ``model`` into ``filePath``, replacing an existing file.

     Args:
         filePath: destination path of the pickle file.
         model: any picklable object.
     """
     file = utility.File(filePath)
     if file.exists():
         file.remove()
     # The with-block closes (and therefore flushes) the file even when
     # pickling raises, instead of leaving the handle to the garbage
     # collector.
     with open(filePath, 'wb') as handle:
         pickle.dump(model, handle)
Пример #21
0
 def remove(self):
     """Remove ``embedding_vecs.tsv`` and ``embedding_meta.tsv`` from ``self.path``.

     Each file is wrapped in a ``utility.File`` and removed, in that order.
     Returns ``None``.
     """
     for name in ('embedding_vecs.tsv', 'embedding_meta.tsv'):
         utility.File(os.path.join(self.path, name)).remove()
Пример #22
0
 def _getFromPickel(self, filePath):
     """Load and return the pickled object stored at ``filePath``.

     Returns ``None`` when the file does not exist.
     """
     if not utility.File(filePath).exists():
         return None
     # NOTE: unpickling can execute arbitrary code; only load files that
     # this application wrote itself.
     # The with-block closes the handle once the object is loaded.
     with open(filePath, 'rb') as handle:
         return pickle.load(handle)