def boolean_queries_with_proximity(query, proximity):
    # preprocessing: load the posting list and the positional term index
    posting = Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)
    wordsIndex = Data.read_dataStruct_from_file(Contants.WORD_INDEX_FILE_NAME)

    setlist = []
    for word in query.split():
        word = Index.applyFilters(word)
        if word in posting:
            setlist.append(set(posting[word]))

    wordList = [Index.applyFilters(word) for word in query.split()
                if Index.applyFilters(word) in posting]
    DocIDList = list(set.intersection(*setlist))

    answer = []

    # visit each distinct pair of query terms; slicing avoids mutating
    # wordList while iterating over it
    for i, word1 in enumerate(wordList):
        for word2 in wordList[i + 1:]:
            for DocID in DocIDList:
                for PosID1 in wordsIndex[word1][0][DocID]:
                    for PosID2 in wordsIndex[word2][0][DocID]:
                        if abs(PosID1 - PosID2) <= proximity and DocID not in answer:
                            answer.append(DocID)

    return answer
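The innermost test reduces to a pairwise position check; a minimal standalone restatement (the sample position tuples are made up):

def within_proximity(positions1, positions2, proximity):
    # True if any pair of positions is at most `proximity` words apart
    return any(abs(a - b) <= proximity
               for a in positions1 for b in positions2)

assert within_proximity((4, 20), (9,), 5) is True
assert within_proximity((4,), (100,), 5) is False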
Example #2
def createPostingList(sortTepDic):

    # map each term to the list of document IDs it occurs in
    posting = {}
    for key in sortTepDic.keys():
        posting[key] = list(sortTepDic[key][0])

    Data.write_dataStruct_to_file(Contants.POSTING_LIST_FILE_NAME, posting)
Example #3
def createTermIndex():
    sortTepDic = SortedDict()
    # Structure for each term:
    #   sortTepDic['term'] = ({'DocId1': ['Pos1', 'Pos2'], 'DocId2': ['Pos1', 'Pos2']}, 'termFreq', 'DocFreq')

    for root, dirs, files in os.walk(Contants.DATA_DIRECTORY_NAME, topdown=True):
        for name in files:
            file_name = os.path.join(root, name)
            DocId = os.path.split(file_name)[1]

            # open read-only; the with-block closes the file automatically
            with open(file_name, "r") as file_object:
                wordPos = 0
                for word in file_object.read().split():

                    wordPos = wordPos + 1  # advance the word position
                    lamma = applyFilters(word)

                    if lamma:
                        if lamma not in sortTepDic:
                            sortTepDic[lamma] = [{DocId: [wordPos]}, 1, 1]  # add a new term
                        else:
                            sortTepDic[lamma][1] = sortTepDic[lamma][1] + 1  # increment the term frequency

                            if DocId in sortTepDic[lamma][0]:
                                # add a new word position for the existing document
                                sortTepDic[lamma][0][DocId].append(wordPos)
                            else:
                                sortTepDic[lamma][0][DocId] = [wordPos]  # add a new document ID and the word position
                                sortTepDic[lamma][2] = sortTepDic[lamma][2] + 1  # increment the document frequency

    # convert lists to tuples
    for key in sortTepDic.keys():
        for DocId in sortTepDic[key][0]:
            sortTepDic[key][0][DocId] = tuple(sortTepDic[key][0][DocId])
        sortTepDic[key] = tuple(sortTepDic[key])

    Data.write_dataStruct_to_file(Contants.WORD_INDEX_FILE_NAME, sortTepDic)
    createLexicon(sortTepDic)
    createPostingList(sortTepDic)
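To make the per-term structure concrete, here is a minimal, self-contained sketch of the same bookkeeping for one in-memory document (assuming SortedDict comes from the sortedcontainers package; the document name and text are made up, and with a single document the document-frequency branch is never reached):

from sortedcontainers import SortedDict

def build_toy_index():
    sortTepDic = SortedDict()
    DocId, text = 'doc1.txt', 'dell laptop dell'  # hypothetical document
    for wordPos, word in enumerate(text.split(), start=1):
        if word not in sortTepDic:
            sortTepDic[word] = [{DocId: [wordPos]}, 1, 1]  # new term
        else:
            sortTepDic[word][1] += 1  # term frequency
            sortTepDic[word][0][DocId].append(wordPos)  # new position
    return sortTepDic

# (positions per document, term frequency, document frequency)
assert build_toy_index()['dell'] == [{'doc1.txt': [1, 3]}, 2, 1]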
Example #4
 async def ready(self):
     print("ready")
     activity = Data.get_activity(self, self.name)
     await self.set_presence(activity)
     chosen = Data.one_liner
     await self.send("online log",
                     chosen.replace("%", "Rich Uncle Pennybags"))
     return
Example #5
 def test_compare(self):
     obj1 = Data("foo")
     obj2 = Data("foo")
     obj3 = Data("bar")
     obj4 = Data("bar")
     obj4.addAttribute(BoolAttribute("testAttr", True))
     self.assertEqual(obj1, obj2)
     self.assertNotEqual(obj1, obj3)
     self.assertNotEqual(obj3, obj4)
Example #6
def run_whileloop(while_loop_body):
    # tokens between WHILE and REPEAT form the condition; the remainder,
    # left in while_loop_body by take_tokens, is the loop body
    code1_and_flag = take_tokens("WHILE", "REPEAT", while_loop_body)
    code2 = while_loop_body

    flag_value = TRUE
    while flag_value == TRUE:
        # re-run the condition tokens, then inspect the flag they leave on the stack
        consume_tokens(copy(code1_and_flag))
        if Data.pop() == TRUE:
            consume_tokens(copy(code2))
        else:
            flag_value = FALSE
Example #7
 async def ready(self):
     # a callback for when the bot is ready
     print("ready")
     # sets presence for the bot
     activity = Data.get_activity(self, self.name)
     await self.set_presence(activity)
     # announce thyself
     chosen = Data.one_liner
     # this starts to get annoying while testing
     await self.send("online log", chosen.replace("%", "Providence"))
     return
Example #8
def run_misc_worker(*args, **kwargs):
    logger = config.get_logger(kwargs['log_path'], kwargs['name'])
    try:
        db = Data(logger, kwargs['redis_host'], kwargs['redis_port'],
                  kwargs['redis_db'])
        p = MiscWorker(logger, kwargs['name'], db, None, kwargs['config_path'])
        logger.info('Starting poller worker: {0}'.format(kwargs['name']))
        p.run(args, kwargs)
    except Exception as e:
        logger.error('ERROR: Exception in run_misc_worker: {0}\r\n{1}'.format(
            e, traceback.format_exc()))
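A hypothetical invocation of the worker entry point (all values are placeholders):

run_misc_worker(log_path='/tmp/worker.log', name='misc-1',
                redis_host='localhost', redis_port=6379, redis_db=0,
                config_path='/tmp/config.yml')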
Example #9
def boolean_queries(query):
    # preprocessing: load the posting list
    posting = Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)

    setlist = []
    for word in query.split():
        word = Index.applyFilters(word)
        if word in posting:
            setlist.append(set(posting[word]))

    # a document matches only if it contains every query term
    answer = set.intersection(*setlist)

    return list(answer)
Example #10
def wlidCard_queries_using_permuterm_index(query):

    posting = Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)
    # rotate the query so the wildcard moves to the end of the term
    query = Permuterm.standardize_wildcard_query(query)

    DocList = []

    for word in posting:
        # a word matches when the rotated query occurs in one of its rotations
        permutermIndexes = Permuterm.create_permuterm_indexes(word)
        for permuterm in permutermIndexes:
            if query in permuterm:
                DocList = DocList + posting[word]
                break

    return set(DocList)
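The two Permuterm helpers are not shown on this page; a minimal sketch of the standard permuterm construction they imply (hypothetical implementations, the real module may differ):

def create_permuterm_indexes(word):
    # append the end marker '$' and emit every rotation,
    # e.g. 'bish' -> ['bish$', 'ish$b', 'sh$bi', 'h$bis', '$bish']
    term = word + '$'
    return [term[i:] + term[:i] for i in range(len(term))]

def standardize_wildcard_query(query):
    # rotate a single-wildcard query so the '*' lands at the end and drop it,
    # e.g. 'bi*sh' -> 'sh$bi'
    prefix, suffix = query.split('*', 1)
    return suffix + '$' + prefix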
Example #11
    def __init__(self, mode, params_dict, exps=None):
        self.exps = exps
        self.exp_param = ExpParam.Experiment_param()
        # self.results = Results.Results()
        self.measurements = {}
        self.data = Data.Data(self)

        self.navigation_chronograms = None
        self.time_zoom_chronograms = None
        self.mini_PCHs = None

        self.file_name = None
        self.comment = ""

        self.defaultBinSize_s = 0.01  # default : 10ms

        self.new_exp(mode, params_dict)
Example #12
 async def send_bug_report(self, exc, **kwargs):  # sourcery skip
     """
     send a bug report to a channel\n
     :param exc: str, the reported exception
     :param kwargs: any arguments
     """
     # sourcery will be skipped because this function will grow with more exceptions
     out = self.get_channel(Data.get_channel("bot bugs"))
     if exc == "MemberNotFound":
         msg = f"MemberNotFound: could not find member '{kwargs['name']}' " \
               f"in guild '{kwargs['guild']}'. Command was invoked by user {kwargs['author']}"
     elif exc == "KeyError":
         msg = f"KeyError: member {kwargs['name']} (id {kwargs['key']}) does not exist in {kwargs['data']}"
     else:
         raise ExceptionNotFound(exc)
     await out.send(msg)
     return
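A hypothetical call site for the handler above (the bot reference and argument values are placeholders):

await bot.send_bug_report("MemberNotFound",
                          name="Alice", guild="Test Server", author="Bob")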
Example #13
def boolean_queries_implement_using_lists(query):
    # preprocessing: load the posting list
    posting = Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)

    p = []
    for word in query.split():
        word = Index.applyFilters(word)
        if word in posting:
            p.append(posting[word])

    # two-pointer merge of the first two (sorted) posting lists
    index1 = 0
    index2 = 0
    p1 = p[0][index1]
    p2 = p[1][index2]

    answer = []

    while True:
        try:
            if p1 == p2:
                answer.append(p1)
                index1 = index1 + 1
                index2 = index2 + 1
                p1 = p[0][index1]
                p2 = p[1][index2]
            elif p1 < p2:
                index1 = index1 + 1
                p1 = p[0][index1]
            else:
                index2 = index2 + 1
                p2 = p[1][index2]
        except IndexError:
            # one list is exhausted, so no further matches are possible
            break

    return answer
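The same merge without relying on IndexError for termination; a minimal standalone sketch with made-up posting lists:

def intersect(p1, p2):
    # classic two-pointer intersection of sorted posting lists,
    # O(len(p1) + len(p2)) comparisons
    i = j = 0
    answer = []
    while i < len(p1) and j < len(p2):
        if p1[i] == p2[j]:
            answer.append(p1[i])
            i += 1
            j += 1
        elif p1[i] < p2[j]:
            i += 1
        else:
            j += 1
    return answer

assert intersect([1, 3, 5, 8], [2, 3, 8, 9]) == [3, 8]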
Example #14
def trailing_wildCard_queries_using_tree(query):

    query = query[:-1]  # drop the trailing '*'
    print(query)
    indexedWords = Data.read_dataStruct_from_file(Contants.WORD_INDEX_FILE_NAME)
    bt = BinaryTree.balancedTree(indexedWords)

    que = Queue()

    if bt.root.left:
        que.put(bt.root.left)
    if bt.root.right:
        que.put(bt.root.right)

    # breadth-first search; print a node's subtree when its value matches the prefix
    while not que.empty():
        node = que.get()
        if node:
            if query in node.value:
                BinaryTree.DepthFirstSearchPrintNode(node)
            else:
                if node.left:
                    que.put(node.left)
                if node.right:
                    que.put(node.right)
Example #15
'''
Created on Sep 13, 2015

@author: Sheece Gardezi
'''
from core import Index
from core import Data
from core import Contants
from core import BinaryTree
from core import Queries

if __name__ == '__main__':
    #     Index.createTermIndex()

    indexedWords = Data.read_dataStruct_from_file(
        Contants.WORD_INDEX_FILE_NAME)
    lexicons = Data.read_dataStruct_from_file(Contants.LEXICON_FILE_NAME)
    posting = Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)

    bt = BinaryTree.balancedTree(indexedWords)
    #     BinaryTree.DepthFirstSearchPrintNodes(bt)
    #print(posting)

    # example usage
    query = 'four dell'
    proximity = 700
    print(Queries.boolean_queries(query))
    print(Queries.boolean_queries_implement_using_lists(query))
    print(Queries.boolean_queries_with_proximity(query, proximity))
    query = 'bi*sh'
    print(Queries.wlidCard_queries_using_permuterm_index(query))
Example #16
def run_doloop(word_list):
    # pop the loop bounds off the data stack
    _from, _to = Data.pop(), Data.pop()

    for i in range(_from, _to):
        # substitute the current index for the iterator token, then execute the body
        input_list = _resolve_iterator(i, copy(word_list))
        consume_tokens(input_list)
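_resolve_iterator is not shown on this page; one plausible reading, assuming a Forth-style 'I' iterator token (purely hypothetical, the real helper may differ):

def _resolve_iterator(i, word_list):
    # hypothetical: replace each iterator token 'I' with the current index
    return [str(i) if token == "I" else token for token in word_list]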
Example #17
'''
Created on Sep 13, 2015

@author: Sheece Gardezi
'''
from core import Index
from core import Data
from core import Contants
from core import BinaryTree
from core import Queries

if __name__ == '__main__':
#     Index.createTermIndex()
     
    indexedWords = Data.read_dataStruct_from_file(Contants.WORD_INDEX_FILE_NAME)
    lexicons = Data.read_dataStruct_from_file(Contants.LEXICON_FILE_NAME)
    posting = Data.read_dataStruct_from_file(Contants.POSTING_LIST_FILE_NAME)

    bt = BinaryTree.balancedTree(indexedWords)
    #     BinaryTree.DepthFirstSearchPrintNodes(bt)
    #print(posting)

    # example usage
    query = 'four dell'
    proximity = 700
    print(Queries.boolean_queries(query))
    print(Queries.boolean_queries_implement_using_lists(query))
    print(Queries.boolean_queries_with_proximity(query, proximity))
    query = 'bi*sh'
    print(Queries.wlidCard_queries_using_permuterm_index(query))
    Queries.trailing_wildCard_queries_using_tree('del*')
Example #18
def test_balanced_tree():
    wordsIndex = Data.read_dataStruct_from_file(Contants.WORD_INDEX_FILE_NAME)
    # minimal smoke test: the tree builds and exposes a root node
    assert BinaryTree.balancedTree(wordsIndex).root is not None
Example #19
def createLexicon(sortTepDic):

    # the lexicon is simply the ordered list of index terms
    keys = list(sortTepDic.keys())
    Data.write_dataStruct_to_file(Contants.LEXICON_FILE_NAME, keys)
Example #20
 def __init__(self, data: dict):
     data = self.data = Data(data)
     self.id_token: str = data['idToken']