示例#1
0
    def test_makeContext0(self):
        """Index two small files, query for 'маме' and compare the contexts.

        The string form of ``list(makeContexts(...).values())`` is compared
        with an expected literal made of, per document, a list of context
        strings and a list of (start, end) tuple lists.

        Every fixture (both text files and whatever files the database was
        stored in) is removed in a ``finally`` clause, so a failing assertion
        or an exception in an intermediate step does not leave files behind.
        """
        f1_name = 'test_f1.txt'
        f2_name = 'test_f2.txt'
        db_name = 'testdb'
        # The database may live under the bare name or under the
        # .dat/.bak/.dir trio, so all four candidates are cleaned up.
        fixture_paths = [f1_name, f2_name, db_name,
                         db_name + '.dat', db_name + '.bak', db_name + '.dir']
        ass = "[(['Ляляля маму'], [[(7, 11)]]), (['мамами мыла раму'], [[(0, 6)]])]"

        try:
            with open(f1_name, 'w') as f1:
                f1.write('Ляляля маму. Мррррррр. Бебебе!')
            with open(f2_name, 'w') as f2:
                f2.write('мамами мыла раму. М')

            makeDB([f1_name, f2_name], db_name)

            # The handle is not passed anywhere (query() receives the name);
            # it is only held open around the query and always closed.
            db = shelve.open(db_name)
            try:
                qres = query('маме', db_name)
                res = str(list(makeContexts(qres).values()))
            finally:
                db.close()

            self.assertEqual(ass, res)
        finally:
            for path in fixture_paths:
                if os.path.exists(path):
                    os.unlink(path)
示例#2
0
    def test_makeContexts(self):
        """Index two files and check the context returned for 'foo ham'.

        The string form of the value returned by ``getQuery.makeContexts`` is
        compared with an expected literal naming only 'f2.txt'.

        Fixture files and every candidate database file are removed in a
        ``finally`` clause, so they are cleaned up even if building the
        database, the query or the assertion fails.
        """
        fixture_paths = ['f1.txt', 'f2.txt', 'testdb',
                         'testdb.dat', 'testdb.bak', 'testdb.dir']
        ass = "OrderedDict([('f2.txt', ['egg foo ham'])])"

        try:
            with open('f1.txt', 'w') as f1:
                f1.write('foo bar')
            with open('f2.txt', 'w') as f2:
                f2.write('egg foo ham')

            makeDB.makeDB(['f1.txt', 'f2.txt'], 'testdb')
            res = getQuery.query('foo ham', 'testdb')
            res = str(getQuery.makeContexts(res))

            self.assertEqual(ass, res)
        finally:
            for path in fixture_paths:
                if os.path.exists(path):
                    os.unlink(path)
示例#3
0
    def do_POST(self):
        """Handle a search form submission and write the HTML results page.

        Reads the 'query' and 'doc_count' form fields plus the paging buttons
        ('begin' / 'back' / 'forward' for documents, and
        'beginQuote<i>' / 'backQuote<i>' / 'forwardQuote<i>' with
        'countQuote<i>' for the quotes of document ``i``), updates the paging
        state kept in the ``myHandler`` class attributes (DOC_START,
        DOC_COUNT, QUERY, QUTES_COUNTS), runs the query and writes the page to
        ``self.wfile`` as UTF-8.

        A missing query yields the "empty query" page instead of failing, and
        non-numeric counts fall back to their defaults (2 documents,
        10 quotes). All text taken from the request or from the search
        results is HTML-escaped before being placed into the page.
        """
        form = cgi.FieldStorage(fp=self.rfile,
                                headers=self.headers,
                                environ={
                                    'REQUEST_METHOD': 'POST',
                                    'CONTENT_TYPE':
                                    self.headers['Content-Type']
                                })
        self.send_response(200)
        self.send_header('Content-type', 'text/html; charset=utf-8')
        self.end_headers()

        # The field may be absent; only lower-case it when present so the
        # "empty query" branch below can actually be reached.
        inputWords = form.getfirst('query')
        if inputWords is not None:
            inputWords = inputWords.lower()

        doc_count = self._parse_int(form.getvalue('doc_count'), 2)
        if doc_count < 0:
            doc_count = 2

        # Document paging: DOC_START never goes below 1.
        if form.getvalue('begin'):
            myHandler.DOC_START = 1
        elif form.getvalue('back'):
            myHandler.DOC_START = max(1, myHandler.DOC_START - doc_count)
        elif form.getfirst('forward'):
            myHandler.DOC_START += doc_count

        if inputWords is not None:
            if myHandler.QUERY == inputWords and \
               myHandler.DOC_COUNT == doc_count:
                # Same search as last time: apply the per-document quote
                # paging buttons. Each entry is [quote_count, quote_offset].
                for i, quote_state in enumerate(myHandler.QUTES_COUNTS):
                    suffix = str(i)
                    countQuote = self._parse_int(
                        form.getvalue('countQuote' + suffix), 10)
                    if form.getvalue('beginQuote' + suffix):
                        quote_state[1] = 0
                    elif form.getvalue('backQuote' + suffix):
                        # The offset never goes below 0.
                        quote_state[1] = max(0, quote_state[1] - countQuote)
                    elif form.getfirst('forwardQuote' + suffix):
                        quote_state[1] += countQuote
                    quote_state[0] = countQuote
            else:
                # New query or new page size: reset all paging state.
                myHandler.DOC_START = 1
                myHandler.DOC_COUNT = 2
                myHandler.QUERY = inputWords
                myHandler.QUTES_COUNTS = [
                    [5, 0] for _ in range(myHandler.DOC_COUNT)
                ]
            qres = getQuery.query(inputWords, config.DATABASE_NAME,
                                  myHandler.lemma, doc_count,
                                  myHandler.DOC_START, myHandler.QUTES_COUNTS)
            # resDict - { 'path' : ( [ 'context' ], [ [ (stBoldWord_1 , endBoldWord_1), (stBoldWord_2 , endBoldWord_2) ] ] ) }
            resDict = getQuery.makeContexts(qres, myHandler.QUTES_COUNTS)

            if myHandler.QUTES_COUNTS is None:
                myHandler.QUTES_COUNTS = []
            documents = []
            for i, path in enumerate(resDict):
                # More documents may come back than there are stored quote
                # states (the reset above seeds only two); give every extra
                # document the default state instead of failing on lookup.
                while len(myHandler.QUTES_COUNTS) <= i:
                    myHandler.QUTES_COUNTS.append([5, 0])
                tup = resDict[path]
                documents.append(
                    self._render_document(i, path, tup[0], tup[1],
                                          myHandler.QUTES_COUNTS[i][0]))
            result_line = ''.join(documents)
            if result_line:
                result_line = r'<ol type="I">' + result_line + r'</ol>'
            else:
                result_line = r'<p>Ничего не найдено. Искать в Яндекс, Google, Mail.ru</p>'
        else:
            result_line = r'<p><p><p>Задан пустой поисковый запрос</p></p></p>'
            inputWords = ''
        myHandler.QUERY = inputWords
        myHandler.DOC_COUNT = doc_count

        import html  # function-scope import: used only for output escaping
        page = (myHandler.HTML_DOC_1 + '"' +
                html.escape(myHandler.QUERY, quote=True) + '"' +
                myHandler.HTML_DOC_2 + '"' + str(myHandler.DOC_COUNT) +
                myHandler.HTML_DOC_3 + result_line + myHandler.HTML_DOC_4)
        self.wfile.write(page.encode('utf-8'))

    @staticmethod
    def _parse_int(raw, default):
        """Return ``int(raw)``, or ``default`` when raw is missing or not numeric."""
        try:
            return int(raw)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _highlight_context(context, positions):
        """Return ``context`` as escaped HTML with each (start, end) span in <b>."""
        import html
        pieces = []
        cursor = 0
        for pos in positions:
            # Plain text between the previous span and this one, then the span.
            pieces.append(html.escape(context[cursor:pos[0]]))
            pieces.append(r'<b>' + html.escape(context[pos[0]:pos[1]]) + r'</b>')
            cursor = pos[1]
        # Tail after the last span (the whole context if there are no spans).
        pieces.append(html.escape(context[cursor:]))
        return ''.join(pieces)

    def _render_document(self, index, path, contexts, positions_list, countQuote):
        """Return the <li> for one document: its contexts and quote paging controls."""
        import html
        suffix = str(index)
        pieces = [r'<li>' + r'<b>' + html.escape(path) + r'</b>' + r'<ul>']
        for context, positions in zip(contexts, positions_list):
            pieces.append(r'<li>' +
                          self._highlight_context(context, positions) +
                          r'</li>')
        pieces.append(r'</ul><p>')
        pieces.append(r'<input type="submit" name="beginQuote' + suffix +
                      '" value="В начало">&nbsp;')
        pieces.append(r'<input type="submit" name="backQuote' + suffix +
                      '" value="Назад">&nbsp;')
        pieces.append(r'<input type="submit" name="forwardQuote' + suffix +
                      '" value="Вперед">&nbsp;')
        pieces.append(r'<input type="text" name="countQuote' + suffix +
                      r'" value="')
        pieces.append(str(countQuote) + r'"></li></p>')
        return ''.join(pieces)
示例#4
0
 def test_doc_count_0(self):
     """Query 'смотреть' with arguments (2, 1, None) and expect two documents."""
     lemmatizer_instance = lemmatizer()
     query_result = getQuery.query(
         'смотреть', config.DATABASE_NAME, lemmatizer_instance, 2, 1, None)
     # contexts - { 'path' : ( [ 'context' ], [ [ (start_1, end_1), (start_2, end_2) ] ] ) }
     contexts = getQuery.makeContexts(query_result, None)
     document_paths = contexts.keys()
     self.assertEqual(len(document_paths), 2)
示例#5
0
 def test_doc_names(self):
     """Query 'смотреть' with arguments (2, 1, None) and check the document names."""
     expected_names = ['mid_text_1.txt', 'mid_text_2.txt']
     lemmatizer_instance = lemmatizer()
     query_result = getQuery.query(
         'смотреть', config.DATABASE_NAME, lemmatizer_instance, 2, 1, None)
     contexts = getQuery.makeContexts(query_result, None)
     actual_names = sorted(contexts.keys())
     self.assertEqual(actual_names, expected_names)
示例#6
0
 def test_doc_count_4(self):
     """Query 'смотреть' with arguments (1, 3, None) and expect no documents."""
     lemmatizer_instance = lemmatizer()
     query_result = getQuery.query(
         'смотреть', config.DATABASE_NAME, lemmatizer_instance, 1, 3, None)
     contexts = getQuery.makeContexts(query_result, None)
     document_paths = contexts.keys()
     self.assertEqual(len(document_paths), 0)