Пример #1
0
 def create_index(self, url):
     """Build, or rebuild, the stored word index for the page at ``url``.

     The page is converted to text, indexed with ``BuildIndex``, and one
     ``Match`` is saved per indexed word, holding that word's positions as
     a space-separated string.  If a ``Page`` for ``url`` already exists,
     its previous matches are deleted first, together with any ``Word``
     that is left without pages.

     Args:
         url: Address of the page to index, e.g. "http://fapl.ru/".
     """
     text = HtmlToTextConverter().transform_html_into_text(url)
     index = BuildIndex(text).getIndex()

     # Only the page lookup is guarded.  If the cleanup loop were inside the
     # ``try``, an ObjectDoesNotExist raised while following ``match.word``
     # would be mistaken for "page missing" and insert a duplicate Page.
     try:
         document = Page.objects.get(url=url)
     except ObjectDoesNotExist:
         document = Page(url=url)
         document.save()
     else:
         for match in Match.objects.filter(page=document):
             stale_word = match.word
             match.delete()
             # exists() asks the database for emptiness instead of loading
             # every related page just to test truthiness.
             if not stale_word.pages.exists():
                 stale_word.delete()

     for value, word_positions in index.items():
         positions = " ".join(str(x) for x in word_positions)
         word, _ = Word.objects.get_or_create(value=value)
         Match(word=word, page=document, positions=positions).save()
Пример #2
0
 def create_index(self, url):
     """Build, or rebuild, the stored word index for the page at ``url``.

     The page is converted to a title and text content, the content is
     indexed with ``BuildIndex``, and one ``Match`` is saved per indexed
     word, holding that word's positions as a space-separated string.  If
     a ``Page`` for ``url`` already exists, its previous matches are
     deleted first, together with any ``Word`` left without pages.

     The page's ``title``, ``content`` and ``number_of_words`` are written
     on every call, so re-indexing an existing page refreshes them too.

     Args:
         url: Address of the page to index, e.g. "http://fapl.ru/".
     """
     title, content = HtmlToTextConverter().transform_html_into_text(url)
     index_builder = BuildIndex(content)
     index = index_builder.get_index()
     number_of_words = index_builder.get_number_of_words()

     # Only the page lookup is guarded.  If the cleanup loop were inside the
     # ``try``, an ObjectDoesNotExist raised while following ``match.word``
     # would be mistaken for "page missing" and insert a duplicate Page.
     try:
         document = Page.objects.get(url=url)
     except ObjectDoesNotExist:
         document = Page(url=url)
     else:
         for match in Match.objects.filter(page=document):
             stale_word = match.word
             match.delete()
             # exists() asks the database for emptiness instead of loading
             # every related page just to test truthiness.
             if not stale_word.pages.exists():
                 stale_word.delete()

     # Applied on both paths: previously an existing page kept its old
     # title/content/word count even though its matches were rebuilt.
     document.number_of_words = number_of_words
     document.title = title
     document.content = content
     document.save()

     for value, word_positions in index.items():
         positions = " ".join(str(x) for x in word_positions)
         word, _ = Word.objects.get_or_create(value=value)
         Match(word=word, page=document, positions=positions).save()
Пример #3
0
def show_id(request, key):
    """Delegate to ``object_detail`` with the unfiltered ``Page`` query.

    Args:
        request: Request object, forwarded unchanged.
        key: Identifier forwarded unchanged to ``object_detail``.

    Returns:
        Whatever ``object_detail`` returns.
    """
    all_pages = Page.all()
    return object_detail(request, all_pages, key)
Пример #4
0
def update_last_updated(request):
    """Re-save the first 100 ``Page`` entities ordered by ``update_time``.

    Each fetched entity is written back with ``put()`` and its
    ``searchter_id`` is logged.  Presumably the write refreshes an
    auto-updated timestamp on the model -- confirm against ``Page``.

    Args:
        request: Incoming request; not used by the body.

    Returns:
        A ``TextResponse`` carrying a fixed success message.
    """
    # The loop variable must not be called ``Page``: assigning to that name
    # anywhere in the function makes it local for the whole body, so the
    # ``Page.all()`` lookup below would raise UnboundLocalError.
    pages = Page.all().order('update_time').fetch(100)
    for page in pages:
        page.put()
        logging.info("Updated %s", page.searchter_id)
    return TextResponse("Success, Finished!")