Python Document.set_url примеры использования

Язык программирования: Python

Пространство имен/Пакет: Document

Класс/Тип: Document

Метод/Функция: set_url

Примеров на hotexamples.com: 1

Python Document.set_url - 1 пример найден. Это лучшие примеры Python кода для Document.Document.set_url, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Document(30)

all_sentences(11)

__str__(5)

__init__(4)

append(3)

addMention(2)

numOfWords(2)

generateWhole(2)

factory(2)

edit(2)

addMeSH(1)

get_candidates(1)

generate_candidate_anaphor_data(1)

generate_candidate_mention_pairs(1)

generate_document(1)

generate_gold_anaphor_data(1)

generate_gold_mention_pairs(1)

get(1)

getID(1)

getIdentifiant(1)

getUID(1)

get_article(1)

get_clean(1)

from_json(1)

get_cls_byname(1)

get_cluster_data(1)

get_stems(1)

name(1)

__dict__(1)

save_collection(1)

set_body_length(1)

set_url(1)

termFrequency(1)

to_json(1)

write2DB(1)

_edit(1)

from_data_frame(1)

addLien(1)

build_n_grams(1)

addRef(1)

addTexte(1)

addTitre(1)

add_anchor_text(1)

add_body_hits(1)

add_sentence(1)

allDocumentsID(1)

addDocument(1)

addAuteur(1)

availableReplacements(1)

calculate_vectors(1)

Пример #1

Показать файл

Файл: QueryDocParser.py Проект: bharcode/Programming

 def parse(self):
     """Parse the query/document file at ``self.queryDocPath``.

     The file is read line by line; each stripped line is dispatched on
     its prefix (``query``, ``url``, ``title``, ``body_hits``,
     ``body_length``, ``anchor_text``).  An ``anchor_text`` line is
     followed by a second line whose value after the colon is the anchor
     count.  Reading stops at the first blank line or at end of file.

     Side effects: resets and then fills ``self.docs``, ``self.titles``,
     ``self.anchors`` and the ``self.avg_*`` statistics.  The averages are
     0.0 when there is nothing to average over.

     Returns:
         The list of ``Query`` objects created, in file order; documents
         are attached to the most recently seen query.
     """
     queries = []
     c_query = None
     c_doc = None
     self.avg_anchor_length = 0
     self.avg_title_length = 0
     self.avg_body_length = 0
     # Reset together with the other accumulators so the ``+=`` in the
     # anchor branch never depends on the attribute being set elsewhere.
     self.avg_anchors_per_doc = 0
     self.docs = 0
     self.titles = 0
     self.anchors = 0
     # The context manager guarantees the file is closed, even when a
     # malformed line raises part-way through.
     with open(self.queryDocPath, 'r') as f:
         line = f.readline().strip()
         # An empty string means EOF or a blank line; both end parsing.
         while line:
             if line.startswith('query'):
                 c_query = Query(self.idf)
                 queries.append(c_query)
                 # maxsplit=1 keeps colons that belong to the value itself.
                 c_query.set_query(line.split(':', 1)[1])
             elif line.startswith('url'):
                 self.docs += 1
                 # A new document starts: finalize the previous one first.
                 if c_doc is not None:
                     c_doc.calculate_vectors()
                 c_doc = Document(c_query)
                 c_query.add_doc(c_doc)
                 # The whole line (prefix included) is handed to set_url.
                 c_doc.set_url(line)
             elif line.startswith('title'):
                 c_doc.set_title(line.split(':', 1)[1])
                 self.avg_title_length += len(c_doc.title_terms)
                 self.titles += 1
             elif line.startswith('body_hits'):
                 fields = line.split(':', 1)[1].strip().split()
                 # First field is the term, the remaining ones are its hits.
                 c_doc.add_body_hits(fields[0], fields[1:])
             elif line.startswith('body_length'):
                 c_doc.set_body_length(line.split(':', 1)[1])
                 self.avg_body_length += c_doc.body_length
             elif line.startswith('anchor_text'):
                 text = line.split(':', 1)[1].strip()
                 # The anchor count lives on the line right after the text.
                 count = f.readline().split(':', 1)[1].strip()
                 c_doc.add_anchor_text(text, count)
                 self.avg_anchor_length += int(count)
                 self.avg_anchors_per_doc += int(count)
             line = f.readline().strip()

     # The last document has no following 'url' line to trigger this.
     if c_doc is not None:
         c_doc.calculate_vectors()

     # We calculate avg anchor length as follows:
     # consider all words in anchor text for a doc as one BIG document, so
     # count up all occurrences of anchor words and divide them by the
     # number of docs.
     docs = self.docs
     titles = self.titles
     # Guard the divisions so a file without documents/titles yields 0.0
     # instead of raising ZeroDivisionError.
     self.avg_anchor_length = self.avg_anchor_length * 1.0 / docs if docs else 0.0
     self.avg_title_length = self.avg_title_length * 1.0 / titles if titles else 0.0
     self.avg_body_length = self.avg_body_length * 1.0 / docs if docs else 0.0
     self.avg_anchors_per_doc = self.avg_anchors_per_doc * 1.0 / docs if docs else 0.0
     return queries