Exemplos de Page em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: celerycrawler.models

Classe / Tipo: Page

Exemplos em hotexamples.com: 7

Page em Python - 7 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de celerycrawler.models.Page em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

get_id_by_url(2)

count(1)

get_by_url(1)

Métodos Frequentes

get_id_by_url (2)

count (1)

get_by_url (1)

Relacionados

get_NRM_dev

TableLayout

rebuild_unpacked_request

parse_font_family

odr

move_into_place

log

Account

check_taxon_labels

Ui_AnnotationDialog

Related in langs

request_essence (PHP)

spam_login_filter_notify_admin (PHP)

AtribuicaoCJ (C#)

Rfc3211WrapEngine (C#)

CGU_BASE_UART3_CLK_PD (C++)

averageColor (C++)

SDL_IsScreenKeyboardShown (Go)

ParseMessage (Go)

BufferTools (Java)

TcpConnectionInterceptorFactoryChain (Java)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: tasks.py Projeto: tanmoydeb07/celery-crawler

def calculate_rank(doc_id):
    """Recompute the rank of one stored page and propagate noticeable changes.

    The new rank is 0.85 times the sum of ``entry[0] / entry[1]`` over the
    entries returned by ``Page.get_links_to_url(page.url)``. If that product
    is exactly 0, the rank falls back to ``1.0 / total_rows`` of the
    ``page/by_url`` view.

    Only when the rank moved by more than 0.0001 is the page stored and a
    ``calculate_rank`` job queued for each of its links that
    ``Page.get_id_by_url`` resolves to an id.

    Args:
        doc_id: Identifier passed to ``Page.load`` to fetch the page.
    """
    print("in calculate_rank")
    page = Page.load(settings.db, doc_id)

    # NOTE(review): each entry is presumably (rank, outgoing-link count) of a
    # referring page -- confirm against Page.get_links_to_url.
    total = 0
    for entry in Page.get_links_to_url(page.url):
        total += entry[0] / entry[1]

    previous = page.rank
    # NOTE(review): 0.85 looks like the usual PageRank damping factor.
    page.rank = total * 0.85
    if page.rank == 0:
        by_url = settings.db.view("page/by_url", limit=0)
        page.rank = 1.0 / by_url.total_rows

    # Nothing to persist or propagate unless the rank changed noticeably.
    if not abs(previous - page.rank) > 0.0001:
        return

    print("%s: %s -> %s" % (page.url, previous, page.rank))
    page.store(settings.db)
    for target in page.links:
        target_id = Page.get_id_by_url(target, update=False)
        if target_id is not None:
            calculate_rank.delay(target_id)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: tasks.py Projeto: tanmoydeb07/celery-crawler

def find_links(doc_id):
    """Collect the outgoing links of a stored page and queue follow-up jobs.

    The page content is parsed with ``document_fromstring``; every ``<a>``
    element that carries a non-empty ``href`` is resolved against the page URL
    and appended to ``doc.links``. The page is then stored, a rank calculation
    is queued for it, and for each link either a rank calculation (when
    ``Page.get_id_by_url`` returns an id) or a ``retrieve_page`` job is queued.

    Args:
        doc_id: Identifier passed to ``Page.load``; ``None`` is rejected.

    Returns:
        ``False`` when ``doc_id`` is ``None`` or the page has no usable
        content; otherwise ``None``.
    """
    print("in find_links")
    if doc_id is None:
        print("doc_id = None")
        return False

    doc = Page.load(settings.db, doc_id)
    if not hasattr(doc, 'content'):
        print("Got None for the content of %s -> %s." % (doc_id, doc.url))
        return False
    elif not doc['content']:
        print("tasks.py:elif not doc.content")
        return False

    tree = document_fromstring(doc.content)
    for a in tree.xpath('//a'):
        href = a.get('href')
        if not href:
            # urljoin() returns the base URL unchanged for a missing or empty
            # href, which would record the page as linking to itself.
            continue
        doc.links.append(urljoin(doc['url'], href))
    doc.store(settings.db)

    calculate_rank.delay(doc.id)
    for link in doc.links:
        p = Page.get_id_by_url(link, update=False)
        if p is not None:
            calculate_rank.delay(p)
        else:
            retrieve_page.delay(link)
    print("find_links {} -> {}".format(doc.url, len(doc.links)))

Exemplo n.º 3

0

Exibir arquivo

Arquivo: tasks.py Projeto: andrewjw/celery-crawler

def calculate_rank(doc_id):
    """Recompute the rank of one stored page and propagate noticeable changes.

    The new rank is 0.85 times the sum of ``entry[0] / entry[1]`` over the
    entries returned by ``Page.get_links_to_url(page.url)``. If that product
    is exactly 0, the rank falls back to ``1.0 / total_rows`` of the
    ``page/by_url`` view.

    Only when the rank moved by more than 0.0001 is the page stored and a
    ``calculate_rank`` job queued for each of its links that
    ``Page.get_id_by_url`` resolves to an id.

    Args:
        doc_id: Identifier passed to ``Page.load`` to fetch the page.
    """
    print("in calculate_rank")
    page = Page.load(settings.db, doc_id)

    # NOTE(review): each entry is presumably (rank, outgoing-link count) of a
    # referring page -- confirm against Page.get_links_to_url.
    total = 0
    for entry in Page.get_links_to_url(page.url):
        total += entry[0] / entry[1]

    previous = page.rank
    # NOTE(review): 0.85 looks like the usual PageRank damping factor.
    page.rank = total * 0.85
    if page.rank == 0:
        by_url = settings.db.view("page/by_url", limit=0)
        page.rank = 1.0 / by_url.total_rows

    # Nothing to persist or propagate unless the rank changed noticeably.
    if not abs(previous - page.rank) > 0.0001:
        return

    print("%s: %s -> %s" % (page.url, previous, page.rank))
    page.store(settings.db)
    for target in page.links:
        target_id = Page.get_id_by_url(target, update=False)
        if target_id is not None:
            calculate_rank.delay(target_id)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: tasks.py Projeto: andrewjw/celery-crawler

def find_links(doc_id):
    """Collect the outgoing links of a stored page and queue follow-up jobs.

    The page content is parsed with ``document_fromstring``; every ``<a>``
    element that carries a non-empty ``href`` is resolved against the page URL
    and appended to ``doc.links``. The page is then stored, a rank calculation
    is queued for it, and for each link either a rank calculation (when
    ``Page.get_id_by_url`` returns an id) or a ``retrieve_page`` job is queued.

    Args:
        doc_id: Identifier passed to ``Page.load``; ``None`` is rejected.

    Returns:
        ``False`` when ``doc_id`` is ``None`` or the page has no usable
        content; otherwise ``None``.
    """
    print("in find_links")
    if doc_id is None:
        print("doc_id = None")
        return False

    doc = Page.load(settings.db, doc_id)
    if not hasattr(doc, 'content'):
        print("Got None for the content of %s -> %s." % (doc_id, doc.url))
        return False
    elif not doc['content']:
        print("tasks.py:elif not doc.content")
        return False

    tree = document_fromstring(doc.content)
    for a in tree.xpath('//a'):
        href = a.get('href')
        if not href:
            # urljoin() returns the base URL unchanged for a missing or empty
            # href, which would record the page as linking to itself.
            continue
        doc.links.append(urljoin(doc['url'], href))
    doc.store(settings.db)

    calculate_rank.delay(doc.id)
    for link in doc.links:
        p = Page.get_id_by_url(link, update=False)
        if p is not None:
            calculate_rank.delay(p)
        else:
            retrieve_page.delay(link)
    print("find_links {} -> {}".format(doc.url, len(doc.links)))

Exemplo n.º 5

0

Exibir arquivo

Arquivo: tasks.py Projeto: tanmoydeb07/celery-crawler

def retrieve_page(url, rank=None):
    """Look up the page for ``url`` and queue link extraction for it.

    The page is obtained through ``Page.get_by_url(url, update=True)``. When
    that yields ``None`` the function logs the fact and stops. An explicitly
    supplied ``rank`` is written to the page and stored. If the page has no id
    at that point, ``page.update()`` is called before ``find_links`` is
    queued with the page id.

    Args:
        url: Address of the page to look up.
        rank: Optional rank to assign to the page; ``None`` leaves it as is.
    """
    banner = "retrieve_page {}".format(url)
    print(banner)

    record = Page.get_by_url(url, update=True)
    if record is None:
        print("Page is None")
        return

    rank_supplied = rank is not None
    if rank_supplied:
        record.rank = rank
        record.store(settings.db)

    if record.id is None:
        record.update()

    find_links.delay(record.id)

Exemplo n.º 6

0

Exibir arquivo

Arquivo: tasks.py Projeto: andrewjw/celery-crawler

def retrieve_page(url, rank=None):
    """Look up the page for ``url`` and queue link extraction for it.

    The page is obtained through ``Page.get_by_url(url, update=True)``. When
    that yields ``None`` the function logs the fact and stops. An explicitly
    supplied ``rank`` is written to the page and stored. If the page has no id
    at that point, ``page.update()`` is called before ``find_links`` is
    queued with the page id.

    Args:
        url: Address of the page to look up.
        rank: Optional rank to assign to the page; ``None`` leaves it as is.
    """
    banner = "retrieve_page {}".format(url)
    print(banner)

    record = Page.get_by_url(url, update=True)
    if record is None:
        print("Page is None")
        return

    rank_supplied = rank is not None
    if rank_supplied:
        record.rank = rank
        record.store(settings.db)

    if record.id is None:
        record.update()

    find_links.delay(record.id)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: views.py Projeto: ra2003/python-search-engine

def index(req):
    """Render the landing page with the page count and the top-ranked pages.

    Args:
        req: Incoming request object; not used by the body.

    Returns:
        The result of ``render_to_response`` for ``index.html`` with
        ``doc_count`` set to ``Page.count()`` and ``top_docs`` set to
        ``Page.get_top_by_rank(limit=20)``.
    """
    context = {
        "doc_count": Page.count(),
        "top_docs": Page.get_top_by_rank(limit=20),
    }
    return render_to_response("index.html", context)