def main(page_range=(0, 30_000)):
    """Crawl catalogue pages and store every fanfic that is not in the database yet.

    For each page index the catalogue page is fetched and its read links are
    extracted. Links whose URL already exists in the ``Novel`` table are
    skipped; every remaining link is downloaded, parsed and saved as a new
    ``Novel`` row, followed by a randomly chosen pause.

    Args:
        page_range: Arguments unpacked into ``range()`` to produce the page
            indices. The stop value of that range is shown as the total in
            the progress output.
    """
    pages = range(*page_range)
    for page_no in pages:
        # ``pages.stop`` equals ``page_range[1]`` for two- and three-element
        # specs and also works for a single-element ``(stop,)`` spec.
        print(f"Page {page_no}/{pages.stop}")
        fanfic_addresses = get_readlinks(fetch_catalogue_page(page_no))

        with session_scope() as sess:
            stored_rows = (
                sess.query(Novel.url)
                .filter(Novel.url.in_([a['url'] for a in fanfic_addresses]))
                .all()
            )
            # A set gives constant-time membership tests in the filter below.
            stored_urls = {row.url for row in stored_rows}

        new_addresses = [
            fa for fa in fanfic_addresses if fa['url'] not in stored_urls
        ]
        for idx, fic_address in enumerate(new_addresses):
            # NOTE(review): no timeout or status check is applied here, so a
            # stalled connection blocks the crawl and error pages get parsed
            # like regular ones - confirm whether that is intended.
            fic_page = requests.get(fic_address['url'])
            result = parse_fic(fic_page.content.decode())
            result.update(fic_address)

            # Each novel is added inside its own session scope.
            with session_scope() as sess:
                sess.add(Novel(**result))

            print(f"Text {idx}")
            # Random pause between downloads, drawn from TIME_INTERVALS.
            time.sleep(random.choice(TIME_INTERVALS))
def calculate_metrics():
    """Compute and store text metrics for novels that do not have them yet.

    Selects every ``Novel`` with a non-null ``text`` and ``rating`` whose
    ``word_count`` is still null, wraps its text in ``Text``, and writes the
    resulting metric values back to the row with the same ``id``. The title
    of each processed novel is printed.
    """
    # Each name is read from the ``Text`` instance and written to the
    # ``Novel`` column of the same name.
    metric_names = (
        "word_count",
        "ad_to_all_ratio",
        "direct_speech_word_ratio",
        "exclamative_sent_word_ratio",
        "interrogative_sent_word_ratio",
        "word_average_sym_count",
        "word_average_syl_count",
        "noun_to_all_ratio",
        "verb_to_all_ratio",
        "sent_syl_average",
        "sentence_count",
        "sent_word_count_average",
    )

    with session_scope() as query_session:
        needs_metrics = and_(
            Novel.text.isnot(None),
            Novel.rating.isnot(None),
            Novel.word_count.is_(None),
        )
        pending = query_session.query(Novel).filter(needs_metrics).all()

        for novel in pending:
            # Every update runs in a session scope of its own.
            with session_scope() as session:
                text = Text(novel.text)
                metrics = {
                    getattr(Novel, name): getattr(text, name)
                    for name in metric_names
                }
                row_query = session.query(Novel).filter(Novel.id == novel.id)
                row_query.update(metrics)
            print(novel.title)
from db.session_manager import session_scope
from db.models import Evento, TipoTicket

# Ad-hoc script: builds one sample event with a single ticket type and then
# reads back all stored ticket types.
with session_scope() as session:
    ticket_fields = {
        "id_eventos": 2,
        "tipo_ticket": "Gratis",
        "valor_ticket": 5.00,
        "taxa_ticket": 0.5,
    }
    tipo_ticket = TipoTicket(**ticket_fields)

    event_fields = {
        "nome_evento": "Carnaval",
        "nome_local": "Bloco de Rua",
        "cidade_local": "Campinas, sp",
        "data_evento": "2010-01-25",
        "hora_evento": "19:30",
        "dia_semana": "Sab",
    }
    evento = Evento(**event_fields, tipo_tickets=[tipo_ticket])

    # Adding the sample event to the session is currently disabled:
    # session.add(evento)

    ticket_query = session.query(TipoTicket)
    a = ticket_query.all()

########## TODO LIST ###############
# - Find a way to fetch the ticket types.
#   Since the captcha is getting in the way, they will probably have to be
#   mocked.
# - Build the return object and create a PIPELINE to save it to the database.
# - Populate the database with a few events by hand (mocked).
# - Write the SQL queries requested in the assignment.
# - DOCUMENT
# - Create a git repository and grant access.
def process_item(self, item, spider):
    """Add the model built from ``item`` to a database session.

    Args:
        item: Object passed to ``ModelBuilder.build_evento_model``.
        spider: Not used by this method.

    Returns:
        The ``item`` that was passed in, unchanged by this method.
    """
    with session_scope() as db:
        db.add(ModelBuilder.build_evento_model(item))
    return item
def get_text(idx: int):
    """Print the first row of a ``Novel.id`` query.

    NOTE(review): ``idx`` is accepted but never used, and nothing is
    returned - confirm whether this is an unfinished stub.

    Args:
        idx: Currently ignored.
    """
    with session_scope() as db:
        first_row = db.query(Novel.id).first()
        print(first_row)