Пример #1
0
def main(page_range=(0, 30_000)):
    """Crawl catalogue pages and store every novel not yet in the database.

    For each page index in ``range(*page_range)`` the catalogue page is
    fetched and its read links are extracted. Every link whose URL is not
    already present in the ``Novel`` table is downloaded, parsed and saved
    in its own session, followed by a randomly chosen pause.

    Args:
        page_range: Arguments unpacked into ``range()``. The second element
            is also printed as the total in the progress line.
    """
    for page_no in range(*page_range):
        print(f"Page {page_no}/{page_range[1]}")
        fanfic_addresses = get_readlinks(fetch_catalogue_page(page_no))

        # Each address is indexed by 'url' here and merged into the parsed
        # result below, so it is expected to be a dict-like record.
        page_urls = [address['url'] for address in fanfic_addresses]
        if not page_urls:
            # Nothing listed on this page: skip the database round trip.
            continue

        with session_scope() as sess:
            rows = sess.query(Novel.url).filter(Novel.url.in_(page_urls)).all()
            # A set keeps the membership test below O(1) per address.
            known_urls = {row.url for row in rows}

        new_addresses = [
            address for address in fanfic_addresses
            if address['url'] not in known_urls
        ]
        for idx, fic_address in enumerate(new_addresses):
            # NOTE(review): no timeout is passed, so a stalled connection
            # blocks the crawl indefinitely - consider adding one.
            fic_page = requests.get(fic_address['url'])
            result = parse_fic(fic_page.content.decode())
            result.update(fic_address)
            # One session per novel, so each insert is handled on its own.
            with session_scope() as sess:
                sess.add(Novel(**result))
                print(f"Text {idx}")
            # Pause for a randomly picked interval between downloads.
            time.sleep(random.choice(TIME_INTERVALS))
Пример #2
0
def calculate_metrics():
    """Compute and store text metrics for novels that do not have them yet.

    Selects every ``Novel`` that has a text and a rating but no word count,
    derives the metrics from a ``Text`` built on the novel's text, writes
    them to the matching row and prints the novel's title.
    """
    # Attribute names shared by the Novel columns and the Text object that
    # supplies their values; the order matches the order of evaluation.
    metric_names = (
        "word_count",
        "ad_to_all_ratio",
        "direct_speech_word_ratio",
        "exclamative_sent_word_ratio",
        "interrogative_sent_word_ratio",
        "word_average_sym_count",
        "word_average_syl_count",
        "noun_to_all_ratio",
        "verb_to_all_ratio",
        "sent_syl_average",
        "sentence_count",
        "sent_word_count_average",
    )

    with session_scope() as query_session:
        pending = and_(
            Novel.text.isnot(None),
            Novel.rating.isnot(None),
            Novel.word_count.is_(None),
        )
        for novel in query_session.query(Novel).filter(pending).all():
            # Every novel is updated through its own session scope.
            with session_scope() as session:
                text = Text(novel.text)
                values = {
                    getattr(Novel, name): getattr(text, name)
                    for name in metric_names
                }
                target_row = session.query(Novel).filter(Novel.id == novel.id)
                target_row.update(values)
                print(novel.title)
Пример #3
0
from db.session_manager import session_scope
from db.models import Evento, TipoTicket

# Scratch script: builds one sample event with a single ticket type, then
# loads every stored ticket type.
with session_scope() as session:
    tipo_ticket = TipoTicket(
        id_eventos=2,
        tipo_ticket="Gratis",
        valor_ticket=5.00,
        taxa_ticket=0.5,
    )

    evento = Evento(
        nome_evento="Carnaval",
        nome_local="Bloco de Rua",
        cidade_local="Campinas, sp",
        data_evento="2010-01-25",
        hora_evento="19:30",
        dia_semana="Sab",
        tipo_tickets=[tipo_ticket],
    )

    # Adding the sample event to the session is currently switched off.
    # session.add(evento)
    a = session.query(TipoTicket).all()

########## TODO LIST ###############
# - Find a way to fetch the ticket types.
#       The captcha is getting in the way, so they will probably have to be mocked.
# - Build the return object and create a PIPELINE that saves it to the database.
# - Populate the database with a few events by hand (mocked).
# - Write the SQL queries requested in the assignment.
# - DOCUMENT.
# - Create a git repository and grant access to it.
Пример #4
0
    def process_item(self, item, spider):
        """Add the model built from ``item`` to a database session.

        Args:
            item: Value handed to ``ModelBuilder.build_evento_model``.
            spider: Not used by this method.

        Returns:
            The same ``item`` that was passed in.
        """
        with session_scope() as db:
            db.add(ModelBuilder.build_evento_model(item))
        return item
Пример #5
0
def get_text(idx: int):
    """Print the first ``Novel.id`` row returned by the database.

    NOTE(review): ``idx`` is not used and nothing is returned - this looks
    like an unfinished lookup; confirm the intended behaviour.
    """
    with session_scope() as db:
        first_id_row = db.query(Novel.id).first()
        print(first_id_row)