示例#1
0
文件: main.py 项目: nameerased/Pars
def _scrape_petition_votes(petition):
    """Download every votes page of one petition and return its signature rows.

    Args:
        petition: Petition identifier; it is converted with ``str`` and
            appended to the petition base URL.

    Returns:
        A list of ``(petition, position_number, username, sign_date)`` tuples,
        one per ``table_row`` element found on the votes pages.
    """
    petition_url = 'https://petition.president.gov.ua/petition/' + str(petition)
    # Compiled once per petition instead of once per downloaded page.
    row_class = re.compile(r'^table_row$')

    votes = []
    for page_number in range(1, max_page(petition_url) + 1):
        html = requests.get(f'{petition_url}/votes/{page_number}').text
        soup = BeautifulSoup(html, 'lxml')

        for row in soup.find_all('div', class_=row_class):
            # The position is rendered with dots (e.g. "12."); strip them.
            position_number = row.find('div', class_='table_cell number').string.replace('.', '')
            username = row.find('div', class_='table_cell name').string
            day, month, year = row.find('div', class_='table_cell date').string.split(' ')
            # The month is re-inflected to the nominative case and title-cased
            # so that ``%B`` can parse it.
            # NOTE(review): ``m`` is presumably a morphological analyzer defined
            # elsewhere, and ``%B`` relies on the process locale matching the
            # language of the month names - confirm both.
            new_month = m.parse(month)[0].inflect({'nomn'}).word.title()
            sign_date = datetime.strptime(' '.join([day, new_month, year]), '%d %B %Y')

            votes.append((petition, position_number, username, sign_date))
    return votes


def parse_petition(petitions):
    """Scrape the votes of each petition and replace its stored ``Vote`` rows.

    For every petition the votes pages are downloaded and parsed, previously
    stored votes of that petition are removed and the fresh ones inserted, and
    the petition status is refreshed. ``set_gender()`` is called once at the
    end when ``petitions`` is truthy.

    The delete and the inserts share one transaction, so a failure while
    inserting rolls the delete back instead of leaving the petition without
    any votes.

    Args:
        petitions: Iterable of petition identifiers.
    """
    for petition in petitions:
        votes = _scrape_petition_votes(petition)

        with db.atomic():
            # The delete's row count tells us whether the petition was already
            # stored, so no separate existence query is needed.
            removed = Vote.delete().where(Vote.petition == petition).execute()
            if removed:
                print(f'petition {petition} was in db with', removed, 'rows')

            # By default SQLite limits the number of bound variables in a SQL
            # query to 999; 249 rows x 4 fields = 996 stays under that limit.
            for batch in chunked(votes, 249):
                Vote.insert_many(batch, fields=['petition', 'position_number', 'username', 'sign_date']).execute()

        status = get_petition_status(petition)
        Petition.update(status=status).where(Petition.petition_id == petition).execute()
    if petitions:
        set_gender()
示例#2
0
文件: tmp.py 项目: nameerased/Pars
def copy_db():
    """Copy the rows of the ``User`` table into ``Vote``, petition by petition.

    The ``Vote`` table is created if it does not exist yet. For every distinct
    ``petition_id`` found in ``User``, that petition's rows are read and
    inserted into ``Vote`` in batches inside one transaction.

    Earlier one-off steps of this migration (copying ``Name_2`` into ``Name``
    and ``Petition_2`` into ``Petition``) used to be kept here as
    commented-out code.
    """
    db.create_tables([Vote], safe=True)

    distinct_petitions = User.select(User.petition_id).distinct()
    for entry in distinct_petitions:
        petition_id = entry.petition_id
        signers = User.select().where(User.petition == petition_id)
        rows = [
            (petition_id, signer.position_number, signer.username,
             signer.sign_date, signer.gender)
            for signer in signers
        ]

        with db.atomic():
            # By default SQLite limits the number of bound variables in a SQL
            # query to 999; 198 rows x 5 values = 990 stays under that limit.
            for batch in chunked(rows, 198):
                # No ``fields`` are passed, so the tuple order must match the
                # columns peewee picks by default for ``Vote`` - presumably
                # its non-primary-key fields in declaration order; verify
                # against the model.
                Vote.insert_many(batch).execute()