Exemplo n.º 1
0
async def start_scraper():
    """Load all hashtags, start scraping them and update statistics.

    Fetches every ``Hashtag`` row through ``db``, builds a ``Scraper``
    configured from the ``CONCURRENCY`` (default 50) and ``PROXY``
    environment variables, schedules ``scraper.parse_all_tags()`` as a
    background task, then awaits ``updater.update_statistics`` on the
    scraper's ``result_queue``.
    """
    tags = await db.execute(Hashtag.select())

    # Logs the raw environment values: an unset CONCURRENCY is reported as
    # None here even though the scraper below falls back to 50.
    logging.info("[ENV] CONCURRENCY: %s, PROXY: %s",
                 os.getenv("CONCURRENCY"), os.getenv("PROXY"))

    scraper = Scraper(
        tags,
        # ``or 50`` also covers CONCURRENCY being set to an empty string,
        # which would otherwise make int() raise ValueError.
        concurrency=int(os.getenv("CONCURRENCY") or 50),
        proxy=os.getenv("PROXY")
    )

    def _report_parse_failure(task):
        """Log the exception, if any, that ended the parsing task."""
        if task.cancelled():
            return
        error = task.exception()
        if error is not None:
            logging.error("parse_all_tags failed", exc_info=error)

    # Keep a strong reference to the background task for as long as this
    # coroutine runs: the event loop itself only holds weak references, so
    # an unreferenced task may be garbage-collected before it finishes.
    parse_task = asyncio.ensure_future(scraper.parse_all_tags())
    parse_task.add_done_callback(_report_parse_failure)

    await updater.update_statistics(db, scraper.result_queue)
Exemplo n.º 2
0
    async def get(self):
        """Return the top ``n`` hashtags by ``total_posts`` as JSON.

        ``n`` is read from the request's match info. When it is missing or
        is not a non-negative decimal integer, a 400 response with an error
        payload is returned instead.
        """
        n = self.request.match_info.get('n')

        # ``isdecimal`` rather than ``isdigit``: the latter also accepts
        # characters (e.g. superscript digits) that ``int()`` rejects, which
        # would surface as an unhandled ValueError instead of a 400.
        # The ``None`` check covers a route that does not capture ``n``.
        if n is None or not n.isdecimal():
            return json_response({
                'success': False,
                'errors': 'n must be int'
            },
                                 status=400)

        limit = int(n)

        db = self.request.app['objects']
        query = Hashtag.select().order_by(
            Hashtag.total_posts.desc()).limit(limit)
        hashtags = await db.execute(query)

        # The serializer's dump() result is unpacked as a 2-tuple; only the
        # first element (the serialized data) is used.
        data, _ = HashtagSerializer().dump(hashtags, many=True)

        return json_response(data)
Exemplo n.º 3
0
 def hashtags(self):
     """Return the tag of every Hashtag whose photo is this object, each prefixed with '#'."""
     # Function-scope import kept as in the original
     # (presumably avoids a circular import -- confirm).
     from models.hashtags import Hashtag
     linked = Hashtag.select().where(Hashtag.photo == self)
     return ['#' + row.tag for row in linked]
Exemplo n.º 4
0
import logging

import peewee

from models.base import database
from models.hashtags import Hashtag
from models.posts import Post

# Configure logging once for the script: "time:LEVEL:message" at INFO level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s',
)

# No name is passed, so this is the root logger.
logger = logging.getLogger()

logger.info('INIT DB')
objects = peewee.PostgresqlDatabase(database)

# Create the model tables in a fixed order (Hashtag first, then Post).
# safe=True is forwarded to peewee's create_table for each model.
logger.info('START CREATE TABLES')
for _model in (Hashtag, Post):
    _model.create_table(safe=True)
logger.info('END CREATE TABLES')