示例#1
0
    def start_requests(self):
        """Build the first request, skipping past rows already stored.

        Counts the ``Star`` rows in the database.  When at least one exists,
        ``self.index`` and ``self.page`` are recomputed from that count and
        ``self.page_size`` before the first URL is generated; otherwise both
        attributes keep their current values.

        Returns:
            A one-element list holding the request for ``self.next_url()``.
        """
        stored = session.query(Star).count()
        if stored > 0:
            # Remainder of the count within a page, plus one.
            self.index = stored % self.page_size + 1
            # Number of full pages already covered, plus one.
            self.page = math.floor(stored / self.page_size) + 1

        first_url = self.next_url()
        return [scrapy.Request(first_url)]
示例#2
0
    def init_start_urls(self):
        """Yield requests for the (date, astroid) pairs missing from astro_day.

        Finds every date that has fewer than 12 ``astro_day`` rows, works out
        which of the ids 1..12 have no row for that date, and appends one URL
        per missing id to ``self.start_urls``.  Each URL is produced by
        ``self.next_url()`` after ``self.astroid`` and ``self.date`` are set.

        This is a generator: nothing runs, not even the database queries,
        until the caller starts iterating the result.

        Yields:
            scrapy.Request: one request per entry of ``self.start_urls``,
            skipping every entry equal to the first one.
        """
        sql = """
            SELECT `date`, COUNT(*) c FROM astro_day
            GROUP BY `date`
            HAVING c < 12
            ORDER BY c DESC
        """
        # Every astroid a complete date is expected to have.
        all_astroids = set(range(1, 13))
        for day, _count in engine.execute(sql).fetchall():
            date_str = day.strftime('%Y-%m-%d')
            # Ids already stored for this date.
            rows = session.query(AstroDay.astroid).filter_by(
                date=date_str).all()
            stored = {row[0] for row in rows}
            # The set difference is the ids that are still missing.
            for astroid in all_astroids - stored:
                # The attribute is set one below the target id before the URL
                # is built (presumably next_url() advances it -- confirm).
                self.astroid = astroid - 1
                self.date = date_str
                self.start_urls.append(self.next_url())

        for url in self.start_urls:
            # Skip entries equal to the first URL.  Comparing against
            # start_urls[0] avoids a linear index() search for every URL.
            if url == self.start_urls[0]:
                continue
            yield scrapy.Request(url)
示例#3
0
    def weather_item(self, item):
        """Store one city's forecast, one ``Weather`` row per forecast day.

        ``item['date']`` is converted to a string and split as 4+2+2
        characters (``YYYYMMDD``) into ``YYYY-MM-DD``.  The n-th entry of
        ``item['forecast']`` is stored under ``date_operate(date, n)``.  A row
        that already exists for the same city and date is updated; otherwise
        a new row is inserted.  Every day is committed on its own.

        Args:
            item: mapping with the keys ``date``, ``city``, ``shidu``,
                ``pm25``, ``pm10``, ``quality``, ``wendu`` and ``forecast``
                (an iterable of per-day mappings).  ``item['date']`` is
                replaced by its string form.

        Raises:
            Exception: anything raised while querying or committing; the
                session is rolled back before the error is re-raised.
        """
        item['date'] = str(item['date'])
        raw = item['date']
        base_date = '-'.join([raw[0:4], raw[4:6], raw[6:8]])
        # Strip everything except digits, the decimal point and the minus
        # sign, so that a sub-zero value is not stored as a positive one.
        non_numeric = re.compile(r'[^-.\d]')

        for offset, day in enumerate(item['forecast']):
            curr_date = date_operate(base_date, offset)
            info = {
                'city': item['city'],
                'date': curr_date,
                'shidu': item['shidu'],
                'pm25': item['pm25'],
                'pm10': item['pm10'],
                'quality': item['quality'],
                'wendu': item['wendu'],
                'sunrise': day['sunrise'],
                'sunset': day['sunset'],
                'high': non_numeric.sub('', day['high']),
                'low': non_numeric.sub('', day['low']),
                'aqi': day['aqi'],
                'fx': day['fx'],
                'fl': day['fl'],
                'stype': day['type'],
                'notice': day['notice'],
            }
            if not info['pm25']:
                # No pm25 value: drop the air-quality fields so they are
                # neither inserted nor overwritten on update.
                for key in ('pm25', 'pm10', 'aqi', 'quality'):
                    del info[key]

            try:
                # Update the row when it already exists, insert otherwise.
                existing = session.query(Weather).filter_by(
                    city=item['city']).filter_by(date=curr_date).first()
                if existing:
                    session.query(Weather).filter_by(
                        id=existing.id).update(info)
                else:
                    session.add(Weather(**info))
                session.commit()
            except Exception:
                # Leave the shared session usable for later items.
                session.rollback()
                raise
示例#4
0
    def start_requests(self):
        """Resume crawling from the newest rows stored in the database.

        Reads the newest daily, monthly and yearly records and copies their
        position into ``self.date`` / ``self.astroid``, ``self.month`` and
        ``self.year``.  ``self.astroid`` is then decremented by one before
        ``self.next_url()`` is asked for the first URL.

        Returns:
            A one-element list with the first request, or an empty list
            (after logging an error) when ``self.next_url()`` returns a
            falsy value.
        """
        # Back-filling of start URLs is switched off:
        # self.init_start_urls()

        # Newest daily record: latest date first, then highest astroid.
        newest_day = (session.query(AstroDay)
                      .order_by(AstroDay.date.desc())
                      .order_by(AstroDay.astroid.desc())
                      .first())
        if newest_day:
            self.date = newest_day.date.strftime("%Y-%m-%d")
            if newest_day.astroid != 12:
                self.astroid = newest_day.astroid + 1
            else:
                # astroid 12 is stored for that date: shift the date by 1.
                self.date = date_operate(self.date, 1)

        # Resuming from the newest weekly record is disabled; kept for
        # reference:
        # last_week = session.query(AstroWeek).order_by(
        #     AstroWeek.weekth.desc()).order_by(
        #         AstroWeek.astroid.desc()).first()
        # if last_week:
        #     self.weekth['date'] = last_week.weekth
        #     self.weekth['astroid'] = last_week.astroid

        # Newest monthly record.
        newest_month = (session.query(AstroMonth)
                        .order_by(AstroMonth.date.desc())
                        .order_by(AstroMonth.astroid.desc())
                        .first())
        if newest_month:
            self.month['date'] = newest_month.date
            self.month['astroid'] = newest_month.astroid

        # Newest yearly record.
        newest_year = (session.query(AstroYear)
                       .order_by(AstroYear.date.desc())
                       .order_by(AstroYear.astroid.desc())
                       .first())
        if newest_year:
            self.year['date'] = newest_year.date
            self.year['astroid'] = newest_year.astroid

        self.astroid -= 1
        first_url = self.next_url()
        if not first_url:
            self.logger.error('不存在可用的appkey')
            return []
        return [scrapy.Request(first_url)]
示例#5
0
def weather_data_check(provinces):
    """Check today's weather rows and return the index to resume crawling at.

    Looks up the ``Weather`` row with the highest id for today's date.  If
    that row's city has a newest record dated ``date_operate(today, 4)``,
    the position after that city in ``provinces`` is returned.

    Args:
        provinces: sequence of city names supporting ``index()``.

    Returns:
        ``provinces.index(city) + 1`` when the condition above holds,
        otherwise ``0``.

    Raises:
        ValueError: if the city is not found by ``provinces.index``.
    """
    from mkspider.lib.db import session
    from mkspider.lib.models import Weather

    curr_date = time.strftime('%Y-%m-%d')
    # Row with the highest id among those stored for today.
    latest_today = session.query(Weather).filter_by(date=curr_date).order_by(
        Weather.id.desc()).first()
    if not latest_today:
        return 0

    city = latest_today.city
    if not isinstance(city, str):
        # Non-str values are decoded as UTF-8 (presumably bytes coming from
        # the database driver -- confirm).
        city = str(city.decode('utf-8'))

    # Newest record of that city, compared with today shifted by 4 through
    # date_operate (presumably the end of the stored forecast -- confirm).
    city_last_weather = session.query(Weather).filter_by(
        city=city).order_by(Weather.date.desc()).first()
    if city_last_weather and str(city_last_weather.date) == date_operate(
            curr_date, 4):
        return provinces.index(city) + 1
    return 0
示例#6
0
    def start_requests(self):
        """Start crawling right after the newest ``Lunar`` row.

        The newest row is the first one when ordering by year, month and day,
        all descending.  When such a row exists, ``self.date`` is set to its
        ``year-month-day`` string shifted by 1 through ``date_operate``;
        otherwise ``self.date`` keeps its current value.

        Returns:
            A one-element list holding the request for ``self.next_url()``.
        """
        newest = (session.query(Lunar)
                  .order_by(Lunar.year.desc())
                  .order_by(Lunar.month.desc())
                  .order_by(Lunar.day.desc())
                  .first())

        if newest:
            parts = (newest.year, newest.month, newest.day)
            self.date = date_operate("-".join(str(part) for part in parts), 1)

        first_url = self.next_url()
        return [scrapy.Request(first_url)]
示例#7
0
    def astro_item(self, item):
        """Store the yearly, monthly and daily horoscope parts of ``item``.

        Each non-empty part is inserted only when no row with the same
        ``astroid`` and ``date`` exists yet.  The sub-dicts of ``item`` are
        modified in place: ``name`` and ``astroid`` are added, the month date
        is rewritten as year followed by the zero-padded month, and a
        non-numeric daily ``number`` becomes ``0``.  The weekly part is
        disabled.

        Failures while storing the daily part are logged and the session is
        rolled back; they are not propagated.

        Args:
            item: mapping with the keys ``astroid``, ``astroname``, ``year``,
                ``month``, ``week`` and ``today``.
        """
        import logging

        if item['year']:
            res = session.query(AstroYear).filter_by(
                astroid=item['astroid']).filter_by(
                    date=item['year']['date']).first()

            if not res:
                item['year']['name'] = item['astroname']
                item['year']['astroid'] = item['astroid']

                session.add(AstroYear(**item['year']))
                session.commit()

        if item['month']:
            # Rewrite "year-month" as year followed by the zero-padded month.
            year, month = item['month']['date'].split('-')
            item['month']['date'] = "{}{}".format(year, str(month).zfill(2))

            res = session.query(AstroMonth).filter_by(
                astroid=item['astroid']).filter_by(
                    date=item['month']['date']).first()
            if not res:
                item['month']['name'] = item['astroname']
                item['month']['astroid'] = item['astroid']

                session.add(AstroMonth(**item['month']))
                session.commit()

        # The weekly branch is switched off by the trailing `and False`;
        # item['week'] is still looked up, so the key must exist.
        if item['week'] and False:

            item['week']['name'] = item['astroname']
            item['week']['astroid'] = item['astroid']
            item['week']['weekth'] = get_weekth_by_date(item['today']['date'])
            start_date, end_date = item['week']['date'].split('~')
            end_date = "{}-{}".format(start_date.split('-')[0], end_date)
            item['week']['start_date'] = start_date
            item['week']['end_date'] = end_date
            del item['week']['date']

            astroWeek = AstroWeek(**item['week'])
            session.add(astroWeek)
            session.commit()

        if item['today']:
            today = item['today']
            existing = session.query(AstroDay).filter_by(
                astroid=item['astroid']).filter_by(
                    date=today['date']).first()

            today['name'] = item['astroname']
            today['astroid'] = item['astroid']

            try:
                # A ``number`` that is not made of digits is stored as 0.
                if not str(today['number']).isdigit():
                    today['number'] = 0
                # Same rule as the yearly and monthly parts: insert only
                # when the (astroid, date) row is not there yet.
                if not existing:
                    session.add(AstroDay(**today))
                    session.commit()
            except Exception:
                # Best effort: a bad daily record must not stop processing,
                # but the session is reset and the failure is recorded.
                session.rollback()
                logging.getLogger(__name__).exception(
                    'failed to store daily astro data: astroid=%s date=%s',
                    item['astroid'], today['date'])
示例#8
0
# -*- coding: utf-8 -*-
""" 提取日历数据中节气数据放到节气字段中 """


import os, sys, json
bin_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = os.path.join(bin_dir, '..')
sys.path.append(root_dir)

from mkspider.lib.db import session
from mkspider.lib.models import Lunar
from mkspider.lib.common import slog

# Walk every Lunar row in id order and copy the entry for the row's own day,
# if there is one, out of the JSON `jieqi` column into `jieqi2`.
lunar_rows = session.query(Lunar).order_by(Lunar.id.asc()).all()

for lunar in lunar_rows:
    terms_by_day = json.loads(lunar.jieqi)
    day_key = str(lunar.day)
    # Skip rows without an entry for this day or whose jieqi2 is already set.
    if day_key not in terms_by_day or lunar.jieqi2:
        continue
    term = terms_by_day[day_key]
    slog("DD", "[%s-%s-%s]节气:%s" %
         (lunar.year, lunar.month, lunar.day, term))
    session.query(Lunar).filter_by(id=lunar.id).update({'jieqi2': term})

# One commit persists all updates issued in the loop.
session.commit()

示例#9
0
 def init_types(self):
     """Fill ``self.types`` with a stype -> notice mapping read from Weather.

     The query selects ``stype`` and ``notice`` grouped by ``stype``; each
     returned pair is stored as ``self.types[stype] = notice``.
     """
     rows = session.query(Weather.stype,
                          Weather.notice).group_by('stype').all()
     for stype, notice in rows:
         self.types[stype] = notice