Exemplo n.º 1
0
def force_match_city_channels_price(city_name):
    """Match the prices of all channels for a city without checking status.

    No update/match status is consulted here: the merged channel prices are
    saved for ``city_name`` and the city's price match is then marked as done.

    Args:
        city_name: Name of the city whose channel prices are matched.
    """
    logger.info(
        'city %s, start to force match channel price and update cinema min price',
        city_name)

    manager = PriceManager.clone()
    # Save first, then flag the match as finished for this city.
    manager.save_all_channels_price(city_name)
    manager.cinema_price_match_done(city_name)

    logger.info(
        'city %s, end to force match channel price and update cinema min price',
        city_name)
Exemplo n.º 2
0
 def match_cinema_by_phone(self, cinema_cls, city, step):
     """Do a rough cinema match by phone and persist each matched item.

     Args:
         cinema_cls: Channel cinema class; it is handed to the manager and
             also supplies the name of the result table.
         city: City passed to the manager and written to the log.
         step: Match step number passed to the manager and written to the log.
     """
     logger.info("step %d, begin to match cinema by phone with city %s",
                 step, city)
     # Each item returned by the manager is stored in the channel's
     # match-result table.
     for row in self.manager.load_matched_cinema_by_phone(
             cinema_cls, city, step):
         self.db.save(row, cinema_cls.get_match2mt_result_table_name())
     logger.info("step %d, end to match cinema by phone with city %s", step,
                 city)
Exemplo n.º 3
0
 def match_cinema_by_ai(self, cinema_cls, city, step):
     """Do a rough cinema match with the AI matcher and persist the matches.

     Every unmatched cinema loaded from the manager is passed to
     ``self.ai_matcher.process_item``. Only the first value produced by that
     call is used; when it is not ``None`` it is tagged with ``step`` and
     saved to the channel's match-result table.

     Args:
         cinema_cls: Channel cinema class; it is handed to the manager and
             also supplies the name of the result table.
         city: City passed to the manager and written to the log.
         step: Match step number stored on each saved item as ``match_step``.
     """
     logger.info("step %d, begin to match cinema by ai with city %s", step, city)
     unmatched_cinemas = self.manager.load_unmatched_cinema(cinema_cls, city)
     for cinema in unmatched_cinemas:
         generator = self.ai_matcher.process_item(cinema, None)
         # Use a default so that a matcher which yields nothing is treated as
         # "no match" instead of raising StopIteration and aborting the
         # remaining cinemas of this city.
         matched_item = next(generator, None)
         if matched_item is None:
             continue
         matched_item['match_step'] = step
         self.db.save(matched_item, cinema_cls.get_match2mt_result_table_name())
     logger.info("step %d, end to match cinema by ai with city %s", step, city)
Exemplo n.º 4
0
 def start_requests(self):
     """Yield one request for every cinema of the city that needs an update.

     The cinema ids come from ``CinemaManager.need_update_cinema_ids`` for
     ``CinemaTB`` and ``self.city_name``. Each request targets
     ``self.cinema_url`` formatted with the cinema id, is handled by
     ``self.parse``, and carries the cinema id and a proxy in its ``meta``.
     """
     manager = CinemaManager.get_instance()
     pending_ids = manager.need_update_cinema_ids(CinemaTB, self.city_name)
     logger.info("There are %d cinemas to update", len(pending_ids))
     for cinema_id in pending_ids:
         # A proxy is fetched anew for every single request.
         request_meta = {
             'cinema_id': cinema_id,
             'proxy': get_random_ip_port(),
         }
         yield scrapy.Request(self.cinema_url.format(cinema_id),
                              callback=self.parse,
                              meta=request_meta)
Exemplo n.º 5
0
def match_channels_movie():
    """Match the updated channel movies against the Douban movies.

    Each channel is matched in turn through ``MovieMatcher.match_2db``; once
    all channels are processed the combined match results are saved.
    """
    channels = (MovieLM, MovieMT, MovieTB)
    # Imported inside the function, as in the original code.
    from hyspider.matchers.movie import MovieMatcher
    movie_matcher = MovieMatcher()

    for movie_channel in channels:
        logger.info('begin to match movie %s to db',
                    movie_channel.get_channel_name())
        movie_matcher.match_2db(movie_channel)
        logger.info('end to match movie %s to db',
                    movie_channel.get_channel_name())

    logger.info('begin to save  match movie result')
    movie_matcher.save_all_channels_match_results()
    logger.info('end to save  match movie result')
Exemplo n.º 6
0
 def update_cinema_min_price(self):
     """Update each cinema's lowest price from the merged ``price`` table.

     For every ``cinema_id`` the statement takes the minimum of ``price_mt``,
     ``price_tb`` and ``price_lm``, picks the smallest of the three with
     ``LEAST`` and writes it to ``cinema.min_price`` (joined on
     ``cinema.id_mt``).

     According to the original author, the executed statement is equivalent
     to the following SQL:

         update cinema join (
             select cinema_id, min(min_price) as min_price
             from
             (
                 select cinema_id, min(price_mt) as min_price
                 from price
                 group by cinema_id
                 union all
                 select cinema_id, min(price_tb) as min_price
                 from price
                 group by cinema_id
                 union all
                 select cinema_id, min(price_lm) as min_price
                 from price
                 group by cinema_id
             )t
             group by cinema_id
         )cinema_min_price on (cinema.id_mt = cinema_min_price.cinema_id)
         set cinema.min_price = cinema_min_price.min_price
     """
     logger.info("start to update cinema min price")
     statement = '''
          update cinema join 
          (
              select cinema_id, LEAST(mt_min_price,tb_min_price, lm_min_price) as min_price
              from 
              (
                  select cinema_id, min(price_mt) as mt_min_price, 
                  min(price_tb) as tb_min_price,
                  min(price_lm) as lm_min_price
                  from  price
                  GROUP BY cinema_id
              )tmp
          )cinema_min_price on (cinema.id_mt = cinema_min_price.cinema_id)
          set cinema.min_price = cinema_min_price.min_price
      '''
     self.db.exec_update(statement)
     logger.info("end to update cinema min price")
Exemplo n.º 7
0
def update_city_prices(city_name):
    """Update the channel movie price information of a city.

    For every price channel, the cinemas that still need an update are looked
    up; if there are any, the channel's spider is run for the city, otherwise
    the channel counts as done. When every channel is done, the city's price
    update status is marked as done (unless it already is).

    Note from the original author: InnoDB's default isolation level is
    REPEATABLE READ, which keeps reads inside one transaction consistent. The
    latest state of the cinema table is needed here, so each query has to run
    in a new transaction, i.e. commit after ``execute_update`` or obtain the
    ``CinemaManager`` again.

    Any exception is caught and logged together with its traceback; nothing
    is re-raised.

    Args:
        city_name: Name of the city whose channel prices are updated.
    """
    cinema_manager = CinemaManager.get_instance()
    price_manager = PriceManager.get_instance()
    try:
        city_info = cinema_manager.get_cinema_city_id(city_name)
        done = 0
        channels = [PriceLM, PriceMT, PriceTB]
        for channel in channels:
            ids = cinema_manager.need_update_cinema_ids(
                channel.get_cinema_cls(), city_name)
            count = len(ids)
            if count > 0:
                # The city id differs per channel and is stored under
                # 'id_<channel name>'.
                city_id = city_info['id_{}'.format(channel.get_channel_name())]
                logger.info('run %s of city %s, %d cinemas need to be updated',
                            channel.get_spider_name(), city_name, count)
                run_spider(channel.get_spider_name(), city_id=city_id)
            else:
                done += 1
                logger.info('table %s of city %s need not to be updated',
                            channel.get_table_name(), city_name)
        # Compare against the channel list itself rather than a literal 3, so
        # the check stays correct when a channel is added or removed.
        if done == len(channels):
            # Update the status only if it has not been updated yet. A record
            # for the city has to be inserted manually into
            # price_update_status.
            if not price_manager.is_cinema_price_update_done(city_name):
                price_manager.cinema_price_updated_done(city_name)
            else:
                logger.info('city %s, channels price status already updated',
                            city_name)
    except Exception as e:
        logger.error("Unexpected Update {} Price Error: {}, {}".format(
            city_name, e, traceback.format_exc()))
Exemplo n.º 8
0
def match_city_channels_price(city_name):
    """Match the prices of all channels once every channel has been updated.

    The match runs only when the city's price update is done and its price
    match is not done yet. On success the match is marked as done; if saving
    or marking raises, the error is logged with its traceback and the match
    status is set with ``-1``.

    Args:
        city_name: Name of the city whose channel prices are matched.
    """
    price_manager = PriceManager.clone()

    if not price_manager.is_cinema_price_update_done(city_name):
        logger.info('city %s, some channels price do not update done',
                    city_name)
        return

    if price_manager.is_cinema_price_match_done(city_name):
        logger.info('city %s, price match status already update',
                    city_name)
        return

    try:
        price_manager.save_all_channels_price(city_name)
        price_manager.cinema_price_match_done(city_name)
    except Exception as e:
        # A failed match is an error, not routine information: log it at
        # ERROR level and keep the traceback so the failure can be diagnosed.
        logger.exception("Unexpected Match {} Price Error: {}".format(
            city_name, e))
        price_manager.cinema_price_match_done(city_name, -1)
Exemplo n.º 9
0
 def match_all_channels(self, city):
     """Delegate the matching of all channels for ``city`` to the manager.

     Args:
         city: City passed to the manager and written to the log.
     """
     logger.info("begin to match all channels with city %s", city)
     # NOTE(review): "mach_all_channels" is the manager's method name as used
     # here; it looks like a typo of "match" but must agree with the manager.
     self.manager.mach_all_channels(city)
     logger.info("end to match all channels with city %s", city)
Exemplo n.º 10
0
    def save_all_channels_price(self, city_name):
        """Merge the mt, tb and lm channel prices of a city into ``price``.

        Movies and cinemas of a channel may not have been matched
        successfully, and a channel may also lack movie, cinema or ticket
        price information, so the three channel price sets have to be combined
        with a full outer join. In Hive-style SQL that would be:

        replace into price(cinema_id, movie_id, show_date, begin, end, language, hall,  price_mt, price_tb, price_lm)
        select
        COALESCE(mt_price_tmp.cinema_id, tb_price_tmp.cinema_id, lm_price_tmp.cinema_id) as cinema_id,
        COALESCE(mt_price_tmp.movie_id, tb_price_tmp.movie_id, lm_price_tmp.movie_id) as movie_id,
        COALESCE(mt_price_tmp.show_date, tb_price_tmp.show_date, lm_price_tmp.show_date) as show_date,
        COALESCE(mt_price_tmp.begin, tb_price_tmp.begin, lm_price_tmp.begin) as begin,
        COALESCE(mt_price_tmp.end, tb_price_tmp.end, lm_price_tmp.end) as end,
        COALESCE(mt_price_tmp.language, tb_price_tmp.language, lm_price_tmp.language) as language,
        COALESCE(mt_price_tmp.hall, tb_price_tmp.hall, lm_price_tmp.hall) as hall,
        COALESCE(mt_price_tmp.price, 0) as price_mt,
        COALESCE(tb_price_tmp.price, 0) as price_tb,
        COALESCE(lm_price_tmp.price, 0) as price_lm
        from
        (
            select cinema.id_mt as cinema_id, movie.id_db as movie_id, show_date, begin, end, language, hall, price
            from cinema join price_mt on ( cinema.id_mt=price_mt.cinema_id) join movie on (movie.id_mt=price_mt.movie_id)
        ) mt_price_tmp
        full outer join
        (
            select cinema.id_mt as cinema_id, movie.id_db as movie_id, show_date, begin, end, language, hall, price
            from cinema join price_tb on ( cinema.id_tb=price_tb.cinema_id) join movie on (movie.id_tb=price_tb.movie_id)
        ) tb_price_tmp on (mt_price_tmp.cinema_id = tb_price_tmp.cinema_id and mt_price_tmp.movie_id = tb_price_tmp.movie_id
        and mt_price_tmp.show_date = tb_price_tmp.show_date and mt_price_tmp.begin = tb_price_tmp.begin)
        full outer join
        (
            select cinema.id_mt as cinema_id, movie.id_db as movie_id, show_date, begin, end, language, hall, price
            from cinema join price_lm on ( cinema.id_lm=price_lm.cinema_id) join movie on (movie.id_lm=price_lm.movie_id)
        ) lm_price_tmp on (COALESCE(mt_price_tmp.cinema_id, tb_price_tmp.cinema_id) = lm_price_tmp.cinema_id and COALESCE(mt_price_tmp.movie_id, tb_price_tmp.movie_id) = lm_price_tmp.movie_id
         and COALESCE(mt_price_tmp.show_date, tb_price_tmp.show_date) = lm_price_tmp.show_date and COALESCE(mt_price_tmp.begin, tb_price_tmp.begin) = lm_price_tmp.begin)

        MySQL has no full outer join, so each full outer join is emulated with
        ``left join ... union all ... right join ... where <left side> is
        null``, which is why the executed statement is so long. The executed
        statement also restricts every sub-select to ``city_name`` (via
        ``cinema_mt.city``) and uses 10000 instead of 0 as the placeholder for
        a missing channel price.

        --------------------------------------------------------------------------------------
        The SQL below finds the price information of movies that are sold in
        the same cinema on every channel; it is equivalent to the SQL above
        with ``full join`` replaced by ``join``:

        replace into  price(movie_id, cinema_id, show_date, begin, end, language, hall,  price_mt, price_tb, price_lm)
        select t1.id_db as movie_id, t5.id_mt as cinema_id, t3.show_date as show_date, t3.begin as begin,
        t3.end as end, t3.language as language, t3.hall as hall,
        t3.price as price_mt, t4.price as price_tb, t2.price as price_lm
        from movie t1
        join price_lm t2 on t1.id_lm = t2.movie_id
        join price_mt t3 on t1.id_mt = t3.movie_id
        join price_tb t4 on t1.id_tb = t4.movie_id
        join cinema t5 on (t2.cinema_id = t5.id_lm and t3.cinema_id=t5.id_mt and t4.cinema_id=t5.id_tb
        and t2.show_date=t3.show_date and t2.show_date=t4.show_date
        and t2.begin = t3.begin and t2.begin=t4.begin)

        Args:
            city_name: Name of the city whose channel prices are merged. It is
                interpolated into the SQL text as a quoted literal.
        """
        # NOTE(review): city_name is formatted straight into the statement. If
        # it can ever come from untrusted input, switch to a parameterized
        # query (depends on what self.db.exec_update supports).
        #
        # The lm join conditions use COALESCE(mt_movie_id, tb_movie_id): rows
        # that exist only on the tb side have a NULL mt_movie_id, so falling
        # back to tb_movie_id is required for them to meet their lm row.
        sql = '''
            replace into price(cinema_id, movie_id, show_date, begin, end, language, hall,  price_mt, price_tb, price_lm)
            select
            COALESCE(mt_cinema_id, tb_cinema_id, lm_cinema_id) as cinema_id,
            COALESCE(mt_movie_id, tb_movie_id, lm_movie_id) as movie_id,
            COALESCE(mt_show_date, tb_show_date, lm_show_date) as show_date,
            COALESCE(mt_begin, tb_begin, lm_begin) as begin,
            COALESCE(mt_end, tb_end, lm_end) as end,
            COALESCE(mt_language, tb_language, lm_language) as language,
            COALESCE(mt_hall, tb_hall, lm_hall) as hall,
            COALESCE(mt_price, 10000) as price_mt,
            COALESCE(tb_price, 10000) as price_tb,
            COALESCE(lm_price, 10000) as price_lm
            from
            (
                select mt_price_tmp.cinema_id as mt_cinema_id, mt_price_tmp.movie_id as mt_movie_id,
                mt_price_tmp.show_date as mt_show_date, mt_price_tmp.begin as mt_begin,
                mt_price_tmp.end as mt_end, mt_price_tmp.language as mt_language,
                mt_price_tmp.hall as mt_hall, mt_price_tmp.price as mt_price,
                tb_price_tmp.cinema_id as tb_cinema_id, tb_price_tmp.movie_id as tb_movie_id,
                tb_price_tmp.show_date as tb_show_date, tb_price_tmp.begin as tb_begin,
                tb_price_tmp.end as tb_end, tb_price_tmp.language as tb_language,
                tb_price_tmp.hall as tb_hall, tb_price_tmp.price as tb_price
                from
                (
                    select cinema.id_mt as cinema_id, movie.id_db as movie_id, show_date, begin, end, language, hall, price
                    from cinema_mt join cinema on(cinema_mt.city='{city}' and cinema_mt.id=cinema.id_mt)
                    join price_mt on ( cinema.id_mt=price_mt.cinema_id) join movie on (movie.id_mt=price_mt.movie_id)
                ) mt_price_tmp
                left join
                (
                     select cinema.id_mt as cinema_id, movie.id_db as movie_id, show_date, begin, end, language, hall, price
                     from cinema_mt join cinema on(cinema_mt.city='{city}' and cinema_mt.id=cinema.id_mt)
                     join price_tb on ( cinema.id_tb=price_tb.cinema_id) join movie on (movie.id_tb=price_tb.movie_id)
                ) tb_price_tmp on (mt_price_tmp.cinema_id = tb_price_tmp.cinema_id and mt_price_tmp.movie_id = tb_price_tmp.movie_id
                and mt_price_tmp.show_date = tb_price_tmp.show_date and mt_price_tmp.begin = tb_price_tmp.begin)

                union  all

                select mt_price_tmp.cinema_id as mt_cinema_id, mt_price_tmp.movie_id as mt_movie_id,
                mt_price_tmp.show_date as mt_show_date, mt_price_tmp.begin as mt_begin,
                mt_price_tmp.end as mt_end, mt_price_tmp.language as mt_language,
                mt_price_tmp.hall as mt_hall, mt_price_tmp.price as mt_price,
                tb_price_tmp.cinema_id as tb_cinema_id, tb_price_tmp.movie_id as tb_movie_id,
                tb_price_tmp.show_date as tb_show_date, tb_price_tmp.begin as tb_begin,
                tb_price_tmp.end as tb_end, tb_price_tmp.language as tb_language,
                tb_price_tmp.hall as tb_hall, tb_price_tmp.price as tb_price
                from
                (
                    select cinema.id_mt as cinema_id, movie.id_db as movie_id, show_date, begin, end, language, hall, price
                    from cinema_mt join cinema on(cinema_mt.city='{city}' and cinema_mt.id=cinema.id_mt)
                    join price_mt on ( cinema.id_mt=price_mt.cinema_id) join movie on (movie.id_mt=price_mt.movie_id)
                ) mt_price_tmp
                right join
                (
                    select cinema.id_mt as cinema_id, movie.id_db as movie_id, show_date, begin, end, language, hall, price
                    from cinema_mt join cinema on(cinema_mt.city='{city}' and cinema_mt.id=cinema.id_mt)
                    join price_tb on ( cinema.id_tb=price_tb.cinema_id) join movie on (movie.id_tb=price_tb.movie_id)
                ) tb_price_tmp on (mt_price_tmp.cinema_id = tb_price_tmp.cinema_id and mt_price_tmp.movie_id = tb_price_tmp.movie_id
                and mt_price_tmp.show_date = tb_price_tmp.show_date and mt_price_tmp.begin = tb_price_tmp.begin)
                where mt_price_tmp.cinema_id is null
            ) mt_tb_price_tmp left join
            (
                select cinema.id_mt as lm_cinema_id, movie.id_db as lm_movie_id, show_date as lm_show_date, begin as lm_begin,
                                            end as lm_end, language as lm_language, hall as lm_hall, price as lm_price
                from cinema_mt join cinema on(cinema_mt.city='{city}' and cinema_mt.id=cinema.id_mt)
                join price_lm on ( cinema.id_lm=price_lm.cinema_id) join movie on (movie.id_lm=price_lm.movie_id)
            ) lm_price_tmp
            on (COALESCE(mt_tb_price_tmp.mt_cinema_id, mt_tb_price_tmp.tb_cinema_id) = lm_price_tmp.lm_cinema_id
            and COALESCE(mt_tb_price_tmp.mt_movie_id, mt_tb_price_tmp.tb_movie_id) = lm_price_tmp.lm_movie_id
            and COALESCE(mt_tb_price_tmp.mt_show_date, mt_tb_price_tmp.tb_show_date) = lm_price_tmp.lm_show_date
            and COALESCE(mt_tb_price_tmp.mt_begin, mt_tb_price_tmp.tb_begin) = lm_price_tmp.lm_begin)

            union all

            select
            COALESCE(mt_cinema_id, tb_cinema_id, lm_cinema_id) as cinema_id,
            COALESCE(mt_movie_id, tb_movie_id, lm_movie_id) as movie_id,
            COALESCE(mt_show_date, tb_show_date, lm_show_date) as show_date,
            COALESCE(mt_begin, tb_begin, lm_begin) as begin,
            COALESCE(mt_end, tb_end, lm_end) as end,
            COALESCE(mt_language, tb_language, lm_language) as language,
            COALESCE(mt_hall, tb_hall, lm_hall) as hall,
            COALESCE(mt_price, 10000) as price_mt,
            COALESCE(tb_price, 10000) as price_tb,
            COALESCE(lm_price, 10000) as price_lm
            from
            (
                select mt_price_tmp.cinema_id as mt_cinema_id, mt_price_tmp.movie_id as mt_movie_id,
                mt_price_tmp.show_date as mt_show_date, mt_price_tmp.begin as mt_begin,
                mt_price_tmp.end as mt_end, mt_price_tmp.language as mt_language,
                mt_price_tmp.hall as mt_hall, mt_price_tmp.price as mt_price,
                tb_price_tmp.cinema_id as tb_cinema_id, tb_price_tmp.movie_id as tb_movie_id,
                tb_price_tmp.show_date as tb_show_date, tb_price_tmp.begin as tb_begin,
                tb_price_tmp.end as tb_end, tb_price_tmp.language as tb_language,
                tb_price_tmp.hall as tb_hall, tb_price_tmp.price as tb_price
                from
                (
                    select cinema.id_mt as cinema_id, movie.id_db as movie_id, show_date, begin, end, language, hall, price
                    from cinema_mt join cinema on(cinema_mt.city='{city}' and cinema_mt.id=cinema.id_mt)
                    join price_mt on ( cinema.id_mt=price_mt.cinema_id) join movie on (movie.id_mt=price_mt.movie_id)
                ) mt_price_tmp
                left join
                (
                    select cinema.id_mt as cinema_id, movie.id_db as movie_id, show_date, begin, end, language, hall, price
                    from cinema_mt join cinema on(cinema_mt.city='{city}' and cinema_mt.id=cinema.id_mt)
                    join price_tb on ( cinema.id_tb=price_tb.cinema_id) join movie on (movie.id_tb=price_tb.movie_id)
                ) tb_price_tmp on (mt_price_tmp.cinema_id = tb_price_tmp.cinema_id and mt_price_tmp.movie_id = tb_price_tmp.movie_id
                and mt_price_tmp.show_date = tb_price_tmp.show_date and mt_price_tmp.begin = tb_price_tmp.begin)

                union  all

                select mt_price_tmp.cinema_id as mt_cinema_id, mt_price_tmp.movie_id as mt_movie_id,
                mt_price_tmp.show_date as mt_show_date, mt_price_tmp.begin as mt_begin,
                mt_price_tmp.end as mt_end, mt_price_tmp.language as mt_language,
                mt_price_tmp.hall as mt_hall, mt_price_tmp.price as mt_price,
                tb_price_tmp.cinema_id as tb_cinema_id, tb_price_tmp.movie_id as tb_movie_id,
                tb_price_tmp.show_date as tb_show_date, tb_price_tmp.begin as tb_begin,
                tb_price_tmp.end as tb_end, tb_price_tmp.language as tb_language,
                tb_price_tmp.hall as tb_hall, tb_price_tmp.price as tb_price
                from
                (
                    select cinema.id_mt as cinema_id, movie.id_db as movie_id, show_date, begin, end, language, hall, price
                    from cinema_mt join cinema on(cinema_mt.city='{city}' and cinema_mt.id=cinema.id_mt)
                    join price_mt on ( cinema.id_mt=price_mt.cinema_id) join movie on (movie.id_mt=price_mt.movie_id)
                ) mt_price_tmp
                right join
                (
                    select cinema.id_mt as cinema_id, movie.id_db as movie_id, show_date, begin, end, language, hall, price
                    from cinema_mt join cinema on(cinema_mt.city='{city}' and cinema_mt.id=cinema.id_mt)
                    join price_tb on ( cinema.id_tb=price_tb.cinema_id) join movie on (movie.id_tb=price_tb.movie_id)
                ) tb_price_tmp on (mt_price_tmp.cinema_id = tb_price_tmp.cinema_id and mt_price_tmp.movie_id = tb_price_tmp.movie_id
                and mt_price_tmp.show_date = tb_price_tmp.show_date and mt_price_tmp.begin = tb_price_tmp.begin)
                where mt_price_tmp.cinema_id is null
            ) mt_tb_price_tmp right join
            (
                select cinema.id_mt as lm_cinema_id, movie.id_db as lm_movie_id, show_date as lm_show_date, begin as lm_begin,
                                            end as lm_end, language as lm_language, hall as lm_hall, price as lm_price
                from cinema_mt join cinema on(cinema_mt.city='{city}' and cinema_mt.id=cinema.id_mt)
                join price_lm on ( cinema.id_lm=price_lm.cinema_id) join movie on (movie.id_lm=price_lm.movie_id)
            ) lm_price_tmp
            on (COALESCE(mt_tb_price_tmp.mt_cinema_id, mt_tb_price_tmp.tb_cinema_id) = lm_price_tmp.lm_cinema_id
            and COALESCE(mt_tb_price_tmp.mt_movie_id, mt_tb_price_tmp.tb_movie_id) = lm_price_tmp.lm_movie_id
            and COALESCE(mt_tb_price_tmp.mt_show_date, mt_tb_price_tmp.tb_show_date) = lm_price_tmp.lm_show_date
            and COALESCE(mt_tb_price_tmp.mt_begin, mt_tb_price_tmp.tb_begin) = lm_price_tmp.lm_begin)
            where COALESCE(mt_tb_price_tmp.mt_cinema_id, mt_tb_price_tmp.tb_cinema_id) is null
         '''.format(city=city_name)
        logger.info("city %s, start to match cinema price", city_name)
        self.db.exec_update(sql)
        logger.info("city %s, end to match cinema price", city_name)
Exemplo n.º 11
0
 def remove_invalid_match(self, cinema_cls, city, step):
     """Remove invalid matches by delegating to the manager.

     Args:
         cinema_cls: Channel cinema class passed to the manager.
         city: City name; only written to the log, not passed to the manager.
         step: Match step number passed to the manager and written to the log.
     """
     logger.info(
         "step %d, begin to remove invalid match with city %s", step, city)
     self.manager.remove_invalid_match(cinema_cls, step)
     logger.info(
         "step %d, end to remove invalid match with city %s", step, city)
Exemplo n.º 12
0
 def __init__(self, **kwargs):
     """Store the ``city_id`` keyword argument and resolve its city name.

     Args:
         **kwargs: Only ``city_id`` is read; it is ``None`` when absent.
     """
     city_id = kwargs.get('city_id')
     self.city_id = city_id
     # The name is looked up through CityLM from the id given above.
     self.city_name = CityLM.get_city_name(city_id)
     logger.info("city id %s, city name %s", self.city_id, self.city_name)
Exemplo n.º 13
0
def update_movies():
    """Update the channel movie information by running each channel's spider."""
    for movie_channel in (MovieLM, MovieMT, MovieTB, MovieDB):
        logger.info('%s to be updated', movie_channel.get_table_name())
        # Every spider is started with the same fixed city id (289).
        spider_name = movie_channel.get_spider_name()
        run_spider(spider_name, city_id=289)
Exemplo n.º 14
0
def update_cinema_min_price():
    """Update the lowest price of the cinemas through a cloned PriceManager."""
    logger.info('start to update cinema min price')
    PriceManager.clone().update_cinema_min_price()
    logger.info('end to update cinema min price')