Пример #1
0
def count_top10():
    session = Session()
    rows = session.query(
        HouseModel.xiaoqu,
        func.count(HouseModel.xiaoqu).label('count')
    ).group_by(HouseModel.xiaoqu).order_by('count desc').limit(10)
    return [row._asdict() for row in rows]
Пример #2
0
def hot_res():
    session = Session()
    rows = session.query(
        FoodModel.name,
        ((FoodModel.kouwei + FoodModel.huanjin + FoodModel.fuwu) *
         FoodModel.reviewNum).label('sum')).order_by('sum desc').limit(10)
    return [row._asdict() for row in rows]
Пример #3
0
def education_stat():
    session = Session()
    rows = session.query(JobModel.education,
                         func.count(
                             JobModel.education).label('count')).group_by(
                                 'education').order_by('count desc')

    return [row._asdict() for row in rows]
Пример #4
0
def count_top10_lei():
    """商圈前十的分类
    """
    session = Session()
    rows = session.query(FoodModel.fenlei,
                         func.count(FoodModel.fenlei).label('count')).group_by(
                             FoodModel.fenlei).order_by('count desc').limit(10)
    return [row._asdict() for row in rows]
Пример #5
0
def exp_top10_lei():
    """人均消费前十的分类
    """
    session = Session()
    rows = session.query(
        FoodModel.fenlei,
        func.avg(FoodModel.agvExp).cast(Float).label('agvExp')).group_by(
            FoodModel.fenlei).order_by('agvExp desc').limit(10)
    return [row._asdict() for row in rows]
Пример #6
0
def experience_stat():
    session = Session()
    rows = session.query(
        func.concat(JobModel.experience_lower, '-',
                    JobModel.experience_upper).label('experience'),
        func.count('experience').label('count')).group_by(
            'experience').order_by('count desc')

    return [row._asdict() for row in rows]
Пример #7
0
def count_top10():
    """职位数排名前十的城市

    """
    session = Session()
    rows = session.query(
        JobModel.city,
        func.count(JobModel.city).label('count')
    ).group_by(JobModel.city).order_by('count desc').limit(10)
    return [row._asdict() for row in rows]
Пример #8
0
def salary_top10():
    session = Session()
    rows = session.query(
        JobModel.city,
        func.avg((JobModel.salary_lower + JobModel.salary_upper) /
                 2).cast(Float).label('salary')).filter(
                     and_(JobModel.salary_lower > 0,
                          JobModel.salary_upper > 0)).group_by(
                              JobModel.city).order_by('salary desc').limit(10)
    return [row._asdict() for row in rows]
Пример #9
0
def count_top10_lei_1():
    """商圈前十的分类
    """
    session = Session()
    rows = session.query(FoodModel.fenlei,
                         func.count(FoodModel.fenlei).label('count')).group_by(
                             FoodModel.fenlei).order_by('count desc').limit(10)
    res = []
    for row in rows:
        res.append(row.fenlei)
    return res
Пример #10
0
def education_stat():
    """学历要求统计

    """
    session = Session()
    rows = session.query(
        JobModel.education,
        func.count(JobModel.education).label('count')
    ).group_by('education').order_by(desc('count'))
    学历要求统计 = [row._asdict() for row in rows]
    return 学历要求统计
Пример #11
0
def count_top10_quan():
    """商圈前十的商圈
        rows = session.query(
        JobModel.city,
        func.count(JobModel.city).label('count')
    ).group_by(JobModel.city).order_by('count desc').limit(10)
    """
    session = Session()
    rows = session.query(FoodModel.quan,
                         func.count(FoodModel.quan).label('count')).group_by(
                             FoodModel.quan).order_by('count desc').limit(10)
    return [row._asdict() for row in rows]
Пример #12
0
def salary_by_city_and_education():
    session = Session()
    rows = session.query(
        JobModel.city, JobModel.education,
        func.avg(
            (JobModel.salary_lower + JobModel.salary_upper) /
            2).cast(Float).label('salary')).filter(
                and_(JobModel.salary_lower > 0,
                     JobModel.salary_upper > 0)).group_by(
                         JobModel.city,
                         JobModel.education).order_by(JobModel.city.desc())

    return [row._asdict() for row in rows]
Пример #13
0
def fenlei_and_quan():
    """热门分类在热门商圈的人均消费对比

    """
    session = Session()
    rows = session.query(
        FoodModel.fenlei, FoodModel.quan,
        func.avg(FoodModel.agvExp).cast(Float).label('agvExp')).filter(
            and_(FoodModel.fenlei.in_(count_top10_lei_1()),
                 FoodModel.quan.in_(count_top10_quan_1()))).group_by(
                     FoodModel.fenlei,
                     FoodModel.quan).order_by(FoodModel.quan.desc())
    return [row._asdict() for row in rows]
Пример #14
0
def all_huxing():
    session = Session()
    rows = session.query(
        (HouseModel.huxing).label('item'),
        func.count(HouseModel.huxing).label('count'),
        func.count(HouseModel.huxing).label('percent')
    ).group_by(HouseModel.huxing).order_by('count desc')
    result = [row._asdict() for row in rows]
    total = 0
    for row in result:
        total += int(row['count'])
    for row in result:
        row['percent'] = round(row['count']/total,2)
    results = {'total':total,'data':result}
    return results
Пример #15
0
def count_top10_quan_1():
    """商圈前十的商圈
        rows = session.query(
        JobModel.city,
        func.count(JobModel.city).label('count')
    ).group_by(JobModel.city).order_by('count desc').limit(10)
    """
    session = Session()
    rows = session.query(FoodModel.quan,
                         func.count(FoodModel.quan).label('count')).group_by(
                             FoodModel.quan).order_by('count desc').limit(10)
    res = []
    for row in rows:
        res.append(row.quan)
    return res
Пример #16
0
def salary_top10():
    """薪资排名前十的城市

    """
    session = Session()
    rows = session.query(
        JobModel.city,
        func.avg(
            (JobModel.salary_lower + JobModel.salary_upper) / 2
        ).label('salary')
    ).filter(
        and_(JobModel.salary_lower > 0, JobModel.salary_upper > 0)
    ).group_by(JobModel.city).order_by(desc('salary')).limit(10)
    薪资排名列表 = [row._asdict() for row in rows]  
    # 列表中每个元素是字典,字典的 salary 值的数据类型是 Decimal
    # 须将其转换为 float 类型
    for d in 薪资排名列表:
        d['salary'] = float(format(d['salary'], '.2f'))
    return 薪资排名列表
Пример #17
0
def salary_by_city_and_education():
    """同等学历不同城市薪资对比

    """
    session = Session()
    rows = session.query(
        JobModel.city,
        JobModel.education,
        func.avg(
            (JobModel.salary_lower + JobModel.salary_upper) / 2
        ).cast(Float).label('salary')
    ).filter(
        and_(JobModel.salary_lower > 0, JobModel.salary_upper > 0)
    ).group_by(JobModel.city, JobModel.education).order_by(JobModel.city.desc())
    #return [row._asdict() for row in rows]
    同等学历不同城市薪资对比 = [row._asdict() for row in rows]
    for i in 同等学历不同城市薪资对比:
        i['salary'] = float(format(i['salary'],'.2f'))
    return 同等学历不同城市薪资对比
Пример #18
0
class PersistentPipeline(object):
    """持久化数据 Pipeline

    """
    def open_spider(self, spider):
        self.session = Session()

    def close_spider(self, spider):
        self.session.commit()
        self.session.close()

    def process_item(self, item, spider):
        if isinstance(item, JobItem):
            return self._process_job_item(item)
        else:
            return item

    def _process_job_item(self, item):
        city = item['city'].split('·')[0]

        salary_lower, salary_upper = 0, 0
        m = re.match(r'[^\d]*(\d+)k-(\d+)k', item['salary'])
        if m is not None:
            salary_lower, salary_upper = int(m.group(1)), int(m.group(2))

        experience_lower, experience_upper = 0, 0
        m = re.match(r'[^\d]*(\d+)-(\d+)', item['experience'])
        if m is not None:
            experience_lower, experience_upper = int(m.group(1)), int(
                m.group(2))

        tags = ' '.join(item['tags'])

        model = JobModel(
            title=item['title'],
            city=city,
            salary_lower=salary_lower,
            salary_upper=salary_upper,
            experience_lower=experience_lower,
            experience_upper=experience_upper,
            education=item['education'],
            tags=tags,
            company=item['company'],
        )

        self.session.add(model)

        return item
Пример #19
0
class SeiyaPipeline(object):
    def open_spider(self, spider):
        self.session = Session()

    def close_spider(self, spider):
        self.session.commit()
        self.session.close()

    def process_item(self, item, spider):
        if isinstance(item, JobItem):
            return self._process_job_item(item)
        else:
            return item

    def _process_job_item(self, item):
        city = item['city'].split('·')[0]
        m = re.search(r'(\d*)k-(\d*)k', item['salary'])
        if m:
            salary_lower, salary_upper = int(m.group(1)), int(m.group(2))
        else:
            salary_lower, salary_upper = 0, 0

        m = re.search(r'(\d+)-(\d+)', item['experience'])
        if m:
            experience_lower, experience_upper = int(m.group(1)), int(
                m.group(2))
        else:
            experience_lower, experience_upper = 0, 0

        tags = ' '.join(item['tags'])

        jobdata = JobModel(title=item['title'],
                           city=city,
                           salary_lower=salary_lower,
                           salary_upper=salary_upper,
                           experience_lower=experience_lower,
                           experience_upper=experience_upper,
                           education=item['education'],
                           tags=tags,
                           company=item['company'])

        self.session.add(jobdata)
        return item
Пример #20
0
 def open_spider(self, spider):
     #self.session = sessionmaker(bind=engine)()
     self.session = Session()
Пример #21
0
def all_area():
    session = Session()
    rows = session.query(HouseModel.mianji)
    return [float(row._asdict()['mianji']) for row in rows]
Пример #22
0
 def open_spider(self, spider):
     self.session = Session()
Пример #23
0
class PersistentPipeline(object):
    """持久化数据 Pipeline

    """
    def open_spider(self, spider):
        self.session = Session()

    def close_spider(self, spider):
        self.session.commit()
        self.session.close()

    def process_item(self, item, spider):
        if isinstance(item, JobItem):
            return self._process_job_item(item)
        elif isinstance(item, FoodItem):
            return self._process_food_item(item)
        elif isinstance(item, HouseItem):
            return self._process_house_item(item)
        else:
            return item

    def _process_job_item(self, item):
        city = item['city'].split('·')[0]

        salary_lower, salary_upper = 0, 0
        m = re.match(r'[^\d]*(\d+)k-(\d+)k', item['salary'])
        if m is not None:
            salary_lower, salary_upper = int(m.group(1)), int(m.group(2))

        experience_lower, experience_upper = 0, 0
        m = re.match(r'[^\d]*(\d+)-(\d+)', item['experience'])
        if m is not None:
            experience_lower, experience_upper = int(m.group(1)), int(
                m.group(2))

        tags = ' '.join(item['tags'])

        model = JobModel(
            title=item['title'],
            city=city,
            salary_lower=salary_lower,
            salary_upper=salary_upper,
            experience_lower=experience_lower,
            experience_upper=experience_upper,
            education=item['education'],
            tags=tags,
            company=item['company'],
        )

        self.session.add(model)

        return item

    def _process_food_item(self, item):
        agvExp, score = 0, 0
        m = re.match(r'¥(\d+)', item['agvExp'])
        if m is not None:
            agvExp = int(m.group(1))
        m = re.match(r'(.*)(\d{2})', item['score'])
        if m is not None:
            score = int(m.group(2)) / 10.0
        model = FoodModel(
            label=item['label'],
            name=item['name'],
            score=score,
            reviewNum=int(item['reviewNum']),
            agvExp=agvExp,
            fenlei=item['fenlei'],
            quan=item['quan'],
            addr=item['addr'],
            kouwei=float(item['kouwei']),
            huanjin=float(item['huanjin']),
            fuwu=float(item['fuwu']),
        )

        self.session.add(model)

        return item

    def _process_house_item(self, item):
        mianji = item['mianji'].split('平米')[0]
        louceng = item['other'][0]
        years = item['other'][1]
        model = HouseModel(
            area=item['area'],
            name=item['name'],
            xiaoqu=item['xiaoqu'].replace("\xa0\xa0", ""),
            huxing=item['huxing'].replace("\xa0\xa0", ""),
            mianji=float(mianji),
            chaoxiang=item['chaoxiang'],
            quan=item['quan'],
            louceng=louceng,
            years=years,
            labels=' '.join(item['labels']),
            price=int(item['price']),
        )
        self.session.add(model)
        return item