示例#1
0
def get_uuid_date_condition(category):
    """Collect every refined uuid whose ``update_date`` is at least 30 days old.

    Args:
        category: Passed through unchanged as the first element of the
            result. NOTE(review): it is not used to filter the query; the
            original comment suggests restricting to uuids whose refined
            JSON category matches was intended -- confirm before relying on it.

    Returns:
        A three-element list ``[category, 'from get_uuid_date_condition',
        rows]`` where ``rows`` is the result of ``Query.all()`` on
        ``RefinedData.uuid``.
    """
    from datetime import timedelta
    from sqlalchemy.sql import func

    # The cutoff is a SQL expression, so it is evaluated with the database
    # clock at query time rather than with the application clock.
    thirty_days_ago = func.now() - timedelta(days=30)

    refine_engine = db_setting.get_refineddata_engine()
    refine_db_session = db_setting.get_sesstion(refine_engine)
    try:
        uuid_list = refine_db_session.query(RefinedData.uuid).filter(
            RefinedData.update_date <= thirty_days_ago).all()
    finally:
        # Release the session even when the query raises.
        refine_db_session.close()
    return [category, 'from get_uuid_date_condition', uuid_list]
示例#2
0
def _latest_rows_per_site(session, model, uuid):
    """Return the newest (source_site, data, recovery) row per site for *uuid*."""
    from sqlalchemy import and_
    from sqlalchemy.sql import func

    # Newest date of this uuid for every source site.
    latest = session.query(
        model.source_site.label('source_site'),
        func.max(model.date).label('lastdate')).filter(
            model.uuid == uuid).group_by(
                model.source_site).subquery('sub')
    # Correlate on uuid, site and date: matching on the date alone would
    # also return rows of other uuids/sites that share the same timestamp.
    return session.query(
        model.source_site, model.data, model.recovery).filter(
            and_(model.uuid == uuid,
                 model.source_site == latest.c.source_site,
                 model.date == latest.c.lastdate)).all()


def insert_refined(uuid_list):
    """Build refined JSON for each uuid and insert it as a ``RefinedData`` row.

    Args:
        uuid_list: A list shaped ``[category, source_label, rows]`` (the
            shape returned by the ``get_uuid_*`` functions). ``rows`` must be
            an iterable of indexable items whose first element is the uuid,
            so a single uuid has to be wrapped accordingly before calling.
            ``category == 'movie'`` reads from ``Rawdata_movie``; any other
            value reads from ``RawdataRestaurant``.

    Returns:
        None. Each uuid is committed individually; a failure for one uuid is
        logged and the loop continues with the next one.
    """
    log_init('insert_refined' + crawler.get_date())

    # create connection to crawling db
    rawdata_engine = db_setting.get_rawdata_engine()
    rawdata_db_session = db_setting.get_sesstion(
        rawdata_engine)  # create session
    try:
        refine_engine = db_setting.get_refineddata_engine()
        refine_db_session = db_setting.get_sesstion(refine_engine)
        try:
            category = uuid_list[0]
            rows = uuid_list[2]
            model = Rawdata_movie if category == 'movie' else RawdataRestaurant

            for row in rows:
                uuid = row  # logged as-is if unpacking below fails
                try:
                    uuid = row[0]
                    ans = _latest_rows_per_site(
                        rawdata_db_session, model, uuid)
                    json_form = crawler.make_json(
                        category, str(uuid), ans)  # the finished JSON data
                    entry = RefinedData(uuid, json_form)
                    refine_db_session.add(entry)
                    refine_db_session.commit()
                except Exception as e:
                    logging.error(
                        'In uuid :  ' + str(uuid) + ' Exception ' + str(e))
                    logging.exception('Got exception.. ')
                    logging.error('**********************************')
                    # Without a rollback a failed flush/commit leaves the
                    # session unusable, so every following uuid would fail.
                    refine_db_session.rollback()
                    rawdata_db_session.rollback()
                    continue
        finally:
            refine_db_session.close()
    finally:
        rawdata_db_session.close()
示例#3
0
def get_uuid_refine_notexist_review(category):
    """Return refined uuids that have no row in the movie raw-data table.

    Args:
        category: Passed through unchanged as the first element of the
            result. NOTE(review): the raw-data side always queries
            ``Rawdata_movie`` regardless of this value -- confirm that is
            intended.

    Returns:
        A three-element list ``[category, 'from get_uuid_refine', rows]``
        where ``rows`` are the ``RefinedData.uuid`` result rows that do not
        appear among the ``Rawdata_movie.uuid`` result rows (unordered, since
        they come from a set difference). NOTE(review): the label string
        matches ``get_uuid_refine`` rather than this function's name; kept
        as-is because callers may depend on it.
    """
    refine_engine = db_setting.get_refineddata_engine()
    refine_db_session = db_setting.get_sesstion(refine_engine)
    try:
        # create connection to crawling db
        rawdata_engine = db_setting.get_rawdata_engine()
        rawdata_db_session = db_setting.get_sesstion(
            rawdata_engine)  # create session
        try:
            review_uuid = rawdata_db_session.query(
                Rawdata_movie.uuid).group_by(Rawdata_movie.uuid).all()
            refine_uuid = refine_db_session.query(RefinedData.uuid).all()
        finally:
            # Release the sessions even when a query raises.
            rawdata_db_session.close()
    finally:
        refine_db_session.close()

    not_in_review_uuid = list(set(refine_uuid) - set(review_uuid))
    return [category, 'from get_uuid_refine', not_in_review_uuid]
示例#4
0
def get_uuid_not_exist(category):
    """Return the distinct raw-data uuids that are missing from the refine table.

    Args:
        category: ``'movie'`` reads uuids from ``Rawdata_movie``; any other
            value reads them from ``RawdataRestaurant``.

    Returns:
        A three-element list ``[category, 'from get_uuid_not_exist', rows]``
        where ``rows`` are the raw-data uuid result rows that do not appear
        among the ``RefinedData.uuid`` result rows (unordered, since they
        come from a set difference).
    """
    # create connection to crawling db
    rawdata_engine = db_setting.get_rawdata_engine()
    rawdata_db_session = db_setting.get_sesstion(
        rawdata_engine)  # create session
    try:
        refine_engine = db_setting.get_refineddata_engine()
        refine_db_session = db_setting.get_sesstion(refine_engine)
        try:
            # NOTE(review): the refine side is not filtered by category; the
            # original comment indicates that restricting it was planned.
            refine_list = refine_db_session.query(RefinedData.uuid).all()
            model = Rawdata_movie if category == 'movie' else RawdataRestaurant
            raw_list = rawdata_db_session.query(model.uuid).group_by(
                model.uuid).all()
        finally:
            # Release the sessions even when a query raises.
            refine_db_session.close()
    finally:
        rawdata_db_session.close()

    new_uuid_list = list(set(raw_list) - set(refine_list))
    return [category, 'from get_uuid_not_exist', new_uuid_list]
示例#5
0
def get_uuid_refine(category):
    """Return every uuid currently stored in the refine table.

    Args:
        category: Passed through unchanged as the first element of the
            result; it does not filter the query.

    Returns:
        A three-element list ``[category, 'from get_uuid_refine', rows]``
        where ``rows`` is the result of ``Query.all()`` on
        ``RefinedData.uuid``.
    """
    refine_engine = db_setting.get_refineddata_engine()
    refine_db_session = db_setting.get_sesstion(refine_engine)
    try:
        uuid_list = refine_db_session.query(RefinedData.uuid).all()
    finally:
        # Release the session even when the query raises.
        refine_db_session.close()
    return [category, 'from get_uuid_refine', uuid_list]