def verify_page_path(path, no):
    """Return True when page number *no* is within the MAXPAGE limit.

    Args:
        path: page path, logged for debugging only.
        no: page number (str or int); coerced to int before comparison.
    """
    logger.debug(f'verify page {path} , args {no}')
    no = int(no)
    # The comparison already yields the boolean; no if/else needed.
    return no <= MAXPAGE
def saveit(tag_info):
    """Fetch or insert the Tag matching *tag_info*; return the Tag row."""
    defaults = {'url': tag_info.link}
    tag, was_created = Tag.get_or_create(type_=tag_info.type,
                                         value=tag_info.value,
                                         defaults=defaults)
    if was_created:
        logger.debug(f'save tag: {tag}')
    return tag
def load_db():
    """Handle the user-db upload form: save the file, import its tag data,
    and queue spider downloads for fanhaos missing locally."""
    msg = ''
    errmsg = ''
    if request.POST.submit:
        upload = request.files.get('dbfile')
        if not upload:
            errmsg = '请上传数据库文件'
        else:
            logger.debug(upload.filename)
            name = get_data_path('uploaded.db')
            upload.save(name, overwrite=True)
            logger.debug(f'uploaded file saved to {name}')
            try:
                tag_file_added, missed_fanhaos = load_tags_db()
            except DBError:
                errmsg = '数据库文件错误, 请检查文件是否正确上传'
            else:
                # Fetch metadata for every fanhao the local db doesn't know yet.
                urls = [
                    bus_spider.get_url_by_fanhao(fanhao)
                    for fanhao in missed_fanhaos
                ]
                add_download_job(urls)
                msg = (f'上传 {tag_file_added} 条用户打标数据, '
                       f'{len(missed_fanhaos)} 个番号, '
                       ' 注意: 需要下载其他数据才能开始建模, 请等候一定时间')
    return template('load_db', path=request.path, msg=msg, errmsg=errmsg)
def verify_fanhao(path, fanhao):
    """Verify *fanhao* before adding it to the queue: accept only unseen ones."""
    item = Item.get_by_fanhao(fanhao)
    found = item is not None
    logger.debug(f'verify {fanhao}: , exists:{found}, skip {path}')
    return not found
def saveit(item, tag):
    """Link *item* and *tag* via an ItemTag row; return it, or None on failure."""
    try:
        link = ItemTag.create(item=item, tag=tag)
        logger.debug(f'save tag_item: {link}')
    except Exception as ex:
        # Best-effort: log and carry on; caller receives None.
        logger.exception(ex)
        return None
    return link
def saveit(fanhao, path):
    """Persist a LocalItem(fanhao, path); return None if it already exists."""
    try:
        record = LocalItem.create(item=fanhao, path=path)
        logger.debug(f'save LocalItem: {fanhao}')
    except IntegrityError:
        # Duplicate local entry — not an error, just skip it.
        logger.debug(f'LocalItem exists: {fanhao}')
        return None
    return record
def update_play(id):
    """Bump view_times and refresh last_view_date for LocalItem *id*.

    Returns the freshly re-read LocalItem row.
    """
    query = LocalItem.update({
        LocalItem.last_view_date: get_now_time(),
        LocalItem.view_times: LocalItem.view_times + 1,
    }).where(LocalItem.id == id)
    nrows = query.execute()
    logger.debug(f'update LocalItem {id} : rows:{nrows}')
    return LocalItem.get_by_id(id)
def add_job(job_func, args):
    """Add a job to the scheduler, to run once ~10 seconds from now.

    *args* is appended after the standard (loop, True) prefix.
    """
    full_args = (loop, True) + args
    logger.debug(full_args)
    run_at = datetime.now() + timedelta(seconds=10)
    scheduler.add_job(job_func,
                      trigger=DateTrigger(run_date=run_at),
                      args=full_args)
def saveit(rate_type, rate_value, item):
    """Create an ItemRate row; return it, or None if creation failed.

    Bug fix: item_rate must be initialized before the try block —
    previously a failed create left it undefined and the final
    `return item_rate` raised NameError (the sibling fanhao-based
    saveit already pre-initializes it).
    """
    item_rate = None
    try:
        item_rate = ItemRate.create(item=item,
                                    rate_type=rate_type,
                                    rate_value=rate_value)
        logger.debug(f'save ItemRate: {item_rate}')
    except Exception as ex:
        logger.exception(ex)
    return item_rate
def saveit(tag_info):
    """Insert a Tag; on a duplicate value, return the existing row instead."""
    try:
        tag = Tag.create(type_=tag_info.type,
                         value=tag_info.value,
                         url=tag_info.link)
        logger.debug(f'save tag: {tag}')
    except IntegrityError:
        # Unique constraint hit — fetch the tag that is already stored.
        tag = Tag.get(Tag.value == tag_info.value)
    return tag
def save(meta_info, tags):
    """Persist an item plus its tags; do nothing if the item already exists.

    Args:
        meta_info: dict with at least a 'title' key (consumed by Item.saveit).
        tags: iterable of tag-info objects to store and link to the item.
    """
    item_title = meta_info['title']
    try:
        item = Item.saveit(meta_info)
    except ExistError:
        logger.debug(f'item exists: {item_title}')
    else:
        for tag_info in tags:
            tag = Tag.saveit(tag_info)
            # Guard against a failed tag save — the parallel save()
            # elsewhere in this file performs the same check.
            if tag:
                ItemTag.saveit(item, tag)
def saveit(rate_type, rate_value, fanhao):
    """Create an ItemRate for *fanhao*; return None when one already exists."""
    try:
        rate = ItemRate.create(item=fanhao,
                               rate_type=rate_type,
                               rate_value=rate_value)
        logger.debug(f'save ItemRate: {rate}')
    except IntegrityError:
        logger.debug(f'ItemRate exists: {fanhao}')
        return None
    return rate
def process_item(text, path, fanhao):
    """Process one item page: parse metadata and tags, then persist them."""
    logger.debug(f'process item {fanhao}')
    meta, tags = parse_item(text)
    # The page path doubles as the item's canonical URL.
    meta.update(url=path)
    save(meta, tags)
    print(f'item {fanhao} is processed')
def load_tags_db():
    """Load user rate data from the previously uploaded SQLite db file.

    Reads ``uploaded.db`` from the data directory (there is no file
    parameter — the old docstring documented one that never existed).

    Returns:
        tuple: (tag_file_added, missed_fanhaos) — number of ItemRate rows
        inserted, and the fanhaos present in the upload but missing locally.

    Raises:
        DBError: when the uploaded file is not a readable SQLite database.
    """
    db_name = get_data_path('uploaded.db')
    try:
        db_upload = SqliteDatabase(db_name)
        db_upload.get_tables()
    except DatabaseError as err:
        # Chain the cause so the underlying SQLite failure stays visible.
        raise DBError() from err

    # Old schema joins item_rate.item_id to item.id; new schema joins to fanhao.
    sql_old = '''select item_rate.rate_value, item.fanhao
        from item_rate inner join item on item_rate.item_id = item.id
        where item_rate.rate_type=1
    '''
    sql_new = '''select item_rate.rate_value, item.fanhao
        from item_rate inner join item on item_rate.item_id = item.fanhao
        where item_rate.rate_type=1
    '''
    # Probe with the old-schema query: any row means it is an old dump.
    cursor = db_upload.execute_sql(sql_old)
    db_is_old = cursor.fetchone() is not None
    # Re-run the chosen query from the start (the probe consumed a row).
    cursor = db_upload.execute_sql(sql_old if db_is_old else sql_new)
    tag_data = cursor.fetchall()

    missed_fanhaos = []
    tag_file_added = 0
    # Fix: the transaction must be on the application db that ItemRate
    # writes to — previously it was opened on the uploaded db, which
    # batched nothing.
    with db.atomic():
        for rate_value, fanhao in tag_data:
            item_rate = ItemRate.saveit(RATE_TYPE.USER_RATE, rate_value,
                                        fanhao)
            if item_rate:
                tag_file_added += 1
            # NOTE(review): collected regardless of whether the rate was
            # new, so previously-rated-but-missing items still get fetched.
            if not Item.get_by_fanhao(fanhao):
                # add to get from spider
                missed_fanhaos.append(fanhao)
    logger.debug(tag_data)
    logger.info(f'added user tag rate: {tag_file_added}')
    logger.info(f'added fanhao to download: {len(missed_fanhaos)}')
    return tag_file_added, missed_fanhaos
def saveit(meta_info):
    """Create an Item row from *meta_info* (which is consumed destructively).

    Known keys (release_date, fanhao, title, url) are popped into columns;
    whatever remains is serialized into the JSON meta_info column.

    Raises:
        ExistError: when an item with the same unique key already exists.
    """
    item_release_date = date.fromisoformat(meta_info.pop('release_date'))
    item_fanhao = meta_info.pop('fanhao')
    item_title = meta_info.pop('title')
    item_url = meta_info.pop('url')
    item_meta = json.dumps(meta_info)
    try:
        item = Item.create(fanhao=item_fanhao,
                           title=item_title,
                           url=item_url,
                           release_date=item_release_date,
                           meta_info=item_meta)
        logger.debug(f'save item: {item}')
    except IntegrityError as ex:
        # Chain the cause so duplicate-insert failures remain debuggable.
        raise ExistError() from ex
    return item
def save(meta_info, tags):
    """Store an item and link all of its tags; no-op when the item exists."""
    title = meta_info['title']
    try:
        item = Item.saveit(meta_info)
    except ExistError:
        logger.debug(f'item exists: {title}')
        return
    # First transaction: persist the tags themselves.
    with db.atomic():
        saved_tags = [tag for tag in map(Tag.saveit, tags) if tag]
    # Second transaction: link each saved tag to the new item.
    with db.atomic():
        for saved in saved_tags:
            ItemTag.saveit(item, saved)
def tag(fanhao):
    """Handle a like/dislike form post for *fanhao*, then bounce back to /tagit."""
    if request.POST.submit:
        formid = request.POST.formid
        rate_value = request.POST.submit
        item_rate = ItemRate.get_by_fanhao(fanhao)
        if item_rate:
            # Rating already recorded — overwrite its value.
            item_rate.rate_value = rate_value
            item_rate.save()
            logger.debug(f'updated item_rate for fanhao:{fanhao}')
        else:
            ItemRate.saveit(RATE_TYPE.USER_RATE, rate_value, fanhao)
            logger.debug(f'add new item_rate for fanhao:{fanhao}')
        # Send the user back to the page/anchor the form came from.
        page = int(request.query.get('page', 1))
        like = request.query.get('like')
        url = f'/tagit?page={page}&like={like}'
        if formid:
            url += f'#{formid}'
        redirect(url)
def correct(fanhao):
    """Record whether the system's rating of *fanhao* was correct.

    Marks the rating as user-confirmed; flips its value when the user
    says the prediction was wrong. Then redirects back to the list page.
    """
    if request.POST.submit:
        formid = request.POST.formid
        is_correct = int(request.POST.submit)
        item_rate = ItemRate.get_by_fanhao(fanhao)
        if item_rate:
            item_rate.rate_type = RATE_TYPE.USER_RATE
            if not is_correct:
                # Flip the binary rate value (0 <-> 1).
                item_rate.rate_value = 1 if item_rate.rate_value == 0 else 0
            item_rate.save()
            logger.debug(
                f'updated item fanhao: {fanhao}, {"and correct the rate_value" if not is_correct else ""}'
            )
        page = int(request.query.get('page', 1))
        like = int(request.query.get('like', 1))
        url = f'/?page={page}&like={like}'
        if formid:
            url += f'#{formid}'
        redirect(url)
def recommend():
    """Use the trained model to recommend items.

    Saves a SYSTEM_RATE ItemRate for every positively-predicted item and
    returns (total, count); returns None when there is nothing to predict.
    """
    ids, X = prepare_predict_data()
    if len(X) == 0:
        logger.error(f'no data for recommend')
        return
    y_pred = predict(X)
    total = len(ids)
    count = 0
    for item_id, label in zip(ids, y_pred):
        if label != 1:
            continue
        count += 1
        ItemRate(rate_type=RATE_TYPE.SYSTEM_RATE,
                 rate_value=label,
                 item_id=item_id).save()
    logger.debug(f'predicted {total} items, recommended {count}')
    return total, count
def add_local_fanhao(fanhao, tag_like):
    """Parse newline-separated "fanhao[,path]" rows and persist them.

    Args:
        fanhao: str - one row per line, each ','-separated (fanhao, path)
        tag_like: when truthy, also record a USER_RATE/LIKE for each fanhao

    Returns:
        tuple: (missed_fanhaos, local_file_added, tag_file_added)
    """
    pattern = r'([A-Z]+)-?([0-9]+)'
    parsed = []
    # Phase 1: normalize every input row into (SERIES-NUM, optional path).
    for row in fanhao.splitlines():
        if ',' in row:
            raw_hao, raw_path = row.split(',')
        else:
            raw_hao, raw_path = row, None
        raw_hao = raw_hao.strip().upper()
        match = re.search(pattern, raw_hao)
        if match and len(match.groups()) == 2:
            series, num = match.groups()
            normalized = f'{series}-{num}'
            clean_path = raw_path.strip() if raw_path else None
            logger.debug(f'matched fanhao {normalized}')
            parsed.append((normalized, clean_path))

    missed_fanhaos = []
    local_file_added = 0
    tag_file_added = 0
    # Phase 2: persist everything in one transaction.
    with db.atomic():
        for hao, clean_path in parsed:
            # if path is given, remember where the local file lives
            if clean_path:
                if LocalItem.saveit(hao, clean_path):
                    local_file_added += 1
            # if tag_like is True, add it to item_rate table
            if tag_like:
                if ItemRate.saveit(RATE_TYPE.USER_RATE, RATE_VALUE.LIKE,
                                   hao):
                    tag_file_added += 1
            if not Item.get_by_fanhao(hao):
                # metadata missing locally -> fetch via spider later
                missed_fanhaos.append(hao)
    logger.debug(f'missed_fanhaos:{missed_fanhaos}')
    logger.debug(f'tag_file_added:{tag_file_added}')
    logger.debug(f'local_file_added:{local_file_added}')
    return missed_fanhaos, local_file_added, tag_file_added
def process_page(text, path, no):
    ''' process list page

    Args:
        text: raw page content; only its length is logged here
        path: page path (currently unused by this function)
        no: page number, used for logging/progress output only
    '''
    logger.debug(f'page {no} has length {len(text)}')
    print(f'process page {no}')
def local_play(id):
    """Record a play of LocalItem *id* and redirect to its file path."""
    target = LocalItem.update_play(id).path
    logger.debug(target)
    redirect(target)