def complain():
    """Process an uploaded video and harvest keywords from its decode log.

    The uploaded ``video`` file is saved under ``VIDEO_FOLDER/<stem>/``, the
    command configured as ``BASH_TEST`` is run with the stem appended, and
    once it has finished ``decode_dnn/log/decode.1.log`` is read.  ASCII
    letters, digits and punctuation are stripped from the log, and every
    remaining space-separated token longer than three characters is passed to
    ``Keyword.add``.  Finally ``kwsdatadir/raw_keywords.txt`` is rewritten
    from ``Keyword.high_rate_words()``.

    Returns:
        The literal string ``'success'``.
    """
    file = request.files['video']
    if not (file and allowed_file(file.filename)):
        # NOTE(review): a missing/disallowed upload is skipped and still
        # answered with 'success' -- confirm this is the intended response.
        return 'success'

    filename = secure_filename(file.filename)
    stem = filename.rsplit('.', 1)[0]
    filepath = current_app.config['VIDEO_FOLDER'] + '/' + stem
    file.save(os.path.join(filepath, filename))

    # NOTE(review): the command is built by string concatenation and run with
    # shell=True; `stem` comes from secure_filename() output.
    p = subprocess.Popen(current_app.config['BASH_TEST'] + stem, shell=True)
    # poll() returns None while the child is running and the exit status once
    # it has finished.  Testing `not p.poll()` would spin forever on a
    # successful exit (status 0), so compare against None explicitly.
    while p.poll() is None:
        gevent.sleep(1)

    with open(filepath + "/decode_dnn/log/decode.1.log") as f:
        s = f.read()

    # Raw string: the pattern is unchanged for the regex engine, but no longer
    # relies on invalid string escape sequences such as "\[" or "\~".
    r = re.sub(
        r"[A-Za-z0-9\[\`\~\!\@\#\$\^\&\*\(\)\=\|\{\}\'\:\;\'\,\[\]\.\<\>\/\?\~\@\#\&\*\%\-\_\"\+]",
        "", s)
    r = r.strip()

    for k in r.split(" "):
        if len(k) > 3:
            Keyword.add(k)

    with open(
            current_app.config['VIDEO_FOLDER'] + '/kwsdatadir/raw_keywords.txt',
            'w+') as f:
        for i in Keyword.high_rate_words():
            # NOTE(review): bytes + str concatenation only works on Python 2;
            # confirm the target interpreter before porting.
            f.write(i.encode('utf8') + '\n')
    return 'success'
def test_calc_dists(self):
    """Check ``classifier.calc_dists('arma', ...)`` against reference distances."""
    keywords = [
        Keyword(word='munição', requests=[]),
        Keyword(word='pistola', requests=[]),
        Keyword(word='espingarda', requests=[]),
    ]
    expected = np.array([0.677431, 0.6866069, 0.71047467], dtype='float32')

    dists = self.classifier.calc_dists('arma', keywords)

    # Compare with an absolute tolerance instead of exact equality: the
    # reference values are rounded decimals, and bit-exact float32 results
    # are not stable across BLAS builds or library versions.
    self.assertTrue(np.allclose(dists, expected, rtol=0, atol=1e-6))
def index():
    """Keyword management home page.

    Lists site-level keywords and supports simple filtering and paging via
    the query-string parameters ``status`` (comma-separated), ``keyword``,
    ``owner`` and ``page``.  Results are sorted by ``baiduIndex`` descending
    and rendered with ``seo/index.html``.
    """
    s = request.args.get('status', u'bare,processed,repeated')
    k = request.args.get('keyword', '')
    o = request.args.get('owner', '')

    # A malformed or non-positive page number falls back to the first page
    # instead of raising ValueError or producing a negative skip.
    try:
        p = max(int(request.args.get('page', '1')), 1)
    except ValueError:
        p = 1
    start = (p - 1) * PAGE_COUNT

    condition = {'level': KeywordLevel.SITE}
    if k:
        condition['name'] = k.strip()
    if o:
        condition['owner'] = o.strip()

    # str.split never returns an empty list, so drop empty entries to make
    # the "no status filter" branch reachable (e.g. for "?status=").
    status = [item for item in s.split(u',') if item]
    if status:
        condition['status'] = {'$in': status}

    count = Keyword.count(condition)
    cursor = Keyword.find(condition,
                          skip=start,
                          limit=PAGE_COUNT,
                          sort=[('baiduIndex', pymongo.DESCENDING)])
    keywords = []
    for c in cursor:
        set_index(c)
        keywords.append(c)

    pagination = Pagination(p, PAGE_COUNT, count)
    return render_template('seo/index.html',
                           keywords=keywords,
                           pagination=pagination)
def fetch_keywords():
    """Fetch site-level keywords and insert the new ones into the database.

    Downloads ``<ym>/girls/all/`` (decoded as GBK), reads the ``title`` of
    every anchor under ``div.listap`` and saves a site-level ``Keyword`` for
    each title that is not stored yet.

    Returns:
        The list of all keyword names found on the page, stored or not.
    """
    ss.headers['Referer'] = ym
    response = ss.get(ym + '/girls/all/')
    response.encoding = 'gbk'
    anchors = html.fromstring(response.text).xpath('//div[@class="listap"]/a')
    print('Found %s keywords' % len(anchors))

    names = []
    for anchor in anchors:
        title = anchor.get('title')
        msg = 'Processing keyword %s' % title
        text = title.strip()
        existing = Keyword.find_one({'name': text})
        name = unicode(text)
        if existing:
            msg += ', skipped as existing'
        else:
            record = Keyword()
            record.name = name
            record.level = KeywordLevel.SITE
            record.refer = u'%s%s' % (ym, anchor.get('href'))
            record.save()
        print(msg)
        names.append(name)
    return names
def test_base(session, db):
    """Exercise create/read/update/delete on Request, Keyword and Label.

    Only ``label`` is added to the session explicitly; the count assertions
    expect the keyword and request attached to it to be persisted as well.
    After the label is deleted, the request and keyword are expected to
    remain.
    """
    request = Request(id=13, url='http://www.google.com', status='done')
    keyword = Keyword(word='carros', requests=[request])
    label = Label(name='veículos', restrict=True, keywords=[keyword])
    db.create_all()
    db.session.add(label)
    db.session.commit()

    # test create
    assert session.query(Request).count() == 1
    assert session.query(Keyword).count() == 1
    assert session.query(Label).count() == 1

    # test read
    assert Request.query.filter_by(
        id=13).first().url == "http://www.google.com"
    assert Keyword.query.filter_by(word='carros').first().requests[0].id == 13
    assert Label.query.filter_by(name='veículos').first().restrict

    # test update
    request.url = "http://www.twitch.tv"
    db.session.commit()
    assert Request.query.filter_by(id=13).first().url == "http://www.twitch.tv"
    label.restrict = False
    assert not Label.query.filter_by(name='veículos').first().restrict

    # test delete
    db.session.delete(label)
    db.session.commit()
    assert Label.query.filter_by(name='veículos').first() is None
    assert Request.query.filter_by(id=13).first().url == "http://www.twitch.tv"
    assert Keyword.query.filter_by(word='carros').first().requests[0].id == 13
def hearsay(keyword_id):
    """Edit the article ("hearsay") attached to a keyword.

    Responds 404 when the keyword does not exist.  A GET request renders the
    editing page; any other method validates the submitted ``title`` and
    ``body``, stores them on the keyword and answers with JSON.  The first
    time an article is saved the keyword is marked ``PROCESSED`` and, outside
    debug mode, ``notify_baidu`` is called.
    """
    keyword = Keyword.find_one({'_id': keyword_id})
    if not keyword:
        abort(404)

    # Open page
    if request.method == 'GET':
        return render_template('seo/hearsay.html', keyword=keyword)

    # Handle post request
    current_app.logger.info('Try to save hearsay for keyword %s/%s' %
                            (keyword._id, keyword.name))
    form = request.form
    title = form.get('title', '')
    body = form.get('body', '')
    if not title:
        return jsonify(success=False, message='文章标题不能为空!')
    if not body:
        return jsonify(success=False, message='文章内容不能为空!')

    # Decided before the fields below are overwritten.
    first_save = not keyword.hearsay

    keyword.hearsay.title = title
    keyword.hearsay.body = body
    keyword.updateTime = datetime.now()
    if first_save:
        keyword.status = KeywordStatus.PROCESSED
    keyword.save()

    if not current_app.debug and first_save:
        notify_baidu(current_app._get_current_object(), keyword._id)

    return jsonify(success=True, message='成功保存了你的文章。')
def upfile():
    """Process an uploaded video and report whether it contains a danger keyword.

    The uploaded ``video`` is stored under ``VIDEO_FOLDER/<stem>/video/``
    (any existing ``<stem>`` folder is removed first) and the ``BASH_TEST``
    command is run with the stem appended.  When ``kwsdir/result.xml``
    exists afterwards, or when any token of the cleaned decode log is in
    ``Keyword.high_rate_words()``, ``'dangercalling'`` is put on the queue
    ``msg[familyid]`` and ``'find'`` is returned.

    Returns:
        ``'find'`` when a keyword was detected, otherwise ``'notfind'``
        (also for a missing or disallowed upload).
    """
    file = request.files['video']
    elderid = request.values['elderid']
    familyid = bind_list.get(elderid)
    print(file.filename)

    if not (file and allowed_file(file.filename)):
        return 'notfind'

    filename = secure_filename(file.filename)
    stem = filename.rsplit('.', 1)[0]
    filepath = current_app.config['VIDEO_FOLDER'] + '/' + stem
    if os.path.exists(filepath):
        delete_file_folder(filepath)
    os.makedirs(filepath + '/video')
    file.save(os.path.join(filepath + '/video', filename))

    # NOTE(review): the command is built by string concatenation and run with
    # shell=True; `stem` comes from secure_filename() output.
    p = subprocess.Popen(current_app.config['BASH_TEST'] + stem, shell=True)
    # poll() returns None while the child is running and the exit status once
    # it has finished.  Testing `not p.poll()` would spin forever on a
    # successful exit (status 0), so compare against None explicitly.
    while p.poll() is None:
        gevent.sleep(1)

    if os.path.isfile(filepath + '/kwsdir/result.xml'):
        msg[familyid].put('dangercalling')
        return 'find'

    with open(filepath + "/decode_dnn/log/decode.1.log") as f:
        s = f.read()

    # Raw string: the pattern is unchanged for the regex engine, but no longer
    # relies on invalid string escape sequences such as "\[" or "\~".
    r = re.sub(
        r"[A-Za-z0-9\[\`\~\!\@\#\$\^\&\*\(\)\=\|\{\}\'\:\;\'\,\[\]\.\<\>\/\?\~\@\#\&\*\%\-\_\"\+]",
        "", s)
    r = r.strip()

    klist = Keyword.high_rate_words()
    if any(k in klist for k in r.split(" ")):
        msg[familyid].put('dangercalling')
        return 'find'
    return 'notfind'
def test_keywords_database(self):
    """Test additions to the keywords database."""
    u1 = User(username='******', email='*****@*****.**')
    db.session.add(u1)
    db.session.commit()
    current_user_id = u1.id

    keywords1 = Keyword(body='This is a test',
                        timestamp='20200417',
                        id_user=current_user_id,
                        user_id=current_user_id)
    db.session.add(keywords1)
    db.session.commit()

    # Assert that the keywords row is assigned to the user, and that each
    # individual keyword is included in the stored body.
    # (`assert a = b` is a SyntaxError; the comparison operator is required.)
    assert keywords1.id_user == current_user_id
    for item in keywords1.body.split(' '):
        assert item in keywords1.body

    db.session.delete(u1)
    db.session.commit()
    db.session.delete(keywords1)
    db.session.commit()
def setUp(self):
    """Build the ``Keyword`` fixture stored as ``self.new_keyword``."""
    # Positional constructor arguments, in the order Keyword receives them.
    fixture_args = (
        'Stan Schroeder',
        'Your trusted source for breaking news, analysis, exclusive interviews, headlines, and videos at ABCNews.com.',
        'https://abcnews.go.com',
        '2020-05-16T00:34:00Z',
        'https://mashable.com/article/bitcoin-halving-2020/',
    )
    self.new_keyword = Keyword(*fixture_args)
def refresh(keyword_id):
    """Refresh the long-tail keywords of the given keyword.

    Responds 404 when no keyword has ``_id == keyword_id``; otherwise runs
    ``analyze_keyword`` for it and returns a JSON success message.
    """
    target = Keyword.find_one({'_id': keyword_id})
    if not target:
        abort(404)

    app = current_app._get_current_object()
    analyze_keyword(app, target)
    return jsonify(success=True, message='成功触发了刷新请求,请稍候查看最新数据。')
def analyze_keyword(app, keyword):
    """Analyze a site-level keyword using data scraped from 5118.

    Fetches ``http://www.5118.com/seo/words/<name>`` and walks the ``<dl>``
    rows of the result list.  The row whose name equals ``keyword.name``
    updates the keyword's own ``baiduIndex`` / ``baiduResult`` (and ``total``
    when there are more than two rows); every other row is stored as a
    long-tail keyword whose ``parentId`` is ``keyword._id``.
    """
    app.logger.info('Try to analyze keyword %s/%s' %
                    (keyword._id, keyword.name))
    ss.headers['Referer'] = 'http://www.5118.com/'
    page = ss.get('http://www.5118.com/seo/words/%s' %
                  url_quote(keyword.name)).text
    rows = html.fromstring(page).xpath(
        '//div[@class="Fn-ui-list dig-list"]/dl')
    total = len(rows)

    for row in rows:
        if row.get('class', '') == 'dl-word':
            continue

        name = unicode(row.xpath('./dd[1]//a[1]/@title')[0].strip())
        index_text = row.xpath('./dd[2]/text()')[0].strip()
        result_text = row.xpath('./dd[3]/text()')[0].strip()
        # Non-numeric cells count as 0.
        baidu_index = index_text if index_text.isdigit() else 0
        baidu_result = result_text if result_text.isdigit() else 0
        app.logger.info('Found keyword: %s/%s/%s' %
                        (name, baidu_index, baidu_result))

        if name == keyword.name:
            keyword.baiduIndex = int(baidu_index)
            keyword.baiduResult = int(baidu_result)
            if total > 2:
                keyword.total = total - 2
            keyword.save()
            continue

        # NOTE(review): the original nesting was lost; this assumes all the
        # fields below are (re)written for existing long-tail records too.
        long_tail = Keyword.find_one({'name': name}) or Keyword()
        long_tail.name = name
        long_tail.level = KeywordLevel.LONG_TAIL
        long_tail.parentId = keyword._id
        long_tail.baiduIndex = int(baidu_index)
        long_tail.baiduResult = int(baidu_result)
        long_tail.save()
def create_keyword(title):
    """Return the ``Keyword`` with the cleaned ``title``, creating it if missing.

    Args:
        title: Raw title; it is normalised with ``utils.clean_data`` before
            the lookup.

    Returns:
        The existing or newly committed ``Keyword`` row.
    """
    title = utils.clean_data(title)
    print(title)
    print('create_keyword')

    # One round-trip: first() already yields None when nothing matches, so a
    # separate count() query is unnecessary.
    keyword = db.session.query(Keyword).filter(Keyword.title == title).first()
    if keyword is None:
        keyword = Keyword(title)
        db.session.add(keyword)
        db.session.commit()
    return keyword
def create_keyword():
    """Create a ``Keyword`` from the JSON request body.

    Reads ``keyword``, ``is_excluded`` and ``category_id`` from
    ``request.json``, stores the new row and returns it wrapped by
    ``APIResponseBuilder.success``.  Any failure is reported through
    ``APIResponseBuilder.error`` instead of being raised.
    """
    try:
        # TODO: verify category belongs to passed user ID
        data = request.json
        keyword = Keyword(keyword=data['keyword'],
                          is_excluded=data["is_excluded"],
                          category_id=data['category_id'])
        db.session.add(keyword)
        db.session.commit()
        return APIResponseBuilder.success({"keyword": keyword})
    except SQLAlchemyError as e:
        # A failed flush/commit leaves the session unusable until it is
        # rolled back.
        db.session.rollback()
        return APIResponseBuilder.error(f"Issue running query: {e}")
    except Exception as e:
        return APIResponseBuilder.error(f"Error encountered: {e}")
def longtail(keyword_id):
    """List the long-tail keywords that belong to a site-level keyword.

    Responds 404 when the parent keyword does not exist.  Supports the
    query-string parameters ``status`` (comma-separated) and ``page``;
    results are sorted by ``baiduIndex`` descending and rendered with
    ``seo/longtail.html``.
    """
    keyword = Keyword.find_one({'_id': keyword_id})
    if not keyword:
        abort(404)

    s = request.args.get('status', u'bare,processed,repeated')
    # A malformed or non-positive page number falls back to the first page
    # instead of raising ValueError or producing a negative skip.
    try:
        p = max(int(request.args.get('page', '1')), 1)
    except ValueError:
        p = 1
    start = (p - 1) * PAGE_COUNT

    condition = {'level': KeywordLevel.LONG_TAIL, 'parentId': keyword_id}
    # str.split never returns an empty list, so drop empty entries to make
    # the "no status filter" branch reachable (e.g. for "?status=").
    status = [item for item in s.split(u',') if item]
    if status:
        condition['status'] = {'$in': status}

    count = Keyword.count(condition)
    cursor = Keyword.find(condition,
                          skip=start,
                          limit=PAGE_COUNT,
                          sort=[('baiduIndex', pymongo.DESCENDING)])
    keywords = list(cursor)

    pagination = Pagination(p, PAGE_COUNT, count)
    return render_template('seo/longtail.html',
                           keyword=keyword,
                           keywords=keywords,
                           pagination=pagination)
def update_or_create_kws(words, req, db):
    """Create or update rows of the Keyword table for a request.

    For every word, an existing ``Keyword`` gets ``req`` appended to its
    ``requests``; a word without a row gets a new ``Keyword`` holding only
    ``req``.  Nothing happens when ``words`` is ``None``.

    Args:
        words (list): Strings to insert or update in the Keyword table.
        req (Request): The request to associate with each word.
        db (database): The app's database.
    """
    print("updating keywords: ", words)
    if words is None:
        return

    for word in words:
        existing = Keyword.query.filter_by(word=word).first()
        if existing is not None:
            existing.requests.append(req)
            continue
        db.session.add(Keyword(word=word, requests=[req]))

    db.session.commit()
def keywords():
    """Render the keywords form and store the submitted keywords.

    On a valid submission the current user's previous ``Keyword`` rows are
    deleted, a new row with the submitted text is committed and a
    confirmation is flashed.  The ``keywords.html`` template is rendered in
    every case, with ``keywords_chosen`` telling it whether a submission was
    just stored.
    """
    print("keyword begin")
    form = KeywordForm()
    chosen = False

    if form.validate_on_submit():
        Keyword.query.filter_by(id_user=current_user.id).delete()
        record = Keyword(body=form.keywords.data, id_user=current_user.id)
        db.session.add(record)
        db.session.commit()
        flash('You have chosen your keywords!')
        chosen = True
        # NOTE(review): the redirect response is built but never returned, so
        # no redirect is actually sent -- confirm whether that is intended.
        redirect(url_for('recommend'))
        print("returned to keywords")

    return render_template("keywords.html",
                           form=form,
                           keywords_chosen=chosen)
def import_data_for_user():
    """Import categories and their keywords from the JSON request body.

    Expects ``request.json`` to hold ``uuid`` and a ``categories`` list whose
    items carry ``name`` and ``keywords`` (each with ``keyword`` and
    ``is_excluded``).  The whole import runs in one transaction: either every
    category and keyword is stored, or nothing is.

    Returns:
        ``APIResponseBuilder.success({"success": True})`` on success,
        otherwise an ``APIResponseBuilder.error`` describing the failure.
    """
    data = request.json
    try:
        for category in data["categories"]:
            # Create new category
            cat = Category(user_id=data["uuid"],
                           category_name=category["name"])
            db.session.add(cat)
            # flush() sends the INSERT so cat.id is populated, without
            # committing the transaction yet.
            db.session.flush()

            # add keywords
            db.session.add_all([
                Keyword(keyword=keyword["keyword"],
                        is_excluded=keyword["is_excluded"],
                        category_id=cat.id)
                for keyword in category["keywords"]
            ])
        db.session.commit()
        return APIResponseBuilder.success({"success": True})
    except SQLAlchemyError as e:
        # Undo the partial import and make the session usable again.
        db.session.rollback()
        return APIResponseBuilder.error(f"Issue running query: {e}")
    except Exception as e:
        # E.g. a missing key half-way through: discard what was flushed.
        db.session.rollback()
        return APIResponseBuilder.error(f"Error encountered: {e}")
def popdb():
    """Populate the database with demo data.

    Inserts, in this order and with one commit per table: 21 keywords,
    19 suppliers, 20 produce items, 84 produce-to-keyword links and
    86 listings.

    Links and listings reference the freshly created objects directly rather
    than looking rows up by hard-coded primary key, so the seed data stays
    correctly wired even when ids do not start at 1 (and several hundred
    lookups are avoided).

    Returns:
        The string ``"Database has been populated."``.
    """
    # The numbers used in `produce_tags` and `listing_rows` below are 1-based
    # positions in these tables.
    keyword_names = (
        'green', 'yellow', 'red', 'white', 'tan', 'orange', 'brown',
        'black', 'vegetable', 'starch', 'squash', 'gourd', 'legume',
        'maize', 'spicy', 'sweet', 'tart', 'leafy', 'grain', 'cereal',
        'cabbage',
    )

    # (name, address, zipcode, city, state)
    # NOTE(review): zip codes are integers, so the leading zero of the MA
    # codes is not stored -- confirm the column type before changing this.
    supplier_rows = (
        ('Griggs Farm', '599 Boston Rd', 1821, 'Billerica', 'MA'),
        ('Krochmal Farms', "31 Jennie's Way", 1876, 'Tewksbury', 'MA'),
        ('Great Brook Dairy Farm', '247 North Rd', 1741, 'Carlisle', 'MA'),
        ('Farmer Daves', '437 Parker Rd', 1826, 'Dracut', 'MA'),
        ('Jones Farm', '246 Acton Rd', 1824, 'Chelmsford', 'MA'),
        ('Swenson Farms', '50 Mill Rd', 1862, 'Chelmsford', 'MA'),
        ('Drew Farm', '31 Tadmuck Rd', 1886, 'Westford', 'MA'),
        ('Clark Farm', '185 Concord St', 1741, 'Carlisle', 'MA'),
        ('Parlee Farm', '135 Pine Hill Rd', 1824, 'Chelmsford', 'MA'),
        ('Wright-Locke Farm', '78 Ridge St', 1890, 'Winchester', 'MA'),
        ('Indian Creek Farm', '1408 Trumansburg Rd', 14850, 'Ithaca', 'NY'),
        ('Stick and Stone Farm', '1605 Trumansburg Rd', 14850, 'Ithaca',
         'NY'),
        ('RoseBarb Farms', '108 Landon Rd', 14850, 'Ithaca', 'NY'),
        ('Three Swallows Farm', '23 Nelson Rd', 14850, 'Ithaca', 'NY'),
        ('HoneyRock Farm', '271 Burns Rd', 14850, 'Ithaca', 'NY'),
        ('Kingdom Farms', '317 Auburn Rd', 14882, 'Lansing', 'NY'),
        ('Straw Pocket Farm', '1388 Ridge Rd', 14882, 'Lansing', 'NY'),
        ('Dygert Farms', '260 Central Chapel Rd', 14817, 'Brooktondale',
         'NY'),
        ('TC3 Farm', '100 Cortland Rd', 13053, 'Dryden', 'NY'),
    )

    # (name, imageRef)
    produce_rows = (
        ('Brocolli',
         'https://cdn.mos.cms.futurecdn.net/r8NK24bmcMgSib5zWKKQkW.jpg'),
        ('Spinach',
         'https://i.ndtvimg.com/i/2016-11/spinach_620x350_81477995047.jpg'),
        ('Kale',
         'https://post.healthline.com/wp-content/uploads/2020/09/benefits-of-kale-1200x628-facebook-1200x628.jpg'),
        ('Pumpkin',
         'https://post.medicalnewstoday.com/wp-content/uploads/sites/3/2020/02/279610_2200-732x549.jpg'),
        ('Straightneck Squash',
         'https://d1nw62gticy6e9.cloudfront.net/uploads/Early-Prolific-Straightneck-Squash-Seeds.jpg'),
        ('Zucchini',
         'https://www.jessicagavin.com/wp-content/uploads/2018/05/zucchini-2-1200.jpg'),
        ('Green Beans',
         'https://images.food52.com/mrPh1x9qA6lTYKO27QJEfDjZ4Y8=/2016x1344/filters:format(webp)/ff7b7650-cacd-42b4-947c-e2e8ba90fa2a--greenbeans.jpg'),
        ('Lentils',
         'https://cdn.loveandlemons.com/wp-content/uploads/2019/12/how-to-cook-lentils.jpg'),
        ('Peas',
         'https://www.almanac.com/sites/default/files/image_nodes/peas-and-pea-pods.jpg'),
        ('Potatoes',
         'https://cdn.cheapism.com/images/081516_national_potato_day_recipes_slide_0_f.max-800x600.jpg'),
        ('Corn',
         'https://www.simplyhappyfoodie.com/wp-content/uploads/2018/04/instant-pot-corn-on-the-cob-1-500x500.jpg'),
        ('Soybean',
         'https://www.johnnyseeds.com/dw/image/v2/BBBW_PRD/on/demandware.static/-/Sites-jss-master/default/dw3c4875f3/images/products/vegetables/02553_01_tohya.jpg?sw=1120'),
        ('Oats',
         'https://post.healthline.com/wp-content/uploads/2020/03/oats-oatmeal-732x549-thumbnail.jpg'),
        ('Barley',
         'https://cdn-prod.medicalnewstoday.com/content/images/articles/295/295268/barley-grains-in-a-wooden-bowl.jpg'),
        ('Flour',
         'https://www.world-grain.com/ext/resources/Article-Images/2020/05/WholeWheatFlour_Photo-adobe-stock_E.jpg?t=1590171823&width=1080'),
        ('Turnip',
         'https://upload.wikimedia.org/wikipedia/commons/d/d3/Turnip_2622027.jpg'),
        ('Lettuce',
         'https://i0.wp.com/post.healthline.com/wp-content/uploads/2020/03/romaine-lettuce-1296x728-body.jpg?w=1155&h=1528'),
        ('Green Peppers',
         'https://edge.bonnieplants.com/www/tiny/uploads/20200810205434/bonnie-s-green-bell-pepper.jpg'),
        ('Chili Peppers',
         'https://scitechdaily.com/images/Chili-Peppers.jpg'),
        ('Cucumber',
         'https://www.shethepeople.tv/wp-content/uploads/2019/05/cucumber-e1558166231577.jpg'),
    )

    # (produce number, keyword numbers), in insertion order.
    produce_tags = (
        (1, (1, 9, 21)),
        (2, (1, 18, 9)),
        (3, (1, 18, 9)),
        (4, (6, 11, 12, 16)),
        (5, (2, 11, 12, 16)),
        (6, (1, 11, 12, 16)),
        (7, (1, 13, 9)),
        (8, (1, 2, 3, 4, 6, 7, 8, 13, 9)),
        (9, (1, 9, 13, 16)),
        (10, (4, 3, 2, 7, 10, 16)),
        (11, (2, 16, 20, 14, 19, 9)),
        (12, (9, 13, 1)),
        (13, (19, 20, 10, 7)),
        (14, (19, 20, 10, 7)),
        (15, (19, 20, 10, 4)),
        (16, (17, 3, 4, 9, 16)),
        (17, (1, 9, 18)),
        (18, (1, 16, 9)),
        (19, (15, 16, 3, 9)),
        (20, (1, 16, 9, 12, 11)),
    )

    # (price, quantity, produce number, supplier number), in insertion order.
    listing_rows = (
        (2.56, 551, 2, 16), (3.27, 1059, 1, 4), (0.59, 710, 2, 15),
        (2.59, 535, 9, 10), (2.04, 682, 6, 3), (3.3, 1254, 6, 7),
        (3.19, 612, 10, 5), (1.22, 748, 14, 4), (1.83, 1236, 15, 10),
        (2.38, 460, 6, 15), (3.02, 588, 16, 19), (2.29, 231, 10, 14),
        (1.84, 717, 17, 1), (0.75, 709, 14, 18), (0.51, 826, 16, 4),
        (1.32, 623, 13, 17), (1.19, 996, 2, 7), (1.73, 931, 14, 5),
        (0.81, 166, 8, 8), (2.68, 204, 3, 12), (3.02, 615, 9, 9),
        (1.73, 832, 4, 13), (1.53, 181, 6, 1), (0.79, 769, 15, 13),
        (3.28, 962, 16, 11), (1.1, 1198, 20, 6), (2.44, 259, 10, 15),
        (3.04, 180, 16, 5), (1.8, 330, 1, 17), (1.94, 353, 1, 18),
        (3.22, 890, 11, 6), (0.87, 1057, 15, 15), (1.64, 956, 6, 5),
        (3.28, 670, 17, 6), (1.92, 567, 12, 1), (0.68, 1145, 19, 11),
        (2.71, 885, 18, 9), (0.69, 190, 15, 17), (2, 1109, 5, 2),
        (1.63, 354, 7, 13), (1.67, 763, 20, 5), (2.21, 716, 4, 19),
        (2.47, 383, 13, 8), (3.11, 1229, 17, 9), (2.79, 911, 16, 16),
        (2.24, 635, 3, 18), (2.08, 999, 7, 10), (2.87, 896, 8, 16),
        (0.67, 251, 16, 7), (2.41, 508, 12, 19), (3.1, 165, 14, 11),
        (3.2, 1068, 19, 19), (2.98, 846, 19, 12), (1.91, 481, 1, 10),
        (2.37, 1165, 18, 12), (1.18, 634, 1, 16), (1.46, 1143, 6, 13),
        (1.38, 491, 20, 11), (0.69, 331, 14, 9), (3.46, 809, 11, 14),
        (3.2, 1083, 5, 3), (1.21, 318, 14, 1), (2.29, 544, 6, 9),
        (2.11, 207, 6, 17), (0.64, 882, 9, 15), (2.59, 185, 18, 16),
        (1.57, 1143, 18, 10), (1.7, 934, 6, 10), (0.65, 501, 11, 19),
        (1.84, 741, 3, 10), (3.31, 784, 9, 14), (0.57, 106, 17, 8),
        (2.99, 1225, 19, 7), (1.13, 865, 6, 19), (1.91, 560, 3, 19),
        (3.45, 883, 17, 16), (2.34, 326, 15, 19), (2.45, 110, 19, 5),
        (1.49, 230, 19, 6), (2.61, 889, 19, 2), (0.72, 436, 10, 7),
        (1.58, 1231, 10, 10), (1.19, 582, 12, 6), (3.4, 972, 3, 14),
        (1.95, 1260, 13, 15), (1.81, 226, 7, 12),
    )

    # adding tuples to db
    keywords = [Keyword(name=name) for name in keyword_names]
    db.session.add_all(keywords)
    db.session.commit()

    suppliers = [
        Supplier(name=name, address=address, zipcode=zipcode, city=city,
                 state=state)
        for name, address, zipcode, city, state in supplier_rows
    ]
    db.session.add_all(suppliers)
    db.session.commit()

    produce = [
        Produce(name=name, imageRef=image_ref)
        for name, image_ref in produce_rows
    ]
    db.session.add_all(produce)
    db.session.commit()

    links = [
        Producetokeyword(kproduct=produce[produce_no - 1],
                         tag=keywords[keyword_no - 1])
        for produce_no, keyword_nos in produce_tags
        for keyword_no in keyword_nos
    ]
    db.session.add_all(links)
    db.session.commit()

    listings = [
        Listing(price=price,
                quantity=quantity,
                lproduct=produce[produce_no - 1],
                owner=suppliers[supplier_no - 1])
        for price, quantity, produce_no, supplier_no in listing_rows
    ]
    db.session.add_all(listings)
    db.session.commit()

    return "Database has been populated."
def analyze_keyword(k):
    """Analyze a site-level keyword.

    Looks up the stored keyword named ``k``, scrapes its listing page on
    5118.com, records the Baidu index / result counts on the keyword itself
    and stores every other listed name as a long-tail keyword whose parent
    is ``k``.

    Returns None in every case. Nothing is fetched when the keyword is
    unknown or already carries a positive Baidu index or result count.
    """
    keyword = Keyword.find_one({'name': k})
    if not keyword:
        print('Keyword %s does not exist' % k)
        return
    already_imported = keyword.baiduIndex > 0 or keyword.baiduResult > 0
    if already_imported:
        print('Keyword %s is imported before' % k)
        return

    print('Try to analyze keyword %s/%s' % (keyword._id, k))
    ss.headers['Referer'] = 'http://www.5118.com/'
    response = ss.get('http://www.5118.com/seo/words/%s' % url_quote(k))
    page = html.fromstring(response.text)
    rows = page.xpath('//div[@class="Fn-ui-list dig-list"]/dl')
    total = len(rows)

    for row in rows:
        # Rows whose class is exactly 'dl-word' carry no keyword data.
        if row.get('class', '') == 'dl-word':
            continue

        name = unicode(row.xpath('./dd[1]//a[1]/@title')[0].strip())
        index_text = row.xpath('./dd[2]/text()')[0].strip()
        result_text = row.xpath('./dd[3]/text()')[0].strip()
        # Non-numeric cells are treated as zero.
        baidu_index = index_text if index_text.isdigit() else 0
        baidu_result = result_text if result_text.isdigit() else 0
        print('Found keyword: %s/%s/%s' % (name, baidu_index, baidu_result))

        if name == k:
            # The row describing the analyzed keyword itself.
            keyword.baiduIndex = int(baidu_index)
            keyword.baiduResult = int(baidu_result)
            if total > 2:
                keyword.total = total - 2
            keyword.save()
            continue

        if Keyword.count({'name': name}) > 0:
            print('This keyword already exists')
            continue

        long_tail = Keyword()
        long_tail.name = name
        long_tail.level = KeywordLevel.LONG_TAIL
        long_tail.parentId = keyword._id
        long_tail.baiduIndex = int(baidu_index)
        long_tail.baiduResult = int(baidu_result)
        long_tail.save()

    # Random pause before returning, so successive calls are spaced out.
    time.sleep(random.randint(5, 15))
def recommend():
    """Build the recommender page for view 3.

    Collects the current user's keywords, asks ``rec_sys_routes`` for
    recommendations for a fixed date, keeps those with a positive
    ``NN_counts`` score, resolves each title to its ``News`` row and renders
    ``recommend.html`` with the stories ordered from highest to lowest score.

    Each entry passed to the template is the tuple
    ``(link, title, summary, outlet, language, query, pic_link)``, where
    ``query`` is the ``News`` query used to look the story up.
    """
    rec_sys_routes.news_panda()

    keywords = Keyword().query.filter(Keyword.id_user == current_user.id)
    user_key_words = []
    for item in keywords:
        # NOTE(review): every row overwrites the previous one, so only the
        # last row's words are used -- confirm whether rows should be merged.
        user_key_words = item.body.split(' ')
    user_key_words = list(set(user_key_words))
    # After de-duplication each of these can occur at most once.
    for element in [',', '.', ':', ';', ' ', '', '/']:
        if element in user_key_words:
            user_key_words.remove(element)

    date = "20200515"
    rec_sys_routes.create_key_word_user_db(user_key_words, date)
    # Loads the user and computes recommendations; other model: "GoogleNews".
    user = rec_sys_routes.User_recommend(
        user_key_words, date, w2v_model="news_w2v")
    recommendations = user.recommendations
    titles = recommendations['title'].tolist()
    scores = recommendations['NN_counts'].tolist()

    scored_titles = [(title, score)
                     for title, score in zip(titles, scores)
                     if int(score) > 0]
    scored_titles.sort(key=lambda pair: pair[1])

    outlet_names = {
        'theguardian': 'The Guardian',
        'Ger': 'Der Spiegel',
        'Spa': 'El Pais',
    }
    default_pic = ("https://static.bbc.co.uk/news/1.312.03569/img/brand/"
                   "generated/news-light.png")

    recommended_list = []
    for wanted_title, _score in scored_titles:
        new = News.query.filter(News.title == wanted_title)
        # Fetch the row once instead of re-running the query per attribute.
        story = new.first()
        if story is None:
            # Recommended title has no News row; skip it rather than fail
            # the whole page with an IndexError.
            continue

        title = story.title
        summary = story.summary
        if len(summary) > 300:
            summary = summary[0:300] + '...'
        elif 'cartoon' in title:
            # Cartoons: show a cleaned-up title in place of the summary.
            title = title.replace('- cartoon', '')
            summary = f'Cartoon: {title}'

        pic_link = story.pic_link
        if pic_link is None:
            pic_link = default_pic

        outlet = story.outlet
        if outlet == 'theguardian':
            # Guardian stories display their title as the summary.
            summary = title
        outlet = outlet_names.get(outlet, outlet)

        # The query object stays at index 5 so the template's tuple layout
        # is unchanged.
        recommended_list.append(
            (story.link, title, summary, outlet, story.language, new,
             pic_link))

    # Sorted ascending above; flip so the best-scoring stories come first.
    recommended_list.reverse()
    return render_template('recommend.html', recommended=recommended_list)
def test_classifier(self):
    """Check that the classifier ends on the "permitted" label for uol.com.br.

    Builds five restricted labels with their default keywords plus one
    unrestricted label without keywords, collects every status yielded by
    ``self.classifier.classify`` and asserts on the last one.
    """
    shared_request = Request(id=0, url=' ', status='done')

    def build_keywords(*words):
        # Every keyword references the same placeholder request.
        return [Keyword(word=word, requests=[shared_request])
                for word in words]

    default_kw_armas = build_keywords(
        'arma', 'munição', 'calibre', 'revólver', 'cano', 'carabina',
        'espingarda')
    default_kw_cigarros = build_keywords(
        'cigarro', 'vape', 'narguile', 'fumar', 'tragar', 'tabaco',
        'nicotina', 'vaporizador', 'ervas')
    default_kw_prost = build_keywords(
        'sexo', 'prostituta', 'fetiche', 'cache', 'acompanhante',
        'programa', 'seios', 'bunda', 'travesti', 'gostosa')
    default_kw_remedio = build_keywords(
        'remédio', 'aborto', 'comprimido', 'secundários', 'efeitos')
    default_kw_serv = build_keywords(
        'operadora', 'cabo', 'assinatura', 'liberação', 'sem', 'aparelhos',
        'net', 'vivo')

    restricted = [
        ('Armas de fogo', default_kw_armas),
        ('Cigarros', default_kw_cigarros),
        ('Prostutuição', default_kw_prost),
        ('Remédios', default_kw_remedio),
        ('Serviços ilegais', default_kw_serv),
    ]
    labels = [Label(name=label_name, restrict=True, keywords=label_kws)
              for label_name, label_kws in restricted]
    labels.append(Label(name='Site permitido', restrict=False, keywords=[]))

    kws = (default_kw_armas + default_kw_cigarros + default_kw_prost +
           default_kw_remedio + default_kw_serv)

    # classify() is consumed as an iterable of status dicts; only the final
    # one is checked.
    stat = list(self.classifier.classify(url='https://www.uol.com.br/',
                                         kws=kws,
                                         labels=labels))
    self.assertEqual(stat[-1]['label'], "permitted")
""" Populates the system's database tables with pre-defined Keywords and Labels to be used in the classifer. """ try: db.session.query(Request).delete() db.session.query(Keyword).delete() db.session.query(Label).delete() db.session.commit() except: print("tried to clean tables, but they don't yet exist") default_req = [Request(id=0, url=' ', status='done')] default_kw_armas = [ Keyword(word='arma', requests=[default_req[0]]), Keyword(word='munição', requests=[default_req[0]]), Keyword(word='calibre', requests=[default_req[0]]), Keyword(word='revólver', requests=[default_req[0]]), Keyword(word='cano', requests=[default_req[0]]), Keyword(word='carabina', requests=[default_req[0]]), Keyword(word='espingarda', requests=[default_req[0]]) ] default_kw_cigarros = [ Keyword(word='cigarro', requests=[default_req[0]]), Keyword(word='vape', requests=[default_req[0]]), Keyword(word='narguile', requests=[default_req[0]]), Keyword(word='fumar', requests=[default_req[0]]), Keyword(word='tragar', requests=[default_req[0]]), Keyword(word='tabaco', requests=[default_req[0]]),
def _seed_tmdb_json(url):
    """Return the decoded JSON body of ``url``, or None unless the status is 200."""
    response = requests.get(url)
    return response.json() if response.status_code == 200 else None


def _seed_image_url(details, field, default_key):
    """Return the full image URL for ``details[field]``.

    Falls back to ``app.config[default_key]`` when there is no TMDB payload
    or the payload has no path for ``field``, instead of producing a URL
    that ends in the literal text "None".
    """
    path = details.get(field) if details is not None else None
    if path is None:
        return app.config[default_key]
    return app.config['IMG_URL'] + str(path)


def _seed_named_row(model, name, **fields):
    """Return the ``model`` row called ``name``, inserting it first if absent.

    A newly inserted row is flushed and refreshed so its ``id`` is populated.
    """
    found = model.query.filter_by(name=name).first()
    if found is None:
        found = model(name=name, **fields)
        db.session.add(found)
        db.session.flush()
        db.session.refresh(found)
    return found


def _seed_movie(row, tmdb_id):
    """Insert one movie from a movies.csv row together with its TMDB-derived rows."""
    movie_id = row['movieId']
    api_key = app.config['API_KEY']
    # NOTE(review): if links.csv has gaps in tmdbId pandas may load the column
    # as float, making str(tmdb_id) look like "862.0" -- confirm.
    movie_url = "https://api.themoviedb.org/3/movie/" + str(tmdb_id)

    details = _seed_tmdb_json(movie_url + "?api_key=" + api_key)
    credit_data = _seed_tmdb_json(movie_url + "/credits?api_key=" + api_key)
    keyword_data = _seed_tmdb_json(movie_url + "/keywords?api_key=" + api_key)
    releases = _seed_tmdb_json(
        movie_url + "/release_dates?api_key=" + api_key)
    video_data = _seed_tmdb_json(movie_url + "/videos?api_key=" + api_key)

    keywords = keyword_data['keywords'] if keyword_data is not None else []
    videos = video_data['results'] if video_data is not None else []
    casts = credit_data['cast'] if credit_data is not None else []
    crews = credit_data['crew'] if credit_data is not None else []

    poster_path = _seed_image_url(details, 'poster_path', 'IMG_DEFAULT')
    backdrop_path = _seed_image_url(details, 'backdrop_path',
                                    'BACKDROP_DEFAULT')

    # Fallback values are used when the movie details request failed.
    has_details = details is not None
    original_title = details['original_title'] if has_details else row['title']
    vote_average = details['vote_average'] if has_details else 0
    vote_count = details['vote_count'] if has_details else 0
    runtime = details['runtime'] if has_details else 0
    genres = details['genres'] if has_details else []
    release_date = details['release_date'] if has_details else '2000'
    overview = details['overview'] if has_details else ''

    # Certification comes from the first release date of the first listed
    # result; "G" when none is available.
    certification = "G"
    if releases is not None and len(releases['results']) > 0:
        release_dates = releases['results'][0]['release_dates']
        if len(release_dates) > 0:
            certification = release_dates[0]['certification']

    rating_list = Rating.query.filter_by(movie_id=movie_id).all()
    avg = (sum(r.rating for r in rating_list) / len(rating_list)
           if rating_list else 0)

    movie = Movie(id=movie_id,
                  title=row['title'],
                  original_title=original_title,
                  tmdb_id=tmdb_id,
                  rating=avg,
                  backdrop_path=backdrop_path,
                  poster_path=poster_path,
                  release_date=release_date,
                  runtime=runtime,
                  overview=overview,
                  vote_average=vote_average,
                  vote_count=vote_count,
                  certification=certification)
    db.session.add(movie)
    db.session.flush()
    db.session.refresh(movie)

    for crew in crews:
        crew_row = _seed_named_row(Crew, crew['name'])
        db.session.add(CreditCrews(movie_id=movie.id,
                                   crew_id=crew_row.id,
                                   department=crew['department']))

    for cast in casts:
        # NOTE(review): a null profile_path still yields IMG_URL + "None";
        # no default cast image is configured in this block.
        cast_row = _seed_named_row(
            Cast, cast['name'],
            image=app.config['IMG_URL'] + str(cast['profile_path']))
        db.session.add(CreditCasts(character=cast['character'],
                                   movie_id=movie.id,
                                   cast_id=cast_row.id,
                                   order=cast['order']))

    for video in videos:
        db.session.add(Video(key=video['key'],
                             name=video['name'],
                             movie_id=movie.id))

    for keyword in keywords:
        keyword_row = _seed_named_row(Keyword, keyword['name'])
        db.session.add(MovieKeywords(movie_id=movie.id,
                                     keyword_id=keyword_row.id))

    for genre in genres:
        genre_row = _seed_named_row(Genre, genre['name'])
        db.session.add(MovieGenres(movie_id=movie.id,
                                   genre_id=genre_row.id))


def seed():
    """Populate the database with users, ratings, movies and action metadata.

    Each table is filled only when it currently holds no rows:

    * users: ids 1..610 with a random e-mail and the password "123456";
    * ratings: from ``dataset/ratings.csv``;
    * movies: from ``dataset/movies.csv`` and ``dataset/links.csv``, enriched
      with details, credits, keywords, release dates and videos requested
      from the TMDB API;
    * action types / parameters: from the JSON files under ``seed/``.
    """
    if User.query.first() is None:
        for user_id in range(1, 611):
            user = User(id=user_id, email=randomEmail())
            user.hash_password("123456")
            db.session.add(user)

    # .first() is enough to test for emptiness; .all() would load the table.
    if Rating.query.first() is None:
        # Every seeded rating receives this same fixed timestamp.
        rated_at = datetime.datetime.fromtimestamp(1347517370)
        for _, row in pd.read_csv("dataset/ratings.csv").iterrows():
            db.session.add(Rating(rating=int(round(row['rating'])),
                                  timestamp=rated_at,
                                  user_id=row['userId'],
                                  movie_id=row['movieId']))

    if Movie.query.first() is None:
        movie_rows = pd.read_csv("dataset/movies.csv")
        links = pd.read_csv("dataset/links.csv")
        # links.csv is read with the same index as movies.csv, so the two
        # files are presumably row-aligned -- verify.
        for index, row in movie_rows.iterrows():
            _seed_movie(row, links.loc[index, 'tmdbId'])
    db.session.commit()

    if ActionType.query.first() is None:
        with open('seed/ActionType.json') as json_file:
            for action in json.load(json_file):
                db.session.add(ActionType(id=action['id'],
                                          name=action['name'],
                                          description=action['description']))
    db.session.commit()

    if ActionParameter.query.first() is None:
        with open('seed/ActionParameter.json') as json_file:
            for action in json.load(json_file):
                db.session.add(ActionParameter(id=action['id'],
                                               name=action['name']))
    db.session.commit()