def main():
    """Command-line entry point: crawl, get, find, or delete one document."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-g", "--get", type=str,
        help="Print the document in database if exists, otherwise crawl it and then print")
    arg_parser.add_argument(
        "-c", "--crawl", type=str,
        help="Crawl one's total answer pictures")
    arg_parser.add_argument(
        "-f", "--find", type=str,
        help="Print the document in database")
    arg_parser.add_argument(
        "-d", "--delete", type=str,
        help="delete one document from database")
    args = arg_parser.parse_args()

    db_connection = DBConnection()
    # Each flag is independent; any combination may be given on one run.
    if args.crawl is not None:
        crawl_one(args.crawl, db_connection, COOKIES_STR)
    if args.find is not None:
        pprint(db_connection.find_one(args.find))
    if args.get is not None:
        pprint(get_one(args.get, db_connection, COOKIES_STR))
    if args.delete is not None:
        print(db_connection.delete_many(args.delete))
def __init__(self):
    """Initialise the keyword bag (list + set form) and a DB cursor."""
    keywords = ['modi', 'pm', 'visit', 'narendra', 'prime minister']
    self.bag_of_words = keywords
    # Set form for O(1) membership tests.
    self.bag_of_words_set = set(keywords)
    self.db_con = DBConnection().create_connection()
    self.cursor = self.db_con.cursor()
def init():
    """Open the database, restore known url tokens, and build the origin author.

    Returns a (db_connection, url_token_set, origin) tuple.
    """
    print("Begin initialize...")
    connection = DBConnection()
    known_tokens = connection.restore_url_token()
    origin_author = Author(ORIGIN_URL_TOKEN, COOKIES_STR, ORIGIN_NAME,
                           ORIGIN_GENDER, ORIGIN_AVATAR_URL_TEMPLATE)
    return connection, known_tokens, origin_author
def __init__(self):
    """Set up scraper state: target year, URL template, DB cursor, headers."""
    self.year = 2015
    self.base_url = "http://timesofindia.indiatimes.com/%s"
    self.url_to_scrape = None
    connection = DBConnection().create_connection()
    self.db_con = connection
    self.cursor = connection.cursor()
    # Fixed desktop-Chrome UA so the site serves the regular HTML pages.
    self.request_headers = {
        "User-Agent": ("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                       "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"),
    }
def main():
    """Entry point: run the optimisation loop over the doublepull database."""
    parameter_names = [
        "hero_movespeed",
        "hero_attackspeed",
        "hero_attackrange",
        "hero_attackdamage",
        "fraction_neutral_left",
        "neutral_total_eff_hp",
        "targeted_neutral_eff_hp",
        "fraction_lane_left",
        "lane_total_eff_hp",
        "targeted_lane_eff_hp",
        "damage_spread_neutral",
        "damage_spread_lane",
        "success",
    ]
    db = DBConnection("doublepull", 1)
    Loop(parameter_names, db, 100).go()
async def get_account_by_message(
        request: web.BaseRequest) -> web.json_response:
    """Top-10 accounts by total engagement for messages matching ?q=.

    The q string is parsed as "word [and|or word]..."; the first word is
    always an AND term.  Date bounds come from the from/to path parts.
    Raises HTTPNotFound when no rows match.
    """
    from_date = request.match_info.get('from')
    to_date = request.match_info.get('to')
    key_word = request.query.get('q').split(' ')
    and_list = []
    or_list = []
    and_list.append(key_word.pop(0))
    index = 0
    # Walk the remaining tokens as (connector, word) pairs.
    for _ in range(0, int(len(key_word) / 2)):
        if key_word[index].lower() == 'and':
            and_list.append(key_word[index + 1])
        if key_word[index].lower() == 'or':
            or_list.append(key_word[index + 1])
        index += 2
    # SECURITY FIX: the search words were f-string-interpolated straight into
    # the SQL text (SQL injection).  Build numbered placeholders instead and
    # pass the values as query parameters; $1/$2 are the date bounds, so the
    # extra parameters start at $3.
    sub_query = ''
    extra_params = []
    placeholder = 3
    for value in and_list:
        sub_query += f" AND lower(message) like ${placeholder}"
        extra_params.append(f"%{value.lower()}%")
        placeholder += 1
    for value in or_list:
        sub_query += f" OR lower(message) like ${placeholder}"
        extra_params.append(f"%{value.lower()}%")
        placeholder += 1
    async with DBConnection(request) as connection, connection.transaction(
            isolation='serializable'):
        query = f"""
            SELECT
                owner_id,
                owner_name,
                sum(engagement) as total_engagement,
                array_agg(id) as id_list
            FROM data
            WHERE time BETWEEN $1 AND $2 {sub_query}
            GROUP BY owner_id,owner_name
            ORDER BY total_engagement DESC limit 10
        """
        print(query)
        result = await connection.fetch(
            query,
            arrow.get(from_date).datetime,
            arrow.get(to_date).datetime,
            *extra_params,
        )
        if not result:
            raise web.HTTPNotFound(text='daily message not found')
        payload = []
        for item in result:
            item = dict(item)
            # Replace raw message ids with routable message URLs.
            item['id_list'] = [reverse('message', msg_id=m)
                               for m in item['id_list']]
            payload.append(item)
        return web.json_response(payload)
def display_vist_details():
    """Render index.html with a {place: visit_date} mapping from visit_info."""
    connection = DBConnection().create_connection()
    cursor = connection.cursor()
    cursor.execute("select place, visit_date from visit_info")
    visit_by_place = {place: visit_date
                      for place, visit_date in cursor.fetchall()}
    return render_template('index.html', vist_details=visit_by_place)
def hello_world():
    """Return the visit_info table as JSON: {place: visit_date}."""
    connection = DBConnection().create_connection()
    cursor = connection.cursor()
    cursor.execute("select place, visit_date from visit_info")
    visit_by_place = {place: visit_date
                      for place, visit_date in cursor.fetchall()}
    return jsonify(visit_by_place)
def main():
    """Batch unprocessed listing ids and fan them out to worker threads.

    Collects ids in batches of urls_per_call, capped at max_api_calls
    batches; only when every record was seen does it flush the final
    partial batch and run the post-processing check.
    """
    max_api_calls = api["max_api_calls"]
    urls_per_call = api["urls_per_call"]

    db = DBConnection(database["host"], database["port"])
    db.set_database(database["name"])
    db.set_collection(database["collection"])

    batch = []              # listing ids for the current API call
    batches = []            # all batches to dispatch
    api_call_count = 0
    list_end_reached = True
    for record in db.get_records():
        try:
            # Keep the original loose `== True` comparison: the stored
            # flag's type is not visible here and plain truthiness could
            # differ (e.g. a non-empty string).
            is_processed = record["offer"]["listing"]["is_processed"] == True  # noqa: E712
        except KeyError:
            is_processed = False
        if is_processed:
            continue
        batch.append(record["offer"]["listing"]["listingId"])
        # len-based batching replaces the original endCursor bookkeeping;
        # behavior is identical.
        if len(batch) == urls_per_call:
            batches.append(batch)
            batch = []
            api_call_count += 1
            if api_call_count >= max_api_calls:
                # API budget exhausted before the record list ended.
                list_end_reached = False
                break
    if list_end_reached:
        # Flush the trailing partial batch only when we saw every record.
        batches.append(batch)
    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(process_listing_metric_data, batches,
                     repeat(db), repeat(urls_per_call))
    if list_end_reached:
        db.check_listings()
async def get_message_by_id(request: web.BaseRequest) -> web.json_response:
    """Return the single message row matching msg_id as JSON.

    Raises HTTPNotFound when the id is unknown.
    """
    msg_id = request.match_info.get('msg_id')
    async with DBConnection(request) as connection, connection.transaction(
            isolation='serializable'):
        row = await connection.fetchrow(
            """
            SELECT *
            FROM data
            WHERE id = $1
            """,
            msg_id,
        )
        if not row:
            raise web.HTTPNotFound(text='message not found')
        payload = dict(row)
        # Timestamps are not JSON serialisable; use ISO-8601 text.
        payload['time'] = payload['time'].isoformat()
        return web.json_response(payload)
async def get_daily_message_count(
        request: web.BaseRequest) -> web.json_response:
    """Return {YYYY-MM-DD: message_count} for each day in the range.

    Raises HTTPNotFound when the range holds no messages.
    """
    from_date = request.match_info.get('from')
    to_date = request.match_info.get('to')
    async with DBConnection(request) as connection, connection.transaction(
            isolation='serializable'):
        rows = await connection.fetch(
            """
            SELECT to_char(date_trunc('day', time), 'YYYY-MM-DD') as date, count(*)
            FROM data
            WHERE time BETWEEN $1 AND $2
            GROUP BY date
            ORDER BY date ASC
            """,
            arrow.get(from_date).datetime,
            arrow.get(to_date).datetime)
        if not rows:
            raise web.HTTPNotFound(text='daily message not found')
        return web.json_response(dict(rows))
async def get_word_cloud(request: web.BaseRequest) -> web.json_response:
    """Serve a word-cloud PNG for the date range, rendering and caching on miss.

    cloud_type is one of 'wordcloud' / 'hashtag' / 'mention'; anything else
    falls back to 'wordcloud'.  Raises HTTPNotFound when the range is empty.
    """
    # BUG FIX: the request parameter was annotated web.FileResponse; every
    # sibling handler correctly takes web.BaseRequest.
    from_date = request.match_info.get('from')
    to_date = request.match_info.get('to')
    cloud_type = request.match_info.get('cloud_type')
    if cloud_type not in ('wordcloud', 'hashtag', 'mention'):
        cloud_type = 'wordcloud'
    # Single source of truth for the cache-file location (was repeated four
    # times as an inline f-string).
    image_path = f"{settings.MEDIA_PATH}/{cloud_type}__{from_date}__{to_date}.png"
    if os.path.exists(image_path):
        return web.FileResponse(image_path)
    async with DBConnection(request) as connection, connection.transaction(
            isolation='serializable'):
        result = await connection.fetch(
            """
            SELECT message
            FROM data
            WHERE time BETWEEN $1 AND $2
            """,
            arrow.get(from_date).datetime,
            arrow.get(to_date).datetime)
        if not result:
            raise web.HTTPNotFound(text='daily message not found')
        dictionary = await get_count_by_list(result, cloud_type)
        wc = WordCloud(
            background_color="white",
            width=1000,
            height=1000,
            font_path='THSarabunNew.ttf',
            relative_scaling=0.5,
        )
        wc.generate_from_frequencies(dictionary)
        plt.imshow(wc, interpolation='bilinear')
        plt.axis("off")
        plt.savefig(image_path, format="png")
    return web.FileResponse(image_path)
async def get_message_by_engagement(
        request: web.BaseRequest) -> web.json_response:
    """Return the 10 highest-engagement messages in the date range as JSON.

    Raises HTTPNotFound when the range holds no messages.
    """
    from_date = request.match_info.get('from')
    to_date = request.match_info.get('to')
    async with DBConnection(request) as connection, connection.transaction(
            isolation='serializable'):
        rows = await connection.fetch(
            """
            SELECT engagement, time::TEXT, message
            FROM data
            WHERE time BETWEEN $1 AND $2
            ORDER BY engagement DESC limit 10
            """,
            arrow.get(from_date).datetime,
            arrow.get(to_date).datetime)
        if not rows:
            raise web.HTTPNotFound(text='daily message not found')
        return web.json_response([dict(row) for row in rows])
async def gs(self, ctx, start: typing.Optional[int] = 1,
             num: typing.Optional[int] = 1, *, arg, skip_cache=False,
             delete=False):
    '''Searches the phrase given on google'''
    if not (start >= 1 and start <= 200 and num >= 1 and num <= 10):
        await ctx.send('Numbers not in bound')
        return
    # Sanitization: keep alphanumerics plus a small set of allowed symbols.
    # BUG FIX: the original tested `i.isalnum` — the bound method object,
    # which is always truthy — so nothing was ever filtered; the method
    # must be *called*.  (The redundant ''.join(searchTerm) no-op on the
    # resulting str is also dropped.)
    searchTerm = ''
    for i in arg:
        if i.isalnum() or i in "'+.:":
            searchTerm += i
    gis = GoogleImagesSearch(secret_api_key, secret_cx_code)
    _search_params = {
        'q': searchTerm,
        'start': start,
        'num': num,
        'safe': 'high',
    }
    webhook = await get_web_hook(ctx.channel)
    with DBConnection('meme_cache.db') as db_conn:
        cur = db_conn.conn.cursor()
        # Serve from the cache unless the caller explicitly skips it.
        if (not skip_cache) or (num != 1):
            for row in cur.execute(db_conn.select_query % (searchTerm, start)):
                url = row[0]
                if url:
                    embed = discord.Embed()
                    embed.set_image(url=url)
                    if not delete:
                        await webhook.send(
                            content=ctx.message.content,
                            embed=embed,
                            username=ctx.message.author.nick,
                            avatar_url=ctx.message.author.avatar_url)
                    else:
                        await webhook.send(
                            embed=embed,
                            username=ctx.message.author.nick,
                            avatar_url=ctx.message.author.avatar_url)
                        await ctx.message.delete()
                else:
                    await ctx.send('Couldn\'t find the searched image.')
                return
        gis.search(search_params=_search_params)
        embeds = []
        for i, img in enumerate(gis.results()):
            if img.url:
                embed_data = {
                    'type': 'image',
                    'image': {
                        'url': img.url,
                    },
                }
                embeds.append(discord.Embed.from_dict(embed_data))
                try:
                    cur.execute(db_conn.insert_query %
                                (searchTerm, start + i, img.url))
                    db_conn.conn.commit()
                except sqlite3.IntegrityError:
                    # Row already cached for this (term, position): refresh it.
                    cur.execute(db_conn.update_query %
                                (img.url, start + i, searchTerm))
        # NOTE(review): the cache queries are built with %-formatting and
        # searchTerm may still contain "'", so they remain injectable into
        # the local sqlite cache — consider parameterized queries.
        if not delete:
            cont = ctx.message.content
            await webhook.send(content=cont,
                               embeds=embeds,
                               username=ctx.message.author.display_name,
                               avatar_url=ctx.message.author.avatar_url)
        else:
            await webhook.send(embeds=embeds,
                               username=ctx.message.author.display_name,
                               avatar_url=ctx.message.author.avatar_url)
            await ctx.message.delete()
# -*- coding=utf8 -*-
from flask import Flask, request, send_from_directory, jsonify

import zhihutu
from database import DBConnection

app = Flask(__name__, static_url_path='')
db_connection = DBConnection()


@app.route('/')
def index():
    """Serve the single-page front end."""
    return send_from_directory('.', 'index.html')


@app.route('/find', methods=['POST'])
def find():
    """Look up a user document, crawling on a miss when cookies are given."""
    url_token = request.form['url_token']
    # BUG FIX: this form field is documented as optional, but
    # request.form['cookies_str'] aborts with 400 when the key is absent;
    # .get() with an empty default honours the optionality.
    cookies_str = request.form.get('cookies_str', '')  # optional param
    result = db_connection.find_one(url_token)
    if result is None:
        if len(cookies_str) == 0:
            return jsonify({'name': 'Not Found'})
        result = zhihutu.get_one(url_token, db_connection, cookies_str)
    # Strip the internal '_id' field before serialising (presumably a Mongo
    # ObjectId, which jsonify cannot encode — TODO confirm).
    if '_id' in result:
        result.pop('_id')
    return jsonify(result)
'dec' ] regex_list = [''] country_capital_list = [] with open('/home/aish/Desktop/capital.txt', 'r') as f: lines = f.readlines() for capital_name in lines: country_capital_list.append(capital_name) country_capital_list.extend(COUNTRY) regex = "|".join(str(item.lower()) for item in country_capital_list) sql = "UPDATE classified_article SET counter = %s WHERE id = %s" dbcon = DBConnection().create_connection() cursor = dbcon.cursor() cursor.execute( "SELECT id,article,counter FROM classified_article WHERE id > 0 AND counter >3 ORDER BY id asc" ) result_set = cursor.fetchall() frequency_dict = {} for result in result_set: (id, content, filter_count) = result #print (id) content = content.lower() content = re.sub("<.*?\>", " ", content) content = re.sub('[,.]', '', content) ''' #Stage 1