def list_searches():
    """Return JSON with the total number of searches and per-term counts.

    Response shape: {"count": <int>, "search_results": [{"_id": term, "count": n}, ...]}
    """
    per_term = defaultdict(int)
    for s in Search.select():
        per_term[s.thing] += 1
    # The overall total equals the sum of the per-term counts we just
    # accumulated; the original issued a second COUNT(*) query
    # (Search.select().count()) for the same number.
    return jsonify(
        count=sum(per_term.values()),
        search_results=[{'_id': term, 'count': n}
                        for term, n in per_term.items()],
    )
def insert_data():
    """Interactively collect a user, their wanted items, and persist a Search.

    Prompts on stdin for user details, then loops collecting items until the
    user enters "*", and finally stores a Search linking the user to the items.
    """
    first_name = input("please enter your first name")
    last_name = input("please enter your last name")
    mail = input("please enter your email address")
    notify_input = input(
        "please enter 1 if you want to get notification every time an item is uploaded or 0 for once a day"
    )
    # NOTE(review): notify_input is stored unvalidated; the original
    # validation call was commented out:
    # notify_input = check_input(notify_input, int)
    user = User(first_name=first_name,
                last_name=last_name,
                email=mail,
                notify=notify_input)
    db.session.add(user)
    db.session.commit()
    print(
        "please enter the items you are looking for and the maximum price for each item"
    )
    items_list = []
    while "*" != input("press * when finished"):
        item_name = input("enter the item name")
        # NOTE(review): price is kept as the raw input string — confirm the
        # Item.max_price column coerces it to a numeric type.
        item_price = input("maximum price for " + item_name)
        item = Item(name=item_name, max_price=item_price)
        items_list.append(item)
        db.session.add(item)
    # Commit all collected items in one transaction instead of committing
    # once per item inside the loop (fewer round trips, atomic item set).
    db.session.commit()
    search = Search(user_id=user.id, items=items_list)
    db.session.add(search)
    db.session.commit()
def check():
    """Flask view: log the searched `thing` and render whether it is hated/liked."""
    # NOTE(review): `player` is declared global but never assigned or read in
    # this function — looks stale (the template uses `person` instead).
    global player, prefs
    thing = request.args.get('thing', '').lower()
    if not thing:
        return render_template('empty_input.html')
    # Persist the search (this replaced an earlier Mongo
    # mongo.db.searches.insert_one(...) call).
    Search.create(ip=request.headers.get('X-Forwarded-For') or request.remote_addr,
                  url=request.url,
                  thing=thing)
    # `rand` controls the phrases used when rendering the final template;
    # we accept `rand` as a query parameter for debugging purposes, and fall
    # back to a random value when it is absent or not a float.
    try:
        rand = float(request.args.get('rand', ''))
    except ValueError:
        rand = random.random()
    hates = thing in prefs['hates']
    likes = not hates and thing in prefs['likes']
    # TODO(review): unknown terms used to trigger a notification email; the
    # call was commented out and its surrounding try/except was dead code:
    # email_person("Unknown search", 'Someone asked if you hate %s' % (thing,))
    return render_template('response.html',
                           person=person,
                           hates=hates,
                           likes=likes,
                           thing=thing,
                           rand=rand)
def api_stream_days():
    """Return the sorted day terms available for the stream given by ?id=."""
    sid = request.args.get('id', type=int)
    if not sid:
        abort(400)
    facet = Search().get_stream_days(sid)
    day_terms = [entry['term'] for entry in facet['terms']]
    day_terms.sort()
    return jsonify({'days': day_terms})
def search(keyword):
    """Run an AliExpress keyword search through the shared Selenium driver.

    Lazily creates the driver on first use, then drives the Search object
    through keyword/currency/category setup and pushes results to the eel UI.
    Returns None on any failure (errors are logged via error_logger).
    """
    global driver
    # NOTE(review): assigning to the module-level name `search` below shadows
    # this very function in the module namespace — confirm no caller looks the
    # function up by name after the first call.
    global search
    try:
        # Lazily initialize the shared Selenium driver on first use.
        if driver is None:
            driver = set_driver(IsHeadless)
    except Exception as error:
        error_logger('Seleniumドライバ設定エラー', error)
        return None
    try:
        url = 'https://ja.aliexpress.com/all-wholesale-products.html?spm=a2g0o.category_nav.0.0.300734f6uRXi28'
        search = Search(url, keyword)
        search.set_keyword(driver)
        search.set_currency(driver)
        search.get_category(driver)
        # Notify the eel front-end and push the discovered categories/URL.
        eel.message('検索完了、カテゴリを設定しました。絞り込みを必要に応じて設定してください。')
        eel.set_category_info(search.categories)
        eel.view_current_url(driver.current_url)
    except Exception as error:
        error_logger('検索エラー', error)
        return None
def api_stream_records():
    """Return the playable records for a stream (?id=) on a given day (?date=)."""
    sid = request.args.get('id', type=int)
    if not sid:
        abort(400)
    day = request.args.get('date', type=str)
    if not day:
        abort(400)
    results = Search().stream_records(sid, date=day).results
    payload = [
        {
            'title': r['title'],
            'duration': r['duration'],
            'ts': r['ts'],
            'url': full_url_for('play_redirect', record_id=r['record_id']),
        }
        for r in results
    ]
    return jsonify({'records': payload})
import logging
import multiprocessing as mp

from app import Retrieve
from app import Search
from app import Extract

logging.basicConfig(level=logging.INFO)

# Shared pipeline stages: search for work items, retrieve them (via proxy,
# AWS 'default' profile), and extract results.
sr = Retrieve if False else Search()  # noqa: see note below
# NOTE(review): line above must stay `sr = Search()`; kept verbatim here:
sr = Search()
retr = Retrieve(useproxy=True, awsprofile='default')
ex = Extract(awsprofile='default')

# Commented-out experiment: fan out user retrieval over a process pool.
# list = sr.incomplete(category='user', step='all', getitems=100000)
#
# def mp_retrieve(user):
#     retr.retrieve_user(user)
#
# pool = mp.Pool(processes=10)
#
# pool.map(mp_retrieve, list)

# Commented-out experiment: sequential picture retrieval.
# list = sr.incomplete(category='picture')
#
# logging.info(list)
# logging.info(len(list))
#
# for post in list:
# # for post in list[0:1]:
#     try:
#         retr.retrieve_picture(post)
import eel
import settings
from app import Search, Item
from scraping import set_driver
from datetime import datetime as dt
from logging import getLogger, FileHandler, StreamHandler, Formatter, DEBUG
import pandas as pd

# eel application settings: folder, entry page, and window size.
app_name = "web"
end_point = "index.html"
size = (800, 750)

# Base search URL and shared scraping state (driver created lazily elsewhere).
url = 'https://www.buyma.com/r/'
search_goods = Search(url)
item = Item('')
IsHeadless = False
driver = None

# Module logger: DEBUG level, written to logger.log, not propagated to root.
logger = getLogger(__name__)
fomatterSetting = Formatter(
    '[%(asctime)s] %(name)s %(threadName)s %(levelname)s: %(message)s',
    '%Y-%m-%d %H:%M:%S')
handler = FileHandler('logger.log')
# handler = StreamHandler()
# handler = NullHandler()
handler.setLevel(DEBUG)
logger.setLevel(DEBUG)
handler.setFormatter(fomatterSetting)
logger.addHandler(handler)
logger.propagate = False
import eel
import settings
from app import Search, Item
from scraping import set_driver, parse_html
from datetime import datetime as dt
from logging import getLogger, FileHandler, StreamHandler, Formatter, DEBUG
import pandas as pd

# eel application settings: folder, entry page, and window size.
app_name = "web"
end_point = "index.html"
size = (850, 850)

# Shared scraping state: Selenium driver (created lazily), the current Search
# (placeholder until a real search runs), and collected items.
IsHeadless = False
driver = None
search = Search('', '')
items = []

# Module logger: DEBUG level, written to stderr via StreamHandler,
# not propagated to the root logger.
logger = getLogger(__name__)
fomatterSetting = Formatter(
    '[%(asctime)s] %(name)s %(threadName)s %(levelname)s: %(message)s',
    '%Y-%m-%d %H:%M:%S')
# handler = FileHandler('logger.log')
handler = StreamHandler()
# handler = NullHandler()
handler.setLevel(DEBUG)
logger.setLevel(DEBUG)
handler.setFormatter(fomatterSetting)
logger.addHandler(handler)
logger.propagate = False

# @eel.expose
parser_get.set_defaults(command='get') # Parser for running the program parser_run = subparser.add_parser('run') parser_run.add_argument('category', choices=('location', 'user', 'picture')) parser_run.set_defaults(command='run') if __name__ == '__main__': logging.basicConfig(level=logging.INFO) args = parser.parse_args() logging.info(args.__dict__) # Initialize profiles sr = Search() retr = Retrieve(useproxy=True, awsprofile='default', storage_directory='.') ex = Extract(awsprofile='default', storage_directory='.') dynamo = boto3.resource('dynamodb') tbl_user = dynamo.Table('test4') tbl_pictures = dynamo.Table('test2') tbl_locations = dynamo.Table('test3') pool = mp.Pool() # one location if args.command == 'get' and args.category == 'location': logging.info('%s: Extracting location details', args.key) retr.retrieve_location(int(args.key)) ex.location_details(args.key)