def get_id_from_name(name: str, setting: str = 'DEFAULT') -> int:
    """Look up an item's id by scraping the name-to-id search page.

    The page at ``config[setting]['NameToIdBaseUrl']`` (with the URL-quoted
    ``name`` appended) is fetched and parsed. Each row of the first
    ``<table class="table">`` is inspected: when the text of the row's second
    ``<td>`` equals ``name`` (case-insensitively), the text of the row's
    first ``<td>`` is returned.

    Args:
        name: Item name to search for.
        setting: Config section whose ``NameToIdBaseUrl`` is used.

    Returns:
        The text of the first cell of the matching row, or ``None`` when the
        page has no results table or no row matches.
        NOTE(review): the value is returned exactly as scraped (a string),
        even though the signature is annotated ``int``; callers that need an
        integer must convert it themselves.
    """
    logger = py_logging.create_logger(
        'get_id_from_name',
        '{}get_id_from_name.log'.format(
            os.path.dirname(os.path.realpath(__file__)) + os.sep))
    config = get_config()
    html_text = requests.get(
        config[setting]['NameToIdBaseUrl']
        + urllib.parse.quote_plus(name)).text
    logger.info("Sent request off to find the id for: {}".format(name))

    soup = BeautifulSoup(html_text, 'lxml')
    table = soup.find('table', {"class": "table"})
    if table is None:
        # find() returns None when nothing matches; without this guard the
        # chained .find_all() call would raise AttributeError.
        logger.warning("No results table found for: {}".format(name))
        return None

    wanted = name.lower()
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        # Header rows (no <td>) and rows with a single cell cannot hold both
        # the value to return and a name to compare, so skip them.
        if len(cells) < 2:
            continue
        if cells[1].getText().lower() == wanted:
            return cells[0].getText()
    return None
def add_all_top_price_items_thread(session_lock: multiprocessing.Lock,
                                   runescape_lock: multiprocessing.Lock = None):
    """Start one process per top-price item that is not yet in the database.

    The candidate names are the concatenation of the price-fall, price-rise,
    price-value and most-traded name lists. Names already present in the
    database, and names for which no id can be found, are logged, printed
    and skipped. For every remaining name a process running
    ``add_item_to_database_by_id_thread`` is started; the function returns
    once all started processes have been joined.

    Args:
        session_lock: Lock handed to the database helpers and to each
            spawned process.
        runescape_lock: Lock handed to each spawned process; a fresh
            ``multiprocessing.Lock`` is created when it is falsy.
    """
    log_dir = os.path.dirname(os.path.realpath(__file__)) + os.sep
    logger = py_logging.create_logger(
        'add_all_top_price_items_thread',
        '{}add_all_top_price_items.log'.format(log_dir))

    names = (get_top_price_fall_names()
             + get_top_price_rise_names()
             + get_top_price_value_names()
             + get_top_price_most_traded_names())
    print(names)

    runescape_lock = runescape_lock or multiprocessing.Lock()

    workers = []
    for name in names:
        print(name)

        if is_item_name_in_database(name, session_lock):
            present_msg = "{} is already in the database".format(name)
            logger.info(present_msg)
            print(present_msg)
            continue

        item_id = get_id_from_name(name)
        if item_id is None:
            missing_msg = "Couldn't find ID for: {}".format(name)
            logger.warning(missing_msg)
            print(missing_msg)
            continue

        print("ID: {}".format(item_id))
        worker = multiprocessing.Process(
            target=add_item_to_database_by_id_thread,
            args=(item_id, session_lock, runescape_lock))
        worker.start()
        workers.append(worker)

    # Wait for every spawned process before returning.
    for worker in workers:
        worker.join()
def is_item_id_in_database(item_id: int,
                           session_lock: multiprocessing.Lock) -> bool:
    """Report whether any ``Item`` row has the given ``item_id``.

    While holding ``session_lock``, a session is obtained from
    ``shared_session()`` and the matching rows are counted. The session is
    closed even when the query raises, so a failed lookup does not leak it.

    Args:
        item_id: Value compared against ``Item.item_id``.
        session_lock: Lock held for the duration of the database access.

    Returns:
        ``True`` when at least one matching row exists, otherwise ``False``.
    """
    logger = py_logging.create_logger(
        "is_item_id_in_database",
        '{}in_database.log'.format(
            os.path.dirname(os.path.realpath(__file__)) + os.sep))
    with session_lock:
        session = shared_session()
        try:
            count = session.query(Item).filter(
                Item.item_id == item_id).count()
        finally:
            # Close on every path; previously an exception in the query
            # skipped the close call.
            session.close()
    logger.debug("Number of {} in database: {}".format(item_id, count))
    return bool(count)
def get_singular_ids_in_database(session_lock: multiprocessing.Lock) -> list:
    """Return the ``Item`` rows that still hold placeholder values.

    A row is selected when its ``name``, ``type`` or ``description`` equals
    the string ``"None"``, or when ``is_members_only`` is NULL.

    Args:
        session_lock: Lock held while the query is executed.

    Returns:
        A list of the matching ``Item`` objects (whole rows, not bare ids).

    NOTE(review): the session obtained here is not closed by this function;
    confirm whether callers rely on the returned objects staying attached.
    """
    log_path = '{}in_database.log'.format(
        os.path.dirname(os.path.realpath(__file__)) + os.sep)
    # The logger is created but never written to in this function.
    logger = py_logging.create_logger("get_ids_in_database", log_path)

    with session_lock:
        session = shared_session()
        query = session.query(Item)
        is_placeholder = or_(
            Item.name == "None",
            Item.type == "None",
            Item.is_members_only.is_(None),
            Item.description == "None")
        return list(query.filter(is_placeholder))
def is_item_name_in_database(name: str,
                             session_lock: multiprocessing.Lock) -> bool:
    """Report whether any ``Item`` row has the given ``name``.

    Unlike the id-based lookup, this function builds its own engine and
    session factory on every call from ``config['DEFAULT']`` and a fixed
    database directory. The session is closed and the engine disposed on
    every path, so repeated calls do not accumulate open sessions or
    connection pools.

    Args:
        name: Value compared against ``Item.name``.
        session_lock: Lock held for the duration of the database access.

    Returns:
        ``True`` when at least one matching row exists, otherwise ``False``.
    """
    config = get_config()
    # NOTE(review): the database directory is a hard-coded absolute Windows
    # path; this only works on a machine with that exact layout.
    engine = create_engine(
        '{}:///{}{}'.format(
            config['DEFAULT']['DatabaseType'],
            "C:\\Anthony\\Programs\\runescape-grand-exchange-data-analytics\\database_services\\database\\",
            config['DEFAULT']['DatabaseName']))
    Session = sessionmaker(bind=engine)
    logger = py_logging.create_logger(
        "item_name_in_database",
        '{}in_database.log'.format(
            os.path.dirname(os.path.realpath(__file__)) + os.sep))
    try:
        with session_lock:
            session = Session()
            try:
                count = session.query(Item).filter(
                    Item.name == name).count()
            finally:
                # Close on every path; previously an exception in the query
                # skipped the close call.
                session.close()
    finally:
        # The engine is private to this call, so release its connection
        # pool instead of leaving one behind per invocation.
        engine.dispose()
    logger.debug("Number of {} in database: {}".format(name, count))
    return bool(count)
def get_item_from_id_thread(id: int, item: Item, get_lock: threading.Lock,
                            delay: int = 5,
                            second_delay: int = 300) -> Item:
    """Fetch an item's details by id and copy them onto ``item``.

    The request is made while holding ``get_lock`` and is followed by a
    ``delay``-second sleep. If the response cannot be decoded as JSON, the
    failure is logged and printed, the function sleeps ``second_delay``
    seconds and tries exactly once more; a second decode failure is logged
    and printed and ``None`` is returned.

    Args:
        id: Item id appended to the URL from ``get_item_runescape_url()``.
        item: Object whose ``item_id``, ``name``, ``type``,
            ``is_members_only`` and ``description`` attributes are set.
        get_lock: Lock held around the request(s) and the sleeps.
        delay: Seconds to sleep after each successfully decoded request.
        second_delay: Seconds to sleep before the single retry.

    Returns:
        The same ``item`` object after it has been populated, or ``None``
        when both attempts fail to decode.
    """
    log_dir = os.path.dirname(os.path.realpath(__file__)) + os.sep
    logger = py_logging.create_logger(
        'get_item_from_id', '{}get_item__from_id.log'.format(log_dir))

    def _request_item():
        # One attempt: request, decode, then pause. The pause is skipped
        # when decoding raises, exactly as in a plain try-body.
        payload = requests.get(get_item_runescape_url() + str(id)).json()
        time.sleep(delay)
        return payload

    with get_lock:
        try:
            response = _request_item()
        except JSONDecodeError:
            retry_msg = (
                "{} failed due to either too many requests or a bad id"
                .format(id))
            logger.warning(retry_msg)
            print(retry_msg)
            time.sleep(second_delay)
            try:
                response = _request_item()
            except JSONDecodeError:
                give_up_msg = "{} failed likely due to bad id".format(id)
                logger.error(give_up_msg)
                print(give_up_msg)
                return None

    details = response['item']
    item_name = details['name']
    item_type = details['type']
    item_description = details['description']
    # The 'members' field is compared against the string 'true'.
    members_only = details['members'] == 'true'
    logger.info('ID = {} and name = {}'.format(id, item_name))

    item.item_id = id
    item.name = item_name
    item.type = item_type
    item.is_members_only = members_only
    item.description = item_description
    return item
def get_item_from_id(id: int, lock: threading.Lock = None) -> Item:
    """Fetch an item's details by id and build a new ``Item`` from them.

    Args:
        id: Item id appended to the URL from ``get_item_runescape_url()``.
        lock: Optional lock; when given, it is held while the request is
            made and decoded.

    Returns:
        A new ``Item`` built from the ``'item'`` entry of the JSON response.
    """
    log_dir = os.path.dirname(os.path.realpath(__file__)) + os.sep
    logger = py_logging.create_logger(
        'get_item_from_id', '{}get_item__from_id.log'.format(log_dir))

    def _fetch():
        # Single request + JSON decode, shared by both branches below.
        target = '{}{}'.format(get_item_runescape_url(), id)
        return requests.get(target).json()

    if lock is None:
        response = _fetch()
    else:
        with lock:
            response = _fetch()

    details = response['item']
    item_name = details['name']
    item_type = details['type']
    item_description = details['description']
    # The 'members' field is compared against the string 'true'.
    members_only = details['members'] == 'true'
    logger.info('ID = {} and name = {}'.format(id, item_name))

    return Item(
        item_id=id,
        name=item_name,
        type=item_type,
        is_members_only=members_only,
        description=item_description)
def determine_new_days(item_id: int, days: list,
                       session_lock: multiprocessing.Lock) -> list:
    """Filter ``days`` down to the entries not yet stored for ``item_id``.

    Args:
        item_id: Item whose stored days are looked up.
        days: Candidate day values.
        session_lock: Lock forwarded to ``get_days_in_database``.

    Returns:
        The elements of ``days``, in their original order, that are not
        among the values returned by ``get_days_in_database``.
    """
    log_path = '{}in_database.log'.format(
        os.path.dirname(os.path.realpath(__file__)) + os.sep)
    # The logger is created but never written to in this function.
    logger = py_logging.create_logger("get_ids_in_database", log_path)

    known_days = get_days_in_database(item_id, session_lock)
    print("Days in database: {}".format(known_days))

    new_days = []
    for day in days:
        if day in known_days:
            continue
        new_days.append(day)
    return new_days
def get_days_in_database(item_id: int,
                         session_lock: multiprocessing.Lock) -> list:
    """Return the distinct ``Price.runescape_time`` values for one item.

    Args:
        item_id: Value compared against ``Price.item_id``.
        session_lock: Lock held while the query is executed.

    Returns:
        A list with the first column of every distinct result row.

    NOTE(review): the session obtained here is not closed by this function.
    """
    log_path = '{}in_database.log'.format(
        os.path.dirname(os.path.realpath(__file__)) + os.sep)
    # The logger is created but never written to in this function.
    logger = py_logging.create_logger("get_ids_in_database", log_path)

    with session_lock:
        session = shared_session()
        rows = (session.query(Price.runescape_time)
                .filter(Price.item_id == item_id)
                .distinct())
        stored_days = []
        for row in rows:
            # Each row is a one-column result; unwrap it.
            stored_days.append(row[0])
        return stored_days
def get_ids_in_database(session_lock: multiprocessing.Lock) -> list:
    """Return every distinct ``Item.item_id`` value in the database.

    Args:
        session_lock: Lock held while the query is executed.

    Returns:
        A list with the first column of every distinct result row.

    NOTE(review): the session obtained here is not closed by this function.
    """
    log_path = '{}in_database.log'.format(
        os.path.dirname(os.path.realpath(__file__)) + os.sep)
    # The logger is created but never written to in this function.
    logger = py_logging.create_logger("get_ids_in_database", log_path)

    with session_lock:
        session = shared_session()
        rows = session.query(Item.item_id).distinct()
        item_ids = []
        for row in rows:
            # Each row is a one-column result; unwrap it.
            item_ids.append(row[0])
        return item_ids