def _extract_cash_transactions(transactions_table,
                               staleness_detector: _StalenessDetector):
    """Yield one transaction dict per ``<tr>`` of the cash transactions table.

    Each row is processed inside ``staleness_detector.visit(first_cell)``.
    The yielded dict has the keys ``id``, ``date``, ``description``,
    ``amount`` and ``type``.  ``id`` is a synthetic identifier: the prefix
    ``VGXX-`` followed by the SHA-224 hex digest of the date, description,
    signed amount, type and balance joined with ``/``.

    A row counts as a credit when its "in" column parses to an amount;
    otherwise it is a debit and the "out" amount is negated.  If neither
    column yields an amount, negating ``None`` raises ``TypeError``.
    """
    for table_row in transactions_table.find_elements_by_tag_name("tr"):
        tds = table_row.find_elements_by_tag_name("td")
        with staleness_detector.visit(tds[0]):
            when = datetime.strptime(tds[0].text.strip(), "%d %B %Y")
            description = tds[1].text.strip()
            money_in = Price.fromstring(tds[2].text.strip()).amount_float
            money_out = Price.fromstring(tds[3].text.strip()).amount_float

            if money_in is not None:
                signed_amount = money_in
                kind = "credit"
            else:
                signed_amount = money_out * -1.0
                kind = "debit"

            balance = Price.fromstring(tds[4].text.strip()).amount_float

            # The balance goes into the fingerprint only; it is not yielded.
            fingerprint = "/".join([
                when.isoformat(),
                description,
                str(signed_amount),
                kind,
                str(balance),
            ])
            digest = hashlib.sha224(fingerprint.encode()).hexdigest()

            yield {
                "id": "VGXX-" + digest,
                "date": when,
                "description": description,
                "amount": signed_amount,
                "type": kind,
            }
def _check_price(self, course_name):
    """Return True when the checkout pane shows the course as free.

    When the total-price element is displayed, its text is parsed with
    ``Price.fromstring``.  The parsed currency is stored in
    ``self.stats.currency_symbol`` if that is still unset.  A missing or
    positive amount counts the course as expired
    (``self.stats.expired`` is incremented) and the method returns False.

    For a free course the list price is looked up as well and, if it
    parses, appended to ``self.stats.prices``.
    """
    total_xpath = "//div[contains(@class, 'styles--checkout-pane-outer')]//span[@data-purpose='total-price']//span"
    total_element = self.driver.find_element_by_xpath(total_xpath)

    # Only a displayed element carries the price details we care about.
    if total_element.is_displayed():
        displayed_total = total_element.text
        # Price parsing is meant to cope with different locales/currencies.
        parsed_total = Price.fromstring(displayed_total)

        # Remember the currency for the statistics.
        currency_unset = self.stats.currency_symbol is None
        if currency_unset and parsed_total.currency is not None:
            self.stats.currency_symbol = parsed_total.currency

        if parsed_total.amount is None or parsed_total.amount > 0:
            logger.debug(
                f"Skipping course '{course_name}' as it now costs {displayed_total}"
            )
            self.stats.expired += 1
            return False

    # The course is free: record its listed price for the statistics.
    listed_xpath = "//div[contains(@class, 'styles--checkout-pane-outer')]//td[@data-purpose='list-price']//span"
    listed_element = self.driver.find_element_by_xpath(listed_xpath)
    listed_price = Price.fromstring(listed_element.text)
    if listed_price.amount is not None:
        self.stats.prices.append(listed_price.amount)
    return True
def get_prices(ocr):
    """Return the HT, TVA and Total amounts found in ``ocr``.

    A ``Price`` is built from ``ocr`` and ``get_price`` is applied to its
    ``ht``, ``tva`` and ``total`` attributes, in that order.

    :param ocr: OCR data passed straight to ``Price``
    :return: dict with the keys ``"HT"``, ``"TVA"`` and ``"Total"``
    """
    parsed = Price(ocr)
    fields = (("HT", "ht"), ("TVA", "tva"), ("Total", "total"))
    return {
        label: parsed.get_price(getattr(parsed, attribute))
        for label, attribute in fields
    }
def get_prices(ocr):
    """Extract the HT, TVA and Total amounts from OCR output.

    :param ocr: OCR data read from an image, passed to ``Price``
    :return: dict with the keys ``"HT"``, ``"TVA"`` and ``"Total"``
    :raise: empty OCR data is not handled here yet (still outstanding)
    """
    extractor = Price(ocr)

    result = {}
    result["HT"] = extractor.get_price(extractor.ht)
    result["TVA"] = extractor.get_price(extractor.tva)
    result["Total"] = extractor.get_price(extractor.total)
    return result
def _get_accounts_amount(summary_area):
    """Return ``(loan_amount, cash_amount)`` read from the summary area.

    The amounts come from the ``<dd>`` of the first two ``<dl>`` elements
    under ``section > article``; any further ``<dl>`` elements are ignored.
    Both values are the ``amount_float`` of the parsed text.
    """
    section = summary_area.find_element_by_tag_name("section")
    article = section.find_element_by_tag_name("article")
    definition_lists = article.find_elements_by_tag_name("dl")

    # First list is the loan figure, second the cash figure.
    loan_dl, cash_dl, *_ = definition_lists

    loan_text = loan_dl.find_element_by_tag_name("dd").text
    loan = Price.fromstring(loan_text)
    cash_text = cash_dl.find_element_by_tag_name("dd").text
    cash = Price.fromstring(cash_text)

    return loan.amount_float, cash.amount_float
def parse_price(self, price):
    """Split a price string into ``(currency, amount)``.

    The string is parsed once with ``Price.fromstring``.  The symbols
    ``$``, ``€`` and ``£`` are mapped to ``USD``, ``EUR`` and ``GBP``; any
    other currency value (including ``None``) is returned unchanged.

    :param price: price text to parse
    :return: tuple of the currency and the parsed ``amount_float``
    """
    symbol_to_code = {'$': 'USD', '€': 'EUR', '£': 'GBP'}

    # Parse once and reuse the result for both fields.
    parsed = Price.fromstring(price)
    currency = symbol_to_code.get(parsed.currency, parsed.currency)
    return currency, parsed.amount_float
def get_price_currency(input_value):
    """Return the currency detected in ``input_value`` or ``'NOT_FOUND'``.

    ``input_value`` may be a single price string or a list of them.  For a
    list, the currency of the first entry that has one is returned.

    Two defects of the previous version are fixed here:

    * the list branch returned the whole parsed ``Price`` object rather
      than its currency, unlike the single-value branch;
    * a list in which no entry had a currency fell through to
      ``Price.fromstring(<list>)``; it now returns ``'NOT_FOUND'``.

    NOTE(review): confirm that no caller relied on receiving a ``Price``
    object for list input.
    """
    if isinstance(input_value, list):
        for pricetag in input_value:
            currency = Price.fromstring(pricetag).currency
            if currency:
                return currency
        return 'NOT_FOUND'

    currency = Price.fromstring(input_value).currency
    if currency is None:
        return 'NOT_FOUND'
    return currency
def parseMaxSalary(self):
    '''
    Returns highest value in salary range or salary value if not a range

    Disregards non-annual wages, e.g. 'per day' or 'per week' rates for
    contractors: any salary text without "year" in it yields None, as does
    a missing salary.
    '''
    raw = self.raw_salary
    if raw is None or "year" not in raw:
        return None

    if " - " in raw:
        # Index 0 is the lower bound of the range, index 1 the upper bound.
        upper_bound = raw.split(" - ")[1]
        return Price.fromstring(upper_bound).amount_float

    # Remove the literal suffix.  str.rstrip(" a year") would strip any
    # trailing run of the characters ' ', 'a', 'y', 'e', 'r' instead.
    suffix = " a year"
    if raw.endswith(suffix):
        raw = raw[:-len(suffix)]
    return Price.fromstring(raw).amount_float
def check_price():
    """Fetch the product page once and notify when the price is low enough.

    Downloads ``URL`` with the module-level ``headers``, reads the product
    title and deal price, and prints them next to ``desired_price``.  When
    the parsed price is at or below ``desired_price`` it calls
    ``send_mail()`` and ``push_notification()`` and exits the process with
    status 0; otherwise it prints a "rechecking" line with the current time.

    Raises ``AttributeError`` if either the ``productTitle`` or the
    ``priceblock_dealprice`` element is missing from the page.
    """
    page = requests.get(URL, headers=headers)
    soup = BeautifulSoup(page.content, 'html.parser')

    title = soup.find(id="productTitle").get_text().strip()
    price = soup.find(id="priceblock_dealprice").get_text()
    # The first two characters of the price text are skipped before parsing.
    main_price = Price.fromstring(price[2:])

    print("NAME : " + title)
    print("CURRENT PRICE : " + str(main_price.amount_text))
    print("DESIRED PRICE : " + str(desired_price))

    if main_price.amount_float <= desired_price:
        send_mail()
        push_notification()
        print("Email and notification sent")
        # Same effect as exit(0), without relying on the site-provided name.
        raise SystemExit(0)

    print("Rechecking the price... Last check price at {}".format(datetime.now()))
def getFavorites(self):
    """Open the favorites page and return its rows as a list of dicts.

    Clicks the ``dealer-my-favorites`` element, selects ``100`` in the
    ``PerPage`` drop-down, then builds one dict per ``cue-datatable-row``
    element with the keys ``title``, ``available`` (the first
    whitespace-separated token of the availability text, as ``int``) and
    ``price`` (the parsed ``amount_float``).
    """
    self.driver.find_element_by_id("dealer-my-favorites").click()
    page_size = Select(self.driver.find_element_by_name("PerPage"))
    page_size.select_by_value("100")

    def describe(row):
        title = row.find_element_by_css_selector(".product-title a").text
        stock_text = row.find_element_by_css_selector(
            ".product-availability span").text
        available = int(stock_text.split()[0])
        price_text = row.find_element_by_class_name("product-price").text
        return {
            "title": title,
            "available": available,
            "price": Price.fromstring(price_text).amount_float,
        }

    rows = self.driver.find_elements_by_class_name("cue-datatable-row")
    return [describe(row) for row in rows]
def search_currency_in_pdf(self, PDF_Info):
    """Collect the terms of ``PDF_Info`` that parse as a price or currency.

    Every term in ``PDF_Info.cleanData`` is looked up with
    ``PDF_Info.get_term`` and parsed with ``Price.fromstring``.  Terms with
    a truthy amount or a truthy currency are kept as
    ``[term, [currency, amount, amount_text]]``.  The resulting list is
    stored on ``PDF_Info.RawCurrency``; nothing is returned.
    """
    matches = []
    for term in PDF_Info.cleanData:
        parsed = Price.fromstring(PDF_Info.get_term(term))
        if not (parsed.amount or parsed.currency):
            continue
        details = [parsed.currency, parsed.amount, parsed.amount_text]
        matches.append([term, details])
    PDF_Info.RawCurrency = matches
def __init__(self, item_title, item_description, item_price_str,
             restaurant_id):
    """Store the item's fields and their processed forms.

    The title is title-cased, the price string is parsed with
    ``Price.fromstring``, and both the title-cased title and the
    description are passed through ``nlp``.
    """
    title = item_title.title()

    self.item_title = title
    self.description = item_description
    self.price = Price.fromstring(item_price_str)
    self.title_nlp = nlp(title)
    self.description_nlp = nlp(item_description)
    self.restaurant_id = restaurant_id
def _extract_cash_asset(product_row):
    """Build the cash asset entry from a product table row.

    The value is the parsed ``amount_float`` of the stripped text of the
    row's sixth ``<td>`` (index 5).
    """
    cells = product_row.find_elements_by_tag_name("td")
    parsed = Price.fromstring(cells[5].text.strip())
    asset = {"name": "cash", "type": "currency"}
    asset["value"] = parsed.amount_float
    return asset
def _get_account_transactions(self, account_id, from_date, to_date):
    """Return the account's transactions whose date is in range.

    Switches to ``account_id``, walks every ``div.row`` of the transaction
    body and keeps the rows for which
    ``date_in_range(date, from_date, to_date)`` is true.  Each result is a
    dict with the keys ``id``, ``date``, ``description``, ``amount`` and
    ``type``.

    A row is a credit when its "in" cell parses to an amount; otherwise it
    is a debit with the "out" amount negated.  If neither cell yields an
    amount, negating ``None`` raises ``TypeError``.
    """
    collected = []
    table = self._switch_account(account_id)
    body = table.find_element_by_css_selector("div.tbody-trans")

    for entry in body.find_elements_by_css_selector("div.row"):
        header = entry.find_element_by_css_selector("div.th-wrapper")
        # Exactly six header cells are expected; the first one is unused.
        (_, date_cell, desc_cell, in_cell, out_cell,
         bal_cell) = header.find_elements_by_css_selector("div.th")

        when = datetime.strptime(date_cell.text.strip(), "%a, %d %b %y")
        if not date_in_range(when, from_date, to_date):
            continue

        money_in = Price.fromstring(in_cell.text.strip()).amount_float
        money_out = Price.fromstring(out_cell.text.strip()).amount_float
        if money_in is not None:
            signed_amount = money_in
            kind = "credit"
        else:
            signed_amount = money_out * -1.0
            kind = "debit"

        extras = entry.find_elements_by_css_selector(
            "span.additional-data-content")
        type_cell = extras[1]

        description = desc_cell.text.strip()
        if len(extras) > 2:
            description += f" ({extras[2].get_attribute('innerText').strip()})"
        # Collapse every run of whitespace into a single space.
        description = re.sub(r"\s+", " ", description.strip())

        # generate synthetic transaction identifier
        type_detail = type_cell.text.strip()
        balance = Price.fromstring(bal_cell.text.strip()).amount_float
        fingerprint = "/".join([
            when.isoformat(),
            description,
            str(signed_amount),
            kind,
            type_detail,
            str(balance),
        ])
        digest = hashlib.sha224(fingerprint.encode()).hexdigest()

        collected.append({
            "id": "BCXX-" + digest,
            "date": when,
            "description": description,
            "amount": signed_amount,
            "type": kind,
        })

    return collected
def parse_price(price: Any,
                currency: Any) -> Tuple[Optional[str], Optional[str]]:
    """Parse ``price`` and return ``(amount, currency)``.

    Both arguments are first normalised with ``_to_string_or_None``;
    ``currency`` is passed to ``Price.fromstring`` as ``currency_hint``.

    :return: the amount as ``str(amount_float)`` together with the parsed
        currency, or ``(None, None)`` when no amount could be parsed
    """
    price = _to_string_or_None(price)
    currency = _to_string_or_None(currency)
    parsed_price = Price.fromstring(price, currency_hint=currency)

    if parsed_price.amount is None:
        return None, None
    return str(parsed_price.amount_float), parsed_price.currency
def _get_balance(balance_area):
    """Return the parsed balance from the first "balance" line item.

    Scans the ``div.line-item`` elements of ``balance_area`` for the first
    one whose lower-cased header text contains ``balance`` and returns
    ``Price.fromstring`` of its stripped value text (the parsed object, not
    a float).

    Raises ``Error`` when no such line item exists.
    """
    for item in balance_area.find_elements_by_css_selector("div.line-item"):
        heading = item.find_element_by_css_selector(
            "div.header-container").text
        if "balance" not in heading.lower():
            continue
        value_text = item.find_element_by_css_selector("div.data-value").text
        return Price.fromstring(value_text.strip())

    raise Error("unable to get balance")
def _get_account_balance(self, account):
    """Load the account's dashboard and return its balance entry.

    Navigates to ``{BASE_URL}{account['home_url']}/Dashboard``, waits for
    the portfolio value element and returns a dict holding a deep copy of
    ``account["description"]`` under ``account`` and the parsed
    ``amount_float`` under ``balance``.
    """
    self._do.get(f"{BASE_URL}{account['home_url']}/Dashboard")
    value_element = self._do.wait_element(
        By.CSS_SELECTOR,
        "section.portfolio-header div.col-value div.value")

    description = deepcopy(account["description"])
    parsed = Price.fromstring(value_element.text.strip())
    return {"account": description, "balance": parsed.amount_float}
def __init__(self, **kwargs):
    """Populate ``self.data`` from keyword arguments.

    ``sku`` is normalised with ``self._process_as_string``.  ``price`` is
    converted to ``str``, parsed with ``Price.fromstring`` and stored as
    its ``amount_float``.  Every other keyword is copied unchanged.
    """
    self.data = {}

    if 'sku' in kwargs:
        raw_sku = kwargs.pop('sku')
        self.data['sku'] = self._process_as_string(raw_sku)

    if 'price' in kwargs:
        price_text = str(kwargs.pop('price'))
        self.data['price'] = Price.fromstring(price_text).amount_float

    # Merge the remaining keywords into a new dict: processed fields
    # first, then the untouched keywords.
    merged = dict(self.data)
    merged.update(kwargs)
    self.data = merged
def jsonToObject(jsonDict):
    """Convert a decoded JSON dict into an ``AmazonItem``.

    The price and used price are each rebuilt from an amount/currency pair
    by parsing ``"<amount> <currency>"`` with ``Price.fromstring``.  Either
    one is ``None`` when its amount or its currency is ``None``.  The
    ``ueDate`` entry is parsed with ``datetime.datetime.fromisoformat``.
    """

    def _parse_price(amount_key, currency_key):
        # The currency is read only when the amount is present, so a
        # missing currency key is not touched for an absent amount.
        amount = jsonDict[amount_key]
        if amount is None:
            return None
        currency = jsonDict[currency_key]
        if currency is None:
            return None
        return Price.fromstring(str(amount) + " " + currency)

    price = _parse_price(AmazonItemKeys.priceAmount,
                         AmazonItemKeys.priceCurrency)
    usedPrice = _parse_price(AmazonItemKeys.usedPriceAmount,
                             AmazonItemKeys.usedPriceCurrency)

    return AmazonItem(
        jsonDict[AmazonItemKeys.id],
        jsonDict[AmazonItemKeys.title],
        jsonDict[AmazonItemKeys.stars],
        jsonDict[AmazonItemKeys.objLink],
        jsonDict[AmazonItemKeys.imgLink],
        price,
        usedPrice,
        jsonDict[AmazonItemKeys.discount],
        jsonDict[AmazonItemKeys.isPrime],
        datetime.datetime.fromisoformat(jsonDict[AmazonItemKeys.ueDate]))
def get_balances(self):
    """Return the current balance, with its sign flipped, for ``self.account``.

    Waits for ``div.current-balance``, parses the stripped text of its
    ``span.value`` and reports ``amount_float * -1.0`` under ``balance``,
    next to a deep copy of ``self.account``, in a one-element ``accounts``
    list.
    """
    area = self._do.wait_element(By.CSS_SELECTOR, "div.current-balance")
    value_text = area.find_element_by_css_selector("span.value").text.strip()
    parsed = Price.fromstring(value_text)

    entry = {
        "account": deepcopy(self.account),
        "balance": parsed.amount_float * -1.0,
    }
    return {"accounts": [entry]}
def main():
    """Print sample results of date, price and IBAN parsing.

    Three groups are printed, separated by blank lines: ``dateparser.parse``
    results, ``Price.fromstring`` results and ``IBAN(...).country`` values.
    The last IBAN sample runs inside ``try``/``except`` and prints the
    exception instead if one is raised.
    """
    date_samples = (
        ('2.Mai 2020', {'languages': ['de']}),
        ('2.Abc 2020', {'languages': ['de']}),
        ('2020.12.8', {}),
    )
    for date_text, options in date_samples:
        print(dateparser.parse(date_text, **options))

    print()

    price_samples = (
        "-114,47 €",
        "3 500 руб",
        "Rs. 11,499",
        "$1499.99",
        "199,999.00",
        "199.999,00",
    )
    for price_text in price_samples:
        print(Price.fromstring(price_text))

    print()

    for iban_text in ('LT12 1000 0111 0100 1000',
                      'DE89 3704 0044 0532 0130 00'):
        print(IBAN(iban_text).country)

    try:
        truncated = IBAN('DE89 3704')
        print(truncated.country)
    except Exception as e:
        print(e)
def extract_loan(loan_row):
    """Build a loan asset dict from one row of ``<li>`` items.

    Uses item 0 for the project name, item 2 for the rate (its text minus
    the last character, divided by 100) and item 5 for the outstanding
    amount, reported as ``amount_float`` under ``value``.
    """
    fields = loan_row.find_elements_by_tag_name("li")

    name_element = fields[0].find_element_by_css_selector(
        "strong.project-name")
    name = name_element.text.strip()

    rate_text = fields[2].text.strip()
    # Drop the trailing character before converting to a fraction.
    annual_rate = float(rate_text[:-1]) / 100.

    outstanding = Price.fromstring(fields[5].text.strip())

    return {
        "name": name,
        "type": "loan",
        "annual_rate": annual_rate,
        "value": outstanding.amount_float,
        "provider_specific": None,
    }
def to_float(
    price_value: Union[str, int, float],
    decimals: Union[int, bool] = 2,
):
    """Parse ``price_value`` as a price and return its float amount.

    The value is converted with ``text.to_str`` and parsed with
    ``Price.fromstring``.  A falsy amount (``None`` or zero) is returned
    as is.  Otherwise the amount is rounded to ``decimals`` places, unless
    ``decimals`` is ``False``, in which case it is returned unrounded.
    """
    as_text: str = text.to_str(price_value)
    amount = Price.fromstring(price=as_text).amount_float

    if not amount or decimals is False:
        return amount
    return round(amount, decimals)
def _extract_fund_asset(product_type, product_row):
    """Build a fund asset dict from one product table row.

    Column use, by ``<td>`` index: 0 product name, 1 ongoing charges (text
    minus the last character, divided by 100), 2 units, 3 average unit
    cost, 4 last price, 5 total cost, 6 value.  Money columns are reported
    as the parsed ``amount_float``.
    """
    cells = product_row.find_elements_by_tag_name("td")

    def money(column):
        return Price.fromstring(cells[column].text.strip()).amount_float

    name = cells[0].find_element_by_css_selector(
        "p.content-product-name").text.strip()
    charges_text = cells[1].text.strip()
    ongoing_charges = float(charges_text[:-1]) / 100.0
    units = float(cells[2].text.strip())
    average_unit_cost = money(3)
    last_price = money(4)
    total_cost = money(5)
    value = money(6)

    return {
        "name": name,
        "type": f"{product_type} fund",
        "units": units,
        "value": value,
        "provider_specific": {
            "Ongoing charges": ongoing_charges,
            "Last price": last_price,
            "Total cost": total_cost,
            "Average unit cost": average_unit_cost,
        },
    }
def get_data_from_webpage():
    """Scrape the hot-shot offer and return its details as a list.

    Fetches ``XKOM_URL`` (raising on an HTTP error status), takes the first
    element matching ``HOT_SHOT_ID`` and returns
    ``[product_name, original_price, new_price, percentage_discount,
    category]``.  Both prices are parsed price objects; the discount is the
    truncated whole percentage followed by ``%``.
    """
    response = requests.get(XKOM_URL)
    response.raise_for_status()

    page = bs4.BeautifulSoup(response.text, features="lxml")
    hot_shot = page.select(HOT_SHOT_ID)[0]

    # Find the tag that carries the product-name attribute at all.
    name_attribute = "%s" % PRODUCT_NAME_ATTRIBUTE
    name_tag = hot_shot.find(attrs={name_attribute: True})
    product_name = unescape(name_tag[PRODUCT_NAME_ATTRIBUTE])

    old_price_text = hot_shot.find(class_=OLD_PRICE_CLASS).text
    original_price = Price.fromstring(old_price_text)
    new_price_text = hot_shot.find(class_=NEW_PRICE_CLASS).text
    new_price = Price.fromstring(new_price_text)

    saved_fraction = 1 - new_price.amount / original_price.amount
    percentage_discount = str(int(saved_fraction * 100)) + "%"

    category = get_category(HOT_SHOT_URL)

    return [
        product_name,
        original_price,
        new_price,
        percentage_discount,
        category,
    ]
def extract_transaction_header(row):
    """Return the summary fields of one transaction row.

    The row must contain exactly three ``<td>`` cells: date, description
    and amount.  ``details_id`` is the row's ``id`` attribute with
    ``trans`` replaced by ``etd``.  Non-negative amounts are labelled
    ``debit`` and negative ones ``credit``.  The row element itself is
    kept under ``selenium.header_row_ref``.
    """
    row_id = row.get_attribute("id")
    details_id = row_id.replace("trans", "etd")

    tds = row.find_elements_by_tag_name("td")
    date_td, description_td, amount_td = tds
    amount = Price.fromstring(amount_td.text).amount_float

    return {
        "date": parse_transaction_date(date_td.text.strip()),
        "amount": amount,
        "description": description_td.text.strip(),
        "type": "credit" if not amount >= 0 else "debit",
        "details_id": details_id,
        "selenium": {"header_row_ref": row},
    }
def parse_for_ammount(tweet, index):
    """Return the money amount mentioned closest to ``index`` in the tweet.

    ``tweet.full_text`` is split on single spaces and every token that
    contains ``¢`` or ``$`` is parsed with ``Price.fromstring``.  A token
    parsed without a currency is treated as cents and divided by 100.
    The amount whose token starts nearest to ``index`` is returned, or
    ``0`` when no amount is found.

    Changes from the previous version:

    * token positions are tracked while iterating instead of using
      ``full_text.find(token)``, which returned the wrong offset whenever
      the token also appeared earlier, e.g. ``$5`` inside ``$50``;
    * tokens with no parseable amount (a lone ``$`` or ``¢``) are skipped
      instead of raising ``TypeError`` or being stored as ``None``.
    """
    amounts_by_position = {}
    position = 0
    for token in tweet.full_text.split(' '):
        token_start = position
        # Tokens were separated by exactly one space each.
        position += len(token) + 1

        if "¢" not in token and "$" not in token:
            continue

        price = Price.fromstring(token)
        amount = price.amount_float
        if amount is None:
            continue

        if price.currency is None:
            # case for ¢: convert cents to dollars
            amount = amount / 100.0
        amounts_by_position[token_start] = amount

    if not amounts_by_position:
        return 0

    nearest = min(amounts_by_position, key=lambda start: abs(start - index))
    return amounts_by_position[nearest]
def scrape_url(url, driver, items=None, progress=None, progress_task=None):
    """Scrape sold listings from ``url`` and every following result page.

    Each listing is appended to ``items`` (a new list when omitted) as a
    dict with the keys ``title``, ``date``, ``price`` and ``url``.  When
    ``progress`` is given, a task is added on the first page and advanced
    once per listing.  Pagination follows ``a.pagination__next``
    recursively until that link is absent or points at the current URL.

    Note: the recursive call runs inside the ``try`` block, so a
    ``NoSuchElementException`` raised while scraping a later page is
    caught here too and ends the crawl with the items gathered so far.
    """
    if items is None:
        items = []

    driver.get(url)
    wait = WebDriverWait(driver, 10)

    wait.until(
        EC.presence_of_element_located(
            (By.CLASS_NAME, "srp-controls__count-heading")))
    count_spans = driver.find_elements_by_css_selector(
        ".srp-controls__count-heading > span")
    total = locale.atoi(count_spans[0].text)

    if progress and progress_task is None:
        progress_task = progress.add_task(
            f"scraping {total} sold auctions:", total=total)

    wait.until(
        EC.presence_of_element_located((By.CLASS_NAME, "srp-results")))

    for listing in driver.find_elements_by_css_selector(
            ".srp-results li.s-item"):
        title_el = listing.find_element_by_css_selector(".s-item__title")
        sold_el = listing.find_element_by_css_selector(
            ".s-item__title--tagblock .POSITIVE")
        price_el = listing.find_element_by_css_selector(
            ".s-item__price > span")
        link_el = listing.find_element_by_css_selector("a.s-item__link")

        if progress:
            progress.update(progress_task, advance=1)

        items.append({
            "title": title_el.text,
            "date": parse_date(sold_el.text.split("Sold")[1]),
            "price": Price.fromstring(price_el.text).amount_float,
            "url": link_el.get_attribute("href"),
        })

    try:
        next_link = driver.find_element_by_css_selector("a.pagination__next")
        next_url = next_link.get_attribute("href")
        if driver.current_url != next_url:
            return scrape_url(next_url, driver, items, progress,
                              progress_task)
    except NoSuchElementException:
        # reached end of results
        pass

    return items
def get_annual_sales(url):
    """Look up a store on ecommercedb.com and return its sales figure.

    :param url: store identifier appended to
        ``https://ecommercedb.com/en/store/``
    :return: the figure as a string (``str`` of a float), ``"Unknown"``
        when the request fails or no figure can be read from the page, or
        ``"URL Failed"`` when no attempt returned HTTP 200

    The displayed value is read from the first text node of the
    ``fancyBox__content`` element.  Its first three characters are skipped
    and a trailing ``m`` multiplies the number by one million.
    """
    url = 'https://ecommercedb.com/en/store/{0}'.format(url)
    headers = {
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'
    }

    parser = None
    # Up to two attempts, to ride out a transient non-200 response.
    for _ in range(2):
        print("Retrieving %s" % (url))
        try:
            response = requests.get(url,
                                    headers=headers,
                                    verify=True,
                                    timeout=2)
        except requests.exceptions.RequestException:
            # max retries or timeout exception because website is slow
            return "Unknown"
        print("Done retrieving - status code: ", response.status_code)
        if response.status_code == 200:
            # Parse only successful responses; an error page may be empty.
            parser = html.fromstring(response.text)
            break

    if parser is None:
        print(
            "The ecommercedb.com network is unresponsive or url error. Please try again later (or now)."
        )
        return "URL Failed"

    raw_value = parser.xpath(
        '//div[contains(@class, "fancyBox__content")]//text()')
    if not raw_value:
        return "Unknown"

    value = raw_value[0].strip()
    if value.endswith('m'):
        magnitude = 1000000
        number_value = value[3:-1]
    else:
        # No suffix to drop: keep the last character of the number.
        magnitude = 1
        number_value = value[3:]

    amount = Price.fromstring(number_value).amount_float
    if amount is None:
        return "Unknown"
    return str(amount * magnitude)
def update_legobrasil_prices():
    """Crawl the legobrasil.com.br listing and dump prices to JSON files.

    Result pages are requested one after another until a page has no
    ``<article>`` elements.  For every article with a ``lowPrice`` tag, a
    ``SetPrice`` is recorded in the BRL map and ``get_brickset_price`` is
    asked for the matching USD price.  Both maps are written to
    ``brl.json`` and ``usd.json`` at the end.

    A failure on a single article is printed with its traceback and the
    crawl continues.  Ctrl-C while an article is being processed stops the
    whole crawl and still writes what was collected so far; previously it
    only skipped the rest of the current page and the loop went on to the
    next one.
    """
    usd_map = {}
    brl_map = {}
    idx = 1
    interrupted = False

    while not interrupted:
        # Not sure what the params mean...
        response = requests.get(
            'https://www.legobrasil.com.br/buscapagina?PS=48&sl=09c5c48d-84de-48ea-8bb2-01fe3dad1f9a&cc=48&sm=0&PageNumber={}'
            .format(idx))
        soup = BeautifulSoup(response.text, 'html.parser')
        article_lst = soup('article')
        if not article_lst:
            break

        for article_tag in article_lst:
            try:
                h3 = article_tag.find('h3')
                name = h3.contents[0].string
                # The set number follows ": " in the heading's <span> text.
                set_number = h3.find('span').text.split(': ')[1]
                url = article_tag.find(itemprop='url')['href']
                price_tag = article_tag.find(itemprop='lowPrice')
                if not price_tag:
                    continue

                price = Price.fromstring(price_tag.string.strip()).amount
                brl_set_price = SetPrice(
                    set=set_number,
                    price=price,
                    name=name,
                    date=datetime.utcnow().isoformat(),
                    url=url)
                print(brl_set_price)
                brl_map[brl_set_price.set] = brl_set_price

                bs_url = f'https://brickset.com/sets/{brl_set_price.set}-1/'
                usd_set_price = get_brickset_price(bs_url, brl_set_price.set)
                if usd_set_price:
                    usd_map[usd_set_price.set] = usd_set_price
                    print(usd_set_price)
            except KeyboardInterrupt:
                interrupted = True
                break
            except Exception:
                # Best effort: report the article that failed and move on.
                traceback.print_exc()

        idx += 1

    with open('brl.json', 'w') as f:
        json.dump(brl_map, f, indent='\t')
    with open('usd.json', 'w') as f:
        json.dump(usd_map, f, indent='\t')