def kimetsu_search(word, csv, dir):
    """Look up ``word`` in the "name" column of a CSV and register it if absent.

    The names are read from ``{CUR_DIR}/{csv}``. The (possibly extended) list
    is then written to ``{dir}/{csv}`` when ``dir`` is truthy, otherwise back
    to ``{CUR_DIR}/{csv}``. Progress and errors are reported through
    ``eel.view_log_js``.

    Args:
        word: Name to search for.
        csv: CSV file name, used for both the source and the destination.
        dir: Destination folder; a falsy value means ``CUR_DIR``.
    """
    try:
        # Load the current list of names.
        df = pd.read_csv(f'{CUR_DIR}/{csv}')
        source = list(df["name"])

        # Search; a word that is not yet known gets appended.
        if word in source:
            eel.view_log_js(f'『{word}』はあります')
        else:
            eel.view_log_js(f'『{word}』はありません')
            eel.view_log_js(f'『{word}』追加します')
            source.append(word)

        # Write the list back out.
        df = pd.DataFrame(source, columns=["name"])
        if not dir:
            df.to_csv(f'{CUR_DIR}/{csv}', encoding="utf_8-sig")
        else:
            try:
                df.to_csv(f'{dir}/{csv}', encoding="utf_8-sig")
            except OSError:
                # Only filesystem failures mean "bad path"; anything else
                # is a real bug and must not be hidden behind this message.
                eel.view_log_js(f'{dir}/{csv}に保存できませんでした。パスが誤っています。')
    except OSError:
        eel.view_log_js(f'{CUR_DIR}/{csv}は存在しません')
def kimetsu_search(word, saveFolder):
    """Run the search against ``source.csv`` in ``saveFolder`` and log the result.

    The list returned by ``search.kimetsu_search`` is joined with commas,
    JSON-encoded (non-ASCII characters kept as-is) and sent to the UI log.

    Args:
        word: Name to search for.
        saveFolder: Folder that holds ``source.csv``; a trailing ``/`` is
            optional. An empty value falls back to the current directory.
    """
    if not saveFolder:
        # Indexing saveFolder[-1] on an empty string would raise IndexError.
        saveFile = "source.csv"
    elif saveFolder.endswith('/'):
        saveFile = saveFolder + "source.csv"
    else:
        saveFile = saveFolder + "/source.csv"

    source = search.kimetsu_search(word, saveFile)
    joined = ','.join(source)  # local renamed so the builtin ``str`` is not shadowed
    eel.view_log_js(json.dumps(joined, ensure_ascii=False))
def calc_payment(self, amount):
    """Store the tendered amount and the change, then report the outcome.

    ``amount`` is converted with ``int``. ``payment_amount`` and
    ``return_amount`` are always updated; the UI log shows either the
    payment summary or an insufficient-payment error.
    """
    paid = int(amount)
    total = self.order_toral_amount
    self.payment_amount = paid
    self.return_amount = paid - total

    # Guard clause: the customer has not handed over enough money.
    if paid < total:
        eel.view_log_js("【ERROR】お支払い金額が不足しています。再度ご入力をお願いします。")
        return

    eel.view_log_js(
        f'お支払い金額:{self.payment_amount}円 おつり:{self.payment_amount-self.order_toral_amount}円'
    )
def receipt(self, item_money):
    """Build the receipt text, save it to ``レシート.txt`` and display it.

    The receipt contains the current timestamp, every entry of
    ``self.buy_item_list`` concatenated, the total, the tendered amount and
    the change.

    Args:
        item_money: Amount tendered; converted with ``int`` to compute change.
    """
    dt_now = datetime.datetime.now()
    return_money = int(item_money) - self.total_price
    res1 = ''.join(''.join(map(str, x)) for x in self.buy_item_list)
    res2 = f"{dt_now}\n{res1}\n合計金額{self.total_price}円\nお預かり{item_money}円\nおつり{return_money}円 "

    # Explicit encoding: the receipt is Japanese text, and the platform
    # default codec cannot encode it on many non-Japanese systems.
    with open("レシート.txt", mode='w', encoding="utf-8") as f:
        f.write(res2)

    print(res2)
    eel.view_log_js(res2)
def kimetsu_search(filename, search_value):
    """
    Search ``filename`` for ``search_value`` and show the result in the UI log.

    Parameters
    ----------
    filename : string
        Name of the file to search.
    search_value : string
        Text to search for.
    """
    # Hand the search result straight to the JavaScript side.
    eel.view_log_js(search.kimetsu_search(filename, search_value))
def view_order_item_info(self):
    """Log one line per ordered item and the grand total.

    Resets ``order_toral_amount`` and ``order_info``, then matches each
    ``(code, quantity)`` entry of ``item_order_list`` against ``item_master``
    by item code. Every match is logged, recorded in ``order_info`` and added
    to the running total.
    """
    self.order_toral_amount = 0
    self.order_info = []

    for order in self.item_order_list:
        for item in self.item_master:
            # Skip master entries that are not the ordered item.
            if order[0] != item.get_item_code():
                continue
            info = f'・商品名:{item.get_item_name()} 価格:{item.get_price()}円 個数:{order[1]}個 小計:{int(item.get_price())*int(order[1])}円'
            eel.view_log_js(info)
            self.order_info.append(info)
            self.order_toral_amount += int(item.get_price()) * int(order[1])

    eel.view_log_js(f"総額:{self.order_toral_amount}円")
def postage_price(self, driver, wait, Todofuken_name):
    """Scrape the shipping fee of each queued auction and add it to its price.

    For every auction id in ``postage_price_list`` the auction page is opened,
    the shipping-detail dialog is shown, ``Todofuken_name`` is chosen in the
    destination select box and the displayed fee is read. The fee is added to
    the matching entry of ``items_price`` (located through ``items_id``).
    ``postage_price_list``, ``items_id`` and ``items_price`` are not defined
    in this method; they come from the enclosing scope.

    Args:
        driver: Selenium WebDriver used for navigation.
        wait: WebDriverWait used to wait for the page to render.
        Todofuken_name: Visible text of the destination option to select.
    """
    # =========== Fetch shipping fees with Selenium ================
    # Scrape every entry of postage_price_list, look up the index of the
    # item id in items_id, and update the items_price entry at that index.
    for i, item in enumerate(postage_price_list):
        print('item')
        print(item)
        driver.set_window_size('1920', '1080')
        driver.get("https://page.auctions.yahoo.co.jp/jp/auction/" + item)
        wait.until(
            EC.visibility_of_element_located(
                (By.CLASS_NAME, "ProductInformation__item")))
        # Clicks the element with id "js-prMdl-close" via JavaScript.
        # NOTE(review): presumably dismisses a promo modal — confirm.
        driver.execute_script(
            'document.querySelector("#js-prMdl-close").click()')
        postageDetailBuy_element = driver.find_element_by_id(
            "postageDetailBuy")
        try:
            postageDetailBuy_element.click()
        except ElementNotInteractableException:
            # The shipping-detail link cannot be clicked: skip this item and
            # leave its price untouched.
            print('ElementNotInteractableException')
            continue
        select_element = driver.find_element_by_xpath(
            "//div[@class='SelectWrap']/select")
        # select_element = driver.find_elements_by_class_name("Select")
        select_object = Select(select_element)
        select_object.select_by_visible_text(Todofuken_name)
        # time.sleep(3)
        postagePrice_element = driver.find_element_by_xpath(
            "//dd[@class='BidModal__postageDetail']/div[@class='BidModal__postagePrice']"
        )
        # Keep the text before the first '円' and strip thousands separators.
        postagePrice = (postagePrice_element.text).split('円')[0].replace(
            ',', '')
        if (postagePrice == '送料未定'):
            # Fee undetermined: nothing to add.
            continue
        elif (postagePrice == '送料未定(着払い)'):
            # Undetermined cash-on-delivery fee: a flat 3000 is used instead.
            postagePrice = 3000
        index = items_id.index(item)
        # items_price holds strings; add numerically, then store as a string.
        items_price[index] = str(
            int(items_price[index]) + int(postagePrice))
        eel.view_log_js(
            str(i + 1) + "/" + str(len(postage_price_list)) + "商品目")
def kimetsu_search(word, output_file, add_flg):
    """Search the "name" column of ``output_file`` and optionally add ``word``.

    A missing file is treated as an empty list. The result is printed and
    sent to the UI log, and the list is written back to ``output_file``.

    Args:
        word: Name to search for.
        output_file: Path of the CSV to read and rewrite.
        add_flg: ``"1"`` to append a word that was not found; any other
            value leaves the list unchanged.
    """
    # Load the search targets; start from scratch when the file is absent.
    try:
        source = list(pd.read_csv(output_file)["name"])
    except FileNotFoundError:
        source = []

    # Search.
    if word in source:
        print("『{}』はあります".format(word))
        eel.view_log_js("『{}』はあります".format(word))
    else:
        print("『{}』はありません".format(word))
        eel.view_log_js("『{}』はありません".format(word))
        # Register the word only when the caller asked for it.
        if add_flg == "1":
            source.append(word)
            eel.view_log_js("『{}』を追加しました".format(word))

    # Write the CSV.
    df = pd.DataFrame(source, columns=["name"])
    df.to_csv(output_file, encoding="utf_8-sig")
    print(source)
def kimetsu_search(word, csv_name):
    """Search ``./<csv_name>`` for ``word``; append it when it is missing.

    The outcome is printed and sent to the UI log, and the "name" list is
    written back to the same CSV afterwards.
    """
    csv_path = "./{}".format(csv_name)
    source = list(pd.read_csv(csv_path)["name"])

    if word not in source:
        # Unknown character: report it and register it.
        print("『{}』はありません".format(word))
        eel.view_log_js("『{}』はいません".format(word))
        eel.view_log_js("『{}』を追加します".format(word))
        source.append(word)
    else:
        print("『{}』はいます".format(word))
        eel.view_log_js("『{}』はいます".format(word))

    # Persist the list.
    pd.DataFrame(source, columns=["name"]).to_csv(csv_path,
                                                  encoding="utf_8-sig")
    print(source)
def kimetsu_search(word, file_name, reslt_write):
    """Search the "name" column of ``file_name`` for ``word``; add it if absent.

    The outcome goes to the UI log and the list is written to
    ``./source.csv``.

    NOTE(review): the output path is hard-coded to ``./source.csv`` rather
    than ``file_name``, and ``reslt_write`` is never read — confirm intent.
    """
    names = pd.read_csv(file_name)
    source = list(names["name"])

    if word not in source:
        eel.view_log_js("{}はありません" .format(word))
        # Register the missing word.
        source.append(word)
    else:
        print("『{}』はあります".format(word))
        eel.view_log_js("{}はあります。".format(word))

    # Write the CSV.
    updated = pd.DataFrame(source, columns=["name"])
    updated.to_csv("./source.csv", encoding="utf_8-sig")
    print(source)
def search_master(self, total_price):
    """Match orders against the item master, log each hit and total the price.

    ``self.total_price`` starts at ``total_price``. For each master item,
    every ``(code, count)`` pair from ``item_order_list`` and
    ``order_count_list`` with a matching code is printed, logged, recorded
    via ``add_buy_order`` and added to the total.
    """
    self.total_price = total_price
    orders = zip  # local alias, re-evaluated for every master item below

    for item in self.item_master:
        for item_order, order_count in orders(self.item_order_list,
                                              self.order_count_list):
            if item.item_code != item_order:
                continue
            print(
                f"商品コード{item.item_code}:{item.item_name}¥{item.price}円/{order_count}個"
            )
            eel.view_log_js(
                f"商品コード{item.item_code}:{item.item_name}¥{item.price}円/{order_count}個"
            )
            self.add_buy_order(
                f"{item.item_name} ¥{item.price}円 /{order_count}個\n")
            self.total_price = (int(item.price) * int(order_count)
                                + self.total_price)

    print(f"\n合計金額は{self.total_price}円")
    eel.view_log_js(f"\n合計金額は{self.total_price}円")
def main(path):
    """Look up tracking numbers from an Excel sheet, ten at a time.

    Column 1 of the workbook at ``path`` supplies the values typed into the
    Sagawa inquiry form; the text of each result cell is stored in column 2
    and the workbook is saved back to ``path``.

    Args:
        path: Path of the Excel file to read and overwrite.

    Returns:
        ``1`` when the workbook cannot be read, otherwise ``None``.
    """
    # Start the driver.
    driver = set_driver(False)
    try:
        try:
            df = pd.read_excel(path, dtype={1: str, 2: str})
            eel.view_log_js("ファイルの読み込みが完了しました")
        except Exception:
            eel.view_log_js("ファイルの読み込みに失敗しました")
            return 1

        # Number of rows to process.
        data_length = len(df.index)
        print(data_length)
        # Number of batches needed when processing ten rows at a time.
        get_data_times = (data_length - 1) // 10 + 1

        for i in range(get_data_times):
            driver.get(
                "https://k2k.sagawa-exp.co.jp/p/sagawa/web/okurijoinput.jsp")
            time.sleep(5)
            data = df.iloc[i * 10:i * 10 + 10, 1].tolist()

            # Fill in the form; inputs are addressed by tabindex 1..10.
            for tabindex, value in enumerate(data, start=1):
                xpath = "//input[@tabindex=" + str(tabindex) + "]"
                driver.find_element_by_xpath(xpath).send_keys(value)

            # Submit.
            driver.find_element_by_id("main:toiStart").click()
            time.sleep(5)

            # Collect the results and store them next to their inputs.
            elements = driver.find_elements_by_xpath('//td[@colspan=3]')
            for count, element in enumerate(elements, start=1):
                eel.view_log_js(f"{i*10+count}件目が完了しました。")
                df.iat[i * 10 + count - 1, 2] = element.text

        # Write the workbook back.
        df.to_excel(path, index=False)
        eel.view_log_js("ファイルの出力が完了しました")
    finally:
        # quit() ends the whole driver session (close() only closes the
        # window) and now also runs when scraping raises.
        driver.quit()
def kimetsu_search(word, csv_name):
    """Search ``./<csv_name>`` for ``word`` and ask whether to register it.

    When the word is missing, the user is prompted on the console
    (``input``); answering ``'0'`` appends the word. The list is written
    back to the same CSV in every case.
    """
    csv_path = "./{}".format(csv_name)
    source = list(pd.read_csv(csv_path)["name"])

    if word not in source:
        print('{}は見つかりませんでした'.format(word))
        eel.view_log_js("『{}』は見つかりませんでした".format(word))
        # Let the user decide whether the unknown character is added.
        add = input('キャラクターを新規に登録しますか? 登録する→0 登録しない→1>>>')
        if add != '0':
            print('追加しませんでした')
            eel.view_log_js("『{}』を追加しませんでした".format(word))
        else:
            source.append(word)
            print('キャラクターを追加しました')
            eel.view_log_js("『{}』を追加しました".format(word))
    else:
        print('{}が見つかりました'.format(word))
        eel.view_log_js("『{}』が見つかりました".format(word))

    pd.DataFrame(source, columns=["name"]).to_csv(csv_path,
                                                  encoding="utf_8-sig")
    print(source)
def kimetsu_search(word, json_name):
    """Show the ``tokutyou`` value stored for ``word`` in ``./<json_name>``.

    When at least one row has ``name == word``, the first row's ``tokutyou``
    is printed and logged. Otherwise the word is reported as unregistered
    and passed to ``eel.addData``.

    Args:
        word: Name to look up.
        json_name: JSON file name, resolved relative to the current directory.
    """
    # Load the search targets.
    df = pd.read_json("./{}".format(json_name))

    # Search.
    find_df = df[df["name"] == word]
    if len(find_df) >= 1:
        message = f"『{word}』の特徴は{list(find_df['tokutyou'])[0]}です"
        print(message)
        eel.view_log_js(message)
    else:
        print("『{}』はありません".format(word))
        eel.view_log_js("『{}』は未登録です".format(word))
        eel.view_log_js("『{}』を追加します".format(word))
        # Add the data.
        eel.addData(word)
def kimetsu_search(word, csv):
    """Search ``./<csv>`` for ``word``; append it when missing, then save.

    The outcome is sent to the UI log and the "name" list is written back
    to the same CSV.
    """
    names = pd.read_csv(f"./{csv}")
    source = list(names["name"])

    if word not in source:
        eel.view_log_js(f"『{word}』はありません")
        eel.view_log_js(f"『{word}』を追加します")
        # Register the missing word.
        source.append(word)
    else:
        eel.view_log_js(f"『{word}』はあります")

    # Write the CSV.
    updated = pd.DataFrame(source, columns=["name"])
    updated.to_csv(f"./{csv}", encoding="utf_8-sig")
    print(source)
def search_item(max, word):
    """Search Rakuten Ichiba for ``word`` and export name/price pairs to CSV.

    Result pages are requested starting at page 1 until ``max`` items have
    been collected, a page comes back without items, or the page cap is
    reached. Only ``itemName`` and ``itemPrice`` are kept. The rows are
    printed, written to ``csv_file`` and the UI log is notified.

    NOTE(review): the original parsed ``max`` but never used it and requested
    page 0 only. It is interpreted here as the maximum number of items to
    collect — confirm against the UI field that feeds it.

    Args:
        max: Upper bound on the number of items (anything ``int`` accepts).
        word: Search keyword.
    """
    limit = int(max)
    eel.view_log_js("検索開始")

    url = "https://app.rakuten.co.jp/services/api/IchibaItem/Search/20170706"
    app_id = "1021298500308407354"
    item_key = ['itemName', 'itemPrice']
    # NOTE(review): cap taken from the API's documented page range — verify.
    last_page = 100

    item_list = []
    page = 1
    while len(item_list) < limit and page <= last_page:
        search_params = {
            "format": "json",
            "keyword": word,
            "applicationId": app_id,
            "availability": 0,
            "hits": 30,
            "page": page,
            "sort": "standard"
        }
        response = requests.get(url, search_params)
        result = response.json()

        # An error response or an exhausted search has no 'Items' to read.
        entries = result.get('Items', [])
        if not entries:
            break

        for entry in entries:
            item = entry['Item']
            item_list.append(
                {key: value for key, value in item.items() if key in item_key})
        page += 1

    # The last page may overshoot the requested amount.
    item_list = item_list[:limit]
    print(item_list)

    # CSV output.
    df = pd.DataFrame(item_list)
    df.to_csv(csv_file, encoding='utf-8')
    eel.view_log_js("検索完了")
    eel.view_log_js("CSVファイル:{}".format(csv_file))
def main():
    """Scrape the auction listing, add shipping fees and export to Excel.

    Settings (page range, destination prefecture, listing URL, output file
    name) are read from the front end through ``eel``. The collected values
    come from lists that are not defined in this function (``items_*``); they
    are combined into a DataFrame, rows lacking an id or condition text are
    dropped, and the rest is written column by column into a new workbook.
    Any exception is reported to the UI log with its traceback.
    """
    try:
        path = os.getcwd()
        driver_path = ChromeDriverManager(path=path).install()
        driver = set_driver(driver_path)
        try:
            driver.implicitly_wait(10)
            wait = WebDriverWait(driver, 5)
            myscraping = MyScraping()
            num_list = eel.page()().split(',')
            Todofuken_name = eel.Todofuken_name()()
            name_list, top_img_urls = myscraping.get_top_detail(
                driver, wait, eel.url()(), num_list)
            eel.view_log_js('\n商品一覧ページからの収集が完了しました')
            eel.view_log_js('\n商品の送料情報を収集します')
            myscraping.postage_price(driver, wait, Todofuken_name)
        finally:
            # Release the browser even when scraping raises.
            driver.quit()

        eel.view_log_js('商品の送料情報収集が完了しました')
        eel.view_log_js('\nファイルの作成をします')
        print('===============================')

        # Keys are the destination spreadsheet columns.
        result_array = {
            "M": items_condition,
            "N": items_name_en,
            "O": items_id,
            "Q": items_price,
            "W": top_img_urls,
            "X": items_Descriptions,
            "Y": items_condition_en
        }
        result_df = pd.DataFrame(result_array)
        print('result_df1')
        print(len(result_df))
        result_df.dropna(subset=['O', 'Y'], inplace=True)
        print('result_df2')
        print(len(result_df))

        wb = openpyxl.Workbook()
        ws = wb.active
        ws["M1"] = "*ConditionID"
        ws["N1"] = "Title"
        ws["O1"] = "CustomLabel"
        ws["Q1"] = "元値"
        ws["W1"] = "PicURL"
        ws["X1"] = "*Description"
        ws["Y1"] = "ConditionDescription"

        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
        for column_name, item_list in result_df.items():
            print('len(item_list)')
            print(len(item_list))
            # Row 1 holds the headers, so data starts at row 2.
            for row, item in enumerate(item_list, start=2):
                ws[column_name + str(row)] = item

        wb.save(eel.file_name()())
        eel.view_log_js('ファイルの作成が完了しました')
    except Exception:
        import traceback
        t = traceback.format_exc()
        eel.view_log_js(t)
        eel.view_log_js("予期せぬエラーが発生しました。")
def get_top_detail(self, driver, wait, main_url, num_list):
    """Scrape each listed auction and collect the data for the export sheet.

    For every listing page in the range ``num_list[0]``..``num_list[1]``
    (inclusive), each product's auction page is opened and parsed with
    BeautifulSoup. Items without a buy-now price, or whose condition text is
    '全体的に状態が悪い', are skipped. For the rest, the condition, the
    translated title, the price, up to 10 image URLs and the description are
    appended to the ``items_*`` / ``top_img_urls`` lists, which are not
    defined in this method. Ids of items without free shipping are also
    queued in ``postage_price_list``.

    Args:
        driver: Selenium WebDriver used for navigation.
        wait: WebDriverWait used to wait for page elements.
        main_url: Listing URL; a ``&b=<offset>`` suffix is appended per page.
        num_list: Two-element sequence holding the first and last page number.

    Returns:
        The ``items_name_ja`` and ``top_img_urls`` lists.
    """
    # Load the word lists; the file names come from the front end via eel.
    delete_before_file = pd.read_csv('./' + eel.delete_word_before()(),
                                     header=None,
                                     names=['削除するワード_before'])
    delete_before_list = delete_before_file['削除するワード_before'].tolist()
    delete_after_file = pd.read_csv('./' + eel.delete_word_after()(),
                                    header=None,
                                    names=['削除するワード_after'])
    delete_after_list = delete_after_file['削除するワード_after'].tolist()
    add_word_file = pd.read_csv('./' + eel.fill_in_word()(),
                                header=None,
                                names=['追加するワード'])
    add_word_list = add_word_file['追加するワード'].tolist()
    # Title handling: the fixed words and the length left over out of 80.
    add_name = eel.add_word()()
    add_name_len = 80 - len(add_name)
    add_name_split_dot = add_name.split(',')
    add_name_split_brank = add_name.replace(",", " ").split(" ")
    # Per listing page.
    for i in range(int(num_list[0]), int(num_list[1]) + 1):
        eel.view_log_js("\n" + str(i) + "ページ目開始")
        # NOTE(review): this offset is 1, 102, 203, ... for pages 1, 2, 3;
        # with 100 items per page one would expect 1, 101, 201 — confirm.
        page_url = main_url + "&b=" + str(i + (i * 100 - 100))
        driver.get(page_url)
        wait.until(
            EC.visibility_of_element_located(
                (By.CLASS_NAME, "Products__items")))
        # items = driver.find_elements_by_class_name('Products__items')
        html = driver.page_source.encode('utf-8')
        # ========== Added later ============
        soup = BeautifulSoup(html, 'lxml')
        items = soup.find_all("li", class_="Product")
        print(len(items))
        # This loop only assigns item_id; its debug prints are disabled.
        for item in items:
            item_id = item.findAll("a")[0].get("data-auction-id")
            # print('============================================')
            # print(item_id)
        item_counter = 0
        # Per item.
        for j, item in enumerate(items):
            item_id = item.findAll("a")[0].get("data-auction-id")
            page_url = 'https://page.auctions.yahoo.co.jp/jp/auction/' + item_id
            driver.get(page_url)
            wait.until(
                EC.visibility_of_element_located(
                    (By.CLASS_NAME, 'ProductImage__inner')))
            html = driver.page_source.encode('utf-8')
            soup = BeautifulSoup(html, 'lxml')
            Price_buynow = soup.find_all("div", class_="Price--buynow")
            # Items without a buy-now price are excluded.
            if (len(Price_buynow) == 0):
                eel.view_log_js(
                    str(j + 1) + "/" + str(len(items)) + "商品目 即決価格なし 除外")
                continue
            print('============================================')
            print('item_id')
            print(item_id)
            # Condition ID (column M)
            # Extract the condition information of the item.
            item_condition_element = soup.find_all(
                "tr", class_="ProductTable__row")
            # Convert to text, dropping the characters in the class.
            item_condition_text = re.sub(
                r'[\n ]', '', item_condition_element[1].findAll("a")[0].text)
            # Excluded condition: skip and move on to the next item.
            if item_condition_text == '全体的に状態が悪い':
                eel.view_log_js(
                    str(j + 1) + "/" + str(len(items)) + "商品目 状態が悪いため除外")
                continue
            # Set the condition ID.
            item_condition_id = '3000'
            # Title (column N)
            # Extract the title, clean it, translate it, clean it again.
            item_name_ja_element = soup.find_all(
                "h1", class_="ProductTitle__text")
            item_name_ja = item_name_ja_element[0].text
            item_name_ja_delete = self.delete_word_before(
                item_name_ja, delete_before_list)
            item_name_en = self.translate(item_name_ja_delete)
            item_name_en_delete_list = self.delete_word_after(
                item_name_en, delete_after_list, add_name_split_brank)
            # OrderedDict.fromkeys removes duplicates while keeping order.
            item_name_en = ' '.join(
                OrderedDict.fromkeys(item_name_en_delete_list))
            item_name_en = self.add_word(add_name_len, item_name_en,
                                         add_word_list)
            # Wrap the title in the first and second comma-separated words.
            item_name_en = add_name_split_dot[0] + " " + \
                item_name_en + " " + add_name_split_dot[1]
            # Collapse runs of whitespace into a single space.
            item_name_en = re.sub(r'([\s]+)', ' ', item_name_en)
            item_name_en = item_name_en.title()
            # Item id (column O): already obtained above.
            # Price (column Q)
            # Shipping check.
            postage_price_free_element = soup.find_all(
                "span", class_="Price__postageValue--free")
            # Pick up the items that do not have free shipping.
            if (len(postage_price_free_element) == 0):
                # Originally meant to pick up items that are not
                # cash-on-delivery; that check is disabled, so the text read
                # here is not used.
                postage_price_text_element = Price_buynow[0].findAll(
                    "span", class_="Price__postageValue")[0].text
                # if(postage_price_text_element != '着払い'):
                # The fee is fetched and added later by postage_price.
                postage_price_list.append(item_id)
            price_element = Price_buynow[0].findAll(
                "dd", class_="Price__value")[0].text
            item_price = 0
            if '税込' in price_element:
                # Takes the fourth piece after splitting on '円', '税', '込'.
                # NOTE(review): presumably the tax-included amount — confirm.
                price_element = re.split('[円税込]', price_element)[3]
                item_price = re.sub(r'[, ]', '', price_element)
            else:
                item_price = price_element.split('円')[0].replace(
                    ',', '').replace('\n', '')
            # Images, up to 10 (column W)
            item_images_element = soup.find_all(
                "div", class_="ProductImage__inner")
            limit = 10
            urls = []
            for k, item_image in enumerate(item_images_element):
                urls.append(item_image.findAll("img")[0].get("src"))
                if k + 1 == limit:
                    break
            image_urls = self.image_url_add(urls)
            # Description (column X)
            item_Description_element = soup.find_all(
                "div", class_="ProductExplanation__commentBody")
            item_Description = self.Description_string(
                item_Description_element[0].text)
            # Condition text (column Y)
            item_condition_en = self.judge_condition(
                item_condition_text.replace('\n', ''))
            # Store the collected values.
            items_condition.append(item_condition_id)
            items_name_ja.append(item_name_ja)
            items_name_en.append(item_name_en)
            items_id.append(item_id)
            items_price.append(item_price)
            top_img_urls.append(image_urls)
            items_Descriptions.append(item_Description)
            items_condition_en.append(item_condition_en)
            eel.view_log_js(str(j + 1) + "/" + str(len(items)) + "商品目")
            item_counter += 1
        else:
            # for/else: runs once the item loop finishes without a break.
            eel.view_log_js(str(i) + "ページ目終了")
            eel.view_log_js(str(item_counter) + "件抽出")
    return items_name_ja, top_img_urls
def main():
    """Scrape job postings for ``search_keyword`` from Mynavi and export a CSV.

    The top page is opened, the popup is dismissed on a best-effort basis and
    the keyword is submitted. On every result page the company name, sales
    copy, employment status and first-year pay are collected, and the next
    page is followed while the page shows two next-page arrows. A summary is
    sent to the UI log and the rows are written to ``CSV_FILE``.
    """
    log("処理開始")
    log("キーワード:" + search_keyword + "で検索")
    eel.view_log_js("キーワード:「{}」で検索を開始します。".format(search_keyword))

    # Raw string: same path, without invalid escape sequences (\K, \D, \c).
    driver = webdriver.Chrome(r"C:\Users\Kanomata\Desktop\chromedriver.exe")
    try:
        driver.get("https://tenshoku.mynavi.jp/")
        time.sleep(5)

        # Close the popup; it is not always present, so failure is only logged.
        try:
            driver.execute_script(
                'document.querySelector(".karte-close").click()')
            time.sleep(5)
            driver.execute_script(
                'document.querySelector(".karte-close").click()')
        except Exception as e:
            log(e)

        # Keyword search.
        search_form = driver.find_element_by_class_name("topSearch__text")
        search_form.send_keys(search_keyword)
        search_btn = driver.find_element_by_class_name("topSearch__button")
        search_btn.click()

        # Result columns and counters.
        company_name_null_list = []
        sell_point_null_list = []
        employee_status_null_list = []
        pay_null_list = []
        page = 1
        # Counters start at 0 and are bumped before logging, so the per-row
        # messages are numbered from 1 and the final summary is exact.
        count = 0
        success = 0
        fail = 0

        while True:
            # Company name, sales copy, employment status and pay table of
            # every posting on the current page.
            company_name_list = driver.find_elements_by_css_selector(
                ".cassetteRecruit__heading .cassetteRecruit__name")
            sell_point_list = driver.find_elements_by_css_selector(
                ".cassetteRecruit__heading .cassetteRecruit__copy")
            employee_status_list = driver.find_elements_by_css_selector(
                ".cassetteRecruit__heading .labelEmploymentStatus")
            pay_list = driver.find_elements_by_css_selector(
                ".cassetteRecruit .tableCondition")

            for company_name, sell_point, employee_status, pay in zip(
                    company_name_list, sell_point_list, employee_status_list,
                    pay_list):
                count = count + 1
                try:
                    # Extract the whole row first: a failure half-way must
                    # not leave the four columns with different lengths.
                    row = (
                        company_name.text,
                        sell_point.text,
                        employee_status.text,
                        find_table_target_word(
                            pay.find_elements_by_tag_name("th"),
                            pay.find_elements_by_tag_name("td"), "初年度年収"),
                    )
                except Exception as e:
                    fail = fail + 1
                    log("抽出失敗(" + str(fail) + "/" + str(count) + "回目)")
                    log(e)
                    print("抽出失敗(" + str(fail) + "/" + str(count) + "回目)")
                    continue

                company_name_null_list.append(row[0])
                sell_point_null_list.append(row[1])
                employee_status_null_list.append(row[2])
                pay_null_list.append(row[3])
                success = success + 1
                log("抽出成功(" + str(success) + "/" + str(count) + "回目)")
                print("抽出成功(" + str(success) + "/" + str(count) + "回目)")

            search_next = driver.find_elements_by_class_name(
                "iconFont--arrowLeft")
            # Two next-page arrows on the page: go on; otherwise finish.
            if len(search_next) == 2:
                next_page_link = search_next[0].get_attribute("href")
                driver.get(next_page_link)
                print(page, "ページ目終了")
                page = page + 1
                time.sleep(5)
            else:
                print("全ページ終了")
                log("抽出処理終了")
                eel.view_log_js("検索が完了しました")
                eel.view_log_js("-------------------------")
                eel.view_log_js("成功:" + str(success) + "/" + str(count))
                eel.view_log_js("失敗:" + str(fail) + "/" + str(count))
                eel.view_log_js("CSVファイル:{}".format(CSV_FILE))
                break
    finally:
        # Always end the browser session, including on errors.
        driver.quit()

    # CSV output.
    df = pd.DataFrame({
        "企業名": company_name_null_list,
        "セールスポイント": sell_point_null_list,
        "採用ステータス": employee_status_null_list,
        "初年度年収": pay_null_list,
    })
    df.to_csv(CSV_FILE, encoding="utf-8-sig")
def view_item_list(self):
    """Send the item code (first element) of every order to the UI log."""
    for order in self.item_order_list:
        code = order[0]
        eel.view_log_js("商品コード:{}".format(code))
def translate_to_japanese(text):
    """Translate ``text`` with ``dest='ja'`` and show the result in the UI log."""
    translated = translator.translate(text, dest='ja')
    eel.view_log_js(translated.text)
def kimetsu_search(word, csv):
    """Run ``search.kimetsu_search`` and forward its result to the UI log."""
    # The search result goes straight to the JavaScript output routine.
    eel.view_log_js(search.kimetsu_search(word, csv))
def translate_to_english(text):
    """Translate ``text`` with ``dest='en'`` and show the result in the UI log."""
    translated = translator.translate(text, dest='en')
    eel.view_log_js(translated.text)