def main(verbose):
    """Run ``Scraping(verbose).process`` for Burberry and Versace and print both results.

    verbose: forwarded unchanged to each ``Scraping`` constructor.
    """

    # --- Burberry -----------------------------------------------------------
    burberry_root = "https://us.burberry.com/womens-new-arrivals-new-in/"
    burberry_pages = [
        burberry_root,
        burberry_root + "?start=2&pageSize=120&productsOffset=&cellsOffset=8&cellsLimit=&__lang=en",
    ]

    def burberry_link_filter(link):
        # Keep only links containing the "-p80" marker.
        return "-p80" in link

    def burberry_join(docs):
        # Join all links with '-' and strip every slash.
        return '-'.join(docs).replace('/', '')

    df_burberry = Scraping(verbose).process(
        burberry_pages,
        burberry_link_filter,
        burberry_join,
        "SS2020_Burberry_word_frequency.jpg",
        'burberry')

    # --- Versace ------------------------------------------------------------
    tail = '/us/en-us/women/new-arrivals/new-in/'
    versace_pages = ["https://www.versace.com" + tail]

    def versace_link_filter(link):
        return link.startswith(tail)

    def versace_join(docs):
        # Skip query-string variants of the listing page itself, then keep the
        # first path segment that follows the listing prefix.
        segments = []
        for doc in docs:
            if doc.startswith(tail + "?"):
                continue
            segments.append(doc.replace(tail, '').split('/')[0])
        return '-'.join(segments)

    df_versace = Scraping(verbose).process(
        versace_pages,
        versace_link_filter,
        versace_join,
        "SS2020_Versace_word_frequency.jpg",
        'versace')

    print(df_burberry)
    print(df_versace)
def __init__(self, ha): log.make() me = Scraping() #me.find_url('https://www.naver.com') me.scraping() #self.get_file(self.read_file(self.get_file_list('./')),'download') """ lst = glob.glob(os.path.join('./','data')+'/*')
def main():
    """Scrape every configured provider, then store the words found.

    Step 1 saves the scraped content to the table; step 2 saves the nouns
    extracted from the scraping results to the table.
    """
    providers = [
        'naver',
    ]

    # Step 1: save the scraped content to the table.
    # (A joblib-based parallel variant over `providers` used to live here.)
    for provider in providers:
        Scraping.run(Scraping(), provider)

    # Step 2: save the nouns from the scraping results to the table.
    analyzer = Analyze()
    Analyze.save_words(analyzer)
def getInfos():
    """Build a ``Scraping`` object for the venue schedule pages and return the result of calling it."""
    # Set of URLs to inspect, keyed by venue name.
    venue_urls = {
        "阿佐ヶ谷ロフト": "http://www.loft-prj.co.jp/schedule/lofta",
        "ロフトプラスワン": "http://www.loft-prj.co.jp/schedule/plusone",
    }
    # Hand the URLs to the scraping class; extraction runs in its __call__.
    scraper = Scraping(venue_urls)
    return scraper()
class Heart:
    """Periodically captures a value for each configured stock and stores the quotes."""

    def __init__(self):
        self.__stocks = None
        self.__scraping = Scraping()

    def __scraping_task(self):
        """Capture one quote per stock and insert the batch through ``Mongo``."""
        captured_at = datetime.datetime.now()
        quotes = []
        print(f'Initializing capture at {captured_at}')
        for item in self.__stocks:
            print(f'Getting value from {item.codigo}...')
            price = self.__scraping.get_stock_value(item)
            quotes.append(Quote(item.codigo, captured_at, price))
        Mongo().insert_quotes(quotes)
        print('Values inserted in MongoDB database')

    def __return_stocks_from_environ(self):
        """Return one ``Stock(code, None)`` per comma-separated entry of $STOCKS."""
        return [Stock(code, None) for code in os.environ['STOCKS'].split(',')]

    def play(self):
        """Register the periodic capture job, prepare the stocks and loop forever."""
        print('Robo starting...')
        schedule.every(MINUTES_SCHEDULE).minutes.do(self.__scraping_task)

        print('Capture links from stocks...')
        self.__stocks = self.__return_stocks_from_environ()
        self.__scraping.set_urls(self.__stocks)

        print(f'Schedule started! ({MINUTES_SCHEDULE} minutes)')
        # Poll the scheduler once per second; this loop never exits.
        while True:
            schedule.run_pending()
            time.sleep(1)
def mouse_press_events_for_scraping(self, event):
    """Feed the selectable items under the mouse press into the current scraping.

    Looks at the items in a 5x5 region at the event position and sorts them
    into a point (ellipse item), a lane-edge (positive z-value) and a line
    (anything else). Does nothing when no selectable item is found. Otherwise
    the current selection is cleared, a ``Scraping`` is created if none is
    active, and the items are passed to ``Scraping.add``; a falsy result from
    ``add`` discards the scraping.
    """
    picked_point = None
    picked_line = None       # Item with 0 z-value that is not a point.
    picked_lane_edge = None  # Item with non-zero z-value.

    for candidate in self.items(event.x(), event.y(), 5, 5):
        if not candidate.is_selectable:
            continue
        is_point = isinstance(candidate, QtGui.QGraphicsEllipseItem)
        if is_point and not picked_point:
            picked_point = candidate
        elif candidate.zValue() > 0 and not picked_lane_edge:
            picked_lane_edge = candidate
        else:
            picked_line = candidate
        # Stop scanning once one item of each kind has been found.
        if picked_point and picked_line and picked_lane_edge:
            break

    if not (picked_point or picked_line or picked_lane_edge):
        return

    self.selections.reset_selected_items()
    self.selections.reset_selection_region()

    if not self.scraping:
        self.scraping = Scraping(self.main_window)

    accepted = self.scraping.add(picked_point, picked_line, picked_lane_edge,
                                 self.snap_to_line)
    if not accepted:
        self.scraping = None
def data():
    """Render ``data.html`` with the values returned by six ``Scraping`` location methods."""
    source = Scraping()
    # Dict literals evaluate in order, so the methods run in the same
    # sequence as before: Longyearbyen, Yellowknife, Iqaluit, Nuuk, Qaanaaq,
    # Khatanga.
    context = {
        'longyearbyen': source.Longyearbyen(),
        'yellowknife': source.Yellowknife(),
        'iqaluit': source.Iqaluit(),
        'nuuk': source.Nuuk(),
        'qaanaaq': source.Qaanaaq(),
        'khatanga': source.Khatanga(),
    }
    return render_template('data.html', **context)
def execute(self):
    """Collect the store page and all product listings, then create intents from them.

    Every step uses a freshly constructed ``Scraping`` object, including the
    final ``exit()`` call.
    """
    about = Scraping().homePage()

    # Starred expressions are evaluated left to right, so the four listings
    # are fetched in the same order as before.
    products = [
        *Scraping().pcGames(),
        *Scraping().pcGamesOnSale(),
        *Scraping().xboxProducts(),
        *Scraping().playstationProducts(),
    ]
    Scraping().exit()

    payload = {"Store": about, "Products": products}
    Intents(data=payload).createIntents()
def run(self, demo):
    """Obtain the crossword, scrape answer domains, solve it and show the GUI.

    demo: when truthy the puzzle is fetched through ``Connector``; otherwise
        the five puzzle arrays are loaded from JSON files in the current
        working directory.

    This method does not return normally: it ends with
    ``sys.exit(app.exec_())``.
    """
    if demo:
        # GET CROSSWORD PUZZLE
        # Raw string: "\P" is not a valid escape sequence, so the non-raw
        # literal triggers an invalid-escape warning on current Python. The
        # runtime value is unchanged.
        nyTimesConnector = Connector(
            r"C:\Program Files (x86)/chromedriver.exe")
        nyTimesConnector.connectToPuzzle()
        self.cellNumberArray = nyTimesConnector.cellNumberArray
        self.cellBlockArray = nyTimesConnector.cellBlockArray
        self.cluesAcross = nyTimesConnector.cluesAcross
        self.cluesDown = nyTimesConnector.cluesDown
        self.cellAnswerArray = nyTimesConnector.cellAnswerArray
        self.setClues()
        print("===================\nWEB SCRAPING\n===================")
    else:
        with open('cellBlockArray.json', 'r') as fp:
            self.cellBlockArray = json.load(fp)
        with open('cellNumberArray.json', 'r') as fp:
            self.cellNumberArray = json.load(fp)
        with open('clueAcross.json', 'r') as fp:
            self.cluesAcross = json.load(fp)
        with open('cluesDown.json', 'r') as fp:
            self.cluesDown = json.load(fp)
        with open('answers.json', 'r') as fp:
            self.cellAnswerArray = json.load(fp)
        # Same order as before in this branch: banner first, then setClues().
        print("===================\nWEB SCRAPING\n===================")
        self.setClues()

    # Shared by both branches (previously duplicated in each of them).
    webScrapper = Scraping(self.clues, self.cellAnswerArray,
                           self.cellNumberArray)
    webScrapper.setDomains()
    print("===================\nSOLVING THE PUZZLE\n===================")
    puzzleSolver = newSolver(self.cellBlockArray, self.cellNumberArray,
                             self.cluesDown, self.cluesAcross,
                             webScrapper.domains)

    # SAVE (disabled): snapshot code that was kept in a string literal.
    # with open('cellBlockArray.json', 'w') as fp:
    #     json.dump(self.cellBlockArray, fp, indent=4)
    # with open('cellNumberArray.json', 'w') as fp:
    #     json.dump(self.cellNumberArray, fp, indent=4)
    # with open('clueAcross.json', 'w') as fp:
    #     json.dump(self.cluesAcross, fp, indent=4)
    # with open('cluesDown.json', 'w') as fp:
    #     json.dump(self.cluesDown, fp, indent=4)
    # with open('data.json', 'w') as fp:
    #     json.dump(webScrapper.domains, fp, indent=4)

    print("===================\nSOLUTION\n===================")
    for row in puzzleSolver.solvedPuzzle:
        print(row)

    # DRAW GUI
    import sys
    app = QtWidgets.QApplication(sys.argv)
    MainWindow = QtWidgets.QMainWindow()
    ui = Ui_MainWindow()
    ui.setupUi(MainWindow, self.cellNumberArray, self.cellBlockArray,
               self.cluesAcross, self.cluesDown, self.cellAnswerArray,
               puzzleSolver.solvedPuzzle)
    MainWindow.show()
    sys.exit(app.exec_())
from scraping import Scraping
from selenium import webdriver

# Scraping startup
scraping = Scraping()

# Selenium startup
driver = webdriver.Chrome(
    "/home/repente/prog/python/youtube/parsers/SE00/SE#06/FlashscoreScraping/chromedriver"
)

# Always shut the browser down, even when a collect() call raises; otherwise
# the chromedriver process is left running.
try:
    # driver.get("https://ya.ru")

    # Brazilian Championship Serie A.
    # NOTE(review): the original comment said "2019" but the call passes 2017 — confirm.
    scraping.collect(driver, 'brazil', 'serie-a', 2017)

    # Spanish LaLiga.
    # NOTE(review): the original comment said "from 2012 until 2019" but the
    # call passes 2018 and 2012 — confirm.
    scraping.collect(driver, 'spain', 'laliga', 2018, 2012)
finally:
    driver.quit()
from selenium.common.exceptions import NoSuchElementException
from time import sleep
from scraping import Scraping

# Script entry point: run one scrape-and-insert pass and report any failure
# by printing the exception instead of letting it propagate.
if __name__ == '__main__':
    try:
        scraping = Scraping()
        scraping.scrap_and_insert()
        sleep(1)
        print('The And!!')
    except Exception as error:
        print(error)
from datetime import datetime, timedelta from time import sleep import schedule from ifttt import ifttt_webhook from scraping import Scraping scraper = Scraping(headless=True) last_status = { "tutor_book": None, "reserve_day": None, "lesson_time": None, "exec_day": None } def job(): print("do job at", datetime.now().strftime("%Y/%m/%d %H:%M:%S")) fail_frag = False lessons = None today = datetime.today() lesson_time: datetime = last_status["lesson_time"] if last_status["lesson_time"] is not None and \ lesson_time - timedelta(hours=1) < today < lesson_time + timedelta(hours=1): print("lesson time") elif last_status["tutor_book"] is not None and \ last_status["tutor_book"] == True and \ last_status["reserve_day"] == "today" and \ lesson_time > today: print("already booked") elif last_status["tutor_book"] is not None and \ last_status["tutor_book"] == True and \
def task(v):
    """Construct a ``Scraping`` object, logging "<v> start" before and "<v> end" after."""
    start_format = "%s start"
    end_format = "%s end"

    getLogger().info(start_format, v)
    Scraping()
    getLogger().info(end_format, v)
def tasks():
    """Announce the run interval, then construct a ``Scraping`` object."""
    banner = f'This job is run every {INTERVAL_MINUTES} minutes.'
    print(banner)
    Scraping()
def __init__(self):
    """Create the scraper and start with no stocks loaded."""
    # The two assignments are independent of each other.
    self.__scraping = Scraping()
    self.__stocks = None
from scraping import Scraping

# Script entry point: hand one question page and the output locations for its
# question/answer parts to Scraping.save_datas.
if __name__ == '__main__':
    url = "https://detail.chiebukuro.yahoo.co.jp/qa/question_detail/q13241092928"

    question_paths = {'png': './question.png', 'txt': './question.txt'}
    answer_paths = {'png': './answer.png', 'txt': './answer.txt'}
    save_paths = {'question': question_paths, 'answer': answer_paths}

    Scraping.save_datas(url, save_paths)
# For every movie returned by Movie.getMoviesWithoutDirector(), fetch its IMDb
# full-credits page, extract the directors with Scraping.getDirectors() and
# link each director to the movie.
# NOTE(review): this block was reconstructed from whitespace-collapsed source;
# the nesting of the final "asso" print and of time.sleep(1) is assumed — confirm.
opener = urllib2.build_opener()
# Send a browser-like User-agent header with every request.
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
movies = Movie.getMoviesWithoutDirector()
for id_movie in movies:
    # The value stored under id_movie is zero-padded to 7 digits and used as
    # the numeric part of the IMDb "tt" title id.
    sid = str(movies[id_movie])
    directorsUrl = "http://www.imdb.com/title/tt{0}/fullcredits?ref_=tt_ov_dr#directors".format(sid.zfill(7))
    print directorsUrl
    try:
        request = opener.open(directorsUrl)
    # HTTPError is a subclass of URLError, so it has to be handled first.
    except urllib2.HTTPError:
        print "can't to open directory url " + directorsUrl
    except urllib2.URLError:
        print "error directory url " + directorsUrl
    else:
        directors = Scraping.getDirectors(request.read())
        for director in directors:
            # Look the director up by stripped name; save it when the lookup
            # compares equal to False.
            id_director = Director.find(director.strip())
            if(id_director == False):
                id_director = Director.save(director.strip())
            if(id_director != False):
                try:
                    Movie.addDirector(id_movie, id_director)
                except MySQLdb.IntegrityError:
                    # The association already exists; report and carry on.
                    print "Duplicate entry " + str(id_movie) + " " + str(id_director)
                print "asso " + str(id_movie) + " " + str(id_director)
    # Pause one second between movies.
    time.sleep(1)
class Graphics_view(QtGui.QGraphicsView):
    """Graphics view with zoom/rotate, selection, scraping and edit handling.

    The view keeps three pieces of interaction state:
      * ``selections`` - a ``Selections`` helper owning the selected items
        and the rubber band;
      * ``scraping``   - the active ``Scraping`` object, or None;
      * ``edit_form``  - the open ``Edit_form``, or None.

    NOTE(review): the method bodies were reconstructed from
    whitespace-collapsed source; the nesting inside mousePressEvent in
    particular is assumed — confirm against the original file.
    """

    def __init__(self, graphics_scene, main_window):
        """Configure rendering, flip the Y axis and reset the interaction state."""
        super(Graphics_view, self).__init__(graphics_scene, main_window)
        self.scale(0.925, 0.925)

        self.status_bar = main_window.statusBar()
        self.main_window = main_window

        self.setDragMode(QtGui.QGraphicsView.ScrollHandDrag)
        #self.setCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor))
        self.setRenderHint(QtGui.QPainter.Antialiasing)
        self.setRenderHint(QtGui.QPainter.TextAntialiasing)

        # Flip the view about the X-axis so that the view is in Cartesian co-ordinate system
        # where the Y-axis points upwards.
        matrix = QtGui.QTransform()
        matrix.rotate(180, QtCore.Qt.XAxis)
        self.setTransform(matrix, True)

        # When user presses 'C' or 'A' to rotate the view, we maintain the orientation field
        # so that we can revert to the original orientation when the user presses 'O' (letter O).
        # The field is updated in exactly one place: rotate().
        self.orientation = 0

        self.setMouseTracking(True)

        self.selections = Selections(self)
        self.scraping = None
        self.edit_form = None
        self.snap_to_line = True

    def wheelEvent(self, event):
        """Rotate the view when <Ctrl> is held, otherwise zoom."""
        if event.modifiers() == QtCore.Qt.ControlModifier:
            self.rotate(event.delta() / 240.0)
        else:
            self.zoom(1.41**(-event.delta() / 240.0))

    def rotate(self, delta_angle):
        """Rotate the view by ``delta_angle`` degrees and record it in ``orientation``."""
        self.orientation = self.orientation + delta_angle
        super(Graphics_view, self).rotate(delta_angle)

    def zoom(self, factor):
        """Scale the view uniformly by ``factor``."""
        self.scale(factor, factor)

    def resizeEvent(self, event):
        """Do nothing on resize.

        The leading ``return`` disables the rescaling code that follows; it is
        kept unchanged for reference.
        """
        return
        rect = self.scene().sceneRect()
        size = event.size()
        x_factor = size.width() / rect.width()
        y_factor = size.height() / rect.height()
        factor = x_factor if x_factor > y_factor else y_factor
        self.scale(factor, factor)

    def keyPressEvent(self, event):
        """Handle the zoom keys (+, =, -) and the rotation keys (<Shift>+C/A/O)."""
        key = event.key()
        shift_held = event.modifiers() == QtCore.Qt.ShiftModifier

        if key == QtCore.Qt.Key_Plus:
            self.scale(1.1, 1.1)
        # too lazy to press the <Shift> to get '+'.  So let Key_Equal do the same thing.
        elif key == QtCore.Qt.Key_Equal:
            self.scale(1.1, 1.1)
        elif key == QtCore.Qt.Key_Minus:
            self.scale(0.91, 0.91)
        # Make it difficult to rotate the view by assigning capital 'C', 'A", and 'O' (letter O)
        elif key == QtCore.Qt.Key_C and shift_held:
            # Rotate the view clockwise by 1 degree.
            # rotate() already records the angle in self.orientation. The
            # earlier extra "self.orientation -= 1" counted it twice, which
            # made <Shift>+O over-rotate instead of restoring the view.
            self.rotate(-1)
        elif key == QtCore.Qt.Key_A and shift_held:
            # Rotate the view anti-clockwise by 1 degree (tracked by rotate()).
            self.rotate(1)
        elif key == QtCore.Qt.Key_O and shift_held:
            # Revert back to the original orientation
            self.rotate(-self.orientation)
            self.orientation = 0

    def mouseMoveEvent(self, event):
        """Update the rubber band or the scraping, and show what is under the cursor."""
        if self.selections.rubber_band.isVisible():
            self.selections.adjust_rubber_band(event.pos())
        elif self.scraping:
            point = self.mapToScene(event.pos())
            self.scraping.adjust(point.x(), point.y())

        if event.buttons() == QtCore.Qt.NoButton:
            # No button pressed: show the scene position and the selectable
            # road-items in a 5x5 region at the cursor in the status bar.
            point = self.mapToScene(event.pos())
            message = "(%.2f, %.2f)" % (point.x(), point.y())
            for graphics_item in self.items(event.x(), event.y(), 5, 5):
                if not graphics_item.is_selectable:
                    continue
                message = message + " | %s" % graphics_item.road_item
            self.status_bar.showMessage(message)

        event.ignore()
        super(Graphics_view, self).mouseMoveEvent(event)

    def mousePressEvent(self, event):
        """Handle right-button presses: scraping, rubber band or item selection.

        While an edit form is open, an error message is shown and no
        selection takes place. The base-class handler is always called.
        """
        if event.button() & QtCore.Qt.RightButton:
            if self.edit_form:
                show_error_message(
                    "You are currently editing one or more road-items. "
                    "Click on the <Ok> or <Cancel> button to close the edit-form before "
                    "selecting another item.")
                super(Graphics_view, self).mousePressEvent(event)
                return

            if self.scraping:
                self.mouse_press_events_for_scraping(event)
                super(Graphics_view, self).mousePressEvent(event)
                return

            if event.modifiers() == (QtCore.Qt.ControlModifier
                                     | QtCore.Qt.AltModifier):
                self.selections.show_rubber_band(event.pos())
            elif event.modifiers() != QtCore.Qt.ControlModifier:
                self.mouse_press_events_for_scraping(event)
            else:
                if event.modifiers() != QtCore.Qt.ShiftModifier:
                    self.selections.reset_selected_items()
                for graphics_item in self.items(event.x(), event.y(), 5, 5):
                    if not graphics_item.is_selectable:
                        continue
                    self.selections.add_item(graphics_item)
                road_info = ''
                for selected_item in self.selections.selected_items:
                    road_info = road_info + selected_item.graphics_item.road_item.road_info(
                    )
                self.main_window.show_selected_road_items_info(road_info)

        super(Graphics_view, self).mousePressEvent(event)

    def mouseReleaseEvent(self, event):
        """Commit the rubber band if it is visible."""
        if self.selections.rubber_band.isVisible():
            self.selections.set_rubber_band(self)
        super(Graphics_view, self).mouseReleaseEvent(event)

    def mouseDoubleClickEvent(self, event):
        """Finish the active scraping, if any."""
        if self.scraping:
            self.scraping.finish()
            # NOTE(review): the signal is connected after finish() is called;
            # if finish() emits "finish_scraping" synchronously this
            # connection comes too late — confirm.
            self.connect(self.scraping, QtCore.SIGNAL("finish_scraping"),
                         self.finish_scraping)
        super(Graphics_view, self).mouseDoubleClickEvent(event)

    def finish_scraping(self):
        """Slot for "finish_scraping": forget the active scraping."""
        self.scraping = None

    def mouse_press_events_for_scraping(self, event):
        """Feed the selectable items under the mouse press into the current scraping.

        Does nothing when no selectable item lies in the 5x5 region at the
        event position. Otherwise the selection is cleared, a ``Scraping`` is
        created if none is active, and a falsy result from ``Scraping.add``
        discards the scraping.
        """
        point_item = None
        line_item = None  # A graphics_item with 0 z-value and is not a point.
        lane_edge_item = None  # A graphics_item with non-zero z-value

        for graphics_item in self.items(event.x(), event.y(), 5, 5):
            if not graphics_item.is_selectable:
                continue
            if isinstance(graphics_item,
                          QtGui.QGraphicsEllipseItem) and not point_item:
                point_item = graphics_item
            elif graphics_item.zValue() > 0 and not lane_edge_item:
                lane_edge_item = graphics_item
            else:
                line_item = graphics_item
            # Stop scanning once one item of each kind has been found.
            if point_item and line_item and lane_edge_item:
                break

        if not point_item and not line_item and not lane_edge_item:
            return

        self.selections.reset_selected_items()
        self.selections.reset_selection_region()

        if not self.scraping:
            self.scraping = Scraping(self.main_window)
        #elif event.modifiers() == QtCore.Qt.ControlModifier:
        #    if lane_edge_item:
        #        self.scraping.extend_to_meet(lane_edge_item)
        #    elif line_item:
        #        self.scraping.extend_to_meet(line_item)
        #    return
        #if point_item:
        #    self.scraping.add_point_item(point_item)
        #elif line_item:
        #    self.scraping.add_line_item(line_item)
        if not self.scraping.add(point_item, line_item, lane_edge_item,
                                 self.snap_to_line):
            self.scraping = None

    def edit_selected_items(self):
        """Open an edit form for the single selected road-item.

        Shows an error when nothing is selected; does nothing when more than
        one item is selected (multi-edit is disabled).
        """
        if len(self.selections.selected_items) == 0:
            show_error_message("Select a road-item and try again.")
            return

        if len(self.selections.selected_items) == 1:
            road_item = self.selections.selected_items[
                0].graphics_item.road_item
            self.edit_form = Edit_form(road_item, self.main_window)
        else:
            #road_items = [item.graphics_item.road_item for item in self.selections.selected_items]
            #self.edit_form = Multiple_edit_form(road_items, self.main_window)
            return

        self.connect(self.edit_form, QtCore.SIGNAL("finish_editing"),
                     self.finish_editing)
        self.edit_form.setVisible(True)

    def finish_editing(self):
        """Slot for "finish_editing": drop the edit form and clear the selection."""
        self.edit_form = None
        self.selections.reset_selected_items()

    def delete_selected_item(self):
        """Delete the selected lane-edge; exactly one lane-edge must be selected."""
        if len(self.selections.selected_items) != 1:
            show_error_message("You must select a lane-edge to delete.")
            return

        selected_item = self.selections.selected_items[0]
        graphics_item = selected_item.graphics_item
        if not isinstance(graphics_item.road_item, Lane_edge):
            show_error_message("You must select a lane-edge to delete.")
            return

        self.selections.reset_selected_items()
        # Remove the item from the scene, then delete the road-item itself.
        self.scene().removeItem(graphics_item)
        lane_edge = graphics_item.road_item
        lane_edge.delete()
from scraping import Scraping

scrap = Scraping()

# Call main() twice with the instance passed explicitly as second argument.
# Per the original comments, 'SP' selects that single UF and '' runs for all
# UFs.
for uf in ('SP', ''):
    scrap.main(uf, scrap)
# Log in, scrape the reservation list, store it in the database and fetch the
# resulting free/filled lists.
# NOTE(review): reconstructed from whitespace-collapsed source that appears to
# continue past this point; everything after "if is_run:" is assumed to belong
# to that branch — confirm.
now_time = datetime.now()
# Operate only between 9:00 and 24:00.
# NOTE(review): if this is the standard-library datetime, hour never exceeds
# 23, so the upper bound is always true — confirm the intended window.
is_run = (9 <= now_time.hour and now_time.hour <= 24)
if is_run:
    cfg = TakaragaikeConfig()
    cb = ControlBrowser(cfg, True, False)
    db_cfg = DataBaseConfig()
    db = DataBase(db_cfg)
    tw_cfg = TweetConfig()
    tw_api = Tweet(tw_cfg)
    # Open the login page, log in and set the car type to cfg.AT.
    cb.setURL(cfg.LOGIN_URL)
    cb.login()
    cb.setCarType(cfg.AT)
    # Parse the page source; the browser is closed as soon as the source has
    # been handed over.
    sc = Scraping(cb.getSource(), "html.parser", cfg)
    cb.close()
    sc.makeReservationList()
    resv_list = sc.getReservationList()
    # Store the scraped reservations, then read back the free/filled lists.
    db.update(resv_list)
    free_list = db.getFreeList()
    filled_list = db.getFilledList()
    # NOTE(review): opened without a context manager; no close() is visible in
    # this part of the file — confirm it is closed later.
    file = open('log1.txt', 'w')
    exist_free_list = len(free_list) > 0
    exist_filled_list = len(filled_list) > 0