Exemplo n.º 1
0
def main(verbose):
    """Process the Burberry and Versace new-arrival pages and print both results.

    A separate ``Scraping(verbose)`` instance is used per brand.  Each
    ``process`` call receives, in order: the list of page URLs, a predicate
    over a string, a function that folds a collection of strings into one
    string, an output image filename and a brand label.
    """
    # --- Burberry: landing page plus the same page with a paging query ---
    burberry_home = "https://us.burberry.com/womens-new-arrivals-new-in/"
    burberry_pages = [
        burberry_home,
        burberry_home +
        "?start=2&pageSize=120&productsOffset=&cellsOffset=8&cellsLimit=&__lang=en",
    ]

    def burberry_keep(x):
        # True for strings containing the "-p80" marker.
        return "-p80" in x

    def burberry_merge(docs):
        # Join with dashes, then drop every slash.
        return '-'.join(docs).replace('/', '')

    df_burberry = Scraping(verbose).process(
        burberry_pages, burberry_keep, burberry_merge,
        "SS2020_Burberry_word_frequency.jpg", 'burberry')

    # --- Versace: a single page under the new-arrivals path ---
    tail = '/us/en-us/women/new-arrivals/new-in/'
    versace_pages = ["https://www.versace.com" + tail]

    def versace_keep(x):
        # True for strings that begin with the new-arrivals path.
        return x.startswith(tail)

    def versace_merge(docs):
        # Skip entries that are the path followed by a query string; for the
        # rest, strip the path and keep the first remaining segment.
        pieces = []
        for doc in docs:
            if doc.startswith(tail + "?"):
                continue
            pieces.append(doc.replace(tail, '').split('/')[0])
        return '-'.join(pieces)

    df_versace = Scraping(verbose).process(
        versace_pages, versace_keep, versace_merge,
        "SS2020_Versace_word_frequency.jpg", 'versace')

    print(df_burberry)
    print(df_versace)
Exemplo n.º 2
0
 def __init__(self, ha):
     log.make()
     me = Scraping()
     #me.find_url('https://www.naver.com')
     me.scraping()
     #self.get_file(self.read_file(self.get_file_list('./')),'download')
     """        lst = glob.glob(os.path.join('./','data')+'/*')
Exemplo n.º 3
0
def main():
    """Run the scraper for every configured provider, then save the words.

    ``providers`` is the single place where provider names are listed; each
    one is passed to ``Scraping.run`` with a fresh ``Scraping`` instance.
    Afterwards ``Analyze.save_words`` is invoked once.
    """
    providers = [
        'naver',
    ]

    # Save the scraped content to the table, one provider at a time.
    # (A ``Parallel(n_jobs=1, verbose=0)`` / ``delayed`` fan-out over
    # ``providers`` was used here at some point and is currently disabled.)
    for provider in providers:
        Scraping.run(Scraping(), provider)

    # Save the nouns found in the scraping results to the table.
    Analyze.save_words(Analyze())
Exemplo n.º 4
0
def getInfos():
    """Build the venue -> schedule-URL mapping and return the scraper's result.

    The mapping is passed to ``Scraping``; the instance is then called, and
    whatever that call returns is returned unchanged.
    """
    # Set of URLs to inspect, keyed by venue name.
    urls = {}
    urls["阿佐ヶ谷ロフト"] = "http://www.loft-prj.co.jp/schedule/lofta"
    urls["ロフトプラスワン"] = "http://www.loft-prj.co.jp/schedule/plusone"

    # Hand the URLs to the scraping class; its __call__ does the extraction.
    extractor = Scraping(urls)
    return extractor()
Exemplo n.º 5
0
class Heart:
    """Scheduler loop that periodically captures stock values into MongoDB.

    Stock codes come from the ``STOCKS`` environment variable (comma
    separated); the capture job is registered with ``schedule`` to run every
    ``MINUTES_SCHEDULE`` minutes.
    """

    def __init__(self):
        # Filled in by play(); the scraper is shared by all capture runs.
        self.__stocks = None
        self.__scraping = Scraping()

    def __scraping_task(self):
        """Fetch a value for every stock and insert the batch of quotes."""
        captured_at = datetime.datetime.now()
        quotes = []

        print(f'Initializing capture at {captured_at}')

        # Plain loop rather than a comprehension: each step prints progress.
        for stock in self.__stocks:
            print(f'Getting value from {stock.codigo}...')
            current = self.__scraping.get_stock_value(stock)
            quotes.append(Quote(stock.codigo, captured_at, current))

        Mongo().insert_quotes(quotes)
        print('Values inserted in MongoDB database')

    def __return_stocks_from_environ(self):
        """Return one ``Stock(code, None)`` per entry of ``$STOCKS``.

        Raises:
            KeyError: if the ``STOCKS`` environment variable is not set.
        """
        codes = os.environ['STOCKS'].split(',')
        return [Stock(code, None) for code in codes]

    def play(self):
        """Register the capture job, load the stocks and run the loop forever."""
        print('Robo starting...')
        job = schedule.every(MINUTES_SCHEDULE).minutes
        job.do(self.__scraping_task)

        print('Capture links from stocks...')
        self.__stocks = self.__return_stocks_from_environ()
        self.__scraping.set_urls(self.__stocks)

        print(f'Schedule started! ({MINUTES_SCHEDULE} minutes)')
        # Poll the scheduler once per second; this never returns.
        while True:
            schedule.run_pending()
            time.sleep(1)
Exemplo n.º 6
0
    def mouse_press_events_for_scraping(self, event):
        """Feed the items under the mouse press to the ``Scraping`` tool.

        The selectable items found in a 5x5 area at the event position are
        sorted into up to three slots: a point (an ellipse item), a lane edge
        (z-value above zero) and a line (anything else).  If at least one slot
        is filled, the current selection is cleared, a ``Scraping`` object is
        created when none is active, and the three slots are passed to its
        ``add`` method; a falsy result from ``add`` discards the tool.
        """
        picked_point = picked_line = picked_edge = None

        for candidate in self.items(event.x(), event.y(), 5, 5):
            if candidate.is_selectable:
                is_ellipse = isinstance(candidate, QtGui.QGraphicsEllipseItem)
                if is_ellipse and not picked_point:
                    picked_point = candidate
                elif candidate.zValue() > 0 and not picked_edge:
                    picked_edge = candidate
                else:
                    # Everything else lands here, including further ellipses
                    # or lane edges once their own slot is already taken.
                    picked_line = candidate
                if picked_point and picked_line and picked_edge:
                    break

        if not (picked_point or picked_line or picked_edge):
            return

        self.selections.reset_selected_items()
        self.selections.reset_selection_region()

        if not self.scraping:
            self.scraping = Scraping(self.main_window)

        accepted = self.scraping.add(picked_point, picked_line, picked_edge,
                                     self.snap_to_line)
        if not accepted:
            self.scraping = None
Exemplo n.º 7
0
def data():
    """Render ``data.html`` with one value per location method of ``Scraping``.

    A single ``Scraping`` instance is queried for six locations, in a fixed
    order, and each result is passed to the template under the lower-case
    location name.
    """
    source = Scraping()
    context = {
        'longyearbyen': source.Longyearbyen(),
        'yellowknife': source.Yellowknife(),
        'iqaluit': source.Iqaluit(),
        'nuuk': source.Nuuk(),
        'qaanaaq': source.Qaanaaq(),
        'khatanga': source.Khatanga(),
    }
    return render_template('data.html', **context)
Exemplo n.º 8
0
    def execute(self):
        """Collect store information and products, then create the intents.

        Every step uses its own ``Scraping()`` instance: the home page first,
        then the four product listings in a fixed order, and finally
        ``exit()``.  The gathered data is handed to ``Intents`` as
        ``{"Store": ..., "Products": ...}``.
        """
        about = Scraping().homePage()

        # Names of the Scraping methods whose results are concatenated.
        listing_methods = (
            'pcGames',
            'pcGamesOnSale',
            'xboxProducts',
            'playstationProducts',
        )
        products = []
        for method_name in listing_methods:
            fetch = getattr(Scraping(), method_name)
            products.extend(fetch())

        Scraping().exit()

        payload = {"Store": about, "Products": products}
        Intents(data=payload).createIntents()
Exemplo n.º 9
0
    def run(self, demo):
        """Load a crossword, scrape answer candidates, solve it and show the GUI.

        Args:
            demo: When truthy, the puzzle data is taken from a ``Connector``
                constructed with a chromedriver path; otherwise it is read
                from JSON files in the current working directory.

        The method does not return normally: it finishes with
        ``sys.exit(app.exec_())``.
        """
        if demo:
            # GET CROSSWORD PUZZLE
            # Raw string: "\P" is not a valid escape sequence, so the plain
            # literal only worked by accident and triggers a warning on
            # current Python versions.  The resulting path is unchanged.
            nyTimesConnector = Connector(
                r"C:\Program Files (x86)/chromedriver.exe")
            nyTimesConnector.connectToPuzzle()
            self.cellNumberArray = nyTimesConnector.cellNumberArray
            self.cellBlockArray = nyTimesConnector.cellBlockArray
            self.cluesAcross = nyTimesConnector.cluesAcross
            self.cluesDown = nyTimesConnector.cluesDown
            self.cellAnswerArray = nyTimesConnector.cellAnswerArray
        else:
            # (attribute, file) pairs; note the across-clues file really is
            # named 'clueAcross.json' (singular).
            saved_puzzle = (
                ('cellBlockArray', 'cellBlockArray.json'),
                ('cellNumberArray', 'cellNumberArray.json'),
                ('cluesAcross', 'clueAcross.json'),
                ('cluesDown', 'cluesDown.json'),
                ('cellAnswerArray', 'answers.json'),
            )
            for attribute, filename in saved_puzzle:
                with open(filename, 'r') as fp:
                    setattr(self, attribute, json.load(fp))

        # From here on both modes share the same pipeline.
        self.setClues()
        print("===================\nWEB SCRAPING\n===================")
        webScrapper = Scraping(self.clues, self.cellAnswerArray,
                               self.cellNumberArray)
        webScrapper.setDomains()
        print("===================\nSOLVING THE PUZZLE\n===================")
        puzzleSolver = newSolver(self.cellBlockArray, self.cellNumberArray,
                                 self.cluesDown, self.cluesAcross,
                                 webScrapper.domains)

        # NOTE: a (now disabled) save step used to json.dump() cellBlockArray,
        # cellNumberArray, cluesAcross and cluesDown to the filenames read
        # above, and webScrapper.domains to 'data.json'.

        print("===================\nSOLUTION\n===================")
        for i in puzzleSolver.solvedPuzzle:
            print(i)

        # DRAW GUI
        import sys
        app = QtWidgets.QApplication(sys.argv)
        MainWindow = QtWidgets.QMainWindow()
        ui = Ui_MainWindow()
        ui.setupUi(MainWindow, self.cellNumberArray, self.cellBlockArray,
                   self.cluesAcross, self.cluesDown, self.cellAnswerArray,
                   puzzleSolver.solvedPuzzle)
        MainWindow.show()
        sys.exit(app.exec_())
Exemplo n.º 10
0
from scraping import Scraping
from selenium import webdriver

# Scraping startup
scraping = Scraping()

# Selenium startup
driver = webdriver.Chrome(
    "/home/repente/prog/python/youtube/parsers/SE00/SE#06/FlashscoreScraping/chromedriver"
)

# Always shut the browser down, even when a collection run raises; without
# the try/finally an exception would skip driver.quit() and leave the
# browser session open.
try:
    # driver.get("https://ya.ru")

    # Collect the 'brazil' / 'serie-a' matches for 2017.
    # NOTE(review): the earlier comment said "2019" - confirm the season.
    scraping.collect(driver, 'brazil', 'serie-a', 2017)

    # Collect the 'spain' / 'laliga' matches, passing the years 2018 and 2012.
    # NOTE(review): the earlier comment said "from 2012 until 2019" - confirm.
    scraping.collect(driver, 'spain', 'laliga', 2018, 2012)
finally:
    driver.quit()
Exemplo n.º 11
0
from selenium.common.exceptions import NoSuchElementException
from time import sleep
from scraping import Scraping

if __name__ == '__main__':
    # Run one scrape-and-insert pass.  Any Exception is reported on stdout
    # instead of propagating, so the script still terminates normally.
    try:
        scraping = Scraping()
        scraping.scrap_and_insert()
        sleep(1)
        print('The End!!')
    except Exception as exception:
        print(exception)



Exemplo n.º 12
0
from datetime import datetime, timedelta
from time import sleep
import schedule
from ifttt import ifttt_webhook
from scraping import Scraping

# Module-wide scraper instance, created with headless=True.
scraper = Scraping(headless=True)

# State dictionary read by job(); every field starts out as None.
last_status = {
    field: None
    for field in ("tutor_book", "reserve_day", "lesson_time", "exec_day")
}


def job():
    print("do job at", datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
    fail_frag = False
    lessons = None
    today = datetime.today()
    lesson_time: datetime = last_status["lesson_time"]
    if last_status["lesson_time"] is not None and \
         lesson_time - timedelta(hours=1) < today < lesson_time + timedelta(hours=1):
        print("lesson time")
    elif last_status["tutor_book"] is not None and \
        last_status["tutor_book"] == True and \
        last_status["reserve_day"] == "today" and \
        lesson_time > today:
        print("already booked")
    elif last_status["tutor_book"] is not None and \
        last_status["tutor_book"] == True and \
Exemplo n.º 13
0
def task(v):
    """Instantiate ``Scraping`` once, logging ``v`` before and after."""
    log = getLogger()
    log.info("%s start", v)
    Scraping()
    log.info("%s end", v)
Exemplo n.º 14
0
def tasks():
    """Print the configured run interval, then instantiate ``Scraping``."""
    announcement = f'This job is run every {INTERVAL_MINUTES} minutes.'
    print(announcement)
    Scraping()
Exemplo n.º 15
0
 def __init__(self):
     """Start with no stocks and a freshly created ``Scraping`` instance."""
     # No stocks are known yet at construction time.
     self.__stocks = None
     # Scraper held for the lifetime of this object.
     self.__scraping = Scraping()
Exemplo n.º 16
0
from scraping import Scraping


if __name__ == '__main__':
    # Page to process.
    url = "https://detail.chiebukuro.yahoo.co.jp/qa/question_detail/q13241092928"

    # Output files: one PNG and one TXT path for each of the two sections.
    question_paths = {'png': './question.png', 'txt': './question.txt'}
    answer_paths = {'png': './answer.png', 'txt': './answer.txt'}
    save_paths = {'question': question_paths, 'answer': answer_paths}

    Scraping.save_datas(url, save_paths)
Exemplo n.º 17
0
opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]

movies = Movie.getMoviesWithoutDirector()
for id_movie in movies:
    sid = str(movies[id_movie])
    directorsUrl = "http://www.imdb.com/title/tt{0}/fullcredits?ref_=tt_ov_dr#directors".format(sid.zfill(7))
    print directorsUrl

    try:
        request = opener.open(directorsUrl)
    except urllib2.HTTPError:
        print "can't to open directory url " + directorsUrl
    except urllib2.URLError:
        print "error directory url " + directorsUrl
    else:
        directors = Scraping.getDirectors(request.read())
        for director in directors:
            id_director = Director.find(director.strip())
            if(id_director == False):
                id_director = Director.save(director.strip())

            if(id_director != False):
                try:
                    Movie.addDirector(id_movie, id_director)
                except MySQLdb.IntegrityError:
                    print "Duplicate entry " + str(id_movie) + " " + str(id_director)
                print "asso " + str(id_movie) + " " + str(id_director)
    time.sleep(1)
Exemplo n.º 18
0
class Graphics_view(QtGui.QGraphicsView):
    """Interactive view onto the graphics scene of road items.

    The view is flipped about the X-axis so that the Y-axis points upwards,
    and it adds zooming, rotation, item selection, item editing and the
    ``Scraping`` tool on top of the stock ``QGraphicsView`` event handlers.
    """

    def __init__(self, graphics_scene, main_window):
        """Configure rendering, the flipped coordinate system and view state.

        Args:
            graphics_scene: Scene displayed by this view.
            main_window: Parent window; provides the status bar and is passed
                on to the ``Scraping`` tool and edit form created later.
        """
        super(Graphics_view, self).__init__(graphics_scene, main_window)
        self.scale(0.925, 0.925)
        self.status_bar = main_window.statusBar()
        self.main_window = main_window

        self.setDragMode(QtGui.QGraphicsView.ScrollHandDrag)
        self.setRenderHint(QtGui.QPainter.Antialiasing)
        self.setRenderHint(QtGui.QPainter.TextAntialiasing)

        # Flip the view about the X-axis so that the view is in Cartesian
        # co-ordinate system where the Y-axis points upwards.
        matrix = QtGui.QTransform()
        matrix.rotate(180, QtCore.Qt.XAxis)
        self.setTransform(matrix, True)

        # Net rotation applied through rotate(), kept so that the user can
        # revert to the original orientation with Shift+O (letter O).
        self.orientation = 0

        self.setMouseTracking(True)
        self.selections = Selections(self)
        self.scraping = None  # active Scraping tool, if any
        self.edit_form = None  # open edit form, if any
        self.snap_to_line = True

    def wheelEvent(self, event):
        """Rotate the view when Ctrl is held, otherwise zoom."""
        if event.modifiers() == QtCore.Qt.ControlModifier:
            self.rotate(event.delta() / 240.0)
        else:
            self.zoom(1.41**(-event.delta() / 240.0))

    def rotate(self, delta_angle):
        """Rotate the view by ``delta_angle`` and record it in ``orientation``.

        This is the only place where ``orientation`` is accumulated; callers
        must not adjust it again for a rotation made through this method.
        """
        self.orientation = self.orientation + delta_angle
        super(Graphics_view, self).rotate(delta_angle)

    def zoom(self, factor):
        """Scale the view uniformly by ``factor``."""
        self.scale(factor, factor)

    def resizeEvent(self, event):
        """Do nothing on resize.

        The view is deliberately not rescaled to fit the scene when the
        widget size changes (an earlier fit-to-scene computation here was
        unreachable and has been dropped).
        """
        return

    def keyPressEvent(self, event):
        """Handle the zoom (+, =, -) and rotation (Shift+C / A / O) keys."""
        key = event.key()
        shift_held = event.modifiers() == QtCore.Qt.ShiftModifier

        # '=' zooms in as well, so the user need not press <Shift> for '+'.
        if key in (QtCore.Qt.Key_Plus, QtCore.Qt.Key_Equal):
            self.scale(1.1, 1.1)
        elif key == QtCore.Qt.Key_Minus:
            self.scale(0.91, 0.91)
        # Rotation is made deliberately harder to trigger by requiring
        # capital 'C', 'A' and 'O' (letter O).
        #
        # rotate() already updates self.orientation.  The former extra
        # "self.orientation -= 1" / "+= 1" here counted each key press twice,
        # so Shift+O then rotated back by twice the real angle.
        elif key == QtCore.Qt.Key_C and shift_held:
            # Rotate the view clockwise by 1 degree
            self.rotate(-1)
        elif key == QtCore.Qt.Key_A and shift_held:
            # Rotate the view anti-clockwise by 1 degree
            self.rotate(1)
        elif key == QtCore.Qt.Key_O and shift_held:
            # Revert back to the original orientation; rotate() brings the
            # tracked value to zero, the assignment clears float residue.
            self.rotate(-self.orientation)
            self.orientation = 0

    def mouseMoveEvent(self, event):
        """Update rubber band or scraping tool and show hover info."""
        if self.selections.rubber_band.isVisible():
            self.selections.adjust_rubber_band(event.pos())
        elif self.scraping:
            point = self.mapToScene(event.pos())
            self.scraping.adjust(point.x(), point.y())

        # With no button pressed, show the scene position and the road items
        # under the cursor in the status bar.
        if event.buttons() == QtCore.Qt.NoButton:
            point = self.mapToScene(event.pos())
            message = "(%.2f, %.2f)" % (point.x(), point.y())
            for graphics_item in self.items(event.x(), event.y(), 5, 5):
                if not graphics_item.is_selectable:
                    continue
                message = message + " | %s" % graphics_item.road_item
            self.status_bar.showMessage(message)
        event.ignore()
        super(Graphics_view, self).mouseMoveEvent(event)

    def mousePressEvent(self, event):
        """Dispatch right-button presses to rubber band, scraping or selection.

        Other buttons fall straight through to the base class handler, which
        is also called at the end of every right-button path.
        """
        if event.button() & QtCore.Qt.RightButton:
            if self.edit_form:
                show_error_message(
                    "You are currently editing one or more road-items.  "
                    "Click on the <Ok> or <Cancel> button to close the edit-form before "
                    "selecting another item.")
                super(Graphics_view, self).mousePressEvent(event)
                return

            # While a scraping tool is active every right click belongs to it.
            if self.scraping:
                self.mouse_press_events_for_scraping(event)
                super(Graphics_view, self).mousePressEvent(event)
                return

            if event.modifiers() == (QtCore.Qt.ControlModifier
                                     | QtCore.Qt.AltModifier):
                self.selections.show_rubber_band(event.pos())
            elif event.modifiers() != QtCore.Qt.ControlModifier:
                self.mouse_press_events_for_scraping(event)
            else:
                # Ctrl alone: select the items under the cursor.
                # NOTE(review): modifiers equal ControlModifier on this path,
                # so this condition is always true and the previous selection
                # is always cleared.
                if event.modifiers() != QtCore.Qt.ShiftModifier:
                    self.selections.reset_selected_items()
                for graphics_item in self.items(event.x(), event.y(), 5, 5):
                    if not graphics_item.is_selectable:
                        continue
                    self.selections.add_item(graphics_item)

                road_info = ''.join(
                    selected_item.graphics_item.road_item.road_info()
                    for selected_item in self.selections.selected_items)
                self.main_window.show_selected_road_items_info(road_info)
        super(Graphics_view, self).mousePressEvent(event)

    def mouseReleaseEvent(self, event):
        """Finalise the rubber-band selection if the band is visible."""
        if self.selections.rubber_band.isVisible():
            self.selections.set_rubber_band(self)
        super(Graphics_view, self).mouseReleaseEvent(event)

    def mouseDoubleClickEvent(self, event):
        """Finish the active scraping tool, if any."""
        if self.scraping:
            # NOTE(review): the signal is connected only after finish() has
            # been called; if finish() emits "finish_scraping" synchronously
            # this slot would miss it - confirm against Scraping.finish().
            self.scraping.finish()
            self.connect(self.scraping, QtCore.SIGNAL("finish_scraping"),
                         self.finish_scraping)
        super(Graphics_view, self).mouseDoubleClickEvent(event)

    def finish_scraping(self):
        """Slot for "finish_scraping": forget the scraping tool."""
        self.scraping = None

    def mouse_press_events_for_scraping(self, event):
        """Feed the items under the mouse press to the ``Scraping`` tool.

        Selectable items in a 5x5 area at the event position are sorted into
        a point (ellipse item), a lane edge (z-value above zero) and a line
        (anything else).  If any slot is filled, the selection is cleared, a
        ``Scraping`` tool is created when none is active, and the slots are
        passed to its ``add`` method; a falsy result discards the tool.
        """
        point_item = None
        line_item = None  # A graphics_item with 0 z-value and is not a point.
        lane_edge_item = None  # A graphics_item with non-zero z-value
        for graphics_item in self.items(event.x(), event.y(), 5, 5):
            if not graphics_item.is_selectable:
                continue
            if isinstance(graphics_item,
                          QtGui.QGraphicsEllipseItem) and not point_item:
                point_item = graphics_item
            elif graphics_item.zValue() > 0 and not lane_edge_item:
                lane_edge_item = graphics_item
            else:
                line_item = graphics_item
            if point_item and line_item and lane_edge_item:
                break

        if not point_item and not line_item and not lane_edge_item:
            return

        self.selections.reset_selected_items()
        self.selections.reset_selection_region()

        if not self.scraping:
            self.scraping = Scraping(self.main_window)
        if not self.scraping.add(point_item, line_item, lane_edge_item,
                                 self.snap_to_line):
            self.scraping = None

    def edit_selected_items(self):
        """Open the edit form for the single selected road item.

        Shows an error when nothing is selected; does nothing when more than
        one item is selected (multi-item editing is not enabled).
        """
        if len(self.selections.selected_items) == 0:
            show_error_message("Select a road-item and try again.")
            return
        if len(self.selections.selected_items) == 1:
            road_item = self.selections.selected_items[
                0].graphics_item.road_item
            self.edit_form = Edit_form(road_item, self.main_window)
        else:
            return
        self.connect(self.edit_form, QtCore.SIGNAL("finish_editing"),
                     self.finish_editing)
        self.edit_form.setVisible(True)

    def finish_editing(self):
        """Slot for "finish_editing": drop the form and clear the selection."""
        self.edit_form = None
        self.selections.reset_selected_items()

    def delete_selected_item(self):
        """Delete the selected lane edge from the scene and the road data.

        Exactly one item must be selected and its road item must be a
        ``Lane_edge``; otherwise an error message is shown.
        """
        if len(self.selections.selected_items) != 1:
            show_error_message("You must select a lane-edge to delete.")
            return
        selected_item = self.selections.selected_items[0]
        graphics_item = selected_item.graphics_item
        if not isinstance(graphics_item.road_item, Lane_edge):
            show_error_message("You must select a lane-edge to delete.")
            return
        self.selections.reset_selected_items()
        self.scene().removeItem(graphics_item)
        lane_edge = graphics_item.road_item
        lane_edge.delete()
Exemplo n.º 19
0
from scraping import Scraping

# One scraper instance, used for both calls below (it is also passed to
# main() as the second argument).
scrap = Scraping()

# Call the function for a specific UF ('SP').
# NOTE(review): UF presumably means the Brazilian state code - confirm.
scrap.main('SP', scrap)

# Call the function for all UFs (empty UF argument)
scrap.main('', scrap)
Exemplo n.º 20
0
    now_time = datetime.now()
    is_run = (9 <= now_time.hour and now_time.hour <= 24)  # 9時~24時の間運用

    if is_run:
        cfg = TakaragaikeConfig()
        cb = ControlBrowser(cfg, True, False)
        db_cfg = DataBaseConfig()
        db = DataBase(db_cfg)
        tw_cfg = TweetConfig()
        tw_api = Tweet(tw_cfg)

        cb.setURL(cfg.LOGIN_URL)
        cb.login()
        cb.setCarType(cfg.AT)

        sc = Scraping(cb.getSource(), "html.parser", cfg)
        cb.close()

        sc.makeReservationList()

        resv_list = sc.getReservationList()

        db.update(resv_list)

        free_list = db.getFreeList()
        filled_list = db.getFilledList()

        file = open('log1.txt', 'w')

        exist_free_list = len(free_list) > 0
        exist_filled_list = len(filled_list) > 0