Exemplo n.º 1
0
    def __init__(self, type):
        """Wire up scraper, logger and the shared DB handle.

        When ``type`` is ``'fill_database'``, kick off the private
        database-fill routine.
        """
        # Shared collaborators (DB is a process-wide singleton).
        self.db = DbManager.get_instance()
        self.logger = Logger()
        self.scraper = WebScraper()

        # Dispatch on the requested run mode.
        if type == 'fill_database':
            self.__init_fill_db()
Exemplo n.º 2
0
 def log_into_spotify(self):
     """Log into Spotify through the web UI and persist session cookies.

     Drives the browser through the consent popup, the login form and the
     submit button, then dumps the session cookies to ``cookies.pkl``.

     Returns:
         The authenticated Selenium driver.
     """
     self.get_login()
     driver = WebScraper(self.url).setWebdriver(False)
     # Dismiss the OneTrust cookie-consent overlay before interacting.
     cookies_pop = WebDriverWait(driver, 2).until(
         ec.presence_of_element_located(
             (By.XPATH, '//*[@id="onetrust-accept-btn-handler"]')))
     cookies_pop.click()
     driver.implicitly_wait(3)
     loggin_sign = driver.find_element_by_xpath(
         '//*[@id="main"]/div/div[2]/div[1]/header/div[5]/button[2]')
     loggin_sign.click()
     driver.implicitly_wait(3)
     username_field = driver.find_element_by_xpath(
         '//*[@id="login-username"]')
     username_field.send_keys(self.username)
     time.sleep(2)
     password_field = driver.find_element_by_xpath(
         '//*[@id="login-password"]')
     password_field.send_keys(self.password)
     time.sleep(2)
     button_login = driver.find_element_by_xpath('//*[@id="login-button"]')
     button_login.click()
     time.sleep(3)
     # Fix: use a context manager — the original leaked the file handle
     # returned by the inline open() call.
     with open("cookies.pkl", "wb") as f:
         pickle.dump(driver.get_cookies(), f)
     return driver
Exemplo n.º 3
0
 def __init__(self, tile_name, column_name, pairs):
     """Store the worker's target table/column data and grab shared services."""
     # Work description: target table, column and the value pairs to process.
     self.tile_name = tile_name
     self.column_name = column_name
     self.pairs = pairs
     # Shared services; the DB cursor is created lazily elsewhere.
     self.logger = Logger()
     self.scraper = WebScraper()
     self.db = DbManager.get_instance()
     self.cursor = None
Exemplo n.º 4
0
 def runMusic(self, query):
     """Play the first playlist matching *query*, reusing saved cookies.

     Restores the session from ``cookies.pkl`` when it exists; otherwise
     performs a fresh login first.

     Returns:
         The result of clicking the play element (Selenium returns None).
     """
     if path.isfile('cookies.pkl'):
         # Fix: close the cookie file — the original leaked the handle
         # from the inline open() call.
         with open("cookies.pkl", "rb") as f:
             cookies = pickle.load(f)
         driver = WebScraper(self.url).setWebdriver(False)
         for cookie in cookies:
             driver.add_cookie(cookie)
         # Fix: refresh once after ALL cookies are installed; the original
         # refreshed inside the loop, reloading the page per cookie.
         driver.refresh()
         play = self.search_playlist(driver, query)
     else:
         driver = self.log_into_spotify()
         play = self.search_playlist(driver, query)
     return play.click()
Exemplo n.º 5
0
 def runMusic(self, query):
     """Search YouTube-style UI for *query* and click play on the first hit."""
     driver = WebScraper(self.url).setWebdriver(True)

     # Type the query into the search box.
     driver.find_element_by_xpath(
         '//*[@id="content"]/div/div/div[2]/div/div[1]/span/span/form/input'
     ).send_keys(query)
     time.sleep(2)

     # Submit the search.
     driver.find_element_by_xpath(
         '//*[@id="content"]/div/div/div[2]/div/div[1]/span/span/form/button'
     ).click()
     time.sleep(2)

     # Click play on the first result; Selenium's click() returns None.
     play_button = driver.find_element_by_xpath(
         '//*[@id="content"]/div/div/div[3]/div/div/div/ul/li[1]/div/div/div/div[2]/div[1]/div/div/div[1]/a'
     )
     return play_button.click()
Exemplo n.º 6
0
def run(start_page=1):
    """Scrape blog URLs from every listing page and pickle the result.

    Args:
        start_page: First listing page to scrape (1-based); lets a crashed
            run resume partway through.
    """
    all_blog_urls = []
    scraper = WebScraper()
    pages_number = scraper.get_pages_number()

    for page_no in range(start_page, pages_number + 1):
        all_blog_urls.extend(scraper.get_blogs_from_page(page_no))
        # Fix: the last page visited is pages_number, so the progress
        # denominator is pages_number (original printed pages_number + 1).
        print("{}/{}".format(page_no, pages_number))

    with open("blog_urls_all.dat", "wb") as f:
        pickle.dump(all_blog_urls, f)

    print(len(all_blog_urls))
Exemplo n.º 7
0
    def __init__(self):
        """Scrape all departments and fan out one Worker process per column,
        throttled to at most 4 concurrent worker processes."""
        # NOTE(review): unused — presumably leftover from an SSH-based
        # distribution scheme (see start_ssh_job below); confirm and remove.
        available_hosts = ['rpi1', 'rpi2']

        self.logger = Logger()
        self.scraper = WebScraper()
        self.db = DbManager.get_instance()
        # Mapping of department tile -> columns -> pair data (shape assumed
        # from the indexing below — TODO confirm against WebScraper).
        depts = self.scraper.get_all_departments()
        # Live (worker, process) pairs currently running.
        workers = []

        for tile in depts:
            # Skip blacklisted tables entirely.
            if good_table_name(tile) in Settings.BANNED_TILES:
                continue
            self.db.create_table(good_table_name(tile))
            for column in depts[tile]:
                if column in Settings.BANNED_COLUMNS:
                    continue

                worker = Worker(good_table_name(tile), column,
                                depts[tile][column])

                proc = multiprocessing.Process(target=worker.start_working)
                self.logger.starting_worker(tile, column)

                workers.append((worker, proc))
                proc.start()
                # Stagger worker start-up.
                time.sleep(5)

                # Throttle: block until fewer than 4 workers remain.
                # NOTE(review): this is a busy-wait — join(timeout=0) never
                # blocks, so the while loop spins at full CPU until a
                # process exits; consider a small sleep per iteration.
                while len(workers) >= 4:
                    for w, p in workers:
                        p.join(timeout=0)
                        if not p.is_alive():
                            # Safe removal: break out before continuing to
                            # iterate the mutated list.
                            workers.remove((w, p))
                            break

        # NOTE(review): this def is nested INSIDE __init__ (almost certainly
        # a mis-indent) — it is redefined per construction and never called
        # here, so it is dead code as written. It looks intended to be a
        # method launching a remote scrape over SSH; confirm and dedent.
        def start_ssh_job(self, host, tile):
            # NOTE(review): unused; presumably a planned 2-hour cap.
            timeout = 60 * 60 * 2

            process = subprocess.Popen([
                'ssh', host,
                '"python3 emag-scraping/process.py {}"'.format(tile)
            ])
            time.sleep(5)

            # poll() is non-None only if the remote job already exited.
            if process.poll() is not None:
                print('Done {}'.format(tile))
Exemplo n.º 8
0
 def __init__(self):
     """Run one full trading day: wait for open, pick stocks, trade, log out."""
     self.wait_for_market_open()

     # Day summary, persisted at end of day.
     self.record = {
         "date": str(datetime.date.today()),
         "starting": None,
         "ending": None,
         "profit": None,
         "stocks": [],
     }

     # Scrape the day's top gainers.
     ws = WebScraper()
     self.stocks = ws.stocks
     self.record["stocks"] = ws.stocks

     self.login()

     # Buying power allocated per stock.
     self.funds = {}
     self.start_funds = self.split_funds()

     # Shares held per stock; None until a position is opened.
     self.bought = dict.fromkeys(self.stocks)

     self.trade()
     self.logout()