def __init__(self, prodj):
    """Initialize the provider: request queue, pdb/dbc backends and result caches.

    Args:
        prodj: the owning ProDJ instance, also handed to both query backends.
    """
    super().__init__()
    self.prodj = prodj
    self.queue = Queue()  # pending requests served by the worker thread
    # two query backends; both enabled by default
    self.pdb_enabled = True
    self.pdb = PDBProvider(prodj)
    self.dbc_enabled = True
    self.dbc = DBClient(prodj)
    # db queries seem to work if we submit player number 0 everywhere (NOTE: this seems to work only if less than 4 players are on the network)
    # however, this messes up rendering on the players sometimes (i.e. when querying metadata and player has browser opened)
    # alternatively, we can use a player number from 1 to 4 without rendering issues, but then only max. 3 real players can be used
    self.own_player_number = 0
    self.request_retry_count = 3
    self.metadata_store = DataStore()  # map of player_number,slot,track_id: metadata
    self.artwork_store = DataStore()  # map of player_number,slot,artwork_id: artwork_data
    self.waveform_store = DataStore()  # map of player_number,slot,track_id: waveform_data
    self.preview_waveform_store = DataStore()  # map of player_number,slot,track_id: preview_waveform_data
    self.beatgrid_store = DataStore()  # map of player_number,slot,track_id: beatgrid_data
def __init__(self):
    """Prepare the HTTP session, the filtered search URL and the game database."""
    self.session = requests.Session()
    # Steam search listing restricted to games (category1=998) with English support
    self.base_url = ('https://store.steampowered.com/search'
                     '/?category1=998&supportedlang=english')
    self.urls = []
    self.dbc = DBClient('games.db')
    self.dbc.create_table()
def setUp(self):
    """Open a throwaway test database and build a fully-populated sample game."""
    self.dbc = DBClient('test.db')
    # one well-formed record covering every column of the games schema
    self.mock_ideal_game = dict(
        Name='sample',
        RawgID=20,
        Metacritic=100,
        Genres='Action',
        Indie=False,
        Presence=83,
        Platform='Windows',
        Graphics='4gb GPU',
        Storage='180gb',
        Memory='8gb',
        RatingsBreakdown='34/45/15',
        ReleaseDate='January 14, 2020',
        Soundtrack=False,
        Franchise=None,
        OriginalCost='$39.99',
        DiscountedCost=None,
        Players='singleplayer, multiplayer',
        Controller=True,
        Languages='English, Mandarin',
        ESRB='Teen',
        Achievements=55,
        Publisher='idSoftware',
        Description='lots of stuff',
        Tags='Fun, Violent',
        SteamURL='https://store.steampowered.com/app/42700/?snr=1_5_9__205',
    )
def main(argv):
    """Insert data into the fi_dev database from one file or a directory tree.

    Args:
        argv: command-line arguments (without the program name);
              -f <file> inserts a single file, -p <path> walks a directory.
    """
    path = ""
    # renamed from `file` to avoid shadowing the builtin
    file_name = ""
    dbclient = DBClient("fi_dev")
    try:
        opts, args = getopt.getopt(argv, "hp:f:")
    except getopt.GetoptError:
        print('datainserter.py -p <url> -f <file>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('datainserter.py -p <url> -f <file>')
            sys.exit()
        # BUG FIX: `opt in ("-p")` tested substring membership in a plain
        # string, not tuple membership; compare directly instead
        elif opt == "-p":
            path = arg
        elif opt == "-f":
            file_name = arg
    if file_name != "":
        logging.info("************Start to insert data and file: " + file_name)
        solveFile(file_name, dbclient)
    else:
        logging.info("************Start to insert data and path: " + path)
        for parent, dirnames, filenames in os.walk(path):
            for filename in filenames:
                # BUG FIX: os.walk yields bare file names; join with the
                # containing directory so solveFile gets an openable path
                solveFile(os.path.join(parent, filename), dbclient)
def main(argv):
    """Re-run the fix spider in batches until no issue URLs remain.

    Args:
        argv: command-line arguments (without the program name);
              -s <step> sets the batch size per spider run (default 25).
    """
    step = 25
    dbclient = DBClient()
    config = Config()
    try:
        opts, args = getopt.getopt(argv, "hs:")
    except getopt.GetoptError:
        print('runner-fix.py -s <step>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('runner-fix.py -s <step>')
            sys.exit()
        # BUG FIX: `opt in ("-s")` was a substring test on a plain string,
        # not tuple membership; compare directly
        elif opt == "-s":
            step = int(arg)
    # the command never changes inside the loop; build it once
    scrapyCmd = 'scrapy runspider tianyancha_fix.py -a step=' + str(step)
    while dbclient.getIssueUrlNum(config.getCurrentProviceCode()) > 0:
        os.system(scrapyCmd)
def __init__(self, conf, products):
    """Build the main view: serial link, DB/mail clients and child widgets.

    Args:
        conf: configuration mapping with SERIAL_CLIENT, DATABASE_CLIENT,
              MAIL_CLIENT and PRODUCT_TRANSACTIONS sections.
        products: product catalogue handed to the selection widget.
    """
    QtWidgets.QWidget.__init__(self)
    self.setObjectName("MainView")
    self.signals = MainViewSignals()
    self._conf = conf
    self._products = products
    serial_conf = self._conf["SERIAL_CLIENT"]
    self.serialClient = serial.Serial(serial_conf["PORT"],
                                      serial_conf["BAUDRATE"],
                                      timeout=serial_conf["TIMEOUT"])
    self.dbClient = DBClient(self._conf["DATABASE_CLIENT"])
    self.mailClient = MailClient(self._conf["MAIL_CLIENT"])
    self.balance = self._conf["PRODUCT_TRANSACTIONS"]["CURRENT_BALANCE"]
    self.paymentWidget = PaymentWidget()
    self.successWidget = SuccessWidget()
    self.selectionWidget = SelectionWidget(self._products, self.balance)
    self.selectionWidget.signals.new_purchase.connect(self.new_purchase_event)
    self._init_layout()
class DownloadMiddleware(object):
    """Scrapy downloader middleware that records failed requests in the DB
    so a later fix run can retry them."""

    # HTTP statuses treated as failures worth persisting
    RETRY_HTTP_CODES = [500, 503, 504, 400, 403, 404, 408]
    # class attribute: one DB client shared by all middleware instances
    dbclient = DBClient()

    def process_response(self, request, response, spider):
        """Persist requests whose status is in RETRY_HTTP_CODES (status '0'
        = unresolved); always returns the response unchanged."""
        config = Config()
        if response.status in self.RETRY_HTTP_CODES:
            logger.info("Request: " + request.url + " failed for: " +
                        str(response.status) + " and will be recorded in DB")
            record_id = uuid.uuid1()
            industry_code = request.meta['industry_code']
            provice_code = config.getCurrentProviceCode()
            # NOTE(review): SQL built by string concatenation from the request
            # URL — injection-prone; parameterize if updateDB supports it
            updateSQL = "insert into ISSUE_URL values ('" + str(record_id) + \
                "', '" + request.url + "', '" + provice_code + "', '" + \
                industry_code + "','0')"
            self.dbclient.updateDB(updateSQL)
        return response
class DataProvider(Thread):
    """Worker thread answering track/database queries for players.

    Requests are enqueued via the get_* methods and answered, in order,
    from a local DataStore cache (when one applies), the PDB provider and
    finally the DBClient, with a fixed number of retries per request.
    """

    def __init__(self, prodj):
        super().__init__()
        self.prodj = prodj
        self.queue = Queue()  # pending (request, store, params, callback, retries) entries
        self.pdb_enabled = True
        self.pdb = PDBProvider(prodj)
        self.dbc_enabled = True
        self.dbc = DBClient(prodj)
        # db queries seem to work if we submit player number 0 everywhere (NOTE: this seems to work only if less than 4 players are on the network)
        # however, this messes up rendering on the players sometimes (i.e. when querying metadata and player has browser opened)
        # alternatively, we can use a player number from 1 to 4 without rendering issues, but then only max. 3 real players can be used
        self.own_player_number = 0
        self.request_retry_count = 3
        self.metadata_store = DataStore()  # map of player_number,slot,track_id: metadata
        self.artwork_store = DataStore()  # map of player_number,slot,artwork_id: artwork_data
        self.waveform_store = DataStore()  # map of player_number,slot,track_id: waveform_data
        self.preview_waveform_store = DataStore()  # map of player_number,slot,track_id: preview_waveform_data
        self.beatgrid_store = DataStore()  # map of player_number,slot,track_id: beatgrid_data

    def start(self):
        # run() polls this flag; it must be set before the thread starts
        self.keep_running = True
        super().start()

    def stop(self):
        """Signal shutdown, stop all owned stores and join the thread."""
        self.keep_running = False
        self.pdb.stop()
        self.metadata_store.stop()
        self.artwork_store.stop()
        self.waveform_store.stop()
        self.preview_waveform_store.stop()
        self.beatgrid_store.stop()
        self.join()

    def cleanup_stores_from_changed_media(self, player_number, slot):
        """Drop every cached entry for a player/slot whose media changed."""
        self.metadata_store.removeByPlayerSlot(player_number, slot)
        self.artwork_store.removeByPlayerSlot(player_number, slot)
        self.waveform_store.removeByPlayerSlot(player_number, slot)
        self.preview_waveform_store.removeByPlayerSlot(player_number, slot)
        self.beatgrid_store.removeByPlayerSlot(player_number, slot)
        self.pdb.cleanup_stores_from_changed_media(player_number, slot)

    # called from outside, enqueues request
    def get_metadata(self, player_number, slot, track_id, callback=None):
        self._enqueue_request("metadata", self.metadata_store,
                              (player_number, slot, track_id), callback)

    def get_root_menu(self, player_number, slot, callback=None):
        self._enqueue_request("root_menu", None, (player_number, slot), callback)

    def get_titles(self, player_number, slot, sort_mode="default", callback=None):
        self._enqueue_request("title", None, (player_number, slot, sort_mode), callback)

    def get_titles_by_album(self, player_number, slot, album_id, sort_mode="default", callback=None):
        self._enqueue_request("title_by_album", None,
                              (player_number, slot, sort_mode, [album_id]), callback)

    def get_titles_by_artist_album(self, player_number, slot, artist_id, album_id, sort_mode="default", callback=None):
        self._enqueue_request("title_by_artist_album", None,
                              (player_number, slot, sort_mode, [artist_id, album_id]), callback)

    def get_titles_by_genre_artist_album(self, player_number, slot, genre_id, artist_id, album_id, sort_mode="default", callback=None):
        self._enqueue_request("title_by_genre_artist_album", None,
                              (player_number, slot, sort_mode, [genre_id, artist_id, album_id]), callback)

    def get_artists(self, player_number, slot, callback=None):
        self._enqueue_request("artist", None, (player_number, slot), callback)

    def get_artists_by_genre(self, player_number, slot, genre_id, callback=None):
        self._enqueue_request("artist_by_genre", None,
                              (player_number, slot, [genre_id]), callback)

    def get_albums(self, player_number, slot, callback=None):
        self._enqueue_request("album", None, (player_number, slot), callback)

    def get_albums_by_artist(self, player_number, slot, artist_id, callback=None):
        self._enqueue_request("album_by_artist", None,
                              (player_number, slot, [artist_id]), callback)

    def get_albums_by_genre_artist(self, player_number, slot, genre_id, artist_id, callback=None):
        self._enqueue_request("album_by_genre_artist", None,
                              (player_number, slot, [genre_id, artist_id]), callback)

    def get_genres(self, player_number, slot, callback=None):
        self._enqueue_request("genre", None, (player_number, slot), callback)

    def get_playlist_folder(self, player_number, slot, folder_id=0, callback=None):
        # folder_id 0 is the playlist root
        self._enqueue_request("playlist_folder", None,
                              (player_number, slot, folder_id), callback)

    def get_playlist(self, player_number, slot, playlist_id, sort_mode="default", callback=None):
        self._enqueue_request("playlist", None,
                              (player_number, slot, sort_mode, playlist_id), callback)

    def get_artwork(self, player_number, slot, artwork_id, callback=None):
        self._enqueue_request("artwork", self.artwork_store,
                              (player_number, slot, artwork_id), callback)

    def get_waveform(self, player_number, slot, track_id, callback=None):
        self._enqueue_request("waveform", self.waveform_store,
                              (player_number, slot, track_id), callback)

    def get_preview_waveform(self, player_number, slot, track_id, callback=None):
        self._enqueue_request("preview_waveform", self.preview_waveform_store,
                              (player_number, slot, track_id), callback)

    def get_beatgrid(self, player_number, slot, track_id, callback=None):
        self._enqueue_request("beatgrid", self.beatgrid_store,
                              (player_number, slot, track_id), callback)

    def get_mount_info(self, player_number, slot, track_id, callback=None):
        self._enqueue_request("mount_info", None,
                              (player_number, slot, track_id), callback)

    def get_track_info(self, player_number, slot, track_id, callback=None):
        self._enqueue_request("track_info", None,
                              (player_number, slot, track_id), callback)

    def _enqueue_request(self, request, store, params, callback):
        """Validate the target player number and enqueue the request."""
        player_number = params[0]
        # queryable players are 1..4; 0 is rejected here
        if player_number == 0 or player_number > 4:
            logging.warning("DataProvider: invalid %s request parameters", request)
            return
        logging.debug("DataProvider: enqueueing %s request with params %s",
                      request, str(params))
        self.queue.put((request, store, params, callback, self.request_retry_count))

    def _handle_request_from_store(self, store, params):
        """Return a cached reply or None; only 3-tuple keys are cacheable."""
        if len(params) != 3:
            logging.error("DataProvider: unable to handle request from store with != 3 arguments")
            return None
        if params in store:
            return store[params]
        return None

    def _handle_request_from_pdb(self, request, params):
        return self.pdb.handle_request(request, params)

    def _handle_request_from_dbclient(self, request, params):
        return self.dbc.handle_request(request, params)

    def _handle_request(self, request, store, params, callback):
        """Answer a request from cache, pdb or dbc; cache and dispatch the reply.

        Raises FatalQueryError when no backend produced a reply.
        """
        #logging.debug("DataProvider: handling %s request params %s", request, str(params))
        reply = None
        answered_by_store = False
        if store is not None:
            logging.debug("DataProvider: trying request %s %s from store",
                          request, str(params))
            reply = self._handle_request_from_store(store, params)
            if reply is not None:
                answered_by_store = True
        if self.pdb_enabled and reply is None:
            try:
                logging.debug("DataProvider: trying request %s %s from pdb",
                              request, str(params))
                reply = self._handle_request_from_pdb(request, params)
            except FatalQueryError as e:
                # on a fatal error, continue with dbc
                logging.warning("DataProvider: pdb failed [%s]", str(e))
                if not self.dbc_enabled:
                    raise
        if self.dbc_enabled and reply is None:
            logging.debug("DataProvider: trying request %s %s from dbc",
                          request, str(params))
            reply = self._handle_request_from_dbclient(request, params)
        if reply is None:
            raise FatalQueryError("DataStore: request returned none, see log for details")
        # special call for metadata since it is expected to be part of the client status
        if request == "metadata":
            self.prodj.cl.storeMetadataByLoadedTrack(*params, reply)
        # cache fresh replies only (do not rewrite entries that came from the cache)
        if store is not None and answered_by_store == False:
            store[params] = reply
        # TODO: synchronous mode
        if callback is not None:
            callback(request, *params, reply)

    def _retry_request(self, request):
        """Re-enqueue a failed request with its retry budget decremented."""
        self.queue.task_done()
        if request[-1] > 0:
            logging.info("DataProvider: retrying %s request", request[0])
            self.queue.put((*request[:-1], request[-1] - 1))
            time.sleep(1)  # yes, this is dirty, but effective to work around timing problems on failed request
        else:
            logging.info("DataProvider: %s request failed %d times, giving up",
                         request[0], self.request_retry_count)

    def gc(self):
        # delegate garbage collection to the db client while idle
        self.dbc.gc()

    def run(self):
        """Main loop: serve queued requests; run gc when the queue is idle."""
        logging.debug("DataProvider starting")
        while self.keep_running:
            try:
                request = self.queue.get(timeout=1)
            except Empty:
                self.gc()
                continue
            try:
                self._handle_request(*request[:-1])
                self.queue.task_done()
            except TemporaryQueryError as e:
                logging.error("DataProvider: %s request failed: %s", request[0], e)
                self._retry_request(request)
            except FatalQueryError as e:
                logging.error("DataProvider: %s request failed: %s", request[0], e)
                self.queue.task_done()
        logging.debug("DataProvider shutting down")
class TianyanchaSpider(scrapy.Spider):
    """Scrapy spider (Python 2 syntax) crawling tianyancha company listings
    per province/industry, following company detail pages and pagination."""

    name = "tianyancha"
    config = Config()
    allowed_domains = config.getAllowDomains()
    start_urls = config.getStartUrls()
    #start_urls = ['http://snx.tianyancha.com/search/oc26']
    #start_urls = ['https://www.tianyancha.com/company/644630399']
    provice_code = config.getCurrentProviceCode()
    industry_code = config.getDefaultIndustryCode()
    start_page_code = config.getDefaultPageCode()
    page_count = config.getStep()  # remaining pages to follow
    dbclient = DBClient()
    redisclient = RadisClient()  # tracks already-solved corp codes
    proxyloader = ProxyLoader()
    start_url = ""
    proxy_url = ""

    def __init__(self, prov=None, ind=None, page=None, count=None, *args, **kwargs):
        """Override province/industry/start-page/page-count from spider args."""
        super(TianyanchaSpider, self).__init__(*args, **kwargs)
        if (prov):
            self.provice_code = prov
        if (ind):
            self.industry_code = ind
        if (page and page != 'p1'):
            self.start_page_code = page
        if (count):
            self.page_count = int(count)
        self.start_url = 'https://' + self.provice_code + '.tianyancha.com/search/' + self.industry_code + '/' + self.start_page_code
        #print("******************************" + start_url)
        self.start_urls = [self.start_url]

    def parse(self, response):
        """Parse a search-result page: yield detail-page requests for unseen
        companies, follow the next page, and on any error retry the same url
        through a (possibly new) proxy."""
        logger.info('Start to parse industry info: ' + self.industry_code)
        logger.info(response)
        try:
            url = response.meta['url']
        except BaseException, e:
            # first page has no 'url' meta; fall back to the configured start url
            url = self.start_url
        try:
            current_url = response.url
            page = current_url[current_url.rfind("/") + 1:]
            items = response.xpath(
                "/html/body/div[2]/div[1]/div/div/div[1]/div[3]/div").extract()
            logger.info("***************Items length: " + str(len(items)))
            if len(items) == 0:
                # empty listing treated as a failed/blocked load -> retry branch
                raise Exception()
            for item in items:
                sl = Selector(text=item)
                corp_urls = sl.xpath("//div[2]/div[1]/div[1]/a/@href").extract()
                for corp_url in corp_urls:
                    corp_url = self.solveHttps(corp_url)
                    logger.info('Corporation URL: ' + corp_url)
                    corp_code = corp_url[corp_url.rfind("/") + 1:]
                    # skip companies already recorded in redis
                    if not self.redisclient.get(corp_code):
                        if self.proxy_url == "":
                            yield Request(url=corp_url,
                                          meta={'page': page,
                                                'corp_url': corp_url,
                                                'corp_code': corp_code,
                                                'industry_code': self.industry_code},
                                          callback=self.parse_corp)
                        else:
                            yield Request(url=corp_url,
                                          meta={'proxy': self.proxy_url,
                                                'page': page,
                                                'corp_url': corp_url,
                                                'corp_code': corp_code,
                                                'industry_code': self.industry_code},
                                          callback=self.parse_corp)
            # pagination link position varies (li[14] vs li[13])
            nextPage = response.xpath(
                "/html/body/div[2]/div[1]/div/div/div[1]/div[4]/ul/li[14]/a/@href").extract()
            if not nextPage:
                nextPage = response.xpath(
                    "/html/body/div[2]/div[1]/div/div/div[1]/div[4]/ul/li[13]/a/@href").extract()
            if nextPage and nextPage[0] and self.page_count > 0:
                logger.info('***************Next url: ' + nextPage[0])
                self.page_count = self.page_count - 1
                if self.proxy_url == "":
                    yield Request(url=nextPage[0],
                                  meta={'url': nextPage[0],
                                        'industry_code': self.industry_code},
                                  callback=self.parse)
                else:
                    yield Request(url=nextPage[0],
                                  meta={'proxy': self.proxy_url,
                                        'url': nextPage[0],
                                        'industry_code': self.industry_code},
                                  callback=self.parse)
        except BaseException, e:
            logger.info('***************Error parsing url: ' + url + ' and reload by using new proxy')
            # restart with a new proxy
            proxy = self.proxyloader.getProxy()
            if proxy != "":
                self.proxy_url = self.proxyloader.getProtocal() + proxy
            # retry the same url through whatever proxy is now configured
            # NOTE(review): original formatting was lost; the retry may have
            # been intended only when a proxy was obtained — confirm upstream
            yield Request(url=url,
                          meta={'url': url,
                                'proxy': self.proxy_url,
                                'industry_code': self.industry_code},
                          callback=self.parse,
                          dont_filter=True)
class Test_DBClient(unittest.TestCase):
    """Exercises DBClient CRUD and export helpers on a throwaway sqlite file."""

    def setUp(self):
        """Open a fresh test database and build a fully-populated sample game."""
        self.dbc = DBClient('test.db')
        self.mock_ideal_game = {
            'Name': 'sample',
            'RawgID': 20,
            'Metacritic': 100,
            'Genres': 'Action',
            'Indie': False,
            'Presence': 83,
            'Platform': 'Windows',
            'Graphics': '4gb GPU',
            'Storage': '180gb',
            'Memory': '8gb',
            'RatingsBreakdown': '34/45/15',
            'ReleaseDate': 'January 14, 2020',
            'Soundtrack': False,
            'Franchise': None,
            'OriginalCost': '$39.99',
            'DiscountedCost': None,
            'Players': 'singleplayer, multiplayer',
            'Controller': True,
            'Languages': 'English, Mandarin',
            'ESRB': 'Teen',
            'Achievements': 55,
            'Publisher': 'idSoftware',
            'Description': 'lots of stuff',
            'Tags': 'Fun, Violent',
            'SteamURL': 'https://store.steampowered.com/app/42700/?snr=1_5_9__205',
        }

    def tearDown(self):
        """Close the connection and delete the test database file."""
        self.dbc.close()
        os.remove('test.db')

    def test_create_table(self):
        """create_table registers a 'games' table."""
        self.dbc.create_table()
        self.dbc.cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
        res = self.dbc.cursor.fetchall()
        self.assertIn(('games', ), res, 'game not found in tables')

    def test_drop_table(self):
        """drop_table removes the 'games' table."""
        self.dbc.drop_table()
        self.dbc.cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
        res = self.dbc.cursor.fetchall()
        # FIX: message previously read "not found" for a NotIn assertion
        self.assertNotIn(('games', ), res, 'games table still present')

    def test_add_game(self):
        """add_game inserts a row retrievable by its SteamURL."""
        self.dbc.create_table()
        self.dbc.add_game(self.mock_ideal_game)
        self.dbc.cursor.execute("SELECT * FROM games")
        res = self.dbc.cursor.fetchone()
        self.assertIn(self.mock_ideal_game['SteamURL'], res,
                      'game with url not in table')

    def test_get_game(self):
        """get_game returns the row whose id matches."""
        idx = 1
        self.dbc.create_table()
        self.dbc.add_game(self.mock_ideal_game)
        self.dbc.cursor.execute("SELECT id FROM games")
        res = [features[0] for features in self.dbc.cursor.fetchall()]
        self.assertIn(idx, res, 'game with idx not in table')
        game = self.dbc.get_game(idx)
        # BUG FIX: was assertIs — int identity is a CPython small-int caching
        # artifact; compare by value
        self.assertEqual(game[0], idx, 'game idx does not match')

    def test_get_game_by_url(self):
        """get_game_by_url returns the row containing the url."""
        url = self.mock_ideal_game['SteamURL']
        self.dbc.create_table()
        self.dbc.add_game(self.mock_ideal_game)
        self.dbc.cursor.execute("SELECT SteamURL FROM games;")
        res = [features[0] for features in self.dbc.cursor.fetchall()]
        self.assertIn(url, res, 'game with url not in table')
        game = self.dbc.get_game_by_url(url)
        self.assertIn(url, game, 'returned game url does not match')

    def test_get_all_games(self):
        """get_all_games returns one row per inserted game."""
        self.dbc.create_table()
        num_games = 5
        for idx in range(num_games):
            self.dbc.add_game(self.mock_ideal_game)
        games = self.dbc.get_all_games()
        # BUG FIX: was assertIs (int identity); compare by value
        self.assertEqual(num_games, len(games),
                         'returned game length should equal num_games')

    def test_to_csv(self):
        """to_csv exports one csv row per stored game."""
        self.dbc.create_table()
        num_games = 5
        for idx in range(num_games):
            self.dbc.add_game(self.mock_ideal_game)
        self.dbc.to_csv('test.csv')
        test_csv_df = pd.read_csv('test.csv')
        # BUG FIX: was assertIs; shape[0] need not be the identical int object
        self.assertEqual(num_games, test_csv_df.shape[0],
                         'test csv should have num_games rows')
        os.remove('test.csv')

    def test_delete_game(self):
        """delete_game removes exactly the row with the given id."""
        self.dbc.create_table()
        delete_idx = 2
        num_games = 5
        for idx in range(num_games):
            self.dbc.add_game(self.mock_ideal_game)
        self.dbc.cursor.execute("SELECT id FROM games;")
        game_ids = [result[0] for result in self.dbc.cursor.fetchall()]
        self.assertIn(delete_idx, game_ids, 'no game present with delete_idx')
        self.dbc.delete_game(delete_idx)
        self.dbc.cursor.execute("SELECT id FROM games;")
        game_ids = [result[0] for result in self.dbc.cursor.fetchall()]
        self.assertNotIn(delete_idx, game_ids,
                         'game with delete_idx still present')
class TianyanchaFixSpider(scrapy.Spider):
    """Scrapy spider (Python 2 syntax) that retries company URLs previously
    recorded as failed (ISSUE_URL.status = '0'), updating their status:
    '1' solved, '2' already solved, '-1' failed again."""

    name = "tianyancha_fix"
    allowed_domains = ["www.tianyancha.com"]
    start_urls = ['https://www.tianyancha.com/']
    dbclient = DBClient()
    redisclient = RadisClient()  # tracks already-solved corp codes
    proxyloader = ProxyLoader()
    config = Config()
    count = 0          # requests left before rotating the proxy
    proxy_url = ""
    interval = 25      # requests per proxy
    records_to_fix = []
    cur_index = 0
    step = 25          # max records to process this run

    def __init__(self, step=None, *args, **kwargs):
        """Load the pending ISSUE_URL records; `step` caps how many to fix."""
        super(TianyanchaFixSpider, self).__init__(*args, **kwargs)
        if (step):
            self.step = int(step)
        logger.info("Start to fix data")
        # NOTE(review): SQL built by concatenation — parameterize if queryDB allows
        query = "select * from ISSUE_URL t where t.provice_code = '" + \
            self.config.getCurrentProviceCode() + "' and t.status = '0'"
        self.records_to_fix = self.dbclient.queryDB(query)
        logger.info("************************Total data number: " +
                    str(len(self.records_to_fix)))

    def parse(self, response):
        """Walk the pending records, re-requesting unseen corps through the
        current proxy and marking redis-known ones as already solved ('2')."""
        while self.cur_index < self.step:
            record = self.records_to_fix[self.cur_index]
            uuid = record[0]
            url = record[1]
            if self.count == 0:
                # proxy budget exhausted: rotate to a fresh proxy
                self.proxy_url = self.proxyloader.getProtocal() + self.proxyloader.getProxy()
                self.count = self.interval
                # NOTE(review): original formatting was lost; this re-entry
                # request appears tied to the proxy rotation — confirm upstream
                if self.cur_index != 0:
                    yield Request(url=self.start_urls[0], callback=self.parse)
            corp_code = url[url.rfind("/") + 1:]
            self.cur_index = self.cur_index + 1
            if not self.redisclient.get(corp_code):
                provice_code = record[2]
                industry_code = record[3]
                logger.info("URL: " + url + " to be solved")
                self.count = self.count - 1
                yield Request(url=url,
                              meta={'proxy': self.proxy_url,
                                    'provice_code': provice_code,
                                    'industry_code': industry_code,
                                    'corp_code': corp_code,
                                    'uuid': uuid},
                              callback=self.parse_corp)
            else:
                logger.info('****************Corp: ' + corp_code + ' has been solved')
                updateSQL = "update ISSUE_URL t set t.status = '2' where t.id = '" + uuid + "'"
                self.dbclient.updateDB(updateSQL)

    def parse_corp(self, response):
        """Extract company fields into an EnterpriseItem; mark the record
        solved ('1') on success or failed ('-1') on any exception."""
        try:
            page = ""
            corp_code = response.meta['corp_code']
            provice_code = response.meta['provice_code']
            industry_code = response.meta['industry_code']
            uuid = response.meta['uuid']
            logger.info('****************Handle corperation info for industry: ' +
                        industry_code + ' and page: ' + page)
            item = EnterpriseItem()
            item['province'] = provice_code
            item['industry_code'] = industry_code
            item['page'] = page
            # positional xpaths: brittle against site layout changes
            item['name'] = response.xpath(
                "//*[@id='company_web_top']/div[2]/div[2]/div/span/text()").extract()[0]
            item['phone'] = response.xpath(
                "//*[@id='company_web_top']/div[2]/div[2]/div/div[2]/div[1]/span[2]/text()").extract()[0]
            item['address'] = response.xpath(
                "//*[@id='company_web_top']/div[2]/div[2]/div/div[3]/div[2]/span[2]/text()").extract()[0]
            item['legal_person'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[1]/table/tbody/tr/td[1]/div/div[1]/div[2]/div/a/text()").extract()[0]
            item['credit_code'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[2]/td[1]/div/span/text()").extract()[0]
            item['reg_capital'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[1]/table/tbody/tr/td[2]/div[1]/div[2]/div/text()").extract()[0].strip()
            item['establish_date'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[1]/table/tbody/tr/td[2]/div[2]/div[2]/div/text()").extract()[0].strip()
            item['biz_period_start'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[4]/td[2]/div/span/text()").extract()[0].replace("\n", "").replace(' ', '')
            item['reg_authority'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[5]/td[2]/div/span/text()").extract()[0]
            item['biz_reg_num'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[1]/td[1]/div/span/text()").extract()[0]
            item['org_code'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[1]/td[2]/div/span/text()").extract()[0]
            item['taxpayer_code'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[3]/td/div/span/text()").extract()[0]
            item['industry'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[4]/td[1]/div/span/text()").extract()[0]
            # remember this corp in redis so it is not fetched again
            self.redisclient.set(corp_code, corp_code)
            updateSQL = "update ISSUE_URL t set t.status = '1' where t.id = '" + uuid + "'"
            self.dbclient.updateDB(updateSQL)
            yield item
        except BaseException, e:
            logger.exception("Solve corp exception")
            updateSQL = "update ISSUE_URL t set t.status = '-1' where t.id = '" + uuid + "'"
            self.dbclient.updateDB(updateSQL)
class SteamCrawl:
    """Crawl the Steam search listing and persist per-game features.

    URLs come either from a fresh Selenium scroll of the search page
    (fetch_urls=True) or from the previously saved game_list.html.
    """

    def __init__(self):
        """Prepare the HTTP session, the filtered search URL and the database."""
        self.session = requests.Session()
        self.base_url = 'https://store.steampowered.com/search'
        self.base_url += '/?category1=998&supportedlang=english'
        self.urls = []
        self.dbc = DBClient('games.db')
        self.dbc.create_table()

    def crawl(self, fetch_urls=False):
        """Fetch features for every listed game and store them in the DB.

        Args:
            fetch_urls: when True, re-scrape the search listing first.
        Returns:
            'finished' once the url list has been processed.
        """
        # get list of urls (optionally refreshing the saved page source first)
        if fetch_urls:
            self.__download_urls_page_source()
        self.__parse_urls()
        # loop through list
        for url in tqdm(self.urls):
            if self.__already_downloaded(url):
                # BUG FIX: was `return`, which aborted the whole crawl at the
                # first already-stored game; skip only this url instead
                continue
            # get features for each url
            game = {}
            game.update(self.__get_steam_features(url))
            game.update(self.__get_rawg_features(url))
            # save features in db
            self.dbc.add_game(game)
        self.dbc.to_csv('games.csv')
        return 'finished'

    def __download_urls_page_source(self):
        """Scroll the (infinite-scroll) search page to the bottom and save it."""
        self.browser = webdriver.Safari()
        self.browser.get(self.base_url)
        self.__short_pause()
        lastHeight = self.browser.execute_script("return document.body.scrollHeight")
        while True:
            self.browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            self.__short_pause()
            newHeight = self.browser.execute_script("return document.body.scrollHeight")
            if newHeight == lastHeight:
                # page stopped growing: all results are loaded
                break
            lastHeight = newHeight
        self.__save_game_list_source()
        self.browser.close()

    def __parse_urls(self):
        """Extract all game links from the saved listing into self.urls."""
        html = self.__load_game_list_source()
        # FIX: name the parser explicitly — the implicit default depends on
        # what is installed and emits a warning
        soup = BeautifulSoup(html, 'html.parser')
        a_tags = soup.find('div', id='search_results').find_all('a')
        self.urls = [a_tag.get('href') for a_tag in a_tags]

    def __save_game_list_source(self):
        with open("game_list.html", "w") as f:
            f.write(self.browser.page_source)

    def __load_game_list_source(self):
        with open("game_list.html", "r") as f:
            game_list_source = f.read()
        return game_list_source

    def __short_pause(self):
        # randomized delay to look less bot-like
        duration = random.uniform(0, 3)
        time.sleep(duration)

    def __already_downloaded(self, url):
        """Truthy when a game with this url is already stored."""
        game = self.dbc.get_game_by_url(url)
        return game

    def __get_steam_features(self, url):
        """Return the feature dict scraped from the Steam page."""
        sgi = SteamGameInfo()
        html = sgi.get_game_html(url)
        if html:
            features = sgi.strip_features(html)
            return features
        # BUG FIX: previously fell through returning None, which made
        # game.update(...) in crawl() raise TypeError
        return {}

    def __get_rawg_features(self, url):
        """Return the RAWG feature dict, keyed by the game name in the url."""
        name = url.split('/')[5].replace('_', ' ')
        rawg = RAWG()
        features = rawg.get_game(name)
        return features
class MainView(QtWidgets.QWidget):
    """Main vending-machine window: stacks the selection, payment-check and
    success screens, tracks the balance and drives the serial dispenser."""

    def __init__(self, conf, products):
        # conf sections used: SERIAL_CLIENT, DATABASE_CLIENT, MAIL_CLIENT,
        # PRODUCT_TRANSACTIONS, MACHINE_PROFILE
        QtWidgets.QWidget.__init__(self)
        self.setObjectName("MainView")
        self.signals = MainViewSignals()
        self._conf = conf
        self._products = products
        self.serialClient = serial.Serial(self._conf["SERIAL_CLIENT"]["PORT"],
                                          self._conf["SERIAL_CLIENT"]["BAUDRATE"],
                                          timeout=self._conf["SERIAL_CLIENT"]["TIMEOUT"])
        self.dbClient = DBClient(self._conf["DATABASE_CLIENT"])
        self.mailClient = MailClient(self._conf["MAIL_CLIENT"])
        self.balance = self._conf["PRODUCT_TRANSACTIONS"]["CURRENT_BALANCE"]
        self.paymentWidget = PaymentWidget()
        self.successWidget = SuccessWidget()
        self.selectionWidget = SelectionWidget(self._products, self.balance)
        self.selectionWidget.signals.new_purchase.connect(self.new_purchase_event)
        self._init_layout()

    def _init_centralWidget(self):
        # stacked pages: index 0 = selection, 1 = payment, 2 = success
        self.centralWidgets = QtWidgets.QStackedWidget()
        self.centralWidgets.setObjectName("centralWidgets")
        self.centralWidgets.addWidget(self.selectionWidget)
        self.centralWidgets.addWidget(self.paymentWidget)
        self.centralWidgets.addWidget(self.successWidget)

    def _init_layout(self):
        """Build the vertical layout: stacked pages, balance label, control button."""
        self.verticalLayout = QtWidgets.QVBoxLayout(self)
        self.verticalLayout.setObjectName("verticalLayout")
        self._init_centralWidget()
        self.verticalLayout.addWidget(self.centralWidgets)
        self.balanceLabel = QtWidgets.QLabel()
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred,
                                           QtWidgets.QSizePolicy.Maximum)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.balanceLabel.sizePolicy().hasHeightForWidth())
        self.balanceLabel.setSizePolicy(sizePolicy)
        font = QtGui.QFont()
        font.setPointSize(28)
        self.balanceLabel.setFont(font)
        self.balanceLabel.setAlignment(QtCore.Qt.AlignVCenter | QtCore.Qt.AlignHCenter)
        self.balanceLabel.setObjectName("BalanceLabel")
        self.balanceLabel.setText("${:0.2f}".format(self.balance))
        self.verticalLayout.addWidget(self.balanceLabel)
        self.controlButton = QtWidgets.QPushButton()
        self.controlButton.setObjectName("ControlButton")
        self.controlButton.setText("Check Payment")
        controlPolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.MinimumExpanding,
                                              QtWidgets.QSizePolicy.Fixed)
        controlPolicy.setHorizontalStretch(0)
        controlPolicy.setVerticalStretch(0)
        self.controlButton.setSizePolicy(controlPolicy)
        self.controlButton.clicked.connect(self.change_window)
        self.verticalLayout.addWidget(self.controlButton)

    def transaction_to_balance(self, new_transaction):
        """Credit an incoming payment to the balance and refresh the UI."""
        self.balance += new_transaction["amount"]
        self.balanceLabel.setText("${:0.2f}".format(self.balance))
        self.selectionWidget.update_balance(self.balance)

    def close_serial(self):
        self.serialClient.close()

    # ============================== SLOTS ==============================
    @QtCore.pyqtSlot()
    def change_window(self):
        """Cycle pages on button press: selection -> payment (after opening the
        mail connection), payment -> selection (after ingesting payments),
        success -> selection."""
        if self.centralWidgets.currentIndex() == 0:
            print("Trying to connect to mail server...")
            status = self.mailClient.open_mail_connection()
            print(status[1])
            if status[0]:
                self.centralWidgets.setCurrentWidget(self.paymentWidget)
                self.controlButton.setText("Back")
            else:
                # could not reach the mail server: terminate the application
                exit()
        elif self.centralWidgets.currentIndex() == 1:
            print("Reading last payments...")
            transaction_req = self.mailClient.get_last_transactions()
            if transaction_req[0] and len(transaction_req[1]) > 0:
                for payment in transaction_req[1]:
                    payment["machine_id"] = self._conf["MACHINE_PROFILE"]["MACHINE_ID"]
                    self.dbClient.add_new_payment(payment)
                    self.transaction_to_balance(payment)
            else:
                if transaction_req[0] == 0:
                    # request failed: second element carries the error text
                    print(transaction_req[1])
            print("Closing connection with mail server...")
            self.mailClient.close_mail_connection()
            self.centralWidgets.setCurrentWidget(self.selectionWidget)
            self.controlButton.setText("Check Payment")
        else:
            self.successWidget.reset()
            self.centralWidgets.setCurrentWidget(self.selectionWidget)
            self.controlButton.setText("Check Payment")

    @QtCore.pyqtSlot(dict)
    def new_purchase_event(self, transaction_info):
        """Record a purchase, debit the balance and dispense via serial."""
        transaction_info["machine_id"] = self._conf["MACHINE_PROFILE"]["MACHINE_ID"]
        self.dbClient.add_new_transaction(transaction_info)
        self.balance -= transaction_info['price']
        self.balanceLabel.setText("${:0.2f}".format(self.balance))
        self.selectionWidget.update_balance(self.balance)
        self.centralWidgets.setCurrentWidget(self.successWidget)
        self.controlButton.setText("Back")
        self.successWidget.start()
        # dispenser protocol: the product's serial code followed by a newline
        serial_msg = "{}\n".format(transaction_info['serial'])
        self.serialClient.write(serial_msg.encode('utf-8'))
        self.serialClient.flush()
# -*- coding:utf-8 -*- """ ====================== @author Vincent @config file config for database ====================== """ import os from sqlalchemy import create_engine from sqlalchemy.ext.declarative import declarative_base from dbclient import DBClient DBClients = DBClient() ServersPort = 8005 Base = declarative_base() EngineRead = create_engine( 'mysql://%s:%s@%s:%d/%s?charset=utf8' % (DBClients.db.MySQLREAD.User, DBClients.db.MySQLREAD.Passwd, DBClients.db.MySQLREAD.Host, DBClients.db.MySQLREAD.Port, DBClients.db.MySQLREAD.Dbname), encoding='utf8', echo=False, pool_size=100, pool_recycle=3600)