Example #1
    def __init__(self, prodj):
        super().__init__()
        self.prodj = prodj
        self.queue = Queue()

        self.pdb_enabled = True
        self.pdb = PDBProvider(prodj)

        self.dbc_enabled = True
        self.dbc = DBClient(prodj)

        # db queries seem to work if we submit player number 0 everywhere (NOTE: this seems to work only if less than 4 players are on the network)
        # however, this messes up rendering on the players sometimes (i.e. when querying metadata and player has browser opened)
        # alternatively, we can use a player number from 1 to 4 without rendering issues, but then only max. 3 real players can be used
        self.own_player_number = 0
        self.request_retry_count = 3

        # map of player_number,slot,track_id: metadata
        self.metadata_store = DataStore()
        # map of player_number,slot,artwork_id: artwork_data
        self.artwork_store = DataStore()
        # map of player_number,slot,track_id: waveform_data
        self.waveform_store = DataStore()
        # map of player_number,slot,track_id: preview_waveform_data
        self.preview_waveform_store = DataStore()
        # map of player_number,slot,track_id: beatgrid_data
        self.beatgrid_store = DataStore()
Example #2
    def __init__(self):
        self.session = requests.Session()
        self.base_url = 'https://store.steampowered.com/search'
        self.base_url += '/?category1=998&supportedlang=english'
        self.urls = []
        self.dbc = DBClient('games.db')
        self.dbc.create_table()
Example #3
    def setUp(self):
        self.dbc = DBClient('test.db')

        self.mock_ideal_game = {
            'Name': 'sample',
            'RawgID': 20,
            'Metacritic': 100,
            'Genres': 'Action',
            'Indie': False,
            'Presence': 83,
            'Platform': 'Windows',
            'Graphics': '4gb GPU',
            'Storage': '180gb',
            'Memory': '8gb',
            'RatingsBreakdown': '34/45/15',
            'ReleaseDate': 'January 14, 2020',
            'Soundtrack': False,
            'Franchise': None,
            'OriginalCost': '$39.99',
            'DiscountedCost': None,
            'Players': 'singleplayer, multiplayer',
            'Controller': True,
            'Languages': 'English, Mandarin',
            'ESRB': 'Teen',
            'Achievements': 55,
            'Publisher': 'idSoftware',
            'Description': 'lots of stuff',
            'Tags': 'Fun, Violent',
            'SteamURL':
            'https://store.steampowered.com/app/42700/?snr=1_5_9__205',
        }
Example #4
def main(argv):
    path = ""
    file = ""
    dbclient = DBClient("fi_dev")
    try:
        opts, args = getopt.getopt(argv, "hp:f:")
    except getopt.GetoptError:
        print('datainserter.py -p <url> -f <file>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('datainserter.py -p <url> -f <file>')
            sys.exit()
        elif opt == '-p':
            path = arg
        elif opt == '-f':
            file = arg

    if file != "":
        logging.info("************Start to insert data and file: " + file)
        solveFile(file, dbclient)
    else:
        logging.info("************Start to insert data and path: " + path)
        for parent, dirnames, filenames in os.walk(path):
            for filename in filenames:
                solveFile(filename, dbclient)
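
The example defines main(argv) but not the script entry point; the conventional guard would look like this (an assumption, since the rest of datainserter.py is not shown):

if __name__ == "__main__":
    # sys, getopt, os, logging and solveFile are assumed to be imported/defined at the top of the file
    main(sys.argv[1:])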
Example #5
def main(argv):
    step = 25
    dbclient = DBClient()
    config = Config()
    try:
        opts, args = getopt.getopt(argv, "hs:")
    except getopt.GetoptError:
        print('runner-fix.py -s <step>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('runner-fix.py -s <step>')
            sys.exit()
        elif opt == '-s':
            step = int(arg)
    while dbclient.getIssueUrlNum(config.getCurrentProviceCode()) > 0:
        scrapyCmd = 'scrapy runspider tianyancha_fix.py -a step=' + str(step)
        os.system(scrapyCmd)
Example #6
    def __init__(self, conf, products):
        QtWidgets.QWidget.__init__(self)
        self.setObjectName("MainView")
        self.signals = MainViewSignals()

        self._conf = conf
        self._products = products

        self.serialClient = serial.Serial(
            self._conf["SERIAL_CLIENT"]["PORT"],
            self._conf["SERIAL_CLIENT"]["BAUDRATE"],
            timeout=self._conf["SERIAL_CLIENT"]["TIMEOUT"])
        self.dbClient = DBClient(self._conf["DATABASE_CLIENT"])
        self.mailClient = MailClient(self._conf["MAIL_CLIENT"])

        self.balance = self._conf["PRODUCT_TRANSACTIONS"]["CURRENT_BALANCE"]

        self.paymentWidget = PaymentWidget()
        self.successWidget = SuccessWidget()
        self.selectionWidget = SelectionWidget(self._products, self.balance)
        self.selectionWidget.signals.new_purchase.connect(self.new_purchase_event)

        self._init_layout()
Example #7
class DownloadMiddleware(object):

    RETRY_HTTP_CODES = [500, 503, 504, 400, 403, 404, 408]
    dbclient = DBClient()

    def process_response(self, request, response, spider):
        config = Config()
        if response.status in self.RETRY_HTTP_CODES:
            logger.info("Request: " + request.url + " failed for: " +
                        str(response.status) + " and will be recorded in DB")
            record_id = uuid.uuid1()
            industry_code = request.meta['industry_code']
            provice_code = config.getCurrentProviceCode()
            updateSQL = ("insert into ISSUE_URL values ('" + str(record_id) +
                         "', '" + request.url + "', '" + provice_code + "', '" +
                         industry_code + "','0')")
            self.dbclient.updateDB(updateSQL)
        return response
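
For reference, the same insert can be expressed with bind parameters instead of string concatenation. This is only a sketch: it assumes a hypothetical updateDB(sql, params) overload, which the DBClient snippets in these examples do not show.

    # Hypothetical parameterized variant (updateDB(sql, params) is assumed here,
    # not part of the DBClient API shown in these examples).
    def record_failed_url(self, request):
        config = Config()
        sql = "insert into ISSUE_URL values (%s, %s, %s, %s, '0')"
        self.dbclient.updateDB(sql, (str(uuid.uuid1()), request.url,
                                     config.getCurrentProviceCode(),
                                     request.meta['industry_code']))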
Example #8
class DataProvider(Thread):
    def __init__(self, prodj):
        super().__init__()
        self.prodj = prodj
        self.queue = Queue()

        self.pdb_enabled = True
        self.pdb = PDBProvider(prodj)

        self.dbc_enabled = True
        self.dbc = DBClient(prodj)

        # db queries seem to work if we submit player number 0 everywhere (NOTE: this seems to work only if less than 4 players are on the network)
        # however, this messes up rendering on the players sometimes (i.e. when querying metadata and player has browser opened)
        # alternatively, we can use a player number from 1 to 4 without rendering issues, but then only max. 3 real players can be used
        self.own_player_number = 0
        self.request_retry_count = 3

        # map of player_number,slot,track_id: metadata
        self.metadata_store = DataStore()
        # map of player_number,slot,artwork_id: artwork_data
        self.artwork_store = DataStore()
        # map of player_number,slot,track_id: waveform_data
        self.waveform_store = DataStore()
        # map of player_number,slot,track_id: preview_waveform_data
        self.preview_waveform_store = DataStore()
        # map of player_number,slot,track_id: beatgrid_data
        self.beatgrid_store = DataStore()

    def start(self):
        self.keep_running = True
        super().start()

    def stop(self):
        self.keep_running = False
        self.pdb.stop()
        self.metadata_store.stop()
        self.artwork_store.stop()
        self.waveform_store.stop()
        self.preview_waveform_store.stop()
        self.beatgrid_store.stop()
        self.join()

    def cleanup_stores_from_changed_media(self, player_number, slot):
        self.metadata_store.removeByPlayerSlot(player_number, slot)
        self.artwork_store.removeByPlayerSlot(player_number, slot)
        self.waveform_store.removeByPlayerSlot(player_number, slot)
        self.preview_waveform_store.removeByPlayerSlot(player_number, slot)
        self.beatgrid_store.removeByPlayerSlot(player_number, slot)
        self.pdb.cleanup_stores_from_changed_media(player_number, slot)

    # called from outside, enqueues request
    def get_metadata(self, player_number, slot, track_id, callback=None):
        self._enqueue_request("metadata", self.metadata_store,
                              (player_number, slot, track_id), callback)

    def get_root_menu(self, player_number, slot, callback=None):
        self._enqueue_request("root_menu", None, (player_number, slot),
                              callback)

    def get_titles(self,
                   player_number,
                   slot,
                   sort_mode="default",
                   callback=None):
        self._enqueue_request("title", None, (player_number, slot, sort_mode),
                              callback)

    def get_titles_by_album(self,
                            player_number,
                            slot,
                            album_id,
                            sort_mode="default",
                            callback=None):
        self._enqueue_request("title_by_album", None,
                              (player_number, slot, sort_mode, [album_id]),
                              callback)

    def get_titles_by_artist_album(self,
                                   player_number,
                                   slot,
                                   artist_id,
                                   album_id,
                                   sort_mode="default",
                                   callback=None):
        self._enqueue_request(
            "title_by_artist_album", None,
            (player_number, slot, sort_mode, [artist_id, album_id]), callback)

    def get_titles_by_genre_artist_album(self,
                                         player_number,
                                         slot,
                                         genre_id,
                                         artist_id,
                                         album_id,
                                         sort_mode="default",
                                         callback=None):
        self._enqueue_request(
            "title_by_genre_artist_album", None,
            (player_number, slot, sort_mode, [genre_id, artist_id, album_id]),
            callback)

    def get_artists(self, player_number, slot, callback=None):
        self._enqueue_request("artist", None, (player_number, slot), callback)

    def get_artists_by_genre(self,
                             player_number,
                             slot,
                             genre_id,
                             callback=None):
        self._enqueue_request("artist_by_genre", None,
                              (player_number, slot, [genre_id]), callback)

    def get_albums(self, player_number, slot, callback=None):
        self._enqueue_request("album", None, (player_number, slot), callback)

    def get_albums_by_artist(self,
                             player_number,
                             slot,
                             artist_id,
                             callback=None):
        self._enqueue_request("album_by_artist", None,
                              (player_number, slot, [artist_id]), callback)

    def get_albums_by_genre_artist(self,
                                   player_number,
                                   slot,
                                   genre_id,
                                   artist_id,
                                   callback=None):
        self._enqueue_request("album_by_genre_artist", None,
                              (player_number, slot, [genre_id, artist_id]),
                              callback)

    def get_genres(self, player_number, slot, callback=None):
        self._enqueue_request("genre", None, (player_number, slot), callback)

    def get_playlist_folder(self,
                            player_number,
                            slot,
                            folder_id=0,
                            callback=None):
        self._enqueue_request("playlist_folder", None,
                              (player_number, slot, folder_id), callback)

    def get_playlist(self,
                     player_number,
                     slot,
                     playlist_id,
                     sort_mode="default",
                     callback=None):
        self._enqueue_request("playlist", None,
                              (player_number, slot, sort_mode, playlist_id),
                              callback)

    def get_artwork(self, player_number, slot, artwork_id, callback=None):
        self._enqueue_request("artwork", self.artwork_store,
                              (player_number, slot, artwork_id), callback)

    def get_waveform(self, player_number, slot, track_id, callback=None):
        self._enqueue_request("waveform", self.waveform_store,
                              (player_number, slot, track_id), callback)

    def get_preview_waveform(self,
                             player_number,
                             slot,
                             track_id,
                             callback=None):
        self._enqueue_request("preview_waveform", self.preview_waveform_store,
                              (player_number, slot, track_id), callback)

    def get_beatgrid(self, player_number, slot, track_id, callback=None):
        self._enqueue_request("beatgrid", self.beatgrid_store,
                              (player_number, slot, track_id), callback)

    def get_mount_info(self, player_number, slot, track_id, callback=None):
        self._enqueue_request("mount_info", None,
                              (player_number, slot, track_id), callback)

    def get_track_info(self, player_number, slot, track_id, callback=None):
        self._enqueue_request("track_info", None,
                              (player_number, slot, track_id), callback)

    def _enqueue_request(self, request, store, params, callback):
        player_number = params[0]
        if player_number == 0 or player_number > 4:
            logging.warning("DataProvider: invalid %s request parameters",
                            request)
            return
        logging.debug("DataProvider: enqueueing %s request with params %s",
                      request, str(params))
        self.queue.put(
            (request, store, params, callback, self.request_retry_count))

    def _handle_request_from_store(self, store, params):
        if len(params) != 3:
            logging.error(
                "DataProvider: unable to handle request from store with != 3 arguments"
            )
            return None
        if params in store:
            return store[params]
        return None

    def _handle_request_from_pdb(self, request, params):
        return self.pdb.handle_request(request, params)

    def _handle_request_from_dbclient(self, request, params):
        return self.dbc.handle_request(request, params)

    def _handle_request(self, request, store, params, callback):
        #logging.debug("DataProvider: handling %s request params %s", request, str(params))
        reply = None
        answered_by_store = False
        if store is not None:
            logging.debug("DataProvider: trying request %s %s from store",
                          request, str(params))
            reply = self._handle_request_from_store(store, params)
            if reply is not None:
                answered_by_store = True
        if self.pdb_enabled and reply is None:
            try:
                logging.debug("DataProvider: trying request %s %s from pdb",
                              request, str(params))
                reply = self._handle_request_from_pdb(request, params)
            except FatalQueryError as e:  # on a fatal error, continue with dbc
                logging.warning("DataProvider: pdb failed [%s]", str(e))
                if not self.dbc_enabled:
                    raise
        if self.dbc_enabled and reply is None:
            logging.debug("DataProvider: trying request %s %s from dbc",
                          request, str(params))
            reply = self._handle_request_from_dbclient(request, params)

        if reply is None:
            raise FatalQueryError(
                "DataStore: request returned none, see log for details")

        # special call for metadata since it is expected to be part of the client status
        if request == "metadata":
            self.prodj.cl.storeMetadataByLoadedTrack(*params, reply)

        if store is not None and not answered_by_store:
            store[params] = reply

        # TODO: synchronous mode
        if callback is not None:
            callback(request, *params, reply)

    def _retry_request(self, request):
        self.queue.task_done()
        if request[-1] > 0:
            logging.info("DataProvider: retrying %s request", request[0])
            self.queue.put((*request[:-1], request[-1] - 1))
            # yes, this is dirty, but effective to work around timing problems on failed request
            time.sleep(1)
        else:
            logging.info("DataProvider: %s request failed %d times, giving up",
                         request[0], self.request_retry_count)

    def gc(self):
        self.dbc.gc()

    def run(self):
        logging.debug("DataProvider starting")
        while self.keep_running:
            try:
                request = self.queue.get(timeout=1)
            except Empty:
                self.gc()
                continue
            try:
                self._handle_request(*request[:-1])
                self.queue.task_done()
            except TemporaryQueryError as e:
                logging.error("DataProvider: %s request failed: %s",
                              request[0], e)
                self._retry_request(request)
            except FatalQueryError as e:
                logging.error("DataProvider: %s request failed: %s",
                              request[0], e)
                self.queue.task_done()
        logging.debug("DataProvider shutting down")
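
A minimal usage sketch for DataProvider. The prodj object, the slot string and the IDs are placeholders from the surrounding project; the callback signature mirrors the callback(request, *params, reply) call in _handle_request above.

def on_metadata(request, player_number, slot, track_id, metadata):
    # invoked from the DataProvider thread once the request has been answered
    print("metadata for track", track_id, "on player", player_number, "->", metadata)

provider = DataProvider(prodj)  # prodj: main object of the surrounding project (assumed)
provider.start()
provider.get_metadata(2, "sd", 42, callback=on_metadata)  # placeholder player/slot/track values
# ... later, on shutdown:
provider.stop()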
Example #9
class TianyanchaSpider(scrapy.Spider):
    name = "tianyancha"
    config = Config()
    allowed_domains = config.getAllowDomains()
    start_urls = config.getStartUrls()
    #start_urls = ['http://snx.tianyancha.com/search/oc26']
    #start_urls = ['https://www.tianyancha.com/company/644630399']
    provice_code = config.getCurrentProviceCode()
    industry_code = config.getDefaultIndustryCode()
    start_page_code = config.getDefaultPageCode()
    page_count = config.getStep()
    dbclient = DBClient()
    redisclient = RadisClient()
    proxyloader = ProxyLoader()
    start_url = ""
    proxy_url = ""

    def __init__(self,
                 prov=None,
                 ind=None,
                 page=None,
                 count=None,
                 *args,
                 **kwargs):
        super(TianyanchaSpider, self).__init__(*args, **kwargs)
        if prov:
            self.provice_code = prov
        if ind:
            self.industry_code = ind
        if page and page != 'p1':
            self.start_page_code = page
        if count:
            self.page_count = int(count)
        self.start_url = ('https://' + self.provice_code + '.tianyancha.com/search/' +
                          self.industry_code + '/' + self.start_page_code)
        #print("******************************" + start_url)
        self.start_urls = [self.start_url]

    def parse(self, response):
        logger.info('Start to parse industry info: ' + self.industry_code)
        logger.info(response)
        try:
            url = response.meta['url']
        except BaseException:
            url = self.start_url
        try:
            current_url = response.url
            page = current_url[current_url.rfind("/") + 1:]
            items = response.xpath(
                "/html/body/div[2]/div[1]/div/div/div[1]/div[3]/div"
            ).extract()
            logger.info("***************Items length: " + str(len(items)))
            if len(items) == 0:
                raise Exception()

            for item in items:
                sl = Selector(text=item)
                corp_urls = sl.xpath(
                    "//div[2]/div[1]/div[1]/a/@href").extract()
                for corp_url in corp_urls:
                    corp_url = self.solveHttps(corp_url)
                    logger.info('Corporation URL: ' + corp_url)
                    corp_code = corp_url[corp_url.rfind("/") + 1:]
                    if not self.redisclient.get(corp_code):
                        if self.proxy_url == "":
                            yield Request(url=corp_url,
                                          meta={
                                              'page': page,
                                              'corp_url': corp_url,
                                              'corp_code': corp_code,
                                              'industry_code':
                                              self.industry_code
                                          },
                                          callback=self.parse_corp)
                        else:
                            yield Request(url=corp_url,
                                          meta={
                                              'proxy': self.proxy_url,
                                              'page': page,
                                              'corp_url': corp_url,
                                              'corp_code': corp_code,
                                              'industry_code':
                                              self.industry_code
                                          },
                                          callback=self.parse_corp)

            nextPage = response.xpath(
                "/html/body/div[2]/div[1]/div/div/div[1]/div[4]/ul/li[14]/a/@href"
            ).extract()
            if not nextPage:
                nextPage = response.xpath(
                    "/html/body/div[2]/div[1]/div/div/div[1]/div[4]/ul/li[13]/a/@href"
                ).extract()
            if nextPage and nextPage[0] and self.page_count > 0:
                logger.info('***************Next url: ' + nextPage[0])
                self.page_count = self.page_count - 1
                if self.proxy_url == "":
                    yield Request(url=nextPage[0],
                                  meta={
                                      'url': nextPage[0],
                                      'industry_code': self.industry_code
                                  },
                                  callback=self.parse)
                else:
                    yield Request(url=nextPage[0],
                                  meta={
                                      'proxy': self.proxy_url,
                                      'url': nextPage[0],
                                      'industry_code': self.industry_code
                                  },
                                  callback=self.parse)

        except BaseException:
            logger.info('***************Error parsing url: ' + url +
                        ', reloading with a new proxy')
            # restart the request through a fresh proxy
            proxy = self.proxyloader.getProxy()
            if proxy != "":
                self.proxy_url = self.proxyloader.getProtocal() + proxy
                yield Request(url=url,
                              meta={
                                  'url': url,
                                  'proxy': self.proxy_url,
                                  'industry_code': self.industry_code
                              },
                              callback=self.parse,
                              dont_filter=True)
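
The constructor arguments above correspond to the -a options passed on the scrapy command line; a sketch of launching the spider programmatically instead (the prov/ind/page/count values are illustrative, taken from the commented-out start_urls):

from scrapy.crawler import CrawlerProcess

process = CrawlerProcess()
# keyword arguments are forwarded to TianyanchaSpider.__init__, just like -a flags
process.crawl(TianyanchaSpider, prov='snx', ind='oc26', page='p2', count=10)
process.start()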
Example #10
class Test_DBClient(unittest.TestCase):
    def setUp(self):
        self.dbc = DBClient('test.db')

        self.mock_ideal_game = {
            'Name': 'sample',
            'RawgID': 20,
            'Metacritic': 100,
            'Genres': 'Action',
            'Indie': False,
            'Presence': 83,
            'Platform': 'Windows',
            'Graphics': '4gb GPU',
            'Storage': '180gb',
            'Memory': '8gb',
            'RatingsBreakdown': '34/45/15',
            'ReleaseDate': 'January 14, 2020',
            'Soundtrack': False,
            'Franchise': None,
            'OriginalCost': '$39.99',
            'DiscountedCost': None,
            'Players': 'singleplayer, multiplayer',
            'Controller': True,
            'Languages': 'English, Mandarin',
            'ESRB': 'Teen',
            'Achievements': 55,
            'Publisher': 'idSoftware',
            'Description': 'lots of stuff',
            'Tags': 'Fun, Violent',
            'SteamURL':
            'https://store.steampowered.com/app/42700/?snr=1_5_9__205',
        }

    def tearDown(self):
        self.dbc.close()
        os.remove('test.db')

    def test_create_table(self):
        self.dbc.create_table()
        self.dbc.cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
        res = self.dbc.cursor.fetchall()
        self.assertIn(('games', ), res, 'games table not found')

    def test_drop_table(self):
        self.dbc.drop_table()
        self.dbc.cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
        res = self.dbc.cursor.fetchall()
        self.assertNotIn(('games', ), res, 'games table still present after drop')

    def test_add_game(self):
        self.dbc.create_table()
        self.dbc.add_game(self.mock_ideal_game)
        self.dbc.cursor.execute("SELECT * FROM games")
        res = self.dbc.cursor.fetchone()
        self.assertIn(self.mock_ideal_game['SteamURL'], res,
                      'game with url not in table')

    def test_get_game(self):
        idx = 1
        self.dbc.create_table()
        self.dbc.add_game(self.mock_ideal_game)
        self.dbc.cursor.execute("SELECT id FROM games")
        res = [features[0] for features in self.dbc.cursor.fetchall()]
        self.assertIn(idx, res, 'game with idx not in table')
        game = self.dbc.get_game(idx)
        self.assertEqual(game[0], idx, 'game idx does not match')

    def test_get_game_by_url(self):
        url = self.mock_ideal_game['SteamURL']
        self.dbc.create_table()
        self.dbc.add_game(self.mock_ideal_game)
        self.dbc.cursor.execute("SELECT SteamURL FROM games;")
        res = [features[0] for features in self.dbc.cursor.fetchall()]
        self.assertIn(url, res, 'game with url not in table')
        game = self.dbc.get_game_by_url(url)
        self.assertIn(url, game, 'returned game url does not match')

    def test_get_all_games(self):
        self.dbc.create_table()
        num_games = 5
        for idx in range(num_games):
            self.dbc.add_game(self.mock_ideal_game)

        games = self.dbc.get_all_games()
        self.assertEqual(num_games, len(games),
                         'returned game length should equal num_games')

    def test_to_csv(self):
        self.dbc.create_table()
        num_games = 5
        for idx in range(num_games):
            self.dbc.add_game(self.mock_ideal_game)

        self.dbc.to_csv('test.csv')
        test_csv_df = pd.read_csv('test.csv')
        self.assertEqual(num_games, test_csv_df.shape[0],
                         'test csv should have num_games rows')
        os.remove('test.csv')

    def test_delete_game(self):
        self.dbc.create_table()
        delete_idx = 2
        num_games = 5
        for idx in range(num_games):
            self.dbc.add_game(self.mock_ideal_game)

        self.dbc.cursor.execute("SELECT id FROM games;")
        game_ids = [result[0] for result in self.dbc.cursor.fetchall()]
        self.assertIn(delete_idx, game_ids, 'no game present with delete_idx')

        self.dbc.delete_game(delete_idx)
        self.dbc.cursor.execute("SELECT id FROM games;")
        game_ids = [result[0] for result in self.dbc.cursor.fetchall()]
        self.assertNotIn(delete_idx, game_ids,
                         'game with delete_idx still present')
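
A conventional entry point for running the suite, assuming it lives in a file such as test_dbclient.py (the file name is an assumption):

# run with: python test_dbclient.py   or: python -m unittest -v test_dbclient
if __name__ == '__main__':
    unittest.main()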
Example #11
class TianyanchaFixSpider(scrapy.Spider):

    name = "tianyancha_fix"
    allowed_domains = ["www.tianyancha.com"]
    start_urls = ['https://www.tianyancha.com/']
    dbclient = DBClient()
    redisclient = RadisClient()
    proxyloader = ProxyLoader()
    config = Config()
    count = 0
    proxy_url = ""
    interval = 25
    records_to_fix = []
    cur_index = 0
    step = 25

    def __init__(self, step=None, *args, **kwargs):
        super(TianyanchaFixSpider, self).__init__(*args, **kwargs)
        if step:
            self.step = int(step)
        logger.info("Start to fix data")
        query = ("select * from ISSUE_URL t where t.provice_code = '" +
                 self.config.getCurrentProviceCode() + "' and t.status = '0'")
        self.records_to_fix = self.dbclient.queryDB(query)
        logger.info("************************Total data number: " +
                    str(len(self.records_to_fix)))

    def parse(self, response):
        while self.cur_index < self.step:
            record = self.records_to_fix[self.cur_index]
            uuid = record[0]
            url = record[1]
            if self.count == 0:
                self.proxy_url = (self.proxyloader.getProtocal() +
                                  self.proxyloader.getProxy())
                self.count = self.interval
                if self.cur_index != 0:
                    yield Request(url=self.start_urls[0], callback=self.parse)
            corp_code = url[url.rfind("/") + 1:]
            self.cur_index = self.cur_index + 1
            if not self.redisclient.get(corp_code):
                provice_code = record[2]
                industry_code = record[3]
                logger.info("URL: " + url + " to be solved")
                self.count = self.count - 1
                yield Request(url=url,
                              meta={
                                  'proxy': self.proxy_url,
                                  'provice_code': provice_code,
                                  'industry_code': industry_code,
                                  'corp_code': corp_code,
                                  'uuid': uuid
                              },
                              callback=self.parse_corp)
            else:
                logger.info('****************Corp: ' + corp_code +
                            ' has been solved')
                updateSQL = "update ISSUE_URL t set t.status = '2' where t.id = '" + uuid + "'"
                self.dbclient.updateDB(updateSQL)

    def parse_corp(self, response):
        try:
            page = ""
            corp_code = response.meta['corp_code']
            provice_code = response.meta['provice_code']
            industry_code = response.meta['industry_code']
            uuid = response.meta['uuid']
            logger.info(
                '****************Handle corporation info for industry: ' +
                industry_code + ' and page: ' + page)
            item = EnterpriseItem()
            item['province'] = provice_code
            item['industry_code'] = industry_code
            item['page'] = page
            item['name'] = response.xpath(
                "//*[@id='company_web_top']/div[2]/div[2]/div/span/text()"
            ).extract()[0]
            item['phone'] = response.xpath(
                "//*[@id='company_web_top']/div[2]/div[2]/div/div[2]/div[1]/span[2]/text()"
            ).extract()[0]
            item['address'] = response.xpath(
                "//*[@id='company_web_top']/div[2]/div[2]/div/div[3]/div[2]/span[2]/text()"
            ).extract()[0]
            item['legal_person'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[1]/table/tbody/tr/td[1]/div/div[1]/div[2]/div/a/text()"
            ).extract()[0]
            item['credit_code'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[2]/td[1]/div/span/text()"
            ).extract()[0]
            item['reg_capital'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[1]/table/tbody/tr/td[2]/div[1]/div[2]/div/text()"
            ).extract()[0].strip()
            item['establish_date'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[1]/table/tbody/tr/td[2]/div[2]/div[2]/div/text()"
            ).extract()[0].strip()
            item['biz_period_start'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[4]/td[2]/div/span/text()"
            ).extract()[0].replace("\n", "").replace(' ', '')
            item['reg_authority'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[5]/td[2]/div/span/text()"
            ).extract()[0]
            item['biz_reg_num'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[1]/td[1]/div/span/text()"
            ).extract()[0]
            item['org_code'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[1]/td[2]/div/span/text()"
            ).extract()[0]
            item['taxpayer_code'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[3]/td/div/span/text()"
            ).extract()[0]
            item['industry'] = response.xpath(
                "//*[@id='_container_baseInfo']/div/div[2]/table/tbody/tr[4]/td[1]/div/span/text()"
            ).extract()[0]
            self.redisclient.set(corp_code, corp_code)
            updateSQL = "update ISSUE_URL t set t.status = '1' where t.id = '" + uuid + "'"
            self.dbclient.updateDB(updateSQL)
            yield item
        except BaseException:
            logger.exception("Solve corp exception")
            # read uuid from response.meta directly: the local variable may not be
            # bound if the failure happened before it was assigned in the try block
            updateSQL = ("update ISSUE_URL t set t.status = '-1' where t.id = '" +
                         response.meta['uuid'] + "'")
            self.dbclient.updateDB(updateSQL)
Example #12
class SteamCrawl:
    def __init__(self):
        self.session = requests.Session()
        self.base_url = 'https://store.steampowered.com/search'
        self.base_url += '/?category1=998&supportedlang=english'
        self.urls = []
        self.dbc = DBClient('games.db')
        self.dbc.create_table()

    def crawl(self, fetch_urls=False):
        # get list of urls
        if fetch_urls:
            self.__download_urls_page_source()
            self.__parse_urls()
        else:
            self.__parse_urls()
        # loop through list
        for url in tqdm(self.urls):
            if self.__already_downloaded(url):
                return
            # get features for each url
            game = {}
            game.update(self.__get_steam_features(url))
            game.update(self.__get_rawg_features(url))
            # save features in db
            self.dbc.add_game(game)

        self.dbc.to_csv('games.csv')

        return 'finished'

    def __download_urls_page_source(self):
        self.browser = webdriver.Safari()
        self.browser.get(self.base_url)
        self.__short_pause()
        lastHeight = self.browser.execute_script(
            "return document.body.scrollHeight")
        while True:
            self.browser.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            self.__short_pause()
            newHeight = self.browser.execute_script(
                "return document.body.scrollHeight")
            if newHeight == lastHeight:
                break
            lastHeight = newHeight
        self.__save_game_list_source()
        self.browser.close()

    def __parse_urls(self):
        html = self.__load_game_list_source()
        soup = BeautifulSoup(html, 'html.parser')
        a_tags = soup.find('div', id='search_results').find_all('a')
        self.urls = [a_tag.get('href') for a_tag in a_tags]

    def __save_game_list_source(self):
        with open("game_list.html", "w") as f:
            f.write(self.browser.page_source)

    def __load_game_list_source(self):
        with open("game_list.html", "r") as f:
            game_list_source = f.read()
        return game_list_source

    def __short_pause(self):
        duration = random.uniform(0, 3)
        time.sleep(duration)

    def __already_downloaded(self, url):
        game = self.dbc.get_game_by_url(url)
        return game

    def __get_steam_features(self, url):
        sgi = SteamGameInfo()
        html = sgi.get_game_html(url)
        if html:
            return sgi.strip_features(html)
        return {}  # keep crawl()'s dict.update() call safe when the page fetch fails

    def __get_rawg_features(self, url):
        name = url.split('/')[5].replace('_', ' ')
        rawg = RAWG()
        features = rawg.get_game(name)
        return features
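
A minimal usage sketch for the crawler; it assumes the project-specific helpers it relies on (SteamGameInfo, RAWG, DBClient) are importable alongside it.

if __name__ == '__main__':
    crawler = SteamCrawl()
    # fetch_urls=True re-scrapes the Steam search page into game_list.html;
    # the default (False) reuses a previously saved page source
    status = crawler.crawl(fetch_urls=True)
    print(status)  # 'finished' when the full URL list has been processed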
Example #13
class MainView(QtWidgets.QWidget):
    def __init__(self, conf, products):
        QtWidgets.QWidget.__init__(self)
        self.setObjectName("MainView")
        self.signals = MainViewSignals()

        self._conf = conf
        self._products = products

        self.serialClient = serial.Serial(
            self._conf["SERIAL_CLIENT"]["PORT"],
            self._conf["SERIAL_CLIENT"]["BAUDRATE"],
            timeout=self._conf["SERIAL_CLIENT"]["TIMEOUT"])
        self.dbClient = DBClient(self._conf["DATABASE_CLIENT"])
        self.mailClient = MailClient(self._conf["MAIL_CLIENT"])

        self.balance = self._conf["PRODUCT_TRANSACTIONS"]["CURRENT_BALANCE"]

        self.paymentWidget = PaymentWidget()
        self.successWidget = SuccessWidget()
        self.selectionWidget = SelectionWidget(self._products, self.balance)
        self.selectionWidget.signals.new_purchase.connect(self.new_purchase_event)

        self._init_layout()

    def _init_centralWidget(self):
        self.centralWidgets = QtWidgets.QStackedWidget()
        self.centralWidgets.setObjectName("centralWidgets")
        self.centralWidgets.addWidget(self.selectionWidget)
        self.centralWidgets.addWidget(self.paymentWidget)
        self.centralWidgets.addWidget(self.successWidget)

    def _init_layout(self):
        self.verticalLayout = QtWidgets.QVBoxLayout(self)
        self.verticalLayout.setObjectName("verticalLayout")
        
        self._init_centralWidget()
        self.verticalLayout.addWidget(self.centralWidgets)

        self.balanceLabel = QtWidgets.QLabel()
        sizePolicy = QtWidgets.QSizePolicy(
            QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Maximum)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(
            self.balanceLabel.sizePolicy().hasHeightForWidth())
        self.balanceLabel.setSizePolicy(sizePolicy)
        font = QtGui.QFont()
        font.setPointSize(28)
        self.balanceLabel.setFont(font)
        self.balanceLabel.setAlignment(
            QtCore.Qt.AlignVCenter | QtCore.Qt.AlignHCenter)
        self.balanceLabel.setObjectName("BalanceLabel")
        self.balanceLabel.setText("${:0.2f}".format(self.balance))
        self.verticalLayout.addWidget(self.balanceLabel)

        self.controlButton = QtWidgets.QPushButton()
        self.controlButton.setObjectName("ControlButton")
        self.controlButton.setText("Check Payment")
        controlPolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.MinimumExpanding, QtWidgets.QSizePolicy.Fixed)
        controlPolicy.setHorizontalStretch(0)
        controlPolicy.setVerticalStretch(0)
        self.controlButton.setSizePolicy(controlPolicy)
        self.controlButton.clicked.connect(self.change_window)
        self.verticalLayout.addWidget(self.controlButton)

    def transaction_to_balance(self, new_transaction):
        self.balance += new_transaction["amount"]
        self.balanceLabel.setText("${:0.2f}".format(self.balance))
        self.selectionWidget.update_balance(self.balance)

    def close_serial(self):
        self.serialClient.close()

    # ============================== SLOTS ==============================
    @QtCore.pyqtSlot()
    def change_window(self):
        if self.centralWidgets.currentIndex() == 0:
            print("Trying to connect to mail server...")
            status = self.mailClient.open_mail_connection()
            print(status[1])
            if status[0]:
                self.centralWidgets.setCurrentWidget(self.paymentWidget)
                self.controlButton.setText("Back")
            else:
                exit()

        elif self.centralWidgets.currentIndex() == 1:
            print("Reading last payments...")
            transaction_req = self.mailClient.get_last_transactions()
            if transaction_req[0] and len(transaction_req[1]) > 0:
                for payment in transaction_req[1]:
                    payment["machine_id"] = self._conf["MACHINE_PROFILE"]["MACHINE_ID"]

                    self.dbClient.add_new_payment(payment)
                    self.transaction_to_balance(payment)
            else:
                if transaction_req[0] == 0:
                    print(transaction_req[1])

            print("Closing connection with mail server...")
            self.mailClient.close_mail_connection()

            self.centralWidgets.setCurrentWidget(self.selectionWidget)
            self.controlButton.setText("Check Payment")
        
        else:
            self.successWidget.reset()
            self.centralWidgets.setCurrentWidget(self.selectionWidget)
            self.controlButton.setText("Check Payment")

    @QtCore.pyqtSlot(dict)
    def new_purchase_event(self, transaction_info):
        transaction_info["machine_id"] = self._conf["MACHINE_PROFILE"]["MACHINE_ID"]
        self.dbClient.add_new_transaction(transaction_info)

        self.balance -= transaction_info['price']
        self.balanceLabel.setText("${:0.2f}".format(self.balance))
        self.selectionWidget.update_balance(self.balance)

        self.centralWidgets.setCurrentWidget(self.successWidget)
        self.controlButton.setText("Back")
        self.successWidget.start()

        serial_msg = "{}\n".format(transaction_info['serial'])
        self.serialClient.write(serial_msg.encode('utf-8'))
        self.serialClient.flush()
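
A bootstrap sketch for MainView, assuming PyQt5 and the configuration keys read in __init__ above; all concrete values are placeholders.

import sys
from PyQt5 import QtWidgets

if __name__ == '__main__':
    conf = {
        "SERIAL_CLIENT": {"PORT": "/dev/ttyUSB0", "BAUDRATE": 9600, "TIMEOUT": 1},
        "DATABASE_CLIENT": {},       # opaque settings passed straight to DBClient
        "MAIL_CLIENT": {},           # opaque settings passed straight to MailClient
        "PRODUCT_TRANSACTIONS": {"CURRENT_BALANCE": 0.0},
        "MACHINE_PROFILE": {"MACHINE_ID": "machine-01"},
    }
    products = []                    # forwarded to SelectionWidget

    app = QtWidgets.QApplication(sys.argv)
    view = MainView(conf, products)
    view.show()
    sys.exit(app.exec_())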
Example #14
# -*- coding:utf-8 -*-
"""
======================
@author Vincent
Config file: database connection settings.
======================
"""
import os
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from dbclient import DBClient

DBClients = DBClient()
ServersPort = 8005
Base = declarative_base()
EngineRead = create_engine(
    'mysql://%s:%s@%s:%d/%s?charset=utf8' %
    (DBClients.db.MySQLREAD.User, DBClients.db.MySQLREAD.Passwd,
     DBClients.db.MySQLREAD.Host, DBClients.db.MySQLREAD.Port,
     DBClients.db.MySQLREAD.Dbname),
    encoding='utf8',
    echo=False,
    pool_size=100,
    pool_recycle=3600)
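
One common way the read engine above is consumed, sketched under the assumption that the project declares its ORM models on Base:

from sqlalchemy.orm import sessionmaker

SessionRead = sessionmaker(bind=EngineRead)

def fetch_one(model, **filters):
    """Open a short-lived read-only session, fetch one row, and always close it."""
    session = SessionRead()
    try:
        return session.query(model).filter_by(**filters).first()
    finally:
        session.close()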