Пример #1
0
    def __getSaveOneSymbol(self, symbol):
        ''' get and save data for one symbol

        Reads the quotes for ``symbol`` through ``self.googleDAM`` for the
        range ``self.start`` .. ``self.end``, retrying while fewer than
        MAX_TRY attempts have failed, then writes them through
        ``self.outputDAM``.

        Ordinary errors are not propagated: the outcome is logged and the
        symbol is appended to ``self.succeeded`` or ``self.failed``.
        '''
        try:
            lastExcp = None
            with self.readLock:  #dam is not thread safe
                failCount = 0
                #try several times since it may fail
                while failCount < MAX_TRY:
                    try:
                        self.googleDAM.setSymbol(symbol)
                        quotes = self.googleDAM.readQuotes(
                            self.start, self.end)

                    except Exception as excp:
                        # Exception, not BaseException: KeyboardInterrupt and
                        # SystemExit must not be retried as a failed read
                        failCount += 1
                        lastExcp = excp
                    else:
                        break

                if failCount >= MAX_TRY:
                    # every attempt failed (or MAX_TRY allows no attempt)
                    raise Exception("Can't retrieve historical data %s" %
                                    lastExcp)

            with self.writeLock:  #dam is not thread safe
                self.outputDAM.setSymbol(symbol)
                self.outputDAM.writeQuotes(quotes)

        except Exception as excp:
            # best effort: record the failure instead of propagating it
            LOG.info("Error while processing %s: %s" % (symbol, excp))
            self.failed.append(symbol)
        else:
            LOG.info("Processed %s" % symbol)
            self.succeeded.append(symbol)
Пример #2
0
    def __getSaveOneSymbol(self, symbol):
        ''' get and save data for one symbol

        Reads the quotes for ``symbol`` through ``self.googleDAM`` for the
        range ``self.start`` .. ``self.end``, retrying while fewer than
        MAX_TRY attempts have failed, then writes them through
        ``self.outputDAM``.

        Ordinary errors are not propagated: the outcome is logged and the
        symbol is appended to ``self.succeeded`` or ``self.failed``.
        '''
        try:
            lastExcp = None
            with self.readLock: #dam is not thread safe
                failCount = 0
                #try several times since it may fail
                while failCount < MAX_TRY:
                    try:
                        self.googleDAM.setSymbol(symbol)
                        quotes = self.googleDAM.readQuotes(self.start, self.end)

                    except Exception as excp:
                        # Exception, not BaseException: KeyboardInterrupt and
                        # SystemExit must not be retried as a failed read
                        failCount += 1
                        lastExcp = excp
                    else:
                        break

                if failCount >= MAX_TRY:
                    # every attempt failed (or MAX_TRY allows no attempt)
                    raise Exception("Can't retrieve historical data %s" % lastExcp)

            with self.writeLock: #dam is not thread safe
                self.outputDAM.setSymbol(symbol)
                self.outputDAM.writeQuotes(quotes)

        except Exception as excp:
            # best effort: record the failure instead of propagating it
            LOG.info("Error while processing %s: %s" % (symbol, excp))
            self.failed.append(symbol)
        else:
            LOG.info("Processed %s" % symbol)
            self.succeeded.append(symbol)
Пример #3
0
 def __getOutputDamSetting(self):
     ''' build the {'db': ...} setting dict

     Formats the value returned by __getOutputSql() into a 'sqlite:///'
     URL, stores it on self.sqlLocation, logs it and returns it under
     the 'db' key.
     '''
     location = 'sqlite:///%s' % self.__getOutputSql()
     self.sqlLocation = location
     LOG.info("Sqlite location: %s" % location)
     return dict(db=location)
Пример #4
0
                                target=self.__getSaveOneSymbol,
                                args=[symbol])
                thread.daemon = True
                thread.start()

                threads.append(thread)

            for thread in threads:
                thread.join(
                    THREAD_TIMEOUT
                )  # no need to block, because thread should complete at last

            #can't start another thread to do the commit because for SQLite, only objects from the same thread can be committed
            if 0 == rounds % 3:
                self.outputDAM.commit()

            counter += size
            rounds += 1

            # sleep for 5 seconds to avoid being blocked by google...
            time.sleep(5)


if __name__ == '__main__':
    # Manual run: crawl a single symbol, then report where the data went
    # and which symbols succeeded or failed.
    crawler = GoogleCrawler(["AAPL", "EBAY", "GOOG"], "20151001")
    # NOTE(review): this calls a public getSaveOneSymbol; confirm the class
    # exposes one, since a name-mangled __getSaveOneSymbol would not be
    # reachable under this name from outside the class.
    crawler.getSaveOneSymbol("GOOG")
    # crawler.getAndSaveSymbols()
    locationMsg = "Sqlite location: %s" % crawler.sqlLocation
    LOG.info(locationMsg)
    succeededMsg = "Succeeded: %s" % crawler.succeeded
    LOG.info(succeededMsg)
    failedMsg = "Failed: %s" % crawler.failed
    LOG.info(failedMsg)
Пример #5
0
 def __getOutputDamSetting(self):
     ''' build the {'db': ...} setting dict

     Formats the value returned by __getOutputSql() into a 'sqlite:///'
     URL, stores it on self.sqlLocation, logs it and returns it under
     the 'db' key.
     '''
     location = 'sqlite:///%s' % self.__getOutputSql()
     self.sqlLocation = location
     LOG.info("Sqlite location: %s" % location)
     return dict(db=location)
Пример #6
0
            symbols = self.symbols[counter: counter + size]

            threads = []
            for symbol in symbols:
                thread = Thread(name = symbol, target = self.__getSaveOneSymbol, args = [symbol])
                thread.daemon = True
                thread.start()

                threads.append(thread)

            for thread in threads:
                thread.join(THREAD_TIMEOUT) # no need to block, because thread should complete at last

            #can't start another thread to do the commit because for SQLite, only objects from the same thread can be committed
            if 0 == rounds % 3:
                self.outputDAM.commit()

            counter += size
            rounds += 1

            # sleep for 5 seconds to avoid being blocked by google...
            time.sleep(5)

if __name__ == '__main__':
    # Manual run: crawl a single symbol, then report where the data went
    # and which symbols succeeded or failed.
    crawler = GoogleCrawler(["AAPL", "EBAY", "GOOG"], "20151001")
    # NOTE(review): this calls a public getSaveOneSymbol; confirm the class
    # exposes one, since a name-mangled __getSaveOneSymbol would not be
    # reachable under this name from outside the class.
    crawler.getSaveOneSymbol("GOOG")
    # crawler.getAndSaveSymbols()
    locationMsg = "Sqlite location: %s" % crawler.sqlLocation
    LOG.info(locationMsg)
    succeededMsg = "Succeeded: %s" % crawler.succeeded
    LOG.info(succeededMsg)
    failedMsg = "Failed: %s" % crawler.failed
    LOG.info(failedMsg)