Exemplo n.º 1
0
 def stock_list(self):
     STOCK_LIST = setting.currentStockList()
     return STOCK_LIST
Exemplo n.º 2
0
class Spider(scrapy.Spider):
    name = 'stock-cwsj'
    startYear = 2017
    baseURL = 'http://emweb.securities.eastmoney.com/NewFinanceAnalysis/MainTargetAjax?'

    MONGODB_ID = const.MONGODB_ID
    ID_NAME = const.CWSJ_KEYWORD.ID_NAME
    DB_NAME = const.CWSJ_KEYWORD.DB_NAME
    COLLECTION_HEAD = const.CWSJ_KEYWORD.COLLECTION_HEAD
    KEY_NAME = const.CWSJ_KEYWORD.KEY_NAME
    NEED_TO_NUMBER = const.CWSJ_KEYWORD.NEED_TO_NUMBER
    DATA_SUB = const.CWSJ_KEYWORD.DATA_SUB
    # STOCK_LIST = const.STOCK_LIST
    # STOCK_LIST = {'600028'}
    # STOCK_LIST = {'000725'}
    STOCK_LIST = setting.currentStockList()
    # KEY = 'var XbnsgnRv'

    allowed_domains = [
        'www.eastmoney.com',
        'emweb.securities.eastmoney.com',
    ]
    start_urls = ['http://www.eastmoney.com']

    headers = {
        'Host': 'emweb.securities.eastmoney.com',
        'Referer':
        'http://emweb.securities.eastmoney.com/NewFinanceAnalysis/Index?type=web&code=SZ000725',
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }

    xpath = {}

    received = set()

    def genParams(self, code):
        def addHead(code):
            if code.startswith('6'):
                return 'SH' + code
            elif code.startswith('0') or code.startswith('3'):
                return 'SZ' + code

        code = addHead(code)
        params = {
            'ctype': 4,
            'type': 0,
            'code': code,  # = SZ000725
        }
        return params

    def genStockList(self):
        out = []
        for code in self.STOCK_LIST:
            url = encode_params(self.genParams(code))
            url = self.baseURL + url
            out.append((url, code))

        return out

    def parsePage(self, json):
        try:
            tmp = []
            for item in json:
                one_stock = util.utils.dealwithData(
                    item,
                    util.utils.threeOP(self.DATA_SUB, self.NEED_TO_NUMBER,
                                       self.KEY_NAME))
                one_stock[self.MONGODB_ID] = item.get(self.ID_NAME)
                series = pd.Series(one_stock)
                tmp.append(series)

            df = pd.DataFrame(tmp)
            return df
        except Exception as e:
            logging.warning("parsePage Exception %s" % (str(e)))

    def saveDB(self, data: pd.DataFrame, date):

        re = util.saveMongoDB(data, util.genEmptyFunc(), self.DB_NAME,
                              self.COLLECTION_HEAD + date, None)
        util.everydayChange(re, 'cwsj')

    def parse(self, response):
        self.received.add(response.url)

        if 'step' not in response.meta:
            code = self.genStockList()
            # realOut = set(quarter) - self.received
            for one in code:
                yield Request(one[0], meta={'step': 1, 'code': one[1]})

        if 'step' in response.meta:
            json_data = {}
            if response.meta['step'] >= 1:

                content = response.body
                try:
                    data = content.decode('utf-8')
                    json_data = json.loads(data)
                except Exception as e:
                    logging.warning("parse json Exception %s" % (str(e)))

                # if response.meta['step'] == 1:
                # nextPage = self.nextPage(json_data, response.meta)
                # # realOut = set(nextPage) - self.received
                # for one in nextPage:
                #   yield Request(one, meta={'step': 2, 'quarter': response.meta['quarter']})

                df = self.parsePage(json_data)
                if len(df):
                    self.saveDB(df, response.meta['code'])
Exemplo n.º 3
0
    import sys

    sys.path.append('D:\stock_python\stock/new_stock')
##########################
import util
import util.utils
import const
import const.TS as TS
import setting
from fake_spider import spider

MONGODB_ID = const.MONGODB_ID
DB_NAME = TS.BASICS.DB_NAME
COLLECTION_NAME = TS.BASICS.COLLECTION_NAME

STOCK_LIST = setting.currentStockList()

jsonCallBack = 'jsonpCallback24417'


class Handler(spider.FakeSpider):
    crawl_config = {}

    def on_start(self):
        out = self.url()
        for one in out:
            save = {'key': one[1]}
            if one[1].startswith('6'):
                self.crawl(one[0],
                           headers=self.shHeader(one[1]),
                           callback=self.processFirstPage,
Exemplo n.º 4
0
 def stock_list(self):
   STOCK_LIST = setting.currentStockList()
   # STOCK_LIST = ['600000', '601318']
   return STOCK_LIST