Exemplo n.º 1
0
    def start(self):
        """Scrape the size-change table and hand each data row to storage.

        Only rows of the ``gmbdtable`` table body with exactly six cells are
        used. Each becomes ``[self.code, <text of every cell>]`` and is passed
        to ``self.storage`` with a selector built from the fund code and the
        text of the first cell (labelled ``date``). Rows with any other cell
        count are ignored. A summary line is printed at the end.
        """
        self.initialization("info_sizechange", "jj_info_sizechange")
        page = self._webdriver.get(self.url)
        table_body = page.find('div', id="gmbdtable").find("tbody")
        stored = 0
        for row in table_body.find_all('tr'):
            cells = row.find_all('td')
            if len(cells) != 6:
                # Unexpected column count: not a data row.
                continue
            stored += 1
            record = [self.code] + [cell.text for cell in cells]
            selector = ("code", self.code, "date", cells[0].text)
            self.storage(record, selector)
        printf("jj_info_sizechange storage ,CODE:%s,AMOUNT:%s" %
               (self.code, stored))
Exemplo n.º 2
0
 def start(self):
     """Scrape the holder-structure table and hand each data row to storage.

     Rows of the ``cyrjgtable`` table body that contain exactly five cells
     are turned into ``[self.code, <text of every cell>]`` and passed to
     ``self.storage``; the selector pairs the fund code with the first
     cell's text (labelled ``gonggao_date``). Other rows are skipped.
     """
     self.initialization("info_holder_struct", "jj_info_holder_struct",
                         PROJECT)
     document = self._webdriver.get(self.url)
     container = document.find('div', id="cyrjgtable")
     data_rows = container.find("tbody").find_all('tr')
     count = 0
     for data_row in data_rows:
         columns = data_row.find_all('td')
         if len(columns) == 5:
             count += 1
             announce_date = columns[0].text
             record = [self.code]
             record.extend(column.text for column in columns)
             self.storage(
                 record,
                 ("code", self.code, "gonggao_date", announce_date))
     printf("jj_info_holder_struct storage ,CODE:%s,AMOUNT:%s" %
            (self.code, count))
Exemplo n.º 3
0
 def start(self):
     """Scrape the asset-allocation table and hand each row to storage.

     Every row of the ``w782 comm tzxq`` table body that has at least one
     cell becomes ``[self.code, <text of every cell>]`` and is passed to
     ``self.storage`` with a selector built from the fund code and the
     first cell's text (labelled ``report_date``).

     The page is loaded from ``self.url`` as-is. An earlier revision
     overwrote ``self.url`` with the page of one fixed fund (002423) right
     before loading, which made every fund store that fund's rows under its
     own code; that debug override has been removed.
     NOTE(review): this assumes ``self.url`` is already set when the page
     is fetched, as the sibling ``start`` methods do -- confirm.
     """
     self.initialization("info_property_pz", "jj_info_property_pz")
     source = self._webdriver.get(self.url)
     trs = source.find("table",
                       class_="w782 comm tzxq").find("tbody").find_all('tr')
     amount = 0
     for tr in trs:
         tds = tr.find_all('td')
         if not tds:
             # A row without cells has no report date to key on; skip it
             # instead of failing on tds[0].
             continue
         values = [self.code] + [td.text for td in tds]
         sel = ("code", self.code, "report_date", tds[0].text)
         self.storage(values, sel)
         amount += 1
     printf("jj_info_property_pz storage ,CODE:%s,AMOUNT:%s" %
            (self.code, amount))
Exemplo n.º 4
0
 def start(self):
     """Scrape the dividend table and hand each row to storage.

     For every row of the ``w782 comm cfxq`` table body the first five
     cells are read. The record is the fund code followed by the cell
     texts, with the fourth cell passed through ``get_digit`` first. The
     selector pairs the fund code with the second cell's text (labelled
     ``quanyidengji_date``). Rows with fewer than five cells raise
     ``IndexError``.
     """
     self.initialization("info_fh", "jj_info_fh_peisong", PROJECT)
     page = self._webdriver.get(self.url)
     table = page.find('table', class_='w782 comm cfxq')
     stored = 0
     for row in table.find('tbody').find_all('tr'):
         cells = row.find_all('td')
         record_day = cells[1].text
         record = [
             self.code,
             cells[0].text,
             record_day,
             cells[2].text,
             get_digit(cells[3].text),
             cells[4].text,
         ]
         self.storage(
             record, ('code', self.code, 'quanyidengji_date', record_day))
         stored += 1
     printf("jj_info_fh_peisong storage ,CODE:%s,AMOUNT:%s" %
            (self.code, stored))
Exemplo n.º 5
0
    def start(self):
        """Walk the paginated history table and store every row.

        The pager under ``div#pagebar`` supplies the total page count (the
        second-to-last label) and the current page (the label with class
        ``cur``). On each page, every row of the ``w782 comm lsjz`` table
        with at least seven cells is stored through
        ``self._mysqlclient.storage`` as ``[self.code, <first seven cell
        texts>]``, with a selector built from the fund code and the first
        cell's text (labelled ``jz_date``).

        The current page is always scraped before deciding whether to move
        on, so a result with a single page is stored too (the previous
        ``while current < total`` check skipped it), and the "next" label
        is no longer clicked after the last page.

        Raises:
            ValueError: if a pager label's text is not an integer.
        """
        self.initialization('info_history', 'jj_history_income')
        self._webdriver.get(self.url)
        next_botton = "//div[@id='pagebar']//label[last()]"
        current_xpath = "//div[@id='pagebar']//label[@class='cur']"
        pages_num = int(self._webdriver.find_element_by_xpath(
            "//div[@id='pagebar']//label[last()-1]").text)
        amount = 0
        while True:
            current_page = int(self._webdriver.find_element_by_xpath(
                current_xpath).text)
            # Fixed pause before reading the page source, as before.
            time.sleep(3)
            rows = BeautifulSoup(
                self._webdriver._brower.page_source, 'lxml').find(
                    'table',
                    class_="w782 comm lsjz").find('tbody').find_all('tr')
            for row in rows:
                cells = row.find_all('td')
                if len(cells) < 7:
                    # Not a full data row (e.g. a placeholder); skip it.
                    continue
                values = [self.code] + [cell.text for cell in cells[:7]]
                self._mysqlclient.storage(
                    values, ('code', self.code, 'jz_date', cells[0].text))
                amount += 1
            if current_page >= pages_num:
                break
            botton = self._webdriver.find_element_by_xpath(next_botton)
            time.sleep(3)
            botton.click()
            time.sleep(3)
        printf('jj_history_income storage CODE:%s,AMOUNT:%s' %
               (self.code, amount))
Exemplo n.º 6
0
 def start(self):
     """Scrape the fund-rating table and hand each data row to storage.

     Rows of ``table#fundgradetable`` with exactly five cells are stored as
     the fund code, the first cell's text, and the number of "★" characters
     in each of the remaining four cells. The selector pairs the fund code
     with the first cell's text (labelled ``pj_date``). Any other row
     triggers a "Struction Error!" message and is skipped.
     """
     self.initialization("info_level", "jj_info_level")
     page = self._webdriver.get(self.url)
     grade_rows = page.find(
         'table', id='fundgradetable').find('tbody').find_all('tr')
     stored = 0
     for row in grade_rows:
         cells = row.find_all('td')
         if len(cells) != 5:
             printf("jj_info_level", "Struction Error!")
             continue
         rating_day = cells[0].text
         record = [self.code, rating_day]
         # One star count per rating column.
         record.extend(cell.text.count("★") for cell in cells[1:])
         self.storage(record, ('code', self.code, 'pj_date', rating_day))
         stored += 1
     printf("jj_info_level storage,CODE:%s,AMOUNT:%s" % (self.code, stored))
Exemplo n.º 7
0
 def jj_info_company(self, source):
     """Store the company profile found in ``source`` and return its name.

     Reads the rows of the table body inside the ``first-block`` div and
     builds a 19-item record: the fund code, the company name (row 0), the
     main value cell of rows 1-12, then the ``attached-value`` cell of rows
     3, 4, 8, 9 and 12. Spaces and newlines are removed from the values of
     rows 11 and 12. The record is passed to ``self.storage`` with the
     first client in ``self._mysqlclient``.

     Args:
         source: parsed page exposing BeautifulSoup-style ``find``.

     Returns:
         The company name text taken from row 0.
     """
     rows = source.find('div', "first-block").find('tbody').find_all('tr')

     def cell_text(row_index, css_class="category-value"):
         # Text of the <td> with the given class string in the given row.
         return rows[row_index].find('td', class_=css_class).text

     def squeeze(text):
         # Remove spaces and newlines.
         return text.replace(' ', '').replace('\n', '')

     fixed = "category-value fixed-width"
     attached = "category-value attached-value"

     company = cell_text(0)
     record = [
         self.code,
         company,
         cell_text(1),
         cell_text(2),
         cell_text(3),
         cell_text(4, "category-value attached-value fixed-width"),
         cell_text(5),
         cell_text(6),
         cell_text(7),
         cell_text(8),
         cell_text(9, fixed),
         cell_text(10),
         squeeze(cell_text(11)),
         squeeze(cell_text(12, fixed)),
     ]
     # Secondary ("attached") values that share a row with a main value.
     record.extend(cell_text(index, attached) for index in (3, 4, 8, 9, 12))

     selector = ('code', self.code, 'name', company)
     self.storage(self._mysqlclient[0], record, selector)
     printf("jj_info_company storage CODE:%s" % self.code)
     return company
Exemplo n.º 8
0
 def init(self):
     """Create the main spider's clients and call ``init`` on every queue.

     Builds the web driver, the two Redis clients for ``info_basic_info``
     (plain and ``_code``) and the MySQL client for ``jj_basic_list``, then
     calls ``init()`` on both Redis clients and on a fresh ``RedisClient``
     for each sub-spider queue name and its ``_code`` counterpart.
     """
     printf("Main Spider Initization!")
     self._webdriver = Webdriver()
     basic_queue = "info_basic_info"
     self._redisclient = RedisClient(basic_queue)
     self._redisclient_code = RedisClient(basic_queue + "_code")
     self._mysqlclient = Mysql_Client("jj_basic_list")

     for own_client in (self._redisclient, self._redisclient_code):
         own_client.init()

     sub_queues = (
         'info_manager', 'info_company', 'info_level', 'info_history',
         'info_fh', 'info_holds', 'info_bets_holds', 'info_holds_trend',
         'info_trade_pz', 'info_trade_compare', 'info_property_pz',
         'info_changes', 'info_sizechange', 'info_holder_struct',
         'info_all_gonggao', 'info_finance_target', 'info_property_bets',
         'info_profit', 'info_income', 'info_cost', 'info_purchase_info',
         'info_swich_info',
     )
     for queue in sub_queues:
         # Plain queue first, then its "_code" counterpart.
         for suffix in ("", "_code"):
             RedisClient(queue + suffix).init()
Exemplo n.º 9
0
 def start(self):
     """Page through the holdings-trend pager and report the page count.

     This method only walks the pager under ``div#pagebar``: it reads the
     total page count (second-to-last label) and the current page (label
     with class ``cur``), then clicks the last label ("next") while the
     previously read current page is below the total. No table rows are
     parsed or stored here; ``amount`` counts loop iterations only.

     The summary line now names ``jj_info_holds_trend``; it previously
     reported ``jj_info_bets_holds``, the label of a different spider.

     Raises:
         ValueError: if a pager label's text is not an integer.
     """
     self.initialization("info_holds_trend", "jj_info_holds_trend")
     self._webdriver.get(self.url)
     next_botton = "//div[@id='pagebar']//label[last()]"
     current_xpath = "//div[@id='pagebar']//label[@class='cur']"
     pages_num = self._webdriver.find_element_by_xpath(
         "//div[@id='pagebar']//label[last()-1]").text
     current_page = self._webdriver.find_element_by_xpath(
         current_xpath).text
     amount = 0
     while int(current_page) < int(pages_num):
         amount += 1
         current_page = self._webdriver.find_element_by_xpath(
             current_xpath).text
         time.sleep(3)
         # TODO: parse and store the rows of the table inside
         # div#wyhBody; row extraction was never implemented.
         botton = self._webdriver.find_element_by_xpath(next_botton)
         time.sleep(3)
         botton.click()
         time.sleep(3)
     printf("jj_info_holds_trend storage,CODE:%s,AMOUNT:%s" %
            (self.code, amount))
Exemplo n.º 10
0
    def jj_info_company_admin(self, company):
        """Walk the paginated admin table and store one record per person.

        The pager under ``div#gcglPager`` supplies the total page count and
        the active page. In the ``div#gcglBody`` table body the first row
        is skipped and the remaining rows are consumed in pairs: the first
        row of a pair supplies four cells (the first is used as the name),
        the second row supplies a text block with spaces and newlines
        removed. Each record ``[company, <four cell texts>, <block text>]``
        is passed to ``self.storage`` with the third client in
        ``self._mysqlclient``.

        The current page is scraped before the page check, so a
        single-page table is stored too (the previous ``while current <
        total`` check skipped it), and "next" is not clicked after the last
        page. The summary line names ``jj_info_company_admin`` instead of
        the unrelated ``jj_info_company_honor`` label it used to print.

        Args:
            company: company name stored as the first field of each record.

        Raises:
            ValueError: if a pager label's text is not an integer.
        """
        next_botton = "//div[@id='gcglPager']//ul//li//a[@class='next ttjj-iconfont']"
        current_xpath = "//div[@id='gcglPager']//li[@class=' active']"
        pages_num = int(self._webdriver.find_element_by_xpath(
            "//div[@id='gcglPager']//li[last()-1]//a").text)
        amount = 0
        while True:
            current_page = int(self._webdriver.find_element_by_xpath(
                current_xpath).text)
            time.sleep(3)
            tags = BeautifulSoup(
                self._webdriver._brower.page_source,
                'lxml').find('div', id="gcglBody").find('tbody').find_all('tr')
            body_rows = tags[1:]
            # Rows come in (summary, detail) pairs; a trailing unpaired row
            # is ignored, as before.
            for summary_row, detail_row in zip(body_rows[0::2],
                                               body_rows[1::2]):
                info = summary_row.find_all('td')
                name = info[0].text
                values = [
                    company,
                    name,
                    info[1].text,
                    info[2].text,
                    info[3].text,
                    detail_row.text.replace(' ', '').replace('\n', ''),
                ]
                sel = ('company', company, 'name', name)
                self.storage(self._mysqlclient[2], values, sel)
                amount += 1
            if current_page >= pages_num:
                break
            botton = self._webdriver.find_element_by_xpath(next_botton)
            time.sleep(3)
            botton.click()
            time.sleep(3)
        printf("jj_info_company_admin,CODE:%s,AMOUNT:%s" % (self.code, amount))
Exemplo n.º 11
0
 def jj_info_manager_changes(self, source):
     """Store manager-change rows and collect manager-history rows.

     Part one: every row after the first in the table inside the first
     ``div.box`` is stored through ``self.storage`` (first client in
     ``self._mysqlclient``) as the fund code plus its first five cell
     texts. The selector uses the fund code, the first cell
     (``start_date``) and the third cell (``manager``).

     Part two: from the last ``w782 comm jloff`` table, each body row
     yields its first nine cell texts and a three-item selector
     ``['code', <first cell text>, 'manager']``. These are returned rather
     than stored.

     Args:
         source: parsed page exposing BeautifulSoup-style ``find``.

     Returns:
         A ``(values, selectors)`` pair of parallel lists.
     """
     change_rows = source.find('div', class_='box').find('table').find_all('tr')
     stored = 0
     for row in change_rows[1:]:
         cells = row.find_all('td')
         record = [self.code]
         record.extend(cells[i].text for i in range(5))
         selector = ('code', self.code, 'start_date', cells[0].text,
                     'manager', cells[2].text)
         self.storage(self._mysqlclient[0], record, selector)
         stored += 1
     printf("jj_info_manager_changes storage,CODE:%s,AMOUNT:%s" %
            (self.code, stored))

     # Manager history: collected for the caller rather than stored here.
     history_tables = source.find_all('table', class_='w782 comm jloff')
     history_rows = history_tables[-1].find('tbody').find_all('tr')
     history_values = []
     history_selectors = []
     for row in history_rows:
         cells = row.find_all('td')
         fund_code = cells[0].text
         history_values.append(
             [fund_code] + [cells[i].text for i in range(1, 9)])
         history_selectors.append(['code', fund_code, 'manager'])
     return history_values, history_selectors
Exemplo n.º 12
0
 def start(self):
     """Walk every announcement page and store each announcement's text.

     For page numbers 1 up to the total read from the pager, the label
     with the matching ``value`` is clicked; if that fails the error is
     logged and the last pager label ("next") is clicked instead. On each
     page, rows of the ``div#ggtable`` table with exactly three cells are
     stored as ``[self.code, title, type, date, body]``. Title, type and
     date have spaces and newlines removed; the body is read from the page
     behind the title link.

     ``amount`` is now incremented once per stored announcement. It was
     previously incremented twice per row, so the reported AMOUNT was
     double the number of stored records.

     Raises:
         ValueError: if the pager's page-count label is not an integer.
     """
     self.initialization("info_all_gonggao", "jj_info_all_gonggao")
     self._webdriver.get(self.url)
     next_botton = "//div[@id='pagebar']//label[last()]"
     current_xpath = "//div[@id='pagebar']//label[@value='%s']"
     pages_num = int(self._webdriver.find_element_by_xpath(
         "//div[@id='pagebar']//label[last()-1]").text)
     amount = 0
     for current_page in range(1, pages_num + 1):
         try:
             self._webdriver.find_element_by_xpath(current_xpath %
                                                   current_page).click()
         except Exception as e:
             # Fallback kept from the original: if the numbered label
             # cannot be clicked, advance with the "next" label.
             logging_except(e)
             self._webdriver.find_element_by_xpath(next_botton).click()
         time.sleep(3)
         table = BeautifulSoup(
             self._webdriver._brower.page_source,
             'lxml').find('div', id='ggtable').find('tbody').find_all('tr')
         for tr in table:
             tds = tr.find_all('td')
             if len(tds) != 3:
                 continue
             title = tds[0].text.replace(' ', '').replace('\n', '')
             report_type = tds[1].text.replace(' ', '').replace('\n', '')
             date = tds[-1].text.replace(' ', '').replace('\n', '')
             href = tds[0].find('a')['href']
             content = self._read_gonggao_text(href)
             values = [self.code, title, report_type, date, content]
             sel = ("code", self.code, "title", title, 'date', date,
                    'type', report_type)
             self.storage(values, sel)
             amount += 1
     printf("jj_info_all_gonggao storage ,CODE:%s,AMOUNT:%s" %
            (self.code, amount))

 def _read_gonggao_text(self, href):
     """Open ``href`` in a new window and return its cleaned body text.

     Polls the new window every three seconds until a
     ``pre#jjggzwcontentbody`` element is present, then returns its text
     passed through ``special_repace``. The new window is closed and focus
     returns to the first window even if reading fails.
     NOTE(review): the wait has no upper bound, as in the original code; a
     page that never renders that element blocks the spider.
     """
     browser = self._webdriver._brower
     browser.execute_script("window.open('%s');" % href)
     browser.switch_to_window(browser.window_handles[1])
     try:
         time.sleep(3)
         body = None
         while body is None:
             body = BeautifulSoup(browser.page_source, "lxml").find(
                 'pre', id='jjggzwcontentbody')
             if body is None:
                 time.sleep(3)
         content = special_repace(body.text)
         time.sleep(3)
         return content
     finally:
         # Always return to the listing window so the next row (or the
         # caller's error handling) starts from a known state.
         browser.close()
         browser.switch_to_window(browser.window_handles[0])
Exemplo n.º 13
0
def _create_main_table(cursor, sheet_name, sheet):
    """Create the main table from a sheet's header row and insert its rows.

    The first sheet row supplies the column names; every later row is
    inserted with a parameterized statement, so cell values containing
    quotes or ``%`` cannot break or alter the SQL.
    """
    headers = [cell.value for cell in sheet.row(0)]
    create_sql = ("CREATE TABLE %s (`id` INT PRIMARY KEY  AUTO_INCREMENT"
                  % sheet_name)
    create_sql += "".join(",`" + head + "` VARCHAR(255) " for head in headers)
    create_sql += ");"
    cursor.execute(create_sql)
    printf("CREATE TABLE %s" % sheet_name)

    column_list = ','.join("`%s`" % head for head in headers)
    # The statement goes through the driver's %-style parameter
    # substitution, so literal percent signs in identifiers are doubled.
    insert_prefix = ("INSERT INTO %s (%s) VALUES(" %
                     (sheet_name, column_list)).replace('%', '%%')
    for row_index in range(1, sheet.nrows):
        # Format each cell exactly as the old string-built SQL did, so the
        # stored text is unchanged.
        values = ["%s" % (cell.value,) for cell in sheet.row(row_index)]
        placeholders = ','.join(["%s"] * len(values))
        cursor.execute(insert_prefix + placeholders + ");", values)


def _create_detail_table(cursor, sheet_name, sheet):
    """Create a secondary table described by a sheet's first two columns.

    Below the header row, column 0 holds the column comment and column 1
    holds the column name.
    NOTE(review): comments are still embedded in the DDL as-is; a comment
    containing a single quote would break the statement.
    """
    comments = [cell.value for cell in sheet.col(0)[1:]]
    headers = [cell.value for cell in sheet.col(1)[1:]]
    create_sql = ("CREATE TABLE %s (`id` INT PRIMARY KEY  AUTO_INCREMENT"
                  % sheet_name)
    for head, comment in zip(headers, comments):
        create_sql += ",`" + head + "` VARCHAR(255) COMMENT '" + comment + "'"
    create_sql += ");"
    cursor.execute(create_sql)
    printf("CREATE TABLE %s" % sheet_name)


def db_initization(spider_name):
    """Rebuild the MySQL database for ``spider_name`` from its workbook.

    Steps:
      1. Create the database named ``spider_name`` if ``SHOW DATABASES``
         does not list it.
      2. Drop every table in that database.
      3. Open ``db_struc/<spider_name>.xlsx``. The ``target_urls`` sheet
         becomes the main table (header row = columns, later rows = data);
         every other sheet becomes a secondary table.
      4. Run the extra SQL registered in ``ext_SQL`` for this spider, if
         there is one.

    Each connection is now closed explicitly and committed explicitly.
    The previous ``with pymysql.connect(...) as cursor`` form relied on
    PyMySQL < 1.0 returning a cursor from the connection's ``__enter__``,
    which never closed the connection and leaked one per inserted row; on
    PyMySQL >= 1.0 that form fails because ``__enter__`` returns the
    connection itself.

    Args:
        spider_name: database name and workbook base name.
    """
    # Function-scope import keeps this block self-contained.
    from contextlib import closing

    # Make sure the database exists.
    with closing(pymysql.connect(**MYSQL_TMP)) as connection:
        with connection.cursor() as cursor:
            cursor.execute("SHOW DATABASES;")
            DBs = [i[0] for i in cursor.fetchall()]
            if spider_name not in DBs:
                cursor.execute(
                    "CREATE DATABASE %s CHARSET='utf8';" % spider_name)
                printf("Create database:%s" % spider_name)
        connection.commit()

    MYSQL = MYSQL_conf(spider_name)

    # Start from an empty schema.
    with closing(pymysql.connect(**MYSQL)) as connection:
        with connection.cursor() as cursor:
            cursor.execute("SHOW TABLES;")
            for table in cursor.fetchall():
                # Each result row is a one-item tuple, so it formats
                # directly as the table name.
                cursor.execute("DROP TABLE %s;" % table)
                printf("DROP TABLE %s;" % table)
        connection.commit()
        printf("ALL TABLES CLEAR!")

    db_config = xlrd.open_workbook("db_struc/%s.xlsx" % spider_name)

    with closing(pymysql.connect(**MYSQL)) as connection:
        for sheet_name in db_config.sheet_names():
            sheet = db_config.sheet_by_name(sheet_name)
            with connection.cursor() as cursor:
                if sheet_name == 'target_urls':
                    _create_main_table(cursor, sheet_name, sheet)
                else:
                    _create_detail_table(cursor, sheet_name, sheet)
            # Commit per sheet so finished sheets persist if a later one
            # fails.
            connection.commit()

        ext = ext_SQL.get(spider_name)
        if ext:
            with connection.cursor() as cursor:
                cursor.execute(ext)
            connection.commit()
    printf('Database intiazation completed!')