Exemplo n.º 1
0
    def parse_item(self, response):
        """Yield ``GGFundNavItem`` objects for the NAV table in ``div.detail_cn``.

        The first ``tr`` is skipped, and so is every row whose text contains
        '产品' or '净值'. When the URL contains the id
        ``a5905a55-18b9-4676-8e47-c399e357ef45`` each row yields two items:
        columns 2/4 under the name taken from ``span#msgTitle`` and columns
        3/5 under that name plus '次级'. Every other page yields one item per
        row, reading date, name, nav and added nav from columns 1, 3, 4, 5.

        :param response: page response whose table rows are parsed.
        :return: generator of ``GGFundNavItem``.
        """
        def cell(row, index):
            # All non-whitespace text fragments of one <td>, concatenated.
            # Raw string so the backslash reaches the regex engine untouched.
            return ''.join(
                row.css('td:nth-child(%d) ::text' % index).re(r'\S+'))

        def build_item(name, date, nav, added_nav):
            # Single place where the item fields are populated.
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['fund_name'] = name
            item['channel'] = self.channel
            item['url'] = response.url
            # NOTE(review): replace('.', '.') is a no-op as written; it looks
            # like it was meant to normalise a full-width period - confirm.
            item['nav'] = float(nav.replace('.', '.')) if nav else None
            item['added_nav'] = float(
                added_nav.replace(' ', '')) if added_nav else None
            item['statistic_date'] = datetime.strptime(
                date, '%Y-%m-%d') if date else None
            return item

        # Both values depend only on the response, so compute them once.
        two_class_page = 'a5905a55-18b9-4676-8e47-c399e357ef45' in response.url
        title_name = None
        if two_class_page:
            title_name = response.css('span#msgTitle::text').re_first(
                r'(.*)产品净值')

        for r in response.css('div.detail_cn tr')[1:]:
            row_str = ''.join(r.css('td ::text').re(r'\S+'))
            if '产品' in row_str or '净值' in row_str:
                continue

            date = cell(r, 1)
            if two_class_page:
                yield build_item(title_name, date, cell(r, 2), cell(r, 4))
                yield build_item(
                    title_name + '次级', date, cell(r, 3), cell(r, 5))
                continue

            fund_name = cell(r, 3)
            nav = cell(r, 4)
            add_nav = cell(r, 5)
            if fund_name == '和而泰员工持股计划':
                # This name alone gets the text of column 2 appended.
                fund_name = fund_name + cell(r, 2)
            yield build_item(fund_name, date, nav, add_nav)
Exemplo n.º 2
0
    def parse_item(self, response):
        """Yield NAV items from the HTML table, then added-NAV items from the
        chart data embedded in the page text.

        Table rows (after the first) give ``statistic_date`` and ``nav``. The
        chart's ``categories`` array and the '累计净值' series are paired
        position by position and give ``statistic_date`` and ``added_nav``.
        If either chart fragment is absent, only the table items are yielded.

        :param response: response carrying ``meta['ext']['fund_name']``.
        :return: generator of ``GGFundNavItem``.
        """
        fund_name = response.meta['ext']['fund_name']

        def new_item(statistic_date):
            # Fields shared by both kinds of item.
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = statistic_date
            return item

        rows = response.xpath('//div[@class="w100"]/table/tbody/tr')
        for row in rows[1:]:
            date_text = row.xpath(
                'normalize-space(./td[1]/text())').extract_first()
            nav = row.xpath('normalize-space(./td[2]/text())').extract_first()
            item = new_item(datetime.strptime(date_text, '%Y-%m-%d'))
            # normalize-space() returns '' rather than None for an empty
            # cell, so test truthiness before converting.
            item['nav'] = float(nav) if nav else None
            yield item

        categories = re.search(
            r'categories:\s*\[([^\]]+)\]\s*\},', response.text)
        series = re.search(
            r"name:\s*'累计净值',\s*data:\s*\[([^\]]+)\]", response.text)
        if categories is None or series is None:
            # No chart data on this page; the table items are all there is.
            return

        dates = re.findall(r'\d+-\d+-\d+', categories.group(1))
        added_navs = re.findall(r'[0-9.]+', series.group(1))
        for date, added_nav in zip(dates, added_navs):
            item = new_item(datetime.strptime(date, '%Y-%m-%d'))
            item['added_nav'] = float(added_nav)
            yield item
Exemplo n.º 3
0
    def parse_item(self, response):
        """Yield one NAV item per table row and queue the next page.

        ``meta['ext']['added_nav']`` is attached to the first data row only;
        every later row gets ``None``, and once any row has been processed
        the value forwarded to the next page is ``None`` as well. The next
        page is queued on ``self.ips`` while ``meta['pg']`` is below the
        ``last_page`` number found in the response text.
        """
        ext = response.meta['ext']
        fund_name = ext['fund_name']
        carried_added_nav = ext['added_nav']

        data_rows = response.xpath('//div[@class="jzgbShow"]/table//tr')[1:]
        for tr in data_rows:
            date_text = tr.xpath('.//td[1]//text()').extract_first()
            nav_text = tr.xpath('.//td[2]//text()').extract_first()

            record = GGFundNavItem()
            record['sitename'] = self.sitename
            record['channel'] = self.channel
            record['url'] = response.url
            record['fund_name'] = fund_name
            record['nav'] = float(nav_text)
            if carried_added_nav is None:
                record['added_nav'] = None
            else:
                record['added_nav'] = float(carried_added_nav)
            record['statistic_date'] = datetime.strptime(date_text, '%Y%m%d')
            # Consumed: only the first row carries the added nav.
            carried_added_nav = None
            yield record

        last_page_text = re.findall('"last_page":(.*?),', response.text)[0]
        current_pg = response.meta['pg']
        current_marker = '/page/' + str(current_pg)
        if not (current_pg < int(last_page_text)):
            return

        following_marker = '/page/' + str(current_pg + 1)
        self.ips.append({
            'url': response.url.replace(current_marker, following_marker),
            'ref': response.url,
            'pg': current_pg + 1,
            'ext': {'fund_name': fund_name, 'added_nav': carried_added_nav}
        })
    def parse_item(self, response):
        """Yield one NAV item per dated row of the ``shop_div`` table.

        The fund name comes from the product table. Rows with no text, and
        rows whose second text node is '日期', are skipped. The row dated
        '2017/12/01' uses the fixed nav 1.0544 instead of the cell value.
        """
        fund_name = response.xpath(
            '//table[@class="product_table ke-zeroborder"]//tr[1]/td[2]/text()').extract_first().strip()
        table_rows = response.xpath('//div[@class="shop_div"]/table[@class="ke-zeroborder"]//tr')

        for tr in table_rows[1:]:
            texts = tr.xpath('td//text()').extract()
            if not texts:
                continue
            date_text = texts[1].strip()
            if date_text == '日期':
                continue

            # Hard-coded override for this one date; otherwise read the cell.
            if date_text == '2017/12/01':
                nav_value = 1.0544
            else:
                nav_value = texts[2].strip()

            record = GGFundNavItem()
            record['sitename'] = self.sitename
            record['channel'] = self.channel
            record['url'] = response.url
            record['fund_name'] = fund_name
            record['statistic_date'] = datetime.strptime(date_text, '%Y/%m/%d')
            if nav_value:
                record['nav'] = float(nav_value)
            else:
                record['nav'] = None

            yield record
Exemplo n.º 5
0
    def parse_item(self, response):
        """Yield NAV items parsed from the ``netList:[...]`` fragment of the
        response text.

        Backslashes are stripped from the fragment, then ``valueStr2`` values
        (nav), ``valueStr1`` values (added nav) and ``d-d-d`` dates are paired
        position by position.

        :param response: response carrying ``meta['ext']['fund_name']``.
        :return: generator of ``GGFundNavItem``.
        :raises AttributeError: when the text has no ``netList:[...]`` part.
        """
        fund_name = response.meta['ext']['fund_name']
        # NOTE(review): meta['ext']['type'] used to select between two
        # branches that stored exactly the same fields; if type '0' was meant
        # to populate a different field, that has to be reinstated - confirm.
        payload = re.search(r'netList:\[(.*?)\]', response.text).group(1)
        payload = payload.replace('\\', '')
        navs = re.findall(r"valueStr2:'([0-9.]+)'", payload)
        added_navs = re.findall(r"valueStr1:'([0-9.]+)'", payload)
        dates = re.findall(r'\d+-\d+-\d+', payload)

        for nav, added_nav, date in zip(navs, added_navs, dates):
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(date, '%Y-%m-%d')
            # findall only returns non-empty matches, so no None guard needed.
            item['nav'] = float(nav)
            item['added_nav'] = float(added_nav)
            yield item
Exemplo n.º 6
0
    def parse_item(self, response):
        """Yield one item per table row that has a ``d-d-d`` date in column 3.

        When the page text contains '七日年化收益率', columns 1 and 2 are
        stored as ``income_value_per_ten_thousand`` and
        ``d7_annualized_return``; otherwise as ``nav`` and ``added_nav``.
        Rows without a date are skipped.

        :param response: response carrying ``meta['ext']['fund_name']``.
        :return: generator of ``GGFundNavItem``.
        """
        fund_name = response.meta['ext']['fund_name']
        # Number sitting between a closing '>' and an opening '<' in the
        # cell's markup.
        number_pattern = r'>\s*([0-9.]+)\s*<'
        # Page-level property, so evaluate it once rather than per row.
        seven_day_page = '七日年化收益率' in response.text
        if seven_day_page:
            first_key, second_key = (
                'income_value_per_ten_thousand', 'd7_annualized_return')
        else:
            first_key, second_key = 'nav', 'added_nav'

        for row in response.xpath('//table/tr')[1:]:
            statistic_date = row.xpath('./td[3]//text()').re_first(
                r'\d+-\d+-\d+')
            if statistic_date is None:
                continue

            first = row.xpath('./td[1]').re_first(number_pattern)
            second = row.xpath('./td[2]').re_first(number_pattern)

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(
                statistic_date, '%Y-%m-%d')
            item[first_key] = float(first) if first is not None else None
            item[second_key] = float(second) if second is not None else None
            yield item
 def parse_item(self, response):
     """Yield one NAV item per table row, queue the next page, then yield
     ``self.request_next()``.

     A page whose text contains '暂无信息' yields no items and queues no
     further page; ``self.request_next()`` is yielded either way.
     """
     fund_name = response.meta['ext']['fund_name']
     table_rows = response.xpath(
         "//div[@class='col-lg-12 col-md-12 col-sm-12']/table[@class='table table-striped']/tbody/tr"
     )

     def cell_text(tr, column):
         # First text node under the given <td>, stripped.
         return tr.xpath("./td[%d]//text()" % column).extract_first().strip()

     page_has_data = '暂无信息' not in response.text
     if page_has_data:
         for tr in table_rows:
             date_text = cell_text(tr, 1)
             nav_text = cell_text(tr, 2)
             added_nav_text = cell_text(tr, 3)

             record = GGFundNavItem()
             record['sitename'] = self.sitename
             record['channel'] = self.channel
             record['url'] = response.url
             record['fund_name'] = fund_name
             record['nav'] = float(nav_text)
             record['added_nav'] = float(added_nav_text)
             record['statistic_date'] = datetime.strptime(
                 date_text, '%Y-%m-%d')
             yield record

         current_pg = response.meta['pg']
         following_pg = current_pg + 1
         following_url = response.url.replace(
             'type_unit_p_' + str(current_pg),
             'type_unit_p_' + str(following_pg))
         self.ips.append({
             'url': following_url,
             'ref': response.url,
             'pg': following_pg,
             'ext': {
                 'fund_name': fund_name
             }
         })
     yield self.request_next()
Exemplo n.º 8
0
 def parse_item(self, response):
     """Yield one NAV item per row of the table under ``div.right``.

     Columns 1-4 hold fund name, nav, added nav and date. For nav and added
     nav only the text before the first '(' is used. The added nav is stored
     under ``added_nav_2`` for the fund '沣谊一号' and under ``added_nav``
     for every other fund. A missing or empty value cell is stored as
     ``None``.

     :param response: page response whose table rows are parsed.
     :return: generator of ``GGFundNavItem``.
     """
     def leading_number(cell):
         # Value before the first '(' of the cell text, or None when the
         # cell is missing or nothing precedes the parenthesis.
         # NOTE(review): replace('(', '(') is a no-op as written; it looks
         # like it was meant to normalise a full-width parenthesis - confirm.
         text = (cell or '').replace('(', '(').split('(')[0].strip()
         return float(text) if text else None

     rows = response.xpath("//div[@class='right']//tr")
     for row in rows[1:]:
         fund_name = row.xpath("./td[1]//text()").extract_first()
         statistic_date = row.xpath("./td[4]//text()").extract_first()
         nav = leading_number(row.xpath("./td[2]//text()").extract_first())
         added_nav = leading_number(
             row.xpath("./td[3]//text()").extract_first())

         item = GGFundNavItem()
         item['sitename'] = self.sitename
         item['channel'] = self.channel
         item['url'] = response.url
         item['fund_name'] = fund_name
         item['nav'] = nav
         # Each value is guarded on its own content (added_nav is no longer
         # gated on nav).
         added_key = 'added_nav_2' if fund_name == '沣谊一号' else 'added_nav'
         item[added_key] = added_nav
         item['statistic_date'] = datetime.strptime(statistic_date,
                                                    '%Y-%m-%d')
         yield item
Exemplo n.º 9
0
    def parse_item(self, response):
        """Yield a NAV item for each second table row under ``#infoContent``,
        then yield ``self.request_next()``.

        Rows that lack one of the three cells, or whose date or nav text does
        not parse, are skipped. Only those failures are suppressed: the
        ``yield`` sits outside the ``try`` so that closing the generator
        (``GeneratorExit``) and genuine programming errors are not swallowed.

        :param response: page response whose table rows are parsed.
        :return: generator of ``GGFundNavItem`` followed by the next request.
        """
        fund_rows = response.xpath(
            '//div[@id="infoContent"]/table/tbody/tr[2]')
        for fund in fund_rows:
            try:
                fund_name = fund.xpath('td[1]/text()').extract()[0]
                nav_text = fund.xpath('td[2]/text()').extract()[0]
                date_text = fund.xpath('td[3]/text()').extract()[0]
                statistic_date = datetime.strptime(
                    date_text.strip('\n').strip('\t'), '%Y-%m-%d')
                nav = float(nav_text)
            except (IndexError, ValueError):
                # Missing cell, or unparseable date / number: skip the row.
                continue

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name.strip('\n').strip('\t')
            item['statistic_date'] = statistic_date
            item['nav'] = nav
            item['added_nav'] = None
            yield item
        yield self.request_next()
Exemplo n.º 10
0
    def parse_item(self, response):
        """Yield one NAV item per data row and queue the next page.

        Each row's text nodes at index 1, 2 and 3 are date, nav and added
        nav. A page with no data rows yields nothing and ends pagination;
        otherwise the trailing digits of the URL are replaced with the next
        page number and the request is appended to ``self.ips``.

        :param response: response carrying ``meta['ext']`` (the fund name)
            and ``meta['pg']``.
        :return: generator of ``GGFundNavItem``.
        """
        rows = response.css('table.tableStyle.overvieTOP tr')[1:]
        if not rows:
            return

        # Same for every row, so look it up once.
        fund_name = response.meta['ext']
        for r in rows:
            cells = r.xpath('td//text()').extract()
            date, nav, add_nav = cells[1], cells[2], cells[3]

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(date, '%Y-%m-%d')
            # extract() only returns strings, so no None guard is needed.
            item['nav'] = float(nav)
            item['added_nav'] = float(add_nav)
            yield item

        next_pg = response.meta['pg'] + 1
        self.ips.append({
            'url': re.sub(r'\d+$', str(next_pg), response.url),
            'ref': response.url,
            'pg': next_pg,
            'ext': response.meta['ext']
        })
 def parse_item(self, response):
     """Yield one NAV item per ``tr`` after the first and queue the next page.

     The nav is the text of column 2 up to the first '('. The date in
     column 1 is converted from 年/月/日 notation to ``Y-m-d``. When the page
     has at most one row nothing is yielded and no further page is queued.
     """
     fund_name = response.meta['ext']['fund_name']
     table_rows = response.xpath("//tr")
     if len(table_rows) <= 1:
         return

     for tr in table_rows[1:]:
         nav_text = tr.xpath('./td[2]//text()').extract_first()
         nav_text = nav_text.replace('( ', '(').replace('(', '(')
         # split() returns the whole string when there is no '('.
         nav_text = nav_text.split('(')[0]

         date_text = tr.xpath('./td[1]//text()').extract_first()
         date_text = date_text.replace('年', '-')
         date_text = date_text.replace('月', '-')
         date_text = date_text.replace('日', '')

         record = GGFundNavItem()
         record['sitename'] = self.sitename
         record['channel'] = self.channel
         record['url'] = response.url
         record['fund_name'] = fund_name
         if nav_text is None:
             record['nav'] = None
         else:
             record['nav'] = float(nav_text)
         record['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
         yield record

     following_pg = int(response.meta['pg']) + 1
     base_path = response.meta['ext']['url']
     self.ips.append({
         'url': 'http://www.longwininvestment.com' + base_path + '&page=' + str(following_pg),
         'ref': response.url,
         'pg': following_pg,
         'ext': {
             'fund_name': fund_name,
             'url': base_path
         },
     })
Exemplo n.º 12
0
    def parse_item(self, response):
        """Yield one NAV item per ``ul`` after the first and queue the next
        page until the last date stops changing.

        The date of the last row on this page is compared with
        ``meta['ext']['last_one_date']``; the next page is queued on
        ``self.ips`` only when the two differ.
        """
        fund_name = response.meta['ext']['fund_name']
        entries = response.xpath("//div[@class='right_colume']//ul")
        if len(entries) <= 1:
            return

        for ul in entries[1:]:
            date_text = ul.xpath("./li[1]//text()").extract_first()
            nav_text = ul.xpath("./li[2]//text()").extract_first()
            added_nav_text = ul.xpath("./li[3]//text()").extract_first()

            record = GGFundNavItem()
            record['sitename'] = self.sitename
            record['channel'] = self.channel
            record['url'] = response.url
            record['fund_name'] = fund_name
            record['nav'] = float(nav_text)
            record['added_nav'] = float(added_nav_text)
            record['statistic_date'] = datetime.strptime(
                date_text, '%Y-%m-%d')
            yield record

        current_pg = response.meta['pg']
        previous_last_date = response.meta['ext']['last_one_date']
        # date_text still holds the date of the final row processed above.
        page_last_date = date_text
        if page_last_date == previous_last_date:
            return

        following_pg = current_pg + 1
        self.ips.append({
            'url': response.url.replace('page=' + str(current_pg),
                                        'page=' + str(following_pg)),
            'ref': response.url,
            'pg': following_pg,
            'ext': {
                'fund_name': fund_name,
                'last_one_date': page_last_date
            }
        })
Exemplo n.º 13
0
    def parse_item(self, response):
        """Yield one NAV item per table row after the first, then yield
        ``self.request_next(fps, ips)``.

        The date is the concatenation of the first two ``span`` texts in
        column 1 and is parsed as ``%Y年%m月%d日``; nav and added nav come
        from the first ``span`` of columns 2 and 3.
        """
        fps = response.meta['fps']
        ips = response.meta['ips']

        fund_name = response.xpath(
            '//*[@id="view_text_24_10_txt"]/div/p/span/strong/text()'
        ).extract_first()
        table_rows = response.xpath(
            '//*[@id="view_text_17_10_txt"]/div/table/tbody/tr')
        # Drop the first row in place before iterating.
        table_rows.pop(0)

        for tr in table_rows:
            date_head = tr.xpath("./td[1]/span[1]/text()").extract_first()
            date_tail = tr.xpath("./td[1]/span[2]/text()").extract_first()
            date_text = date_head + date_tail
            parsed_date = datetime.strptime(date_text, '%Y年%m月%d日')

            nav_text = tr.xpath("./td[2]/span[1]/text()").extract_first()
            nav_value = float(nav_text)
            added_nav_text = tr.xpath("./td[3]/span[1]/text()").extract_first()
            added_nav_value = float(added_nav_text)

            record = GGFundNavItem()
            record['sitename'] = self.sitename
            record['channel'] = self.channel
            record['url'] = response.url
            record['fund_name'] = fund_name
            record['statistic_date'] = parsed_date
            record['nav'] = nav_value
            record['added_nav'] = added_nav_value
            yield record

        yield self.request_next(fps, ips)
Exemplo n.º 14
0
    def parse_item(self, response):
        """Yield one item per ``tr`` after the first two and re-queue the
        request for the next page.

        The header texts decide which fields the second and third cell
        fill: ``nav`` / ``added_nav`` when a header equals '单位净值',
        ``d7_annualized_return`` / ``income_value_per_ten_thousand`` when one
        equals '7日年化', and neither otherwise. When rows exist,
        ``response.meta`` itself is updated in place (``pg`` and
        ``form['pageIndex']``) and appended to ``self.ips``.
        """
        fund_name = response.meta['ext']
        data_rows = response.css('tr')[2:]
        headers = response.css('tr th::text').extract()
        if not data_rows:
            return

        # The header list is fixed for the page, so choose the keys once.
        if '单位净值' in headers:
            value_keys = ('nav', 'added_nav')
        elif '7日年化' in headers:
            value_keys = ('d7_annualized_return', 'income_value_per_ten_thousand')
        else:
            value_keys = None

        for tr in data_rows:
            cells = tr.css('td::text').extract()
            date_text = cells[0]

            record = GGFundNavItem()
            record['sitename'] = self.sitename
            record['channel'] = self.channel
            record['url'] = response.url
            record['fund_name'] = fund_name
            if date_text:
                record['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
            else:
                record['statistic_date'] = None

            if value_keys is not None:
                record[value_keys[0]] = float(cells[1]) if cells[1] else None
                record[value_keys[1]] = float(cells[2]) if cells[2] else None

            yield record

        following_pg = response.meta['pg'] + 1
        request_meta = response.meta
        request_meta['pg'] = following_pg
        request_meta['form']['pageIndex'] = str(following_pg)
        self.ips.append(request_meta)
Exemplo n.º 15
0
    def parse_item(self, response):
        """Yield one NAV item per row of the ``MsoNormalTable`` for the fund
        '道通润丰一号', then yield ``self.request_next()``.

        The last text node of a row is the nav; the preceding text nodes,
        joined, are a month/day string without a year. The year starts at
        2016 and is decremented whenever a '12' month follows a '1' month.
        """
        fund_name = '道通润丰一号'
        table_rows = response.css('table.MsoNormalTable tr')

        current_year = 2016
        previous_month = '0'
        for tr in table_rows[1:]:
            texts = tr.xpath('td//text()').extract()
            month_day = ''.join(texts[:-1]).strip()
            month = month_day.split('月')[0]
            # The scraped dates carry no year, so a year boundary is detected
            # by concatenating this row's month with the previous row's:
            # '12' followed by '1' gives '121'.
            # NOTE(review): this presumably relies on rows being listed
            # newest first - confirm against the page.
            if month + previous_month == '121':
                current_year -= 1
            date_text = str(current_year) + '年' + month_day
            nav_text = texts[-1]
            previous_month = month

            record = GGFundNavItem()
            record['sitename'] = self.sitename
            record['channel'] = self.channel
            record['url'] = response.url
            record['fund_name'] = fund_name
            record['statistic_date'] = datetime.strptime(date_text, '%Y年%m月%d日')
            record['nav'] = float(nav_text)

            yield record

        yield self.request_next()
Exemplo n.º 16
0
    def parse_item(self, response):
        """Yield one NAV item per table row that has ``th`` text, then yield
        ``self.request_next()``.

        The non-whitespace ``th`` text fragments of a row are, in order:
        fund code, date, nav, added nav. The code is mapped to a fund name
        through a fixed table.

        :param response: page response whose table rows are parsed.
        :return: generator of ``GGFundNavItem`` followed by the next request.
        :raises KeyError: when a row carries a fund code that is not in the
            mapping.
        """
        name_match = {
            'S81154': '泰和汇1期择时策略混合基金',
            'SN5055': '泰和汇2期主题精选私募投资基金',
        }

        for row in response.css('table tr')[1:]:
            # Raw string so the backslash reaches the regex engine untouched.
            info = row.css('th ::text').re(r'\S+')
            if not info:
                continue

            fund_name = name_match[info[0]]
            statistic_date = info[1]
            nav = info[2]
            added_nav = info[3]

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(
                statistic_date, '%Y-%m-%d')
            # .re() only returns strings, so no None guard is needed.
            item['nav'] = float(nav)
            item['added_nav'] = float(added_nav)
            yield item

        yield self.request_next()
    def parse_item(self, response):
        """Yield one NAV item per row of the ``equityTable`` and queue the
        next page while the current page is below the page count.

        The page count is read from the '当前为 x/N 页' pager text in the
        response.

        :param response: response carrying ``meta['ext']['fund_name']`` and
            ``meta['pg']``.
        :return: generator of ``GGFundNavItem``.
        :raises IndexError: when the pager text is not found.
        """
        fund_name = response.meta['ext']['fund_name']
        rows = response.xpath(
            "//div[@class='bottombox fl']/table[@class='equityTable']//tr")
        # Raw string so that \d reaches the regex engine untouched.
        end_pg = re.findall(
            r'当前为<font color="#FF0000">(.*?)</font>/(\d+)页 ',
            response.text)[0][1]

        if len(rows) > 1:
            for row in rows[1:]:
                statistic_date = row.xpath("./td[1]//text()").extract_first()
                nav = row.xpath("./td[2]//text()").extract_first()
                added_nav = row.xpath("./td[3]//text()").extract_first()
                item = GGFundNavItem()
                item['sitename'] = self.sitename
                item['channel'] = self.channel
                item['url'] = response.url
                item['fund_name'] = fund_name
                item['nav'] = float(nav)
                item['added_nav'] = float(added_nav)
                item['statistic_date'] = datetime.strptime(
                    statistic_date, '%Y/%m/%d')
                yield item

        pg = response.meta['pg']
        if pg < int(end_pg):
            next_pg = pg + 1
            next_url = response.url.replace('?page=' + str(pg),
                                            '?page=' + str(next_pg))
            self.ips.append({
                'url': next_url,
                'ref': response.url,
                'pg': next_pg,
                'ext': {
                    'fund_name': fund_name
                }
            })
Exemplo n.º 18
0
    def parse_item(self, response):
        """Yield one NAV item per ``tr`` after the first and, when at least
        one such row exists, queue the next page on ``self.ips``.

        Columns 1-3 hold date, nav and added nav.
        """
        fund_name = response.meta['ext']['fund_name']
        data_rows = response.xpath("//tr")[1:]

        for tr in data_rows:
            date_text = tr.xpath("./td[1]//text()").extract_first()
            nav_text = tr.xpath("./td[2]//text()").extract_first()
            added_nav_text = tr.xpath("./td[3]//text()").extract_first()

            record = GGFundNavItem()
            record['sitename'] = self.sitename
            record['channel'] = self.channel
            record['url'] = response.url
            record['fund_name'] = fund_name
            record['nav'] = None if nav_text is None else float(nav_text)
            record['added_nav'] = (
                None if added_nav_text is None else float(added_nav_text))
            record['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
            yield record

        # No data rows means there is no further page to request.
        if not data_rows:
            return

        current_pg = response.meta['pg']
        following_pg = int(current_pg) + 1
        self.ips.append({
            'url': response.url.replace('&page=' + str(current_pg),
                                        '&page=' + str(following_pg)),
            'ref': response.url,
            'pg': following_pg,
            'ext': {
                'fund_name': fund_name
            },
        })
Exemplo n.º 19
0
 def parse_item(self, response):
     """Yield one NAV item per record of a JSON response.

     URLs containing '125.93.53.23' keep the records under ``result``, give
     dates as ``Y-m-d`` strings and use ``self.channel``. All other URLs keep
     them under ``get_response.netAnnouncements.items``, give dates as epoch
     milliseconds (reduced to the local calendar date) and use the channel
     '券商资管净值'.

     :param response: JSON response carrying ``meta['ext']['fund_name']``.
     :return: generator of ``GGFundNavItem``.
     """
     fund_name = response.meta['ext']['fund_name']
     row_info = json.loads(response.text)
     # The source is a property of the response, so decide it once.
     from_ip_source = '125.93.53.23' in response.url
     if from_ip_source:
         rows = row_info['result']
         channel = self.channel
     else:
         rows = row_info['get_response']['netAnnouncements']['items']
         channel = '券商资管净值'

     for row in rows:
         statistic_date = row['netValueDate']
         nav = row['netValue']
         added_nav = row['totalNetValue']

         item = GGFundNavItem()
         item['sitename'] = self.sitename
         item['url'] = response.url
         item['fund_name'] = fund_name
         item['nav'] = float(nav) if nav is not None else None
         # Guard on added_nav itself: testing nav here would crash on a null
         # totalNetValue and would drop a valid one when netValue is null.
         item['added_nav'] = float(
             added_nav) if added_nav is not None else None
         item['channel'] = channel
         if from_ip_source:
             item['statistic_date'] = datetime.strptime(
                 statistic_date, '%Y-%m-%d')
         else:
             # Epoch milliseconds -> local date, time of day dropped.
             item['statistic_date'] = datetime.strptime(
                 time.strftime("%Y-%m-%d",
                               time.localtime(int(statistic_date) / 1000)),
                 '%Y-%m-%d')
         yield item
    def parse_item(self, response):
        """Yield one NAV item per row of the ``jingzhi`` table and queue the
        next page.

        ``added_nav`` is set only when column 3 has text. When the table has
        no rows nothing is yielded and no further page is queued.
        """
        fund_name = response.meta['ext']['fund_name']
        table_rows = response.xpath("//table[@class='jingzhi']//tr")
        if not table_rows:
            return

        for tr in table_rows:
            date_text = tr.xpath("./td[1]//text()").extract_first()
            nav_text = tr.xpath('./td[2]//text()').extract_first()
            added_nav_text = tr.xpath('./td[3]//text()').extract_first()

            record = GGFundNavItem()
            record['sitename'] = self.sitename
            record['channel'] = self.channel
            record['url'] = response.url
            record['fund_name'] = fund_name
            record['nav'] = float(nav_text)
            # The key stays absent (not None) when there is no value.
            if added_nav_text:
                record['added_nav'] = float(added_nav_text)
            record['statistic_date'] = datetime.strptime(date_text, '%Y/%m/%d')
            yield record

        current_pg = response.meta['pg']
        following_pg = current_pg + 1
        current_suffix = '_' + str(current_pg) + '.html'
        following_suffix = '_' + str(following_pg) + '.html'
        self.ips.append({
            'url': response.url.replace(current_suffix, following_suffix),
            'ref': response.url,
            'pg': following_pg,
            'ext': {
                'fund_name': fund_name
            }
        })
Exemplo n.º 21
0
 def parse_item(self, response):
     """Yield one NAV item per ``tr`` after the first, for two row layouts.

     Rows with more than four cells hold a ``Y-m-d`` date in column 2 and
     the nav in column 3. Rows with three or four cells hold a 年/月/日 date
     in column 1 and the nav in column 2. Rows with fewer cells, or without
     a matching date, yield nothing.
     """
     ext = response.meta['ext']
     fund_name = ext['fund_name']

     # (date xpath, date regex, nav xpath, strptime format) per layout.
     wide_layout = ('./td[2]//text()', r'\d+-\d+-\d+',
                    './td[3]//text()', '%Y-%m-%d')
     narrow_layout = ('./td[1]//text()', r'\d+年\d+月\d+日',
                      './td[2]//text()', '%Y年%m月%d日')

     for tr in response.xpath('//tr')[1:]:
         cell_count = len(tr.xpath('./td'))
         if cell_count > 4:
             layout = wide_layout
         elif cell_count > 2:
             layout = narrow_layout
         else:
             continue
         date_xpath, date_regex, nav_xpath, date_format = layout

         date_text = tr.xpath(date_xpath).re_first(date_regex)
         if not date_text:
             continue
         nav_text = tr.xpath(nav_xpath).re_first(r'(\d+\.?\d*)')

         record = GGFundNavItem()
         record['sitename'] = self.sitename
         record['fund_name'] = fund_name
         record['channel'] = self.channel
         record['url'] = response.url
         record['nav'] = float(nav_text) if nav_text else None
         record['statistic_date'] = datetime.strptime(date_text, date_format)
         yield record
Exemplo n.º 22
0
    def parse_item(self, response):
        """Yield one NAV item per record of a JSON response, re-queue the
        request for the next page, then yield ``self.request_next()``.

        ``data['records']`` supplies ``netDate`` (its first 10 characters are
        parsed as ``Y-m-d``), ``netValue`` and ``netValueAccu``; the two
        values are stored as delivered, without conversion. While
        ``page * 500`` is below ``data['totalRecordCount']``, the ``page``
        counter inside ``meta['pg']`` is incremented in place and the same
        URL, headers and body are appended to ``self.ips``.

        :param response: JSON response carrying ``ext``, ``pg``, ``headers``,
            ``body`` and ``ref`` in ``meta``.
        :return: generator of ``GGFundNavItem`` followed by the next request.
        """
        # NOTE(review): 500 is presumably the page size requested in the
        # request body - confirm against the caller.
        page_size = 500

        data = json.loads(response.text)['data']
        ext = response.meta['ext']
        fund_name = ext['fund_name']

        for record in data['records']:
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(
                record['netDate'][0:10], '%Y-%m-%d')
            item['nav'] = record['netValue']
            item['added_nav'] = record['netValueAccu']
            yield item

        pg = response.meta['pg']
        if pg['page'] * page_size < int(data['totalRecordCount']):
            pg['page'] = pg['page'] + 1
            self.ips.append({
                'pg': pg,
                'url': response.url,
                'headers': response.meta['headers'],
                'body': response.meta['body'],
                'ref': response.meta['ref'],
                'ext': response.meta['ext']
            })

        yield self.request_next()
Exemplo n.º 23
0
    def parse_item(self, response):
        """Yield one NAV item per ``tr`` after the first and queue the page
        that the '下一页' link points to.

        Columns 1, 2 and 4 hold nav, added nav and date. Rows whose date cell
        is missing or is not a ``Y/m/d`` date are skipped.

        :param response: response carrying ``meta['ext']`` with
            ``fund_name`` and ``url``.
        :return: generator of ``GGFundNavItem``.
        """
        rows = response.xpath('//tr')
        ext = response.meta['ext']
        fund_name = ext['fund_name']
        url = ext['url']
        next_page = response.xpath(
            '/html/body/div/a[text()="下一页"]/@href').re_first(r'&page=(\d+)')

        for row in rows[1:]:
            fund_date = row.xpath('./td[4]/text()').extract_first()
            try:
                statistic_date = datetime.strptime(fund_date, '%Y/%m/%d')
            except (TypeError, ValueError):
                # TypeError: no date cell at all; ValueError: not a date.
                continue
            nav = row.xpath('./td[1]/text()').extract_first()
            added_nav = row.xpath('./td[2]/text()').extract_first()

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = statistic_date
            item['nav'] = float(nav) if nav is not None else None
            # Guard on added_nav itself: testing nav here would crash on a
            # missing added-nav cell.
            item['added_nav'] = float(
                added_nav) if added_nav is not None else None
            yield item

        if next_page:
            self.ips.append({
                'url': url + '&page=' + str(next_page),
                'ref': response.url,
                'ext': {
                    'fund_name': fund_name,
                    'url': url
                }
            })
Exemplo n.º 24
0
    def parse_item(self, response):
        """Yield one NAV item per row of the ``jz_table`` after the first and
        queue the next page.

        Dates are normalised before parsing as ``Y/m/d``: every '-' becomes
        '/', and when the fifth character is not '/', a '/' is appended to
        each occurrence of the four-character year prefix. The fund name is
        the first text under ``div.left``. When the table has at most one
        row nothing is yielded and no further page is queued.
        """
        table_rows = response.xpath("//div[@class='Details_right']/div[@class='jz_table']//tr")
        fund_name = response.xpath("//div[@class='left']//text()").extract_first()
        if len(table_rows) <= 1:
            return

        for tr in table_rows[1:]:
            date_text = tr.xpath("./td[1]//text()").extract_first()
            if '-' in date_text:
                date_text = date_text.replace('-', '/')
            year_prefix = date_text[0:4]
            if date_text[4] != '/':
                date_text = date_text.replace(year_prefix, year_prefix + '/')

            nav_text = tr.xpath('./td[2]//text()').extract_first()
            added_nav_text = tr.xpath('./td[3]//text()').extract_first()

            record = GGFundNavItem()
            record['sitename'] = self.sitename
            record['channel'] = self.channel
            record['url'] = response.url
            record['fund_name'] = fund_name
            record['nav'] = None if nav_text is None else float(nav_text)
            record['added_nav'] = (
                None if added_nav_text is None else float(added_nav_text))
            record['statistic_date'] = datetime.strptime(date_text, '%Y/%m/%d')
            yield record

        current_pg = response.meta['pg']
        following_pg = int(current_pg) + 1
        self.ips.append({
            'url': response.url.replace('&page=' + str(current_pg), '&page=' + str(following_pg)),
            'ref': response.url,
            'pg': following_pg,
        })
Exemplo n.º 25
0
    def parse_item(self, response):
        """Parse the NAV history table of a fund page.

        Yields one ``GGFundNavItem`` per table row after the first, skipping
        rows whose NAV cell reads '(已清盘)'.  After all rows are processed,
        yields ``self.request_next(fps, ips)`` using the queues carried in
        ``response.meta``.
        """
        fps = response.meta['fps']
        ips = response.meta['ips']

        fund_name = response.xpath(
            '//*[@id="fund-gy"]/div[1]/div[1]/p[1]/span/text()').extract_first(
            )
        rows = response.xpath(
            '//*[@id="divmodal"]/div/div/div[2]/div/div/table/tbody/tr')

        # Slice instead of pop(0): an empty selection must not raise
        # IndexError, otherwise request_next() below is never yielded.
        for row in rows[1:]:
            nav = row.xpath("./td[2]/text()").extract_first()
            if nav == '(已清盘)':
                # Marker row without a numeric NAV; skip it before parsing
                # any of the other cells.
                continue

            statistic_date = row.xpath("./td[1]/text()").extract_first()
            added_nav = row.xpath("./td[3]/text()").extract_first()

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(statistic_date,
                                                       '%Y-%m-%d')
            # Empty cells become None rather than raising on float(None).
            item['nav'] = float(nav) if nav is not None else None
            item['added_nav'] = (
                float(added_nav) if added_nav is not None else None)
            yield item

        yield self.request_next(fps, ips)
Exemplo n.º 26
0
    def parse_item(self, response):
        """Parse a JSON NAV listing and yield one item per record.

        The response body is decoded as JSON and its ``result`` entry is
        iterated.  Each record supplies ``pd_name``, ``init_date`` (its first
        eight characters are read as YYYYMMDD), ``share_net`` and
        ``share_net_total``.
        """
        nv_list = json.loads(response.text)['result'] or []

        for each_nv in nv_list:
            # A fresh item per record: reusing one instance across yields
            # would make every yielded record the same mutable object, so
            # later iterations would overwrite earlier ones still in flight.
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = each_nv['pd_name']

            init_date = str(each_nv['init_date'])
            statistic_date = '-'.join(
                (init_date[:4], init_date[4:6], init_date[6:8]))
            item['statistic_date'] = datetime.strptime(statistic_date,
                                                       '%Y-%m-%d')

            nav = each_nv['share_net']
            item['nav'] = float(nav) if nav is not None else None
            added_nav = each_nv['share_net_total']
            item['added_nav'] = float(
                added_nav) if added_nav is not None else None
            yield item
Exemplo n.º 27
0
    def parse_item(self, response):
        """Parse the ``tr.tts`` rows of a listing page and queue the next page.

        Yields one ``GGFundNavItem`` per row (``added_nav`` is always None for
        this source).  When the page has rows, the trailing ``-N.html`` page
        number in the URL is incremented and the resulting URL is appended to
        the ``ips`` queue from ``response.meta``, after which
        ``self.request_next()`` is yielded.  A page without rows yields
        nothing.
        """
        ips = response.meta['ips']
        rows = response.css('tr.tts')
        if not rows:
            # NOTE(review): as in the original code, an empty page does not
            # call request_next(); confirm that this is the intended way to
            # end the crawl.
            return

        for row in rows:
            fund_name = row.xpath(
                './td[@width="126"]/text()').extract_first()
            statistic_date = row.xpath(
                './td[@width="89"]/text()').extract_first()
            nav = row.xpath('./td[@width="73"]/text()').extract_first()

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(
                statistic_date,
                '%Y-%m-%d') if statistic_date is not None else None
            item['nav'] = float(nav) if nav is not None else None
            item['added_nav'] = None
            yield item

        # Pagination is done once per page.  Previously this ran inside the
        # row loop, queueing the same next URL and calling request_next()
        # once for every row.
        url = response.url
        page_numbers = re.findall(r'.*-(\d+)\.html', url)
        if page_numbers:
            sub_str = str(int(page_numbers[0]) + 1) + '.html'
            next_url = re.sub(r'\d+\.html', sub_str, url)
            ips.append({'url': next_url, 'ref': url})

        yield self.request_next()
    def parse_item(self, response):
        """Parse NAV rows from the page's table body and queue the next page.

        For every ``<tr>`` under a ``<tbody>`` the text nodes of its cells are
        collected.  The first text must contain '净值'; the part before it is
        used as the fund name.  Rows with text in a third cell are read as
        (name, nav, added_nav, date); other rows as (name, nav, date) with
        ``added_nav`` set to None.  If a ``li.paging_next`` link is found, its
        href is appended to ``self.ips`` as an absolute URL.
        """
        for tr in response.xpath('//tbody//tr'):
            texts = tr.xpath('td//text()').extract()
            fund_name = re.findall('.*净值', texts[0])[0].replace('净值', '')

            if tr.xpath('td[3]//text()'):
                nav, added_nav, statistic_date = texts[1], texts[2], texts[3]
            else:
                nav, added_nav, statistic_date = texts[1], None, texts[2]

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['nav'] = float(nav)
            # Always populate added_nav (None when the row has no such
            # column); the previous code left the field unset in that case
            # and guarded the conversion on ``nav`` instead of ``added_nav``.
            item['added_nav'] = (
                float(added_nav) if added_nav is not None else None)
            item['statistic_date'] = datetime.strptime(statistic_date,
                                                       '%Y-%m-%d')
            yield item

        # NOTE(review): 下一页 is not quoted inside contains(), so XPath reads
        # it as a child-element name rather than a string literal and the
        # predicate does not filter on the link text; the selection relies on
        # li.paging_next alone.  Left unchanged to keep the current matches.
        next_href = response.xpath(
            '//li[@class ="paging_next"]//a[contains(text(),下一页)]//@href'
        ).extract_first()
        if next_href:
            ips_url = 'http://www.qk-capital.com' + next_href
            self.ips.append({'url': ips_url, 'ref': response.url})
Exemplo n.º 29
0
    def parse_item(self, response):
        """Yield a NAV item for each row of the last ``tbody.list`` table.

        Cell texts are read positionally: index 0 is the fund name, 2 the
        date (``%Y-%m-%d``), 4 the NAV and 5 the accumulated NAV.  Once all
        rows are emitted, ``self.request_next()`` is yielded.
        """
        last_table = response.css('tbody.list')[-1]

        for tr in last_table.css('tr'):
            cells = tr.xpath('td//text()').extract()
            fund_name = cells[0]
            date_text = cells[2]
            nav_value = float(cells[4])
            added_value = float(cells[5])

            # For the product "中信兴聚一期" every NAV value has to be
            # divided by 100.
            if '中信兴聚一期' in fund_name:
                nav_value, added_value = nav_value / 100, added_value / 100

            record = GGFundNavItem()
            record['sitename'] = self.sitename
            record['channel'] = self.channel
            record['url'] = response.url
            record['fund_name'] = fund_name
            record['nav'] = nav_value
            record['added_nav'] = added_value
            record['statistic_date'] = datetime.strptime(
                date_text, '%Y-%m-%d')
            yield record

        yield self.request_next()
Exemplo n.º 30
0
 def parse_item(self, response):
     """Parse one page of a product's NAV table and queue the next page.

     The current ``page`` and base ``url`` come from
     ``response.meta['ext']``.  One ``GGFundNavItem`` is yielded per table
     row after the first (cells: nav, added_nav, date as ``%Y%m%d``).  The
     number in the last ``<option>`` of the ``page`` select is compared with
     the current page; while the current page is smaller, the following
     page is appended to ``self.ips``.
     """
     ext = response.meta['ext']
     url = ext['url']
     page = int(ext['page'])
     last_page = response.xpath(
         '//*[@id="productcontent"]/div[3]/div[2]/select[@name="page"]/option[last()]/text()'
     ).re_first(r'(\d+)')
     fund_name = response.xpath(
         '//*[@id="prodtitle"]/text()').extract_first()
     rows = response.xpath('//*[@id="productcontent"]/div[3]/table//tr')
     for row in rows[1:]:
         item = GGFundNavItem()
         item['sitename'] = self.sitename
         item['fund_name'] = fund_name
         item['channel'] = self.channel
         item['url'] = response.url
         nav = row.xpath('./td[1]/text()').extract_first()
         item['nav'] = float(nav)
         added_nav = row.xpath('./td[2]/text()').extract_first()
         item['added_nav'] = float(added_nav)
         statistic_date = row.xpath('./td[3]/text()').extract_first()
         item['statistic_date'] = datetime.strptime(statistic_date,
                                                    '%Y%m%d')
         yield item
     # re_first() returns None when the page selector is absent; without the
     # None check int(None) would raise TypeError here.
     if last_page is not None and page < int(last_page):
         self.ips.append({
             'url': url + '&page=' + str(page + 1),
             'ref': response.url,
             'ext': {
                 'page': str(page + 1),
                 'url': url
             }
         })