Exemplo n.º 1
0
    def parse(self, response):
        """Turn a statement table into one item per metric/date cell.

        The first selected row is the header: its first text entry is a
        label and the remaining entries are statement dates. Every later row
        starts with a metric name followed by one value per statement date.

        Yields a FinancialStatementEntryItem for each value cell and, once
        all rows are consumed, a single EndOfDocumentItem.
        """
        title, stock_code = self._parse_title_and_stock_code(response)

        row_xpath = '//*[@id="SysJustIFRAMEDIV"]/table/tr[2]/td[2]/table[1]/tr/td/table/tr'
        table_rows = response.xpath(row_xpath)

        # Header texts: index 0 is the label, indexes 1.. are statement dates.
        header_texts = table_rows[0].xpath('td//text()').extract()

        # Rows after the header are numbered from zero to give metric_index.
        for metric_index, row in enumerate(table_rows[1:]):
            row_texts = row.xpath('td//text()').extract()
            # Column 0 is the metric name, so value columns start at 1 and
            # share their index with the matching statement date in the
            # header texts.
            for column, raw_value in enumerate(row_texts[1:], 1):
                entry = FinancialStatementEntryItem()
                entry['title'] = title
                entry['statement_date'] = (
                    datetime_utils.build_datetime_from_roc_era_with_quarter(
                        header_texts[column]))
                entry['stock_code'] = stock_code
                entry['metric_index'] = metric_index
                entry['metric_name'] = row_texts[0].strip()
                entry['metric_value'] = metric_value_utils.normalize(raw_value)
                yield entry

        # Marks the end of this response's items.
        yield EndOfDocumentItem()
Exemplo n.º 2
0
    def parse(self, response):
        """Convert a JSON trading-info payload into statement entry items.

        The payload carries a 'fields' list of metric names and a 'data' list
        of records. In every record the first two entries are the year and
        the month of the trading date; the remaining entries are metric
        values aligned with 'fields'.

        Yields a FinancialStatementEntryItem per metric value and, after the
        last record, a single EndOfDocumentItem.

        Raises:
            ValueError: if a record does not have exactly one entry per field.
        """
        # NOTE(review): newer Scrapy releases expose the decoded body as
        # response.text and deprecate body_as_unicode() -- confirm the pinned
        # Scrapy version before changing this call.
        payload = json.loads(response.body_as_unicode())

        stock_code = self._parse_stock_code(payload)

        field_names = payload['fields']

        for record in payload['data']:
            if len(record) != len(field_names):
                raise ValueError(
                    u'Could not parse metric values: {0}'.format(record))

            # Entries 0 and 1 are the year and month of the trading date.
            statement_date = (
                datetime_utils.build_datetime_from_roc_era_and_month(
                    record[0], record[1]))

            # Lengths were checked above, so names and values pair up
            # one-to-one once the two date entries are skipped.
            named_values = zip(field_names[2:], record[2:])
            for metric_index, (field_name, raw_value) in enumerate(named_values):
                entry = FinancialStatementEntryItem()
                entry['title'] = u'個股月成交資訊'
                entry['statement_date'] = statement_date
                entry['stock_code'] = stock_code
                entry['metric_index'] = metric_index
                entry['metric_name'] = field_name.strip()
                entry['metric_value'] = metric_value_utils.normalize(raw_value)
                yield entry

        # Marks the end of this response's items.
        yield EndOfDocumentItem()
Exemplo n.º 3
0
    def parse(self, response):
        """Turn a month-by-metric table into one item per value cell.

        The first selected row is the header: its first text entry is the
        date frame and the remaining entries are metric names. Every later
        row starts with a statement date (a specific month) followed by one
        value per metric.

        Yields a FinancialStatementEntryItem for each value cell and, once
        all rows are consumed, a single EndOfDocumentItem.
        """
        title, stock_code = self._parse_title_and_stock_code(response)

        row_xpath = '//*[@id="oMainTable"]/tr[not(@id="oScrollHead")]'
        table_rows = response.xpath(row_xpath)

        # Header texts: index 0 is the date frame, indexes 1.. are metric
        # names.
        header_texts = table_rows[0].xpath('td//text()').extract()

        for row in table_rows[1:]:
            row_texts = row.xpath('td//text()').extract()
            # Value cells start at column 1; metric_index counts them from
            # zero, so the matching header entry sits one position further.
            for metric_index, raw_value in enumerate(row_texts[1:]):
                entry = FinancialStatementEntryItem()
                entry['title'] = title
                entry['statement_date'] = (
                    datetime_utils.build_datetime_from_roc_era_with_month(
                        row_texts[0]))
                entry['stock_code'] = stock_code
                entry['metric_index'] = metric_index
                entry['metric_name'] = header_texts[metric_index + 1].strip()
                entry['metric_value'] = metric_value_utils.normalize(raw_value)
                yield entry

        # Marks the end of this response's items.
        yield EndOfDocumentItem()
Exemplo n.º 4
0
    def parse(self, response):
        """Turn a year-by-metric table into one item per value cell.

        The first selected row is a header and is ignored. The second row
        lists the date frame followed by the metric names. The rows after it,
        except the last two (skipped so the pie charts are ignored), each
        start with a statement date (a specific year) followed by one value
        per metric.

        Yields a FinancialStatementEntryItem for each value cell and, once
        all rows are consumed, a single EndOfDocumentItem.
        """
        title, stock_code = self._parse_title_and_stock_code(response)

        row_xpath = '//*[@id="SysJustIFRAMEDIV"]/table/tr/td/table/tr/td/table/tr'
        table_rows = response.xpath(row_xpath)

        # Row 0 is skipped; row 1 holds the date frame at index 0 and the
        # metric names from index 1 onwards.
        header_texts = table_rows[1].xpath('td/text()').extract()

        # Data rows sit between the two leading rows and the two trailing
        # rows.
        for row in table_rows[2:-2]:
            row_texts = row.xpath('td/text()').extract()
            # Value cells start at column 1; metric_index counts them from
            # zero, so the matching header entry sits one position further.
            for metric_index, raw_value in enumerate(row_texts[1:]):
                entry = FinancialStatementEntryItem()
                entry['title'] = title
                entry['statement_date'] = (
                    datetime_utils.build_datetime_from_roc_era(row_texts[0]))
                entry['stock_code'] = stock_code
                entry['metric_index'] = metric_index
                entry['metric_name'] = header_texts[metric_index + 1].strip()
                entry['metric_value'] = metric_value_utils.normalize(raw_value)
                yield entry

        # Marks the end of this response's items.
        yield EndOfDocumentItem()