def save(self):
    """Validate this item and insert it as a new row of ``self.model``.

    Each key of ``self.fields`` is copied onto a fresh model instance.
    When the mapped column type is exactly ``DECIMAL`` or ``Integer`` and
    the value is a string, commas are stripped and the text is converted
    to ``Decimal`` / ``int``; an empty string is stored as ``None``.

    Raises:
        DropItem: when adding/committing fails with an integrity error
            (the transaction is rolled back first).
    """
    self.validate()
    model_cls = self.model
    session = Session()
    try:
        record = model_cls()
        for key in self.fields.keys():
            value = self.get(key)
            # Only the first column behind the mapped attribute is inspected,
            # and the type is matched by exact class (not isinstance).
            column_type = getattr(
                model_cls, key).property.columns[0].type.__class__
            if column_type == DECIMAL:
                convert = Decimal
            elif column_type == Integer:
                convert = int
            else:
                convert = None
            if convert is not None and (
                    isinstance(value, str) or isinstance(value, unicode)):
                value = convert(value.replace(',', '')) if value else None
            setattr(record, key, value)
        session.add(record)
        session.commit()
    except (SqlalchemyIntegrityError, PymysqlIntegrityError) as e:
        session.rollback()
        raise DropItem(e.message)
    finally:
        # Any other exception propagates unchanged; the session is always
        # closed.
        session.close()
def start_requests(self):
    """Yield one information-page request per listed company.

    Reads every ``(stock_cd, market_part)`` pair from ``CurrListedCorp``
    and formats ``self.information_url_pattern`` with
    ``(market_part, stock_cd)``. No explicit callback is passed to
    ``Request``.
    """
    session = Session()
    try:
        stock_rows = session.query(
            CurrListedCorp.stock_cd, CurrListedCorp.market_part).all()
    finally:
        # ``.all()`` has already materialised the rows, so release the
        # session before yielding instead of keeping it open for as long as
        # the caller takes to consume this generator.
        session.close()

    for stock_cd, market_part in stock_rows:
        yield Request(
            self.information_url_pattern.format(market_part, stock_cd))
def save(self):
    """Validate this item and write its ``market_part`` to the database.

    Issues an UPDATE on ``CurrListedCorp`` rows whose ``stock_cd`` equals
    ``self['stock_cd']``, setting ``market_part`` to
    ``self['market_part']``, then commits.

    Raises:
        DropItem: when the update/commit fails with an integrity error
            (the transaction is rolled back first).
    """
    self.validate()
    session = Session()
    try:
        matching_corps = session.query(CurrListedCorp).filter(
            CurrListedCorp.stock_cd == self['stock_cd'])
        matching_corps.update(
            {CurrListedCorp.market_part: self['market_part']})
        session.commit()
    except (SqlalchemyIntegrityError, PymysqlIntegrityError) as e:
        session.rollback()
        raise DropItem(e.message)
    finally:
        # Any other exception propagates unchanged; the session is always
        # closed.
        session.close()
def start_requests(self):
    """Yield one cash-flow request per listed company.

    Reads every ``(stock_cd, market_part)`` pair from ``CurrListedCorp``
    and formats ``self.cashflow_url_pattern`` with
    ``(market_part, stock_cd)``. Each request carries ``stock_cd`` in its
    ``meta`` and is handled by ``self.parse_cashflow``.
    """
    session = Session()
    try:
        stock_rows = session.query(
            CurrListedCorp.stock_cd, CurrListedCorp.market_part).all()
    finally:
        # ``.all()`` has already materialised the rows, so release the
        # session before yielding instead of keeping it open for as long as
        # the caller takes to consume this generator.
        session.close()

    for stock_cd, market_part in stock_rows:
        yield Request(
            url=self.cashflow_url_pattern.format(market_part, stock_cd),
            meta={'stock_cd': stock_cd},
            callback=self.parse_cashflow)
def start_requests(self):
    """Yield one profit-statement request per (company, reporting period).

    Combines every ``stock_cd`` in ``CurrListedCorp`` with every
    ``(year, period)`` row of ``PeriodList``. The URL is built from
    ``self.profit_url_pattern`` with ``(stock_cd, year, period_season)``,
    where ``period_season`` is ``self.monthList[int(period)]``. Each
    request carries ``stock_cd``, ``year`` (as ``int``) and ``period`` in
    its ``meta`` and is handled by ``self.parse_profit``.
    """
    session = Session()
    try:
        year_period_list = session.query(
            PeriodList.year, PeriodList.period
        ).all()
        stock_cd_list = session.query(
            CurrListedCorp.stock_cd
        ).all()
    finally:
        # Both result sets are fully loaded by ``.all()``, so release the
        # session before yielding instead of keeping it open for as long as
        # the caller takes to consume this generator.
        session.close()

    for stock_cd, in stock_cd_list:
        for year_value, period in year_period_list:
            year = int(year_value)
            period_season = self.monthList[int(period)]
            yield Request(
                url=self.profit_url_pattern.format(
                    stock_cd, year, period_season
                ),
                meta={
                    'stock_cd': stock_cd,
                    'year': year,
                    'period': period,
                },
                callback=self.parse_profit
            )
def start_requests(self):
    """Yield one search form request per (company, reporting period).

    Companies are the ``CurrListedCorp`` rows whose ``data_sour`` is
    ``'0'``; periods are all ``(year, period)`` rows of ``PeriodList``.
    Each form is POSTed to ``self.search_url`` and handled by
    ``self.parse_search``, with ``stock_cd``, ``year`` and ``period``
    passed along in ``meta``.
    """
    session = Session()
    try:
        periods = session.query(PeriodList.year, PeriodList.period).all()
        stock_rows = session.query(CurrListedCorp.stock_cd).filter(
            CurrListedCorp.data_sour == '0').all()
    finally:
        session.close()

    for (stock_cd,) in stock_rows:
        for year, period in periods:
            request_meta = {
                'stock_cd': stock_cd,
                'year': year,
                'period': period,
            }
            form_fields = {
                'report_year': year,
                'stock_id': stock_cd,
                # ``.get`` yields None for a period missing from the mapping.
                'report_period_id': self.period_notice_type_dict.get(period),
            }
            yield FormRequest(
                url=self.search_url,
                formdata=form_fields,
                meta=request_meta,
                callback=self.parse_search)
def start_requests(self):
    """Record the latest period and yield one information request per company.

    Stores on ``self.periodlist`` the single ``PeriodList`` row that sorts
    first by ``year`` then ``period``, both descending. Then yields a
    ``Request`` for every ``(stock_cd, market_part)`` pair in
    ``CurrListedCorp``, formatting ``self.information_url_pattern`` with
    ``(market_part, stock_cd)``. No explicit callback is passed.

    ``.one()`` raises if ``PeriodList`` has no rows.
    """
    session = Session()
    try:
        self.periodlist = session.query(PeriodList).order_by(
            desc(PeriodList.year), desc(PeriodList.period)).limit(1).one()
        stock_rows = session.query(
            CurrListedCorp.stock_cd, CurrListedCorp.market_part).all()
    finally:
        # Everything needed is loaded, so release the session before
        # yielding instead of keeping it open for as long as the caller
        # takes to consume this generator.
        session.close()

    for stock_cd, market_part in stock_rows:
        yield Request(
            self.information_url_pattern.format(market_part, stock_cd))
def start_requests(self):
    """Yield one balance-sheet request per (company, reporting period).

    Loads all ``PeriodList`` rows (newest first) into ``self.periodlist``
    and every ``stock_cd`` from ``CurrListedCorp``. For each combination a
    balance-sheet URL is built; the ``mm`` query parameter is taken from
    ``self.monthList`` and left empty when the year is ``'2016'``. Each
    request carries the UTF-8 encoded ``year`` and ``month`` in ``meta``
    and is handled by ``self.parsebalance``.
    """
    session = Session()
    try:
        self.periodlist = session.query(PeriodList).order_by(
            desc(PeriodList.year), desc(PeriodList.period)).all()
        stock_cd_list = session.query(CurrListedCorp.stock_cd).all()
    finally:
        # Both result sets are fully loaded by ``.all()``, so release the
        # session before yielding instead of keeping it open for as long as
        # the caller takes to consume this generator.
        session.close()

    for stock_cd in stock_cd_list:
        stock_code = stock_cd[0]
        for period in self.periodlist:
            year = period.year.encode('utf8')
            month = period.period.encode('utf8')
            mm = self.monthList[int(month)]
            # Year 2016 is requested without a month; a falsy ``mm`` also
            # falls back to the empty string.
            mm_param = mm if (year != str(2016) and mm) else ''
            # NOTE(review): the doubled '&&' before 'mm=' is preserved from
            # the original URL; confirm whether it is intentional.
            balance_sheet_url = (
                'http://www.cninfo.com.cn/information/stock/balancesheet_.jsp?stockCode='
                + stock_code + '&yyyy=' + year + '&&mm=' + mm_param
                + '&cwzb=balancesheet&button2=%CC%E1%BD%BB')
            req = Request(balance_sheet_url, callback=self.parsebalance)
            req.meta['year'] = year
            req.meta['month'] = month
            yield req
def start_requests(self):
    """Yield one search form request per (company, reporting period).

    Companies are the ``CurrListedCorp`` rows whose ``data_sour`` is
    ``'0'``; periods are all ``(year, period)`` rows of ``PeriodList``.
    Each form is POSTed to ``self.search_url`` and handled by
    ``self.parse_search``, with ``stock_cd``, ``year`` and ``period``
    passed along in ``meta``. No explicit headers are sent.
    """
    session = Session()
    try:
        periods = session.query(PeriodList.year, PeriodList.period).all()
        stock_rows = session.query(CurrListedCorp.stock_cd).filter(
            CurrListedCorp.data_sour == '0').all()
    finally:
        session.close()

    for (stock_cd,) in stock_rows:
        for year, period in periods:
            form_fields = {
                'report_year': year,
                'stock_id': stock_cd,
                # ``.get`` yields None for a period missing from the mapping.
                'report_period_id': self.period_notice_type_dict.get(period),
            }
            request_meta = {
                'stock_cd': stock_cd,
                'year': year,
                'period': period,
            }
            # Disabled header overrides, kept for reference:
            # headers={
            #     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            #     'Accept-Encoding': 'gzip, deflate',
            #     'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
            #     'Content-Type': 'application/x-www-form-urlencoded',
            #     'Host': 'www.cninfo.com.cn',
            #     'Origin': 'http://www.cninfo.com.cn',
            #     'Referer': 'http://www.cninfo.com.cn/search/search.jsp'
            # },
            yield FormRequest(
                url=self.search_url,
                formdata=form_fields,
                meta=request_meta,
                callback=self.parse_search)
def start_requests(self):
    """Yield one PDF-search form request per (company, reporting period).

    Companies come from ``CurrListedCorp`` (rows with a falsy
    ``market_part`` are skipped); periods are all ``(year, period)`` rows
    of ``PeriodList``. The search window is one calendar year starting on
    1 January of ``year``, or of ``year + 1`` when ``period`` is ``'3'``.
    Each form is POSTed to ``self.pdf_search_url`` with browser-like
    headers and handled by ``self.parse_search``; ``stock_cd``, ``year``
    (as ``int``) and ``period`` are passed along in ``meta``.
    """
    session = Session()
    try:
        periods = session.query(
            PeriodList.year, PeriodList.period
        ).all()
        corp_rows = session.query(
            CurrListedCorp.stock_cd, CurrListedCorp.market_part
        ).all()
    finally:
        session.close()

    for stock_cd, market_part in corp_rows:
        if not market_part:
            continue
        for year_value, period in periods:
            year = int(year_value)
            # Period '3' is searched in the year after the reporting year.
            window_start_year = year + 1 if period == '3' else year
            start_time = '{}-01-01'.format(window_start_year)
            end_time = '{}-01-01'.format(window_start_year + 1)
            form_fields = {
                'orderby': 'date11',
                'marketType': self.market_part_market_type_dict.get(
                    market_part),
                'noticeType': self.period_notice_type_dict.get(period),
                'stockCode': stock_cd,
                'keyword': '',
                'startTime': start_time,
                'endTime': end_time,
                'pageNo': '1',
            }
            request_headers = {
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
                'Content-Type': 'application/x-www-form-urlencoded',
                'Host': 'www.cninfo.com.cn',
                'Origin': 'http://www.cninfo.com.cn',
                'Referer': 'http://www.cninfo.com.cn/search/search.jsp',
            }
            request_meta = {
                'stock_cd': stock_cd,
                'year': year,
                'period': period,
            }
            yield FormRequest(
                url=self.pdf_search_url,
                formdata=form_fields,
                headers=request_headers,
                meta=request_meta,
                callback=self.parse_search
            )