def _main():
    """
    Self-test entry point for the analyser.

    Uses the previously built helper modules as scaffolding: the script
    parameters are processed, both trees are read through the input/output
    layer, the analyser is run on them and its result is written back out.

    Returns 0 on success.  Exits the process with status 90 when a
    NamesConflict is raised by the analysis and with status 91 when a
    ValidationFail is raised.
    """
    # Imported locally so that they are only required when self-testing.
    from parameters import Parameters
    from input_output import InputOutput
    from tables_builder import NamesConflict

    options = Parameters().process()
    io_layer = InputOutput(options)
    source_tree, check_tree = io_layer.read_trees()

    # The analyser is built outside the guarded region on purpose: only
    # failures of the run itself are translated into exit codes.
    checker = XMLAnalyser(options, source_tree, check_tree)
    try:
        outcome = checker.run()
    except NamesConflict:
        sys.stderr.write("Conflicting names detected!\n")
        sys.exit(90)
    except ValidationFail:
        sys.stderr.write("ValidationFail exception catched!\n")
        sys.exit(91)
    else:
        io_layer.write(outcome)

    return 0
class CrawlIndustryInfo(object):
    """
    Crawl industry codes and names and store them in the `indus_info` table.

    The crawl starts at `seed_url`, follows every link found inside the
    `div.cate_items` elements of that page and reads one code/name pair from
    each linked page.
    """

    def __init__(self):
        """Set the seed URL and open the MySQL connection, cursor and I/O helper."""
        self.seed_url = 'http://q.10jqka.com.cn/stock/thshy/'
        self.conn = MySQLdb.connect(host='127.0.0.1', user='******', passwd='root', db='stock', charset='utf8')
        self.cursor = self.conn.cursor(cursorclass=MySQLdb.cursors.DictCursor)
        self.input_output = InputOutput(self.conn, self.cursor)

    def _get_html(self, url):
        """
        Download `url` and return the raw response body.

        The response object is closed even when reading it fails, so a broken
        transfer does not leave the connection open.
        """
        req = urllib2.Request(url)
        con = urllib2.urlopen(req)
        try:
            return con.read()
        finally:
            con.close()

    def _get_industry_code_list(self, url, from_encoding='utf8'):
        """
        Build a `{code: name}` dictionary from the page at `url`.

        Every `<a>` inside a `div.cate_items` element of the page is followed.
        On each linked page the first `div.stock_name` element supplies the
        name (text of its `<h2>`) and the code (`value` of its `<input>`).

        `from_encoding` is handed to BeautifulSoup for both the listing page
        and the linked pages.  An IndexError is raised when a linked page has
        no `div.stock_name` element.
        """
        doc = self._get_html(url)
        soup = BeautifulSoup(doc, 'html.parser', from_encoding=from_encoding)
        industry_urls = [
            item['href']
            for line in soup.find_all('div', attrs={'class': 'cate_items'})
            for item in line.find_all('a')
        ]

        result = {}
        for industry_url in industry_urls:
            doc = self._get_html(industry_url)
            soup = BeautifulSoup(doc, 'html.parser', from_encoding=from_encoding)
            header = soup.find_all('div', attrs={'class': 'stock_name'})[0]
            name = header.h2.string
            code = header.input['value']
            result[code] = name
        return result

    def main(self):
        """
        Crawl all code/name pairs and insert one `indus_info` row per pair.

        The cursor and the connection are closed when the method finishes,
        whether it succeeds or an exception propagates out of it.
        """
        try:
            code_name = self._get_industry_code_list(self.seed_url, from_encoding='gbk')
            for k, v in code_name.items():
                # NOTE(review): the statement is assembled by string
                # interpolation from scraped text, so a quote character in a
                # code or name breaks (or alters) the SQL.  Switching to a
                # parameterised query needs support from
                # InputOutput.insert_data, which is not visible here.
                sql = "insert into indus_info (indus_code, indus_name) value " \
                      "('%s', '%s')" % (k, v)
                # Single-argument form: prints the same text as `print k, v`
                # and is valid syntax on both Python 2 and Python 3.
                print('%s %s' % (k, v))
                self.input_output.insert_data(sql)
        finally:
            # Release the cursor first; the nested finally guarantees the
            # connection is closed even if closing the cursor fails.
            try:
                self.cursor.close()
            finally:
                self.conn.close()
def __init__(self):
    """
    Prepare the crawler state.

    Stores the seed URL, opens the MySQL connection, creates a dictionary
    cursor on it and wraps both in an InputOutput helper.
    """
    self.seed_url = 'http://q.10jqka.com.cn/stock/thshy/'

    mysql_settings = {
        'host': '127.0.0.1',
        'user': '******',
        'passwd': 'root',
        'db': 'stock',
        'charset': 'utf8',
    }
    connection = MySQLdb.connect(**mysql_settings)
    self.conn = connection

    # DictCursor is requested explicitly for this cursor.
    dict_cursor = connection.cursor(cursorclass=MySQLdb.cursors.DictCursor)
    self.cursor = dict_cursor

    self.input_output = InputOutput(connection, dict_cursor)
def __init__(self):
    """
    Open the Redis and MySQL connections used by the loader.

    Creates the Redis client and a pipeline on it, then the MySQL connection,
    a dictionary cursor and the InputOutput helper wrapping both.
    """
    # `charset` is a deprecated alias of `encoding` in redis-py; the
    # supported keyword is used so the client is configured the same way
    # without relying on the deprecated spelling.
    self.redis = redis.StrictRedis(host='192.168.1.24', port=6379, db=4,
                                   password='******', encoding='utf8')
    self.pipe = self.redis.pipeline()
    self.conn = MySQLdb.connect(host='127.0.0.1', user='******', passwd='root',
                                db='stock', charset='utf8')
    self.cursor = self.conn.cursor(cursorclass=MySQLdb.cursors.DictCursor)
    self.input_output = InputOutput(self.conn, self.cursor)
def main():
    """
    Script entry point; exists for invocation purposes only.

    Processes the parameters, reads both trees, runs the analysis and writes
    the resulting database.  Returns EXIT_CODES["no_error"] on success; each
    analysis failure handled below prints a one-line error to stderr and
    exits with its own EXIT_CODES entry.
    """

    def abort(message, exit_key):
        # Report `message` prefixed with the script name, then terminate with
        # the exit status registered under `exit_key`.
        program = os.path.basename(sys.argv[0])
        sys.stderr.write("%s: ERROR: %s\n" % (program, message))
        sys.exit(EXIT_CODES[exit_key])

    # Script parameters and the input/output layer built from them.
    settings = Parameters().process()
    interface = InputOutput(settings)

    # Parsed input plus the tree used for validation.
    input_tree, valid_tree = interface.read_trees()

    analyser = XMLAnalyser(settings, input_tree, valid_tree)

    # Only the analysis run itself is guarded.
    try:
        database = analyser.run()
    except KeywordError:
        abort("reserved SQL keyword detected as an element name",
              "error_format")
    except NamesConflict:
        abort("collisions between attribute and element names detected",
              "error_names_conflict")
    except ValidationFail:
        abort("database structure can't store the validation file data",
              "error_validation_fail")

    # Optional conversion of the analysis result to the other XML
    # representation, when requested through the `g` setting.
    if settings.g:
        DBaseToXML(database).run()

    interface.write(database)
    return EXIT_CODES["no_error"]
class LoadStockInfoIntoRedis(object):
    """
    Copy every row of the MySQL `stock_info` table into Redis string keys.

    For a row with code C the keys `stock:C:jianpin`, `stock:C:name`,
    `stock:C:nameurl` and `stock:C:quanpin` are written.
    """

    # (Redis key suffix, `stock_info` column) pairs, in the order written.
    _FIELDS = (
        ('jianpin', 'v_jian_pin'),
        ('name', 'v_name'),
        ('nameurl', 'v_name_url'),
        ('quanpin', 'v_quan_pin'),
    )

    def __init__(self):
        """Open the Redis client and pipeline, the MySQL connection, cursor and I/O helper."""
        # `charset` is a deprecated alias of `encoding` in redis-py; the
        # supported keyword is used so the client is configured the same way
        # without relying on the deprecated spelling.
        self.redis = redis.StrictRedis(host='192.168.1.24', port=6379, db=4,
                                       password='******', encoding='utf8')
        self.pipe = self.redis.pipeline()
        self.conn = MySQLdb.connect(host='127.0.0.1', user='******', passwd='root',
                                    db='stock', charset='utf8')
        self.cursor = self.conn.cursor(cursorclass=MySQLdb.cursors.DictCursor)
        self.input_output = InputOutput(self.conn, self.cursor)

    def main(self):
        """
        Read all `stock_info` rows and queue one SET per field on the pipeline.

        Values are formatted with '%s', so a missing (None) column is stored
        as the text 'None'.  The pipeline is executed once, after every row
        has been queued.
        """
        sql = 'select * from stock_info'
        stock_info = self.input_output.get_data(sql)
        for line in stock_info:
            code = line['v_code']
            for suffix, column in self._FIELDS:
                self.pipe.set('stock:%s:%s' % (code, suffix), '%s' % line[column])
        self.pipe.execute()