Exemplo n.º 1
0
def _main():
    """\
    Self-test entry point: runs the analyser once, relying on the
    already written parameter, input/output and table-building modules
    for everything around it.

    Returns 0 after the result has been written. If the analyser raises
    NamesConflict the process exits with status 90; if it raises
    ValidationFail the process exits with status 91.
    """
    from parameters import Parameters
    from input_output import InputOutput
    from tables_builder import NamesConflict

    def _abort(text, status):
        # Report the problem on stderr, then terminate with the given status.
        sys.stderr.write(text)
        sys.exit(status)

    config = Parameters().process()
    io_layer = InputOutput(config)
    source_tree, check_tree = io_layer.read_trees()
    xml_analyser = XMLAnalyser(config, source_tree, check_tree)

    try:
        outcome = xml_analyser.run()
    except NamesConflict:
        _abort("Conflicting names detected!\n", 90)
    except ValidationFail:
        _abort("ValidationFail exception catched!\n", 91)

    io_layer.write(outcome)

    return 0
class CrawlIndustryInfo(object):
    """Crawl industry codes and names from q.10jqka.com.cn into MySQL.

    The seed page is scanned for links inside ``div.cate_items``; each
    linked page contributes one ``code -> name`` pair taken from its first
    ``div.stock_name`` block. ``main()`` writes every pair into the
    ``indus_info`` table through the ``InputOutput`` helper.
    """

    def __init__(self):
        """Set the seed URL and open the MySQL connection and cursor."""
        # Index page whose ``div.cate_items`` blocks hold the industry links.
        self.seed_url = 'http://q.10jqka.com.cn/stock/thshy/'
        self.conn = MySQLdb.connect(host='127.0.0.1', user='******',
                                    passwd='root', db='stock',
                                    charset='utf8')
        self.cursor = self.conn.cursor(cursorclass=MySQLdb.cursors.DictCursor)
        self.input_output = InputOutput(self.conn, self.cursor)

    def _get_html(self, url):
        """Fetch ``url`` and return the raw response body.

        The HTTP handle is closed even when reading the body fails.
        """
        con = urllib2.urlopen(urllib2.Request(url))
        try:
            return con.read()
        finally:
            con.close()

    def _get_industry_code_list(self, url, from_encoding='utf8'):
        """Return a dict mapping industry code to industry name.

        ``url`` is the seed page; ``from_encoding`` is handed to
        BeautifulSoup for the seed page and for every industry page.
        Industry pages that contain no ``div.stock_name`` block are skipped
        instead of aborting the whole crawl.
        """
        doc = self._get_html(url)
        soup = BeautifulSoup(doc, 'html.parser', from_encoding=from_encoding)

        # Collect the link of every industry listed on the seed page;
        # anchors without an href attribute are ignored.
        industry_urls = []
        for block in soup.find_all('div', attrs={'class': 'cate_items'}):
            for anchor in block.find_all('a', href=True):
                industry_urls.append(anchor['href'])

        result = {}
        for industry_url in industry_urls:
            doc = self._get_html(industry_url)
            soup = BeautifulSoup(doc, 'html.parser',
                                 from_encoding=from_encoding)
            headers = soup.find_all('div', attrs={'class': 'stock_name'})
            if not headers:
                # Page does not have the expected layout; nothing to record.
                continue
            name = headers[0].h2.string
            code = headers[0].input['value']
            result[code] = name
        return result

    def main(self):
        """Crawl all industries and insert one ``indus_info`` row for each.

        The seed page is parsed as GBK. The cursor and the connection are
        closed when the method finishes, whether it succeeded or raised.
        """
        try:
            code_name = self._get_industry_code_list(self.seed_url,
                                                     from_encoding='gbk')
            for k, v in code_name.items():
                # NOTE(review): the scraped values are interpolated straight
                # into the SQL text. ``insert_data`` only receives a finished
                # statement here, so switching to a parameterized query needs
                # a change in InputOutput -- confirm and fix there.
                sql = "insert into indus_info (indus_code, indus_name) value " \
                      "('%s', '%s')" % (k, v)
                # Single pre-formatted argument: prints the same text under
                # both the Python 2 print statement and the Python 3 function.
                print("%s %s" % (k, v))
                self.input_output.insert_data(sql)
        finally:
            try:
                self.cursor.close()
            finally:
                self.conn.close()
Exemplo n.º 5
0
def main():
    """\
    Thin wrapper that drives one complete run of the script.

    Processes the script parameters, reads the input and validation trees,
    runs the analyser and writes its result. Returns
    EXIT_CODES["no_error"] on success. When the analyser raises
    KeywordError, NamesConflict or ValidationFail, a one-line error is
    written to stderr and the process exits with the matching EXIT_CODES
    entry.
    """
    def _fail(reason, exit_key):
        # Shared error path: "<script name>: ERROR: <reason>" on stderr,
        # then exit with the status registered under exit_key.
        script = os.path.basename(sys.argv[0])
        sys.stderr.write("%s: ERROR: %s\n" % (script, reason))
        sys.exit(EXIT_CODES[exit_key])

    # Script parameters first, then the I/O layer that depends on them.
    options = Parameters().process()
    io_layer = InputOutput(options)

    # Parse the script input into the two trees the analyser works on.
    source_tree, check_tree = io_layer.read_trees()
    xml_analyser = XMLAnalyser(options, source_tree, check_tree)

    # Run the analysis; each known failure maps to its own exit status.
    try:
        db_model = xml_analyser.run()
    except KeywordError:
        _fail("reserved SQL keyword detected as an element name",
              "error_format")
    except NamesConflict:
        _fail("collisions between attribute and element names detected",
              "error_names_conflict")
    except ValidationFail:
        _fail("database structure can't store the validation file data",
              "error_validation_fail")

    # Optional conversion to the alternative XML representation. The
    # return value of run() is not used here.
    if options.g:
        DBaseToXML(db_model).run()

    io_layer.write(db_model)
    return EXIT_CODES["no_error"]
class LoadStockInfoIntoRedis(object):
    """Copy the rows of the ``stock_info`` MySQL table into Redis keys."""

    def __init__(self):
        """Create the Redis client and pipeline and open the MySQL side."""
        self.redis = redis.StrictRedis(
            host='192.168.1.24', port=6379, db=4,
            password='******', charset='utf8')
        self.pipe = self.redis.pipeline()
        self.conn = MySQLdb.connect(
            host='127.0.0.1', user='******', passwd='root',
            db='stock', charset='utf8')
        dict_cursor = MySQLdb.cursors.DictCursor
        self.cursor = self.conn.cursor(cursorclass=dict_cursor)
        self.input_output = InputOutput(self.conn, self.cursor)

    def main(self):
        """Queue four SET commands per stock row, then run the pipeline.

        For every row the keys ``stock:<v_code>:jianpin``, ``:name``,
        ``:nameurl`` and ``:quanpin`` receive the string form of the
        ``v_jian_pin``, ``v_name``, ``v_name_url`` and ``v_quan_pin``
        columns.
        """
        # (key suffix, source column), in the order the keys are queued.
        field_map = (
            ('jianpin', 'v_jian_pin'),
            ('name', 'v_name'),
            ('nameurl', 'v_name_url'),
            ('quanpin', 'v_quan_pin'),
        )
        rows = self.input_output.get_data('select * from stock_info')
        for row in rows:
            key_prefix = 'stock:%s:' % row['v_code']
            for suffix, column in field_map:
                self.pipe.set(key_prefix + suffix, '%s' % row[column])
        self.pipe.execute()