def start(self):
		"""Recreate the ``pmphmooc_open`` table and fill it from the JSON course list.

		The table is dropped and created again on every call.  One row is
		inserted for each entry of ``content["rows"]``, where ``content`` is
		the parsed result of ``self.getJson()``.
		"""
		conn = MysqlHelper.connect()
		cur = conn.cursor()
		cur.execute('drop table if exists pmphmooc_open')
		cur.execute('create table pmphmooc_open(id int(11) primary key auto_increment,title varchar(255),school varchar(255),teacher varchar(255),touxian varchar(255),resume text,hitcount varchar(255),url varchar(255))')
		sql = 'insert into pmphmooc_open(title,school,teacher,touxian,resume,hitcount,url) values(%s,%s,%s,%s,%s,%s,%s)'
		content = json.loads(self.getJson())
		# JSON key -> Item attribute for the fields that are not always present.
		optional_fields = (("username", "teacher"), ("touxian", "touxian"), ("resume", "resume"))
		for item in content["rows"]:
			oneline = Item()
			oneline.title = item["name"]
			oneline.url = 'http://www.pmphmooc.com/web/opencoursedetail?courseid=' + str(item["id"])
			oneline.school = item["agencyname"]
			oneline.hitcount = item["hitcount"]
			# `in` replaces dict.has_key(), which no longer exists on Python 3.
			# NOTE(review): when a key is absent the attribute is left untouched,
			# so Item presumably provides defaults for these three - confirm.
			for key, attr in optional_fields:
				if key in item:
					setattr(oneline, attr, item[key])

			# Column order must match the insert statement above.
			value = [
				oneline.title,
				oneline.school,
				oneline.teacher,
				oneline.touxian,
				oneline.resume,
				oneline.hitcount,
				oneline.url,
			]
			MysqlHelper.insert_one(cur,sql,value)
		MysqlHelper.finish(conn)
示例#2
0
	def start(self):
		"""Recreate the ``mooccollege`` table and fill it from pages 1 to 4.

		Every entry returned by ``self.getInfo`` for a page becomes one row;
		the ``type`` column is a fixed label chosen by the page number.
		"""
		conn = MysqlHelper.connect()
		cur = conn.cursor()
		cur.execute('drop table if exists mooccollege')
		cur.execute('create table mooccollege(id int(11) primary key auto_increment,title varchar(255),teacher varchar(255),school varchar(255),type varchar(255))')
		sql = 'insert into mooccollege(title,teacher,school,type) values(%s,%s,%s,%s)'
		# Label for pages 1-3; every other page number gets the fallback label.
		page_types = {1: "冲刺专题", 2: "考题解析", 3: "同步教材"}
		for i in range(1,5):
			# One Item per page; its fields are overwritten for each entry.
			oneline = Item()
			entries = self.getInfo(self.getPage(i))
			for entry in entries:
				oneline.title = entry[0]
				oneline.teacher = entry[1]
				oneline.school = entry[2]
				oneline.type = page_types.get(i, "知识模块")
				value = [oneline.title, oneline.teacher, oneline.school, oneline.type]
				MysqlHelper.insert_one(cur,sql,value)
		MysqlHelper.finish(conn)
示例#3
0
    def start(self):
        """Recreate the ``pmphmooc_open`` table and fill it from the JSON course list.

        The table is dropped and created again on every call.  One row is
        inserted for each entry of ``content["rows"]``, where ``content`` is
        the parsed result of ``self.getJson()``.
        """
        conn = MysqlHelper.connect()
        cur = conn.cursor()
        cur.execute('drop table if exists pmphmooc_open')
        cur.execute(
            'create table pmphmooc_open(id int(11) primary key auto_increment,title varchar(255),school varchar(255),teacher varchar(255),touxian varchar(255),resume text,hitcount varchar(255),url varchar(255))'
        )
        sql = 'insert into pmphmooc_open(title,school,teacher,touxian,resume,hitcount,url) values(%s,%s,%s,%s,%s,%s,%s)'
        content = json.loads(self.getJson())
        for item in content["rows"]:
            oneline = Item()
            oneline.title = item["name"]
            oneline.url = 'http://www.pmphmooc.com/web/opencoursedetail?courseid=' + str(
                item["id"])
            oneline.school = item["agencyname"]
            oneline.hitcount = item["hitcount"]
            # `in` replaces dict.has_key(), which no longer exists on Python 3.
            # NOTE(review): absent keys leave the attribute untouched, so Item
            # presumably provides defaults for these three fields - confirm.
            if "username" in item:
                oneline.teacher = item["username"]
            if "touxian" in item:
                oneline.touxian = item["touxian"]
            if "resume" in item:
                oneline.resume = item["resume"]

            # Column order must match the insert statement above.
            value = [
                oneline.title,
                oneline.school,
                oneline.teacher,
                oneline.touxian,
                oneline.resume,
                oneline.hitcount,
                oneline.url,
            ]
            MysqlHelper.insert_one(cur, sql, value)
        MysqlHelper.finish(conn)
示例#4
0
文件: fudan.py 项目: nanaal/WangYaoya
	def start(self):
		"""Recreate the ``fudan`` table and insert one row per course.

		Courses come from ``content["course"]`` (parsed from
		``self.getContent()``); entries whose ``about`` path contains
		``"lecture"`` are skipped.
		"""
		content = json.loads(self.getContent())
		conn = MysqlHelper.connect()
		cur = conn.cursor()
		cur.execute('drop table if exists fudan')
		cur.execute('create table if not exists fudan(id int(11) primary key auto_increment,title varchar(255),lesson_code varchar(255),start_time varchar(255),current_sem varchar(255),spend_time varchar(255),short_desc text,knowledge_res text,chapter_info text,common_prob text,teacher_info text,url varchar(255))')
		sql = 'insert into fudan(title,lesson_code,start_time,current_sem,spend_time,short_desc,knowledge_res,chapter_info,common_prob,teacher_info,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
		for course in content["course"]:
			if course["about"].find("lecture") != -1:
				continue
			url = "http://fudan.xuetangx.com" + course["about"]
			page = self.getPage(url)
			value = [self.getTitle(page)]
			for pair in self.getInfo1(page):
				value.append(pair[0] + ':' + self.tool.replace(pair[1]))

			sections = self.getInfo2(page)
			for pair in sections:
				value.append(pair[0] + ':' + re.sub(r'[\n\t]+',r'\n', self.tool.replace(pair[1]), flags=re.S))
			# Pad with empty strings when fewer than four sections were found.
			value.extend([''] * (4 - len(sections)))
			teacher = ""
			for person in self.getTeacherInfo(page):
				teacher += person[0] + '\n' + person[1] + '\n' + self.tool.replace(person[2]) + '\n'
			value.append(teacher)
			value.append(url)
			MysqlHelper.insert_one(cur,sql,value)
		MysqlHelper.finish(conn)
示例#5
0
    def start(self):
        """Recreate ``computer_icourses`` and insert one row per course URL.

        Course URLs are taken from the index page at
        ``http://computer.icourses.cn/`` via ``self.getURL``; each course
        page is fetched and its title, short description, info sections and
        teachers are stored.
        """
        indexPage = self.getPage('http://computer.icourses.cn/')
        conn = MysqlHelper.connect()
        cur = conn.cursor()
        cur.execute('drop table if exists computer_icourses')
        cur.execute(
            'create table computer_icourses(id int(11) primary key auto_increment,title varchar(255),short_desc text,description text,requirement text,pre_knowledge text,chapter text,reference text,common_prob text,teacher text,url varchar(255))'
        )
        sql = 'insert into computer_icourses(title,short_desc,description,requirement,pre_knowledge,chapter,reference,common_prob,teacher,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        # Section heading on the course page -> Item attribute that stores it.
        section_fields = {
            '课程概述': 'description',
            '证书要求': 'requirement',
            '预备知识': 'pre_knowledge',
            '授课大纲': 'chapter',
            '参考资料': 'reference',
            '常见问题': 'common_prob',
        }
        for course_url in self.getURL(indexPage):
            oneline = Item()
            oneline.url = course_url
            page = self.getPage(course_url)
            oneline.title = self.getTitle(page)
            oneline.short_desc = self.getShortDesc(page)
            for section in self.getInfo(page):
                field = section_fields.get(section[0])
                if field is not None:
                    cleaned = re.sub(self.tool.replaceNBSP, " ",
                                     self.tool.replace(section[1]))
                    setattr(oneline, field, cleaned)
            # One teacher per line, each line newline-terminated.
            teacherstr = ""
            for name in self.getTeacher(page):
                teacherstr += name + '\n'
            oneline.teacher = teacherstr

            # Column order must match the insert statement above.
            value = [
                oneline.title,
                oneline.short_desc,
                oneline.description,
                oneline.requirement,
                oneline.pre_knowledge,
                oneline.chapter,
                oneline.reference,
                oneline.common_prob,
                oneline.teacher,
                oneline.url,
            ]
            MysqlHelper.insert_one(cur, sql, value)
        MysqlHelper.finish(conn)
示例#6
0
def export_corpus():
    """Dump the ``Content`` column of two tables into text files.

    Rows of ``p**n`` go to ``data/p**n.txt`` (characters that cannot be
    encoded are ignored) and rows of ``unporn`` go to ``data/unporn.txt``,
    one row per line.
    """
    mysql_helper = MysqlHelper('193.168.15.136', 'test', 'test', 'p**n',
                               'utf8')

    def dump(query, handle):
        # Only the first column of each row is written, newline-terminated.
        for row in mysql_helper.exeQuery(query)._rows:
            handle.write('{}\n'.format(row[0]))

    with open('data/p**n.txt', 'w', encoding='utf-8',
              errors='ignore') as fporn_write:
        dump('select Content from p**n', fporn_write)
    with open('data/unporn.txt', 'w', encoding='utf-8') as funporn_write:
        dump('select Content from unporn', funporn_write)
示例#7
0
class CheckLogin:
    """Console login / registration against the ``user_passwd`` table.

    Passwords are compared and stored as the hex SHA-1 digest of the
    UTF-8 encoded input (no salt is applied in this class).
    """

    def __init__(self):
        self.mysql_helper = MysqlHelper(host="localhost",
                                        user="******",
                                        passwd="123456",
                                        port=3306,
                                        db='python')

    def get_message(self):
        """Prompt for user name and password and store them with the digest."""
        self.username = input("用户名:")
        self.password = input("密码:")
        self.hash_password = hashlib.sha1(
            self.password.encode("utf-8")).hexdigest()

    def judge_user(self):
        """Look up the stored digest for ``self.username`` into ``self.result``."""
        self.result = self.mysql_helper.find_get(
            "select passwd from user_passwd where username=%s",
            [self.username])

    def login(self):
        """Prompt for credentials and report success, failure or unknown user.

        An unknown user is offered registration.
        """
        self.get_message()
        self.judge_user()
        if self.result != ():
            # First column of the first row is the stored digest.
            if self.result[0][0] == self.hash_password:
                print("\n*******登录成功********")
            else:
                print("\n密码错误")
            return

        print("\n用户不存在,请注册")
        register = input("是否注册[Y/N]:")
        if register.lower() == "y":
            self.register()
        else:
            print("成功退出")

    def register(self):
        """Prompt for credentials and insert them unless the user exists.

        An existing user is offered the login flow instead.
        """
        self.get_message()
        self.judge_user()
        if self.result == ():
            self.mysql_helper.cud("insert into user_passwd values(0,%s,%s);",
                                  [self.username, self.hash_password])
            return

        print("用户已存在,请登录")
        login = input("是否登录[Y/N]:")
        if login.lower() == "y":
            self.login()
        else:
            print("成功退出")
示例#8
0
    def getCourceInfo(self):
        """Recreate the ``jisuanke`` table and insert one row per course page.

        Course URLs come from ``self.getCourceUrl(self.url)``; each is
        prefixed with ``"http:"`` before being fetched.
        """
        cource_url_list = self.getCourceUrl(self.url)
        conn = MysqlHelper.connect()
        cur = conn.cursor()
        cur.execute('drop table if exists jisuanke')
        cur.execute('create table jisuanke(id int(11) primary key auto_increment,title varchar(255),time varchar(255),learn_count varchar(255),short_desc text,outline text)')
        sql = 'insert into jisuanke(title,time,learn_count,short_desc,outline) values(%s,%s,%s,%s,%s)'
        li_pattern = re.compile(r'<li>(.*?)</li>',re.S)

        for pageurl in cource_url_list:
            page = self.getPageInfo("http:" + pageurl)
            title = self.removeTab(self.getCourceName(page)[0])
            times = self.removeTab(self.getCourceTime(page)[0])
            learners = self.getPeopleNum(page)[0]
            brief = self.getClassInfo(page)

            # Outline: "<name>:<value>" header line per section, followed by
            # the text of every <li> found in the section's third field.
            outline = ""
            sections = self.getInfo(page)
            if sections:
                for section in sections:
                    outline += section[0] + ':' + section[1] + '\n'
                    for entry in li_pattern.findall(section[2]):
                        outline += entry

            value = [title, times, learners, brief, outline]
            MysqlHelper.insert_one(cur,sql,value)
        MysqlHelper.finish(conn)
示例#9
0
def insert_data(porn_file, unporn_file):
    """Load two text files into the ``p**n`` and ``unporn`` tables.

    Each right-stripped line is inserted, keyed by its MD5 hex digest, only
    when the lookup for the same content returns ``0``.  Undecodable bytes
    are ignored for ``porn_file`` only.
    """
    mysql_helper = MysqlHelper('193.168.15.136', 'test', 'test', 'p**n',
                               'utf8')

    def load(handle, sql_query, sql_insert):
        # Insert every line that the lookup query does not find yet.
        for line in handle:
            text = line.rstrip()
            if mysql_helper.find(sql_query, text) == 0:
                digest = hashlib.md5(
                    text.encode(encoding='utf-8')).hexdigest()
                mysql_helper.cud(sql_insert, (digest, text))

    with open(porn_file, 'r', encoding='utf-8', errors='ignore') as fporn_read:
        load(fporn_read,
             "select * from p**n where Content= %s",
             'insert into p**n(ID,Content) values (%s,%s)')
    with open(unporn_file, 'r', encoding='utf-8') as funporn_read:
        load(funporn_read,
             "select * from unporn where Content= %s",
             'insert into unporn(ID,Content) values (%s,%s)')
	def start(self):
		"""Recreate ``computer_icourses`` and insert one row per course URL.

		Course URLs are taken from the index page at
		``http://computer.icourses.cn/`` via ``self.getURL``; each course
		page is fetched and its title, short description, info sections and
		teachers are stored.
		"""
		indexPage = self.getPage('http://computer.icourses.cn/')
		conn = MysqlHelper.connect()
		cur = conn.cursor()
		cur.execute('drop table if exists computer_icourses')
		cur.execute('create table computer_icourses(id int(11) primary key auto_increment,title varchar(255),short_desc text,description text,requirement text,pre_knowledge text,chapter text,reference text,common_prob text,teacher text,url varchar(255))')
		sql = 'insert into computer_icourses(title,short_desc,description,requirement,pre_knowledge,chapter,reference,common_prob,teacher,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
		# Section heading on the course page -> Item attribute that stores it.
		section_fields = {
			'课程概述': 'description',
			'证书要求': 'requirement',
			'预备知识': 'pre_knowledge',
			'授课大纲': 'chapter',
			'参考资料': 'reference',
			'常见问题': 'common_prob',
		}
		for course_url in self.getURL(indexPage):
			oneline = Item()
			oneline.url = course_url
			page = self.getPage(course_url)
			oneline.title = self.getTitle(page)
			oneline.short_desc = self.getShortDesc(page)
			for section in self.getInfo(page):
				field = section_fields.get(section[0])
				if field is not None:
					setattr(oneline, field, re.sub(self.tool.replaceNBSP," ",self.tool.replace(section[1])))
			# One teacher per line, each line newline-terminated.
			teacherstr = ""
			for name in self.getTeacher(page):
				teacherstr += name + '\n'
			oneline.teacher = teacherstr

			# Column order must match the insert statement above.
			value = [
				oneline.title,
				oneline.short_desc,
				oneline.description,
				oneline.requirement,
				oneline.pre_knowledge,
				oneline.chapter,
				oneline.reference,
				oneline.common_prob,
				oneline.teacher,
				oneline.url,
			]
			MysqlHelper.insert_one(cur,sql,value)
		MysqlHelper.finish(conn)
示例#11
0
	def start(self):
		"""Recreate the ``pmphmooc`` table and insert one row per course.

		Courses come from ``content["rows"]`` (parsed from
		``self.getJson()``); the detail page of each course is fetched by id
		and scraped for description, dates, workload, teachers and chapters.
		"""
		conn = MysqlHelper.connect()
		cur = conn.cursor()
		cur.execute('drop table if exists pmphmooc')
		cur.execute('create table pmphmooc(id int(11) primary key auto_increment,title varchar(255),description text,chapter text,course_begin varchar(255),course_end varchar(255),course_totaltime varchar(255),course_load varchar(255),teacher text,block text,url varchar(255))')
		sql = 'insert into pmphmooc(title,description,chapter,course_begin,course_end,course_totaltime,course_load,teacher,block,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
		content = json.loads(self.getJson())
		for course in content["rows"]:
			oneline = Item()
			oneline.title = course["name"]
			oneline.url = 'http://www.pmphmooc.com/web/scholl/' + str(course["id"])
			page = self.getPage(course["id"])
			oneline.description = self.tool.replace(self.getDescription(page))
			# When several (begin, end) pairs are returned the last one wins.
			for span in self.getBeginAndEnd(page):
				oneline.course_begin = span[0]
				oneline.course_end = span[1]
			totalAndLoad = self.getTotalAndLoad(page)
			oneline.course_totaltime = totalAndLoad[0]
			oneline.course_load = totalAndLoad[1]
			teastr = ""
			for person in self.getTeacher(page):
				teastr += person[0] + '\n' + person[1] + '\n'
			oneline.teacher = teastr
			block = self.getBlock(page)
			oneline.block = block
			# The chapter text is looked up in the block under either heading.
			chapter = self.getText(block, ["授课大纲","课程章节"])
			if chapter:
				oneline.chapter = re.sub(self.tool.replaceNBSP,"",self.tool.replace(chapter))

			# Column order must match the insert statement above.
			value = [
				oneline.title,
				oneline.description,
				oneline.chapter,
				oneline.course_begin,
				oneline.course_end,
				oneline.course_totaltime,
				oneline.course_load,
				oneline.teacher,
				oneline.block,
				oneline.url,
			]
			MysqlHelper.insert_one(cur,sql,value)
		MysqlHelper.finish(conn)
示例#12
0
def loadFoodListComm(sql, type):
    """Crawl every not-yet-visited list page of the rows selected by ``sql``.

    ``sql`` is executed with the single parameter ``2`` and must yield rows
    of ``(id, parent_id, url, total_page, current_page)``.  Pages
    ``current_page + 1`` through ``total_page`` are crawled with
    ``loadFoodListPage`` when ``type == 1`` and with
    ``loadFoodMaterialListPage`` otherwise; the stored page number is
    updated after each page.
    """
    params = (2,)
    print(sql % params)
    rows = MysqlHelper.MysqlHelper().fetchall(sql, params)

    for class_id, parent_id, url, total_page, current_page in rows:
        for page in range(current_page + 1, total_page + 1):
            # Append to an existing query string when the URL already has one.
            separator = "&page=" if "?" in url else "?&page="
            page_url = url + separator + str(page)
            # Crawl the data of this page.
            if type == 1:
                loadFoodListPage(page_url, class_id, parent_id)
            else:
                loadFoodMaterialListPage(page_url, class_id, parent_id)
            # Record the page number that was just crawled.
            updateCurrentPage(class_id, page)

            sleepRandom()
示例#13
0
def anaylysiscount():
    """Print four row counts of the ``maoyan`` table.

    Printed in this order: the total row count, then the number of rows
    whose ``time`` column contains the USA, Japan and China substrings
    used in the queries below.
    """
    dbhelper = MysqlHelper.DbHelper()
    total = dbhelper.fetchCount("select count(*) from maoyan")
    # Identifiers are quoted with backticks: single quotes denote string
    # literals in MySQL, so 'newdatabase'.'maoyan' is a syntax error in FROM.
    am = dbhelper.fetchCount("select count(*) from `newdatabase`.`maoyan` where time like '%美国%'")
    china = dbhelper.fetchCount("select count(*) from `newdatabase`.`maoyan` where time like '%中国%'")
    japan = dbhelper.fetchCount("select count(*) from `newdatabase`.`maoyan` where time like '%日本%'")
    print(total,am,japan,china)
示例#14
0
 def select_many(self, sql, params=None):
     """Run ``query_many`` on a helper built from ``self.r_config``.

     Returns the helper's result, or ``None`` when anything raises; the
     error is printed rather than propagated.
     """
     try:
         return MysqlHelper(self.r_config).query_many(sql, params)
     except Exception as err:
         print('Error:' + str(err))
         return None
示例#15
0
def loadFoodListPage(url, class1_id, class2_id):
    """Scrape one list page and insert every entry into ``lb_food``.

    For each ``<a>`` under ``div.listtyle1`` the link, title and thumbnail
    are read, together with the comment / popularity counts and the step
    count when the corresponding text is present (they stay ``0``
    otherwise).
    """
    counts_pattern = re.compile(r'(\d*)评论(\d*)人气')
    steps_pattern = re.compile(r'(\d*)步')
    sql = ("insert into lb_food ( name , class1_id , class2_id , comment_num , popularity_num ,"
           " step_num , html_url , thumbnail_url) values ( %s, %s, %s, %s, %s, %s, %s, %s )")

    for div in getContent(url).xpath('//div[@class="listtyle1"]/a'):
        html_url = div.xpath("./@href")[0]
        title = div.xpath("./@title")[0]
        thumbnail_url = div.xpath("img/@src")[0]

        comment_num = 0
        popularity_num = 0
        for spanText in div.xpath("div//span/text()"):
            # Spaces are removed before matching; the last matching span wins.
            hit = counts_pattern.match(spanText.replace(" ", "").encode("utf-8"))
            if hit is not None:
                comment_num = hit.group(1)
                popularity_num = hit.group(2)

        step_num = 0
        for liText in div.xpath("div//li[@class='li1']/text()"):
            hit = steps_pattern.match(liText.replace(" ", "").encode("utf-8"))
            if hit is not None:
                step_num = hit.group(1)

        params = (title, class1_id, class2_id, comment_num, popularity_num,
                  step_num, html_url, thumbnail_url)
        MysqlHelper.MysqlHelper().cud(sql, params)
示例#16
0
    def __init__(self, model, thread_size, gpu_card):
        """
        Read the MySQL settings from ``../conf/load_config.conf`` and build
        the helper.

        The database name is the ``test_db`` option of section
        ``conf_<model>`` formatted with ``thread_size``; when that fails the
        ``test_db`` option of section ``db`` is used instead.  Any other
        failure is printed and leaves the instance partially initialised.
        """
        self.config = {}
        try:
            cf = ConfigParser.ConfigParser()
            cf.read("../conf/load_config.conf")
            # Option name -> reader; the attribute has the same name as the option.
            readers = (
                ("mysql_host", cf.get),
                ("mysql_port", cf.getint),
                ("mysql_user", cf.get),
                ("mysql_passwd", cf.get),
            )
            for option, read in readers:
                setattr(self, option, read("db", option))

            conf_name = "conf_%s" % model
            try:
                self.test_db = cf.get(conf_name, "test_db") % thread_size
            except Exception:
                self.test_db = cf.get("db", "test_db")
            self.mysql = mysql_helper.MysqlHelper(
                host=self.mysql_host,
                port=self.mysql_port,
                user=self.mysql_user,
                passwd=self.mysql_passwd,
                db=self.test_db)

            self.gpu_card = gpu_card
        except Exception as exception:
            print(exception)
示例#17
0
    def __init__(self, model, batch_size, gpu_card):
        """
        Read the MySQL settings from ``../conf/load_config.conf`` and build
        the helper.

        The database name is the ``test_db`` option of section
        ``conf_<model>`` formatted with ``batch_size``; when that lookup or
        formatting fails an error is printed and the process exits with
        status 1.  Any other failure is printed and leaves the instance
        partially initialised.
        """
        self.config = {}
        try:
            cf = ConfigParser.ConfigParser()
            cf.read("../conf/load_config.conf")
            # Option name -> reader; the attribute has the same name as the option.
            readers = (
                ("mysql_host", cf.get),
                ("mysql_port", cf.getint),
                ("mysql_user", cf.get),
                ("mysql_passwd", cf.get),
            )
            for option, read in readers:
                setattr(self, option, read("db", option))

            conf_name = "conf_%s" % model
            try:
                self.test_db = cf.get(conf_name, "test_db") % batch_size
            except Exception:
                print("\033[0;31;m[error]: Pls Check The Modle input wrong!\033[0m")
                # SystemExit is not an Exception subclass, so the outer
                # handler below does not intercept it.
                sys.exit(1)

            self.mysql = mysql_helper.MysqlHelper(
                host=self.mysql_host,
                port=self.mysql_port,
                user=self.mysql_user,
                passwd=self.mysql_passwd,
                db=self.test_db)

            self.gpu_card = gpu_card
        except Exception as exception:
            print(exception)
示例#18
0
def spiderProcess(food_id, content):
    """Store every cooking step found in ``content`` for ``food_id``.

    Each step block contributes its step label, text and image URL (an
    empty string when one is missing) as one ``lb_cook_process`` row.
    Returns ``True`` when every insert reported exactly one affected row,
    which is also the result when no step block was found.
    """
    divStepList = content.xpath("//div[@class='editnew edit']/div[@class='content clearfix']")
    sql = ("insert into lb_cook_process (food_id, step, content, img_url)"
           " values ( %s, %s, %s, %s )")

    def first_or_blank(nodes):
        # First XPath hit, or "" when the query matched nothing.
        return nodes[0] if len(nodes) > 0 else ""

    count = 0
    for div in divStepList:
        step = first_or_blank(div.xpath("em/text()"))
        text = first_or_blank(div.xpath("div/p/text()"))
        img_url = first_or_blank(div.xpath("div/p/img/@src"))

        params = (food_id, step, text, img_url)
        if MysqlHelper.MysqlHelper(isTest).cud(sql, params) == 1:
            count += 1

    return count == len(divStepList)
示例#19
0
 def execute_many(self, sql, params=None):
     """Run ``execute_many`` on a helper built from ``self.w_config``.

     Returns ``True`` when the helper reports a value greater than zero,
     ``False`` otherwise or when anything raises; the error is printed
     rather than propagated.
     """
     try:
         return MysqlHelper(self.w_config).execute_many(sql, params) > 0
     except Exception as err:
         print('Error:' + str(err))
         return False
示例#20
0
def updateSpiderDetail(error_msg, food_id, is_spider_detail_OK = False):
    """Record the detail-crawl outcome of one ``lb_food`` row.

    ``is_spider_detail`` is set to ``1`` when ``is_spider_detail_OK`` is
    truthy and to ``2`` otherwise; ``error_msg`` is stored alongside it.
    """
    sql = "update lb_food set is_spider_detail = %s, error_msg = %s where id = %s "
    is_spider_detail = 1 if is_spider_detail_OK else 2
    params = (is_spider_detail, error_msg, food_id)
    print(sql % params)
    MysqlHelper.MysqlHelper(isTest).cud(sql, params)
示例#21
0
 def start(self):
     """Recreate the ``imooc`` table and insert one row per course.

     The number of listing pages is read from the first page; every course
     id on every listing page is then scraped for title, the three
     level / time / count values, brief and outline.
     """
     indexPage = self.getContent(1)
     pageNum = self.getPageNum(indexPage)
     conn = MysqlHelper.connect()
     cur = conn.cursor()
     cur.execute('drop table if exists imooc')
     cur.execute(
         'create table imooc(id int(11) primary key auto_increment,title varchar(255),difficulty varchar(255),time varchar(255),learn_count varchar(255),short_desc text,outline text)'
     )
     sql = 'insert into imooc(title,difficulty,time,learn_count,short_desc,outline) values(%s,%s,%s,%s,%s,%s)'
     li_pattern = re.compile('<li>(.*?)</li>', re.S)
     for page_no in range(1, int(pageNum) + 1):
         listing = self.getContent(page_no)
         for view_id in self.getViewsId(listing):
             learnpage = self.getLearnPage(view_id)
             viewpage = self.getViewPage(view_id)
             title = self.getTitle(learnpage)
             infos = [
                 self.tool.replace(raw)
                 for raw in self.getLevelTimeAndCount(learnpage)
             ]
             value = [title, infos[0], infos[1], infos[2]]
             value.append(self.getBrief(viewpage))
             # Outline: chapter title line, then one line per <li> entry.
             outline_text = ""
             for chapter in self.getOutline(learnpage):
                 outline_text = outline_text + self.tool.replace(chapter[0]) + '\n'
                 for entry in li_pattern.findall(chapter[1]):
                     entry = re.sub(self.tool.removeAddr, "", entry)
                     entry = re.sub(self.tool.replaceLT, "<", entry)
                     entry = re.sub(self.tool.replaceGT, ">", entry)
                     outline_text = outline_text + entry.strip() + '\n'
             value.append(outline_text)
             MysqlHelper.insert_one(cur, sql, value)
     MysqlHelper.finish(conn)
示例#22
0
文件: imooc.py 项目: nanaal/WangYaoya
	def start(self):
		"""Recreate the ``imooc`` table and insert one row per course.

		The number of listing pages is read from the first page; every
		course id on every listing page is then scraped for title, the
		three level / time / count values, brief and outline.
		"""
		indexPage = self.getContent(1)
		pageNum = self.getPageNum(indexPage)
		conn = MysqlHelper.connect()
		cur = conn.cursor()
		cur.execute('drop table if exists imooc')
		cur.execute('create table imooc(id int(11) primary key auto_increment,title varchar(255),difficulty varchar(255),time varchar(255),learn_count varchar(255),short_desc text,outline text)')
		sql = 'insert into imooc(title,difficulty,time,learn_count,short_desc,outline) values(%s,%s,%s,%s,%s,%s)'
		li_pattern = re.compile('<li>(.*?)</li>',re.S)
		for page_no in range(1,int(pageNum)+1):
			listing = self.getContent(page_no)
			for view_id in self.getViewsId(listing):
				learnpage = self.getLearnPage(view_id)
				viewpage = self.getViewPage(view_id)
				title = self.getTitle(learnpage)
				infos = [self.tool.replace(raw) for raw in self.getLevelTimeAndCount(learnpage)]
				value = [title, infos[0], infos[1], infos[2]]
				value.append(self.getBrief(viewpage))
				# Outline: chapter title line, then one line per <li> entry.
				outline_text = ""
				for chapter in self.getOutline(learnpage):
					outline_text = outline_text + self.tool.replace(chapter[0]) + '\n'
					for entry in li_pattern.findall(chapter[1]):
						entry = re.sub(self.tool.removeAddr,"",entry)
						entry = re.sub(self.tool.replaceLT,"<",entry)
						entry = re.sub(self.tool.replaceGT,">",entry)
						outline_text = outline_text + entry.strip() + '\n'
				value.append(outline_text)
				MysqlHelper.insert_one(cur,sql,value)
		MysqlHelper.finish(conn)
示例#23
0
 def getCpuAndMemory(self):
     """Summarise CPU / memory samples of one package and store them.

     Lines of ``self.file`` whose tenth whitespace-separated field equals
     ``self.packageName`` are copied to ``self.infofile``, which is then
     closed.  ``self.saveFileName`` is read back afterwards (presumably
     the file behind ``self.infofile`` - confirm) and fields 2, 5 and 6
     of every line, minus their last character, are parsed as integers.
     Maximum and average of each series are printed and written to the
     table named ``self.appName`` for the row matching
     ``self.deviceModel``.

     Raises:
         ValueError: when no sample line was collected (``max`` of an
             empty list), or when a field is not an integer.
     """
     list_cpu = []
     list_vss = []
     list_rss = []
     packageName = self.packageName
     saveFileName = self.saveFileName
     file = self.file
     infofile = self.infofile
     line = file.readline()
     while line:
         temp_result = line.split()
         # Blank or truncated lines have no tenth field; skip them instead
         # of failing with IndexError.
         if len(temp_result) > 9 and temp_result[9] == packageName:
             infofile.writelines(line)
         line = file.readline()
     infofile.close()
     # The context manager closes the file even when a line fails to parse.
     with open(saveFileName) as resultFile:
         for resultLine in resultFile:
             temp_line = resultLine.split()
             # [:-1] drops the trailing character of each field
             # (presumably a unit suffix such as '%' or 'K' - confirm).
             list_cpu.append(int(temp_line[2][:-1]))
             list_vss.append(int(temp_line[5][:-1]))
             list_rss.append(int(temp_line[6][:-1]))
     cpumax = max(list_cpu)
     cpuavg = "%.2f" % (float(sum(list_cpu)) / len(list_cpu))
     vsizemax = max(list_vss)
     # Floor division keeps the integer averages of the Python 2 original
     # on any interpreter.
     vsizeavg = sum(list_vss) // len(list_vss)
     rssmax = max(list_rss)
     rssavg = sum(list_rss) // len(list_rss)
     # Single pre-formatted argument: prints the same text as a Python 2
     # print statement or the Python 3 print function.
     print("%s %s %s %s %s %s" % (cpumax, cpuavg, vsizemax, vsizeavg,
                                  rssmax, rssavg))
     # NOTE(review): the statement is assembled by string interpolation, so
     # self.appName and self.deviceModel reach the SQL text unescaped.
     sqlquery = ("UPDATE %s " + "SET cpumax = '%s',cpuavg = '%s'," +
                 "vsizemax = '%s',vsizeavg = '%s'," +
                 "rssmax = '%s',rssavg = '%s' " +
                 "WHERE devicemodel = '%s' ") % (
                     self.appName, cpumax, cpuavg, vsizemax, vsizeavg,
                     rssmax, rssavg, self.deviceModel)
     print(sqlquery)
     cxn = MysqlHelper.connect()
     cur = cxn.cursor()
     MysqlHelper.update(cur, sqlquery)
     MysqlHelper.finish(cxn)
示例#24
0
 def getCpuAndMemory(self):
     """Summarise CPU / memory samples of one package and store them.

     Lines of ``self.file`` whose tenth whitespace-separated field equals
     ``self.packageName`` are copied to ``self.infofile``, which is then
     closed.  ``self.saveFileName`` is read back afterwards (presumably
     the file behind ``self.infofile`` - confirm) and fields 2, 5 and 6
     of every line, minus their last character, are parsed as integers.
     Maximum and average of each series are printed and written to the
     table named ``self.appName`` for the row matching
     ``self.deviceModel``.

     Raises:
         ValueError: when no sample line was collected (``max`` of an
             empty list), or when a field is not an integer.
     """
     list_cpu = []
     list_vss = []
     list_rss = []
     packageName = self.packageName
     saveFileName = self.saveFileName
     file = self.file
     infofile = self.infofile
     line = file.readline()
     while line:
         temp_result = line.split()
         # Blank or truncated lines have no tenth field; skip them instead
         # of failing with IndexError.
         if len(temp_result) > 9 and temp_result[9] == packageName:
             infofile.writelines(line)
         line = file.readline()
     infofile.close()
     # The context manager closes the file even when a line fails to parse.
     with open(saveFileName) as resultFile:
         for resultLine in resultFile:
             temp_line = resultLine.split()
             # [:-1] drops the trailing character of each field
             # (presumably a unit suffix such as '%' or 'K' - confirm).
             list_cpu.append(int(temp_line[2][:-1]))
             list_vss.append(int(temp_line[5][:-1]))
             list_rss.append(int(temp_line[6][:-1]))
     cpumax = max(list_cpu)
     cpuavg = "%.2f" % (float(sum(list_cpu))/len(list_cpu))
     vsizemax = max(list_vss)
     # Floor division keeps the integer averages of the Python 2 original
     # on any interpreter.
     vsizeavg = sum(list_vss)//len(list_vss)
     rssmax = max(list_rss)
     rssavg = sum(list_rss)//len(list_rss)
     # Single pre-formatted argument: prints the same text as a Python 2
     # print statement or the Python 3 print function.
     print("%s %s %s %s %s %s" % (cpumax,cpuavg,vsizemax,vsizeavg,rssmax,rssavg))
     # NOTE(review): the statement is assembled by string interpolation, so
     # self.appName and self.deviceModel reach the SQL text unescaped.
     sqlquery = ("UPDATE %s " +
                "SET cpumax = '%s',cpuavg = '%s'," +
                "vsizemax = '%s',vsizeavg = '%s'," +
                "rssmax = '%s',rssavg = '%s' " +
                "WHERE devicemodel = '%s' ")%(self.appName,cpumax,cpuavg,vsizemax,vsizeavg,rssmax,rssavg,self.deviceModel)
     print(sqlquery)
     cxn = MysqlHelper.connect()
     cur = cxn.cursor()
     MysqlHelper.update(cur , sqlquery)
     MysqlHelper.finish(cxn)
示例#25
0
	def start(self):
		"""Recreate the ``tsinghua`` table and insert one row per course.

		Courses come from ``content["course"]`` (parsed from
		``self.getContent()``); entries whose ``about`` path contains
		``"lecture"`` are skipped.
		"""
		content = json.loads(self.getContent())
		conn = MysqlHelper.connect()
		cur = conn.cursor()
		cur.execute('drop table if exists tsinghua')
		cur.execute('create table if not exists tsinghua(id int(11) primary key auto_increment,title varchar(255),lesson_code varchar(255),start_time varchar(255),current_sem varchar(255),spend_time varchar(255),short_desc text,knowledge_res text,chapter_info text,common_prob text,teacher_info text,url varchar(255))')
		sql = 'insert into tsinghua(title,lesson_code,start_time,current_sem,spend_time,short_desc,knowledge_res,chapter_info,common_prob,teacher_info,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
		for course in content["course"]:
			if course["about"].find("lecture") != -1:
				continue
			url = "http://tsinghua.xuetangx.com" + course["about"]
			page = self.getPage(url)
			value = [self.getTitle(page)]
			for pair in self.getInfo1(page):
				value.append(pair[0] + ':' + self.tool.replace(pair[1]))

			sections = self.getInfo2(page)
			for pair in sections:
				value.append(pair[0] + ':' + re.sub(r'[\n\t]+',r'\n', self.tool.replace(pair[1]), flags=re.S))
			# Pad with empty strings when fewer than four sections were found.
			value.extend([''] * (4 - len(sections)))
			teacher = ""
			for person in self.getTeacherInfo(page):
				teacher += person[0] + '\n' + person[1] + '\n' + self.tool.replace(person[2]) + '\n'
			value.append(teacher)
			value.append(url)
			MysqlHelper.insert_one(cur,sql,value)
		MysqlHelper.finish(conn)
示例#26
0
 def create_database(self):
     """
     Create the database named ``self.test_db`` when it does not exist.

     The helper is built without a ``db`` argument and the statement is
     sent through ``execute_withnodb``.  A failure is logged together
     with its traceback and is not re-raised.
     """
     mysql = mysql_helper.MysqlHelper(
         host=self.mysql_host,
         port=self.mysql_port,
         user=self.mysql_user,
         passwd=self.mysql_passwd)
     create_database_sql = "CREATE DATABASE IF NOT EXISTS %s" % self.test_db
     try:
         mysql.execute_withnodb(create_database_sql)
     except Exception:
         # logging.exception logs at ERROR level like before, but also
         # records the traceback so the cause of the failure is not lost.
         logging.exception("create the database %s failed~~", self.test_db)
     else:
         logging.info("create the database %s sucess~~", self.test_db)
示例#27
0
def loadFoodPages():
    """
    Crawl and store the total page count of each level-2 food class.

    Selects the ``lb_food_class`` rows with ``level = 2`` and
    ``total_page = 0``, downloads every ``class_url``, extracts the number
    from the pager text ``共N页`` and writes it back to ``total_page``.
    A row whose page has no pager text is reported and left unchanged.
    """
    sql = "select class_url, id  from lb_food_class where level = %s and total_page = %s"
    params = (2, 0)
    print(sql % params)
    rows = MysqlHelper.MysqlHelper().fetchall(sql, params)

    # search(), not match(): match() only succeeds when the pattern starts at
    # position 0 of the document.  \d+ (instead of \d*) keeps an empty string
    # from being stored as the page count.
    page_pattern = re.compile(r'共(\d+)页')
    update_sql = "update lb_food_class set total_page =%s where id = %s"

    for class_url, class_id in rows:
        request = urllib2.Request(class_url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            html = response.read()
        finally:
            response.close()

        found = page_pattern.search(html)
        if found is None:
            print("no page count found in %s" % class_url)
        else:
            update_params = (found.group(1), class_id)
            print(update_sql % update_params)
            MysqlHelper.MysqlHelper().cud(update_sql, update_params)

        sleepRandom()
示例#28
0
 def __init__(self, configFilePath):
     """
     Remember the config file path and build the database helper from it.

     host, port, userName and dbName are read under ``DatabaseConfig``
     through ``readInXml.getElement``; the port is converted to ``int`` and
     the password handed to ``MysqlHelper.DB`` is always the empty string.
     """
     self.configFilePath = configFilePath
     self.dbHelper = None

     group = 'DatabaseConfig'
     host = readInXml.getElement(self.configFilePath, group, 'host')
     port = int(readInXml.getElement(self.configFilePath, group, 'port'))
     user = readInXml.getElement(self.configFilePath, group, 'userName')
     database = readInXml.getElement(self.configFilePath, group, 'dbName')
     self.dbHelper = MysqlHelper.DB(host, port, user, "", database)
示例#29
0
def loadFoodClass2():
    """
    Crawl the second-level food classes.

    For every ``level = 1`` row of ``lb_food_class`` the class page is
    fetched; each link under a ``dl`` group is inserted as a level-2 row
    that references the parent id and carries the group's first ``dt`` text
    as its tag.  Every statement is echoed to stdout before it runs.
    """
    select_sql = "select class_url, id  from lb_food_class where level = %s"
    select_params = (1,)
    print(select_sql % select_params)
    parents = MysqlHelper.MysqlHelper().fetchall(select_sql, select_params)

    insert_sql = "insert into lb_food_class ( name , parent_id , level , class_url , tag)" \
                 " values ( %s, %s, %s, %s, %s)"

    for parent_url, parent_id in parents:
        groups = getContent(parent_url).xpath('//div[@class="main"]/div/div/dl')
        for group in groups:
            tag = group.xpath("dt/text()")[0]
            for link in group.xpath("dd/a"):
                insert_params = (link.text, parent_id, 2, link.attrib.get("href"), tag)
                print(insert_sql % insert_params)
                MysqlHelper.MysqlHelper().cud(insert_sql, insert_params)
示例#30
0
def loadFoodMaterialListPage(url, class1_id, class2_id):
    """
    Store every food material listed on one list page.

    Each ``div.listtyle1`` entry of the page at ``url`` becomes one
    ``lb_food_material`` row made of its title, detail link, logo and
    description plus the two class ids passed in.  With several description
    spans the last one is kept; with none the description is ``""``.
    """
    insert_sql = "insert into lb_food_material ( name , class1_id , class2_id ,description, html_url," \
                 " logo_url) values ( %s, %s, %s, %s, %s, %s)"

    for entry in getContent(url).xpath('//div[@class="listtyle1"]'):
        html_url = entry.xpath('div[@class="img"]/a/@href')[0]
        logo_url = entry.xpath("div/a/img/@src")[0]
        title = entry.xpath('div[@class="info1"]/h3/a/text()')[0]
        spans = entry.xpath('div[@class="info1"]/div/span/text()')
        description = spans[-1] if spans else ""

        row = (title, class1_id, class2_id, description, html_url, logo_url)
        MysqlHelper.MysqlHelper().cud(insert_sql, row)
示例#31
0
def spiderComment(food_id, content):
    """
    Crawl the comment data of one recipe page into ``lb_food_comment``.

    :param food_id: id stored with every comment row
    :param content: parsed page; only its ``xpath`` method is used
    :return: True when ``cud`` returned 1 for every comment found (also
        True for a page without comments), otherwise False
    :raises ValueError: from ``strptime`` when a comment's header holds no
        ``YYYY-M-D H:M:S`` timestamp followed by ``来自``
    """
    time_pattern = re.compile(r'(\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2}:\d{1,2})来自')
    user_id_pattern = re.compile(r'.*id=(\d*)')
    insert_sql = "insert into lb_food_comment ( food_id , user_name , user_id , user_avatar_url ," \
                 " user_url, content, comment_time) values ( %s, %s, %s, %s, %s, %s, %s)"

    comments = content.xpath("//div[@class='cp_comlist_w']/ul/li")
    inserted = 0
    for node in comments:
        # The comment body is the text that follows the <strong> element.
        body = node.xpath(".//p/strong")[0].tail.encode("utf-8").strip()

        header = node.xpath(".//div/span/text()")[0].encode("utf-8")
        stamps = time_pattern.findall(header)
        # With no timestamp the empty string is parsed, which raises.
        comment_time = datetime.datetime.strptime(
            stamps[0] if stamps else "", '%Y-%m-%d %H:%M:%S')

        names = node.xpath("a/h5/text()")
        user_name = names[0] if names else ""

        user_avatar_url = node.xpath("a/img/@src")[0]

        # Defaults for a comment without a usable profile link.
        user_id = 0
        user_url = ""
        hrefs = node.xpath("a/@href")
        if hrefs:
            profile = user_id_pattern.match(hrefs[0])
            if profile is not None:
                user_id = profile.group(1)
                user_url = hrefs[0]

        row = (food_id, user_name, user_id, user_avatar_url, user_url, body, comment_time)
        if MysqlHelper.MysqlHelper(isTest).cud(insert_sql, row) == 1:
            inserted += 1
    return inserted == len(comments)
示例#32
0
def loadFoodClass1():
    """
    Crawl the first-level food classes.

    Reads the navigation ``dl`` groups of the fixed start page and inserts
    one level-1 ``lb_food_class`` row (link text and href) for every
    ``dt/a`` link, echoing each statement to stdout first.
    """
    url = "https://www.meishij.net/jiankang/"
    insert_sql = "insert into lb_food_class ( name , level , class_url ) values ( %s, %s, %s)"

    groups = getContent(url).xpath('//div[@class="nav"]/ul/li[2]/div/div/div/dl')
    links = (link for group in groups for link in group.xpath("dt/a"))
    for link in links:
        row = (link.text, 1, link.attrib.get("href"))
        print(insert_sql % row)
        MysqlHelper.MysqlHelper().cud(insert_sql, row)
示例#33
0
def write2SQL(item):
    """
    Insert one scraped record into ``newdatabase.maoyan``.

    :param item: mapping with the keys ``title``, ``stars`` and
        ``releasetime``; the latter two are expected as ``label:value``
        text and only the value part is stored
    :return: None; success or failure is reported on stdout
    """
    def value_of(labelled):
        # Cut at the first ':' only, so a value that itself contains ':'
        # (e.g. a time of day) is kept whole.  Text without a label is
        # stored unchanged instead of raising IndexError.
        _label, colon, value = labelled.partition(":")
        return value if colon else labelled

    title = item['title']
    actor = value_of(item['stars'])
    release_time = value_of(item['releasetime'])

    # Build the helper only once the record has been parsed successfully.
    dbhelper = MysqlHelper.DbHelper()
    sql = "INSERT INTO newdatabase.maoyan(title,actor,time) VALUES(%s,%s,%s)"
    params = (title, actor, release_time)
    if dbhelper.execute(sql, params):
        print("插入成功")
    else:
        print("插入失败")
示例#34
0
def spiderMaterial(food_id, content):
    """
    Store the ingredient list of one recipe page in ``lb_food_material_assoc``.

    :param food_id: id stored with every ingredient row
    :param content: parsed page; only its ``xpath`` method is used
    :return: True when ``cud`` returned 1 for every ``li`` entry of every
        processed box (also True when there is nothing to insert)
    """
    def first_or_blank(values):
        # First xpath hit, or "" when the expression matched nothing.
        return values[0] if len(values) > 0 else ""

    insert_sql = "insert into lb_food_material_assoc ( food_id , tag, name, thumbnail_url, dosage)" \
                 " values ( %s, %s, %s, %s, %s )"

    inserted = 0
    expected = 0
    for box in content.xpath("//div[@class='materials_box']/div"):
        css_classes = box.xpath("./@class")
        if len(css_classes) > 0 and css_classes[0] == "goods_ad":
            # Presumably an advertisement box; it holds no ingredient group.
            continue

        tag = box.xpath("h3/a/text()")[0]
        entries = box.xpath("ul/li")
        expected += len(entries)
        for entry in entries:
            thumbnail_url = first_or_blank(entry.xpath("a/img/@src"))

            # Name and dosage each have a primary location and a fallback
            # one that is tried only when the primary came back empty.
            name = first_or_blank(entry.xpath("div/h4/a/text()"))
            if name == "":
                name = first_or_blank(entry.xpath("h4/a/text()"))

            dosage = first_or_blank(entry.xpath("div/h4/span/text()"))
            if dosage == "":
                dosage = first_or_blank(entry.xpath("span/text()"))

            row = (food_id, tag, name, thumbnail_url, dosage)
            if MysqlHelper.MysqlHelper(isTest).cud(insert_sql, row) == 1:
                inserted += 1

    return expected == inserted
示例#35
0
def loadFoodDetail():
    """
    Crawl the detail data of one page of not-yet-crawled ``lb_food`` rows.

    Rows with ``is_spider_detail = 0`` are selected using the module-level
    ``page`` / ``pageSize`` values.  For each row the detail page is fetched
    and four steps run in order (base info, comments, process, materials).
    The first step that returns a falsy value stops the chain and its error
    text is recorded through ``updateSpiderDetail``; when all four succeed
    the row is recorded as crawled.  Any exception is recorded the same way
    and the loop moves on.  ``sleepRandom`` is called once after every row.
    """
    sql = "select id, html_url from lb_food where is_spider_detail = %s limit %s, %s "
    # MySQL's two-argument LIMIT is (offset, row_count): the second value is
    # the page size itself, not the end index of the page.
    params = (0, pageSize * (page - 1), pageSize)
    print(sql % params)
    rows = MysqlHelper.MysqlHelper(isTest).fetchall(sql, params)

    # (step, message printed on success, message printed/recorded on failure)
    steps = (
        (spiderBaseInfo, "爬取基本信息成功", "爬取基本信息出错"),
        (spiderComment, "爬取评论数据成功", "爬取评论数据出错"),
        (spiderProcess, "爬取做法成功", "爬取做法出错"),
        (spiderMaterial, "爬取用料成功", "爬取用料出错"),
    )

    for food_id, html_url in rows:
        try:
            content = getContent(html_url)
            for step, ok_msg, fail_msg in steps:
                if not step(food_id, content):
                    print(fail_msg)
                    updateSpiderDetail(fail_msg, food_id)
                    break
                print(ok_msg)
            else:
                # No step broke out of the chain: record the row as crawled.
                updateSpiderDetail("爬取成功", food_id, True)
        except Exception as e:
            # e.message is empty for many exception types (and absent on
            # Python 3); fall back to repr() so the cause is never lost.
            updateSpiderDetail(getattr(e, "message", None) or repr(e), food_id)
        sleepRandom()
示例#36
0
def truncate_table_sql(model, batch_size):
    """
    Truncate the fixed list of statistics tables in one test database.

    Connection settings come from section ``db`` of
    ``./conf/load_config.conf``; the database name is the ``test_db`` entry
    of section ``conf_<model>`` formatted with ``batch_size``.

    :param model: suffix that selects the ``conf_<model>`` section
    :param batch_size: value substituted into the ``test_db`` template
    :return: None.  Returns early, after printing the error, when the
        connection settings cannot be read or the helper cannot be built.
        Exits the process with status 1 when ``test_db`` cannot be resolved.
        A table that fails to truncate is reported and the loop continues.
    """
    try:
        cf = ConfigParser.ConfigParser()
        cf.read("./conf/load_config.conf")
        mysql_host = cf.get("db", "mysql_host")
        mysql_port = cf.getint("db", "mysql_port")
        mysql_user = cf.get("db", "mysql_user")
        mysql_passwd = cf.get("db", "mysql_passwd")
        conf_name = "conf_%s" % model
        try:
            test_db = cf.get(conf_name, "test_db") % batch_size
        except Exception as e:
            # Say why before exiting; SystemExit is not an Exception, so the
            # outer handler below never reports this case.
            print("[ERROR]: cannot resolve test_db from [%s]: %s" % (conf_name, e))
            sys.exit(1)
        mysql = mysql_helper.MysqlHelper(host=mysql_host,
                                         port=mysql_port,
                                         user=mysql_user,
                                         passwd=mysql_passwd,
                                         db=test_db)
    except Exception as exception:
        print(exception)
        return

    table_names = (
        "anakin2_yolo_time_satistic_k1200", "anakin2_yolo_time_satistic_p4",
        "anakin_tensorrt_time_satistic_k1200", "anakin_tensorrt_time_satistic_p4",
        "nvidia_list_1sec_k1200", "nvidia_list_1sec_p4",
        "nvidia_list_1sec_version_k1200", "nvidia_list_1sec_version_p4",
        "nvidia_list_1sec_version_tensorRT_k1200", "nvidia_list_1sec_version_tensorRT_p4",
        "top_list_1sec_avg_k1200", "top_list_1sec_avg_p4",
        "top_list_1sec_avg_tensorRT_k1200", "top_list_1sec_avg_tensorRT_p4",
        "top_list_1sec_k1200", "top_list_1sec_p4",
        "log_monitor_k1200", "log_monitor_p4",
    )
    for table in table_names:
        truncate_sql = "truncate table %s" % table

        print("[INFO]: start truncate the sql")
        try:
            truncate_result = mysql.executes(truncate_sql)
        except Exception as exception:
            # Keep going with the remaining tables, but report the cause.
            print("[ERROR]: truncate %s error!!! (%s)" % (table, exception))
        else:
            print("[INFO]: truncate %s success!!!" % table)
            print(truncate_result)
示例#37
0
    def __init__(self, db_name):
        """
        Load the MySQL settings and build the helper for ``db_name``.

        Settings are read from section ``db`` of
        ``../../conf/load_config.conf`` (resolved against the current
        working directory).

        :param db_name: database name, stored as ``self.test_db`` and passed
            to the helper as ``db``

        Any failure while reading the settings or building the helper is
        printed and swallowed.  The instance then lacks ``self.mysql`` and
        every attribute that would have been assigned after the failing
        line, so later use raises ``AttributeError``.
        """
        self.config = {}
        try:
            cf = ConfigParser.ConfigParser()
            cf.read("../../conf/load_config.conf")
            self.mysql_host = cf.get("db", "mysql_host")
            self.mysql_port = cf.getint("db", "mysql_port")
            self.mysql_user = cf.get("db", "mysql_user")
            self.mysql_passwd = cf.get("db", "mysql_passwd")
            self.test_db = db_name
            self.mysql = mysql_helper.MysqlHelper(host=self.mysql_host,
                                                  port=self.mysql_port,
                                                  user=self.mysql_user,
                                                  passwd=self.mysql_passwd,
                                                  db=self.test_db)
        except Exception as exception:
            # Call form of print: valid on both Python 2 and Python 3.
            print(exception)
示例#38
0
 def getNewsDetail(self, news_url):
     """
     Fetch one news page and store its details in table ``disease``.

     Args:
         news_url: link of the individual news article
     Returns:
         None.  Article text, datetime, title and URL are only written to
         the database.  A page without a title, date or time element is
         skipped without touching the database.
     """
     res = requests.get(news_url)
     res.encoding = 'utf-8'
     soup = BeautifulSoup(res.text, 'html.parser')

     titles = soup.select('.article-title h2')
     dates = soup.select('.date')
     times = soup.select('.time')
     # Test the result lists before indexing them: indexing first raised
     # IndexError on pages that have no .date / .time element.
     if not (titles and dates and times
             and len(dates[0]) > 0 and len(times[0]) > 0):
         return

     result = {}
     result['url'] = news_url
     result["title"] = titles[0].text
     # lstrip() removes any leading run of these characters (it is a
     # character set, not a prefix); digits are never part of that set.
     result["date"] = dates[0].text.lstrip('发布时间:')
     result["time"] = times[0].text

     if len(result["date"]) == 5:
         # "MM-DD" carries no year.
         # NOTE(review): the year is hard-coded to 2020 -- confirm intent.
         result["datetime"] = "2020-" + result["date"] + " " + result["time"]
     elif len(result["date"]) == 8:
         # "YY-MM-DD": expand the two-digit year.
         result["datetime"] = "20" + result["date"] + " " + result["time"]
     else:
         result["datetime"] = result["date"] + " " + result["time"]
     result["article"] = self.getArticle(news_url)

     msh = MysqlHelper.MysqlHelper(host="localhost",
                                   username="******",
                                   password="******",
                                   db="baiduSearchNews",
                                   charset="utf8",
                                   port=3306)
     msh.connect()

     def quoted(value):
         # Scraped text may contain quotes or backslashes; escape them so
         # they cannot end the SQL string literal early.  Backslashes go
         # first so the escapes added for quotes are not doubled.
         # NOTE(review): relies on MySQL's default backslash-escape mode;
         # prefer bound parameters if the helper's insert() accepts them.
         text = "%s" % (value,)
         return text.replace("\\", "\\\\").replace("'", "\\'")

     sql = "insert into disease values('%s','%s','%s','%s')" % (
         quoted(result["article"]), quoted(result["datetime"]),
         quoted(result["title"]), quoted(result['url']))
     msh.insert(sql)
示例#39
0
	def start(self):
		"""
		Crawl the XJTU course listing into MySQL.

		Drops and recreates table ``xjtu`` and inserts one row per entry of
		``content["course"]``.  When the listing has a non-empty ``lecture``
		entry, ``xjtu_lecture`` is recreated and filled the same way.
		``MysqlHelper.finish`` is called once at the end.
		"""
		# Label shown on the course page -> Course attribute it fills.
		basic_fields = {
			"课程代码": "lesson_code",
			"开课时间": "start_time",
			"当前学期": "current_sem",
			"投入时间": "spend_time",
		}
		detail_fields = {
			"课程简介": "short_desc",
			"知识储备": "knowledge_res",
			"章节信息": "chapter_info",
			"常见问题": "common_prob",
		}
		# Runs of newlines/tabs in the long text fields collapse to one newline.
		blank_runs = re.compile(r'[\n\t]+')

		content = json.loads(self.getContent())
		conn = MysqlHelper.connect()
		cur = conn.cursor()
		cur.execute('drop table if exists xjtu')
		cur.execute('create table if not exists xjtu(id int(11) primary key auto_increment,title varchar(255),lesson_code varchar(255),start_time varchar(255),current_sem varchar(255),spend_time varchar(255),short_desc text,knowledge_res text,chapter_info text,common_prob text,teacher_info text,url varchar(255))')
		sql = 'insert into xjtu(title,lesson_code,start_time,current_sem,spend_time,short_desc,knowledge_res,chapter_info,common_prob,teacher_info,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
		for course in content["course"]:
			oneline = Course()
			url = "http://xjtu.xuetangx.com" + course["about"]
			page = self.getPage(url)
			oneline.title = self.getTitle(page)

			# Inner loops use their own variable names so the outer loop
			# variable is no longer overwritten.
			for entry in self.getInfo1(page):
				field = basic_fields.get(entry[0])
				if field is not None:
					setattr(oneline, field, self.tool.replace(entry[1]))

			for entry in self.getInfo2(page):
				field = detail_fields.get(entry[0])
				if field is not None:
					setattr(oneline, field, blank_runs.sub(r'\n', self.tool.replace(entry[1])))

			# One "name / title / bio" paragraph per teacher; a falsy result
			# from getTeacherInfo leaves the field empty.
			teacherinfo = self.getTeacherInfo(page)
			oneline.teacher_info = "".join(
				person[0] + '\n' + person[1] + '\n' + self.tool.replace(person[2]) + '\n'
				for person in teacherinfo or ())
			oneline.url = url

			value = [
				oneline.title,
				oneline.lesson_code,
				oneline.start_time,
				oneline.current_sem,
				oneline.spend_time,
				oneline.short_desc,
				oneline.knowledge_res,
				oneline.chapter_info,
				oneline.common_prob,
				oneline.teacher_info,
				oneline.url,
			]
			MysqlHelper.insert_one(cur, sql, value)

		# .get(): a listing without a "lecture" key is treated like an empty one.
		lectures = content.get("lecture")
		if lectures:
			cur.execute('drop table if exists xjtu_lecture')
			cur.execute('create table if not exists xjtu_lecture(id int(11) primary key auto_increment,title varchar(255),intro text,guest text,video_info text,addr text,url varchar(255))')
			sql = 'insert into xjtu_lecture(title,intro,guest,video_info,addr,url) values(%s,%s,%s,%s,%s,%s)'
			for lecture in lectures:
				oneline = Lecture()
				url = "http://xjtu.xuetangx.com" + lecture["about"]
				page = self.getPage(url)
				oneline.title = self.getLectureTitle(page)
				oneline.intro = self.tool.replace(self.getLectureIntro(page))
				oneline.guest = self.tool.replace(self.getLectureGuest(page))
				oneline.video_info = self.tool.replace(self.getVideoInfo(page))
				oneline.addr = self.tool.replace(self.getLectureInfo(page))
				oneline.url = url
				value = [
					oneline.title,
					oneline.intro,
					oneline.guest,
					oneline.video_info,
					oneline.addr,
					oneline.url,
				]
				MysqlHelper.insert_one(cur, sql, value)
		MysqlHelper.finish(conn)
示例#40
0
	def start(self):
		"""
		Crawl course detail pages into table ``mooc``.

		Listing pages 78 up to ``int(pageNum[-2])`` (taken from the first
		listing page) are walked.  Every course URL found is fetched, its
		titles and the recognised text sections are copied onto an ``Item``
		and one row is inserted.  No DDL is issued here, so the table must
		already exist.
		"""
		# (Item attribute, headings that may introduce that section)
		# NOTE: the attribute is ``require`` while the SQL column is ``requires``.
		sections = (
			("brief", ("课程概述","课程概况","课程简介")),
			("teacher", ("授课教师","主讲教师")),
			("chapter", ("授课大纲","课程大纲")),
			("require", ("先修要求","先修知识","背景知识")),
			("form", ("授课形式",)),
			("question", ("常见问题解答","常见问题")),
			("resource", ("参考资料",)),
		)
		# Attribute order matches the column order of the insert statement.
		columns = (
			"title_chinese", "title_english", "brief", "teacher", "chapter",
			"require", "form", "question", "resource", "url",
		)

		first_listing = self.getContent(1)
		pageNum = self.getPageNum(first_listing)
		conn = MysqlHelper.connect()
		cur = conn.cursor()
		sql = 'insert into mooc(title_chinese,title_english,brief,teacher,chapter,requires,form,question,resource,url) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
		last_page = int(pageNum[-2])
		for page_no in range(78, last_page + 1):
			listing = self.getContent(page_no)
			for course_url in self.getURL(listing):
				oneline = Item()
				page = self.getPage(course_url)
				oneline.title_chinese = self.getTitle_chinese(page)
				oneline.title_english = self.getTitle_english(page)
				# The appended </div> is used to detect the end of the text.
				block = self.getBlock(page) + '</div>'
				for attribute, headings in sections:
					words = list(headings)
					if self.wordInText(block, words):
						setattr(oneline, attribute, self.tool.replace(self.getText(block, words)))
				oneline.url = course_url
				value = [getattr(oneline, column) for column in columns]
				MysqlHelper.insert_one(cur, sql, value)
		MysqlHelper.finish(conn)