Example #1
0
import HTMLParser  #处理html编码字符
import json
import re
import sys
import urlparse

import pymongo
import requests
from bs4 import BeautifulSoup  #lxml解析器

from cfg import subjects, xds, PATH, URL, COLL
from utils import LoggerUtil
from utils.SqlUtil import MongoDB

reload(sys)
sys.setdefaultencoding('utf-8')

html_parser = HTMLParser.HTMLParser()
logger = LoggerUtil.getLogger(__name__)
logger_major = LoggerUtil.getLogger('major')


class PaperParse:
    '''分析组卷网的试卷页面'''
    def __init__(self, url=URL.rootUrl):
        self.session = requests.Session()
        self.session.get(url)

    def parseParperPropAll(self, url=URL.paper_url):
        '''分析试卷的所有公共属性'''
        mongo = MongoDB()
        #创建唯一索引
        # for key, value in COLL.type.items():
        #     coll = mongo.getCollection(value)
Example #2
0
#!/usr/bin/python
#-*-coding:utf-8-*-

import requests
import os
import urlparse
import json
from utils.SqlUtil import PostgreSql
from utils import LoggerUtil, Utils
import re
import sys

reload(sys)
sys.setdefaultencoding('utf-8')
logger = LoggerUtil.getLogger(__name__)

SELECT_SQL = 'SELECT qid,answer FROM T_QUES_ZUJUAN_EX WHERE cate=1 AND subject= %s AND qid > %s ORDER BY seq ASC LIMIT %s '
UPDATE_SQL = 'UPDATE T_QUES_ZUJUAN_EX SET  choice_answer = %s WHERE qid = %s'
UPDATE_STATUS_SQL = 'UPDATE T_QUES_ZUJUAN_EX SET  status = %s WHERE qid = %s'
ROWS = 1000
rootImagPath = '/data/meiqiming/data/zj_image_new'

OPTIONS = {'A': [], 'B': [], 'C': [], 'D': [], 'E': []}


def init():
    for parent, dir_names, file_names in os.walk('data'):
        for file_name in file_names:
            for key, values in OPTIONS.items():
                if file_name.startswith(key):
                    values.append(