示例#1
0
 def _getSCInfoFromHtmls(self, para):
     '''
     Build the data list for one subject category (SC).

     :param para: two-item sequence ``(SC_code, value)``. ``value[0]`` is
         the SC name; ``value[1:]`` are pages, each an iterable of
         institution entries whose items 0 and 1 are the institution code
         and name.
     :return:
         Type: list
         Elements: the first element is a sentinel holding the SC code and
             name (``getArgs(SC_code, SC_name)``); the remaining elements
             are whatever ``asyncRunFunc`` returns when it runs
             ``self._asyncGetInstitutionInfo`` over the per-institution
             argument list.
     '''
     SC_code, value = para
     SC_name = value[0]
     # One directory per subject category, named "<code>-<name>".
     scDirName = SC_code + '-' + SC_name
     scDirPath = self.htmlsRootPath + '\\' + scDirName
     self.storerIns.makeDir(scDirPath)
     pages = value[1:]
     header = getArgs(SC_code, SC_name)
     taskArgs = []
     for page in pages:
         for entry in page:
             if entry == '':
                 # An empty string stops the scan of this page only;
                 # the following pages are still processed.
                 break
             insCode, insName = entry[0], entry[1]
             insUrl = self._getInstitutionURL(SC_code, insName)
             insDirPath = scDirPath + '\\' + insCode + '-' + insName
             taskArgs.append(
                 getArgs(insCode, insName, insUrl, insDirPath, scDirPath))
     results = asyncRunFunc(self._asyncGetInstitutionInfo,
                            taskArgs,
                            poolNum=self.poolNum,
                            asyn=self.asynFlag)
     # The sentinel goes in front of the collected data items.
     results.insert(0, header)
     return results
 def getResearchInfo(self, insCodeAndName, insData, insUrl):
     '''
     Turn each raw research record of one institution into a reordered row.

     :param insCodeAndName: sequence whose first item labels the institution
     :param insData: iterable of raw records; each record must be a list
         because it is concatenated onto the leading cell
     :param insUrl: institution URL, stored in each row's leading cell and
         passed on to ``self.getScope``
     :return: list with one rearranged row per record in ``insData``
     '''
     rows = []
     for record in insData:
         # Leading cell pairs the institution label with its URL.
         row = [getArgs(insCodeAndName[0], insUrl)] + record
         # Move the item at index 1 to index 4.
         row.insert(4, row.pop(1))
         # Replace the id at index 8 with a (label, link) cell.
         link = domain + '/zsml/kskm.jsp?id=' + row[8]
         row[8] = getArgs('点此查看', link)
         # The trailing item is handed to getScope instead of staying in
         # the row.
         tail = row.pop()
         scope = self.getScope(tail, row, record, insUrl)
         # Pop four items off the end of scope, inserting each at index 9.
         for _ in range(4):
             row.insert(9, scope.pop())
         rows.append(row)
     return rows
 def _modifyData(self, department, major, researchArea):
     '''
     Rewrite "(code)name" strings as "code-name".

     :param department: department string in "(code)name" form
     :param major: major string in "(code)name" form
     :param researchArea: research-area string in "(code)name" form
     :return: list with one "code-name" string per value produced by
         ``getArgs(department, major, researchArea)``, in that order
     :raises SystemExit: with status -1, after printing the offending
         value, when a value yields no match
     '''
     # Raw string: '\(' and '\)' are not valid str escapes, so a normal
     # literal triggers an invalid-escape warning. The pattern text itself
     # is unchanged.
     pattern = r'\((.+)\)(.+)'
     reformatted = []
     # NOTE(review): findAllWithRe is presumably a findall-style helper
     # taking (text, pattern) -- confirm against its definition.
     for text in getArgs(department, major, researchArea):
         try:
             tup = findAllWithRe(text, pattern)[0]
             reformatted.append(tup[0] + '-' + tup[1])
         except IndexError:
             print(text)
             # Same exception and status as the site-provided exit(), but
             # without depending on the site module being loaded.
             raise SystemExit(-1)
     return reformatted
 def getSubjectInfo(self, subjectName, datum):
     '''
     Group the rows of one subject by institution name.

     :param subjectName: subject name, stored in every grouped value
     :param datum: iterable of 15-item rows in the order (insName,
         department, major, researchArea, examType, learngType, teacher,
         enrolledNumer, scopeUrl, course1, course2, course3, course4,
         crossMajor, remark); a row of any other length raises ValueError
     :return: dict mapping ``insName[0]`` to the list of
         ``getArgs(department, subjectName, major, researchArea, examType,
         learngType, enrolledNumer)`` values for that institution, in
         input order
     '''
     # Example institution query URL kept from the original notes:
     # 'http://yz.chsi.com.cn/zsml/querySchAction.do?dwmc=%E5%8C%97%E4%BA%AC%E5%A4%A7%E5%AD%A6&yjxkdm=0101'
     dic = {}
     for row in datum:
         # Full unpack keeps the strict 15-column check even though several
         # columns are not used below.
         (insName, department, major, researchArea, examType, learngType,
          teacher, enrolledNumer, scopeUrl, course1, course2, course3,
          course4, crossMajor, remark) = row
         (department, major,
          researchArea) = self._modifyData(department, major, researchArea)
         insKey = insName[0]
         value = getArgs(department, subjectName, major, researchArea,
                         examType, learngType, enrolledNumer)
         # Append in place instead of rebuilding the list on every row.
         dic.setdefault(insKey, []).append(value)
     return dic
    def _modifyData(self, department, major, researchArea):
        '''
        Rewrite "(code)name" strings as "code-name".

        :param department: department string in "(code)name" form
        :param major: major string in "(code)name" form
        :param researchArea: research-area string in "(code)name" form
        :return: list with one "code-name" string per value produced by
            ``getArgs(department, major, researchArea)``, in that order
        :raises SystemExit: with status -1, after printing the offending
            value, when a value yields no match
        '''
        # Raw string: '\(' and '\)' are not valid str escapes, so a normal
        # literal triggers an invalid-escape warning. The pattern text
        # itself is unchanged.
        pattern = r'\((.+)\)(.+)'
        reformatted = []
        # NOTE(review): findAllWithRe is presumably a findall-style helper
        # taking (text, pattern) -- confirm against its definition.
        for text in getArgs(department, major, researchArea):
            try:
                tup = findAllWithRe(text, pattern)[0]
                reformatted.append(tup[0] + '-' + tup[1])
            except IndexError:
                print(text)
                # Same exception and status as the site-provided exit(),
                # but without depending on the site module being loaded.
                raise SystemExit(-1)
        return reformatted

    # def _getEnrolledNumber(self, data):
    #     print(data)
    #     (insName, department, subjectName, major, researchArea, examType, learngType, enrolledNumer) = data
    #     if(not enrolledNumer.find('一级学科:') = -1)
    #     exit(0)
def _getCount():
    '''
    Return the persisted counters, or start values when none are stored.

    Reads ``pklPath`` through ``storerIns.getPickleFileData``. A result
    that compares equal to ``False`` is treated as "nothing stored";
    otherwise the result is indexed by counter name.

    :return: ``getArgs(count, errCount, errNum, errMax, count2,
        smallestFileSize)``
    '''
    saved = storerIns.getPickleFileData(pklPath)
    # NOTE(review): `== False` is kept deliberately; presumably the storer
    # returns False when the file is missing -- confirm before tightening.
    if saved == False:
        # Counters start at zero; the smallest file size starts at a large
        # placeholder value.
        return getArgs(0, 0, 0, 0, 0, 999999999)
    keys = ('count', 'errCount', 'errNum', 'errMax', 'count2',
            'smallestFileSize')
    return getArgs(*[saved[key] for key in keys])
 def getInstitutionInfo(self, subjectData, xlsxDirPath):
     '''
     Assemble the "rawInfo" sheet description for one subject.

     :param subjectData: iterable of institution blocks; item 0 of each
         block is a head whose items 0, 1 and 2 are the institution code,
         name and URL, and the remaining items are its raw records
     :param xlsxDirPath: directory path the workbook path is derived from
     :return: ``getArgs(xlsxFilePath, sheetName, sheetHead, sheetDatum)``,
         where sheetDatum is the concatenation of the rows returned by
         ``self.getResearchInfo`` for each institution
     '''
     workbookPath = xlsxDirPath + '//rawInfo.xlsx'
     columns = [
         '机构名', '院系所', '专业', '研究方向', '考试方式', '学习方式', '指导教师', '拟招生人数',
         '考试范围', '政治', '外语', '业务课一', '业务课二', '跨专业', '备注'
     ]
     rows = []
     for block in subjectData:
         head = block[0]
         # getResearchInfo reads the label through index 0, so it is
         # wrapped in a one-item list.
         label = [head[0] + '-' + head[1]]
         url = head[2]
         records = block[1:]
         rows = rows + self.getResearchInfo(label, records, url)
     return getArgs(workbookPath, 'rawInfo', columns, rows)
 def getRawSubjectInfo(self, subjectsInfo):
     '''
     Build the sheet data for every subject.

     :param subjectsInfo: iterable of subject blocks; item 0 of each block
         holds the subject code and name at indexes 0 and 1, and the
         remaining items are passed to ``getInstitutionInfo`` as the
         subject's data
     :return: list whose first element is ``self.xlsxRootDirPath``; each
         following element is a list that starts with
         ``getArgs(xlsxDirPath, xlsxFilePath, sheetName, sheetHead)`` and
         continues with that subject's sheet rows
     '''
     instance = modifyRawSubjeectsInfo.getInstance()
     final = [self.xlsxRootDirPath]
     for subjectInfo in subjectsInfo:
         subjectCodeAndName = subjectInfo[0]
         subjectCode = subjectCodeAndName[0]
         subjectName = subjectCodeAndName[1]
         # One output directory per subject, named "<code>-<name>".
         xlsxDirPath = self.xlsxRootDirPath + '//' + subjectCode + '-' + subjectName
         (xlsxFilePath, sheetName, sheetHead,
          sheetDatum) = instance.getInstitutionInfo(subjectInfo[1:],
                                                    xlsxDirPath)
         header = getArgs(xlsxDirPath, xlsxFilePath, sheetName, sheetHead)
         # Header first, then the rows.
         final.append([header] + sheetDatum)
     return final
示例#9
0
 def _getSCInfoFromPklFile(self, SC_code, value):
     '''
     Fetch the data of one subject category through its pickle file.

     The pickle path is ``self.pklsRootPath``, a backslash, and
     "<SC_code>-<value[0]>.pkl". That path, ``self._getSCInfoFromHtmls``
     and the packed ``(SC_code, value)`` argument are handed to
     ``self.storerIns.getPickleFileDataFromOtherData``.

     NOTE(review): presumably the storer loads the pickle when it exists
     and otherwise calls the producer with the packed argument -- confirm.

     :param SC_code: subject category code
     :param value: sequence whose first item is the subject category name
     :return: whatever ``getPickleFileDataFromOtherData`` returns
     '''
     fileStem = SC_code + '-' + value[0]
     picklePath = self.pklsRootPath + '\\' + fileStem + '.pkl'
     packed = getArgs(SC_code, value)
     return self.storerIns.getPickleFileDataFromOtherData(
         picklePath, self._getSCInfoFromHtmls, packed)