def CreateQuery(self, buildData=None):
    """Build an hh.ru vacancy-search URL from the profile's specializations,
    city and programming languages, fetch it and return the parsed vacancies.

    buildData: optional extra language set. TODO(review): the original only
    printed its union with self.progLangs[0] and never added it to the query;
    that debug output is dropped here, but buildData is still unused.
    Returns the list produced by self.ReadManyVacancyes.
    """
    # Resolve the numeric hh.ru area code for the user's city.
    city_code = hhAPI.getCityCode(
        self.city,
        JSONParser.Parse(HTMLData.getStringHTMLData('https://api.hh.ru/areas', 'utf-8')),
    )
    link = "https://api.hh.ru/vacancies?specialization="
    # Keep only specializations whose top-level group is marked important.
    for spec in self.specializationIdList[:-1]:
        if spec.split(".")[0] in self.importantSpecializations:
            link += spec + "&specialization="
    link += self.specializationIdList[-1]
    # BUG FIX: the original hard-coded "&area=1" (Moscow) and ignored the
    # city code it had just resolved; use the resolved code instead.
    link += "&area=" + str(city_code)
    # BUG FIX: the original appended "&text=" then sliced 8 chars off, which
    # corrupted the URL when the language set was empty; guard and join instead.
    if self.progLangs is not None and self.progLangs[0]:
        link += "&text=" + "%20or%20".join(self.progLangs[0])
    link += "&per_page=200"
    print(link)
    res = JSONParser.Parse(HTMLData.getStringHTMLData(link, 'utf-8'))
    print("result len = " + str(res['found']))
    return self.ReadManyVacancyes(res['items'])
def ReadManyVacancyes(self, JSONAllVacancyData):
    """Resolve vacancy summaries into VacancyCache rows (capped at ~100).

    For each summary item: reuse an existing cached row keyed by the vacancy
    id parsed from the item's URL; otherwise fetch the full vacancy JSON,
    persist it via self.SaveVacancyToDB and collect it. Vacancies without a
    complete salary range (both 'from' and 'to') are stored with 0/0.
    Returns the list of VacancyCache instances.
    """
    result = []
    for count, item in enumerate(JSONAllVacancyData, start=1):
        vacancy_id = item['url'].split("vacancies/")[1]
        cached = VacancyCache.objects.filter(vacancy_Id=vacancy_id)
        if cached:
            result.append(cached[0])
        else:
            VacancyJson = JSONParser.Parse(
                HTMLData.getStringHTMLData(item['url'], 'utf-8'))
            salary = VacancyJson['salary']
            # Only trust the salary when both ends of the range are present.
            # (BUG FIX: `!= None` replaced by `is not None`; the two duplicated
            # VacancyCache(...) branches are merged into one construction.)
            if salary is not None and salary['to'] is not None and salary['from'] is not None:
                salary_start, salary_end = salary['from'], salary['to']
            else:
                salary_start = salary_end = 0
            vacancy = VacancyCache(
                name=VacancyJson['name'],
                description=VacancyJson['description'],
                url=VacancyJson['alternate_url'],
                company_name=VacancyJson['employer']['name'],
                salary_start=salary_start,
                salary_end=salary_end,
                vacancy_Id=VacancyJson['id'],
            )
            self.SaveVacancyToDB(vacancy)
            result.append(vacancy)
        # Hard cap to limit per-request API calls (note: processes 101 items,
        # matching the original `count > 100` check).
        if count > 100:
            break
    return result
def GetJSONByUserName(UserName):
    """Return the parsed JSON list of a GitHub user's public repos.

    Returns None when the request or parsing fails (unknown user, network
    error), matching the original best-effort behavior.
    """
    link = "https://api.github.com/users/" + UserName + "/repos"
    try:
        return JSONParser.Parse(HTMLData.getStringHTMLData(link, 'utf-8'))
    # BUG FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt;
    # narrow to Exception while keeping the same fallback.
    except Exception:
        print("wrong git hub url, ignoring")
        return None
def GetJSONByUserName(UserName):
    """Fetch and parse the public-repo listing for *UserName* from the
    GitHub API; return None on any request/parse failure."""
    link = "https://api.github.com/users/" + UserName + "/repos"
    res = None
    try:
        res = JSONParser.Parse(HTMLData.getStringHTMLData(link, "utf-8"))
    except Exception:
        # BUG FIX: was a bare `except:`, which also caught SystemExit and
        # KeyboardInterrupt; Exception preserves the intended best-effort.
        print("wrong git hub url, ignoring")
    return res
def GetLanguages(UserName):
    """Return the set of known languages (lower-cased, filtered through
    commonLangs) used across a GitHub user's public repositories."""
    found = []
    repos = GitHubAPI.GetJSONByUserName(UserName)
    if repos is not None:
        urls = [repo['languages_url'] for repo in repos]
        # TODO may be sloooowwwww — one extra HTTP request per repository.
        for url in urls:
            stats = JSONParser.Parse(HTMLData.getStringHTMLData(url, 'utf-8'))
            for language in stats.keys():
                lowered = language.lower()
                if lowered in commonLangs:
                    found.append(lowered)
    return set(found)
def GetLanguages(UserName):
    """Collect the lower-cased entries of commonLangs that appear in any of
    the user's public GitHub repositories; returns a (possibly empty) set."""
    repos = GitHubAPI.GetJSONByUserName(UserName)
    if repos is None:
        return set()
    language_urls = [repo["languages_url"] for repo in repos]
    matched = set()
    # TODO may be sloooowwwww — one request per repository.
    for url in language_urls:
        stats = JSONParser.Parse(HTMLData.getStringHTMLData(url, "utf-8"))
        matched.update(
            lang.lower() for lang in stats.keys() if lang.lower() in commonLangs
        )
    return matched
def GetOneStandart(link):
    """Download one education-standard page (cp1251-encoded), dump a copy to
    out.html for inspection, and return the page text."""
    encoding = "cp1251"
    HTMLData.SaveHTML(link, encoding, "out.html")
    return HTMLData.getStringHTMLData(link, encoding)
def GetHTML(cls):
    """Save the edu.ru specialization-list page (cp1251) to output.html."""
    source_url = (
        "http://www.edu.ru/db/cgi-bin/portal/spe/prog_list_new.plx"
        "?substr=&rasd=all&st=all&kod=all"
    )
    HTMLData.SaveHTML(source_url, "cp1251", "output.html")
import sys
import urllib.error

import pymorphy2

from lib.EduStandartsParser import EduStandartsParser
from lib.HTMLData import HTMLData
from lib.JSONParser import JSONParser

# Shared morphological analyzer (expensive to construct; build once).
morph = pymorphy2.MorphAnalyzer()

# hh.ru specialization dictionary; the module cannot work without it,
# so abort immediately on a connection failure.
SpecializationDict = None
try:
    SpecializationDict = JSONParser.Parse(
        HTMLData.getStringHTMLData("https://api.hh.ru/specializations", "utf-8"))
except urllib.error.URLError:
    print("Connection error, I can not work")
    # BUG FIX: `exit()` is a `site`-module convenience and may be absent
    # (e.g. under `python -S` or in frozen builds); sys.exit() is reliable.
    sys.exit()

#EduStandartsParser.GetHTML()

# Languages recognized in GitHub repo stats and vacancy texts.
# NOTE(review): the final "с++" uses a Cyrillic "с" — presumably on purpose
# to match Russian-typed input; confirm before de-duplicating.
commonLangs = [
    "c", "c++", "ruby", "python", "javascript", "java", "c#", "f#", "css",
    "objective-c", "go", "shell", "perl", "php", "lisp", "haskell",
    "pascal", "assembly", "scala", "sql", "с++",
]
import sys
import urllib.error

import pymorphy2

from lib.EduStandartsParser import EduStandartsParser
from lib.HTMLData import HTMLData
from lib.JSONParser import JSONParser

# Shared morphological analyzer (expensive to construct; build once).
morph = pymorphy2.MorphAnalyzer()

# hh.ru specialization dictionary; the module cannot work without it,
# so abort immediately on a connection failure.
SpecializationDict = None
try:
    SpecializationDict = JSONParser.Parse(
        HTMLData.getStringHTMLData("https://api.hh.ru/specializations", "utf-8"))
except urllib.error.URLError:
    print("Connection error, I can not work")
    # BUG FIX: `exit()` is a `site`-module convenience and may be absent
    # (e.g. under `python -S` or in frozen builds); sys.exit() is reliable.
    sys.exit()

#EduStandartsParser.GetHTML()

# Languages recognized in GitHub repo stats and vacancy texts.
# NOTE(review): the final "с++" uses a Cyrillic "с" — presumably on purpose
# to match Russian-typed input; confirm before de-duplicating.
commonLangs = [
    "c", "c++", "ruby", "python", "javascript", "java", "c#", "f#", "css",
    "objective-c", "go", "shell", "perl", "php", "lisp", "haskell",
    "pascal", "assembly", "scala", "sql", "с++",
]
def getPossibleEdulevels(self):
    """Return the human-readable education-level names listed by the
    hh.ru dictionaries endpoint."""
    levels = JSONParser.Parse(
        HTMLData.getStringHTMLData("https://api.hh.ru/dictionaries", 'utf-8')
    )['education_level']
    return [entry['name'] for entry in levels]
def getDictionary(link):
    """Fetch *link* (decoded as UTF-8) and return the parsed JSON document."""
    raw = HTMLData.getStringHTMLData(link, "utf-8")
    return JSONParser.Parse(raw)