# NOTE(review): loadPurchase takes `self` and is called below as
# `parser.loadPurchase(...)` on a PurchaseMethodLoader instance, so it
# presumably lives inside that class, whose header is outside this chunk.
# Re-indent it into the class body when splicing this block back in.
def loadPurchase(self, pageNumber):
    """Yield the purchase-method names listed on one result page.

    Re-initialises the parser (``ParserBase.__init__``) for page
    *pageNumber* and then reads ``self.soup`` -- presumably populated by
    that call; confirm against ParserBase.

    Every ``td.purchaseValue`` cell of the ``table#resultTable.maintable``
    element is yielded stripped and lower-cased, except cells that contain
    the "all methods" placeholder text.  Nothing is yielded when the page
    has no such table.
    """
    ParserBase.__init__(self, 'purchaseMethod', "p=true&d-4031123-p=%i" % pageNumber)
    skipValue = u"Все способы"
    contenttable = self.soup.find('table', {'class': 'maintable', 'id': 'resultTable'})
    if contenttable is None:
        # find() returns None when the table is missing (e.g. a page past
        # the end of the listing); treat that as "no methods on this page"
        # instead of failing with AttributeError on None.findAll.
        return
    for cell in contenttable.findAll('td', {'class': 'purchaseValue'}):
        if skipValue not in cell.text:
            yield cell.text.strip().lower()


if __name__ == '__main__':
    import os
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "goszak.settings")
    from models.purchase import PurchaseMethod
    from django.core.exceptions import ObjectDoesNotExist

    parser = PurchaseMethodLoader()
    # Pages 1..169 are hard-coded.  range() is used instead of xrange() so
    # the script runs on both Python 2 and Python 3.
    for pageNum in range(1, 170):
        for method in parser.loadPurchase(pageNum):
            print("%s" % method)
            try:
                PurchaseMethod.objects.get(name=method)
            except ObjectDoesNotExist:
                # Unknown method: store it so later runs find it.
                print("\tNot exists")
                PurchaseMethod(name=method).save()
            else:
                print("\tFound")

# Example listing URL (presumably what the parser requests for page 163):
# http://zakupki.gov.ru/223/purchase/public/purchase-method/choose-purchase-method.html?p=true&d-4031123-p=163
def _parse_price(raw):
    """Return the amount found in the price string *raw* as a float.

    Only digits, dots and commas are kept; a comma is treated as the
    decimal separator.  Raises ValueError when no parsable number remains.
    """
    number = ''.join([ch for ch in raw if ch in '0123456789.,'])
    return float(number.replace(',', '.'))


def _pop_labelled_number(text, label):
    """Extract the first run of digits that follows *label* in *text*.

    Returns ``(digits, remaining_text)`` where ``remaining_text`` is *text*
    with everything from the label up to the end of the digits removed.
    When the label is absent, or no digit follows it, returns
    ``(u"", text)`` with the text untouched.
    """
    start = text.find(label)
    if start == -1:
        return u"", text
    first = start + len(label)
    while first < len(text) and not text[first].isdigit():
        first += 1
    last = first
    while last < len(text) and text[last].isdigit():
        last += 1
    if last == first:
        return u"", text
    return text[first:last], text[:start] + text[last:]


def main(agrv=None):
    """Crawl purchases in the PLACEMENT_COMPLETE stage and store them.

    For every purchase returned by the search form parser this function
    looks up -- and creates when missing -- the purchase method, the
    purchase itself, the contact person, the staff link, the lots with
    their OKVED/OKDP codes and, when a protocol is published, the
    applicants, their bets and the bet results.

    Parameters:
        agrv: unused; kept so existing callers that pass an argument list
            keep working.

    Raises:
        SystemExit: when the customer organization of a purchase is not in
            the database yet (OrganizationSearchForm.py must be run first).
    """
    projectLocation = '/home/pavel/code/python/gos_zak_analitics/goszak/'
    addpath(projectLocation)
    import os
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "goszak.settings")
    setConsoleEncoding()
    checkMissingValues()

    # Kept after addpath() and the DJANGO_SETTINGS_MODULE setup, which these
    # project imports presumably rely on.
    from searchForm import searchForm as SearchFormParser
    from purchase import Purchase as PurchasepageParser
    from protocol import Protocol as ProtocolPageParser
    from models.customer import Organization, Person, Staff, PurchaseStaff
    from models.purchase import Purchase, Lot, PurchaseMethod
    from models.supplier import Supplier, Bet, PurchaseResult
    from okvedData.models.okdpCodes import OKDPCodes
    from okvedData.models.okvedCodes import OkvedCodes
    from django.core.exceptions import ObjectDoesNotExist

    pageLimit = 2
    purchasseStage = "PLACEMENT_COMPLETE"

    # ---- Purchase list from the search form ----
    searchPageparcer = SearchFormParser()
    # To crawl everything: pageLimit = searchPageparcer.totalPagesCount() + 1
    print(searchPageparcer.totalPagesCount())

    for url, _purchaseMethod, published, _startPrice, lastModified \
            in searchPageparcer.allPurchaseFor(purchaseStage=purchasseStage,
                                               startPage=1,
                                               pagelimit=pageLimit):
        # ---- Purchase page -> info dict ----
        # Sections used below: general purchase information, customer and
        # contact person.  The "documentation provision" and "placement
        # procedure" sections are returned as well but ignored.
        purchasePageParser = PurchasepageParser(url)
        resDict = purchasePageParser.purchaseInfo()

        # ---- General purchase information ----
        general = resDict.get(u"Общие сведения о закупке")
        purchaseName = general.get(u"Наименование закупки")
        noticeNumber = general.get(u"Номер извещения")
        purchaseMethod = general.get(u"Способ размещения закупки").lower()
        edition = general.get(u"Редакция")

        try:
            purchaseMethodObj = PurchaseMethod.objects.get(name=purchaseMethod)
        except ObjectDoesNotExist:
            purchaseMethodObj = PurchaseMethod(name=purchaseMethod)
            purchaseMethodObj.save()

        print("Purchase %s" % noticeNumber)
        try:
            purchaseObj = Purchase.objects.get(noticeNumber=noticeNumber, url=url)
        except ObjectDoesNotExist:
            print("\tNOT Exists")
            purchaseObj = Purchase(noticeNumber=noticeNumber,
                                   name=purchaseName,
                                   url=url,
                                   edition=edition,
                                   stage=purchasseStage,
                                   method_fk=purchaseMethodObj,
                                   publishDate=published,
                                   lastModified=lastModified)
            purchaseObj.save()
        else:
            if purchaseObj.stage != purchasseStage:
                purchaseObj.stage = purchasseStage
                purchaseObj.lastModified = lastModified
                # Persist the refreshed stage; without save() the change
                # never reaches the database.
                purchaseObj.save()
            print("\tExists")

        # ---- Customer ----
        customer = resDict.get(u"Заказчик")
        # The key holds a literal backslash ("INN \ KPP"); it is escaped
        # here so the literal is not an invalid escape sequence.
        INN, KPP = (part.strip()
                    for part in customer.get(u"ИНН \\ КПП").split("\\")[0:2])
        print("Organization %s %s" % (INN, KPP))
        try:
            organization = Organization.objects.get(INN=INN, KPP=KPP)
        except ObjectDoesNotExist:
            print("\tNOT Exists")
            print("FIRST RUN \"OrganizationSearchForm.py\" to upload new companies")
            raise SystemExit()
        print("\tExists")

        # ---- Contact person ----
        # A missing section or field, and an empty string, are all stored
        # as None.
        contact = resDict.get(u"Контактное лицо") or {}
        FIO = contact.get(u"Контактное лицо") or None
        email = contact.get(u"Электронная почта") or None
        telephone = contact.get(u"Телефон") or None
        fax = contact.get(u"Факс") or None

        print("Customer Person %s" % FIO)
        try:
            person = Person.objects.get(FIO=FIO, email=email,
                                        telephone=telephone, fax=fax)
            print("\tExists")
        except ObjectDoesNotExist:
            print("\tNOT Exists")
            person = Person(FIO=FIO, email=email, telephone=telephone, fax=fax)
            person.save()

        # ---- Link organization and contact person ----
        print("Staff")
        try:
            staff = Staff.objects.get(organization_fk=organization, person_fk=person)
            print("\tExists")
        except ObjectDoesNotExist:
            print("\tNOT Exists")
            staff = Staff(organization_fk=organization, person_fk=person)
            staff.save()

        # ---- Link purchase and staff ----
        try:
            PurchaseStaff.objects.get(purchase_fk=purchaseObj, staff_fk=staff)
        except ObjectDoesNotExist:
            PurchaseStaff(purchase_fk=purchaseObj, staff_fk=staff).save()

        # ---- Lots ----
        for lot in purchasePageParser.getLotList():
            priceParams = lot.get(u"Начальная (макс.) цена договора")
            lotName = lot.get(u"Наименование лота")
            OKVEDstr = lot.get(u"Классификация по ОКВЭД")
            OKDPstr = lot.get(u"Классификация по ОКДП")

            # TODO Add MONEY FIELD to BET (the currency is the text that
            # follows the number in priceParams; it is not stored yet).
            priceValue = _parse_price(priceParams)
            # Anything other than an explicit "no" counts as a joint purchase.
            joint = lot.get(u"Совместная закупка") != u"Нет"

            # Each classification string starts with the code, followed by
            # its description.
            OKVED = OKVEDstr.split()[0]
            OKDP = OKDPstr.split()[0]
            print("OKVED %s OKDP %s" % (OKVED, OKDP))

            try:
                OKVEDObj = OkvedCodes.objects.get(code=OKVED)
            except ObjectDoesNotExist:
                # Slice from the end of the code (the original indexed a
                # single character here, unlike the OKDP branch below).
                OKVEDObj = OkvedCodes(code=OKVED,
                                      description=OKVEDstr[len(OKVED):].strip(),
                                      parent=None)
                OKVEDObj.save()

            try:
                OKDPObj = OKDPCodes.objects.get(code=OKDP)
            except ObjectDoesNotExist:
                print("\tOKDP Code %s not found.Saved" % OKDP)
                OKDPObj = OKDPCodes(code=OKDP,
                                    description=OKDPstr[len(OKDP):].strip())
                OKDPObj.save()

            print("LOT %s" % lotName)
            try:
                Lot.objects.get(name=lotName, purchase_fk=purchaseObj)
                print("\tFound")
            except ObjectDoesNotExist:
                print("\tNot Found")
                Lot(name=lotName,
                    joint=joint,
                    startPrice=priceValue,
                    OKVED_fk=OKVEDObj,
                    OKDP_fk=OKDPObj,
                    purchase_fk=purchaseObj).save()

        # ---- Protocol (applicants, bets, results) ----
        protocolparam = purchasePageParser.getLastProtocolUrl()
        print("Protocol")
        if protocolparam is not None:
            print("\tFound")
            protocolPageParser = ProtocolPageParser(protocolparam)
            for applicant in protocolPageParser.getFullResult():
                applicantParams = applicant.get(u"Участник")
                if applicantParams is None:
                    # No applicant entry: stop processing this protocol.
                    print("\tNo applicants -> Next")
                    break

                priceParams = applicant.get(u"Предложенная цена договора")
                admission = applicant.get(u"Допуск")
                place = applicant.get(u"Результат")
                isLegalEntity = applicant.get(u"Юридическое лицо")
                isIndividual = applicant.get(u"Физическое лицо")
                notRussianResident = applicant.get(u"Не резидент")
                # The result may also carry the application's receipt
                # date/time; it is not stored.

                # The applicant text carries the name plus optional
                # "INN <digits>" / "KPP <digits>" fragments; strip those out
                # one at a time so only the name remains.
                supplierINN, applicantParams = _pop_labelled_number(applicantParams, u"ИНН")
                supplierKPP, applicantParams = _pop_labelled_number(applicantParams, u"КПП")
                supplierName = applicantParams.strip()

                # ---- Applicant company ----
                print("Applicant %s" % supplierName)
                try:
                    if notRussianResident:
                        supplierObj = Supplier.objects.get(
                            name=supplierName,
                            notRussianResident=notRussianResident)
                    elif isLegalEntity:
                        supplierObj = Supplier.objects.get(INN=supplierINN,
                                                           KPP=supplierKPP)
                    else:
                        supplierObj = Supplier.objects.get(name=supplierName)
                    print("\tFound")
                except ObjectDoesNotExist:
                    print("\tNOT Found")
                    supplierObj = Supplier(INN=supplierINN,
                                           KPP=supplierKPP,
                                           name=supplierName,
                                           isLegalEntity=isLegalEntity,
                                           isIndividual=isIndividual,
                                           notRussianResident=notRussianResident)
                    supplierObj.save()

                # ---- Applicant bet ----
                print("BET")
                try:
                    betObj = Bet.objects.get(supplier_fk=supplierObj,
                                             purchase_fk=purchaseObj)
                    print("\tFound")
                except ObjectDoesNotExist:
                    # The offered price is only parsed when a new bet has to
                    # be created.
                    # TODO Add MONEY FIELD to BET
                    priceValue = _parse_price(priceParams)
                    print("\tNot Found with value %s" % priceValue)
                    betObj = Bet(value=priceValue,
                                 supplier_fk=supplierObj,
                                 purchase_fk=purchaseObj)
                    betObj.save()

                # ---- Bet place ----
                print("BET PLACE")
                try:
                    PurchaseResult.objects.get(bet_fk=betObj)
                    print("\tfound")
                except ObjectDoesNotExist:
                    print("\tNOT found with place %s" % place)
                    PurchaseResult(bet_fk=betObj,
                                   admission=admission,
                                   place=place).save()

        # ---- Skipped sections ----
        # values = resDict.get(u"Предоставление документации")
        # values = resDict.get(u"Порядок размещения закупки")
        print("\n")