def add_document(url_info):
    """Record a discovered document, creating any rows that are missing.

    ``url_info`` is read by key: ``url``, ``reg_id`` and ``reg_name`` are
    always read; ``doc_type`` and ``stamp_date`` are read only when a
    Document has to be created.

    Three independent steps, each skipped when the row already exists:
    a Registrant keyed on ``reg_id``, a Document keyed on the stripped
    url, and a MetaData keyed on ``link`` that stores the Document's id
    in ``form``.  The registrant name is printed on every call.
    """
    link = str(url_info['url']).strip()
    registrant_id = url_info['reg_id']
    print(url_info['reg_name'])

    registrant_known = Registrant.objects.filter(reg_id=registrant_id).exists()
    if not registrant_known:
        Registrant(
            reg_id=registrant_id,
            reg_name=url_info['reg_name'],
        ).save()

    document_known = Document.objects.filter(url=link).exists()
    if not document_known:
        Document(
            url=link,
            reg_id=url_info['reg_id'],
            doc_type=url_info['doc_type'],
            stamp_date=url_info['stamp_date'],
        ).save()

    metadata_known = MetaData.objects.filter(link=link).exists()
    if not metadata_known:
        # Fetch the stored row so the MetaData can point at its primary key.
        stored = Document.objects.get(url=link)
        MetaData(
            link=link,
            upload_date=datetime.date.today(),
            reviewed=False,
            processed=False,
            is_amendment=False,
            form=stored.id,
        ).save()
def add_document(url_info):
    """Ensure Registrant, Document and MetaData rows exist for one link.

    ``url_info`` is a mapping.  ``url``, ``reg_id`` and ``reg_name`` are
    always read; ``doc_type`` and ``stamp_date`` are read only when the
    Document is missing and has to be created.  The registrant name is
    printed each time the function runs.
    """
    doc_url = str(url_info['url']).strip()
    rid = url_info['reg_id']
    print(url_info['reg_name'])

    # Registrant: create only when no row carries this reg_id yet.
    if not Registrant.objects.filter(reg_id=rid).exists():
        registrant_fields = {
            'reg_id': rid,
            'reg_name': url_info['reg_name'],
        }
        new_registrant = Registrant(**registrant_fields)
        new_registrant.save()

    # Document: keyed on the whitespace-stripped url.
    if not Document.objects.filter(url=doc_url).exists():
        document_fields = {
            'url': doc_url,
            'reg_id': url_info['reg_id'],
            'doc_type': url_info['doc_type'],
            'stamp_date': url_info['stamp_date'],
        }
        new_document = Document(**document_fields)
        new_document.save()

    # MetaData: keyed on link; ``form`` holds the Document's id.
    if not MetaData.objects.filter(link=doc_url).exists():
        form_id = Document.objects.get(url=doc_url).id
        metadata_fields = {
            'link': doc_url,
            'upload_date': datetime.date.today(),
            'reviewed': False,
            'processed': False,
            'is_amendment': False,
            'form': form_id,
        }
        new_metadata = MetaData(**metadata_fields)
        new_metadata.save()
def add_document(url_info):
    """Create and save a Document from a positional record.

    ``url_info`` is indexed positionally: 0 is the url, 1 the reg_id,
    2 the doc_type and 3 the stamp_date.  No existence check is made;
    a new row is saved on every call.
    """
    fields = {
        'url': url_info[0],
        'reg_id': url_info[1],
        'doc_type': url_info[2],
        'stamp_date': url_info[3],
    }
    Document(**fields).save()
def add_document(url_info):
    """Save a Document for ``url_info['url']`` unless one already exists.

    The url is converted with ``str`` and stripped before the lookup.
    When a new Document is saved, a "New document discovered" message
    containing the url is printed; otherwise nothing happens.
    """
    link = str(url_info['url']).strip()

    # Already recorded: nothing to create, nothing to report.
    if Document.objects.filter(url=link).exists():
        return

    fresh = Document(
        url=link,
        reg_id=url_info['reg_id'],
        doc_type=url_info['doc_type'],
        stamp_date=url_info['stamp_date'],
    )
    fresh.save()
    print("\n New document discovered- \n %s \n" % (link))
def handle(self, *args, **options):
    """Backfill ``end_date`` on MetaData rows that lack one.

    For every MetaData whose ``end_date`` is ``None`` or ``''``, the
    Document with ``url == md.link`` is looked up.  When there is none, a
    Document is rebuilt from the link text and saved:

    * ``reg_id``: characters 25-28 of the link with ``-``, ``S`` and ``L``
      removed;
    * ``doc_type``: derived from the first ``-word-`` token in the link;
    * ``stamp_date``: the first run of eight digits, parsed as ``%Y%m%d``;
    * ``processed``: copied from the MetaData row.

    The document's ``stamp_date`` is then stored as ``md.end_date``.

    A row whose document type cannot be determined is reported (the token
    is printed) and skipped.  Previously such a row either raised
    ``NameError`` or silently reused the ``doc_type`` left over from an
    earlier iteration.  Only ``Document.DoesNotExist`` triggers the
    rebuild; any other lookup error propagates.
    """
    # Tokens that map straight to a document type.
    simple_types = {
        'Amendment': 'Amendment',
        'Short': 'Short Form',
        'Conflict': 'Conflict Provision',
        'Registration': 'Registration',
        'Supplemental': 'Supplemental',
    }

    for md in MetaData.objects.all():
        if not (md.end_date is None or md.end_date == ''):
            continue

        url = md.link
        try:
            document = Document.objects.get(url=url)
        except Document.DoesNotExist:
            reg_id = re.sub('[-SL]', '', url[25:29])

            token = re.findall(r'-(.*?)-', url)[0]
            if token == 'Exhibit':
                # Later matches override earlier ones, as before.
                # NOTE(review): these are substring tests on the whole
                # link, so a "C" or "D" anywhere in it wins -- confirm
                # against the real link format.
                doc_type = None
                if "AB" in url:
                    doc_type = 'Exhibit AB'
                if "C" in url:
                    doc_type = 'Exhibit C'
                if "D" in url:
                    doc_type = 'Exhibit D'
            else:
                doc_type = simple_types.get(token)

            if doc_type is None:
                # Unknown type: report it and leave this row untouched
                # rather than saving a Document with a wrong type.
                print(token)
                continue

            stamp_date = re.findall(r'\d{8}', url)[0]
            stamp_date_obj = datetime.datetime.strptime(stamp_date, "%Y%m%d")

            document = Document(
                url=url,
                reg_id=reg_id,
                doc_type=doc_type,
                stamp_date=stamp_date_obj,
                processed=md.processed,
            )
            document.save()
            # NOTE(review): source indentation was lost; notes are assumed
            # to be printed only for rebuilt documents -- confirm.
            print(md.notes)

        md.end_date = document.stamp_date
        md.save()
def handle(self, *args, **options):
    """Fill in ``end_date`` for MetaData rows where it is ``None`` or ``''``.

    Each such row is matched to the Document whose ``url`` equals
    ``md.link``.  If no Document exists, one is reconstructed from the
    link itself and saved: ``reg_id`` comes from characters 25-28 of the
    link (``-``, ``S``, ``L`` stripped), ``doc_type`` from the first
    ``-word-`` token, ``stamp_date`` from the first eight-digit run
    (``%Y%m%d``), and ``processed`` from the MetaData row.  The
    document's ``stamp_date`` is then written to ``md.end_date``.

    When the document type cannot be worked out, the token is printed and
    the row is skipped.  The earlier code raised ``NameError`` in that
    case, or carried over the ``doc_type`` of a previous row.  Lookup
    errors other than ``Document.DoesNotExist`` are no longer swallowed.
    """
    # Direct token -> document type mapping; "Exhibit" is handled apart.
    type_by_token = {
        'Amendment': 'Amendment',
        'Short': 'Short Form',
        'Conflict': 'Conflict Provision',
        'Registration': 'Registration',
        'Supplemental': 'Supplemental',
    }

    for md in MetaData.objects.all():
        if not (md.end_date is None or md.end_date == ''):
            continue

        url = md.link
        try:
            document = Document.objects.get(url=url)
        except Document.DoesNotExist:
            reg_id = re.sub('[-SL]', '', url[25:29])

            token = re.findall(r'-(.*?)-', url)[0]
            if token == 'Exhibit':
                # Checked in the original order so a later match still
                # overrides an earlier one.
                # NOTE(review): plain substring tests over the whole link;
                # verify they cannot match unrelated characters.
                doc_type = None
                if "AB" in url:
                    doc_type = 'Exhibit AB'
                if "C" in url:
                    doc_type = 'Exhibit C'
                if "D" in url:
                    doc_type = 'Exhibit D'
            else:
                doc_type = type_by_token.get(token)

            if doc_type is None:
                # Do not save a Document with a missing or stale type.
                print(token)
                continue

            stamp_date = re.findall(r'\d{8}', url)[0]
            stamp_date_obj = datetime.datetime.strptime(stamp_date, "%Y%m%d")

            document = Document(
                url=url,
                reg_id=reg_id,
                doc_type=doc_type,
                stamp_date=stamp_date_obj,
                processed=md.processed,
            )
            document.save()
            # NOTE(review): source indentation was lost; notes are assumed
            # to be printed only for reconstructed documents -- confirm.
            print(md.notes)

        md.end_date = document.stamp_date
        md.save()
pass if line['model'] == "fara.registration": reg_id = int(line['fields']['registrant']) file_id = line['fields']['source_file'] date = line['fields']['filing_date'] try: date_obj = datetime.strptime(date, "%Y-%m-%d") except: date_obj = None print "bad date" doc = Document(url = file_id, reg_id = reg_id, doc_type = "Supplemental", stamp_date = date_obj, ) doc.save() if line['model'] == "fara.fee": clientreg_id = line['fields']['client_registration'] registration = creg_reg_client[clientreg_id][0] reg_id = reg_fara[registration] client_id = creg_reg_client[clientreg_id][1] nclient_id = client_nclient[client_id] client_obj = Client.objects.get(id=nclient_id) reg_obj = Registrant.objects.get(reg_id=reg_id) fee = str(line['fields']['feesretainer']).strip()
pass if line['model'] == "fara.registration": reg_id = int(line['fields']['registrant']) file_id = line['fields']['source_file'] date = line['fields']['filing_date'] try: date_obj = datetime.strptime(date, "%Y-%m-%d") except: date_obj = None print "bad date" doc = Document( url=file_id, reg_id=reg_id, doc_type="Supplemental", stamp_date=date_obj, ) doc.save() if line['model'] == "fara.fee": clientreg_id = line['fields']['client_registration'] registration = creg_reg_client[clientreg_id][0] reg_id = reg_fara[registration] client_id = creg_reg_client[clientreg_id][1] nclient_id = client_nclient[client_id] client_obj = Client.objects.get(id=nclient_id) reg_obj = Registrant.objects.get(reg_id=reg_id) fee = str(line['fields']['feesretainer']).strip()