Пример #1
0
def add_document(url_info):
    """Store the registrant, document and metadata for one URL, if missing.

    ``url_info`` is indexed by the keys 'url', 'reg_id', 'reg_name',
    'doc_type' and 'stamp_date'.  The registrant name is printed, then each
    of the three rows is created only when no matching row exists yet:
    Registrant by ``reg_id``, Document by ``url`` and MetaData by ``link``.
    Nothing is returned.
    """
    link = str(url_info['url']).strip()
    registrant_id = url_info['reg_id']

    print(url_info['reg_name'])

    registrant_rows = Registrant.objects.filter(reg_id=registrant_id)
    if not registrant_rows.exists():
        Registrant(reg_id=registrant_id, reg_name=url_info['reg_name']).save()

    document_rows = Document.objects.filter(url=link)
    if not document_rows.exists():
        Document(
            url=link,
            reg_id=registrant_id,
            doc_type=url_info['doc_type'],
            stamp_date=url_info['stamp_date'],
        ).save()

    if MetaData.objects.filter(link=link).exists():
        return

    # the metadata row refers to the stored document through its id, so the
    # document is looked up again whether or not it was created just above
    stored_document = Document.objects.get(url=link)
    MetaData(
        link=link,
        upload_date=datetime.date.today(),
        reviewed=False,
        processed=False,
        is_amendment=False,
        form=stored_document.id,
    ).save()
Пример #2
0
def add_document(url_info):
    """Create the Registrant, Document and MetaData rows for a URL record.

    Reads 'url', 'reg_id', 'reg_name', 'doc_type' and 'stamp_date' from
    ``url_info`` and prints the registrant name.  Each row is saved only
    if a lookup (Registrant by reg_id, Document by url, MetaData by link)
    finds nothing; existing rows are left untouched.  Returns None.
    """
    cleaned_url = str(url_info['url']).strip()
    reg_id = url_info['reg_id']

    print(url_info['reg_name'])

    have_registrant = Registrant.objects.filter(reg_id=reg_id).exists()
    if not have_registrant:
        registrant_fields = {
            'reg_id': reg_id,
            'reg_name': url_info['reg_name'],
        }
        Registrant(**registrant_fields).save()

    have_document = Document.objects.filter(url=cleaned_url).exists()
    if not have_document:
        document_fields = {
            'url': cleaned_url,
            'reg_id': url_info['reg_id'],
            'doc_type': url_info['doc_type'],
            'stamp_date': url_info['stamp_date'],
        }
        Document(**document_fields).save()

    have_metadata = MetaData.objects.filter(link=cleaned_url).exists()
    if not have_metadata:
        # fetch the document by url to get the id the metadata row stores
        saved_doc = Document.objects.get(url=cleaned_url)
        metadata_fields = {
            'link': cleaned_url,
            'upload_date': datetime.date.today(),
            'reviewed': False,
            'processed': False,
            'is_amendment': False,
            'form': saved_doc.id,
        }
        MetaData(**metadata_fields).save()
Пример #3
0
    def handle(self, *args, **options):
        """Attach registrants and clients from historical docs to metadata.

        For every HistoricalDoc whose ``document_link`` matches at least one
        MetaData row, this looks up the Registrant by the historical
        relationship's registrant number (creating and saving one if none
        exists), looks up a Client by the search names produced for the
        principal (building an unsaved one if none matches), and then links
        both to each matching MetaData row, creating ClientReg rows for the
        new pairings.

        A document is skipped when: no MetaData has its link; more than one
        Registrant shares the registrant number; no search names are
        produced for the principal; more than one Client matches a name; or
        the location text does not match exactly one Location.
        """
        # known mismatches between the historical location text and the
        # Location table; built once because it never changes per document
        location_dict = {
            'SOMALI DEMOCRATIC REPUBLIC': 'SOMALIA',
            'KOREA REPUBLIC OF': 'SOUTH KOREA',
            'BOSNIA-HERZEGOVINA': 'BOSNIA AND HERZEGOVINA',
        }

        for doc in HistoricalDoc.objects.all():
            link = doc.document_link
            related_fara_metadata = MetaData.objects.filter(link=link)
            if len(related_fara_metadata) == 0:
                # skip for now
                # we may want to add the metadata
                continue

            # look for the registrant
            hist_reln = doc.historical_relationship
            registrant_no = hist_reln.registrant_no
            registrant_matches = Registrant.objects.filter(
                reg_id=registrant_no)
            if len(registrant_matches) > 1:
                # there's more than one registrant with the same ID
                # what should we do?
                print("Multiple registrants with that ID, moving on!")
                continue
            if len(registrant_matches) == 1:
                registrant = registrant_matches[0]
            else:
                registrant = Registrant(reg_id=registrant_no,
                                        reg_name=hist_reln.registrant)
                registrant.save()

            # look for the client; reset both names every iteration so a
            # value left over from the previous document can never be reused
            name = None
            clients = []
            for name in create_search_names(hist_reln.principal):
                clients = Client.objects.filter(client_name__iexact=name)
                if len(clients) > 0:
                    break

            if name is None:
                # no search names at all: nothing to match or create from
                print("No search names for that principal, moving on")
                continue

            if len(clients) > 1:
                # multiple matching clients, help! skipping for now.
                # Without this continue the loop below would use a client
                # from an earlier document (or fail on an unbound name).
                print("Multiple clients with that name, moving on")
                continue

            if len(clients) == 1:
                client = clients[0]
            else:
                # create the client
                # but first we have to match the location.
                location_text = hist_reln.location_represented
                location_text = location_dict.get(location_text,
                                                  location_text)

                locations = Location.objects.filter(
                    location__iexact=location_text)
                if len(locations) != 1:
                    print("Unknown location {}".format(location_text))
                    continue
                # name is the last search name tried, since none matched
                client = Client(location=locations[0],
                                client_name=name.strip(),
                                address1=hist_reln.address,
                                state=hist_reln.state)
                # only going to save client if it actually needs
                # to be added

            for md in related_fara_metadata:

                new_registrant = False

                # with registrants we can use their id system to dedup
                # so we'll just add them if they aren't in there
                if registrant not in md.registrant_set.all():
                    md.registrant_set.add(registrant)
                    md.save()
                    new_registrant = True
                    registrant.meta_data.add(link)

                # for clients we have to be more careful since there's no id
                # so we'll only add clients if the client_set is empty
                # otherwise we may add duplicates
                # and presumably only human-entered data is in the db and
                # it's good?
                if len(md.client_set.all()) == 0:
                    client.save()  # we're good to save now
                    md.client_set.add(client)
                    md.save()
                    for r in md.registrant_set.all():
                        cr = ClientReg(client_id=client, reg_id=r)
                        # saved before the add, as in the original flow
                        cr.save()
                        cr.meta_data.add(link)
                        cr.save()
                elif new_registrant:
                    # clients were already present; pair each of them with
                    # the registrant that was just attached
                    for c in md.client_set.all():
                        cr = ClientReg(client_id=c, reg_id=registrant)
                        cr.save()
                        cr.meta_data.add(link)
                        cr.save()
0
    def handle(self, *args, **options):
        """Attach registrants and clients from historical docs to metadata.

        For every HistoricalDoc whose ``document_link`` matches at least one
        MetaData row, this looks up the Registrant by the historical
        relationship's registrant number (creating and saving one if none
        exists), looks up a Client by the search names produced for the
        principal (building an unsaved one if none matches), and then links
        both to each matching MetaData row, creating ClientReg rows for the
        new pairings.

        A document is skipped when: no MetaData has its link; more than one
        Registrant shares the registrant number; no search names are
        produced for the principal; more than one Client matches a name; or
        the location text does not match exactly one Location.
        """
        # known mismatches between the historical location text and the
        # Location table; built once because it never changes per document
        location_dict = {
            'SOMALI DEMOCRATIC REPUBLIC': 'SOMALIA',
            'KOREA REPUBLIC OF': 'SOUTH KOREA',
            'BOSNIA-HERZEGOVINA': 'BOSNIA AND HERZEGOVINA',
        }

        for doc in HistoricalDoc.objects.all():
            link = doc.document_link
            related_fara_metadata = MetaData.objects.filter(link=link)
            if len(related_fara_metadata) == 0:
                # skip for now
                # we may want to add the metadata
                continue

            # look for the registrant
            hist_reln = doc.historical_relationship
            registrant_no = hist_reln.registrant_no
            registrant_matches = Registrant.objects.filter(
                reg_id=registrant_no)
            if len(registrant_matches) > 1:
                # there's more than one registrant with the same ID
                # what should we do?
                print("Multiple registrants with that ID, moving on!")
                continue
            if len(registrant_matches) == 1:
                registrant = registrant_matches[0]
            else:
                registrant = Registrant(reg_id=registrant_no,
                                        reg_name=hist_reln.registrant)
                registrant.save()

            # look for the client; reset both names every iteration so a
            # value left over from the previous document can never be reused
            name = None
            clients = []
            for name in create_search_names(hist_reln.principal):
                clients = Client.objects.filter(client_name__iexact=name)
                if len(clients) > 0:
                    break

            if name is None:
                # no search names at all: nothing to match or create from
                print("No search names for that principal, moving on")
                continue

            if len(clients) > 1:
                # multiple matching clients, help! skipping for now.
                # Without this continue the loop below would use a client
                # from an earlier document (or fail on an unbound name).
                print("Multiple clients with that name, moving on")
                continue

            if len(clients) == 1:
                client = clients[0]
            else:
                # create the client
                # but first we have to match the location.
                location_text = hist_reln.location_represented
                location_text = location_dict.get(location_text,
                                                  location_text)

                locations = Location.objects.filter(
                    location__iexact=location_text)
                if len(locations) != 1:
                    print("Unknown location {}".format(location_text))
                    continue
                # name is the last search name tried, since none matched
                client = Client(location=locations[0],
                                client_name=name.strip(),
                                address1=hist_reln.address,
                                state=hist_reln.state)
                # only going to save client if it actually needs
                # to be added

            for md in related_fara_metadata:

                new_registrant = False

                # with registrants we can use their id system to dedup
                # so we'll just add them if they aren't in there
                if registrant not in md.registrant_set.all():
                    md.registrant_set.add(registrant)
                    md.save()
                    new_registrant = True
                    registrant.meta_data.add(link)

                # for clients we have to be more careful since there's no id
                # so we'll only add clients if the client_set is empty
                # otherwise we may add duplicates
                # and presumably only human-entered data is in the db and
                # it's good?
                if len(md.client_set.all()) == 0:
                    client.save()  # we're good to save now
                    md.client_set.add(client)
                    md.save()
                    for r in md.registrant_set.all():
                        cr = ClientReg(client_id=client, reg_id=r)
                        # saved before the add, as in the original flow
                        cr.save()
                        cr.meta_data.add(link)
                        cr.save()
                elif new_registrant:
                    # clients were already present; pair each of them with
                    # the registrant that was just attached
                    for c in md.client_set.all():
                        cr = ClientReg(client_id=c, reg_id=registrant)
                        cr.save()
                        cr.meta_data.add(link)
                        cr.save()