Пример #1
0
class Record(dsl.Document):
    """Base record type shared by Person, FarRecord and WraRecord.

    The helpers below are static methods that receive the concrete document
    class as their first argument (``class_``); the subclasses wrap them.
    """
    fulltext = dsl.Text()  # combined search text; see assemble_fulltext()

    #class Index:
    #    name = ???
    # We could define Index here but we don't because we want to be consistent
    # with ddr-local and ddr-public.

    @staticmethod
    def from_dict(class_, fieldnames, record_id, data):
        """Build a record of type class_ from a dict of field values.

        Side effect: every key of `data` that contains 'date' and whose value
        is '' is removed from `data` itself (the caller's dict is modified).

        Fields rejected with a ValidationException are not set; instead a
        "field:value" string is appended to `record.errors`.

        @param class_: Person, FarRecord, WraRecord
        @param fieldnames: list of field names to copy from data
        @param record_id: str
        @param data: dict
        @returns: Record
        """
        # Elasticsearch 7 chokes on empty ('') dates so remove them from data
        empty_dates = [
            fieldname for fieldname, val in data.items()
            if ('date' in fieldname) and (val == '')
        ]
        for fieldname in empty_dates:
            data.pop(fieldname)
        # set values
        record = class_(meta={'id': record_id})
        record.errors = []
        for field in fieldnames:
            value = data.get(field)
            if not value:
                # missing and blank values are skipped
                continue
            try:
                setattr(record, field, value)
            except dsl.exceptions.ValidationException:
                # f-string rather than ':'.join() so that a non-str value
                # cannot raise TypeError while the error is being reported
                record.errors.append(f'{field}:{value}')
        return record

    @staticmethod
    def from_hit(class_, hit):
        """Build Record object from Elasticsearch hit

        The document ID is read from the hit's metadata (hit.meta.id).

        @param class_: Person, FarRecord, WraRecord
        @param hit: elasticsearch_dsl Hit
        @returns: Record, or None if the hit has no ID
        """
        hit_d = hit.__dict__['_d_']
        # Earlier code tested an undefined `record_id`; take it from the hit.
        meta = getattr(hit, 'meta', None)
        record_id = getattr(meta, 'id', None)
        if not record_id:
            return None
        record = class_(meta={'id': record_id})
        fieldnames = definitions.FIELDS_MASTER
        for field in fieldnames:
            setattr(record, field, _hitvalue(hit_d, field))
        # Use the module-level helper, as the subclasses' from_dict() do;
        # no assemble_fulltext() method is defined on this class.
        # NOTE(review): confirm FIELDS_MASTER is the right field list here.
        assemble_fulltext(record, fieldnames)
        return record

    @staticmethod
    def field_values(class_, field, es=None, index=None):
        """Returns unique values and counts for specified field.

        Runs a terms aggregation (at most 1000 buckets) over documents of
        type class_.

        @param class_: Person, FarRecord, WraRecord
        @param field: str Name of the field to aggregate on
        @param es: Elasticsearch connection (used only if index is also given)
        @param index: str Index name (used only if es is also given)
        @returns: list of (value, doc_count) tuples
        """
        if es and index:
            s = dsl.Search(using=es, index=index)
        else:
            s = dsl.Search()
        s = s.doc_type(class_)
        # aggs.bucket() modifies the Search object in place
        s.aggs.bucket('bucket', 'terms', field=field, size=1000)
        response = s.execute()
        return [
            (bucket['key'], bucket['doc_count'])
            for bucket in response.aggregations['bucket']['buckets']
        ]

    @staticmethod
    def fields_enriched(record,
                        label=False,
                        description=False,
                        list_fields=None):
        """Returns dict for each field with value and label etc for display

        Only fields that have a truthy value and a truthy 'display' setting
        in FIELD_DEFINITIONS are included.  Each entry is a dict with the
        keys 'field', 'label', 'description', 'value_raw' and 'value'.

        # list fields and values in order
        >>> for data in Record.fields_enriched(record, label=True).values():
        >>>     print(data['label'], data['value'])

        @param record: Record instance (its class must define Index.model)
        @param label: boolean Get pretty label for fields.
        @param description: boolean Get pretty description for fields.
        @param list_fields: list If non-blank get pretty values for these
            fields only; if blank get pretty values for all fields.
        @returns: OrderedDict of fieldname -> dict
        """
        model = record.__class__.Index.model
        details = OrderedDict()
        for fieldname in FIELDS_BY_MODEL[model]:
            try:
                value = getattr(record, fieldname)
            except AttributeError:
                continue
            field_def = definitions.FIELD_DEFINITIONS[model].get(fieldname, {})
            if not (value and field_def.get('display', None)):
                continue
            # display datetimes as dates
            if isinstance(value, datetime):
                value = value.date()
            data = {
                'field': fieldname,
                'label': fieldname,
                'description': '',
                'value_raw': value,
                'value': value,
            }
            if (not list_fields) or (fieldname in list_fields):
                # get pretty value from FIELD_DEFINITIONS
                choices = field_def.get('choices', {})
                if choices and choices.get(value, None):
                    data['value'] = choices[value]
            if label:
                data['label'] = field_def.get('label', fieldname)
            if description:
                data['description'] = field_def.get('description', '')
            details[fieldname] = data
        return details
Пример #2
0
class WraRecord(Record):
    """Document model for WRA records.

    TODO review field types for aggs,filtering
    """
    wra_record_id = dsl.Keyword()
    facility = dsl.Keyword()
    lastname = dsl.Text()
    firstname = dsl.Text()
    middleinitial = dsl.Text()
    birthyear = dsl.Keyword()
    gender = dsl.Keyword()
    originalstate = dsl.Keyword()
    familyno = dsl.Keyword()
    individualno = dsl.Keyword()
    notes = dsl.Text()
    assemblycenter = dsl.Keyword()
    originaladdress = dsl.Keyword()
    birthcountry = dsl.Keyword()
    fatheroccupus = dsl.Keyword()
    fatheroccupabr = dsl.Keyword()
    yearsschooljapan = dsl.Keyword()
    gradejapan = dsl.Keyword()
    schooldegree = dsl.Keyword()
    yearofusarrival = dsl.Keyword()
    timeinjapan = dsl.Keyword()
    ageinjapan = dsl.Keyword()
    militaryservice = dsl.Keyword()
    maritalstatus = dsl.Keyword()
    ethnicity = dsl.Keyword()
    birthplace = dsl.Keyword()
    citizenshipstatus = dsl.Keyword()
    highestgrade = dsl.Keyword()
    language = dsl.Keyword()
    religion = dsl.Keyword()
    occupqual1 = dsl.Keyword()
    occupqual2 = dsl.Keyword()
    occupqual3 = dsl.Keyword()
    occupotn1 = dsl.Keyword()
    occupotn2 = dsl.Keyword()
    wra_filenumber = dsl.Keyword()
    # nested sub-documents
    person = dsl.Nested(NestedPerson)
    family = dsl.Nested(ListFamily)
    timestamp = dsl.Date()

    class Index:
        model = 'wrarecord'
        name = f'{INDEX_PREFIX}wrarecord'

    def __repr__(self):
        return '<WraRecord {}>'.format(self.wra_record_id)

    @staticmethod
    def get(oid, request):
        """Fetch a WRA record for the web app via docstore_object()

        @param oid: record ID
        @param request: passed through to docstore_object()
        """
        return docstore_object(request, 'wrarecord', oid)

    @staticmethod
    def from_dict(wra_record_id, data):
        """Build a WraRecord from a dict, leaving out excluded fields

        @param wra_record_id: str
        @param data: dict May contain a 'family' list of dicts, each with
            'wra_record_id', 'lastname' and 'firstname' keys.
        @returns: WraRecord
        """
        # private fields are listed in EXCLUDE_FIELDS_WRARECORD
        public_fields = []
        for name in FIELDS_WRARECORD:
            if name not in EXCLUDE_FIELDS_WRARECORD:
                public_fields.append(name)
        wra = Record.from_dict(WraRecord, public_fields, wra_record_id, data)
        assemble_fulltext(wra, public_fields)
        # family is always a list; empty when data has no family members
        relatives = data.get('family') or []
        wra.family = [
            {
                'wra_record_id': relative['wra_record_id'],
                'lastname': relative['lastname'],
                'firstname': relative['firstname'],
            }
            for relative in relatives
        ]
        return wra

    @staticmethod
    def from_hit(hit):
        """Build WraRecord object from Elasticsearch hit

        Thin wrapper around Record.from_hit().

        @param hit
        @returns: WraRecord
        """
        return Record.from_hit(WraRecord, hit)

    @staticmethod
    def field_values(field, es=None, index=None):
        """Returns unique values and counts for specified field.

        Thin wrapper around Record.field_values().
        """
        return Record.field_values(WraRecord, field, es=es, index=index)
Пример #3
0
class ListFamily(dsl.InnerDoc):
    """Inner document for one family-member entry, keyed by FAR record ID.

    Holds family_number and far_record_id as keywords and last_name /
    first_name as text.

    NOTE(review): SOURCE also contains a second ListFamily with
    wra_family_no / nr_id / preferred_name fields; presumably the two live
    in different modules - confirm they never share a namespace.
    """
    family_number = dsl.Keyword()
    far_record_id = dsl.Keyword()
    last_name = dsl.Text()
    first_name = dsl.Text()
Пример #4
0
class FarRecord(Record):
    """Document model for FAR records.

    TODO review field types for aggs,filtering
    """
    far_record_id = dsl.Keyword()
    facility = dsl.Keyword()
    original_order = dsl.Keyword()
    family_number = dsl.Keyword()
    far_line_id = dsl.Keyword()
    last_name = dsl.Text()
    first_name = dsl.Text()
    other_names = dsl.Text()
    date_of_birth = dsl.Keyword()
    year_of_birth = dsl.Keyword()
    sex = dsl.Keyword()
    marital_status = dsl.Keyword()
    citizenship = dsl.Keyword()
    alien_registration = dsl.Keyword()
    entry_type_code = dsl.Keyword()
    entry_type = dsl.Keyword()
    entry_category = dsl.Keyword()
    entry_facility = dsl.Keyword()
    pre_evacuation_address = dsl.Keyword()
    pre_evacuation_state = dsl.Keyword()
    date_of_original_entry = dsl.Keyword()
    departure_type_code = dsl.Keyword()
    departure_type = dsl.Keyword()
    departure_category = dsl.Keyword()
    departure_facility = dsl.Keyword()
    departure_date = dsl.Keyword()
    departure_state = dsl.Keyword()
    camp_address_original = dsl.Keyword()
    camp_address_block = dsl.Keyword()
    camp_address_barracks = dsl.Keyword()
    camp_address_room = dsl.Keyword()
    reference = dsl.Keyword()
    original_notes = dsl.Text()
    # nested sub-documents
    person = dsl.Nested(NestedPerson)
    family = dsl.Nested(ListFamily)
    timestamp = dsl.Date()

    class Index:
        model = 'farrecord'
        name = f'{INDEX_PREFIX}farrecord'

    def __repr__(self):
        return '<FarRecord {}>'.format(self.far_record_id)

    @staticmethod
    def get(oid, request):
        """Fetch a FAR record for the web app via docstore_object()

        @param oid: record ID
        @param request: passed through to docstore_object()
        """
        return docstore_object(request, 'farrecord', oid)

    @staticmethod
    def from_dict(far_record_id, data):
        """Build a FarRecord from a dict, leaving out excluded fields

        @param far_record_id: str
        @param data: dict May contain a 'family' list of dicts, each with
            'far_record_id', 'last_name' and 'first_name' keys.
        @returns: FarRecord
        """
        # private fields are listed in EXCLUDE_FIELDS_FARRECORD
        public_fields = []
        for name in FIELDS_FARRECORD:
            if name not in EXCLUDE_FIELDS_FARRECORD:
                public_fields.append(name)
        far = Record.from_dict(FarRecord, public_fields, far_record_id, data)
        assemble_fulltext(far, public_fields)
        # family is always a list; empty when data has no family members
        relatives = data.get('family') or []
        far.family = [
            {
                'far_record_id': relative['far_record_id'],
                'last_name': relative['last_name'],
                'first_name': relative['first_name'],
            }
            for relative in relatives
        ]
        return far

    @staticmethod
    def from_hit(hit):
        """Build FarRecord object from Elasticsearch hit

        Thin wrapper around Record.from_hit().

        @param hit
        @returns: FarRecord
        """
        return Record.from_hit(FarRecord, hit)

    @staticmethod
    def field_values(field, es=None, index=None):
        """Returns unique values and counts for specified field.

        Thin wrapper around Record.field_values().
        """
        return Record.field_values(FarRecord, field, es=es, index=index)
Пример #5
0
class NestedPerson(dsl.InnerDoc):
    """Inner document holding a person reference: nr_id plus preferred_name.

    Used as the element type of the nested `person` field on WraRecord and
    FarRecord.
    """
    nr_id = dsl.Keyword()
    preferred_name = dsl.Text()
Пример #6
0
class Person(Record):
    """Person record model
    TODO review field types for aggs,filtering
    """
    nr_id = dsl.Keyword()
    family_name = dsl.Text()
    given_name = dsl.Text()
    given_name_alt = dsl.Text()
    other_names = dsl.Text()
    middle_name = dsl.Text()
    prefix_name = dsl.Text()
    suffix_name = dsl.Text()
    jp_name = dsl.Text()
    preferred_name = dsl.Text()
    birth_date = dsl.Date()
    birth_date_text = dsl.Text()
    birth_place = dsl.Text()
    death_date = dsl.Date()
    death_date_text = dsl.Text()
    wra_family_no = dsl.Text()
    wra_individual_no = dsl.Text()
    citizenship = dsl.Keyword()
    alien_registration_no = dsl.Text()
    gender = dsl.Keyword()
    preexclusion_residence_city = dsl.Keyword()
    preexclusion_residence_state = dsl.Keyword()
    postexclusion_residence_city = dsl.Keyword()
    postexclusion_residence_state = dsl.Keyword()
    exclusion_order_title = dsl.Keyword()
    exclusion_order_id = dsl.Keyword()
    timestamp = dsl.Date()
    # nested lists of related documents
    facilities = dsl.Nested(ListFacility)
    far_records = dsl.Nested(ListFarRecord)
    wra_records = dsl.Nested(ListWraRecord)
    family = dsl.Nested(ListFamily)

    class Index:
        model = 'person'
        name = f'{INDEX_PREFIX}person'

    def __repr__(self):
        return f'<Person {self.nr_id}>'

    @staticmethod
    def get(oid, request):
        """Get record for web app

        @param oid: record ID
        @param request: passed through to docstore_object()
        """
        return docstore_object(request, 'person', oid)

    @staticmethod
    def from_dict(nr_id, data):
        """Build a Person from a dict, leaving out excluded fields

        @param nr_id: str
        @param data: dict May contain a 'family' list of dicts, each with
            'nr_id' and 'preferred_name' keys.
        @returns: Person
        """
        # exclude private fields
        fieldnames = [
            f for f in FIELDS_PERSON if f not in EXCLUDE_FIELDS_PERSON
        ]
        record = Record.from_dict(Person, fieldnames, nr_id, data)
        assemble_fulltext(record, fieldnames)
        # family is always a list; empty when data has no family members
        record.family = []
        if data.get('family'):
            record.family = [{
                'nr_id': person['nr_id'],
                'preferred_name': person['preferred_name'],
            } for person in data['family']]
        return record

    @staticmethod
    def from_hit(hit):
        """Build Person object from Elasticsearch hit
        @param hit
        @returns: Person
        """
        return Record.from_hit(Person, hit)

    @staticmethod
    def from_id(nr_id, request=None):
        """Get the Person with the given ID via Person.get()

        Person.get() takes (oid, request), so the earlier one-argument call
        always raised TypeError.  The request is now forwarded.

        @param nr_id: str
        @param request: passed through to Person.get().
            NOTE(review): whether docstore_object() accepts None is not
            visible here - callers should supply the request.
        """
        return Person.get(nr_id, request)

    @staticmethod
    def field_values(field, es=None, index=None):
        """Returns unique values and counts for specified field.
        """
        return Record.field_values(Person, field, es, index)
Пример #7
0
class ListFamily(dsl.InnerDoc):
    """Inner document for one family-member entry, keyed by nr_id.

    Holds wra_family_no and nr_id as keywords and preferred_name as text.

    NOTE(review): SOURCE also contains a second ListFamily with
    family_number / far_record_id / last_name / first_name fields;
    presumably the two live in different modules - confirm they never share
    a namespace.
    """
    wra_family_no = dsl.Keyword()
    nr_id = dsl.Keyword()
    preferred_name = dsl.Text()
Пример #8
0
class ListWraRecord(dsl.InnerDoc):
    """Inner document summarizing a WRA record: its ID and name fields.

    Used as the element type of the nested `wra_records` field on Person.
    """
    wra_record_id = dsl.Keyword()
    lastname = dsl.Text()
    firstname = dsl.Text()
    middleinitial = dsl.Text()
Пример #9
0
class ListFarRecord(dsl.InnerDoc):
    """Inner document summarizing a FAR record: its ID and name fields.

    Used as the element type of the nested `far_records` field on Person.
    """
    far_record_id = dsl.Keyword()
    last_name = dsl.Text()
    first_name = dsl.Text()