Пример #1
0
def update_address():
    """Copy ``gs_data.full_address`` into ``address`` for 'ngo' documents.

    Selects documents of type 'ngo' that have ``gs_data.full_address`` and
    do not yet have an ``address_updated`` field, and hands the search body
    together with ``get_action`` to ``batch.run``.
    """
    def get_action(hit):
        """Build the update for one hit: new address plus the done-marker."""
        address = hit['_source']['gs_data']['full_address']
        return batch.get_update_action(hit, {
            'address': address,
            # Marker that the must_not clause below filters on, so a
            # document is not selected again once it has been updated.
            'address_updated': True
        })

    batch.run(
        {
            '_source': ['gs_data.full_address'],
            'query': {
                # The type filter used to sit next to 'bool' directly under
                # 'query' as a bare 'type': 'ngo' entry. A query object
                # holds a single clause, so it is expressed as a 'type'
                # query inside bool.must, like the other jobs in this file.
                'bool': {
                    'must': [{
                        'type': {
                            'value': 'ngo'
                        }
                    }, {
                        'exists': {
                            'field': 'gs_data.full_address'
                        }
                    }],
                    'must_not': [{
                        'exists': {
                            'field': 'address_updated'
                        }
                    }]
                }
            }
        }, get_action)
Пример #2
0
def update_city():
    """Rewrite the ``state`` field of 'ngo' documents from geocoding data.

    Loads ``../data/location.jsonl`` (one JSON object per line, each with
    ``_id`` and ``geocoding_data``) into a lookup keyed by document id, then
    runs a batch over 'ngo' documents that have a ``location`` field.

    Note: despite the function name, the only field written is ``state``.
    """
    geocoding_by_id = {}
    with open('../data/location.jsonl', 'r') as handle:
        for raw_line in handle:
            record = json.loads(raw_line)
            geocoding_by_id[record['_id']] = record['geocoding_data']

    def get_action(hit):
        """Return a one-element action list, or [] when nothing applies."""
        source = hit['_source']
        candidates = geocoding_by_id.get(hit['_id'])

        # Skip when there is no geocoding result for this document, or when
        # the stored state is missing or at most two characters long.
        if (not candidates or len(candidates) <= 0
                or len(source.get('state') or '') <= 2):
            return []

        # Only the first geocoding result is consulted.
        first_result = candidates[0]
        new_state = get_component(first_result, 'administrative_area_level_1')
        return [batch.get_update_action(hit, {'state': new_state})]

    is_ngo = {'type': {'value': 'ngo'}}
    has_location = {'exists': {'field': 'location'}}
    search_body = {
        '_source': ['state'],
        'query': {'bool': {'must': [is_ngo, has_location]}},
    }
    batch.run(search_body, get_action)
Пример #3
0
def copy_fb_location():
    """Copy ``fb_data.location`` into ``location`` where it is missing.

    Targets 'ngo' documents that have ``fb_data.location`` but no
    ``location`` field, and passes the search body and ``get_action`` to
    ``batch.run``.
    """
    def get_action(hit):
        """Build the update that sets ``location`` from the hit's fb_data."""
        fb_location = deep_get(hit, '_source.fb_data.location')
        update_doc = {'location': fb_location}
        return batch.get_update_action(hit, update_doc)

    required = [
        {'type': {'value': 'ngo'}},
        {'exists': {'field': 'fb_data.location'}},
    ]
    excluded = [
        {'exists': {'field': 'location'}},
    ]
    search_body = {
        '_source': ['fb_data.location'],
        'query': {
            'bool': {
                'must': required,
                'must_not': excluded,
            }
        },
    }
    batch.run(search_body, get_action)
Пример #4
0
def update_source():
    def get_action(hit):
        if 'source_link' not in hit['_source']:
            print hit

        source_link = hit['_source']['source_link']
        source = None

        if 'guidestar' in source_link:
            source = 'guidestar'
        elif 'unodc' in source_link:
            source = 'unodc'

        return batch.get_update_action(hit, {'source': source})

    batch.run(
        {
            '_source': ['source_link'],
            'query': {
                'bool': {
                    'must': [{
                        'type': {
                            'value': 'ngo'
                        }
                    }],
                    'must_not': [{
                        'exists': {
                            'field': 'source'
                        }
                    }]
                }
            }
        }, get_action)
Пример #5
0
def copy_data():
    """Emit every matched document as an action aimed at ``ngos``/``ngo``.

    Runs a ``match_all`` search through ``batch.run``; each hit is turned
    into an action dict that keeps the hit's ``_id`` and ``_source`` and
    names 'ngos' as ``_index`` and 'ngo' as ``_type``.
    """
    def get_action(hit):
        """Build the action dict for one hit."""
        action = {'_index': 'ngos', '_type': 'ngo'}
        action['_id'] = hit['_id']
        action['_source'] = hit['_source']
        return action

    everything = {'query': {'match_all': {}}}
    batch.run(everything, get_action)
Пример #6
0
def update_guidestar():
    """Re-fetch GuideStar data for documents flagged as not found.

    Selects documents that have ``source_data.organization_id`` and whose
    ``gs_data.not_found`` matches ``True``. For each hit the organization
    is requested from ``guidestar.get`` with retries, and ``gs_data`` is
    set to the result (``None`` if every attempt failed).
    """
    max_attempts = 11   # same number of tries as the former `<= 10` loop
    retry_delay = 5     # seconds between attempts

    def get_action(hit):
        """Fetch GuideStar data for one hit and build the update action."""
        org_id = hit['_source']['source_data']['organization_id']
        gs_data = None

        for attempt in range(1, max_attempts + 1):
            try:
                gs_data = guidestar.get(org_id)
            except Exception as ex:
                # Best effort: report the failure and try again.
                print org_id, 'Error:', ex

            if gs_data:
                break

            print org_id, 'Not Found'
            # No point in waiting after the last attempt; the old loop
            # slept once more before giving up.
            if attempt < max_attempts:
                time.sleep(retry_delay)

        print org_id, 'Found' if gs_data else 'Really Not Found'
        # Pass the whole hit, as every other job in this file does; this
        # call used to pass only hit['_id'].
        return batch.get_update_action(hit, {'gs_data': gs_data})

    batch.run(
        {
            '_source': ['source_data.organization_id'],
            'query': {
                'bool': {
                    'must': [{
                        'exists': {
                            'field': 'source_data.organization_id'
                        }
                    }, {
                        'match': {
                            "gs_data.not_found": {
                                "query": True
                            }
                        }
                    }]
                },
            }
        }, get_action)
Пример #7
0
def clean_address():
    """Extract the postal address from raw 'charity_nav' address text.

    Selects 'ngo' documents whose ``source`` is 'charity_nav'. The part of
    ``address`` captured between the first whitespace run and a following
    ``tel:`` / ``EIN:`` marker is passed through ``clean_text`` and written
    back to ``address``.
    """
    def get_action(hit):
        """Build the address update for one hit."""
        address = hit['_source']['address']
        m = re.search(r'.*[\r\n\t\s]+([\w\W]+?)(tel|EIN):', address, re.M)

        # The query does not exclude documents handled by an earlier run,
        # so an address without the tel:/EIN: marker can show up here.
        # Previously that raised AttributeError on m.group(); now the
        # address is written back unchanged.
        if m is not None:
            address = clean_text(m.group(1))

        return batch.get_update_action(hit, {'address': address})

    batch.run(
        {
            '_source': ['address'],
            'query': {
                'bool': {
                    'must': [{
                        'type': {
                            'value': 'ngo'
                        }
                    }, {
                        'term': {
                            'source': 'charity_nav'
                        }
                    }]
                }
            }
        }, get_action)
Пример #8
0
def update_coords():
    """Geocode the address of 'ngo' documents that have no location yet.

    Selects 'ngo' documents with an ``address`` value and neither a
    ``location`` nor a ``location_error`` field. Each address is cleaned
    with ``clean_text`` and looked up via ``geocoding.get``; the update
    stores the cleaned address, the raw results and, when available, the
    first result's formatted address and coordinates.
    """
    def get_action(hit):
        """Geocode one hit and build its update action."""
        address = clean_text(hit['_source']['address'])

        try:
            matches = geocoding.get(address)
        except Exception as ex:
            print address, 'Error:', ex
            # A failed lookup is recorded with the same 'NotFound' marker
            # as an empty result.
            failure = {
                'address': address,
                'geocoding_data': None,
                'location_error': 'NotFound'
            }
            return batch.get_update_action(hit, failure)

        count = len(matches)
        update = {'address': address, 'geocoding_data': matches}

        if count == 0:
            update['location_error'] = 'NotFound'
        else:
            # The first result supplies the coordinates even when several
            # results came back.
            best = matches[0]
            update['formatted_address'] = best['formatted_address']
            update['location'] = {
                'lat': best['geometry']['location']['lat'],
                'lon': best['geometry']['location']['lng']
            }
            if count != 1:
                update['location_error'] = 'Ambiguous'

        return batch.get_update_action(hit, update)

    required = [
        {'type': {'value': 'ngo'}},
        {'wildcard': {'address': '*'}},
    ]
    excluded = [
        {'exists': {'field': 'location'}},
        {'exists': {'field': 'location_error'}},
    ]
    search_body = {
        'query': {
            'bool': {
                'must': required,
                'must_not': excluded,
            }
        }
    }
    batch.run(search_body, get_action)