Exemplo n.º 1
0
def remove_offices():
    """
    Delete the offices flagged for removal (overload the data provided by the importer).

    For every SIRET stored in `OfficeAdminRemove`, the matching document is
    deleted from ElasticSearch (a 404 response is tolerated), then the `Office`
    row with that SIRET, if one exists, is deleted and its PDF file is removed.
    """
    # Even when a single column is selected, the SQLAlchemy Query class hands
    # rows back as tuples, so the SIRET is taken out of each row explicitly.
    siret_rows = db_session.query(OfficeAdminRemove.siret).all()
    sirets = [row[0] for row in siret_rows]

    for siret in sirets:
        # Apply changes in ElasticSearch.
        try:
            es.Elasticsearch().delete(
                index=settings.ES_INDEX,
                doc_type=es.OFFICE_TYPE,
                id=siret,
            )
        except TransportError as error:
            # A 404 status is ignored; any other transport error is re-raised.
            if error.status_code != 404:
                raise

        # Apply changes in DB.
        office = Office.query.filter_by(siret=siret).first()
        if not office:
            continue

        try:
            office.delete()
        except OperationalError:
            # Retry once in case of deadlock error.
            time.sleep(10)
            office.delete()

        # Delete the current PDF.
        pdf_util.delete_file(office)
    def setUp(self):
        """Create and save the `Office` fixture, then delete its PDF file."""
        super().setUp()

        # Field values of the office created for the tests.
        office_fields = {
            'departement': '75',
            'siret': '78548035101646',
            'company_name': 'NICOLAS',
            'headcount': '03',
            'city_code': '75110',
            'zipcode': '75010',
            'naf': '4646Z',
            'tel': '0100000000',
            'score': 80,
            'x': 2.3488,
            'y': 48.8534,
        }
        self.office = Office(**office_fields)
        self.office.save()

        # Remove the PDF file if it already exists.
        pdf.delete_file(self.office)
Exemplo n.º 3
0
def update_offices():
    """
    Update offices (overload the data provided by the importer).

    Every `OfficeAdminUpdate` entry is applied, oldest first, to each SIRET it
    lists. For a SIRET that matches an `Office` row, the row is updated and
    saved, the ElasticSearch document is partially updated (two requests, see
    below) and the office PDF is deleted. SIRETs without a matching `Office`
    row are skipped.
    """
    # Good engineering eliminates users being able to do the wrong thing as much as possible.
    # But since it is possible to store multiple SIRETs, there is no longer any constraint of
    # uniqueness on a SIRET. As a result, although it shouldn't happen, there may be `n` entries
    # in `OfficeAdminUpdate` for the same SIRET. We order the query by creation date ASC so that
    # the most recent changes are applied last and take priority over any older ones.
    updates = db_session.query(OfficeAdminUpdate).order_by(
        asc(OfficeAdminUpdate.date_created)).all()

    for office_to_update in updates:

        for siret in OfficeAdminUpdate.as_list(office_to_update.sirets):

            office = Office.query.filter_by(siret=siret).first()
            if not office:
                continue

            # Apply changes in DB.
            # A falsy `new_*` value keeps the current value of the field.
            office.company_name = office_to_update.new_company_name or office.company_name
            office.office_name = office_to_update.new_office_name or office.office_name
            # A `remove_*` flag blanks the field and wins over any `new_*` value.
            office.email = '' if office_to_update.remove_email else (
                office_to_update.new_email or office.email)
            office.tel = '' if office_to_update.remove_phone else (
                office_to_update.new_phone or office.tel)
            office.website = '' if office_to_update.remove_website else (
                office_to_update.new_website or office.website)

            # These fields are copied unconditionally from the update entry.
            office.email_alternance = office_to_update.email_alternance
            office.phone_alternance = office_to_update.phone_alternance
            office.website_alternance = office_to_update.website_alternance

            # Note: we need to handle the case where score and score_alternance = 0,
            # hence the explicit `is not None` checks instead of a truthiness test.
            office.score = (
                office_to_update.score
                if office_to_update.score is not None else office.score)
            office.score_alternance = (
                office_to_update.score_alternance
                if office_to_update.score_alternance is not None
                else office.score_alternance)
            office.social_network = office_to_update.social_network
            office.contact_mode = office_to_update.contact_mode
            office.save()

            # Apply changes in ElasticSearch.
            body = {
                'doc': {
                    'email': office.email,
                    'phone': office.tel,
                    'website': office.website,
                    'flag_alternance': 1 if office.flag_alternance else 0
                }
            }

            (scores_by_rome, scores_alternance_by_rome, boosted_romes,
             boosted_alternance_romes) = get_scores_by_rome_and_boosted_romes(
                 office, office_to_update)
            if scores_by_rome:
                body['doc']['scores_by_rome'] = scores_by_rome
                body['doc']['boosted_romes'] = boosted_romes
            if scores_alternance_by_rome:
                body['doc']['scores_alternance_by_rome'] = scores_alternance_by_rome
                body['doc']['boosted_alternance_romes'] = boosted_alternance_romes

            # The update API makes partial updates: existing `scalar` fields are overwritten,
            # but `objects` fields are merged together.
            # https://www.elastic.co/guide/en/elasticsearch/guide/1.x/partial-updates.html
            # However `scores_by_rome` and `boosted_romes` need to be overwritten because they
            # may change over time.
            # To do this, we perform 2 requests: the first one resets `scores_by_rome` and
            # `boosted_romes` and the second one populates them.
            delete_body = {
                'doc': {
                    'scores_by_rome': None,
                    'boosted_romes': None,
                    'scores_alternance_by_rome': None,
                    'boosted_alternance_romes': None
                }
            }

            # Unfortunately these cannot easily be bulked :-(
            # The reason is there is no way to tell bulk to ignore missing documents (404)
            # for a partial update. Tried it and failed it on Oct 2017 @vermeer.
            client = es.Elasticsearch()
            # The order matters: reset first, then populate.
            for update_body in (delete_body, body):
                # A fresh `params` dict is passed to each call on purpose.
                client.update(index=settings.ES_INDEX,
                              doc_type=es.OFFICE_TYPE,
                              id=siret,
                              body=update_body,
                              params={'ignore': 404})

            # Delete the current PDF thus it will be regenerated at the next download attempt.
            pdf_util.delete_file(office)