Exemplo n.º 1
0
    def normalize(item: Item) -> School:
        """Map a raw North Rhine-Westphalia record onto a ``School``.

        The name is assembled from the ``Schulbezeichnung_1..3`` fields,
        coded values are resolved through ``NordRheinWestfalenHelper`` and
        the UTM coordinates are transformed to ``epsg:4326``.

        Args:
            item: Scraped record; read via ``item.get``.

        Returns:
            The populated ``School`` with an ``NW-`` prefixed id.  ``fax``
            and ``phone`` are ``None`` when neither the area code nor the
            number is present.
        """

        def join_number(prefix, number):
            # Formatting the parts directly renders absent values as the
            # literal text "None" (e.g. "NoneNone"), so skip missing parts
            # and report a completely absent number as None.
            if prefix is None and number is None:
                return None
            return "".join(str(part) for part in (prefix, number)
                           if part is not None)

        name = " ".join([
            item.get("Schulbezeichnung_1", ""),
            item.get("Schulbezeichnung_2", ""),
            item.get("Schulbezeichnung_3", "")
        ]).strip()
        helper = NordRheinWestfalenHelper()
        right, high = item.get('UTMRechtswert'), item.get('UTMHochwert')
        this_projection = Proj(item.get('EPSG'))
        target_projection = Proj('epsg:4326')
        # NOTE(review): the result is unpacked as (lon, lat); confirm this
        # matches the axis order of the installed pyproj version.
        lon, lat = transform(this_projection, target_projection, right, high)

        return School(
            name=name,
            id='NW-{}'.format(item.get('Schulnummer')),
            address=item.get('Strasse'),
            zip=item.get("PLZ"),
            city=item.get('Ort'),
            website=item.get('Homepage'),
            email=item.get('E-Mail'),
            legal_status=helper.resolve('rechtsform', item.get('Rechtsform')),
            school_type=helper.resolve('schulform', item.get('Schulform')),
            provider=helper.resolve('provider', item.get('Traegernummer')),
            fax=join_number(item.get('Faxvorwahl'), item.get('Fax')),
            phone=join_number(item.get('Telefonvorwahl'), item.get('Telefon')),
            latitude=lat,
            longitude=lon,
        )
Exemplo n.º 2
0
    def normalize(item: Item) -> School:
        """Map a raw record onto a ``School`` with an ``SN-`` prefixed id.

        ``Postanschrift`` is split around five-digit runs: the text before
        the first run becomes the street address, the run itself the postal
        code and the text following it (up to any further run) the city.
        Without such a run the whole value is kept as the city and address
        and postal code are empty strings.

        Args:
            item: Scraped record; read via ``item.get``.

        Returns:
            The populated ``School``.  ``phone`` is the first value of the
            ``phone_numbers`` mapping, or ``None`` when there is none.
        """
        # Tolerate a missing phone mapping instead of failing on None.
        numbers = list((item.get('phone_numbers') or {}).values())
        phone_numbers = numbers[0] if numbers else None

        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        zip_pattern = r'\d{5}'
        postal_address = (item.get('Postanschrift') or '').strip()
        address_objects = re.split(zip_pattern, postal_address)
        if len(address_objects) == 1:
            # re.split always yields at least one element, so a single
            # element means that no postal code was found.
            address = ''
            zip_code = ''
            city = address_objects[0].strip()
        else:
            address = address_objects[0].strip()
            zip_code = re.findall(zip_pattern, postal_address)[0]
            city = address_objects[1].strip()

        return School(name=item.get('title'),
                      id='SN-{}'.format(item.get('Dienststellenschlüssel')),
                      address=address,
                      zip=zip_code,
                      city=city,
                      website=item.get('Homepage'),
                      email=item.get('E-Mail'),
                      school_type=item.get('Einrichtungsart'),
                      legal_status=item.get('Rechtsstellung'),
                      provider=item.get('Schulträger'),
                      fax=item.get('Telefax'),
                      phone=phone_numbers,
                      director=item.get('Schulleiter')
                      or item.get('Schulleiter/in'))
Exemplo n.º 3
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with a ``BY-`` prefixed id from a scraped item.

     The ``city`` field is split on whitespace: the first token is used as
     the postal code, the remaining tokens are re-joined as the city name.
     """
     postal_code, *town_words = item.get('city').split()
     fields = {
         'name': item.get('name'),
         'phone': item.get('phone'),
         'website': item.get('web'),
         'address': item.get('street'),
         'city': ' '.join(town_words),
         'zip': postal_code,
         'id': f"BY-{item.get('number')}",
     }
     return School(**fields)
Exemplo n.º 4
0
 def update_or_create(item: SchoolPipelineItem) -> School:
     """Return the stored school for ``item``, updating or creating it.

     When a row with ``item.info['id']`` exists, its columns are updated
     from ``item.info`` plus ``raw=item.item`` and the previously loaded
     object is returned.  Otherwise a new, not yet added ``School`` is
     built from the same values.
     """
     school_id = item.info['id']
     existing = session.query(School).get(school_id)
     if not existing:
         return School(**item.info, raw=item.item)

     values = {**item.info, 'raw': item.item}
     session.query(School).filter_by(id=school_id).update(values)
     return existing
Exemplo n.º 5
0
 def process_item(self, item, spider):
     """Persist the school derived from ``item`` and return it.

     The record comes from ``School.update_or_create``.  A failing
     add/commit is logged as a warning and rolled back; the record is
     returned either way.
     """
     record = School.update_or_create(item)
     try:
         session.add(record)
         session.commit()
     except SQLAlchemyError as exc:
         # First a fixed marker line, then the exception itself.
         for message in ('Error when putting to DB', exc):
             logging.warning(message)
         session.rollback()
     return record
Exemplo n.º 6
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``HE-`` prefixed id from a scraped item."""
     fields = {
         'name': item.get('name'),
         'phone': item.get('telefon'),
         'fax': item.get('fax'),
         'website': item.get('homepage'),
         'address': item.get('straße'),
         'city': item.get('ort'),
         'zip': item.get('plz'),
         'school_type': item.get('schultyp'),
         'id': f"HE-{item.get('id')}",
     }
     return School(**fields)
Exemplo n.º 7
0
 def normalize(self, item: Item) -> School:
     """Build a ``School`` with an ``RP-`` prefixed id from a scraped item.

     ``Adresse`` and ``Ort`` are passed through unchanged.
     """
     school_id = f"RP-{item.get('id')}"
     return School(
         name=item.get('name'),
         id=school_id,
         address=item.get('Adresse'),
         city=item.get('Ort'),
         website=item.get('Internet'),
         email=item.get('E-Mail'),
         school_type=item.get('Schulform'),
         fax=item.get('Fax'),
         phone=item.get('Telefon'),
     )
Exemplo n.º 8
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``SH-`` prefixed id from a scraped item."""
     school_id = f"SH-{item.get('Dienststellennummer')}"
     return School(
         name=item.get('name'),
         id=school_id,
         address=item.get('Strasse'),
         zip=item.get("Postleitzahl"),
         city=item.get("Ort"),
         email=item.get('E-Mail'),
         school_type=item.get('Schularten'),
         fax=item.get('Fax'),
         phone=item.get('Telefon'),
         director=item.get('Schulleitung'),
     )
Exemplo n.º 9
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``SA-`` prefixed id from a scraped item.

     ``Adresse`` is split around five-digit runs: the text before the
     first run is the street address, the run itself the postal code and
     the text following it (up to any further run) the city.  When no such
     run exists the whole text is kept as the address and postal code and
     city are ``None``.

     Args:
         item: Scraped record; read via ``item.get``.

     Returns:
         The populated ``School``.
     """
     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     zip_pattern = r'\d{5}'
     raw_address = (item.get('Adresse') or '').strip()
     parts = re.split(zip_pattern, raw_address)
     if len(parts) > 1:
         street = parts[0].strip()
         zip_code = re.findall(zip_pattern, raw_address)[0]
         city = parts[1].strip()
     else:
         # No postal code found: keep the unparsed text instead of raising
         # IndexError on the missing match.
         street, zip_code, city = raw_address, None, None

     return School(name=item.get('Name'),
                   id='SA-{}'.format(item.get('ID')),
                   address=street,
                   zip=zip_code,
                   city=city,
                   website=item.get('Homepage'),
                   email=item.get('E-Mail'),
                   fax=item.get('Telefax'),
                   phone=item.get('Telefon'),
                   )
Exemplo n.º 10
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``RP-`` prefixed id from a scraped item.

     ``Ort`` is split around five-digit runs: the first run is the postal
     code and the text following it (up to any further run) the city.
     When no such run exists the whole text is kept as the city and the
     postal code is ``None``.

     Args:
         item: Scraped record; read via ``item.get``.

     Returns:
         The populated ``School``.
     """
     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     zip_pattern = r'\d{5}'
     place = (item.get('Ort') or '').strip()
     parts = re.split(zip_pattern, place)
     if len(parts) > 1:
         zip_code = re.findall(zip_pattern, place)[0]
         city = parts[1].strip()
     else:
         # No postal code found: keep the unparsed text instead of raising
         # IndexError on the missing match.
         zip_code, city = None, place

     return School(name=item.get('name'),
                   id='RP-{}'.format(item.get('id')),
                   address=item.get('Adresse'),
                   city=city,
                   zip=zip_code,
                   website=item.get('Internet'),
                   email=item.get('E-Mail'),
                   school_type=item.get('Schulform'),
                   fax=item.get('Fax'),
                   phone=item.get('Telefon'))
Exemplo n.º 11
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``SN-`` prefixed id from a scraped item.

     ``Postanschrift`` and ``phone_numbers`` are passed through unchanged.
     """
     fields = {
         'name': item.get('title'),
         'id': f"SN-{item.get('Dienststellenschlüssel')}",
         'address': item.get('Postanschrift'),
         'website': item.get('Homepage'),
         'email': item.get('E-Mail'),
         'school_type': item.get('Einrichtungsart'),
         'legal_status': item.get('Rechtsstellung'),
         'provider': item.get('Schulträger'),
         'fax': item.get('Telefax'),
         'phone': item.get('phone_numbers'),
         'director': item.get('Schulleiter'),
     }
     return School(**fields)
Exemplo n.º 12
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``NI-`` prefixed id from a scraped item.

     ``Ort`` is split on whitespace: the first token is the postal code,
     the remaining tokens are re-joined as the city.  ``fax`` is always
     ``None``.
     """
     tokens = item.get('Ort').split()
     postal_code = tokens[0]
     town = ' '.join(tokens[1:])
     return School(
         name=item.get('Schule'),
         phone=item.get('Tel'),
         fax=None,
         email=item.get('E-Mail'),
         website=item.get('Homepage'),
         address=item.get('Straße'),
         zip=postal_code,
         city=town,
         school_type=item.get("Schul-gliederung(en)"),
         id=f"NI-{item.get('Schulnummer')}",
     )
Exemplo n.º 13
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``SL-`` prefixed id from a scraped item.

     The id is derived from the ``telefon`` value with spaces replaced by
     hyphens, so that field has to be present.
     """
     phone_number = item.get('telefon')
     fields = {
         'name': item.get('name'),
         'phone': phone_number,
         'fax': item.get('telefax'),
         'website': item.get('homepage'),
         'email': item.get('e-mail'),
         'address': item.get('straße'),
         'city': item.get('ort'),
         'zip': item.get('plz'),
         'school_type': item.get('schultyp'),
         'director': item.get('schulleitung'),
         'id': f"SL-{phone_number.replace(' ', '-')}",
     }
     return School(**fields)
Exemplo n.º 14
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with a ``BW-`` prefixed id from a scraped item.

     ``school_type`` is always set to the empty string.
     """
     fields = {
         'name': item.get('name'),
         'id': f"BW-{item.get('id')}",
         'address': item.get('Strasse'),
         'zip': item.get('PLZ'),
         'city': item.get('Ort'),
         'website': item.get('Internet'),
         'email': item.get('E-Mail'),
         'fax': item.get('Fax'),
         'phone': item.get('Telefon'),
         'provider': item.get('Schulamt'),
         'director': item.get('Schulleitung'),
         'school_type': '',
     }
     return School(**fields)
 def normalize(self, item: Item) -> School:
     """Build a ``School`` with an ``RP-`` prefixed id from a scraped item.

     ``Ort`` is split at its first space: the part before it is the postal
     code, the part after it the city.
     """
     pieces = item.get('Ort').split(' ', 1)
     postal_code = pieces[0]
     town = pieces[1]
     return School(
         name=item.get('name'),
         id=f"RP-{item.get('id')}",
         address=item.get('Adresse'),
         zip=postal_code,
         city=town,
         website=item.get('Internet'),
         email=item.get('E-Mail'),
         school_type=item.get('Schulform'),
         fax=item.get('Fax'),
         phone=item.get('Telefon'),
     )
Exemplo n.º 16
0
 def normalize(self, item: Item) -> School:
     """Build a ``School`` with a ``BE-`` prefixed id from a scraped item."""
     fields = {
         'name': item.get('name'),
         'id': f"BE-{item.get('id')}",
         'address': item.get('address'),
         'zip': item.get('zip'),
         'city': item.get('city'),
         'website': item.get('web'),
         'email': item.get('mail'),
         'school_type': item.get('schooltype'),
         'fax': item.get('fax'),
         'phone': item.get('telephone'),
         'director': item.get('headmaster'),
         'legal_status': item.get('legal_status'),
     }
     return School(**fields)
Exemplo n.º 17
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with a ``BY-`` prefixed id from a scraped item.

     The ``city`` field is split on whitespace: the first token is used as
     the postal code, the remaining tokens are re-joined as the city name.
     Latitude and longitude are passed through unchanged.
     """
     postal_code, *town_words = item.get('city').split()
     fields = {
         'name': item.get('name'),
         'phone': item.get('phone'),
         'fax': item.get('fax'),
         'website': item.get('web'),
         'address': item.get('street'),
         'city': ' '.join(town_words),
         'zip': postal_code,
         'school_type': item.get('school_type'),
         'legal_status': item.get('type'),
         'id': f"BY-{item.get('number')}",
         'latitude': item.get('latitude'),
         'longitude': item.get('longitude'),
     }
     return School(**fields)
Exemplo n.º 18
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with a ``TH-`` prefixed id from a scraped item.

     ``Ort`` is split on whitespace: the first token is the postal code,
     the remaining tokens are re-joined as the city.  The e-mail value is
     passed through ``ThueringenSpider._deobfuscate_email``.
     """
     tokens = item.get('Ort').split()
     postal_code = tokens[0]
     town = ' '.join(tokens[1:])
     return School(
         name=item.get('Schulname'),
         id=f"TH-{item.get('Schulnummer')}",
         address=item.get('Straße'),
         zip=postal_code,
         city=town,
         website=item.get('Internet'),
         email=ThueringenSpider._deobfuscate_email(item.get('E-Mail')),
         school_type=item.get('Schulart'),
         provider=item.get('Schulträger'),
         fax=item.get('Telefax'),
         phone=item.get('Telefon'),
     )
Exemplo n.º 19
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``SN-`` prefixed id from a scraped item.

     ``Postanschrift`` is split around five-digit runs: the text before
     the first run becomes the street address, the run itself the postal
     code and the text following it (up to any further run) the city.
     Without such a run the whole value is kept as the city and address
     and postal code are empty strings.

     Args:
         item: Scraped record; read via ``item.get``.

     Returns:
         The populated ``School``.  ``phone`` is the first value of the
         ``phone_numbers`` mapping, or ``None`` when there is none.
     """
     # Tolerate a missing or empty phone mapping instead of raising.
     numbers = list((item.get('phone_numbers') or {}).values())
     phone = numbers[0] if numbers else None

     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     zip_pattern = r'\d{5}'
     postal_address = (item.get('Postanschrift') or '').strip()
     parts = re.split(zip_pattern, postal_address)
     if len(parts) > 1:
         address = parts[0].strip()
         zip_code = re.findall(zip_pattern, postal_address)[0]
         city = parts[1].strip()
     else:
         # No postal code found: keep the text as the city instead of
         # raising IndexError on the missing match.
         address, zip_code, city = '', '', parts[0].strip()

     return School(name=item.get('title'),
                   id='SN-{}'.format(item.get('Dienststellenschlüssel')),
                   address=address,
                   zip=zip_code,
                   city=city,
                   website=item.get('Homepage'),
                   email=item.get('E-Mail'),
                   school_type=item.get('Einrichtungsart'),
                   legal_status=item.get('Rechtsstellung'),
                   provider=item.get('Schulträger'),
                   fax=item.get('Telefax'),
                   phone=phone,
                   director=item.get('Schulleiter') or item.get('Schulleiter/in'))
Exemplo n.º 20
0
    def test_import_new(self):
        """Store one school via ``DBSchool.update_or_create``; expect one row."""
        # Arrange
        pipeline_item: SchoolPipelineItem = SchoolPipelineItem(
            info=School(name='Test Schule', id='NDS-1'),
            item=dict(name='Test Schule', nr=1),
        )
        session.add(DBSchool.update_or_create(pipeline_item))
        session.commit()

        # Act
        row_count = session.query(DBSchool).count()

        # Assert
        self.assertEqual(row_count, 1)
Exemplo n.º 21
0
 def normalize(item):
     """Build a ``School`` from a scraped item.

     ``Stadt/Gemeinde`` is expected as ``"<zip>, <city>"`` and is split at
     the first ``", "`` only, so a city text that itself contains ``", "``
     no longer raises.  ``phone`` is the first line of ``Telefon``, or
     ``None`` when that field is empty or missing.

     Args:
         item: Scraped record; ``id`` and ``Stadt/Gemeinde`` are required.

     Returns:
         The populated ``School``.

     Raises:
         ValueError: If ``Stadt/Gemeinde`` contains no ``", "`` separator.
     """
     zip_code, city = item['Stadt/Gemeinde'].split(', ', 1)
     telephone = item.get('Telefon')
     phone = telephone.split('\n')[0] if telephone else None
     return School(
         id=item['id'],
         name=item.get('name'),
         phone=phone,
         director=item.get('Schulleiter/in'),
         website=item.get('Homepage'),
         fax=item.get('Telefax'),
         email=item.get('E-Mail'),
         address=item.get('Straße'),
         zip=zip_code,
         city=city)
Exemplo n.º 22
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with a ``BB-`` prefixed id from a scraped item.

     ``Adresse`` is unpacked as a sequence: the last element holds postal
     code and city, the one before it the street, and everything in front
     is joined into the name.  The remaining fields are reduced with
     ``first_or_none``.
     """
     *name_lines, street, place = item.get('Adresse')
     postal_code, *town_words = place.split(" ")
     fields = {
         'name': ' '.join(name_lines),
         'id': f"BB-{item.get('id')}",
         'address': street,
         'zip': postal_code,
         'city': ' '.join(town_words),
         'website': first_or_none(item.get('Internet')),
         'email': first_or_none(item.get('E-Mail')),
         'school_type': first_or_none(item.get('Schulform')),
         'provider': first_or_none(item.get('Schulamt')),
         'fax': first_or_none(item.get('Fax')),
         'phone': first_or_none(item.get('Telefon')),
         'director': first_or_none(item.get('Schulleiter/in')),
     }
     return School(**fields)
Exemplo n.º 23
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``HH-`` prefixed id from a scraped item.

     ``adresse_ort`` is split on whitespace: the first token is the postal
     code, the remaining tokens are re-joined as the city.  ``address2`` is
     always the empty string.
     """
     tokens = item.get('adresse_ort').split()
     postal_code = tokens[0]
     town_words = tokens[1:]
     fields = {
         'name': item.get('schulname'),
         'id': f"HH-{item.get('schul_id')}",
         'address': item.get('adresse_strasse_hausnr'),
         'address2': '',
         'zip': postal_code,
         'city': ' '.join(town_words),
         'website': item.get('schul_homepage'),
         'email': item.get('schul_email'),
         'school_type': item.get('schulform'),
         'fax': item.get('fax'),
         'phone': item.get('schul_telefonnr'),
         'director': item.get('name_schulleiter'),
     }
     return School(**fields)
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``MV-`` prefixed id from a scraped item.

     ``Dst-Nr.:`` and ``Plz`` are converted to text and a trailing ``.0``
     is removed.  ``address2`` is always the empty string.

     Args:
         item: Scraped record; read via ``item.get``.

     Returns:
         The populated ``School``.  ``zip`` is ``None`` when ``Plz`` is
         missing.
     """

     def without_float_suffix(value):
         # presumably these values arrive as floats such as 17033.0 --
         # TODO confirm.  Only a trailing ".0" is dropped; a blanket
         # str.replace would also remove a ".0" inside the text.
         text = str(value)
         return text[:-2] if text.endswith('.0') else text

     dst = without_float_suffix(item.get('Dst-Nr.:'))
     raw_plz = item.get('Plz')
     # A missing postal code stays None instead of becoming the text "None".
     plz = without_float_suffix(raw_plz) if raw_plz is not None else None
     return School(name=item.get('Schulname'),
                   id='MV-{}'.format(dst),
                   address=item.get('Straße, Haus-Nr.'),
                   address2='',
                   zip=plz,
                   city=item.get('Ort'),
                   website=item.get('Homepage'),
                   email=item.get('E-Mail'),
                   school_type=item.get('Schulart/ Org.form'),
                   fax=item.get('Telefax'),
                   phone=item.get('Telefon'),
                   provider=item.get('Schul-behörde'),
                   director=item.get('Schulleitung'))
 def normalize(item: Item) -> School:
     """Map a raw North Rhine-Westphalia record onto a ``School``.

     The name is assembled from the ``Schulbezeichnung_1..3`` fields and
     coded values are resolved through ``NordRheinWestfalenHelper``.

     Args:
         item: Scraped record; read via ``item.get``.

     Returns:
         The populated ``School`` with an ``NW-`` prefixed id.  ``fax`` and
         ``phone`` are ``None`` when neither the area code nor the number
         is present.
     """

     def join_number(prefix, number):
         # Formatting the parts directly renders absent values as the
         # literal text "None" (e.g. "NoneNone"), so skip missing parts
         # and report a completely absent number as None.
         if prefix is None and number is None:
             return None
         return "".join(str(part) for part in (prefix, number)
                        if part is not None)

     name = " ".join([item.get("Schulbezeichnung_1", ""),
                      item.get("Schulbezeichnung_2", ""),
                      item.get("Schulbezeichnung_3", "")]).strip()
     helper = NordRheinWestfalenHelper()
     return School(name=name,
                   id='NW-{}'.format(item.get('Schulnummer')),
                   address=item.get('Strasse'),
                   zip=item.get("PLZ"),
                   city=item.get('Ort'),
                   website=item.get('Homepage'),
                   email=item.get('E-Mail'),
                   legal_status=helper.resolve('rechtsform', item.get('Rechtsform')),
                   school_type=helper.resolve('schulform', item.get('Schulform')),
                   provider=helper.resolve('provider', item.get('Traegernummer')),
                   fax=join_number(item.get('Faxvorwahl'), item.get('Fax')),
                   phone=join_number(item.get('Telefonvorwahl'),
                                     item.get('Telefon')))
Exemplo n.º 26
0
    def test_import_existing(self):
        """Re-import id ``NDS-1`` with a new name; expect one updated row."""
        # This test relies on the previous one having run already, so that
        # the item exists in the database.
        # Arrange
        pipeline_item: SchoolPipelineItem = SchoolPipelineItem(
            info=School(name='Test Schule (updated)', id='NDS-1'),
            item=dict(name='Test Schule', nr=1),
        )
        session.add(DBSchool.update_or_create(pipeline_item))
        session.commit()

        # Act
        row_count = session.query(DBSchool).count()
        stored = session.query(DBSchool).first()

        # Assert
        self.assertEqual(row_count, 1)
        self.assertEqual(stored.name, "Test Schule (updated)")
Exemplo n.º 27
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``HB-`` prefixed id from a scraped item.

     ``Ansprechperson`` is split into head teacher and deputy, which are
     written back into ``item`` as ``Schulleitung`` and ``Vertretung``.
     ``Anschrift:`` is split around five-digit runs: the text before the
     first run is the street address, the run itself the postal code and
     the text following it (up to any further run) the city.  When no such
     run exists the whole text is kept as the address and postal code and
     city are ``None``.

     Args:
         item: Scraped record; ``Ansprechperson`` is required, the other
             fields are read via ``item.get``.

     Returns:
         The populated ``School``; fax and phone are passed through
         ``BremenSpider.fix_number``.
     """
     ansprechpersonen = item['Ansprechperson'].replace(
         'Schulleitung:', '').replace('Vertretung:', ',').split(',')
     item['Schulleitung'] = ansprechpersonen[0]
     # The split yields a single element when the text holds neither ','
     # nor 'Vertretung:'; fall back to None instead of raising IndexError.
     item['Vertretung'] = (ansprechpersonen[1]
                           if len(ansprechpersonen) > 1 else None)

     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     zip_pattern = r'\d{5}'
     anschrift = (item.get('Anschrift:') or '').strip()
     parts = re.split(zip_pattern, anschrift)
     if len(parts) > 1:
         street = parts[0].strip()
         zip_code = re.findall(zip_pattern, anschrift)[0]
         city = parts[1].strip()
     else:
         # No postal code found: keep the unparsed text instead of raising
         # IndexError on the missing match.
         street, zip_code, city = anschrift, None, None

     email = item.get('E-Mail-Adresse')
     return School(name=item.get('name'),
                   id='HB-{}'.format(item.get('id')),
                   address=street,
                   zip=zip_code,
                   city=city,
                   website=item.get('Internet'),
                   # A missing e-mail stays None rather than failing on
                   # None.strip().
                   email=email.strip() if email is not None else None,
                   fax=BremenSpider.fix_number(item.get('Telefax')),
                   phone=BremenSpider.fix_number(item.get('Telefon')))
Exemplo n.º 28
0
 def normalize(item: Item) -> School:
     """Build a ``School`` with an ``NI-`` prefixed id from a scraped item.

     The name joins ``schulname`` and ``namenszuatz``; street, postal code
     and city come from the first entry of ``sdb_adressen`` and its nested
     ``sdb_ort``.  School type and provider are looked up through
     ``NiedersachsenSpider._get``.
     """
     # NOTE(review): 'namenszuatz' may be a misspelling of the source key --
     # confirm against the scraped data before changing it.
     name_parts = [item.get('schulname', ''), item.get('namenszuatz', '')]
     full_name = " ".join(name_parts).strip()

     first_address = item.get('sdb_adressen', [{}])[0]
     place = first_address.get('sdb_ort', {})
     kind = NiedersachsenSpider._get(item, 'sdb_art', {}).get('art')
     carrier = NiedersachsenSpider._get(item, 'sdb_traeger', {}).get('name')

     fields = {
         'name': full_name,
         'phone': item.get('telefon'),
         'fax': item.get('fax'),
         'email': item.get('email'),
         'website': item.get('homepage'),
         'address': first_address.get('strasse'),
         'zip': place.get('plz'),
         'city': place.get('ort'),
         'school_type': kind,
         'provider': carrier,
         'legal_status': item.get("sdb_traegerschaft", {}).get('bezeichnung'),
         'id': f"NI-{item.get('schulnr')}",
     }
     return School(**fields)
Exemplo n.º 29
0
 def process_item(self, item, spider):
     """Build a ``School`` from ``item`` using the mapping for ``spider.name``.

     Args:
         item: Scraped record; read via ``item.get``.  For the ``bremen``
             spider the keys ``Schulleitung`` and ``Vertretung`` are
             written back into it.
         spider: Object whose ``name`` selects the field mapping.

     Returns:
         ``{'info': school, 'item': item}`` for a known spider, otherwise
         ``item`` unchanged.
     """
     if spider.name == 'saarland':
         address = u"{} {}".format(item.get('street', ""),
                                   item.get('zip', ""))
         if item.get('email'):
             # Undo the "mailto:" prefix and the URL-encoded "@".
             email = item['email'].replace('mailto:',
                                           '').replace('%40', '@')
         else:
             email = None
         school = School(name=item.get('name'),
                         phone=item.get('telephone'),
                         # NOTE(review): director is filled from the
                         # 'telephone' field, which looks like a copy/paste
                         # slip -- confirm the intended source key.
                         director=item.get('telephone'),
                         website=item.get('website'),
                         fax=item.get('fax'),
                         email=email,
                         address=address)
     elif spider.name == 'niedersachsen':
         address = u"{} {}".format(item.get('Straße', ""),
                                   item.get('Ort', ""))
         school = School(name=item.get('Schule'),
                         phone=item.get('Tel'),
                         email=item.get('E-Mail'),
                         website=item.get('Homepage'),
                         address=address,
                         id='NDS-{}'.format(item.get('Schulnummer')))
     elif spider.name == 'bayern':
         school = School(name=item.get('Name'),
                         phone=item.get('Telefon'),
                         website=item.get('website'),
                         address=item.get('Anschrift'),
                         id='BAY-{}'.format(item.get('Schulnummer')))
     elif spider.name == 'thueringen':
         school = School(name=item.get('Schulname'),
                         id='TH-{}'.format(item.get('Schulnummer')),
                         address=u"{} {}".format(item.get('Straße'),
                                                 item.get('Ort')),
                         website=item.get('Internet'),
                         email=item.get('E-Mail'),
                         school_type=item.get('Schulart'),
                         provider=item.get('Schulträger'),
                         fax=item.get('Telefax'),
                         phone=item.get('Telefon'))
     elif spider.name == 'schleswig-holstein':
         school = School(name=item.get('Name'),
                         id='SH-{}'.format(item.get('Dienststellen Nr.')),
                         address=u"{} {} {}".format(item.get('Straße'),
                                                    item.get("PLZ"),
                                                    item.get("Ort")),
                         email=item.get('EMail'),
                         school_type=item.get('Organisationsform'),
                         legal_status=item.get('Rechtsstatus'),
                         provider=item.get('Träger'),
                         fax=item.get('Fax'),
                         phone=item.get('Telefon'),
                         director=item.get('Schulleiter(-in)'))
     elif spider.name == 'bremen':
         ansprechpersonen = item['Ansprechperson'].replace(
             'Schulleitung:', '').replace('Vertretung:', ',').split(',')
         item['Schulleitung'] = ansprechpersonen[0]
         # The split yields a single element when the text holds neither
         # ',' nor 'Vertretung:'; fall back to None instead of raising
         # IndexError.
         item['Vertretung'] = (ansprechpersonen[1]
                               if len(ansprechpersonen) > 1 else None)
         school = School(name=item.get('name'),
                         address=item.get('Anschrift:'),
                         website=item.get('Internet'),
                         email=item.get('E-Mail-Adresse'),
                         fax=item.get('Telefax'),
                         phone=item.get('Telefon'))
     elif spider.name == 'sachsen':
         school = School(name=item.get('title'),
                         id='SN-{}'.format(
                             item.get('Dienststellenschlüssel')),
                         address=item.get('Postanschrift'),
                         website=item.get('Homepage'),
                         email=item.get('E-Mail'),
                         school_type=item.get('Einrichtungsart'),
                         legal_status=item.get('Rechtsstellung'),
                         provider=item.get('Schulträger'),
                         fax=item.get('Telefax'),
                         phone=item.get('phone_numbers'),
                         director=item.get('Schulleiter'))
     elif spider.name == 'sachsen-anhalt':
         school = School(
             name=item.get('Name'),
             address=item.get('Addresse'),
             website=item.get('Homepage'),
             email=item.get('E-Mail'),
             fax=item.get('Fax'),
             phone=item.get('Telefon'),
         )
     elif spider.name == 'brandenburg':
         school = School(name=item.get('name'),
                         id=item.get('nummer'),
                         address=item.get('Adresse'),
                         website=item.get('Internet'),
                         email=item.get('E-Mail'),
                         school_type=item.get('Schulform'),
                         provider=item.get('Schulamt'),
                         fax=item.get('Fax'),
                         phone=item.get('Telefon'),
                         director=item.get('Schulleiter/in'))
     else:
         # Unknown spider: hand the item on untouched.
         return item
     return {'info': school, 'item': item}