def setUp(self):
    """
    Build the shared fixture: one address book holding two groups.

    Michael Jackson is kept on ``self.mj`` and added to both groups so that
    tests can exercise a person with more than one membership.
    """
    self.address_book = AddressBook()

    # Groups
    self.jacksons = Group('Jacksons 5')
    self.singers = Group('Singers')

    # The one person who is a member of both groups
    self.mj = Person('Michael', 'Jackson')

    # Members group #1
    self.jacksons.addPerson(self.mj)
    for first_name in ('Tito', 'Another'):
        self.jacksons.addPerson(Person(first_name, 'Jackson'))

    # Members group #2
    self.singers.addPerson(Person('Lionel', 'Ritchie'))
    self.singers.addPerson(self.mj)

    # Add both groups to the address book, then pass the book to the
    # class's __addMockEmails helper.
    for group in (self.jacksons, self.singers):
        self.address_book.addGroup(group)
    TestAddressBook.__addMockEmails(self.address_book)
def get_person(id_p):
    """
    Look up one row of the ``Persons`` table by ``id_p`` and return the
    string form of the resulting ``Person``.

    :param id_p: person id as a string (it is concatenated into the query)
    :return: whatever ``Person.to_string()`` returns for the first row
    :raises IndexError: when the query yields no row
    """
    mysql_util = open_connect_mysql()
    try:
        # NOTE(review): the statement is assembled by string concatenation,
        # so a quote inside id_p alters the SQL. The signature of
        # mysql_query is not visible here; switch to a parameterized call if
        # the helper supports one.
        sql = "select * from Persons where id_p= " + "'" + id_p + "'"
        person = mysql_util.mysql_query(sql)
        row = person[0]
        # Column 5 is skipped on purpose, exactly as before.
        n_person = Person(row[0], row[1], row[2], row[3], row[4], row[6])
    finally:
        # Release the connection even when the query or the row handling
        # raises; previously it was leaked on any error.
        close_connect_mysql(mysql_util)
    return n_person.to_string()
def find_person(self):
    """
    Scrape the person overview page and store every person found on it.

    For each table row that links to a person detail page a ``Person`` is
    created from the parsed id, given the name from the first cell and,
    when the second cell names a party, a party ``Membership``. The person
    id is added to ``self.person_queue`` (if that attribute exists) and the
    person is saved through ``self.db.save_person``.

    :return: None
    """
    user_overview_url = self.urls['PERSON_OVERVIEW_PRINT_PATTERN'] % self.config['scraper']['base_url']
    logging.info("Getting user overview from %s", user_overview_url)

    time.sleep(self.config['scraper']['wait_time'])
    response = self.get_url(user_overview_url)
    if not response:
        return

    # seek(0) is necessary to reset response pointer.
    response.seek(0)
    html = response.read()
    # NOTE(review): as written this replaces one whitespace character with a
    # plain space; presumably the first argument was a non-breaking space or
    # '&nbsp;' before the file was re-encoded -- confirm against history.
    html = html.replace(' ', ' ')
    parser = etree.HTMLParser()
    dom = etree.parse(StringIO(html), parser)

    for tr in dom.xpath(self.xpath['PERSONLIST_LINES']):
        link = tr.xpath('.//a')
        if not len(link):
            continue
        href = link[0].get('href')
        parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN'], href)
        if not parsed:
            parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN_ALT'], href)
        if not parsed:
            # Rows whose link is not a person detail link are skipped.
            continue

        person_id = parsed['person_id']
        current_person = Person(originalId=person_id)

        tds = tr.xpath('.//td')
        if len(tds) and len(tds[0]):
            # An element without text has .text == None; treat it as empty
            # instead of failing with AttributeError on .strip().
            person_name = (tds[0][0].text or '').strip()
            if person_name:
                current_person.name = person_name
        if len(tds) > 1:
            person_party = (tds[1].text or '').strip()
            if person_party:
                # Map a party alias to its configured replacement.
                for party_alias in self.config['scraper']['party_alias']:
                    if party_alias[0] == person_party:
                        person_party = party_alias[1]
                        break
                new_organization = Organization(originalId=person_party,
                                                name=person_party,
                                                classification='party')
                new_membership = Membership(originalId=unicode(person_id) + '-' + person_party,
                                            organization=new_organization)
                current_person.membership = [new_membership]

        if hasattr(self, 'person_queue'):
            self.person_queue.add(current_person.originalId)
        self.db.save_person(current_person)
    return
def parse_json(self, js):
    """
    Parse an uploaded data packet and save the users that are new.

    Only the last 20 entries of the packet are examined.

    :param js: JSON data packet; a sequence of dicts with ``uid``,
               ``nickname`` and ``gender`` keys
    :return: None
    """
    upload_count = len(js)
    recent = js[-20:]
    count = len(recent)
    mysql = MysqlAdapter()
    mysql.open()
    try:
        find_count = 0
        for dic in recent:
            p = Person()
            p.uid = dic['uid']
            p.nickname = dic['nickname']
            p.gender = dic['gender']
            # Counts are set to -1 and ``used`` to 0 for every uploaded user.
            p.follower_count = -1
            p.following_count = -1
            p.used = 0
            # check_save() returning 1 is counted as a newly found user.
            if p.check_save(mysql) == 1:
                find_count += 1
        print('分析%d个用户,找到%d个新用户,上传%d个用户' % (count, find_count, upload_count))
    finally:
        # Close the adapter even when a record is malformed or saving fails.
        mysql.close()
def test_add_people_to_address_book(self):
    """
    Feature #1: Add a person to the address book.
    """
    book = AddressBook()
    newcomers = [Person('Jerry', 'Garcia'), Person('Janis', 'Joplin')]
    for newcomer in newcomers:
        book.addPerson(newcomer)
    # Both additions must be visible in the book's people collection.
    self.assertEqual(2, len(book.people))
def editModel(self, *ignore):
    """
    Copy the ``name`` and ``surname`` entry values into the edited person
    and mark that person as dirty.

    Extra positional arguments are accepted and ignored.
    """
    if self.value is not None:
        self.value.name = self.name.value
        self.value.surname = self.surname.value
    else:
        # Fill a detached Person first and assign it afterwards: doing
        # `self.value = Person()` right away would empty the Entry's
        # values (due to refresh()).
        fresh = Person()
        fresh.name = self.name.value
        fresh.surname = self.surname.value
        self.value = fresh
    self.value.isDirty = True
def editModel(self, *ignore):
    """
    Push the current ``name``/``surname`` entry values into ``self.value``
    and flag it as dirty. Any extra positional arguments are ignored.
    """
    creating = self.value is None
    if creating:
        # Populate the new Person before assigning it: doing
        # `self.value = Person()` first would empty the Entry's values
        # (due to refresh()).
        person = Person()
        person.name = self.name.value
        person.surname = self.surname.value
        self.value = person
    else:
        self.value.name = self.name.value
        self.value.surname = self.surname.value
    self.value.isDirty = True
def get_person_list():
    """
    Return one page of persons matching the posted search name.

    The POST body is JSON with ``searchName``, ``start`` and ``pagesize``
    keys. For any other request method nothing is returned.
    """
    if request.method != 'POST':
        return None

    page_info = json.loads(request.data)
    matches = personDao.get_person_list_like(page_info['searchName'],
                                             page_info['start'],
                                             page_info['pagesize'])
    # Serialise the result rows through a Person instance and wrap them in
    # the response built by get_response().
    payload = Person().get_persons_json_array(matches)
    return get_response(payload)
class TestPerson(unittest.TestCase):
    """Tests for Person: string form, contact details and group membership."""

    def setUp(self):
        self.person = Person('David', 'Gilmour')

    def test_person_str(self):
        """str() renders the person as 'Last, First'."""
        rendered = str(self.person)
        self.assertEqual('Gilmour, David', rendered)

    def test_add_street_address(self):
        self.person.addStreetAddress('15 Charlotte Street')
        stored = self.person.street_addresses
        self.assertEqual(1, len(stored))

    def test_add_empty_street_address(self):
        with self.assertRaises(ValueError):
            self.person.addStreetAddress('')

    def test_add_email_address(self):
        self.person.addEmailAddress('*****@*****.**')
        stored = self.person.email_addresses
        self.assertEqual(1, len(stored))

    def test_add_empty_email_address(self):
        with self.assertRaises(ValueError):
            self.person.addEmailAddress('')

    def test_add_phone_number(self):
        self.person.addPhoneNumber('+44 7770 231 213')
        stored = self.person.phone_numbers
        self.assertEqual(1, len(stored))

    def test_add_empty_phone_number(self):
        with self.assertRaises(ValueError):
            self.person.addPhoneNumber('')

    def test_add_person_to_group(self):
        pink_floyd = Group('Pink Floyd')
        spinal_tap = Group('Spinal Tap')
        for group in (pink_floyd, spinal_tap):
            self.person.addToGroup(group)
        self.assertEqual(2, len(self.person.groups))

        # Add person to a duplicated group, and verify that the
        # group count doesn't change
        # NOTE(review): the duplicate is created but the ORIGINAL group is
        # the one added again below -- confirm whether the duplicate was
        # meant to be passed instead.
        pink_floyd_dup = Group('Pink Floyd')
        self.person.addToGroup(pink_floyd)
        self.assertEqual(2, len(self.person.groups))

        # Verify that the Groups' members count got incremented
        for group in (pink_floyd, spinal_tap):
            self.assertEqual(1, len(group.members))

        # The duplicated Group's members did not get incremented
        self.assertEqual(0, len(pink_floyd_dup.members))
def find_person(self):
    """
    Scrape the person overview page and store every person found on it.

    For each table row that links to a person detail page a ``Person`` is
    created from the parsed id, given the title from the first cell and,
    when the second cell names a party, a party ``Committee`` entry. The
    person id is added to ``self.person_queue`` (if that attribute exists)
    and the person is saved through ``self.db.save_person``.

    :return: None
    """
    user_overview_url = self.urls['PERSON_OVERVIEW_PRINT_PATTERN']
    logging.info("Getting user overview from %s", user_overview_url)

    time.sleep(self.config.WAIT_TIME)
    response = self.get_url(user_overview_url)
    if not response:
        return

    # seek(0) is necessary to reset response pointer.
    response.seek(0)
    html = response.read()
    # NOTE(review): as written this replaces one whitespace character with a
    # plain space; presumably the first argument was a non-breaking space or
    # '&nbsp;' before the file was re-encoded -- confirm against history.
    html = html.replace(' ', ' ')
    parser = etree.HTMLParser()
    dom = etree.parse(StringIO(html), parser)

    for tr in dom.xpath(self.xpath['PERSONLIST_LINES']):
        link = tr.xpath('.//a')
        if not len(link):
            continue
        href = link[0].get('href')
        parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN'], href)
        if not parsed:
            parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN_ALT'], href)
        if not parsed:
            # Rows whose link is not a person detail link are skipped.
            continue

        person_id = parsed['person_id']
        current_person = Person(numeric_id=person_id)

        tds = tr.xpath('.//td')
        if len(tds) and len(tds[0]):
            # An element without text has .text == None; treat it as empty
            # instead of failing with AttributeError on .strip().
            person_name = (tds[0][0].text or '').strip()
            if person_name:
                current_person.title = person_name
        if len(tds) > 1:
            person_party = (tds[1].text or '').strip()
            if person_party:
                # Map a party alias to its configured replacement.
                if person_party in self.config.PARTY_ALIAS:
                    person_party = self.config.PARTY_ALIAS[person_party]
                current_person.committee = [
                    {'committee': Committee(identifier=person_party,
                                            title=person_party,
                                            type='party')}
                ]

        if hasattr(self, 'person_queue'):
            self.person_queue.add(current_person.numeric_id)
        self.db.save_person(current_person)
    return
def test_add_emp_person(app):
    """Log in, create a person whose fields are all empty strings, log out."""
    app.open_home_page()
    app.session.login(username="******", password="******")
    app.person.open_add_page()
    # Every field of the new person is the empty string.
    blank_fields = dict.fromkeys(
        ("firstname", "middlename", "nickname", "birthYear"), "")
    app.person.create_person(Person(**blank_fields))
    app.session.logout()
def add_emails_main():
    """
    Set e-mail addresses for people listed in a CSV file.

    Command line: ``[--force] file``. The CSV needs ``Name`` and ``Email``
    columns. A person's address is replaced when the stored one is missing,
    empty or starts with ``member_``; with ``--force`` it is replaced in
    every case. Names that ``Person.find`` cannot resolve are reported and
    skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--force", action='store_true')
    parser.add_argument("file")
    args = parser.parse_args()
    force = args.force

    # newline='' is what the csv module asks for, so that newlines embedded
    # in quoted fields are interpreted correctly.
    with open(args.file, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            if 'Name' not in row or 'Email' not in row:
                continue
            name = row['Name']
            email = row['Email']

            whoever = Person.find(name)
            if whoever is None:
                print("Could not find", name)
                continue

            old_email = whoever.get_email()
            # None, "" and "member_..." all count as "no usable address".
            old_not_valid = not old_email or old_email.startswith("member_")
            if not (force or old_not_valid):
                continue

            whoever.set_email(email)
            if old_not_valid:
                print("Set email for", name, "to", email)
            else:
                print("Changed email for", name, "to", email, "from",
                      old_email)
def test_add_emp_person(self):
    """Log in, create a person whose fields are all empty strings, log out."""
    self.open_home_page()
    self.login(username="******", password="******")
    self.open_add_page()
    # Every field of the new person is the empty string.
    blank_fields = dict.fromkeys(
        ("firstname", "middlename", "nickname", "birthYear"), "")
    self.create_person(Person(**blank_fields))
    self.logout()
def series(self, tv_id: int):
    """
    Fetch a TV series and its credits from TMDB and wrap them in a
    ``TVSeries`` object.

    :param tv_id: TMDB id of the series (the annotation used to be
                  ``int()``, which evaluates to ``0`` instead of naming the
                  type)
    :return: ``TVSeries`` built from the ``info()`` and ``credits()``
             responses
    """
    result = tmdb.TV(tv_id)
    tv_info = result.info()
    tv_credits = result.credits()

    cast = [
        Cast(id=actor['id'],
             name=actor['name'],
             character=actor['character'],
             profile_path=actor['profile_path'])
        for actor in tv_credits['cast']
    ]
    crew = [
        Crew(id=member['id'],
             name=member['name'],
             profile_path=member['profile_path'],
             job=member['job'],
             department=member['department'])
        for member in tv_credits['crew']
    ]
    credits = Credits(cast=cast, crew=crew)

    # The series' networks are stored as ProductionCompanies objects.
    prod_companies = [
        ProductionCompanies(id=company['id'],
                            name=company['name'],
                            logo_path=company['logo_path'],
                            origin_country=company['origin_country'])
        for company in tv_info['networks']
    ]
    show_creators = [
        Person(id=creator['id'],
               name=creator['name'],
               profile_path=creator['profile_path'],
               gender=creator['gender'])
        for creator in tv_info['created_by']
    ]
    # Copy the runtimes so the response list is not shared.
    episode_runtime = list(tv_info['episode_run_time'])

    return TVSeries(id=tv_info['id'],
                    name=tv_info['name'],
                    original_name=tv_info['original_name'],
                    original_language=tv_info['original_language'],
                    overview=tv_info['overview'],
                    in_production=tv_info['in_production'],
                    first_air_date=tv_info['first_air_date'],
                    last_air_date=tv_info['last_air_date'],
                    homepage=tv_info['homepage'],
                    number_of_episodes=tv_info['number_of_episodes'],
                    number_of_seasons=tv_info['number_of_seasons'],
                    episode_runtime=episode_runtime,
                    created_by=show_creators,
                    backdrop_path=tv_info['backdrop_path'],
                    poster_path=tv_info['poster_path'],
                    networks=prod_companies,
                    credits=credits)
def __obj_converter(self, item_str: str) -> Person:
    """
    Build a ``Person`` from one ``;``-separated record.

    The first six fields are assigned, in order, to ``id``, ``name``,
    ``date_of_birth``, ``type_person``, ``cpf`` and ``cnpj``; any further
    fields are ignored.

    :raises IndexError: when the record has fewer than six fields
    """
    person = Person()
    parts = item_str.split(';')
    attribute_order = ('id', 'name', 'date_of_birth',
                       'type_person', 'cpf', 'cnpj')
    for position, attribute in enumerate(attribute_order):
        setattr(person, attribute, parts[position])
    return person
def post(self):
    # Handle a POST: look up a Person, set its brandname from the request,
    # save it and then run this handler's get().
    #
    # NOTE(review): the first statement below is not valid Python as it
    # stands -- the text between 'user = ' and 'displayname' has been masked
    # with asterisks. Presumably it filtered on the current user and then
    # assigned person.displayname from the request; restore the original
    # text from version control before relying on this handler.
    person=Person.all().filter('user = '******'displayname')
    person.brandname=self.request.get('brandname')
    person.save()
    self.get()
def test_edit_contact(app, db, check_ui):
    """
    Edit a randomly chosen contact and check the contact count is unchanged.

    A contact is created first when none exist. With ``check_ui`` set, the
    database contacts (id plus stripped first/last name) are also compared
    with the list reported by the application.
    """
    # Field values shared by the seed contact and the edited contact.
    common = dict(company="paramount", address="23168 CA, sunbeach blvd",
                  home_phone_num="555111000", year="1980")

    if app.contact.count() == 0:
        app.contact.add_contact_fill_form(
            Person(firstname="John", lastname="Doe", **common))

    before = db.get_contact_list()
    index = before.index(random.choice(before))
    contact = Person(firstname="UU", lastname="UU", **common)
    contact.id = before[index].id
    app.contact.edit_contact_by_index(index, contact)

    after = db.get_contact_list()
    assert len(before) == len(after)

    if check_ui:
        def clean(raw):
            # Keep only what is compared: id plus trimmed names.
            return Person(id=raw.id,
                          firstname=raw.firstname.strip(),
                          lastname=raw.lastname.strip())

        after = [clean(entry) for entry in db.get_contact_list()]
        from_db = sorted(after, key=Person.id_or_max)
        from_ui = sorted(app.contact.get_contact_list(), key=Person.id_or_max)
        assert from_db == from_ui
def test_sort_people(self):
    """Each of the three sort functions yields its expected ordering."""
    with open(TEST_FILE, 'r') as reader:
        people = [Person(parse_line(raw)) for raw in reader]

    expectations = (
        (sorted_list_1, SORTED_1_GENDER_THEN_LASTNAME),
        (sorted_list_2, SORTED_2_BIRTHDATE),
        (sorted_list_3, SORTED_3_LASTNAME_DESCENDING),
    )
    for sorter, expected in expectations:
        self.assertEqual(sorter(people), expected)
def test_addperson(self):
    """Log in, create one fully filled-in person, log out."""
    self.open_home_page()
    self.login(username="******", password="******")
    self.open_add_page()
    new_person = Person(firstname="Dastan",
                        middlename="Ergaly",
                        nickname="Dastish",
                        birthYear="2000")
    self.create_person(new_person)
    self.logout()
def setUp(self):
    """
    Create a window containing an EditableGrid over three Person rows,
    with one column for the name and one for the surname.
    """
    super(TestGrid, self).setUp()

    names = (('jose', 'perez'), ('marcos', 'dione'), ('john', 'lenton'))
    self.list = [Person(first, last) for first, last in names]

    columns = tuple(
        Column(name=label, attribute=attribute)
        for label, attribute in (('Nombre', 'name'), ('Apellido', 'surname'))
    )

    window = Window(title='Test', parent=self.app)
    self.parent = window
    self.window = window

    grid = EditableGrid(parent=self.parent, columns=columns, cls=Person)
    self.widget = grid
    self.grid = grid

    target = DummyTarget(self.list)
    self.setUpControl(target=target, attr='dummy')
def to_object(self, row):
    """
    Convert one result row into a ``Person``.

    The row is read positionally: id (converted to int), name, email,
    role, login, password.

    :return: the ``Person``, or ``None`` for an empty row
    """
    if len(row) <= 0:
        return None
    identifier, name, email, role, login, password = (
        int(row[0]), row[1], row[2], row[3], row[4], row[5])
    return Person(id_=identifier, name=name, email=email, role=role,
                  login=login, password=password)
def test_add_members_to_group(self):
    """Adding people updates both sides and ignores repeated additions."""
    david = Person('David', 'Gilmour')
    syd = Person('Syd', 'Barret')
    for member in (david, syd):
        self.group.addPerson(member)
    self.assertEqual(2, len(self.group.members))

    # Verify that the People groups count got incremented
    for member in (david, syd):
        self.assertEqual(1, len(member.groups))

    # Try to add person to a group that they are already a member of
    # and verify that the members count doesn't change
    self.group.addPerson(syd)
    self.assertEqual(2, len(self.group.members))

    # The person's group count did not get incremented either
    self.assertEqual(1, len(syd.groups))
def test_edit_person(app, db, check_ui):
    """
    Rename a randomly chosen person to "Ivan Ivanov" and check that the
    database list equals the old list with that one entry replaced.

    A person is created first when the database has none. With
    ``check_ui`` set, the database list is also compared with the list
    reported by the application.
    """
    if len(db.get_person_list()) == 0:
        app.person.create(Person(lname="Zadornov"))

    before = db.get_person_list()
    position = randrange(len(before))
    chosen = before[position]

    # Everything except first and last name is carried over unchanged.
    carried_fields = ('mname', 'nname', 'homephone', 'mobilephone',
                      'workphone', 'secondaryphone',
                      'allphones_from_home_page', 'email', 'email2',
                      'email3', 'allemails_from_home_page', 'address', 'id')
    carried = {field: getattr(chosen, field) for field in carried_fields}
    person = Person(fname="Ivan", lname="Ivanov", **carried)

    app.person.edit_person_by_id(person)

    after = db.get_person_list()
    assert len(before) == len(after)
    before[position] = person
    assert sorted(before, key=Person.id_or_max) == sorted(after, key=Person.id_or_max)

    if check_ui:
        from_ui = sorted(app.person.get_person_list(), key=Person.id_or_max)
        assert sorted(after, key=Person.id_or_max) == from_ui
def setUp(self):
    """
    Load the Editor from ``test/editor.xml``, attach it to a new window
    and a new Store, and use a Person as the control's target.
    """
    super(TestEditor, self).setUp()
    self.store = Store()

    window = cimarron.skin.Window()
    self.parent = window
    self.window = window

    # The editor is loaded from XML rather than being constructed as
    # cimarron.skin.Editor(store=self.store, attributes=['name', 'surname']).
    self.widget = Editor.fromXmlFile('test/editor.xml')
    self.widget.parent = self.parent
    self.widget.store = self.store

    self.entry = self.widget.entries.children[0]
    self.setUpControl(target=Person('Marcos', 'Dione'), attr=None)
def get_person_info_from_view_page(self, index):
    """
    Open the view page of the person at ``index`` and read the four phone
    numbers from its text.

    The numbers are taken from the lines starting with ``H:``, ``W:``,
    ``M:`` and ``P:``. A label that does not occur on the page yields an
    empty string.

    :param index: position of the person, passed to
                  ``open_person_to_view_by_index``
    :return: ``Person`` with home, work, mobile and secondary phone set
    """
    wd = self.app.wd
    self.open_person_to_view_by_index(index)
    text = wd.find_element_by_id("content").text

    def phone_after(label):
        # re.search returns None when the label is absent; report that as
        # "no number" instead of failing with AttributeError on .group().
        match = re.search("%s: (.*)" % label, text)
        return match.group(1) if match else ""

    return Person(homephone=phone_after("H"),
                  workphone=phone_after("W"),
                  mobilephone=phone_after("M"),
                  secondaryphone=phone_after("P"))
def render(handler, path, template_values):
    # Render the template at `path` with `template_values`, store the result
    # under the 'content' key, then render '../html/shared.html' (relative to
    # this module) with the same values and write it to the handler's
    # response.
    #
    # NOTE(review): the first statement below is not valid Python as it
    # stands -- the text between 'user = ' and 'person' has been masked with
    # asterisks. Presumably it looked up the current user's Person and added
    # it to template_values under 'person'; restore the original text from
    # version control before relying on this function.
    person=Person.all().filter('user = '******'person':person });
    template_values.update( { 'content':template.render(path, template_values) });
    shared_path = os.path.join(os.path.dirname(__file__), '../html/shared.html')
    handler.response.out.write(template.render(shared_path, template_values))
def setUp(self):
    """
    Create a SearchEntry that searches Person, backed by four sample
    people stored on ``Person.__values__``; the first of them becomes the
    control's target.
    """
    super(TestSearchEntry, self).setUp()

    window = cimarron.skin.Window(title='Test', parent=self.app)
    self.parent = window
    self.window = window

    self.columns = tuple(
        cimarron.skin.Column(name=label, attribute=attribute)
        for label, attribute in (('Nombre', 'name'), ('Apellido', 'surname'))
    )
    self.widget = cimarron.skin.SearchEntry(
        parent=self.parent,
        columns=self.columns,
        searcher=Person,
        cls=Person,
    )

    # The same list object serves as Person's value store and as test data.
    sample = [
        Person(first, last)
        for first, last in (('jose', 'perez'), ('marcos', 'dione'),
                            ('john', 'lenton'), ('pedro', 'dargenio'))
    ]
    Person.__values__ = sample
    self.data = sample

    self.setUpControl(target=self.data[0], attr=None)
def create_initialize_db(db_url, echo=False):
    """
    Create all tables and seed the database with the permission rows and
    two accounts, ``admin`` and ``user``.

    ``admin`` receives the first two permissions built from
    ``Permission.ROLES``; ``user`` receives the second one only.

    :param db_url: database URL passed to ``create_engine``
    :param echo: passed through to ``create_engine``
    :return: None; an ``IntegrityError`` is ignored and any other exception
             is logged as a warning, as before
    """
    engine = create_engine(db_url, echo=echo)
    Base.metadata.create_all(engine)
    Session = sessionmaker(engine)
    session = Session()
    try:
        permissions = [Permission(name=name) for name in Permission.ROLES]
        session.add_all(permissions)

        admin = Person(email="admin", password="******")
        user = Person(email="user", password="******")
        session.add_all([admin, user])

        admin.permissions.append(permissions[0])
        admin.permissions.append(permissions[1])
        user.permissions.append(permissions[1])

        session.commit()
    except IntegrityError:
        # Presumably the seed rows already exist. Roll back so the failed
        # transaction is not left open.
        session.rollback()
    except Exception as ex:
        # "except X as y" is valid on Python 2.6+ and 3; the old
        # "except X, y" form is Python 2 only.
        session.rollback()
        logging.warning(ex)
    finally:
        # The session is local to this function; always release it.
        session.close()
def post(self):
    """
    Create a Person for the current user from the posted ``displayname``
    and ``brandname`` values, save it and redirect to ``/profile``.
    """
    person = Person()
    for field in ('displayname', 'brandname'):
        setattr(person, field, self.request.get(field))
    person.user = users.get_current_user()
    person.save()
    self.redirect('/profile')
def post(self, company_number):
    """
    Create a person for a company from the request payload.

    Name, nationality, birth year, notification date and natures of control
    come from the payload. Every other text column is set to the empty
    string, day and month of birth to 0 and ``ceased_on`` to
    ``db.func.now()``.

    :param company_number: key of the company; an unknown one is handled
                           by ``get_or_404``
    :return: ``(person, 201)``
    """
    company = Company.query.get_or_404(company_number)
    payload = Box(api.payload)

    # Text columns that are not supplied by the payload.
    blank_columns = (
        'address_line_1', 'address_line_2', 'address_care_of',
        'address_country', 'address_locality', 'address_po_box',
        'address_postal_code', 'address_premises', 'address_region',
        'country_of_residence', 'etag', 'id_country_registered',
        'id_legal_authority', 'id_legal_form', 'id_place_registered',
        'id_registration_number', 'kind', 'link_self', 'link_statement',
        'forename', 'other_forenames', 'surname', 'title',
    )
    person = Person(
        name=payload.name,
        nationality=payload.nationality,
        dob_year=payload.yearOfBirth,
        notified_on=payload.notifiedOn,
        natures_of_control=[],
        ceased_on=db.func.now(),
        dob_day=0,
        dob_month=0,
        **dict.fromkeys(blank_columns, '')
    )

    for entry in payload.naturesOfControl:
        person.natures_of_control.append(
            NatureOfControl(nature_of_control=entry.natureOfControl))

    company.people.append(person)
    db.session.commit()
    return person, 201
def parse_file():
    """
    Read people from the file named on the command line and print them
    once per configured sort order.

    Each entry of ``sorts`` is indexed as ``(sort function, heading)``: the
    heading is printed first, followed by the lines the sort function
    returns. When the path is not an existing file, a message is printed
    and nothing else happens.
    """
    # Parse the command:
    args = create_parser().parse_args()
    input_path = args.Path

    if not os.path.isfile(input_path):
        print('The file specified was not found.')
        # Stop here so no empty sort sections are printed for a missing file.
        return

    with open(input_path, 'r') as reader:
        # Iterate the file directly instead of loading it with readlines().
        people = [Person(parse_line(line)) for line in reader]

    for sort in sorts:
        print(sort[1])
        for line in sort[0](people):
            print(line)
def test_delete_person(app, db, check_ui):
    """
    Delete a randomly chosen person and check that the database list
    equals the old list without that entry.

    A person is created first when the database has none. With
    ``check_ui`` set, the database list is also compared with the list
    reported by the application.
    """
    if len(db.get_person_list()) == 0:
        app.person.create(Person(lname="Zadornov"))

    before = db.get_person_list()
    position = randrange(len(before))
    doomed = before[position]
    app.person.delete_person_by_id(doomed.id)

    after = db.get_person_list()
    assert len(before) - 1 == len(after)
    del before[position]
    assert before == after

    if check_ui:
        from_db = sorted(after, key=Person.id_or_max)
        from_ui = sorted(app.person.get_person_list(), key=Person.id_or_max)
        assert from_db == from_ui
def get_person_list(self):
    """
    Return the people listed on the home page as ``Person`` objects.

    The page is read only while ``self.person_cache`` is ``None``; later
    calls reuse the cache. A copy of the cached list is returned.
    """
    if self.person_cache is not None:
        return list(self.person_cache)

    wd = self.app.wd
    wd.find_element_by_link_text("home").click()
    time.sleep(1)

    self.person_cache = []
    for row in wd.find_elements_by_name("entry"):
        cells = row.find_elements_by_tag_name("td")
        # Cell 0 holds the selection checkbox whose value is the id,
        # 1 the last name, 2 the first name, 4 the e-mails, 5 the phones.
        last_name = cells[1].text
        first_name = cells[2].text
        person_id = cells[0].find_element_by_name("selected[]").get_attribute("value")
        emails = cells[4].text
        phones = cells[5].text
        entry = Person(fname=first_name,
                       lname=last_name,
                       id=person_id,
                       allphones_from_home_page=phones,
                       allemails_from_home_page=emails)
        self.person_cache.append(entry)
    return list(self.person_cache)
def person(self, person_id: int):
    """
    Fetch a person and their movie credits from TMDB and wrap them in a
    ``Person`` object.

    :param person_id: TMDB id of the person (the annotation used to be
                      ``int()``, which evaluates to ``0`` instead of naming
                      the type)
    :return: ``Person`` built from the ``info()`` response, with
             ``credits`` holding the cast and crew entries
    """
    person = tmdb.People(person_id)
    result = person.info()
    # Ask for the credits once; the call used to be repeated for the crew.
    movie_credits = person.movie_credits()

    cast = [
        Cast(id=role['id'],
             title=role['title'],
             poster_path=role['poster_path'],
             character=role['character'])
        for role in movie_credits['cast']
    ]
    crew = [
        Crew(id=job['id'],
             title=job['title'],
             job=job['job'],
             department=job['department'],
             poster_path=job['poster_path'])
        for job in movie_credits['crew']
    ]
    credits = Credits(cast=cast, crew=crew)

    return Person(id=result['id'],
                  name=result['name'],
                  imdb_id=result['imdb_id'],
                  gender=result['gender'],
                  homepage=result['homepage'],
                  profile_path=result['profile_path'],
                  also_known_as=result['also_known_as'],
                  biography=result['biography'],
                  deathday=result['deathday'],
                  birthday=result['birthday'],
                  place_of_birth=result['place_of_birth'],
                  popularity=result['popularity'],
                  credits=credits)
def create(person):
    '''
    Handler for the POST 'create' request to /api/records: parses the
    ``line`` field of the passed-in data into a Person and adds it to the
    data store.

    :param person: mapping whose ``line`` entry holds the record text
    :return: 201 response on success; aborts with 422 when ``line`` is
             missing or empty and with 406 when the person already exists
    '''
    line = person.get('line')
    if not line:
        abort(422, 'Cannot create Person from "None" data')

    person_object = Person(parse_line(line))
    if person_object not in DATA_STORE:
        DATA_STORE.append(person_object)
        return make_response(f'"{person_object!s}" successfully created', 201)

    abort(
        406,
        f'Person with last name "{person_object.lname}" already exists',
    )
def get_person_info_from_edit_page(self, index):
    """
    Open the edit page of the person at ``index`` and read the phone,
    e-mail, name and id form fields into a ``Person``.

    :param index: position of the person, passed to
                  ``open_person_to_edit_by_index``
    """
    wd = self.app.wd
    self.open_person_to_edit_by_index(index)

    # (Person keyword, form field name), in the order the fields are read.
    form_fields = (
        ("homephone", "home"),
        ("mobilephone", "mobile"),
        ("workphone", "work"),
        ("secondaryphone", "phone2"),
        ("email", "email"),
        ("email2", "email2"),
        ("email3", "email3"),
        ("fname", "firstname"),
        ("lname", "lastname"),
        ("id", "id"),
    )
    values = {
        keyword: wd.find_element_by_name(field).get_attribute("value")
        for keyword, field in form_fields
    }
    return Person(**values)
class TestPerson(unittest.TestCase):
    """Test cases for Person class"""

    # One instance is shared by all tests; they only read its attributes.
    person = Person('Adeola', 'Adedire')

    def test_new_person_class(self):
        self.assertEqual(type(self.person), Person)

    def test_new_person_first_name(self):
        self.assertEqual(self.person.fullname, 'ADEOLA ADEDIRE')

    def test_new_person_office_allocated(self):
        self.assertEqual(self.person.office_allocated, False)

    def test_new_person_office_space(self):
        self.assertEqual(self.person.office, '')

    def test_new_person_living_allocated(self):
        self.assertEqual(self.person.living_allocated, False)

    def test_new_person_living_space(self):
        # This test used to reuse the name test_new_person_living_allocated,
        # which replaced the method above so that it never ran. The new name
        # mirrors test_new_person_office_space.
        self.assertEqual(self.person.living, '')

    def test_create_new_person_raises_error(self):
        # A non-string first name must be rejected by the constructor.
        self.assertRaises(Exception, Person, 9, 'Ade')
def create_person_from_json(self):
    """
    Build a ``Person`` with its ``Document`` entries from the structure
    returned by ``self.parseJson()``.

    Plain fields are copied one to one under the same name. ``birth_day``
    and each document's ``day_of_issue`` are given as ``year``/``month``/
    ``day`` mappings and converted to ``datetime.date``.

    :return: the populated ``Person``
    :raises KeyError: when an expected key is missing
    """
    person = Person()
    json_person = self.parseJson()

    def to_date(parts):
        return datetime.date(parts["year"], parts["month"], parts["day"])

    def copy_fields(target, source, names):
        for name in names:
            setattr(target, name, source[name])

    # Fields are copied in the original order, with the date in between.
    copy_fields(person, json_person, (
        "person_type", "surname_ukr", "first_name_ukr", "second_name_ukr",
        "surname_eng", "first_name_eng",
    ))
    person.birth_day = to_date(json_person["birth_day"])
    copy_fields(person, json_person, (
        "sex", "marital_status", "nationality", "private_case_chars",
        "private_case_number", "is_outlander", "reservist", "hostel_need",
        "burn_place", "registration_place", "post_registration_place",
        "photo", "mobile_phone1", "mobile_phone2", "home_phone",
        "work_phone", "email", "skype", "web_site", "icq",
    ))

    for doc in json_person["documents"]:
        document = Document()
        copy_fields(document, doc, (
            "category", "document_name", "document_case_char",
            "document_case_number",
        ))
        document.day_of_issue = to_date(doc["day_of_issue"])
        copy_fields(document, doc, (
            "issued_by", "document_is_original", "document_is_foreign",
            "category_reward", "reward", "type_of_reward", "average_rate",
            "pincode",
        ))
        person.documents.append(document)

    return person
def get_person_committee(self, person_id=None, committee_url=None):
    """
    Scrape the membership table on a person's detail page and store the
    person together with those memberships.

    The second table inside ``#rismain_raw`` is read row by row. Header
    rows (``th``) switch the current membership type through ``type_map``;
    every other row becomes one membership dict with a ``committee`` entry
    and, where the date cell provides them, ``start`` and ``end`` entries.

    :param person_id: numeric id of the person, used to build the URL
    :param committee_url: not used by this method
    :return: None
    """
    url = "%skp020.asp?KPLFDNR=%s&history=true" % (self.config.BASE_URL, person_id)
    response = self.get_url(url)
    # This guard used to test ``url``, which is never empty, so a failed
    # request went on to crash on ``response.text``.
    if not response:
        return

    tree = html.fromstring(response.text)
    committees = []
    person = Person(numeric_id=person_id)

    # maps name of type to form name and membership type
    type_map = {
        u'Rat der Stadt': {'mtype': 'parliament', 'field': 'PALFDNR'},
        u'Fraktion': {'mtype': 'organisation', 'field': 'FRLFDNR'},
        u'Ausschüsse': {'mtype': 'committee', 'field': 'AULFDNR'},
        'Stadtbezirk': {'mtype': 'parliament', 'field': 'PALFDNR'},
        'BVV': {'mtype': 'parliament', 'field': 'PALFDNR'},
    }

    # obtain the table with the membership list via a simple state machine
    mtype = "parliament"
    field = 'PALFDNR'
    old_group_id = None    # for checking if it changes
    old_group_name = None  # for checking if it changes
    group_id = None        # might break otherwise

    table = tree.xpath('//*[@id="rismain_raw"]/table[2]')[0]
    for line in table.findall("tr"):
        if line[0].tag == "th":
            # Header row: switch the membership type for the rows below.
            what = line[0].text.strip()
            if what not in type_map:
                logging.error("Unknown committee type %s at person detail page %s", what, person_id)
                continue
            mtype = type_map[what]['mtype']
            field = type_map[what]['field']
            continue

        if "Keine Information" in line.text_content():
            # skip because no content is available
            continue

        membership = {}

        # first get the name of group
        group_name = line[1].text_content()
        committee = Committee(identifier=group_name)
        committee.type = mtype

        # now the first col might be a form with more useful information
        # which will carry through until we find another one with it.
        # we still check the name though
        form = line[0].find("form")
        if form is not None:
            group_id = int(form.find("input[@name='%s']" % field).get("value"))
            committee.numeric_id = group_id
            old_group_id = group_id      # remember it for next loop
            old_group_name = group_name  # remember it for next loop
        elif old_group_name != group_name:
            # we did not find a form. We assume that the old group still
            # applies but we nevertheless check if the groupname is still
            # the same
            logging.debug("Group name differs but we didn't get a form with new group id: group name=%s, old group name=%s, group_id=%s at url %s", group_name, old_group_name, old_group_id, url)

        # TODO: create a list of functions so we can index them somehow
        # (the function text is in line[2]; it is not stored yet)
        raw_date = line[3].text_content()

        # parse the date information
        if "seit" in raw_date:
            # NOTE(review): "seit" is German for "since", which suggests
            # this date is a start date; it is stored as 'end' exactly as
            # before -- confirm the intended key.
            dparts = raw_date.split()
            membership['end'] = dparts[-1]
        elif "Keine" in raw_date:
            # no date information available
            pass
        else:
            dparts = raw_date.split()
            membership['start'] = dparts[0]
            membership['end'] = dparts[-1]

        membership['committee'] = committee
        committees.append(membership)

    person.committee = committees
    self.db.save_person(person)
def find_person(self):
    """
    Fetch the person list as XML, build a ``Person`` for every entry and
    store it.

    Element 1 of the parsed document is treated as the list of persons.
    Each child's sub-elements are read into a tag -> text dict, from which
    the non-empty values are copied onto the person. Persons with a link
    are added to ``self.person_queue`` (if that attribute exists).

    :return: None
    """
    find_person_url = self.config.BASE_URL + 'kp041.asp?template=xyz&selfaction=ws&showAll=true&PALFDNRM=1&kpdatfil=&filtdatum=filter&kpname=&kpsonst=&kpampa=99999999&kpfr=99999999&kpamfr=99999999&kpau=99999999&kpamau=99999999&searchForm=true&search=Suchen'
    parser = etree.XMLParser(recover=True)
    r = self.get_url(find_person_url)
    if not r:
        return
    xml = r.text.encode('ascii', 'xmlcharrefreplace')
    tree = etree.fromstring(xml, parser=parser)

    # (XML tag, Person attribute) pairs that are copied when the tag has a
    # non-empty value. Phone and fax used to be assigned twice each; once
    # is enough.
    leading_fields = (
        ('link_kp', 'original_url'),
        # personal information
        ('adtit', 'title'),
    )
    trailing_fields = (
        ('advname', 'firstname'),
        ('adname', 'lastname'),
        # address
        ('adstr', 'address'),
        ('adhnr', 'house_number'),
        ('adplz', 'postalcode'),
        # contact
        ('adtel', 'phone'),
        ('adtel2', 'mobile'),
        ('adfax', 'fax'),
        ('ademail', 'email'),
        ('adwww1', 'website'),
    )

    # element 0 is the special block
    # element 1 is the list of persons
    for node in tree[1].iterchildren():
        elem = {}
        for e in node.iterchildren():
            elem[e.tag] = e.text

        # now retrieve person details such as committee memberships etc.
        # we also get the age (but only that, no date of birth)
        person = Person(numeric_id=int(elem['kplfdnr']), identifier=elem['kplfdnr'])

        for tag, attribute in leading_fields:
            if elem[tag]:
                setattr(person, attribute, elem[tag])

        # 'Frau' is stored as 1 and 'Herr' as 2; anything else leaves the
        # attribute untouched.
        if elem['antext1'] == 'Frau':
            person.sex = 1
        elif elem['antext1'] == 'Herr':
            person.sex = 2

        for tag, attribute in trailing_fields:
            if elem[tag]:
                setattr(person, attribute, elem[tag])

        person_party = elem['kppartei']
        if person_party:
            if person_party in self.config.PARTY_ALIAS:
                person_party = self.config.PARTY_ALIAS[person_party]
            person.committee = [{'committee': Committee(identifier=person_party, title=person_party, type='party')}]

        if elem['link_kp'] is not None:
            if hasattr(self, 'person_queue'):
                self.person_queue.add(person.numeric_id)
        else:
            logging.info("Person %s %s has no link", person.firstname, person.lastname)

        self.db.save_person(person)
def get_person_organization(self, person_organization_url=None, person_id=None):
    """
    Load the organization memberships of one person and store them.

    Either ``person_id`` or ``person_organization_url`` has to be given.
    With ``person_id`` the page URL is built from
    ``self.urls['PERSON_ORGANIZATION_PRINT_PATTERN']``; with only the URL,
    the person id is extracted from it using the same pattern. If neither
    is given, or the id cannot be extracted, an error is logged and the
    method returns without fetching anything.

    The page is fetched after sleeping for the configured wait time. Each
    table row with five cells that names an organization becomes one
    ``Membership`` (role, start date and end date are taken from cells
    3 to 5 when present). The resulting ``Person`` is handed to
    ``self.db.save_person``.

    Returns None.
    """
    # NOTE(review): the same pattern is used for %-formatting and for
    # parse.search -- confirm it really supports both uses.
    url_pattern = self.urls['PERSON_ORGANIZATION_PRINT_PATTERN']
    if person_id is not None:
        person_committee_url = url_pattern % (
            self.config['scraper']['base_url'], person_id)
    elif person_organization_url is not None:
        parsed = parse.search(url_pattern, person_organization_url)
        if parsed is None:
            logging.error("Cannot extract person id from %s",
                          person_organization_url)
            return
        person_id = parsed['person_id']
        # Without this the URL-only call path had no URL to fetch.
        person_committee_url = person_organization_url
    else:
        logging.error("get_person_organization needs a person_id or a "
                      "person_organization_url")
        return

    logging.info("Getting person %d organizations from %s",
                 person_id, person_committee_url)

    person = Person(originalId=person_id)

    time.sleep(self.config['scraper']['wait_time'])
    response = self.get_url(person_committee_url)
    if not response:
        return

    # seek(0) is necessary to reset response pointer.
    response.seek(0)
    page_source = response.read()
    # NOTE(review): as written this replaces a blank with a blank; the
    # first argument was presumably a non-breaking space or its HTML
    # entity -- confirm against the upstream file.
    page_source = page_source.replace(' ', ' ')
    dom = etree.parse(StringIO(page_source), etree.HTMLParser())

    memberships = []
    for tr in dom.xpath(self.xpath['PERSON_ORGANIZATION_LINES']):
        # Reset per row so that a row without a usable organization can
        # never pick up the organization of the row before it.
        new_organisation = None
        tds = tr.xpath('.//td')
        if len(tds) not in (2, 5):
            continue
        long_info = len(tds) == 5

        if tds[0].xpath('.//a'):
            href = tds[0][0].get('href')
            # delete __cgrname when it's there
            href_parts = href.split('&')
            if len(href_parts) == 2 and href_parts[1].startswith('__cgrname='):
                href = href_parts[0]
            parsed = parse.search(
                self.urls['ORGANIZATION_DETAIL_PARSE_PATTERN'], href)
            if not parsed:
                parsed = parse.search(
                    self.urls['ORGANIZATION_DETAIL_PARSE_PATTERN_FULL'], href)
            if parsed is not None:
                new_organisation = Organization(
                    originalId=int(parsed['committee_id']))
                new_organisation.name = tds[0][0].text
        else:
            new_organisation = Organization(originalId=tds[0].text)

        # NOTE(review): nesting rebuilt from a whitespace-collapsed source;
        # the error is assumed to belong to rows with 2 or 5 cells that
        # yield no organization.
        if not new_organisation:
            logging.error("Bad Table Structure in %s", person_committee_url)
            continue
        if not long_info:
            # two-cell rows carry no role or dates, so no membership
            continue

        new_membership = Membership()
        membership_original_id = (unicode(person_id) + '-' +
                                  unicode(new_organisation.originalId))
        if tds[2].text:
            new_membership.role = tds[2].text
        if tds[3].text:
            new_membership.startDate = tds[3].text
            membership_original_id += '-' + tds[3].text
        if tds[4].text:
            new_membership.endDate = tds[4].text
            membership_original_id += '-' + tds[4].text
        new_membership.originalId = membership_original_id
        new_membership.organization = new_organisation
        memberships.append(new_membership)

    if memberships:
        person.membership = memberships
    oid = self.db.save_person(person)
    logging.info("Person %d stored with _id %s", person_id, oid)
def get_person_organization(self, person_id=None, organization_url=None):
    """
    Load the memberships listed on a person's detail page and store them.

    The page ``kp020.asp?KPLFDNR=<person_id>&history=true`` is fetched and
    the second table below the element with id ``rismain_raw`` is walked
    row by row. Header rows (``th``) select the organization type and the
    name of the form input that carries the group id; every other row
    yields one ``Membership``. Memberships whose organization id could not
    be determined are dropped with a warning. The resulting ``Person`` is
    passed to ``self.db.save_person``.

    If parsing raises ``AttributeError`` the whole request is repeated,
    up to three more times, before giving up with an error log entry.

    ``organization_url`` is accepted but not used. Returns None.
    """
    url = ("%skp020.asp?KPLFDNR=%s&history=true" %
           (self.config['scraper']['base_url'], person_id))
    logging.info("Getting person organization from %s", url)

    # maps name of type to form name and membership type
    type_map = {
        u'Rat der Stadt': {'mtype': 'parliament', 'field': 'PALFDNR'},
        u'Parlament': {'mtype': 'parliament', 'field': 'PALFDNR'},
        u'Fraktion': {'mtype': 'organisation', 'field': 'FRLFDNR'},
        'Fraktionen': {'mtype': 'parliament', 'field': 'FRLFDNR'},
        u'Ausschüsse': {'mtype': 'organization', 'field': 'AULFDNR'},
        'Stadtbezirk': {'mtype': 'parliament', 'field': 'PALFDNR'},
        'BVV': {'mtype': 'parliament', 'field': 'PALFDNR'},
        'Bezirksparlament': {'mtype': 'parliament', 'field': 'PALFDNR'},
        'Bezirksverordnetenversammlung': {'mtype': 'parliament',
                                          'field': 'PALFDNR'},
    }

    # Stupid re-try concept because AllRis sometimes misses start < at
    # tags at first request.
    try_counter = 0
    while True:
        try:
            response = self.get_url(url)
            # Check the response, not the (always non-empty) URL.
            if not response:
                return
            tree = html.fromstring(response.text)

            memberships = []
            person = Person(originalId=person_id)

            table = tree.xpath('//*[@id="rismain_raw"]/table[2]')
            if not table:
                logging.info("table missing, nothing to do at %s", url)
                return

            # State carried from row to row; header rows update it.
            mtype = "parliament"
            field = 'PALFDNR'
            field_list = None
            # for checking if it changes
            old_group_id = None
            old_group_name = None
            # might break otherwise
            group_id = None

            for line in table[0].findall("tr"):
                if line[0].tag == "th":
                    what = line[0].text.strip()
                    field = None
                    field_list = None
                    if what in type_map:
                        mtype = type_map[what]['mtype']
                        field = type_map[what]['field']
                    elif 'Wahlperiode' in what:
                        mtype = 'parliament'
                        # 'FRLFDNR'
                        field_list = ['KPLFDNR', 'AULFDNR']
                    elif "Auskünfte gemäß BVV" in what:
                        break
                    else:
                        logging.error("Unknown organization type %s "
                                      "at person detail page %s",
                                      what, person_id)
                    continue

                if "Keine Information" in line.text_content():
                    # skip because no content is available
                    continue

                # Empty line = strange stuff comes after this
                if len(list(line)) < 2:
                    break

                # first get the name of group
                group_name = line[1].text_content()
                organization = Organization(name=group_name)
                organization.classification = mtype

                # Now the first col might be a form with more
                # useful information which will carry through
                # until we find another one.
                # With it. we still check the name though.
                form = line[0].find("form")
                if form is not None:
                    if field:
                        group_id = int(form.find(
                            "input[@name='%s']" % field).get("value"))
                    elif field_list:
                        # A separate loop name keeps `field` (row state)
                        # untouched for the rows that follow; the last
                        # candidate found wins.
                        for candidate in field_list:
                            candidate_input = form.find(
                                "input[@name='%s']" % candidate)
                            if candidate_input is not None:
                                group_id = int(candidate_input.get("value"))
                    organization.originalId = group_id
                    # remember both for the next rows
                    old_group_id = group_id
                    old_group_name = group_name
                elif old_group_name != group_name:
                    # We did not find a form. We assume that the old
                    # group still applies but we nevertheless check if
                    # the group name is still the same.
                    logging.warning("Group name differs but we "
                                    "didn't get a form with new "
                                    "group id: group name=%s, old "
                                    "group name=%s, old group "
                                    "id=%s at url %s",
                                    group_name, old_group_name,
                                    old_group_id, url)
                    organization.originalId = None
                else:
                    organization.originalId = old_group_id

                membership = Membership(organization=organization)
                membership.originalId = (unicode(person_id) + '-' +
                                         unicode(group_id))

                # TODO: create a list of functions (third column) so we
                # can index them somehow
                raw_date = line[3].text_content()
                # parse the date information
                if "seit" in raw_date:
                    # "seit <date>" is German for "since <date>": the
                    # membership is ongoing, so the date is its start.
                    membership.startDate = raw_date.split()[-1]
                elif "Keine" not in raw_date and raw_date.strip():
                    dparts = raw_date.split()
                    membership.startDate = dparts[0]
                    membership.endDate = dparts[-1]
                # otherwise: no date information available

                if organization.originalId is not None:
                    memberships.append(membership)
                else:
                    logging.warning("Bad organization at %s", url)

            person.membership = memberships
            self.db.save_person(person)
            return
        except AttributeError:
            if try_counter < 3:
                logging.info("Try again: Getting person organizations with "
                             "person id %d from %s", person_id, url)
                try_counter += 1
            else:
                logging.error("Failed getting person organizations with "
                              "person id %d from %s", person_id, url)
                return
class PersonEditPage(cimarron.skin.Controller):
    """
    Controller page for editing the name and surname of a ``Person``.

    The page consists of a 'New' button, two entries (name, surname) and a
    'Save' button. ``self.value`` holds the person being edited and may be
    ``None`` while nothing has been entered yet.
    """

    def __init__(self, **kw):
        """Build the widgets; keyword arguments go to the base class."""
        super(PersonEditPage, self).__init__(**kw)
        h = cimarron.skin.HBox()
        h.label = 'Edit'
        h.parent = self

        cimarron.skin.Button(
            parent=h,
            label='New',
            onAction=self.newPerson,
        )
        v = cimarron.skin.VBox(parent=h)
        self.name = cimarron.skin.Entry(
            parent=v,
            onAction=self.editModel,
        )
        # presumably lets the entry call will_focus_out() on this page
        self.name.delegates.append(self)
        self.surname = cimarron.skin.Entry(
            parent=v,
            onAction=self.editModel,
        )
        self.surname.delegates.append(self)
        cimarron.skin.Button(
            parent=h,
            label='Save',
            onAction=self.savePerson,
        )

    def will_focus_out(self, *ignore):
        """Copy the entries into the model; arguments are ignored."""
        self.editModel()

    def newPerson(self, *ignore):
        """Start editing a freshly constructed ``Person``."""
        self.value = Person()

    def savePerson(self, *ignore):
        """
        Save the current person if it has unsaved changes.

        Does nothing when there is no current person, matching how
        ``editModel`` and ``refresh`` treat a ``None`` value.
        """
        person = self.value
        if person is None or not person.isDirty:
            return
        if person.new:
            # NOTE: this is set up like this only because I won't implement
            # a model that can add itself to a transaction and commit
            self.onAction()
        # NOTE(review): nesting rebuilt from a whitespace-collapsed source;
        # save() is assumed to run for every dirty person, new or not.
        person.save()

    def editModel(self, *ignore):
        """Write the entry contents to the person and mark it dirty."""
        if self.value is None:
            # Fill the new person before assigning it: assigning an empty
            # Person() first would empty the entries (due to refresh()).
            value = Person()
            value.name = self.name.value
            value.surname = self.surname.value
            self.value = value
        else:
            self.value.name = self.name.value
            self.value.surname = self.surname.value
        self.value.isDirty = True

    def refresh(self, *ignore):
        """Show the current person in the entries, or clear them."""
        if self.value is not None:
            self.name.value = self.value.name
            self.surname.value = self.value.surname
        else:
            self.name.value = ''
            self.surname.value = ''
def newPerson(self, *ignore):
    """
    Replace the current value with a freshly constructed ``Person``.

    Any extra positional arguments are accepted and ignored.
    """
    fresh_person = Person()
    self.value = fresh_person
def find_person(self):
    """
    Fetch the person overview export and store every person found in it.

    The XML document served by ``kp041.asp`` is requested through
    ``self.get_url``; when that yields nothing the method returns without
    doing anything. Each child of the document's second top-level element
    is read as one person record (HTML entities unescaped, missing text
    stored as ``''``), turned into a ``Person`` with an optional party
    ``Membership`` and passed to ``self.db.save_person``. Records with a
    non-empty ``link_kp`` are also added to ``self.person_queue`` if the
    scraper has such a queue.

    Returns None.
    """
    find_person_url = (self.config['scraper']['base_url'] +
                       'kp041.asp?template=xyz&selfaction=ws&showAll=true&'
                       'PALFDNRM=1&kpdatfil=&filtdatum=filter&kpname=&'
                       'kpsonst=&kpampa=99999999&kpfr=99999999&'
                       'kpamfr=99999999&kpau=99999999&kpamau=99999999&'
                       'searchForm=true&search=Suchen')
    logging.info("Getting person overview from %s", find_person_url)

    # recover=True: keep parsing past malformed markup instead of failing
    parser = etree.XMLParser(recover=True)
    response = self.get_url(find_person_url)
    if not response:
        return
    xml = response.text.encode('ascii', 'xmlcharrefreplace')
    tree = etree.fromstring(xml, parser=parser)
    entity_decoder = HTMLParser.HTMLParser()

    # (tag in the export, Person attribute); a value is copied only when
    # it is non-empty.
    copied_fields = (
        ('link_kp', 'originalUrl'),
        # personal information
        ('adtit', 'title'),
        ('advname', 'firstname'),
        ('adname', 'lastname'),
        # address
        ('adstr', 'address'),
        ('adhnr', 'house_number'),
        ('adplz', 'postalcode'),
        # contact
        ('adtel', 'phone'),
        ('adtel2', 'mobile'),
        ('adfax', 'fax'),
        ('ademail', 'email'),
        ('adwww1', 'website'),
    )
    sex_by_salutation = {'Frau': 1, 'Herr': 2}

    # element 0 is the special block
    # element 1 is the list of persons
    for node in tree[1].iterchildren():
        elem = {}
        for child in node.iterchildren():
            elem[child.tag] = (entity_decoder.unescape(child.text)
                               if child.text else '')

        # now retrieve person details such as organization memberships etc.
        # we also get the age (but only that, no date of birth)
        person = Person(originalId=int(elem['kplfdnr']))
        for tag, attribute in copied_fields:
            if elem[tag]:
                setattr(person, attribute, elem[tag])
        sex = sex_by_salutation.get(elem['antext1'])
        if sex is not None:
            person.sex = sex

        person_party = elem['kppartei']
        if person_party:
            party_alias = self.config['scraper']['party_alias']
            if person_party in party_alias:
                person_party = party_alias[person_party]
            new_organization = Organization(originalId=person_party,
                                            name=person_party,
                                            classification='party')
            original_id = unicode(person.originalId) + '-' + person_party
            person.membership = [Membership(originalId=original_id,
                                            organization=new_organization)]

        # Missing text is stored as '' above, so test for emptiness; an
        # "is not None" test can never detect a missing link here.
        if elem['link_kp']:
            if hasattr(self, 'person_queue'):
                self.person_queue.add(person.originalId)
        else:
            # firstname/lastname are only set on the Person when the export
            # supplies them, so log the raw export values instead.
            logging.info("Person %s %s has no link",
                         elem['advname'], elem['adname'])
        self.db.save_person(person)
def get_person_committee(self, person_committee_url=None, person_id=None):
    """
    Load the committee memberships of one person and store them.

    Either ``person_id`` or ``person_committee_url`` has to be given.
    With ``person_id`` the page URL is built from
    ``self.urls['PERSON_COMMITTEE_PRINT_PATTERN']``; with only the URL,
    the person id is extracted from it using the same pattern. If neither
    is given, or the id cannot be extracted, an error is logged and the
    method returns without fetching anything.

    The page is fetched after sleeping for ``self.config.WAIT_TIME``.
    Every table row with two or five cells that names a committee adds
    one dict with the key ``'committee'`` to ``person.committee``;
    five-cell rows additionally provide ``'position'`` and, when present,
    ``'start'`` and ``'end'``. The ``Person`` is then handed to
    ``self.db.save_person``.

    Returns None.
    """
    # NOTE(review): the same pattern is used for %-formatting and for
    # parse.search -- confirm it really supports both uses.
    url_pattern = self.urls['PERSON_COMMITTEE_PRINT_PATTERN']
    if person_id is not None:
        person_committee_url = url_pattern % person_id
    elif person_committee_url is not None:
        parsed = parse.search(url_pattern, person_committee_url)
        if parsed is None:
            logging.error("Cannot extract person id from %s",
                          person_committee_url)
            return
        person_id = parsed['person_id']
    else:
        logging.error("get_person_committee needs a person_id or a "
                      "person_committee_url")
        return

    logging.info("Getting person %d committees from %s",
                 person_id, person_committee_url)

    person = Person(numeric_id=person_id)

    time.sleep(self.config.WAIT_TIME)
    response = self.get_url(person_committee_url)
    if not response:
        return

    # seek(0) is necessary to reset response pointer.
    response.seek(0)
    page_source = response.read()
    # NOTE(review): as written this replaces a blank with a blank; the
    # first argument was presumably a non-breaking space or its HTML
    # entity -- confirm against the upstream file.
    page_source = page_source.replace(' ', ' ')
    dom = etree.parse(StringIO(page_source), etree.HTMLParser())

    committees = []
    for tr in dom.xpath(self.xpath['PERSON_COMMITTEE_LINES']):
        tds = tr.xpath('.//td')
        if len(tds) not in (2, 5):
            continue
        long_info = len(tds) == 5
        new_committee = None

        if tds[0].xpath('.//a'):
            href = tds[0][0].get('href')
            # delete __cgrname when it's there
            href_parts = href.split('&')
            if len(href_parts) == 2 and href_parts[1].startswith('__cgrname='):
                href = href_parts[0]
            parsed = parse.search(
                self.urls['COMMITTEE_DETAIL_PARSE_PATTERN'], href)
            if not parsed:
                parsed = parse.search(
                    self.urls['COMMITTEE_DETAIL_PARSE_PATTERN_FULL'], href)
            if parsed is not None:
                committee = Committee(numeric_id=int(parsed['committee_id']))
                committee.identifier = tds[0][0].text
                committee.title = tds[0][0].text
                new_committee = {'committee': committee}
        else:
            new_committee = {'committee': Committee(identifier=tds[0].text)}

        # NOTE(review): nesting rebuilt from a whitespace-collapsed source;
        # the error is assumed to belong to rows with 2 or 5 cells that
        # yield no committee.
        if not new_committee:
            logging.error("Bad Table Structure in %s", person_committee_url)
            continue

        if long_info:
            new_committee['position'] = tds[2].text
            if tds[3].text:
                new_committee['start'] = tds[3].text
            if tds[4].text:
                new_committee['end'] = tds[4].text
        committees.append(new_committee)

    if committees:
        person.committee = committees
    oid = self.db.save_person(person)
    logging.info("Person %d stored with _id %s", person_id, oid)