def parse(self, html):
    """
    Parse the awards from a USFIRST awards page.

    Returns a 2-tuple ``(awards, False)``. ``awards`` holds one dict per
    recognized award type with the keys ``name_str``, ``award_type_enum``,
    ``team_number_list`` and ``recipient_json_list``. The second element is
    always ``False``.
    """
    # Drop undecodable bytes before handing the markup to BeautifulSoup.
    html = html.decode("utf-8", "ignore")
    soup = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)

    # Awards are read from the seventh table, skipping its first three rows.
    rows = soup.findAll("table")[6].findAll("tr")[3:]

    grouped = {}
    for row in rows:
        cells = row.findAll("td")
        name_str = unicode(self._recurseUntilString(cells[0]))
        award_type_enum = AwardHelper.parse_award_type(name_str)
        if award_type_enum is None:
            # Award name not recognized; ignore the row.
            continue

        try:
            raw_team = self._recurseUntilString(cells[1])
        except AttributeError:
            raw_team = None
        # Only a purely numeric cell counts as a team number.
        team_number = int(raw_team) if raw_team and raw_team.isdigit() else None

        awardee = None
        if award_type_enum in AwardType.INDIVIDUAL_AWARDS:
            try:
                raw_awardee = self._recurseUntilString(cells[2])
                if raw_awardee:
                    awardee = unicode(sanitize(raw_awardee))
            except TypeError:
                awardee = None
        # Turns '' into None
        awardee = awardee or None

        # An award must have either an awardee or a team_number.
        if awardee is None and team_number is None:
            continue

        recipient_json = json.dumps({"team_number": team_number, "awardee": awardee})

        entry = grouped.get(award_type_enum)
        if entry is None:
            grouped[award_type_enum] = {
                "name_str": strip_number(name_str),
                "award_type_enum": award_type_enum,
                "team_number_list": [team_number] if team_number is not None else [],
                "recipient_json_list": [recipient_json],
            }
            continue

        if team_number is not None:
            entry["team_number_list"].append(team_number)
        entry["recipient_json_list"].append(recipient_json)

    return grouped.values(), False
def parse(cls, data):
    """
    Parse CSV text that contains awards.

    Each row has the columns:
    year, event_short, award_name_str, team_number (can be blank), awardee (can be blank)
    Example:
    2000,mi,Regional Finalist,45,

    Rows with neither a team number nor an awardee are skipped. Rows are
    grouped by the key produced by ``Award.render_key_name``; the grouped
    dicts are returned.
    """
    grouped = {}
    reader = csv.reader(StringIO.StringIO(data), delimiter=',', skipinitialspace=True)
    for row in list(reader):
        year = int(row[0])
        event_short, name_str, raw_team, raw_awardee = row[1], row[2], row[3], row[4]

        # Blank columns mean "no value".
        team_number = None if raw_team == '' else int(raw_team)
        awardee = None if raw_awardee == '' else raw_awardee

        # An award must have either an awardee or a team_number.
        if awardee is None and team_number is None:
            continue

        recipient_json = json.dumps({
            'team_number': team_number,
            'awardee': awardee,
        })

        award_type_enum = AwardHelper.parse_award_type(name_str)
        if award_type_enum is None:
            # If we can't figure it out, fall back to OTHER (good for offseason events)
            award_type_enum = AwardType.OTHER

        award_key_name = Award.render_key_name('{}{}'.format(year, event_short), award_type_enum)

        if award_key_name not in grouped:
            grouped[award_key_name] = {
                'year': year,
                'event_short': event_short,
                'name_str': name_str,
                'award_type_enum': award_type_enum,
                'team_number_list': [team_number] if team_number is not None else [],
                'recipient_json_list': [recipient_json],
            }
            continue

        existing = grouped[award_key_name]
        if team_number is not None:
            existing['team_number_list'].append(team_number)
        existing['recipient_json_list'].append(recipient_json)

    return grouped.values()
def parse(self, response):
    """
    Build Award models from an API response dict.

    Reads ``response['Awards']``; each entry supplies ``teamNumber``,
    ``name`` and ``person``. Entries whose team is not in
    ``self.valid_team_nums`` (when that is not None) or whose name maps to
    no award type are skipped. Returns a list with one ``Award`` per award
    type for ``self.event``.
    """
    grouped = {}
    for entry in response['Awards']:
        team_number = entry['teamNumber']
        if self.valid_team_nums is not None and team_number not in self.valid_team_nums:
            continue

        award_type_enum = AwardHelper.parse_award_type(entry['name'])
        if award_type_enum is None:
            continue

        recipient_json = json.dumps({
            'team_number': team_number,
            'awardee': entry['person'],
        })

        if award_type_enum not in grouped:
            # First recipient seen for this award type.
            grouped[award_type_enum] = {
                'name_str': entry['name'],
                'award_type_enum': award_type_enum,
                'team_number_list': [team_number] if team_number is not None else [],
                'recipient_json_list': [recipient_json],
            }
            continue

        bucket = grouped[award_type_enum]
        if team_number is not None:
            bucket['team_number_list'].append(team_number)
        bucket['recipient_json_list'].append(recipient_json)

    return [
        Award(
            id=Award.render_key_name(self.event.key_name, info['award_type_enum']),
            name_str=info['name_str'],
            award_type_enum=info['award_type_enum'],
            year=self.event.year,
            event=self.event.key,
            event_type_enum=self.event.event_type_enum,
            team_list=[ndb.Key(Team, 'frc{}'.format(number))
                       for number in info['team_number_list']],
            recipient_json_list=info['recipient_json_list'],
        )
        for info in grouped.values()
    ]
def parse(self, awards_json, event_key):
    """
    Parse JSON that contains a list of awards where each award is a dict of:
    name_str: String of award name. ex: "Tournament Winner" or "Dean's List Finalist"
    team_key: String in the format "frcXXX" for the team that won the award. Can be null.
    awardee: String corresponding to the name of an individual that won the award. Can be null.

    Awards are grouped by the key ``Award.render_key_name(event_key, type)``.
    Returns the grouped dicts, each with the keys ``name_str``,
    ``award_type_enum``, ``team_key_list`` and ``recipient_json_list``.

    Raises ParserInputException when the input is not valid JSON, is not a
    list of dicts, lacks a ``name_str``, has a malformed ``team_key``, or
    names an award whose type cannot be determined.
    """
    try:
        awards = json.loads(awards_json)
    except (TypeError, ValueError):
        # json.loads signals bad input with ValueError (or TypeError for
        # non-string input); anything else is a real bug and should surface.
        raise ParserInputException("Invalid JSON. Please check input.")

    if not isinstance(awards, list):
        # e.g. `5` or `null` would otherwise fail with a raw TypeError below.
        raise ParserInputException("Awards must be a list of dicts.")

    awards_by_key = {}
    for award in awards:
        if not isinstance(award, dict):
            raise ParserInputException("Awards must be dicts.")

        name_str = award.get('name_str', None)
        team_key = award.get('team_key', None)
        awardee = award.get('awardee', None)

        if name_str is None:
            raise ParserInputException("Award must have a 'name_str'")

        # Anchor the pattern at the end so keys such as 'frc12abc' are
        # rejected here instead of crashing in int() further down.
        if team_key and not re.match(r'frc\d+\Z', str(team_key)):
            raise ParserInputException("Bad team_key: '{}'. Must follow format 'frcXXX' or be null.".format(team_key))

        award_type_enum = AwardHelper.parse_award_type(name_str)
        if award_type_enum is None:
            raise ParserInputException("Cannot determine award type from: '{}'. Please contact a www.thebluealliance.com admin.".format(name_str))

        recipient_json = json.dumps({
            'team_number': int(team_key[3:]) if team_key else None,
            'awardee': awardee,
        })

        award_key_name = Award.render_key_name(event_key, award_type_enum)
        if award_key_name in awards_by_key:
            # Same truthiness test as the new-entry branch, so an empty
            # team_key is never recorded as a team.
            if team_key:
                awards_by_key[award_key_name]['team_key_list'].append(team_key)
            awards_by_key[award_key_name]['recipient_json_list'].append(recipient_json)
        else:
            awards_by_key[award_key_name] = {
                'name_str': name_str,
                'award_type_enum': award_type_enum,
                'team_key_list': [team_key] if team_key else [],
                'recipient_json_list': [recipient_json],
            }

    return awards_by_key.values()
def parse(self, response):
    """
    Convert the ``Awards`` entries of an API response into Award models.

    Entries are skipped when ``self.valid_team_nums`` is set and does not
    contain the entry's ``teamNumber``, or when the award ``name`` cannot
    be mapped to an award type. Recipients of the same award type are
    merged into a single ``Award`` tied to ``self.event``.
    """
    by_type = {}
    for item in response['Awards']:
        team_number = item['teamNumber']
        if self.valid_team_nums is not None and team_number not in self.valid_team_nums:
            continue

        award_type_enum = AwardHelper.parse_award_type(item['name'])
        if award_type_enum is None:
            continue

        recipient_json = json.dumps({
            'team_number': team_number,
            'awardee': item['person'],
        })

        known = by_type.get(award_type_enum)
        if known is not None:
            # Another recipient of an award type already seen.
            if team_number is not None:
                known['team_number_list'].append(team_number)
            known['recipient_json_list'].append(recipient_json)
        else:
            team_number_list = []
            if team_number is not None:
                team_number_list.append(team_number)
            by_type[award_type_enum] = {
                'name_str': item['name'],
                'award_type_enum': award_type_enum,
                'team_number_list': team_number_list,
                'recipient_json_list': [recipient_json],
            }

    awards = []
    for details in by_type.values():
        team_keys = [ndb.Key(Team, 'frc{}'.format(number))
                     for number in details['team_number_list']]
        award_id = Award.render_key_name(self.event.key_name, details['award_type_enum'])
        awards.append(Award(
            id=award_id,
            name_str=details['name_str'],
            award_type_enum=details['award_type_enum'],
            year=self.event.year,
            event=self.event.key,
            event_type_enum=self.event.event_type_enum,
            team_list=team_keys,
            recipient_json_list=details['recipient_json_list'],
        ))
    return awards
def parse(self, html):
    """
    Parse the awards from a USFIRST awards page.

    Returns ``(awards, False)`` where ``awards`` contains one dict per
    recognized award type (keys: ``name_str``, ``award_type_enum``,
    ``team_number_list``, ``recipient_json_list``).
    """
    # Clean the html (drop undecodable bytes) before feeding it into BeautifulSoup.
    html = html.decode('utf-8', 'ignore')
    soup = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)

    # Seventh table on the page; its first three rows are not award rows.
    award_rows = soup.findAll('table')[6].findAll('tr')[3:]

    by_type = {}
    for award_row in award_rows:
        columns = award_row.findAll('td')
        name_str = unicode(self._recurseUntilString(columns[0]))
        award_type_enum = AwardHelper.parse_award_type(name_str)
        if award_type_enum is None:
            continue

        try:
            team_text = self._recurseUntilString(columns[1])
        except AttributeError:
            team_text = None
        if team_text and team_text.isdigit():
            team_number = int(team_text)
        else:
            team_number = None

        awardee = None
        if award_type_enum in AwardType.INDIVIDUAL_AWARDS:
            try:
                awardee_text = self._recurseUntilString(columns[2])
                if awardee_text:
                    awardee = unicode(sanitize(awardee_text))
            except TypeError:
                awardee = None
        # Turns '' into None
        awardee = awardee or None

        # An award must have either an awardee or a team_number.
        if team_number is None and awardee is None:
            continue

        recipient_json = json.dumps({
            'team_number': team_number,
            'awardee': awardee,
        })

        if award_type_enum not in by_type:
            by_type[award_type_enum] = {
                'name_str': strip_number(name_str),
                'award_type_enum': award_type_enum,
                'team_number_list': [team_number] if team_number is not None else [],
                'recipient_json_list': [recipient_json],
            }
            continue

        current = by_type[award_type_enum]
        if team_number is not None:
            current['team_number_list'].append(team_number)
        current['recipient_json_list'].append(recipient_json)

    return by_type.values(), False
def parse(self, data):
    """
    Parse CSV text that contains awards.

    Each row has the columns:
    year, event_short, award_name_str, team_number (can be blank), awardee (can be blank)
    Example:
    2000,mi,Regional Finalist,45,

    Rows with neither a team number nor an awardee, and rows whose award
    name maps to no award type, are skipped. The remaining rows are grouped
    by ``Award.render_key_name`` and the grouped dicts are returned.
    """
    by_key = {}
    rows = list(csv.reader(StringIO.StringIO(data), delimiter=',', skipinitialspace=True))
    for fields in rows:
        year = int(fields[0])
        event_short, name_str, team_text, awardee_text = fields[1], fields[2], fields[3], fields[4]

        # Blank columns mean "no value".
        team_number = int(team_text) if team_text != '' else None
        awardee = awardee_text if awardee_text != '' else None

        # An award must have either an awardee or a team_number.
        if team_number is None and awardee is None:
            continue

        recipient_json = json.dumps({
            'team_number': team_number,
            'awardee': awardee,
        })

        award_type_enum = AwardHelper.parse_award_type(name_str)
        if award_type_enum is None:
            continue

        award_key_name = Award.render_key_name('{}{}'.format(year, event_short), award_type_enum)

        record = by_key.get(award_key_name)
        if record is None:
            by_key[award_key_name] = {
                'year': year,
                'event_short': event_short,
                'name_str': name_str,
                'award_type_enum': award_type_enum,
                'team_number_list': [team_number] if team_number is not None else [],
                'recipient_json_list': [recipient_json],
            }
        else:
            if team_number is not None:
                record['team_number_list'].append(team_number)
            record['recipient_json_list'].append(recipient_json)

    return by_key.values()
def parse(self, html):
    """
    Parse the awards from a USFIRST awards page.

    Returns ``(awards, False)`` where ``awards`` contains one dict per
    recognized award type (keys: ``name_str``, ``award_type_enum``,
    ``team_number_list``, ``recipient_json_list``).
    """
    # Clean the html (drop undecodable bytes) before feeding it into BeautifulSoup.
    html = html.decode('utf-8', 'ignore')
    soup = BeautifulSoup(html)

    # First table on the page; its first two rows are not award rows.
    candidate_rows = soup.findAll('table')[0].findAll('tr')[2:]

    collected = {}
    for candidate in candidate_rows:
        cells = candidate.findAll('td')
        if not cells:
            # Row without any <td> cells.
            continue

        name_str = unicode(self._recurseUntilString(cells[0]))
        award_type_enum = AwardHelper.parse_award_type(name_str)
        if award_type_enum is None:
            continue

        try:
            team_text = self._recurseUntilString(cells[1])
        except AttributeError:
            team_text = None
        team_number = int(team_text) if team_text and team_text.isdigit() else None

        awardee = None
        if award_type_enum in AwardType.INDIVIDUAL_AWARDS:
            try:
                awardee_text = self._recurseUntilString(cells[2])
                if awardee_text:
                    awardee = unicode(sanitize(awardee_text))
            except TypeError:
                awardee = None
        # Turns '' into None
        awardee = awardee or None

        # An award must have either an awardee or a team_number.
        if awardee is None and team_number is None:
            continue

        recipient_json = json.dumps({
            'team_number': team_number,
            'awardee': awardee,
        })

        slot = collected.get(award_type_enum)
        if slot is None:
            collected[award_type_enum] = {
                'name_str': strip_number(name_str),
                'award_type_enum': award_type_enum,
                'team_number_list': [team_number] if team_number is not None else [],
                'recipient_json_list': [recipient_json],
            }
            continue

        if team_number is not None:
            slot['team_number_list'].append(team_number)
        slot['recipient_json_list'].append(recipient_json)

    return collected.values(), False
def test_parse(self):
    """
    Tests for a select subset of award types. Add more if desired.

    Checks ``AwardHelper.parse_award_type`` against a table of award names
    and expected types, then verifies that every award name in the bundled
    CSV and JSON fixtures maps to some award type.
    """
    expectations = [
        ("Chairman's", AwardType.CHAIRMANS),
        ("Chairman", AwardType.CHAIRMANS),

        ("Winner #1", AwardType.WINNER),
        ("Division Winner #2", AwardType.WINNER),
        ("Newton - Division Champion #3", AwardType.WINNER),
        ("Championship Winner #3", AwardType.WINNER),
        ("Championship Champion #4", AwardType.WINNER),
        ("Championship Champion", AwardType.WINNER),
        ("Championship Winner", AwardType.WINNER),
        ("Winner", AwardType.WINNER),

        ("Finalist #1", AwardType.FINALIST),
        ("Division Finalist #2", AwardType.FINALIST),
        ("Championship Finalist #3", AwardType.FINALIST),
        ("Championship Finalist #4", AwardType.FINALIST),
        ("Championship Finalist", AwardType.FINALIST),
        ("Finalist", AwardType.FINALIST),

        ("Dean's List Finalist #1", AwardType.DEANS_LIST),
        ("Dean's List Finalist", AwardType.DEANS_LIST),
        ("Dean's List Winner #9", AwardType.DEANS_LIST),
        ("Dean's List Winner", AwardType.DEANS_LIST),
        ("Dean's List", AwardType.DEANS_LIST),

        ("Excellence in Design Award sponsored by Autodesk (3D CAD)", AwardType.EXCELLENCE_IN_DESIGN_CAD),
        ("Excellence in Design Award sponsored by Autodesk (Animation)", AwardType.EXCELLENCE_IN_DESIGN_ANIMATION),
        ("Excellence in Design Award", AwardType.EXCELLENCE_IN_DESIGN),

        ("Dr. Bart Kamen Memorial Scholarship #1", AwardType.BART_KAMEN_MEMORIAL),
        ("Media and Technology Award sponsored by Comcast", AwardType.MEDIA_AND_TECHNOLOGY),
        ("Make It Loud Award", AwardType.MAKE_IT_LOUD),
        ("Founder's Award", AwardType.FOUNDERS),
        ("Championship - Web Site Award", AwardType.WEBSITE),
        ("Recognition of Extraordinary Service", AwardType.RECOGNITION_OF_EXTRAORDINARY_SERVICE),
        ("Outstanding Cart Award", AwardType.OUTSTANDING_CART),
        ("Wayne State University Aim Higher Award", AwardType.WSU_AIM_HIGHER),
        ("Delphi \"Driving Tommorow's Technology\" Award", AwardType.DRIVING_TOMORROWS_TECHNOLOGY),
        ("Delphi Drive Tommorows Technology", AwardType.DRIVING_TOMORROWS_TECHNOLOGY),
        ("Kleiner, Perkins, Caufield and Byers", AwardType.ENTREPRENEURSHIP),
        ("Leadership in Control Award", AwardType.LEADERSHIP_IN_CONTROL),
        ("#1 Seed", AwardType.NUM_1_SEED),
        ("Incredible Play Award", AwardType.INCREDIBLE_PLAY),
        ("People's Choice Animation Award", AwardType.PEOPLES_CHOICE_ANIMATION),
        ("Autodesk Award for Visualization - Grand Prize", AwardType.VISUALIZATION),
        ("Autodesk Award for Visualization - Rising Star", AwardType.VISUALIZATION_RISING_STAR),

        # Names that must NOT be recognized.
        ("Some Random Award Winner", None),
        ("Random Champion", None),
        ("An Award", None),
    ]
    for award_name, expected_type in expectations:
        self.assertEqual(AwardHelper.parse_award_type(award_name), expected_type)

    # Make sure all old regional awards have matching types
    with open('test_data/pre_2002_regional_awards.csv', 'r') as f:
        csv_data = list(csv.reader(StringIO.StringIO(f.read()), delimiter=',', skipinitialspace=True))
        for award in csv_data:
            self.assertNotEqual(AwardHelper.parse_award_type(award[2]), None)

    # Make sure all old championship awards have matching types
    with open('test_data/pre_2007_cmp_awards.csv', 'r') as f:
        csv_data = list(csv.reader(StringIO.StringIO(f.read()), delimiter=',', skipinitialspace=True))
        for award in csv_data:
            self.assertNotEqual(AwardHelper.parse_award_type(award[2]), None)

    # test 2015 award names
    with open('test_data/fms_api/2015_award_types.json', 'r') as f:
        for award in json.loads(f.read()):
            self.assertNotEqual(AwardHelper.parse_award_type(award['description']), None)
def test_parse(self):
    """
    Tests for a select subset of award types. Add more if desired.

    Runs ``AwardHelper.parse_award_type`` over a table of (award name,
    expected type) pairs, then checks that every award name found in the
    CSV and JSON fixture files resolves to some award type.
    """
    cases = (
        ("Chairman's", AwardType.CHAIRMANS),
        ("Chairman", AwardType.CHAIRMANS),

        ("Winner #1", AwardType.WINNER),
        ("Division Winner #2", AwardType.WINNER),
        ("Newton - Division Champion #3", AwardType.WINNER),
        ("Championship Winner #3", AwardType.WINNER),
        ("Championship Champion #4", AwardType.WINNER),
        ("Championship Champion", AwardType.WINNER),
        ("Championship Winner", AwardType.WINNER),
        ("Winner", AwardType.WINNER),

        ("Finalist #1", AwardType.FINALIST),
        ("Division Finalist #2", AwardType.FINALIST),
        ("Championship Finalist #3", AwardType.FINALIST),
        ("Championship Finalist #4", AwardType.FINALIST),
        ("Championship Finalist", AwardType.FINALIST),
        ("Finalist", AwardType.FINALIST),

        ("Dean's List Finalist #1", AwardType.DEANS_LIST),
        ("Dean's List Finalist", AwardType.DEANS_LIST),
        ("Dean's List Winner #9", AwardType.DEANS_LIST),
        ("Dean's List Winner", AwardType.DEANS_LIST),
        ("Dean's List", AwardType.DEANS_LIST),

        ("Excellence in Design Award sponsored by Autodesk (3D CAD)", AwardType.EXCELLENCE_IN_DESIGN_CAD),
        ("Excellence in Design Award sponsored by Autodesk (Animation)", AwardType.EXCELLENCE_IN_DESIGN_ANIMATION),
        ("Excellence in Design Award", AwardType.EXCELLENCE_IN_DESIGN),

        ("Dr. Bart Kamen Memorial Scholarship #1", AwardType.BART_KAMEN_MEMORIAL),
        ("Media and Technology Award sponsored by Comcast", AwardType.MEDIA_AND_TECHNOLOGY),
        ("Make It Loud Award", AwardType.MAKE_IT_LOUD),
        ("Founder's Award", AwardType.FOUNDERS),
        ("Championship - Web Site Award", AwardType.WEBSITE),
        ("Recognition of Extraordinary Service", AwardType.RECOGNITION_OF_EXTRAORDINARY_SERVICE),
        ("Outstanding Cart Award", AwardType.OUTSTANDING_CART),
        ("Wayne State University Aim Higher Award", AwardType.WSU_AIM_HIGHER),
        ("Delphi \"Driving Tommorow's Technology\" Award", AwardType.DRIVING_TOMORROWS_TECHNOLOGY),
        ("Delphi Drive Tommorows Technology", AwardType.DRIVING_TOMORROWS_TECHNOLOGY),
        ("Kleiner, Perkins, Caufield and Byers", AwardType.ENTREPRENEURSHIP),
        ("Leadership in Control Award", AwardType.LEADERSHIP_IN_CONTROL),
        ("#1 Seed", AwardType.NUM_1_SEED),
        ("Incredible Play Award", AwardType.INCREDIBLE_PLAY),
        ("People's Choice Animation Award", AwardType.PEOPLES_CHOICE_ANIMATION),
        ("Autodesk Award for Visualization - Grand Prize", AwardType.VISUALIZATION),
        ("Autodesk Award for Visualization - Rising Star", AwardType.VISUALIZATION_RISING_STAR),

        # Names that must not be recognized.
        ("Some Random Award Winner", None),
        ("Random Champion", None),
        ("An Award", None),
    )
    for name, expected in cases:
        self.assertEqual(AwardHelper.parse_award_type(name), expected)

    # Every award name (third column) in the historical regional and
    # championship CSV fixtures must map to an award type.
    csv_fixtures = (
        'test_data/pre_2002_regional_awards.csv',
        'test_data/pre_2007_cmp_awards.csv',
    )
    for csv_path in csv_fixtures:
        with open(csv_path, 'r') as f:
            rows = list(csv.reader(StringIO.StringIO(f.read()), delimiter=',', skipinitialspace=True))
            for row in rows:
                self.assertNotEqual(AwardHelper.parse_award_type(row[2]), None)

    # test 2015 award names
    with open('test_data/fms_api/2015_award_types.json', 'r') as f:
        award_types = json.loads(f.read())
        for award_info in award_types:
            self.assertNotEqual(AwardHelper.parse_award_type(award_info['description']), None)
def parse(self, html):
    """
    Parse the awards from a USFIRST awards page.

    Award rows are located by their inline background-color style rather
    than by table position. Returns ``(awards, False)`` where ``awards``
    contains one dict per recognized award type (keys: ``name_str``,
    ``award_type_enum``, ``team_number_list``, ``recipient_json_list``).
    """
    # Clean the html (drop undecodable bytes) before feeding it into BeautifulSoup.
    html = html.decode('utf-8', 'ignore')
    soup = BeautifulSoup(html)

    # Bad formatting on some pages makes this necessary: collect the shaded
    # rows first, then the white rows.
    shaded_rows = soup.findAll('tr', {'style': 'background-color:#D2D2FF;'})
    white_rows = soup.findAll('tr', {'style': 'background-color:#FFFFFF;'})

    found = {}
    for row in shaded_rows + white_rows:
        cells = row.findAll('td')
        name_str = unicode(self._recurseUntilString(cells[0]))
        award_type_enum = AwardHelper.parse_award_type(name_str)
        if award_type_enum is None:
            continue

        try:
            team_text = self._recurseUntilString(cells[1])
        except AttributeError:
            team_text = None
        team_number = int(team_text) if team_text and team_text.isdigit() else None

        awardee = None
        if award_type_enum in AwardType.INDIVIDUAL_AWARDS:
            try:
                awardee_text = self._recurseUntilString(cells[2])
                if awardee_text:
                    awardee = unicode(sanitize(awardee_text))
            except TypeError:
                awardee = None
        # Turns '' into None
        awardee = awardee or None

        # An award must have either an awardee or a team_number.
        if awardee is None and team_number is None:
            continue

        recipient_json = json.dumps({
            'team_number': team_number,
            'awardee': awardee,
        })

        if award_type_enum not in found:
            found[award_type_enum] = {
                'name_str': strip_number(name_str),
                'award_type_enum': award_type_enum,
                'team_number_list': [team_number] if team_number is not None else [],
                'recipient_json_list': [recipient_json],
            }
            continue

        existing = found[award_type_enum]
        if team_number is not None:
            existing['team_number_list'].append(team_number)
        existing['recipient_json_list'].append(recipient_json)

    return found.values(), False
def parse(self, html):
    """
    Parse the awards from a USFIRST awards page.

    Award rows are picked out by their inline background-color style, and
    HTML entities are converted while parsing. Returns ``(awards, False)``
    where ``awards`` contains one dict per recognized award type (keys:
    ``name_str``, ``award_type_enum``, ``team_number_list``,
    ``recipient_json_list``).
    """
    # Clean the html (drop undecodable bytes) before feeding it into BeautifulSoup.
    html = html.decode('utf-8', 'ignore')
    soup = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)

    # Bad formatting on some pages makes this necessary: gather rows by
    # style, shaded ones first and white ones after.
    rows = (soup.findAll('tr', {'style': 'background-color:#D2D2FF;'}) +
            soup.findAll('tr', {'style': 'background-color:#FFFFFF;'}))

    results = {}
    for row in rows:
        columns = row.findAll('td')
        name_str = unicode(self._recurseUntilString(columns[0]))
        award_type_enum = AwardHelper.parse_award_type(name_str)
        if award_type_enum is None:
            continue

        try:
            team_text = self._recurseUntilString(columns[1])
        except AttributeError:
            team_text = None
        if team_text and team_text.isdigit():
            team_number = int(team_text)
        else:
            team_number = None

        awardee = None
        if award_type_enum in AwardType.INDIVIDUAL_AWARDS:
            try:
                awardee_text = self._recurseUntilString(columns[2])
                if awardee_text:
                    awardee = unicode(sanitize(awardee_text))
            except TypeError:
                awardee = None
        # Turns '' into None
        awardee = awardee or None

        # An award must have either an awardee or a team_number.
        if team_number is None and awardee is None:
            continue

        recipient_json = json.dumps({
            'team_number': team_number,
            'awardee': awardee,
        })

        bucket = results.get(award_type_enum)
        if bucket is None:
            results[award_type_enum] = {
                'name_str': strip_number(name_str),
                'award_type_enum': award_type_enum,
                'team_number_list': [team_number] if team_number is not None else [],
                'recipient_json_list': [recipient_json],
            }
        else:
            if team_number is not None:
                bucket['team_number_list'].append(team_number)
            bucket['recipient_json_list'].append(recipient_json)

    return results.values(), False