def find_spids_and_holding_dates(self, date_string, verbose, mtime_after):
    """Parse the file for *date_string* and return the resulting ``self.h``.

    ``self.h`` is reset to an empty dict and ``self.current_date`` is set to
    *date_string* before anything else happens.  The file name is built
    from ``self.file_template % date_string``.

    Arguments:
      date_string -- substituted into ``self.file_template`` and stored in
                     ``self.current_date``.
      verbose     -- when true, print a warning if the file was parsed but
                     ``self.h`` is still empty afterwards.
      mtime_after -- when truthy, a file whose ``get_file_mtime()`` value is
                     lower than this is skipped without being parsed.

    Returns ``self.h``: empty if the file is missing or was skipped,
    otherwise whatever it holds once ``self.parser.parse()`` has returned
    (presumably filled in by the parser's handlers -- not visible here).
    """
    self.h = {}
    self.current_date = date_string
    filename = self.file_template % date_string

    # Nothing to parse: hand back the freshly emptied dict.
    if not os.path.exists(filename):
        return self.h

    # The file has not been touched since the cut-off, so skip it.
    if mtime_after and get_file_mtime(filename) < mtime_after:
        return self.h

    self.parser.parse(filename)

    # NOTE(review): the original indentation was lost; the warning is
    # assumed to apply only to files that were actually parsed -- confirm.
    if verbose and not self.h:
        # Single-argument call form prints the same text on Python 2 and 3.
        print(" Warning: no questions found in " + filename)
    return self.h
# (c) Look through the Official Reports for questions that were
#     actually asked in the parliament.
#
# ------------------------------------------------------------------------

# First (a) the Business Bulletins:
#
# Every "day-*" file in the bulletins directory is visited in sorted
# filename order and its name is split into its components.

bulletin_prefix = "http://www.scottish.parliament.uk/business/businessBulletin/"
bulletins_directory = "../../../parldata/cmpages/sp/bulletins/"

# Compiled once, outside the loop.  Written as a raw string so that \d, \w
# and \. reach the regex engine without relying on Python leaving unknown
# string escapes untouched.  Case-insensitive; anchored at the end of the
# filename.
bulletin_day_filename_re = re.compile(
    r'(?i)day-(bb-(\d\d))_([ab]b-(\d\d)-(\d\d)-?(\w*)\.html?)$')

bulletin_filenames = sorted(glob.glob(bulletins_directory + "day-*"))

for day_filename in bulletin_filenames:

    # Skip files older than the cut-off.
    # NOTE(review): the guard tests `modified` but compares against
    # `modified_after`; neither is defined in this part of the file --
    # confirm that both names exist and that this is intentional.
    if modified and get_file_mtime(day_filename) < modified_after:
        continue

    m = bulletin_day_filename_re.search(day_filename)
    if not m:
        if verbose:
            # Single-argument call form prints the same on Python 2 and 3.
            print("Couldn't parse file %s" % (day_filename,))
        continue

    subdir = m.group(1)           # "bb-" followed by two digits
    two_digit_year = m.group(2)   # the two digits of the subdir
    page = m.group(3)             # from "ab-"/"bb-" up to ".htm"/".html"
    two_digit_month = m.group(4)
    two_digit_day = m.group(5)
    section = m.group(6)          # trailing word characters, may be empty
# (c) Look through the Official Reports for questions that were
#     actually asked in the parliament.
#
# ------------------------------------------------------------------------

# First (a) the Business Bulletins:
#
# Every "day-*" file in the bulletins directory is visited in sorted
# filename order and its name is split into its components.

bulletin_prefix = "http://www.scottish.parliament.uk/business/businessBulletin/"
bulletins_directory = "../../../parldata/cmpages/sp/bulletins/"

# Compiled once, outside the loop.  Written as a raw string so that \d, \w
# and \. reach the regex engine without relying on Python leaving unknown
# string escapes untouched.  Case-insensitive; anchored at the end of the
# filename.
bulletin_day_filename_re = re.compile(
    r'(?i)day-(bb-(\d\d))_([ab]b-(\d\d)-(\d\d)-?(\w*)\.html?)$')

bulletin_filenames = sorted(glob.glob(bulletins_directory + "day-*"))

for day_filename in bulletin_filenames:

    # Skip files older than the cut-off.
    # NOTE(review): the guard tests `modified` but compares against
    # `modified_after`; neither is defined in this part of the file --
    # confirm that both names exist and that this is intentional.
    if modified and get_file_mtime(day_filename) < modified_after:
        continue

    m = bulletin_day_filename_re.search(day_filename)
    if not m:
        if verbose:
            # Single-argument call form prints the same on Python 2 and 3.
            print("Couldn't parse file %s" % (day_filename,))
        continue

    subdir = m.group(1)           # "bb-" followed by two digits
    two_digit_year = m.group(2)   # the two digits of the subdir
    page = m.group(3)             # from "ab-"/"bb-" up to ".htm"/".html"
    two_digit_month = m.group(4)
    two_digit_day = m.group(5)
    section = m.group(6)          # trailing word characters, may be empty