def redownload_protocol(committee_meeting):
    """Refresh the stored protocol of ``committee_meeting``.

    Plenum meetings (``committee.type == 'plenum'``) are handed over to
    ``download_for_existing_meeting``. For every other committee type the
    protocol is fetched from ``committee_meeting.src_url``; its text and the
    current timestamp are written onto the meeting, which is then saved.
    """
    if committee_meeting.committee.type != 'plenum':
        fetched = CommitteeMeetingProtocol.get_from_url(
            committee_meeting.src_url)
        with fetched as protocol:
            committee_meeting.protocol_text = protocol.text
            committee_meeting.protocol_text_update_date = datetime.now()
            committee_meeting.save()
        return

    download_for_existing_meeting(committee_meeting)
def redownload_protocol(self):
    """Refresh this meeting's protocol text from its source.

    Plenum meetings are delegated to ``download_for_existing_meeting``.
    Any other meeting fetches the protocol from ``self.src_url``, stores the
    protocol text and the current timestamp on the instance, and saves it.
    """
    if self.committee.type != 'plenum':
        fetched = KnessetDataCommitteeMeetingProtocol.get_from_url(
            self.src_url)
        with fetched as protocol:
            self.protocol_text = protocol.text
            self.protocol_text_update_date = datetime.now()
            self.save()
        return

    # TODO: Using a management command this way is an antipattern; a common
    # service should be extracted and used instead.
    from plenum.management.commands.parse_plenum_protocols_subcommands.download import (
        download_for_existing_meeting,
    )
    download_for_existing_meeting(self)
def redownload_protocol(self):
    """Re-fetch this meeting's protocol text and persist it.

    Plenum meetings (``self.committee.type == 'plenum'``) are delegated to
    ``download_for_existing_meeting``. Any other meeting downloads the
    protocol from ``self.src_url``, stores its text together with the
    current timestamp on the instance, and saves the instance.

    Raises:
        AntiwordException: re-raised after being logged (with the
            exception's ``output`` attached as logging ``extra``) when it
            occurs while fetching or saving the protocol.
    """
    if self.committee.type == 'plenum':
        # TODO: Using a management command this way is an antipattern; a
        # common service should be extracted and used instead.
        from plenum.management.commands.parse_plenum_protocols_subcommands.download import \
            download_for_existing_meeting
        download_for_existing_meeting(self)
    else:
        try:
            with KnessetDataCommitteeMeetingProtocol.get_from_url(
                    self.src_url) as protocol:
                self.protocol_text = protocol.text
                self.protocol_text_update_date = datetime.now()
                self.save()
        except AntiwordException as e:
            # ``BaseException.message`` is deprecated since Python 2.6 and
            # gone in Python 3; fall back to str(e) when it is missing or
            # empty so the handler itself cannot fail and mask the error.
            message = getattr(e, 'message', None) or str(e)
            logger.error(message, exc_info=True,
                         extra={'output': e.output})
            # Bare ``raise`` re-raises the active exception with its
            # original traceback (``raise e`` resets it on Python 2).
            raise
def redownload_protocol(self):
    """Download this meeting's protocol again and store the result.

    For plenum meetings (``self.committee.type == 'plenum'``) the work is
    delegated to ``download_for_existing_meeting``. Otherwise the protocol
    is fetched from ``self.src_url``; its text and the current timestamp
    are set on the instance, which is then saved.

    Raises:
        AntiwordException: logged (with the exception's ``output`` passed
            as logging ``extra``) and then re-raised when it occurs while
            fetching or saving the protocol.
    """
    if self.committee.type == 'plenum':
        # TODO: Using a management command this way is an antipattern; a
        # common service should be extracted and used instead.
        from plenum.management.commands.parse_plenum_protocols_subcommands.download import \
            download_for_existing_meeting
        download_for_existing_meeting(self)
    else:
        try:
            with KnessetDataCommitteeMeetingProtocol.get_from_url(self.src_url) as protocol:
                self.protocol_text = protocol.text
                self.protocol_text_update_date = datetime.now()
                self.save()
        # ``except X as e`` works on Python 2.6+ and Python 3; the previous
        # ``except X, e`` form is a SyntaxError on Python 3.
        except AntiwordException as e:
            # ``BaseException.message`` no longer exists on Python 3; use
            # str(e) when the attribute is missing or empty.
            message = getattr(e, 'message', None) or str(e)
            logger.error(
                message,
                exc_info=True,
                extra={
                    'output': e.output
                }
            )
            # Bare ``raise`` preserves the original traceback.
            raise
# get_resource: generator over ``download_rows`` that parses one committee
# meeting protocol per row.
#
# For each row it:
#   * logs progress as "<row_num> / <len(download_rows)>";
#   * builds ``original_filename`` as
#     files/<GroupTypeID>/<1st char of id>/<2nd char of id>/<id>.<ApplicationDesc>
#     where id is ``DocumentCommitteeSessionID``, and takes its lower-cased
#     extension as ``ext``;
#   * builds ``output_filename`` as
#     files/<1st char>/<2nd char>/<CommitteeSessionID>.<csv|txt>
#     ("csv" when ``parse_type == "parts"``, otherwise "txt");
#   * when ``files_limit`` is falsy or ``stats["parsed files"]`` is below it,
#     calls ``parse_protocol(output_filename, protocol)`` with a protocol read
#     either from the local directory
#     ../data/committees/download_document_committee_session/ (if
#     ``download_from_path`` is set; a missing file only logs a warning) or
#     from ``download_from_remote_storage + original_filename``; if neither
#     option is set it raises ``Exception("no valid download option")``;
#   * updates the row with ``protocol_extension`` / ``parsed_filename`` and
#     yields it.
#
# Any ``Exception`` raised in the ``try`` body is logged with
# ``logging.exception``, stored on the row under ``error`` and the row is
# appended to ``errors``; the loop then continues with the next row.
#
# ``download_rows``, ``files_limit``, ``stats``, ``parse_type``,
# ``download_from_path``, ``download_from_remote_storage``, ``parse_protocol``
# and ``errors`` are not defined in this block; they come from the enclosing
# scope.
#
# NOTE(review): this definition is collapsed onto a single physical line, so
# the original indentation is lost. In particular it cannot be determined
# from here whether ``row.update(...)`` / ``yield row`` sit inside or after
# the ``files_limit`` check. Confirm against the original file before
# reformatting.
def get_resource(): for row_num, row in enumerate(download_rows): logging.info("{} / {}".format(row_num, len(download_rows))) try: original_filename = os.path.join("files", str(row["GroupTypeID"]), str(row["DocumentCommitteeSessionID"])[0], str(row["DocumentCommitteeSessionID"])[1], str(row["DocumentCommitteeSessionID"]) + "." + row["ApplicationDesc"]) ext = os.path.splitext(original_filename)[1].lower() output_filename = "files/{}/{}/{}.{}".format(str(row["CommitteeSessionID"])[0], str(row["CommitteeSessionID"])[1], str(row["CommitteeSessionID"]), "csv" if parse_type == "parts" else "txt") if not files_limit or stats["parsed files"] < files_limit: if download_from_path: download_filename = "../data/committees/download_document_committee_session/" + original_filename if os.path.exists(download_filename): with open(download_filename, "rb") as f: with CommitteeMeetingProtocol.get_from_file(f) as protocol: parse_protocol(output_filename, protocol) else: logging.warning("missing download_filename {}".format(download_filename)) elif download_from_remote_storage: url = download_from_remote_storage + original_filename with CommitteeMeetingProtocol.get_from_url(url) as protocol: parse_protocol(output_filename, protocol) else: raise Exception("no valid download option") row.update(protocol_extension=ext, parsed_filename=output_filename) yield row except Exception as e: # there is a bug in knesset-data-python which prevents getting the error message from the exception # TODO: fix this bug error_message = "failed to parse CommitteeSessionID {}".format(row["CommitteeSessionID"]) # , str(e)) logging.exception(error_message) row.update(error=error_message) errors.append(row)
class CommitteeMeeting(BaseKnessetDataServiceFunctionObject):
    """Committee meeting as returned by the CommitteeAgendaSearch function.

    ``ORDERED_FIELDS`` pairs each field name of this object with the
    descriptor used to read it from a service entry. ``get`` queries the
    service for the meetings of one committee within a date range.
    """

    # NOTE(review): from "location " onwards every field name carries a
    # trailing space. That looks unintentional, but the names are kept
    # verbatim here because they are part of the object's public field
    # names - confirm before changing.
    ORDERED_FIELDS = [
        ("id",
         KnessetDataServiceSimpleField(
             'Committee_Agenda_id', 'integer',
             "the primary key of committee meetings")),
        ("committee_id",
         KnessetDataServiceSimpleField(
             'Committee_Agenda_committee_id', 'integer',
             "id of the committee (linked to Committee object)")),
        ("datetime",
         KnessetDataServiceSimpleField(
             'committee_agenda_date', 'datetime',
             "date/time when the meeting started")),
        ("title",
         KnessetDataServiceSimpleField(
             'title', 'string', "title of the meeting")),
        ("session_content",
         KnessetDataServiceSimpleField(
             'committee_agenda_session_content', 'string',
             "seems like in some committee meetings, the title field is empty, in that case title can be taken from this field")),
        ("url",
         KnessetDataServiceSimpleField(
             'url', 'string', "url to download the protocol")),
        # A CommitteeMeetingProtocol object which allows getting data from
        # the protocol. Because parsing the protocol requires heavy IO and
        # processing, it is provided as a generator;
        # see tests/test_meetings.py for a usage example.
        # Evaluates to None when the meeting has no url.
        ("protocol",
         KnessetDataServiceLambdaField(
             lambda obj, entry: CommitteeMeetingProtocol.get_from_url(
                 obj.url, proxies=obj._proxies) if obj.url else None)),
        ("location ",
         KnessetDataServiceSimpleField(
             'committee_location', 'string',
             "this seems like a shorter name of the place where meeting took place")),
        ("place ",
         KnessetDataServiceSimpleField(
             'Committee_Agenda_place', 'string',
             "this looks like a longer field with the specific details of where the meeting took place")),
        ("meeting_stop ",
         KnessetDataServiceSimpleField(
             'meeting_stop', 'string',
             "date/time when the meeting ended - this is not always available, in some meetings it's empty")),

        # --- the following fields seem less interesting ---
        ("agenda_canceled ",
         KnessetDataServiceSimpleField('Committee_Agenda_canceled')),
        ("agenda_sub ",
         KnessetDataServiceSimpleField('Committee_agenda_sub')),
        ("agenda_associated ",
         KnessetDataServiceSimpleField('Committee_agenda_associated')),
        ("agenda_associated_id ",
         KnessetDataServiceSimpleField('Committee_agenda_associated_id')),
        ("agenda_special ",
         KnessetDataServiceSimpleField('Committee_agenda_special')),
        ("agenda_invited1 ",
         KnessetDataServiceSimpleField('Committee_agenda_invited1')),
        ("agenda_invite ",
         KnessetDataServiceSimpleField('sd2committee_agenda_invite')),
        ("note ",
         KnessetDataServiceSimpleField('Committee_agenda_note')),
        ("start_datetime ",
         KnessetDataServiceSimpleField('StartDateTime')),
        ("topid_id ",
         KnessetDataServiceSimpleField('Topic_ID')),
        ("creation_date ",
         KnessetDataServiceSimpleField('Date_Creation')),
        ("streaming_url ",
         KnessetDataServiceSimpleField('streaming_url')),
        ("meeting_start ",
         KnessetDataServiceSimpleField('meeting_start')),
        ("is_paused ",
         KnessetDataServiceSimpleField('meeting_is_paused')),
        ("date_order ",
         KnessetDataServiceSimpleField('committee_date_order')),
        ("date ",
         KnessetDataServiceSimpleField('committee_date')),
        ("day ",
         KnessetDataServiceSimpleField('committee_day')),
        ("month ",
         KnessetDataServiceSimpleField('committee_month')),
        ("material_id ",
         KnessetDataServiceSimpleField('material_id')),
        ("material_committee_id ",
         KnessetDataServiceSimpleField('material_comittee_id')),
        ("material_expiration_date ",
         KnessetDataServiceSimpleField('material_expiration_date')),
        ("material_hour ",
         KnessetDataServiceSimpleField('committee_material_hour')),
        ("old_url ",
         KnessetDataServiceSimpleField('OldUrl')),
        ("background_page_link ",
         KnessetDataServiceSimpleField('CommitteeBackgroundPageLink')),
        ("agenda_invited ",
         KnessetDataServiceSimpleField('Committee_agenda_invited')),
    ]

    @classmethod
    def _get_url_base(cls):
        """Return the URL of the CommitteeAgendaSearch service function."""
        return "http://online.knesset.gov.il/WsinternetSps/KnessetDataService/CommitteeScheduleData.svc/CommitteeAgendaSearch"

    @classmethod
    def get(cls, committee_id, from_date, to_date=None, proxies=None):
        """Fetch the meetings of a committee within a date range.

        ``from_date`` (and ``to_date`` when given) are sent as quoted
        ``'YYYY-MM-DDT00:00:00'`` values; ``committee_id`` is sent quoted as
        well. The parameters and ``proxies`` are forwarded to the parent
        class's ``get``.

        # example usage:
        >>> from datetime import datetime
        # get all meetings of committee 1 from Jan 01, 2016
        >>> CommitteeMeeting.get(1, datetime(2016, 1, 1))
        # get all meetings of committee 2 from Feb 01, 2015 to Feb 20, 2015
        >>> CommitteeMeeting.get(2, datetime(2015, 2, 1), datetime(2015, 2, 20))
        """
        date_bounds = [("FromDate", from_date)]
        if to_date:
            date_bounds.append(("ToDate", to_date))

        query = {"CommitteeId": "'%s'" % committee_id}
        for key, day in date_bounds:
            query[key] = "'%sT00:00:00'" % day.strftime('%Y-%m-%d')

        return super(CommitteeMeeting, cls).get(query, proxies=proxies)