示例#1
0
 def test_attending_members_invalid_data(self):
     """Exercise find_attending_members on a missing file and on missing text."""
     missing_path = '/foo/bar/baz'
     # the file does not exist: looking for attending members must raise IOError
     with CommitteeMeetingProtocol.get_from_filename(missing_path) as protocol:
         with self.assertRaises(IOError):
             protocol.find_attending_members([])
     # no protocol text at all: the result is an empty list
     with CommitteeMeetingProtocol.get_from_text(None) as protocol:
         attending = protocol.find_attending_members([])
         self.assertEqual([], attending)
    def extract_speakers_from_txt_file(self,file_object_path,committee_id,meeting_id):
        """Yield one record per speaker found in a stored protocol text file.

        The object at ``file_object_path`` is read from the "committees"
        bucket through ``object_storage.read`` (using ``self.s3``), decoded,
        and parsed with ``CommitteeMeetingProtocol.get_from_text``.

        Each yielded dict has the keys ``committee_id``, ``meeting_id`` and
        ``name``. Nothing is yielded when the protocol's ``speakers`` is None.
        """
        raw = object_storage.read(self.s3, "committees", file_object_path)

        with CommitteeMeetingProtocol.get_from_text(raw.decode()) as protocol:
            speakers = protocol.speakers

        # guard clause: a protocol without a speakers value produces no records
        if speakers is None:
            return

        for speaker_name in speakers:
            record = {"committee_id":committee_id,
                      "meeting_id":meeting_id,
                      "name":speaker_name }
            yield record
示例#3
0
 def find_attending_members(self, mks=None, mk_names=None):
     """Add to ``self.mks_attended`` the members named in the protocol text.

     ``mks`` and ``mk_names`` are used as parallel sequences: the name at
     index ``i`` of ``mk_names`` identifies ``mks[i]``. When both are None
     they are fetched with ``get_all_mk_names()``.

     A member found in the text is added only when ``party_at(self.date)``
     is truthy for that member.
     """
     logger.debug('find_attending_members')
     if mks is None and mk_names is None:
         logger.debug('get_all_mk_names')
         mks, mk_names = get_all_mk_names()
     with KnessetDataCommitteeMeetingProtocol.get_from_text(self.protocol_text) as protocol:
         for attended_name in protocol.find_attending_members(mk_names):
             member = mks[mk_names.index(attended_name)]
             # only members with a party at the time of this meeting count
             if member.party_at(self.date):
                 self.mks_attended.add(member)
     summary = 'meeting %d now has %d attending members' % (
         self.id,
         self.mks_attended.count())
     logger.debug(summary)
示例#4
0
    def create_protocol_parts(self, delete_existing=False, mks=None,
                              mk_names=None):
        """ Create protocol parts from this instance's protocol_text

            Optionally, delete existing parts (and the annotations attached
            to them) first.
            If the meeting already has parts, and you don't ask to
            delete them, a ValidationError will be thrown, because
            it doesn't make sense to create the parts again.

            Nothing is created when protocol_text is empty. Meetings of a
            committee whose type is 'plenum' are delegated to
            create_plenum_protocol_parts together with mks and mk_names;
            for every other meeting the parsed parts are saved in order,
            numbered from 1, and protocol_parts_update_date is refreshed.
        """
        logger.debug('create_protocol_parts %s' % delete_existing)
        if delete_existing:
            ppct = ContentType.objects.get_for_model(ProtocolPart)
            # .all() must be called: the lookup needs the queryset of parts,
            # not the bound manager method itself.
            annotations = Annotation.objects.filter(
                content_type=ppct,
                object_id__in=self.parts.all())
            logger.debug(
                'deleting %d annotations, because I was asked to delete the relevant protocol parts on cm.id=%d' % (
                    annotations.count(), self.id))
            annotations.delete()
            self.parts.all().delete()
        elif self.parts.count():
            raise ValidationError(
                'CommitteeMeeting already has parts. delete them if you want to run create_protocol_parts again.')
        if not self.protocol_text:  # sometimes there are empty protocols
            return  # then we don't need to do anything here.
        if self.committee.type == 'plenum':
            create_plenum_protocol_parts(self, mks=mks, mk_names=mk_names)
            return

        with KnessetDataCommitteeMeetingProtocol.get_from_text(
                self.protocol_text) as protocol:
            # TODO: use bulk_create (I had a strange error when using it)
            # Plain loop: the parts are saved for their side effect only.
            for i, part in enumerate(protocol.parts, 1):
                logger.debug('creating protocol part %s' % i)
                ProtocolPart(meeting=self, order=i, header=part.header,
                             body=part.body).save()
        self.protocol_parts_update_date = datetime.now()
        self.save()
示例#5
0
 def create_protocol_parts(self, delete_existing=False, mks=None, mk_names=None):
     """ Create protocol parts from this instance's protocol_text

         Optionally, delete existing parts (and the annotations attached
         to them) first.
         If the meeting already has parts, and you don't ask to
         delete them, a ValidationError will be thrown, because
         it doesn't make sense to create the parts again.

         Nothing is created when protocol_text is empty. Meetings of a
         committee whose type is 'plenum' are delegated to
         create_plenum_protocol_parts together with mks and mk_names;
         for every other meeting the parsed parts are saved in order,
         numbered from 1, and protocol_parts_update_date is refreshed.
     """
     logger.debug('create_protocol_parts %s'%delete_existing)
     if delete_existing:
         ppct = ContentType.objects.get_for_model(ProtocolPart)
         # .all() must be called: the lookup needs the queryset of parts,
         # not the bound manager method itself.
         annotations = Annotation.objects.filter(content_type=ppct, object_id__in=self.parts.all())
         logger.debug(
             'deleting %d annotations, because I was asked to delete the relevant protocol parts on cm.id=%d' % (
                 annotations.count(), self.id))
         annotations.delete()
         self.parts.all().delete()
     elif self.parts.count():
         raise ValidationError(
             'CommitteeMeeting already has parts. delete them if you want to run create_protocol_parts again.')
     if not self.protocol_text:  # sometimes there are empty protocols
         return  # then we don't need to do anything here.
     if self.committee.type == 'plenum':
         create_plenum_protocol_parts(self, mks=mks, mk_names=mk_names)
         return

     with KnessetDataCommitteeMeetingProtocol.get_from_text(self.protocol_text) as protocol:
         # TODO: use bulk_create (I had a strange error when using it)
         # Plain loop: the parts are saved for their side effect only.
         for i, part in enumerate(protocol.parts, 1):
             logger.debug('creating protocol part %s'%i)
             ProtocolPart(meeting=self, order=i, header=part.header, body=part.body).save()
     self.protocol_parts_update_date = datetime.now()
     self.save()
示例#6
0
def get_kns_committeesession_resource():
    """Yield every committee-session row, adding protocol attendees when available.

    Rows come from the module-level ``kns_committeesession_resource``. A row
    is enriched only when it passes the optional ``filter-meeting-id``,
    ``filter-committee-id`` and ``filter-knesset-num`` entries of the
    module-level ``parameters`` and has a non-empty protocol text file.

    The protocol text is downloaded over HTTP. A response whose status is not
    200 is ignored rather than parsed as if it were a protocol, and a protocol
    without attendees leaves the row unchanged. Every row is yielded, whether
    or not it was enriched.
    """
    # (parameter name, row field) pairs; an unset or empty parameter matches all rows
    row_filters = (
        ("filter-meeting-id", "CommitteeSessionID"),
        ("filter-committee-id", "CommitteeID"),
        ("filter-knesset-num", "KnessetNum"),
    )
    for committeesession_row in kns_committeesession_resource:
        selected = all(
            not parameters.get(param_name)
            or int(committeesession_row[field]) in parameters[param_name]
            for param_name, field in row_filters)
        if selected:
            # text_file_name	                                            text_file_size
            # data/committees/meeting_protocols_text/files/5/7/570611.txt	72817
            text_file_size = committeesession_row["text_file_size"]
            if (committeesession_row["text_file_name"]
                    and text_file_size
                    and text_file_size > 0):
                protocol_text_url = "https://storage.googleapis.com/knesset-data-pipelines/{}".format(
                    committeesession_row["text_file_name"])
                res = requests.get(protocol_text_url)
                # only a successful download carries protocol text
                if res.status_code == 200:
                    text = res.content.decode("utf-8")
                    with CommitteeMeetingProtocol.get_from_text(text) as protocol:
                        attendees = protocol.attendees
                    # update() would raise on None, so skip missing attendees
                    if attendees:
                        committeesession_row.update(attendees)
        yield committeesession_row
示例#7
0
 def find_attending_members(self, mks=None, mk_names=None):
     """Add to ``self.mks_attended`` the members named in the protocol text.

     ``mks`` and ``mk_names`` are used as parallel sequences: the name at
     index ``i`` of ``mk_names`` identifies ``mks[i]``. When both are None
     they are fetched with ``get_all_mk_names()``.

     A member found in the text is added only when ``party_at(self.date)``
     is truthy for that member. Any exception raised while parsing or
     matching is logged at debug level with its traceback and the meeting
     id, and is not propagated.
     """
     logger.debug('find_attending_members')
     if mks is None and mk_names is None:
         logger.debug('get_all_mk_names')
         mks, mk_names = get_all_mk_names()
     try:
         with KnessetDataCommitteeMeetingProtocol.get_from_text(self.protocol_text) as protocol:
             for attended_name in protocol.find_attending_members(mk_names):
                 member = mks[mk_names.index(attended_name)]
                 # only members with a party at the time of this meeting count
                 if member.party_at(self.date):
                     self.mks_attended.add(member)
     except Exception:
         trace_text = ''.join(traceback.format_exception(*sys.exc_info()))
         meeting_suffix = '\nCommitteeMeeting.id=' + str(self.id)
         logger.debug("%s%s", trace_text, meeting_suffix)
     summary = 'meeting %d now has %d attending members' % (
         self.id,
         self.mks_attended.count())
     logger.debug(summary)
    def extract_attendees_from_txt_file(self,file_object_path,committee_id,meeting_id):
        """Yield one record per attendee found in a stored protocol text file.

        The object at ``file_object_path`` is read from the "committees"
        bucket through ``object_storage.read`` (using ``self.s3``), decoded,
        and parsed with ``CommitteeMeetingProtocol.get_from_text``.

        Each yielded dict has the keys ``committee_id``, ``meeting_id``,
        ``name``, ``role`` and ``additional_information``. Entries under the
        "invitees" key are mappings: their ``name`` is used and their ``role``,
        when present, goes into ``additional_information``. Entries under any
        other key are used directly as the name, with that key as the role.
        Nothing is yielded when the protocol's ``attendees`` is None.
        """
        raw = object_storage.read(self.s3, "committees",file_object_path)

        with CommitteeMeetingProtocol.get_from_text(raw.decode()) as protocol:
            attendees = protocol.attendees

        # guard clause: a protocol without attendees produces no records
        if attendees is None:
            return

        for group, people in attendees.items():
            is_invitee_group = group == "invitees"
            for person in people:
                if is_invitee_group:
                    person_name = person["name"]
                    extra = person["role"] if "role" in person else ""
                    yield {"committee_id":committee_id,
                           "meeting_id":meeting_id,
                           "name":person_name,
                           "role":"invitees",
                           "additional_information":extra}
                else:
                    yield {"committee_id":committee_id,
                           "meeting_id":meeting_id,
                           "name":person,
                           "role":group,
                           "additional_information":""}
示例#9
0
 def find_attending_members(self, mks=None, mk_names=None):
     """Record which members attended, based on this meeting's protocol text.

     ``mk_names[i]`` is treated as the name of ``mks[i]``; when both
     arguments are None they come from ``get_all_mk_names()``. Each name
     the protocol parser reports is mapped back to its member, and the
     member is added to ``self.mks_attended`` unless ``party_at(self.date)``
     is falsy for it.

     Exceptions raised along the way are not propagated: the formatted
     traceback and the meeting id are written to the debug log instead.
     """
     logger.debug('find_attending_members')
     if mks is None and mk_names is None:
         logger.debug('get_all_mk_names')
         mks, mk_names = get_all_mk_names()
     try:
         with KnessetDataCommitteeMeetingProtocol.get_from_text(self.protocol_text) as protocol:
             found_names = protocol.find_attending_members(mk_names)
             for found in found_names:
                 position = mk_names.index(found)
                 candidate = mks[position]
                 if not candidate.party_at(self.date):
                     # not a member at the time of this meeting: skip
                     continue
                 self.mks_attended.add(candidate)
     except Exception:
         formatted_trace = traceback.format_exc()
         logger.debug("%s%s",
                      formatted_trace,
                      '\nCommitteeMeeting.id=' + str(self.id))
     attended_total_message = 'meeting %d now has %d attending members' % (
         self.id,
         self.mks_attended.count())
     logger.debug(attended_total_message)
def process_row(row, row_index, spec, resource_index, parameters, stats):
    """Add meeting attendees to rows of the ``kns_committeesession`` resource.

    Rows of any other resource are returned untouched. For a committee
    session row the ``mks``, ``invitees``, ``legal_advisors`` and ``manager``
    fields are first reset to ``None``. Attendees are then looked up only when
    the row passes the optional ``filter-meeting-id``, ``filter-committee-id``
    and ``filter-knesset-num`` parameters and has a ``text_parsed_filename``.

    When the ``KNESSET_PIPELINES_DATA_PATH`` environment variable is set, the
    protocol text is read from that directory and the extracted fields are
    stored in a JSON cache file next to a hash of the row's ``text_crc32c``
    and ``parts_crc32c``; the cached fields are reused while that hash is
    unchanged. Otherwise the text is downloaded over HTTP and nothing is
    cached.

    Returns the same ``row`` object, updated in place.
    """
    if spec['name'] != 'kns_committeesession':
        return row
    row.update(mks=None, invitees=None, legal_advisors=None, manager=None)

    # (parameter name, row field) pairs; an unset or empty parameter matches all rows
    row_filters = (
        ("filter-meeting-id", "CommitteeSessionID"),
        ("filter-committee-id", "CommitteeID"),
        ("filter-knesset-num", "KnessetNum"),
    )
    for param_name, field in row_filters:
        allowed = parameters.get(param_name)
        if allowed and int(row[field]) not in allowed:
            return row

    parsed_filename = row["text_parsed_filename"]
    if not parsed_filename:
        return row

    data_path = os.environ.get('KNESSET_PIPELINES_DATA_PATH')
    new_cache_hash, old_cache_hash, cache_hash_path, cache_hash_row = None, None, None, None
    if data_path:
        m = BASE_HASH_OBJ.copy()
        m.update(str(row['text_crc32c']).encode())
        m.update(str(row['parts_crc32c']).encode())
        new_cache_hash = m.hexdigest()
        cache_hash_path = os.path.join(
            data_path,
            'people/committees/meeting-attendees/cache_hash/{}.json'
            .format(parsed_filename))
        if os.path.exists(cache_hash_path):
            with open(cache_hash_path) as f:
                cache_data = json.load(f)
            old_cache_hash = cache_data['hash']
            cache_hash_row = cache_data['row']

    if cache_hash_path and old_cache_hash and old_cache_hash == new_cache_hash:
        # the source checksums did not change: reuse the cached fields
        row.update(**cache_hash_row)
        return row

    logging.info('getting attendees for meeting {}'.format(
        row['CommitteeSessionID']))
    text = None
    if data_path:
        protocol_text_path = os.path.join(
            data_path,
            'committees/meeting_protocols_text/{}'.format(parsed_filename))
        if (os.path.exists(protocol_text_path)
                and os.path.getsize(protocol_text_path) > 0):
            # Decode explicitly as UTF-8, like the HTTP branch below, instead
            # of depending on the locale's default encoding.
            with open(protocol_text_path, encoding="utf-8") as f:
                text = f.read()
    else:
        protocol_text_url = "https://storage.googleapis.com/knesset-data-pipelines/data/committees/" \
                            "meeting_protocols_text/{}".format(parsed_filename)
        res = requests.get(protocol_text_url)
        if res.status_code == 200:
            text = res.content.decode("utf-8")

    # what gets cached when no attendees could be extracted
    update_row = dict(mks=None,
                      invitees=None,
                      legal_advisors=None,
                      manager=None)
    if text:
        with CommitteeMeetingProtocol.get_from_text(text) as protocol:
            attendees = protocol.attendees
            if attendees:
                update_row = dict(
                    mks=attendees['mks'],
                    invitees=attendees['invitees'],
                    legal_advisors=attendees['legal_advisors'],
                    manager=attendees['manager'],
                    financial_advisors=attendees.get(
                        'financial_advisors', []))
                row.update(**update_row)

    if cache_hash_path:
        os.makedirs(os.path.dirname(cache_hash_path), exist_ok=True)
        with open(cache_hash_path, 'w') as f:
            json.dump({'hash': new_cache_hash, 'row': update_row}, f)
    return row