Exemplos de find_legislation em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: lawcrunch.textanalysis.legislation

Método / Função: find_legislation

Exemplos em hotexamples.com: 2

find_legislation em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de lawcrunch.textanalysis.legislation.find_legislation em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Relacionados

ogq

select_unique_column

SearchQuery

insert_user

_validate_mac_address

Joueur

MyContact

rankagg

relpos2binaryrelpos

enable_usercase

Relacionados em outras linguagens

PaginationDTOBuilder (PHP)

PkLess (PHP)

SceneNode (C#)

FileSystem (C#)

parse_file_identifier (C++)

writeReset (C++)

NewDeleteOptions (Go)

FindContracts (Go)

DataFlowElement (Java)

ExecutorService (Java)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_legislation.py Projeto: olibrook/lawcrunch

def test_find_legislation(text, expected):
    """Verify ``find_legislation`` output for ``text`` against ``expected``.

    Besides comparing the full result, every match is checked against the
    source text: slicing ``text`` with the match's begin/end offsets must
    yield the matched title.
    """
    titles = ttls.find_titles(tokenization.tokenize(text))

    # The result must equal the expected output.
    found = lgsln.find_legislation(titles, text)
    assert found == expected

    # Each match must be locatable in the original text.
    for _category, (title, begin, end) in found:
        assert text[begin:end] == title

Exemplo n.º 2

0

Exibir arquivo

Arquivo: iclr.py Projeto: olibrook/lawcrunch

def get_meta(s):
    """Extract case metadata from the HTML markup of a case report.

    The markup is parsed with BeautifulSoup (``lxml`` parser) and elements
    are located by CSS class (``case``, ``kw``, ``ncit``, ``reporter``,
    ``hnote``).

    Args:
        s: HTML markup of the report (anything ``bs4.BeautifulSoup`` accepts).

    Returns:
        A dict with the keys ``case``, ``parties``, ``keywords``,
        ``cite_as``, ``hearing``, ``reporters``, ``appearances``, ``body``,
        ``citations``, ``candidate_citations``, ``legislation`` and
        ``candidate_legislation``.

    Raises:
        Exception: if the document does not contain exactly one ``ncit``
            element, if its last citation does not look like a
            ``[YYYY] WLR (D)`` citation, or if the hearing details cannot be
            split into exactly three fields.
    """
    soup = bs4.BeautifulSoup(s, 'lxml')
    meta = {}

    case = soup.find(class_=u'case')
    meta['case'] = case.text

    # --- Parties -----------------------------------------------------------
    # The children of the case element are the parties, with a " v " tag
    # separating the two sides.
    parties = list(case.children)
    separator = u" v "
    split = None
    for i, party in enumerate(parties):
        if isinstance(party, bs4.Tag) and party.text == separator:
            split = i  # no break: the last separator wins

    # NOTE(review): ``unicode`` is the Python 2 builtin; on Python 3 this
    # relies on a module-level alias -- confirm against the rest of the file.
    if split is None:
        # No separator found: treat everything as the left side instead of
        # slicing with a -1 sentinel, which produced wrong lists on both
        # sides.
        left_parties = [unicode(x) for x in parties]
        right_parties = []
    else:
        left_parties = [unicode(x) for x in parties[:split]]
        right_parties = [unicode(x) for x in parties[split + 1:]]

    meta['parties'] = {
        'left': left_parties,
        'right': right_parties,
    }

    # --- Keywords ----------------------------------------------------------
    # Each keyword element is "<category> — <word> — <word> ...".
    meta['keywords'] = {}
    for kw in soup.find_all(class_=u'kw'):
        words = kw.text.split(_EM_DASH)
        category = words[0].strip()
        meta['keywords'][category] = [word.strip() for word in words[1:]]

    # --- Citations ---------------------------------------------------------
    citation_els = soup.find_all(class_=u'ncit')
    if len(citation_els) != 1:
        raise Exception(u'Could not parse any citations for this document')

    citation = citation_els[0]
    cite_parts = [c.strip() for c in citation.text.split(";")]

    # The last citation must be the "[YYYY] WLR (D)" one. The original test
    # joined the two conditions with ``and``, so it could never fail.
    pattern = r"^\[\d\d\d\d\]\sWLR\s\(D\)"
    if not cite_parts or not re.match(pattern, cite_parts[-1], re.UNICODE):
        raise Exception(u'Unable to parse citation for this document')

    meta['cite_as'] = {
        'neutral': cite_parts[:-1],  # Can be more than one
        'iclr': cite_parts[-1],      # Only ever one for iclr publications
    }

    # --- Hearing details ---------------------------------------------------
    hearing_el = citation.find_next_sibling('p')
    if hearing_el is None:
        raise Exception('Could not parse hearing details')
    judges_and_date = hearing_el.text

    # Fields look reliably separated by newlines
    fields = [f.strip() for f in judges_and_date.split('\n')]

    # Colons carry no meaning and are commonly misspelled twice
    fields = [f[:-2] if f.endswith(u'::') else f for f in fields]
    fields = [f[:-1] if f.endswith(u':') else f for f in fields]

    if len(fields) != 3:
        raise Exception('Could not parse hearing details')

    court_abbr, judges, date = fields
    meta['hearing'] = {
        'court_abbr': court_abbr,
        'judges': [j.strip() for j in judges.split(',')],
        'date': _date_to_js(_parse_date(date)),
    }

    # --- Reporters ---------------------------------------------------------
    # TODO: Reporter
    def map_reporter(reporter):
        # Sometimes only the name is given
        reporter_fields = [f.strip() for f in reporter.split(',')]
        return {
            'name': reporter_fields[0],
            'role': reporter_fields[1] if len(reporter_fields) > 1 else None,
        }

    reporter_el = soup.find(class_=u'reporter')
    if reporter_el:
        reporter_str = reporter_el.text
        prefix = u'Reported by:'
        if reporter_str.startswith(prefix):
            reporter_str = reporter_str[len(prefix):]
        if reporter_str.endswith('.'):
            reporter_str = reporter_str[:-1]
        # Split on the whole word "and" only; a plain substring split would
        # cut names such as "Alexander" or "Sandra" in two.
        names = re.split(r'\band\b', reporter_str, flags=re.UNICODE)
        reporters = [map_reporter(name.strip()) for name in names]
    else:
        reporters = []

    meta['reporters'] = reporters

    # --- Appearances and body ----------------------------------------------
    hnotes = soup.find_all(class_=u'hnote')

    # TODO: Appearances in court, these need proper parsing. They are tricky!
    # The paragraph after the last head-note element holds the appearances;
    # when it is missing or unlabelled, fall back to an empty string.
    appearances_el = hnotes[-1].find_next_sibling('p') if hnotes else None
    appearances = appearances_el.text if appearances_el is not None else u''
    prefix = u'Appearances:'
    if appearances.startswith(prefix):
        appearances = appearances[len(prefix):].strip()
    else:
        appearances = ''

    meta['appearances'] = appearances

    # TODO: Body of report
    body = u'\n\n'.join(p.text for p in hnotes)
    meta['body'] = body

    # --- Cited cases / legislation -----------------------------------------
    # TODO: Cited cases/legislation
    # TODO: Check len(citations) == len(candidates)

    # A common inconsistency is a reference to an act in square
    # brackets, like this: "Under the [2004] Act". Normally square
    # brackets indicate a citation of case-law.
    tokenized = tokenization.tokenize(body)
    tokenized_titles = ttls.find_titles(tokenized)

    found_citations = ctns.find_citations(body)
    candidate_citations = ctns.weak_find_citations(body)

    legislation = lgs.find_legislation(tokenized_titles, body)
    candidate_legislation = lgs.find_candidate_legislation(body)

    # De-duplicate the strong matches; candidates are passed through as-is.
    meta['citations'] = list(set(found_citations))
    meta['candidate_citations'] = candidate_citations
    meta['legislation'] = list(set(legislation))
    meta['candidate_legislation'] = candidate_legislation

    # TODO: Which party won?

    return meta