Python Utilities.contains_numbers示例

编程语言: Python

类/类型: Utilities

方法/功能: contains_numbers

hotexamples.com的示例: 2

Python Utilities.contains_numbers - 共找到2个示例。以下是从开源项目中提取的、最受好评的 Utilities.contains_numbers（来自程序包 facebook_page_scraper）的真实 Python 示例。您可以对示例进行评价，以帮助我们提高示例质量。

常用方法

显示隐藏

format_CIK(13)

write_result_to_file(10)

AverageMeter(9)

mkdir(9)

tokenizeFile(7)

init_distribution(7)

get_err_from_predict(7)

sanitize_filing_year(6)

failExecution(6)

setup_db(5)

printFrequencies(5)

connect(5)

replace_zero_label_with_neg_one(5)

insert_entries(5)

pre_compute_threshes(4)

getIndentSize(4)

print_to_file(3)

findNextNonWhiteSpaceCharIndex(3)

getPyQt4ModulesDirectory(3)

get_alpha_numeric_count(3)

pwDecode(3)

get_random_name(3)

get_suffix(3)

printInfo(3)

rmTree(3)

is_CIK_valid(3)

parse_time(3)

isInsideTextLiteral(3)

get_prefix(3)

check_number(3)

check_capital(3)

check_bar(3)

get_f_ranking_from_predictions(2)

from_dungeon_level(2)

rot_center(2)

pwEncode(2)

get_auc_from_predict(2)

BhattacharyaCoeff(2)

checkBlacklistedVersions(2)

maximalElements(2)

collateFrequencies(2)

chat(2)

pre_compute_threshes_uci(2)

pre_compute_threshes_8news(2)

character_counter(2)

isSubList(2)

is_inside_frustum(2)

listMerge(2)

loadAll(2)

normalise_plurk_id(2)

示例#1

显示文件

文件： headervalidity.py 项目： mchrzanowski/SEC10KParser

def check_whether_header_is_valuable(location, hits):
    """Return True if the header found for this hit passes every filter.

    The header is fetched with ``get_header_of_chunk(location, hits)`` and is
    used here as a sequence of word strings (it is iterated, joined and
    indexed).  It is rejected (``False``) as soon as any of these holds:

    * no word matches the "headers we want" pattern;
    * it mentions Debt/Other/Environmental/Proceeding without also
      mentioning one of the litigation/contingency style keywords;
    * it matches any of the "headers we don't want" patterns;
    * it mentions "Subsequent" without "Subsequent ... Event";
    * its first or second word contains a number;
    * it contains a single parenthesised letter other than "A".

    Otherwise ``True`` is returned.
    """
    header = get_header_of_chunk(location, hits)

    # The header *has* to contain some special keywords.  The pattern does
    # not depend on the word, so fetch it once instead of once per word.
    wanted_pattern = lfp.headerpatternrepository.get_pattern_of_headers_we_want()
    if not any(re.match(wanted_pattern, word) for word in header):
        return False

    # Every remaining check runs against the header with its words joined
    # together without separators.
    compressed_header = ''.join(header)

    # Now check for common bigrams that we don't want.
    if re.search("Debt|Other|Environmental|Proceeding", compressed_header, re.I) \
            and not re.search("Litigation|Contingenc|Commitment|"
                              "Contigencies|Legal|Subsequent",
                              compressed_header, re.I):
        return False

    for regex in lfp.headerpatternrepository.get_patterns_of_headers_we_dont_want():
        if re.search(regex, compressed_header):
            return False

    # We only want subsequent event headers; nothing more.
    if re.search("S[uU][bB][sS][eE][qQ][uU][eE][nN][tT]", compressed_header) \
            and not re.search("Subsequent.*?Event", compressed_header, re.I):
        return False

    # First words are never numbers.  header[0] is safe here: an empty
    # header cannot contain a keyword and was already rejected above.
    if Utilities.contains_numbers(header[0]) \
            or (len(header) >= 2 and Utilities.contains_numbers(header[1])):
        return False

    # Does it have a letter in parentheses that is not "A"?  If so, forget it.
    # Raw string: "\(" and "\)" are not valid escapes in a normal literal.
    if re.search(r"\([B-Zb-z]\)", compressed_header):
        return False

    return True

示例#2

显示文件

文件： tokenvalidity.py 项目： mchrzanowski/SEC10KParser

def was_cut_within_a_table(location, hits):
    """Heuristically decide whether the last sentence fragment is table text.

    The fragment is obtained from
    ``lfp.wordtokencreation.get_last_sentence_fragment`` twice: once in its
    default form (iterated word by word here) and once with
    ``return_as_string=True`` (searched with regular expressions here).

    Returns ``False`` when there is no fragment (``None``).  Returns ``True``
    when any of the following holds for the fragment:

    * it mentions millions/thousands/billions *and* one of
      total/follows/following/balance;
    * ``Utilities.character_counter`` reports at least six ``'$'``;
    * it mentions total/follows/following/balance *and* at least six of its
      words contain numbers according to ``Utilities.contains_numbers``.

    Otherwise returns ``False``.
    """
    last_sentence_fragment = lfp.wordtokencreation.get_last_sentence_fragment(location, hits)

    if last_sentence_fragment is None:
        return False

    compressed_sentence_fragment = lfp.wordtokencreation.get_last_sentence_fragment(
        location, hits, return_as_string=True)

    search_flags = re.I | re.M | re.S

    # This keyword test takes part in two of the heuristics below; run the
    # search once and reuse the result.
    mentions_table_keyword = re.search(
        "total|follow(s|ing)|balance", compressed_sentence_fragment, search_flags)

    # See whether we picked up a table.
    # Tables normally have units of currency as well as the word "follows"
    # somewhere.  If these hold, then we're probably in a table from a
    # previous section.  That means that if we're in a relevant section right
    # now, and the new hit demarcates a new section, then we want to stop
    # recording.  If we're not recording, then we probably want to start.
    # If we're in a relevant section and the new hit does *not* have a header
    # that's been whitelisted as being a section, then we can continue
    # recording.
    # Raw string: "\s" is not a valid escape in a normal string literal.
    if mentions_table_keyword and re.search(
            r"(in)?\s*(millions|thousands|billions)",
            compressed_sentence_fragment, search_flags):
        return True

    # Many dollar signs in one fragment also count as a table.
    char_frequency = Utilities.character_counter(compressed_sentence_fragment, '$')

    if char_frequency['$'] >= 6:
        return True

    # Finally: a table keyword together with many number-bearing words.
    number_count = sum(
        1 for word in last_sentence_fragment if Utilities.contains_numbers(word))

    if mentions_table_keyword and number_count >= 6:
        return True

    return False