Exemplo n.º 1
0
def get_context(url, matchtext, before, after):
    """Return a word-trimmed text window around *matchtext* in the page at *url*.

    Parameters:
        url: page URL, fetched via get_cached_url (caching fetch helper).
        matchtext: substring to locate in the page's extracted text.
        before: number of characters to include before the match start.
        after: number of characters to include after the match start.

    Returns:
        trim_to_words() applied to the extracted window.
    """
    html = get_cached_url(url).read()
    textsegments = html_to_text(html)
    i = textsegments.find(matchtext)
    # BUG FIX: str.find returns -1 when matchtext is absent; the original
    # code then sliced around index -1, yielding a window near the *end*
    # of the text. Clamp to 0 so a miss returns the leading window.
    if i == -1:
        i = 0
    start = max(0, i - before)
    end = min(i + after, len(textsegments))
    return trim_to_words(textsegments[start:end])
Exemplo n.º 2
0
def find_pattern_matches(content, lowercontent, prefix):
    """Collect a word-trimmed ~2000-char snippet around every occurrence
    of *prefix* in *lowercontent*.

    Snippets are cut from *content*; *lowercontent* is presumably a
    lowercased copy of it (case-insensitive search, original-case
    output) — confirm with callers.
    """
    snippets = []
    hit = lowercontent.find(prefix)
    while hit >= 0:
        window = content[max(0, hit - 1000):hit + 1000]
        snippets.append(trim_to_words(window))
        # advance by one so overlapping occurrences are all found
        hit = lowercontent.find(prefix, hit + 1)
    return snippets
Exemplo n.º 3
0
def find_pattern_matches(content, lowercontent, prefix):
    """Return trimmed context snippets for each match of *prefix*.

    Searches *lowercontent* but slices *content*, so matching appears to
    be case-insensitive while snippets keep the original text — verify
    against callers that lowercontent is content.lower().
    """
    results = []
    pos = lowercontent.find(prefix)
    while pos != -1:
        lo = pos - 1000 if pos > 1000 else 0
        results.append(trim_to_words(content[lo:pos + 1000]))
        pos = lowercontent.find(prefix, pos + 1)
    return results
Exemplo n.º 4
0
def get_context(url, matchtext, before, after):
    """Return a word-trimmed text window around *matchtext* in the page at *url*.

    Parameters:
        url: page URL, fetched via get_cached_url (caching fetch helper).
        matchtext: substring to locate in the page's extracted text.
        before: number of characters to include before the match start.
        after: number of characters to include after the match start.

    Returns:
        trim_to_words() applied to the extracted window.
    """
    html = get_cached_url(url).read()
    textsegments = html_to_text(html)
    i = textsegments.find(matchtext)
    # BUG FIX: str.find returns -1 when matchtext is absent; the original
    # code then sliced around index -1, yielding a window near the *end*
    # of the text. Clamp to 0 so a miss returns the leading window.
    if i == -1:
        i = 0
    start = max(0, i - before)
    end = min(i + after, len(textsegments))
    return trim_to_words(textsegments[start:end])
Exemplo n.º 5
0
def trim_string(context, claimtext):
    """Return a ~200-character, word-trimmed window of *context* centred
    on the first occurrence of *claimtext*.

    The context is first normalized via messy_cleanup().
    """
    context = messy_cleanup(context)
    pos = context.find(claimtext)
    # BUG FIX: find() returns -1 on a miss; the original then silently
    # sliced context[0:99]. Clamp explicitly so the fallback (a window
    # from the start of the text) is deliberate rather than accidental.
    if pos == -1:
        pos = 0
    start = max(0, pos - 100)
    end = min(pos + 100, len(context))
    return trim_to_words(context[start:end])