def get_context(url, matchtext, before, after):
    """Return a word-trimmed text window around *matchtext* on the page at *url*.

    Parameters
    ----------
    url : str
        Page to fetch (retrieved through the caching layer, get_cached_url).
    matchtext : str
        Substring to locate in the page's extracted text.
    before, after : int
        Number of characters of context to keep before / after the match.

    Returns
    -------
    str
        The surrounding text, trimmed to whole-word boundaries by trim_to_words().
    """
    html = get_cached_url(url).read()
    textsegments = html_to_text(html)
    i = textsegments.find(matchtext)
    # BUG FIX: str.find() returns -1 on a miss; the original then sliced
    # [max(0, -1 - before):min(-1 + after, len)], producing a window anchored
    # at a nonsense position. Fall back to the start of the text instead.
    if i == -1:
        i = 0
    bigtext = textsegments[max(0, i - before):min(i + after, len(textsegments))]
    return trim_to_words(bigtext)
def find_pattern_matches(content, lowercontent, prefix):
    """Collect a word-trimmed snippet (~2000 chars) around every occurrence
    of *prefix* in *lowercontent*, slicing from the parallel string *content*.

    *lowercontent* is searched (presumably a lowercased copy of *content* —
    TODO confirm against caller) while the snippet text itself is taken from
    *content* so original casing is preserved.
    """
    snippets = []
    hit = lowercontent.find(prefix)
    while hit >= 0:
        window = content[max(0, hit - 1000):hit + 1000]
        snippets.append(trim_to_words(window))
        # Advance one char so overlapping occurrences are all found.
        hit = lowercontent.find(prefix, hit + 1)
    return snippets
def find_pattern_matches(content, lowercontent, prefix):
    """Return word-trimmed context snippets for each place *prefix* occurs
    in *lowercontent*; the snippet text is cut from the parallel *content*.
    """
    results = []
    pos = lowercontent.find(prefix, 0)
    while pos != -1:
        lo = max(0, pos - 1000)
        hi = pos + 1000
        results.append(trim_to_words(content[lo:hi]))
        # Resume the search just past the current hit.
        pos = lowercontent.find(prefix, pos + 1)
    return results
def get_context(url, matchtext, before, after):
    """Fetch *url* (via the cache), extract its text, and return a
    word-trimmed window of *before*/*after* characters around the first
    occurrence of *matchtext*.

    NOTE(review): if *matchtext* is absent, find() yields -1 and the window
    is anchored near the end of the text — behavior preserved as-is here;
    confirm whether callers guarantee a match.
    """
    page_text = html_to_text(get_cached_url(url).read())
    hit = page_text.find(matchtext)
    window_start = max(0, hit - before)
    window_end = min(hit + after, len(page_text))
    return trim_to_words(page_text[window_start:window_end])
def trim_string(context, claimtext):
    """Return a word-trimmed ~200-char window of *context* centred on the
    first occurrence of *claimtext*.

    Parameters
    ----------
    context : str
        Text to search; first normalised by messy_cleanup().
    claimtext : str
        Substring to centre the window on.

    Returns
    -------
    str
        The 100-chars-either-side window, trimmed to whole words.
    """
    context = messy_cleanup(context)
    pos = context.find(claimtext)
    # BUG FIX: find() returns -1 when claimtext is absent, which silently
    # produced the off-by-one slice [0:99]. Anchor the window at the start
    # of the text instead of at a phantom position.
    if pos == -1:
        pos = 0
    shrunken = context[max(0, pos - 100):min(pos + 100, len(context))]
    return trim_to_words(shrunken)