def replacementDict(tc, dc, cc, tr):
    """Build the template replacement dictionary for a sequence of tags.

    For every tag index ``i`` the result contains:

    * ``tag_<i>``: the tag itself (``tc[i]``).
    * ``tag_<i>_aan``: ``a_or_an(tc[i])``.
    * ``tag_<i>_predicate_<j>`` for ``j`` in ``0 .. MAX_PREDICATES - 1``:
      a ``"<verb> <lemma>"`` phrase, or ``"remains unknown"`` when
      ``dc[i]`` is empty.
    * ``tag_<i>_dump``: a shuffled list with one ``"<opener> it <verb>
      <lemma>."`` sentence per (verb, lemma) pair found in ``dc[i]``, or
      ``[]`` when ``dc[i]`` is empty.

    Args:
        tc: Sequence of tags.
        dc: Indexable in parallel with ``tc``; ``dc[i]`` is a mapping from a
            verb string to a non-empty collection of lemma strings (or
            something falsy when nothing is known about the tag).
        cc: Unused by this function; kept so existing callers keep working.
        tr: Collection of sentence openers; one is drawn with ``rc`` for
            every dump sentence.

    Returns:
        dict: The replacement keys described above.
    """
    MAX_PREDICATES = 3
    repl = {}
    for i, tag in enumerate(tc):
        repl["tag_%i" % i] = tag
        repl["tag_%i_aan" % i] = a_or_an(tag)

        relations = dc[i]
        if not relations:
            # Nothing known about this tag: emit placeholder predicates.
            for j in range(MAX_PREDICATES):
                repl["tag_%i_predicate_%i" % (i, j)] = "remains unknown"
            repl["tag_%i_dump" % i] = []
            continue

        # Copy the keys into a real list. ``dict.keys()`` is only a list on
        # Python 2; on Python 3 it is a view that cannot be appended to or
        # indexed.
        available = list(relations.keys())
        if len(available) < MAX_PREDICATES:
            # Too few distinct verbs: use each once, then pad by re-drawing
            # verbs until MAX_PREDICATES are available.
            relVerbs = list(available)
            lemmas = [rc(relations[relVerb]) for relVerb in relVerbs]
            for _ in range(MAX_PREDICATES - len(relVerbs)):
                # Draw from the untouched key list, not from the padded one.
                useAgain = rc(available)
                relVerbs.append(useAgain)
                lemmas.append(rc(relations[useAgain]))
        else:
            relVerbs = rs(available, MAX_PREDICATES)
            lemmas = [rc(relations[relVerb]) for relVerb in relVerbs]

        # ``zip`` is lazy on Python 3, so make it indexable explicitly.
        zipped = list(zip(relVerbs, lemmas))
        for j in range(MAX_PREDICATES):
            repl["tag_%i_predicate_%i" % (i, j)] = " ".join(zipped[j])

        dump = [
            "%s it %s %s." % (rc(tr), rv, lemma)
            for rv in available
            for lemma in relations[rv]
        ]
        random.shuffle(dump)
        repl["tag_%i_dump" % i] = dump
    return repl
def replacementDict(tc, dc, cc, tr):
    """Build the template replacement dictionary for a sequence of tags.

    For every tag index ``i`` the result contains:

    * ``tag_<i>``: the tag itself (``tc[i]``).
    * ``tag_<i>_aan``: ``a_or_an(tc[i])``.
    * ``tag_<i>_predicate_<j>`` for ``j`` in ``0 .. MAX_PREDICATES - 1``:
      a ``"<verb> <lemma>"`` phrase, or ``"remains unknown"`` when
      ``dc[i]`` is empty.
    * ``tag_<i>_dump``: a shuffled list with one ``"<opener> it <verb>
      <lemma>."`` sentence per (key, lemma) pair found in ``dc[i]``, or
      ``[]`` when ``dc[i]`` is empty.

    Each key of ``dc[i]`` is itself passed to ``rc`` whenever a verb is
    needed, so a key is a collection of alternative verb phrasings
    (presumably a tuple of strings -- confirm against the producer of ``dc``).

    Args:
        tc: Sequence of tags.
        dc: Indexable in parallel with ``tc``; ``dc[i]`` is a mapping from a
            verb collection to a non-empty collection of lemma strings (or
            something falsy when nothing is known about the tag).
        cc: Unused by this function; kept so existing callers keep working.
        tr: Collection of sentence openers; one is drawn with ``rc`` for
            every dump sentence.

    Returns:
        dict: The replacement keys described above.
    """
    MAX_PREDICATES = 3
    repl = {}
    for i, tag in enumerate(tc):
        repl["tag_%i" % i] = tag
        repl["tag_%i_aan" % i] = a_or_an(tag)

        relations = dc[i]
        if not relations:
            # Nothing known about this tag: emit placeholder predicates.
            for j in range(MAX_PREDICATES):
                repl["tag_%i_predicate_%i" % (i, j)] = "remains unknown"
            repl["tag_%i_dump" % i] = []
            continue

        # Copy the keys into a real list. ``dict.keys()`` is only a list on
        # Python 2; on Python 3 it is a view that cannot be appended to or
        # indexed.
        available = list(relations.keys())
        if len(available) < MAX_PREDICATES:
            # Too few distinct keys: use each once, then pad by re-drawing
            # keys until MAX_PREDICATES are available.
            relVerbs = list(available)
            lemmas = [rc(relations[relVerb]) for relVerb in relVerbs]
            for _ in range(MAX_PREDICATES - len(relVerbs)):
                # Draw from the untouched key list, not from the padded one.
                useAgain = rc(available)
                relVerbs.append(useAgain)
                lemmas.append(rc(relations[useAgain]))
        else:
            relVerbs = rs(available, MAX_PREDICATES)
            lemmas = [rc(relations[relVerb]) for relVerb in relVerbs]

        # ``zip`` is lazy on Python 3, so make it indexable explicitly.
        zipped = list(zip(relVerbs, lemmas))
        for j in range(MAX_PREDICATES):
            verbChoices, lemma = zipped[j]
            repl["tag_%i_predicate_%i" % (i, j)] = "%s %s" % (
                rc(verbChoices), lemma)

        # The opener is drawn before the verb for every sentence.
        dump = [
            "%s it %s %s." % (rc(tr), rc(rv), lemma)
            for rv in available
            for lemma in relations[rv]
        ]
        random.shuffle(dump)
        repl["tag_%i_dump" % i] = dump
    return repl
def explodeTag(tag):
    """Collect phrases related to ``tag`` from ConceptNet, grouped by verbs.

    The tag is normalized, turned into an English concept URI and looked up
    through ``conceptNet``. Every returned edge whose relation is a key of
    the module-level ``relDict`` contributes its end lemma, after
    ``verbConjugate`` has processed it, to the bucket keyed by
    ``tuple(verbs)`` for that relation.

    An edge is skipped when its relation is not in ``relDict``, when the
    relation's verbs entry is falsy, when the processed lemma is at most one
    character long, or when the raw lemma equals the tag or its normalized
    form.

    Args:
        tag: The tag to expand.

    Returns:
        defaultdict(list): Maps ``tuple(verbs)`` to the phrases collected for
        it; empty when the lookup result has no ``"edges"`` key.
    """
    canonical = normalize(tag)
    uri = "/c/en/" + canonical.replace(" ", "_")
    try:
        edges = conceptNet(uri)["edges"]
    except KeyError:
        edges = []

    articles = ["a", "an", "the"]
    selfNames = (tag, canonical)
    grouped = defaultdict(list)
    for edge in edges:
        # The lemma is the last path segment of the end node's URI.
        rawLemma = edge["end"].split("/")[-1].replace("_", " ")
        relation = edge["rel"]
        try:
            verbs, wantsArticle = relDict[relation]
        except KeyError:
            # Relation without a verb mapping: nothing to record.
            continue

        phrase = verbConjugate(rawLemma, relation, wantsArticle)
        if not verbs:
            continue
        if len(phrase) <= 1:
            continue
        if rawLemma in selfNames:
            # Skip edges that merely point back at the tag itself.
            continue

        needsArticle = wantsArticle and not startsWithCheck(phrase, articles)
        bucket = grouped[tuple(verbs)]
        bucket.append(a_or_an(phrase) if needsArticle else phrase)
    return grouped
def chTitle(hi):
    """Derive a title from the noun phrases of a generated HTML page.

    Reads ``APPPATH + 'static/output/' + hi + '.html'``, takes the contents
    of the document's first ``<p>`` element (``<br/>`` becomes a newline),
    extracts every chunk of type ``"NP"`` from ``parsetree``, and builds the
    title from one or two selected noun phrases joined with ``" and "``.
    Phrases that do not already start with an article get one via
    ``a_or_an``.

    Args:
        hi: Base name (without ``.html``) of the output file to read.

    Returns:
        The title-cased title encoded as ASCII, with non-ASCII characters
        replaced by XML character references.

    Raises:
        ValueError: If the text contains no usable noun phrase.
        IOError: If the HTML file cannot be opened or read.
    """
    # ``with`` guarantees the handle is closed even if reading fails.
    with open(APPPATH + 'static/output/' + hi + '.html', 'r') as htmlFile:
        html = htmlFile.read()

    soup = BeautifulSoup(html)
    # NOTE: ``unicode`` is the Python 2 builtin used by this file.
    text = "\n".join(
        [unicode(i) for i in soup.p.contents]).replace("<br/>", "\n")

    nounPhrases = [
        chunk.string
        for sentence in parsetree(text)
        for chunk in sentence.chunks
        if chunk.type == "NP"
    ]
    # Phrases containing "&" are excluded (presumably leftover HTML
    # entities -- confirm).
    usable = [phrase for phrase in nounPhrases if "&" not in phrase]
    if not usable:
        raise ValueError("no usable noun phrase found for %r" % (hi,))

    # Never ask for more phrases than exist. Previously a page with a single
    # usable noun phrase failed whenever two were requested.
    # NOTE(review): assumes ``rs``/``ri`` are random.sample/random.randint.
    selected = rs(usable, min(ri(1, 2), len(usable)))

    articles = ["a", "an", "the"]
    nps = [
        phrase if startsWithCheck(phrase, articles) else a_or_an(phrase)
        for phrase in selected
    ]
    # Joining one phrase yields the phrase itself, so this covers both the
    # one- and the two-phrase title.
    title = titlecase(" and ".join(nps))
    return title.encode('ascii', 'xmlcharrefreplace')
def explodeTag(tag):
    """Collect phrases related to ``tag`` from ConceptNet, grouped by verb.

    The tag is normalized, turned into an English concept URI (spaces become
    underscores) and looked up through ``conceptNet``. Every returned edge
    whose relation appears in the local relation table contributes its end
    lemma, after ``verbConjugate`` has processed it, to the bucket of that
    relation's verb. Relations sharing a verb (for example ``/r/IsA`` and
    ``/r/HasProperty``, both ``"is"``) share a bucket.

    An edge is skipped when its relation is not in the table, when the
    processed lemma is at most one character long, or when the raw lemma
    equals the tag or its normalized form.

    Args:
        tag: The tag to expand.

    Returns:
        defaultdict(list): Maps a verb string to the phrases collected for
        it; empty when the lookup result has no ``"edges"`` key.
    """
    # relation -> [verb, whether the lemma should carry an article]
    relDict = {
        "/r/RelatedTo": ["evokes", False],
        "/r/IsA": ["is", True],
        "/r/PartOf": ["appertains to", True],
        "/r/MemberOf": ["belongs to", True],
        "/r/HasA": ["has", True],
        "/r/UsedFor": ["is for", False],
        "/r/CapableOf": ["may", False],
        # "/r/AtLocation": [False, False],
        "/r/Causes": ["causes", True],
        "/r/HasSubevent": ["manifests", False],
        "/r/HasFirstSubevent": ["began with", True],
        "/r/HasLastSubevent": ["ends with", True],
        "/r/HasPrerequisite": ["requires", True],
        "/r/HasProperty": ["is", False],
        "/r/MotivatedByGoal": ["dreams of", False],
        "/r/ObstructedBy": ["struggles with", True],
        "/r/Desires": ["yearns for", False],
        "/r/CreatedBy": ["resulted from", True],
        "/r/Synonym": ["is also known as", True],
        "/r/Antonym": ["is not", True],
        "/r/DerivedFrom": ["is made from", True],
        "/r/TranslationOf": ["is known to some as", False],
        "/r/DefinedAs": ["remains", True],
    }

    canonical = normalize(tag)
    uri = "/c/en/" + canonical.replace(" ", "_")
    try:
        edges = conceptNet(uri)["edges"]
    except KeyError:
        edges = []

    articles = ["a", "an", "the"]
    selfNames = (tag, canonical)
    grouped = defaultdict(list)
    for edge in edges:
        # The lemma is the last path segment of the end node's URI.
        rawLemma = edge["end"].split("/")[-1].replace("_", " ")
        relation = edge["rel"]
        try:
            verb, wantsArticle = relDict[relation]
        except KeyError:
            # Relation without a verb mapping: nothing to record.
            continue

        phrase = verbConjugate(rawLemma, relation, wantsArticle)
        if not verb:
            continue
        if len(phrase) <= 1:
            continue
        if rawLemma in selfNames:
            # Skip edges that merely point back at the tag itself.
            continue

        needsArticle = wantsArticle and not startsWithCheck(phrase, articles)
        bucket = grouped[verb]
        bucket.append(a_or_an(phrase) if needsArticle else phrase)
    return grouped
def explodeTag(tag):
    """Collect phrases related to ``tag`` from ConceptNet, grouped by verb.

    The tag is normalized, turned into an English concept URI and looked up
    through ``conceptNet``. Every returned edge whose relation appears in the
    local relation table contributes its end lemma, after ``verbConjugate``
    has processed it, to the bucket of that relation's verb. Relations
    sharing a verb (for example ``/r/IsA`` and ``/r/HasProperty``, both
    ``"is"``) share a bucket.

    An edge is skipped when its relation is not in the table, when the
    processed lemma is at most one character long, or when the raw lemma
    equals the tag or its normalized form.

    Args:
        tag: The tag to expand.

    Returns:
        defaultdict(list): Maps a verb string to the phrases collected for
        it; empty when the lookup result has no ``"edges"`` key.
    """
    # relation -> [verb, whether the lemma should carry an article]
    relDict = {
        "/r/RelatedTo": ["evokes", False],
        "/r/IsA": ["is", True],
        "/r/PartOf": ["appertains to", True],
        "/r/MemberOf": ["belongs to", True],
        "/r/HasA": ["has", True],
        "/r/UsedFor": ["is for", False],
        "/r/CapableOf": ["may", False],
        # "/r/AtLocation": [False, False],
        "/r/Causes": ["causes", True],
        "/r/HasSubevent": ["manifests", False],
        "/r/HasFirstSubevent": ["began with", True],
        "/r/HasLastSubevent": ["ends with", True],
        "/r/HasPrerequisite": ["requires", True],
        "/r/HasProperty": ["is", False],
        "/r/MotivatedByGoal": ["dreams of", False],
        "/r/ObstructedBy": ["struggles with", True],
        "/r/Desires": ["yearns for", False],
        "/r/CreatedBy": ["resulted from", True],
        "/r/Synonym": ["is also known as", True],
        "/r/Antonym": ["is not", True],
        "/r/DerivedFrom": ["is made from", True],
        "/r/TranslationOf": ["is known to some as", False],
        "/r/DefinedAs": ["remains", True],
    }
    articleList = ["a", "an", "the"]
    candidates = defaultdict(list)

    normalizedTag = normalize(tag)
    # Concept URIs separate words with underscores (the end lemmas below are
    # decoded the same way), so a multi-word tag must be encoded before the
    # lookup; otherwise the URI contains literal spaces.
    start = "/c/en/" + normalizedTag.replace(" ", "_")
    try:
        edges = conceptNet(start)["edges"]
    except KeyError:
        edges = []

    selfNames = (tag, normalizedTag)
    for edge in edges:
        # The lemma is the last path segment of the end node's URI.
        unmodified = edge["end"].split("/")[-1].replace("_", " ")
        rel = edge["rel"]
        try:
            verb, aan = relDict[rel]
        except KeyError:
            # Relation without a verb mapping: nothing to record.
            continue

        endLemma = verbConjugate(unmodified, rel, aan)
        if not verb or len(endLemma) <= 1 or unmodified in selfNames:
            # Also skips edges that merely point back at the tag itself.
            continue

        if aan and not startsWithCheck(endLemma, articleList):
            candidates[verb].append(a_or_an(endLemma))
        else:
            candidates[verb].append(endLemma)
    return candidates