def fetch_trigrams():
    """Store a Trigram row and count for each stored Bigram of every target tag.

    For every tag in TARGET_TAGS, the Bigram rows whose ``tag1`` equals that
    tag are read, and for each (tag1, tag2) pair a request is made to the URL
    built by ``make_url`` from the two tags.  Every entry under ``'items'`` in
    the JSON response contributes a third tag (its ``'name'``); the three tags
    are sorted and stored as a Trigram whose ``count`` is the entry's
    ``'count'``.  Responses that have no ``'items'`` key are skipped.
    """
    for target in TARGET_TAGS:
        # Snapshot the matching bigrams as plain tuples before doing any
        # requests or writes: iterating over the selected rows directly was
        # observed to throw an exception.
        matching = Bigram.select().where(Bigram.tag1 == target)
        known_pairs = [(row.tag1, row.tag2) for row in matching]

        for first, second in known_pairs:
            response = session.get(
                make_url([first, second]),
                params={'pagesize': 100, 'site': 'stackoverflow'},
            )
            payload = response.json()
            if 'items' not in payload.keys():
                continue

            for entry in payload['items']:
                # Sort so the same three tags always map onto the same
                # (tag1, tag2, tag3) column values regardless of discovery order.
                low, mid, high = sorted([first, second, entry['name']])
                try:
                    trigram = Trigram.create(tag1=low, tag2=mid, tag3=high)
                except peewee.IntegrityError:
                    # Creation was rejected, so fetch the row that already
                    # holds this combination and update it instead.
                    trigram = Trigram.get(
                        Trigram.tag1 == low,
                        Trigram.tag2 == mid,
                        Trigram.tag3 == high,
                    )
                trigram.count = entry['count']
                trigram.save()
def fetch_bigrams():
    """Store a Bigram row and count for every tag returned for each target tag.

    For every tag in TARGET_TAGS a request is made to the URL built by
    ``make_url`` from that single tag.  Each entry under ``'items'`` in the
    JSON response is stored as a Bigram (``tag1`` = the target tag, ``tag2`` =
    the entry's ``'name'``) whose ``count`` is set to the entry's ``'count'``.
    Existing rows are reused via ``get_or_create`` and their count overwritten.

    A response without an ``'items'`` key is skipped rather than raising
    ``KeyError``, matching how ``fetch_trigrams`` treats the same situation.
    """
    for tag in TARGET_TAGS:
        resp = session.get(make_url([tag]), params={
            'pagesize': 100,
            'site': 'stackoverflow',
        })
        respJson = resp.json()
        # Presumably an error or throttling payload when 'items' is absent;
        # skip this tag instead of aborting the whole run.
        if 'items' not in respJson:
            continue
        for i in respJson['items']:
            bg, _ = Bigram.get_or_create(tag1=tag, tag2=i['name'])
            bg.count = i['count']
            bg.save()
# convert variables to the right types xloc = float(xloc) yloc = float(yloc) loc = (xloc, yloc) parse = ParentedTree.parse(parse) modparse = ParentedTree.parse(modparse) # how many ancestors should the sampled landmark have? num_ancestors = count_lmk_phrases(modparse) - 1 # sample `args.iterations` times for each sentence for _ in xrange(args.iterations): lmk, rel = get_meaning(loc, num_ancestors) if args.verbose: print "utterance:", repr(sentence) print "location: %s" % repr(loc) print "landmark: %s (%s)" % (lmk, lmk_id(lmk)) print "relation: %s" % rel_type(rel) print "parse:" print parse.pprint() print "modparse:" print modparse.pprint() print "-" * 70 location = Location(x=xloc, y=yloc) save_tree(modparse, location, rel, lmk) Bigram.make_bigrams(location.words) Trigram.make_trigrams(location.words) session.commit()
assert(not isinstance(rel, tuple)) if args.verbose: print 'utterance:', repr(sentence) print 'location: %s' % repr(loc) print 'landmark: %s (%s)' % (lmk, lmk_id(lmk)) print 'relation: %s' % rel_type(rel) print 'parse:' print parse.pprint() print 'modparse:' print modparse.pprint() print '-' * 70 location = Location(x=xloc, y=yloc) save_tree(modparse, location, rel, lmk) Bigram.make_bigrams(location.words) Trigram.make_trigrams(location.words) if i % 200 == 0: session.commit() for sentence,(parse,modparse) in unique_sentences.items(): SentenceParse.add_sentence_parse_blind(sentence, parse, modparse) session.commit() print 'counting ...' # count words w1 = aliased(Word) w2 = aliased(Word) parent = aliased(Production)