def save(date, name_newspaper, words):
    """Store one ``Word`` row per element of *words* for a newspaper.

    Args:
        date: Value written to each row's ``_datetime`` field.
        name_newspaper: ``name`` used to look up the owning ``Newspaper``.
        words: Iterable of word values. One ``Word`` is saved per element;
            duplicates are not collapsed.

    Raises:
        Propagates whatever ``Newspaper.objects.get`` raises when the lookup
        fails. The lookup only happens if *words* yields at least one item.
    """
    obj_newspaper = None
    for a_word in words:
        if obj_newspaper is None:
            # The newspaper is identical for every word, so fetch it once.
            # Doing it lazily keeps the "no words -> no query" behaviour.
            obj_newspaper = Newspaper.objects.get(name=name_newspaper)
        obj = Word(
            newspaper=obj_newspaper,
            _datetime=date,
            word=a_word,
        )
        obj.save()
def undo_lowercase_words(_, __):
    """Rebuild the ``Word`` table from the case-preserving tweet tokens.

    Deletes every existing ``Word``, tokenizes the ``content`` of every
    ``Tweet`` with ``word_tokenize`` and saves one ``Word`` per distinct
    token (in first-seen order) with ``frequency=-1``.

    Both positional arguments are ignored.
    NOTE(review): the signature looks like a Django ``RunPython`` callable
    ``(apps, schema_editor)`` - confirm against the caller.
    """
    Word.objects.all().delete()

    # ``seen`` gives O(1) membership tests; ``all_tokens`` keeps the
    # first-seen order the original list-based de-duplication produced.
    seen = set()
    all_tokens = []
    for tweet in Tweet.objects.all():
        for token in word_tokenize(tweet.content):
            if token not in seen:
                seen.add(token)
                all_tokens.append(token)

    for token in all_tokens:
        word = Word(word=token, frequency=-1)
        word.save()
def create_snippet(word, rawUrl, url, starttime, endtime):
    """Save a ``Snippet`` linking a word to a span of an audio record.

    The ``Word`` whose ``value`` equals *word* is fetched, or created and
    saved when it does not exist yet. The ``Audio`` is looked up by
    ``url=rawUrl``. A confirmation line is printed after the snippet is saved.

    Args:
        word: Text of the word the snippet contains.
        rawUrl: ``url`` of the ``Audio`` record the snippet was cut from.
        url: Value stored in the snippet's own ``url`` field.
        starttime: Value stored in the snippet's ``start`` field.
        endtime: Value stored in the snippet's ``end`` field.

    Raises:
        Propagates whatever ``Audio.objects.get`` raises when no (or more
        than one) audio matches *rawUrl*.
    """
    try:
        word_object = Word.objects.get(value=word)
    except Word.DoesNotExist:
        # Catch the exception on the same ``Word`` class that is queried,
        # rather than going through a separate ``models`` name.
        word_object = Word(value=word)
        word_object.save()

    audio_object = Audio.objects.get(url=rawUrl)
    snippet = Snippet(
        word=word_object,
        audio=audio_object,
        start=starttime,
        end=endtime,
        url=url,
    )
    snippet.save()
    # Parenthesised single argument: valid and identical on Python 2 and 3.
    print("Saved \"%s\" Snippet to database" % word)
def populate_words(_, __):
    """Create one ``Word`` per distinct token found in the stored tweets.

    Tokenizes the ``content`` of every ``Tweet`` with ``word_tokenize`` and
    saves a ``Word`` for each distinct token, in first-seen order. Existing
    ``Word`` rows are left untouched.

    Both positional arguments are ignored.
    NOTE(review): the signature looks like a Django ``RunPython`` callable
    ``(apps, schema_editor)`` - confirm against the caller.
    """
    # ``seen`` gives O(1) membership tests; ``all_tokens`` keeps the
    # first-seen order the original list-based de-duplication produced.
    seen = set()
    all_tokens = []
    for tweet in Tweet.objects.all():
        for token in word_tokenize(tweet.content):
            if token not in seen:
                seen.add(token)
                all_tokens.append(token)

    for token in all_tokens:
        word = Word(
            word=token,
        )
        word.save()
def undo_stem_words(_, __):
    """Rebuild the ``Word`` table from the lower-cased tweet tokens.

    Deletes every existing ``Word``, tokenizes the lower-cased ``content`` of
    every ``Tweet`` with ``word_tokenize`` and saves one ``Word`` per distinct
    token (in first-seen order) with ``frequency=-1``. A token is skipped if
    a ``Word`` with the same ``word`` value is already stored.

    Both positional arguments are ignored.
    NOTE(review): the signature looks like a Django ``RunPython`` callable
    ``(apps, schema_editor)`` - confirm against the caller.
    """
    Word.objects.all().delete()

    # ``seen`` gives O(1) membership tests; ``all_tokens`` keeps the
    # first-seen order the original list-based de-duplication produced.
    seen = set()
    all_tokens = []
    for tweet in Tweet.objects.all():
        for token in word_tokenize(str(tweet.content).lower()):
            if token not in seen:
                seen.add(token)
                all_tokens.append(token)

    for token in all_tokens:
        # ``exists()`` replaces ``count() is 0``: identity comparison against
        # an int literal is implementation-dependent and warns on Python 3.8+.
        if not Word.objects.filter(word=token).exists():
            word = Word(
                word=token,
                frequency=-1,
            )
            word.save()
def create(name, description, words):
    """Build a new category from the given fields and save it.

    Args:
        name: Value for the category's ``name``.
        description: Value for the category's ``description``.
        words: Iterable of word values, each wrapped in ``Word(word=...)``.
            A falsy value (``None``, empty) yields a category with no words.

    Returns:
        Whatever ``Category.save()`` returns.
    """
    wrapped = [Word(word=item) for item in words] if words else []
    new_category = Category(name=name, description=description, words=wrapped)
    return new_category.save()
def wordPop(LANGUAGE_TO_POPULATE):
    """Populate the Words and Lemmas of one language from its data file.

    Reads ``data/langs/<LANGUAGE_TO_POPULATE>.txt`` (UTF-8). Each valid line
    has at least three tab-separated columns: the lemma first, the word
    second and the tagset last. The tagset is a ``;``-separated list of
    labels; every label is mapped through ``findFeature`` to a ``Feature``
    that is attached to the ``TagSet``, and the first label determines the
    ``POS``. For every valid line a ``Word`` is saved, linked to its lemma
    (created when missing), tagset and language.

    Args:
        LANGUAGE_TO_POPULATE: ``name`` of the ``Language`` record; also the
            stem of the data file name.

    Raises:
        KeyError: If the first label of a line is not a key of
            ``findFeature`` (unknown labels are only reported when attaching
            features, but the POS lookup is not guarded).
        Lookup errors raised by ``Language.objects.get``, ``Feature.objects.get``,
        ``POS.objects.get`` or ``Lemma.objects.get`` (other than
        ``Lemma.DoesNotExist``) propagate unchanged.
    """
    fileName = ''.join(['data/langs/', LANGUAGE_TO_POPULATE, '.txt'])
    languageObject = Language.objects.get(name=LANGUAGE_TO_POPULATE)

    with open(fileName, "r", encoding="utf8") as wordData:
        for line in wordData:
            # Columns: Lemma, Word, Tagset - delimiter is a tab ('\t').
            rowContent = line.split('\t')
            if len(rowContent) < 3:
                # Not a valid data line; skip it.
                continue

            tagsetName = rowContent[-1].rstrip()
            tagSetObject, _ = TagSet.objects.get_or_create(name=tagsetName)
            lemmaName = rowContent[0]
            wordName = rowContent[1]

            # The last column holds all labels of the tagset.
            allLabels = tagsetName.split(";")
            for currLabel in allLabels:
                try:
                    currFeature = findFeature[currLabel.upper()]
                    featObject = Feature.objects.get(name=currFeature)
                    tagSetObject.features.add(featObject)
                except KeyError:
                    print(f'{currLabel} - not exist')

            posName = findFeature[allLabels[0].upper()].rstrip()
            posObject = POS.objects.get(name=posName)

            # Reuse the lemma if it exists, otherwise create it.
            try:
                lemmaObject = Lemma.objects.get(
                    name=lemmaName,
                    pos=posObject.id,
                    language=languageObject.id,
                )
            except Lemma.DoesNotExist:
                lemmaObject = Lemma(name=lemmaName)
                lemmaObject.language = languageObject
                lemmaObject.pos = posObject
                lemmaObject.save()

            # Create the word only after the lemma was resolved. Doing this
            # in a ``finally`` clause would also run when the lookup failed
            # for another reason, saving a word with a stale or undefined
            # lemma.
            wordObject = Word(name=wordName)
            wordObject.lemma = lemmaObject
            wordObject.tagset = tagSetObject
            wordObject.language = languageObject
            wordObject.save()
# Parse the JSON dictionary file into a list of entry mappings.
dictionary = json.load(dict_file)
print("Loaded: reading file")

# Accumulators for the model instances built from the entries.
word_objs, defs_objs, exam_objs, syno_objs = [], [], [], []

for entry in dictionary:
    print(f"getting word: {entry['word']}")

    # Build the Word for this entry (id and word text are lower-cased).
    word_obj = Word(
        w_id=entry['link_id'].lower(),
        word=entry['word'].lower(),
        url=entry['url'],
        etymology=entry['etymology'],
        notes=entry['notes'],
    )

    # Definitions come either as a flat list (no syntax) or as a mapping
    # from syntax to a list of definitions.
    definitions = entry['definitions']
    if isinstance(definitions, list):
        defs_objs.extend(
            Definition(word=word_obj, definition=text, syntax=None)
            for text in definitions
        )
    else:
        for syntax, texts in definitions.items():
            defs_objs.extend(
                Definition(word=word_obj, definition=text, syntax=syntax)
                for text in texts
            )

    # One Example per example sentence of the entry.
    exam_objs.extend(
        Example(word=word_obj, example=sentence)
        for sentence in entry["examples"]
    )

    # Finally record the word itself.
    word_objs.append(word_obj)
def mutate_and_get_payload(cls, root, info):
    """Save a new, empty ``Word`` and return it wrapped in the payload.

    ``root`` and ``info`` are accepted but not used.

    Returns:
        An ``IntroduceWord`` whose ``word`` is the freshly saved instance.
    """
    created_word = Word()
    created_word.save()
    payload = IntroduceWord(word=created_word)
    return payload