Example #1
    def checkColumns(self, eng_dict):
        """
        This function checks if all of the words placed in the columns of the board are in eng_dict.
        :param eng_dict: (set) English dictionary (set of English words)
        :return: boolean
        """
        s = len(self.board)
        col = 0
        while col < s:
            row = 0
            while row < s:
                if self.board[row][col] != '_':
                    new_word = ""
                    counter = 0
                    for i in range(row, s):     # collect the whole word going down the column
                        if self.board[i][col] != '_':
                            new_word += self.board[i][col]
                            counter += 1
                        else:
                            break
                    if len(new_word) > 1:       # we found a whole word
                        nw = Word(new_word)
                        # check that it appears in the eng_dict set using Word()'s method
                        if not nw.checkWord(eng_dict):
                            return False
                    row += counter              # skip past the word (also covers words ending at the board edge)
                row += 1
            col += 1
        return True
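Example #2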
def insert_words(fv, hash_set):
    """
    -------------------------------------------------------
    Retrieves every Word in fv and inserts it into
    a Hash_Set.
    -------------------------------------------------------
    Parameters:
        fv - the already open file containing data to evaluate (file)
        hash_set - the Hash_Set to insert the words into (Hash_Set)
    Returns:
        Each Word object in hash_set contains the number of comparisons
        required to insert that Word object from fv into hash_set.
    -------------------------------------------------------
    """
    for line in fv:
        for token in line.split(" "):
            # wrap each lowercased token in a Word and insert it into the given Hash_Set
            word = Word(token.lower())
            hash_set.insert(word)
    return
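Example #3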
 def __init__(self, phrases, one_letter=False):
     list_p = phrases
     if len(phrases) > 0:
         #if without pos
         if type(phrases[0]) is not tuple:
             #don't allow one-letter words
             if not one_letter:
                 list_p = filter(lambda x: len(x) > 1, phrases)
                 list_p = itertools.chain(
                     *map(lambda x: x.split('_', 1), list_p))
             self.phrases = list(
                 set([Word(w) for w in list_p if w in english_wordlist]))
         #if basic knowledge - key words
         else:
             if not one_letter:
                 list_p = filter(lambda x: len(x[0]) > 1, phrases)
             self.phrases = [
                 Word(w[0], w[1]) for w in list_p
                 if w[0] in english_wordlist
             ]
         self.phrases = list(set(self.phrases))
         #print [ (w.name,w.pos) for w in self.phrases]
         forms = self.add_forms()
         self.phrases = list(set(self.phrases)) + list(set(forms))
         self.phrases = list(set(self.phrases))
     else:
         self.phrases = []
Example #4
    def set(self, location, text):
        """
		Assign this sentence to the given text.
		
		Will automatically update child items (aka, words).
		
		:param location: set where this item is in the document
		:param text: set the text value
		"""
        self.location = location
        self.children = []
        # TODO: this tokenizer does not work well.
        #	see: https://github.com/armatthews/TokenizeAnything
        words = nltk.word_tokenize(text)
        locs = findTokenLocations(text, words, self.location[0])
        # TODO: this tokenizer doesn't work well either
        # looking into BLLIP, but this bug:
        #	https://github.com/BLLIP/bllip-parser/issues/48
        partsOfSpeech = nltk.pos_tag(
            words)  # where tokens is [(word,part_of_speech)]
        for i in range(len(partsOfSpeech)):
            p = partsOfSpeech[i]
            w = Word(self.doc, self, locs[i])
            w.set(locs[i], p[0], p[1])
            self.children.append(w)
Example #5
 def word(self, symbol):
     sequence = []
     sequence.append(symbol)
     temp = Word(sequence)
     #todo: compute constraints and map
     #just make map the identity
     temp.map = AffineMap(identity(self.dim), System.makeZeros(self.dim))
     #next make the constraints
     constraintMatrix = []
     constraintVector = []
     for q in range(0, symbol.dim()-1): #cvxopt uses <= constraints by default
         newRow = System.makeZeros(symbol.dim())
         newRow[symbol.sigmaInv(q+1)] = -1.0
         newRow[symbol.sigmaInv(q)] = 1.0
         constraintMatrix.append(newRow)
         constraintVector.append(0.0)
     #for now, we are NOT going to use agents for 0 and 1 in lieu of 0 < x < 1 explicit constraint
     #actually, we can build these constraints into the solver
     #first cell greater than 0
     newRow = System.makeZeros(symbol.dim())
     newRow[symbol.sigmaInv(0)] = -1.0
     constraintMatrix.append(newRow)
     constraintVector.append(0.0)
     #second cell less than 1
     newRow = System.makeZeros(symbol.dim())
     newRow[symbol.sigmaInv(symbol.dim()-1)] = 1.0
     constraintMatrix.append(newRow)
     constraintVector.append(1.0)
     temp.set = ConvexSet(constraintMatrix, constraintVector)
     #separationConstraints if you want them
     #temp.set = ConvexSet.intersect(temp.set, self.separationConstraint)
     return temp
Example #6
def parse_dict(path):
    words = []
    mode = ''
    valid = False

    with open(path, encoding="utf-8") as f:
        for line in f:
            item = line.split(';')
            if len(item) <= 2:
                if item[0] == '##NOUNS':
                    mode = 'n'
                elif item[0] == '##VERBS':
                    mode = 'v'
                elif item[0] == '##END':
                    if int(item[1]) == len(words):
                        valid = True
                        break
                else:
                    continue
            else:
                if mode == 'n':
                    words.append(Word.Noun(item[0], item[1], item[2], item[3], int(item[4])))
                elif mode == 'v':
                    words.append(Word.Verb(item[0], item[1], item[2], int(item[3])))
                else:
                    continue
    return words, valid
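
A minimal sketch of how parse_dict above might be called; the file name below is hypothetical, and the Word.Noun / Word.Verb classes are assumed to exist exactly as referenced in the example.

words, valid = parse_dict("dictionary.txt")   # hypothetical path
if valid:
    print("Loaded", len(words), "dictionary entries")
else:
    print("Word count in the ##END line does not match the parsed entries")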
Example #7
def Word_factory(name, category):
    '''
    Process all synsets of words before hash insertion
    '''
    word = Word(name=name, category=category)
    synsets = wn.synsets(name)
    for wn_synset in synsets:
        synset = Syn_factory(wn_synset)
        word.populate(synset)
    return word
Example #8
 def incScore(self, word, coords):
     """
         This function increases player's score.
         :param word:(string) word player made up
         :param coords:(itertools deafaultdict) where key: coordinates of the letter (x,y) val: letter
         :return: void - only changing the object attribute
     """
     w = Word(word)
     plus_scr = w.score(
         coords
     )  #calculating score increase using Word() score method (it returns int)
     self.score += plus_scr
Example #9
 def _find_hyponym(self, word, pool):
     try:
         if word.pos.startswith("N"):
             pool.nouns |= set(
                 [Word(w, "NN") for w in en.noun.hyponym(word.name)[0]])
         elif word.pos.startswith("V"):
             pool.verbs |= set(
                 [Word(w, "V") for w in en.verb.hyponym(word.name)[0]])
         elif word.pos.startswith("JJ"):
             pool.adjectives |= set([
                 Word(w, "JJ") for w in en.adjective.hyponym(word.name)[0]
             ])
     except:
         print "Couldn't find hyponyms"
Example #10
    def read_file(self, file):
        # read in a single file from the corpus
        # parse it into Sentence and Word objects and build full Document objects
        doc_to_return = Document()
        lines = file.readlines()
        pre_sent = []
        sent_index = 0
        is_offset = 1
        doc_list = []
        for line in lines:
            if not line.startswith('#'):
                if line.isspace():
                    doc_to_return.sentences.append(
                        Sentence(pre_sent.copy(), sent_index))
                    pre_sent.clear()
                    sent_index += 1
                else:
                    entries = line.split()
                    # Set Doc Id
                    if not doc_to_return.docID:
                        doc_to_return.get_doc_id(entries[0])

                    # Construct word
                    word = Word()
                    word.set_entries(entries)
                    pre_sent.append(word)
                    # Create mapping for resolving difference in ISNotes offset and OntoNotes
                    #doc_to_return.coref_to_ISoffset[is_offset] = (doc_to_return.header, sent_index, int(entries[2]))
                    #is_offset += 1
            elif line.startswith('#begin'):
                doc_to_return = Document()
                doc_to_return.header = line
            else:
                doc_to_return.end = line
                doc_list.append(doc_to_return)
                sent_index = 0
                doc_to_return = Document()

        # for sent in doc_to_return.sentences:
        #     sent.get_names()  # May no longer be needed. since exists in make_mentions in sentence_init

        # Construct gold coreference clusters
        # for doc in doc_list:
        #     for sent in doc.sentences:
        #         for m in sent.mentions:
        #             if m.gold_cluster_id is not None:
        #                 doc.gold_clusters[m.gold_cluster_id].append(m)

        return doc_list
Example #11
def add_screen():
    """
    add screen
    """
    word_to_add = input('(word, meaning)\n>>> ').split(', ')
    len_word = len(word_to_add)

    # the user introduced just the word, not the meaning/s
    if len_word == 1:
        app.add(Word(word_to_add[0]))
    else:
        #word with meaning/s
        app.add(Word(word_to_add[0], word_to_add[1:]))
    os.system('clear')
    main_screen()
Example #12
    def test_LetterScoringHeuristicScoreWord_QUEEN_ReturnsFourteen(self):
        # Arrange
        expected = 14
        hand = []
        q = "Q"
        u = "U"
        e = "E"
        n = "N"
        tiles = []
        qTile = Tile(q)
        uTile = Tile(u)
        eTile = Tile(e)
        nTile = Tile(n)
        tiles.append(qTile)
        tiles.append(uTile)
        tiles.append(eTile)
        tiles.append(eTile)
        tiles.append(nTile)
        word = Word(tiles)
        heuristic = LetterScoringHeuristic(0)

        # Act
        result = heuristic.ScoreWord(word, hand)

        # Assert
        self.assertEqual(result, expected)
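Example #13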
 def generate_phrase(self, pool):
     try:
         adj = choice(list(pool.adjectives))
         parser = ChartParser(self.grammar)
         gr = parser.grammar()
         phrase = self.produce(gr, gr.start())
         #adj = choice(list(pool.adjectives))
         noun = choice(list(pool.comparisons[adj]))
         if en.noun.plural(noun.name) == noun.name:
             article = "the"
         else:
             article = en.noun.article(noun.name).split(" ")[0]
         replace_words = {'adj': adj, 'n': noun, 'det': article}
         for pos in replace_words:
             while pos in phrase:
                 try:
                     phrase = self.replace_pos(pos, replace_words[pos],
                                               phrase)
                 except:
                     return
         for w in phrase:
             if not isinstance(w, Word):
                 phrase[phrase.index(w)] = Word(w)
         return phrase
     except:
         return
Example #14
def loadWords(files):
    global C
    global dTerms
    C = int(input("Digite o valor do parâmetro C: "))

    words = {}
    i = 1
    for fil in files:
        f = open(fil, "r")
        dTerms[fil] = 0
        fileWords = {}
        for line in f:
            for word in re.split(r'; |, |\*|\n|;|!|\?|\.|\t| ', line):
                if len(word) < C:
                    continue
                theWord = un.unidecode(word.lower())
                if theWord not in words:
                    words[theWord] = wd.Word(theWord, len(files))
                if theWord not in fileWords:
                    fileWords[theWord] = theWord
                    dTerms[fil] += 1
                words[theWord].incrementOccurs(i)
        i = i + 1
        f.close()

    return words
Example #15
 def _find_antonym(self, word, pool):
     antonyms = []
     try:
         if word.pos.startswith("N"):
             h = en.noun.antonym(word.name)
             antonyms = [Word(w, "NN") for w in h[0]]
         elif word.pos.startswith("V"):
             h = en.verb.antonym(word.name)
             antonyms = [Word(w, "V") for w in h[0]]
         elif word.pos.startswith("JJ"):
             h = en.adjective.antonym(word.name)
             antonyms = [Word(w, "JJ") for w in h[0]]
         pool.antonyms[word] = antonyms
     except:
         print "Couldn't find antonyms"
     return antonyms
Example #16
def main():
    global debug, wildcard
    # Wildcard is represented as this character
    wildcard = '?'
    debug = False

    try:
        letters = sys.argv[1]
    except IndexError:
        print('Please enter letters')
        exit(0)
    # Get all the possible words
    words = getWords(letters)
    # Score all of the possible words
    scoredWords = []
    for i in words:
        s = w.Word(i, score(i))
        scoredWords.append(s)
    # Sort the words in descending order based on their points
    newlist = sorted(scoredWords, key=lambda i: i.points, reverse=True)

    for i in newlist:
        print(i.word + ': ' + str(i.points))
    if len(newlist) > 0:
        print('\nHighest point word: ' + newlist[0].word + ' (' +
              str(newlist[0].points) + ' points)')
        print('Total possible words: ' + str(len(newlist)))
    else:
        print('No possible words.')
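Example #17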
    def autoSetWords(self, content, documents):
        for index, word in enumerate(content):
            word_obj = Word.Word(self.doc_id, self.hemistic_id, index, word,
                                 documents)
            self.words.append(word_obj)

        del self.content  # to lower object size
Example #18
 def _find_hypernym(self, word, pool):
     hypernyms = []
     try:
         if word.pos.startswith("N"):
             h = en.noun.hypernym(word.name)
             hypernyms = [Word(w, "NN") for w in h[0]]
         elif word.pos.startswith("V"):
             h = en.verb.hypernym(word.name)
             hypernyms = [Word(w, "V") for w in h[0]]
         elif word.pos.startswith("JJ"):
             h = en.adjective.hypernym(word.name)
             hypernyms = [Word(w, "JJ") for w in h[0]]
         pool.hypernyms[word] = hypernyms
     except:
         print "Couldn't find hypernyms"
     return hypernyms
Example #19
    def get_all_infixes(self, language):
        # Returns an array containing
        # All of the infixes of the given language
        if type(language) != str:
            raise TypeError("language should be of type str")
        elif re.match(".{3,}", language) is None:
            raise TypeError("language should be an ISO 639-3 code")
        else:
            db = dataset.connect(self.db_url)
            table = db["words"]

            table_results = db.query("SELECT * FROM words WHERE word LIKE '" +
                                     language + ": %-'")

            results = []

            for result in table_results:
                if re.match("^" + language + ":\s\-[^-]+\-$", result["word"]):
                    word = ComplexWord(
                        Word(re.sub(r'\n+', '', result['word']),
                             re.sub(r'\n+', '', result['relation']),
                             re.sub(r'\n+', '', result['related'])))
                    results.append(word)

            return results
Example #20
    def find_word(self, to_find):
        # Returns an array of ComplexWords
        # Containing every entry for the given word
        if type(to_find) != str:
            raise TypeError("to_find should be of type str")
        elif re.match(".{2,}:\s*.+", to_find) is None:
            raise TypeError(
                "to_find should contain an ISO 639-3 code, a colon, and then the word"
            )
        else:
            db = dataset.connect(self.db_url)
            table = db["words"]

            table_results = table.find(word=to_find)

            results = []

            for result in table_results:
                results.append(
                    ComplexWord(
                        Word(re.sub(r'\n+', '', result['word']),
                             re.sub(r'\n+', '', result['relation']),
                             re.sub(r'\n+', '', result['related']))))

            return results
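
The to_find argument above must contain a language code, a colon, and then the word; here is a quick standalone check of that same pattern with made-up inputs, independent of the database:

import re

pattern = re.compile(r".{2,}:\s*.+")
print(bool(pattern.match("eng: water")))   # True  - accepted by find_word
print(bool(pattern.match("water")))        # False - find_word raises TypeError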
Example #21
def generate_file(ham_dic, spam_dic, smooth, path):
    lines = []
    file = open(path, 'w')
    total_count_ham = 0
    total_count_spam = 0
    for value in ham_dic.values():
        total_count_ham = total_count_ham + value
    for value in spam_dic.values():
        total_count_spam = total_count_spam + value

    total_count_ham_smooth = total_count_ham + smooth * len(vocabulary)
    total_count_spam_smooth = total_count_spam + smooth * len(vocabulary)
    i = 1
    for word in sorted(vocabulary):

        word_in_ham_nosmooth = ham_dic.get(word, 0)
        word_in_spam_nosmooth = spam_dic.get(word, 0)
        word_in_ham_smooth = ham_dic.get(word, 0) + smooth
        word_in_spam_smooth = spam_dic.get(word, 0) + smooth

        word_prob_ham = word_in_ham_smooth / total_count_ham_smooth
        word_prob_spam = word_in_spam_smooth / total_count_spam_smooth


        line = str(i) + "  "+ word +"  " + str(word_in_ham_nosmooth) + "  " \
               + str(word_prob_ham) + "  "\
               + str(word_in_spam_nosmooth) + "  "+str(word_prob_spam) +'\n'
        w = Word(word, word_in_ham_smooth, word_prob_ham, word_in_spam_smooth,
                 word_prob_spam)
        lines.append(line)
        file.write(line)
        model[word] = w
        i = i + 1
    file.close()
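
The probabilities above use add-delta (Laplace-style) smoothing: every count gets smooth added, and each denominator grows by smooth * len(vocabulary). A tiny standalone check with made-up counts (the values below are illustrative only, not from any corpus):

smooth = 0.5
ham_counts = {"free": 2, "hi": 1, "now": 0}   # hypothetical counts
vocabulary = set(ham_counts)
total_ham_smooth = sum(ham_counts.values()) + smooth * len(vocabulary)   # 3 + 0.5 * 3 = 4.5
probs = {w: (c + smooth) / total_ham_smooth for w, c in ham_counts.items()}
print(probs["now"])   # (0 + 0.5) / 4.5 = 0.111...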
Example #22
    def test_LongestWordHeuristicScoreWord_FourLetterWord_ReturnsFour(self):
        # Arrange
        expected = 4
        hand = []
        f = "F"
        o = "O"
        u = "U"
        r = "R"
        tiles = []
        fTile = Tile(f)
        oTile = Tile(o)
        uTile = Tile(u)
        rTile = Tile(r)
        tiles.append(fTile)
        tiles.append(oTile)
        tiles.append(uTile)
        tiles.append(rTile)
        word = Word(tiles)
        heuristic = LongestWordHeuristic(0)

        # Act
        result = heuristic.ScoreWord(word, hand)

        # Assert
        self.assertEqual(result, expected)
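Example #23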
 def findAllWordFrames(self):
     '''Finds all whole words in a frame given by the begIndex and endIndex'''
     wordStorer = []
     for textSource in self.textSources:
         #Try-except is substantially faster than if-else
         try:
             appData = self.cleanData[textSource.dictionaryKeyString]
             for ((time, (begIndex, endIndex)), (cps, scaling)) in appData:
                 #If endIndex=0, we haven't started yet
                 if endIndex == 0:
                     continue
                 #Raw string visible on screen at this point
                 textInFrame = textSource.text[begIndex:endIndex]
                 #List of partial words on screen at given time
                 partialsInFrame = self.splitString(textInFrame)
                 #Partial words with their word indices
                 annotatedPartials = self.mapIndexToWords(
                     begIndex, endIndex, partialsInFrame, textSource)
                 #Full words in frame, if enough partial word is in frame
                 fullWordsInFrame = self.checkBoundaryPartials(
                     textSource, begIndex, endIndex, annotatedPartials)
                 lowDataWords = []
                 #Words are low data because they only come from one frame
                 for (wordIndex, wordString) in fullWordsInFrame:
                     lowDataWord = Word(wordString, textSource, wordIndex,
                                        cps)
                     lowDataWords.append(lowDataWord)
                 wordStorer.append(lowDataWords)
         except KeyError:
             continue
     return wordStorer
Example #24
    def no_homo(self, phone, w):
        '''
        True when adding the word with phone as nucleus would NOT create a homophone,
        where a homophone is multiple meanings (word IDs) mapped to a single phonetic sequence.

        example: speaker says "bed" with nucleus [epsilon],
                 listener applies deassimilation and finds a nearby phone [I]
                 which is mapped to "bid" already.
                 This would create a homophone where [b][I][d] is mapped to both
                 "[b][I][d]" and "[b][epsilon][d]".
                 The listener would not allow the micro-change,
                 so would either choose another nearby phone or add a new one,
                 i.e. not recognize the match.

        Will always return True if the agent has 
        '''

        w_ons = w.onset
        w_cod = w.coda
        w_nuc = w.nucleus
        phone_n = phone.name
        pot_w = Word.Word(w_ons, w_nuc, w_cod)
        if (phone_n != w_nuc and pot_w.id in self.idio):
            return False

        return True
Example #25
def clf_make(test):
    print(test)
    x = []
    y = []
    for grade in range(1, 4):
        for num in range(1, 127):
            if num % 10 == test:
                continue
            try:
                a = []
                f = open('paragraph/%d/%d.t' % (grade, num), 'r')
            except:
                continue

            text = f.read()
            f.close()
            text = text.replace('\n', ' ')
            a.extend(Word.Count([text]))
            a.extend(Vex.Count([text], fqc=True))
            sents = nltk.sent_tokenize(text)
            k = Len.tkn(sents)
            a.extend(k)
            a.extend(Len.Test(sents, range(2, 4)))

            f1 = open('paragraph/%d/%d.list' % (grade, num), 'w')
            json.dump(a, f1)
            f1.close()
            x.append(a)
            y.append(grade - 1)

    clf_p = svm.SVC()
    clf_p.fit(x, y)
    f = open('clf_p_v.p', 'wb')
    pickle.dump(clf_p, f)
    f.close()
Example #26
    def convert_sentence(self, sentence):

        sentence = sentence.replace('-', ' ')
        sentence = sentence.translate(str.maketrans('', '',
                                                    string.punctuation))
        converted_sentence = ''
        #gramatica = GramaticalClass(sentence)

        lista = [i.lower() for i in re.findall(r"[\w]+", sentence)]
        words = [Word(palavra, pos) for pos, palavra in enumerate(lista)]
        words = list(map(lambda x: x.find_tonic(), words))

        for num, word in enumerate(words):
            #word.setClass()
            #word.setBase()
            word.setIsExc()
            word.find_tonic()
            if (num != len(words) - 1):
                word.setIsSandi(words[num + 1])
            conversion = word.convert_word()
            if (word.getIsSandi()):
                converted_sentence += conversion
            else:
                converted_sentence += conversion + ' '

        return converted_sentence
Example #27
 def tokenize_template(self):
     with open(self._template_file, 'r') as f:
         poem_template_text = f.read()
     template_split = poem_template_text.splitlines()
     template_tokenize = [nltk.word_tokenize(s) for s in template_split]
     template_words = [[Word(w) for w in l] for l in template_tokenize]
     return template_words
Example #28
 def tokenize(self):
     words = re.finditer(r"\w+('\w)?", self._text)
     tokens = []
     start = 0
     for match in words:
         ignore_text = self._text[start:match.start()]
         if ignore_text:
             ignore = Word.Ignore(ignore_text)
             tokens.append(ignore)
         word = match.group(0)
         token = Word.Classic(word, character='_')
         tokens.append(token)
         start = match.end()
     ignore_text = self._text[start:]
     if ignore_text:
         ignore = Word.Ignore(ignore_text)
         tokens.append(ignore)
     return tokens
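
A standalone sketch of what the regex above produces, using plain tuples in place of the Word.Classic / Word.Ignore objects (the sample text is made up):

import re

text = "Don't stop, world!"
tokens, start = [], 0
for match in re.finditer(r"\w+('\w)?", text):
    if text[start:match.start()]:
        tokens.append(("ignore", text[start:match.start()]))
    tokens.append(("word", match.group(0)))
    start = match.end()
if text[start:]:
    tokens.append(("ignore", text[start:]))
print(tokens)
# [('word', "Don't"), ('ignore', ' '), ('word', 'stop'), ('ignore', ', '),
#  ('word', 'world'), ('ignore', '!')]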
Example #29
 def test_hint(self):
     word = Word.Classic('at', '*')
     self.assertEqual('*t', word.hint())
     self.assertEqual('at', word.hint())
     try:
         word.hint()
         self.fail()
     except Exception:
         pass
Example #30
 def __init__(self, words=None):
     self.words = []
     if words is not None:
         if isinstance(words[0], Word):  #if list of Word
             self.words = copy.deepcopy(words)
         else:  #if list of strings
             for w in words:
                 self.words.append(Word(w))
     else:
         self.words = []
Example #31
 def load(self):
     """
     Load the data from the csv file
     """
     with open(self.file_name) as csv_file:
         reader = csv.reader(csv_file)
         next(reader)
         for word in reader:
             self.add(Word(word[0], word[1].split(DELIMITER), word[2]))
         print('... data loaded ...')
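
A minimal sketch of the file layout the loader above appears to expect: a header row that is skipped, then one row per word holding the word itself, its meanings joined by DELIMITER, and a third column. The sample data, the column meanings, and DELIMITER = ';' are assumptions for illustration only:

import csv
from io import StringIO

DELIMITER = ';'                                    # assumed value
sample = "word,meanings,extra\ncat,small feline;pet,noun\n"
reader = csv.reader(StringIO(sample))
next(reader)                                       # skip the header row, as load() does
for row in reader:
    word, meanings, extra = row[0], row[1].split(DELIMITER), row[2]
    print(word, meanings, extra)                   # cat ['small feline', 'pet'] noun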
Example #32
def createDb():
  db = sqlite3.connect('./db/users.db')
  cursor = db.cursor()

  sql_create = " ".join((
          "CREATE TABLE followers (",
          "user_name text,",
          "screen_name text,",
          "image_url text,",
          "false_percent integer,",
          "profile_text text,",
          "followers_count integer,",
          "profile_image binary",
          ");"))
  sql_insert_f = " ".join((
                "INSERT INTO followers (",
                "user_name,",
                "screen_name,",
                "image_url,",
                "false_percent,",
                "profile_text,",
                "followers_count,",
                "profile_image",
                ") VALUES (?,?,?,?,?,?,?);"))
  cur = db.execute("SELECT * FROM sqlite_master WHERE type='table' and name='%s'" % "followers")
  if cur.fetchone() is None:  # the table does not exist yet, so create it
    cursor.execute(sql_create)
    db.commit()
  else:
    cursor.execute("""DROP TABLE %s;""" % "followers")  #リセット
    cursor.execute(sql_create)

  # load the keys for OAuth 2.0
  with open("secret.json") as f:
    secretjson = json.load(f)

  # connect to Twitter
  t = Twitter(auth=OAuth(secretjson["access_token"], secretjson["access_token_secret"], secretjson["consumer_key"], secretjson["consumer_secret"]))

  my_account = input('あなたのアカウント名>>>  @')
  my_account = "@" + my_account
  print(my_account)
  # search
  try:
    followers = t.followers.list(screen_name=str(my_account), count=50)
  except:
    print("ERROR!!")
    exit()
  
  followers_list = []
  for x in followers['users']:
    user = {}
    user['name'] = x['name']
    user['screen_name'] = x['screen_name']
    user['image_url'] = x['profile_image_url_https']
    user['false_percent'] = 0
    user['profile_text'] = x['description']
    user['followers_count'] = x['followers_count']
    Image.saveImage(user['image_url'], user['screen_name'], "./pictures/followers")
    followers_list.append(user)
  
  for u in followers_list:
    same_names = t.users.search(q=u['name'],count=50)
    same_names = sorted(same_names, key=lambda x:x["followers_count"],reverse=True)
    same_name = u
    for s in same_names:
      if u['name'] == s['name']:
        same_name = s
        break
    print("[ユーザー名] " + u['name'])
  
  
    if u['name']==same_name['name']:
      if u['screen_name']==same_name['screen_name']:
        print('  [アカウント名] ' + u['name'] + '@' + u['screen_name'] + " " + str(u['followers_count']) + " [本物]\n")
        cursor.execute(sql_insert_f,(u['name'], u['screen_name'], u['image_url'], 0, u['profile_text'], u['followers_count'], None)) 
      else:
        print('  [アカウント名] ' + same_name['name'] + '@' + same_name['screen_name'] + " " + str(same_name['followers_count']) + " [本物?]")
        print('  [アカウント名] ' + u['name'] + '@' + u['screen_name'] + " " + str(u['followers_count']) + " [偽物かも?]")
      ######## image comparison ########
        print("  -> 画像判別:  ")
        Image.saveImage(same_name['profile_image_url_https'], same_name['screen_name'], "./pictures/same_name_users")
        image_match = Image.compareImage(u['screen_name'], same_name['screen_name'])
        print("    画像類似度" + str(image_match) + "%")
        u['false_percent']  = u['false_percent'] + image_match
      
      ######## profile text comparison ########
        print("  -> 文字判別:  ")
        word_match = Word.wordCompare(u['profile_text'],same_name['description'])
        u['false_percent']  = u['false_percent'] + word_match
        print("    プロフィール類似度: " + str(word_match) + "%")
      
        print("  -> その他:  未実装")

        print("なりすましアカウント率: " + str(u['false_percent']) + "%\n")
        cursor.execute(sql_insert_f,(u['name'], u['screen_name'], u['image_url'], u['false_percent'], u['profile_text'], u['followers_count'], None))

    print('---------------------------------------------------')
    pass
  db.commit()
  db.close()
Example #33
def applyPostbase(word, postbase):
	""" add a postbase to a word """
	#TODO would be cool if you could pass a list of postbases in here and have it do the "right thing"
	exp = Base.explode(word)
	keepStrongCfinal = False
	# @ symbol?
	dropVCfinal = False
	attachIrregular = False

	#keep the final consonant
	plus = string.find(postbase, '+')
	if plus > -1:
		postbase = postbase[:plus] + postbase[plus + 1:]

	# FIXME need to check against words that contain '-' as a part of the word
	# FIXME this might cause trouble with enclitics
	# remove the last consonant
	minus = string.find(postbase, '-')
	if minus > -1:
		postbase = postbase[:minus] + postbase[minus + 1:]
		if not Word.isVowel(exp[-1]):
			exp.pop(-1)

	# remove final 'e'
	tilde = string.find(postbase, '~')
	if tilde > -1:
		postbase = postbase[:tilde] + postbase[tilde + 1:]
		if exp[-1] == 'e':
			exp.pop(-1)

	# choose between letters in parenthesis
	paren = string.find(postbase, '(')
	if paren > -1:
		pl = parenLetter(word, postbase)
		#FIXME, what if multiple parens
		parenOpen = string.find(postbase, '(')
		parenClose = string.find(postbase, ')') + 1

		postbase = postbase[:parenOpen] + pl + postbase[parenClose:]

	# add gemination if needed
	#FIXME not tested on words that contain 2 \' ...does such a word exist?
	apos = string.find(postbase, '\'')
	if apos > -1:
		postbase = postbase[:apos] + postbase[apos + 1:]

		# FIXME this may indicate that there's something that needs tweaking about the syllableMatches
		# function. A short base is defined as [C]VCe, currently this only tests the end of the word.
		# this should match VCe and CVCe only
		shortA = len(exp) == 3 and Syllables.syllableMatches(exp, 'VCe')
		shortB = len(exp) == 4 and Syllables.syllableMatches(exp, 'CVCe')
		if shortA or shortB:
			exp.pop(-1)
			if Syllables.syllableCount(exp) == 1:
				exp.append('\'')
		elif exp[-1] == 'e':
			exp.pop(-1)

	# velar dropping suffixes
	colon = string.find(postbase, ':')
	if colon > -1:
		testsuf = exp[-1] + postbase
		testExp = Base.explode(testsuf)
		colon = testExp.index(':')
		velar = testExp[colon + 1]
		testExp = testExp[:colon] + testExp[colon + 1:]

		if Syllables.syllableMatches(testExp, 'CV' + velar + 'V'): #FIXME might crash if word isn't long enough
			testExp = Base.explode(postbase)
			colon = testExp.index(':')
			testExp.pop(colon)
			testExp.pop(colon)
		else:
			testExp = Base.explode(postbase)
			colon = testExp.index(':')
			testExp.pop(colon)

		postbase = ''.join(testExp)

	if postbase[0] == '÷':
		keepStrongCfinal = True

	if string.find(postbase, ':') > -1:
		dropVelar = True

	if postbase[0] == '- -':
		dropVCfinal = True

	if postbase[0] == '%':
		attachIrregular = True

	word = ''.join(exp)
	word = word + postbase

	#cleanup for words that wind up not needing the \' for gemination because they are followed by 2 vowels
	#FIXME not tested on words that contain 2 \' ...does such a word exist
	exp = Base.explode(word)
	try:
		gemmarker = exp.index('\'')
	except ValueError:
		gemmarker = -1
	if gemmarker > -1 and len(exp) >= gemmarker + 3:
		syl = exp[gemmarker + 1:gemmarker + 3]
		if Syllables.syllableMatches(syl, 'VV'):
			exp.pop(gemmarker)

	word = ''.join(exp)

	return word
Example #34
    def concat(self, w1, trans):

        #trans uses order indices rather than innate indices
        currentPermutation = w1.lastInSequence()
        newWord = Word([])
        newWord.sequence.extend(w1.sequence)
        newWord.sequence.append(currentPermutation.transpose(trans))#append the result of the transposition?
        newWord.flips.extend(w1.flips)
        newWord.flips.append(trans)

        frequency = self.phi(currentPermutation) #frequency vector for current permutation

        #new maps
        if (trans == "R"):
            indexFront = 0
            indexBack = currentPermutation.sigmaInv(self.dim - 1)
            fback = frequency[indexBack]
            ffront = 0
            newMatrix = []
            newVector = []
            #cyclic reindex: there is none, we're using innate coordinates
            #just set the xsigmainvn-1 to 0
            for i in range(0, self.dim):#i is innate
                temp = System.makeZeros(self.dim)#zero vector
                if i != currentPermutation.sigmaInv(self.dim - 1):#everywhere except for oscillator hitting 1
                    assert(fback != 0)
                    freqRatio = frequency[i]/fback
                    temp[i] = 1
                    temp[indexBack] -= freqRatio
                    newMatrix.append(temp)
                    newVector.append(freqRatio)
                else: #the row of the oscillator hitting 1, set to zero
                    newMatrix.append(temp)#just zeros
                    newVector.append(0)


        else:
            indexBack = currentPermutation.sigmaInv(trans[0])
            indexFront = currentPermutation.sigmaInv(trans[1])
            fback = frequency[indexBack]
            ffront = frequency[indexFront]
            newMatrix = []
            newVector = System.makeZeros(self.dim)
            for i in range(0, self.dim):#i is innate
                freqRatio = frequency[i]/(fback - ffront)
                temp = System.makeZeros(self.dim)#zero vector
                temp[i] = 1
                temp[indexFront] += freqRatio
                temp[indexBack] -= freqRatio
                newMatrix.append(temp)
        newMap = AffineMap(newMatrix, newVector)

        newWord.map = AffineMap.compose(newMap, w1.map)#i think I should compose from left (check)


        #make row for mainTrans
        mainRow = System.makeZeros(self.dim)
        if trans == 'R':
            tBack = currentPermutation.sigmaInv(self.dim - 1)#innate
            mainRow[tBack] += -1/frequency[tBack]
            mainConstant = 1/frequency[tBack]
        else:
            tFront = currentPermutation.sigmaInv(trans[1])
            tBack = currentPermutation.sigmaInv(trans[0])
            mainRow[tBack] += -1/(frequency[tBack] - frequency[tFront])
            mainRow[tFront] += 1/(frequency[tBack] - frequency[tFront])
            mainConstant = 0

        transpositions = self.admissibleTranspositions(currentPermutation)
        constraintA = []
        constraintb = []
        for competingTrans in transpositions:
            if competingTrans != trans:
                if competingTrans == 'R':
                    ctBack = currentPermutation.sigmaInv(self.dim - 1)#innate
                    competingRow = System.makeZeros(self.dim)
                    competingRow[ctBack] = -1/frequency[ctBack]#temp and frequency use innate indices
                    competingConstant = 1/frequency[ctBack]

                else:
                    q = competingTrans[1]
                    ctBack = currentPermutation.sigmaInv(q-1)
                    ctFront = currentPermutation.sigmaInv(q)
                    competingRow = System.makeZeros(self.dim)#temp will use innate indices since it is used to make constraints
                    assert(frequency[ctBack] != frequency[ctFront])
                    competingRow[ctBack] = -1/(frequency[ctBack] - frequency[ctFront])
                    competingRow[ctFront] = 1/(frequency[ctBack] - frequency[ctFront])
                    competingConstant = 0

                constraintRow = []
                for i in range(0, self.dim):
                    constraintRow.append(mainRow[i] - competingRow[i])
                constraintConstant = competingConstant - mainConstant
                constraintA.append(constraintRow)
                constraintb.append(constraintConstant)




        #append zero row for empty set of constraints
        constraintA.append(System.makeZeros(self.dim))
        constraintb.append(0.0)

        constraintsAtTrans = ConvexSet(constraintA, constraintb)
        constraintsAt0 = ConvexGeometry.preImage(w1.map, constraintsAtTrans)
        newWord.set = ConvexSet.intersect(constraintsAt0, w1.set)

        return newWord
Example #35
    def oldconcat(self, w1, w2, intPerm):
        #todo: handle the case when things "passing" each other travel at the same speed
        phi = self.phi(w1.lastInSequence())
        intermediateMatrix = []
        intermediateVector = []
        midTraj = w1.lastInSequence()

        if intPerm == "R":
            #rotation case
            criticalIndex = midTraj.sigmaInv(self.dim-1)
            speedDifferential = phi[criticalIndex]
            if speedDifferential == 0:
                #return with null feasibility
                temp = Word([])
                temp.sequence = []
                temp.sequence.extend(w1.sequence)
                temp.sequence.extend(w2.sequence)
                return temp
            for i in range(0, self.dim):
                speedRatio = phi[i]/speedDifferential
                temp = System.makeZeros(self.dim)
                if i != criticalIndex:
                    temp[i] += 1
                    temp[criticalIndex] -= speedRatio
                    intermediateMatrix.append(temp)
                    intermediateVector.append(speedRatio)
                else:
                    intermediateMatrix.append(temp)
                    intermediateVector.append(speedRatio-1)
            print "intermediateMatrix"
            print intermediateMatrix
            print intermediateVector

        else:
            #transposition case
            speedDifferential = phi[midTraj.sigmaInv(intPerm[1])] - phi[midTraj.sigmaInv(intPerm[0])]
            if speedDifferential >= 0:
                #return with null feasibility
                temp = Word([])
                temp.sequence = []
                temp.sequence.extend(w1.sequence)
                temp.sequence.extend(w2.sequence)
                return temp
            for i in range(0, self.dim):
                speedDifferential = phi[midTraj.sigmaInv(intPerm[1])] - phi[midTraj.sigmaInv(intPerm[0])]
                speedRatio = phi[i]/(speedDifferential)
                temp = System.makeZeros(self.dim)
                temp[i] += 1
                temp[midTraj.sigmaInv(intPerm[0])] += speedRatio
                temp[midTraj.sigmaInv(intPerm[1])] -= speedRatio
                intermediateMatrix.append(temp)
            intermediateVector = System.makeZeros(self.dim)

        intermediateMap = AffineMap(intermediateMatrix, intermediateVector)

        temp = Word([])
        temp.sequence = []
        temp.sequence.extend(w1.sequence)
        temp.sequence.extend(w2.sequence)
        temp.flips.extend(w1.flips) #append flips of first piece
        temp.flips.append(intPerm) #for now, we know the middle flip
        temp.flips.extend(w2.flips) #append flips of second piece

        #transform intPerm into static indexing
        if intPerm == "R":
            loggedEvent = "R"
        else:
            index1 = w1.sequence[-1].permutation[intPerm[0]]
            index2 = w1.sequence[-1].permutation[intPerm[1]]
            loggedEvent = [index1,index2]
        temp.eventLog.extend(w1.eventLog)
        temp.eventLog.append(loggedEvent)
        temp.eventLog.extend(w2.eventLog)

        #todo: compute constraints and map
        temp.map = AffineMap.compose(intermediateMap,w1.map)
        temp.set = ConvexSet.intersect(w1.set, ConvexGeometry.preImage(temp.map, w2.set))
        temp.map = AffineMap.compose(w2.map, temp.map)
        return temp