示例#1
0
def main():
    """
    Demonstrate the Soundex module.

    Builds two parallel lists of similar-sounding surnames, encodes every
    pair with ``soundex.soundex`` and prints one row per pair in the form
    ``name code  name code``.
    """

    print("-----------------")
    print("| codedrome.com |")
    print("| Soundex       |")
    print("-----------------\n")

    names1 = [
        "Johnson", "Adams", "Davis", "Simons", "Richards", "Taylor", "Carter",
        "Stevenson", "Taylor", "Smith", "McDonald", "Harris", "Sim",
        "Williams", "Baker", "Wells", "Fraser", "Jones", "Wilks", "Hunt",
        "Sanders", "Parsons", "Robson", "Harker"
    ]

    names2 = [
        "Jonson", "Addams", "Davies", "Simmons", "Richardson", "Tailor",
        "Chater", "Stephenson", "Naylor", "Smythe", "MacDonald", "Harrys",
        "Sym", "Wilson", "Barker", "Wills", "Frazer", "Johns", "Wilkinson",
        "Hunter", "Saunders", "Pearson", "Robertson", "Parker"
    ]

    # The lists are parallel: the n-th entry of names1 is paired with the
    # n-th entry of names2, so walk them in lockstep.
    for name1, name2 in zip(names1, names2):
        s1 = soundex.soundex(name1)
        s2 = soundex.soundex(name2)

        print("{:20s}{:4s}  {:20s}{:4s}".format(name1, s1, name2, s2))
示例#2
0
def similar(word="hallo", filename="/usr/share/dict/words"):
    """Find the ASCII-only words in a word list that share ``word``'s soundex.

    Args:
        word: Reference word whose soundex code is the search key.
        filename: Path of a whitespace-separated word list.

    Returns:
        The matching words, in file order. The list is also printed.
    """
    # Read the whole list up front and release the file handle right away.
    with open(filename) as wordfile:
        candidates = wordfile.read().split()

    # Words containing any non-ASCII character are skipped before they are
    # ever handed to soundex().
    ascii_words = [w for w in candidates if all(ord(ch) < 128 for ch in w)]

    wanted = soundex(word)
    result = [w for w in ascii_words if soundex(w) == wanted]
    print(result)

    return result
示例#3
0
def soundex_distance(ovv_snd,cand):
    """Return the Levenshtein distance between ``ovv_snd`` and ``cand``'s soundex.

    ``ovv_snd`` is compared against ``soundex.soundex(cand)``. The first
    attempt decodes ``cand`` as UTF-8 (ignoring errors); each handler below
    reports the failure and retries with a different conversion of ``cand``.
    A ``TypeError`` yields the fixed fallback distance ``10.``.

    NOTE(review): relies on the ``unicode`` builtin and ``decode`` on the
    candidate, so this looks like Python 2 code - confirm before porting.
    """
    try:
        return Levenshtein.distance(
            unicode(ovv_snd),
            soundex.soundex(cand.decode("utf-8","ignore")))
    except UnicodeEncodeError:
        print('UnicodeEncodeError[ovv_snd]: %s %s' % (ovv_snd,cand))
        # Retry with the candidate reduced to its ASCII-encodable characters.
        ascii_encoded = cand.encode("ascii","ignore")
        return Levenshtein.distance(ovv_snd,soundex.soundex(ascii_encoded))
    except UnicodeDecodeError:
        print('UnicodeDecodeError[ovv_snd]: %s %s' % (ovv_snd,cand))
        # Retry decoding as ASCII, ignoring undecodable bytes.
        ascii_decoded = cand.decode("ascii","ignore")
        return Levenshtein.distance(ovv_snd,soundex.soundex(ascii_decoded))
    except TypeError:
        print ('TypeError[ovv_snd]: %s %s' % (ovv_snd,cand))
        # Fixed fallback distance when the inputs cannot be compared.
        return 10.
示例#4
0
 def test_allFeatures(self):
     """Check the complete four-character code for a set of surnames."""
     expectations = (
         ('T522', 'Tymczak'),
         ('A261', 'Ashcraft'),
         ('A261', 'Ashcroft'),
         ('P236', 'Pfister'),
         ('R150', 'Rubin'),
         ('R163', 'Robert'),
         ('R163', 'Rupert'),
         ('H555', 'Honeyman'),
     )
     for code, surname in expectations:
         self.assertEqual(code, soundex(surname))
示例#5
0
def main(args):
    """Print every word from a word-list file that sounds like a given word.

    Args:
        args: Sequence where ``args[0]`` is the reference word and
            ``args[1]`` is the path of a text file with one word per line.

    Each line is reduced to its ASCII letters; the cleaned word is printed
    when its soundex code equals that of the reference word. When fewer than
    two arguments are supplied, ``RTFM`` is printed and the function returns
    without touching the file system.
    """
    if len(args) < 2:
        print("RTFM")
        # Nothing to compare without both arguments, so stop here.
        return

    # Build the alphabet lookup once instead of once per character.
    letters = set(string.ascii_letters)

    with open(args[1], "r") as handle:
        for word in handle:
            # Strip newlines, digits, punctuation and non-ASCII characters.
            cleanWord = "".join(ch for ch in word if ch in letters)

            if soundex(args[0]) == soundex(cleanWord):
                print(cleanWord)
示例#6
0
 def gen_key(self,val):
     """Return the key under which ``val`` is indexed.

     When ``self.use_soundex`` is falsy, ``val`` is returned unchanged.
     Otherwise ``soundex.soundex(val)`` is returned; if that call raises
     ``UnicodeEncodeError`` the raw value is used as the key instead.
     """
     if not self.use_soundex:
         return val
     try:
         return soundex.soundex(val)
     except UnicodeEncodeError:
         # Values soundex cannot encode fall back to the raw value.
         return val
示例#7
0
def main():
    """Print the soundex codes of two parallel lists of similar surnames.

    Each output row shows a name from the first list, its code, the name at
    the same position in the second list, and that name's code.
    """
    names1 = [
        "Johnson", "Adams", "Davis", "Simons", "Richards", "Taylor", "Carter",
        "Stevenson", "Taylor", "Smith", "McDonald", "Harris", "Sim",
        "Williams", "Baker", "Wells", "Fraser", "Jones", "Wilks", "Hunt",
        "Sanders", "Parsons", "Robson", "Harker",
    ]

    names2 = [
        "Jonson", "Addams", "Davies", "Simmons", "Richardson", "Tailor",
        "Chater", "Stephenson", "Naylor", "Smythe", "MacDonald", "Harrys",
        "Sym", "Wilson", "Barker", "Wills", "Frazer", "Johns", "Wilkinson",
        "Hunter", "Saunders", "Pearson", "Robertson", "Parker",
    ]

    # Entries at the same position belong together, so iterate in lockstep.
    for name1, name2 in zip(names1, names2):
        s1 = soundex.soundex(name1)
        s2 = soundex.soundex(name2)

        print("{:20s}{:4s}  {:20s}{:4s}".format(name1, s1, name2, s2))
示例#8
0
    def __init__(self, dbstate, uistate, options_class, name, callback=None):
        """Build the SoundEx generator window and pre-fill it with a surname.

        Initialises both base classes, loads the Glade UI and connects its
        signals, fills the name combo box with the sorted, de-duplicated
        surnames of every person yielded by ``self.db.iter_people()``, and
        seeds the entry with the surname of the last person iterated
        together with its soundex code. With no people the entry is left
        empty.

        ``dbstate``, ``options_class`` and ``name`` are forwarded to
        ``tool.Tool.__init__``; ``uistate`` goes to
        ``ManagedWindow.ManagedWindow.__init__``. ``callback`` is accepted
        but not used by this method.
        """
        self.label = _('SoundEx code generator')
        tool.Tool.__init__(self, dbstate, options_class, name)
        ManagedWindow.ManagedWindow.__init__(self, uistate, [], self.__class__)

        self.glade = Glade()
        self.glade.connect_signals({
            "destroy_passed_object": self.close,
            "on_help_clicked": self.on_help_clicked,
            "on_delete_event": self.close,
        })

        window = self.glade.toplevel
        self.set_window(window, self.glade.get_object('title'), self.label)

        self.value = self.glade.get_object("value")
        self.autocomp = self.glade.get_object("name_list")
        self.name = self.autocomp.child

        # Recompute the code whenever the text in the entry changes.
        self.name.connect('changed', self.on_apply_clicked)

        # Collect distinct surnames in a set so each membership check is
        # O(1); ``person`` deliberately keeps the last person iterated
        # because it seeds the entry below.
        surnames = set()
        person = None
        for person in self.db.iter_people():
            surnames.add(person.get_primary_name().get_surname())

        names = sorted(surnames)

        AutoComp.fill_combo(self.autocomp, names)

        if person:
            n = person.get_primary_name().get_surname()
            self.name.set_text(n)
            try:
                se_text = soundex.soundex(n)
            except UnicodeEncodeError:
                # Surnames soundex cannot encode show the code of ''.
                se_text = soundex.soundex('')
            self.value.set_text(se_text)
        else:
            self.name.set_text("")

        self.show()
示例#9
0
文件: phone.py 项目: sh1mmer/a2bot
def lookup(name):
    """Return the directory records that best match ``name``.

    Records are first selected by the soundex code of the lowercased name.
    Within that group, a record is kept when the lowercased name is a
    substring of its key, its ``'E-mail'`` value or its ``'Username'`` value
    (compared case-insensitively). If no record passes that narrowing step,
    every record with the same soundex code is returned.

    Returns:
        A list of record dicts; empty when no record shares the soundex code.
    """
    if not data:
        # Populate the module-level index on first use.
        load_data()
    name = name.lower()
    # Start with SOUNDEX match.
    bucket = data.get(soundex.soundex(name), {})
    # Narrow to best match.
    matches = [
        record
        for full_name, record in bucket.items()
        if any(
            name in field.lower()
            for field in (
                full_name,
                record.get('E-mail', ''),
                record.get('Username', ''),
            )
        )
    ]
    # If no best match, return all (as a real list on Python 2 and 3 alike).
    return matches or list(bucket.values())
示例#10
0
def checksimilar(filename="/usr/share/dict/words"):
    """Rank soundex codes by how many ASCII-only words map to each of them.

    Args:
        filename: Path of a whitespace-separated word list.

    Returns:
        A list of ``(soundex_code, word_count)`` tuples sorted by descending
        count. The (up to) ten most frequent codes are also printed.
    """
    # Read everything first so the file handle is released immediately.
    with open(filename) as wordfile:
        words = wordfile.read().split()

    # Words containing any non-ASCII character are never passed to soundex().
    ascii_words = [w for w in words if all(ord(ch) < 128 for ch in w)]

    counts = {}
    for word in ascii_words:
        code = soundex(word)
        counts[code] = counts.get(code, 0) + 1

    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    print("Top 10:")
    # Slicing keeps this safe when fewer than ten distinct codes exist.
    for code, count in ranked[:10]:
        print(code, "klingt:", count, "mal !")

    return ranked
def main(args):
    """Print the largest group of words in a file that share a soundex code.

    Args:
        args: Sequence whose first element is the path of a text file with
            one word per line.

    Each line is reduced to its ASCII letters and grouped by soundex code.
    The ``(code, words)`` pair with the most words is printed; on a tie the
    code encountered first wins. When no argument is supplied, ``RTFM`` is
    printed and the function returns; an empty file prints nothing.
    """
    if len(args) < 1:
        print("RTFM")
        # No file name given, so there is nothing to read.
        return

    with open(args[0], "r") as handle:
        words = handle.readlines()

    soundexDict = {}

    for word in words:
        cleanWord = "".join([l for l in word if l in string.ascii_letters])

        # Append in place rather than rebuilding the list for every word.
        soundexDict.setdefault(soundex(cleanWord), []).append(cleanWord)

    if not soundexDict:
        # Empty input: there is no group to report.
        return

    # max() returns the first maximal entry, i.e. the same pair a stable
    # descending sort would place first.
    wordList = max(soundexDict.items(), key=lambda t: len(t[1]))

    print(wordList)
示例#12
0
 def test_upperchars(self):
     "mixed-case input yields the same codes as its lowercase form"
     cases = (
         ("Soundex", "s53200"),
         ("soUNDeggs", "s53200"),
         ("fLuRbEl", "f46140"),
     )
     for word, code in cases:
         self.assertEqual(code, soundex(word))
示例#13
0
for (projectPair) in projectPairs:
    # Each pair holds, by position: RF name, RG name, RF URL, RG URL.
    RFname = projectPair[0]
    RGname = projectPair[1]
    RFurl = projectPair[2]
    RGurl = projectPair[3]

    # lowercase everything so every comparison below is case-insensitive
    RFnameLC = RFname.lower()
    RGnameLC = RGname.lower()
    RFurlLC = RFurl.lower()
    RGurlLC = RGurl.lower()

    # calculate string metrics
    levNames = edit_distance(RFnameLC, RGnameLC)
    levURLs = edit_distance(RFurlLC, RGurlLC)
    soundexRFname = soundex(RFnameLC)
    soundexRGname = soundex(RGnameLC)

    # is the RF project name inside the RG project name?
    rf_in_rg = 1 if RFnameLC in RGnameLC else 0

    # is the RF project name inside the RG project URL?
    # Compare against the lowercased URL: the name is already lowercased,
    # so testing the raw URL would miss matches that differ only in case.
    rf_in_rgurl = 1 if RFnameLC in RGurlLC else 0

    # is any dev on the RF candidate in the dev list for the RG candidate?
示例#14
0
 def on_apply_clicked(self, obj):
     """Show the soundex code of the text currently held by ``obj``.

     The text from ``obj.get_text()`` is converted with ``unicode`` and
     encoded; the result is written to ``self.value``. If a
     ``UnicodeEncodeError`` is raised, the code of the empty string is shown
     instead.
     """
     try:
         entered = unicode(obj.get_text())
         code = soundex.soundex(entered)
     except UnicodeEncodeError:
         code = soundex.soundex('')
     self.value.set_text(code)
示例#15
0
def main(args):
    """Refresh the local SQLite copy of the INSEE geographic data.

    Loads and scans the COG region, departement and commune files and the
    census populations, then inserts or updates one row per entry in the
    ``regions``, ``departements`` and ``communes`` tables of the database
    file named by ``sqlDBFileName``. The tables are created first when that
    file does not exist yet. Each row also stores the soundex code of the
    entry's name in its ``sname`` column. Progress, timings and the number
    of inserted and updated rows are printed.

    Args:
        args: Command-line arguments. If any of them equals ``-download``
            the ``override`` flag passed to every ``download()`` call is
            true (presumably forcing a re-download of files that already
            exist locally - confirm against the ``insee`` module).

    Every SQL statement binds its values through ``?`` placeholders, so
    names containing quotes cannot break or alter a statement. A commune
    whose insert fails is reported and skipped, and is not counted as an
    addition.
    """
    # By default, do not reload the local files (if they exist).
    override = "-download" in args

    print("--------------------------------------------------------------")
    print("%s %s" % (botName, botVersion))

    print("Analyse COG Régions")
    clk = time.time()
    regions = insee.insee_region(regionUrl, config.osm_temp_folder)
    regions.download(override)
    regions.scan()
    print("> %d region(s), t=%.2f" % (len(regions.data_list), time.time() - clk))

    print("Analyse COG Départements")
    clk = time.time()
    departements = insee.insee_departement(deptUrl, config.osm_temp_folder)
    departements.download(override)
    departements.scan()
    print("> %d département(s), t=%.2f" % (len(departements.data_list), time.time() - clk))

    print("Analyse COG Communes")
    clk = time.time()
    communes = insee.insee_commune(commUrl, config.osm_temp_folder)
    communes.download(override)
    communes.scan()
    print("> %d communes, t=%.2f" % (len(communes.data_list), time.time() - clk))

    print("Analyse Recensement %d (populations)" % insee_year)
    clk = time.time()
    populations = insee.insee_population(popUrl, config.osm_temp_folder)
    populations.download(override)
    if override:
        print("> Download %.2f" % (time.time() - clk))
    clk = time.time()
    populations.scan(regions, departements, communes)
    print("> Scan %.2f" % (time.time() - clk))

    clk = time.time()
    dbName = sqlDBFileName
    # Check before connecting: sqlite3.connect() creates a missing file.
    is_new_db = not os.path.isfile(dbName)
    sql = sqlite3.connect(dbName)
    try:
        if is_new_db:
            sql.execute('''CREATE TABLE regions (id INTEGER PRIMARY KEY NOT NULL,name TEXT,sname TEXT,center TEXT,population INTEGER,year INTEGER);''')
            sql.execute('''CREATE TABLE departements (id VARCHAR(5) PRIMARY KEY NOT NULL,region INTEGER,name TEXT,sname TEXT,center TEXT,population INTEGER,year INTEGER);''')
            sql.execute('''CREATE TABLE communes (id VARCHAR(10) PRIMARY KEY NOT NULL,name TEXT,sname TEXT,departement VARCHAR(5),region INTEGER,population INTEGER,year INTEGER,osm_id INTEGER,osm_type VARCHAR(15),latitude FLOAT,longitude FLOAT);''')
            sql.commit()
            print("create new database")
        else:
            print("open existing database")
        c = sql.cursor()
        nc = 0  # rows inserted
        nu = 0  # rows updated

        print("update regions data (%d)" % len(regions.data_list))
        for r in regions.data_list:
            sname = soundex.soundex(r.name)
            c.execute('''SELECT * FROM regions WHERE id=?;''', (r.region,))
            if c.fetchone() is None:
                t = (r.region, r.name, sname, r.cheflieu, r.population, insee_year)
                c.execute('''INSERT INTO regions (id,name,sname,center,population,year) VALUES (?,?,?,?,?,?);''', t)
                nc = nc + 1
            else:
                t = (r.name, sname, r.cheflieu, r.population, insee_year, r.region)
                c.execute('''UPDATE regions SET name=?,sname=?,center=?,population=?,year=? WHERE id=?;''', t)
                nu = nu + 1
        sql.commit()

        print("update departements data (%d)" % len(departements.data_list))
        for d in departements.data_list:
            sname = soundex.soundex(d.name)
            c.execute('''SELECT * FROM departements WHERE id=?;''', (d.dep,))
            if c.fetchone() is None:
                t = (d.dep, d.region, d.name, sname, d.cheflieu, d.population, insee_year)
                c.execute('''INSERT INTO departements (id,region,name,sname,center,population,year) VALUES (?,?,?,?,?,?,?);''', t)
                nc = nc + 1
            else:
                t = (d.region, d.name, sname, d.cheflieu, d.population, insee_year, d.dep)
                c.execute('''UPDATE departements SET region=?,name=?,sname=?,center=?,population=?,year=? WHERE id=?;''', t)
                nu = nu + 1
        sql.commit()

        print("update communes data (%d)" % len(communes.data_list))
        for cc in communes.data_list:
            sname = soundex.soundex(cc.name)
            c.execute('''SELECT * FROM communes WHERE id=?;''', (cc.insee,))
            if c.fetchone() is None:
                t = (cc.insee, cc.name, sname, cc.dep, cc.reg, cc.population, insee_year)
                try:
                    c.execute('''INSERT INTO communes (id,name,sname,departement,region,population,year) VALUES (?,?,?,?,?,?,?);''', t)
                except sqlite3.Error:
                    # Best effort: report the commune and carry on.
                    print("\terror with %s %s" % (cc.insee, cc.nccenr))
                    print(sys.exc_info())
                else:
                    # Only rows that were really inserted count as additions.
                    nc = nc + 1
            else:
                t = (cc.name, sname, cc.dep, cc.reg, cc.population, insee_year, cc.insee)
                c.execute('''UPDATE communes SET name=?,sname=?,departement=?,region=?,population=?,year=? WHERE id=?;''', t)
                nu = nu + 1
        sql.commit()

        print("> Database update %.2f" % (time.time() - clk))
        print("database, %d ajout(s) et %d mise à jour" % (nc, nu))
        print("")

        c.close()
    finally:
        # Release the connection even when a statement above fails.
        sql.close()

    print("--------------------------------------------------------------")
示例#16
0
文件: phone.py 项目: sh1mmer/a2bot
def __add_record(name, d):
    """File record ``d`` under the soundex code of ``name``.

    The module-level ``data`` index maps a soundex code to a dict of
    records keyed by each record's ``'Full Name'`` value; a record with the
    same full name under the same code is overwritten.
    """
    idx = soundex.soundex(name)
    full_name = d['Full Name']
    # Create the per-code dict on first use, then store the record in it.
    data.setdefault(idx, {})[full_name] = d
示例#17
0
    def testKnownValues(self):
        """soundex should give known result with known input"""
        for name, result in self.knownValues:
	    self.assertEqual(soundex.soundex(name), result)
示例#18
0
 def test_retainsCaseOfFirstChar(self):
     """The first character of the code keeps the case of the input."""
     for letter in ('a', 'l', 'H'):
         self.assertEqual(letter + '000', soundex(letter))
示例#19
0
 def test_removesVowels(self):
     """Vowels after the first letter add no digits to the code."""
     encoded = soundex('Aaaa')
     self.assertEqual('A000', encoded)
示例#20
0
 def test_contractsConsonantsSeparatedByHorW(self):
     """Two 'l's separated only by an 'h' are coded as a single digit."""
     encoded = soundex('Alhl')
     self.assertEqual('A400', encoded)
示例#21
0
 def test_removesHandW(self):
     """'h' and 'w' after the first letter add no digits to the code."""
     encoded = soundex('Ahw')
     self.assertEqual('A000', encoded)
示例#22
0
 def test_contractConsecutiveConsonants(self):
     """A doubled consonant is coded once, whatever the case of each letter."""
     for word in ('All', 'ALl'):
         self.assertEqual('A400', soundex(word))
示例#23
0
 def test_replacesConsonantsWithDigit(self):
     """A consonant after the first letter is replaced by its digit."""
     encoded = soundex('Al')
     self.assertEqual('A400', encoded)
示例#24
0
 def test_zeroPadsSingleCharWord(self):
     """A one-letter word is padded with zeros to four characters."""
     encoded = soundex('A')
     self.assertEqual('A000', encoded)
示例#25
0
 def test_singles(self):
     "single characters are padded with zeros to six characters"
     for letter in "axo":
         self.assertEqual(letter + "00000", soundex(letter))
示例#26
0
 def test_short(self):
     "long runs of uncoded letters collapse to just the first letter"
     filler = "AEIOUWYHaeiouwyh"
     cases = (
         ("s00000", "s" + filler),
         ("x00000", "x" + filler * 17),
         ("a00000", "a" + filler * 42),
     )
     for code, word in cases:
         self.assertEqual(code, soundex(word))
示例#27
0
 def test_examples(self):
     "example words and their six-character codes"
     cases = (
         ("soundex", "s53200"),
         ("soundeggs", "s53200"),
         ("flurbel", "f46140"),
     )
     for word, code in cases:
         self.assertEqual(code, soundex(word))
示例#28
0
 def test_doesNotReplaceConsonantWithDigitIfFirstLetter(self):
     """A leading consonant stays a letter instead of becoming a digit."""
     encoded = soundex('L')
     self.assertEqual('L000', encoded)
示例#29
0
 def test_alwaysRetunsCodeWithThreeDigits(self):
     """A long name still yields one letter followed by three digits."""
     encoded = soundex('Ashcraft')
     self.assertEqual('A261', encoded)
示例#30
0
 def test_tooooolong(self):
     "words needing more than 6 characters are cut off after the sixth"
     long_words = (
         "soundexdex",
         "soundexdexdex",
         "soundexdexdexdex",
         "soundexdexdexdexflurbel",
     )
     for word in long_words:
         self.assertEqual("s53232", soundex(word))
示例#31
0
 def testKnownValues(self):
     """soundex should give known result with known input"""
     for name, result in self.knownValues:
         self.assertEqual(soundex.soundex(name), result)
示例#32
0
 def test_retainsSoleCharOfWord(self):
     """The code of a one-letter word starts with that same letter."""
     for letter in ('A', 'B'):
         self.assertEqual(letter, first_char(soundex(letter)))
示例#33
0
 def test_handelsUpperCaseLikeLowerCase(self):
     """Upper-case letters after the first are coded like lower-case ones."""
     encoded = soundex('aLH')
     self.assertEqual('a400', encoded)
示例#34
0
 def test_all(self):
     "every coded letter, each separated by a run of uncoded letters"
     separator = "AEIOUWYHaeiouwyh"
     cases = (
         ("s12123", "s", "bfpvCGJKQSXZBFPVcgjkqsxzdt"),
         ("x45656", "x", "lmnrmnrmnrmnrmnrmnr"),
     )
     for code, first, letters in cases:
         # str.join places the separator between every pair of letters.
         chars = separator.join(letters)
         self.assertEqual(code, soundex(first + chars))