def findTrack(self, artistname, trackname):
    """Look up one song of an artist in the Kodi audio library.

    Sends an ``AudioLibrary.GetSongs`` JSON-RPC request filtered by the exact
    artist name and by the upper/lower-case variants of the first (up to two)
    characters of the title. The returned titles are then compared with
    ``trackname`` after ``strip()``, ``lower()`` and ``strip_accents``.

    :param artistname: artist name, passed verbatim to the ``is`` filter.
    :param trackname: title of the wanted song.
    :return: the first matching song dict whose file exists on disk, or
        ``None`` when the title is empty, the request reports an error, or
        no usable song is found.
    """
    if not trackname:
        # Nothing to search for. Indexing the title used to raise IndexError.
        return None

    # Every case combination of the leading characters. Titles shorter than
    # two characters simply yield fewer variants instead of failing.
    prefixes = ['']
    for ch in trackname[:2]:
        prefixes = [head + variant
                    for head in prefixes
                    for variant in (ch.lower(), ch.upper())]
    title_filters = [
        {"field": "title", "operator": "startswith",
         "value": prefix.encode('utf-8')}
        for prefix in prefixes
    ]

    req = {
        "jsonrpc": "2.0",
        "method": "AudioLibrary.GetSongs",
        "id": "libSongs",
        "params": {
            "properties": ["artist", "duration", "album", "title", "file",
                           "thumbnail", "fanart", "track"],
            "limits": {"start": 0, "end": 1000},
            "sort": {"order": "ascending", "method": "track",
                     "ignorearticle": True},
            "filter": {"and": [
                {"field": "artist", "operator": "is",
                 "value": artistname.encode('utf-8')},
                {"or": title_filters},
            ]},
        },
    }

    rpcresp = json.loads(xbmc.executeJSONRPC(json.dumps(req)))
    if 'error' in rpcresp:
        # Same handling as findTracks: log the reply instead of failing on
        # the missing 'result' key.
        log(str(rpcresp))
        return None
    songs = rpcresp.get('result', {}).get('songs') or []

    wanted = strip_accents(trackname.strip().lower())
    for song in songs:
        if wanted != strip_accents(song['title'].strip().lower()):
            continue
        path = xbmc.validatePath(xbmc.translatePath(song['file']))
        if os.path.exists(path):
            return song
        # The library knows the song but its file is gone; tell the user and
        # keep looking for another copy.
        log("Found in library, but file doesn't exist: %s" % path)
        xbmc.executebuiltin('Notification(%s,%s, 1000)' % ("File doesn't exist", os.path.basename(path)))
    return None
def format_text_udf(text):
    """Build a column expression that normalises the text in ``text``.

    Each value is lower-cased, tabs and newlines are replaced by spaces,
    ``strip_accents`` is applied, every match of ``REMOVE_URL_EXPR`` is
    deleted and the result goes through ``remove_punctuation``.

    :param text: the column (or column name) the UDF is applied to.
    :return: the result of applying the string-typed UDF to ``text``.
    """
    def _format(value):
        # Null cells reach the UDF as None; pass them through instead of
        # failing on ``None.lower()``.
        if value is None:
            return None
        flattened = value.lower().replace('\t', ' ').replace('\n', ' ')
        without_urls = REMOVE_URL_EXPR.sub("", strip_accents(flattened))
        return remove_punctuation(without_urls)

    return functions.udf(_format, types.StringType())(text)
def getCards(self, edition, edkey):
    """Scrape the card list of one edition from magiccards.info and store it.

    Downloads ``http://magiccards.info/<edkey>.html``, takes the table that
    follows the ``Edition`` header cell and creates one ``CardInfo`` (name,
    rarity) per table row. Every card is linked to the ``Edition`` row whose
    name equals ``edition``.

    :param edition: edition name used to look up the ``Edition`` row.
    :param edkey: edition key used to build the page URL.
    :return: ``None``. Cards are saved to ``self.db.session``, which is
        committed once after all rows have been processed.
    """
    response = requests.get('http://magiccards.info/' + edkey + '.html',
                            proxies=self.proxies)
    text = response.text.replace('\n', '')

    cardpart = re.search(r'<th><b>Edition</b></th>\s+</tr>(.*?)</table>',
                         text, re.DOTALL)
    if cardpart is None:
        # Page missing or layout changed: report it instead of failing with
        # AttributeError on ``None.group``.
        logger.warning('card table not found for edition key "' + edkey + '"')
        return

    # Groups: card page, name, type, casting cost, rarity.
    row_pattern = re.compile(
        r'<a\s+href="(.*?)">(.*?)</a>.*?<td>(.*?)</td>.*?<td>(.*?)</td>.*?<td>(.*?)</td>',
        re.DOTALL)

    # The edition is the same for every card, so look it up once.
    edition_row = self.db.session.query(Edition).filter(
        Edition.name == edition).first()

    for cardraw in cardpart.group(1).split('</tr>'):
        if not cardraw.strip():
            # Splitting on '</tr>' leaves an empty fragment after the last row.
            continue
        findexp = row_pattern.search(cardraw)
        if not findexp:
            logger.warning('pattern failed to match on "' + cardraw + '"')
            continue

        name = strip_accents(findexp.group(2))
        rarity = findexp.group(5)
        logger.debug('card %s, rarity %s', name, rarity)

        ci = CardInfo(name, rarity)
        ci.edition = edition_row
        ci.save(self.db.session)

    self.db.session.commit()
def run(self):
    """Record, recognise and dispatch voice commands in an endless loop.

    Each iteration records audio into a temporary file, decodes it with
    ``google_asr.decode_wavefile`` and takes the ``'utterance'`` of the first
    result, passed through ``strip_accents``. The first command in
    ``self.cmd_dict`` with a pattern that is contained in the text, or whose
    ``leven`` distance to it is below 2, is sent with ``self.send_cmd``.
    When the recogniser returns nothing, ``"noentiendo"`` is sent instead.

    :return: never returns.
    """
    while True:
        # The context manager closes (and removes) the recording on every
        # iteration instead of leaving that to garbage collection.
        with NamedTemporaryFile() as wavfile:
            voice.record_to_file(wavfile.name)
            print('recorded %s' % wavfile.name)
            asr_res = google_asr.decode_wavefile(wavfile.name)
        print('asr out %s' % (asr_res,))

        if not asr_res:
            self.send_cmd("noentiendo")
            continue

        text = strip_accents(asr_res[0]['utterance'])
        print('text %s' % text)

        for patterns, cmd in self.cmd_dict.items():
            # The substring test is cheap, so the edit distance is only
            # computed when it fails.
            if any(pattern in text or leven(pattern, text) < 2
                   for pattern in patterns):
                self.send_cmd(cmd)
                break
def cache_new(self, what):
    """Register ``what`` in the item cache and return its generated name.

    The name is ``"<index>_<label>"``: the index is the number of items
    cached so far and the label is the first 20 characters of
    ``strip_accents(what)``.

    :param what: key to register; must not be cached yet.
    :return: the generated item name, also stored in
        ``self.cache['items'][what]``.
    :raises AssertionError: if ``what`` is already cached.
    """
    items = self.cache['items']
    if what in items:
        # Raised explicitly rather than with ``assert`` so the guard is not
        # stripped when Python runs with -O.
        raise AssertionError("%r is already cached" % (what,))
    item = "%d_%s" % (len(items), strip_accents(what)[:20])
    items[what] = item
    return item
def mineName(self, rawProp):
    """Extract the card name from a raw property and store it on the card.

    The content comes from ``self.mineTDContent(rawProp)``. The name is the
    text between ``?cn=`` and the last ``&`` that follows it on the same
    line, decoded as UTF-8 and passed through ``strip_accents``. The result
    is assigned to ``self.currentCard.name``.

    :param rawProp: raw property handed to ``self.mineTDContent``.
    """
    cell = self.mineTDContent(rawProp)
    match = re.search(r'\?cn=(.*)&', cell)
    encoded_name = match.group(1)
    self.currentCard.name = strip_accents(encoded_name.decode('utf8'))
def get_names_for_entities(namespath):
    """Build a map from entity to the list of its normalised names.

    Reads ``namespath`` line by line as tab-separated fields. The first
    field (through ``clean_uri``) is the entity. The third field (through
    ``clean_uri``, lower-cased, then ``strip_accents``) is one of its names.

    Lines that do not have exactly four fields are reported on stdout.
    They are still indexed when the entity and name fields are present, and
    skipped when there are fewer than three fields.

    :param namespath: path of the tab-separated names file.
    :return: dict mapping each entity to a list of names, in file order.
    """
    print("getting names map...")
    names = {}
    with open(namespath, 'r') as f:
        for i, line in enumerate(f):
            if i % 1000000 == 0:
                print("line: {}".format(i))

            items = line.strip().split("\t")
            if len(items) != 4:
                print("ERROR: line - {}".format(line))
                if len(items) < 3:
                    # No name field to read; indexing it would raise
                    # IndexError and abort the whole run.
                    continue

            entity = clean_uri(items[0])
            literal = strip_accents(clean_uri(items[2]).lower())
            # setdefault avoids scanning ``names.keys()`` for every line.
            names.setdefault(entity, []).append(literal)
    return names
def create_inverted_index_entity(namespath, outpath):
    """Build an n-gram -> entity inverted index and pickle it to ``outpath``.

    Reads ``namespath`` line by line as tab-separated fields. The first
    field (through ``clean_uri``) is the entity id and the third field
    (through ``clean_uri``) is its name. Every tuple returned by
    ``get_name_ngrams(name)`` is joined with spaces, passed through
    ``strip_accents`` and used as a key whose value is the set of entity ids
    seen for it.

    Lines that do not have exactly four fields are reported on stdout.
    They are still indexed when the id and name fields are present, and
    skipped when there are fewer than three fields.

    :param namespath: path of the tab-separated names file.
    :param outpath: path the pickled index dict is written to.
    :return: ``None``.
    """
    print("creating the index map...")
    index = {}
    size = 0
    with open(namespath, 'r') as f:
        for i, line in enumerate(f):
            if i % 1000000 == 0:
                print("line: {}".format(i))

            items = line.strip().split("\t")
            if len(items) != 4:
                print("ERROR: line - {}".format(line))
                if len(items) < 3:
                    # No name field to read; indexing it would raise
                    # IndexError and abort the whole run.
                    continue

            entity_mid = clean_uri(items[0])
            entity_name = clean_uri(items[2])

            for ngram_tuple in get_name_ngrams(entity_name):
                size += 1
                ngram = strip_accents(" ".join(ngram_tuple))
                # setdefault avoids scanning ``index.keys()`` per n-gram.
                index.setdefault(ngram, set()).add(entity_mid)

    print("num keys: {}".format(len(index)))
    print("total key-value pairs: {}".format(size))

    print("dumping to pickle...")
    with open(outpath, 'wb') as f:
        pickle.dump(index, f)

    print("DONE")
def findTracks(self, artistname, tracks):
    """Collect the library songs of one artist that match the given titles.

    Sends an ``AudioLibrary.GetArtists`` JSON-RPC request filtered by the
    upper/lower-case variants of the first (up to two) characters of the
    artist name. Each returned artist is compared with ``artistname`` after
    ``strip()``, ``lower()`` and ``strip_accents``, and ``self.findTrack`` is
    called for every title of every artist that matches.

    :param artistname: name of the wanted artist.
    :param tracks: iterable of song titles to look up for that artist.
    :return: list of the song dicts returned by ``findTrack`` (``None``
        results are dropped); empty when the artist name is empty, the
        request reports an error, or nothing is found.
    """
    ret = []
    if not artistname:
        # Nothing to search for. Indexing the name used to raise IndexError.
        return ret

    # Every case combination of the leading characters. Names shorter than
    # two characters simply yield fewer variants instead of failing.
    prefixes = ['']
    for ch in artistname[:2]:
        prefixes = [head + variant
                    for head in prefixes
                    for variant in (ch.lower(), ch.upper())]
    artist_filters = [
        {"field": "artist", "operator": "startswith",
         "value": prefix.encode('utf-8')}
        for prefix in prefixes
    ]

    req = {
        "jsonrpc": "2.0",
        "method": "AudioLibrary.GetArtists",
        "id": "libSongs",
        "params": {
            "limits": {"start": 0, "end": 1000},
            "sort": {"order": "ascending", "method": "track",
                     "ignorearticle": True},
            "filter": {"or": artist_filters},
        },
    }

    rpcresp = json.loads(xbmc.executeJSONRPC(json.dumps(req)))
    artists = []
    if 'error' in rpcresp:
        log(str(rpcresp))
    elif 'result' in rpcresp:
        artists = rpcresp['result'].get('artists') or []

    found = False
    # Normalise the wanted name exactly like the library names below;
    # without strip()/lower() a mixed-case name could never match.
    wanted = strip_accents(artistname.strip().lower())
    for artist in artists:
        if strip_accents(artist['artist'].strip().lower()) != wanted:
            continue
        found = True

        matched = []
        for title in tracks:
            item = self.findTrack(artist['artist'], title)
            if item is not None:
                matched.append(item)
        ret.extend(matched)

        if matched:
            # Only compare against songs found for this artist, so an artist
            # without hits does not repeat a warning about an earlier one.
            library_name = matched[-1]['artist'][0]
            if artistname.lower() != library_name.lower():
                log("WARNING: artist name has some differencies: '" +
                    str(artistname.encode('utf-8')) + "' --- '" +
                    str(library_name.encode('utf-8')) + "'")

    if not found:
        log('NOT found artist: "' + str(artistname.encode('utf-8')) + '"')
    return ret
def findTrack(self, artistname, trackname):
    """Look up one song of an artist in the Kodi audio library.

    Sends an ``AudioLibrary.GetSongs`` JSON-RPC request filtered by the exact
    artist name and by the upper/lower-case variants of the first (up to two)
    characters of the title. The returned titles are then compared with
    ``trackname`` after ``strip()``, ``lower()`` and ``strip_accents``.

    :param artistname: artist name, passed verbatim to the ``is`` filter.
    :param trackname: title of the wanted song.
    :return: the first matching song dict whose file exists on disk, or
        ``None`` when the title is empty, the request reports an error, or
        no usable song is found.
    """
    if not trackname:
        # Nothing to search for. Indexing the title used to raise IndexError.
        return None

    # Every case combination of the leading characters. Titles shorter than
    # two characters simply yield fewer variants instead of failing.
    prefixes = ['']
    for ch in trackname[:2]:
        prefixes = [head + variant
                    for head in prefixes
                    for variant in (ch.lower(), ch.upper())]
    title_filters = [
        {"field": "title", "operator": "startswith",
         "value": prefix.encode('utf-8')}
        for prefix in prefixes
    ]

    req = {
        "jsonrpc": "2.0",
        "method": "AudioLibrary.GetSongs",
        "id": "libSongs",
        "params": {
            "properties": ["artist", "duration", "album", "title", "file",
                           "thumbnail", "fanart", "track"],
            "limits": {"start": 0, "end": 1000},
            "sort": {"order": "ascending", "method": "track",
                     "ignorearticle": True},
            "filter": {"and": [
                {"field": "artist", "operator": "is",
                 "value": artistname.encode('utf-8')},
                {"or": title_filters},
            ]},
        },
    }

    rpcresp = json.loads(xbmc.executeJSONRPC(json.dumps(req)))
    if 'error' in rpcresp:
        # Same handling as findTracks: log the reply instead of failing on
        # the missing 'result' key.
        log(str(rpcresp))
        return None
    songs = rpcresp.get('result', {}).get('songs') or []

    wanted = strip_accents(trackname.strip().lower())
    for song in songs:
        if wanted != strip_accents(song['title'].strip().lower()):
            continue
        path = xbmc.validatePath(xbmc.translatePath(song['file']))
        if os.path.exists(path):
            return song
        # The library knows the song but its file is gone; tell the user and
        # keep looking for another copy.
        log("Found in library, but file doesn't exist: %s" % path)
        xbmc.executebuiltin('Notification(%s,%s, 1000)' % ("File doesn't exist", os.path.basename(path)))
    return None
playerObj = player.TrackLibraryPlayer() pass elif 'topArtists' == action: playerObj = player.ArtistLibraryPlayer() pass elif 'syncLibrary' == action: dialog = xbmcgui.Dialog() path = dialog.browseSingle(3, 'Select folder', 'files') log('Path = "%s"' % path) if '' != path: playerObj = player.LovedTracksPlayer() playerObj.init(*sys.argv[2:]) log('Artist count = %d' % len(playerObj.tracks.keys())) for a in playerObj.tracks.keys(): aPath = os.path.join(path, slugify(strip_accents(a.strip()))) for t in playerObj.tracks[a]: #log('# %s' % a) track = playerObj.findTrack(a, t) if None != track: tPath = os.path.join(aPath, slugify(strip_accents(track['album'].strip().lower()))) base, extension = os.path.splitext(track['file']) #log('Track: base = %s, extension = %s' % (base, extension)) tFile = os.path.join(tPath, slugify(str(track['track']).zfill(3) + '-' + strip_accents(track['title'].strip().lower())) + extension.lower()) log('Track path = %s' % tFile) mkdir_p(tPath) shutil.copy(track['file'], tFile) #log(str(track)) sys.exit() pass
def findTracks(self, artistname, tracks):
    """Collect the library songs of one artist that match the given titles.

    Sends an ``AudioLibrary.GetArtists`` JSON-RPC request filtered by the
    upper/lower-case variants of the first (up to two) characters of the
    artist name. Each returned artist is compared with ``artistname`` after
    ``strip()``, ``lower()`` and ``strip_accents``, and ``self.findTrack`` is
    called for every title of every artist that matches.

    :param artistname: name of the wanted artist.
    :param tracks: iterable of song titles to look up for that artist.
    :return: list of the song dicts returned by ``findTrack`` (``None``
        results are dropped); empty when the artist name is empty, the
        request reports an error, or nothing is found.
    """
    ret = []
    if not artistname:
        # Nothing to search for. Indexing the name used to raise IndexError.
        return ret

    # Every case combination of the leading characters. Names shorter than
    # two characters simply yield fewer variants instead of failing.
    prefixes = ['']
    for ch in artistname[:2]:
        prefixes = [head + variant
                    for head in prefixes
                    for variant in (ch.lower(), ch.upper())]
    artist_filters = [
        {"field": "artist", "operator": "startswith",
         "value": prefix.encode('utf-8')}
        for prefix in prefixes
    ]

    req = {
        "jsonrpc": "2.0",
        "method": "AudioLibrary.GetArtists",
        "id": "libSongs",
        "params": {
            "limits": {"start": 0, "end": 1000},
            "sort": {"order": "ascending", "method": "track",
                     "ignorearticle": True},
            "filter": {"or": artist_filters},
        },
    }

    rpcresp = json.loads(xbmc.executeJSONRPC(json.dumps(req)))
    artists = []
    if 'error' in rpcresp:
        log(str(rpcresp))
    elif 'result' in rpcresp:
        artists = rpcresp['result'].get('artists') or []

    found = False
    # Normalise the wanted name exactly like the library names below;
    # without strip()/lower() a mixed-case name could never match.
    wanted = strip_accents(artistname.strip().lower())
    for artist in artists:
        if strip_accents(artist['artist'].strip().lower()) != wanted:
            continue
        found = True

        matched = []
        for title in tracks:
            item = self.findTrack(artist['artist'], title)
            if item is not None:
                matched.append(item)
        ret.extend(matched)

        if matched:
            # Only compare against songs found for this artist, so an artist
            # without hits does not repeat a warning about an earlier one.
            library_name = matched[-1]['artist'][0]
            if artistname.lower() != library_name.lower():
                log("WARNING: artist name has some differencies: '" +
                    str(artistname.encode('utf-8')) + "' --- '" +
                    str(library_name.encode('utf-8')) + "'")

    if not found:
        log('NOT found artist: "' + str(artistname.encode('utf-8')) + '"')
    return ret
playerObj = player.TrackLibraryPlayer() pass elif 'topArtists' == action: playerObj = player.ArtistLibraryPlayer() pass elif 'syncLibrary' == action: dialog = xbmcgui.Dialog() path = dialog.browseSingle(3, 'Select folder', 'files') log('Path = "%s"' % path) if '' != path: playerObj = player.LovedTracksPlayer() playerObj.init(*sys.argv[2:]) log('Artist count = %d' % len(playerObj.tracks.keys())) for a in playerObj.tracks.keys(): aPath = os.path.join(path, slugify(strip_accents(a.strip()))) for t in playerObj.tracks[a]: #log('# %s' % a) track = playerObj.findTrack(a, t) if None != track: tPath = os.path.join( aPath, slugify(strip_accents(track['album'].strip().lower()))) base, extension = os.path.splitext(track['file']) #log('Track: base = %s, extension = %s' % (base, extension)) tFile = os.path.join( tPath, slugify( str(track['track']).zfill(3) + '-' + strip_accents(track['title'].strip().lower())) + extension.lower())