def download(self, unit=[]): if len(unit) == 0: unit = self.chps else: nunit = list() for idd in unit: nunit.append(self.chps[idd]) unit = nunit for idx in unit: fname = idx[0] + '_' + self.fec + '.txt' if os.path.exists(fname): continue f = open(fname,'w') chplink = idx[1] print 'Downloading', idx[2], '>>> ', fname self.response = urllib2.urlopen(chplink) hj = self.rm_spchar(self.response.read()) chpsp = bs(hj) chpsp = self.rm_spam(chpsp) # format output by wehku # get title/content chttl = chpsp.findAll('div',{'class':'chaptertitle'}) chcnt = chpsp.findAll('div',{'class':'chaptercontent'}) for vol in range(len(chttl) -1): atl = jtof(chttl[vol].text).encode(self.fec,'ignore').lstrip() f.write(atl) f.write('\n==========\n') f.write(jtof(chcnt[vol].text).encode(self.fec,'ignore')) f.write('\n\n\n\n') chpsp.close() f.close() return
def cvt_codec(node):
    # Normalise a parsed node's text through the jtof converter.
    # Unicode text exposes .encode and is re-encoded to UTF-8 bytes
    # first; anything else is handed to jtof unchanged.
    text = node.text
    if hasattr(text, 'encode'):
        return jtof(text.encode('utf-8'))
    return jtof(text)
def fetch_data(word_lst):
    """Look up each word on the remote dictionary site and print results.

    Skips entries longer than four words (whole sentences), words the
    site does not know, and entries with missing example-sentence markup.
    Leftover debug `print` statements from the original were removed.
    """
    for word in word_lst:
        if len(word.split()) > 4:
            # stop lookup: this is a sentence, not a word/phrase
            continue
        req_word = str("_".join(word.split()))
        req_url = _REQ_HTTP_PREFIX + req_word
        print(req_url)
        results = requests.get(req_url,
                               headers={'User-Agent': 'Mozilla/5.0'},
                               timeout=_REQ_TIMEOUTS)
        soup = BeautifulSoup(results.text)
        if not found_result(soup):
            continue
        look_up_word = soup.find('h1', {'id': 'word_name_h1'}).getText()
        output_result("\n[%s] " % look_up_word, False)
        # Chinese meanings, converted to Traditional via jtof
        soup_translate = soup.find('div', {'class': 'group_pos'})
        for lbl in soup_translate.findChildren('label'):
            output_result(jtof(lbl.getText().strip()), False)
        output_result('')
        soup_sent = soup.findAll('dl', {'class': 'vDef_list'})
        try:
            got_word = soup.find('dl', {'class': 'vDef_list'}) \
                           .find('span', {'class': 'text_blue'}).getText()
        except Exception:
            # no highlighted match in the example list -- nothing to show
            continue
        # example sentences: pad the matched word with spaces for readability
        for each_sent in soup_sent:
            replace_word = " ".join(got_word.split())
            sent = each_sent.find('dt').getText().replace(
                got_word, " " + replace_word + " ")
            output_result('\t' + sent)
            output_result('\t' + jtof(each_sent.find('dd').getText()))
def _jtof(self, text=''):
    """Simplified->Traditional conversion with CJK corner-bracket quotes.

    On any conversion failure the input is returned untouched.
    """
    try:
        converted = jianfan.jtof(text)
        return converted.replace(u'“', u'「').replace(u'”', u'」')
    except Exception:
        return text
def __search__(self,print_results=False):
    '''
    Query the Google AJAX search API across self.pages result pages.

    Returns the list of decoded JSON responses (one dict per page that
    answered with responseStatus 200); pages that fail are logged and
    skipped.  When print_results is True each hit is also printed,
    converted to Traditional Chinese via jtof.
    (The old docstring claimed "False otherwise" -- the code always
    returns a list.)
    '''
    results = []
    for page in range(0,self.pages):
        # the API's paging offset is counted in results, not pages:
        # the small layout holds 4 hits per page, otherwise 8
        rsz = 8
        if self.rsz == RSZ_SMALL:
            rsz = 4
        args = {'q' : self.query,
                'v' : '1.0',
                'start' : page*rsz,
                'rsz': self.rsz,
                'safe' : self.safe,
                'filter' : self.filter,
                'hl' : self.hl }
        self.logger.debug('search: "%s" page# : %s'%(self.query, page))
        q = urllib.urlencode(args)
        search_results = urllib.urlopen(URL+q)
        data = json.loads(search_results.read())
        if not data.has_key('responseStatus'):
            self.logger.error('response does not have a responseStatus key')
            continue
        if data.get('responseStatus') != 200:
            self.logger.debug('responseStatus is not 200')
            self.logger.error('responseDetails : %s'%(data.get('responseDetails', None)))
            continue
        if print_results:
            if data.has_key('responseData') and data['responseData'].has_key('results'):
                for result in data['responseData']['results']:
                    if result:
                        # jtof here returns (converted text, changed flag);
                        # a True flag means the site served Simplified Chinese
                        newTitle,bChanged = jtof('[%s]'%(urllib.unquote(result['titleNoFormatting'])))
                        jianWarningStr=''
                        if bChanged :
                            # u'\u7c21\u9ad4\u7db2\u7ad9' = "Simplified-Chinese site" badge
                            jianWarningStr = u'\u7c21\u9ad4\u7db2\u7ad9'
                        print clrTx(newTitle,'YELLOW')+clrTx(jianWarningStr,'GREY30')
                        print jtof(result['content'].strip("<b>...</b>").replace("<b>",'').replace("</b>",'').replace("'","'").strip())[0]
                        print clrTx(urllib.unquote(result['unescapedUrl'])+'\n','GREY30')
            else:
                # no responseData key was found in 'data'
                self.logger.error('no responseData key found in response. very unusal')
        results.append(data)
    return results
def main():
    """Walk tPath and rename every entry whose jtof() form differs."""
    pending = scanAndProduceStack(unicode(tPath))
    # consume the collected paths stack-style until it is empty
    while pending:
        old_path = pending.pop()
        new_path = jtof(old_path)
        #print u"o:(%s) n:(%s)" %(old_path,new_path)
        if new_path == old_path:
            continue  # already Traditional -- nothing to rename
        proc = Popen(prepareRenamePara(old_path, new_path))
        proc.wait()
def convert_file(target_file):
    """Convert target_file in place to UTF-8 (optionally with BOM).

    Honours the module-level settings: backup, convert_type, use_bom,
    use_user_dic and user_dic_file.  When convert_type == "g2bdic",
    vocabulary substitution (dic_tw plus an optional per-directory user
    dictionary) is applied line by line.  Uses `with` for all file I/O
    and drops the unused `result_content` of the original.
    """
    if not os.path.isfile(target_file):
        print("File not found! " + target_file + " 檔案不存在! ")
        return
    f_encoding = get_encoding(target_file)
    print("正在轉換", target_file, " 編碼為: ", f_encoding)
    if f_encoding is None:
        print("抱歉, 未能正確判斷編碼!\n\n")
        return
    if os.path.getsize(target_file) <= 0:
        print(MSG_NO_CONVERT)
        return
    if backup:
        # keep a pristine copy (with metadata) next to the original
        shutil.copy2(target_file, target_file + '.bak')
    with open(target_file, 'r', encoding=f_encoding) as fp:
        original_content = fp.read()
    # an optional user dictionary may sit beside the converted file
    user_dic = {}
    user_dic_pathname = os.path.join(
        os.path.dirname(os.path.abspath(target_file)), user_dic_file)
    if os.path.exists(user_dic_pathname):
        user_dic = get_dictionary(user_dic_pathname)
    if convert_type in ("none", "utf8"):
        new_content = original_content
    else:
        new_content = jtof(original_content)
    out_encoding = 'utf-8-sig' if use_bom else 'utf-8'
    with open(target_file, 'w', encoding=out_encoding) as fpw:
        for line in new_content.splitlines(True):
            if convert_type == "g2bdic":
                newline = convert_vocabulary(line, dic_tw)
                if use_user_dic:
                    newline = convert_vocabulary(newline, user_dic)
                fpw.write(newline)
            else:
                fpw.write(line)
    print(MSG_CONVERT_FINISH)
def renamer(target):
    """Recursively rename matching files under `target` via jtof.

    Only files whose extension is on the allow-list (currently just
    'mp3') are renamed; the extension is preserved.  Fixes the original's
    shadowing of the builtins `filter` and `list`.
    NOTE: recursion is chdir-based, so the process working directory
    moves while this runs (and is left at `target` afterwards).
    """
    allowed_exts = ['mp3']  # 'pdf', 'zip', 'chm', 'rar', 'djvu', 'epub'
    os.chdir(target)
    for entry in os.listdir('.'):
        if os.path.isdir(entry):
            backto = os.getcwd()
            renamer(entry)
            os.chdir(backto)
        else:
            stem, sep, ext = entry.rpartition(".")
            if sep and ext in allowed_exts:
                os.rename(entry, jtof(stem) + "." + ext)
def convertFile(target_file): user_dic = {} if os.path.exists(target_file): f_encoding = getEncoding(target_file) print u"正在轉換", target_file, u" 編碼為: ", f_encoding if f_encoding == None: print(u"抱歉, 未能正確判斷編碼!\n\n") else: result_content = u'' original_content = u'' fp = open(target_file, 'r') original_content = fp.read() fp.close() if original_content.startswith(codecs.BOM_UTF8): original_content = original_content.lstrip(codecs.BOM_UTF8) utf8content = original_content.decode(f_encoding) newcontent = jtof(utf8content) lines = newcontent.splitlines() for line in lines: line = convertVocabulary(line, dic_tw()) if os.path.getsize(target_file) > 0: # do backup backup_file = target_file + '.bak' shutil.copy2(target_file, backup_file) fpw = open(target_file, 'w') if not newcontent.startswith(codecs.BOM_UTF8.decode("utf8")): fpw.write(codecs.BOM_UTF8) pathdir = os.path.dirname(os.path.abspath(target_file)) user_dic_pathname = pathdir + os.path.sep + user_dic_file if os.path.exists(user_dic_pathname): user_dic = getUserDic(user_dic_pathname) if len(user_dic) > 0: for line in lines: line = convertVocabulary(line, user_dic) for line in lines: line = convertVocabulary(line, user_dic) fpw.write(line.encode('UTF-8')) fpw.write("\n") fpw.close() print(MSG_CONVERT_FINISH) else: print MSG_NO_CONVERT
def convertFile(target_file): user_dic= {} if os.path.exists(target_file): f_encoding = getEncoding(target_file) print u"正在轉換", target_file, u" 編碼為: ", f_encoding if f_encoding == None: print (u"抱歉, 未能正確判斷編碼!\n\n"); else: result_content = u'' original_content = u'' fp = open(target_file, 'r') original_content = fp.read() fp.close() if original_content.startswith( codecs.BOM_UTF8 ): original_content = original_content.lstrip( codecs.BOM_UTF8); utf8content=original_content.decode(f_encoding) newcontent = jtof(utf8content) lines = newcontent.splitlines(); for line in lines: line = convertVocabulary(line, dic_tw()); if os.path.getsize(target_file) > 0: # do backup backup_file = target_file + '.bak' shutil.copy2(target_file, backup_file) fpw = open(target_file, 'w') if not newcontent.startswith(codecs.BOM_UTF8.decode( "utf8" )): fpw.write(codecs.BOM_UTF8) pathdir =os.path.dirname(os.path.abspath(target_file)); user_dic_pathname = pathdir +os.path.sep+user_dic_file; if os.path.exists(user_dic_pathname): user_dic = getUserDic(user_dic_pathname); if len(user_dic) > 0: for line in lines: line = convertVocabulary(line, user_dic); for line in lines: line = convertVocabulary(line, user_dic); fpw.write(line.encode('UTF-8')) fpw.write("\n"); fpw.close(); print (MSG_CONVERT_FINISH) else: print MSG_NO_CONVERT
def convertFile(target_file): if os.path.isfile(target_file): f_encoding = getEncoding(target_file) print u"正在轉換", target_file, u" 編碼為: ", f_encoding if f_encoding is None: print(u"抱歉, 未能正確判斷編碼!\n\n") else: if os.path.getsize(target_file) > 0: if backup: # do backup backup_file = target_file + '.bak' shutil.copy2(target_file, backup_file) result_content = u'' original_content = u'' fp = open(target_file, 'r') original_content = fp.read() fp.close() if original_content.startswith(codecs.BOM_UTF8): original_content.lstrip(codecs.BOM_UTF8) utf8content = original_content.decode(f_encoding, 'ignore') if convertType != "none": newcontent = jtof(utf8content) else: newcontent = utf8content origlines = newcontent.splitlines(True) fpw = open(target_file, 'w') if (use_bom): if not newcontent.startswith( codecs.BOM_UTF8.decode("utf8")): fpw.write(codecs.BOM_UTF8) for line in origlines: if convertType == "g2bdic": fpw.write( convertVocabulary(line, dic_tw()).encode('UTF-8')) else: fpw.write(line.encode('UTF-8')) #fpw.write(newcontent.encode('UTF-8')) fpw.close() print(MSG_CONVERT_FINISH) else: print MSG_NO_CONVERT else: print "File not found! " + target_file + " 檔案不存在! "
def convertFile(target_file): if os.path.isfile(target_file): f_encoding = getEncoding(target_file) print u"正在轉換", target_file, u" 編碼為: ", f_encoding if f_encoding is None: print (u"抱歉, 未能正確判斷編碼!\n\n"); else: if os.path.getsize(target_file) > 0: if backup: # do backup filename, file_extension = os.path.splitext(target_file) backup_file = filename + '.bak' + file_extension shutil.copy2(target_file, backup_file) result_content = u'' original_content = u'' fp = open(target_file, 'r') original_content = fp.read() fp.close() if original_content.startswith(codecs.BOM_UTF8): original_content.lstrip(codecs.BOM_UTF8) utf8content = original_content.decode(f_encoding, 'ignore') if convertType != "none": newcontent = jtof(utf8content) else: newcontent = utf8content origlines = newcontent.splitlines(True) fpw = open(target_file, 'w') if (use_bom): if not newcontent.startswith(codecs.BOM_UTF8.decode("utf8")): fpw.write(codecs.BOM_UTF8) for line in origlines: if convertType == "g2bdic": fpw.write(convertVocabulary(line, dic_tw()).encode('UTF-8')) else: fpw.write(line.encode('UTF-8')) # fpw.write(newcontent.encode('UTF-8')) fpw.close() print (MSG_CONVERT_FINISH) else: print MSG_NO_CONVERT else: print "File not found! " + target_file + " 檔案不存在! "
def getWikiContent(queryStr):
    """Fetch a Wikipedia article body for queryStr; '' on lookup errors.

    For the Chinese wiki (LANG == 'zh') the text is converted to
    Traditional Chinese; jtof also returns a changed-flag, which is
    ignored here.
    """
    wikipedia.set_lang(LANG)
    result = ''
    try:
        page = wikipedia.page(queryStr)
        if LANG == 'zh':
            result, _changed = jtof(page.content)
        else:
            result = page.content
        DB.debug(result)
    except wikipedia.exceptions.DisambiguationError as e:
        DB.error(e)
    except wikipedia.exceptions.PageError as e:
        DB.error(e)
    return result
def download_subtitles_in_dir(dir_name, timeout):
    """Fetch subtitles for every .mkv/.avi in dir_name (non-recursive).

    Simplified-Chinese file names are first renamed to their Traditional
    form via jtof.  dir_name is a UTF-8 byte string and -- since paths
    are built by plain concatenation -- is expected to end with a path
    separator (assumption from the code; confirm at the call site).
    """
    try:
        file_list = os.listdir(dir_name.decode('utf-8'))
        for video_filename in file_list:
            # print "ext:", splitext(video_filename)[1]
            # print video_filename
            if splitext(video_filename)[1] in [".mkv",".avi"]:
                # skip when any subtitle flavour already sits next to the video
                if os.path.isfile(dir_name+splitext(video_filename)[0]+".ass") or os.path.isfile(dir_name+splitext(video_filename)[0]+".srt") \
                        or os.path.isfile(dir_name+splitext(video_filename)[0]+".ssa") or os.path.isfile(dir_name+splitext(video_filename)[0]+".sub"):
                    print "subtitle exist already, skip"
                else:
                    fan_video_filename = jtof(video_filename)
                    # video_filename is simplified chinese, rename the file as tranditional pne
                    if fan_video_filename != video_filename:
                        os.rename(dir_name+video_filename, dir_name+fan_video_filename)
                        # NOTE(review): downstream downloader appears to want
                        # Big5-encoded paths -- confirm against download_subtitle
                        download_subtitle((dir_name+fan_video_filename).encode('big5'), timeout)
                    else:
                        download_subtitle((dir_name+video_filename).encode('big5'), timeout)
    except IOError:
        print "failed to open directory", dir_name,
def GetDBInfo(HtmlContent, WooyunNumber):
    """Extract the vulnerability summary fields from a Wooyun page.

    Every <h3> on the page carries a "label:value" pair; each value is
    converted to Traditional Chinese and run through the phrase-fix
    table.  The first field (the vulnerability id) is taken from the
    file name (WooyunNumber) because the page itself drops a leading 0.
    """
    info = []
    parsed = pq(HtmlContent)
    headings = parsed('h3').map(lambda i, e: pq(e).text())  # all <h3> texts
    for pos, raw in enumerate(headings):
        compact = raw.encode('utf8').replace('\n', '').replace('\t', '')
        fields = compact.split(':')  # fields[1] holds the value
        if pos == 0:
            # vulnerability number: the page loses a zero, use the file name
            info.append(WooyunNumber)
            continue
        traditional = jtof(fields[1])  # Simplified -> Traditional
        # phrase-level fixup, otherwise e.g. 注入 becomes 註入
        traditional = ConvertWords.Convert(traditional)
        #DbgPrint('Big5Str:%s' % traditional.encode('utf8') )
        info.append(traditional)
    return info
def search(original_query, age_check=None):
    """Yield tweets matching original_query from Plurk's search API.

    The query is converted to Traditional Chinese first.  Hits are
    filtered to plurks whose raw content literally contains the query,
    posted by users whose default language is Chinese ('cn'/'tr_ch').
    Iteration stops early when age_check(posted) reports a plurk as too
    old.
    """
    query = jianfan.jtof(original_query)
    logging.debug('Requesting Plurk for query <{0}>'.format(repr(query)))
    offset = 0
    uri = u'/APP/PlurkSearch/search?query={0}'.format(query)
    for page in range(25):
        try:
            response = get_client().callAPI(u'{0}&offset={1}'.format(uri, offset or ''))
        except ValueError as error:
            logging.info(u'Received Plurk error <{0}> suggesting no results'.format(error.message))
            break
        if not response:
            break
        users = response.get('users') or {}
        for plurk in response.get('plurks', []):
            if query.lower() not in plurk.get('content_raw').lower():
                continue
            if age_check and age_check(plurk.get('posted')):
                # BUG FIX: `raise StopIteration` inside a generator becomes
                # RuntimeError under PEP 479 (Python 3.7+); a plain return
                # ends the generator identically on every version.
                return
            user_id = plurk.get('user_id')
            user = get_user_details(user_id) or users.get(user_id) or {}
            if user.get('default_lang') not in ('cn', 'tr_ch'):
                continue
            yield get_tweet(plurk, user)
        if not response.get('has_more'):
            break
        offset = response.get('last_offset')
def download(self): if len(unit) == 0: unit = self.chps else: nunit = list() for idd in unit: nunit.append(self.chps[idd]) unit = nunit for idx in unit: fname = idx[0] + '_' + self.fec + '.txt' if os.path.exists(fname): continue print 'Downloading', idx[2], '>>> ', fname f = open(fname,'w') chplink = self.SRC_URL + idx[1] self.response = urllib2.urlopen(chplink) processed = self.dosomething(self.response.read()) chpsp = bs(processed) # format output by sfacg f.write(jtof(chpsp.body.text).encode(self.fec,"ignore")) f.close() return
def segmentToListPerQuery(queryString):
    """Segment a Chinese query string and keep its non-punctuation words.

    ASCII letters and punctuation are stripped, the text is normalised
    to Simplified Chinese (ftoj) before POS tagging with pseg, and
    tokens tagged "x" (non-words) are dropped.

    Cleanup: the original also computed jianfan.jtof(word) per kept
    token and discarded the result, and kept an unused listPerQuery --
    both removed (dead work, no behavioural effect).
    """
    cleaned = re.sub("[a-zA-Z]+", "", queryString)
    cleaned = re.sub("[%s]" % re.escape(string.punctuation), "", cleaned)
    simplified = jianfan.ftoj(cleaned).encode("utf-8")
    segedList = []
    for token in pseg.cut(simplified):
        if token.flag != "x":
            segedList.append(token.word)
    return segedList
def segmentToListPerQuery(queryString):
    """Segment a Chinese query string and keep its non-punctuation words.

    Strips ASCII letters and punctuation, normalises to Simplified
    Chinese (ftoj) for the segmenter, then keeps every pseg token not
    tagged "x" (non-word).  The original's per-token jianfan.jtof call
    whose result was discarded, and the unused listPerQuery, are removed.
    """
    cleaned = re.sub('[a-zA-Z]+', '', queryString)
    cleaned = re.sub('[%s]' % re.escape(string.punctuation), '', cleaned)
    simplified = jianfan.ftoj(cleaned).encode("utf-8")
    return [tok.word for tok in pseg.cut(simplified) if tok.flag != "x"]
def translate_word(word):
    """Look `word` up on iciba's dictionary API and print the entry.

    Falls back to dict.qq.com's JSON service when iciba returns an
    (almost) empty XML document.  Returns -1 when the XML cannot be
    parsed at all, otherwise None.  (Python 2 console tool.)
    """
    dflt_params = conn2Site("dict-co.iciba.com","/api/dictionary.php?w=",word)
    # recover=True tolerates the slightly malformed XML the API serves
    root = etree.fromstring("%s" % dflt_params, parser=etree.XMLParser(recover=True))
    if type(root) == type(None):
        return -1
    if len(root.getchildren()) == 1 :
        # iciba knows nothing beyond the key itself -- ask qq instead
        dflt_params = conn2Site("dict.qq.com","/dict?q=",word)
        json_data = json.loads(dflt_params)
        if 'err' in json_data:
            return
        else:
            if 'netdes' in json_data:
                # web-mined definitions
                for ln in json_data['netdes']:
                    for sub_ln in ln['des']:
                        try:
                            if "mf" in sub_ln:
                                # print("mf")
                                # 'mf' + definition 'd' (converted to Traditional)
                                print str(sub_ln['mf']+" "+jtof(sub_ln['d'])).replace('"','')
                            else:
                                print(sub_ln['d'])
                        except :
                            pass
            if 'netsen' in json_data:
                # NOTE(review): the `continue` makes this loop a no-op --
                # the example-sentence printing below it is dead code.
                for ln in json_data['netsen']:
                    continue
                    print ln['cs']
                    print re.sub('<[^<]+?>', '', ln['es'])#escape html tag
                    # for k,v in ln.iteritems():
                    #     print k
                    # print unicode(ln,'utf-8')
            return
    child = root.getchildren()
    for node in child:
        tag = node.tag
        val = cvt_codec(node)
        if "key"==tag:
            print("")
            # a multi-word key is labelled as a sentence/phrase,
            # a single token as a word
            if len(val.split()) >=2 :
                stdout.write( "句子/片語:"+str(val)+"\r\n")
            elif len(val.split()) == 1 :
                stdout.write( "單字:"+str(val)+"\r\n")
            else :
                print("Error")
        if "pos"==tag:
            stdout.write( "(%s)"%val)
        if "acceptation"==tag:
            stdout.write(val)
        if "sent" == tag:
            got_sentence(node)
# Scrape www.3d66.com's model catalogue with an already-initialised
# selenium `driver`: collect a [main category, sub category, link]
# triple for every sub-category link on the landing page.
mainpage = "http://www.3d66.com/model.html"
driver.get(mainpage)  # Main page
sub_types = []
main_types = driver.find_elements_by_xpath("//div[@class='class_para']")
#info_file = "f:/3dbb_links.txt"
#file_object = open(info_file, "w")
for main_type in main_types:
    # the <a> under <p> is the category heading; the <span> holds the leaves
    main_type_name = main_type.find_element_by_xpath("./p/a")
    sub_type_names = main_type.find_elements_by_xpath("./span/a")
    for sub_type_name in sub_type_names:
        sub_type_link = sub_type_name.get_attribute('href')
        # convert labels to Traditional Chinese; drop the boilerplate
        # u'3d模型' ("3d model") suffix from the main-category name
        mt_name = jianfan.jtof(main_type_name.text).replace(u'3d模型','')
        st_name = jianfan.jtof(sub_type_name.text)
        sub_types.append([mt_name, st_name, sub_type_link])
        #info_content = (mt_name + "," + st_name + "," + sub_type_link + "\n").encode('utf-8')
        #file_object.write(info_content)
#file_object.close()
saved = sub_types
'''
x = 0
for save in saved:
    x = x + 1
    print save[1], x
'''
# main type, sub type, link
# Convert the ESOcast 72 subtitle file from Simplified to Traditional
# Chinese, writing the result to a sibling file.
import os
import sys

sys.path.append('j2f')  # local copy of the converter packages
import charsets
from jianfan import jtof

with open('ESOcast72_s.srt', 'r') as src:
    simplified_text = src.read()

traditional_text = jtof(simplified_text)

with open('ESOcast72_t.srt', 'w') as dst:
    dst.write(traditional_text)
# Fragment: classify `name` (a string node from parsed XML, provided
# upstream) and pull the game's basic stats out of `soup`.
# NOTE(review): `find` is only assigned inside the loop below -- it must
# be initialised (presumably to 0) before this fragment; confirm upstream.
name = name.string
# set find = 1 once any CJK unified ideograph is seen
for character in name:
    if (u'\u4e00' <= character <= u'\u9fff'):
        find = 1
        break
if find == 1:
    # ...but demote to 0 again if kana (Japanese) or hangul (Korean)
    # characters also appear: then this is not a Chinese title
    for character in name:
        if (u'\u3041' <= character <= u'\u309C' or u'\uAC00' <= character <= u'\uCB4C'):
            find = 0
            break
name = name.encode('utf8')
name_cht = jianfan.jtof(name)  # Traditional-Chinese form of the title
if find == 0:
    name_cht = 'No cht name'
# game metadata straight from the XML -- assumed always present here
player_min = soup.find('minplayers').string
player_max = soup.find('maxplayers').string
game_time = soup.find('maxplaytime').string
publish_year = soup.find('yearpublished').string
if soup.find('image') != None:
    # keep only one path component of the image URL
    # (index 4 -- assumes a fixed URL layout; TODO confirm)
    imgsrc = soup.find('image').string
    imgsrc = imgsrc.split('/')[4]
else:
    imgsrc = 0
def htmlParser(tPage):
    """Fetch a goo-dictionary search-result page and print the hit list.

    Scrapes category headings, hit counts, entry titles/links and
    glosses, prints them colourised, then lets the user pick an entry by
    number to open its detail page via gooDetailA.py.  (Python 2; the
    source arrived collapsed onto one line with a string literal split
    across it, so the indentation below is a best-effort reconstruction
    -- verify against the original file.)
    """
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent','Mozilla/5.0')]
    resp = opener.open(tPage)
    if resp.code == 200 :
        data = resp.read()
        resp.close()
    elif resp.code == 404 :
        print "page do not exist"
        exit()
    elif resp.code == 403 :
        print "Forbidden!"
        exit()
    else :
        print "can not open page"
        exit()
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(data), parser)
    #etree.strip_tags(tree,'a')
    #etree.strip_tags(tree,'dt')
    #etree.strip_tags(tree,'dd')
    #etree.strip_tags(tree,'dl')
    result = etree.tostring(tree.getroot(), pretty_print=True, method="html", encoding='UTF-8')
    #DB(1, result)
    encoding = chardet.detect(result)
    #print encoding
    targetURL = ""
    lineSum = 0
    os.system('clear')
    resultSet = []
    #|class|howmany|
    arrangedSetA = []
    # category headings (which dictionary a block of hits came from)
    classSet = re.findall('class="allpage fs16">([^<]+)<',result)
    for className in classSet :
        if className is not None:
            #print className
            arrangedSetA.append([className,0])
    # hit-count badge per category; numbers[1] takes the second digit --
    # NOTE(review): looks fragile for counts that are not two digits
    howmanySet = re.findall('class="schnum">([^<]+)<',result)
    iCnt = 0
    for howmany in howmanySet :
        if howmany is not None:
            #print howmany
            numbers = re.findall('([0-9])',howmany)
            #for number in numbers:
            #print number
            arrangedSetA[iCnt][1] = numbers[1]
            iCnt += 1
    #print arrangedSetA
    #dtSet = tree.xpath("//dl[@class='allList']")
    #print len(dtSet)
    #|title|href|
    arrangedSetB = []
    aSet = tree.xpath("//dl[@class='allList']//dt/a")
    for e in aSet:
        if e.text is not None:
            arrangedSetB.append([e.text,e.get('href')])
            DB(1, e.text+"|"+e.get('href'))
    #|explaination|
    arrangedSetC = []
    ddSet = tree.xpath("//dl[@class='allList']//dd")
    for e in ddSet:
        if e.text is not None:
            arrangedSetC.append(e.text)
            #print e.text+'\n'
    accumulation = 0
    for e in arrangedSetA:
        bSChineseWarning = False
        # the two byte strings are the UTF-8 names of the JP->CN and
        # CN->JP dictionaries, whose glosses are Simplified Chinese
        if e[0] == '\xe6\x97\xa5\xe4\xb8\xad\xe8\xbe\x9e\xe6\x9b\xb8' or e[0] == '\xe4\xb8\xad\xe6\x97\xa5\xe8\xbe\x9e\xe6\x9b\xb8':
            bSChineseWarning = True
        print clrTx(e[0]+'\n','BLUE')
        for idx in range(int(e[1])):
            print clrTx(arrangedSetB[accumulation][0],'YELLOW')+clrTx(' > input('+str(accumulation)+')for more detail\n','GREY30')
            if bSChineseWarning == True:
                # Simplified gloss: convert; jtof returns (text, flag)
                print jianfan.jtof(arrangedSetC[accumulation])[0]
            else :
                print ripSentence(arrangedSetC[accumulation])
            #print tokenizedRomaji(ripSentence(arrangedSetC[accumulation]))
            accumulation+=1
        num = raw_input()
        iIn = parseInt(num)
        #[]==user can input detail url futher by any pause
        if iIn is not None:
            #print 'go %s'%(aSet[1].get('href'))
            process = Popen(['python',INSFOLDER+'/gooDetailA.py','http://dictionary.goo.ne.jp'+aSet[iIn].get('href'),INSFOLDER,'1'])
            process.wait()
            break
        #for console user experience
    #myList = tree.xpath("//div[@class='allResultList']")
    #resultSet = handler(myList)
    return resultSet
#-*- encoding:utf-8 -*- from sys import argv from jianfan import jtof, ftoj f = open(argv[1]) s = (("".join([w.strip() for w in f.readlines()])).decode('utf-8')).split(" ") f2 = open(argv[2]) lines = f2.readlines() print "result:" i = 0 for line in lines: if line[0] != " " and line[0] != "<": line2 = line.strip() if len(line2) > 1: result = s[i]+jtof(line2.decode('utf-8')[1:].strip()) print result.replace(" ","") i+=1