def __init__(self, parent=None):
    """Set up the movie-data window: build the generated UI, create the
    per-chart dialog objects, and wire every button to its handler."""
    super(moviedata, self).__init__(parent)
    self.setupUi(self)
    self.setWindowTitle('电影数据')
    # One pre-built child dialog per chart type.
    self.cld = cloud()
    self.rac = roleactor()
    self.box = boxchange()
    self.qua = quater()
    self.mon = month()
    # Button -> handler wiring, driven by a table instead of repeated calls.
    self.radioButton.clicked.connect(self.search)
    wiring = (
        (self.pushButton, self.cloud),
        (self.pushButton_2, self.roleactor),
        (self.pushButton_3, self.boxchange),
        (self.pushButton_4, self.quater),
        (self.pushButton_5, self.month),
    )
    for button, handler in wiring:
        button.clicked.connect(handler)
def pre_WHT1(url): """Prints Text Output for a given URL from Whitehouse Speeches and Remarks""" import urllib2, sys, random import os from bs4 import BeautifulSoup soup = BeautifulSoup(urllib2.urlopen(url).read()) # Get URL url2 = "Cite: \n" + url + "\n" # Get Date Date = soup.find("div", {"class": "date"}) raw_date = Date.get_text() date = raw_date.replace(' ', '', 12) # Get Release Release = soup.find("div", {"class": "release"}) raw_release = Release.get_text() release = raw_release.replace(' ', '', 12) + "\n\n" # Get Title Title = soup.find("h1", {"property": "dc:title"}) title = Title.get_text() # Get Paragraph Body content = soup.find("div", {"id": "content"}) paragraph = ["".join(x.findAll(text=True)) for x in content.findAll("p")] paragraph_body = "\n\n%s" % ("\n\n".join(paragraph)) #Get File ID - Date & Time #Date - RAW date_split = date.split(' ') month_raw = date_split[0] day_raw = date_split[1] year_raw = date_split[2] #MonthID month_clean1 = month_raw.replace(' ', '') month_clean2 = month_clean1.replace('\n', '') try: month_id = month(month_clean2) except: month_id = month_clean2 #DayID day_clean1 = day_raw.replace(',', '') day_clean2 = day_clean1.replace(' ', '') day_clean3 = day_clean2.replace('\n', '') day_id = day_clean3 #YearID year_clean1 = year_raw.replace(' ', '') year_clean2 = year_clean1.replace('\n', '') year_id = year_clean2 #Final DateID date_id = year_id + '-' + month_id + '-' + day_id #Random ID randID1 = str(random.randrange(6, 10000, 1)) randID2 = str(random.randrange(6, 10000, 1)) try: path1 = date_id + "_" + "ID1" + ".txt" path2 = date_id + "_" + "ID2" + ".txt" path3 = date_id + "_" + "ID3" + ".txt" path4 = date_id + "_" + "ID4" + ".txt" path5 = date_id + "_" + "ID5" + ".txt" if os.path.isfile(path1) == False: #print "no file ID1 found, create ID1" f = open(date_id + "_" + "ID1" + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(date.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) 
f.write(paragraph_body.encode('utf-8')) f.close open_call1 = "open " + path1 subprocess.call(open_call1, shell=True) return elif os.path.isfile(path1) == True: #print "found file ID1, check for ID2" if os.path.isfile(path2) == False: print "found ID1, no file ID2 found, make ID2" f = open(date_id + "_" + "ID2" + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(date.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) f.write(paragraph_body.encode('utf-8')) f.close open_call2 = "open " + path2 subprocess.call(open_call2, shell=True) return elif os.path.isfile(path2) == True: #print "found file ID2, check for ID3" if os.path.isfile(path3) == False: print "found IDs 1-2, no file ID3 found, make ID3" f = open(date_id + "_" + "ID3" + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(date.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) f.write(paragraph_body.encode('utf-8')) f.close open_call3 = "open " + path3 subprocess.call(open_call3, shell=True) return elif os.path.isfile(path3) == True: #print "found file ID3, check for ID4" if os.path.isfile(path4) == False: print "found IDs 1-3, no file ID4 found, make ID4" f = open(date_id + "_" + "ID4" + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(date.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) f.write(paragraph_body.encode('utf-8')) f.close open_call4 = "open " + path4 subprocess.call(open_call4, shell=True) return elif os.path.isfile(path4) == True: #print "found file ID4, check for ID5" if os.path.isfile(path5) == False: print "found IDs 1-4, no file ID5 found, make ID5" f = open(date_id + "_" + "ID5" + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(date.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) f.write(paragraph_body.encode('utf-8')) f.close open_call5 = "open " + path5 subprocess.call(open_call5, shell=True) return elif os.path.isfile(path5) == True: print "found IDs 1-5, 
create random ID" f = open( date_id + "_" + "ID" + randID1 + "-" + randID2 + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(date.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) f.write(paragraph_body.encode('utf-8')) f.close return finally: pass
def pre_WHT3(url): """Prints Text Output for a given URL from Whitehouse Speeches and Remarks""" import urllib2,sys, random import os from bs4 import BeautifulSoup soup = BeautifulSoup(urllib2.urlopen(url).read()) # Get URL url2 = "Cite: \n"+url+"\n" # Get Date Date = soup.find("meta", {"property":"article:published_time"}) raw_date = str(Date).split('"', 2)[1][0:10] date = '\n'+raw_date+'\n' date_reform = parse(str(date)).strftime('%B %d, %Y') date_reformat = '\n'+date_reform+'\n' # Get Release Release = soup.find("meta", {"property":"og:title"}) raw_release = str(Release) release = '\n'+'The White House''\n''Office of the Press Secretary'+'\n'+'\n'+'For Immediate Release'+date_reformat # Get Title Title = soup.find("meta", {"property":"og:title"}) raw_title = str(Title).split('"', 2)[1] title = '\n'+raw_title+'\n' #Get File ID - Date & Time #Date - RAW date_split = date.split('-') month_raw = date_split[1] day_raw = date_split[2] year_raw = date_split[0] #MonthID month_clean1 = month_raw.replace(' ', '') month_clean2 = month_clean1.replace('\n', '') try: month_id = month(month_clean2) except: month_id = month_clean2 #DayID day_clean1 = day_raw.replace(',', '') day_clean2 = day_clean1.replace(' ', '') day_clean3 = day_clean2.replace('\n', '') day_id = day_clean3 #YearID year_clean1 = year_raw.replace(' ', '') year_clean2 = year_clean1.replace('\n', '') year_id = year_clean2 #Final DateID date_id = year_id+'-'+month_id+'-'+day_id #Define Content content = soup.find("div", {"class":"field-items"}) #Get Paragraph1p paragraph1p = ["".join(x.findAll(text=True)) for x in content.findAll("p")] paragraph_body1p = "\n\n%s" % ("\n\n".join(paragraph1p)) #Get Paragraph1div paragraph1div = ["".join(x.findAll(text=True)) for x in content.findAll("div")] paragraph_body1div = "\n\n%s" % ("\n\n".join(paragraph1div)) # Get Paragraph2 paragraph2 = ["".join(x.findAll(text=True)) for x in content.findAll("div", {"class":"legacy-para"})] paragraph_body2lp = "\n\n%s" % 
("\n\n".join(paragraph2)) # Get Paragraph2p paragraph2p = ["".join(x.findAll(text=True)) for x in content.findAll("p")] # Test ID - Div - Legacy Para test_2 = paragraph_body2lp.replace(' ', '').replace('\n', '') # Test ID - 1p test_1p = paragraph_body1p.replace(' ', '').replace('\n', '') # Test ID - 1div test_1div = paragraph_body1div.replace(' ', '').replace('\n', '') try: if len(test_2) < 400: #print "paragraph body 2 short" paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2p)) paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div)) elif len(test_2) > 400: paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2)) paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div)) elif test_2 == '': print "paragraph body 2 empty" if len(test_1p) < 400: if test_1div != '': paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div)) paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2p)) else: paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div)) paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2p)) elif len(test_1p) > 400: paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1p)) paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2p)) else: print "else" paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div)) paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2p)) except: print "except" paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2p)) # Get Paragraph_Body1 if test_1p == '': if test_1div != '': paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div)) else: print "paragraph_body1p and paragraph_body1div empty" paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div)) elif test_1p != '': paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1p)) else: paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div)) print "paragraph_body1 empty" # Perform Quality Check on Parsed Speech speech_parser_two_para_QC(url, paragraph_body1, paragraph_body2) #Random ID randID1 = str(random.randrange(6, 10000, 1)) randID2 = str(random.randrange(6, 10000, 1)) try: path1 = 
date_id+"_"+"ID1"+".txt" path2 = date_id+"_"+"ID2"+".txt" path3 = date_id+"_"+"ID3"+".txt" path4 = date_id+"_"+"ID4"+".txt" path5 = date_id+"_"+"ID5"+".txt" if os.path.isfile(path1) == False: f = open(date_id+"_"+"ID1"+".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close return elif os.path.isfile(path1) == True: if os.path.isfile(path2) == False: print "found ID1, no file ID2 found, make ID2" f = open(date_id+"_"+"ID2"+".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close return elif os.path.isfile(path2) == True: if os.path.isfile(path3) == False: print "found IDs 1-2, no file ID3 found, make ID3" f = open(date_id+"_"+"ID3"+".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close return elif os.path.isfile(path3) == True: if os.path.isfile(path4) == False: print "found IDs 1-3, no file ID4 found, make ID4" f = open(date_id+"_"+"ID4"+".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close return elif os.path.isfile(path4) == True: if os.path.isfile(path5) == False: print "found IDs 1-4, no file ID5 found, make ID5" f = open(date_id+"_"+"ID5"+".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close return elif os.path.isfile(path5) == True: print "found IDs 1-5, create random ID" f = open(date_id+"_"+"ID"+randID1+"-"+randID2+".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close return finally: pass
def pre_WHT3(url): """Prints Text Output for a given URL from Whitehouse Speeches and Remarks""" import urllib2, sys, random import os from bs4 import BeautifulSoup soup = BeautifulSoup(urllib2.urlopen(url).read()) # Get URL url2 = "Cite: \n" + url + "\n" try: # Get Paragraph2 content2 = soup.find("div", {"class": "legacy-content"}) paragraph2 = [ "".join(x.findAll(text=True)) for x in content2.findAll("div") ] #Pargraph Body2 paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2)) # Test ID test_raw = paragraph_body2 test1 = test_raw.replace(' ', '') test_id = test1.replace('\n', '') if test_id == '': print "paragraph body 2 empty" content1 = soup.find("div", {"id": "content"}) paragraph1 = [ "".join(x.findAll(text=True)) for x in content1.findAll("p") ] paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1)) try: date_block_raw = paragraph_body2[0:200] date_block_clean1 = date_block_raw.replace('_', '') date_block_clean2 = date_block_clean1.replace('\n', ' ') date_block_clean3 = date_block_clean2.replace(u'\xa0', '') date_block_clean4 = date_block_clean3.replace(' ', '', 16) #Date - RAW date_split = date_block_clean4.split(' ') month_raw = date_split[3] day_raw = date_split[4] year_raw = date_split[5] #MonthID month_clean1 = month_raw.replace(' ', '') month_clean2 = month_clean1.replace('\n', '') try: month_id = month(month_clean2) except: month_id = month_clean2 #DayID day_clean1 = day_raw.replace(',', '') day_clean2 = day_clean1.replace(' ', '') day_clean3 = day_clean2.replace('\n', '') day_id = day_clean3 #YearID year_clean1 = year_raw.replace(' ', '') year_clean2 = year_clean1.replace('\n', '') year_id = year_clean2 #Final DateID date_id = year_id + '-' + month_id + '-' + day_id except: #DateID date_id = "2009-2010" + '-' + url[60:75] pass elif len(test_id) < 400: print "paragraph body 2 not correct" content1 = soup.find("div", {"id": "content"}) paragraph1 = [ "".join(x.findAll(text=True)) for x in content1.findAll("p") ] paragraph_body1 = "\n\n%s" % 
("\n\n".join(paragraph1)) try: date_block_raw = paragraph_body2[0:200] date_block_clean1 = date_block_raw.replace('_', '') date_block_clean2 = date_block_clean1.replace('\n', ' ') date_block_clean3 = date_block_clean2.replace(u'\xa0', '') date_block_clean4 = date_block_clean3.replace(' ', '', 16) #Date - RAW date_split = date_block_clean4.split(' ') month_raw = date_split[3] day_raw = date_split[4] year_raw = date_split[5] #MonthID month_clean1 = month_raw.replace(' ', '') month_clean2 = month_clean1.replace('\n', '') try: month_id = month(month_clean2) except: month_id = month_clean2 #DayID day_clean1 = day_raw.replace(',', '') day_clean2 = day_clean1.replace(' ', '') day_clean3 = day_clean2.replace('\n', '') day_id = day_clean3 #YearID year_clean1 = year_raw.replace(' ', '') year_clean2 = year_clean1.replace('\n', '') year_id = year_clean2 #Final DateID date_id = year_id + '-' + month_id + '-' + day_id except: #DateID date_id = "2009-2010" + '-' + url[60:75] pass else: print "else" paragraph_body1 = ' ' try: date_block_raw = paragraph_body2[0:200] date_block_clean1 = date_block_raw.replace('_', '') date_block_clean2 = date_block_clean1.replace('\n', ' ') date_block_clean3 = date_block_clean2.replace(u'\xa0', '') date_block_clean4 = date_block_clean3.replace(' ', '', 16) #Date - RAW date_split = date_block_clean4.split(' ') month_raw = date_split[3] day_raw = date_split[4] year_raw = date_split[5] #MonthID month_clean1 = month_raw.replace(' ', '') month_clean2 = month_clean1.replace('\n', '') try: month_id = month(month_clean2) except: month_id = month_clean2 #DayID day_clean1 = day_raw.replace(',', '') day_clean2 = day_clean1.replace(' ', '') day_clean3 = day_clean2.replace('\n', '') day_id = day_clean3 #YearID year_clean1 = year_raw.replace(' ', '') year_clean2 = year_clean1.replace('\n', '') year_id = year_clean2 #Final DateID date_id = year_id + '-' + month_id + '-' + day_id except: #DateID date_id = "2009-2010" + '-' + url[60:75] pass except: # Get Paragraph1 
content1 = soup.find("div", {"id": "content"}) paragraph1 = [ "".join(x.findAll(text=True)) for x in content1.findAll("p") ] # Get Paragraph Body1 paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1)) # Test ID test_raw = paragraph_body1 test1 = test_raw.replace(' ', '') test_id = test1.replace('\n', '') if test_id == '': content2 = soup.find("div", {"class": "legacy-content"}) paragraph2 = [ "".join(x.findAll(text=True)) for x in content2.findAll("div") ] paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2)) elif len(test_id) < 400: content2 = soup.find("div", {"class": "legacy-content"}) paragraph2 = [ "".join(x.findAll(text=True)) for x in content2.findAll("div") ] paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2)) else: paragraph_body2 = ' ' #DateID date_id = "2009-2010" + '-' + url[60:75] #Random ID randID1 = str(random.randrange(6, 10000, 1)) randID2 = str(random.randrange(6, 10000, 1)) try: path1 = date_id + "_" + "ID1" + ".txt" path2 = date_id + "_" + "ID2" + ".txt" path3 = date_id + "_" + "ID3" + ".txt" path4 = date_id + "_" + "ID4" + ".txt" path5 = date_id + "_" + "ID5" + ".txt" if os.path.isfile(path1) == False: #print "no file ID1 found, create ID1" f = open(date_id + "_" + "ID1" + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close open_call1 = "open " + path1 subprocess.call(open_call1, shell=True) return elif os.path.isfile(path1) == True: #print "found file ID1, check for ID2" if os.path.isfile(path2) == False: print "found ID1, no file ID2 found, make ID2" f = open(date_id + "_" + "ID2" + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close open_call2 = "open " + path2 subprocess.call(open_call2, shell=True) return elif os.path.isfile(path2) == True: #print "found file ID2, check for ID3" if os.path.isfile(path3) == False: print "found IDs 1-2, no file ID3 found, make ID3" f = 
open(date_id + "_" + "ID3" + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close open_call3 = "open " + path3 subprocess.call(open_call3, shell=True) return elif os.path.isfile(path3) == True: #print "found file ID3, check for ID4" if os.path.isfile(path4) == False: print "found IDs 1-3, no file ID4 found, make ID4" f = open(date_id + "_" + "ID4" + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close open_call4 = "open " + path4 subprocess.call(open_call4, shell=True) return elif os.path.isfile(path4) == True: #print "found file ID4, check for ID5" if os.path.isfile(path5) == False: print "found IDs 1-4, no file ID5 found, make ID5" f = open(date_id + "_" + "ID5" + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close open_call5 = "open " + path5 subprocess.call(open_call5, shell=True) return elif os.path.isfile(path5) == True: print "found IDs 1-5, create random ID" f = open( date_id + "_" + "ID" + randID1 + "-" + randID2 + ".txt", 'w') f.write(url2.encode('utf-8')) f.write(paragraph_body1.encode('utf-8')) f.write(paragraph_body2.encode('utf-8')) f.close return finally: pass
def pre_WHT1(url): """Prints Text Output for a given URL from Whitehouse Speeches and Remarks""" import urllib2,sys, random import os from bs4 import BeautifulSoup soup = BeautifulSoup(urllib2.urlopen(url).read()) # Get URL url2 = "Cite: \n"+url+"\n" # Get Date Date = soup.find("meta", {"property":"article:published_time"}) raw_date = str(Date).split('"', 2)[1][0:10] date = '\n'+raw_date+'\n' date_reform = parse(str(date)).strftime('%B %d, %Y') date_reformat = '\n'+date_reform+'\n' # Get Release Release = soup.find("meta", {"property":"og:title"}) raw_release = str(Release) release = '\n'+'The White House''\n''Office of the Press Secretary'+'\n'+'\n'+'For Immediate Release'+date_reformat # Get Title Title = soup.find("meta", {"property":"og:title"}) raw_title = str(Title).split('"', 2)[1] title = '\n'+raw_title+'\n' # Get Paragraph Body content = soup.find("div", {"class":"field-items"}) paragraph = ["".join(x.findAll(text=True)) for x in content.findAll("p")] paragraph_body = "\n\n%s" % ("\n\n".join(paragraph)) speech_parser_QC(url, paragraph_body, release) #Get File ID - Date & Time #Date - RAW date_split = date.split('-') month_raw = date_split[1] day_raw = date_split[2] year_raw = date_split[0] #MonthID month_clean1 = month_raw.replace(' ', '') month_clean2 = month_clean1.replace('\n', '') try: month_id = month(month_clean2) except: month_id = month_clean2 #DayID day_clean1 = day_raw.replace(',', '') day_clean2 = day_clean1.replace(' ', '') day_clean3 = day_clean2.replace('\n', '') day_id = day_clean3 #YearID year_clean1 = year_raw.replace(' ', '') year_clean2 = year_clean1.replace('\n', '') year_id = year_clean2 #Final DateID date_id = year_id+'-'+month_id+'-'+day_id #Random ID randID1 = str(random.randrange(6, 10000, 1)) randID2 = str(random.randrange(6, 10000, 1)) try: path1 = date_id+"_"+"ID1"+".txt" path2 = date_id+"_"+"ID2"+".txt" path3 = date_id+"_"+"ID3"+".txt" path4 = date_id+"_"+"ID4"+".txt" path5 = date_id+"_"+"ID5"+".txt" if os.path.isfile(path1) == 
False: #print "no file ID1 found, create ID1" f = open(date_id+"_"+"ID1"+".txt", 'w') f.write(url2.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) f.write(paragraph_body.encode('utf-8')) f.close return elif os.path.isfile(path1) == True: #print "found file ID1, check for ID2" if os.path.isfile(path2) == False: print "found ID1, no file ID2 found, make ID2" f = open(date_id+"_"+"ID2"+".txt", 'w') f.write(url2.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) f.write(paragraph_body.encode('utf-8')) f.close return elif os.path.isfile(path2) == True: #print "found file ID2, check for ID3" if os.path.isfile(path3) == False: print "found IDs 1-2, no file ID3 found, make ID3" f = open(date_id+"_"+"ID3"+".txt", 'w') f.write(url2.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) f.write(paragraph_body.encode('utf-8')) f.close return elif os.path.isfile(path3) == True: #print "found file ID3, check for ID4" if os.path.isfile(path4) == False: print "found IDs 1-3, no file ID4 found, make ID4" f = open(date_id+"_"+"ID4"+".txt", 'w') f.write(url2.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) f.write(paragraph_body.encode('utf-8')) f.close return elif os.path.isfile(path4) == True: #print "found file ID4, check for ID5" if os.path.isfile(path5) == False: print "found IDs 1-4, no file ID5 found, make ID5" f = open(date_id+"_"+"ID5"+".txt", 'w') f.write(url2.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) f.write(paragraph_body.encode('utf-8')) f.close return elif os.path.isfile(path5) == True: print "found IDs 1-5, create random ID" f = open(date_id+"_"+"ID"+randID1+"-"+randID2+".txt", 'w') f.write(url2.encode('utf-8')) f.write(release.encode('utf-8')) f.write(title.encode('utf-8')) f.write(paragraph_body.encode('utf-8')) f.close return finally: pass