def pdf_thread(self, url):
    """Download the PDF at *url*, save it to disk and pass it to ``self.pdf``.

    The file name is the last ``/``-separated segment of *url*, reduced to
    ASCII letters, digits and ``-_.() ``.  The download is written to
    ``self.document_folder + <file name>`` and that path is also stored in
    ``self.pdf_path``.

    A four-cell record ``[header, value]`` (line count, URL, file name or
    error text, file path) is built from ``self.csv_header``.  If the saved
    file cannot be opened, a failure row (those four values followed by 22
    empty cells) is appended to ``self.report_folder + self.report_name``
    and the method returns.  Otherwise the open file and the record are
    handed to ``self.pdf``.

    Args:
        url: Address of the PDF to download.

    Returns:
        None.
    """
    pdf_name = ''
    pdf_path = ''

    # save PDF to disk
    try:
        # repr() of the encoded bytes turns non-ASCII characters into
        # ``\xNN`` escapes; [2:-1] strips the surrounding ``b'...'``.
        raw_name = repr(url.split("/")[-1].encode('UTF-8'))[2:-1]
        # Keep only whitelisted characters.  The whitelist used to be
        # compiled as a regular expression, which matched (almost) nothing
        # and therefore left characters like '?', '=' or '\\' in the name.
        allowed = frozenset("-_.() " + string.ascii_letters + string.digits)
        pdf_name = ''.join(ch for ch in raw_name if ch in allowed)
        pdf_path = self.document_folder + pdf_name
        self.pdf_path = pdf_path

        # NOTE(review): no timeout is passed, so a stalled server blocks
        # this worker indefinitely -- consider adding one.
        r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
        with open(pdf_path, 'wb') as out_file:
            out_file.write(r.content)

        name_cell = pdf_name or 'NULL'
        print(' >>>> PDF START:[' + url + '] ' + str(self.line_count) + ' ' +
              datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    except Exception as e:
        # Worker boundary: record the failure in the row instead of raising.
        name_cell = str(e)
        print(e)

    # The local pdf_path is used (not self.pdf_path) so a failure that
    # happens before the path is computed never reports a stale path left
    # over from a previous call.
    csv_row = [
        [self.csv_header[0], str(self.line_count)],
        [self.csv_header[1], url or 'NULL'],
        [self.csv_header[2], name_cell],
        [self.csv_header[3], pdf_path or 'NULL'],
    ]

    my_file = self.document_folder + pdf_name
    try:
        fp = open(my_file, 'rb')
    except (OSError, ValueError):
        print(' PDF LOAD FAILED !!! ' + str(self.line_count) + ' : ' +
              pdf_path)
        csv_row[3] = [
            self.csv_header[3],
            'PDF FAILED TO OPEN:' + pdf_path if pdf_path else 'NULL',
        ]

        # Write results: the four recorded values, then 22 blank cells
        # inserted from index 4 onwards.
        row = [cell[1] for cell in csv_row]
        row[4:4] = [''] * 22

        # OPEN FAILED
        report_path = self.report_folder + self.report_name
        with open(report_path, 'a', encoding='utf8',
                  newline='') as csv_file:
            writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
            writer.writerow(row)
        return

    try:
        self.pdf(fp, csv_row)
    except Exception as e:
        print('PDF FAIL')
        print(e)
    finally:
        # Assumes self.pdf finishes reading before it returns -- TODO
        # confirm; closing an already-closed file is harmless.
        fp.close()