def __init__(self, filename):
    """Load the cached Markov chain for *filename*, or build and cache one.

    The cache path is *filename* with its final extension replaced by
    ``.pkl``. If ``self.loadfile`` raises ``OSError`` for that path, the
    corpus is read, split on single spaces, and a word-to-word transition
    count matrix is accumulated, row-normalised and passed to
    ``self.savefile`` together with the two vocabulary mappings.

    Sets ``self.bag_of_word`` (word -> index), ``self.bag_of_index``
    (index -> word) and ``self.markov_matrix`` (2-D numpy array).

    Args:
        filename: Path of the text corpus.

    Raises:
        SystemExit: After printing a message, if an ``OSError`` is raised
            while opening/reading the corpus or saving the chain.
    """
    import os  # local import: the file's top-level imports are not visible here

    # splitext strips only the last extension. The previous
    # ``filename.split('.')[0]`` cut at the *first* dot of the whole path, so
    # './a.txt' and '../b.txt' both resolved to the same cache file '.pkl'.
    dfile = os.path.splitext(filename)[0] + '.pkl'

    try:
        dump = self.loadfile(dfile)
        self.bag_of_word = dump['bag_of_word']
        self.bag_of_index = dump['bag_of_index']
        self.markov_matrix = np.array(dump['markov_matrix'])
        return
    except OSError:
        # No usable cache: fall through and build the chain from the corpus.
        pass

    self.bag_of_word = {}
    self.bag_of_index = {}
    self.markov_matrix = np.array([[0]])
    index = 0  # next unused vocabulary index

    try:
        with open(filename, 'r') as file:
            print(
                "log: this would show up if its a new file. A markov chain will be created and saved"
            )
            low = ' '.join(file.read().splitlines()).split(' ')

        progress = ProgressBar(len(low) - 1, fmt=ProgressBar.FULL)
        for i in range(progress.total):
            progress.current += 1
            progress()

            # Register both words of the current bigram. setdefault returns
            # ``index`` only when the word was not in the vocabulary yet.
            for word in (low[i], low[i + 1]):
                if self.bag_of_word.setdefault(word, index) == index:
                    self.bag_of_index[index] = word
                    # self.max is defined elsewhere in the class; presumably
                    # it yields 0 for the very first word (the matrix starts
                    # as 1x1) and 1 afterwards -- TODO confirm.
                    grow = self.max(index)
                    self.markov_matrix = np.pad(
                        self.markov_matrix,
                        [(0, grow), (0, grow)],
                        mode='constant',
                    )
                    index += 1

            row = self.bag_of_word[low[i]]
            col = self.bag_of_word[low[i + 1]]
            self.markov_matrix[row, col] += 1
        progress.done()

        # Turn counts into per-row probabilities; rows without any outgoing
        # transition are divided by 1 so they stay all-zero instead of NaN.
        row_totals = np.sum(self.markov_matrix, axis=1)[:, np.newaxis]
        row_totals[row_totals == 0] = 1
        self.markov_matrix = self.markov_matrix / row_totals

        # Force a transition from the last-registered word to word 0.
        # NOTE(review): this is applied after normalisation, so that row may
        # no longer sum to 1 -- presumably meant to avoid a dead end; confirm.
        self.markov_matrix[self.markov_matrix.shape[0] - 1, 0] = 1

        self.savefile(
            {
                'bag_of_word': self.bag_of_word,
                'bag_of_index': self.bag_of_index,
                'markov_matrix': self.markov_matrix,
            },
            dfile,
        )
        print("log:chain for ", filename, " is created")
    except OSError:
        print("file not found !")
        # Same effect as the site-provided exit(), without depending on the
        # ``site`` module having injected that helper.
        raise SystemExit
def download_images(uri_list, download_location, retain_original_naming=True):
    """Download every URI in *uri_list* into *download_location*.

    A failure on one URI is handed to ``log_error`` and the batch continues;
    the progress bar advances once per URI either way. Empty responses are
    skipped without writing a file.

    Args:
        uri_list: Sequence of URIs to fetch; its length sizes the progress bar.
        download_location: Directory the files are written into.
        retain_original_naming: When true, each file keeps the basename of
            its URI path. Otherwise files are named by 1-based position in
            *uri_list*, keeping the original extension when there is one.
    """
    progress = ProgressBar(len(uri_list), fmt=ProgressBar.FULL)

    for position, src in enumerate(uri_list, start=1):
        try:
            # Close the response explicitly; try/finally is used rather than
            # ``with`` so this works with urlopen variants whose response
            # object is not a context manager.
            response = urlopen(src)
            try:
                img_data = response.read()
            finally:
                response.close()

            if img_data:  # read succeeded and returned a non-empty body
                filename = basename(urlsplit(src)[2]).strip()
                if not retain_original_naming:
                    # rpartition reports whether a dot exists at all, so an
                    # extension-less name such as 'photo' becomes '1' rather
                    # than '1.photo'.
                    _, dot, extension = filename.rpartition('.')
                    if dot:
                        filename = str(position) + '.' + extension
                    else:
                        filename = str(position)

                # ``with`` guarantees the handle is released even if the
                # write fails part-way (e.g. disk full).
                with open(os.path.join(download_location, filename), 'wb') as output:
                    output.write(img_data)
        except Exception as e:
            # Best-effort batch: record the failure and move on.
            log_error(e)

        progress.current += 1
        progress()
        sleep(0.001)

    progress.done()
def main(argv):
    """Sort the files of an input directory into a dated output tree.

    Args:
        argv: Argument list without the program name. ``argv[0]`` is the
            input directory, ``argv[1]`` the output directory, and everything
            from ``argv[2]`` on is parsed as options:

            * ``-d/--date FMT``   directory layout, run through
              ``parse_date_format`` (default ``%Y/%m/%d`` joined with the
              platform separator)
            * ``-r/--regex RE``   regular expression, compiled and passed to
              the file handler
            * ``-l/--log FILE``   write a DEBUG-level log to FILE
            * ``-m/--move``       move files instead of copying
            * ``-s/--sha-rename`` use ``handle_file2`` instead of
              ``handle_file``
            * ``-h/--help``       print help

    Raises:
        SystemExit: On invalid arguments, when the output directory cannot be
            created, or when interrupted with Ctrl-C.
    """
    check_dependencies()

    # With fewer than two positionals argv[2:] is empty, so no option can be
    # parsed anyway; validating up front avoids indexing argv later.
    if len(argv) < 2:
        print('ERROR: Number of arguments are less than 2')
        print(argv)
        help_info()
        # help_info() is defined elsewhere and presumably terminates the
        # process; exit explicitly in case it returns.
        sys.exit(1)

    move_files = False
    date_regex = None
    log_fname = None
    sha_rename = False
    dir_format = os.path.sep.join(['%Y', '%m', '%d'])

    try:
        opts, _ = getopt.getopt(
            argv[2:],
            "d:r:l:mhs",
            ["date=", "regex=", "log=", "move", "help", "sha-rename"],
        )
    except getopt.GetoptError as e:
        print(e)
        help_info()
        sys.exit(1)  # ``opts`` is unbound here, so never fall through

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print('Printing help:')
            help_info()
        elif opt in ("-d", "--date"):
            if not arg:
                print('Date format cannot be empty')
                sys.exit(1)
            dir_format = parse_date_format(arg)
        elif opt in ("-l", "--log"):
            if not arg:
                print("log file name can't be empty")
                sys.exit(1)
            log_fname = arg
        elif opt in ("-m", "--move"):
            move_files = True
            print('Using move strategy!')
        elif opt in ("-s", "--sha-rename"):
            sha_rename = True
            print('Renaming files to SHA256 name!')
        elif opt in ("-r", "--regex"):
            try:
                date_regex = re.compile(arg)
            except re.error:
                # Only a malformed pattern is expected; a bare ``except``
                # would also swallow KeyboardInterrupt and SystemExit.
                error("Provided regex is invalid!")

    inputdir = os.path.expanduser(argv[0])
    outputdir = os.path.expanduser(argv[1])

    # isdir() is already False for a missing path, so no exists() check.
    if not os.path.isdir(inputdir):
        error('Input directory "%s" does not exist or cannot be accessed' % inputdir)

    if not os.path.exists(outputdir):
        print('Output directory "%s" does not exist, creating now' % outputdir)
        try:
            os.makedirs(outputdir)
        except OSError:
            print('Cannot create output directory. No write access!')
            sys.exit(1)

    if log_fname:
        logging.basicConfig(filename=log_fname, level='DEBUG')
    else:
        logging.basicConfig(level='CRITICAL')

    ignored_files = ('.DS_Store', 'Thumbs.db')
    error_list = []
    # Keys double as the status names reported to the progress bar; the file
    # handlers are expected to return one of them.
    count = dict(copied=0, moved=0, duplicate=0, error=0, other=0)

    # Count only files that will actually be processed, so the bar can reach
    # its total (ignored files are skipped without an increment below).
    total_count = sum(
        1
        for _, _, files in os.walk(inputdir)
        for name in files
        if name not in ignored_files
    )
    bar = ProgressBar(total_count, count.keys())

    handler = handle_file2 if sha_rename else handle_file

    for root, _, files in os.walk(inputdir):
        for filename in files:
            if filename in ignored_files:
                continue
            path = os.path.join(root, filename)
            try:
                status = handler(path, outputdir, dir_format, move_files, date_regex)
                count[status] += 1
                bar.increment(status)
            except KeyboardInterrupt:
                print('\n Exiting...')
                print_summary(count)
                sys.exit(0)
            except Exception as e:
                # Per-file boundary: one bad file must not abort the run.
                logging.error('Error skipping %s:%s', filename, repr(e))
                error_list.append(path)
                count['error'] += 1
                bar.increment('error')

    bar.done()

    logging.info('===Files with Errors===')
    for fn in error_list:
        logging.info(fn)
    logging.info('===End Files with Errors===')
    print_summary(count)