def get_all_files(root_path, rel_path, recursive, pdf):
    """
    Collect the relative paths of the regular files found in the directory
    ``root_path + '/' + rel_path``.

    On entry:
        root_path (string) is the directory every returned path is relative to;
        rel_path (string) is the sub-directory of root_path to scan;
        recursive (bool) tells whether sub-directories are scanned as well;
        pdf is only forwarded to the recursive calls, it is not used here.
    On exit:
        a list of strings of the form ``rel_path + '/' + name``, one for each
        regular file found. No filtering on the file content is performed.
    """
    base_dir = root_path + '/' + rel_path
    collected = []
    for name in os.listdir(base_dir):
        child_rel = rel_path + '/' + name
        child_abs = os.path.normpath(base_dir + "/" + name)
        if recursive and os.path.isdir(child_abs):
            debug('Sub-directory=' + child_abs)
            collected += get_all_files(root_path, child_rel, recursive, pdf)
        elif os.path.isfile(child_abs):
            collected.append(child_rel)
    return collected
def get_all_files(root_path, rel_path, recursive, pdf):
    """
    Collect the relative paths of the regular files found in the directory
    ``root_path + '/' + rel_path``.

    On entry:
        root_path (string) is the directory every returned path is relative to;
        rel_path (string) is the sub-directory of root_path to scan;
        recursive (bool) tells whether sub-directories are scanned as well;
        pdf is only forwarded to the recursive calls, it is not used here.
    On exit:
        a list of strings of the form ``rel_path + '/' + name``, one for each
        regular file found. No filtering on the file content is performed.
    """
    base_dir = root_path + '/' + rel_path
    collected = []
    for name in os.listdir(base_dir):
        child_rel = rel_path + '/' + name
        child_abs = os.path.normpath(base_dir + "/" + name)
        if recursive and os.path.isdir(child_abs):
            debug('Sub-directory=' + child_abs)
            collected += get_all_files(root_path, child_rel, recursive, pdf)
        elif os.path.isfile(child_abs):
            collected.append(child_rel)
    return collected
def get_len(path):
    """
    Get the length of the list stored in the file specified by path.

    On entry:
        path (string) is the path of the file in which the list is stored;
    On exit:
        the length of the list (int); 0 when the file does not exist.

    Raises:
        AttributeError: if path is not a string.
        DirectoryPathError: if path points to a directory.
    """
    if not isinstance(path, str):
        raise AttributeError('The path:' + str(path) + ' is not a string.')
    if os.path.isdir(path):
        raise DirectoryPathError(
            'The element specified by the path ' + path + ' is a directory.')
    debug("Path: " + path)
    if not os.path.exists(path):
        # No file yet means an empty list.
        return 0
    # NOTE(security): unpickling can execute arbitrary code; the file at
    # `path` must come from a trusted source.
    # The context manager closes the handle deterministically.
    with open(path, 'rb') as stored:
        return len(pickle.load(stored))
def append_entry(path, entry):
    """
    Append an entry to the list stored in a file and store the list back.

    On entry:
        path (string) is the path of the pickle file holding the list; when
            the file does not exist yet, an empty list is used as the start;
        entry is the object appended at the end of the list.

    Raises:
        AttributeError: if path is not a string.
        DirectoryPathError: if path points to a directory.
    """
    if not isinstance(path, str):
        raise AttributeError('The path:' + str(path) + ' is not a string.')
    if os.path.isdir(path):
        raise DirectoryPathError(
            'The element specified by the path ' + path + ' is a directory.')
    if os.path.exists(path):
        # NOTE(security): unpickling can execute arbitrary code; the file at
        # `path` must come from a trusted source.
        with open(path, 'rb') as stored:
            entries = pickle.load(stored)
    else:
        entries = []
    entries.append(entry)
    # Closing the handle explicitly guarantees the data is flushed to disk
    # before the function returns.
    with open(path, 'wb') as stored:
        pickle.dump(entries, stored)
    debug("added: " + str(entry))
def get_len(path):
    """
    Get the length of the list stored in the file specified by path.

    On entry:
        path (string) is the path of the file in which the list is stored;
    On exit:
        the length of the list (int); 0 when the file does not exist.

    Raises:
        AttributeError: if path is not a string.
        DirectoryPathError: if path points to a directory.
    """
    if not isinstance(path, str):
        raise AttributeError('The path:' + str(path) + ' is not a string.')
    if os.path.isdir(path):
        raise DirectoryPathError(
            'The element specified by the path ' + path + ' is a directory.')
    debug("Path: " + path)
    if not os.path.exists(path):
        # No file yet means an empty list.
        return 0
    # NOTE(security): unpickling can execute arbitrary code; the file at
    # `path` must come from a trusted source.
    # The context manager closes the handle deterministically.
    with open(path, 'rb') as stored:
        return len(pickle.load(stored))
def append_entry(path, entry):
    """
    Append an entry to the list stored in a file and store the list back.

    On entry:
        path (string) is the path of the pickle file holding the list; when
            the file does not exist yet, an empty list is used as the start;
        entry is the object appended at the end of the list.

    Raises:
        AttributeError: if path is not a string.
        DirectoryPathError: if path points to a directory.
    """
    if not isinstance(path, str):
        raise AttributeError('The path:' + str(path) + ' is not a string.')
    if os.path.isdir(path):
        raise DirectoryPathError(
            'The element specified by the path ' + path + ' is a directory.')
    if os.path.exists(path):
        # NOTE(security): unpickling can execute arbitrary code; the file at
        # `path` must come from a trusted source.
        with open(path, 'rb') as stored:
            entries = pickle.load(stored)
    else:
        entries = []
    entries.append(entry)
    # Closing the handle explicitly guarantees the data is flushed to disk
    # before the function returns.
    with open(path, 'wb') as stored:
        pickle.dump(entries, stored)
    debug("added: " + str(entry))
def remove_entry(path, index):
    """
    Remove the entry at a given position from the list stored in a file and
    store the list back.

    On entry:
        path (string) is the path of the pickle file holding the list;
        index (int) is the position of the entry to remove.
    On exit:
        nothing is returned. When the file does not exist, or the stored list
        is empty, the function returns without writing anything.

    Raises:
        AttributeError: if path is not a string.
        DirectoryPathError: if path points to a directory.
        IndexError: if index is negative or beyond the last position.
    """
    if not isinstance(path, str):
        raise AttributeError('The path:' + str(path) + ' is not a string.')
    if os.path.isdir(path):
        raise DirectoryPathError(
            'The element specified by the path ' + path + ' is a directory.')
    if not os.path.exists(path):
        # Nothing has been stored yet, hence there is nothing to remove.
        return
    # NOTE(security): unpickling can execute arbitrary code; the file at
    # `path` must come from a trusted source.
    with open(path, 'rb') as stored:
        entries = pickle.load(stored)
    if not entries:
        return
    if index < 0 or index > len(entries) - 1:
        raise IndexError('Index ' + str(index) + ' is out of the range.')
    # Remove by position: list.remove() would delete the first element that
    # compares equal, which is the wrong one when the list has duplicates.
    removed = entries.pop(index)
    with open(path, 'wb') as stored:
        pickle.dump(entries, stored)
    debug("removed: " + str(removed))
def __init__(self, prop_path, use_sessions=False):
    """
    Load the configuration from a property file and, optionally, from the
    session files stored next to it.

    prop_path: path toward the .conf file. Its content is executed as Python
        code and the names it defines are stored into self.__conf_dict
        (which is expected to exist already: it is not created here).
    use_sessions: means that the property file is completed by several files
        contained into a directory whose name is prop_path with its
        extension replaced by '.d' (or with '.d' appended when prop_path has
        no extension). Every regular, non-hidden file of that directory is
        executed into its own dictionary, stored in self.__sessions under
        the file name.

    Raises IOError when prop_path does not exist or is not a regular file.
    """
    if not os.path.exists(prop_path):
        raise IOError('The property file ' + prop_path + ' doesn\'t exist.')
    if not os.path.isfile(prop_path):
        raise IOError('The property file ' + prop_path + ' must be a file.')
    # NOTE(security): exec() runs the configuration files as Python code;
    # they must come from a trusted source.
    with open(prop_path) as conf_file:
        exec(conf_file.read(), {}, self.__conf_dict)
    debug(self.__conf_dict)
    if not use_sessions:
        return
    # splitext only looks at the last path component, so a dot inside a
    # directory name does not truncate the path.
    conf_dir_path = os.path.splitext(prop_path)[0] + '.d'
    if not os.path.exists(conf_dir_path):
        return
    # Scan all the files inside
    for e in os.listdir(conf_dir_path):
        session_path = conf_dir_path + '/' + e
        if os.path.isfile(session_path) and e[0] != '.':
            debug('config file=' + e)
            session_dict = {}
            with open(session_path) as session_file:
                exec(session_file.read(), {}, session_dict)
            debug(session_dict)
            self.__sessions[e] = session_dict
def analyze_files(path, file_list, keyword, recursive=True,
                  case_sensitive=False, whole_words=False, pdf=False):
    """
    Search a keyword in a list of files.

    On entry:
        path (string) is the directory the entries of file_list are relative to;
        file_list is the list of the relative paths of the files to scan;
        keyword, whole_words and case_sensitive are passed to __contains for
            every line of every file;
        recursive and pdf are accepted but not used by this function.
    On exit:
        a dictionary mapping each entry having at least one matching line to
        the list of its matches, each one a tuple (line number starting from
        1, line text).

    Entries whose name ends with the 'pdf' extension are converted to text
    through the external command pdftotext. The other entries are read as
    text files when they are readable and skipped otherwise. Entries raising
    a UnicodeDecodeError are skipped as well.
    """
    dict_out = {}
    for entry in file_list:
        debug('Entry File: ' + entry)
        abs_path = path + '/' + entry
        try:
            if entry.split('.')[-1] == 'pdf':
                debug('Checking the pdf file ' + entry + '...')
                out = subprocess.check_output(['pdftotext', abs_path, '-'])
                stream = io.StringIO(out.decode())
            elif os.access(abs_path, os.R_OK):
                stream = open(abs_path, 'r')
            else:
                continue
            # Close every scanned file before moving to the next one, so a
            # long file_list does not exhaust the file descriptors.
            with stream:
                list_lines = [
                    (num, line)
                    for num, line in enumerate(stream, start=1)
                    if __contains(keyword, line, whole_words, case_sensitive)
                ]
        except UnicodeDecodeError:
            debug(entry + ' doesn\'t have a UTF8 encoding.')
            continue
        if list_lines:
            dict_out[entry] = list_lines
    return dict_out
def __init__(self, prop_path, use_sessions=False):
    """
    Load the configuration from a property file and, optionally, from the
    session files stored next to it.

    prop_path: path toward the .conf file. Its content is executed as Python
        code and the names it defines are stored into self.__conf_dict
        (which is expected to exist already: it is not created here).
    use_sessions: means that the property file is completed by several files
        contained into a directory whose name is prop_path with its
        extension replaced by '.d' (or with '.d' appended when prop_path has
        no extension). Every regular, non-hidden file of that directory is
        executed into its own dictionary, stored in self.__sessions under
        the file name.

    Raises IOError when prop_path does not exist or is not a regular file.
    """
    if not os.path.exists(prop_path):
        raise IOError('The property file ' + prop_path + ' doesn\'t exist.')
    if not os.path.isfile(prop_path):
        raise IOError('The property file ' + prop_path + ' must be a file.')
    # NOTE(security): exec() runs the configuration files as Python code;
    # they must come from a trusted source.
    with open(prop_path) as conf_file:
        exec(conf_file.read(), {}, self.__conf_dict)
    debug(self.__conf_dict)
    if not use_sessions:
        return
    # splitext only looks at the last path component, so a dot inside a
    # directory name does not truncate the path.
    conf_dir_path = os.path.splitext(prop_path)[0] + '.d'
    if not os.path.exists(conf_dir_path):
        return
    # Scan all the files inside
    for e in os.listdir(conf_dir_path):
        session_path = conf_dir_path + '/' + e
        if os.path.isfile(session_path) and e[0] != '.':
            debug('config file=' + e)
            session_dict = {}
            with open(session_path) as session_file:
                exec(session_file.read(), {}, session_dict)
            debug(session_dict)
            self.__sessions[e] = session_dict
def analyze_files(path, file_list, keyword, recursive=True,
                  case_sensitive=False, whole_words=False, pdf=False):
    """
    Search a keyword in a list of files.

    On entry:
        path (string) is the directory the entries of file_list are relative to;
        file_list is the list of the relative paths of the files to scan;
        keyword, whole_words and case_sensitive are passed to __contains for
            every line of every file;
        recursive and pdf are accepted but not used by this function.
    On exit:
        a dictionary mapping each entry having at least one matching line to
        the list of its matches, each one a tuple (line number starting from
        1, line text).

    Entries whose name ends with the 'pdf' extension are converted to text
    through the external command pdftotext. The other entries are read as
    text files when they are readable and skipped otherwise. Entries raising
    a UnicodeDecodeError are skipped as well.
    """
    dict_out = {}
    for entry in file_list:
        debug('Entry File: ' + entry)
        abs_path = path + '/' + entry
        try:
            if entry.split('.')[-1] == 'pdf':
                debug('Checking the pdf file ' + entry + '...')
                out = subprocess.check_output(['pdftotext', abs_path, '-'])
                stream = io.StringIO(out.decode())
            elif os.access(abs_path, os.R_OK):
                stream = open(abs_path, 'r')
            else:
                continue
            # Close every scanned file before moving to the next one, so a
            # long file_list does not exhaust the file descriptors.
            with stream:
                list_lines = [
                    (num, line)
                    for num, line in enumerate(stream, start=1)
                    if __contains(keyword, line, whole_words, case_sensitive)
                ]
        except UnicodeDecodeError:
            debug(entry + ' doesn\'t have a UTF8 encoding.')
            continue
        if list_lines:
            dict_out[entry] = list_lines
    return dict_out
def remove_entry(path, index):
    """
    Remove the entry at a given position from the list stored in a file and
    store the list back.

    On entry:
        path (string) is the path of the pickle file holding the list;
        index (int) is the position of the entry to remove.
    On exit:
        nothing is returned. When the file does not exist, or the stored list
        is empty, the function returns without writing anything.

    Raises:
        AttributeError: if path is not a string.
        DirectoryPathError: if path points to a directory.
        IndexError: if index is negative or beyond the last position.
    """
    if not isinstance(path, str):
        raise AttributeError('The path:' + str(path) + ' is not a string.')
    if os.path.isdir(path):
        raise DirectoryPathError(
            'The element specified by the path ' + path + ' is a directory.')
    if not os.path.exists(path):
        # Nothing has been stored yet, hence there is nothing to remove.
        return
    # NOTE(security): unpickling can execute arbitrary code; the file at
    # `path` must come from a trusted source.
    with open(path, 'rb') as stored:
        entries = pickle.load(stored)
    if not entries:
        return
    if index < 0 or index > len(entries) - 1:
        raise IndexError('Index ' + str(index) + ' is out of the range.')
    # Remove by position: list.remove() would delete the first element that
    # compares equal, which is the wrong one when the list has duplicates.
    removed = entries.pop(index)
    with open(path, 'wb') as stored:
        pickle.dump(entries, stored)
    debug("removed: " + str(removed))