def __init__(self, password, url, pattern):
    self.password = password
    self.url = url
    self.pattern = pattern
    # 1. Create a ServerProxy object using the server URL (ar[3]).
    # 2. Call the remote server and retrieve a server list.
    # 3. For each URL on the list, do the following:
    proxy = ServerProxy(self.url)
    server_list = proxy.handle_request(utils.gen_pw(self.password, self.url), 0, [])
    for url in server_list:
        # 1. Create a ServerProxy object using the URL.
        # 2. Call the remote server to return a list of filenames matching
        #    the pattern (ar[4]).
        # 3. For each filename that doesn't exist locally, do the following:
        # Note: the password must be generated against the *target* server's
        # URL, since handle_request checks it against gen_pw(password, myUrl).
        proxy = ServerProxy(url)
        remote_list = proxy.handle_request(utils.gen_pw(self.password, url), 1, self.pattern)
        remote_filename_list = remote_list[0][0]
        remote_dir_list = remote_list[0][1]
        local_list = utils.ls()
        local_filename_list = local_list[0]
        local_dir_list = local_list[1]
        proxy_obj = ServerProxy(url)
        for dirname in filter(lambda n: n not in local_dir_list, remote_dir_list):
            print(dirname)
            file_proxy = _file.FileProxy(dirname)
            file_proxy.make_dir()
        for filename in filter(lambda n: n not in local_filename_list, remote_filename_list):
            # For each filename that doesn't exist locally, mirror it.
            print(filename)
            file_proxy = _file.FileProxy(filename)
            with file_proxy.open_file("wb") as handle:
                handle.write(proxy_obj.handle_request(utils.gen_pw(self.password, url), 2, filename).data)
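
# A minimal sketch of how this sync client might be launched. "Client" is an
# assumed name for the class owning the __init__ above, and the argv slots
# merely follow the ar[3]/ar[4] hints in its comments; both are illustrative.
import sys

if __name__ == '__main__':
    Client(password=sys.argv[2], url=sys.argv[3], pattern=sys.argv[4])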

def handle_request(self, pw, mod, pattern):
    myUrl_password = utils.gen_pw(self.password, self.myUrl)
    if pw == myUrl_password:
        if mod == 0:
            return self.update_server_list(pattern)
        elif mod == 1:
            return [utils.ls(pattern)]
        else:
            return self.get_file_content(pattern)
    else:
        return None
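
# A minimal sketch of how handle_request could be exposed over XML-RPC so the
# ServerProxy calls above can reach it (Python 3 module names; "Node", the
# port, and the password value are assumptions for illustration only).
from xmlrpc.server import SimpleXMLRPCServer

node = Node(password='secret', myUrl='http://localhost:4242')  # hypothetical ctor
server = SimpleXMLRPCServer(('localhost', 4242), allow_none=True)  # handle_request may return None
server.register_instance(node)  # exposes node.handle_request to remote ServerProxy clients
server.serve_forever()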

def main():
    year = 16
    root_folder = os.path.join('..', '..')
    tree_folder = os.path.join(root_folder, 'dataset', 'twitter', 'twitter' + str(year), 'tree')
    tree_files = utils.ls(tree_folder)
    print('number of tree files: {}'.format(len(tree_files)))
    user_ids = []
    for i, tree_file in enumerate(tree_files, 1):
        print(i)
        lines = utils.read_lines(os.path.join(tree_folder, tree_file))
        source_user_id = get_user_ids(lines[0])[1]
        user_ids.append(source_user_id)
        for line in lines:
            left_user_id, right_user_id = get_user_ids(line)
            if left_user_id == source_user_id:
                user_ids.append(right_user_id)
    print('number of users: {}'.format(len(user_ids)))
    utils.save(user_ids, os.path.join(root_folder, 'temp', 'user_ids_twitter_' + str(year) + '.txt'))

def flist(self, fd=None, directory=None):
    """This generates the list of filenames to be opened - doesn't check for existence"""
    if directory is not None:
        usedir = directory
    else:
        usedir = self.directory
    file_list = utils.ls(directory=usedir, show=False, returnList=True)
    get_fn = True
    files = []
    if type(fd) == int:
        ifile = [fd]
    elif fd == '?' or fd is None:
        for i, fn in enumerate(file_list):
            print('%d - %s' % (i, fn))
        sfile = raw_input('File numbers: ')
        if '-' in sfile:
            sfile = sfile.split('-')
            ifile = range(int(sfile[0]), int(sfile[1]) + 1)
        else:
            if ',' in sfile:
                sfile = sfile.split(',')
            else:
                sfile = sfile.split()
            ifile = []
            for i in sfile:
                ifile.append(int(i))
    elif type(fd) == list and type(fd[0]) == int:
        ifile = fd
    elif type(fd) == list and type(fd[0]) == str:
        files = fd
        get_fn = False  # names were given directly; skip the index lookup
    else:
        get_fn = False
        files = [fd]
    if get_fn:
        for i, fn in enumerate(file_list):
            if i in ifile:
                files.append(fn)
    return files

def ls_model(fdir, fname, fext="pth", pattern=None) -> tuple:
    r"""
    :param fdir: folder the models are saved in
    :param fname: model name without the iteration number
    :param fext: file extension
    :param pattern: model-name pattern; defaults to r".*{}-(\d+)\.{}$".format(fname, fext)
    :return: list of model paths, list of model iteration numbers
    """
    # collect the candidate files
    pattern = pattern if pattern is not None else r".*{}-(\d+)\.{}$".format(fname, fext)
    files = ls(fdir, pattern=pattern)

    # sort by iteration index
    def get_key(s):
        m = re.match(pattern=pattern, string=s)
        return int(m.group(1))

    files = sorted(files, key=get_key)
    idxs = list(map(get_key, files))
    return files, idxs
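
# Example use, assuming checkpoints named like "model-100.pth" and
# "model-200.pth" under ./checkpoints (the paths are illustrative only):
files, idxs = ls_model("./checkpoints", "model")
print(files[-1], idxs[-1])  # latest checkpoint path and its iteration number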

def flist(self, fd=None, directory=None, tag='dat'):
    """This generates the list of filenames to be opened - doesn't check for existence"""
    if directory is not None:
        usedir = directory
    else:
        usedir = self.directory
    file_list = utils.ls(directory=usedir, tag=tag, show=False, returnList=True)
    ifile = []
    files = []
    if type(fd) == int:
        ifile = [fd]
    elif fd is None:
        for i, fn in enumerate(file_list):
            print('{} - {}'.format(i, fn))
        sfile = raw_input('File numbers: ')
        split_on = None
        if '-' in sfile:
            sfile = sfile.split('-')
            ifile = range(int(sfile[0]), int(sfile[1]) + 1)
        else:
            if ',' in sfile:
                split_on = ','
            ifile = [int(x) for x in sfile.split(split_on)]
    elif type(fd) == list and type(fd[0]) == int:
        ifile = fd
    elif type(fd) == list and type(fd[0]) == str:
        files = fd
    else:
        files = [fd]
    if ifile:
        for i, fn in enumerate(file_list):
            if i in ifile:
                files.append(fn)
    return files
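
# Usage sketch; "reader", the directory, and the indices are assumptions.
names = reader.flist(fd=[0, 2], directory='/data/run42', tag='dat')
# fd=None instead drops into the interactive "File numbers: " prompt, which
# accepts a range ("0-3"), a comma list ("0,2,5"), or whitespace ("0 2 5").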

def load_news(path, decoder):
    newsFiles = []
    if not is_file(path):
        newsFiles = ls(fix_dirpath(path) + "*.json", pattern=True)
    else:
        newsFiles = [path]
    if len(newsFiles) <= 0:
        print("No files found")
        return None
    newsObjectsList = load_files(newsFiles, decoder)
    if len(newsObjectsList) <= 0:
        return None
    collectedReads = []
    globalId = 0
    for i in newsObjectsList:
        for a in i:
            globalId += 1
            a.set_id(globalId)
            collectedReads.append(a)
    return collectedReads
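
# Illustrative call; the directory and decoder are assumptions (decoder is
# whatever parser object load_files() expects for the JSON files):
articles = load_news("data/news/", decoder=my_json_decoder)
if articles is not None:
    print("{} articles loaded".format(len(articles)))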

        # scoring number two
        # sent_dict[sent] = count_topicwords(sent, tw_dict) / len(word_tokenize(sent))
        # scoring number three: topic-word hits per non-stopword token
        wordcount = 0
        for word in word_tokenize(sent):
            if word not in stopwords:
                wordcount += 1
        # guard against sentences made up entirely of stopwords
        sent_dict[sent] = count_topicwords(sent, tw_dict) / wordcount if wordcount else 0.0
    return sent_dict


def generate_summary(topic_file, to_summarize):
    all_sents = utils.get_sentences(to_summarize)
    tw_dict = load_topic_words(topic_file)
    sent_dict = generate_sentence_dict(all_sents, tw_dict)
    top_sents = sorted(sent_dict.items(), key=lambda t: t[1], reverse=True)
    pretty = []
    for sent in top_sents:
        if not utils.is_repeat(sent[0], pretty):
            pretty.append(sent[0])
    # return 100 words
    return " ".join(word_tokenize(" ".join(pretty))[:100])


if __name__ == '__main__':
    for file in utils.ls("input/"):
        name = file.split("/")[-1][0:7]
        if name[0] != ".":
            summary = generate_summary("tsfiles/" + name + ".ts", file)
            with open("tw-summaries/" + name, "w") as sumfile:
                sumfile.write(summary)
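
# Worked illustration of "scoring number three" (hypothetical numbers): a
# sentence with 2 topic-word hits among 5 non-stopword tokens scores
# 2 / 5 = 0.4. Dividing by the non-stopword count keeps long, filler-heavy
# sentences from outranking short topical ones.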

def get_protein_filenames(dir_path):
    return ls(dir_path, lambda x: x.endswith(PROTEIN_FILENAME_SUFFIX))

def procesarDirectorio(dir):
    model = OpenNsfw()
    nroArchivo = 0
    excluidos = 0
    incluidas = 0
    errores = 0
    reporte = []
    load.start('Searching for files...')
    files = utils.ls(dir)
    load.stop('%d files found!' % (len(files)))
    msg = ''
    pbar = tqdm(files, total=len(files), desc=' Scanning', unit=' files',
                leave=False, dynamic_ncols=True)
    for f in pbar:
        img_path = f
        nroArchivo += 1
        try:
            resultado = isPorno(model, img_path)
            if resultado >= score:
                msg = 'Probability: ' + str(round(resultado * 100, 2)) + ' %'
                incluidas += 1
                minFile = ('P%3d_mini_%4d.jpg' % ((resultado * 100), incluidas)).replace(' ', '0')
                reporte.append({
                    'id': incluidas,
                    'file_path': img_path,
                    'score': float(round(resultado, 4)),
                    'miniature': minFile if createMin else ''
                })
            else:
                msg = 'Excluded by score!'
                excluidos += 1
        except (OSError, ValueError):
            errores += 1
            msg = 'Not a valid image!'
            continue
        finally:
            msg = msg + '\t' + img_path
            pbar.write(msg)
    print('\nTotal files: ', nroArchivo)
    print('Total invalid files: ', errores)
    print('Total images analyzed: ', nroArchivo - errores)
    print('Total images included in report: ', incluidas)
    if score > 0:
        print('Total images excluded by score: ', excluidos)
    if len(reporte):
        print('\nSaving report...')
        outFile = str(path.join(outFolder, 'reporte.json'))
        with open(outFile, 'w') as f:
            json.dump(reporte, f)
        print('Report saved to ', path.abspath(outFile))
    if createMin:
        print('\nCreating thumbnails...')
        pbar = tqdm(reporte, total=len(reporte), unit=' imgs')
        for i in pbar:
            img = image.load_img(i['file_path'], target_size=(80, 80))
            file = path.abspath(path.join(outFolder, i['miniature']))
            # save via the path so PIL opens the file in binary mode itself
            img.save(file)
        print('Thumbnails created in ', path.abspath(outFolder))

def get_filenames(dir_path, suffix):
    return ls(dir_path, lambda x: x.endswith(suffix))
def __str__(self): return str("Action <name:" + self.name + "> with attributes" + " [[" + utils.ls(self.attributes) + "]] and target: <" + utils.ls(self.target) + "> with index at " + str(self.index))

def __str__(self):
    # for easier debugging
    return ("Entity <name:" + self.name + "> with attributes "
            + "[[" + utils.ls(self.attributes) + "]]"
            + " with index at " + str(self.index))

def _clean(self):
    for meta in ls(os.path.join(cfg['path'], ".meta")):
        if meta not in ls(cfg['path']):
            os.remove(os.path.join(cfg['path'], ".meta", meta))

def __init__(self):
    self._clean()
    self._notes = {}
    for note in ls(cfg['path']):
        n = Note(note)
        self._notes[note] = n.get_meta()
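
# Rough usage sketch: construction prunes stale .meta entries via _clean(),
# then indexes every note's metadata. "Notebook" is an assumed name for the
# class owning these two methods; cfg['path'] is its notes directory.
nb = Notebook()
for name, meta in nb._notes.items():
    print(name, meta)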