def readDocx(path: str, result: str, *keys):
    """Scan the .docx file at *path* for each key and write matches to *result*.

    For every table cell and every (previous+current) paragraph pair whose
    text contains one of *keys*, a line ``sep.join([path, key, text])`` is
    written to *result*.  ``sep``, ``flatten`` and ``docx`` come from the
    enclosing module.

    :param path:   path of the Word document to read
    :param result: path of the output file (opened with mode 'w+')
    :param keys:   search keys; arbitrarily nested, flattened via ``flatten``
    """
    with open(result, 'w+') as res:
        print('読み込: %s' % path)
        file = docx.Document(path)
        # Materialise the keys once: flatten() may return a one-shot
        # iterator, and the original re-flattened for every single cell.
        key_list = list(flatten(keys))
        for t in file.tables:  # type: Table
            for r in t.rows:  # type: _Row
                for c in r.cells:  # type: _Cell
                    text = c.text.replace('\n', '').replace('\r', '')
                    # IDIOM FIX: plain loop instead of a list comprehension
                    # used purely for its write() side effects.
                    for key in key_list:
                        if text.find(key) != -1:
                            res.write(sep.join([path, key, text]))
        tmp = ''
        for par in file.paragraphs:
            # Join the previous paragraph with the current one so keys that
            # span a paragraph break are still found.
            str1 = tmp + par.text
            # NOTE(review): both replace() calls look identical here —
            # presumably one was meant to strip full-width spaces; confirm.
            text = str1.replace(' ', '').replace(' ', '')
            for key in key_list:
                if text.find(key) != -1:
                    res.write(sep.join([path, key, text]))
            tmp = par.text
def get_LDA_tags(model, dictionary, text):
    """Infer topic tags for *text* from a trained LDA model.

    The text is tokenised, converted to a bag-of-words, and every topic
    whose probability clears ``MIN_PROB_THRESHOLD`` contributes its tags.

    :returns: flat list of tag strings.
    """
    tokens = process_blog_post(text)
    bag_of_words = dictionary.doc2bow(tokens)
    doc_topics = model.get_document_topics(
        bag_of_words, minimum_probability=MIN_PROB_THRESHOLD)
    collected = []
    for topic_idx, _prob in doc_topics:
        collected.append(get_topic_tags(model, dictionary, topic_idx))
    return list(flatten(collected))
def read(path: str, *param, sep: str = ',') -> list:
    """Read the .docx file at *path* and collect lines for matching keys.

    Both table cells and paragraphs are scanned; the text of the previous
    unit is prepended before searching so a key split across two units is
    still found.  For each hit, ``sep.join([path, key, cleaned_text])`` is
    appended to the result.

    :param path:  document path; empty/None yields an empty list
    :param param: search keys, arbitrarily nested (flattened via ``flatten``)
    :param sep:   field separator for the result strings
    :returns:     list of joined match strings
    """
    # BUG FIX: the original tested ``str == '' or str is None`` — that
    # compares the *builtin type* ``str`` and is always False.  The intent
    # was clearly to validate the ``path`` argument.
    if path == '' or path is None:
        return []
    res_list = []
    tmp = ''
    file = docx.Document(path)
    for t in file.tables:
        for r in t.rows:
            for par in r.cells:
                str1 = tmp + par.text
                # BUG FIX: the original computed ``tmp + str1`` here,
                # prepending tmp twice; the paragraph branch below shows
                # the intended form.
                text = str1.replace(' ', '').replace(' ', '')
                if text == '' or text is None:
                    continue
                for key in flatten(param):
                    if key in text:
                        res_list.append(
                            sep.join([
                                path, key,
                                par.text.replace('\n', '').replace('\r', '')
                            ]))
                        tmp = ''
                    else:
                        tmp = par.text
    tmp = ''
    for par in file.paragraphs:
        str1 = tmp + par.text
        text = str1.replace(' ', '').replace(' ', '')
        if text == '' or text is None:
            continue
        for key in flatten(param):
            if key in text:
                res_list.append(
                    sep.join([
                        path, key,
                        par.text.replace('\n', '').replace('\r', '')
                    ]))
                tmp = ''
            else:
                tmp = par.text
    return res_list
def parse_page(self, fpath, data, page_num):
    """Extract indicator matches from one page of text.

    Runs every compiled pattern in ``self.patterns`` over *data*, applies
    whitelisting, optional re-fanging and optional de-duplication, then
    scans for any configured custom indicators.

    :param fpath:    source file path (forwarded to the match handler)
    :param data:     page text to scan
    :param page_num: page number (forwarded to the match handler)
    :returns:        list of handler.print_match(...) results
    """
    list_matches = []
    for ind_type, ind_regex in self.patterns.items():
        for ind_match in ind_regex.findall(data):
            # findall() yields tuples when the pattern has groups;
            # keep only the first group.
            if isinstance(ind_match, tuple):
                ind_match = ind_match[0]
            if self.is_whitelisted(ind_match, ind_type):
                continue
            if ind_type in self.defang:
                # Re-fang defanged IOCs: "example[.]com" -> "example.com".
                ind_match = re.sub(r"\[\.\]", ".", ind_match)
            if self.dedup:
                if (ind_type, ind_match) in self.dedup_store:
                    continue
                self.dedup_store.add((ind_type, ind_match))
            list_matches.append(
                self.handler.print_match(fpath, page_num, ind_type, ind_match)
            )
    if self.custom_indicators:
        for indicator_type, indicator_dict in self.custom_indicators.items():
            names_union = set(flatten(indicator_dict.values()))
            combined = "|".join("\\b{}\\b".format(v) for v in names_union)
            findings = re.findall(combined, data, re.IGNORECASE)
            # IDIOM FIX: truthiness instead of len(...) > 0, and
            # isinstance instead of type(...) != tuple.
            if findings and not isinstance(findings[0], tuple):
                for stix_id, names in indicator_dict.items():
                    lower_names = set(map(lambda x: x.lower(), names))
                    for finding in findings:
                        try:
                            if finding.lower() in lower_names:
                                list_matches.append(
                                    self.handler.print_match(
                                        fpath, page_num, indicator_type, stix_id
                                    )
                                )
                        except Exception as e:
                            self.handler.print_error(findings, e)
    return list_matches
def remove_trace(nodes):
    """Recursively remove *nodes* and everything they connect to from dot.body.

    Each dot.body entry is stripped of its ``[...]`` attribute suffix and
    whitespace; edge entries are split on " -> ".  Every entry mentioning a
    node in *nodes* is deleted, and the endpoints of the deleted entries are
    removed next, until nothing matches.

    :param nodes: node names to delete (graph labels as they appear in dot.body)
    """
    # FIX: removed the leftover debug print() calls from the original.
    cleaned = [
        re.sub('\\[.*\\]', '', entry).strip().replace('\t', '').replace('\n', '')
        for entry in dot.body
    ]
    parts = [entry.split(" -> ") if " -> " in entry else [entry] for entry in cleaned]
    hits = [(idx, endpoints) for idx, endpoints in enumerate(parts)
            for n in nodes if n in endpoints]
    if not hits:
        return
    indexes, endpoint_groups = list(zip(*hits))
    reachable = set(flatten(endpoint_groups))
    # Pop from the end so earlier indexes stay valid.
    for idx in sorted(indexes)[::-1]:
        dot.body.pop(idx)
    for n in nodes:
        # BUG FIX: discard() instead of remove() — a requested node may not
        # appear among the collected endpoints, and remove() would raise
        # KeyError in that case.
        reachable.discard(n)
    return remove_trace(list(reachable))
def matchers(text, list_of_regex):
    """Return the set of all substrings of *text* matched by any pattern.

    :param text:          string to search
    :param list_of_regex: iterable of regex pattern strings
    :returns:             set of unique matched substrings
    """
    from setuptools.namespaces import flatten
    per_pattern = []
    for pattern in list_of_regex:
        per_pattern.append(re.findall(pattern, text))
    return set(flatten(per_pattern))
def flat10(lst):
    """Flatten a nested list using nltk's ``flatten`` helper.

    :param lst: arbitrarily nested list
    :returns:   flat list (nltk.flatten already returns a list)
    """
    from nltk import flatten as _flatten
    return _flatten(lst)
def flat8(lst):
    """Flatten a nested list via matplotlib's ``cbook.flatten`` generator.

    :param lst: arbitrarily nested iterable
    :returns:   flat list of the leaf elements
    """
    from matplotlib.cbook import flatten as _flatten
    return [item for item in _flatten(lst)]
def flat7(lst):
    """Flatten an arbitrarily nested list via pandas' ``flatten`` generator.

    :param lst: arbitrarily nested iterable
    :returns:   flat list of the leaf elements
    """
    from pandas.core.common import flatten as _flatten
    return [item for item in _flatten(lst)]
def flat6(lst):
    """Flatten *lst* by one level using setuptools' ``flatten``.

    Note: setuptools' flatten is ``itertools.chain.from_iterable``, so the
    result is a lazy iterator (matching the original's return), and only
    one level of nesting is removed.
    """
    from setuptools.namespaces import flatten as _flatten
    return _flatten(lst)
m4a_tag(audio_dirname, audio_filename, artist, album, track, tracks, title, year, genre, bpms, compilation) except: ERROR("Error: Failed to write tags to " + audio_filename + ".") raise fragile.Break else: ERROR("Error: Names for tags in file " + audio_filename + " could not be detected.") raise fragile.Break except: ERROR("Error: File " + audio_filename + " could not be decoded.") # -------------------- MAIN -------------------- if __name__ == "__main__" : # Iterate through acquired list of files files = list(flatten(args.files)) audio_files = [] for i in files: audio_files.extend(sorted(filter(lambda p: p.suffix in {".mp3", ".flac", ".m4a"}, Path(i).glob("**/*")))) audio_files = [str(Path(i)) for i in audio_files] # Multiprocessing if (__DEBUG__): print(f'Number of cores: {__CPU__}') # Process audio files pool = multiprocessing.Pool(processes=__CPU__) pool.map(func=process_audio, iterable=audio_files, chunksize=1) pool.close() pool.join()