def get_revision_version(): """goes through all files in lib and sums the last number in the CVS revision strings.""" files=[] print "Considering these files for release tag:" for d in ['Lib','bin',]: for f in os.listdir(d): if f[-3:]=='.py' or f=='ovf2vtk': fn = os.path.join(d,f) if os.path.exists(fn): files.append(fn) print " ... %s ..." % (fn) else: print 'File "%s" does not exists. Skipping.'%(fn) revision_version = 0 for l in fileinput.input(files): m = re.match(r'.*?\$Re[v]ision:\s*\d+[.](?P<rev>\d+)\s*\$',l) if m: revision_version = revision_version + eval(m.group('rev')) fileinput.nextfile() print "Done. Version is %s" % str(revision_version) return revision_version
def test_state_is_None(self):
    """Tests fileinput.nextfile() when fileinput._state is None.

    Ensure that it raises RuntimeError with a meaningful error message
    and does not modify fileinput._state.
    """
    fileinput._state = None
    with self.assertRaises(RuntimeError) as ctx:
        fileinput.nextfile()
    self.assertEqual(ctx.exception.args, ("no active input()",))
    self.assertIsNone(fileinput._state)
def test_state_is_None(self):
    """Tests fileinput.nextfile() when fileinput._state is None.

    Ensure that it raises RuntimeError with a meaningful error message
    and does not modify fileinput._state.
    """
    fileinput._state = None
    with self.assertRaises(RuntimeError) as raised:
        fileinput.nextfile()
    expected_args = ("no active input()", )
    self.assertEqual(expected_args, raised.exception.args)
    self.assertIsNone(fileinput._state)
def get_file_name(file):
    """Return the name(s) fileinput reports while reading *file*.

    fileinput.filename() is only meaningful once a line has been read.
    For a path that is missing or not a regular file, a message is
    printed and None is returned implicitly (original contract kept).
    """
    if os.path.exists(file) and os.path.isfile(file):
        names = []
        for line in fileinput.input(file):
            name = fileinput.filename()
            # FIX: identity test `is not None` instead of `!= None`.
            if name is not None:
                # Close the current file so iteration stops after the
                # first line; the reported name is only needed once.
                fileinput.nextfile()
                names.append(name)
        return names
    else:
        # FIX: corrected the broken error message ("is not exits!").
        print('the path [{}] does not exist!'.format(file))
def CHKSUMfilter(files):
    """Copy each file whose CKSUM line has not been seen before to `out`,
    skipping the remainder of a file once its CKSUM line is found."""
    seen = set()
    cksum_re = re.compile('^CKSUM')
    for record in fileinput.input(files):
        if cksum_re.search(record):
            if record not in seen:
                shutil.copy(fileinput.filename(), out)
                seen.add(record)
            fileinput.nextfile()
    fileinput.close()
def get_file_name(file): '''只有当文件被读的时候才会取得filename,否则返回None''' if os.path.exists(file) and os.path.isfile(file): names = [] for line in fileinput.input(file): name = fileinput.filename() if name != None: fileinput.nextfile() names.append(name) return names else: print 'path [{}] is not exists~'.format(file)
def CHKSUMfilter(files):
    """Copy files carrying a previously-unseen CKSUM line into `out`."""
    recorded_lines = set()
    pattern = re.compile('^CKSUM')
    for current_line in fileinput.input(files):
        if pattern.search(current_line) is None:
            continue
        if current_line not in recorded_lines:
            recorded_lines.add(current_line)
            shutil.copy(fileinput.filename(), out)
        # A CKSUM line ends our interest in this file.
        fileinput.nextfile()
    fileinput.close()
def my_glob(p, args, s, base_hash):
    """Glob for args.filename under p/s (optionally recursive), verify any
    sibling .hash files, and write a hash computed from the first line of
    each matched file."""
    for file in glob.iglob('{}/{}/{}'.format(p, s, args.filename),
                           recursive=args.flag_recursive):
        if os.path.isdir(file):
            # BUG FIX: the original called fileinput.nextfile() here.  If
            # the first glob hit is a directory no fileinput stream is
            # active yet, so that raised RuntimeError("no active input()").
            # Simply skip directories instead.
            continue
        with os.scandir(os.path.dirname(os.path.realpath(file))) as it:
            for entry in it:
                if pathlib2.PurePath(entry).suffix == '.hash':
                    internal_hash_check(entry, args)
        for line in fileinput.input(file):
            folder_path_to_save_in, filename = pathname_finder(file)
            file_hashed = hash_make(line, base_hash)
            hash_write_to_file(file_hashed, folder_path_to_save_in,
                               args.filename)
            # Only the first line is hashed; close the file and move on.
            fileinput.nextfile()
def TCfilter(files, cutoff):
    """Copy every file whose TC score is below *cutoff* into `out`."""
    for line in fileinput.input(files):
        if not line.startswith("TC "):
            continue
        score = line.split()[1]
        if score == "------":
            # No score recorded for this file; skip the rest of it.
            fileinput.nextfile()
        elif float(score) < cutoff:
            shutil.copy(fileinput.filename(), out)
        else:
            # Score at or above the cutoff: not interesting, next file.
            fileinput.nextfile()
    fileinput.close()
def _get_markdown_title(self, markdown_file): """ Return the first line of the given file having stripped off any leading #'s or spaces. Truncate the result to 50 chars """ for line in fileinput.input(markdown_file): title = line break; fileinput.nextfile() matches = re.match(r'^[#]*[ ]*([ \S]+)\n$', title) title = matches.group(1) title = title[:50] title = title.replace("\n", "") return title
def parse(self):
    """Parse product pages fed in via fileinput (argv files / stdin).

    Scans each file for title, brand, nutritional-info and caloric
    blocks; when a new file starts, the previous file's accumulated
    record is appended to self.result.

    NOTE(review): the record of the very last file appears never to be
    flushed — appending only happens on the *next* file's first line.
    Confirm against the caller.
    """
    skip = -1        # countdown to the data line we want to capture
    skipping = None  # which block the countdown belongs to: 'nutri'/'caloric'
    title = None
    brand = ''
    ingredients = None
    calories = None
    filename = ''
    for line in fileinput.input():
        if fileinput.isfirstline():
            # A new file begins: flush the previous file's record.
            if title:
                self.result.append({
                    'name': self.decode(title),
                    'url': urllib.parse.unquote(self.name2url(filename)),
                    'brand': self.decode(brand),
                    'calories': calories,
                    'ingredients': ingredients,
                })
                title = None
                brand = ''
                ingredients = None
            filename = fileinput.filename()
        if title is None and '<title>' in line:
            # First comma-separated chunk of the <title> tag's text.
            title = line.split('>')[1].split(',')[0]
        if not brand and 'brand:' in line:
            brand = line.split("'")[1]
        if skip > 0:
            skip -= 1
        elif skip == 0:
            # This is the line the countdown pointed at.
            skip = -1
            if skipping == 'nutri':
                ingredients = self.getIngredients(self.decode(line))
            elif skipping == 'caloric':
                calories = self.getCalories(self.decode(line))
                # Calories is the last datum of interest in a file.
                fileinput.nextfile()
        elif NUTRI_INFO in line:
            skip = 1
            skipping = 'nutri'
        elif CALORIC in line:
            skip = 1
            skipping = 'caloric'
def test_missing_debug_statements(self):
    """Scan every .py file under self.source_dir (one level deep) and
    assert that none contains a missing-debug statement; the generated
    file generate_categories.py is exempt."""
    message = "\nFound a debug missing statement at line %d of file %r: %r"
    current_name = None
    py_sources = (glob.glob(os.path.join(self.source_dir, '*.py'))
                  + glob.glob(os.path.join(self.source_dir, '*/*.py')))
    for source_line in fileinput.input(py_sources):
        if fileinput.isfirstline():
            current_name = os.path.basename(fileinput.filename())
            if current_name == 'generate_categories.py':
                # Generated file: skip it entirely.
                fileinput.nextfile()
                continue
        line_no = fileinput.filelineno()
        hit = self.missing_debug.search(source_line)
        self.assertIsNone(
            hit,
            message % (line_no, current_name, hit.group(0) if hit else None))
def extractTitle(inPath, outPath):
    "extract title from texts and put all into one file"
    # Each subdirectory of inPath is treated as a category label; the
    # first non-URL line of every file in it is taken as the title,
    # tokenized, and written as "label<TAB>tokens" to outPath.
    labels = os.listdir(inPath)
    outFile = open(outPath, 'w')
    num = {}  # per-label count of extracted titles
    for label in labels:
        num[label] = 0
        for line in fileinput.input(glob.glob(inPath+"/"+label+"/*")):
            if not line.startswith('http://'):
                # Input files are GBK-encoded; undecodable bytes dropped.
                line_uni = unicode(line, 'gbk', 'ignore')
                # tokenize
                tokens = " ".join(jieba.cut(line_uni))
                outFile.write( "%s\t%s" % (label, tokens))
                num[label]+=1
                # Title found: skip the rest of this file.
                fileinput.nextfile()
        fileinput.close()
    outFile.close()
    print "Extract Files"
    for label, n in num.iteritems():
        print "%-10s : %d" % (label, n)
def extractTitle(inPath, outPath):
    "extract title from texts and put all into one file"
    # Treats each subdirectory of inPath as a label; extracts the first
    # line that is not a URL from each contained file as its title.
    labels = os.listdir(inPath)
    outFile = open(outPath, 'w')
    num = {}  # titles extracted per label
    for label in labels:
        num[label] = 0
        for line in fileinput.input(glob.glob(inPath + "/" + label + "/*")):
            if not line.startswith('http://'):
                # Source files are GBK; ignore undecodable bytes.
                line_uni = unicode(line, 'gbk', 'ignore')
                # tokenize
                tokens = " ".join(jieba.cut(line_uni))
                outFile.write("%s\t%s" % (label, tokens))
                num[label] += 1
                # One title per file: move on to the next file.
                fileinput.nextfile()
        fileinput.close()
    outFile.close()
    print "Extract Files"
    for label, n in num.iteritems():
        print "%-10s : %d" % (label, n)
def load_bib_lines(filenames):
    """Load *.tex files and read them line by line.
    This method only loads the bibliography section and checks for ascii"""
    bibliography = {}  # maps filename -> list of bibitem lines
    bibsection = 0     # 1 while inside \begin{thebibliography}...\end{...}
    biberrors = 0      # count of non-ASCII lines encountered
    filenames = expandFilenames(filenames)
    for line in fileinput.input(filenames, mode='rU'):
        #iterate until we get to a bibitem section
        line = line.strip()
        if line.startswith(r"\begin{thebibliography}"):
            #mark lines
            bibitems = []
            bibsection = 1
            continue
        elif line.startswith(r"\end{thebibliography}"):
            # Bibliography complete: store it and skip the rest of the file.
            bibliography[fileinput.filename()] = bibitems
            bibitems = []
            bibsection = 0
            fileinput.nextfile()
        if bibsection == 1:
            if not line.isspace():
                try:
                    # Python 2: str.decode raises on non-ASCII bytes.
                    line = line.decode("ascii")
                    candline = removeComment(line)
                    if candline:
                        bibitems.append(candline)
                except UnicodeDecodeError:
                    print "Special Character on line {0} in file {1}".format(fileinput.filelineno(), fileinput.filename())
                    print line
                    print "-".center(80, '-')
                    biberrors += 1
    if biberrors > 0:
        print "{0} errors detected. Received non-ASCII input".format(biberrors)
        #return an empty list so we don't process bad output
        return []
    return split_bibitems(bibliography)
def create_grep_match_generator(regexp, paths, is_file_only=False, path_pfx=os.environ.get('PATH_PFX')):
    '''Create grep pattern match generator.

    Yields "filename:line" for every line matching *regexp* in the files
    derived from *paths*, or just the filename once per file when
    *is_file_only* is true (grep -l style).

    NOTE(review): the path_pfx default is evaluated once at import time,
    so later changes to PATH_PFX are not picked up.
    NOTE(review): a UnicodeDecodeError in any file silently terminates
    the whole generator, skipping all remaining files — confirm this
    best-effort behavior is intended.
    '''
    files = _create_files_list(xform_args(paths, path_pfx))
    try:
        for line in fileinput.input(files):
            if re.search(regexp, line):
                line = line.rstrip('\r\n')
                # File-only mode emits no line text after the filename.
                end_txt = '' if is_file_only else ':{0}'.format(line)
                yield fileinput.filename() + end_txt
                if is_file_only:
                    # One hit per file is enough; jump to the next file.
                    fileinput.nextfile()
    except UnicodeDecodeError as exc:
        # print(repr(exc))
        pass
def test_state_is_not_None(self):
    """Tests fileinput.nextfile() when fileinput._state is not None.

    Ensure that it invokes fileinput._state.nextfile() exactly once,
    returns whatever it returns, and does not modify fileinput._state
    to point to a different object.
    """
    nextfile_retval = object()
    instance = MockFileInput()
    instance.return_values["nextfile"] = nextfile_retval
    fileinput._state = instance
    retval = fileinput.nextfile()
    self.assertExactlyOneInvocation(instance, "nextfile")
    self.assertIs(retval, nextfile_retval)
    # FIX: the docstring promises that fileinput._state is not rebound,
    # but the original never asserted it (the sibling variant of this
    # test in this file does).
    self.assertIs(fileinput._state, instance)
def test_state_is_not_None(self):
    """Tests fileinput.nextfile() when fileinput._state is not None.

    Ensure that it invokes fileinput._state.nextfile() exactly once,
    returns whatever it returns, and does not modify fileinput._state
    to point to a different object.
    """
    sentinel = object()
    mock_input = MockFileInput()
    mock_input.return_values["nextfile"] = sentinel
    fileinput._state = mock_input
    result = fileinput.nextfile()
    self.assertExactlyOneInvocation(mock_input, "nextfile")
    self.assertIs(result, sentinel)
    self.assertIs(fileinput._state, mock_input)
def input_files():
    """
    Return a list containing tuples such that return[0] is the name of
    the file and return[1] is a slurped string of the file's contents.

    # TODO: This function doesn't work if there were options passed. In
    fact, it attempts to open option strings as filenames. Option
    strings could be removed from `sys.argv`, I guess.

    This function hangs if it doesn't get input. Press Ctrl-D (EOF) to
    continue.
    """
    # Collect every option string declared in PARSER_ARGUMENTS so they
    # can be stripped from sys.argv before fileinput sees them.
    OPT_STRS = list()
    for item in PARSER_ARGUMENTS:
        OPT_STRS += item[0]
    i = 1
    while i < len(sys.argv):
        # Compare only the part before '=' (handles --opt=value forms).
        s = sys.argv[i].split('=')[0].strip()
        if s in OPT_STRS:
            # Remove the option in place; do not advance i, since the
            # next element shifted into position i.
            t = sys.argv.pop(i)
            # if s in {"-c","--config","--logfile","--tempdir","-i","--input","-o","--output"}:
            #     t = sys.argv.pop(i)
        else:
            i += 1
    files = []
    s = ''  # accumulator for text piped in on stdin
    for line in fileinput.input():
        if fileinput.isstdin():
            s += line
        elif fileinput.isfirstline():
            # Real file: slurp it in one go and skip fileinput's
            # line-by-line traversal of it.
            fname = fileinput.filename()
            with open(fname) as f:
                files.append((fname, f.read()))
            fileinput.nextfile()
    if s:
        # stdin content is reported with a None filename.
        files.append((None, s))
    return files
def parse(self, key=None, encoding="U8"):
    """Yield (title, subtitle, author, body) tuples parsed from the
    files matching the self.files glob pattern, sorted by *key* and
    decoded with *encoding* (default UTF-8).

    self.s accumulates the lines of the current article; the predicates
    is_need/is_end and the extractors gettitle/getsubtitle/getauthor/
    getbody are provided elsewhere on the class.
    """
    files = self.files
    globs = sorted(glob.glob(files), key=key)
    if globs:
        for line in fileinput.input(globs, openhook=fileinput.hook_encoded(encoding)):
            if "<title>" in line:
                self.s = ""
                # [7:-8] strips the "<title>" / "</title>" markup.
                title = self.gettitle(line.strip()[7:-8])
                if not title:
                    # Untitled document: discard and skip this file.
                    self.s = ""
                    fileinput.nextfile()
                    continue
            if self.is_need(line):
                self.s += line.strip() + "\n"
            if self.is_end(line):
                if self.getbody() == "":
                    # Nothing accumulated: skip to the next file.
                    self.s = ""
                    fileinput.nextfile()
                    continue
                yield title, self.getsubtitle(), self.getauthor(), self.getbody()
                self.s = ""
                fileinput.nextfile()
def parse(self, key=None, encoding="U8"):
    """Generator over articles found in the files matching self.files.

    Files are globbed, sorted by *key*, opened with *encoding*, and
    scanned; each completed article is yielded as a
    (title, subtitle, author, body) tuple.  The buffer self.s holds the
    lines of the article currently being assembled.
    """
    files = self.files
    globs = sorted(glob.glob(files), key=key)
    if globs:
        for line in fileinput.input(
                globs, openhook=fileinput.hook_encoded(encoding)):
            if "<title>" in line:
                self.s = ""
                # Slice away the surrounding <title>...</title> tags.
                title = self.gettitle(line.strip()[7:-8])
                if not title:
                    # No usable title: abandon this file.
                    self.s = ""
                    fileinput.nextfile()
                    continue
            if self.is_need(line):
                self.s += line.strip() + "\n"
            if self.is_end(line):
                if self.getbody() == "":
                    # Empty body: nothing to yield for this file.
                    self.s = ""
                    fileinput.nextfile()
                    continue
                yield title, self.getsubtitle(), self.getauthor(
                ), self.getbody()
                self.s = ""
                fileinput.nextfile()
#-*- coding: UTF-8 -*- __author__ = 'mcxiaoke' import os,fileinput # 内部使用FileInput类实现 # fileinput 是一个从标准输入或者文件按行读入的工具类,默认是文本模式,一般的用法是 for line in fileinput.input(['file_input.py']): print fileinput.filename() # 文件名 print fileinput.fileno() #文件描述符,int print fileinput.lineno() #总的行号 print fileinput.filelineno() #当前文件的行号 print fileinput.isstdin() # 是否标准输入 print fileinput.isfirstline() # 是否是文件的第一行 print fileinput.nextfile() # 关闭当前文件,开始读下一个文件 print fileinput.close() # 关闭输入序列
def main():
    """Scan Warcraft game-log text files and build a list of per-game
    records, writing the result to the file "list_of_games".

    Files are read via one fileinput stream; fixed line numbers within
    each file carry known fields (4: mode, 5: length, 7-12: players).
    """
    good_games = 0
    total_games = 0
    short_games = 0
    bugged_games = 0
    game_list = []
    Wfile = open("list_of_games", "w")
    #Loop file lines
    for line in fileinput.input(glob.glob("/Users/Ben/Desktop/Misc_Docs/wcscans/processedFiles/Solo/game*.txt")):
        #print fileinput.filename()
        #Make sure game is greater than zero mins
        if fileinput.filelineno()==5:
            if "Game Length: 0 minutes" in line:
                #print "Game Length Zero"
                fileinput.nextfile()
                short_games += 1
            if "Game Length: 1 minutes" in line:
                #print "Game Length One"
                fileinput.nextfile()
                short_games += 1
        #Make sure game is solo
        if fileinput.filelineno()==4:
            if "Solo" not in line:
                print "Not Solo"
                fileinput.nextfile()
        if fileinput.isfirstline():
            # First line, initialize
            #print fileinput.filename()
            game_info = {}
            # NOTE(review): dict.fromkeys returns a NEW dict; this call
            # discards its result, so game_info stays empty here.
            game_info.fromkeys(game_fields)
            total_games += 1
        #Get game date/time
        if fileinput.filelineno()==2:
            game_info['date_time'] = get_date_time(line)
        #player names, races, levels, winner
        try:
            if fileinput.filelineno()==7:
                game_info['player1_name'] = get_player_name(line)
                game_info['player1_race'] = get_player_race(line)
            if fileinput.filelineno()==8:
                game_info['player1_level'] = get_player_level(line)
            if fileinput.filelineno()==10:
                game_info['player2_name'] = get_player_name(line)
                game_info['player2_race'] = get_player_race(line)
            if fileinput.filelineno()==11:
                game_info['player2_level'] = get_player_level(line)
            if fileinput.filelineno()==12:
                game_info['winning_player'] = get_winning_player(line, game_info)
        except:
            # NOTE(review): bare except hides real errors (including
            # KeyboardInterrupt); consider `except Exception`.
            bugged_games += 1
            fileinput.nextfile()
        #finalize
        if fileinput.filelineno()>12:
            if any (field not in game_info for field in game_fields):
                print "One of the fields for this game is empty, failed to read game."
                fileinput.nextfile()
            else:
                good_games += 1
                game_list.append(game_info)
                if good_games % 1000 == 0:
                    print "Scanned over " + str(good_games) + " games"
                fileinput.nextfile()
    print "Short games not counted = " + str(short_games)
    print "Bugged games not counted = " + str(bugged_games)
    print "Good games = " + str(good_games)
    print "Total games = " + str(total_games)
    #print "\n".join(str(v) for v in game_list)
    Wfile.write("list_of_games = " + str(game_list))
# webbrowser模块 import webbrowser # webbrowser.open("https://github.com") # fileinput模块 import fileinput # 可以读取参数传入文件,将每一行作为一个迭代器,可以用for来迭代 # 如python some.py f1.txt f2.txt print fileinput.input() fileinput.filename() fileinput.lineno() fileinput.filelineno() fileinput.isfirstline() fileinput.isstdin() fileinput.nextfile() # fileinput.close() # set模块 print set(range(10)) # set([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) # set取唯一的值 print set([0, 1, 2, 3, 0, 3]) # set([0, 1, 2, 3]) a = set([1, 2, 3]) b = set([2, 3, 4]) print a.union(b) # set([1, 2, 3, 4]) print a | b # set([1, 2, 3, 4]) __or__实现 c = a & b # __and__实现 print c # set([2, 3]) print c.issubset(a) # True print c <= a # True __le__实现 print c.issuperset(a) # False
linenumber = fileinput.filelineno() prevline = line yield prevline, True pattern = re.compile(r'(<<(\/?) *(if|for|else|switch|case|replace|link)[^a-zA-Z][^<>]*)') tagfound = [] try: for line, isLastLine in yield_line_and_islastline(fileinput.input()): for (whole,end,tag) in re.findall(pattern,line): if tag == "else" or tag == 'case': if len(tagfound) == 0: myprint("Found", tag, "but with no opening tag:") myprint(" ", linenumber,":", whole) fileinput.nextfile() lasttag = tagfound[-1] if (tag == "else" and lasttag["tag"] != "if") or (tag == "case" and lasttag["tag"] != "switch"): myprint("Mismatched else: Opening tag was:") myprint(" ",lasttag["linenumber"],":", lasttag["whole"]) myprint("But this tag was:") myprint(" ",linenumber,":", whole) fileinput.nextfile() break elif end != '/': tagfound.append({"whole": whole, "linenumber":linenumber,"tag":tag}) else: if len(tagfound) == 0: myprint("Found closing tag but with no opening tag:") myprint(" ", linenumber,":", whole) fileinput.nextfile()
def nextfile(file):
    """Close the current fileinput file and move on to the next one.

    The *file* argument is accepted for interface compatibility but is
    not used; the call is delegated to the module-global fileinput state.
    """
    return fileinput.nextfile()
def main_work():
    """Command-line entry point: load a trained Voice and synthesise the
    given text files (or stdin) paragraph by paragraph, optionally
    playing and/or writing label files.

    SSML input (<speak>/<xml> first line) is parsed as a whole document;
    plain text is grouped into paragraphs separated by blank lines.
    """
    #################################################
    # root is one level below this file in directory structure,
    # ie. below the 'scripts' folder
    ROOT = os.path.split(
        os.path.realpath(
            os.path.abspath(
                os.path.dirname(inspect.getfile(
                    inspect.currentframe())))))[0] + '/'
    dirs = {
        'ROOT': ROOT,
        'CONFIG': ROOT + "configs/",
        'VOICES': ROOT + "voices/",
        'TRAIN': ROOT + "train/",
        'RULES': ROOT + "rules/",
        'CORPUS': ROOT + "corpus/",
        'BIN': ROOT + "/tools/bin/"
    }
    # ======== Get stuff from command line ==========
    a = ArgumentParser()
    a.add_argument('-s', dest='speaker', required=True, \
        help= "the name of the speaker: <ROOT>/corpus/<LANG>/<SPEAKER>")
    a.add_argument('-l', dest='lang', required=True, \
        help= "the language of the speaker: <ROOT>/corpus/<LANG>")
    a.add_argument('-o', dest='output', required=False, default=False, \
        help= "output audio here")
    a.add_argument('-t', dest='stage', required=False, default="runtime", \
        help=""" defines the current usage stage (definitions of stages should by found in <config>/recipe.cfg""")
    a.add_argument('-play', dest='play', action="store_true", required=False, default=False, \
        help=" play audio after synthesis")
    a.add_argument('-lab', dest='make_label', action="store_true", default=False, \
        help= "make label file as well as wave in output location")
    a.add_argument('config', help="""configuration to use: naive, semi-naive, gold, as defined in <ROOT>/recipes/<config> -directory""" )
    a.add_argument('-bin', dest='custom_bindir')
    a.add_argument('files', nargs='*', help="text files to speak, reading from stdin by default")
    a.add_argument('-m', dest='model_dir', required=True, type=str, help="model directory")
    opts = a.parse_args()
    # Model directory overrides the default train/voices locations.
    dirs['TRAIN'] = opts.model_dir + "/train/"
    dirs['VOICES'] = opts.model_dir + "/voices/"
    if opts.custom_bindir != None:
        dirs['BIN'] = opts.custom_bindir
    voice_location = os.path.join(dirs['VOICES'], opts.lang, opts.speaker, opts.config)
    train_location = os.path.join(dirs['TRAIN'], opts.lang, "speakers", opts.speaker, opts.config)
    config_path = os.path.join(dirs['CONFIG'], opts.config)
    voice_config = os.path.join(config_path, fname.RECIPE)
    ## Make Voice object to contain voice elements trained on this corpus:
    voice = Voice(opts.speaker, opts.lang, opts.config, opts.stage, dirs)
    if not opts.output:
        output_wavefile = os.path.join(voice_location, 'output', 'wav', 'temp.wav')
    else:
        output_wavefile = opts.output
    if not opts.output:
        output_labfile = None
    else:
        output_labfile = output_wavefile.replace('.wav', '.lab')
    prevspace = False
    para = []
    # Go through the files a paragraph at a time, unless it's SSML in
    # which case we parse it.  An empty line marks the change of
    # paragraphs in plain text files.
    for line in fileinput.input(opts.files):
        line = line.decode('utf-8').rstrip()
        t = start_clock('Synthesise sentence')
        print line
        if fileinput.isfirstline():
            # New file: flush the previous file's trailing paragraph.
            if para != []:
                voice.synth_utterance(''.join(para), output_wavefile=output_wavefile, \
                    output_labfile=output_labfile)
                if opts.play:
                    os.system('play ' + output_wavefile)
                para = []
            line = line.lstrip()
            if line.startswith('<speak') or line.startswith('<xml'):
                # SSML document: hand the whole file to the XML parser
                # and skip fileinput's line-wise traversal of it.
                tree = etree.parse(fileinput.filename())
                parseSSML(tree, voice)
                fileinput.nextfile()
            else:
                para.append(line)
        elif line.isspace():
            # NOTE(review): line was rstrip()ed above, so a blank line
            # becomes '' and isspace() is always False here — the
            # paragraph-break branch below may be unreachable; confirm.
            prevspace = True
        elif prevspace and para != []:
            # Paragraph boundary: synthesise what we have accumulated.
            voice.synth_utterance(''.join(para), output_wavefile=output_wavefile, \
                output_labfile=output_labfile)
            prevspace = False
            para = [line]
        else:
            para.append(line)
    # Flush the final paragraph.
    if para != []:
        voice.synth_utterance(''.join(para), output_wavefile=output_wavefile, \
            output_labfile=output_labfile)
        if opts.play:
            os.system('play ' + output_wavefile)
    # NOTE(review): t is only bound inside the loop; with no input lines
    # this raises NameError.
    stop_clock(t)
def export_swadesh_entries(input_path, output_path=None):
    """Export dictionary and wordlist entries whose Spanish side contains
    a (stemmed) Swadesh-list word from the corpus at *input_path* into a
    reduced corpus at *output_path*.

    Support CSV tables are copied verbatim; entry.csv / annotation.csv
    (and their wordlist counterparts) are filtered by matching entry id.
    """
    print("Input: {0}".format(input_path))
    print("Ouput: {0}".format(output_path))
    cr = CorpusReaderDict(input_path)
    print("Data loaded")
    # Tables copied through unchanged.
    files = [
        "book.csv", "component.csv", "corpusversion.csv", "dictdata.csv",
        "language_iso.csv", "language_bookname.csv", "language_src.csv",
        "language_tgt.csv", "nondictdata.csv", "wordlistdata.csv",
        "wordlistconcept.csv"
    ]
    for f in files:
        shutil.copyfile(os.path.join(input_path, f), os.path.join(output_path, f))
    from nltk.stem.snowball import SpanishStemmer
    stemmer = SpanishStemmer()
    import qlc.utils
    #get stopwords
    stopwords = qlc.utils.stopwords_from_file(os.path.join(os.path.dirname(
        os.path.realpath(__file__)), "data", "stopwords", "spa.txt"))
    # load swadesh list (one or more comma-separated words per line)
    swadesh_file = codecs.open(os.path.join(os.path.dirname(
        os.path.realpath(__file__)), "data", "swadesh", "spa.txt"), "r", "utf-8")
    swadesh_entries = []
    for line in swadesh_file:
        line = line.strip()
        for e in line.split(","):
            stem = stemmer.stem(e)
            swadesh_entries.append(stem)
    # find all entries that contain one of the swadesh words
    # save entry ids to list
    entry_ids = []
    dictdata_ids = cr.dictdata_string_ids
    for dictdata_id in dictdata_ids:
        src_language_iso = cr.src_languages_iso_for_dictdata_id(dictdata_id)
        tgt_language_iso = cr.tgt_languages_iso_for_dictdata_id(dictdata_id)
        # is there some spanish?
        if (src_language_iso != ['spa']) and (tgt_language_iso != ['spa']):
            continue
        for entry_id, head, translation in \
                cr.ids_with_heads_with_translations_for_dictdata_id(dictdata_id):
            # Normalize so `translation` is always the Spanish side.
            if src_language_iso == ['spa']:
                (head, translation) = (translation, head)
            # Drop parenthesised annotations.
            translation = re.sub(" ?\([^)]\)", "", translation)
            if translation in stopwords:
                entry_ids.append(entry_id)
            else:
                translation = qlc.utils.remove_stopwords(translation, stopwords)
                phrase_stems = qlc.utils.stem_phrase(translation, stemmer, True)
                for stem in phrase_stems:
                    if stem in swadesh_entries:
                        entry_ids.append(entry_id)
    #print(len(entry_ids))
    #return
    input_entry_csv = os.path.join(input_path, "entry.csv")
    output_entry_csv = os.path.join(output_path, "entry.csv")
    input_annotation_csv = os.path.join(input_path, "annotation.csv")
    output_annotation_csv = os.path.join(output_path, "annotation.csv")
    output_annotation = codecs.open(output_annotation_csv, "w", "utf-8")
    annotation_dict = collections.defaultdict(list)
    # cache annotations for lookup (row 0 is the header)
    for i, line in enumerate(fileinput.input(
            input_annotation_csv, openhook=fileinput.hook_encoded("utf-8"))):
        if i == 0:
            output_annotation.write(line)
            continue
        data = line.strip().split("\t")
        annotation_dict[data[_annotation_table_columns['entry_id'] + 1]].append(line)
    fileinput.nextfile()
    output = codecs.open(output_entry_csv, "w", "utf-8")
    count_entries = 0
    # Write only the entries whose id matched a Swadesh stem, together
    # with their cached annotations.
    for i, line in enumerate(fileinput.input(
            input_entry_csv, openhook=fileinput.hook_encoded("utf-8"))):
        if i == 0:
            output.write(line)
            continue
        data = line.strip().split("\t")
        if data[0] in entry_ids:
            output.write(line)
            for annotation_line in annotation_dict[data[0]]:
                output_annotation.write(annotation_line)
    fileinput.nextfile()
    output.close()
    output_annotation.close()
    # Worldists
    cr = CorpusReaderWordlist(sys.argv[1])
    print("Data loaded")
    # find all entries that contain one of the swadesh words
    # save entry ids to list
    wordlistdata_ids = cr.wordlistdata_string_ids
    bibtex_keys = collections.defaultdict(list)
    for wid in wordlistdata_ids:
        wordlistdata_string = cr.wordlistdata_string_ids[wid]
        bibtex_key = wordlistdata_string.split("_")[0]
        bibtex_keys[bibtex_key].append(wid)
    wordlistentry_ids = []
    # NOTE(review): `for bibtex_key in bibtex_key:` iterates the string
    # left over from the previous loop, not `bibtex_keys` — almost
    # certainly a bug.  Inside, `wordlistdata_id` and `dictdata_id` are
    # also unbound in this scope, and `wordlistentry_ids` starts empty
    # so both inner loops never run.  Left unchanged pending confirmation.
    for bibtex_key in bibtex_key:
        # first collect all concepts in this book where the spanish counterpart
        # has one of the swadesh words
        concepts = []
        for wordlistentry_id in wordlistentry_ids:
            language_iso = cr.get_language_code_for_wordlistdata_id(wordlistdata_id)
            # is there some spanish?
            if language_iso != ['spa']:
                continue
            for entry_id, concept, counterpart in \
                    cr.ids_with_concepts_with_counterparts_for_dictdata_id(dictdata_id):
                counterpart = re.sub(" ?\([^)]\)", "", counterpart)
                if counterpart in stopwords:
                    entry_ids.append(entry_id)
                else:
                    counterpart = qlc.utils.remove_stopwords(counterpart, stopwords)
                    phrase_stems = qlc.utils.stem_phrase(counterpart, stemmer, True)
                    for stem in phrase_stems:
                        if stem in swadesh_entries:
                            concepts.append(concept)
        # now collect the entry ids for those concepts
        for wordlistentry_id in wordlistentry_ids:
            for entry_id, concept, counterpart in \
                    cr.ids_with_concepts_with_counterparts_for_dictdata_id(dictdata_id):
                if concept in concepts:
                    wordlistentry_ids.append(entry_id)
    input_entry_csv = os.path.join(input_path, "wordlistentry.csv")
    output_entry_csv = os.path.join(output_path, "wordlistentry.csv")
    input_annotation_csv = os.path.join(input_path, "wordlistannotation.csv")
    output_annotation_csv = os.path.join(output_path, "wordlistannotation.csv")
    output_annotation = codecs.open(output_annotation_csv, "w", "utf-8")
    annotation_dict = collections.defaultdict(list)
    # Cache wordlist annotations keyed by entry id (row 0 is the header).
    for i, line in enumerate(fileinput.input(input_annotation_csv,
            openhook=fileinput.hook_encoded("utf-8"))):
        if i == 0:
            output_annotation.write(line)
            continue
        data = line.strip().split("\t")
        annotation_dict[data[_wordlistannotation_table_columns['entry_id'] + 1]].append(line)
    fileinput.nextfile()
    output = codecs.open(output_entry_csv, "w", "utf-8")
    count_entries = 0
    for i, line in enumerate(fileinput.input(input_entry_csv,
            openhook=fileinput.hook_encoded("utf-8"))):
        if i == 0:
            output.write(line)
            continue
        data = line.strip().split("\t")
        if data[0] in entry_ids:
            output.write(line)
            for annotation_line in annotation_dict[data[0]]:
                output_annotation.write(annotation_line)
    fileinput.nextfile()
    output.close()
    output_annotation.close()
def add_line(self, line):
    """Feed one raw IRC-log line into the game-state machine.

    Parses timestamp/nick/message, tracks nick changes, detects game
    start/stop, and records lynches, kills, shots, quits, idles and the
    final role listing onto self.* accumulators.  File context (for
    timestamps and skipping) comes from the module-global fileinput.
    """
    # Parse out timestamp, nick, and message
    line = line.strip()
    # strip formatting codes (mIRC bold/underline/colour control bytes)
    line = re.sub("\x1f|\x02|\x12|\x0f|\x16|\x03(?:\d{1,2}(?:,\d{1,2})?)?", '', line)
    m = re.match('\[([0-9:]+)\] (\*\*\*|\* [^ ]+|<.+?>) (.*)', line)
    if m == None:
        # blank line perhaps — retry without the trailing message group
        m = re.match('\[([0-9:]+)\] (\*\*\*|\* [^ ]+|<.+?>)', line)
        if m == None:
            print ("Unrecognized line " + line)
            return
    time = m.group(1)
    if m.group(2) == "***":
        nick = "**Server**"
        action = False
    else:
        if m.group(2)[0] == "*":
            # "* nick" form: a /me action line
            nick = m.group(2)[2:]
            action = True
        else:
            # "<nick>" form: strip the angle brackets
            nick = m.group(2)[1:-1]
            action = False
    if len(m.groups()) == 3:
        message = m.group(3)
    else:
        message = ''
    # Figure out full timestamp from the time + filename
    fname = fileinput.filename()
    m = re.search('([0-9]{4})/?([0-9]{2})/?([0-9]{2})', fname)
    if m == None:
        print ("Could not find timestamp for file " + fname)
        print ("Skipping file...")
        fileinput.nextfile()
        return
    # YYYYMMDDhhmmss built from the filename date and the line's time.
    timestamp = "%s%s%s%s%s%s" % (m.group(1), m.group(2), m.group(3), time[0:2], time[3:5], time[6:8])
    # If we or the bot joins/quits, wipe whatever current game is in progress as we lost it
    if nick == "**Server**":
        m = re.match('(?:Joins|Quits|Parts): (.*?) \((.*?)\)', message)
        if m != None:
            if m.group(1) in self.ownnicks or m.group(1) in self.botnicks:
                self.reset()
                return
    # Do we need to start a new game?
    m = re.match('(.*): Welcome to Werewolf, the popular detective/social party game \(a theme of Mafia\)\.', message)
    if m != None:
        if nick in self.botnicks:
            # if we are currently running a game, this is a bug
            if self.game:
                print ("!!! BUG !!! Starting a new game when a game is already running! File: %s Line: %s (started in %s at line %s)" % (fileinput.filename(), fileinput.filelineno(), self.startfile, self.startline))
                exit(1)
            # started a new game, first group is the nicks of who are playing
            self.game = True
            self.id = timestamp
            self.players = m.group(1).split(', ')
            self.gamesize = len(self.players)
            self.startfile = fileinput.filename()
            self.startline = fileinput.filelineno()
            # now set game options (wolfgunner, nightgunner, hiddentraitor)
            # wolfgunner and nightgunner are on if the game took place on or after 9/6/2013
            # hiddentraitor is on if the game took place on or after 2/10/2014
            if int(timestamp) > 20130906000000:
                self.options['wolfgunner'] = True
                self.options['nightgunner'] = True
            if int(timestamp) > 20140210000000:
                self.options['hiddentraitor'] = True
            # check if id already exists in the database, if so we can just skip over this game (saves a lot of processing/regexes)
            # only do this if we aren't updating the schema
            doc = r.table('games').get(self.id).run(self.db)
            if doc and doc['schema'] == self.schema:
                self.skipped += 1
                self.reset()
                return
            elif doc:
                self.replace = True
        elif nick not in self.othernicks:
            self.othernicks.append(nick)
            print ("Possible missed bot nick: %s" % nick)
    # If we have a game running, record the line
    if self.game:
        # determine realnick from nickmap
        realnick = self.nickmap.get(nick, nick)
        # determine if we need to add something to nickmap
        if nick == "**Server**":
            m = re.match('(.*?) is now known as (.*)', message)
            if m != None:
                oldnick = m.group(1)
                newnick = m.group(2)
                self.nickmap[newnick] = self.nickmap.get(oldnick, oldnick)
                if oldnick in self.botnicks:
                    # if it is any of these people, the bot never actually changed
                    never = ['Iciloo', 'sid|1']
                    if newnick not in never:
                        # HALP, THE BOT CHANGED NICKS
                        print ("Bot changed nicks from %s to %s" % (oldnick, newnick))
                        self.botnicks.append(newnick)
        # record the line (Python 2 unicode() decode; bad bytes drop the line)
        try:
            self.lines.append({'timestamp': timestamp, 'nick': nick, 'realnick': realnick, 'message': unicode(message, 'utf-8'), 'action': action})
        except UnicodeDecodeError:
            # ignore the line
            return
        # If bot said something, figure out the action
        if nick in self.botnicks:
            # record lynches/kills/quits (!quit/kick)/idles (incl part and /quit)/shot
            # on lynch, increment days counter. on kill, increment nights counter
            # on shoot, record current day, who was shot, and shot outcome
            m = re.match('(Day|Night) lasted ([0-9]{2}):([0-9]{2})', message)
            if m != None:
                # increment days or nights (current day is self.days + 1)
                if m.group(1) == 'Day':
                    self.days.append(int(m.group(2)) * 60 + int(m.group(3)))
                    self.curday += 1
                elif m.group(1) == 'Night':
                    self.nights.append(int(m.group(2)) * 60 + int(m.group(3)))
                    self.curnight += 1
                return
            curday = str(self.curday + 1)
            curnight = str(self.curnight)
            for msg in self.lynchmessages:
                m = re.match(msg, message)
                if m != None:
                    if curday not in self.lynched:
                        self.lynched[curday] = []
                    if len(m.groups()) == 1:
                        # have a victim
                        self.lynched[curday].append(m.group(1))
                    return
            for msg in self.killmessages:
                m = re.match(msg, message)
                if m != None:
                    if curnight not in self.killed:
                        self.killed[curnight] = []
                    if len(m.groups()) == 1:
                        self.killed[curnight].append(m.group(1))
                    return
            for msg in self.shotmessages:
                # shotmessages entries are (pattern, outcome) pairs
                m = re.match(msg[0], message)
                if m != None:
                    if curday not in self.shot:
                        self.shot[curday] = []
                    if msg[1] == "shoot":
                        # remember the target; the outcome line follows
                        self.prevtarget = m.group(1)
                        return
                    elif msg[1] == "explode" or msg[1] == "miss":
                        target = self.prevtarget
                    else:
                        target = m.group(1)
                    self.shot[curday].append({'target': target, 'outcome': msg[1]})
                    return
            for msg in self.quitmessages:
                m = re.match(msg, message)
                if m != None:
                    self.quit.append(m.group(1))
                    return
            for msg in self.idlemessages:
                m = re.match(msg, message)
                if m != None:
                    self.idled.append(m.group(1))
                    return
            # is game over?
            if not self.finished:
                # figure out who won
                m = re.match('(.*) has forced the game to stop', message)
                if m != None:
                    # game was !fstopped, so wipe our slate
                    self.reset()
                    return
                m = re.match('Game over! (All the wolves are dead|There are).*', message)
                if m != None:
                    self.finished = True
                    if m.group(1) == 'All the wolves are dead':
                        self.winner = "village"
                    else:
                        self.winner = "wolves"
                    return
            else:
                # record time and role data and mark game as over
                # time is before role data, so don't mark over until we have both
                m = re.match('Game lasted ([0-9]+):([0-9]+)\. ([0-9]+):([0-9]+) was day\. ([0-9]+):([0-9]+) was night\.', message)
                if m != None:
                    self.daytime = int(m.group(3)) * 60 + int(m.group(4))
                    self.nighttime = int(m.group(5)) * 60 + int(m.group(6))
                    return
                # if we get here, message is the listing of roles
                list = message.split('. ')
                for item in list:
                    # remove any trailing periods
                    item = item.rstrip('.')
                    m = re.match('The (.*?) (were|was) (.*)', item)
                    if m == None:
                        print ("Failed to match regex to %s" % (message))
                        continue
                    # variable is a troll and faked some roles, so make sure we only record valid ones
                    if m.group(1) not in self.rolemap:
                        continue
                    # determine the number of nicks in the 3rd group
                    if m.group(2) == 'was':
                        # one nick only, easy
                        self.ruleset[self.rolemap[m.group(1)]] = 1
                        self.roles[self.rolemap[m.group(1)]] = [m.group(3)]
                    else:
                        # multiple nicks, not so easy
                        nicks = re.split(', (?:and )?| and ', m.group(3))
                        self.ruleset[self.rolemap[m.group(1)]] = len(nicks)
                        self.roles[self.rolemap[m.group(1)]] = nicks
                # finally mark the game as over
                self.game = False
def update_event(self, inp=-1):
    """Node update hook: advance the module-global fileinput stream to
    its next file and publish fileinput.nextfile()'s return value on
    output port 0."""
    next_result = fileinput.nextfile()
    self.set_output_val(0, next_result)