Please try to extend the test strings with as many crazy examples from the data as we can get. """ from __future__ import print_function import re from glob import glob import csv import unicodecsv import sys from common import get_location_aliases # get list of territory abbreviations from Jordan's file alias, terr_lower = get_location_aliases() #terr_lower = set(terr_lower) '''terr_lower = set([row['abbreviation'].split()[0] for row in csv.DictReader(open(sys.argv[2])))''' # should point to "/local/diplomacy/code/diplomacy/images/map_locations.csv" # generate uppercase and titlecase variants: aaa -> (aaa, AAA, Aaa) territory = terr_lower + [t.upper() for t in terr_lower] + \ [t.capitalize() for t in terr_lower] # turn into or-regex plus optional "sc" (I don't know what it means) territory = r"(?:{})(?:\s*\((?:sc|Sc|SC)\))?".format("|".join(territory)) # regex to match human orders. # It contains two groups:
from glob import glob
import re
from collections import Counter, defaultdict

try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable, *args, **kwargs):
        """No-op stand-in used when tqdm is not installed.

        Returns ``iterable`` unchanged; any further arguments are accepted
        and ignored so calls written for the real tqdm keep working.
        """
        return iterable

from common import kCOUNTRIES, kADJECTIVES, get_location_aliases
from recreate_game_state import getGameState

# NOTE(review): sys, unicodecsv, basename and splitext are used below but are
# not imported in this part of the file -- confirm they are imported earlier.

# Index every entry of kCOUNTRIES by its first element (presumably the
# country's initial letter -- verify against common.kCOUNTRIES).
full_country = {country[0]: country for country in kCOUNTRIES}
alias, locations = get_location_aliases()

promised_supports = []

# The first command-line argument is the directory containing "*.press" files.
for f in glob(sys.argv[1] + "/*.press"):
    # The game name is the file name without its directory and extension.
    gamename, _ = splitext(basename(f))
    # Single pre-formatted argument: prints the same text under Python 2 and 3
    # (two spaces after "Processing", as the old print statement produced).
    print("\n\nProcessing  %s" % gamename)

    # unicodecsv decodes bytes itself, so the file is opened in binary mode;
    # the with-block closes the handle before moving on to the next game.
    with open(f, "rb") as press_file:
        rdr = unicodecsv.reader(press_file)
        next(rdr, None)  # discard header; tolerate a completely empty file
        for _, _, fro, to, mile, _, msg in rdr:
            #if fro not in full_country.keys() or to == "M" or mile.endswith("X"):
            if fro in ("Master", "?") or to == "Master" or mile.endswith("X"):
                # discard anonymous presses, presses from or to master and
                # post-game msgs
                continue
import unicodecsv
from glob import glob
import re
from collections import Counter, defaultdict

# tqdm is optional: when it cannot be imported, fall back to a function that
# returns its argument unchanged.
try:
    from tqdm import tqdm
except ImportError:
    tqdm = lambda x: x

from common import kCOUNTRIES, kADJECTIVES, get_location_aliases
from recreate_game_state import getGameState

# NOTE(review): sys, basename and splitext are used below but are not imported
# in this part of the file -- confirm they are imported earlier.

# Index every entry of kCOUNTRIES by its first element (presumably the
# country's initial letter -- verify against common.kCOUNTRIES).
full_country = {country[0]: country for country in kCOUNTRIES}
alias, locations = get_location_aliases()

promised_supports = []

# The first command-line argument is the directory containing "*.press" files.
for f in glob(sys.argv[1] + "/*.press"):
    # The game name is the file name without its directory and extension.
    gamename = basename(f)
    gamename, _ = splitext(gamename)
    print "\n\nProcessing ", gamename

    # NOTE(review): Python 2 only (print statement above, rdr.next() below),
    # and the file object handed to the reader is never closed explicitly.
    rdr = unicodecsv.reader(open(f))
    rdr.next()  # discard header

    # Every row is unpacked into seven fields; fro/to are presumably the
    # sender and recipient of the press -- TODO confirm the column layout.
    for _, _, fro, to, mile, _, msg in rdr:
        #if fro not in full_country.keys() or to == "M" or mile.endswith("X"):
        if fro == "Master" or to == "Master" or fro == "?" or mile.endswith(
                "X"):
            # discard anonymous presses, presses to master and post-game msgs