def read_templates(folder):
    """
    Load yaml templates from template folder.

    Walks ``folder`` recursively with ``os.walk`` and processes every file
    whose name ends in ``.yml`` (names are visited in sorted order within
    each directory). Each file is parsed with ``ordered_load``, validated,
    tagged with its file name under ``'template_name'`` and wrapped in
    ``InvoiceTemplate``.

    :param folder: root directory to search for ``.yml`` templates
    :returns: list of ``InvoiceTemplate`` objects, one per template file
        (empty when no ``.yml`` file is found)
    :raises AssertionError: if a template lacks ``keywords``, one of the
        required ``fields`` entries, or part of a ``lines`` section
    """
    output = []
    for path, subdirs, files in os.walk(folder):
        for name in sorted(files):
            if not name.endswith('.yml'):
                continue

            # Read through a context manager so the handle is closed even
            # when parsing or validation fails.
            with open(os.path.join(path, name)) as tpl_file:
                tpl = ordered_load(tpl_file.read())
            tpl['template_name'] = name

            # Test if all required fields are in template:
            assert 'keywords' in tpl.keys(), 'Missing keywords field.'
            required_fields = ['date', 'amount', 'invoice_number']
            assert len(set(required_fields).intersection(tpl['fields'].keys())) == len(required_fields), \
                'Missing required key in template {} {}. Found {}'.format(name, path, tpl['fields'].keys())

            # Keywords as list, if only one.
            if not isinstance(tpl['keywords'], list):
                tpl['keywords'] = [tpl['keywords']]

            # A 'lines' section is optional, but when present it needs all
            # three regexes.
            if 'lines' in tpl:
                assert 'start' in tpl['lines'], 'Lines start regex missing'
                assert 'end' in tpl['lines'], 'Lines end regex missing'
                assert 'line' in tpl['lines'], 'Line regex missing'

            output.append(InvoiceTemplate(tpl))
    return output
def load_articles(prefix):
    """Load every ``.yml`` file found directly in ``input_dir/prefix/``.

    ``input_dir`` is a module-level name defined outside this function.
    Each matching file is parsed with ``utils.ordered_load`` using
    ``yaml.SafeLoader``.

    :param prefix: sub-directory name, appended to ``input_dir``
    :returns: list with one parsed document per ``.yml`` file, in sorted
        file-name order
    """
    # Named ``article_dir`` rather than ``dir`` so the builtin is not shadowed.
    article_dir = input_dir + "/" + prefix + "/"
    articles = []
    # os.listdir() yields entries in arbitrary order; sort so the result is
    # reproducible across runs and platforms.
    for filename in sorted(os.listdir(article_dir)):
        if not filename.endswith(".yml"):
            continue
        with open(article_dir + filename, "r") as f:
            articles.append(utils.ordered_load(f, yaml.SafeLoader))
    return articles
def load_articles(prefix):
    """Load every ``.yml`` file found directly in ``input_dir/prefix/``.

    ``input_dir`` is a module-level name defined outside this function.
    Each matching file is parsed with ``utils.ordered_load`` using
    ``yaml.SafeLoader``.

    :param prefix: sub-directory name, appended to ``input_dir``
    :returns: list with one parsed document per ``.yml`` file, in sorted
        file-name order
    """
    # Avoid calling this ``dir``: that would shadow the builtin.
    article_dir = input_dir + "/" + prefix + "/"
    # os.listdir() makes no ordering promise, so sort for a stable result.
    yml_names = sorted(
        name for name in os.listdir(article_dir) if name.endswith(".yml"))

    articles = []
    for filename in yml_names:
        with open(article_dir + filename, "r") as f:
            data = utils.ordered_load(f, yaml.SafeLoader)
        articles.append(data)
    return articles
def get_options(self, argv):
    """
    Parse the command-line options and set the following object properties:

    :param argv: argument list handed to docopt, usually just
        sys.argv[1:]; ``None`` makes docopt fall back to sys.argv[1:]
    :returns: Nothing
    :ivar parameters: contents of PARAMFILE as parsed by ``ordered_load``
    :ivar run_dir: value of the ``--rundir`` option
    :ivar args: the merged and validated argument dict
    :ivar flow: steps that were not requested are deleted from it unless
        ``all`` was given

    Exits the process (via ``exit``) when schema validation fails, e.g.
    when PARAMFILE cannot be opened.
    """
    # Find max length of flow step names for padding with white space
    padding = max([len(x) for x in self.flow.keys()])
    step_help = '\n'.join(
        [' ' + k + ' ' * (padding + 4 - len(k)) + v
         for k, v in self.flow.items()])
    docstring = __doc__ % ('|'.join(self.flow),
                           ','.join(self.flow.keys()),
                           step_help)
    # Hand argv to docopt; previously the parameter was accepted but ignored.
    args = docopt(docstring, argv=argv, version=__version__)

    # Load in default conf values from file if specified
    if args['--conf']:
        with open(args['--conf']) as f:
            # NOTE: explicit SafeLoader. A bare yaml.load() can construct
            # arbitrary Python objects and newer PyYAML releases require a
            # Loader argument. An empty file parses to None, hence `or {}`.
            conf_args = yaml.load(f, Loader=yaml.SafeLoader) or {}
    else:
        conf_args = {}
    args = merge_args(conf_args, args)

    schema = Schema({
        'PARAMFILE': Use(open, error='PARAMFILE should be readable'),
        object: object
    })
    try:
        args = schema.validate(args)
    except SchemaError as e:
        exit(e)

    # Configure logging BEFORE the first logging call: module-level
    # logging.info() implicitly calls basicConfig() when the root logger has
    # no handlers, which would turn the calls below into no-ops.
    if args['--debug']:
        logging.basicConfig(level=logging.DEBUG, format='%(message)s')
    elif args['--verbose']:
        logging.basicConfig(level=logging.INFO, format='%(message)s')

    if args['all'] == 0:
        # Iterate over a copy of the keys because entries are deleted.
        for f in list(self.flow):
            if args[f] == 0:
                del self.flow[f]
        logging.info("Doing flow steps: %s" % (','.join(self.flow.keys())))

    # NOTE(review): the PARAMFILE handle opened by the schema stays open
    # because it is kept inside self.args; confirm nobody relies on it
    # before closing it here.
    self.parameters = ordered_load(args['PARAMFILE'])
    self.run_dir = args['--rundir']

    self.args = args  # Just save this for posterity
def main():
    """Read the tuning inputs named on the command line and run ``ga``.

    Usage::

        python ga.py conf.yaml rollingtopwords.yaml lat_90,throughput=100000 relations.yaml

    * ``argv[1]`` - YAML file mapping each parameter to a comma-separated
      range: ``start,end``, ``start,end,step`` or ``start,end,step,type``
      (``step`` may be the literal ``null`` in the four-field form).
    * ``argv[2]`` - base file, passed through to ``ga`` unchanged.
    * ``argv[3]`` - metric string, passed through to ``ga`` unchanged.
    * ``argv[4]`` - YAML file mapping a name to a comma-separated string.

    :raises IndexError: if fewer than four arguments are supplied
    :raises ValueError: if a start/end/step field is not an integer
    """
    conf_file = sys.argv[1]
    basefile = sys.argv[2]
    metric = sys.argv[3]
    relation_file = sys.argv[4]

    # The configuration is parsed twice: once into a plain mapping for value
    # lookup, once through ordered_load for the key sequence (presumably to
    # keep the keys in file order - confirm against utils.ordered_load).
    # NOTE: explicit SafeLoader. A bare yaml.load() can construct arbitrary
    # Python objects and newer PyYAML releases require a Loader argument.
    with open(conf_file, "r") as ref:
        sample = yaml.load(ref, Loader=yaml.SafeLoader)
    with open(conf_file, "r") as ref:
        conf = utils.ordered_load(ref, yaml.SafeLoader).keys()
    print(sample)

    start = dict()
    end = dict()
    step = dict()
    typ = dict()
    for k in sample:
        fields = sample[k].split(",")
        count = len(fields)
        if count not in (2, 3, 4):
            # Any other shape is ignored.
            continue
        if count == 4:
            typ[k] = fields[3]
        # A step exists in the three-field form, and in the four-field form
        # unless it is spelled "null".
        if count == 3 or (count == 4 and fields[2] != "null"):
            step[k] = int(fields[2])
        start[k] = int(fields[0])
        end[k] = int(fields[1])

    with open(relation_file, "r") as rel:
        rel_dict = dict(yaml.load(rel, Loader=yaml.SafeLoader))
    # The last comma-separated element of every relation is dropped
    # (presumably the values end with a trailing comma - TODO confirm).
    relations = dict()
    for r in rel_dict:
        relations[r] = rel_dict[r].split(",")[:-1]

    ga(conf, sample, start, end, step, typ, relations, basefile, metric)
if __name__ == '__main__': arguments = docopt(__doc__, version='yamlcalc 0.1') # print(arguments) # unordered: # with open(arguments['<yaml>'], 'r') as stream: # d = yaml.load(stream) # ordered: with open(arguments['<yaml>'], 'r') as stream: d = ordered_load(stream, yaml.SafeLoader) e = {} try: # in case no value is found for the "kg" and "cm" keys in .yaml bmi = round(d['kg'] / (d['cm'] * 0.01)**2, 1) e['bmi'] = str(bmi).replace('.', ',') e['broca'] = d['cm'] - 100 except TypeError: pass # http://stackoverflow.com/questions/6288892/convert-datetime-format try:
def main():
    """Read the tuning inputs named on the command line and run ``rule_based``.

    Usage::

        python rule_based.py yamlfiles/conf_rollingtopwords_hc.yaml rollingtopwords.yaml lat_90,throughput=150000 yamlfiles/relations.yaml yamlfiles/lat_rank_rc.yaml yamlfiles/tp_rank_rc.yaml

    * ``argv[1]`` - YAML file mapping each parameter to a comma-separated
      range: ``start,end``, ``start,end,step`` or ``start,end,step,type``
      (``step`` may be the literal ``null`` in the four-field form).
    * ``argv[2]`` - base file, passed through unchanged.
    * ``argv[3]`` - metric string, passed through unchanged.
    * ``argv[4]`` - YAML file mapping a name to a comma-separated string.
    * ``argv[5]`` - YAML file mapping parameter names to integer values
      (the "lat" table).
    * ``argv[6]`` - YAML file of the same shape (the "tp" table).

    :raises IndexError: if fewer than six arguments are supplied
    :raises ValueError: if a numeric field is not an integer
    """
    warnings.simplefilter('ignore', numpy.RankWarning)

    conf_file = sys.argv[1]
    basefile = sys.argv[2]
    metric = sys.argv[3]
    relation_file = sys.argv[4]
    lat_file = sys.argv[5]
    tp_file = sys.argv[6]

    # The configuration is parsed twice: once into a plain mapping for value
    # lookup, once through ordered_load for the key sequence (presumably to
    # keep the keys in file order - confirm against utils.ordered_load).
    # NOTE: explicit SafeLoader. A bare yaml.load() can construct arbitrary
    # Python objects and newer PyYAML releases require a Loader argument.
    with open(conf_file, "r") as ref:
        sample = yaml.load(ref, Loader=yaml.SafeLoader)
    with open(conf_file, "r") as ref:
        conf = utils.ordered_load(ref, yaml.SafeLoader).keys()

    start = dict()
    end = dict()
    step = dict()
    typ = dict()
    for k in sample:
        fields = sample[k].split(",")
        count = len(fields)
        if count not in (2, 3, 4):
            # Any other shape is ignored.
            continue
        if count == 4:
            typ[k] = fields[3]
        # A step exists in the three-field form, and in the four-field form
        # unless it is spelled "null".
        if count == 3 or (count == 4 and fields[2] != "null"):
            step[k] = int(fields[2])
        start[k] = int(fields[0])
        end[k] = int(fields[1])

    with open(relation_file, "r") as rel:
        rel_dict = dict(yaml.load(rel, Loader=yaml.SafeLoader))
    # The last comma-separated element of every relation is dropped
    # (presumably the values end with a trailing comma - TODO confirm).
    relations = dict()
    for r in rel_dict:
        relations[r] = rel_dict[r].split(",")[:-1]

    with open(lat_file, "r") as ref:
        lat_conf = utils.ordered_load(ref, yaml.SafeLoader)
    print(lat_conf)
    # lat_p maps position -> key; behav_lat maps key -> integer value.
    lat_p = dict()
    behav_lat = dict()
    for i, c in enumerate(lat_conf.keys()):
        lat_p[i] = c
        print(lat_conf[c])
        behav_lat[c] = int(lat_conf[c])

    with open(tp_file, "r") as ref:
        tp_conf = utils.ordered_load(ref, yaml.SafeLoader)
    # Same layout as the lat tables above.
    tp_p = dict()
    behav_tp = dict()
    for i, c in enumerate(tp_conf.keys()):
        tp_p[i] = c
        behav_tp[c] = int(tp_conf[c])

    print("Starting point is " + str(start))
    rule_based(conf, sample, start, end, step, typ, relations, basefile,
               metric, lat_p, tp_p, behav_tp, behav_lat)
def main():
    """Read the tuning inputs named on the command line and run one algorithm.

    Usage::

        python rrs.py conf.yaml rollingtopwords.yaml lat_90 relations.yaml lat.yaml tp.yaml algorithm

    * ``argv[1]`` - YAML file mapping each parameter to a comma-separated
      range: ``start,end``, ``start,end,step`` or ``start,end,step,type``
      (``step`` may be the literal ``null`` in the four-field form).
    * ``argv[2]`` - base file, passed through unchanged.
    * ``argv[3]`` - metric string, passed through unchanged.
    * ``argv[4]`` - YAML file mapping a name to a comma-separated string.
    * ``argv[5]`` - YAML file mapping parameter names to integer values
      (the "lat" table).
    * ``argv[6]`` - YAML file of the same shape (the "tp" table).
    * ``argv[7]`` - algorithm: ``hc``, ``mhc`` or ``ga``; any other value
      selects ``rule_based``.

    :raises IndexError: if fewer than seven arguments are supplied
    :raises ValueError: if a numeric field is not an integer
    """
    conf_file = sys.argv[1]
    basefile = sys.argv[2]
    metric = sys.argv[3]
    relation_file = sys.argv[4]
    lat_file = sys.argv[5]
    tp_file = sys.argv[6]
    algo = sys.argv[7]

    # The configuration is parsed twice: once into a plain mapping for value
    # lookup, once through ordered_load for the key sequence (presumably to
    # keep the keys in file order - confirm against utils.ordered_load).
    # NOTE: explicit SafeLoader. A bare yaml.load() can construct arbitrary
    # Python objects and newer PyYAML releases require a Loader argument.
    with open(conf_file, "r") as ref:
        sample = yaml.load(ref, Loader=yaml.SafeLoader)
    with open(conf_file, "r") as ref:
        conf = utils.ordered_load(ref, yaml.SafeLoader).keys()
    print(sample)

    start = dict()
    end = dict()
    step = dict()
    typ = dict()
    for k in sample:
        fields = sample[k].split(",")
        count = len(fields)
        if count not in (2, 3, 4):
            # Any other shape is ignored.
            continue
        if count == 4:
            typ[k] = fields[3]
        # A step exists in the three-field form, and in the four-field form
        # unless it is spelled "null".
        if count == 3 or (count == 4 and fields[2] != "null"):
            step[k] = int(fields[2])
        start[k] = int(fields[0])
        end[k] = int(fields[1])

    with open(relation_file, "r") as rel:
        rel_dict = dict(yaml.load(rel, Loader=yaml.SafeLoader))
    # The last comma-separated element of every relation is dropped
    # (presumably the values end with a trailing comma - TODO confirm).
    relations = dict()
    for r in rel_dict:
        relations[r] = rel_dict[r].split(",")[:-1]

    with open(lat_file, "r") as ref:
        lat_conf = utils.ordered_load(ref, yaml.SafeLoader)
    print(lat_conf)
    # lat_p maps position -> key; behav_lat maps key -> integer value.
    lat_p = dict()
    behav_lat = dict()
    for i, c in enumerate(lat_conf.keys()):
        lat_p[i] = c
        print(lat_conf[c])
        behav_lat[c] = int(lat_conf[c])

    with open(tp_file, "r") as ref:
        tp_conf = utils.ordered_load(ref, yaml.SafeLoader)
    # Same layout as the lat tables above.
    tp_p = dict()
    behav_tp = dict()
    for i, c in enumerate(tp_conf.keys()):
        tp_p[i] = c
        behav_tp[c] = int(tp_conf[c])

    print("Starting point is " + str(start))

    if algo == "hc":
        hc(conf, sample, start, end, step, typ, relations, basefile, metric)
    elif algo == "mhc":
        mhc(conf, sample, start, end, step, typ, relations, basefile, metric)
    elif algo == "ga":
        ga(conf, sample, start, end, step, typ, relations, basefile, metric)
    else:
        # Any unrecognised name falls back to the rule-based search, the
        # only algorithm that consumes the lat/tp tables.
        rule_based(conf, sample, start, end, step, typ, relations, basefile,
                   metric, lat_p, tp_p, behav_tp, behav_lat)
def load_results(lang):
    """Parse the results file for ``lang`` and return its contents.

    The file name is ``lang`` followed by ``_results.yml``; the path is
    used as given, so it resolves relative to the current working directory
    unless ``lang`` already carries a directory part.

    :param lang: string prefix of the results file name
    :returns: whatever ``utils.ordered_load`` produces for the file
    """
    results_path = "_".join((lang, "results.yml"))
    with open(results_path, "r") as handle:
        # SafeLoader is used on purpose, see
        # http://stackoverflow.com/questions/20352794/pyyaml-is-producing-undesired-python-unicode-output
        parsed = utils.ordered_load(handle, yaml.SafeLoader)
    return parsed