def add_ing(self, ingdict):
    """Append an ingredient row to ``ingredients_table`` and return it.

    ``ingdict`` may contain:
        id: recipe_id
        unit: unit
        item: description
        key: keyed descriptor
        alternative: not yet implemented (alternative)
        optional: True|False (boolean)
        position: INTEGER [position in list]
        refid: id of reference recipe. If ref is provided, everything
               else is irrelevant except for amount.

    Side effects: sets ``self.changed``; removes a falsy ``amount`` entry
    from the caller's ``ingdict`` in place; runs ``self.add_ing_hooks`` (when
    non-empty) on the freshly appended row.

    Returns ``self.ingredients_table[-1]`` after the append.
    """
    self.changed = True
    debug('adding to ingredients_table %s' % ingdict, 3)
    timer = TimeAction('rdatabase.add_ing 2', 5)
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if 'amount' in ingdict and not ingdict['amount']:
        del ingdict['amount']
    self.ingredients_table.append(ingdict)
    timer.end()
    debug('running ing hooks %s' % self.add_ing_hooks, 3)
    timer = TimeAction('rdatabase.add_ing 3', 5)
    if self.add_ing_hooks:
        self.run_hooks(self.add_ing_hooks, self.ingredients_table[-1])
    timer.end()
    debug('done with ing hooks', 3)
    return self.ingredients_table[-1]
def add_ref(self, id):
    """Mark the current ingredient as a reference to another recipe.

    ``id`` is the externally supplied recipe identifier. The first time an
    identifier is seen, a new id is obtained from ``self.rd.new_id()`` and
    remembered in ``self.id_converter``; later calls reuse that mapping.
    The mapped id is stored as ``self.ing['refid']`` and the ingredient's
    unit is set to ``'recipe'``.
    """
    timeaction = TimeAction('importer.add_ref', 10)
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    # new_id() is only called for unseen ids, so no ids are wasted.
    if id not in self.id_converter:
        self.id_converter[id] = self.rd.new_id()
    self.ing['refid'] = self.id_converter[id]
    self.ing['unit'] = 'recipe'
    timeaction.end()
def __init__(self, rd, recipes, out, one_file=True, ext='txt', conv=None,
             imgcount=1, exporter=exporter, exporter_kwargs=None,
             padding=None):
    """Output all recipes in recipes into a document or multiple documents.

    If one_file, then everything is in one file. Otherwise, we treat 'out'
    as a directory and put individual recipe files within it.

    ``exporter`` is stored along with ``exporter_kwargs``; the ``fractions``
    setting is read from ``exporter_kwargs`` (falling back to
    ``convert.FRACTIONS_ASCII``) and ``DEFAULT_ENCODING`` is copied from the
    exporter. ``conv`` and ``imgcount`` are accepted but not used in this
    initializer.
    """
    self.timer = TimeAction('exporterMultirec.__init__()')
    self.rd = rd
    self.recipes = recipes
    self.out = out
    self.padding = padding
    self.one_file = one_file
    Pluggable.__init__(self, [BaseExporterMultiRecPlugin])
    SuspendableThread.__init__(self, self.name)
    self.ext = ext
    self.exporter = exporter
    # A ``{}`` default would be one dict shared by every instance created
    # without explicit kwargs; build a fresh one per instance instead.
    self.exporter_kwargs = {} if exporter_kwargs is None else exporter_kwargs
    self.fractions = self.exporter_kwargs.get('fractions',
                                              convert.FRACTIONS_ASCII)
    self.DEFAULT_ENCODING = self.exporter.DEFAULT_ENCODING
def find_unit_field(self, fields, fields_is_numfield):
    """Return the first field if it looks like a unit column, else None.

    ``fields`` is a sequence of ``(start, end)`` pairs. The first field is
    accepted when there is more than one field and its width is positive
    and no larger than ``self.unit_length``. ``fields_is_numfield`` is
    accepted for signature compatibility but is not consulted.
    """
    testtimer = TimeAction('mealmaster_importer.find_unit_field', 10)
    unit_field = None
    # Check the length first so an empty ``fields`` yields None instead of
    # raising IndexError on fields[0].
    if len(fields) > 1 and 0 < fields[0][1] - fields[0][0] <= self.unit_length:
        unit_field = fields[0]
    testtimer.end()
    return unit_field
def __init__(self, filename='Data/mealmaster.mmf', prog=None, source=None,
             threaded=True, two_col_minimum=38, conv=None):
    """Set up parser state and initialise the underlying TextImporter.

    filename is the file to parse; it is stored as ``self.fn`` and handed
    to ``plaintext_importer.TextImporter.__init__``.
    prog is stored as ``self.prog`` (originally documented as a progress
    callback taking a single argument).
    source is stored as ``self.source``.
    two_col_minimum is stored as ``self.two_col_minimum``.
    threaded and conv are accepted but not used by this initializer.
    """
    testtimer = TimeAction('mealmaster_importer.__init__', 10)
    debug("mmf_importer start __init__ ", 5)

    # Caller-supplied settings.
    self.source = source

    # Per-recipe parsing state.
    self.header = False
    self.instr = ""
    self.ingrs = []
    self.ing_added = False
    self.in_variation = False

    self.fn = filename
    self.prog = prog

    # Layout parameters for the ingredient columns.
    self.unit_length = 2
    self.two_col_minimum = two_col_minimum

    self.last_line_was = None

    plaintext_importer.TextImporter.__init__(self, filename)
    testtimer.end()
def compile_regexps(self):
    """Compile the regular expressions used to recognise MealMaster lines.

    Calls the ``TextImporter`` implementation first, then sets the
    ``*_matcher`` attributes on ``self`` and stores the ``mmf`` module as
    ``self.mmf``.
    """
    testtimer = TimeAction('mealmaster_importer.compile_regexps', 10)
    debug("start compile_regexps", 5)
    plaintext_importer.TextImporter.compile_regexps(self)
    # All patterns are raw strings so that \s, \w and \( reach the regex
    # engine unchanged instead of relying on invalid string escapes.
    self.start_matcher = re.compile(mm_start_pattern)
    self.end_matcher = re.compile(r"^[M-][M-][M-][M-][M-]\s*$")
    self.group_matcher = re.compile(
        r"^\s*([M-][M-][M-][M-][M-])-*\s*([^-]+)\s*-*|^\s*---\s*([^-]+)\s*---\s*$",
        re.IGNORECASE)
    self.ing_cont_matcher = re.compile(r"^\s*[-;]")
    self.ing_opt_matcher = re.compile(r"(.+?)\s*\(?\s*optional\)?\s*$",
                                      re.IGNORECASE)
    self.ing_or_matcher = re.compile("^[- ]*[Oo][Rr][- ]*$", re.IGNORECASE)
    self.variation_matcher = re.compile(
        r"^\s*(VARIATION|HINT|NOTES?)(:.*)?", re.IGNORECASE)
    # a crude ingredient matcher -- we look for two numbers,
    # intermingled with spaces followed by a space or more,
    # followed by a two digit unit (or spaces)
    c = convert.get_converter()
    # NOTE(review): unit names are inserted unescaped, exactly as before;
    # a unit containing regex metacharacters (e.g. ".") is treated as a
    # pattern -- confirm whether that is intended.
    unit_alternatives = '(' + '|'.join(
        [u for u in c.unit_dict.keys() if u]) + ')'
    self.ing_num_matcher = re.compile(
        r"^\s*%s+\s+([a-z ]{1,2}|%s)\s+.*\w+.*" % (convert.NUMBER_REGEXP,
                                                   unit_alternatives),
        re.IGNORECASE)
    self.amt_field_matcher = re.compile(
        r"^(\s*%s\s*)$" % convert.NUMBER_REGEXP)
    # we build a regexp to match anything that looks like
    # this: ^\s*ATTRIBUTE: Some entry of some kind...$
    self.mmf = mmf
    # join() avoids the "strip the trailing |" trick, which mangled the
    # pattern when recattrs was empty.
    attr_names = "|".join(re.escape(k) for k in self.mmf.recattrs.keys())
    self.attr_matcher = re.compile(r"^\s*(%s):\s*(.*)\s*$" % attr_names)
    testtimer.end()
def add_unit(self, unit):
    """Normalise a MealMaster unit and pass it to ``Importer.add_unit``.

    The unit is stripped of surrounding whitespace; if the stripped text is
    a key of ``self.mmf.unit_conv`` it is replaced by the mapped value.
    """
    testtimer = TimeAction('mealmaster_importer.add_unit', 10)
    unit = unit.strip()
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if unit in self.mmf.unit_conv:
        unit = self.mmf.unit_conv[unit]
    importer.Importer.add_unit(self, unit)
    testtimer.end()
def add_item(self, item):
    """Record the item text and its ingredient key on the current ingredient.

    The stripped ``item`` is stored as ``self.ing['item']``. The key is
    looked up with ``self.km.get_key_fast`` using only the text before the
    first ";" and stored as ``self.ing['ingkey']``.
    """
    testtimer = TimeAction('mealmaster_importer.add_item', 10)
    cleaned = item.strip()
    self.ing['item'] = cleaned
    # Bug 1061363: "potatoes; cut and mashed" should be keyed as just
    # "potatoes", so everything from the first semicolon on is ignored.
    before_semicolon = cleaned.partition(";")[0]
    self.ing['ingkey'] = self.km.get_key_fast(before_semicolon)
    testtimer.end()
def add_amt(self, amount):
    """Parse ``amount`` and store it on the current ingredient.

    We should NEVER get non-numeric amounts. Amounts must contain
    [/.0-9 ] e.g. 1.2 or 1 1/5 or 1/3 etc.

    The pair returned by ``parse_range(amount)`` is stored as
    ``self.ing['amount']`` and ``self.ing['rangeamount']``.
    """
    # The docstring used to follow the first statement, which made it a
    # discarded string expression rather than the function's __doc__.
    timeaction = TimeAction('importer.add_amt', 10)
    self.ing['amount'], self.ing['rangeamount'] = parse_range(amount)
    timeaction.end()
def __init__(
        self,
        rd=None,  # OBSOLETE
        total=0,
        prog=None,  # OBSOLETE
        do_markup=True,
        conv=None,
        rating_converter=None,
        name='importer'):
    """Initialise importer state.

    rd and prog are obsolete: passing either prints a stack trace and a
    warning, and the value is otherwise ignored (``self.rd`` always comes
    from ``get_recipe_manager()``).
    total is used to keep track of progress.
    do_markup should be True if instructions and modifications come to us
    unmarked up (i.e. if we need to escape < and &, etc. -- this might be
    False if importing e.g. XML).
    conv is the converter to use; ``convert.get_converter()`` is used when
    none is given.
    rating_converter, when given, is stored and ``self.do_conversion`` is
    set to False; otherwise a ``RatingConverter`` is created and
    ``self.do_conversion`` is True.
    name is passed to ``SuspendableThread.__init__``.
    """
    timeaction = TimeAction('importer.__init__', 10)
    # Previously self.conv was only assigned when no converter was passed,
    # so a caller-supplied ``conv`` was silently dropped.
    self.conv = conv if conv else convert.get_converter()
    self.id_converter = {}  # a dictionary for tracking named IDs
    self.total = total
    if prog or rd:
        import traceback
        traceback.print_stack()
        if prog:
            print('WARNING: ', self, 'handed obsolete parameter prog=', prog)
        if rd:
            print('WARNING: ', self, 'handed obsolete parameter rd=', rd)
    self.do_markup = do_markup
    self.count = 0
    self.rd = get_recipe_manager()
    # Remember the manager's ingredient hooks and clear them on the manager.
    self.rd_orig_ing_hooks = self.rd.add_ing_hooks
    self.added_recs = []
    self.added_ings = []
    self.rd.add_ing_hooks = []
    self.position = 0
    self.group = None
    # allow threaded calls to pause
    self.suspended = False
    # allow threaded calls to be terminated (this
    # has to be implemented in subclasses).
    self.terminated = False
    # Our rating converter -- if we've been handed a class, we
    # assume our caller will handle doing the
    # conversion. Otherwise we do it ourselves.
    if rating_converter:
        self.rating_converter = rating_converter
        self.do_conversion = False
    else:
        self.rating_converter = RatingConverter()
        self.do_conversion = True
    self.km = keymanager.get_keymanager()
    timeaction.end()
    SuspendableThread.__init__(self, name=name)
def finish_ing(self):
    """Normalise the current ingredient and queue it in ``self.added_ings``.

    Steps, in order:
      * collapse runs of whitespace in 'item', 'ingkey' and 'unit';
      * if there is neither a truthy 'refid' nor a truthy 'ingkey', derive
        'ingkey' from 'item' via ``self.km.get_key_fast`` (or log that the
        ingredient has no item);
      * split a range-looking 'amount' into 'amount'/'rangeamount' with
        ``parse_range`` and convert both with ``convert.frac_to_float``;
      * assign ``self.position`` as 'position' when none is set, and
        advance the counter;
      * copy ``self.group`` into 'inggroup' when a group is active;
      * append the ingredient to ``self.added_ings`` and reset ``self.ing``.
    """
    ing = self.ing
    timeaction = TimeAction('importer.finish_ing 1', 10)
    # Strip whitespace...
    for key in ['item', 'ingkey', 'unit']:
        # ``in`` replaces dict.has_key(), which does not exist on Python 3.
        if key in ing:
            ing[key] = re.sub(r'\s+', ' ', ing[key]).strip()
    has_ref = 'refid' in ing and ing['refid']
    has_key = 'ingkey' in ing and ing['ingkey']
    if not (has_ref or has_key):
        if 'item' in ing:
            ing['ingkey'] = self.km.get_key_fast(ing['item'])
        else:
            debug('Ingredient has no item! %s' % ing, -1)
    timeaction.end()
    # if we have an amount (and it's not None), let's convert it
    # to a number
    if 'amount' in ing and ing['amount'] and 'rangeamount' not in ing:
        if convert.RANGE_MATCHER.search(str(ing['amount'])):
            ing['amount'], ing['rangeamount'] = parse_range(ing['amount'])
    if 'amount' in ing:
        ing['amount'] = convert.frac_to_float(ing['amount'])
    if 'rangeamount' in ing:
        ing['rangeamount'] = convert.frac_to_float(ing['rangeamount'])
    timeaction = TimeAction('importer.commit_ing 2', 10)
    # A missing or falsy position (including 0) is replaced by the running
    # counter.
    if not ('position' in ing and ing['position']):
        ing['position'] = self.position
        self.position += 1
    timeaction.end()
    timeaction = TimeAction('importer.commit_ing 3', 10)
    if self.group:
        ing['inggroup'] = self.group
    timeaction.end()
    timeaction = TimeAction('importer.commit_ing 4', 10)
    self.added_ings.append(ing)
    self.ing = {}
    timeaction.end()
def field_width(tuple):
    """Return ``tuple[1] - tuple[0]``, or None when ``tuple[1]`` is falsy.

    ``tuple`` is indexed as a ``(start, end)`` pair; a falsy end (None or 0)
    yields None.
    """
    testtimer = TimeAction('mealmaster_importer.field_width', 10)
    debug("start field_width", 10)
    has_end = bool(tuple[1])
    testtimer.end()
    if not has_end:
        return None
    return tuple[1] - tuple[0]
def start_rec(self, dict=None):
    """Begin a new recipe.

    Starts ``self.rec_timer``, calls ``self.check_for_sleep()``, warns on
    stdout if ``self.added_ings`` still holds ingredients, then resets
    ``self.added_ings`` and ``self.group``. ``self.rec`` becomes ``dict``
    when a truthy one is passed, otherwise a new empty dict.
    """
    self.rec_timer = TimeAction('importer RECIPE IMPORT', 10)
    timeaction = TimeAction('importer.start_rec', 10)
    self.check_for_sleep()
    leftover = getattr(self, 'added_ings', None)
    if leftover:
        # Single-argument print() calls behave the same as a statement on
        # Python 2 and as a function on Python 3.
        print('WARNING: starting new rec, but we have ingredients that we never added!')
        print('Unadded ingredients:  %s' % (leftover,))
    self.added_ings = []
    self.group = None
    self.rec = dict if dict else {}
    timeaction.end()
def parse_abbrevfile(self, abbrevfile):
    """Import nutritional data lines from ``abbrevfile`` into the database.

    ``abbrevfile`` is split into lines; each line is decoded as ISO-8859-1
    and parsed with ``self.parse_line(..., NUTRITION_FIELDS)``. The 'desc'
    value is expanded with ``expand_abbrevs`` and a 'foodgroup' is looked
    up from ``self.foodgroups_by_ndbno``. Each row is added with
    ``self.db.do_add_fast``; if that fails, an UPDATE of the row with the
    same ``ndbno`` is attempted instead, and a failure of that UPDATE is
    reported and re-raised. Progress is reported through
    ``self.show_progress`` (when set) every 50 lines.
    """
    if self.show_progress:
        self.show_progress(float(0.03), _('Parsing nutritional data...'))
    self.datafile = tempfile.TemporaryFile()
    ll = abbrevfile.splitlines()
    tot = len(ll)
    for n, l in enumerate(ll):
        # TODO: Convert ABBREV.txt to UTF-8
        l = str(l.decode('iso-8859-1'))
        tline = TimeAction('1 line iteration', 2)
        t = TimeAction('split fields', 2)
        d = self.parse_line(l, NUTRITION_FIELDS)
        d['desc'] = expand_abbrevs(d['desc'])
        d['foodgroup'] = FOOD_GROUPS[self.foodgroups_by_ndbno[d['ndbno']]]
        t.end()
        if self.show_progress and n % 50 == 0:
            self.show_progress(
                float(n) / tot,
                _('Reading nutritional data: imported %s of %s entries.') %
                (n, tot))
        t = TimeAction('append to db', 3)
        try:
            self.db.do_add_fast(self.db.nutrition_table, d)
        except Exception:
            # Best-effort fallback: the insert failed (presumably because
            # the row already exists -- TODO confirm), so try updating it.
            try:
                args = d.copy()
                del args['ndbno']
                # Column names cannot be bound, but the ndbno value can:
                # it is passed as the last positional parameter instead of
                # being formatted into the SQL text.
                SQL = 'UPDATE ' + self.db.nutrition_table.name + ' SET '
                SQL += ', '.join('%s = ?' % k for k in args)
                SQL += ' WHERE ndbno = ?'
                self.db.extra_connection.execute(
                    SQL, list(args.values()) + [d['ndbno']])
            except Exception:
                print('Error appending to nutrition_table', d)
                print('Tried modifying table -- that failed too!')
                raise
        t.end()
        tline.end()
    self.db.commit_fast_adds()
def get_fields(string, tuples):
    """Slice ``string`` once per entry of ``tuples`` and return the pieces.

    Each truthy entry ``t`` contributes ``string[t[0]:t[1]]``; a falsy entry
    contributes the empty string. The result has one element per entry, in
    the same order.
    """
    testtimer = TimeAction('mealmaster_importer.get_fields', 10)
    debug("start get_fields", 10)
    pieces = [string[t[0]:t[1]] if t else "" for t in tuples]
    testtimer.end()
    return pieces
def fields_match(strings, fields, matcher):
    """Return a list of 1/0 flags, one per entry of ``fields``.

    For each field ``f`` the slice ``s[f[0]:f[1]]`` of every string in
    ``strings`` is tested with ``matcher.match``; the flag is 1 when at
    least one slice matches and 0 otherwise.
    """
    testtimer = TimeAction('mealmaster_importer.fields_match', 10)
    flags = [
        1 if any(matcher.match(s[f[0]:f[1]]) for s in strings) else 0
        for f in fields
    ]
    # The timer was previously started but never ended in this function.
    testtimer.end()
    return flags
def field_match(strings, tup, matcher):
    """Return True if every non-empty slice of ``strings`` matches.

    ``tup`` is indexed as ``(start, end)``; for each string the slice
    ``s[tup[0]:tup[1]]`` is taken, and empty slices are ignored.
    ``matcher`` may be a compiled pattern or a pattern string, which is
    compiled here. Returns False as soon as one non-empty slice fails
    ``matcher.match``.
    """
    testtimer = TimeAction('mealmaster_importer.field_match', 10)
    debug("start field_match", 10)
    # isinstance() also accepts str subclasses, unlike comparing type().
    if isinstance(matcher, str):
        matcher = re.compile(matcher)
    slices = (s[tup[0]:tup[1]] for s in strings)
    result = all(matcher.match(f) for f in slices if f)
    testtimer.end()
    return result
def __init__(self):
    """Create empty hook lists, then connect and set up the tables.

    Hooks run after adding, modifying or deleting a recipe. Each hook is
    handed the recipe, except for delete_hooks, which is handed the ID
    (since the recipe has been deleted).
    """
    # One independent list per hook attribute (never share a list object).
    for hook_attr in ('add_hooks', 'modify_hooks', 'delete_hooks',
                      'add_ing_hooks'):
        setattr(self, hook_attr, [])
    timer = TimeAction('initialize_connection + setup_tables', 2)
    self.initialize_connection()
    self.setup_tables()
    timer.end()
def start_ing(self, **kwargs):
    """Start a new ingredient whose initial fields are the keyword arguments.

    The keyword dictionary itself becomes ``self.ing``.
    """
    timeaction = TimeAction('importer.start_ing', 10)
    # No id/recipe_id handling happens here; the fields are stored as given.
    self.ing = kwargs
    timeaction.end()
def check_for_sleep(self):
    """Block while the importer is suspended; raise if it is terminated.

    Raises ``Terminated`` when ``self.terminated`` is set, either on entry
    or while waiting. While ``self.suspended`` is set, sleeps in one-second
    steps and re-checks both flags after each sleep.
    """
    timeaction = TimeAction('importer.check_for_sleep', 10)
    while True:
        if self.terminated:
            raise Terminated("Importer Terminated!")
        if not self.suspended:
            break
        time.sleep(1)
    timeaction.end()
def add_rec(self, rdict):
    """Add a recipe based on a dictionary of properties and values.

    ``rdict`` is modified in place: a missing 'deleted' key is set to 0 and
    a missing 'id' is filled from ``self.new_id()``. The dict is appended
    to ``self.recipe_table``, ``self.add_hooks`` (when non-empty) are run on
    the stored row, and ``self.recipe_table[-1]`` is returned. Any exception
    is logged through ``debug`` and re-raised.
    """
    self.changed = True
    t = TimeAction('rdatabase.add_rec - checking keys', 3)
    # Membership tests replace dict.has_key(), removed in Python 3.
    rdict.setdefault('deleted', 0)
    if 'id' not in rdict:
        # Kept as an explicit test so new_id() is only called when needed.
        rdict['id'] = self.new_id()
    t.end()
    try:
        debug('Adding recipe %s' % rdict, 4)
        t = TimeAction('rdatabase.add_rec - recipe_table.append(rdict)', 3)
        self.recipe_table.append(rdict)
        t.end()
        debug('Running add hooks %s' % self.add_hooks, 2)
        if self.add_hooks:
            self.run_hooks(self.add_hooks, self.recipe_table[-1])
        return self.recipe_table[-1]
    except Exception:
        debug("There was a problem adding recipe%s" % rdict, -1)
        raise
def parse_inglist(self):
    """We handle our ingredients after the fact.

    Column positions come from ``self.find_ing_fields()``. Every stored
    ``(line, group)`` pair in ``self.ingrs`` is sliced into amount, unit and
    item once per column set, and each non-blank result is committed as an
    ingredient through ``start_ing``/``add_amt``/``add_unit``/``add_item``/
    ``commit_ing``. An item matching ``self.ing_opt_matcher`` is marked
    optional and stored without the "optional" marker.
    """
    testtimer = TimeAction('mealmaster_importer.parse_inglis', 10)
    debug("start parse_inglist", 5)
    ingfields = self.find_ing_fields()
    debug("ingredient fields are: %s" % ingfields, 10)
    for s, g in self.ingrs:
        for afield, ufield, ifield in ingfields:
            self.group = g
            # NOTE: the names amt, u and i are read through locals() in the
            # debug format strings below -- do not rename them.
            amt, u, i = get_fields(s, (afield, ufield, ifield))
            debug(
                """amt:%(amt)s u:%(u)s i:%(i)s""" % locals(), 0)
            # sanity check...
            if not amt.strip() and not u.strip():
                if not i:
                    continue
                # if we have no amt or unit, let's do the right thing if
                # this just looks misaligned -- in other words, if the
                # "item" column has "2 c. parsley", parse it as such.
                parsed = self.rd.parse_ingredient(i, conv=self.conv,
                                                  get_key=False)
                if parsed and parsed.get('amount', '') and parsed.get(
                        'item', ''):
                    amt = "%s" % parsed['amount']
                    u = parsed.get('unit', '')
                    i = parsed['item']
                    debug(
                        """After sanity check amt:%(amt)s u:%(u)s i:%(i)s""" %
                        locals(), 0)
            if not (amt.strip() or u.strip() or i.strip()):
                continue
            self.start_ing()
            if amt:
                self.add_amt(amt)
            if u:
                self.add_unit(u)
            optm = self.ing_opt_matcher.match(i)
            if optm:
                item = optm.groups()[0]
                self.ing['optional'] = True
            else:
                item = i
            self.add_item(item)
            debug("committing ing: %s" % self.ing, 6)
            self.commit_ing()
    testtimer.end()
def __getitem__(self, k):
    """Return the value stored for key ``k``.

    Keys present in ``self.just_got`` are answered from that cache.
    Otherwise the key is pickled when ``self.pickle_key`` is set, the first
    row of ``self.vw.select(**{self.kp: k})`` is fetched, and its
    ``self.vp`` attribute is unpickled. A falsy stored value yields None.
    An unpickling failure is reported on stdout and re-raised.
    """
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    if k in self.just_got:
        return self.just_got[k]
    if self.pickle_key:
        k = pickle.dumps(k)
    t = TimeAction('dbdict getting from db', 5)
    v = getattr(self.vw.select(**{self.kp: k})[0], self.vp)
    t.end()
    if not v:
        return None
    try:
        # NOTE(review): pickle.loads is only safe on data this application
        # wrote itself -- confirm the table cannot hold untrusted content.
        return pickle.loads(v)
    except Exception:
        # Single-argument print() works on both Python 2 and Python 3.
        print("Problem unpickling  %s" % (v,))
        raise
def is_ingredient(self, l):
    """Return True if the line looks like an ingredient, else False.

    We're going to go with a somewhat hackish approach here. Once we have
    the ingredient list, we can determine columns more appropriately. For
    now a line counts as an ingredient when ``self.ing_num_matcher``
    matches it, or when it is at least 7 characters long and its first 5
    characters match ``self.blank_matcher`` (the specs suggest 7 blanks).
    """
    testtimer = TimeAction('mealmaster_importer.is_ingredient', 10)
    looks_like_ingredient = bool(
        self.ing_num_matcher.match(l)
        or (len(l) >= 7 and self.blank_matcher.match(l[0:5])))
    # The timer is now ended on every path, and the negative case returns
    # an explicit False instead of falling through with None.
    testtimer.end()
    return looks_like_ingredient
def convert_str_to_num(self, str):
    """Return a numerical servings value, or None if none can be found.

    Tries, in order: ``float(str)``; ``convert.frac_to_float(str)`` (used
    when truthy); then the leading run of characters from ``[0-9/. ]`` at
    the start of ``str``, converted with ``float`` or, failing that, with
    ``convert.frac_to_float``.
    """
    timeaction = TimeAction('importer.convert_str_to_num', 10)
    debug('converting servings for %s' % str, 5)
    # try/finally ends the timer on every exit; previously it was only
    # ended when the function fell through without a result.
    try:
        try:
            return float(str)
        except (TypeError, ValueError, OverflowError):
            pass
        conv = convert.frac_to_float(str)
        if conv:
            return conv
        m = re.match("([0-9/. ]+)", str)
        if not m:
            return None
        num = m.group(1)
        try:
            return float(num)
        except (TypeError, ValueError):
            return convert.frac_to_float(num)
    finally:
        timeaction.end()
def new_rec(self):
    """Start a new recipe, committing any recipe still in progress first."""
    testtimer = TimeAction('mealmaster_importer.new_rec', 10)
    debug("start new_rec", 5)
    recipe_still_open = bool(self.rec)
    if recipe_still_open:
        # Should not happen when recipes are terminated properly, but a
        # recipe that starts before the previous one ended is tolerated.
        self.commit_rec()
    self.committed = False
    self.start_rec()
    debug('resetting instructions', 5)
    # Reset the per-recipe accumulators.
    self.instr, self.mod = "", ""
    self.ingrs = []
    self.header = False
    testtimer.end()
def handle_group(self, groupm):
    """Start a new ingredient group.

    ``groupm`` is a match object. The group name is taken from its second
    capture group, falling back to the third. When a name is found it is
    stripped, converted to title case (MealMaster defaults to all caps)
    and stored as ``self.group``; otherwise ``self.group`` is left as is.
    """
    testtimer = TimeAction('mealmaster_importer.handle_group', 10)
    debug("start handle_group", 10)
    name = groupm.group(2) or groupm.group(3)
    if name:
        self.group = name.strip().title()
    # The timer is ended on both paths; the old early return skipped this.
    testtimer.end()
def find_ing_fields(self):
    """Find fields in an ingredient line.

    Returns a list of ``[amount_field, unit_field, item_field]`` entries:
    one for the first column and, when a second column is detected to the
    right of ``self.two_col_minimum``, one more for that column. The
    amount and unit fields come from ``find_amt_field`` and
    ``find_unit_field``; item fields are ``(start, end)`` pairs whose end
    may be None.
    """
    testtimer = TimeAction('mealmaster_importer.find_ing_fields', 10)
    all_ings = [i[0] for i in self.ingrs]
    fields = find_fields(all_ings)
    fields_is_numfield = fields_match(all_ings, fields,
                                      self.amt_field_matcher)
    aindex, afield = self.find_amt_field(fields, fields_is_numfield)
    if aindex is not None:
        # Drop the amount field and everything before it.
        fields = fields[aindex + 1:]
        fields_is_numfield = fields_is_numfield[aindex + 1:]
    ufield = fields and self.find_unit_field(fields, fields_is_numfield)
    if ufield:
        fields = fields[1:]
        fields_is_numfield = fields_is_numfield[1:]
    if fields:
        ifield = [fields[0][0], None]
    else:
        ifield = 0, None
    retval = [[afield, ufield, ifield]]
    # A list (not filter()) is required: the result is truth-tested,
    # indexed and sliced below, which a Python 3 filter object cannot do.
    sec_col_fields = [f for f in fields if f[0] > self.two_col_minimum]
    if sec_col_fields:
        ibase = fields.index(sec_col_fields[0])
        # Skip leading second-column candidates that are not numeric.
        while sec_col_fields and not fields_is_numfield[ibase]:
            ibase += 1
            sec_col_fields = sec_col_fields[1:]
        # if we might have a 2nd column...
        if sec_col_fields and len(sec_col_fields) > 2:
            fields_is_numfield = fields_is_numfield[ibase:]
            aindex2, afield2 = self.find_amt_field(sec_col_fields,
                                                   fields_is_numfield)
            if aindex2 is not None and len(sec_col_fields[aindex2 + 1:]) >= 1:
                # then it's a go! Shift our first ifield
                retval[0][2] = [ifield[0], fields[ibase - 1][1]]
                sec_col_fields = sec_col_fields[aindex2 + 1:]
                fields_is_numfield = fields_is_numfield[aindex2 + 1:]
                ufield2 = self.find_unit_field(sec_col_fields,
                                               fields_is_numfield)
                if ufield2:
                    sec_col_fields = sec_col_fields[1:]
                    fields_is_numfield = fields_is_numfield[1:]
                ifield2 = sec_col_fields[0][0], None
                retval.append([afield2, ufield2, ifield2])
    testtimer.end()
    return retval
def do_run(self):
    """Count the recipes in ``self.fn``, then parse it with ``xml.sax``.

    ``self.fn`` may be a path string or an already open file object. The
    number of lines containing ``self.recMarker`` is stored in
    ``self.rh.total`` before parsing. After parsing, the handler's
    ``added_ings`` and ``added_recs`` are copied onto ``self`` and
    ``importer.Importer._run_cleanup_`` is called on the handler.
    """
    # count the recipes in the file
    t = TimeAction("rxml_to_metakit.run counting lines", 0)
    opened_here = isinstance(self.fn, str)
    f = open(self.fn, 'rb') if opened_here else self.fn
    recs = 0
    try:
        for l in f.readlines():
            if l.find(self.recMarker) >= 0:
                recs += 1
            if recs % 5 == 0:
                self.check_for_sleep()
    finally:
        # Only close a handle we opened ourselves: a caller-supplied file
        # object is still needed by xml.sax.parse() below.
        if opened_here:
            f.close()
    if not opened_here and hasattr(f, 'seek'):
        # Counting consumed the stream; rewind so the parser sees it all.
        f.seek(0)
    t.end()
    self.rh.total = recs
    try:
        self.parse = xml.sax.parse(self.fn, self.rh)
    finally:
        # Keep the old postcondition that a caller-supplied file ends up
        # closed, but only after parsing has had a chance to read it.
        if not opened_here:
            f.close()
    self.added_ings = self.rh.added_ings
    self.added_recs = self.rh.added_recs
    importer.Importer._run_cleanup_(self.rh)
def commit_rec(self):
    """Commit our recipe to our database.

    Does nothing when ``self.committed`` is already set. Otherwise the
    accumulated instructions and modifications are unwrapped and stored on
    ``self.rec``, the ingredient list is parsed, ``self.source`` (when set)
    is recorded, and ``importer.Importer.commit_rec`` is called. Afterwards
    ``self.committed`` is True and ``self.in_variation`` is False.
    """
    testtimer = TimeAction('mealmaster_importer.commit_rec', 10)
    if self.committed:
        # End the timer on the early exit too; it was left running before.
        testtimer.end()
        return
    debug("start _commit_rec", 5)
    self.instr = self.unwrap_lines(self.instr)
    self.mod = self.unwrap_lines(self.mod)
    self.rec['instructions'] = self.instr
    if self.mod:
        self.rec['modifications'] = self.mod
    self.parse_inglist()
    if self.source:
        self.rec['source'] = self.source
    importer.Importer.commit_rec(self)
    # blank rec
    self.committed = True
    self.in_variation = False
    testtimer.end()