def classified_entries(self, key=None):
    """Return a gathered DAList of MachineLearningEntry objects for every
    active (classified) record in this group, ordered by database id.

    :param key: if given, restrict results to records with this key.
    """
    self._initialize()
    entries = DAList()
    entries.gathered = True
    entries.set_random_instance_name()
    # Build the filter once instead of duplicating the query per branch.
    criteria = dict(group_id=self.group_id, active=True)
    if key is not None:
        criteria['key'] = key
    rows = db.session.execute(
        select(MachineLearning).filter_by(**criteria).order_by(
            MachineLearning.id)).scalars()
    for row in rows:
        # independent/dependent/info columns hold base64-encoded pickles.
        entries.appendObject(
            MachineLearningEntry,
            ml=self,
            id=row.id,
            independent=fix_pickle_obj(
                codecs.decode(
                    bytearray(row.independent, encoding='utf-8'), 'base64')),
            dependent=fix_pickle_obj(
                codecs.decode(
                    bytearray(row.dependent, encoding='utf-8'), 'base64')),
            info=fix_pickle_obj(
                codecs.decode(bytearray(row.info, encoding='utf-8'),
                              'base64')) if row.info is not None else None,
            create_time=row.create_time,
            key=row.key)
    return entries
def _train_from_db(self):
    """Train on every active record in this group modified since the last
    training pass, then advance the per-group modification watermark.

    Returns True if at least one record was trained on, False otherwise.
    """
    self._initialize()
    # Capture the timestamp before querying so records modified while
    # training runs are picked up on the next pass rather than skipped.
    started_at = datetime.datetime.utcnow()
    trained_any = False
    rows = db.session.execute(
        select(MachineLearning.independent, MachineLearning.dependent).where(
            and_(
                MachineLearning.group_id == self.group_id,
                MachineLearning.active == True,
                MachineLearning.modtime >
                ml_thread.lastmodtime[self.group_id]))).all()
    for row in rows:
        # Columns hold base64-encoded pickles; decode before training.
        indep = fix_pickle_obj(
            codecs.decode(
                bytearray(row.independent, encoding='utf-8'), 'base64'))
        dep = fix_pickle_obj(
            codecs.decode(
                bytearray(row.dependent, encoding='utf-8'), 'base64'))
        self._train(indep, dep)
        trained_any = True
    ml_thread.lastmodtime[self.group_id] = started_at
    return trained_any
def write_ml_source(playground, playground_number, filename, finalize=True):
    """Export the playground's training data for an ml-*.json source file.

    Collects all MachineLearning rows whose group_id falls under the
    playground source-file prefix, decodes each base64-pickled
    independent/dependent pair, groups them by the group name embedded in
    group_id, and writes the result as JSON back to the playground.

    :returns: True when filename matches the ml-*.json pattern (whether or
        not any data was found), False otherwise.
    """
    if not re.match(r'ml-.*\.json', filename):
        return False
    collected = {}
    prefix = 'docassemble.playground' + str(
        playground_number) + ':data/sources/' + str(filename)
    rows = db.session.query(
        MachineLearning.group_id, MachineLearning.independent,
        MachineLearning.dependent, MachineLearning.key).filter(
            MachineLearning.group_id.like(prefix + ':%'))
    for row in rows:
        parts = row.group_id.split(':')
        if not is_package_ml(parts):
            continue
        entry = dict(
            independent=fix_pickle_obj(
                codecs.decode(
                    bytearray(row.independent, encoding='utf-8'), 'base64')),
            dependent=fix_pickle_obj(
                codecs.decode(
                    bytearray(row.dependent, encoding='utf-8'), 'base64')))
        if row.key is not None:
            entry['key'] = row.key
        # parts[2] is the group name within the source file.
        collected.setdefault(parts[2], []).append(entry)
    if collected:
        playground.write_as_json(collected, filename=filename)
        if finalize:
            playground.finalize()
    return True
def one_unclassified_entry(self, key=None):
    """Return the oldest inactive (unclassified) entry in this group as a
    MachineLearningEntry, or None when there is none.

    :param key: if given, restrict the search to records with this key.
    """
    self._initialize()
    criteria = dict(group_id=self.group_id, active=False)
    if key is not None:
        criteria['key'] = key
    row = db.session.execute(
        select(MachineLearning).filter_by(**criteria).order_by(
            MachineLearning.id)).scalar()
    if row is None:
        return None
    # independent/info columns hold base64-encoded pickles.
    return MachineLearningEntry(
        ml=self,
        id=row.id,
        independent=fix_pickle_obj(
            codecs.decode(bytearray(row.independent, encoding='utf-8'),
                          'base64')),
        create_time=row.create_time,
        key=row.key,
        info=fix_pickle_obj(
            codecs.decode(bytearray(row.info, encoding='utf-8'), 'base64'))
        if row.info is not None else None)._set_instance_name_for_method()
def unclassified_entries(self, key=None):
    """Return a gathered DAList of MachineLearningEntry objects for every
    inactive (unclassified) record in this group, ordered by database id.

    :param key: if given, restrict results to records with this key.
    """
    self._initialize()
    entries = DAList()._set_instance_name_for_method()
    entries.gathered = True
    criteria = dict(group_id=self.group_id, active=False)
    if key is not None:
        criteria['key'] = key
    for row in MachineLearning.query.filter_by(**criteria).order_by(
            MachineLearning.id).all():
        # independent/info columns hold base64-encoded pickles.
        entries.appendObject(
            MachineLearningEntry,
            ml=self,
            id=row.id,
            independent=fix_pickle_obj(
                codecs.decode(
                    bytearray(row.independent, encoding='utf-8'), 'base64')),
            create_time=row.create_time,
            key=row.key,
            info=fix_pickle_obj(
                codecs.decode(bytearray(row.info, encoding='utf-8'),
                              'base64')) if row.info is not None else None)
    return entries
def retrieve_by_id(self, the_id):
    """Fetch a single entry in this group by database id.

    :raises Exception: when no row with that id exists in this group.
    :returns: a MachineLearningEntry; the dependent attribute is set only
        when the stored dependent column is non-empty.
    """
    self._initialize()
    row = db.session.execute(
        select(MachineLearning).filter_by(
            group_id=self.group_id, id=the_id)).scalar()
    if row is None:
        raise Exception("There was no entry in the database for id " +
                        str(the_id) + " with group id " + str(self.group_id))
    # Columns hold base64-encoded pickles; decode what is present.
    fields = dict(
        ml=self,
        id=row.id,
        independent=fix_pickle_obj(
            codecs.decode(bytearray(row.independent, encoding='utf-8'),
                          'base64')),
        create_time=row.create_time,
        key=row.key,
        info=fix_pickle_obj(
            codecs.decode(bytearray(row.info, encoding='utf-8'), 'base64'))
        if row.info is not None else None)
    if row.dependent:
        fields['dependent'] = fix_pickle_obj(
            codecs.decode(bytearray(row.dependent, encoding='utf-8'),
                          'base64'))
    return MachineLearningEntry(**fields)
def retrieve_by_id(self, the_id):
    """Fetch a single entry in this group by database id.

    :raises Exception: when no row with that id exists in this group.
    :returns: a MachineLearningEntry; the dependent attribute is set only
        when the stored dependent column is non-empty.
    """
    self._initialize()
    row = MachineLearning.query.filter_by(
        group_id=self.group_id, id=the_id).first()
    if row is None:
        raise Exception("There was no entry in the database for id " +
                        str(the_id) + " with group id " + str(self.group_id))
    # Columns hold base64-encoded pickles; decode what is present.
    fields = dict(
        ml=self,
        id=row.id,
        independent=fix_pickle_obj(
            codecs.decode(bytearray(row.independent, encoding='utf-8'),
                          'base64')),
        create_time=row.create_time,
        key=row.key,
        info=fix_pickle_obj(
            codecs.decode(bytearray(row.info, encoding='utf-8'), 'base64'))
        if row.info is not None else None)
    if row.dependent:
        fields['dependent'] = fix_pickle_obj(
            codecs.decode(bytearray(row.dependent, encoding='utf-8'),
                          'base64'))
    return MachineLearningEntry(**fields)
def one_unclassified_entry(self, key=None):
    """Return the oldest inactive (unclassified) entry in this group as a
    MachineLearningEntry, or None when there is none.

    :param key: if given, restrict the search to records with this key.
    """
    self._initialize()
    criteria = dict(group_id=self.group_id, active=False)
    if key is not None:
        criteria['key'] = key
    row = MachineLearning.query.filter_by(**criteria).order_by(
        MachineLearning.id).first()
    if row is None:
        return None
    # independent/info columns hold base64-encoded pickles.
    return MachineLearningEntry(
        ml=self,
        id=row.id,
        independent=fix_pickle_obj(
            codecs.decode(bytearray(row.independent, encoding='utf-8'),
                          'base64')),
        create_time=row.create_time,
        key=row.key,
        info=fix_pickle_obj(
            codecs.decode(bytearray(row.info, encoding='utf-8'), 'base64'))
        if row.info is not None else None)._set_instance_name_for_method()
def dependent_in_use(self, key=None):
    """Return the sorted list of distinct dependent values stored for this
    group (optionally restricted to one key), unpickled from their base64
    column representation.
    """
    conditions = [MachineLearning.group_id == self.group_id]
    if key is not None:
        conditions.append(MachineLearning.key == key)
    rows = db.session.execute(
        select(MachineLearning.dependent).where(
            and_(*conditions)).group_by(MachineLearning.dependent))
    seen = set()
    for row in rows:
        if row.dependent is not None:
            seen.add(fix_pickle_obj(
                codecs.decode(bytearray(row.dependent, encoding='utf-8'),
                              'base64')))
    return sorted(seen)
def dependent_in_use(self, key=None):
    """Return the sorted list of distinct dependent values stored for this
    group (optionally restricted to one key), unpickled from their base64
    column representation.
    """
    conditions = [MachineLearning.group_id == self.group_id]
    if key is not None:
        conditions.append(MachineLearning.key == key)
    rows = db.session.query(MachineLearning.dependent).filter(
        and_(*conditions)).group_by(MachineLearning.dependent)
    seen = set()
    for row in rows:
        if row.dependent is not None:
            seen.add(fix_pickle_obj(
                codecs.decode(bytearray(row.dependent, encoding='utf-8'),
                              'base64')))
    return sorted(seen)
def _train_from_db(self):
    """Train on every active record in this group modified since the last
    training pass, then advance the per-group modification watermark.

    Returns True if at least one record was trained on, False otherwise.
    """
    self._initialize()
    # Capture the timestamp before querying so records modified while
    # training runs are picked up on the next pass rather than skipped.
    started_at = datetime.datetime.utcnow()
    trained_any = False
    rows = MachineLearning.query.filter(
        and_(MachineLearning.group_id == self.group_id,
             MachineLearning.active == True,
             MachineLearning.modtime > lastmodtime[self.group_id])).all()
    for row in rows:
        # Columns hold base64-encoded pickles; decode before training.
        indep = fix_pickle_obj(
            codecs.decode(bytearray(row.independent, encoding='utf-8'),
                          'base64'))
        dep = fix_pickle_obj(
            codecs.decode(bytearray(row.dependent, encoding='utf-8'),
                          'base64'))
        self._train(indep, dep)
        trained_any = True
    lastmodtime[self.group_id] = started_at
    return trained_any
def _train_from_db(self):
    """Train on every active record in this group modified since the last
    training pass, then advance the per-group modification watermark.

    Returns True if at least one record was trained on, False otherwise.
    """
    self._initialize()
    # Timestamp is taken before the query so records modified while
    # training runs are not skipped on the next pass.
    started_at = datetime.datetime.utcnow()
    trained_any = False
    for row in MachineLearning.query.filter(
            and_(MachineLearning.group_id == self.group_id,
                 MachineLearning.active == True,
                 MachineLearning.modtime > lastmodtime[self.group_id])).all():
        # Decode the base64-encoded pickles stored in the columns.
        self._train(
            fix_pickle_obj(
                codecs.decode(bytearray(row.independent, encoding='utf-8'),
                              'base64')),
            fix_pickle_obj(
                codecs.decode(bytearray(row.dependent, encoding='utf-8'),
                              'base64')))
        trained_any = True
    lastmodtime[self.group_id] = started_at
    return trained_any
def _train_from_db(self):
    """Rebuild the random-forest training set from all active records in
    this group modified since the last pass and refit the learner.

    For each record, the dependent and independent variables are unpickled
    from their base64 column representation; their Python types are checked
    against (and recorded in) the per-group type registries in
    ml_thread.learners, with int/float coerced to a common numeric type.
    String-typed columns become pandas categoricals (categories saved for
    prediction time) and are one-hot encoded via get_dummies before fitting.

    :returns: True if at least one record was processed, False otherwise.
    :raises Exception: when a variable's type is inconsistent with earlier
        records or is not a standard scalar type.
    """
    self._initialize()
    # Timestamp is captured before the query so records modified while
    # training runs are not skipped on the next pass.
    nowtime = datetime.datetime.utcnow()
    success = False
    data = []
    depend_data = []
    for record in db.session.execute(
            select(MachineLearning).where(
                and_(
                    MachineLearning.group_id == self.group_id,
                    MachineLearning.active == True,
                    MachineLearning.modtime > ml_thread.lastmodtime[
                        self.group_id]))).scalars().all():
        indep_var = fix_pickle_obj(
            codecs.decode(bytearray(record.independent, encoding='utf-8'),
                          'base64'))
        depend_var = fix_pickle_obj(
            codecs.decode(bytearray(record.dependent, encoding='utf-8'),
                          'base64'))
        if isinstance(depend_var, str):
            depend_var = str(depend_var)
        if ml_thread.learners[self.group_id]['dep_type'] is not None:
            # Enforce consistency with the dependent type seen so far,
            # widening int/float to float when they are mixed.
            if not isinstance(
                    depend_var,
                    ml_thread.learners[self.group_id]['dep_type']):
                if isinstance(depend_var, int) and ml_thread.learners[
                        self.group_id]['dep_type'] is float:
                    depend_var = float(depend_var)
                elif isinstance(depend_var, float) and ml_thread.learners[
                        self.group_id]['dep_type'] is int:
                    ml_thread.learners[self.group_id]['dep_type'] = float
                else:
                    raise Exception(
                        "RandomForestMachineLearner: dependent variable type was not consistent"
                    )
        else:
            if not isinstance(depend_var, (str, int, bool, float)):
                # BUGFIX: this message previously interpolated repr(key),
                # but key is not defined until the independent-variable
                # loop below, so hitting this branch raised NameError.
                raise Exception(
                    "RandomForestMachineLearner: dependent variable type was not a standard variable type"
                )
            ml_thread.learners[self.group_id]['dep_type'] = type(depend_var)
        depend_data.append(depend_var)
        if isinstance(indep_var, DADict):
            indep_var = indep_var.elements
        if not isinstance(indep_var, dict):
            raise Exception(
                "RandomForestMachineLearner: independent variable was not a dictionary"
            )
        for key, val in indep_var.items():
            if isinstance(val, str):
                val = str(val)
            if key in ml_thread.learners[self.group_id]['indep_type']:
                # Same consistency/widening rules per independent feature.
                if not isinstance(
                        val, ml_thread.learners[self.group_id]
                        ['indep_type'][key]):
                    if isinstance(val, int) and ml_thread.learners[
                            self.group_id]['indep_type'][key] is float:
                        val = float(val)
                    elif isinstance(val, float) and ml_thread.learners[
                            self.group_id]['indep_type'][key] is int:
                        ml_thread.learners[
                            self.group_id]['indep_type'][key] = float
                    else:
                        raise Exception(
                            "RandomForestMachineLearner: independent variable type for key "
                            + repr(key) + " was not consistent")
            else:
                if not isinstance(val, (str, int, bool, float)):
                    raise Exception(
                        "RandomForestMachineLearner: independent variable type for key "
                        + repr(key) + " was not a standard variable type")
                ml_thread.learners[
                    self.group_id]['indep_type'][key] = type(val)
        data.append(indep_var)
        success = True
    if success:
        df = pd.DataFrame(data)
        for key, val in ml_thread.learners[
                self.group_id]['indep_type'].items():
            if val is str:
                # Remember the category set so prediction-time frames can
                # be encoded with the same columns.
                df[key] = pd.Series(df[key], dtype="category")
                ml_thread.learners[self.group_id]['indep_categories'][
                    key] = df[key].cat.categories
        df = pd.get_dummies(df, dummy_na=True)
        if ml_thread.learners[self.group_id]['dep_type'] is str:
            y = pd.Series(depend_data, dtype="category")
            ml_thread.learners[
                self.group_id]['dep_categories'] = y.cat.categories
        else:
            y = pd.Series(depend_data)
        ml_thread.learners[self.group_id]['learner'].fit(df, list(y))
    ml_thread.lastmodtime[self.group_id] = nowtime
    return success
def decrypt_object(obj_string, secret):
    """Decrypt an AES-CBC-encrypted, base64-encoded pickle and return the
    unpickled object.

    The first 16 bytes of obj_string are the initialization vector; the
    remainder is the base64-encoded ciphertext.
    """
    raw = bytearray(obj_string, encoding='utf-8')
    iv = raw[:16]
    ciphertext = codecs.decode(raw[16:], 'base64')
    cipher = AES.new(bytearray(secret, encoding='utf-8'), AES.MODE_CBC, iv)
    return fix_pickle_obj(unpad(cipher.decrypt(ciphertext)))
def _train_from_db(self):
    """Rebuild the random-forest training set from all active records in
    this group modified since the last pass and refit the learner.

    Python-2-compatible variant using text_type/string_types. Dependent
    and independent variables are unpickled from their base64 column
    representation; their types are checked against (and recorded in) the
    per-group registries in the module-level learners dict, widening
    int/float to float when they are mixed. String-typed columns become
    pandas categoricals (categories saved for prediction time) and are
    one-hot encoded via get_dummies before fitting.

    :returns: True if at least one record was processed, False otherwise.
    :raises Exception: when a variable's type is inconsistent with earlier
        records or is not a standard scalar type.
    """
    self._initialize()
    # Timestamp is captured before the query so records modified while
    # training runs are not skipped on the next pass.
    nowtime = datetime.datetime.utcnow()
    success = False
    data = list()
    depend_data = list()
    for record in MachineLearning.query.filter(
            and_(MachineLearning.group_id == self.group_id,
                 MachineLearning.active == True,
                 MachineLearning.modtime > lastmodtime[self.group_id])).all():
        indep_var = fix_pickle_obj(
            codecs.decode(bytearray(record.independent, encoding='utf-8'),
                          'base64'))
        depend_var = fix_pickle_obj(
            codecs.decode(bytearray(record.dependent, encoding='utf-8'),
                          'base64'))
        if type(depend_var) is str:
            depend_var = text_type(depend_var)
        if learners[self.group_id]['dep_type'] is not None:
            if type(depend_var) is not learners[self.group_id]['dep_type']:
                if type(depend_var) is int and learners[
                        self.group_id]['dep_type'] is float:
                    depend_var = float(depend_var)
                elif type(depend_var) is float and learners[
                        self.group_id]['dep_type'] is int:
                    learners[self.group_id]['dep_type'] = float
                else:
                    raise Exception(
                        "RandomForestMachineLearner: dependent variable type was not consistent")
        else:
            if not isinstance(depend_var, (string_types, int, bool, float)):
                # BUGFIX: this message previously interpolated repr(key),
                # but key is not defined until the independent-variable
                # loop below, so hitting this branch raised NameError.
                raise Exception(
                    "RandomForestMachineLearner: dependent variable type was not a standard variable type")
            learners[self.group_id]['dep_type'] = type(depend_var)
        depend_data.append(depend_var)
        if isinstance(indep_var, DADict):
            indep_var = indep_var.elements
        if type(indep_var) is not dict:
            raise Exception(
                "RandomForestMachineLearner: independent variable was not a dictionary")
        for key, val in indep_var.items():
            if type(val) is str:
                val = text_type(val)
            if key in learners[self.group_id]['indep_type']:
                # Same consistency/widening rules per independent feature.
                if type(val) is not learners[self.group_id]['indep_type'][key]:
                    if type(val) is int and learners[
                            self.group_id]['indep_type'][key] is float:
                        val = float(val)
                    elif type(val) is float and learners[
                            self.group_id]['indep_type'][key] is int:
                        learners[self.group_id]['indep_type'][key] = float
                    else:
                        raise Exception(
                            "RandomForestMachineLearner: independent variable type for key "
                            + repr(key) + " was not consistent")
            else:
                if not isinstance(val, (string_types, int, bool, float)):
                    raise Exception(
                        "RandomForestMachineLearner: independent variable type for key "
                        + repr(key) + " was not a standard variable type")
                learners[self.group_id]['indep_type'][key] = type(val)
        data.append(indep_var)
        success = True
    if success:
        df = pd.DataFrame(data)
        for key, val in learners[self.group_id]['indep_type'].items():
            if val is text_type:
                # Remember the category set so prediction-time frames can
                # be encoded with the same columns.
                df[key] = pd.Series(df[key], dtype="category")
                learners[self.group_id]['indep_categories'][
                    key] = df[key].cat.categories
        df = pd.get_dummies(df, dummy_na=True)
        if learners[self.group_id]['dep_type'] is text_type:
            y = pd.Series(depend_data, dtype="category")
            learners[self.group_id]['dep_categories'] = y.cat.categories
        else:
            y = pd.Series(depend_data)
        learners[self.group_id]['learner'].fit(df, list(y))
    lastmodtime[self.group_id] = nowtime
    return success
def classified_entries(self, key=None):
    """Return a gathered DAList of MachineLearningEntry objects for every
    active (classified) record in this group, ordered by database id.

    :param key: if given, restrict results to records with this key.
    """
    self._initialize()
    entries = DAList()
    entries.gathered = True
    entries.set_random_instance_name()
    # Build the filter once instead of duplicating the query per branch.
    criteria = dict(group_id=self.group_id, active=True)
    if key is not None:
        criteria['key'] = key
    for row in MachineLearning.query.filter_by(**criteria).order_by(
            MachineLearning.id).all():
        # independent/dependent/info columns hold base64-encoded pickles.
        entries.appendObject(
            MachineLearningEntry,
            ml=self,
            id=row.id,
            independent=fix_pickle_obj(
                codecs.decode(
                    bytearray(row.independent, encoding='utf-8'), 'base64')),
            dependent=fix_pickle_obj(
                codecs.decode(
                    bytearray(row.dependent, encoding='utf-8'), 'base64')),
            info=fix_pickle_obj(
                codecs.decode(bytearray(row.info, encoding='utf-8'),
                              'base64')) if row.info is not None else None,
            create_time=row.create_time,
            key=row.key)
    return entries