Example No. 1
 def classified_entries(self, key=None):
     self._initialize()
     results = DAList()
     results.gathered = True
     results.set_random_instance_name()
     if key is None:
         query = db.session.execute(
             select(MachineLearning).filter_by(
                 group_id=self.group_id,
                 active=True).order_by(MachineLearning.id)).scalars()
     else:
         query = db.session.execute(
             select(MachineLearning).filter_by(
                 group_id=self.group_id, active=True,
                 key=key).order_by(MachineLearning.id)).scalars()
     for entry in query:
         results.appendObject(
             MachineLearningEntry,
             ml=self,
             id=entry.id,
             independent=fix_pickle_obj(
                 codecs.decode(
                     bytearray(entry.independent, encoding='utf-8'),
                     'base64')),
             dependent=fix_pickle_obj(
                 codecs.decode(bytearray(entry.dependent, encoding='utf-8'),
                               'base64')),
             info=fix_pickle_obj(
                 codecs.decode(bytearray(entry.info, encoding='utf-8'),
                               'base64'))
             if entry.info is not None else None,
             create_time=entry.create_time,
             key=entry.key)
     return results
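In Example No. 1, classified_entries() returns a DAList of MachineLearningEntry objects whose dependent values are already set, with the base64-wrapped pickles from the database decoded back into Python objects. A minimal usage sketch, assuming the documented SimpleTextMachineLearner interface and a hypothetical group id 'demo-fruit':

    from docassemble.base.util import SimpleTextMachineLearner

    ml = SimpleTextMachineLearner(group_id='demo-fruit')  # hypothetical group id
    # Each entry carries the decoded independent/dependent values plus
    # metadata such as create_time and key.
    pairs = [(entry.independent, entry.dependent)
             for entry in ml.classified_entries()]
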
Example No. 2
 def _train_from_db(self):
     #logmessage("Doing train_from_db where group_id is " + self.group_id + " and lastmodtime is " + repr(ml_thread.lastmodtime[self.group_id]))
     self._initialize()
     nowtime = datetime.datetime.utcnow()
     success = False
     for record in db.session.execute(
             select(MachineLearning.independent,
                    MachineLearning.dependent).where(
                        and_(
                            MachineLearning.group_id == self.group_id,
                            MachineLearning.active == True,
                            MachineLearning.modtime >
                            ml_thread.lastmodtime[self.group_id]))).all():
         #logmessage("Training...")
         self._train(
             fix_pickle_obj(
                 codecs.decode(
                     bytearray(record.independent, encoding='utf-8'),
                     'base64')),
             fix_pickle_obj(
                 codecs.decode(
                     bytearray(record.dependent, encoding='utf-8'),
                     'base64')))
         success = True
     ml_thread.lastmodtime[self.group_id] = nowtime
     return success
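In Example No. 2, _train_from_db() selects every active row in the learner's group that was modified after the last recorded training pass, decodes the base64-wrapped pickles, and hands each independent/dependent pair to _train(); the per-group watermark is advanced whether or not new rows were found. A hedged, self-contained sketch of the same incremental pattern (store, last_seen and train_one are illustrative names, not docassemble API):

    import datetime

    def train_new_records(store, last_seen, train_one, group_id):
        # Sketch only: 'store.records_modified_since' is an assumed helper.
        now = datetime.datetime.utcnow()
        trained = False
        for record in store.records_modified_since(group_id, last_seen[group_id]):
            train_one(record.independent, record.dependent)
            trained = True
        last_seen[group_id] = now  # advance even if nothing new was found
        return trained
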
Example No. 3
def write_ml_source(playground, playground_number, filename, finalize=True):
    if re.match(r'ml-.*\.json', filename):
        output = dict()
        prefix = 'docassemble.playground' + str(
            playground_number) + ':data/sources/' + str(filename)
        for record in db.session.query(
                MachineLearning.group_id, MachineLearning.independent,
                MachineLearning.dependent, MachineLearning.key).filter(
                    MachineLearning.group_id.like(prefix + ':%')):
            parts = record.group_id.split(':')
            if not is_package_ml(parts):
                continue
            if parts[2] not in output:
                output[parts[2]] = list()
            the_entry = dict(independent=fix_pickle_obj(
                codecs.decode(bytearray(record.independent, encoding='utf-8'),
                              'base64')),
                             dependent=fix_pickle_obj(
                                 codecs.decode(
                                     bytearray(record.dependent,
                                               encoding='utf-8'), 'base64')))
            if record.key is not None:
                the_entry['key'] = record.key
            output[parts[2]].append(the_entry)
        if len(output):
            playground.write_as_json(output, filename=filename)
            if finalize:
                playground.finalize()
            return True
    return False
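In Example No. 3, write_ml_source() only acts on filenames matching ml-*.json, collects the Playground training records whose group_id starts with the project-specific prefix, and groups them by the third colon-delimited component of group_id. The JSON it writes has roughly the shape sketched below (the group name, texts and key are invented for illustration):

    example_output = {
        'fruit': [
            {'independent': 'I would like a banana', 'dependent': 'banana',
             'key': 'session-1'},
            {'independent': 'an apple please', 'dependent': 'apple'},
        ]
    }
    # write_ml_source() would then persist this via
    # playground.write_as_json(example_output, filename=filename)
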
Example No. 4
 def one_unclassified_entry(self, key=None):
     self._initialize()
     if key is None:
         entry = db.session.execute(
             select(MachineLearning).filter_by(
                 group_id=self.group_id,
                 active=False).order_by(MachineLearning.id)).scalar()
     else:
         entry = db.session.execute(
             select(MachineLearning).filter_by(
                 group_id=self.group_id, key=key,
                 active=False).order_by(MachineLearning.id)).scalar()
     if entry is None:
         return None
     return MachineLearningEntry(
         ml=self,
         id=entry.id,
         independent=fix_pickle_obj(
             codecs.decode(bytearray(entry.independent, encoding='utf-8'),
                           'base64')),
         create_time=entry.create_time,
         key=entry.key,
         info=fix_pickle_obj(
             codecs.decode(bytearray(entry.info, encoding='utf-8'),
                           'base64')) if entry.info is not None else
         None)._set_instance_name_for_method()
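In Example No. 4, one_unclassified_entry() returns the oldest inactive (not yet classified) entry in the group, or None when nothing is pending, and names the resulting object so attributes can be sought on it later. A usage sketch, assuming the documented MachineLearningEntry.classify() method and a hypothetical label:

    from docassemble.base.util import SimpleTextMachineLearner

    ml = SimpleTextMachineLearner(group_id='demo-fruit')  # hypothetical group id
    entry = ml.one_unclassified_entry()
    if entry is not None:
        # classify() is assumed to record the dependent value and mark
        # the underlying row as classified.
        entry.classify('banana')
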
Example No. 5
 def unclassified_entries(self, key=None):
     self._initialize()
     results = DAList()._set_instance_name_for_method()
     results.gathered = True
     if key is None:
         query = MachineLearning.query.filter_by(
             group_id=self.group_id,
             active=False).order_by(MachineLearning.id).all()
     else:
         query = MachineLearning.query.filter_by(
             group_id=self.group_id, key=key,
             active=False).order_by(MachineLearning.id).all()
     for entry in query:
         results.appendObject(
             MachineLearningEntry,
             ml=self,
             id=entry.id,
             independent=fix_pickle_obj(
                 codecs.decode(
                     bytearray(entry.independent, encoding='utf-8'),
                     'base64')),
             create_time=entry.create_time,
             key=entry.key,
             info=fix_pickle_obj(
                 codecs.decode(bytearray(entry.info, encoding='utf-8'),
                               'base64'))
             if entry.info is not None else None)
     return results
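In Example No. 5, unclassified_entries() is the bulk counterpart of one_unclassified_entry(), returning a DAList of every inactive entry in the group. A short sketch of listing the texts still awaiting a label:

    from docassemble.base.util import SimpleTextMachineLearner

    ml = SimpleTextMachineLearner(group_id='demo-fruit')  # hypothetical group id
    pending_texts = [entry.independent for entry in ml.unclassified_entries()]
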
Example No. 6
 def retrieve_by_id(self, the_id):
     self._initialize()
     existing_entry = db.session.execute(select(MachineLearning).filter_by(group_id=self.group_id, id=the_id)).scalar()
     if existing_entry is None:
         raise Exception("There was no entry in the database for id " + str(the_id) + " with group id " + str(self.group_id))
     if existing_entry.dependent:
         dependent = fix_pickle_obj(codecs.decode(bytearray(existing_entry.dependent, encoding='utf-8'), 'base64'))
         return MachineLearningEntry(ml=self, id=existing_entry.id, independent=fix_pickle_obj(codecs.decode(bytearray(existing_entry.independent, encoding='utf-8'), 'base64')), dependent=dependent, create_time=existing_entry.create_time, key=existing_entry.key, info=fix_pickle_obj(codecs.decode(bytearray(existing_entry.info, encoding='utf-8'), 'base64')) if existing_entry.info is not None else None)
     return MachineLearningEntry(ml=self, id=existing_entry.id, independent=fix_pickle_obj(codecs.decode(bytearray(existing_entry.independent, encoding='utf-8'), 'base64')), create_time=existing_entry.create_time, key=existing_entry.key, info=fix_pickle_obj(codecs.decode(bytearray(existing_entry.info, encoding='utf-8'), 'base64')) if existing_entry.info is not None else None)
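In Example No. 6, the two return statements differ only in whether dependent is passed: a missing or empty stored value yields an entry without a dependent attribute. Retrieval is by numeric row id within the learner's group, as in this sketch (the id value is a placeholder):

    from docassemble.base.util import SimpleTextMachineLearner

    ml = SimpleTextMachineLearner(group_id='demo-fruit')  # hypothetical group id
    entry = ml.retrieve_by_id(42)  # 42 stands in for an id obtained elsewhere
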
Example No. 7
 def retrieve_by_id(self, the_id):
     self._initialize()
     existing_entry = MachineLearning.query.filter_by(group_id=self.group_id, id=the_id).first()
     if existing_entry is None:
         raise Exception("There was no entry in the database for id " + str(the_id) + " with group id " + str(self.group_id))
     if existing_entry.dependent:
         dependent = fix_pickle_obj(codecs.decode(bytearray(existing_entry.dependent, encoding='utf-8'), 'base64'))
         return MachineLearningEntry(ml=self, id=existing_entry.id, independent=fix_pickle_obj(codecs.decode(bytearray(existing_entry.independent, encoding='utf-8'), 'base64')), dependent=dependent, create_time=existing_entry.create_time, key=existing_entry.key, info=fix_pickle_obj(codecs.decode(bytearray(existing_entry.info, encoding='utf-8'), 'base64')) if existing_entry.info is not None else None)
     else:
         return MachineLearningEntry(ml=self, id=existing_entry.id, independent=fix_pickle_obj(codecs.decode(bytearray(existing_entry.independent, encoding='utf-8'), 'base64')), create_time=existing_entry.create_time, key=existing_entry.key, info=fix_pickle_obj(codecs.decode(bytearray(existing_entry.info, encoding='utf-8'), 'base64')) if existing_entry.info is not None else None)
Example No. 8
 def one_unclassified_entry(self, key=None):
     self._initialize()
     if key is None:
         entry = MachineLearning.query.filter_by(group_id=self.group_id, active=False).order_by(MachineLearning.id).first()
     else:
         entry = MachineLearning.query.filter_by(group_id=self.group_id, key=key, active=False).order_by(MachineLearning.id).first()
     if entry is None:
         return None
     return MachineLearningEntry(ml=self, id=entry.id, independent=fix_pickle_obj(codecs.decode(bytearray(entry.independent, encoding='utf-8'), 'base64')), create_time=entry.create_time, key=entry.key, info=fix_pickle_obj(codecs.decode(bytearray(entry.info, encoding='utf-8'), 'base64')) if entry.info is not None else None)._set_instance_name_for_method()
Example No. 9
 def dependent_in_use(self, key=None):
     in_use = set()
     if key is None:
         query = db.session.execute(select(MachineLearning.dependent).where(MachineLearning.group_id == self.group_id).group_by(MachineLearning.dependent))
     else:
         query = db.session.execute(select(MachineLearning.dependent).where(and_(MachineLearning.group_id == self.group_id, MachineLearning.key == key)).group_by(MachineLearning.dependent))
     for record in query:
         if record.dependent is not None:
             in_use.add(fix_pickle_obj(codecs.decode(bytearray(record.dependent, encoding='utf-8'), 'base64')))
     return sorted(in_use)
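In Example No. 9, dependent_in_use() returns a sorted list of the distinct dependent values already stored for the group, which is handy for offering the existing labels as choices. A minimal sketch:

    from docassemble.base.util import SimpleTextMachineLearner

    ml = SimpleTextMachineLearner(group_id='demo-fruit')  # hypothetical group id
    existing_labels = ml.dependent_in_use()  # e.g. ['apple', 'banana'] in this sketch
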
Example No. 10
 def dependent_in_use(self, key=None):
     in_use = set()
     if key is None:
         query = db.session.query(MachineLearning.dependent).filter(MachineLearning.group_id == self.group_id).group_by(MachineLearning.dependent)
     else:
         query = db.session.query(MachineLearning.dependent).filter(and_(MachineLearning.group_id == self.group_id, MachineLearning.key == key)).group_by(MachineLearning.dependent)
     for record in query:
         if record.dependent is not None:
             in_use.add(fix_pickle_obj(codecs.decode(bytearray(record.dependent, encoding='utf-8'), 'base64')))
     return sorted(in_use)
Example No. 11
 def _train_from_db(self):
     #logmessage("Doing train_from_db")
     self._initialize()
     nowtime = datetime.datetime.utcnow()
     success = False
     for record in MachineLearning.query.filter(and_(MachineLearning.group_id == self.group_id, MachineLearning.active == True, MachineLearning.modtime > lastmodtime[self.group_id])).all():
         #logmessage("Training...")
         self._train(fix_pickle_obj(codecs.decode(bytearray(record.independent, encoding='utf-8'), 'base64')), fix_pickle_obj(codecs.decode(bytearray(record.dependent, encoding='utf-8'), 'base64')))
         success = True
     lastmodtime[self.group_id] = nowtime
     return success
Example No. 12
 def _train_from_db(self):
     #logmessage("Doing train_from_db")
     self._initialize()
     nowtime = datetime.datetime.utcnow()
     success = False
     for record in MachineLearning.query.filter(
             and_(MachineLearning.group_id == self.group_id,
                  MachineLearning.active == True, MachineLearning.modtime >
                  lastmodtime[self.group_id])).all():
         #logmessage("Training...")
         self._train(
             fix_pickle_obj(
                 codecs.decode(
                     bytearray(record.independent, encoding='utf-8'),
                     'base64')),
             fix_pickle_obj(
                 codecs.decode(
                     bytearray(record.dependent, encoding='utf-8'),
                     'base64')))
         success = True
     lastmodtime[self.group_id] = nowtime
     return success
Example No. 13
 def _train_from_db(self):
     #logmessage("Doing train_from_db")
     self._initialize()
     nowtime = datetime.datetime.utcnow()
     success = False
     data = []
     depend_data = []
     for record in db.session.execute(
             select(MachineLearning).where(
                 and_(
                     MachineLearning.group_id == self.group_id,
                     MachineLearning.active == True,
                     MachineLearning.modtime > ml_thread.lastmodtime[
                         self.group_id]))).scalars().all():
         indep_var = fix_pickle_obj(
             codecs.decode(bytearray(record.independent, encoding='utf-8'),
                           'base64'))
         depend_var = fix_pickle_obj(
             codecs.decode(bytearray(record.dependent, encoding='utf-8'),
                           'base64'))
         if isinstance(depend_var, str):
             depend_var = str(depend_var)
         if ml_thread.learners[self.group_id]['dep_type'] is not None:
             if not isinstance(
                     depend_var,
                     ml_thread.learners[self.group_id]['dep_type']):
                 if isinstance(depend_var, int) and ml_thread.learners[
                         self.group_id]['dep_type'] is float:
                     depend_var = float(depend_var)
                 elif isinstance(depend_var, float) and ml_thread.learners[
                         self.group_id]['dep_type'] is int:
                     ml_thread.learners[self.group_id]['dep_type'] = float
                 else:
                     raise Exception(
                         "RandomForestMachineLearner: dependent variable type was not consistent"
                     )
         else:
             if not isinstance(depend_var, (str, int, bool, float)):
                  raise Exception(
                      "RandomForestMachineLearner: dependent variable type was not a standard variable type"
                  )
             ml_thread.learners[self.group_id]['dep_type'] = type(
                 depend_var)
         depend_data.append(depend_var)
         if isinstance(indep_var, DADict):
             indep_var = indep_var.elements
         if not isinstance(indep_var, dict):
             raise Exception(
                 "RandomForestMachineLearner: independent variable was not a dictionary"
             )
         for key, val in indep_var.items():
             if isinstance(val, str):
                 val = str(val)
             if key in ml_thread.learners[self.group_id]['indep_type']:
                 if not isinstance(
                         val, ml_thread.learners[self.group_id]
                     ['indep_type'][key]):
                     if isinstance(val, int) and ml_thread.learners[
                             self.group_id]['indep_type'][key] is float:
                         val = float(val)
                     elif isinstance(val, float) and ml_thread.learners[
                             self.group_id]['indep_type'][key] is int:
                         ml_thread.learners[
                             self.group_id]['indep_type'][key] = float
                     else:
                         raise Exception(
                             "RandomForestMachineLearner: independent variable type for key "
                             + repr(key) + " was not consistent")
             else:
                 if not isinstance(val, (str, int, bool, float)):
                     raise Exception(
                         "RandomForestMachineLearner: independent variable type for key "
                         + repr(key) + " was not a standard variable type")
                 ml_thread.learners[
                     self.group_id]['indep_type'][key] = type(val)
         data.append(indep_var)
         success = True
     if success:
         df = pd.DataFrame(data)
         for key, val in ml_thread.learners[
                 self.group_id]['indep_type'].items():
             if val is str:
                 df[key] = pd.Series(df[key], dtype="category")
                 ml_thread.learners[self.group_id]['indep_categories'][
                     key] = df[key].cat.categories
         df = pd.get_dummies(df, dummy_na=True)
         if ml_thread.learners[self.group_id]['dep_type'] is str:
             y = pd.Series(depend_data, dtype="category")
             ml_thread.learners[
                 self.group_id]['dep_categories'] = y.cat.categories
         else:
             y = pd.Series(depend_data)
         ml_thread.learners[self.group_id]['learner'].fit(df, list(y))
         ml_thread.lastmodtime[self.group_id] = nowtime
     return success
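In Example No. 13, the RandomForestMachineLearner version converts string-valued columns to pandas categoricals, one-hot encodes them with pd.get_dummies(dummy_na=True), remembers the category lists so later predictions can be encoded against the same columns, and then fits the forest. A standalone sketch of that encode-then-fit pattern, with data and column names invented for illustration:

    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier

    rows = [{'color': 'yellow', 'length': 12.0},
            {'color': 'red', 'length': 7.5}]
    labels = ['banana', 'apple']

    df = pd.DataFrame(rows)
    df['color'] = pd.Series(df['color'], dtype="category")
    saved_categories = df['color'].cat.categories  # kept for prediction-time encoding
    X = pd.get_dummies(df, dummy_na=True)          # one column per category, plus NaN

    model = RandomForestClassifier()
    model.fit(X, labels)
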
Example No. 14
def decrypt_object(obj_string, secret):
    obj_string = bytearray(obj_string, encoding='utf-8')
    decrypter = AES.new(bytearray(secret, encoding='utf-8'), AES.MODE_CBC,
                        obj_string[:16])
    return fix_pickle_obj(
        unpad(decrypter.decrypt(codecs.decode(obj_string[16:], 'base64'))))
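In Example No. 14, decrypt_object() treats the first 16 characters of the stored string as the AES-CBC initialization vector, base64-decodes the remainder, decrypts it with the given secret, strips the padding, and unpickles the result. A hedged sketch of a matching encryption step (illustrative only, not docassemble's own helper; the secret must be 16, 24 or 32 bytes long):

    import codecs
    import pickle
    import random
    import string
    from Crypto.Cipher import AES
    from Crypto.Util.Padding import pad

    def encrypt_object_sketch(obj, secret):
        # 16 printable characters, so the IV survives the utf-8 round trip
        # that decrypt_object() performs on the stored string.
        iv = ''.join(random.choice(string.ascii_letters) for _ in range(16))
        encrypter = AES.new(bytearray(secret, encoding='utf-8'), AES.MODE_CBC,
                            bytearray(iv, encoding='utf-8'))
        ciphertext = encrypter.encrypt(pad(pickle.dumps(obj), AES.block_size))
        return iv + codecs.decode(codecs.encode(ciphertext, 'base64'), 'utf-8')
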
Example No. 15
 def _train_from_db(self):
     #logmessage("Doing train_from_db")
     self._initialize()
     nowtime = datetime.datetime.utcnow()
     success = False
     data = list()
     depend_data = list()
     for record in MachineLearning.query.filter(and_(MachineLearning.group_id == self.group_id, MachineLearning.active == True, MachineLearning.modtime > lastmodtime[self.group_id])).all():
         indep_var = fix_pickle_obj(codecs.decode(bytearray(record.independent, encoding='utf-8'), 'base64'))
         depend_var = fix_pickle_obj(codecs.decode(bytearray(record.dependent, encoding='utf-8'), 'base64'))
         if type(depend_var) is str:
             depend_var = text_type(depend_var)
         if learners[self.group_id]['dep_type'] is not None:
             if type(depend_var) is not learners[self.group_id]['dep_type']:
                 if type(depend_var) is int and learners[self.group_id]['dep_type'] is float:
                     depend_var = float(depend_var)
                 elif type(depend_var) is float and learners[self.group_id]['dep_type'] is int:
                     learners[self.group_id]['dep_type'] = float
                 else:
                     raise Exception("RandomForestMachineLearner: dependent variable type was not consistent")
         else:
             if not isinstance(depend_var, (string_types, int, bool, float)):
                  raise Exception("RandomForestMachineLearner: dependent variable type was not a standard variable type")
             learners[self.group_id]['dep_type'] = type(depend_var)
         depend_data.append(depend_var)
         if isinstance(indep_var, DADict):
             indep_var = indep_var.elements
         if type(indep_var) is not dict:
             raise Exception("RandomForestMachineLearner: independent variable was not a dictionary")
         for key, val in indep_var.items():
             if type(val) is str:
                 val = text_type(val)
             if key in learners[self.group_id]['indep_type']:
                 if type(val) is not learners[self.group_id]['indep_type'][key]:
                     if type(val) is int and learners[self.group_id]['indep_type'][key] is float:
                         val = float(val)
                     elif type(val) is float and learners[self.group_id]['indep_type'][key] is int:
                         learners[self.group_id]['indep_type'][key] = float
                     else:
                         raise Exception("RandomForestMachineLearner: independent variable type for key " + repr(key) + " was not consistent")
             else:
                 if not isinstance(val, (string_types, int, bool, float)):
                     raise Exception("RandomForestMachineLearner: independent variable type for key " + repr(key) + " was not a standard variable type")
                 learners[self.group_id]['indep_type'][key] = type(val)
         data.append(indep_var)
         success = True
     if success:
         df = pd.DataFrame(data)
         for key, val in learners[self.group_id]['indep_type'].items():
             if val is text_type:
                 df[key] = pd.Series(df[key], dtype="category")
                 learners[self.group_id]['indep_categories'][key] = df[key].cat.categories
         df = pd.get_dummies(df, dummy_na=True)
         if learners[self.group_id]['dep_type'] is text_type:
             y = pd.Series(depend_data, dtype="category")
             learners[self.group_id]['dep_categories'] = y.cat.categories
         else:
             y = pd.Series(depend_data)
         learners[self.group_id]['learner'].fit(df, list(y))
         lastmodtime[self.group_id] = nowtime
     return success
Example No. 16
 def classified_entries(self, key=None):
     self._initialize()
     results = DAList()
     results.gathered = True
     results.set_random_instance_name()
     if key is None:
         query = MachineLearning.query.filter_by(group_id=self.group_id, active=True).order_by(MachineLearning.id).all()
     else:
         query = MachineLearning.query.filter_by(group_id=self.group_id, active=True, key=key).order_by(MachineLearning.id).all()
     for entry in query:
         results.appendObject(MachineLearningEntry, ml=self, id=entry.id, independent=fix_pickle_obj(codecs.decode(bytearray(entry.independent, encoding='utf-8'), 'base64')), dependent=fix_pickle_obj(codecs.decode(bytearray(entry.dependent, encoding='utf-8'), 'base64')), info=fix_pickle_obj(codecs.decode(bytearray(entry.info, encoding='utf-8'), 'base64')) if entry.info is not None else None, create_time=entry.create_time, key=entry.key)
     return results