Example #1
 def init_predict(self, ckpt: str) -> None:
     if not self.training_session.config.inference.pred_inputs:
         eval_dataset = self.datasets['test']
         eval_sampler = RandomSampler(eval_dataset)
         eval_dataloader = DataLoader(eval_dataset,
                                      sampler=eval_sampler,
                                      batch_size=1)
         self.model.eval()  # set model to evaluation mode
         eval_tuple = (eval_dataset, eval_sampler, eval_dataloader)
         Inference(self.training_session.config).init_predict(
             ckpt=ckpt,
             model=self.model,
             tokenizer=self.tokenizer,
             eval_tuple=eval_tuple)
     else:
         Inference(self.training_session.config).init_predict(ckpt=ckpt)
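A note on the sampler above: RandomSampler shuffles evaluation order, which is harmless for
aggregate metrics but makes per-example output order non-deterministic. A minimal sketch of a
deterministic alternative, assuming only that torch is installed (build_eval_dataloader is a
hypothetical helper, not part of this codebase):

from torch.utils.data import DataLoader, SequentialSampler

def build_eval_dataloader(eval_dataset, batch_size: int = 1) -> DataLoader:
    # SequentialSampler keeps example order stable across runs, which makes it
    # easier to line up predictions with their source rows when debugging
    return DataLoader(eval_dataset,
                      sampler=SequentialSampler(eval_dataset),
                      batch_size=batch_size)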
Example #2
 def gen_pred_exp_ds(self) -> Tuple[Dict, Tuple]:
     pred_exp_tups = fetchallwrapper(self.cnxp.get_connection(),
                                     self.config.inference.sql.pred_exp_sql)
     pred_exp_set = []
     pred_exp_ds = OrderedDict({
         'bucket_type': [],
         'bucket_acc': [],
         'conf_percentile': [],
         'pos_pred_acc': [],
         'neg_pred_acc': [],
         'pos_pred_ratio': [],
         'neg_pred_ratio': [],
         'statement_id': [],
         'statement_text': [],
         'tp': [],
         'tn': [],
         'fp': [],
         'fn': []
     })
     for (bucket_type, bucket_acc, conf_percentile, pos_pred_acc,
          neg_pred_acc, pos_pred_ratio, neg_pred_ratio, statement_id,
          statement_text, ctxt_type, tp, tn, fp, fn) in pred_exp_tups:
         # a true positive or false negative implies the ground truth is the
         # positive class, labeled 'False' here
         label = 'False' if tp == 1 or fn == 1 else 'True'
         pred_exp_set.append((statement_text, ctxt_type, label))
         for k, v in zip(pred_exp_ds.keys(), [
                 bucket_type, bucket_acc, conf_percentile, pos_pred_acc,
                 neg_pred_acc, pos_pred_ratio, neg_pred_ratio, statement_id,
                 statement_text, tp, tn, fp, fn
         ]):
             pred_exp_ds[k].append(v)
     pred_exp_attr_tups, global_metric_summ = Inference(
         self.config, pred_exp_set=pred_exp_set).init_predict()
     pred_exp_ds['pred_exp_attr_tups'] = pred_exp_attr_tups
     return pred_exp_ds, global_metric_summ
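The loop above fans each fetched row out into per-column lists keyed by name, relying on the
dict's key order matching the row's positional layout (ctxt_type is deliberately excluded from
the columnar store and routed to pred_exp_set instead). A self-contained sketch of the pattern,
with rows_to_columns as a hypothetical helper introduced purely for illustration:

from collections import OrderedDict
from typing import Dict, List, Sequence, Tuple

def rows_to_columns(keys: Sequence[str],
                    rows: Sequence[Tuple]) -> Dict[str, List]:
    # one empty list per column, in the caller-specified order
    columns = OrderedDict((k, []) for k in keys)
    for row in rows:
        # zip pairs each column name with its positional value, so the
        # row layout must match the key order exactly
        for k, v in zip(keys, row):
            columns[k].append(v)
    return columns

# e.g. rows_to_columns(('id', 'text'), [(1, 'a'), (2, 'b')])
# -> OrderedDict([('id', [1, 2]), ('text', ['a', 'b'])])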
Example #3
 def build_embed_mappings(self,
                          mapping_inputs: List,
                          base_mode: bool = True) -> List[Dict]:
     # one embedding mapping per input set, preserving input order
     return [
         Inference(self.config, mapping_set=inputs,
                   base_mode=base_mode).init_predict()
         for inputs in mapping_inputs
     ]
Example #4
 def gen_report(self, rpt_type: str) -> None:
     analysis_set = self.gen_analysis_set()
     # single metadata row: (dsid, train_start_date, train_end_date)
     ds_meta = fetchallwrapper(self.cnxp.get_connection(),
                               self.config.inference.sql.ds_md_sql)[0]
     self.config.data_source.dsid = ds_meta[0]
     self.config.data_source.train_start_date = datetime.datetime.combine(
         ds_meta[1], datetime.time())
     self.config.data_source.train_end_date = datetime.datetime.combine(
         ds_meta[2], datetime.time())
     rpt_tups, stmt_embed_dict = Inference(
         self.config, analysis_set=analysis_set,
         rpt_type=rpt_type).init_predict()
     self.persist_rpt_data(rpt_tups)
     self.maybe_build_cache(stmt_embed_dict)
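The datetime.combine() calls above promote the DATE values returned by the metadata query to
datetimes at midnight, keeping the config fields type-consistent. A minimal standard-library
illustration with made-up values:

import datetime

d = datetime.date(2021, 6, 1)                       # a DATE as fetched from the DB
dt = datetime.datetime.combine(d, datetime.time())  # -> datetime(2021, 6, 1, 0, 0)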
Example #5
 def publish_flow(self) -> None:
     # N.B. publishes all statements and tweets that meet length thresholds; the flow is driven by separate
     # statements/tweets tables because their metadata differs substantially and is not straightforward to combine
     target_tups = []
     for sql in [
             self.config.experiment.infsvc.sql.stmts_to_analyze_sql,
             self.config.experiment.infsvc.sql.tweets_to_analyze_sql
     ]:
         target_tups.extend(fetchallwrapper(self.cnxp.get_connection(),
                                            sql))
     if target_tups:
         inf_metadata = self.prep_new_threads(target_tups)
         self.publish_inference(
             Inference(self.config).init_predict(), inf_metadata)
     else:
         logger.info(f"No new claims found to analyze and publish")
Example #6
def main() -> Optional[NoReturn]:
    config = EnvConfig().config
    if config.experiment.dataprep_only:
        _ = DatasetCollection(config)
    elif config.experiment.predict_only and config.inference.pred_inputs:
        Inference(config).init_predict()
    elif config.experiment.infsvc.enabled:
        init_dc_service(config, 'infsvc')
    elif config.experiment.tweetbot.enabled:
        init_dc_service(config, 'tweetbot')
    elif config.inference.report_mode:
        if not config.experiment.db_functionality_enabled:
            logger.error(
                f"{constants.DB_WARNING_START} Model analysis reports {constants.DB_WARNING_END}"
            )
            sys.exit(0)
        # deferred import: report dependencies are only loaded in report mode
        from analysis.model_analysis_rpt import ModelAnalysisRpt
        ModelAnalysisRpt(config)
    else:
        core_flow(config)
Example #7
 def maybe_publish(self, target_type: str) -> None:
     # N.B. publishes all statements and tweets that meet length thresholds, driven by four tables:
     # a published and "notpublished" table for each of statements and tweets, kept separate
     # because their metadata differs substantially and is not straightforward to cleanly combine
     if target_type == 'stmts':
         target_tups = fetchallwrapper(
             self.cnxp.get_connection(),
             self.config.experiment.tweetbot.sql.stmts_to_analyze_sql)
         interval = self.config.experiment.tweetbot.dcbot_poll_interval * self.non_twitter_updatefreq
     else:
         target_tups = fetchallwrapper(
             self.cnxp.get_connection(),
             self.config.experiment.tweetbot.sql.tweets_to_analyze_sql)
         interval = self.config.experiment.tweetbot.dcbot_poll_interval
     if target_tups:
         self.prep_new_threads(target_tups)
         self.publish_reports(
             Inference(self.config).init_predict(), target_type)
     else:
         logger.info(
             f"No new {target_type} found to analyze and publish. Trying again in {interval} seconds"
         )
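The logger.info call above formats its f-string eagerly, whether or not the message is emitted.
With the standard-library logger, %-style arguments defer formatting until the record actually
passes the level check. A small sketch with illustrative placeholder values:

import logging

logger = logging.getLogger(__name__)
target_type, interval = 'stmts', 300  # illustrative values only

# formatting happens inside logger.info, and only if the INFO level is enabled
logger.info("No new %s found to analyze and publish. Trying again in %s seconds",
            target_type, interval)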