Exemplo n.º 1
0
class TestCharterAnalyse(unittest.TestCase):
    """Smoke test: run the charter parser over charters stored in MongoDB."""

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_get_org_names(self):
        """Parse the charters of the first audit and print every tag found."""
        charter_parser = CharterParser()

        audits = get_audits()
        if len(audits) == 0:
            logger.warning('no audits')
            return

        first_audit_id = audits[0]['_id']

        for record in get_docs_by_audit_id(first_audit_id, kind=CHARTER):
            print(record['filename'])

            charter: CharterDocument = join_paragraphs(record['parse'],
                                                       doc_id=record['_id'])

            # TODO: mind, this could be slow if embedding is required
            charter_parser.find_org_date_number(charter, AuditContext())

            for found_tag in charter.get_tags():
                print(found_tag)
Exemplo n.º 2
0
def _test_convert():
    """Run the protocol, charter and contract conversion helpers in turn.

    ``_test_protocol`` is only invoked. For the charter and the contract the
    returned JSON is validated against ``document_schemas`` and the returned
    tree is stored under ``analysis.attributes_tree`` of the source document.
    """
    # charter: 5f64161009d100a445b7b0d6
    # protocol: 5ded4e214ddc27bcf92dd6cc
    # contract: 5f0bb4bd138e9184feef1fa8

    db = get_mongodb_connection()

    def _validate_and_store(converted):
        # `converted` is the (tree, json string, db document) triple
        # returned by the conversion helpers.
        tree, json_str, source_doc = converted
        validate(instance=json_str,
                 schema=document_schemas,
                 format_checker=FormatChecker())
        db["documents"].update_one(
            {'_id': source_doc["_id"]},
            {"$set": {"analysis.attributes_tree": tree}})

    # The protocol result is neither validated nor written back here.
    _test_protocol()

    _validate_and_store(_test_charter())
    _validate_and_store(_test_contract())
Exemplo n.º 3
0
def get_audits():
    """Return a cursor over audits in status ``Finalizing``, ordered by
    ascending ``createDate``."""
    audits_collection = get_mongodb_connection()['audits']
    finalizing = audits_collection.find({'status': 'Finalizing'})
    return finalizing.sort([("createDate", pymongo.ASCENDING)])
Exemplo n.º 4
0
def read_all_docs(files_dir: str, doc_type='CONTRACT'):
  """Generate parsed documents of one type from the files listed under ``files_dir``.

  For every file name returned by ``WordDocParser.list_filenames`` the
  ``legaldocs`` collection is checked for an entry with the same id and the
  current parser version. When there is none, the file is parsed and the
  result replaces any entry stored under that id. Each contained document
  whose ``documentType`` equals ``doc_type`` is passed through ``_parse_doc``
  and yielded.

  This is a generator: the final statistics line is printed only once the
  caller has consumed it completely.

  :param files_dir: directory handed to ``WordDocParser.list_filenames``.
  :param doc_type: value compared against each document's ``documentType``.
  """
  db = get_mongodb_connection()
  collection = db['legaldocs']

  wp = WordDocParser()
  filenames = wp.list_filenames(files_dir)

  cnt = 0
  failures = 0
  # NOTE(review): `unknowns` and `nodate` are never changed in this function,
  # so the report always prints 0 for them.
  unknowns = 0
  nodate = 0

  def stats():
    # Reads the enclosing counters at call time.
    print(f'processed:{cnt};\t failures:\t{failures}\t unknown type: {unknowns}\t unknown date: {nodate}')

  for fn in filenames:

    # The id is the path with its first five '/'-separated components
    # dropped, followed by the file name.
    # NOTE(review): the fixed offset 5 presumably matches one specific
    # directory layout — confirm.
    shortfn = fn.split('/')[-1]
    pth = '/'.join(fn.split('/')[5:-1])
    _doc_id = pth + '/' + shortfn

    cnt += 1
    print(cnt, fn)

    # Reuse a stored parse only if it was produced by the same parser version.
    docs = collection.find_one({"_id": _doc_id, 'version': wp.version})
    if docs is None:
      # parse and save to DB
      try:
        docs = wp.read_doc(fn)

        # for res in docs['documents']:
        docs['short_filename'] = shortfn
        docs['path'] = pth
        docs['_id'] = _doc_id

        # Remove an entry stored under the same id (e.g. from another parser
        # version) before inserting the fresh result.
        collection.delete_many({"_id": _doc_id})
        collection.insert_one(docs)

      except Exception:
        # Best effort: report the failure and carry on with the next file.
        print(f"{fn}\nException in WordDocParser code:")
        traceback.print_exc(file=sys.stdout)
        failures += 1

    # `docs` is still None when `read_doc` itself failed; it is set if only
    # the DB write failed, in which case the parse result is still used.
    if docs:
      for res in docs['documents']:

        if doc_type == res["documentType"]:
          _doc = _parse_doc(res, _doc_id)
          yield _doc

          # if False:
          #   contract: ContractDocument = _parse_contract(_doc, row)
          #   json_struct = DocumentJson(contract).__dict__
          #
          #   if contracts_collection.find_one({"_id": _doc_id}) is None:
          #     contracts_collection.insert_one(json_struct)

  stats()
    def get_updated_contracts(self):
        """Return a cursor over ids of contracts changed after the latest known date.

        ``self.lastdate`` is reset to 1900-01-01 and, when ``self.stats`` is not
        empty, replaced by the maximum over its ``user_correction_date`` and
        ``analyze_date`` columns. The query selects CONTRACT documents in
        state 15 that have analysis or user attributes and whose analysis
        timestamp or user update date is later than ``self.lastdate``. The
        cursor is limited to 600 results and projects ``_id`` only.
        """
        self.lastdate = datetime(1900, 1, 1)
        if len(self.stats) > 0:
            date_columns = ["user_correction_date", 'analyze_date']
            # First max() reduces per column, second picks the later of the two.
            self.lastdate = self.stats[date_columns].max().max()
        logger.info(f'latest export_date: [{self.lastdate}]')

        logger.debug('obtaining DB connection...')
        documents_collection = get_mongodb_connection()['documents']

        # TODO: filter by version
        has_attributes = {
            '$or': [{"analysis.attributes": {"$ne": None}},
                    {"user.attributes": {"$ne": None}}]
        }
        changed_since_lastdate = {
            '$or': [{'analysis.analyze_timestamp': {'$gt': self.lastdate}},
                    {'user.updateDate': {'$gt': self.lastdate}}]
        }
        query = {
            '$and': [
                {"parse.documentType": "CONTRACT"},
                {"state": 15},
                has_attributes,
                changed_since_lastdate,
            ]
        }

        logger.debug(f'running DB query {query}')
        # TODO: sorting fails in MONGO
        sorting = [('analysis.analyze_timestamp', ASCENDING),
                   ('user.updateDate', ASCENDING)]
        res = documents_collection.find(filter=query,
                                        sort=sorting,
                                        projection={'_id': True})

        # limit() configures the cursor in place; the query has not run yet.
        res.limit(600)

        logger.info('running DB query: DONE')

        return res
Exemplo n.º 6
0
def save_violations(audit, violations):
    """Store ``violations`` on the audit and mark the audit as ``Done``.

    Both fields are written by a single ``$set`` so that the audit cannot end
    up with the violations stored but the status not updated, which could
    happen when the two fields were written by separate update calls.

    :param audit: mapping with the audit's ``_id``.
    :param violations: value stored under the audit's ``violations`` field.
    """
    db = get_mongodb_connection()
    db["audits"].update_one(
        {'_id': audit["_id"]},
        {"$set": {
            "violations": violations,
            "status": "Done"
        }})
Exemplo n.º 7
0
    def test_doc_parser(self):
        """Rebuild one document from parsed paragraphs and print the result.

        Reads a hard-coded .docx file with ``WordDocParser``, appends every
        paragraph header and body to an initially empty ``LegalDocument`` and
        records the token span each part occupies. Nothing is asserted; the
        checks are commented out, so this only verifies that the code runs.
        """
        db = get_mongodb_connection()
        if db is None:  # TODO: this is a weird way of detecting we're on CI
            return

        # NOTE(review): absolute path on a developer machine — the test cannot
        # run anywhere else.
        FILENAME = "/Users/artem/work/nemo/goil/IN/Другие договоры/Договор Формула.docx"

        wp = WordDocParser()
        res = wp.read_doc(FILENAME)

        # Accumulator that every header and body is appended to.
        doc: LegalDocument = LegalDocument('')
        doc.parse()

        # Length of doc.tokens_map after the previous append, i.e. where the
        # next span starts.
        last = 0
        for d in res['documents']:
            for p in d['paragraphs']:
                header_text = p['paragraphHeader']['text'] + '\n'
                body_text = p['paragraphBody']['text'] + '\n'

                header = LegalDocument(header_text)
                header.parse()
                # self.assertEqual(self.n(header_text), header.text)

                # The span must be computed right after the append, because it
                # is derived from the new length of doc.tokens_map.
                doc += header
                headerspan = (last, len(doc.tokens_map))
                print(headerspan)
                last = len(doc.tokens_map)

                body = LegalDocument(body_text)
                body.parse()
                doc += body
                bodyspan = (last, len(doc.tokens_map))

                header_tag = SemanticTag('headline', header_text, headerspan)
                body_tag = SemanticTag('paragraphBody', None, bodyspan)

                print(header_tag)
                # print(body_tag)
                para = Paragraph(header_tag, body_tag)
                doc.paragraphs.append(para)
                last = len(doc.tokens_map)

                # Results are unused while the assertions below stay disabled.
                h_subdoc = doc.subdoc_slice(para.header.as_slice())
                b_subdoc = doc.subdoc_slice(para.body.as_slice())
                # self.assertEqual(self.n(header_text), h_subdoc.text)
                # self.assertEqual(self.n(body_text), b_subdoc.text)

        print('-' * 100)
        print(doc.text)

        # NOTE(review): `headers` is built but not used in the visible code.
        headers = [
            doc.subdoc_slice(p.header.as_slice()) for p in doc.paragraphs
        ]
        print('-' * 100)
Exemplo n.º 8
0
def get_audits() -> [dict]:
    """Return every audit in status ``InWork`` as a list, ordered by
    ascending ``createDate``."""
    audits_collection = get_mongodb_connection()['audits']
    in_work = audits_collection.find({'status': 'InWork'})
    return list(in_work.sort([("createDate", pymongo.ASCENDING)]))
Exemplo n.º 9
0
def get_attributes_tree(id: str):
    """Return the ``charter`` node of a document's stored attributes tree.

    The document is looked up by ``ObjectId(id)``. ``None`` is returned when
    the document has no (or an empty) ``analysis`` entry.
    """
    # The connection itself is not used below; the call is retained so the
    # function performs exactly the same steps as before.
    get_mongodb_connection()

    document = get_doc_by_id(ObjectId(id))
    analysis = document.get('analysis')
    if not analysis:
        return None

    attributes = dotdict(analysis.get('attributes_tree'))
    return attributes.charter
Exemplo n.º 10
0
def add_link(audit_id, doc_id1, doc_id2):
    """Append an ``analysis`` link from ``doc_id1`` to ``doc_id2`` to the
    ``links`` array of the given audit."""
    new_link = {"fromId": doc_id1, "toId": doc_id2, "type": "analysis"}
    audits = get_mongodb_connection()['audits']
    audits.update_one({"_id": audit_id}, {"$push": {"links": new_link}})
Exemplo n.º 11
0
def save_analysis(db_document: DbJsonDoc,
                  doc: LegalDocument,
                  state: int,
                  retry_number: int = 0):
    """Attach the analysis of ``doc`` to ``db_document`` and persist it.

    ``db_document`` is updated in place (``analysis``, ``state``,
    ``retry_number``) and then written to the ``documents`` collection under
    the id returned by ``doc.get_id()``; the record is inserted if it does not
    exist yet.

    :param db_document: database record object to update and store.
    :param doc: analysed document providing ``to_json_obj()`` and ``get_id()``.
    :param state: value stored in the record's ``state`` field.
    :param retry_number: value stored in the record's ``retry_number`` field.
    """
    # TODO: does not save attributes
    analyse_json_obj: dict = doc.to_json_obj()
    db = get_mongodb_connection()
    documents_collection = db['documents']
    db_document.analysis = analyse_json_obj
    db_document.state = state
    db_document.retry_number = retry_number
    # `Collection.update` is deprecated in PyMongo 3 and removed in PyMongo 4.
    # Replacing the whole document with upsert is the equivalent call.
    # NOTE(review): assumes `as_dict()` returns a plain document without
    # `$` update operators — confirm.
    documents_collection.replace_one({'_id': doc.get_id()},
                                     db_document.as_dict(),
                                     upsert=True)
Exemplo n.º 12
0
 def _get_doc_from_db(self, kind):
     """Yield one stored document of ``kind`` (state 15) from the audit with
     the earliest ``createDate``, if that audit has such a document."""
     audits_collection = get_mongodb_connection()['audits']
     by_create_date = audits_collection.find().sort([
         ("createDate", pymongo.ASCENDING)
     ])
     for audit in by_create_date.limit(1):
         doc_ids = get_docs_by_audit_id(audit['_id'],
                                        kind=kind,
                                        states=[15],
                                        id_only=True)
         if len(doc_ids) == 0:
             continue
         first_id = doc_ids[0]
         print(first_id)
         # jdoc = DbJsonDoc(doc)
         yield finalizer.get_doc_by_id(first_id)
Exemplo n.º 13
0
def get_docs_by_audit_id(id: str,
                         state,
                         kind=None,
                         id_only=False,
                         without_large_fields=False):
    """Return the documents of an audit that carry a date attribute.

    A document matches when its ``auditId``, ``parse.documentType`` and
    ``state`` equal the arguments and either the user attributes contain a
    date, or the analysis attributes contain a date while ``user`` is unset.

    :param id: audit id to match.
    :param state: document state to match.
    :param kind: value matched against ``parse.documentType``.
    :param id_only: when true, project only ``_id``.
    :param without_large_fields: when true (and ``id_only`` is false), exclude
        the listed analysis/parse fields from the result.
    :return: list of matching documents.
    """
    documents_collection = get_mongodb_connection()['documents']

    analysed_and_untouched = {
        "$and": [{"analysis.attributes.date": {"$ne": None}},
                 {"user": None}]
    }
    dated_by_user = {"user.attributes.date": {"$ne": None}}
    query = {
        'auditId': id,
        'parse.documentType': kind,
        "state": state,
        "$or": [analysed_and_untouched, dated_by_user],
    }

    if id_only:
        cursor = documents_collection.find(query, projection={'_id': True})
    elif without_large_fields:
        excluded = {
            'analysis.original_text': False,
            'analysis.normal_text': False,
            'analysis.tokenization_maps': False,
            'analysis.headers': False,
            'parse.paragraphs': False,
        }
        cursor = documents_collection.find(query, projection=excluded)
    else:
        cursor = documents_collection.find(query)

    return list(cursor)
Exemplo n.º 14
0
def remove_old_links(audit_id, contract_id):
    """Pull every ``analysis`` link that starts or ends at ``contract_id``
    from the ``links`` array of the given audit."""
    touches_contract = [{"toId": contract_id}, {"fromId": contract_id}]
    stale_link = {"type": "analysis", "$or": touches_contract}

    audits = get_mongodb_connection()['audits']
    audits.update_one({"_id": audit_id}, {"$pull": {"links": stale_link}})
Exemplo n.º 15
0
def get_docs_by_audit_id(id: str or None,
                         states=None,
                         kind=None,
                         id_only=False) -> []:
    """Return successfully parsed documents of an audit.

    A document matches when its ``auditId`` equals ``id``, its
    ``parserResponseCode`` is 200 and at least one of these holds:
    ``analysis.version`` is unset, ``state`` is unset, or ``state`` is one of
    ``states``. When ``kind`` is given, ``parse.documentType`` must equal it.

    :param id: audit id to match.
    :param states: optional iterable of additional accepted states.
    :param kind: optional document type filter.
    :param id_only: when true, return the ``_id`` values instead of documents.
    :return: list of documents, or list of ids when ``id_only`` is true.
    """
    documents_collection = get_mongodb_connection()['documents']

    accepted = [{"analysis.version": None}, {"state": None}]
    # {"analysis.version": {"$ne": analyser.__version__}},
    if states is not None:
        accepted.extend({"state": state} for state in states)

    conditions = [
        {'auditId': id},
        {"parserResponseCode": 200},
        {"$or": accepted},
    ]
    if kind is not None:
        conditions.append({'parse.documentType': kind})
    query = {"$and": conditions}

    if id_only:
        id_cursor = documents_collection.find(query, projection={'_id': True})
        return [found["_id"] for found in id_cursor]
    return list(documents_collection.find(query))
Exemplo n.º 16
0
def find_top_headers():
  """Print every header counted more than 10 times, highest count first, and
  write the same records to ``top_headers.csv``."""
  headers_collection = get_mongodb_connection()['headers']
  frequent = headers_collection.find({'count': {'$gt': 10}})

  records = []
  for header in frequent.sort('count', pymongo.DESCENDING):
    records.append({field: header[field] for field in ('text', 'count', 'doc_id')})
    print(header['count'], '\t', header['text'])

  DataFrame.from_records(records).to_csv('top_headers.csv')
Exemplo n.º 17
0
def dump_contracts_from_db_to_jsons(output_path,
                                    files_dir='/Users/artem/Downloads/Telegram Desktop/X0/'):
  """Dump stored parse results to JSON files, one per source file.

  For every file name listed under ``files_dir`` the ``legaldocs`` collection
  is queried by the derived id; when an entry exists it is written to
  ``<output_path>/<id with '/' replaced by '_'>.json``.

  :param output_path: directory the JSON files are written to.
  :param files_dir: directory handed to ``WordDocParser.list_filenames``;
      defaults to the previously hard-coded location.
  """
  db = get_mongodb_connection()
  collection = db['legaldocs']

  wp = WordDocParser()
  for fn in wp.list_filenames(files_dir):
    print(fn)
    # The id is the path with its first five '/'-separated components
    # dropped, followed by the file name.
    parts = fn.split('/')
    shortfn = parts[-1]
    pth = '/'.join(parts[5:-1])
    _doc_id = pth + '/' + shortfn

    res = collection.find_one({"_id": _doc_id})
    if res is None:
      continue

    json_name = _doc_id.replace('/', '_')
    # Serialise before opening the file so a failure cannot leave an empty
    # file behind; values json cannot encode become a placeholder string.
    _j = json.dumps(res, indent=4, ensure_ascii=False, default=lambda o: '<not serializable>')
    # ensure_ascii=False emits raw non-ASCII text, so the encoding must be
    # explicit instead of depending on the platform default.
    with open(f'{output_path}/{json_name}.json', 'w', encoding='utf-8') as file:
      file.write(_j)
    print(f'saved file to {json_name}')
Exemplo n.º 18
0
def analyse_headers():
  """Rebuild the ``headers`` collection with a usage count per header text.

  The ``headers`` collection is dropped first. Every paragraph header of
  every document in ``legaldocs`` is tokenized, the tokens before the
  position reported by ``get_tokenized_line_number`` are cut off, and the
  lower-cased remainder is used as the key that is counted.
  """
  db = get_mongodb_connection()
  collection = db['legaldocs']
  headers_collection = db['headers']
  headers_collection.drop()

  for k, doc in enumerate(collection.find({})):
    print(k)

    for p in doc['paragraphs']:
      header_text = p['paragraphHeader']['text']

      tokens = TOKENIZER_DEFAULT.tokenize(header_text)
      _, span, _, _ = get_tokenized_line_number(tokens, 0)
      header_id = ' '.join(tokens[span[1]:]).lower()

      # header_id = header_text

      existing = headers_collection.find_one({'text': header_id})
      if existing:
        # Atomic increment; replaces the read-modify-write through
        # `Collection.update`, which PyMongo 4 no longer provides.
        headers_collection.update_one({'_id': existing['_id']},
                                      {'$inc': {'count': 1}})
      else:
        header = {
          # '_id': header_id,
          'doc_id': doc['_id'],
          'text': header_id,
          'len': len(header_text),
          'has_newlines': header_text.count('\n'),
          'count': 1
        }
        headers_collection.insert_one(header)
      # headers_collection.update_one({'_id': header_id}, header, True)

  """
Exemplo n.º 19
0
def update_db_dictionaries():
    """Replace the dictionary collections and create the ``documents`` indexes.

    Each dictionary collection is emptied and refilled, the ``analyser``
    collection is reset to a single record holding the analyser version, and
    four indexes are created on ``documents``.
    """
    db = get_mongodb_connection()

    def _refill(collection_name, produce_records):
        # Empty the collection first, then build and insert the new records.
        coll = db[collection_name]
        coll.delete_many({})
        coll.insert_many(produce_records())

    _refill("subsidiaries", lambda: subsidiaries)
    _refill("orgStructuralLevel", lambda: OrgStructuralLevel.as_db_json())
    _refill("legalEntityTypes", lambda: legal_entity_types_as_db_json())
    _refill("contractSubjects", lambda: contract_subject_as_db_json())

    version_coll = db["analyser"]
    version_coll.delete_many({})
    version_coll.insert_one({'version': analyser.__version__})

    # indexing
    print('creating db indices')
    documents = db["documents"]

    for field in ("analysis.analyze_timestamp", "user.updateDate",
                  "analysis.attributes.date.value"):
        print("index response:", documents.create_index([(field, DESCENDING)]))

    # Compound index matching the two-field ascending sort.
    compound = [('analysis.analyze_timestamp', ASCENDING),
                ('user.updateDate', ASCENDING)]
    print("index response:", documents.create_index(compound))
Exemplo n.º 20
0
def convert_all_docs():
    """Convert every legacy document, provided ``should_i_migrate`` agrees.

    Each document is loaded with only the fields listed in the projection and
    handed to ``convert_one``.
    """
    ids = get_legacy_docs_ids()
    if not should_i_migrate(ids):
        print('Skipping migration. Re-run when you change your mind.')
        return

    db = get_mongodb_connection()
    documents_collection = db['documents']

    wanted_fields = {
        '_id': True,
        'analysis.attributes': True,
        'user.attributes': True,
        'parse.documentType': True,
    }
    for legacy_id in ids:
        legacy_doc = documents_collection.find_one({"_id": legacy_id},
                                                   projection=wanted_fields)
        convert_one(db, legacy_doc)

    migration_logger.info(f"converted {len(ids)} documents")
Exemplo n.º 21
0
class TestRunner(unittest.TestCase):
    """Integration tests for the runner helpers.

    Every test is skipped when ``get_mongodb_connection()`` returns ``None``.
    """

    # Slot for a shared Runner; nothing in this class assigns it.
    default_no_tf_instance: Runner = None

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_get_audits(self):
        """Print the id of every audit returned by ``get_audits``."""
        aa = get_audits()
        for a in aa:
            print(a['_id'])

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_get_docs_by_audit_id(self):
        """Print id and file name of the protocols of the first audit."""
        audits = get_audits()
        if len(audits) == 0:
            logger.warning('no audits')
            return

        audit_id = audits[0]['_id']

        docs = get_docs_by_audit_id(audit_id, kind='PROTOCOL')
        for a in docs:
            print(a['_id'], a['filename'])

    def _get_doc_from_db(self, kind):
        """Yield one document of ``kind`` (state 15) from the audit with the
        earliest ``createDate``, if that audit has such a document."""
        audits = get_mongodb_connection()['audits'].find().sort([
            ("createDate", pymongo.ASCENDING)
        ]).limit(1)
        for audit in audits:
            doc_ids = get_docs_by_audit_id(audit['_id'],
                                           kind=kind,
                                           states=[15],
                                           id_only=True)
            if len(doc_ids) > 0:
                print(doc_ids[0])
                doc = finalizer.get_doc_by_id(doc_ids[0])
                # jdoc = DbJsonDoc(doc)
                yield doc

    def _preprocess_single_doc(self, kind):
        """Run the ``kind`` processor's ``preprocess`` on each document
        yielded by ``_get_doc_from_db``."""
        for doc in self._get_doc_from_db(kind):
            d = DbJsonDoc(doc)
            processor = document_processors.get(kind)
            processor.preprocess(d, AuditContext())

    # @unittest.skipIf(SKIP_TF, "requires TF")

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_preprocess_single_protocol(self):
        """Preprocess a single stored protocol."""
        self._preprocess_single_doc('PROTOCOL')

    # Was `... is None is None`: a chained comparison that happened to give
    # the same result; written plainly like the other decorators.
    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_preprocess_single_contract(self):
        """Preprocess a single stored contract."""
        self._preprocess_single_doc('CONTRACT')

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_process_contracts_phase_1(self):
        """Preprocess every contract of the first audit."""
        # runner = Runner.get_instance()

        audits = get_audits()
        if len(audits) == 0:
            logger.warning('no audits')
            return

        audit_id = audits[0]['_id']

        docs = get_docs_by_audit_id(audit_id, kind='CONTRACT')
        processor = document_processors.get('CONTRACT')
        for _doc in docs:
            jdoc = DbJsonDoc(_doc)
            processor.preprocess(jdoc, AuditContext())

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_process_charters_phase_1(self):
        """Preprocess every charter of the first audit."""
        audits = get_audits()
        if len(audits) == 0:
            logger.warning('no audits')
            return

        audit_id = audits[0]['_id']
        docs: [dict] = get_docs_by_audit_id(audit_id, kind='CHARTER')
        processor = document_processors.get('CHARTER')
        for _doc in docs:
            jdoc = DbJsonDoc(_doc)
            processor.preprocess(jdoc, AuditContext())

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_process_protocols_phase_1(self):
        """Run org/date/number detection on the protocols of every audit and
        save each result with state -1."""
        runner = get_runner_instance_no_embedder()

        for audit in get_audits():
            audit_id = audit['_id']
            docs = get_docs_by_audit_id(audit_id, kind='PROTOCOL')

            for doc in docs:
                # charter = runner.make_legal_doc(doc)

                jdoc = DbJsonDoc(doc)
                legal_doc = jdoc.asLegalDoc()

                runner.protocol_parser.find_org_date_number(
                    legal_doc, AuditContext())
                save_analysis(jdoc, legal_doc, -1)

    # NOTE(review): at this indentation the call below is part of the class
    # body and therefore runs while the class is still being defined —
    # presumably it was meant to sit under a module-level guard; confirm.
    # if get_mongodb_connection() is not None:
    unittest.main(argv=['-e utf-8'], verbosity=3, exit=False)
Exemplo n.º 22
0
from analyser.log import logger
from analyser.parsing import AuditContext
from analyser.persistence import DbJsonDoc
from analyser.runner import Runner, get_audits, get_docs_by_audit_id, document_processors, save_analysis
from integration.db import get_mongodb_connection

# NOTE(review): presumably a switch for skipping tests that need TensorFlow;
# the visible code does not read it — confirm it is still needed.
SKIP_TF = True


def get_runner_instance_no_embedder() -> Runner:
    """Return the ``Runner`` cached on ``TestRunner``, creating it with
    ``init_embedder=False`` on first use."""
    cached = TestRunner.default_no_tf_instance
    if cached is None:
        cached = Runner(init_embedder=False)
        TestRunner.default_no_tf_instance = cached
    return cached


# The class-level skipIf already skips every test in the class when no
# connection is available; the per-method decorators repeat the same check.
@unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
class TestRunner(unittest.TestCase):
    """Integration tests for the runner helpers; skipped without MongoDB."""

    # Cache slot for the Runner built without an embedder.
    default_no_tf_instance: Runner = None

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_get_audits(self):
        """Print the id of every audit returned by ``get_audits``."""
        aa = get_audits()
        for a in aa:
            print(a['_id'])

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_get_docs_by_audit_id(self):
        audits = get_audits()
        # Nothing to check when the database holds no matching audits.
        if len(audits) == 0:
            logger.warning('no audits')
            return
Exemplo n.º 23
0
def _test_protocol():
    """Run ``convert_one`` on the hard-coded sample protocol document."""
    db = get_mongodb_connection()

    protocol_id = ObjectId('5df7a66b200a3f4d0fad786f')  # protocol
    convert_one(db, get_doc_by_id(protocol_id))
Exemplo n.º 24
0
def change_audit_status(audit, status):
    """Set the ``status`` field of the given audit to ``status``."""
    audits = get_mongodb_connection()["audits"]
    audits.update_one({'_id': audit["_id"]}, {"$set": {"status": status}})
Exemplo n.º 25
0
def change_contract_primary_subject(contract, new_subject):
    """Set the ``primary_subject`` field of the given contract document."""
    documents = get_mongodb_connection()['documents']
    selector = {'_id': contract['_id']}
    documents.update_one(selector, {'$set': {'primary_subject': new_subject}})
Exemplo n.º 26
0
            body_features = line_features(bodymap, line_span, ln,
                                          _prev_features)
            body_features['actual'] = 0
            _features.append(body_features)
            _prev_features = body_features.copy()
            ln += 1

    return _features


# Script entry point: collect line features from every stored document parsed
# by the current WordDocParser version.
if __name__ == '__main__':

    # Accumulates the feature dicts of all processed documents.
    features_dicts = []
    # Number of documents processed.
    count = 0

    db = get_mongodb_connection()
    # Only entries produced by the current parser version are read.
    criterion = {'version': WordDocParser.version}

    res = db['legaldocs'].find(criterion)

    for resp in res:
        for d in resp['documents']:
            doctype = d['documentType']
            # Documents of any other type are ignored.
            if doctype in ('CONTRACT', 'PROTOCOL', 'CHARTER'):
                print(resp['_id'])
                legal_doc = join_paragraphs(d, resp['_id'])
                _doc_features = doc_line_features(legal_doc)
                features_dicts += _doc_features

                count += 1
Exemplo n.º 27
0
def get_doc_by_id(doc_id: ObjectId):
    """Return the ``documents`` entry with the given ``_id``, or ``None`` if
    there is none."""
    return get_mongodb_connection()['documents'].find_one({'_id': doc_id})
Exemplo n.º 28
0
def get_audit_by_id(aid: ObjectId):
    """Return the ``audits`` entry with the given ``_id``, or ``None`` if
    there is none."""
    audits = get_mongodb_connection()['audits']
    return audits.find_one({'_id': aid})
Exemplo n.º 29
0
class AnalyzerTestCase(unittest.TestCase):
    """Integration tests running the document processors on stored documents.

    The tests load documents by hard-coded ids, so they need a database that
    contains exactly those records.
    """

    def _analyze_stored_doc(self, kind, doc_id: str):
        """Preprocess and process the stored document ``doc_id`` with the
        processor registered for ``kind``.

        :raises RuntimeError: when the document is not in the database.
        """
        processor: BaseProcessor = document_processors[kind]
        doc = get_doc_by_id(ObjectId(doc_id))
        if doc is None:
            raise RuntimeError("fix unit test please")

        audit = get_audit_by_id(doc['auditId'])

        jdoc = DbJsonDoc(doc)
        logger.info(f'......pre-processing {jdoc._id}')
        ctx = AuditContext()
        processor.preprocess(jdoc, context=ctx)
        processor.process(jdoc, audit, ctx)

    # `unittest.skip` is documented as taking a reason; the bare decorator
    # form left the skip message empty.
    @unittest.skip("manually disabled")
    def test_analyse_acontract(self):
        """Process one contract using its audit's subsidiary as context."""

        doc = get_doc_by_id(ObjectId('5fdb213f542ce403c92b4530'))
        # _db_client = MongoClient(f'mongodb://192.168.10.36:27017/')
        # _db_client.server_info()

        # db = _db_client['gpn']

        # documents_collection = db['documents']

        # doc = documents_collection.find_one({"_id": ObjectId('5fdb213f542ce403c92b4530')} )
        # audit = db['audits'].find_one({'_id': doc['auditId']})
        audit = get_audit_by_id(doc['auditId'])
        jdoc = DbJsonDoc(doc)
        logger.info(f'......pre-processing {jdoc._id}')
        _audit_subsidiary: str = audit["subsidiary"]["name"]

        ctx = AuditContext(_audit_subsidiary)
        processor: BaseProcessor = document_processors[CONTRACT]
        processor.preprocess(jdoc, context=ctx)
        processor.process(jdoc, audit, ctx)
        print(jdoc)

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_analyze_contract(self):
        """Run the contract processor on a stored contract."""
        self._analyze_stored_doc(CONTRACT, '5ded004e4ddc27bcf92dd47c')

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_analyze_protocol(self):
        """Run the protocol processor on a stored protocol."""
        self._analyze_stored_doc(PROTOCOL, '5e5de70b01c6c73c19eebd35')

    @unittest.skipIf(get_mongodb_connection() is None, "requires mongo")
    def test_analyze_charter(self):
        """Run the charter processor on a stored charter."""
        self._analyze_stored_doc(CHARTER, '5e5de70d01c6c73c19eebd48')
Exemplo n.º 30
0
def change_doc_state(doc, state):
    """Set the ``state`` field of the stored document identified by
    ``doc.get_id()``."""
    documents = get_mongodb_connection()['documents']
    selector = {'_id': doc.get_id()}
    documents.update_one(selector, {"$set": {"state": state}})