Пример #1
0
def work_remote_id_change_type(**kwargs):
    """Fetch the remote change logs for one change category, grouped by action.

    Reads ``driving_identifier_stem``, ``identifier_stem``, ``changelog_types``,
    ``category_id``, ``id_value`` and ``local_max_value`` from ``kwargs``.

    Returns:
        dict: ``{'change_actions': grouped}`` where ``grouped`` maps each
        ``'Action'`` value to the list of remote changes carrying it.
    """
    from toll_booth.alg_obj.forge.extractors.credible_fe import CredibleFrontEndDriver
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    driving_stem = IdentifierStem.from_raw(kwargs['driving_identifier_stem'])
    extracted_stem = IdentifierStem.from_raw(kwargs['identifier_stem'])
    category = kwargs['changelog_types'].categories[kwargs['category_id']]
    with CredibleFrontEndDriver(driving_stem.get('id_source')) as driver:
        remote_changes = driver.get_change_logs(
            driving_id_type=driving_stem.get('id_type'),
            driving_id_name=driving_stem.get('id_name'),
            driving_id_value=kwargs['id_value'],
            local_max_value=kwargs['local_max_value'],
            category_id=category.category_id,
            driving_identifier_stem=driving_stem,
            identifier_stem=extracted_stem,
            category=category)
        grouped = {}
        for change in remote_changes:
            # bucket each change under its action name
            grouped.setdefault(change['Action'], []).append(change)
        return {'change_actions': grouped}
Пример #2
0
 def creep(self):
     """Run ``_creep`` for every vertex recorded under this propagation.

     For each entry, any of the driving, extracted or propagation identifier
     stems that is still unset on the instance is parsed from that entry.

     Returns:
         ``False`` if ``InsufficientOperationTimeException`` is raised while
         working an entry; otherwise a dict holding ``propagation_id`` and
         ``id_source``.
     """
     # (instance attribute, entry key) pairs that are filled in lazily
     lazy_stems = (
         ('_driving_identifier_stem', 'driving_identifier_stem'),
         ('_extracted_identifier_stem', 'extracted_identifier_stem'),
         ('_propagation_identifier_stem', 'identifier_stem'),
     )
     with CredibleFrontEndDriver(self._id_source) as driver:
         propagated = self._leech_driver.get_propagated_vertexes(
             self._propagation_id)
         for entry in propagated:
             for attribute, entry_key in lazy_stems:
                 if not getattr(self, attribute):
                     setattr(self, attribute,
                             IdentifierStem.from_raw(entry[entry_key]))
             try:
                 self._creep(
                     entry,
                     identifier_stem=self._extracted_identifier_stem,
                     driving_identifier_stem=self._driving_identifier_stem,
                     context=self._context,
                     driver=driver)
             except InsufficientOperationTimeException:
                 return False
     return {
         'propagation_id': self._propagation_id,
         'id_source': self._id_source
     }
Пример #3
0
def graph_links(**kwargs):
    """Graph the ``IdSource`` vertex together with link and unlink edges.

    ``kwargs['identifier_stem']`` supplies the ``id_source``. The optional
    ``new_link_histories``, ``new_links`` and ``new_unlinks`` entries provide
    the extra vertexes and edges. The full ``kwargs`` are also passed on to
    ``Ogm``.
    """
    from toll_booth.alg_obj.graph.ogm.ogm import Ogm
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem
    from toll_booth.alg_obj.graph import InternalId
    from toll_booth.alg_obj.graph.ogm.regulators import PotentialVertex

    source_stem = IdentifierStem.from_raw(kwargs['identifier_stem'])
    id_source = source_stem.get('id_source')
    internal_id = InternalId(''.join(['IdSource', id_source])).id_value
    id_source_stem = IdentifierStem(
        'vertex', 'IdSource', {'id_source': id_source})
    id_source_vertex = PotentialVertex(
        'IdSource', internal_id, {'id_source': id_source},
        id_source_stem, id_source, 'id_source')

    vertexes = [id_source_vertex]
    edges = []
    link_histories = kwargs.get('new_link_histories', [])
    link_batches = (kwargs.get('new_links', []),
                    kwargs.get('new_unlinks', []))
    for history in link_histories:
        vertexes.append(history.potential_vertex)
        edges.append(history.generate_edge(history.most_recent_link))
    # links first, then unlinks; each item is indexed as (generator, argument)
    for batch in link_batches:
        for pair in batch:
            edges.append(pair[0].generate_edge(pair[1]))
    Ogm(**kwargs).graph_objects(vertexes, edges)
def correct():
    """Print the starter change-type entries re-keyed with their action ids.

    Loads ``starters/change_types.json`` (located relative to the parent of
    this file's directory), matches each entry by ``change_action`` against
    the text of the ``<option>`` tags parsed from ``test_html``, and rewrites
    the ``action_id``, ``identifier_stem`` and ``sid_value`` fields of every
    matched entry. The corrected entries are printed as one JSON string.
    """
    admin_directory = os.path.dirname(os.path.dirname(__file__))
    schema_file_path = os.path.join(admin_directory, 'starters',
                                    'change_types.json')
    with open(schema_file_path) as schema_file:
        schema = {
            item['change_action']: item for item in json.load(schema_file)
        }
    soup = bs4.BeautifulSoup(test_html, features='html.parser')

    corrected = []
    for option in soup.find_all('option'):
        raw_id = option.attrs.get('value')
        action_name = option.text
        if not (raw_id and action_name):
            continue
        action_id = int(raw_id)
        schema_entry = schema.get(action_name)
        if not schema_entry:
            continue
        schema_entry['action_id'] = action_id
        original_stem = IdentifierStem.from_raw(
            schema_entry['identifier_stem'])
        pairs = original_stem.paired_identifiers
        pairs['action_id'] = action_id
        corrected_stem = IdentifierStem('vertex', 'ChangeLogType', pairs)
        sid_value = corrected_stem.for_dynamo
        schema_entry['identifier_stem'] = str(corrected_stem)
        schema_entry['sid_value'] = sid_value
        corrected.append(schema_entry)
    print(json.dumps(corrected))
Пример #5
0
def specified_identifier_stem(request):
    """Build an identifier stem specified against an ``ExternalId`` driving stem.

    ``request.param`` is indexed as: 0 id_source, 1 object type, 2 id_type,
    3 id_name, 4 the value handed to ``specify``.
    """
    params = request.param
    driving_pairs = {
        'id_source': params[0],
        'id_type': params[2],
        'id_name': params[3]
    }
    base_stem = IdentifierStem('vertex', params[1], {'id_source': params[0]})
    driving_stem = IdentifierStem('vertex', 'ExternalId', driving_pairs)
    return base_stem.specify(driving_stem, params[4])
Пример #6
0
def propagated_identifier_stem(request):
    """Build a source stem and its ``ExternalId`` driving stem.

    ``request.param`` is indexed as: 0 object type, 1 id_source, 2 id_type,
    3 id_name.

    Returns:
        dict: the two stems under ``identifier_stem`` and
        ``driving_identifier_stem``.
    """
    params = request.param
    source_stem = IdentifierStem('vertex', params[0], {'id_source': params[1]})
    driving_pairs = {
        'id_source': params[1],
        'id_type': params[2],
        'id_name': params[3]
    }
    driving_stem = IdentifierStem('vertex', 'ExternalId', driving_pairs)
    return dict(identifier_stem=source_stem,
                driving_identifier_stem=driving_stem)
Пример #7
0
 def _populate_common_fields(self, entry):
     """Fill in the shared extraction fields that are still unset.

     Each field is only computed when its current value is falsy. The stems
     and ``local_max_value`` come from ``entry``; the schema entry is
     retrieved for the extracted stem's object type and the mapping comes
     from ``_generate_mapping``.
     """
     if not self._extracted_identifier_stem:
         raw_extracted = entry['extracted_identifier_stem']
         self._extracted_identifier_stem = IdentifierStem.from_raw(
             raw_extracted)
     if not self._driving_identifier_stem:
         raw_driving = entry['driving_identifier_stem']
         self._driving_identifier_stem = IdentifierStem.from_raw(raw_driving)
     if not self._schema_entry:
         object_type = self._extracted_identifier_stem.object_type
         self._schema_entry = SchemaVertexEntry.retrieve(object_type)
     if not self._mapping:
         self._mapping = self._generate_mapping()
     if not self._local_max_value:
         self._local_max_value = entry['local_max_value']
Пример #8
0
 def _creep(self, entry, **kwargs):
     """Extract the remote change logs for one propagated entry and mark them.

     Args:
         entry: mapping providing ``identifier_stem`` and ``driving_id_value``.
         **kwargs: must contain ``driving_identifier_stem`` and ``driver``.
     """
     driving_identifier_stem = kwargs['driving_identifier_stem']
     driver = kwargs['driver']
     identifier_stem = IdentifierStem.from_raw(entry['identifier_stem'])
     id_value = entry['driving_id_value']
     # the change category is looked up by the name stored in the entry's stem
     change_category = self._change_types.get_category_by_name(
         identifier_stem.get('category'))
     logging.info(
         f'started the extraction for id_value: {id_value}, change_category_id: {change_category.category_id}'
     )
     extraction_args = dict(
         driving_id_type=driving_identifier_stem.get('id_type'),
         driving_id_name=driving_identifier_stem.get('id_name'),
         driving_id_value=id_value,
         local_max_value=self._get_local_max_value(id_value, change_category),
         category_id=change_category.category_id,
         driving_identifier_stem=driving_identifier_stem,
         identifier_stem=identifier_stem,
         category=change_category,
     )
     remote_changes = driver.get_change_logs(**extraction_args)
     logging.info(
         f'completed the extraction for id_value: {id_value}, change_category_id: {change_category.category_id}'
     )
     self._mark_creep_vertexes(remote_changes, **extraction_args)
Пример #9
0
 def __init__(self, identifier_stem, driving_identifier_stem, **kwargs):
     """Set up the instance for an identifier stem and its driving stem.

     Args:
         identifier_stem: stem passed through ``IdentifierStem.from_raw``.
         driving_identifier_stem: driving stem, also passed through
             ``IdentifierStem.from_raw``; it selects the extractor setup,
             schema entry and vertex regulator.
         **kwargs: optional ``sample_size`` (defaults to 1000).
     """
     parsed_stem = IdentifierStem.from_raw(identifier_stem)
     parsed_driving_stem = IdentifierStem.from_raw(driving_identifier_stem)
     self._spore_id = uuid.uuid4().hex
     self._identifier_stem = parsed_stem
     self._driving_identifier_stem = parsed_driving_stem
     self._leech_driver = LeechDriver(table_name='VdGraphObjects')
     self._extractor_setup = self._leech_driver.get_extractor_setup(
         parsed_driving_stem)
     self._schema_entry = SchemaVertexEntry.retrieve(
         parsed_driving_stem.object_type)
     self._sample_size = kwargs.get('sample_size', 1000)
     # built after the attributes above have been assigned
     self._extraction_profile = self._generate_extraction_profile()
     self._driving_vertex_regulator = VertexRegulator.get_for_object_type(
         parsed_driving_stem.object_type)
Пример #10
0
def get_enrichment_for_change_action(**kwargs):
    """Collect the enrichment data for one change action.

    Reads ``driving_identifier_stem``, ``changelog_types``, ``action_id``,
    ``id_value`` and ``local_max_value`` from ``kwargs``.

    Returns:
        dict: ``{'enriched_data': ...}``. Empty placeholders are returned,
        without building a ``CredibleMuleTeam``, when the action is static,
        has ``has_details`` set to ``False`` and carries no entity type.
    """
    from toll_booth.alg_obj.forge.extractors.credible_fe.mule_team import CredibleMuleTeam
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    driving_stem = IdentifierStem.from_raw(kwargs['driving_identifier_stem'])
    id_source = driving_stem.get('id_source')
    changelog_types = kwargs['changelog_types']
    action_id = kwargs['action_id']
    action_key = str(action_id)
    change_action = changelog_types[action_key]
    category_id = changelog_types.get_category_id_from_action_id(action_key)
    nothing_to_enrich = (change_action.is_static
                         and change_action.has_details is False
                         and not change_action.entity_type)
    if nothing_to_enrich:
        return {
            'enriched_data': {
                'change_detail': {},
                'by_emp_ids': {},
                'entity_ids': {}
            }
        }
    mule_team = CredibleMuleTeam(id_source)
    enriched_data = mule_team.enrich_data(
        driving_id_type=driving_stem.get('id_type'),
        driving_id_name=driving_stem.get('id_name'),
        driving_id_value=kwargs['id_value'],
        local_max_value=kwargs['local_max_value'],
        category_id=category_id,
        action_id=int(action_id),
        get_details=change_action.has_details is True,
        get_by_emp_ids=change_action.is_static is False,
        get_entity_ids=change_action.entity_type,
        checked_emp_ids=None)
    return {'enriched_data': enriched_data}
Пример #11
0
 def for_link_object(self, linked_internal_id, id_source, is_unlink):
     """Build the update arguments that record a link or unlink event.

     Starts from ``self._for_update('linking', is_initial=True)``, replaces
     its ``Key`` with one built from the current timestamp and a ``link``
     stem, and extends the update expression with the link fields.

     Returns:
         The mutated mapping returned by ``_for_update``.
     """
     now = self._get_decimal_timestamp()
     base = self._for_update('linking', is_initial=True)
     link_stem = IdentifierStem('vertex', 'link', {
         'linked_id_source': id_source,
         'internal_id': linked_internal_id
     })
     base['Key'] = DynamoParameters(now, link_stem).as_key
     # note: #idv (id_value) is assigned :lt, the same timestamp as the link time
     base['UpdateExpression'] += ', #d=:d, #ids=:ids, #lt=:lt, #iul=:iul, #idv=:lt, #ot=:ot, #li=:li'
     attribute_names = {
         '#ids': 'linked_id_source',
         '#lt': 'utc_link_time',
         '#iul': 'is_unlink',
         '#d': 'disposition',
         '#idv': 'id_value',
         '#ot': 'object_type',
         '#li': 'linked_internal_id'
     }
     attribute_values = {
         ':ids': id_source,
         ':lt': now,
         ':iul': is_unlink,
         ':d': 'graphing',
         ':ot': 'link',
         ':li': linked_internal_id
     }
     base['ExpressionAttributeNames'].update(attribute_names)
     base['ExpressionAttributeValues'].update(attribute_values)
     return base
Пример #12
0
def spike_tables():
    """Store the primary-key column names of each reported table.

    Runs a primary-key query through ``CredibleReport.from_sql('MBI', ...)``
    and writes one ``CredibleTable`` item per reported table into the
    ``VdGraphObjects`` DynamoDB table. A ``ClientError`` raised by a put is
    printed and the loop continues.
    """
    table = boto3.resource('dynamodb').Table('VdGraphObjects')
    sql = '''
        SELECT
            Col.Table_Name, 
            Col.Column_Name            
        FROM 
            INFORMATION_SCHEMA.TABLE_CONSTRAINTS Tab, 
            INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE Col 
        WHERE 
            Col.Constraint_Name = Tab.Constraint_Name
            AND Col.Table_Name = Tab.Table_Name
            AND Constraint_Type = 'PRIMARY KEY'       
    '''
    credible_report = CredibleReport.from_sql('MBI', sql)
    with table.batch_writer() as writer:
        for table_name, rows in credible_report.items():
            try:
                column_names = [row['Column_Name'] for row in rows]
            except TypeError:
                # fall back to treating the value as a single row
                column_names = [rows['Column_Name']]
            table_stem = IdentifierStem('vertex', 'CredibleTable',
                                        {'table_name': table_name})
            item = {
                'sid_value': table_stem.for_dynamo,
                'identifier_stem': str(table_stem),
                'table_name': table_name,
                'column_names': column_names
            }
            try:
                writer.put_item(Item=item)
            except ClientError as error:
                print(error)
0
 def _extract_change_logs(cls, driver, id_value, local_max_values,
                          **kwargs):
     """Collect formatted change logs for ``id_value`` across categories.

     Args:
         driver: object exposing ``get_change_logs`` and ``get_emp_ids``.
         id_value: driving id value handed to each extraction call.
         local_max_values: mapping of category id to local max value; one
             extraction is run per item.
         **kwargs: must contain ``identifier_stem``, ``id_source`` and
             ``mapping``.

     NOTE(review): the visible body never returns ``change_logs``; the
     definition may be truncated here - confirm against the full source.
     """
     change_logs = []
     identifier_stem = kwargs['identifier_stem']
     # the driving stem is stored, in raw form, inside the identifier stem
     driving_stem = IdentifierStem.from_raw(
         identifier_stem.get('identifier_stem'))
     driving_id_type = driving_stem.get('id_type')
     driving_id_name = driving_stem.get('id_name')
     id_source = kwargs['id_source']
     mapping = kwargs['mapping']
     # use the mapping for this id_source, else the 'default' one
     id_source_mapping = mapping.get(id_source, mapping['default'])
     object_mapping = id_source_mapping[driving_id_type]
     for category_id, local_max_value in local_max_values.items():
         extraction_args = {
             'driving_id_type': driving_id_type,
             'driving_id_name': driving_id_name,
             'driving_id_value': id_value,
             'category_id': category_id,
             'local_max_value': local_max_value
         }
         source_extraction = driver.get_change_logs(**extraction_args)
         # change_detail_extraction = driver.get_change_details(**extraction_args)
         # NOTE(review): emp_ids is fetched but not used in the visible code
         emp_ids = driver.get_emp_ids(**extraction_args)
         # for change_date, entry in source_extraction.items():
         #   entry['User'] = emp_ids[change_date]
         formatted_extraction = cls._format_change_log_data(
             identifier_stem,
             source_extraction,
             object_mapping=object_mapping,
             driver=driver)
         change_logs.extend(formatted_extraction)
Пример #14
0
def vd_identifier_stem(request):
    """Build a vertex stem for the ``Algernon`` id source.

    ``request.param`` is indexed as: 0 object type, 1 id_type, 2 id_name.
    """
    params = request.param
    paired_identifiers = {
        'id_source': 'Algernon',
        'id_type': params[1],
        'id_name': params[2]
    }
    return IdentifierStem('vertex', params[0], paired_identifiers)
Пример #15
0
 def get_credible_id_name(self, id_type):
     """Return the first stored column name for the ``id_type`` table.

     Raises:
         MissingObjectException: when the lookup raises ``KeyError``.
     """
     table_stem = IdentifierStem('vertex', 'CredibleTable',
                                 {'table_name': id_type})
     item_key = {
         'identifier_stem': str(table_stem),
         'sid_value': table_stem.for_dynamo
     }
     response = self._table.get_item(Key=item_key)
     try:
         return response['Item']['column_names'][0]
     except KeyError:
         raise MissingObjectException
Пример #16
0
def pull_schema_entry(**kwargs):
    """Retrieve the schema and the entry for the given stem's object type.

    ``kwargs['identifier_stem']`` selects the entry; all ``kwargs`` are
    passed on to ``Schema.retrieve``.

    Returns:
        dict: ``schema_entry`` and ``schema``.
    """
    from toll_booth.alg_obj.graph.schemata.schema import Schema
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    object_stem = IdentifierStem.from_raw(kwargs['identifier_stem'])
    schema = Schema.retrieve(**kwargs)
    return {
        'schema_entry': schema[object_stem.object_type],
        'schema': schema
    }
Пример #17
0
 def _calculate_change_log_identifier_stem(cls, extracted_data):
     """Build the ``ChangeLog`` vertex stem from ``extracted_data['source']``."""
     return IdentifierStem(
         'vertex', 'ChangeLog', {
             'id_source': extracted_data['source']['id_source'],
             'id_type': extracted_data['source']['id_type'],
             'id_name': extracted_data['source']['id_name']
         })
Пример #18
0
 def __init__(self, monitor_order, **kwargs):
     """Set up the instance from a monitor order.

     Args:
         monitor_order: object providing ``identifier_stem`` and
             ``id_source``.
         **kwargs: passed on to ``LeechDriver``; optional ``sample_size``
             (defaults to 1000).
     """
     self._identifier_stem = IdentifierStem.from_raw(
         monitor_order.identifier_stem)
     self._id_source = monitor_order.id_source
     leech_driver = LeechDriver(**kwargs)
     self._leech_driver = leech_driver
     self._local_setup = leech_driver.get_field_value_setup(
         self._identifier_stem)
     self._sample_size = kwargs.get('sample_size', 1000)
Пример #19
0
def get_local_ids(**kwargs):
    """Look up the local id values for the driving identifier stem.

    Reads ``driving_identifier_stem`` and ``schema`` from ``kwargs``; all
    ``kwargs`` are also passed on to ``IndexManager.from_graph_schema``.

    Returns:
        dict: ``{'local_id_values': ...}``.
    """
    from toll_booth.alg_obj.graph.index_manager.index_manager import IndexManager
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    driving_stem = IdentifierStem.from_raw(kwargs['driving_identifier_stem'])
    index_manager = IndexManager.from_graph_schema(kwargs['schema'], **kwargs)
    return {
        'local_id_values': index_manager.get_local_id_values(driving_stem)
    }
Пример #20
0
def _calculate_change_log_identifier_stem(extracted_data):
    """Build the ``ChangeLog`` vertex stem from ``extracted_data['source']``."""
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    return IdentifierStem(
        'vertex', 'ChangeLog', {
            'id_source': extracted_data['source']['id_source'],
            'id_type': extracted_data['source']['id_type'],
            'id_name': extracted_data['source']['id_name']
        })
Пример #21
0
def monitored_object_identifier_stem(request):
    """Build an ordered monitored-object stem from ``request.param``.

    ``request.param`` is indexed as: 0 object type, 1 id_source, 2 id_type,
    3 id_name, 4 id_value, 5 data_dict_id, 6 a value returned unchanged.

    Returns:
        tuple: ``(identifier_stem, params[6])``.
    """
    params = request.param
    paired_identifiers = OrderedDict([
        ('id_source', params[1]),
        ('id_type', params[2]),
        ('id_name', params[3]),
        ('id_value', params[4]),
        ('data_dict_id', params[5]),
    ])
    return IdentifierStem('vertex', params[0], paired_identifiers), params[6]
Пример #22
0
def add_data_source_vertex(id_source, **kwargs):
    """Graph the ``IdSource`` vertex for ``id_source``.

    ``kwargs`` are passed on to ``Ogm``.

    Returns:
        The ``id_value`` of the ``InternalId`` built from ``'IdSource'``
        joined with ``id_source``.
    """
    internal_id = InternalId(''.join(['IdSource', id_source])).id_value
    source_stem = IdentifierStem('vertex', 'IdSource',
                                 {'id_source': id_source})
    source_vertex = PotentialVertex(
        'IdSource', internal_id, {'id_source': id_source},
        source_stem, id_source, 'id_source')
    # the result of the graphing call is not used
    Ogm(**kwargs).graph_objects(vertexes=[source_vertex])
    return internal_id
Пример #23
0
 def __init__(self, identifier_stem, id_value, **kwargs):
     """Store the stem, id value and derived parameters for an object.

     Args:
         identifier_stem: when truthy it is passed through
             ``IdentifierStem.from_raw`` and its ``object_type`` wins over
             the ``object_type`` keyword.
         id_value: stored as-is.
         **kwargs: optional ``object_type`` (defaults to ``None``) and
             ``object_properties`` (defaults to ``{}``).
     """
     if identifier_stem:
         identifier_stem = IdentifierStem.from_raw(identifier_stem)
         object_type = identifier_stem.object_type
     else:
         object_type = kwargs.get('object_type', None)
     self._identifier_stem = identifier_stem
     self._id_value = id_value
     # NOTE(review): other DynamoParameters calls in this listing pass the
     # stem as the second argument - confirm the expected order here
     self._dynamo_parameters = DynamoParameters(identifier_stem, id_value)
     self._object_properties = kwargs.get('object_properties', {})
     self._object_type = object_type
Пример #24
0
 def mark_propagated_vertexes(self, propagation_id, identifier_stem, driving_identifier_stem, driving_id_values, **kwargs):
     """Write one propagation item per driving id value and change category.

     Args:
         propagation_id: stored, as a string, as both ``sid_value`` and
             ``propagation_id`` of every item.
         identifier_stem: extracted stem; its ``object_type`` names the
             propagation stem.
         driving_identifier_stem: passed through ``IdentifierStem.from_raw``.
         driving_id_values: iterable of driving id values.
         **kwargs: must contain ``change_types``.
     """
     driving_identifier_stem = IdentifierStem.from_raw(driving_identifier_stem)
     base_pairs = driving_identifier_stem.paired_identifiers
     change_types = kwargs['change_types']
     with self._table.batch_writer() as writer:
         for id_value in driving_id_values:
             for change_category in change_types.categories.values():
                 pairs = base_pairs.copy()
                 pairs['id_value'] = id_value
                 pairs['category'] = str(change_category)
                 # NOTE(review): identifier_stem is not parsed with from_raw
                 # here, so it presumably must already expose object_type
                 propagation_stem = IdentifierStem(
                     'propagation', identifier_stem.object_type, pairs)
                 writer.put_item(Item={
                     'identifier_stem': str(propagation_stem),
                     'sid_value': str(propagation_id),
                     'propagation_id': str(propagation_id),
                     'driving_identifier_stem': str(driving_identifier_stem),
                     'extracted_identifier_stem': str(identifier_stem),
                     'driving_id_value': id_value
                 })
Пример #25
0
 def test_monitor_extraction(self, specified_identifier_stem):
     """Run ``CredibleFrontEndExtractor.extract`` for a specified stem.

     Builds the extraction profile from the schema entry and the driving
     stem. No assertion is made on the extraction result.
     """
     object_type = specified_identifier_stem.object_type
     schema_entry = SchemaVertexEntry.retrieve(object_type)
     raw_driving_stem = specified_identifier_stem.retrieve('identifier_stem')
     driving_stem = IdentifierStem.from_raw(raw_driving_stem)
     extractor = schema_entry.extract['CredibleFrontEndExtractor']
     extraction_profile = extractor.extraction_properties
     extraction_profile.update(driving_stem.for_extractor)
     stem_orders = [{
         'identifier_stem': specified_identifier_stem,
         'id_value': None
     }]
     extraction_profile.update({
         'identifier_stems': stem_orders,
         'id_source': specified_identifier_stem.retrieve('id_source')
     })
     CredibleFrontEndExtractor.extract(**extraction_profile)
     print()
Пример #26
0
 def __init__(self, *, identifier_stem, **kwargs):
     """Set up the instance for ``identifier_stem``.

     Args:
         identifier_stem: stem passed through ``IdentifierStem.from_raw``;
             it selects the schema entry and the extractor setup.
         **kwargs: optional ``extraction_queue`` and ``sample_size``
             (defaults to 1000). When ``extraction_queue`` is absent, the
             queue comes from ``ForgeQueue.get_for_extraction_queue``, which
             receives all ``kwargs``.
     """
     identifier_stem = IdentifierStem.from_raw(identifier_stem)
     self._identifier_stem = identifier_stem
     self._object_type = identifier_stem.object_type
     self._schema_entry = SchemaVertexEntry.retrieve(self._object_type)
     self._leech_driver = LeechDriver()
     self._extractor_setup = self._leech_driver.get_extractor_setup(
         identifier_stem)
     self._extraction_profile = self._generate_extraction_profile()
     # dict.get evaluates its default eagerly, so the fallback queue is only
     # built when the caller did not supply one
     if 'extraction_queue' in kwargs:
         self._extraction_queue = kwargs['extraction_queue']
     else:
         self._extraction_queue = ForgeQueue.get_for_extraction_queue(
             **kwargs)
     self._sample_size = kwargs.get('sample_size', 1000)
Пример #27
0
 def get_extractor_function_names(self, identifier_stem):
     """Return the stored ``extractor_function_names`` for a stem.

     Raises:
         MissingExtractionInformation: when the lookup raises ``KeyError``.
     """
     stem = IdentifierStem.from_raw(identifier_stem)
     item_key = DynamoParameters(stem.for_dynamo, stem).as_key
     response = self._table.get_item(Key=item_key)
     try:
         return response['Item']['extractor_function_names']
     except KeyError:
         raise MissingExtractionInformation(
             'could not find extractor information for identifier stem %s' %
             stem)
Пример #28
0
def generate_remote_id_change_data(**kwargs):
    """Assemble the change data for one remote change.

    Reads ``driving_identifier_stem``, ``remote_change``, ``changelog_types``,
    ``action_id``, ``enriched_data``, ``mapping`` and ``id_value`` from
    ``kwargs``.

    Returns:
        dict: ``{'change_data': data}`` where ``data`` holds ``source``,
        ``by_emp_id_target``, ``change_target`` and ``changed_target``.
    """
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    driving_stem = IdentifierStem.from_raw(kwargs['driving_identifier_stem'])
    remote_change = kwargs['remote_change']
    action_id = kwargs['action_id']
    change_action = kwargs['changelog_types'][str(action_id)]
    enriched_data = kwargs['enriched_data']
    change_date_utc = remote_change['UTCDate']
    extracted_data = _build_change_log_extracted_data(remote_change,
                                                      kwargs['mapping'])
    id_source = driving_stem.get('id_source')
    # with no employee recorded for this change date, use the driving id value
    by_emp_id = enriched_data['emp_ids'].get(change_date_utc,
                                             kwargs['id_value'])
    fungal_stem = FungalStem.from_identifier_stem(
        driving_stem, kwargs['id_value'], change_action.category)
    source_data = {
        'change_date_utc': extracted_data['change_date_utc'],
        'change_description': extracted_data['change_description'],
        'change_date': extracted_data['change_date'],
        'fungal_stem': str(fungal_stem),
        'action': extracted_data['action'],
        'action_id': str(action_id),
        'id_source': id_source,
        'id_type': 'ChangeLog',
        'id_name': 'change_date_utc',
        'by_emp_id': by_emp_id
    }
    by_emp_id_target = [{
        'id_source': id_source,
        'id_type': 'Employees',
        'id_value': by_emp_id
    }]
    change_target = []
    changed_target = []
    change_data = {
        'source': source_data,
        'by_emp_id_target': by_emp_id_target,
        'change_target': change_target,
        'changed_target': changed_target
    }
    built_targets = _build_changed_targets(id_source, extracted_data,
                                           change_action)
    if built_targets:
        changed_target.extend(built_targets)
    detail_target = enriched_data.get('change_detail', {}).get(
        change_date_utc, None)
    if detail_target is not None:
        change_target.extend(detail_target)
    return {'change_data': change_data}
Пример #29
0
 def _derive_value_field_stems(self):
     """Build one ``FieldValue`` stem per configured field value.

     Each stem carries the ``id_source``, ``id_type`` and ``id_name`` of the
     instance's identifier stem plus the field value as ``data_dict_id``.
     """
     shared_names = ('id_source', 'id_type', 'id_name')
     paired_identifiers = self._identifier_stem.paired_identifiers
     stems = []
     for field_value in self._local_setup['field_values']:
         named_fields = OrderedDict(
             (name, paired_identifiers[name]) for name in shared_names)
         named_fields['data_dict_id'] = field_value
         stems.append(IdentifierStem('vertex', 'FieldValue', named_fields))
     return stems
Пример #30
0
def fungus(execution_id, **kwargs):
    """Run the ``command_fungi`` subtask and complete the work once it yields.

    Registers the fixed MBI ``ChangeLog`` stem and ``Employees`` driving stem
    as the subtask arguments, invokes the subtask signature, and appends
    ``CompleteWork`` to ``kwargs['decisions']`` only when the signature
    returns a truthy result.
    """
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    subtask_name = 'command_fungi'
    decisions = kwargs['decisions']
    subtask_identifier = f'f-{execution_id}'
    task_args = kwargs['task_args']
    change_log_stem = IdentifierStem.from_raw(
        '#vertex#ChangeLog#{"id_source": "MBI"}#')
    employee_stem = IdentifierStem.from_raw(
        '#vertex#ExternalId#{"id_source": "MBI", "id_type": "Employees", "id_name": "emp_id"}#'
    )
    subtask_arguments = {
        'identifier_stem': change_log_stem,
        'driving_identifier_stem': employee_stem
    }
    task_args.add_argument_value(subtask_name, subtask_arguments)
    signature = SubtaskSignature(subtask_identifier, subtask_name, **kwargs)
    if signature(**kwargs):
        decisions.append(CompleteWork())