示例#1
0
def query_database(query_type: str, query: str, database: str) -> (bool, dict):
    """
    Process a SELECT or UPDATE query against a Stardog database.

    :param query_type: A string = 'select' or 'update'
    :param query: The text of a SPARQL query
    :param database: The database (name) to be queried
    :return: A 2-tuple:
             True if successful; False otherwise (invalid query_type or an exception)
             Query results (if the query_type is 'select' and at least one binding was
             returned); An empty dictionary otherwise
    """
    logging.info(
        f'Querying database, {database}, using {query_type}, with query, {query}'
    )
    if query_type not in ('select', 'update'):
        capture_error(f'Invalid query_type {query_type} for query_db', True)
        return False, dict()
    try:
        # The context manager closes the connection on every exit path, so a
        # connection is no longer leaked for each query that is executed
        with stardog.Connection(database, **sd_conn_details) as conn:
            if query_type == 'select':
                # Select query, which will return results, if successful
                query_results = conn.select(
                    query, content_type='application/sparql-results+json')
                if query_results['results']['bindings']:
                    return True, query_results
                # Successful query that matched nothing
                return True, dict()
            # Update query; No results (either success or failure)
            conn.update(query)
            return True, dict()
    except Exception as e:
        capture_error(
            f'Database ({database}) query exception for {query}: {str(e)}',
            True)
        return False, dict()
示例#2
0
def display_similarities(store_name: str):
    """
    Display a window to show 'similar' narratives (currently a placeholder).

    The narrative texts are queried from the store before the window opens. If none
    are found, an error popup is shown. The window then stays open until it is
    closed or 'End' is pressed.

    :param store_name: The database/data store name holding the narratives
    :return: None
    """
    logging.info(f'Displaying similarities in {store_name}')
    # Setup the PySimpleGUI window
    sg.theme('Material2')
    layout = [[sg.Text("Not yet implemented.", font=('Arial', 16))],
              [
                  sg.Text("To exit, press 'End' or close the window.",
                          font=('Arial', 16))
              ], [sg.Text()],
              [
                  # The instructions above refer to an 'End' button, so provide one
                  sg.Button('End',
                            button_color='dark blue',
                            size=(5, 1),
                            font=('Arial', 14))
              ]]

    # Get the data for the window
    try:
        success, narrative_results = query_database('select',
                                                    query_narrative_text,
                                                    store_name)
        number_narratives = 0
        if success and 'results' in narrative_results and \
                'bindings' in narrative_results['results']:
            number_narratives = len(narrative_results['results']['bindings'])
        if not number_narratives:
            sg.popup_error(
                f'No narrators are defined in {store_name}. '
                f'Similarities graph cannot be displayed.',
                font=('Arial', 14),
                button_color='dark blue',
                icon=encoded_logo)
    except Exception as e:
        capture_error(
            f'Exception getting narratives for similarity analysis from {store_name}: {str(e)}',
            True)
        return

    # Create the GUI Window
    window_similarities_list = sg.Window('Narrative Similarities',
                                         layout,
                                         icon=encoded_logo).Finalize()

    # Event Loop to process window "events"
    while True:
        event_similarities_list, values = window_similarities_list.read()
        if event_similarities_list in (sg.WIN_CLOSED, 'End'):
            # If user closes window or clicks 'End'
            break
        # TODO

    # Done
    window_similarities_list.close()
    return
示例#3
0
def display_hypotheses(store_name: str):
    """
    Show the (placeholder) window for the hypotheses defined in a data store.

    The hypotheses are queried before the window is opened. The window itself only
    reports that the feature is not yet implemented and waits until it is closed or
    'End' is pressed.

    :param store_name: The database/data store name holding the hypotheses
    :return: None
    """
    logging.info(f'Displaying hypotheses in {store_name}')
    # Setup the PySimpleGUI window
    sg.theme('Material2')
    layout = [
        [sg.Text("Not yet implemented.", font=('Arial', 16))],
        [sg.Text("To exit, press 'End' or close the window.", font=('Arial', 16))],
        [sg.Text()],
        [sg.Button('End', button_color='dark blue', size=(5, 1), font=('Arial', 14))],
    ]

    # Get the data for the window
    try:
        success, hypotheses_results = query_database(
            'select', query_hypotheses, store_name)
        has_bindings = (success and 'results' in hypotheses_results
                        and 'bindings' in hypotheses_results['results'])
        # The count is computed but not yet used by the placeholder window
        number_hypotheses = (len(hypotheses_results['results']['bindings'])
                             if has_bindings else 0)
    except Exception as e:
        capture_error(
            f'Exception getting hypotheses details from {store_name}: {str(e)}',
            True)
        return
    window_hypotheses_list = sg.Window(
        'Display Hypotheses', layout, icon=encoded_logo).Finalize()

    # Keep reading events until the user closes the window or clicks 'End'
    event_hypotheses_list, values = window_hypotheses_list.read()
    while event_hypotheses_list not in (sg.WIN_CLOSED, 'End'):
        event_hypotheses_list, values = window_hypotheses_list.read()

    # Done
    window_hypotheses_list.close()
    return
示例#4
0
def _load_directory_to_database(directory_name, conn):
    """
    Loads the DNA files (every '.ttl' file in a directory) to a database/data store.

    Any exception is reported through capture_error and is not re-raised.

    :param directory_name: String holding the directory name (with or without a
                           trailing path separator)
    :param conn: The connection to the Stardog DB for the database
    :return: None
    """
    try:
        for file_name in os.listdir(directory_name):
            if file_name.endswith('.ttl'):
                # os.path.join gives the same path as plain concatenation when the
                # directory ends with a separator, and a valid path when it does not
                conn.add(
                    stardog.content.File(
                        os.path.join(directory_name, file_name)))
    except Exception as e:
        capture_error(
            f'Exception loading ontologies from {directory_name}: {str(e)}',
            True)
示例#5
0
def get_databases() -> list:
    """
    Collect the names of all the databases/stores of narratives.

    :return: List of database/store names; An empty list if the Stardog admin
             request raised an exception (which is reported via capture_error)
    """
    logging.info('Getting a list of all databases')
    try:
        databases = stardog.Admin(**sd_conn_details).databases()
    except Exception as e:
        capture_error(f'Exception getting list of stores: {str(e)}', True)
        return []
    # Extracting the names happens outside of the guarded admin request
    return [database.name for database in databases]
示例#6
0
def add_remove_data(op_type: str,
                    triples: str,
                    database: str,
                    graph: str = '') -> bool:
    """
    Add or remove data to/from the database/store, within a single transaction.

    :param op_type: A string = 'add' or 'remove'
    :param triples: A string with the triples (Turtle) to be inserted/removed
    :param database: The database name
    :param graph: An optional named graph in which to insert/remove the triples
    :return: True if successful; False otherwise (invalid op_type or an exception,
             both of which are reported via capture_error)
    """
    # Validate first so that the log does not record an operation that never ran
    if op_type not in ('add', 'remove'):
        capture_error(f'Invalid op_type {op_type} for add_remove_graph', True)
        return False
    logging.info(
        f'Data {"added to" if op_type == "add" else "removed from"} {database}'
        f'{" and graph, " if graph else ""}{graph}')
    # Only pass graph_uri when a named graph was actually requested
    graph_kwargs = {'graph_uri': graph} if graph else {}
    try:
        # The context manager closes the connection on every exit path
        with stardog.Connection(database, **sd_conn_details) as conn:
            conn.begin()
            try:
                content = stardog.content.Raw(triples, 'text/turtle')
                if op_type == 'add':
                    conn.add(content, **graph_kwargs)
                else:
                    conn.remove(content, **graph_kwargs)
                conn.commit()
            except Exception:
                # Do not leave a half-finished transaction open on the server
                try:
                    conn.rollback()
                except Exception as rollback_error:
                    logging.warning(
                        f'Rollback failed for {database}: {str(rollback_error)}')
                # Re-raise the original failure so that it is the one reported
                raise
        return True
    except Exception as e:
        capture_error(f'Database ({op_type}) exception: {str(e)}', True)
        return False
def add_narr_data_to_store(narrative: str, narr_metadata: dict, store_name: str):
    """
    Translate a narrative and its metadata into RDF (Turtle) and add the triples to
    the data store, for summary statistics and later analyses.

    :param narrative: String consisting of the full narrative text
    :param narr_metadata: Dictionary of metadata information - Keys are:
                          Source,Title,Person,Given,Surname,Maiden,Gender,Start,End,Remove,Header,Footer
    :param store_name: The database/data store name
    :return: None (Specified database/store is updated with the narrative text and metadata,
             translated into RDF)
    """
    # Build the narrator's/subject's name: the maiden name is only used
    # when a surname is also present
    maiden = narr_metadata['Maiden']
    surname = narr_metadata['Surname']
    given = narr_metadata["Given"]
    if surname:
        narrator = f'{given} {maiden} {surname}' if maiden else f'{given} {surname}'
    else:
        narrator = f'{given}'
    # Identifiers used in the triples
    title = narr_metadata["Title"]
    iri_narrator = narrator.replace(SPACE, EMPTY_STRING)
    # Triples describing the narrative and its narrator/subject
    triples_list = [
        '@prefix : <urn:ontoinsights:dna:> .',
        f':{title} a :Narrative ; rdfs:label "{title}" ; '
        f':text "{narrative}" ; :subject :{iri_narrator} .',
        f':{iri_narrator} a :Person ; rdfs:label "{get_narrator_names(narr_metadata)}" .',
    ]
    # 'U' gender gets no agent aspect triple
    if narr_metadata['Gender'] != 'U':
        triples_list.append(f':{iri_narrator} :has_agent_aspect {gender_dict[narr_metadata["Gender"]]} .')
    # Additional information - the subject's birth date and place
    birth_family_triples = get_birth_family_triples(narrative, narr_metadata['Given'], iri_narrator)
    if birth_family_triples:
        triples_list.extend(birth_family_triples)
    # Send everything to the data store as a single Turtle string
    turtle_text = ' '.join(triples_list)
    try:
        add_remove_data('add', turtle_text, store_name)
    except Exception as e:
        capture_error(f'Exception adding narrative ({narr_metadata["Title"]}) triples to store: {str(e)}', True)
示例#8
0
def create_delete_database(op_type: str, database: str) -> str:
    """
    Create or delete a database. If created, add the DNA ontologies.

    :param op_type: A string = 'create' or 'delete'
    :param database: The database name
    :return: Empty string if successful; Otherwise, the text of the error (an invalid
             op_type or the details of an exception)
    """
    if op_type not in ('create', 'delete'):
        # An empty string means success, so the error text has to be returned
        error_text = f'Invalid op_type {op_type} for create_delete_db'
        capture_error(error_text, True)
        return error_text
    logging.info(f'Database {database} being {op_type}d')
    try:
        admin = stardog.Admin(**sd_conn_details)
        if op_type == 'create':
            # Create database
            admin.new_database(
                database, {
                    'search.enabled': True,
                    'edge.properties': True,
                    'reasoning': True,
                    'reasoning.punning.enabled': True,
                    'query.timeout': '20m'
                })
            # Load ontologies to the newly created database; the context manager
            # closes the connection whether or not the load succeeds
            with stardog.Connection(database, **sd_conn_details) as conn:
                conn.begin()
                logging.info(f'Loading DNA ontologies to {database}')
                _load_directory_to_database(ontol_path, conn)
                _load_directory_to_database(f'{ontol_path}domain-context/',
                                            conn)
                conn.commit()
        else:
            # Delete database
            admin.database(database).drop()
        return ''
    except Exception as e:
        return f'Database ({op_type}) exception: {str(e)}'
                'test_question'):
     display_popup_help(event)
 # New windows to process narratives
 elif event == 'From Existing Store':
     store_name = select_store()
     if store_name:
         success, count_results = query_database(
             'select', query_number_narratives, store_name)
         if success and 'results' in count_results.keys(
         ) and 'bindings' in count_results['results'].keys():
             count = int(count_results['results']['bindings'][0]['cnt']
                         ['value'])
             window['text-selected'].\
                 update(f'The data store, {store_name}, holds {count} narratives.')
         else:
             capture_error('The query for narrative count failed.',
                           True)
 elif event == 'New, From CSV Metadata':
     store_name, count = ingest_narratives()
     if store_name:
         window['text-selected'].\
             update(f'{count} narratives were added to the data store, {store_name}')
 elif event == 'Summary Statistics':
     if not store_name:
         sg.popup_error(
             "A narrative store must be loaded before selecting 'Summary Statistics'.",
             font=('Arial', 14),
             button_color='dark blue',
             icon=encoded_logo)
     else:
         display_statistics(store_name)
 elif event == 'Narrative Search/Display':
示例#10
0
def display_narratives(store_name):
    """
    Display a list of all narratives in the specified store and allow selection of one.
    When a narrative is selected and 'OK' is pressed, its text is retrieved and its
    metadata and timeline are displayed.

    :param store_name: The database/data store name
    :return: None (Narrative timeline is displayed)
    """
    logging.info('Narrative selection')
    # Get the narrative titles, each mapped to its narrator's identifier
    narrative_dict = dict()
    try:
        success, narrative_names = query_database('select', query_narratives,
                                                  store_name)
        if success and 'results' in narrative_names and \
                'bindings' in narrative_names['results']:
            for binding in narrative_names['results']['bindings']:
                # Keep only the local name (the text after the last ':')
                narrative_dict[binding['name']['value']] = binding['narrator'][
                    'value'].split(':')[-1]
        else:
            sg.popup_error(
                f'No narratives are defined in {store_name}. '
                f'Narrative timelines cannot be displayed.',
                font=('Arial', 14),
                button_color='dark blue',
                icon=encoded_logo)
    except Exception as e:
        capture_error(
            f'Exception getting narrative names from {store_name}: {str(e)}',
            True)
        return
    if not narrative_dict:
        sg.popup_ok(
            'No narratives were found in the store. '
            'Please ingest one or more using the "Load Narratives" button.',
            font=('Arial', 14),
            button_color='dark blue',
            icon=encoded_logo)
        return
    narrative_list = list(narrative_dict.keys())

    # Setup the PySimpleGUI window
    sg.theme('Material2')
    layout = [
        [
            sg.Text("Select a narrative and then press 'OK'.",
                    font=('Arial', 16))
        ],
        [
            sg.Text(
                "To exit without making a selection, press 'End' or close the window.",
                font=('Arial', 16))
        ],
        [
            sg.Listbox(narrative_list,
                       size=(30, 10),
                       key='narrative_list',
                       font=('Arial', 14),
                       background_color='#fafafa',
                       highlight_background_color='light grey',
                       highlight_text_color='black',
                       text_color='black')
        ], [sg.Text()],
        [
            sg.Button('OK',
                      button_color='dark blue',
                      font=('Arial', 14),
                      size=(5, 1)),
            sg.Button('End',
                      button_color='dark blue',
                      font=('Arial', 14),
                      size=(5, 1))
        ]
    ]

    # Create the GUI Window
    window_narrative_list = sg.Window('Select Narrative',
                                      layout,
                                      icon=encoded_logo).Finalize()

    # Event Loop to process window "events"; the 'finally' clause guarantees that
    # the window is closed on every exit path, including errors
    try:
        while True:
            event_narrative_list, values = window_narrative_list.read()
            if event_narrative_list in (sg.WIN_CLOSED, 'End'):
                # If user closes window or clicks 'End'
                break
            if event_narrative_list != 'OK':
                continue
            if len(values['narrative_list']) != 1:
                sg.popup_error(
                    'Either no narrative was selected, or more than one was selected.',
                    font=('Arial', 14),
                    button_color='dark blue',
                    icon=encoded_logo)
                continue
            narrative_name = values['narrative_list'][0]
            narrative_text = ''
            try:
                success1, narrative_text_results = query_database(
                    'select',
                    query_narrative_text.replace('narrative_name',
                                                 narrative_name),
                    store_name)
                if success1 and 'results' in narrative_text_results and \
                        'bindings' in narrative_text_results['results']:
                    narrative_text = narrative_text_results['results'][
                        'bindings'][0]['text']['value']
                else:
                    sg.popup_error(
                        f'Error retrieving the text for the narrative, {narrative_name}, '
                        f'from {store_name}. The narrative details cannot be displayed.',
                        font=('Arial', 14),
                        button_color='dark blue',
                        icon=encoded_logo)
            except Exception as e:
                capture_error(
                    f'Exception getting narrative text for {narrative_name} '
                    f'from {store_name}: {str(e)}', True)
                # Leave the loop (rather than returning) so the window is closed
                break
            if narrative_text:
                _display_metadata(narrative_name,
                                  narrative_dict[narrative_name],
                                  narrative_text, store_name)
                _display_timeline(narrative_name, narrative_text)
    finally:
        # Done
        window_narrative_list.close()
    return
示例#11
0
def _display_metadata(narrative_name: str, narrator: str, narrative_text: str,
                      store_name: str):
    """
    Display a window with a narrative's title, the narrator's names, gender, birth
    country and birth year, and the narrative text. Any detail that cannot be
    retrieved from the store is shown as 'Unknown'.

    :param narrative_name: The title of the narrative
    :param narrator: The narrator's identifier, substituted (prefixed with ':') for
                     '?narrator' in the metadata queries
    :param narrative_text: The full text of the narrative
    :param store_name: The database/data store name
    :return: None (Window is displayed and left open; the user closes it)
    """
    logging.info(f'Displaying metadata and text for {narrative_name}')
    narrator_names = []
    # Default every field so that the layout below can always be built, even when
    # a query fails or returns no bindings
    metadata_dict = {
        'gender': 'Unknown',
        'country': 'Unknown',
        'year': 'Unknown'
    }
    narrator_iri = f':{narrator}'
    try:
        # Narrator names
        success1, metadata1_results = query_database(
            'select', query_metadata1.replace("?narrator", narrator_iri),
            store_name)
        if success1 and 'results' in metadata1_results and \
                'bindings' in metadata1_results['results']:
            for binding in metadata1_results['results']['bindings']:
                narrator_names.append(binding['name']['value'])
        # Birth country and year
        success2, metadata2_results = query_database(
            'select', query_metadata2.replace("?narrator", narrator_iri),
            store_name)
        if success2 and 'results' in metadata2_results and \
                'bindings' in metadata2_results['results']:
            for binding in metadata2_results['results']['bindings']:
                # There should only be one result / one set of metadata for the narrator
                metadata_dict['country'] = \
                    binding['country']['value'] if 'country' in binding else 'Unknown'
                metadata_dict['year'] = \
                    binding['year']['value'] if 'year' in binding else 'Unknown'
        # Gender (one of the narrator's agent aspects)
        success3, metadata3_results = query_database(
            'select', query_metadata3.replace("?narrator", narrator_iri),
            store_name)
        if success3 and 'results' in metadata3_results and \
                'bindings' in metadata3_results['results']:
            for binding in metadata3_results['results']['bindings']:
                # Keep only the local name (the text after the last ':')
                aspect = binding['aspect']['value'].split(':')[-1]
                if aspect in ('Agender', 'Bigender', 'Female', 'Male'):
                    metadata_dict['gender'] = aspect
        if not (success1 or success2 or success3):
            sg.popup_error(
                f'Limited or no metadata was found for the narrator, {narrator.split(":")[-1]}. '
                f'At a minimum, the narrative text will be displayed.',
                font=('Arial', 14),
                button_color='dark blue',
                icon=encoded_logo)
    except Exception as e:
        capture_error(
            f'Exception getting narrator details from {store_name}: {str(e)}',
            True)
        return

    # Setup the PySimpleGUI window
    sg.theme('Material2')
    layout = [[
        sg.Text("Narrative Title:", font=('Arial', 16)),
        sg.Text(narrative_name, font=('Arial', 16))
    ], [sg.Text()],
              [
                  sg.Text("Narrator Names:", font=('Arial', 16)),
                  sg.Text(', '.join(narrator_names), font=('Arial', 16))
              ], [sg.Text()],
              [
                  sg.Text("Narrator Gender:", font=('Arial', 16)),
                  sg.Text(metadata_dict['gender'], font=('Arial', 16))
              ],
              [
                  sg.Text("Narrator Birth Country:", font=('Arial', 16)),
                  sg.Text(metadata_dict['country'], font=('Arial', 16))
              ],
              [
                  sg.Text("Narrator Birth Year:", font=('Arial', 16)),
                  sg.Text(metadata_dict['year'], font=('Arial', 16))
              ], [sg.Text()], [sg.Text("Text:", font=('Arial', 16))],
              [
                  sg.Multiline(key='narr_text',
                               font=('Arial', 14),
                               size=(75, 30),
                               auto_refresh=True,
                               autoscroll=True,
                               background_color='#fafafa',
                               text_color='black',
                               write_only=True)
              ], [sg.Text()],
              [sg.Text("To exit, close the window.", font=('Arial', 16))]]
    window_metadata_list = sg.Window(f'Metadata for {narrative_name}',
                                     layout,
                                     icon=encoded_logo).Finalize()
    # window_metadata_list['narr_text'].TKOut.output.config(wrap='word')
    window_metadata_list.FindElement('narr_text').Update(narrative_text)
    window_metadata_list.FindElement('narr_text').Widget.configure()

    # A single non-blocking read; the function returns while the window stays open
    window_metadata_list.read(timeout=0)
    return
示例#12
0
def display_statistics(store_name: str):
    """
    Display a window with buttons to show various graphs and charts, and/or output files with the
    top xx 'unknown to the ontology' nouns and verbs.

    :param store_name: The database/data store name
    :return: None (Window is displayed)
    """
    logging.info(f'Displaying summary statistics for {store_name}')
    # Setup the PySimpleGUI window
    sg.theme('Material2')
    layout = [[
        sg.Text(
            "Click one or more of the buttons to display various summary statistics.",
            font=('Arial', 16))
    ],
              [
                  sg.Text("To exit, press 'End' or close the window.",
                          font=('Arial', 16))
              ], [sg.Text()],
              [sg.Text("Narrator Characteristics:", font=('Arial', 16))],
              [
                  sg.Button('Gender Details',
                            font=('Arial', 14),
                            button_color='dark blue',
                            size=(20, 1),
                            pad=((25, 0), 3))
              ],
              [
                  sg.Button('Birth Year Details',
                            font=('Arial', 14),
                            button_color='dark blue',
                            size=(20, 1),
                            pad=((25, 0), 3))
              ],
              [
                  sg.Button('Birth Country Details',
                            font=('Arial', 14),
                            button_color='dark blue',
                            size=(20, 1),
                            pad=((25, 0), 3))
              ], [sg.Text()],
              [sg.Text("Narrative Information:", font=('Arial', 16))],
              [
                  sg.Button('Locations Mentioned',
                            font=('Arial', 14),
                            button_color='blue',
                            size=(24, 1),
                            pad=((25, 0), 3))
              ],
              [
                  sg.Button('Years and Events Mentioned',
                            font=('Arial', 14),
                            button_color='blue',
                            size=(24, 1),
                            pad=((25, 0), 3))
              ], [sg.Text()], [sg.Text("Frequent Words:", font=('Arial', 16))],
              [
                  sg.Button('Word Cloud',
                            font=('Arial', 14),
                            button_color='blue',
                            size=(24, 1),
                            pad=((25, 0), 3)),
                  sg.Text('Number of words:', font=('Arial', 16)),
                  sg.InputText(text_color='black',
                               background_color='#ede8e8',
                               size=(5, 1),
                               font=('Arial', 16),
                               key='words_in_cloud',
                               do_not_clear=True)
              ],
              [
                  sg.Button('Output "Unknown" Nouns/Verbs',
                            font=('Arial', 14),
                            button_color='blue',
                            size=(24, 1),
                            pad=((25, 0), 3)),
                  sg.Text('Number of nouns:', font=('Arial', 16)),
                  sg.InputText(text_color='black',
                               background_color='#ede8e8',
                               size=(5, 1),
                               font=('Arial', 16),
                               key='nouns_in_csv',
                               do_not_clear=True),
                  sg.Text('Number of verbs:', font=('Arial', 16)),
                  sg.InputText(text_color='black',
                               background_color='#ede8e8',
                               size=(5, 1),
                               font=('Arial', 16),
                               key='verbs_in_csv',
                               do_not_clear=True)
              ],
              [
                  sg.Text("Directory:", font=('Arial', 16), pad=((125, 0), 3)),
                  sg.FolderBrowse(target='directory_name',
                                  button_color='dark blue'),
                  sg.InputText(text_color='black',
                               background_color='#ede8e8',
                               font=('Arial', 16),
                               key='directory_name',
                               do_not_clear=True)
              ],
              [
                  sg.Text(
                      "The files, 'Nouns.csv' and 'Verbs.csv', will be written to the specified directory.",
                      font=('Arial', 16))
              ],
              [
                  sg.Text(
                      "This processing takes SEVERAL MINUTES if a large number of narratives are analyzed.",
                      font=('Arial', 16))
              ], [sg.Text()],
              [
                  sg.Button('End',
                            button_color='dark blue',
                            size=(5, 1),
                            font=('Arial', 14))
              ]]

    # Create the GUI Window
    try:
        success1, number_narrators_results = query_database(
            'select', query_number_narrators, store_name)
        if success1 and 'results' in number_narrators_results.keys() and \
                'bindings' in number_narrators_results['results'].keys():
            number_narrators = int(number_narrators_results['results']
                                   ['bindings'][0]['cnt']['value'])
        else:
            sg.popup_error(
                f'No narrators are defined in {store_name}. '
                f'Gender and birth details cannot be displayed.',
                font=('Arial', 14),
                button_color='dark blue',
                icon=encoded_logo)
            number_narrators = 0
        success2, narrative_text_results = query_database(
            'select', query_narrative_text, store_name)
        if success2 and 'results' in narrative_text_results.keys() \
                and 'bindings' in narrative_text_results['results'].keys():
            narratives = EMPTY_STRING
            for binding in narrative_text_results['results']['bindings']:
                narratives += f" {binding['narr_text']['value']}"
        else:
            sg.popup_error(
                f'No narrators are defined in {store_name}. '
                f'Summary graphs, charts and word frequencies cannot be generated.',
                font=('Arial', 14),
                button_color='dark blue',
                icon=encoded_logo)
            narratives = EMPTY_STRING
    except Exception as e:
        capture_error(
            f'Exception getting initial narrative details from {store_name}: {str(e)}',
            True)
        return
    window_stats_list = sg.Window('Display Summary Statistics',
                                  layout,
                                  icon=encoded_logo).Finalize()
    window_stats_list.FindElement('directory_name').Update(
        resources_root[0:len(resources_root) - 1])
    window_stats_list.FindElement('words_in_cloud').Update(50)
    window_stats_list.FindElement('nouns_in_csv').Update(50)
    window_stats_list.FindElement('verbs_in_csv').Update(50)

    # Event Loop to process window "events"
    while True:
        event_stats_list, values = window_stats_list.read()
        if event_stats_list in (sg.WIN_CLOSED, 'End'):
            # If user closes window or clicks 'End'
            break
        elif event_stats_list == 'Gender Details':
            if number_narrators:
                logging.info(f'Displaying gender statistics for {store_name}')
                y_values, x_values = get_y_x_values(number_narrators, 'gender',
                                                    query_genders, store_name)
                _display_horiz_histogram(y_values, x_values,
                                         'Number of Narrators/Subjects',
                                         'Narrator Genders')
            else:
                sg.popup_error(
                    f'No narrators are defined in {store_name}. '
                    f'The gender histogram cannot be displayed.',
                    font=('Arial', 14),
                    button_color='dark blue',
                    icon=encoded_logo)
        elif event_stats_list == 'Birth Year Details':
            if number_narrators:
                logging.info(
                    f'Displaying birth year statistics for {store_name}')
                y_values, x_values = get_y_x_values(number_narrators, 'year',
                                                    query_years, store_name)
                _display_horiz_histogram(
                    y_values, x_values,
                    'Number of Narrators/Subjects Born in Year',
                    'Narrator Birth Years')
            else:
                sg.popup_error(
                    f'No narrators are defined in {store_name}. '
                    f'The birth histograms cannot be displayed.',
                    font=('Arial', 14),
                    button_color='dark blue',
                    icon=encoded_logo)
        elif event_stats_list == 'Birth Country Details':
            if number_narrators:
                logging.info(
                    f'Displaying birth country statistics for {store_name}')
                y_values, x_values = get_y_x_values(number_narrators,
                                                    'country', query_countries,
                                                    store_name)
                _display_horiz_histogram(
                    y_values, x_values,
                    'Number of Narrators/Subjects Born in Country',
                    'Narrator Birth Countries')
            else:
                sg.popup_error(
                    f'No narrators are defined in {store_name}. '
                    f'The birth histograms cannot be displayed.',
                    font=('Arial', 14),
                    button_color='dark blue',
                    icon=encoded_logo)
        elif event_stats_list == 'Locations Mentioned':
            if not narratives:
                sg.popup_error(
                    f'No narrators are defined in {store_name}. '
                    f'A list of locations cannot be extracted.',
                    font=('Arial', 14),
                    button_color='dark blue',
                    icon=encoded_logo)
                continue
            _display_locations(narratives)
        elif event_stats_list == 'Years and Events Mentioned':
            if not narratives:
                sg.popup_error(
                    f'No narrators are defined in {store_name}. '
                    f'A list of years and events cannot be extracted.',
                    font=('Arial', 14),
                    button_color='dark blue',
                    icon=encoded_logo)
                continue
            _display_years_events(narratives)
        elif event_stats_list == 'Word Cloud':
            if not values['words_in_cloud']:
                sg.popup_error(
                    'A word count MUST be specified to configure the word cloud output. '
                    'Please provide a value.',
                    font=('Arial', 14),
                    button_color='dark blue',
                    icon=encoded_logo)
                continue
            if not narratives:
                sg.popup_error(
                    f'No narrators are defined in {store_name}. '
                    f'The word cloud cannot be displayed.',
                    font=('Arial', 14),
                    button_color='dark blue',
                    icon=encoded_logo)
                continue
            _display_word_cloud(narratives, int(values['words_in_cloud']))
        elif event_stats_list == 'Output Top "Unknown" Nouns and Verbs':
            if not values['directory_name'] and not values[
                    'nouns_in_csv'] and not values['verbs_in_csv']:
                sg.popup_error(
                    'A directory name and noun/verb word counts MUST be specified to save '
                    'the unknown words and their frequency counts. Please provide all of these values.',
                    font=('Arial', 14),
                    button_color='dark blue',
                    icon=encoded_logo)
                continue
            if not narratives:
                sg.popup_error(
                    f'No narratives were found in {store_name}. '
                    f'The word frequencies cannot be output.',
                    font=('Arial', 14),
                    button_color='dark blue',
                    icon=encoded_logo)
                continue
            logging.info(f'Outputting nouns/verbs for {store_name}')
            _output_words_in_csv(narratives, int(values['nouns_in_csv']),
                                 int(values['verbs_in_csv']),
                                 values['directory_name'])

    # Done
    window_stats_list.close()
    return
def test_hypothesis(store_name: str):
    """
    Display a window to show currently defined hypotheses, and allow selection and test
    of one of them. Selection/testing is not yet implemented; the window is a placeholder
    that only offers an 'End' button.

    Before the window is shown, the store is queried for its hypotheses and for the
    number of narratives. If either is missing, an error popup names what is missing.
    The placeholder window is displayed afterwards in either case.

    :param store_name: The database/data store name holding the hypotheses and narratives
    :return: None
    """
    logging.info(f'Test hypothesis in {store_name}')
    # Setup the PySimpleGUI window
    sg.theme('Material2')
    layout = [[sg.Text("Not yet implemented.", font=('Arial', 16))],
              [
                  sg.Text("To exit, press 'End' or close the window.",
                          font=('Arial', 16))
              ], [sg.Text()],
              [
                  sg.Button('End',
                            button_color='dark blue',
                            size=(5, 1),
                            font=('Arial', 14))
              ]]

    # Check that there is something to test before creating the GUI window
    try:
        number_hypotheses = 0
        number_narratives = 0
        success1, hypotheses_results = query_database('select',
                                                      query_hypotheses,
                                                      store_name)
        # query_database returns an empty dict when there are no bindings,
        # so the key checks guard against indexing into it
        if success1 and 'results' in hypotheses_results and \
                'bindings' in hypotheses_results['results']:
            number_hypotheses = len(hypotheses_results['results']['bindings'])
        success2, narratives_results = query_database('select',
                                                      query_number_narratives,
                                                      store_name)
        if success2 and 'results' in narratives_results and \
                'bindings' in narratives_results['results']:
            number_narratives = int(
                narratives_results['results']['bindings'][0]['cnt']['value'])

        # Collect everything that is missing so that the message is complete when
        # both hypotheses and narratives are absent
        missing = []
        if not number_hypotheses:
            missing.append('hypotheses')
        if not number_narratives:
            missing.append('narratives')
        if missing:
            error_msg = f"No {' and no '.join(missing)} are defined in {store_name}."
            sg.popup_error(error_msg,
                           font=('Arial', 14),
                           button_color='dark blue',
                           icon=encoded_logo)
    except Exception as e:
        capture_error(
            f'Exception getting hypotheses details from {store_name}: {str(e)}',
            True)
        return
    window_test_list = sg.Window('Test Hypothesis', layout,
                                 icon=encoded_logo).Finalize()

    # Event Loop to process window "events"
    while True:
        event_test_list, _ = window_test_list.read()
        if event_test_list in (sg.WIN_CLOSED, 'End'):
            # If user closes window or clicks 'End'
            break
        # TODO

    # Done
    window_test_list.close()
    return
def process_csv(csv_file: str, store_name: str, store_list: list) -> int:
    """
    Input the specified CSV file and process the narratives defined in it.
    The format of the CSV MUST be:
       Source,Title,Person,Given,Given2,Surname,Maiden,Maiden2,Gender,Start,End,Remove,Header,Footer

    If store_name is not in store_list, the database is created first. Each CSV record
    is then validated, its text is read (PDF sources are first converted to text with
    pdftotext for the Start-End page range), cleaned, simplified and added to the store.
    Records that fail validation are reported in a popup and skipped. After all records
    are processed, narrator unification is attempted and any resulting triples are added.

    :param csv_file: CSV file name
    :param store_name: Database/data store name
    :param store_list: List of the existing dbs -
                       Need to determine if a db name is new or existing
    :return: Count of the number of narratives ingested (0 if the database could not
             be created or the expected CSV columns are missing)
    """
    logging.info(f'Processing the CSV, {csv_file}')
    count = 0
    db_exception = EMPTY_STRING
    if store_name not in store_list:
        db_exception = create_delete_database('create', store_name)
    if db_exception:
        capture_error(f'Error creating or deleting {store_name}: {db_exception}', True)
        return 0

    try:
        with open(csv_file, newline=EMPTY_STRING) as meta_file:
            narr_dict = csv.DictReader(meta_file)
            # Process each narrative based on the metadata columns (see the docstring)
            for narr_meta in narr_dict:
                if 'Title' not in narr_meta or 'Given' not in narr_meta:
                    capture_error('Expected columns not found in the CSV file. Processing stopped.', False)
                    # Actually stop, as the message says; falling through would raise
                    # a KeyError and report a second, misleading error
                    return count
                title = narr_meta['Title']
                logging.info(f'Ingesting the document, {title}')
                source = narr_meta['Source']
                # Must have at least the Source, Title, Person and Gender values defined
                if not source or not title or not narr_meta['Person'] \
                        or not narr_meta['Gender']:
                    sg.popup_error(f'For any source, the Source, Title, Person and Gender details MUST be '
                                   f'provided. This is not true for the CSV record with source file, '
                                   f'{source}, and narrative title, {title}. That record is skipped.',
                                   font=('Arial', 14), button_color='dark blue', icon=encoded_logo)
                    continue
                is_pdf = source.endswith('.pdf')
                if is_pdf:
                    # Capture each narrative text from the metadata details in the CSV.
                    # BOTH page numbers are needed; a missing one would be passed to
                    # pdftotext as an empty argument
                    if not narr_meta['Start'] or not narr_meta['End']:
                        sg.popup_error(f'For PDF source files, the Start and End page details MUST be '
                                       f'provided. This is not true for the CSV record with source file, '
                                       f'{source}, and narrative title, {title}. That record is skipped.',
                                       font=('Arial', 14), button_color='dark blue', icon=encoded_logo)
                        continue
                    # The extracted text is written to a temporary file named after the title
                    in_file = f'{resources_root}{title}'
                    subprocess.run(['../tools/pdftotext', '-f', narr_meta['Start'], '-l', narr_meta['End'],
                                    '-simple', f'{resources_root}{source}', in_file])
                else:
                    in_file = f'{resources_root}{source}'
                with open(in_file, 'r', encoding='utf8', errors='ignore') as narr_in:
                    text = clean_text(narr_in.read(), narr_meta)
                    narrative = simplify_text(text, narr_meta)
                    # Double quotes are replaced by single quotes before storing the text
                    add_narr_data_to_store(narrative.replace('"', "'"), narr_meta, store_name)
                    # create_narrative_graph(narrative, title, store_name)
                if is_pdf:
                    # Cleanup - Delete the text file created by pdftotext
                    os.remove(in_file)
                count += 1
        # Determine if any narrators/subjects (different names) are really the same
        logging.info('Checking if any unification can be performed')
        unified_triples = unify_narrators(store_name)
        if unified_triples:
            # Add the triples to the data store
            add_remove_data('add', ' '.join(unified_triples), store_name)
    except Exception as e:
        capture_error(f'Exception ingesting narratives: {str(e)}', True)
    return count