示例#1
0
文件: utf8.py 项目: MLGB3/mlgb.cron
def changeOneTable( table_name ): #{
  """Refill one table from its source table, then convert its text fields to UTF-8.

  The table called table_name is truncated and repopulated from the table whose
  name is table_name without its first two characters (the 'u_' prefix).
  Every row is then located by its key columns, and each field listed for the
  table in fields_to_convert is reformatted and written back.

  table_name -- destination table; used as a key into table_keys and
                fields_to_convert.

  The cursor and the connection are closed whether or not a statement fails.
  Any exception raised while processing is propagated to the caller.
  """

  the_database_connection = None
  the_cursor = None

  try:
    the_database_connection = c.get_database_connection()
    the_cursor = the_database_connection.cursor()

    the_cursor.execute( 'truncate table %s' % table_name )

    source_table = table_name[ 2 : ] # strip off 'u_' prefix
    the_cursor.execute( 'insert into %s select * from %s' % (table_name, source_table) )

    key_list = table_keys[ table_name ]
    select_keys = ", ".join( key_list )

    statement = "select %s from %s order by %s" % (select_keys, table_name, select_keys)
    the_cursor.execute( statement )
    results = the_cursor.fetchall()

    for row in results: #{
      # Identify the row by its key columns. The values are formatted with %d,
      # so they must be numeric.
      where_clause = " and ".join( [ "%s = %d" % (keyname, row[ i ])
                                     for i, keyname in enumerate( key_list ) ] )

      for fieldname in fields_to_convert[ table_name ]: #{
        statement = "select %s from %s where %s" % (fieldname, table_name, where_clause)
        the_cursor.execute( statement )
        fieldval = the_cursor.fetchone()[ 0 ]
        if not fieldval: continue # nothing to convert (null or empty)

        # Reformat in 2 steps:
        # 1. Convert homespun ASCII coding invented by Richard Sharpe to HTML entities.
        # 2. Convert the HTML entities to UTF-8.
        fieldval = w.reformat( fieldval )
        fieldval = html_parser.unescape( fieldval )

        # Remove some Django template tags
        fieldval = fieldval.replace( '{% templatetag openvariable %}', '{{' )
        fieldval = fieldval.replace( '{% templatetag closevariable %}', '}}' )

        # NOTE(review): only single quotes are escaped here; if the database treats
        # backslash as an escape character, values containing one may need more care.
        fieldval = fieldval.replace( "'", "''" ) # escape for SQL
        statement = "update %s set %s = '%s' where %s" % (table_name, fieldname, fieldval, where_clause)

        the_cursor.execute( statement.encode( 'utf8' ) )
      #}
    #}

  finally:
    # Release the database resources on both the success and the failure path.
    if the_cursor is not None: the_cursor.close()
    if the_database_connection is not None: the_database_connection.close()
示例#2
0
def writeFlagDecodes():  #{
    """Write the HTML page that explains the flag codes used in the index.

    The page is written to output_filename. It consists of the blocks emitted
    by the 'cat' helpers wrapped around a <dl> list that holds one <dt>/<dd>
    pair per row of index_entry_flags, in flag_id order. Every column value
    is passed through w.reformat() before it is written.

    Any exception is re-raised after whichever of the output file, cursor and
    connection had been opened have been closed.
    """
    db_connection = None
    db_cursor = None

    try:
        # Placeholder: the 'file' type is not itself an instance of 'file', so
        # the handler below will not try to close anything if open() fails.
        html_out = file
        html_out = open(output_filename, 'wb')  # 'wb' allows entry of UTF-8

        cat.write_inherit_and_title_block(html_out)
        cat.write_start_main_content(html_out)

        html_out.write('<h3>Definition of codes used in the index</h3>')
        html_out.write('<dl class="catalogue_entry_flags">')

        db_connection = c.get_database_connection()
        db_cursor = db_connection.cursor()

        flag_query = ("select flag_code, flag_desc, flag_example from index_entry_flags "
                      " order by flag_id")
        db_cursor.execute(flag_query)

        for flag_row in db_cursor.fetchall():  #{
            flag_code = w.reformat(flag_row[0])
            flag_desc = w.reformat(flag_row[1])
            flag_example = w.reformat(flag_row[2])

            # The term: the code itself, optionally followed by an example.
            html_out.write('<dt>' + newline)
            html_out.write(flag_code)
            if flag_example:
                html_out.write(' e.g. %s' % flag_example)

            # The definition: the description of the code.
            html_out.write(newline + '</dt>' + newline + '<dd>' + newline)
            html_out.write('%s' % flag_desc)
            html_out.write(newline)
            html_out.write('</dd>' + newline + newline)
        #}

        html_out.write('</dl>')
        cat.write_end_main_content(html_out,
                                   include_link_to_definitions=False)

        html_out.close()

        db_cursor.close()
        db_connection.close()

    except:
        # Tidy up whatever was opened, then let the error propagate.
        if isinstance(html_out, file) and not html_out.closed:
            html_out.close()
        if db_cursor:
            db_cursor.close()
        if db_connection:
            db_connection.close()
        raise
示例#3
0
def writeAllHTMLFiles(): #{

  reload(sys)
  sys.setdefaultencoding("utf8")

  writeOneHTMLFile( 'listbydate' )

  writeOneHTMLFile( 'list' )

  the_database_connection = c.get_database_connection()
  the_cursor = the_database_connection.cursor() 

  statement = "select coalesce( doc_group_type_parent, doc_group_type ) as doc_group_type, "
  statement += " doc_group_id, doc_group_name, document_code "
  statement += " from index_medieval_documents_view where document_code > '' "
  statement += " order by doc_group_type, doc_group_name, document_code_sort, document_code" 
  the_cursor.execute( statement )
  documents = the_cursor.fetchall()

  prev_type_code = ''
  prev_loc_name = ''
 
  for document in documents: #{
    type_code     = document[ 0 ]
    loc_id        = document[ 1 ]
    loc_name      = document[ 2 ]
    document_code = document[ 3 ]

    if type_code != prev_type_code: #{
      prev_type_code = type_code
      prev_loc_name = ''
      
      print ""
      print 'Producing LIST for document group type %s' % type_code
      writeOneHTMLFile( 'list', type_code )
    #}

    if loc_name != prev_loc_name: #{
      prev_loc_name = loc_name
      
      print ""
      print 'Producing LIST for document group %s' % loc_name
      writeOneHTMLFile( 'list', type_code, loc_id, loc_name )
    #}

    print 'Producing output for ONE document code: %s' % document_code
    writeOneHTMLFile( document_code )
  #}

  the_cursor.close()
  the_database_connection.close()
  print 'Finished producing output.'
示例#4
0
def writeFlagDecodes(): #{
  """Write the HTML page that explains the flag codes used in the index.

  The page is written to output_filename. It consists of the blocks emitted
  by the 'cat' helpers wrapped around a <dl> list that holds one <dt>/<dd>
  pair per row of index_entry_flags, in flag_id order. Every column value
  is passed through w.reformat() before it is written.

  Any exception is re-raised after whichever of the output file, cursor and
  connection had been opened have been closed.
  """

  db_connection = None
  db_cursor = None

  try:
    # Placeholder: the 'file' type is not itself an instance of 'file', so
    # the handler below will not try to close anything if open() fails.
    html_out = file
    html_out = open( output_filename, 'wb' ) # 'wb' allows entry of UTF-8

    cat.write_inherit_and_title_block( html_out )
    cat.write_start_main_content( html_out )

    html_out.write( '<h3>Definition of codes used in the index</h3>' )
    html_out.write( '<dl class="catalogue_entry_flags">' )

    db_connection = c.get_database_connection()
    db_cursor = db_connection.cursor()

    flag_query = ( "select flag_code, flag_desc, flag_example from index_entry_flags "
                   " order by flag_id" )
    db_cursor.execute( flag_query )

    for flag_row in db_cursor.fetchall(): #{
      flag_code = w.reformat( flag_row[ 0 ] )
      flag_desc = w.reformat( flag_row[ 1 ] )
      flag_example = w.reformat( flag_row[ 2 ] )

      # The term: the code itself, optionally followed by an example.
      html_out.write( '<dt>' + newline )
      html_out.write( flag_code )
      if flag_example:
        html_out.write( ' e.g. %s' % flag_example )

      # The definition: the description of the code.
      html_out.write( newline + '</dt>' + newline + '<dd>' + newline )
      html_out.write( '%s' % flag_desc )
      html_out.write( newline )
      html_out.write( '</dd>' + newline + newline )
    #}

    html_out.write( '</dl>' )
    cat.write_end_main_content( html_out, include_link_to_definitions = False )

    html_out.close()

    db_cursor.close()
    db_connection.close()

  except:
    # Tidy up whatever was opened, then let the error propagate.
    if isinstance( html_out, file ) and not html_out.closed:
      html_out.close()
    if db_cursor:
      db_cursor.close()
    if db_connection:
      db_connection.close()
    raise
示例#5
0
def addDocumentCodeSort():  #{

    the_database_connection = None
    the_cursor = None

    #=================================================================
    # Read each line of the original file, manipulate it as necessary,
    # and then write it into the new file.
    #=================================================================
    try:
        # Connect to the database and create a cursor
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        # Update both the 'documents' lookup table and the 'copies' table.
        # Although this duplicates the same information in two places, it makes it much
        # simpler to access the data.

        tables_to_update = ['index_medieval_documents', 'index_entry_copies']

        for the_table in tables_to_update:  #{
            select = "select distinct document_code from %s " % the_table
            select += " where document_code > '' order by document_code"

            the_cursor.execute(select)
            results = the_cursor.fetchall()

            for row in results:  #{

                document_code = row[0].strip()
                new_code = pad_with_zeroes(document_code)

                upd = "update %s set document_code_sort = '%s' where document_code = '%s'" \
                    % (the_table, new_code, document_code)
                print upd

                the_cursor.execute(upd)
            #}
        #}

        the_cursor.close()
        the_database_connection.close()

    except:
        if the_cursor: the_cursor.close()
        if the_database_connection: the_database_connection.close()
        raise
示例#6
0
def process_all_documents():  #{
    """Write the output of process_document() for every coded document.

    The file output_filename is created with a header comment. Then each row
    of index_medieval_documents that has a document code is read in
    document_code_sort order and handed to process_document(); the returned
    text is written UTF-8 encoded and followed by a newline.

    Any exception is re-raised after whichever of the output file, cursor and
    connection had been opened have been closed.
    """
    db_connection = None
    db_cursor = None

    try:
        # Placeholder: the 'file' type is not itself an instance of 'file', so
        # the handler below will not try to close anything if open() fails.
        script_out = file
        script_out = open(output_filename, 'wb')  # 'wb' allows entry of UTF-8
        script_out.write(newline + '-- This script was generated by dates.py '
                         + newline + newline)

        db_connection = c.get_database_connection()
        db_cursor = db_connection.cursor()

        document_query = ("select document_id, document_code, document_name "
                          " from index_medieval_documents where document_code > ''"
                          " order by document_code_sort")
        db_cursor.execute(document_query)

        for document_row in db_cursor.fetchall():  #{
            document_output = process_document(document_row)
            script_out.write(document_output.encode('utf-8'))
            script_out.write(newline)
        #}

        script_out.close()

        db_cursor.close()
        db_connection.close()

    except:
        # Tidy up whatever was opened, then let the error propagate.
        if isinstance(script_out, file) and not script_out.closed:
            script_out.close()
        if db_cursor:
            db_cursor.close()
        if db_connection:
            db_connection.close()
        raise
示例#7
0
文件: dates.py 项目: MLGB3/mlgb.cron
def process_all_documents(): #{
  """Write the output of process_document() for every coded document.

  The file output_filename is created with a header comment. Then each row
  of index_medieval_documents that has a document code is read in
  document_code_sort order and handed to process_document(); the returned
  text is written UTF-8 encoded and followed by a newline.

  Any exception is re-raised after whichever of the output file, cursor and
  connection had been opened have been closed.
  """

  db_connection = None
  db_cursor = None

  try:
    # Placeholder: the 'file' type is not itself an instance of 'file', so
    # the handler below will not try to close anything if open() fails.
    script_out = file
    script_out = open( output_filename, 'wb' ) # 'wb' allows entry of UTF-8
    script_out.write( newline + '-- This script was generated by dates.py ' + newline + newline )

    db_connection = c.get_database_connection()
    db_cursor = db_connection.cursor()

    document_query = ( "select document_id, document_code, document_name "
                       " from index_medieval_documents where document_code > ''"
                       " order by document_code_sort" )
    db_cursor.execute( document_query )

    for document_row in db_cursor.fetchall(): #{
      document_output = process_document( document_row )
      script_out.write( document_output.encode( 'utf-8' ) )
      script_out.write( newline )
    #}

    script_out.close()

    db_cursor.close()
    db_connection.close()

  except:
    # Tidy up whatever was opened, then let the error propagate.
    if isinstance( script_out, file ) and not script_out.closed:
      script_out.close()
    if db_cursor:
      db_cursor.close()
    if db_connection:
      db_connection.close()
    raise
示例#8
0
def parseCopies(): #{

  # Write the SQL to a file. We can then store this in Subversion and revert to it if necessary.
  outfile_handle = open( output_filename, 'wb' ) # 'wb' allows entry of UTF-8
  outfile_handle.write( newline )
  outfile_handle.write( '-- This SQL script is generated by importIndexedCopies.py.' )
  outfile_handle.write( newline )

  # Connect to the database
  the_database_connection = c.get_database_connection()

  # Create a Cursor object
  the_cursor = the_database_connection.cursor() 

  # Clear out the destination table ('index entry copies') before you begin
  outfile_handle.write( newline + "TRUNCATE TABLE index_entry_copies;" + newline + newline )

  # Get the 'copies' text field from the source table ('index entry books')
  statement = "SELECT e.entry_id, b.entry_book_count, b.copies, e.entry_name, b.title_of_book " \
            + " FROM index_entry_books b, index_entries e" \
            + " WHERE b.entry_id = e.entry_id " \
            + " order by entry_id, entry_book_count"
  the_cursor.execute( statement )
  results = the_cursor.fetchall()

  # Extract the copy IDs from the text field, and from each copy ID extract
  # the document code and sequence number.
  for row in results: #{
    entry_id = row[0]
    entry_book_count = row[1]
    copies = row[2]
    entry_name = row[3]
    title_of_book = row[4]

    outfile_handle.write( '%s%s/*Entry ID %d, book count %d*/%s' \
                          % (newline, newline, entry_id, entry_book_count, newline))

    author_and_title_comment = "/* %s %s */%s" % (entry_name, title_of_book, newline)
    outfile_handle.write( author_and_title_comment )
    print author_and_title_comment, 'Book count', entry_book_count

    # Remove a few known typo's
    copies = correct_typos( copies )

    copies_comment = "%s/* %s */%s" % (newline, copies, newline)
    outfile_handle.write( copies_comment )

    # Remove any final full stops, as these can cause the final copy code not to be recognised
    if copies.strip().endswith( full_stop ):
      copies = copies.strip()[ 0 : -1 ]
    
    # Remove the 'notes' sections between brackets before trying to extract the copy codes
    full_copies = copies
    copies = strip_bits_in_brackets( copies )

    copy_count = 0
    copy_codes = []
    words = copies.split() # since no separator is specified, any whitespace is used as the separator

    for word in words: #{

      copy_code = ''
      document_code = ''
      seqno_in_document = ''
      is_valid_document_code = False

      if not is_copy_code( word ): #{
        if is_document_code( word ): #{
          # Try to avoid picking up bibliographical references that are NOT document codes
          checkword = word
          if checkword.endswith( comma ): checkword = checkword.replace( comma, '', 1 )
          checkword = checkword.strip()

          statement = "select count(*) from index_medieval_documents where document_code = '%s'" \
                    % checkword
          #outfile_handle.write( "/* %s */ \n" % statement )
          the_cursor.execute( statement )
          docrow = the_cursor.fetchone()
          found = docrow[ 0 ]
          if found > 0: #{ # it is a real document code, just missing sequence no
            is_valid_document_code = True
          #}
        #}
      #}

      if is_copy_code( word ) or is_valid_document_code: #{
        copy_code = word.strip()
        if copy_code.endswith( comma ): 
          copy_code = copy_code[ 0 : -1 ]
      #}

      if copy_code: #{
        # Check that we haven't already got it
        already_in_list = False
        if copy_code in copy_codes: already_in_list = True
        if already_in_list: continue

        copy_codes.append( copy_code )
        copy_count += 1
      #}
    #}

    # Finished picking out the copy codes from the 'copies' field.
    # Now get the text in between the copy codes
    copy_count = 0
    num_copies = len( copy_codes )
    while copy_count < num_copies: #{
      copy_code = copy_codes[ copy_count ]
      copy_count += 1
      copy_notes = ''

      rest_of_line = full_copies.split( copy_code, 1 )[ 1 ]

      if copy_count < len( copy_codes ): #{ # still another copy code to come after this one
        next_copy_code = copy_codes[ copy_count ]

        copy_notes = rest_of_line.split( next_copy_code, 1 )[ 0 ]

        copy_notes = copy_notes.strip()
        last_char = ''
        if copy_notes: last_char = copy_notes[ -1 : ]
        if next_copy_code and last_char in copy_code_connectors: #{
          copy_notes = '%s %s' % (copy_notes, next_copy_code)
        #}
      #}
      else:
        copy_notes = rest_of_line

      copy_notes = copy_notes.strip()

      if copy_notes == comma or copy_notes == full_stop: #{
        copy_notes = ''
      #}
      elif copy_notes.endswith( comma ): #{
        copy_notes = copy_notes[ 0 : -1 ]
      #}

      document_code = get_document_code( copy_code )
      seqno_in_document = get_seqno_in_document( copy_code )
      if not seqno_in_document.strip(): seqno_in_document = 'null'
      copy_notes = copy_notes.strip()

      statement = get_copy_insert_statement( entry_id, entry_book_count, copy_count, copy_code, \
                                             document_code, seqno_in_document, copy_notes )
      outfile_handle.write( statement.encode( 'utf-8' ))

      if is_numeric_range( copy_code ): #{ # need to generate sequence numbers for rest of range
        rest_of_range = get_rest_of_numeric_range( copy_code )

        for int_seqno in rest_of_range: #{
          copy_count += 1
          num_copies += 1
          copy_codes.insert( 0, copy_code ) # add to start so we don't keep coming to the same one again!

          seqno_in_document = str( int_seqno )

          outfile_handle.write( '/* generating sequence no. %d for %s */' % (int_seqno, copy_code))

          statement = get_copy_insert_statement( entry_id, entry_book_count, copy_count, copy_code, \
                                                 document_code, seqno_in_document, copy_notes )
          outfile_handle.write( statement.encode( 'utf-8' ))
        #}
      #}
    #}
    print newline
  #}

  the_cursor.close()
  the_database_connection.close()
def stripUnwantedTags():  #{

    the_database_connection = None
    the_cursor = None

    #=================================================================
    # Read each line of the original file, manipulate it as necessary,
    # and then write it into the new file.
    #=================================================================
    try:
        # Connect to the database and create a cursor
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        # Look at all the text fields that could contain unwanted formatting
        for table_name, field_names in text_fields.items():  #{
            #print newline + table_name + newline

            for field in field_names:  #{
                #print newline + table_name + ': ' + field + newline

                # generate a select statement to pick up rows containing any of the problematic tags
                first_tag = True
                for problem_tag_start in problem_tags_start:  #{
                    if first_tag:
                        select = "select id, %s from %s where %s like '%s%s%s'" \
                               % (field, table_name, field, percent, problem_tag_start, percent)
                    else:
                        select = "%s or %s like '%s%s%s'" % (
                            select, field, percent, problem_tag_start, percent)
                    first_tag = False
                #}
                select += " order by id"
                #print select
                the_cursor.execute(select)
                results = the_cursor.fetchall()

                # start working through the results
                for row in results:  #{
                    row_id = row[0]
                    text_value = row[1]

                    print ''
                    print ''
                    print '======================================='
                    print table_name, field, 'ID', row_id
                    print '======================================='
                    print ''

                    print '==== RAW VALUE, ID %d ====' % row_id
                    print text_value
                    print '==== end RAW VALUE, ID %d ==== %s' % (row_id,
                                                                 newline)

                    for problem_tag_start in problem_tags_start:  #{
                        print 'Processing', problem_tag_start
                        if problem_tag_start not in text_value: continue

                        # Convert, e.g., '<div style="font-family: Courier New">' to just '<div>'
                        value_parts = text_value.split(problem_tag_start)
                        new_value_parts = []
                        new_value = ''
                        i = -1
                        for part in value_parts:  #{
                            i += 1
                            formatting = ''
                            data = ''

                            if i == 0:  #{  # the first section (index 0) is before the formatting tag
                                data = part
                            #}
                            else:  #{ # at start of formatting tag

                                formatting_and_data = part.split(
                                    closing_angle_bracket, 1)

                                if len(formatting_and_data) != 2:  #{
                                    print 'Mismatched tag start and end in:', formatting_and_data
                                    print 'Cancelling change.'
                                    continue
                                #}

                                formatting = formatting_and_data[0]
                                data = formatting_and_data[1]

                                #print newline + 'About to remove the following formatting:'
                                #print formatting + newline
                            #}

                            new_value_parts.append(data)
                        #}

                        fixed_tag = problem_tag_start.strip(
                        ) + closing_angle_bracket
                        new_value = fixed_tag.join(new_value_parts)
                        if fixed_tag == '<a>':  # no point in keeping these
                            new_value = new_value.replace('<a></a>', '')
                        text_value = new_value

                        new_value = new_value.replace("'",
                                                      "''")  # escape for SQL
                        new_value = new_value.replace("\\",
                                                      "\\\\")  # escape for SQL
                        statement = "update %s set %s = '%s' where id = %d" % (
                            table_name, field, new_value, row_id)

                        print newline, '/* new value */', statement, newline
                        the_cursor.execute(statement)

                    #} # end processing all problem tags in one field of one row of data
                #} # end loop through rows containing problem tags in a particular field
            #} # end loop through one table's fields that may contain problem tags
        #} # end loop through tables with fields that may contain problem tags

        the_cursor.close()
        the_database_connection.close()

    except:
        if the_cursor: the_cursor.close()
        if the_database_connection: the_database_connection.close()
        raise
示例#10
0
def setBookIDs(): #{
  the_database_connection = None
  the_cursor = None

  #=================================================================
  # Read each line of the original file, manipulate it as necessary,
  # and then write it into the new file.
  #=================================================================
  try:
    # Connect to the database and create a cursor
    the_database_connection = c.get_database_connection()
    the_cursor = the_database_connection.cursor() 

    # Get details of the medieval catalogues in which an MLGB book appeared.
    select = "select id, medieval_catalogue from books_book where medieval_catalogue > ''"
    select += " order by id"
    the_cursor.execute( select )
    results = the_cursor.fetchall()

    # Clear out old data
    the_cursor.execute( "TRUNCATE TABLE index_mlgb_links" )

    # Insert the new data
    for row in results: #{

      book_id = row[ 0 ]
      medieval_catalogue = row[ 1 ].strip()

      print "\n\n%s" % medieval_catalogue

      # Avoid confusion potentially caused by spaces in wrong place etc.
      medieval_catalogue = medieval_catalogue.replace( '. ', '.' )
      medieval_catalogue = medieval_catalogue.replace( '?', '' )
      medieval_catalogue = medieval_catalogue.replace( '=', ' ' )

      words = medieval_catalogue.split()
      for word in words: #{
        catalogue_entries = []

        if not i.is_copy_code( word ): continue

        word = word.strip()
        if word.endswith( ',' ): word = word[ 0 : -1 ] # take off any commas from the end

        document_code = i.get_document_code( word )
        seqno_in_document = i.get_seqno_in_document( word )

        if document_code.isalnum() and seqno_in_document.isdigit() \
        and int( seqno_in_document ) > 0: #{

          catalogue_entries.append( seqno_in_document )

        else: # some kind of incomplete or garbled entry - don't try to save it
          continue
        #}

        if i.is_numeric_range( word ): #{ # need to generate sequence numbers for rest of range
          rest_of_range = i.get_rest_of_numeric_range( word )

          for int_seqno in rest_of_range: #{
            seqno_in_document = str( int_seqno )
            catalogue_entries.append( seqno_in_document )
          #}
        #}

        for seqno_in_document in catalogue_entries: #{
          print  "%d: '%s' %s" % (book_id, document_code, seqno_in_document)

          insert_statement = 'insert into index_mlgb_links '
          insert_statement += '( mlgb_book_id, document_code, seqno_in_document ) values '
          insert_statement += "( %d, '%s', %s )" % (book_id, document_code, seqno_in_document)
          the_cursor.execute( insert_statement )
        #}
      #}
    #}

    the_cursor.close()
    the_database_connection.close()

  except:
    if the_cursor: the_cursor.close()
    if the_database_connection: the_database_connection.close()
    raise
示例#11
0
def parseCopies():  #{

    # Write the SQL to a file. We can then store this in Subversion and revert to it if necessary.
    outfile_handle = open(output_filename, 'wb')  # 'wb' allows entry of UTF-8
    outfile_handle.write(newline)
    outfile_handle.write(
        '-- This SQL script is generated by importIndexedCopies.py.')
    outfile_handle.write(newline)

    # Connect to the database
    the_database_connection = c.get_database_connection()

    # Create a Cursor object
    the_cursor = the_database_connection.cursor()

    # Clear out the destination table ('index entry copies') before you begin
    outfile_handle.write(newline + "TRUNCATE TABLE index_entry_copies;" +
                         newline + newline)

    # Get the 'copies' text field from the source table ('index entry books')
    statement = "SELECT e.entry_id, b.entry_book_count, b.copies, e.entry_name, b.title_of_book " \
              + " FROM index_entry_books b, index_entries e" \
              + " WHERE b.entry_id = e.entry_id " \
              + " order by entry_id, entry_book_count"
    the_cursor.execute(statement)
    results = the_cursor.fetchall()

    # Extract the copy IDs from the text field, and from each copy ID extract
    # the document code and sequence number.
    for row in results:  #{
        entry_id = row[0]
        entry_book_count = row[1]
        copies = row[2]
        entry_name = row[3]
        title_of_book = row[4]

        outfile_handle.write( '%s%s/*Entry ID %d, book count %d*/%s' \
                              % (newline, newline, entry_id, entry_book_count, newline))

        author_and_title_comment = "/* %s %s */%s" % (entry_name,
                                                      title_of_book, newline)
        outfile_handle.write(author_and_title_comment)
        print author_and_title_comment, 'Book count', entry_book_count

        # Remove a few known typo's
        copies = correct_typos(copies)

        copies_comment = "%s/* %s */%s" % (newline, copies, newline)
        outfile_handle.write(copies_comment)

        # Remove any final full stops, as these can cause the final copy code not to be recognised
        if copies.strip().endswith(full_stop):
            copies = copies.strip()[0:-1]

        # Remove the 'notes' sections between brackets before trying to extract the copy codes
        full_copies = copies
        copies = strip_bits_in_brackets(copies)

        copy_count = 0
        copy_codes = []
        words = copies.split(
        )  # since no separator is specified, any whitespace is used as the separator

        for word in words:  #{

            copy_code = ''
            document_code = ''
            seqno_in_document = ''
            is_valid_document_code = False

            if not is_copy_code(word):  #{
                if is_document_code(word):  #{
                    # Try to avoid picking up bibliographical references that are NOT document codes
                    checkword = word
                    if checkword.endswith(comma):
                        checkword = checkword.replace(comma, '', 1)
                    checkword = checkword.strip()

                    statement = "select count(*) from index_medieval_documents where document_code = '%s'" \
                              % checkword
                    #outfile_handle.write( "/* %s */ \n" % statement )
                    the_cursor.execute(statement)
                    docrow = the_cursor.fetchone()
                    found = docrow[0]
                    if found > 0:  #{ # it is a real document code, just missing sequence no
                        is_valid_document_code = True
                    #}
                #}
            #}

            if is_copy_code(word) or is_valid_document_code:  #{
                copy_code = word.strip()
                if copy_code.endswith(comma):
                    copy_code = copy_code[0:-1]
            #}

            if copy_code:  #{
                # Check that we haven't already got it
                already_in_list = False
                if copy_code in copy_codes: already_in_list = True
                if already_in_list: continue

                copy_codes.append(copy_code)
                copy_count += 1
            #}
        #}

        # Finished picking out the copy codes from the 'copies' field.
        # Now get the text in between the copy codes
        copy_count = 0
        num_copies = len(copy_codes)
        while copy_count < num_copies:  #{
            copy_code = copy_codes[copy_count]
            copy_count += 1
            copy_notes = ''

            rest_of_line = full_copies.split(copy_code, 1)[1]

            if copy_count < len(
                    copy_codes
            ):  #{ # still another copy code to come after this one
                next_copy_code = copy_codes[copy_count]

                copy_notes = rest_of_line.split(next_copy_code, 1)[0]

                copy_notes = copy_notes.strip()
                last_char = ''
                if copy_notes: last_char = copy_notes[-1:]
                if next_copy_code and last_char in copy_code_connectors:  #{
                    copy_notes = '%s %s' % (copy_notes, next_copy_code)
                #}
            #}
            else:
                copy_notes = rest_of_line

            copy_notes = copy_notes.strip()

            if copy_notes == comma or copy_notes == full_stop:  #{
                copy_notes = ''
            #}
            elif copy_notes.endswith(comma):  #{
                copy_notes = copy_notes[0:-1]
            #}

            document_code = get_document_code(copy_code)
            seqno_in_document = get_seqno_in_document(copy_code)
            if not seqno_in_document.strip(): seqno_in_document = 'null'
            copy_notes = copy_notes.strip()

            statement = get_copy_insert_statement( entry_id, entry_book_count, copy_count, copy_code, \
                                                   document_code, seqno_in_document, copy_notes )
            outfile_handle.write(statement.encode('utf-8'))

            if is_numeric_range(
                    copy_code
            ):  #{ # need to generate sequence numbers for rest of range
                rest_of_range = get_rest_of_numeric_range(copy_code)

                for int_seqno in rest_of_range:  #{
                    copy_count += 1
                    num_copies += 1
                    copy_codes.insert(
                        0, copy_code
                    )  # add to start so we don't keep coming to the same one again!

                    seqno_in_document = str(int_seqno)

                    outfile_handle.write(
                        '/* generating sequence no. %d for %s */' %
                        (int_seqno, copy_code))

                    statement = get_copy_insert_statement( entry_id, entry_book_count, copy_count, copy_code, \
                                                           document_code, seqno_in_document, copy_notes )
                    outfile_handle.write(statement.encode('utf-8'))
                #}
            #}
        #}
        print newline
    #}

    the_cursor.close()
    the_database_connection.close()
示例#12
0
def changeOneTextField( handle, table_name, field_name ): #{
  """Write SQL that converts the Latin-encoded Greek in one text field to numeric HTML entities.

  Selects every distinct (entry_id, field value) pair whose value contains the
  'Greek starts here' marker (greek_start). For each marked section, the text up to the
  end marker (greek_end) is converted one character at a time: the character is looked
  up in 'letters' or 'raw_accents' to get a Unicode character name, and that character
  is emitted as a '&#NNN;' entity. An "update ... replace(...)" statement for the section
  is then written to 'handle'. Finally, statements are written that strip the start
  marker, and the full end marker in each of its shortened forms, from the column.

  The database is only queried here; every change is written to 'handle' as SQL text.

  Args:
    handle: object with a write() method that receives the generated SQL.
    table_name: name of the table containing the field.
    field_name: name of the text column to process.

  Side effects:
    Appends characters found in neither lookup table to the module-level list
    'unidentified_chars', and prints progress messages to standard output.
  """

  global unidentified_chars

  statement = "select distinct entry_id, %s from %s " % (field_name, table_name)
  statement += " where %s like '%s%s%s'" % (field_name, percent, greek_start, percent)
  statement += " order by entry_id, %s" % field_name

  # Fetch all the rows up front, then release the cursor and connection straight away
  # (even if the query fails): nothing below needs the database.
  the_database_connection = c.get_database_connection()
  try: #{
    the_cursor = the_database_connection.cursor()
    try: #{
      the_cursor.execute( statement )
      results = the_cursor.fetchall()
    #}
    finally:
      the_cursor.close()
  #}
  finally:
    the_database_connection.close()

  for row in results: #{
    entry_id = row[ 0 ]
    fieldval = row[ 1 ]

    # The first piece is the text before the first bit of Greek, so skip it.
    for section in fieldval.split( greek_start )[ 1 : ]: #{

      # Only the text before the end marker is Greek.
      encoded_latin = section.split( greek_end )[ 0 ].strip()
      orig_encoded_latin = encoded_latin

      print( '%s %s' % (entry_id, encoded_latin) )

      # We need to change s/sigma at the end of words to 'final sigma'
      if encoded_latin.endswith( 's' ): #{
        encoded_latin = '%s%s' % (encoded_latin[ 0 : -1 ], final_s)
      #}
      encoded_latin = encoded_latin.replace( 's ', final_s + ' ' )

      print( '%s %s' % (entry_id, encoded_latin) )

      # Now look up the name of the Greek character corresponding to each Latin character
      greek = u''
      for one_char in encoded_latin: #{
        greek_char_name = ''

        if one_char.strip() == '': # whitespace character of some kind: copy it unchanged
          greek += one_char
        elif one_char in letters:
          greek_char_name = letters[ one_char ]
        elif one_char in raw_accents:
          greek_char_name = raw_accents[ one_char ]
        elif one_char not in unidentified_chars:
          unidentified_chars.append( one_char )  # remember it so that it can be reported

        if greek_char_name: #{
          greek_char = unicodedata.lookup( greek_char_name )
          greek += '&#%d;' % ord( greek_char )
        #}
      #}

      # Breathings and accents appear BEFORE capital letters, and are written that way in the English,
      # e.g. "'Aposhmeiw<seis". However, combining characters always follow the character to which they
      # apply. So we need to do some rearrangement. In practice I think we need to add an extra space
      # before the start of the word, for the breathings and accents to sit on.
      processed_words = []
      for word in greek.split(): #{
        for numeric_entity in accent_entities: #{
          if word.startswith( numeric_entity ): #{
            word = numeric_non_break_space + ' ' + word
          #}
        #}
        processed_words.append( word )
      #}
      greek = ' '.join( processed_words )

      orig_encoded_latin = orig_encoded_latin.replace( "'", "''" )  # escape single quotes for SQL

      # The pieces are written one at a time rather than concatenated first, so that
      # the database text and the generated entity text are never mixed in one string.
      handle.write( "update %s set %s = replace( %s, '"  % (table_name, field_name, field_name))
      handle.write( orig_encoded_latin )
      handle.write( "', '" )
      handle.write( greek )
      handle.write( "' ) where entry_id = %d" % entry_id )
      handle.write( " and %s like '%s%s%s';" % (field_name, percent, orig_encoded_latin, percent) )
      handle.write( newline + newline )
      print( ' ' )
    #}
  #}

  # remove the marker for 'Greek starts here'
  handle.write( "update %s set %s = replace( %s, '%s', '' );"  \
                % (table_name, field_name, field_name, greek_start) )
  handle.write( newline )

  # remove the marker for 'Greek ends here',
  # but remember that sometimes one or two dollar signs have been missed off the end.
  # Work from the full marker down to the length of greek_end, one character shorter each
  # time; the range is bounded, so this cannot loop forever even if greek_end is empty.
  for marker_length in range( len( full_greek_end ), len( greek_end ) - 1, -1 ): #{
    handle.write( "update %s set %s = replace( %s, '%s', '' );"  \
                  % (table_name, field_name, field_name, full_greek_end[ 0 : marker_length ]) )
    handle.write( newline )
  #}

  print( '----' )
  print( 'Finished processing %s %s' % (table_name, field_name) )
  print( '----' )
  print( ' ' )
#}
def _remove_tag_attributes( text_value, problem_tag_start ): #{
  """Return text_value with everything between problem_tag_start and the next '>' removed.

  Converts, e.g., '<div style="font-family: Courier New">' to just '<div>'. If the
  resulting tag is '<a>', empty '<a></a>' pairs are removed altogether.

  Returns None, after printing a warning, if any occurrence of the tag start is not
  followed by closing_angle_bracket, so that the caller can leave the value alone.
  """

  value_parts = text_value.split( problem_tag_start )

  # The first section (index 0) is before the first formatting tag, so keep it whole.
  new_value_parts = [ value_parts[ 0 ] ]

  for part in value_parts[ 1 : ]: #{
    # Every later section starts inside the tag: formatting, closing bracket, then data.
    formatting_and_data = part.split( closing_angle_bracket, 1 )

    if len( formatting_and_data ) != 2: #{
      print( 'Mismatched tag start and end in: %s' % formatting_and_data )
      print( 'Cancelling change.' )
      return None
    #}

    new_value_parts.append( formatting_and_data[ 1 ] )  # keep the data, drop the formatting
  #}

  fixed_tag = problem_tag_start.strip() + closing_angle_bracket
  new_value = fixed_tag.join( new_value_parts )
  if fixed_tag == '<a>': # no point in keeping these
    new_value = new_value.replace( '<a></a>', '' )
  return new_value
#}


def _strip_tags_from_field( the_cursor, table_name, field ): #{
  """Clean the problem tags in every row of table_name whose 'field' contains one.

  Runs one "update" per row and per problem tag found, through the_cursor, and prints
  the raw value and each generated statement to standard output.
  """

  # generate a select statement to pick up rows containing any of the problematic tags
  conditions = [ "%s like '%s%s%s'" % (field, percent, tag_start, percent)
                 for tag_start in problem_tags_start ]
  select = "select id, %s from %s where %s order by id" \
         % (field, table_name, " or ".join( conditions ))
  the_cursor.execute( select )
  results = the_cursor.fetchall()

  # start working through the results
  for row in results: #{
    row_id = row[ 0 ]
    text_value = row[ 1 ]

    print( '' )
    print( '' )
    print( '=======================================' )
    print( '%s %s ID %s' % (table_name, field, row_id) )
    print( '=======================================' )
    print( '' )

    print( '==== RAW VALUE, ID %d ====' % row_id )
    print( text_value )
    print( '==== end RAW VALUE, ID %d ==== %s' % (row_id, newline) )

    for problem_tag_start in problem_tags_start: #{
      print( 'Processing %s' % problem_tag_start )
      if problem_tag_start not in text_value: continue

      new_value = _remove_tag_attributes( text_value, problem_tag_start )

      # A mismatched tag really does cancel the change: the stored value is left
      # untouched for this tag instead of being saved with the broken piece missing.
      if new_value is None: continue

      text_value = new_value  # the remaining tags work on the already-cleaned text

      new_value = new_value.replace( "'", "''" ) # escape for SQL
      new_value = new_value.replace( "\\", "\\\\" ) # escape for SQL
      statement = "update %s set %s = '%s' where id = %d" % (table_name, field, new_value, row_id)

      print( '%s/* new value */ %s %s' % (newline, statement, newline) )
      the_cursor.execute( statement )
    #} # end processing all problem tags in one field of one row of data
  #} # end loop through rows containing problem tags in a particular field
#}


def stripUnwantedTags(): #{
  """Strip unwanted formatting from problem tags in the database's text fields.

  For each table and field listed in text_fields, finds the rows whose value contains
  any of the tag starts in problem_tags_start, and updates each such row so that the
  tag keeps only its name. A tag start with no closing angle bracket after it is
  reported and the row is left unchanged for that tag.

  The cursor and connection are closed whether or not an error occurs; any error is
  passed on to the caller.
  """

  if not problem_tags_start: return  # no tags to look for, so nothing to do

  the_database_connection = c.get_database_connection()
  try: #{
    the_cursor = the_database_connection.cursor()
    try: #{
      # Look at all the text fields that could contain unwanted formatting
      for table_name, field_names in text_fields.items(): #{
        for field in field_names: #{
          _strip_tags_from_field( the_cursor, table_name, field )
        #}
      #}
    #}
    finally:
      the_cursor.close()
  #}
  finally:
    the_database_connection.close()
#}
示例#14
0
def _write_document_contents_body( handle, the_cursor, document_code ): #{
  """Write the breadcrumbs, headings and list of catalogue entries for one document.

  Runs all the queries for writeDocumentContents() on the_cursor. The caller owns the
  cursor and is responsible for closing it.
  """

  # NOTE(review): there is no check that a row was found for document_code. If the
  # driver's fetchone() returns None in that case, the indexing below will fail, so
  # confirm that callers only pass codes that exist in the view.
  statement = "select coalesce( doc_group_type_parent, doc_group_type ) as doc_group_type, " 
  statement += " doc_group_type_name, doc_group_id, doc_group_name, document_name "
  statement += " from index_medieval_documents_view where document_code = '%s'" % document_code
  the_cursor.execute( statement )
  document = the_cursor.fetchone()

  type_code     = document[ 0 ]
  type_name     = w.reformat( document[ 1 ] )
  loc_id        = document[ 2 ]
  loc_name      = w.reformat( document[ 3 ] )
  document_name = w.reformat( document[ 4 ] )

  statement = "select entry_id, entry_book_count, copy_count, copy_code, copy_notes, seqno_in_document"
  statement += " from index_entry_copies where document_code = '%s'" % document_code
  statement += " order by seqno_in_document, copy_code"
  the_cursor.execute( statement )
  copy_results = the_cursor.fetchall()

  write_breadcrumbs( handle, type_code, type_name, loc_id, loc_name, document_name )

  if type_name == loc_name: # don't have 'HENRY DE KIRKESTEDE: HENRY DE KIRKESTEDE'
    handle.write( '<h2>%s</h2>' % type_name )
  else:
    handle.write( '<h2>%s: %s</h2>' % (type_name, loc_name) )
  handle.write( '<h3 class="medieval_catalogue_desc">%s. %s</h3>' % (document_code, document_name)  )

  handle.write( newline )
  write_catalogue_entries_total( handle, len( copy_results ) )
  handle.write( newline )

  handle.write( '<div class="index">' )
  handle.write( newline + newline )

  prev_copy_code = ''
  authors = set()  # record which authors have already been displayed on this page
                   # and don't repeat the bibliography paragraph on 2nd or subsequent appearances

  handle.write( '<ul id="catalogue_entry_list">' + newline )

  for copy_row in copy_results: #{

    # Extract copy information (copy_row[ 2 ], the copy count, is not needed here)
    entry_id          = copy_row[ 0 ]
    entry_book_count  = copy_row[ 1 ]
    copy_code         = w.reformat( copy_row[ 3 ] )
    copy_notes        = w.reformat( copy_row[ 4 ] )
    seqno_in_document = copy_row[ 5 ]

    if copy_code and copy_code == prev_copy_code: continue # don't repeat e.g. BC1.5--7
    prev_copy_code = copy_code

    # Extract book information
    statement = "select role_in_book, title_of_book, book_biblio_line, xref_title_of_book, problem"
    statement += " from index_entry_books where entry_id = %d" % entry_id
    statement += " and entry_book_count = %d" % entry_book_count
    the_cursor.execute( statement )
    book = the_cursor.fetchone()

    role_in_book       = w.reformat( book[ 0 ] )
    title_of_book      = w.reformat( book[ 1 ] )
    book_biblio_line   = w.reformat( book[ 2 ] )
    xref_title_of_book = w.reformat( book[ 3 ] )
    problem            = w.reformat( book[ 4 ] )

    # Extract author information. The row must be fetched before get_mlgb_links() is
    # called, because that function is handed the same cursor.
    statement = "select entry_name, xref_name, entry_biblio_line, entry_biblio_block, letter"
    statement += " from index_entries where entry_id = %d" % entry_id
    the_cursor.execute( statement )
    author = the_cursor.fetchone()

    # Get links to the main MLGB database
    mlgb_links = get_mlgb_links( the_cursor, document_code, seqno_in_document )

    entry_name         = w.reformat( author[ 0 ] )
    xref_name          = w.reformat( author[ 1 ] )
    entry_biblio_line  = w.reformat( author[ 2 ] )
    entry_biblio_block = w.reformat( author[ 3 ] )
    letter             = author[ 4 ].replace( '/', '' )

    # Write out the details
    handle.write( '<li>' + newline )

    # The copy code becomes a link to the first MLGB book found, if there is one
    if mlgb_links: #{
      mlgb_book_id = mlgb_links[0][0]
      hover_title_of_book = w.strip_html_for_hover( title_of_book )
      handle.write( w.get_mlgb_book_link( mlgb_book_id, hover_title_of_book ))
    #}
    handle.write( copy_code )
    if mlgb_links: handle.write( '</a>' )

    copy_notes = copy_notes.strip()
    if copy_notes: #{
      # Notes that start with their own punctuation follow the copy code directly
      if not copy_notes.startswith( ',' ) and not copy_notes.startswith( ': ' ):
        copy_notes = ' ' + copy_notes
      handle.write( copy_notes )
    #}

    handle.write( ': ' + newline )

    # Write out details from 'entry' table
    # linking to the main entry in the author/title index.
    link_to_authortitle = authortitle_url + '/' + letter + '/'
    link_to_authortitle += '#entry%d_anchor' % entry_id

    handle.write( '<a href="%s%s">' % (w.if_editable, link_to_authortitle) )
    handle.write( entry_name + '</a>' + newline )
    if xref_name: handle.write( ' ' + right_arrow + ' ' + xref_name + newline )
    if entry_biblio_line: handle.write( entry_biblio_line + newline )
    handle.write( linebreak + newline )

    if entry_biblio_block and entry_name not in authors: #{
      handle.write( entry_biblio_block + linebreak + newline )
      authors.add( entry_name )
    #}

    # Write out details from 'book' table
    if problem: handle.write( problem + newline )
    if role_in_book: handle.write( role_in_book + newline )
    if title_of_book: handle.write( '<strong>' + title_of_book + '</strong>' + newline )
    if xref_title_of_book: handle.write( ' ' + right_arrow + ' ' + xref_title_of_book + newline )
    if book_biblio_line: handle.write( book_biblio_line + newline )

    handle.write( '</li>' + newline + newline )
  #}

  handle.write( '</ul><!-- end catalogue_entry_list -->' + newline )

  handle.write( '</div><!-- end div class "index" -->' )
  handle.write( newline )

  handle.write( '<p>' + newline )
  write_catalogue_entries_total( handle, len( copy_results ) )
  handle.write( '</p>' + newline )

  write_end_main_content( handle )
#}


def writeDocumentContents( handle, document_code ): #{
  """Write the list of catalogue entries for one medieval document to 'handle'.

  Writes the title block and the start of the main content, then looks the document up
  in index_medieval_documents_view and its copies in index_entry_copies. It writes
  breadcrumbs, headings and the entry total, followed by one <li> per distinct copy code
  giving the copy, its author (from index_entries) and its book (from
  index_entry_books), and finishes with the entry total again and the end of the main
  content.

  Args:
    handle: object with a write() method that receives the output.
    document_code: code of the document to list; it is interpolated into the queries
      as it stands.

  The cursor and connection are closed even if an error occurs part way through.
  """

  write_inherit_and_title_block( handle )

  write_start_main_content( handle )

  # Connect to the database and create a cursor
  the_database_connection = c.get_database_connection()
  try: #{
    the_cursor = the_database_connection.cursor()
    try: #{
      _write_document_contents_body( handle, the_cursor, document_code )
    #}
    finally:
      the_cursor.close()
  #}
  finally:
    the_database_connection.close()
#}
示例#15
0
文件: utf8.py 项目: MLGB3/mlgb.cron
def _refill_and_convert_table(the_cursor, table_name):  #{
    """Refill table_name from its source table, then convert its text fields row by row.

    Does the database work for changeOneTable() on the_cursor; the caller owns the cursor
    and is responsible for closing it.
    """

    statement = 'truncate table %s' % table_name
    the_cursor.execute(statement)

    source_table = table_name[2:]  # strip off 'u_' prefix
    statement = 'insert into %s select * from %s' % (table_name, source_table)
    the_cursor.execute(statement)

    key_list = table_keys[table_name]
    select_keys = ", ".join(key_list)

    statement = "select %s from %s order by %s" % (select_keys, table_name,
                                                   select_keys)
    the_cursor.execute(statement)
    results = the_cursor.fetchall()

    for row in results:  #{
        # Identify this row by its key columns, which come back in key_list order.
        # The keys are formatted with %d, so they must be numeric.
        where_clause = " and ".join([
            "%s = %d" % (keyname, row[i]) for i, keyname in enumerate(key_list)
        ])

        for fieldname in fields_to_convert[table_name]:  #{
            statement = "select %s from %s where %s" % (fieldname, table_name,
                                                        where_clause)
            the_cursor.execute(statement)
            fieldval = the_cursor.fetchone()[0]
            if not fieldval: continue  # nothing to convert in an empty field

            # Reformat in 2 steps:
            # 1. Convert homespun ASCII coding invented by Richard Sharpe to HTML entities.
            # 2. Convert the HTML entities to UTF-8.

            # Turn ASCII coding into HTML
            fieldval = w.reformat(fieldval)

            # Turn HTML entities into UTF-8 characters
            fieldval = html_parser.unescape(fieldval)

            # Remove some Django template tags
            fieldval = fieldval.replace('{% templatetag openvariable %}', '{{')
            fieldval = fieldval.replace('{% templatetag closevariable %}',
                                        '}}')

            # NOTE(review): only single quotes are escaped here. Depending on the
            # database's string-literal rules a backslash in the data may need
            # escaping too - confirm.
            fieldval = fieldval.replace("'", "''")  # escape for SQL
            statement = "update %s set %s = '%s' where %s" % (
                table_name, fieldname, fieldval, where_clause)

            the_cursor.execute(statement.encode('utf8'))
        #}
    #}
#}


def changeOneTable(table_name):  #{
    """Rebuild one table from its source table and convert its text fields to UTF-8.

    The table is emptied and refilled from the table of the same name without its first
    two characters (the 'u_' prefix). Then, for every row and every field listed in
    fields_to_convert for the table, the value is passed through w.reformat() and
    html_parser.unescape(), two Django 'templatetag' markers are replaced by literal
    braces, and the result is written back with an "update" statement encoded as UTF-8.

    Args:
        table_name: name of the table to rebuild; it must be a key of both table_keys
            and fields_to_convert.

    The cursor and connection are closed even if an error occurs part way through.
    """

    the_database_connection = c.get_database_connection()
    try:  #{
        the_cursor = the_database_connection.cursor()
        try:  #{
            _refill_and_convert_table(the_cursor, table_name)
        #}
        finally:
            the_cursor.close()
    #}
    finally:
        the_database_connection.close()
#}
示例#16
0
def _write_document_list_by_date_body( handle, the_cursor ): #{
  """Write the whole 'catalogues by date' page to 'handle', reading through the_cursor.

  Does the work for writeDocumentListByDate(); the caller owns the cursor and is
  responsible for closing it.
  """

  write_inherit_and_title_block( handle )

  write_start_main_content( handle )

  handle.write( '<h2>List of medieval catalogues</h2>' )
  handle.write( newline )

  handle.write( '<div class="index">' )
  handle.write( newline + newline )

  # Build the navigation by century, which is repeated above every century's table
  centuries = [ '10', '11', '12', '13', '14', '15', '16', '17', 'undated' ]
  nav_links = []
  for nav_century in centuries: #{
    nav_desc = get_century_desc( nav_century )
    nav_anchor = get_century_anchor( nav_century )
    nav_links.append( '<a href="#%s">%s</a>' % (nav_anchor, nav_desc ) )
  #}
  century_nav = ' | '.join( nav_links )

  handle.write( '<h3 class="inline_heading">Overview by date</h3>' )
  handle.write( '{% if printing %}<br />{%else%}' )
  handle.write( ' | <a href="%s">Overview by provenance</a>' % medieval_catalogues_url )
  handle.write( '{% endif %}' )
  handle.write( newline + newline )

  # Documents with no start or end date are given the date 2000-01-01 for sorting
  statement  = "select document_code, document_name, "
  statement += " coalesce( start_date, '2000-01-01') as sort_start_date, "
  statement += " coalesce( end_date, '2000-01-01') as sort_end_date, "
  statement += " doc_group_type_name, doc_group_name "
  statement += " from index_medieval_documents_view where document_code > '' "
  statement += " order by sort_start_date, sort_end_date, document_code_sort"
  the_cursor.execute( statement )
  documents = the_cursor.fetchall()

  prev_type_code = ''
  prev_century = 0   # stays at 0 until the first century's table has been opened
  century_desc = ''
  total_for_century = 0

  handle.write( '<div id="catalogues_by_date">' + newline )

  for document in documents: #{
    document_code = document[ 0 ] #document_code e.g. BC21
    document_name = w.reformat( document[ 1 ] ) # e.g. 'Books read in the refectory 1473'
    start_date    = document[ 2 ]
    library_type  = w.reformat( document[ 4 ] ) # e.g. Benedictines
    library_loc   = w.reformat( document[ 5 ] ) # e.g. Abbey of St Frideswide

    type_code = document_code[ 0 : 1 ] # this wouldn't work with 2-letter types e.g. BA
                                       # but we are only really interested in K and R
    start_year = start_date[ 0 : 4 ]
    if start_year.startswith( '0' ): start_year = start_year[ 1 : ]
    century = int( start_year ) // 100 + 1

    print( '%s %s %s %s' % (century, start_year, document_code, document_name) )

    if century != prev_century: #{
      # Finish off the previous century's table, if there was one, and start a new one
      if prev_century > 0: #{
        handle.write( '</td></tr></table>' + newline )
        write_total_for_century( handle, century_desc, total_for_century )
      #}

      century_desc = get_century_desc( century )
      anchor_name = get_century_anchor( century )
      handle.write( '<p><a name="%s"></a></p>' % anchor_name )
      write_century_nav( handle, century_nav )

      prev_century = century
      total_for_century = 0
      prev_type_code = ''

      handle.write( '<h4>%s</h4>' % century_desc )
      handle.write( newline )
      handle.write( '<table class="century" id="century%dtab">' % century )
      handle.write( newline )
    #}

    total_for_century += 1

    statement = "select count(*) from index_entry_copies where document_code = '%s'" \
              % document_code
    the_cursor.execute( statement )
    num_catalogue_entries = the_cursor.fetchone()[ 0 ]

    # no need to keep repeating the same decode hundreds of times for K and R
    if type_code in inline_lists and type_code == prev_type_code: #{
      handle.write( ' &bull; ' )
      if num_catalogue_entries > 0: write_link_to_document( handle, document_code )
      handle.write( '%s (%d) '  % (document_code, num_catalogue_entries))
      if num_catalogue_entries > 0: handle.write( '</a>' + newline )
    #}

    else: #{  # not in middle of K or R, so write out a complete row for each entry
      if total_for_century > 1: handle.write( '</td></tr>' + newline )

      handle.write( '<tr><td>' + newline )
      if start_year in document_name:
        handle.write( '<em>' + start_year + '</em>' + newline )

      handle.write( '</td><td>' + newline )
      handle.write( '%s' % library_type )
      if library_loc != library_type:
        handle.write( ': %s' % library_loc )

      handle.write( '</td><td>' + newline )
      if num_catalogue_entries > 0: write_link_to_document( handle, document_code )
      handle.write('%s. %s (%d)' % (document_code, document_name, num_catalogue_entries))
      if num_catalogue_entries > 0: handle.write( '</a>' + newline )

      # the final <td> gets finished off when you get to the next entry
    #}

    prev_type_code = type_code

    handle.write( newline )
  #}

  # Finish off the last century's table, but only if one was ever started: with no
  # documents at all there is no open table to close and no total to report.
  if prev_century > 0: #{
    handle.write( '</td></tr></table>' + newline )
    write_total_for_century( handle, century_desc, total_for_century )
  #}

  handle.write( '</div><!-- end list of catalogues by date -->' + newline )
  write_century_nav( handle, century_nav )
  handle.write( '</div><!-- end div class "index" -->' )
  handle.write( newline + newline )

  write_end_main_content( handle )
#}


def writeDocumentListByDate( handle ): #{
  """Write the list of medieval catalogues, arranged by century, to 'handle'.

  Reads every document with a non-empty code from index_medieval_documents_view in
  date order and writes one table per century, each preceded by an anchor, the century
  navigation links and a heading, and followed by the total for the century. Documents
  whose type code is in inline_lists and matches the previous document's are added to
  the current table cell as bullet-separated items; all others get a table row of
  their own. Each document shows its number of rows in index_entry_copies, and is
  written as a link when that number is above zero.

  Args:
    handle: object with a write() method that receives the output.

  Side effects:
    Prints one progress line per document to standard output.

  The cursor and connection are closed even if an error occurs part way through.
  """

  # Connect to the database and create a cursor
  the_database_connection = c.get_database_connection()
  try: #{
    the_cursor = the_database_connection.cursor()
    try: #{
      _write_document_list_by_date_body( handle, the_cursor )
    #}
    finally:
      the_cursor.close()
  #}
  finally:
    the_database_connection.close()
#}
示例#17
0
def _get_catalogue_entries(word):  #{
    """Return (document_code, [seqno, ...]) for one word of a catalogue reference.

    Returns None if the word is not a copy code, or if its document code is not
    alphanumeric or its sequence number is not a whole number above zero. If the word is
    a numeric range, the list also holds the rest of the sequence numbers in the range.
    Sequence numbers are returned as strings.
    """

    if not i.is_copy_code(word):
        return None

    word = word.strip()
    if word.endswith(','):
        word = word[0:-1]  # take off any commas from the end

    document_code = i.get_document_code(word)
    seqno_in_document = i.get_seqno_in_document(word)

    if not (document_code.isalnum() and seqno_in_document.isdigit()
            and int(seqno_in_document) > 0):
        return None  # some kind of incomplete or garbled entry - don't try to save it

    catalogue_entries = [seqno_in_document]

    if i.is_numeric_range(word):  #{
        # need to generate sequence numbers for rest of range
        for int_seqno in i.get_rest_of_numeric_range(word):  #{
            catalogue_entries.append(str(int_seqno))
        #}
    #}

    return document_code, catalogue_entries
#}


def setBookIDs():  #{
    """Rebuild index_mlgb_links from the medieval_catalogue field of books_book.

    Reads the ID and medieval_catalogue value of every book that has one, empties
    index_mlgb_links, then inserts one row (book ID, document code, sequence number) for
    every copy code found in each value, expanding numeric ranges into one row per
    sequence number. Words that are not recognisable copy codes are skipped.

    Side effects:
        Prints each medieval_catalogue value and each link inserted to standard output.

    The cursor and connection are both closed whether or not an error occurs; any error
    is passed on to the caller.
    """

    # Connect to the database and create a cursor
    the_database_connection = c.get_database_connection()
    try:  #{
        the_cursor = the_database_connection.cursor()
        try:  #{
            # Get details of the medieval catalogues in which an MLGB book appeared.
            select = "select id, medieval_catalogue from books_book where medieval_catalogue > ''"
            select += " order by id"
            the_cursor.execute(select)
            results = the_cursor.fetchall()

            # Clear out old data
            the_cursor.execute("TRUNCATE TABLE index_mlgb_links")

            # Insert the new data
            for row in results:  #{
                book_id = row[0]
                medieval_catalogue = row[1].strip()

                print("\n\n%s" % medieval_catalogue)

                # Avoid confusion potentially caused by spaces in wrong place etc.
                medieval_catalogue = medieval_catalogue.replace('. ', '.')
                medieval_catalogue = medieval_catalogue.replace('?', '')
                medieval_catalogue = medieval_catalogue.replace('=', ' ')

                for word in medieval_catalogue.split():  #{
                    parsed = _get_catalogue_entries(word)
                    if parsed is None: continue
                    document_code, catalogue_entries = parsed

                    for seqno_in_document in catalogue_entries:  #{
                        print("%d: '%s' %s" % (book_id, document_code,
                                               seqno_in_document))

                        insert_statement = 'insert into index_mlgb_links '
                        insert_statement += '( mlgb_book_id, document_code, seqno_in_document ) values '
                        insert_statement += "( %d, '%s', %s )" % (
                            book_id, document_code, seqno_in_document)
                        the_cursor.execute(insert_statement)
                    #}
                #}
            #}
        #}
        finally:
            the_cursor.close()
    #}
    finally:
        the_database_connection.close()
#}
示例#18
0
def produceOutput(letter, handle):  #{
    """Write the author/title index page for one initial letter.

    Opens a database connection and a cursor, writes the complete
    Django-style template for 'letter' to 'handle', and then closes the
    cursor and the connection.  Both are closed even if a query or a write
    fails part-way through; the exception is then passed on to the caller.

    Parameters:
      letter -- the initial letter whose index entries are listed.  A false
                value (e.g. '') produces a page holding only the letter
                navigation and the advanced search form.
      handle -- object with a write() method that receives the output.

    Returns None.

    NOTE(review): 'letter' is interpolated directly into SQL text, so
    callers are presumably expected to pass only values from a fixed list
    of letters -- confirm against the caller.
    """

    # Connect to the database and create a cursor
    the_database_connection = c.get_database_connection()
    try:
        the_cursor = the_database_connection.cursor()
        try:
            _write_author_title_page(letter, handle, the_cursor)
        finally:
            the_cursor.close()
    finally:
        # Release the connection whether or not the page was completed
        the_database_connection.close()
#}


def _write_author_title_page(letter, handle, the_cursor):  #{
    """Write the whole index page for 'letter' to 'handle'.

    All queries are run through 'the_cursor'.  The cursor is not closed
    here; closing it is left to the caller.
    """

    #......................................

    # Start writing the template
    handle.write('{% extends "base.html" %}' + newline)
    handle.write('{% block title %}' + newline)
    handle.write('<title>MLGB3 Author/Title Index</title>' + newline)
    handle.write('{% endblock %}' + newline)

    #......................................

    # Write our own treeview expand/collapse function so that we can handle links
    # how we want rather than being constrained by the behaviour of the jQuery function.
    handle.write('{% block treeview %}' + newline)
    handle.write(get_expand_collapse_script() + newline)

    # Get entry/book IDs for use in expand/collapse script
    statement = 'select i.entry_id, b.entry_book_count from index_entries i, index_entry_books b '
    statement += " where i.entry_id = b.entry_id and i.letter = '%s' " % letter
    statement += " order by i.entry_id, b.entry_book_count"
    the_cursor.execute(statement)
    id_results = the_cursor.fetchall()
    ids_for_expand_collapse = []
    for row in id_results:  #{
        e_id_string = str(row[0])
        b_id_string = get_book_id_for_expand_collapse(row[0], row[1])
        # One row per book, so list each entry ID only the first time it is seen
        if e_id_string not in ids_for_expand_collapse:
            ids_for_expand_collapse.append(e_id_string)
        ids_for_expand_collapse.append(b_id_string)
    #}

    # Write "expand/collapse all" scripts
    handle.write(get_expand_and_collapse_all_script(ids_for_expand_collapse))
    handle.write('{% endblock %}' + newline)  # end 'treeview' block

    #......................................

    # Start writing the main page content
    handle.write('{% block maincontent %}' + newline)
    handle.write('<div class="index">')
    handle.write(newline + newline)

    # Write page heading
    handle.write('<h2>Browse Author/Title Index: %s</h2>' % letter)
    handle.write(newline)

    # Write navigation by initial letter
    handle.write('{% if not printing %}<div class="letterlinks">')
    for possible_letter in letters_with_entries:  #{
        # The combined 'I/J' heading is written without the slash, in the URL and the link text
        if possible_letter == 'I/J': possible_letter = 'IJ'
        selection_class = ''
        if possible_letter == letter: selection_class = ' class="selected" '
        handle.write( '<a href="%s/authortitle/browse/%s/" %s >%s</a>\n' \
                      % (if_editable, possible_letter, selection_class, possible_letter))
        if possible_letter != 'Z':
            handle.write('<span class="spacer"> </span>')
    #}
    handle.write('</div><!-- end div "letterlinks" -->{% endif %}')
    handle.write(newline + newline)

    if not letter:  #{ # just a menu of the letters available

        # Add the Advanced Search form here to fill up the blank space.
        handle.write("{% include 'includes/authortitle_adv_search.html' %}" +
                     newline)

        write_link_to_source_file(handle)
        handle.write('</div><!-- end div "index" -->')
        handle.write(newline + indexmenu + newline)
        write_end_of_page(handle)
        return
    #}

    #......................................

    # Start writing author/title treeview
    handle.write('<div id="authortreecontrol">' + newline)
    handle.write('{% if not printing %}')
    handle.write(
        '<span class="like_a_link" onclick="collapseAllEntries()">Collapse All</span> '
    )
    handle.write(' | ')
    handle.write(
        '<span class="like_a_link" onclick="expandAllEntries()">Expand All</span> '
    )
    handle.write('{% endif %}')
    handle.write(newline)
    handle.write('</div>' + newline)

    handle.write('<ul class="authortreeview" id="authortree">' + newline)

    #......................................

    # Get entry details for main display
    statement = "select * from index_entries where letter = '%s' order by entry_id" % letter
    the_cursor.execute(statement)
    entry_results = the_cursor.fetchall()

    # Start writing the main display of results
    for entry_row in entry_results:  #{ # id, letter, name, xref name, biblio line, biblio block
        entry_id = entry_row[0]
        # we already know letter
        primary_name = reformat(entry_row[2])
        xref_name = reformat(entry_row[3])
        entry_bib_line = reformat(entry_row[4])
        entry_bib_block = reformat(entry_row[5])

        # Get a version of the index entry without HTML entities, for use in title displayed on hover
        hover_primary_name = strip_html_for_hover(primary_name)
        prev_problem = ''

        # Get the books belonging to this entry
        statement = "select * from index_entry_books where entry_id = %d order by entry_book_count" \
                  % entry_id
        the_cursor.execute(statement)
        book_results = the_cursor.fetchall()
        num_books = len(book_results)
        num_catalogue_entries = 0

        # Start writing out the entry
        handle.write(newline + newline)
        handle.write('<!-- Start new entry "%s", entry ID %d -->' %
                     (primary_name.strip(), entry_id))
        handle.write(newline + '<a name="entry%d_anchor"></a>' % entry_id)
        handle.write(newline)
        handle.write('<li class="outerhead">' + newline)

        # An entry gets expand/collapse controls only if there is something to show beneath it
        is_expandable = False
        if num_books > 0 or entry_bib_line or entry_bib_block:
            is_expandable = True

        if is_expandable:  #{
            handle.write(get_expand_collapse_button(entry_id, '+'))
            handle.write(
                get_expand_collapse_span(entry_id, primary_name, 'outerhead'))
        #}
        else:
            handle.write('<span class="outerhead">%s</span>' %
                         primary_name.strip())

        if xref_name: handle.write(' %s %s' % (right_arrow, xref_name))

        if is_expandable:  #{

            if num_books == 1:  #{ # could be just a dummy book, i.e. this is an entry by title not author
                title_of_book = book_results[0][3].strip()
                if not title_of_book: num_books = 0
            #}

            # This 'bibliography' section should be invisible when tree is fully collapsed.
            biblio_block_id = 'biblio_' + get_outer_div_for_expand_collapse(
                entry_id)
            handle.write('<div id="%s" class="author_biblio_block" ' %
                         biblio_block_id)
            handle.write(
                'style="display:{%if printing%}block{%else%}none{%endif%}">')

            if entry_bib_line:  #{
                handle.write(entry_bib_line + newline)
            #}

            if entry_bib_line.strip() and entry_bib_block.strip():  #{
                handle.write(linebreak + newline)
            #}

            if entry_bib_block:  #{
                handle.write(entry_bib_block + newline)
            #}
            handle.write('</div>')

            # An entry by title will still have medieval catalogue entries
            statement = "select count(*) from index_entry_copies where entry_id = %d" % entry_id
            the_cursor.execute(statement)
            total_row = the_cursor.fetchone()
            num_catalogue_entries = total_row[0]

            if num_books or num_catalogue_entries:  #{
                handle.write(newline + '<div class="totals">' + newline)
                totals_string = ''
                if num_catalogue_entries:  #{
                    if num_catalogue_entries == 1:
                        catcount_desc = 'catalogue entry'
                    else:
                        catcount_desc = 'catalogue entries'
                    totals_string = '%d %s' % (num_catalogue_entries,
                                               catcount_desc)
                #}
                if num_books:  #{
                    if num_books == 1:
                        bookcount_desc = 'book'
                    else:
                        bookcount_desc = 'books'
                    totals_string += ' (%d %s)' % (num_books, bookcount_desc)
                #}
                handle.write(
                    get_expand_collapse_span(entry_id, totals_string,
                                             'outer_subhead totals'))
                handle.write('</div><!-- end "totals" div -->')
            #}

            # Begin the section that expands and collapses
            # i.e. generally the author name with a hidden list of books below.
            handle.write( newline + '<div id="%s" class="expand_entry" ' \
                                  % get_outer_div_for_expand_collapse( entry_id ) )
            handle.write(
                ' style="display:{%if printing%}block{%else%}none{%endif%}">')

            # If there are multiple books for one author, provide link to expand/collapse them all at once
            if num_books > 1:  #{
                if num_books == 2: expand_collapse_msg = 'both'
                else: expand_collapse_msg = 'all %d' % num_books
                expand_collapse_msg = ' %s books' % expand_collapse_msg

                book_ids = []
                for book in book_results:  #{
                    entry_book_count = book[1]
                    book_ids.append(
                        get_book_id_for_expand_collapse(
                            entry_id, entry_book_count))
                #}

                # Per-entry Javascript functions that expand or collapse every book of this entry
                handle.write('<script type="text/javascript">' + newline)

                handle.write('  function expand_books_for_entry_%d() { ' %
                             entry_id)
                handle.write(newline)
                for book_id in book_ids:  #{
                    handle.write("    expand_or_collapse( '%s', '+', 2 );" %
                                 book_id)
                    handle.write(newline)
                #}
                handle.write('  }' + newline)

                handle.write('  function collapse_books_for_entry_%d() { ' %
                             entry_id)
                handle.write(newline)
                for book_id in book_ids:  #{
                    handle.write("  expand_or_collapse( '%s', '-', 2 );" %
                                 book_id)
                    handle.write(newline)
                #}
                handle.write('  }' + newline)

                handle.write('</script>' + newline)

                handle.write('{% if not printing %}')
                handle.write(
                    '<span class="like_a_link" onclick="collapse_books_for_entry_%d()">'
                    % entry_id)
                handle.write('Collapse %s</span> ' % expand_collapse_msg)
                handle.write(' | ')
                handle.write(
                    '<span class="like_a_link" onclick="expand_books_for_entry_%d()">'
                    % entry_id)
                handle.write('Expand %s</span> ' % expand_collapse_msg)
                handle.write(linebreak + linebreak + newline)
                handle.write('{% endif %}')
            #}

            # Now start writing out the list of books
            handle.write(newline + '<ul><!-- start list of books -->' +
                         newline)
        #}

        for book in book_results:  #{ 0: entry_id, 1: entry_book_count, 2: role_in_book
            #  3: title_of_book, 4: book_biblio_line, 5: xref_title_of_book
            #  6: copies, 7: problem
            #  (column 6 is not used on this page)

            entry_book_count = book[1]
            role_in_book = reformat(book[2])
            title_of_book = reformat(book[3])
            book_biblio_line = reformat(book[4])
            xref_title_of_book = reformat(book[5])
            problem = reformat(book[7])

            # Get a version of the book title without HTML entities, for use in title displayed on hover
            hover_title_of_book = strip_html_for_hover(title_of_book)

            # Write the 'problem' text once for each run of consecutive books that share it
            if problem != prev_problem:  #{
                handle.write(newline)
                handle.write('<p>%s</p>' % problem)
                handle.write(newline)
                prev_problem = problem
            #}

            handle.write(newline + '<li>')
            handle.write('<!-- start entry ID %d, book %d -->' %
                         (entry_id, entry_book_count))
            handle.write(newline)

            statement = "select copy_code, copy_notes, document_name, doc_group_name, doc_group_type_name, "
            statement += " document_code, seqno_in_document, copy_count "
            statement += " from index_entry_copies where entry_id = %d " % entry_id
            statement += " and entry_book_count = %d order by copy_count" % entry_book_count
            the_cursor.execute(statement)
            copy_results = the_cursor.fetchall()
            prev_copy_code = ''

            first_line_of_book_entry = role_in_book
            first_line_of_book_entry += title_of_book
            if book_biblio_line:
                first_line_of_book_entry += ": %s" % book_biblio_line
            if xref_title_of_book:
                first_line_of_book_entry += "%s %s" % (right_arrow,
                                                       xref_title_of_book)

            # Write out the first line for this book, initially with the catalogue entries under it hidden
            book_id_for_expand_collapse = get_book_id_for_expand_collapse(
                entry_id, entry_book_count)

            if first_line_of_book_entry and copy_results:  #{
                handle.write(
                    get_expand_collapse_button(book_id_for_expand_collapse,
                                               '+'))
                handle.write( get_expand_collapse_span( book_id_for_expand_collapse, \
                                                        first_line_of_book_entry, 'innerhead' ) )
            #}
            else:
                handle.write(first_line_of_book_entry)

            if copy_results:  #{
                # With no heading line to click on, the table has to be visible from the start
                if first_line_of_book_entry:
                    initial_display_style = '{%if printing%}block{%else%}none{%endif%}'
                else:
                    initial_display_style = 'block'

                handle.write( newline + '<table id="entry%s_tab" style="display:%s" ' \
                              % (book_id_for_expand_collapse, initial_display_style) )
                handle.write(' class="catalogue_entries">')
                handle.write(newline)
                handle.write(
                    '<tr class="catalogue_entry_head"><td>Catalogue entry</td>'
                )
                handle.write('<td>Catalogue</td></tr>')
                handle.write(newline)

                for one_copy in copy_results:  #{

                    copy_code = reformat(one_copy[0])
                    copy_notes = reformat(one_copy[1])
                    document_name = reformat(one_copy[2])
                    doc_group_name = reformat(one_copy[3])
                    doc_group_type_name = reformat(one_copy[4])
                    document_code = one_copy[5]
                    seqno_in_document = one_copy[6]
                    copy_count = one_copy[7]

                    # Skip a copy whose code repeats that of the row just before it
                    if copy_code == prev_copy_code: continue
                    prev_copy_code = copy_code

                    hover_title = hover_primary_name
                    if hover_title_of_book:
                        hover_title += ' ' + em_dash + ' ' + hover_title_of_book

                    # See if we have got any links to the actual MLGB database.
                    # A missing sequence number becomes '0', which the 'seqno_in_document > 0'
                    # condition below then excludes.
                    if seqno_in_document is None: seqno_in_document = '0'

                    statement = "select mlgb_book_id from index_mlgb_links "
                    statement += " where document_code = '%s' and seqno_in_document = %s " \
                               % (document_code, seqno_in_document)
                    statement += " and seqno_in_document > 0 order by mlgb_book_id"
                    the_cursor.execute(statement)
                    mlgb_links = the_cursor.fetchall()

                    handle.write( newline + '<!-- start entry %d, book %d, copy %d -->' \
                                  % (entry_id, entry_book_count, copy_count) )
                    handle.write(newline + '<tr class="catalogue_entry">' +
                                 newline)

                    handle.write('<td class="catalogue_entry_code">')
                    handle.write( get_copy_code_and_desc( copy_code, seqno_in_document, copy_notes, \
                                                          hover_title, mlgb_links ) )
                    handle.write('</td>')
                    handle.write(newline)

                    handle.write('<td class="catalogue_name">')
                    handle.write(newline)
                    handle.write(
                        '<a href="%s%s/%s"' %
                        (if_editable, medieval_catalogues_url, document_code))
                    handle.write(' title="Further details of catalogue %s" ' %
                                 document_code)
                    handle.write(' class="link_to_catalogue" >')
                    if doc_group_type_name:
                        handle.write(doc_group_type_name)
                    if doc_group_name:  #{
                        # Leave the group name out if the group type name already ends with it
                        if not doc_group_type_name.endswith(
                                doc_group_name):  #{
                            handle.write(': %s' % doc_group_name)
                        #}
                    #}
                    if document_name: handle.write(': %s' % document_name)
                    handle.write('</a>')
                    handle.write('</td>')
                    handle.write(newline)

                    handle.write(newline + '</tr>')
                #}

                handle.write(newline + '</table>' + newline)
            #}

            handle.write('</li><!-- end of one book -->')
            handle.write(
                newline + newline
            )  # make it a bit clearer by having a proper gap between books
        #}

        if is_expandable:  #{
            handle.write(newline +
                         '</ul><!-- end list of books for one author -->' +
                         newline)
            handle.write(
                newline +
                '</div><!-- end outer expandable/collapsible section -->' +
                newline)
        #}

        handle.write(newline + '</li><!-- end outerhead list item -->')
        handle.write(newline + '<!-- end entry ID %d (%s) -->' %
                     (entry_id, primary_name.strip()))
    #}

    handle.write('</ul><!-- end tree -->' + newline)

    write_link_to_source_file(handle)
    handle.write('</div><!-- end div class "index" -->')
    handle.write(newline)

    handle.write(newline + indexmenu + newline)

    handle.write(
        newline +
        '{% if printing %}<script>window.print();</script>{% endif %}' +
        newline)
    write_end_of_page(handle)
#}
示例#19
0
def produceOutput( letter, handle ): #{
  """Write the author/title index page for one initial letter.

  Opens a database connection and a cursor, writes the complete
  Django-style template for 'letter' to 'handle', and then closes the
  cursor and the connection.  Both are closed even if a query or a write
  fails part-way through; the exception is then passed on to the caller.

  Parameters:
    letter -- the initial letter whose index entries are listed.  A false
              value (e.g. '') produces a page holding only the letter
              navigation and the advanced search form.
    handle -- object with a write() method that receives the output.

  Returns None.

  NOTE(review): 'letter' is interpolated directly into SQL text, so
  callers are presumably expected to pass only values from a fixed list
  of letters -- confirm against the caller.
  """

  # Connect to the database and create a cursor
  the_database_connection = c.get_database_connection()
  try:
    the_cursor = the_database_connection.cursor()
    try:
      _write_author_title_page( letter, handle, the_cursor )
    finally:
      the_cursor.close()
  finally:
    # Release the connection whether or not the page was completed
    the_database_connection.close()
#}


def _write_author_title_page( letter, handle, the_cursor ): #{
  """Write the whole index page for 'letter' to 'handle'.

  All queries are run through 'the_cursor'.  The cursor is not closed
  here; closing it is left to the caller.
  """

  #......................................

  # Start writing the template
  handle.write( '{% extends "base.html" %}'               + newline )
  handle.write( '{% block title %}'                       + newline )
  handle.write( '<title>MLGB3 Author/Title Index</title>' + newline )
  handle.write( '{% endblock %}'                          + newline )

  #......................................

  # Write our own treeview expand/collapse function so that we can handle links
  # how we want rather than being constrained by the behaviour of the jQuery function.
  handle.write( '{% block treeview %}'                    + newline )
  handle.write( get_expand_collapse_script()              + newline )

  # Get entry/book IDs for use in expand/collapse script
  statement = 'select i.entry_id, b.entry_book_count from index_entries i, index_entry_books b '
  statement += " where i.entry_id = b.entry_id and i.letter = '%s' " % letter
  statement += " order by i.entry_id, b.entry_book_count"
  the_cursor.execute( statement )
  id_results = the_cursor.fetchall()
  ids_for_expand_collapse = []
  for row in id_results: #{
    e_id_string = str( row[ 0 ] )
    b_id_string = get_book_id_for_expand_collapse( row[ 0 ], row[ 1 ] )
    # One row per book, so list each entry ID only the first time it is seen
    if e_id_string not in ids_for_expand_collapse:
      ids_for_expand_collapse.append( e_id_string )
    ids_for_expand_collapse.append( b_id_string )
  #}

  # Write "expand/collapse all" scripts
  handle.write( get_expand_and_collapse_all_script( ids_for_expand_collapse ))
  handle.write( '{% endblock %}' + newline )  # end 'treeview' block

  #......................................

  # Start writing the main page content
  handle.write( '{% block maincontent %}' + newline )
  handle.write( '<div class="index">' )
  handle.write( newline + newline )


  # Write page heading
  handle.write( '<h2>Browse Author/Title Index: %s</h2>' % letter )
  handle.write( newline )

  # Write navigation by initial letter
  handle.write( '{% if not printing %}<div class="letterlinks">' )
  for possible_letter in letters_with_entries: #{
    # The combined 'I/J' heading is written without the slash, in the URL and the link text
    if possible_letter == 'I/J': possible_letter = 'IJ'
    selection_class = ''
    if possible_letter == letter: selection_class = ' class="selected" '
    handle.write( '<a href="%s/authortitle/browse/%s/" %s >%s</a>\n' \
                  % (if_editable, possible_letter, selection_class, possible_letter))
    if possible_letter != 'Z': handle.write( '<span class="spacer"> </span>' )
  #}
  handle.write( '</div><!-- end div "letterlinks" -->{% endif %}' )
  handle.write( newline + newline )

  if not letter: #{ # just a menu of the letters available

    # Add the Advanced Search form here to fill up the blank space.
    handle.write( "{% include 'includes/authortitle_adv_search.html' %}" + newline )

    write_link_to_source_file( handle )
    handle.write( '</div><!-- end div "index" -->' )
    handle.write( newline + indexmenu + newline)
    write_end_of_page( handle )
    return
  #}

  #......................................

  # Start writing author/title treeview
  handle.write( '<div id="authortreecontrol">' + newline )
  handle.write( '{% if not printing %}' )
  handle.write( '<span class="like_a_link" onclick="collapseAllEntries()">Collapse All</span> ' )
  handle.write( ' | ' )
  handle.write( '<span class="like_a_link" onclick="expandAllEntries()">Expand All</span> ' )
  handle.write( '{% endif %}' )
  handle.write( newline )
  handle.write( '</div>' + newline )

  handle.write( '<ul class="authortreeview" id="authortree">' + newline )

  #......................................

  # Get entry details for main display 
  statement = "select * from index_entries where letter = '%s' order by entry_id" % letter
  the_cursor.execute( statement )
  entry_results = the_cursor.fetchall()

  # Start writing the main display of results
  for entry_row in entry_results: #{ # id, letter, name, xref name, biblio line, biblio block
    entry_id = entry_row[ 0 ]
    # we already know letter
    primary_name    = reformat( entry_row[ 2 ] )
    xref_name       = reformat( entry_row[ 3 ] )
    entry_bib_line  = reformat( entry_row[ 4 ] )
    entry_bib_block = reformat( entry_row[ 5 ] )

    # Get a version of the index entry without HTML entities, for use in title displayed on hover
    hover_primary_name = strip_html_for_hover( primary_name )
    prev_problem = ''

    # Get the books belonging to this entry
    statement = "select * from index_entry_books where entry_id = %d order by entry_book_count" \
              % entry_id
    the_cursor.execute( statement )
    book_results = the_cursor.fetchall()
    num_books = len( book_results )
    num_catalogue_entries = 0

    # Start writing out the entry
    handle.write( newline + newline )
    handle.write( '<!-- Start new entry "%s", entry ID %d -->' % (primary_name.strip(), entry_id) )
    handle.write( newline + '<a name="entry%d_anchor"></a>' % entry_id )
    handle.write( newline )
    handle.write( '<li class="outerhead">' + newline )

    # An entry gets expand/collapse controls only if there is something to show beneath it
    is_expandable = False
    if num_books > 0 or entry_bib_line or entry_bib_block: 
      is_expandable = True

    if is_expandable: #{
      handle.write( get_expand_collapse_button( entry_id, '+' ) )
      handle.write( get_expand_collapse_span( entry_id, primary_name, 'outerhead' ) )
    #}
    else:
      handle.write( '<span class="outerhead">%s</span>' % primary_name.strip() )

    if xref_name: handle.write( ' %s %s' % (right_arrow, xref_name) )

    if is_expandable: #{

      if num_books == 1: #{ # could be just a dummy book, i.e. this is an entry by title not author
        title_of_book = book_results[ 0 ][ 3 ].strip()
        if not title_of_book: num_books = 0
      #}

      # This 'bibliography' section should be invisible when tree is fully collapsed.
      biblio_block_id = 'biblio_' + get_outer_div_for_expand_collapse( entry_id )
      handle.write( '<div id="%s" class="author_biblio_block" ' % biblio_block_id )
      handle.write( 'style="display:{%if printing%}block{%else%}none{%endif%}">' ) 

      if entry_bib_line: #{
        handle.write( entry_bib_line + newline )
      #}

      if entry_bib_line.strip() and entry_bib_block.strip(): #{
        handle.write( linebreak + newline )
      #}

      if entry_bib_block: #{
        handle.write( entry_bib_block + newline )
      #}
      handle.write( '</div>' )


      # An entry by title will still have medieval catalogue entries
      statement = "select count(*) from index_entry_copies where entry_id = %d" % entry_id
      the_cursor.execute( statement )
      total_row = the_cursor.fetchone()
      num_catalogue_entries = total_row[ 0 ]

      if num_books or num_catalogue_entries: #{
        handle.write( newline + '<div class="totals">' + newline )
        totals_string = ''
        if num_catalogue_entries:  #{
          if num_catalogue_entries == 1:
            catcount_desc = 'catalogue entry'
          else:
            catcount_desc = 'catalogue entries'
          totals_string = '%d %s' % (num_catalogue_entries, catcount_desc)
        #}
        if num_books: #{
          if num_books == 1:
            bookcount_desc = 'book'
          else:
            bookcount_desc = 'books'
          totals_string += ' (%d %s)' % (num_books, bookcount_desc) 
        #}
        handle.write( get_expand_collapse_span( entry_id, totals_string, 'outer_subhead totals' ))
        handle.write( '</div><!-- end "totals" div -->' )
      #}

      # Begin the section that expands and collapses
      # i.e. generally the author name with a hidden list of books below.
      handle.write( newline + '<div id="%s" class="expand_entry" ' \
                            % get_outer_div_for_expand_collapse( entry_id ) )
      handle.write( ' style="display:{%if printing%}block{%else%}none{%endif%}">' )

      # If there are multiple books for one author, provide link to expand/collapse them all at once
      if num_books > 1: #{
        if num_books == 2: expand_collapse_msg = 'both'
        else: expand_collapse_msg = 'all %d' % num_books
        expand_collapse_msg = ' %s books' % expand_collapse_msg

        book_ids = []
        for book in book_results: #{
          entry_book_count = book[ 1 ]
          book_ids.append( get_book_id_for_expand_collapse( entry_id, entry_book_count ) )
        #}

        # Per-entry Javascript functions that expand or collapse every book of this entry
        handle.write( '<script type="text/javascript">' + newline )

        handle.write( '  function expand_books_for_entry_%d() { ' % entry_id )
        handle.write( newline )
        for book_id in book_ids: #{
          handle.write( "    expand_or_collapse( '%s', '+', 2 );" % book_id )
          handle.write( newline )
        #}
        handle.write( '  }' + newline )

        handle.write( '  function collapse_books_for_entry_%d() { ' % entry_id )
        handle.write( newline )
        for book_id in book_ids: #{
          handle.write( "  expand_or_collapse( '%s', '-', 2 );" % book_id )
          handle.write( newline )
        #}
        handle.write( '  }' + newline )

        handle.write( '</script>' + newline )

        handle.write( '{% if not printing %}' )
        handle.write( '<span class="like_a_link" onclick="collapse_books_for_entry_%d()">' % entry_id )
        handle.write( 'Collapse %s</span> ' % expand_collapse_msg )
        handle.write( ' | ' )
        handle.write( '<span class="like_a_link" onclick="expand_books_for_entry_%d()">' % entry_id )
        handle.write( 'Expand %s</span> ' % expand_collapse_msg )
        handle.write( linebreak + linebreak + newline )
        handle.write( '{% endif %}' )
      #}

      # Now start writing out the list of books
      handle.write( newline + '<ul><!-- start list of books -->' + newline )
    #}

    for book in book_results: #{ 0: entry_id, 1: entry_book_count, 2: role_in_book 
                              #  3: title_of_book, 4: book_biblio_line, 5: xref_title_of_book
                              #  6: copies, 7: problem
                              #  (column 6 is not used on this page)

      entry_book_count   = book[ 1 ]
      role_in_book       = reformat( book[ 2 ] )
      title_of_book      = reformat( book[ 3 ] )
      book_biblio_line   = reformat( book[ 4 ] )
      xref_title_of_book = reformat( book[ 5 ] )
      problem            = reformat( book[ 7 ] )

      # Get a version of the book title without HTML entities, for use in title displayed on hover
      hover_title_of_book = strip_html_for_hover( title_of_book )

      # Write the 'problem' text once for each run of consecutive books that share it
      if problem != prev_problem: #{
        handle.write( newline )
        handle.write( '<p>%s</p>' % problem )
        handle.write( newline )
        prev_problem = problem
      #}

      handle.write( newline + '<li>' )
      handle.write( '<!-- start entry ID %d, book %d -->' % (entry_id, entry_book_count) )
      handle.write( newline )

      statement = "select copy_code, copy_notes, document_name, doc_group_name, doc_group_type_name, " 
      statement += " document_code, seqno_in_document, copy_count "
      statement += " from index_entry_copies where entry_id = %d " % entry_id
      statement += " and entry_book_count = %d order by copy_count" % entry_book_count
      the_cursor.execute( statement )
      copy_results = the_cursor.fetchall()
      prev_copy_code = ''

      first_line_of_book_entry = role_in_book
      first_line_of_book_entry += title_of_book
      if book_biblio_line: first_line_of_book_entry += ": %s" % book_biblio_line
      if xref_title_of_book: first_line_of_book_entry += "%s %s" % (right_arrow, xref_title_of_book)

      # Write out the first line for this book, initially with the catalogue entries under it hidden
      book_id_for_expand_collapse = get_book_id_for_expand_collapse( entry_id, entry_book_count )

      if first_line_of_book_entry and copy_results: #{
        handle.write( get_expand_collapse_button( book_id_for_expand_collapse, '+' ) )
        handle.write( get_expand_collapse_span( book_id_for_expand_collapse, \
                                                first_line_of_book_entry, 'innerhead' ) )
      #}
      else:
        handle.write( first_line_of_book_entry )

      if copy_results: #{
        # With no heading line to click on, the table has to be visible from the start
        if first_line_of_book_entry:
          initial_display_style = '{%if printing%}block{%else%}none{%endif%}'
        else:
          initial_display_style = 'block'

        handle.write( newline + '<table id="entry%s_tab" style="display:%s" ' \
                      % (book_id_for_expand_collapse, initial_display_style) )
        handle.write( ' class="catalogue_entries">' )
        handle.write( newline )
        handle.write( '<tr class="catalogue_entry_head"><td>Catalogue entry</td>' )
        handle.write( '<td>Catalogue</td></tr>' )
        handle.write( newline )

        for one_copy in copy_results: #{

          copy_code           = reformat( one_copy[ 0 ] )
          copy_notes          = reformat( one_copy[ 1 ] )
          document_name       = reformat( one_copy[ 2 ] )
          doc_group_name      = reformat( one_copy[ 3 ] )
          doc_group_type_name = reformat( one_copy[ 4 ] )
          document_code       = one_copy[ 5 ]
          seqno_in_document   = one_copy[ 6 ]
          copy_count          = one_copy[ 7 ]

          # Skip a copy whose code repeats that of the row just before it
          if copy_code == prev_copy_code: continue
          prev_copy_code = copy_code

          hover_title = hover_primary_name
          if hover_title_of_book: hover_title += ' ' + em_dash + ' ' + hover_title_of_book

          # See if we have got any links to the actual MLGB database.
          # A missing sequence number becomes '0', which the 'seqno_in_document > 0'
          # condition below then excludes.
          if seqno_in_document is None: seqno_in_document = '0'

          statement  = "select mlgb_book_id from index_mlgb_links "
          statement += " where document_code = '%s' and seqno_in_document = %s " \
                     % (document_code, seqno_in_document)
          statement += " and seqno_in_document > 0 order by mlgb_book_id" 
          the_cursor.execute( statement )
          mlgb_links = the_cursor.fetchall()

          handle.write( newline + '<!-- start entry %d, book %d, copy %d -->' \
                        % (entry_id, entry_book_count, copy_count) )
          handle.write( newline + '<tr class="catalogue_entry">' + newline )

          handle.write( '<td class="catalogue_entry_code">' )
          handle.write( get_copy_code_and_desc( copy_code, seqno_in_document, copy_notes, \
                                                hover_title, mlgb_links ) )
          handle.write( '</td>' )
          handle.write( newline )

          handle.write( '<td class="catalogue_name">' )
          handle.write( newline )
          handle.write( '<a href="%s%s/%s"' % (if_editable, medieval_catalogues_url, document_code))
          handle.write( ' title="Further details of catalogue %s" ' % document_code )
          handle.write( ' class="link_to_catalogue" >' )
          if doc_group_type_name:
            handle.write( doc_group_type_name )
          if doc_group_name: #{
            # Leave the group name out if the group type name already ends with it
            if not doc_group_type_name.endswith( doc_group_name ): #{
              handle.write( ': %s' % doc_group_name )
            #}
          #}
          if document_name: handle.write( ': %s' % document_name )
          handle.write( '</a>' )
          handle.write( '</td>' )
          handle.write( newline )

          handle.write( newline + '</tr>' )
        #}

        handle.write( newline + '</table>' + newline )
      #}

      handle.write( '</li><!-- end of one book -->' )
      handle.write( newline + newline ) # make it a bit clearer by having a proper gap between books
    #}

    if is_expandable: #{
      handle.write( newline + '</ul><!-- end list of books for one author -->' + newline )
      handle.write( newline + '</div><!-- end outer expandable/collapsible section -->' + newline )
    #}

    handle.write( newline + '</li><!-- end outerhead list item -->' )
    handle.write( newline + '<!-- end entry ID %d (%s) -->' % (entry_id, primary_name.strip()) )
  #}

  handle.write( '</ul><!-- end tree -->' + newline )


  write_link_to_source_file( handle )
  handle.write( '</div><!-- end div class "index" -->' )
  handle.write( newline )

  handle.write( newline + indexmenu + newline)

  handle.write( newline + '{% if printing %}<script>window.print();</script>{% endif %}' + newline )
  write_end_of_page( handle )
#}
示例#20
0
def get_index_by_modern_location(): #{
  """Return an HTML index of books grouped by their modern location.

  One query joins books_book to books_provenance and to both modern-location
  tables, ordered by location 1, location 2 (with 'St ' sorted as 'Saint '),
  then shelfmark sort key and book ID.  The result is an HTML string holding
  a <ul> with one <li> per distinct combined location; each <li> contains an
  <h3> heading and a <table> with one row per book: the shelfmark, then a
  'see PROVENANCE, institution' cross-reference.

  Returns:
    The HTML fragment as a string.  If the query returns no rows, only the
    empty outer <ul> element is returned.
  """

  statement = ( "select p.provenance, ml1.modern_location_1, ml2.modern_location_2, "
                " b.shelfmark_1, b.shelfmark_2, p.institution "
                " from books_provenance p, "
                " books_modern_location_1 ml1, "
                " books_modern_location_2 ml2, "
                " books_book b  "
                " where b.provenance_id = p.id "
                " and b.modern_location_1_id = ml1.id "
                " and b.modern_location_2_id = ml2.id "
                " order by lower( replace( modern_location_1, 'St ', 'Saint ' ) ), "
                " lower( replace( modern_location_2, 'St ', 'Saint ' ) ), "
                " b.shelfmark_sort, b.id" )

  # Connect to the database and create a cursor
  the_database_connection = c.get_database_connection()
  the_cursor = the_database_connection.cursor()
  try: #{
    the_cursor.execute( statement )
    loc_results = the_cursor.fetchall()
  #}
  finally: #{
    # Close your cursor and your connection, even if the query failed
    the_cursor.close()
    the_database_connection.close()
  #}

  # Collect the output in a list and join once at the end.
  pieces = [ newline + "<ul><!-- start list of modern locations -->" + newline ]

  # None means "no location section has been opened yet".  Using None rather
  # than '' means a first row with a blank location still gets its own
  # <li>/<table> wrapper, and an empty result set gets no stray closing tags.
  prev_location = None

  for loc in loc_results: #{
    provenance, location1, location2, shelfmark1, shelfmark2, inst = \
      [ column.strip() for column in loc[ :6 ] ]

    # Combine city and library into one heading, separated by a comma
    # unless the first part already ends with one.
    location = location1
    if location2 and not location1.endswith( ',' ): location += ', '
    location += location2

    if location != prev_location: #{
      if prev_location is not None: #{
        pieces.append( '</table></li><!-- end modern location -->' + newline )
      #}
      prev_location = location
      pieces.append( '<li><!-- start modern location -->' + newline )
      pieces.append( '<h3>' + location + '</h3>' + newline )
      pieces.append( '<table>' + newline )
    #}

    pieces.append( '<tr>' + newline )

    pieces.append( '<td>' )
    pieces.append( "%s %s" % (shelfmark1, shelfmark2) )
    pieces.append( '</td>' + newline )

    pieces.append( '<td>' )
    pieces.append( '<i>see</i> %s, <i>%s</i>' % (provenance.upper(), inst) )
    pieces.append( '</td>' + newline )

    pieces.append( '</tr>' + newline )
  #}

  # Only close the last location section if one was actually opened.
  if prev_location is not None: #{
    pieces.append( '</table></li><!-- end modern location -->' + newline )
  #}
  pieces.append( newline + "</ul><!-- end list of modern locations -->" + newline )

  return ''.join( pieces )
#}
示例#21
0
def get_list_of_surviving_books(): #{
  """Return an HTML list of surviving books, grouped by provenance.

  The outer <ul> has one <li> per row of books_provenance (ordered by
  provenance, with 'St ' sorted as 'Saint ').  Each provenance item shows the
  upper-cased provenance, optional county and institution, optional notes and
  cells, and then a nested list of modern cities (modern_location_1).  Each
  city in turn holds a nested list with one <li> for every book of that
  provenance held in that city.

  Returns:
    The HTML fragment as a string.
  """

  html = "<ul><!-- start list of provenances -->" + newline + newline

  # Connect to the database and create a cursor
  the_database_connection = c.get_database_connection()
  the_cursor = the_database_connection.cursor()

  try: #{
    statement = "select provenance, county, institution, cells, notes, id "
    statement += " from books_provenance "
    statement += " order by lower( replace( provenance, 'St ', 'Saint ' ) )"
    the_cursor.execute( statement )
    prov_results = the_cursor.fetchall()

    for prov in prov_results: #{
      html += linebreak + '<li><!-- start provenance -->' + newline

      provenance  = prov[ 0 ].strip().upper()
      county      = prov[ 1 ].strip()
      institution = prov[ 2 ].strip()
      cells       = prov[ 3 ].strip()
      notes       = prov[ 4 ].strip()
      prov_id     = prov[ 5 ]

      cells = cells.replace( blank_paragraph, '' ).strip()
      notes = notes.replace( blank_paragraph, '' ).strip()

      html += provenance

      # Separate the heading parts with commas, without doubling a comma
      # that the preceding text already ends with.
      if county: #{
        if not html.endswith( ',' ): html += ','
        html += ' ' + county
      #}

      if institution: #{
        if not html.endswith( ',' ): html += ','
        html += ' <i>' + institution + '</i>'
      #}

      html += newline

      if notes or cells: #{
        html += '<div><small>' + newline
        if notes: html += notes + newline
        if cells: html += cells + newline
        html += '</small></div>' + newline
      #}

      # Modern cities holding books from this provenance
      statement = "select distinct ml1.modern_location_1, ml1.id "
      statement += " from books_book b, books_modern_location_1 ml1 "
      statement += " where b.modern_location_1_id = ml1.id and b.provenance_id = %d " % prov_id
      statement += " order by lower( replace(modern_location_1, 'St ', 'Saint ') )"
      the_cursor.execute( statement )
      modern_city_results = the_cursor.fetchall()

      if modern_city_results: #{
        html += '<ul><!-- start list of modern locations -->' + newline
        for modern_city in modern_city_results: #{
          modern_city_name = modern_city[ 0 ]
          modern_city_id   = modern_city[ 1 ]
          html += '<li><!-- start modern location 1 (city) -->' + newline
          html += modern_city_name

          # Books of this provenance held in this city
          statement = "select ml2.modern_location_2, b.shelfmark_1, b.shelfmark_2, b.evidence_id, "
          statement += " b.author_title, b.date, b.pressmark, b.medieval_catalogue, b.unknown "
          statement += " from books_book b, books_modern_location_2 ml2 "
          statement += " where b.modern_location_2_id = ml2.id "
          statement += " and b.provenance_id = %d " % prov_id
          statement += " and b.modern_location_1_id = %d" % modern_city_id
          statement += " order by lower( replace( modern_location_2, 'St ', 'Saint ' ) ),"
          statement += " shelfmark_sort, b.id"
          the_cursor.execute( statement )
          modern_library_results = the_cursor.fetchall()

          if modern_library_results: #{
            html += newline + '<ul><!-- start list of books and their modern libraries -->' + newline

            # Format and emit EVERY book inside the loop, so that each row
            # returned by the query gets its own list item.
            for book in modern_library_results: #{
              modern_library     = book[ 0 ].strip()
              shelfmark_1        = book[ 1 ].strip()
              shelfmark_2        = book[ 2 ].strip()
              evidence_code      = book[ 3 ].strip()
              author_title       = book[ 4 ].strip()
              date               = book[ 5 ].strip()
              pressmark          = book[ 6 ].strip()
              medieval_catalogue = book[ 7 ].strip()
              unknown            = book[ 8 ].strip()

              shelfmark = ( "%s %s" % (shelfmark_1, shelfmark_2) ).strip()
              if shelfmark and not shelfmark.endswith( '.' ): shelfmark += '.'

              if evidence_code: evidence_code = '<i>%s</i>' % evidence_code

              if date and not date.endswith( '.' ): date += '.'

              # Drop paragraph tags from the pressmark before punctuating it
              pressmark = pressmark.replace( '<p>', '' )
              pressmark = pressmark.replace( '</p>', '' )
              pressmark = pressmark.strip()
              if pressmark and not pressmark.endswith( '.' ): pressmark += '.'

              if medieval_catalogue: medieval_catalogue = "[%s]" % medieval_catalogue

              if unknown and not unknown.endswith( ('.', '?') ): unknown += '.'

              html += '<li><!-- start one book -->' + newline
              html += '<b>%s</b>%s' % (modern_library, two_spaces)

              html += "%s " % shelfmark
              html += "%s"  % evidence_code
              html += "%s " % author_title
              html += "%s " % date
              html += "%s " % pressmark
              html += "%s " % medieval_catalogue
              html += "%s " % unknown

              html += newline
              html += '</li><!-- end one book -->' + newline
            #}

            html += '</ul><!-- end list of books and their modern libraries -->' + newline
          #}
          html += '</li><!-- end modern location 1 (city) -->' + newline
        #}
        html += '</ul><!-- end list of modern locations -->' + newline
      #}

      html += '</li><!-- end provenance -->' + newline + newline
    #}

    html += newline + "</ul><!-- end list of provenances -->" + newline
  #}
  finally: #{
    # Close your cursor and your connection, even if a query failed
    the_cursor.close()
    the_database_connection.close()
  #}

  return html
#}
示例#22
0
def stripComments(): #{

  the_database_connection = None
  the_cursor = None

  #=================================================================
  # Read each line of the original file, manipulate it as necessary,
  # and then write it into the new file.
  #=================================================================
  try:
    # Connect to the database and create a cursor
    the_database_connection = c.get_database_connection()
    the_cursor = the_database_connection.cursor() 

    # Look at all the text fields that could contain unwanted XML
    for table_name, field_names in text_fields.items(): #{
      #print newline + table_name + newline

      for field in field_names: #{
        #print newline + table_name + ': ' + field + newline

        select = "select id, %s from %s where %s like '%s%s%s%s%s'" \
               % (field, table_name, field, percent, comment_start, percent, comment_end, percent)
        select += " order by id"
        the_cursor.execute( select )
        results = the_cursor.fetchall()

        # Check each value for XML/HTML comments
        for row in results: #{
          row_id = row[ 0 ]
          text_value = row[ 1 ]

          print ''
          print ''
          print '======================================='
          print table_name, field, 'ID', row_id
          print '======================================='
          print ''

          print '==== RAW VALUE, ID %d ====' % row_id
          print text_value
          print '==== end RAW VALUE, ID %d ==== %s' % (row_id, newline)

          comment_start_count = text_value.count( comment_start )
          comment_end_count = text_value.count( comment_end )

          if comment_start_count != comment_end_count: #{
            print 'Mismatched start/end tags:', comment_start_count, 'starts', comment_end_count, 'ends' 
            continue # don't risk stripping out any real data
          #}

          value_parts = text_value.split( comment_start )
          new_value_parts = []
          new_value = ''
          i = -1
          for part in value_parts: #{
            i += 1
            comment = ''
            data = ''

            if i == 0: #{  # the first section (index 0) is before the comment
              data = part.strip()
            #}
            else: #{ # at start of a comment
              comment_end_count = part.count( comment_end )
              if comment_end_count != 1: #{
                print 'Mismatched start/end tags in:', part
                continue
              #}

              comment_and_data = part.split( comment_end )
              comment = comment_and_data[ 0 ]
              data    = comment_and_data[ 1 ].strip()
              
              print newline + 'About to remove the following comment:'
              print comment + newline
            #}

            if data: #{
              print newline + 'Retaining the following data:'
              print data + newline
              new_value_parts.append( data )
            #}
          #}

          new_value = "".join( new_value_parts )
          new_value = new_value.replace( "'", "''" ) # escape for SQL
          statement = "update %s set %s = '%s' where id = %d" % (table_name, field, new_value, row_id)

          print newline, '/* new value */', statement, newline
          the_cursor.execute( statement )
        #}
      #}
    #}

    the_cursor.close()
    the_database_connection.close()

  except:
    if the_cursor: the_cursor.close()
    if the_database_connection: the_database_connection.close()
    raise
示例#23
0
def writeDocumentList( handle, selected_type_code='', selected_loc_id=0, selected_loc_name=''): #{

  # Connect to the database and create a cursor
  the_database_connection = c.get_database_connection()
  the_cursor = the_database_connection.cursor() 

  # Work out what to show in your breadcrumbs trail etc.
  display_all = False
  selected_type_name = ''

  if not selected_type_code and not selected_loc_id: #{
    display_all = True
  #}
  elif selected_type_code: #{
    statement = "select doc_group_type_name from index_medieval_doc_group_types" \
              + " where doc_group_type_code = '%s'" % selected_type_code
    the_cursor.execute( statement )
    type_row = the_cursor.fetchone()
    selected_type_name = type_row[ 0 ]
  #}

  write_inherit_and_title_block( handle )

  # Override the default treeview behaviour, which starts off expanded.
  # With such long files as the ones generated here, it might be best to start off collapsed.
  if display_all:
    set_treeview_collapsed( handle )

  write_start_main_content( handle )

  handle.write( '<h2>List of medieval catalogues</h2>' )
  handle.write( newline )

  write_breadcrumbs( handle, selected_type_code, selected_type_name, \
                     selected_loc_id, selected_loc_name )

  handle.write( newline + newline )

  if display_all: #{ # add navigation links to individual institution types
    statement = "select distinct coalesce( doc_group_type_parent, doc_group_type_code ) " \
              + " as doc_group_type, doc_group_type_name from index_medieval_doc_group_types " \
              + " order by doc_group_type"
    the_cursor.execute( statement )
    institution_types = the_cursor.fetchall()
    i = 0
    handle.write( '<p>' + newline )
    for ins_type in institution_types: #{
      if i > 0: handle.write( ' | ' )
      i += 1

      type_code = ins_type[ 0 ]
      type_name = ins_type[ 1 ]

      handle.write( '<a href="%s%s/source/%s" ' % (w.if_editable, medieval_catalogues_url, type_code) )
      handle.write( ' title="%s" >' % type_name )
      handle.write( type_name )
      handle.write( '</a> ' )

    #}
    handle.write( '</p>' + newline )
  #}

  if display_all: #{
    handle.write( '<h3 class="inline_heading">Overview by provenance</h3>' )
    handle.write( '{% if not printing %}' )
    handle.write( ' | <a href="%s">Overview by date</a>' % medieval_catalogues_by_date_url )
    handle.write( '{% endif %}' )
    handle.write( newline + newline )
    handle.write( '{% if not printing %}' )
    handle.write( '<div id="sidetreecontrol">' + newline )
    handle.write( '<a href="?#">Collapse All</a> | <a href="?#">Expand All</a>' + newline )
    handle.write( '</div>' + newline )
    handle.write( '{% endif %}' )
  #}

  statement = "select coalesce( doc_group_type_parent, doc_group_type ) as doc_group_type, " \
            + " doc_group_type_name, doc_group_id, doc_group_name, document_code, document_name " \
            + " from index_medieval_documents_view "
  if selected_type_code: #{
    statement += " where document_code > '' "
    statement += " and coalesce( doc_group_type_parent, doc_group_type ) = '%s' " % selected_type_code
  #}
  if selected_loc_id:
    statement += " and doc_group_id = %d " % selected_loc_id
  statement += " order by doc_group_type, doc_group_name, document_code_sort, document_code" 
  the_cursor.execute( statement )
  documents = the_cursor.fetchall()

  prev_type_code = ''
  prev_loc_name = ''
  inline_display = False

  if display_all:
    handle.write( '<ul class="treeview AAA" id="tree">' + newline )
  else: # use a different CSS class and ID so that links behave normally
    handle.write( '<ul class="AAA" id="catalogue_tree">' + newline )

  for document in documents: #{ 

    type_code     = document[ 0 ] # doc_group_type      e.g. BX for Benedictines
    type_name     = document[ 1 ] # doc_group_type_name e.g. Benedictines 
    loc_id        = document[ 2 ] # doc_group_name      e.g. numeric ID for Canterbury
    loc_name      = document[ 3 ] # doc_group_name      e.g. Canterbury
    document_code = document[ 4 ] # document_code       e.g. BC21
    document_name = document[ 5 ] # document_name       e.g. 'Books read in the refectory 1473'

    type_name     = w.reformat( type_name )
    loc_name      = w.reformat( loc_name )
    document_name = w.reformat( document_name )

    inline_display = False
    if type_code in inline_lists: inline_display = True

    if type_code != prev_type_code: #{
      if prev_type_code: #{
        handle.write( '</ul><!-- end CCC list -->' + newline )
        handle.write( '</li><!-- end BBB list item -->' + newline )
        handle.write( '</ul><!-- end BBB list -->' + newline )
        handle.write( '</li><!-- end AAA list item -->' + newline )
      #}

      prev_type_code = type_code
      prev_loc_name = loc_name

      write_outerhead( handle, type_name, display_all )

      if inline_display: heading = document_name # show the sole decode for K and R
      else: heading = loc_name 

      write_innerhead( handle, heading, display_all )

      if display_all:
        handle.write( '<ul style="display: {% if printing %}block{% else %}none{% endif %}"' )
        handle.write( '><!-- start CCC list -->' + newline )
      else:
        handle.write( '<ul><!-- start CCC list -->' + newline )
    #}

    elif loc_name != prev_loc_name: #{
      prev_loc_name = loc_name

      handle.write( '</ul><!-- end CCC list -->' + newline )
      handle.write( '</li><!-- end BBB list item -->' + newline )
      handle.write( '</ul><!-- end BBB list -->' + newline )
      handle.write( newline )

      if inline_display: heading = document_name # show the sole decode for K and R
      else: heading = loc_name 

      write_innerhead( handle, heading, display_all ) # start BBB list

      if display_all:
        handle.write( '<ul style="display: {% if printing %}block{% else %}none{% endif %}"' )
        handle.write( '><!-- start CCC list -->' + newline )
      else:
        handle.write( '<ul><!-- start CCC list -->' + newline )
    #}

    if document_code: #{
      statement = "select count(*) from index_entry_copies where document_code = '%s'" % document_code
      the_cursor.execute( statement )
      count_row = the_cursor.fetchone()
      num_catalogue_entries = count_row[ 0 ]
    #}
    else:
      num_catalogue_entries = 0

    print output_filename, type_name, loc_name, document_code

    handle.write( '<li' );
    if inline_display: handle.write( ' style="display: inline-block; width: 80px;" ' )
    handle.write( '><!-- start CCC list item -->' + newline );

    if num_catalogue_entries > 0: #{
      handle.write( '<a href="%s%s/%s" ' \
                    % (w.if_editable, medieval_catalogues_url, document_code) )
      handle.write( ' title="View details of catalogue %s">' % document_code )
    #}

    if inline_display: # no need to keep repeating the same decode hundreds of times for K and R
      handle.write( '&bull; %s (%d)'  % (document_code, num_catalogue_entries))
    else:
      handle.write( '%s %s (%d)'  % (document_code, document_name, num_catalogue_entries))

    if num_catalogue_entries > 0: handle.write( '</a>' + newline )

    handle.write( newline + '</li><!-- end CCC list item -->' + newline );

  #}

  handle.write( '</ul><!-- end CCC list -->' + newline )
  handle.write( '</li><!-- end BBB list item -->' + newline )
  handle.write( '</ul><!-- end BBB list -->' + newline )
  handle.write( newline + '</li><!-- end AAA outerhead list item -->' )
  handle.write( '</ul><!-- end tree AAA -->' + newline )

  handle.write( newline + linebreak + newline )

  if selected_loc_name: write_documents_total( handle, len( documents ) )

  write_end_main_content( handle )

  # Close your cursor and your connection
  the_cursor.close()
  the_database_connection.close()
示例#24
0
def stripComments():  #{

    the_database_connection = None
    the_cursor = None

    #=================================================================
    # Read each line of the original file, manipulate it as necessary,
    # and then write it into the new file.
    #=================================================================
    try:
        # Connect to the database and create a cursor
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        # Look at all the text fields that could contain unwanted XML
        for table_name, field_names in text_fields.items():  #{
            #print newline + table_name + newline

            for field in field_names:  #{
                #print newline + table_name + ': ' + field + newline

                select = "select id, %s from %s where %s like '%s%s%s%s%s'" \
                       % (field, table_name, field, percent, comment_start, percent, comment_end, percent)
                select += " order by id"
                the_cursor.execute(select)
                results = the_cursor.fetchall()

                # Check each value for XML/HTML comments
                for row in results:  #{
                    row_id = row[0]
                    text_value = row[1]

                    print ''
                    print ''
                    print '======================================='
                    print table_name, field, 'ID', row_id
                    print '======================================='
                    print ''

                    print '==== RAW VALUE, ID %d ====' % row_id
                    print text_value
                    print '==== end RAW VALUE, ID %d ==== %s' % (row_id,
                                                                 newline)

                    comment_start_count = text_value.count(comment_start)
                    comment_end_count = text_value.count(comment_end)

                    if comment_start_count != comment_end_count:  #{
                        print 'Mismatched start/end tags:', comment_start_count, 'starts', comment_end_count, 'ends'
                        continue  # don't risk stripping out any real data
                    #}

                    value_parts = text_value.split(comment_start)
                    new_value_parts = []
                    new_value = ''
                    i = -1
                    for part in value_parts:  #{
                        i += 1
                        comment = ''
                        data = ''

                        if i == 0:  #{  # the first section (index 0) is before the comment
                            data = part.strip()
                        #}
                        else:  #{ # at start of a comment
                            comment_end_count = part.count(comment_end)
                            if comment_end_count != 1:  #{
                                print 'Mismatched start/end tags in:', part
                                continue
                            #}

                            comment_and_data = part.split(comment_end)
                            comment = comment_and_data[0]
                            data = comment_and_data[1].strip()

                            print newline + 'About to remove the following comment:'
                            print comment + newline
                        #}

                        if data:  #{
                            print newline + 'Retaining the following data:'
                            print data + newline
                            new_value_parts.append(data)
                        #}
                    #}

                    new_value = "".join(new_value_parts)
                    new_value = new_value.replace("'", "''")  # escape for SQL
                    statement = "update %s set %s = '%s' where id = %d" % (
                        table_name, field, new_value, row_id)

                    print newline, '/* new value */', statement, newline
                    the_cursor.execute(statement)
                #}
            #}
        #}

        the_cursor.close()
        the_database_connection.close()

    except:
        if the_cursor: the_cursor.close()
        if the_database_connection: the_database_connection.close()
        raise
示例#25
0
def writeXML(): #{

  global document_lookup

  the_database_connection = None
  the_cursor = None
  outfile_handle = file

  try:
    the_database_connection = c.get_database_connection()
    the_cursor = the_database_connection.cursor() 

    print 'Looking up document list...'
    statement = "select document_code, document_name, doc_group_type_name, doc_group_name, "
    statement += " start_date, end_date, document_type, doc_group_id, "
    statement += " coalesce( doc_group_type_parent, doc_group_type ) as doc_group_type, "
    statement += " start_year, end_year, date_in_words "
    statement += " from u_index_medieval_documents_view"
    the_cursor.execute( statement )
    results = the_cursor.fetchall()
    for row in results: #{
      document_code = row[ 0 ] 
      document_name = row[ 1 ] 
      library_type  = row[ 2 ] #doc_group_type_name  
      library_loc   = row[ 3 ] #doc_group_name  
      start_date    = row[ 4 ] 
      end_date      = row[ 5 ] 
      document_type = row[ 6 ] 

      library_loc_id    = row[ 7 ] #doc_group_id  
      library_type_code = row[ 8 ] #doc_group_type  

      start_year    = str( row[ 9 ] )
      end_year      = str( row[ 10 ] )
      date_in_words = row[ 11 ] 

      if start_year and len( start_year ) < 4: start_year = start_year.rjust( 4, '0' )
      if end_year and len( end_year ) < 4: end_year = end_year.rjust( 4, '0' )

      document_lookup[ document_code ] = { 's_document_name' : document_name ,
                                           's_library_type'  : library_type  ,
                                           's_library_loc'   : library_loc   ,
                                           'd_document_start': start_date    ,
                                           'd_document_end'  : end_date      ,
                                           's_document_type' : document_type ,
                                           's_library_loc_id': library_loc_id,
                                           's_library_type_code': library_type_code,
                                           's_document_start_year': start_year,
                                           's_document_end_year': end_year,
                                           's_document_date_in_words': date_in_words,
                                          }
    #}


    # Get links to MLGB book IDs. Don't miss any out if there is a range of numbers.
    print 'Looking up MLGB book links...'
    statement = "select distinct copy_code, mlgb_book_id "
    statement += " from index_mlgb_links l, u_index_entry_copies c "
    statement += " where c.document_code = l.document_code "
    statement += " and c.seqno_in_document = l.seqno_in_document "
    statement += " order by copy_code, mlgb_book_id" 
    the_cursor.execute( statement )
    link_results = the_cursor.fetchall()
    for link_row in link_results: #{
      copy_code = link_row[ 0 ]
      mlgb_book_id = link_row[ 1 ]
      print copy_code, mlgb_book_id
      if mlgb_links_lookup.has_key( copy_code ):
        mlgb_links_lookup[ copy_code ].append( mlgb_book_id )
      else:
        mlgb_links_lookup[ copy_code ] = [ mlgb_book_id ]
    #}

    output_filename = virtualenv_root + '/parts/index/authortitle_to_solr.xml'
    print 'About to write %s' % output_filename
    outfile_handle = open( output_filename, 'wb' ) # 'wb' allows entry of UTF-8

    outfile_handle.write( '<doc>' + newline )

    the_cursor.execute( "select max( entry_id ) from index_entries" )
    results = the_cursor.fetchone()
    max_entry_id = results[ 0 ]
    solr_id = 0

    statement = get_entry_select_statement()
    the_cursor.execute( statement )
    entry_results = the_cursor.fetchall()
    current_entry_id = 0
    for entry in entry_results: #{

      current_entry_id = entry[ 0 ]
      print "Getting data for entry %d of %d"  % (current_entry_id, max_entry_id)

      entry_name = entry[ 2 ]

      book_results = []
      copy_results = []

      statement = get_book_select_statement( current_entry_id )

      the_cursor.execute( statement )
      book_results = the_cursor.fetchall()

      if not book_results: #{
        # This is presumably a cross-reference entry, but we cannot tell 
        # whether it is referring to an author or a book title - both are possible.
        solr_id += 1
        write_entry_fields( entry, solr_id, outfile_handle )
        write_entry_end( outfile_handle )
      #}
      else: #{
        # Work out whether the primary entry refers to an author or a book title.
        # If it refers to a book title, there will only be one (dummy) book record,
        # and the title of this dummy entry will be blank.

        author = ''
        title_of_book = ''

        if len( book_results ) == 1: #{
          book = book_results[ 0 ]
          title_of_book = book[ 1 ].strip()
          xref_title_of_book = book[ 2 ].strip()

          if title_of_book: # will be saved automatically as part of book fields
            title_of_book = ''
          else: #{
            if xref_title_of_book:
              title_of_book = xref_title_of_book
            else:
              title_of_book = entry_name
          #}
        #}

        if not title_of_book: author = entry_name

        for book in book_results: #{
          current_book_count = book[ 0 ]
          statement = get_copy_select_statement( current_entry_id, current_book_count )

          the_cursor.execute( statement )
          copy_results = the_cursor.fetchall()

          if not copy_results: #{
            solr_id += 1
            write_entry_fields( entry, solr_id, outfile_handle )
            write_author_or_title( author, title_of_book, outfile_handle )
            write_book_fields( book, outfile_handle )
            write_entry_end( outfile_handle )
          #}
          else: #{
            for copy in copy_results: #{
              solr_id += 1
              write_entry_fields( entry, solr_id, outfile_handle )
              write_author_or_title( author, title_of_book, outfile_handle )
              write_book_fields( book, outfile_handle )
              document_code = write_copy_fields( copy, outfile_handle )
              write_document_fields( document_code, outfile_handle )

              copy_code = copy[ 1 ].strip()
              write_mlgb_book_link_fields( copy_code, outfile_handle )

              write_entry_end( outfile_handle )
            #}
          #}
        #}
      #}
    #}

    outfile_handle.write( '</doc>' + newline )
    outfile_handle.close()
    the_cursor.close()
    the_database_connection.close()

  except:
    if not outfile_handle.closed: outfile_handle.close()
    if the_cursor: the_cursor.close()
    if the_database_connection: the_database_connection.close()
    raise