Пример #1
0
def kv2map(k, v):
    '''
    Build a docmap (dict) from a list of field names and a parallel list of values.

    Keys and values are converted with utils.ensureUnicode and stripped of
    surrounding whitespace. Pairs with an empty key or an empty value are
    skipped, and field names without a matching value (k longer than v) are
    ignored. A key that occurs once maps to its bare value; a key that occurs
    several times maps to the list of its values in order of appearance.

    Returns an empty dict when either argument is not a list.
    '''
    docmap = {}
    if not isinstance(k, list) or not isinstance(v, list):
        return docmap
    # zip stops at the shorter list, so field names without a value are ignored
    for rawKey, rawValue in zip(k, v):
        # skip empty values
        value = utils.ensureUnicode(rawValue)
        if not value:
            continue
        value = value.strip()
        if not value:
            continue

        # skip empty keys; the emptiness test comes before strip(), like for
        # the value, so a key that converts to something falsy without a
        # strip() method (e.g. None) is ignored instead of raising
        key = utils.ensureUnicode(rawKey)
        if not key:
            continue
        key = key.strip()
        if not key:
            continue

        if key not in docmap:
            docmap[key] = value
            continue
        previous = docmap[key]
        if isinstance(previous, list):
            previous.append(value)
        else:
            # second occurrence of this key: promote the single value to a list
            docmap[key] = [previous, value]
    return docmap
Пример #2
0
 def getValues(self):
     '''Collect the rows of this table as a TEntries container.

     One TEntry(name, type, path, order) is appended per accepted row, where
     order is the running count of accepted rows, starting at 0. A row is
     skipped when its path does not exist, when its type is not among
     self.getAvailableFiletypes(), or when it has no name while the table
     shows a name column. Without a name column an empty name defaults to
     the order number.

     NOTE(review): renaming of duplicate names ("name#i") is not done in this
     method; presumably TEntries takes care of it - confirm.'''
     entries = TEntries()
     position = 0
     for row in self.rows:
         rowName = utils.ensureUnicode(row.getName())
         rowType = utils.ensureUnicode(row.getType())
         rowPath = utils.ensureUnicode(row.getPath())
         if not os.path.exists(rowPath) or rowType not in self.getAvailableFiletypes():
             continue
         if not rowName:
             if self.nameColumn:
                 # a name is mandatory when the table shows a name column
                 continue
             rowName = position
         entries.append(TEntry(rowName, rowType, rowPath, position))
         position += 1
     return entries
Пример #3
0
 def addField(self, fieldname, value):
     '''Append value to the list of values kept under fieldname in self.params.

     Nothing happens when fieldname or value is empty. Both are converted
     with utils.ensureUnicode before being stored.'''
     if not (fieldname and value):
         return
     key = utils.ensureUnicode(fieldname)
     converted = utils.ensureUnicode(value)
     if key not in self.params:
         # first value for this field: start a new list
         self.params[key] = [converted]
         return
     self.params[key].append(converted)
Пример #4
0
def getCachedVersionFilename(xmlFilename):
    '''Return the assets path of the plaintext cache file for an XML thesaurus.

    The cache name is the base name of xmlFilename with its ".xml" extension
    (matched case-insensitively) replaced by "_cache.thc", resolved through
    assetsfolder.getAssetsPathFor. Whether that file exists is not checked
    here.

    Returns None when xmlFilename is empty or is not an xml file.'''
    xmlFilename = utils.ensureUnicode(xmlFilename)
    if not xmlFilename:
        return None
    xmlExtension = '.xml'
    if not xmlFilename.lower().endswith(xmlExtension):
        return None
    baseName = utils.ensureUnicode(os.path.basename(xmlFilename))
    # Cut the extension off by length instead of using str.replace: replace is
    # case sensitive (so "FOO.XML" kept its own name as cache name) and it
    # also rewrote any ".xml" occurring in the middle of the name.
    cacheName = baseName[:-len(xmlExtension)] + '_cache.thc'
    return assetsfolder.getAssetsPathFor(cacheName)
Пример #5
0
 def addReferenceThesaurus(self, thesaurusName, thesaurusPath, type):
     '''Register a reference thesaurus in self.thesauri under thesaurusName.

     The stored entry is a dict with the keys "path", "type" and "order",
     where order is len(self.thesauri) at the moment of the call. Nothing is
     stored when thesaurusPath does not exist or when type is not in
     thesaurus_types.'''
     name = utils.ensureUnicode(thesaurusName)
     # TODO: is it safe to convert filenames to unicode?
     path = utils.ensureUnicode(thesaurusPath)
     kind = utils.ensureUnicode(type)
     if os.path.exists(path) and kind in thesaurus_types:
         self.thesauri[name] = {"path": path, "type": kind, "order": len(self.thesauri)}
Пример #6
0
def getThesauriStatusOfWord(word):
    '''Get the best status for the given word over the thesauri from getThesauri().

    Returns u"Leeg (niet ingevuld)" for an empty word, u"Eigen term" when no
    thesaurus contains the word, and otherwise the description (third
    element) of the status tuple selected by bestStatus.'''
    # Convert and validate once, before the loop: the word is the same for
    # every thesaurus, and an empty word has to be reported as empty even
    # when getThesauri() yields nothing.
    word = utils.ensureUnicode(word)
    if not word:
        return u"Leeg (niet ingevuld)"
    tmpstatus = None
    for th in getThesauri():
        if not th.containsTerm(word):
            continue
        term = th.getTerm(word)
        if term.getUse() is not None:
            # the term has a "use" value, which is reported as non-preferred
            candidate = ("niet_voorkeur", th.name, "Niet voorkeursterm %s" % (th.name))
        else:
            candidate = ("voorkeur", th.name, "Voorkeursterm %s" % (th.name))
        tmpstatus = bestStatus(tmpstatus, candidate)
    if tmpstatus is None:
        return u"Eigen term"
    return utils.ensureUnicode(tmpstatus[2])
Пример #7
0
 def removeField(self, fieldname):
     '''Delete the parameter stored under fieldname from self.params, if present.

     An empty fieldname is ignored.'''
     if fieldname:
         key = utils.ensureUnicode(fieldname)
         if key in self.params:
             del self.params[key]
Пример #8
0
 def endElement(self,name):
     '''Handle the end of an XML element.

     When the converted tag name is "record", self.inRecord is cleared and
     self.emit() is called. Otherwise, if a tag is currently open
     (self.inTag), the stripped tag name and value are added to self.docmap,
     which maps each tag name to the list of its values, and the tag state
     is reset.'''
     if getConvertedTagName(name) == "record":
         self.inRecord = False
         self.emit()
         return
     if not self.inTag:
         return
     if self.current_value is not None:
         n = utils.ensureUnicode(self.current_tag_name.strip())
         v = utils.ensureUnicode(self.current_value.strip())
         # test membership on the mapping itself; .keys() builds and scans a
         # list on every call in Python 2
         if n in self.docmap:
             self.docmap[n].append(v)
         else:
             self.docmap[n] = [v]
     self.current_tag_name = None
     self.current_value = None
     self.inTag = False
Пример #9
0
 def parseTextFile(self, filename):
     '''Parse thesaurus from the plain text file with the given filename.

     Every line that is non-empty after removing line-break characters and
     surrounding whitespace is added as a Term with a single u"term" field.'''
     fil = inputfileformat.getFileDescriptor(filename)
     try:
         for line in fil:
             # leave off newline characters
             line = line.replace("\n", "").replace("\r", "").strip()
             word = utils.ensureUnicode(line)
             if word:
                 t = Term()
                 t.addField(u"term", word)
                 self.addTerm(t)
     finally:
         # Release the descriptor even when parsing fails. getFileDescriptor
         # is defined elsewhere, so close() is looked up defensively in case
         # it returns a plain iterable without that method.
         close = getattr(fil, "close", None)
         if close is not None:
             close()
Пример #10
0
 def getStatusOfWord(self,word):
     '''Describe how the given word relates to this thesaurus.

     Returns a message telling whether the word is empty, is not in the
     thesaurus, is a term with a "use" value (not the preferred term), or is
     the preferred term.'''
     word = utils.ensureUnicode(word)
     if not word:
         return u"Leeg (niet ingevuld)"
     if self.containsTerm(word):
         term = self.getTerm(word)
         if term.getUse() is None:
             return u"Voorkeurterm"
         return u"Niet de voorkeurterm"
     return u"Niet in de %s thesaurus" % (self.name)
Пример #11
0
 def parseDefaultAdlibDoc(self, filename):
     '''Parse an Adlib XML thesaurus from the specified filename.

     When utils.cacheThesauri is set and a cached plaintext version exists,
     the terms and name are taken from that cached version instead of parsing
     the XML. Otherwise the XML file is parsed with
     inputfileformat.parseSAXFile and, when caching is enabled, a cached
     version is created afterwards.'''
     filename = utils.ensureUnicode(filename)
     if utils.cacheThesauri and cachedVersionExists(filename):
         # print(...) with a single parenthesised argument prints the same
         # text under Python 2 and is also valid Python 3 syntax
         print("    - Loading thesaurus from previously cached file %s" % getCachedVersionFilename(filename))
         cachedThesaurus = loadCachedVersion(filename)
         self.terms = cachedThesaurus.terms
         self.name = cachedThesaurus.name
         return
     inputfileformat.parseSAXFile(filename, self)
     if utils.cacheThesauri:
         print("    - Caching thesaurus to file %s" % getCachedVersionFilename(filename))
         createCachedVersion(self, filename)
Пример #12
0
 def __init__(self, name=u'Unknown'):
     '''Initialise with an empty terms dict and the given name, converted
     with utils.ensureUnicode.'''
     self.name = utils.ensureUnicode(name)
     self.terms = {}
Пример #13
0
 def removeReferenceThesaurus(self, thesaurusName):
     '''Delete the entry stored under thesaurusName from self.thesauri, if any.'''
     key = utils.ensureUnicode(thesaurusName)
     if key not in self.thesauri:
         return
     del self.thesauri[key]
Пример #14
0
 def getPath(self):
     '''Return the value of self.pathField, converted with utils.ensureUnicode.'''
     rawPath = self.pathField.get()
     return utils.ensureUnicode(rawPath)
Пример #15
0
 def containsTerm(self,word):
     '''Tell whether the lower-cased word is a key of self.terms, which makes
     the check case insensitive for lower-cased keys.'''
     lookupKey = utils.ensureUnicode(word).lower()
     return lookupKey in self.terms
Пример #16
0
 def getType(self):
     '''Return the value of self.typeSelect, converted with utils.ensureUnicode.'''
     selectedType = self.typeSelect.get()
     return utils.ensureUnicode(selectedType)
Пример #17
0
 def getName(self):
     '''Return the value of self.nameField converted with utils.ensureUnicode,
     or an empty string when self.nameColumn is not set.'''
     if not self.nameColumn:
         return ""
     return utils.ensureUnicode(self.nameField.get())
Пример #18
0
def getContent():
	'''Return the module-level _content, converted with utils.ensureUnicode.'''
	content = _content
	return utils.ensureUnicode(content)
Пример #19
0
    def start(self):
        '''Validate the form and start generating the report.

        Shows an error dialog and returns when the collection name is empty
        or the output file is not valid, and asks for confirmation before an
        existing output file is overwritten. Then the input files are read
        from the input table, the reference thesauri are configured, the
        input paths are grouped by file type and generateReport is called.
        Any exception raised along the way is printed together with its
        stack trace and shown in an ExceptionDialog.'''
        museumName = utils.ensureUnicode(self.museumnaamField.get())
        if not museumName.strip():
            tkMessageBox.showerror('Geen naam voor de collectie opgegeven', 'Vul de naam van de collectie in, aub.')
            return
        outputFile = self.outputField.get()
        if not isValidOutputFile(outputFile):
            tkMessageBox.showerror('Fout bij het starten', 'Kon niet starten omdat er geen correct "Output" bestand is opgegeven.')
            return
        if os.path.exists(outputFile):
            doOverwrite = tkMessageBox.askyesno('Bestand overschrijven?', 'Het gekozen "Output" bestand bestaat reeds. Wilt u verder gaan en het overschrijven?')
            if not doOverwrite:
                return

        # Bound before the try block so the exception handler can tell whether
        # the dialog was created; otherwise a failure inside WaitDialog(...)
        # would end in a NameError that hides the real exception.
        waitDialog = None
        try:
            waitDialog = WaitDialog(self.parent)
            utils.setMaxDetail(self.settings.maxUniqueValues)
            # Will only return input files with valid files and names filled in
            inputFiles = self.inputFilesTable.getValues()
            if inputFiles.size() == 0:
                waitDialog.close()
                tkMessageBox.showerror('Fout bij het starten', u'Kon niet starten omdat er geen geldige "Input" bestanden zijn opgegeven.\nEr is minstens één input bestand met ingevulde naam, type en bestandslocatie vereist.')
                return

            # The thesaurus check only counts when its checkbox is enabled
            checkThesaurus = bool(self.checkb["state"] != DISABLED and self.checkThesaurus.get())

            # Set configured reference thesauri
            if checkThesaurus:
                err = setCustomThesauri(self.settings.thesauri)
            else:
                err = setCustomThesauri(TEntries())
            if err is not None:
                waitDialog.close()
                tkMessageBox.showerror('Fout bij het starten', err)
                return

            # Set specified input files to analyse
            objects = []
            thesauri = []
            fieldstats = []
            csvfieldstats = []
            inputFiles.sort()
            for entry in inputFiles.values:
                utils.s("%s - %s - %s\n" % (entry.name, entry.type, entry.path))
                if entry.type == 'Adlib XML Objecten':
                    objects.append(entry.path)
                elif entry.type == 'XML Fieldstats' or entry.type == "Adlib XML Personen":
                    fieldstats.append(entry.path)
                elif entry.type == 'CSV Fieldstats':
                    csvfieldstats.append(entry.path)
                elif entry.type == 'Adlib XML Thesaurus':
                    thesauri.append(entry.path)
                else:
                    print("ERROR: Input bestand %s met type %s kan niet gebruikt worden" % (entry.name, entry.type))
            # NOTE(review): the wait dialog is not closed here on success;
            # presumably that happens elsewhere - confirm.
            generateReport(museumName, objects, thesauri, fieldstats, csvfieldstats, outputFile, False)

        except Exception as e:
            # capture the trace first, before any further call can interfere
            stacktrace = traceback.format_exc()
            if waitDialog is not None:
                waitDialog.close()
            print("exception ...")
            print(stacktrace)
            print("done")
            ExceptionDialog(self.parent, e, stacktrace)
Пример #20
0
 def getTerm(self,word):
     '''Return the term object stored in self.terms under the lower-cased word.

     Raises KeyError when the word is not present; check with containsTerm
     first.'''
     lookupKey = utils.ensureUnicode(word).lower()
     return self.terms[lookupKey]