Пример #1
0
def has_type(value):
    """Build a tester that selects entries of the named type.

    `value` is the entry type name; it is lower-cased and looked up with
    ``Types.get_entry(name, 0)``.

    Returns a ``Search.TypeTester`` built from the entry type found.
    Raises ``TypeError`` when the lookup returns ``None``.
    """
    the_type = Types.get_entry(value.lower(), 0)

    if the_type is None:
        # Call-style raise: the ``raise Class, "message"`` statement form is
        # only accepted by Python 2.
        raise TypeError("no such entry type")

    return Search.TypeTester(the_type)
Пример #2
0
def has_type(value):
    """Build a tester that selects entries of the named type.

    `value` is the entry type name; it is lower-cased and looked up with
    ``Types.get_entry(name, 0)``.

    Returns a ``Search.TypeTester`` built from the entry type found.
    Raises ``TypeError`` when the lookup returns ``None``.
    """
    the_type = Types.get_entry(value.lower(), 0)

    if the_type is None:
        # Call-style raise: the ``raise Class, "message"`` statement form is
        # only accepted by Python 2.
        raise TypeError("no such entry type")

    return Search.TypeTester(the_type)
Пример #3
0
    def next(self):
        """Return the next entry read by the BibTeX parser, or None.

        Records are pulled one at a time from
        ``_bibtex.next_unfiltered(self.parser)``:

        * an ``'entry'`` record is wrapped in an ``Entry`` and returned;
        * a ``'preamble'`` record is appended to ``self.preamble``;
        * any other record is skipped.

        When the parser returns None, the collected preamble is stored in
        the database metadata under ``'bibtex-preamble'`` and None is
        returned.

        Raises ``Exceptions.ParserError`` if the parser raises ``IOError``.
        """
        while True:
            try:
                retval = _bibtex.next_unfiltered(self.parser)
            except IOError as error:
                raise Exceptions.ParserError((str(error),))

            if retval is None:
                self.db.set_metadata('bibtex-preamble', self.preamble)
                return None

            kind = retval[0]

            if kind == 'entry':
                # `offset` is part of the record but is not needed here.
                name, fieldtype, offset, line, content = retval[1]
                if name:
                    key = Key.Key(self.db, name)
                else:
                    key = None
                fieldtype = Types.get_entry(fieldtype)
                return Entry(key, fieldtype, content, self.parser, line)

            if kind == 'preamble':
                self.preamble.append(retval[1])

            # preamble and unknown records: keep reading
Пример #4
0
 def next(self):
     """Return the next entry read by the BibTeX parser, or None.

     Records are pulled one at a time from
     ``_bibtex.next_unfiltered(self.parser)``:

     * an ``'entry'`` record is wrapped in an ``Entry`` and returned;
     * a ``'preamble'`` record is appended to ``self.preamble``;
     * any other record is skipped.

     When the parser returns None, the collected preamble is stored in
     the database metadata under ``'bibtex-preamble'`` and None is
     returned.

     Raises ``Exceptions.ParserError`` if the parser raises ``IOError``.
     """
     while True:
         try:
             retval = _bibtex.next_unfiltered(self.parser)
         except IOError as error:
             raise Exceptions.ParserError((str(error), ))

         if retval is None:
             self.db.set_metadata('bibtex-preamble', self.preamble)
             return None

         kind = retval[0]

         if kind == 'entry':
             # `offset` is part of the record but is not needed here.
             name, fieldtype, offset, line, content = retval[1]
             if name:
                 key = Key.Key(self.db, name)
             else:
                 key = None
             fieldtype = Types.get_entry(fieldtype)
             return Entry(key, fieldtype, content, self.parser, line)

         if kind == 'preamble':
             self.preamble.append(retval[1])

         # preamble and unknown records: keep reading
Пример #5
0
    def next(self):
        """Read the next Medline record from ``self.file``.

        Blank lines before the record are skipped.  The record itself runs
        until the next blank line or the end of the file; lines matching
        ``header`` start a new tag, lines matching ``contin`` extend the
        text of the current tag.

        The ``PMID``, ``UI``, ``AU`` and ``DP`` tags get dedicated handling;
        every remaining tag is joined with ``" ; "`` and stored under the
        name given by ``one_to_one`` (or ``medline-<tag>``).

        Returns a ``Base.Entry`` of type ``'article'``, or the empty dict
        when the end of the file is reached before any non-blank line.
        """
        current = None
        data = ''

        table = {}

        # Skip whitespace
        while 1:
            line = self.file.readline()
            if line == '':
                return table

            line = line.rstrip()
            if line != '':
                break

        while 1:
            head = header.match(line)
            if head:
                if current:
                    table.setdefault(current, []).append(data)

                current = head.group(1).strip()
                data = head.group(2)
            else:
                cont = contin.match(line)
                if cont:
                    data = data + ' ' + cont.group(1)

            line = self.file.readline()
            if line == '':
                break

            line = line.rstrip()
            if line == '':
                break

        # don't forget the last item
        if current:
            table.setdefault(current, []).append(data)

        # create the entry with the actual fields
        norm = {}
        entry_type = Types.get_entry('article')

        if 'PMID' in table:
            norm['url'] = Fields.URL(medurl + table['PMID'][0])
            norm['medline-pmid'] = Fields.Text(table['PMID'][0])
            del table['PMID']

        if 'UI' in table:
            norm[one_to_one['UI']] = Fields.Text(table['UI'][0])
            del table['UI']

        if 'AU' in table:
            group = Fields.AuthorGroup()

            for au in table['AU']:
                # analyze the author by ourself.
                first, last, lineage = [], [], []

                for part in au.split(' '):
                    if part.isupper():
                        # in upper-case, this is a first name
                        if last:
                            first.append(part)
                        else:
                            # if there is no last name, there can't be a
                            # first name
                            last.append(part)
                    elif first:
                        # there was a first name, this must be a lineage
                        lineage.append(part)
                    else:
                        last.append(part)

                if len(first) > 1:
                    print("medline: long first name found. skipping.")
                    first = first[0:1]

                if first:
                    # 'JA' becomes 'J. A.'
                    first = '. '.join(first[0]) + '.'
                else:
                    first = None

                if last:
                    last = ' '.join(last)
                else:
                    last = None

                if lineage:
                    lineage = ' '.join(lineage)
                else:
                    lineage = None

                group.append(Fields.Author((None, first, last, lineage)))

            norm[one_to_one['AU']] = group
            del table['AU']

        if 'DP' in table:
            # only the text before the first space is handed to Fields.Date
            fields = table['DP'][0].split(' ')
            norm[one_to_one['DP']] = Fields.Date(fields[0])
            del table['DP']

        # The simple fields...
        for f in table:
            f_mapped = one_to_one.get(f, 'medline-%s' % (f.lower()))
            text_type = Types.get_field(f_mapped).type
            norm[f_mapped] = text_type(" ; ".join(table[f]))

        return Base.Entry(None, entry_type, norm)
Пример #6
0
    def next(self):
        """Read the next Refer record from ``self.file`` and build an entry.

        Lines matching ``tag_re`` start a new field; the tag is translated
        through ``self.mapping`` and unknown tags are skipped with a
        warning.  Any other line is appended to the text of the current
        field.  A blank line, or the end of the file, closes the record.

        The entry type is guessed from the fields that are present, and a
        field mapped to ``"label"`` becomes the entry key.

        Returns a ``Base.Entry``, or None when the end of the file is
        reached without any field left to return.
        """
        data = ''
        fields = {}
        tag = None      # mapped name of the field being accumulated
        label = None

        while 1:
            raw = self.file.readline()
            line = raw.strip()

            if line == '':
                # Blank line or end of file: the field still being
                # accumulated belongs to this record, so store it before
                # deciding whether there is a record to return.  (Checking
                # `fields` first would lose a record made of one field.)
                if tag:
                    text = ' '.join(data.split())
                    if tag == "label":
                        label = text
                    else:
                        fields.setdefault(tag, []).append(text)
                    tag = None
                    data = ''

                if fields:
                    # determine the real type
                    if 'journal' in fields:
                        kind = 'article'
                    elif 'booktitle' in fields:
                        kind = 'inbook'
                    elif 'volume' in fields or 'number' in fields:
                        kind = 'inproceedings'
                    elif 'publisher' in fields:
                        kind = 'book'
                    elif 'author' in fields and 'title' in fields:
                        kind = 'unpublished'
                    else:
                        kind = 'misc'

                    entry = Types.get_entry(kind)

                    for f in list(fields):
                        field_type = Types.get_field(f).type
                        values = fields[f]

                        if field_type == Fields.AuthorGroup:
                            group = Fields.AuthorGroup()
                            for auth in values:
                                group.append(Fields.Author(auth))
                            fields[f] = group
                        else:
                            if len(values) > 1:
                                sys.stderr.write(
                                    "warning: field `%s' is defined more than once\n" % f)
                            # Always convert: the first occurrence wins, so
                            # the entry never receives a raw list of strings.
                            fields[f] = field_type(values[0])

                    if label:
                        key = Key.Key(None, label)
                        return Base.Entry(key, entry, fields)
                    return Base.Entry(None, entry, fields)

                if raw == '':
                    # end of file and nothing to return
                    return None

            t = tag_re.match(line)
            # we matched a new field start
            if t:
                # store the previous field
                if tag:
                    text = ' '.join(data.split())
                    if tag == "label":
                        label = text
                    else:
                        fields.setdefault(tag, []).append(text)

                tag = t.group(1)
                if not self.mapping.has_key(tag):
                    print("warning: key `%s' has been skipped" % (tag))
                    tag = None
                    data = ''
                else:
                    tag = self.mapping[tag][0]
                    data = t.group(2)

                continue

            # in the general case, append the new text
            data = data + ' ' + line
Пример #7
0
    def next(self):
        """Read the next Ovid record from ``self.file`` and build an entry.

        Lines are read until the end of the file or a line matching
        ``separator_re``.  A line that is blank or starts with a space
        continues the text of the current field; any other line is the
        (lower-cased) title of a new field.

        Field titles are translated through ``self.mapping`` into a
        ``(name, kind)`` pair; titles that are not mapped are ignored.  The
        kind selects how the text is converted (simple, keyword, author or
        source field).

        Returns a ``Base.Entry``, or None when no field was read.
        """
        raw = {}

        # read entry till next blank line
        text = []
        field = ''
        while 1:
            line = self.file.readline()
            if line == '':
                break
            line = line.rstrip()

            # starting with a blank ?
            if line == '' or line[0] == ' ':
                # ...then we continue the current text
                text.append(line.lstrip())
                continue

            # new entry ?
            if separator_re.match(line):
                break

            # else, this is a new field
            if field:
                # save the previous one if needed
                raw[field] = '\n'.join(text)
                text = []

            # store the name of this new field
            field = line.lower()

        # don't waste the last field content
        if field:
            raw[field] = '\n'.join(text)

        # did we parse a field ?
        if not raw:
            return None

        # create the entry content
        entry = Base.Entry(type=self.deftype)

        for key in raw:
            if not self.mapping.has_key(key):
                continue

            (name, kind) = self.mapping[key]
            text_type = Types.get_field(name).type

            # parse a simple text field
            if kind == SimpleField:
                entry[name] = text_type(raw[key].strip())

            elif kind == KeywordField:
                text = raw[key].strip()
                if entry.has_key(name):
                    # append to the keywords already stored under this name
                    text = str(entry[name]) + '\n  ' + text

                entry[name] = text_type(text)

            # parse an author field
            elif kind == AuthorField:
                entry[name] = self.parse_author(raw[key])

            # parse a source field
            elif kind == SourceField:
                dict_key = ' '.join(raw[key].split('\n'))
                m = self.source_re.match(dict_key.strip())
                if not m:
                    print('>>> Error: Source field  does not parse correctly:')
                    print(dict_key)
                    print(entry)
                    continue

                year, month, day = None, None, None
                j, v, s, n, p, o, y, d = m.group(
                    'journal', 'volume', 'inseries', 'number',
                    'pages', 'other', 'year', 'month')

                if s:                ### article in a monograph series
                    entry['booktitle'] = Fields.Text(j)
                    if d:
                        entry['beigabevermerk'] = Fields.LongText(d)
                    entry.type = Types.get_entry('incollection')

                elif j:
                    entry['journal'] = Fields.Text(j)
                    if d and not d.isspace():
                        dates = d.split()
                        try:
                            month = long_month[dates[0]]
                        except KeyError:
                            # unknown month name: leave the month unset
                            pass
                        if len(dates) > 1:
                            try:
                                day = int(dates[1])
                            except ValueError:
                                # Same tolerance as for the month: a
                                # non-numeric day leaves the day unset
                                # instead of aborting the whole import.
                                pass

                if v:
                    entry['volume'] = Fields.Text(v)

                if n:
                    entry['number'] = Fields.Text(n)

                if p:
                    entry['pages'] = Fields.Text(p)

                if o:
                    entry['other-note'] = Fields.Text(o)

                if y:
                    year = int(y)

                entry['date'] = Fields.Date((year, month, day))

        return entry
Пример #8
0
def write_source_field(output, entry, keys):
    """Write the ``Source`` field of `entry` to `output`.

    A ``Source`` title line is written first, followed by one line,
    indented by two spaces, assembled from the entry's ``booktitle`` or
    ``journal``, ``volume``, ``number``, ``pages``, ``other-note`` and
    ``date`` fields and closed with a final dot.

    `output` must provide ``write``.  `keys` is accepted but not used by
    this function.
    """
    t = []

    output.write('Source\n')

    if (entry.type == Types.get_entry('incollection')
            or entry.has_key('booktitle')):
        t = [str(entry.get('booktitle')).strip()]

        if entry.has_key('volume'):
            t.append(".  %s " % (entry['volume']))
        if entry.has_key('number'):
            t.append("(%s)" % (entry['number']))

        if entry.has_key('pages'):
            p = str(entry['pages'])
            #p = Utils.compress_page_range (p)
            t.append("PG. %s." % (p))

        if entry.has_key('date'):
            date = entry['date']
            t.append(" %s" % (date.year))

        # The key tested here must be the one that is read on the next line;
        # the former spelling 'beigabeevermerk' never matched, so this field
        # was silently left out.
        if entry.has_key('beigabevermerk'):
            t.append(str(entry.get('beigabevermerk')))
    else:
        t.append("%s. " % (entry['journal']))

        if entry.has_key('volume'):
            t.append("%s" % (entry['volume']))

        if entry.has_key('number'):
            t.append("(%s)" % (entry['number']))

        if entry.has_key('pages'):
            p = str(entry['pages'])
            #p = Utils.compress_page_range (p)
            t.append(":%s" % (p))

        if entry.has_key('other-note'):
            t.append("; %s" % (entry['other-note']))

        if entry.has_key('date'):
            date = entry['date']

            t.append(", %s" % (date.year))

            if date.month:
                t.append(" %s" % (month_name[date.month - 1]))
            if date.day:
                t.append(" %s" % (date.day))

    # final dot.
    t.append(".")
    text = ''.join(t)

    # correct the number of dots...
    #text = compact_dot.sub ('.', text)
    output.write('  ')
    output.write(text)
    output.write('\n')
Пример #9
0
    def apply_cb (self, widget):
        ''' Construct the new query and add it to the query tree

        The tester is built according to the current notebook page:
        page 1 is the expert search, page 0 the simple search.

        NOTE(review): the statements that consume `test`, `name` and
        `error` are not part of this excerpt -- confirm against the
        complete method before changing anything here.
        '''
        
        page = self._w_notebook.get_current_page ()

        name = None
        
        # Expert search
        if page == 1:
            
            # Names pre-bound for the expression typed by the user.
            user_global = {
                's'   :      TextUI._split_req,
                'has' :      TextUI.has,
                'any_has'  : TextUI.any_has,
                'has_key'  : TextUI.has_key,
                'has_type' : TextUI.has_type,
                'before' :   TextUI.before,
                'after' :    TextUI.after,
                }

            search = self._w_expert.get_text().encode('latin-1')
            
            # SECURITY NOTE(review): the text of the expert field is executed
            # as Python code.  This is arbitrary code execution on whatever
            # is typed (or pasted) there; flagged, deliberately not changed.
            try:
                exec ('tester = ' + search, user_global)
            except:
                # Bare except: any failure at all is printed as a traceback
                # and reported through a modal error dialog, then the
                # callback gives up.
                etype, value, tb = sys.exc_info ()
                traceback.print_exception (etype, value, tb)

                d = gtk.MessageDialog (self._w_search,
                                       gtk.DIALOG_DESTROY_WITH_PARENT | gtk.DIALOG_MODAL,
                                       gtk.MESSAGE_ERROR, gtk.BUTTONS_OK,
                                       _("internal error during evaluation"))
                d.run ()
                d.destroy ()
                return

            # exec stored the result of the expression under 'tester'
            test = user_global ['tester']

        # Simple Search
        elif page == 0:
            field = self._w_field.get_active_text().lower()
            match = self._w_pattern.get_text()
            
            # nothing to search for
            if match == '': return

            # set to 1 when building the tester fails; `err` then holds
            # the reason
            error = 0

            if field == ' - any field - ' or field == '':
                # when re.error is raised, `test` stays unassigned and only
                # `error` / `err` record the failure
                try:
                    test = Search.AnyTester (match.encode ('latin-1'))
                except re.error, err:
                    error = 1
                    
                name = 'any ~ ' + match

            elif field == ' - type - ':
                # get the type description
                the_type = Types.get_entry (string.lower (match), 0)

                if the_type is None:
                    err = ['No such Entry type']
                    error = 1
                else:
                    try:
                        test = Search.TypeTester (the_type)
                    except re.error, err:
                        error = 1
Пример #10
0
    def next(self):
        """Read the next Medline record from ``self.file``.

        Blank lines before the record are skipped.  The record itself runs
        until the next blank line or the end of the file; lines matching
        ``header`` start a new tag, lines matching ``contin`` extend the
        text of the current tag.

        The ``PMID``, ``UI``, ``AU`` and ``DP`` tags get dedicated handling;
        every remaining tag is joined with ``" ; "`` and stored under the
        name given by ``one_to_one`` (or ``medline-<tag>``).

        Returns a ``Base.Entry`` of type ``'article'``, or the empty dict
        when the end of the file is reached before any non-blank line.
        """
        current = None
        data = ''

        table = {}

        # Skip whitespace
        while 1:
            line = self.file.readline()
            if line == '':
                return table

            line = line.rstrip()
            if line != '':
                break

        while 1:
            head = header.match(line)
            if head:
                if current:
                    table.setdefault(current, []).append(data)

                current = head.group(1).strip()
                data = head.group(2)
            else:
                cont = contin.match(line)
                if cont:
                    data = data + ' ' + cont.group(1)

            line = self.file.readline()
            if line == '':
                break

            line = line.rstrip()
            if line == '':
                break

        # don't forget the last item
        if current:
            table.setdefault(current, []).append(data)

        # create the entry with the actual fields
        norm = {}
        entry_type = Types.get_entry('article')

        if 'PMID' in table:
            norm['url'] = Fields.URL(medurl + table['PMID'][0])
            norm['medline-pmid'] = Fields.Text(table['PMID'][0])
            del table['PMID']

        if 'UI' in table:
            norm[one_to_one['UI']] = Fields.Text(table['UI'][0])
            del table['UI']

        if 'AU' in table:
            group = Fields.AuthorGroup()

            for au in table['AU']:
                # analyze the author by ourself.
                first, last, lineage = [], [], []

                for part in au.split(' '):
                    if part.isupper():
                        # in upper-case, this is a first name
                        if last:
                            first.append(part)
                        else:
                            # if there is no last name, there can't be a
                            # first name
                            last.append(part)
                    elif first:
                        # there was a first name, this must be a lineage
                        lineage.append(part)
                    else:
                        last.append(part)

                if len(first) > 1:
                    print("medline: long first name found. skipping.")
                    first = first[0:1]

                if first:
                    # 'JA' becomes 'J. A.'
                    first = '. '.join(first[0]) + '.'
                else:
                    first = None

                if last:
                    last = ' '.join(last)
                else:
                    last = None

                if lineage:
                    lineage = ' '.join(lineage)
                else:
                    lineage = None

                group.append(Fields.Author((None, first, last, lineage)))

            norm[one_to_one['AU']] = group
            del table['AU']

        if 'DP' in table:
            # only the text before the first space is handed to Fields.Date
            fields = table['DP'][0].split(' ')
            norm[one_to_one['DP']] = Fields.Date(fields[0])
            del table['DP']

        # The simple fields...
        for f in table:
            f_mapped = one_to_one.get(f, 'medline-%s' % (f.lower()))
            text_type = Types.get_field(f_mapped).type
            norm[f_mapped] = text_type(" ; ".join(table[f]))

        return Base.Entry(None, entry_type, norm)
Пример #11
0
    def next(self):
        """Read the next ISI record from ``self.file`` and build an entry.

        The method works in three steps:

        1. Lines are read, and appended to ``self.extraneous``, until a
           line matched by ``xheader`` carries a two-character tag other
           than ``FN``/``VR``.  ``Date:``, ``Notes:``, ``FN`` and ``VR``
           lines seen on the way feed ``self.isifileformat`` and
           ``self.isifileinfo`` (only when these are still unset).
        2. Tagged lines and their continuation lines are collected, per
           tag, up to the ``ER`` tag, a line that is neither, or the end of
           the file.
        3. The collected tags are converted into fields: authors and
           editors, pages, size, date, keywords and the journal title get
           dedicated handling; everything else goes through ``key_map``.

        Returns a ``Base.Entry`` of type ``'article'``, or the empty dict
        when the end of the file is reached during step 1.
        """
        lines = {}
        in_table = {}
        file_notes, file_time, file_version, file_format = ('', '', '', '')

        while 1:
            line = self.file.readline()
            if line == '':
                return lines  # what does that mean ??
            head = xheader.match(line)
            if head:
                tag = head.group(1)
                if tag == 'Date:':
                    file_time = time.strftime(
                        "%Y-%m-%d %H:%M", rfc822.parsedate(head.group(2)))
                elif tag == 'Notes:':
                    file_notes = head.group(2).strip()
                elif tag == 'FN':
                    file_format = head.group(2)
                elif tag == 'VR':
                    file_version = head.group(2)
                elif len(tag) == 2:
                    # first tag of the record itself
                    break
            self.extraneous.append(line)

        self.isifileformat = self.isifileformat or "Isifile format %s(%s)" % (
            file_format, file_version)
        self.isifileinfo = self.isifileinfo or "ISI %s (%s) %s" % (
            file_time, file_notes, login_name)

        # `head` still holds the match that ended the loop above, so the
        # first pass below stores that line.
        while 1:
            if line == 'ER':
                break
            if head:
                key = head.group(1)
                if key == 'ER':
                    break
                val = head.group(3)
                lines.setdefault(key, []).append(val)
            else:
                cont = contin.match(line)
                if cont:
                    # continuation of the most recent tag
                    val = cont.group(1)
                    lines[key].append(val)
                else:
                    break
            line = self.file.readline()
            if line == '':
                break  # error situation
            head = header.match(line)

        key = 'PT'
        if key in lines:
            # startswith() instead of [0]: an empty value must end up in
            # the warning branch, not raise IndexError.
            if lines[key][0].strip().startswith('J'):
                del lines[key]
            else:
                print('Warning: Unknown type of entry (%s) -- may need editing.' % (
                    lines[key]))

        entry_type = Types.get_entry('article')

        for key in ('AU', 'ED'):
            if key in lines:
                group = Fields.AuthorGroup()
                for item in lines[key]:
                    if item.strip() == '[Anon]':
                        auth = [item]
                    else:
                        # 'Name, AB' becomes 'Name, A. B. '
                        name, firstn = item.split(',')
                        auth = ["%s, " % name]
                        for initial in firstn.strip():
                            auth.append("%s. " % initial)
                    group.append(Fields.Author("".join(auth)))
                if key == 'AU':
                    in_table['author'] = group
                elif key == 'ED':
                    in_table['editor'] = group
                del lines[key]

        key, key1, key2 = 'PG', 'BP', 'EP'
        if key1 in lines and key2 in lines:
            if len(lines[key1]) == len(lines[key2]):
                pages = ['%s -- %s' % pair
                         for pair in zip(lines[key1], lines[key2])]
                in_table['pages'] = Fields.Text('; '.join(pages))
                del lines[key1]
                del lines[key2]
            else:
                print('inconsistent BP, EP fields found')

        if key in lines:
            in_table['size'] = Fields.Text('%s p.' % (lines[key][0]))
            del lines[key]

        key = 'PY'
        if key in lines:
            val = lines[key][0]
            in_table['date'] = Fields.Date(val)
            del lines[key]

        key = 'ID'
        if key in lines:
            val = "[ISI:] %s ;;" % (' '.join(lines[key]).lower())
            if 'DE' in lines:
                lines['DE'].append('; ' + val)
            else:
                lines['DE'] = [val]
            del lines[key]

        # journal titles come in various forms

        if 'SO' in lines:
            uc_title = ' '.join(lines['SO'])
            in_table['journal'] = Fields.Text(uc_title)
            if 'JI' in lines:
                # The capturing group keeps the separators, so joining the
                # pieces again rebuilds the full title.
                uc_words = re.split(r"([- .,/]+)", uc_title)
                ca_title = re.split(r"[- .,/]+", ' '.join(lines['JI']))
                i, title = 0, []
                for word in uc_words:
                    capitalized = word.capitalize()
                    # Once every abbreviated word has been consumed there
                    # is nothing left to compare with; without this bound
                    # ca_title[i] raises IndexError on longer titles.
                    pending = i < len(ca_title)
                    if pending and word == ca_title[i]:
                        title.append(word)
                        i += 1
                    elif pending and capitalized.startswith(ca_title[i]):
                        title.append(capitalized)
                        i += 1
                    else:
                        title.append(word.lower())
                del lines['JI']
                in_table['journal'] = Fields.Text("".join(title))
            del lines['SO']

        for key in lines:
            mapped_key, joiner = key_map.get(
                key, ('isifile-%s' % (key.lower()), ' ; '))
            text_type = Types.get_field(mapped_key).type
            in_table[mapped_key] = text_type(joiner.join(lines[key]))

        return Base.Entry(None, entry_type, in_table)
Пример #12
0
def write_source_field(output, entry, keys):
    """Write the ``Source`` block describing where *entry* was published.

    A ``Source`` header line is written, followed by one indented line of
    text.  Entries whose type is ``incollection`` or which carry a
    ``booktitle`` are rendered as book title, volume, number, pages, year
    and ``beigabevermerk``; every other entry is rendered as journal,
    volume, number, pages, ``other-note`` and date.

    output -- object whose ``write`` method receives the text
    entry  -- entry to describe; ``journal`` is read unconditionally when
              the entry is not rendered as a collection
    keys   -- unused here; kept so the signature stays unchanged
    """
    output.write('Source\n')

    in_collection = (entry.type == Types.get_entry('incollection')
                     or entry.has_key('booktitle'))

    if in_collection:
        t = [str(entry.get('booktitle')).strip()]

        if entry.has_key('volume'):
            t.append(".  %s " % (entry['volume']))
        if entry.has_key('number'):
            t.append("(%s)" % (entry['number']))

        if entry.has_key('pages'):
            t.append("PG. %s." % (str(entry['pages'])))

        if entry.has_key('date'):
            t.append(" %s" % (entry['date'].year))

        # The guard used to test 'beigabeevermerk' (extra "e") while the
        # value was read from 'beigabevermerk', so the note was never
        # written.  Both now use the spelling the value is stored under.
        if entry.has_key('beigabevermerk'):
            t.append(str(entry.get('beigabevermerk')))
    else:
        t = ["%s. " % (entry['journal'])]

        if entry.has_key('volume'):
            t.append("%s" % (entry['volume']))

        if entry.has_key('number'):
            t.append("(%s)" % (entry['number']))

        if entry.has_key('pages'):
            t.append(":%s" % (str(entry['pages'])))

        if entry.has_key('other-note'):
            t.append("; %s" % (entry['other-note']))

        if entry.has_key('date'):
            date = entry['date']

            t.append(", %s" % (date.year))

            # month and day are only printed when they are set
            if date.month:
                t.append(" %s" % (month_name[date.month - 1]))
            if date.day:
                t.append(" %s" % (date.day))

    # final dot; consecutive dots are not compacted
    t.append(".")

    output.write('  ')
    output.write(''.join(t))
    output.write('\n')
Пример #13
0
    def next(self):
        """Parse the next record of ``self.file`` into a ``Base.Entry``.

        Lines are read until the end of the file or until one matches
        ``separator_re``.  A line that is blank or begins with a space
        continues the text of the field being read; any other line is
        taken, lowercased, as the name of a new field.  Field names found
        in ``self.mapping`` are converted according to their declared kind
        (simple, keyword, author or source); the others are ignored.

        Returns None when no field could be read.
        """
        raw = {}

        # first pass: collect the raw text of every field
        chunks = []
        current = ''
        while True:
            line = self.file.readline()
            if not line:
                break
            line = line.rstrip()

            # blank or indented lines extend the field being read
            if not line or line.startswith(' '):
                chunks.append(line.lstrip())
                continue

            # a separator closes the record
            if separator_re.match(line):
                break

            # anything else names a new field: flush the previous one
            if current:
                raw[current] = '\n'.join(chunks)
                chunks = []

            current = line.lower()

        # the last field has not been flushed yet
        if current:
            raw[current] = '\n'.join(chunks)

        if not raw:
            return None

        # second pass: convert the raw text into typed fields
        entry = Base.Entry(type=self.deftype)

        for tag, content in raw.items():
            if not self.mapping.has_key(tag):
                continue

            name, kind = self.mapping[tag]
            make_field = Types.get_field(name).type

            if kind == SimpleField:
                entry[name] = make_field(content.strip())

            elif kind == KeywordField:
                keywords = content.strip()
                if entry.has_key(name):
                    # append to what is already stored under this name
                    keywords = str(entry[name]) + '\n  ' + keywords

                entry[name] = make_field(keywords)

            elif kind == AuthorField:
                entry[name] = self.parse_author(content)

            elif kind == SourceField:
                flat = ' '.join(content.split('\n'))
                m = self.source_re.match(flat.strip())

                if not m:
                    print('>>> Error: Source field  does not parse correctly:')
                    print(flat)
                    print(entry)
                    continue

                journal = m.group('journal')
                volume = m.group('volume')
                series = m.group('inseries')
                number = m.group('number')
                pages = m.group('pages')
                other = m.group('other')
                year_text = m.group('year')
                when = m.group('month')

                year = month = day = None

                if series:
                    # article in a monograph series
                    entry['booktitle'] = Fields.Text(journal)
                    if when:
                        entry['beigabevermerk'] = Fields.LongText(when)
                    entry.type = Types.get_entry('incollection')

                elif journal:
                    entry['journal'] = Fields.Text(journal)

                    tokens = (when or '').split()
                    if tokens:
                        # a month name missing from long_month is ignored
                        try:
                            month = long_month[tokens[0]]
                        except KeyError:
                            pass

                        if len(tokens) > 1:
                            day = int(tokens[1])

                for target, value in (('volume', volume),
                                      ('number', number),
                                      ('pages', pages),
                                      ('other-note', other)):
                    if value:
                        entry[target] = Fields.Text(value)

                if year_text:
                    year = int(year_text)

                entry['date'] = Fields.Date((year, month, day))

        return entry
Пример #14
0
    def next(self):
        """Read one record from ``self.file`` and return it as a ``Base.Entry``.

        A line matching ``tag_re`` starts a new field (group 1 is the tag,
        group 2 the beginning of its text); any other line is appended to
        the field being read.  Tags are translated through ``self.mapping``
        and tags missing from it are reported and skipped.  A blank line or
        the end of the file closes the record.  The entry type is guessed
        from the fields that were found, and the field mapped to ``label``
        becomes the entry key instead of a regular field.

        Returns None when the file holds no further record.
        """
        data = ''
        fields = {}
        current = None   # mapped name of the field being accumulated
        started = False  # True once a mapped tag has been read

        while 1:
            line = self.file.readline()

            # end of file before any usable field: nothing left to return
            if line == '' and not started:
                return None

            line = line.strip()

            # A blank line (or the end of the file) closes the record.  The
            # test relies on `started` rather than on `fields`, because
            # `fields` is only filled when the *next* tag shows up: a record
            # made of a single field used to be merged into the following
            # record, or dropped at the end of the file.
            if line == '' and started:
                break

            match = tag_re.match(line)
            if not match:
                # in the general case, append the new text
                data = data + ' ' + line
                continue

            # a new field starts: store the one read so far
            if current:
                fields.setdefault(current, []).append(' '.join(data.split()))

            tag = match.group(1)
            if self.mapping.has_key(tag):
                current = self.mapping[tag][0]
                data = match.group(2)
                started = True
            else:
                print("warning: key `%s' has been skipped" % (tag,))
                current = None
                data = ''

        # store the field that was being read when the record ended
        if current:
            fields.setdefault(current, []).append(' '.join(data.split()))

        # the label is not a field: the last one seen becomes the key
        label = None
        labels = fields.pop('label', None)
        if labels:
            label = labels[-1]

        # determine the real type from the fields that are present
        if 'journal' in fields:
            kind = 'article'
        elif 'booktitle' in fields:
            kind = 'inbook'
        elif 'volume' in fields or 'number' in fields:
            kind = 'inproceedings'
        elif 'publisher' in fields:
            kind = 'book'
        elif 'author' in fields and 'title' in fields:
            kind = 'unpublished'
        else:
            kind = 'misc'

        entry_type = Types.get_entry(kind)

        # turn the collected strings into typed field values
        for name in list(fields.keys()):
            field_type = Types.get_field(name).type
            values = fields[name]

            if field_type == Fields.AuthorGroup:
                group = Fields.AuthorGroup()
                for auth in values:
                    group.append(Fields.Author(auth))
                fields[name] = group
                continue

            if len(values) > 1:
                # Only the first definition is kept.  The raw list used to
                # be left in the entry in place of a typed value.
                sys.stderr.write(
                    "warning: field `%s' is defined more than once\n"
                    % name)

            fields[name] = field_type(values[0])

        key = None
        if label:
            key = Key.Key(None, label)

        return Base.Entry(key, entry_type, fields)
Пример #15
0
    def next (self):
        """Read the next record from ``self.file`` as an ``article`` entry.

        Leading blank lines are skipped; the record then runs up to the
        next blank line or the end of the file.  A line matching ``header``
        starts a tagged field (group 1 is the tag, group 2 its text), a
        line matching ``contin`` extends the current field, and any other
        line is ignored.  The ``PMID``, ``UI``, ``AU`` and ``DP`` tags are
        converted specially; every remaining tag is stored under the name
        given by ``one_to_one``, or ``medline-<tag>`` when it has none.

        Returns a ``Base.Entry``, or an empty dictionary (a false value)
        when the end of the file is reached before any record starts.
        """
        current = None
        data    = ''

        table = {}

        # Skip whitespace
        while 1:
            line = self.file.readline ()
            if line == '': return table

            line = line.rstrip ()
            if line != '': break

        while 1:
            head = header.match (line)
            if head:
                # a new tag starts: store the text gathered for the last one
                if current:
                    table.setdefault (current, []).append (data)

                current = head.group (1).strip ()
                data    = head.group (2)
            else:
                cont = contin.match (line)
                if cont:
                    data = data + ' ' + cont.group (1)

            line = self.file.readline ()
            if line == '': break

            line = line.rstrip ()
            if line == '': break

        # don't forget the last item
        if current:
            table.setdefault (current, []).append (data)

        # create the entry with the actual fields
        norm = {}
        type = Types.get_entry ('article')

        if 'PMID' in table:
            pmid = table.pop ('PMID') [0]
            norm ['url'] = Fields.URL (medurl + pmid)
            norm ['medline-pmid'] = Fields.Text (pmid)

        if 'UI' in table:
            norm [one_to_one ['UI']] = Fields.Text (table.pop ('UI') [0])

        if 'AU' in table:
            group = Fields.AuthorGroup ()

            for au in table.pop ('AU'):
                # analyze the author names
                parts = au.split (' ')

                # If the last part is not uppercase, it is a lineage
                # ('Jr', '3rd', etc.).  It is kept as a plain string like
                # the other name parts (it used to be wrapped in a list),
                # and a name made of a single word is never taken as a
                # lineage, which used to leave the author without a last
                # name.
                lineage = None
                if len (parts) > 1 and not parts [-1].isupper ():
                    lineage = parts.pop ()

                # after removing the lineage, the last part should be the
                # initial(s): 'JA' becomes 'J. A.'
                first = None
                if len (parts) > 1:
                    first = '. '.join (parts.pop ()) + '.'

                # join the remaining parts to form the last name; a part
                # made of one initial gets a '.'
                last = []
                for part in parts:
                    if len (part) == 1:
                        last.append (part + '.')
                    else:
                        last.append (part)

                last = ' '.join (last) or None

                group.append (Fields.Author ((None, first, last, lineage)))

            norm [one_to_one ['AU']] = group

        if 'DP' in table:
            # only the first space-separated token of the date is used
            stamp = table.pop ('DP') [0].split (' ')
            norm [one_to_one ['DP']] = Fields.Date (stamp [0])

        # The simple fields...
        for f in table.keys ():
            f_mapped = one_to_one.get (f, 'medline-%s' % (f.lower ()))
            text_type = Types.get_field (f_mapped).type
            norm [f_mapped] = text_type (" ; ".join (table [f]))

        return Base.Entry (None, type, norm)