示例#1
0
    def index(self, pkg):
        '''Parse an uploaded file and bulk-index its records.

        :param pkg: dict describing the upload. This method reads
            ``pkg["fileobj"]`` and ``pkg["format"]`` and, when present,
            ``pkg["collection"]`` and ``pkg["source"]``.
        :return: the result of ``bibserver.dao.Record.bulk_upsert`` when
            ``self.can_index(pkg)`` permits indexing, otherwise the string
            ``"DUPLICATE"``.
        '''
        import logging

        parser = Parser()
        data = parser.parse(pkg["fileobj"], pkg["format"])

        # prepare the data as required
        data, pkg = self.prepare(data, pkg)

        # if allowed to index, then index (match source or email)
        if not self.can_index(pkg):
            return "DUPLICATE"

        # delete any old versions
        # should change this to do checks first, and save new ones, perhaps
        try:
            if "collection" in pkg:
                bibserver.dao.Record.delete_by_query(
                    "collection.exact:" + pkg["collection"])
            if "source" in pkg:
                res = bibserver.dao.Record.query(
                    q='source:"' + pkg["source"] + '" AND type:"collection"')
                if res["hits"]["total"] != 0:
                    coll = res["hits"]["hits"][0]["_source"]["collection"]
                else:
                    coll = ""
                # Only delete when a previous collection was actually found;
                # an empty name would produce the malformed query
                # "collection.exact:".
                if coll and coll != pkg.get("collection"):
                    bibserver.dao.Record.delete_by_query(
                        "collection.exact:" + coll)
        except Exception:
            # Cleanup of old versions is best-effort: a failure here must not
            # block the upsert, but it should leave a trace instead of being
            # silently discarded.
            logging.getLogger(__name__).exception(
                "failed to delete old records before re-indexing")

        # send the data list for bulk upsert
        return bibserver.dao.Record.bulk_upsert(data)
示例#2
0
    def upload(self, fileobj, format_, collection=None):
        '''Import a collection into the database.

        :param fileobj: a fileobj pointing to file from which to import
        collection records (and possibly collection metadata)
        :param format_: format of the fileobj (e.g. bibtex)
        :param collection: collection dict for use when creating collection. If
        undefined collection must be extractable from the fileobj. When both
        this dict and file metadata are present, the values given here
        override the file metadata.

        :return: same as `index` method.
        '''
        parser = Parser()
        record_dicts, metadata = parser.parse(fileobj, format=format_)

        # TODO: check authz for write to this collection

        # if metadata provided from file, roll it into the collection object
        if metadata:
            # collection defaults to None; dict.update(None) raises TypeError,
            # so only merge when the caller actually supplied a collection.
            if collection:
                metadata.update(collection)
            collection = metadata

        return self.index(collection, record_dicts)
示例#3
0
文件: web.py 项目: sea36/bibserver
def parse():
    """Fetch a remote file, parse it and return the result as JSON.

    Reads the request values ``format`` and ``source`` (both required) and
    ``collection`` (optional). ``source`` is retrieved with
    ``urllib2.urlopen`` and handed to ``Parser.parse`` together with
    ``format``.

    :return: a response with mimetype ``application/json``. On success the
        body is a JSON object with ``records`` and ``metadata`` keys; when a
        required parameter is missing or the retrieval fails, the body is a
        JSON object with an ``error`` key.
    """
    def json_response(body):
        # Every exit path of this view returns a JSON-typed response.
        resp = make_response(body)
        resp.mimetype = "application/json"
        return resp

    # TODO: acceptable formats should be derived by some sort of introspection
    # from the parser.py based on what parsers are actually available.
    has_format = 'format' in request.values
    has_source = 'source' in request.values
    if not has_format and not has_source:
        return json_response('{"error": "Parser cannot run without source URL parameter and source format parameter", "acceptable_formats": ["bibtex","json","csv"]}')
    if not has_format:
        return json_response('{"error": "Parser cannot run without source format parameter", "acceptable_formats": ["bibtex","json","csv"]}')
    if not has_source:
        return json_response('{"error": "Parser cannot run without source URL parameter"}')

    format_ = request.values.get("format").strip('"')
    source = request.values.get("source").strip('"')

    try:
        # Unquote before checking the scheme so that a percent-encoded
        # "http%3A//..." is recognised and not given a second "http://".
        source = urllib2.unquote(source)
        if not source.startswith(('http://', 'https://')):
            source = 'http://' + source
        fileobj = urllib2.urlopen(source)
    except Exception:
        # Serialise with json.dumps: source is user input and may contain
        # quotes or backslashes that would break hand-built JSON.
        return json_response(json.dumps(
            {"error": "Retrieval of file from source " + source + " failed"}))

    try:
        records, metadata = Parser().parse(fileobj, format=format_)
    finally:
        # Release the network handle whether or not parsing succeeded.
        fileobj.close()

    # NOTE(review): assumes the parser may return no metadata (None/empty);
    # fall back to an empty dict so the assignments below cannot fail.
    metadata = metadata or {}
    metadata['source'] = source
    metadata['created'] = datetime.now().isoformat()

    collection = request.values.get('collection')
    if collection:
        collection = collection.strip('"')
        metadata['label'] = collection
        metadata['id'] = util.slugify(collection)
        for record in records:
            record['collection'] = metadata['id']

    newcoll = {'records': records, 'metadata': metadata}
    return json_response(json.dumps(newcoll, sort_keys=True, indent=4))