def index(ticket):
    """Load a ticket's parsed data from the download cache into the index.

    Marks the ticket as ``populating_index``, checks that the parsed file
    named by ``ticket['data_json']`` exists under
    ``config['download_cache_directory']``, uploads it through an ``Importer``
    created for the ticket's owner and finally marks the ticket as ``done``.
    If the file is missing the ticket is failed via ``ticket.fail`` and the
    function returns without uploading.

    :param ticket: dict-like ticket offering ``save()`` and ``fail()``. The
        keys ``data_json``, ``owner``, ``collection``, ``source_url`` and
        ``format`` are read directly; ``description`` and ``license`` are
        optional.
    :raises Exception: if the parsed data file exists but is empty.
    """
    ticket['state'] = 'populating_index'
    ticket.save()

    # Make sure the parsed content is in the cache
    download_cache_directory = config['download_cache_directory']
    in_path = os.path.join(download_cache_directory, ticket['data_json'])
    if not os.path.exists(in_path):
        ticket.fail('Parsed content for %s not found' % in_path)
        return

    # One character is enough to tell an empty file from a usable one, and
    # the context manager releases the handle immediately.
    with open(in_path) as parsed:
        if not parsed.read(1):
            raise Exception('The parsed data in this ticket is empty.')

    # TODO check for metadata section to update collection from this?
    owner = bibserver.dao.Account.get(ticket['owner'])
    importer = Importer(owner=owner)
    collection = {
        'label': ticket['collection'],
        'collection': util.slugify(ticket['collection']),
        'description': ticket.get('description'),
        'source': ticket['source_url'],
        'format': ticket['format'],
        'license': ticket.get('license', u"Not specified"),
    }
    # Keep the file open only while the importer works on it.
    # NOTE(review): assumes upload() has finished reading the file by the
    # time it returns — confirm against Importer.upload.
    with open(in_path) as parsed:
        importer.upload(parsed, collection)

    ticket['state'] = 'done'
    ticket.save()
def index(ticket):
    """Push the parsed data referenced by a ticket into the index.

    The ticket state is set to ``populating_index`` and saved, the parsed
    file ``ticket['data_json']`` is looked up inside
    ``config['download_cache_directory']`` and handed to an ``Importer``
    built for the ticket's owner, and the ticket is then saved as ``done``.
    A missing file fails the ticket through ``ticket.fail`` and ends the
    call early.

    :param ticket: dict-like ticket with ``save()`` and ``fail()`` methods.
        Required keys: ``data_json``, ``owner``, ``collection``,
        ``source_url``, ``format``. Optional keys: ``description``,
        ``license``.
    :raises Exception: if the parsed data file is present but empty.
    """
    ticket['state'] = 'populating_index'
    ticket.save()

    # Make sure the parsed content is in the cache
    download_cache_directory = config['download_cache_directory']
    in_path = os.path.join(download_cache_directory, ticket['data_json'])
    if not os.path.exists(in_path):
        ticket.fail('Parsed content for %s not found' % in_path)
        return

    # Probe for emptiness without loading the whole file; ``with`` makes
    # sure the handle is closed even when the exception is raised.
    with open(in_path) as data_file:
        is_empty = not data_file.read(1)
    if is_empty:
        raise Exception('The parsed data in this ticket is empty.')

    # TODO check for metadata section to update collection from this?
    owner = bibserver.dao.Account.get(ticket['owner'])
    importer = Importer(owner=owner)
    collection = {
        'label': ticket['collection'],
        'collection': util.slugify(ticket['collection']),
        'description': ticket.get('description'),
        'source': ticket['source_url'],
        'format': ticket['format'],
        'license': ticket.get('license', u"Not specified"),
    }
    # The handle is closed as soon as the upload call returns.
    # NOTE(review): presumes upload() reads the file synchronously —
    # verify against Importer.upload.
    with open(in_path) as data_file:
        importer.upload(data_file, collection)

    ticket['state'] = 'done'
    ticket.save()
def index(self, collection_dict, record_dicts, metadata):
    '''Add this collection and its records to the database index.

    A collection is built from ``collection_dict`` (plus ``metadata``) and
    compared against the owner's existing collections: one with the same id
    is reused; one with the same source but a different id is deleted and
    its records are purged instead. Records filed under the resulting id are
    deleted, the collection is saved, and every record is updated in place
    with its collection membership and, when a request or site URL is known,
    a ``url``.

    :param collection_dict: keyword arguments for
        ``bibserver.dao.Collection``; the result must carry a ``label``.
    :param record_dicts: list of dict-like records, modified in place.
    :param metadata: optional mapping copied key by key onto the collection;
        pass a falsy value to skip.
    :return: (collection, records) tuple of collection and associated
        record objects.
    :raises AssertionError: if the collection has no label.
    '''
    collection = bibserver.dao.Collection(**collection_dict)
    if metadata:
        # items() instead of iteritems() so the loop is not tied to Python 2.
        for key, val in metadata.items():
            collection[key] = val
    timestamp = datetime.now().isoformat()
    collection['created'] = timestamp
    # Raised explicitly (rather than via ``assert``) so the validation is
    # not stripped when Python runs with -O; the exception type is the same.
    if 'label' not in collection:
        raise AssertionError('Collection must have a label')
    if 'id' not in collection:
        collection['id'] = util.slugify(collection['label'])
    collection['owner'] = self.owner.id

    # delid is the collection id whose records are purged before re-indexing.
    delid = collection['id']
    for coll in self.owner.collections:
        if 'source' in coll and 'source' in collection:
            if coll['source'] == collection['source']:
                if coll['id'] != collection['id']:
                    # Same source under another id: remove that collection
                    # and purge its records instead of ours.
                    delid = coll['id']
                    bibserver.dao.Collection.delete_by_query('id:' + coll['id'])
                    break
                else:
                    # Same source and same id: carry on with the stored one.
                    collection = coll
                    break
        if coll['id'] == collection['id']:
            collection = coll
            break
    bibserver.dao.Record.delete_by_query('collection'+config["facet_field"]+':"' + delid + '"')
    collection['records'] = len(record_dicts)
    collection['modified'] = timestamp
    collection.save()

    for rec in record_dicts:
        # Normalise membership to a list before testing it: on a bare string
        # ``in`` is a substring test and append() does not exist.
        membership = rec.get('collection')
        if membership is None:
            membership = []
        elif not isinstance(membership, list):
            membership = [membership]
        if collection["id"] not in membership:
            membership.append(collection["id"])
        rec['collection'] = membership

        if not self.requesturl and 'SITE_URL' in config:
            self.requesturl = str(config['SITE_URL'])
        if self.requesturl:
            if not self.requesturl.endswith('/'):
                self.requesturl += '/'
            rec['url'] = self.requesturl + 'record/'
            if 'citekey' in rec:
                rec['url'] += collection['id'] + '/' + rec['citekey']
            elif 'id' in rec:
                rec['url'] += rec['id']
            else:
                # No usable identifier: mint one so the URL is addressable.
                rec['id'] = uuid.uuid4().hex
                rec['url'] += rec['id']
    records = bibserver.dao.Record.bulk_upsert(record_dicts)
    return collection, records
def index(self, collection_dict, record_dicts):
    '''Index a collection together with its records.

    The collection built from ``collection_dict`` is matched against the
    owner's existing collections. A match on slug is reused; a match on
    source with a different slug is deleted and its records are purged
    instead. Records under the resulting slug and owner are deleted, the
    collection is saved, and each record is stamped in place with owner,
    collection slug and, when a request or site URL is known, a ``url``.

    :param collection_dict: keyword arguments for
        ``bibserver.dao.Collection``; the result must carry a ``label``.
    :param record_dicts: list of dict-like records, modified in place.
    :return: (collection, records) tuple of collection and associated
        record objects.
    '''
    collection = bibserver.dao.Collection(**collection_dict)
    assert 'label' in collection, 'Collection must have a label'
    if 'collection' not in collection:
        collection['collection'] = util.slugify(collection['label'])
    collection['owner'] = self.owner.id

    # Slug whose records are purged before the new ones are written.
    purge_slug = collection['collection']
    for existing in self.owner.collections:
        same_source = (
            'source' in existing
            and 'source' in collection
            and existing['source'] == collection['source']
        )
        same_slug = existing['collection'] == collection['collection']
        if same_source and not same_slug:
            # Same source filed under another slug: drop that collection.
            purge_slug = existing['collection']
            bibserver.dao.Collection.delete_by_query(
                'collection:"' + existing['collection']
                + '" AND owner:"' + collection['owner'] + '"'
            )
            break
        if same_slug:
            # Slug already known for this owner: keep working on that one.
            collection = existing
            break

    bibserver.dao.Record.delete_by_query(
        'collection' + config["facet_field"] + ':"' + purge_slug
        + '" AND owner' + config["facet_field"] + ':"' + collection['owner'] + '"'
    )
    collection.save()

    for record in record_dicts:
        record['owner'] = collection['owner']
        if 'collection' not in record or collection['collection'] != record['collection']:
            record['collection'] = collection['collection']

        if not self.requesturl and 'SITE_URL' in config:
            self.requesturl = str(config['SITE_URL'])
        if not self.requesturl:
            continue
        if not self.requesturl.endswith('/'):
            self.requesturl += '/'

        record['url'] = self.requesturl + collection['owner'] + '/' + collection['collection'] + '/'
        if 'cid' not in record and 'id' not in record:
            # Neither identifier present: mint an id for the URL.
            record['id'] = uuid.uuid4().hex
        id_key = 'cid' if 'cid' in record else 'id'
        record['url'] += record[id_key]

    records = bibserver.dao.Record.bulk_upsert(record_dicts)
    return collection, records
def test_02_collection(self):
    """Upsert a collection and check it is listed on the fixture account."""
    label = u'My Collection'
    coll = dao.Collection.upsert({
        'label': label,
        'slug': util.slugify(label),
        'owner': Fixtures.account.id,
    })
    assert coll.id, coll
    assert coll['label'] == label

    # should only be one collection for this account so this is ok
    account_colls = Fixtures.account.collections
    first_coll = account_colls[0]
    assert coll.id == first_coll.id, account_colls
def parse():
    """Fetch a remote bibliography file and return it parsed, as JSON.

    Request values read:

    * ``source`` -- URL of the file; ``http://`` is prepended when no
      http/https scheme is given, and the value is URL-unquoted.
    * ``format`` -- format name handed to ``Parser.parse``.
    * ``collection`` -- optional label; when given, the metadata gets this
      ``label`` plus a slugified ``id`` and every record is tagged with
      that id.

    Surrounding double quotes are stripped from all three values.

    :return: a response with mimetype ``application/json``. On success it
        holds ``{"records": ..., "metadata": ...}``; when a parameter is
        missing or the source cannot be retrieved it holds an ``error``
        object instead.
    """
    # TODO: acceptable formats should be derived by some sort of introspection
    # from the parser.py based on what parsers are actually available.
    has_format = 'format' in request.values
    has_source = 'source' in request.values
    if not (has_format and has_source):
        if not has_format and not has_source:
            message = '{"error": "Parser cannot run without source URL parameter and source format parameter", "acceptable_formats": ["bibtex","json","csv"]}'
        elif not has_format:
            message = '{"error": "Parser cannot run without source format parameter", "acceptable_formats": ["bibtex","json","csv"]}'
        else:
            message = '{"error": "Parser cannot run without source URL parameter"}'
        resp = make_response(message)
        resp.mimetype = "application/json"
        return resp

    source_format = request.values.get("format").strip('"')
    source = request.values.get("source").strip('"')
    try:
        if not source.startswith(('http://', 'https://')):
            source = 'http://' + source
        source = urllib2.unquote(source)
        fileobj = urllib2.urlopen(source)
    except Exception:
        # ``source`` comes straight from the request: serialise it with
        # json.dumps so quotes or backslashes cannot break the JSON body.
        error = {"error": "Retrieval of file from source " + source + " failed"}
        resp = make_response(json.dumps(error))
        resp.mimetype = "application/json"
        return resp

    parser = Parser()
    try:
        records, metadata = parser.parse(fileobj, format=source_format)
    finally:
        # Release the connection whether or not parsing succeeded.
        fileobj.close()

    metadata['source'] = source
    metadata['created'] = datetime.now().isoformat()
    if request.values.get('collection', None):
        label = request.values['collection'].strip('"')
        metadata['label'] = label
        metadata['id'] = util.slugify(label)
        for record in records:
            record['collection'] = metadata['id']

    newcoll = {'records': records, 'metadata': metadata}
    resp = make_response(json.dumps(newcoll, sort_keys=True, indent=4))
    resp.mimetype = "application/json"
    return resp
def index(self, collection_dict, record_dicts):
    """Add this collection and its records to the database index.

    The owner's collection whose slug equals the slugified label is reused
    when ``get_by_owner_coll`` returns one; otherwise a new collection is
    created from ``collection_dict``, given a slug and owner, and saved.
    Every dict record is then stamped in place with the collection's owner
    and slug and, when a request or site URL is available, with an ``_id``
    (if it has none) and a ``url``.

    :param collection_dict: keyword arguments for
        ``bibserver.dao.Collection``; ``label`` is read directly.
    :param record_dicts: list of records, modified in place.
    :return: (collection, records) tuple of collection and associated
        record objects; the records are the list that was passed in.
    """
    col_label_slug = util.slugify(collection_dict["label"])
    collection = bibserver.dao.Collection.get_by_owner_coll(self.owner.id, col_label_slug)
    if not collection:
        collection = bibserver.dao.Collection(**collection_dict)
        assert "label" in collection, "Collection must have a label"
        if "collection" not in collection:
            collection["collection"] = col_label_slug
        collection["owner"] = self.owner.id
        collection.save()

    for rec in record_dicts:
        # isinstance rather than an exact type match, so dict subclasses
        # (e.g. OrderedDict) are stamped too instead of silently skipped.
        # NOTE(review): non-dict entries are skipped here yet still reach
        # bulk_upsert below — confirm that is intended.
        if not isinstance(rec, dict):
            continue
        rec["owner"] = collection["owner"]
        # Whatever the record carried before, it is filed under this slug.
        rec["collection"] = collection["collection"]

        if not self.requesturl and "SITE_URL" in config:
            self.requesturl = str(config["SITE_URL"])
        if self.requesturl:
            if not self.requesturl.endswith("/"):
                self.requesturl += "/"
            if "_id" not in rec:
                rec["_id"] = bibserver.dao.make_id(rec)
            rec["url"] = self.requesturl + collection["owner"] + "/" + collection["collection"] + "/"
            # ``_id`` is guaranteed by now, so it is the fallback identifier.
            rec["url"] += rec["id"] if "id" in rec else rec["_id"]
    bibserver.dao.Record.bulk_upsert(record_dicts)
    return collection, record_dicts
def index(self, collection_dict, record_dicts):
    '''Add this collection and its records to the database index.

    If ``get_by_owner_coll`` finds a collection of this owner under the
    slugified label, that collection is used as is. Otherwise one is built
    from ``collection_dict``, completed with slug and owner, and saved.
    Each dict record is then updated in place: owner and collection slug are
    set, and when a request or site URL is known the record also receives
    an ``_id`` (unless present) and a ``url``.

    :param collection_dict: keyword arguments for
        ``bibserver.dao.Collection``; ``label`` is read directly.
    :param record_dicts: list of records, modified in place.
    :return: (collection, records) tuple of collection and associated
        record objects; the records are the list that was passed in.
    '''
    col_label_slug = util.slugify(collection_dict['label'])
    collection = bibserver.dao.Collection.get_by_owner_coll(self.owner.id, col_label_slug)
    if not collection:
        collection = bibserver.dao.Collection(**collection_dict)
        assert 'label' in collection, 'Collection must have a label'
        if 'collection' not in collection:
            collection['collection'] = col_label_slug
        collection['owner'] = self.owner.id
        collection.save()

    for rec in record_dicts:
        # Accept dict subclasses as well; an exact ``type(...) is dict``
        # test would leave them without owner and collection.
        # NOTE(review): entries that are not dicts are skipped here but
        # are still handed to bulk_upsert — confirm that is intended.
        if not isinstance(rec, dict):
            continue
        rec['owner'] = collection['owner']
        # The record always ends up under this collection's slug.
        rec['collection'] = collection['collection']

        if not self.requesturl and 'SITE_URL' in config:
            self.requesturl = str(config['SITE_URL'])
        if self.requesturl:
            if not self.requesturl.endswith('/'):
                self.requesturl += '/'
            if '_id' not in rec:
                rec['_id'] = bibserver.dao.make_id(rec)
            rec['url'] = self.requesturl + collection['owner'] + '/' + collection['collection'] + '/'
            # ``_id`` exists at this point, so it serves as the fallback.
            rec['url'] += rec['id'] if 'id' in rec else rec['_id']
    bibserver.dao.Record.bulk_upsert(record_dicts)
    return collection, record_dicts