def sync_document(dc_slug, local_project_id, verbose=True):
    """Copy one DocumentCloud document's metadata into the local database.

    Fetches the JSON for ``dc_slug`` through ``dochandler``, looks up the
    local ``DocumentCollection`` whose ``project_id`` is
    ``local_project_id``, gets or creates the ``Document`` for that
    collection/slug pair, overwrites its metadata fields from the JSON and
    saves it.

    Args:
        dc_slug: DocumentCloud identifier of the document; also stored as the
            local ``document_id``.
        local_project_id: ``project_id`` of the local collection the document
            belongs to.
        verbose: when true, print the retrieved JSON.

    Raises:
        LocalResourceNotExistError: no local collection has that project id.
        ValueError: a timestamp in the JSON does not match the expected
            format (see the note on ``timestamp_format`` below).
    """
    handler = dochandler(settings.DOCUMENT_CLOUD_LOGIN,
                         settings.DOCUMENT_CLOUD_PASSWORD)
    doc_json = handler.get_doc_json(dc_slug)['document']
    if verbose:
        print("Retrieved Documentcloud json:\n %s" % (doc_json,))

    # Only a genuinely missing collection is reported as "does not exist";
    # any other lookup failure (database error, duplicate rows) propagates
    # unchanged instead of being mislabelled.
    try:
        local_project = DocumentCollection.objects.get(
            project_id=local_project_id)
    except DocumentCollection.DoesNotExist:
        raise LocalResourceNotExistError("The local project you're trying to add this document to does not exist. Perhaps it hasn't been created yet?")

    newdoc, _created = Document.objects.get_or_create(
        collection=local_project, document_id=dc_slug)

    # The literal "+0000" means only GMT timestamps parse; anything else
    # raises ValueError from strptime.
    timestamp_format = "%a, %d %b %Y %H:%M:%S +0000"
    created_at = datetime.strptime(doc_json['created_at'], timestamp_format)
    updated_at = datetime.strptime(doc_json['updated_at'], timestamp_format)

    newdoc.document_headline = doc_json['title']
    newdoc.document_description = doc_json['description']
    newdoc.source = doc_json['source']
    newdoc.created_at = created_at
    newdoc.updated_at = updated_at
    newdoc.contributor = doc_json['contributor']

    # 'related_article' is not always present in the JSON; when it is
    # missing, the stored URL is left as it was.
    try:
        newdoc.related_article_url = doc_json['related_article']
    except KeyError:
        pass

    # Saved exactly once, so a failing save surfaces its own error.
    newdoc.save()
def publish_local(local_project_id):
    """Flag a local collection and each of its documents as public.

    Only local records are modified: the collection with the given
    ``project_id`` and every ``Document`` attached to it get ``public = True``
    and are saved one at a time.

    Args:
        local_project_id: ``project_id`` of the local collection to publish.
    """
    # The handler is built but not used below; it is kept in case its
    # construction has side effects (TODO confirm whether it can be dropped).
    dc_handler = dochandler(settings.DOCUMENT_CLOUD_LOGIN,
                            settings.DOCUMENT_CLOUD_PASSWORD)

    collection = DocumentCollection.objects.get(project_id=local_project_id)
    collection.public = True
    collection.save()

    for document in Document.objects.filter(collection=collection):
        document.public = True
        document.save()
def publish_now(local_project_id, verbose):
    """Flag a collection and its documents as public, locally and remotely.

    The collection and every attached ``Document`` get ``public = True`` and
    are saved. After each document is saved, ``handler.update_document`` is
    called with that document's ``document_id`` and ``{'access': 'public'}``.

    Args:
        local_project_id: ``project_id`` of the local collection to publish.
        verbose: when true, print the result of each remote update.
    """
    dc_handler = dochandler(settings.DOCUMENT_CLOUD_LOGIN,
                            settings.DOCUMENT_CLOUD_PASSWORD)

    collection = DocumentCollection.objects.get(project_id=local_project_id)
    collection.public = True
    collection.save()

    documents = Document.objects.filter(collection=collection)

    # A single dict instance is reused for every update call.
    access_params = {'access': 'public'}

    for document in documents:
        document.public = True
        document.save()
        outcome = dc_handler.update_document(document.document_id,
                                             access_params)
        if verbose:
            print(outcome)
def push_project_url(local_project_id, verbose):
    """Send a collection's backlink to each of its documents as 'related_article'.

    Reads ``collection_backlink`` from the local collection and, for every
    ``Document`` in it, calls ``handler.update_document`` with the document's
    ``document_id`` and ``{'related_article': <backlink>}``.

    Args:
        local_project_id: ``project_id`` of the local collection.
        verbose: when true, print progress and the result of each update.
    """
    dc_handler = dochandler(settings.DOCUMENT_CLOUD_LOGIN,
                            settings.DOCUMENT_CLOUD_PASSWORD)

    # The lookup raises if the collection is missing.
    collection = DocumentCollection.objects.get(project_id=local_project_id)
    if verbose:
        print("Got collection '%s'" % (collection))

    backlink = collection.collection_backlink

    # Every document in the collection is selected; the query itself does
    # not exclude rows with an empty document_id.
    id_rows = Document.objects.filter(collection=collection).values('document_id')

    # A single dict instance is reused for every update call.
    link_params = {'related_article': backlink}

    for row in id_rows:
        doc_id = row['document_id']
        if verbose:
            print("trying to update document %s " % (doc_id))
        outcome = dc_handler.update_document(doc_id, link_params)
        if verbose:
            print(outcome)
def create_project(project_to_pull_id, verbose):
    """Create or refresh a local collection from a DocumentCloud project.

    Looks for ``project_to_pull_id`` in the project list returned by the
    handler, gets or creates the local ``DocumentCollection`` with that id,
    copies title and description onto it, and then runs ``sync_document``
    for every id in the project's ``document_ids``.

    Args:
        project_to_pull_id: id of the DocumentCloud project to import.
        verbose: when true, print progress; also passed on to
            ``sync_document``.

    Raises:
        DoesNotExistError: no project in the handler's list has that id.
    """
    dc_handler = dochandler(settings.DOCUMENT_CLOUD_LOGIN,
                            settings.DOCUMENT_CLOUD_PASSWORD)
    listing = dc_handler.get_project_json()

    # Every entry is inspected; if several share the id, the last one wins.
    matches = [candidate for candidate in listing['projects']
               if candidate['id'] == project_to_pull_id]
    if not matches:
        raise DoesNotExistError("The resource you've requested does not exist or is unavailable without the proper credentials.")
    project_json = matches[-1]

    newproj, created = DocumentCollection.objects.get_or_create(
        project_id=project_to_pull_id)

    # Headline and chatter always follow the DocumentCloud version, whether
    # or not the collection already existed locally.
    title = project_json['title']
    newproj.collection_headline = title
    newproj.collection_chatter = project_json['description']

    # The slug is only set on first creation: changing it later would change
    # the URL and break any existing links.
    if created:
        slug = slugify(title)
        newproj.collection_slug = slug
        if verbose:
            print("Project created with slug %s" % (slug))
    elif verbose:
        print("Updating existing local document collection")

    newproj.save()

    # Finally pull in each of the project's documents.
    for doc_id in project_json['document_ids']:
        if verbose:
            print("Handling document %s" % (doc_id))
        sync_document(doc_id, newproj.project_id, verbose)
## Standalone script: bootstrap Django, then list the DocumentCloud projects
## visible to the configured account.
import sys
from optparse import OptionParser

from django.core.management import setup_environ

from helpers import django_location

# The project directory must be on sys.path before 'myproject' is imported.
sys.path.append(django_location())

from myproject import settings
setup_environ(settings)

from myproject.documents.docwrangle import dochandler

# Credentials are expected to be defined in settings.
handler = dochandler(settings.DOCUMENT_CLOUD_LOGIN,
                     settings.DOCUMENT_CLOUD_PASSWORD)
result = handler.get_project_json()

# One line per project: its title and id.
for project in result['projects']:
    print("Found '%s' with id: %s" % (project['title'], project['id']))