def setup():
    """
    Make sure we're working with a fresh database. Build a client for
    interacting with that database and save it as a global.

    Sets the module globals ``USERNAME``, ``ROOT_CLIENT``, ``RELOGIN_CLIENT``
    and ``PROJECT``.  If a project named ``PROJECT_NAME`` is already listed
    for the user it is deleted first; the project is then created and
    fetched once through ``PROJECT``.
    """
    global ROOT_CLIENT, PROJECT, USERNAME, RELOGIN_CLIENT

    user_info_str = subprocess.check_output('tellme lumi-test', shell=True)
    # SECURITY(review): eval() executes whatever `tellme lumi-test` prints as
    # Python code.  Left unchanged to preserve behavior; consider
    # ast.literal_eval if the output is always a plain literal.
    user_info = eval(user_info_str)
    USERNAME = user_info['username']
    password = user_info['password']
    projects_path = USERNAME + '/projects'

    ROOT_CLIENT = LuminosoClient.connect(ROOT_URL,
                                         username=USERNAME,
                                         password=password)
    RELOGIN_CLIENT = LuminosoClient.connect(ROOT_URL,
                                            username=USERNAME,
                                            password=password,
                                            auto_login=True)

    # check to see if the project exists; also create the client we'll use
    projects = ROOT_CLIENT.get(projects_path)
    projlist = [proj['name'] for proj in projects]
    PROJECT = ROOT_CLIENT.change_path(projects_path + '/' + PROJECT_NAME)

    if PROJECT_NAME in projlist:
        # logger.warn is a deprecated alias; logger.warning is the
        # supported spelling.
        logger.warning('The test database existed already. '
                       'We have to clean it up.')
        ROOT_CLIENT.delete(projects_path, project=PROJECT_NAME)

    # create the project
    logger.info("Creating project: %s", PROJECT_NAME)
    logger.info("Existing projects: %r", projlist)
    ROOT_CLIENT.post(projects_path, project=PROJECT_NAME)
    PROJECT.get()
def post_set_topics():
    """
    Replace the topics of the project named by the request.

    Connection details (``luminoso_account``, ``luminoso_user_name``,
    ``luminoso_password``) and ``project_name`` are read from the request's
    query arguments.  Every existing topic is deleted, then a fixed set of
    topics is posted in order.  Returns the string ``'true'``.

    See API documentation: https://api.luminoso.com/v4/
    """
    app.logger.info('Set the topics')

    args = request.args
    client = LuminosoClient.connect(args.get('luminoso_account'),
                                    username=args.get('luminoso_user_name'),
                                    password=args.get('luminoso_password'))

    # Get project id from project name (first match is used).
    matches = client.get(name=args.get('project_name'))
    project = client.change_path(matches[0]['project_id'])

    # Delete the current topics...
    for existing in project.get('topics/'):
        project.delete('topics/id/' + existing['_id'])

    # ...and recreate them, in this order.
    topic_table = (
        ("Oceans", "ocean marine"),
        ("Whales", "whales"),
        ("Seals", "seals"),
        ("Japan", "Japan"),
        ("Overfishing", "fishing overfishing"),
        ("Forests", "forests"),
        ("Climate & Energy", "climate fuel pollution"),
        ("Polar", "polar ice"),
    )
    for topic_name, topic_text in topic_table:
        project.post('topics/', name=topic_name, text=topic_text)

    # The result of this final fetch is not used; the call is kept so the
    # sequence of requests stays the same.
    project.get('topics/')
    return 'true'
def post_set_topics():
    """
    Delete all topics of the requested project and post a fixed topic set.

    Reads ``luminoso_account``, ``luminoso_user_name``,
    ``luminoso_password`` and ``project_name`` from the request's query
    arguments.  Returns the string ``'true'``.

    See API documentation: https://api.luminoso.com/v4/
    """
    app.logger.info('Set the topics')

    # Connect using the account and user name supplied with the request.
    account = request.args.get('luminoso_account')
    user_name = request.args.get('luminoso_user_name')
    secret = request.args.get('luminoso_password')
    client = LuminosoClient.connect(account, username=user_name,
                                    password=secret)

    # Get project id from project name: the first matching project wins.
    project_info = client.get(name=request.args.get('project_name'))[0]
    project = client.change_path(project_info['project_id'])

    # Delete and then recreate topics.
    current_topics = project.get('topics/')
    for old_topic in current_topics:
        project.delete('topics/id/' + old_topic['_id'])

    topic_specs = [
        {'name': "Oceans", 'text': "ocean marine"},
        {'name': "Whales", 'text': "whales"},
        {'name': "Seals", 'text': "seals"},
        {'name': "Japan", 'text': "Japan"},
        {'name': "Overfishing", 'text': "fishing overfishing"},
        {'name': "Forests", 'text': "forests"},
        {'name': "Climate & Energy", 'text': "climate fuel pollution"},
        {'name': "Polar", 'text': "polar ice"},
    ]
    for spec in topic_specs:
        project.post('topics/', **spec)

    # Trailing fetch retained from the original flow; its result is unused.
    project.get('topics/')
    return 'true'
def push(request):
    """
    Correlate the text in the request body against the project's topics.

    The request body is parsed as JSON and its ``'text'`` entry is sent to
    the ``topics/text_correlation`` endpoint of the "GP english sample"
    project.  The response is a JSON object mapping topic name to the
    correlation value returned for that topic's id.

    Returns:
        HttpResponseRedirect to "/" when the request body is empty,
        HttpResponseServerError('Malformed data') when a KeyError is raised
        while building the correlations (e.g. the body has no ``'text'``
        key), otherwise an HttpResponse carrying the JSON object.
    """
    # handle lack of request body
    if not request.body:
        return HttpResponseRedirect("/")

    json_data = json.loads(request.body)  # load text from body of request
    correlations = {}  # topic name -> correlation value

    try:
        # connect to Luminoso and our english corpus
        project_name = "GP english sample"
        client = LuminosoClient.connect('/projects/u64t648d/',
                                        username='******', password=pw)
        project = client.get(name=project_name)[0]
        project = client.change_path(project['project_id'])

        # get correlations to each topic
        result = project.put('topics/text_correlation',
                             text=json_data['text'])
        topics = project.get('topics/')
        # .items() works on both Python 2 and 3 (iteritems() is 2-only).
        for key, value in result.items():
            correlations.update(
                (t['name'], value) for t in topics if t['_id'] == key)
    except KeyError:
        # The error response must be returned; previously it was built and
        # discarded, so callers received a 200 with partial data instead.
        return HttpResponseServerError('Malformed data')

    return HttpResponse(json.dumps(correlations))
def get_luminoso():
    """
    Return, as a JSON string, whatever a bare ``get()`` on the account
    client yields.

    The account path and credentials are taken from the request's query
    arguments ``luminoso_account``, ``luminoso_user_name`` and
    ``luminoso_password``.
    """
    app.logger.info('Get from luminoso')
    args = request.args
    client = LuminosoClient.connect(
        args.get('luminoso_account'),
        username=args.get('luminoso_user_name'),
        password=args.get('luminoso_password'),
    )
    return json.dumps(client.get())
def get_luminosodelete():
    """
    Issue a delete on the path given by the ``project_id`` query argument.

    The client is connected with the ``luminoso_account``,
    ``luminoso_user_name`` and ``luminoso_password`` query arguments.
    Returns the string ``"Project Deleted "``.
    """
    app.logger.info('Delete a project on Luminoso')
    args = request.args
    client = LuminosoClient.connect(
        args.get('luminoso_account'),
        username=args.get('luminoso_user_name'),
        password=args.get('luminoso_password'),
    )
    # The project is addressed by its id, relative to the account path.
    target = args.get('project_id')
    client.delete(target)
    return "Project Deleted "
def get_luminosodelete():
    """
    Delete the resource named by the request's ``project_id`` argument.

    Connects with the account path, user name and password supplied as
    query arguments, sends the delete, and returns ``"Project Deleted "``.
    """
    app.logger.info('Delete a project on Luminoso')

    account = request.args.get('luminoso_account')
    credentials = {
        'username': request.args.get('luminoso_user_name'),
        'password': request.args.get('luminoso_password'),
    }
    client = LuminosoClient.connect(account, **credentials)

    # Delete the project identified by the id passed in the request.
    client.delete(request.args.get('project_id'))
    return "Project Deleted "
def get_luminoso():
    """
    Fetch the root resource of the requested account and return it as JSON.

    Query arguments used: ``luminoso_account`` (connection path),
    ``luminoso_user_name`` and ``luminoso_password`` (credentials).
    """
    app.logger.info('Get from luminoso')

    account = request.args.get('luminoso_account')
    credentials = {
        'username': request.args.get('luminoso_user_name'),
        'password': request.args.get('luminoso_password'),
    }
    client = LuminosoClient.connect(account, **credentials)

    listing = client.get()
    serialized = json.dumps(listing)
    return serialized
def get_luminosocreate():
    """
    Create a project named by the ``project_name`` query argument.

    Connects with the ``luminoso_account``, ``luminoso_user_name`` and
    ``luminoso_password`` query arguments, posts the new project's name and
    returns the string ``'true'``.
    """
    app.logger.info('Create a project on Luminoso')
    args = request.args
    client = LuminosoClient.connect(
        args.get('luminoso_account'),
        username=args.get('luminoso_user_name'),
        password=args.get('luminoso_password'),
    )

    # Create a new project by POSTing its name; the reply carries its id.
    created = client.post(name=args.get('project_name'))
    project_id = created['project_id']

    # A client scoped to the new project is built but not used further here.
    project = client.change_path(project_id)
    return 'true'
def upload_stream(stream, server, account, projname, reader_dict,
                  username=None, password=None, append=False, stage=False):
    """
    Given a file-like object containing a JSON stream, upload it to
    Luminoso with the given account name and project name.

    Parameters:
        stream: source handed to ``batches(stream, 1000)``.
        server: URL passed to ``LuminosoClient.connect``.
        account: account id used to build the ``/projects/<account>`` path.
        projname: name of the project to create or append to.
        reader_dict: passed as ``readers=`` to every upload and to the
            recalculation request.
        username, password: credentials for the connection.
        append: when false a new project is created; when true the first
            existing project named ``projname`` is reused.
        stage: when true, documents go to ``docs`` and no recalculation is
            requested; otherwise they go to ``docs/preload`` and
            ``docs/recalculate`` is posted afterwards.

    Returns None.  When ``append`` is true and no project matches, a message
    is printed and nothing is uploaded.  The function waits on the last job
    id obtained (the recalculation, or the final upload when staging).
    """
    client = LuminosoClient.connect(server,
                                    username=username, password=password)
    account_path = '/projects/' + account

    if not append:
        # If we're not appending to an existing project, create new project.
        info = client.post(account_path, name=projname)
        project_id = info['project_id']
    else:
        projects = client.get(account_path, name=projname)
        if not projects:
            print('No such project exists!')
            return
        project_id = projects[0]['project_id']
        # Single-argument print() behaves the same on Python 2 and 3; the
        # warning and the notice are emitted as one line.
        message = 'Using existing project with id %s.' % project_id
        if len(projects) > 1:
            message = ('Warning: Multiple projects with name "%s". '
                       % projname) + message
        print(message)

    project = client.change_path(account_path + '/' + project_id)

    url = 'docs' if stage else 'docs/preload'

    final_job_id = None
    for counter, batch in enumerate(batches(stream, 1000), 1):
        documents = list(batch)
        final_job_id = project.upload(url, documents, readers=reader_dict)
        print('Uploaded batch #%d' % counter)

    if not stage:
        # Calculate the docs into the assoc space.
        print('Calculating.')
        final_job_id = project.post('docs/recalculate', readers=reader_dict)

    # Nothing to wait for when staging an empty stream.
    if final_job_id is not None:
        project.wait_for(final_job_id)
def get_luminosocreate():
    """
    Post a new project whose name comes from the request and return 'true'.

    The account path and credentials are read from the
    ``luminoso_account``, ``luminoso_user_name`` and ``luminoso_password``
    query arguments; the project name from ``project_name``.
    """
    app.logger.info('Create a project on Luminoso')

    account = request.args.get('luminoso_account')
    credentials = {
        'username': request.args.get('luminoso_user_name'),
        'password': request.args.get('luminoso_password'),
    }
    client = LuminosoClient.connect(account, **credentials)

    # Create a new project by POSTing its name.
    new_name = request.args.get('project_name')
    response = client.post(name=new_name)
    project_id = response['project_id']

    # Use that project from here on (the scoped client is currently unused).
    project = client.change_path(project_id)
    return 'true'
def get_update_docs():
    """
    Look up the requested project and parse the ``project_doc`` argument.

    Connects with the ``luminoso_account``, ``luminoso_user_name`` and
    ``luminoso_password`` query arguments, selects the first project whose
    name matches ``project_name``, then decodes ``project_doc`` as JSON.

    Returns False when ``project_doc`` is missing or is not valid JSON;
    otherwise falls off the end and returns None.

    NOTE(review): the parsed documents are not used by the code in this
    function, and neither return value is a usual view response -- confirm
    whether an upload step and a response are missing.
    """
    app.logger.info('Upload documents Luminoso')
    client = LuminosoClient.connect(
        request.args.get('luminoso_account'),
        username=request.args.get('luminoso_user_name'),
        password=request.args.get('luminoso_password'))

    # Find the existing project by name and scope the client to it.
    project = client.get(name=request.args.get('project_name'))[0]
    project = client.change_path(project['project_id'])

    try:
        json_object = json.loads(request.args.get('project_doc'))
    except (TypeError, ValueError):
        # ValueError: the argument is not valid JSON.
        # TypeError: the argument is absent, so json.loads received None.
        return False
def get_update_docs():
    """
    Select the project named in the request and decode ``project_doc``.

    Query arguments read: ``luminoso_account``, ``luminoso_user_name``,
    ``luminoso_password``, ``project_name`` and ``project_doc``.

    Returns False if ``project_doc`` is absent or cannot be decoded as
    JSON.  On success nothing is returned explicitly (None).

    NOTE(review): the decoded value is never used below and no documents
    are uploaded by this function as written -- verify that this is the
    complete intended behavior.
    """
    app.logger.info('Upload documents Luminoso')

    args = request.args
    client = LuminosoClient.connect(
        args.get('luminoso_account'),
        username=args.get('luminoso_user_name'),
        password=args.get('luminoso_password'),
    )

    # Resolve the project id from its name (first match) and switch to it.
    matches = client.get(name=args.get('project_name'))
    project = client.change_path(matches[0]['project_id'])

    raw_doc = args.get('project_doc')
    try:
        json_object = json.loads(raw_doc)
    except (TypeError, ValueError):
        # `except ValueError, e` is Python-2-only syntax and `e` was unused;
        # TypeError additionally covers a missing argument (raw_doc is None).
        return False
def upload_stream(stream, server, account, projname, language=None,
                  username=None, password=None, append=False, stage=False):
    """
    Given a file-like object containing a JSON stream, upload it to
    Luminoso with the given account name and project name.

    With ``append`` false a new project is created; with ``append`` true
    the first existing project named ``projname`` is reused (a message is
    printed and the function returns if none exists).  Documents are
    uploaded to ``docs`` in batches of 1000.  Unless ``stage`` is true, a
    ``docs/recalculate`` job is then posted -- with ``language`` when one
    was given -- and waited for.
    """
    client = LuminosoClient.connect(server,
                                    username=username, password=password)
    account_path = '/projects/' + account

    if append:
        candidates = client.get(account_path, name=projname)
        found = len(candidates)
        if found == 0:
            print('No such project exists!')
            return
        if found > 1:
            print('Warning: Multiple projects with name "%s". ' % projname,
                  end='')
        project_id = candidates[0]['project_id']
        print('Using existing project with id %s.' % project_id)
    else:
        # Not appending to an existing project, so create a new one.
        created = client.post(account_path, name=projname)
        project_id = created['project_id']
        print('New project ID:', project_id)

    project = client.change_path(account_path + '/' + project_id)

    for batch_number, batch in enumerate(batches(stream, 1000), 1):
        project.upload('docs', list(batch))
        print('Uploaded batch #%d' % (batch_number))

    if stage:
        return

    # Calculate the docs into the assoc space.
    print('Calculating.')
    options = {'language': language} if language is not None else {}
    job_id = project.post('docs/recalculate', **options)
    project.wait_for(job_id)
def get_correlation():
    """
    Correlate ``article_text`` against the topics of the named project.

    Connection details (``luminoso_account``, ``luminoso_user_name``,
    ``luminoso_password``), ``project_name`` and ``article_text`` are read
    from the request's query arguments.

    Returns a JSON string: a list with one entry per key of the
    ``topics/text_correlation`` result, each entry being the list of
    ``[topic name, value]`` pairs for the topics whose ``_id`` equals that
    key (an empty list when no topic matches).
    """
    app.logger.info('Get Correlation')
    client = LuminosoClient.connect(
        request.args.get('luminoso_account'),
        username=request.args.get('luminoso_user_name'),
        password=request.args.get('luminoso_password'))
    project = client.get(name=request.args.get('project_name'))[0]
    project = client.change_path(project['project_id'])

    # get list of topics
    topics = project.get('topics/')
    result = project.put('topics/text_correlation',
                         text=request.args.get('article_text'))

    # .items() is available on both Python 2 and 3; iteritems() exists only
    # on Python 2 dicts.
    correlations = [
        [(t['name'], value) for t in topics if t['_id'] == key]
        for key, value in result.items()
    ]
    return json.dumps(correlations)
def upload_stream(stream, server, account, projname, reader_dict):
    """
    Given a file-like object containing a JSON stream, upload it to
    Luminoso with the given account name and project name.

    The project is created if the create request succeeds; a
    ``LuminosoAPIError`` from that request is ignored and the upload
    proceeds against ``<account>/projects/<projname>``.  Documents are
    uploaded in batches of 100, after which ``docs/calculate`` is posted
    and the resulting job is waited for.

    Parameters:
        stream: source handed to ``batches(stream, 100)``.
        server: URL passed to ``LuminosoClient.connect``.
        account: account name used as the path prefix.
        projname: project name.
        reader_dict: passed as ``readers=`` to every upload.
    """
    client = LuminosoClient.connect(server)
    try:
        client.post(account + '/projects/', project=projname)
    except LuminosoAPIError:
        # Deliberate best effort: creation failures are ignored and the
        # upload is attempted anyway.  Presumably this covers "project
        # already exists" -- TODO confirm, since other API errors are
        # swallowed here as well.
        pass

    project = client.change_path(account + '/projects/' + projname)

    for counter, batch in enumerate(batches(stream, 100), 1):
        documents = list(batch)
        job_id = project.upload('docs', documents, width=4,
                                readers=reader_dict)
        # Single-argument print() is valid on both Python 2 and 3.
        print('Uploaded batch #%d into job %s' % (counter, job_id))

    print('Committing.')
    final_job_id = project.post('docs/calculate', width=4)
    project.wait_for(final_job_id)
def get_correlation():
    """
    Return topic correlations for the request's ``article_text`` as JSON.

    The client is connected with the ``luminoso_account``,
    ``luminoso_user_name`` and ``luminoso_password`` query arguments and
    scoped to the first project whose name matches ``project_name``.

    The JSON value is a list holding, for each key of the
    ``topics/text_correlation`` result, the list of ``[name, value]`` pairs
    of the topics whose ``_id`` equals that key.
    """
    app.logger.info('Get Correlation')

    args = request.args
    client = LuminosoClient.connect(
        args.get('luminoso_account'),
        username=args.get('luminoso_user_name'),
        password=args.get('luminoso_password'),
    )
    matches = client.get(name=args.get('project_name'))
    project = client.change_path(matches[0]['project_id'])

    # get list of topics
    topics = project.get('topics/')
    result = project.put('topics/text_correlation',
                         text=args.get('article_text'))

    correlations = []
    # iteritems() is Python-2-only; items() works on Python 2 and 3 alike.
    for topic_id, value in result.items():
        pairs = [(topic['name'], value)
                 for topic in topics
                 if topic['_id'] == topic_id]
        correlations.append(pairs)
    return json.dumps(correlations)
if soup.find(id="postingbody"): post = soup.find(id="postingbody").get_text().replace("\n",'').replace("\t",'') document.update({'title' : headline}) document.update({'text' : post}) document.update({'date' : time.time()}) document.update({'source' : {'url' : site}}) document.update({'queries' : [locations[i]]}) documents.append(document) # add the document to the documents list except Exception, err: print "Error:", sys.exc_info()[0] except Exception, err: print "Error:", sys.exc_info()[0] print("Total length of documents: %s" % len(documents)) client = LuminosoClient.connect('/projects/e26y767s/', username='******') print("_________________Creating new project: %s" % project_name) project_info = client.post(name=project_name) project = client.change_path(project_info['project_id']) project.upload('docs', documents) print("_________________Documents uploaded") sys.exit() # exit here job_id = project.post('docs/recalculate') print job_id status = project.get('/jobs/id/'+str(job_id)+'/') print status project.wait_for(job_id) print("_________________Calculations complete")
def connect_client(project_name):
    """
    Return a client scoped to the first project named ``project_name``.

    Connects to the fixed ``/projects/t55y685c/`` path using the
    module-level ``pw`` as the password, looks the project up by name and
    changes the client's path to that project's id.
    """
    # client connection
    account_client = LuminosoClient.connect('/projects/t55y685c/',
                                            username='******',
                                            password=pw)
    matches = account_client.get(name=project_name)
    project_id = matches[0]['project_id']
    return account_client.change_path(project_id)