예제 #1
0
def main():
    """Poll up to two submitted speech jobs and report queue progress.

    Prints a plain-text content-type header, then selects at most two
    ``speechJobs`` rows with ``beenProcessed = 1``, ``respExported = 0`` and
    a ``jobStatus`` matched against ``None`` (presumably rendered as NULL by
    the database driver), oldest ``queueTimestamp`` first.  Each row with a
    truthy ``jobId`` is handed to ``runCycle`` and its return value printed.
    Afterwards the remaining un-exported jobs for the same ``globalId`` are
    counted; when that count is zero, ``nextAction(globalId)`` is called and
    its result printed.

    Index 2 of the value returned by ``utilities.dbExecution`` is read as the
    sequence of result rows.
    """
    print("Content-type: text/plain; charset=UTF-8\n\n")

    pendingSql = """select jobId, apiName, gcsLoc, globalId, batchId from speechJobs
		where beenProcessed = %s
		and respExported = %s
		and jobStatus is %s
		order by queueTimestamp asc limit 2"""
    pendingRows = utilities.dbExecution(pendingSql, [1, 0, None])[2]

    for jobRow in pendingRows:
        jobId, apiName, gcsLoc, globalId, batchId = jobRow[:5]

        # Rows without a usable job id are skipped entirely.
        if not jobId:
            continue

        print("global Id: " + str(globalId))
        print("... job " + str(jobId))
        print(runCycle(jobId, apiName, gcsLoc, globalId, batchId))

        remainingSql = """select count(*) from speechJobs
				where respExported = %s
				and jobStatus is %s
				and globalId = %s"""
        remainingResp = utilities.dbExecution(remainingSql,
                                              [0, None, globalId])
        remainingCnt = remainingResp[2][0][0]

        print("... " + str(remainingCnt) + " jobs still in the queue")
        # Nothing left to export for this meeting: move on to the next stage.
        if remainingCnt == 0:
            print(nextAction(globalId))
        print("")
예제 #2
0
def runCycle(gcsLoc, jobId, globalId):
    """Submit one audio file to the Speech API as a long-running job.

    Builds a ``longrunningrecognize`` request whose ``audio.uri`` is
    ``gcsLoc`` and records the outcome on the ``speechJobs`` row identified
    by ``jobId``:

    * on success the operation name from the response is stored in
      ``apiName``;
    * on any exception ``jobStatus`` is set to
      ``"longrunning api call failed"``.

    In both cases the row is marked ``beenProcessed = 1``.

    Args:
        gcsLoc: URI of the audio to transcribe.
        jobId: Key of the ``speechJobs`` row to update.
        globalId: Identifier that is only echoed in the progress output.

    Returns:
        None.  Progress is reported through printed output and the database.
    """
    credentialsJson = "__Credential_JSON_File_Name__"

    scopesList = ["https://www.googleapis.com/auth/cloud-platform"]
    credentialsObj = ServiceAccountCredentials.from_json_keyfile_name(
        credentialsJson,
        scopes=scopesList
    )

    payloadObj = {
        "audio": {
            "uri": gcsLoc
        },
        "config": {
            "languageCode": "en-US",
            "encoding": "FLAC",
            "sampleRateHertz": 16000,
            "enableWordTimeOffsets": True,
            "enableAutomaticPunctuation": True,
            "useEnhanced": True,
            "model": "video",
            "metadata": {
                "interaction_type": "DISCUSSION",
                "recording_device_type": "OTHER_INDOOR_DEVICE",
                "originalMediaType": "VIDEO"
            },
            # The API documents speechContexts as an array of SpeechContext
            # objects, so the single context is wrapped in a list.
            "speechContexts": [
                {
                    "phrases": [
                        "Louisville", "Weldona", "signage", "PROSTAC"
                    ]
                }
            ]
        }
    }

    try:
        httpObj = credentialsObj.authorize(httplib2.Http())
        serviceObj = build(
            serviceName="speech",
            version="v1p1beta1",
            http=httpObj,
            developerKey="__Google_Speech_API_Key__"
        )
        responseObj = serviceObj.speech().longrunningrecognize(
            body=payloadObj).execute()

        print("job " + str(jobId) + " for global id " + str(globalId))

        apiName = responseObj["name"]
        print("request name " + str(apiName))

        sqlCmd = """update speechJobs set apiName = %s, beenProcessed = %s where jobId = %s"""
        sqlData = [apiName, 1, jobId]
        utilities.dbExecution(sqlCmd, sqlData)
    except Exception as e:
        # NOTE(review): this handler also fires when the success-path database
        # update above raises, not only when the API call itself fails.
        sqlCmd = """update speechJobs set jobStatus = %s, beenProcessed = %s where jobId = %s"""
        sqlData = ["longrunning api call failed", 1, jobId]
        utilities.dbExecution(sqlCmd, sqlData)
        print("longrunning api call failed")
        # Surface the cause so a failed submission can be diagnosed.
        print("... " + repr(e))
def markTranscript(globalId, prodTranscript, batchId):
    """Flag a meeting as transcribed and store its transcript name.

    The stored name is ``prodTranscript`` prefixed with ``str(batchId)`` and
    a hyphen.  The ``meetingRegistry`` row matching ``globalId`` gets
    ``beenTranscribed = 1`` and that prefixed name in ``prodTranscript``.

    Returns:
        Whatever ``utilities.dbExecution`` returns for the update.
    """
    prefixedName = "-".join([str(batchId), prodTranscript])
    updateSql = """update meetingRegistry set beenTranscribed = %s, prodTranscript = %s where globalId = %s"""
    return utilities.dbExecution(updateSql, [1, prefixedName, globalId])
def runCycle(globalId, orgIdentifier, prodTranscode, batchId):
    """Queue a speech job for every FLAC transcode stored for a meeting.

    Lists the blobs of the ``bucketName`` bucket whose names start with
    ``accounts/<orgIdentifier>/enrichments/<globalId>/transcodes/<prodTranscode>``
    and, for each blob whose name contains ``.flac``, inserts a
    ``speechJobs`` row with ``beenProcessed = 0`` pointing at its ``gs://``
    location.

    Returns:
        The number of insert statements issued.
    """
    bucketObj = storage.Client().get_bucket(bucketName)
    blobPrefix = "".join([
        "accounts/", orgIdentifier,
        "/enrichments/", str(globalId),
        "/transcodes/", prodTranscode,
    ])
    insertSql = """insert into speechJobs (globalId, orgIdentifier, gcsLoc, beenProcessed, batchId) values (%s, %s, %s, %s, %s)"""

    queuedCnt = 0
    for blobObj in bucketObj.list_blobs(prefix=blobPrefix):
        # Only audio transcodes are queued; everything else is ignored.
        if ".flac" not in blobObj.name:
            continue

        audioUri = "gs://" + blobObj.bucket.name + "/" + blobObj.name
        utilities.dbExecution(
            insertSql, [globalId, orgIdentifier, audioUri, 0, batchId])
        queuedCnt += 1

    return queuedCnt
def assignUrl(globalId, wcUrl):
    """Store a word-cloud URL on a meeting's registry row.

    Sets ``wordCloud`` to ``wcUrl`` on the ``meetingRegistry`` row whose
    ``globalId`` matches.

    Returns:
        Whatever ``utilities.dbExecution`` returns for the update.
    """
    updateSql = """update meetingRegistry
		set wordCloud = %s
		where globalId = %s"""
    return utilities.dbExecution(updateSql, (wcUrl, globalId))
예제 #6
0
def assignUrl(globalId, transcriptUrl):
    """Store a published-transcript URL on a meeting's registry row.

    Sets ``publishedTranscript`` to ``transcriptUrl`` on the
    ``meetingRegistry`` row whose ``globalId`` matches.

    Returns:
        Whatever ``utilities.dbExecution`` returns for the update.
    """
    updateSql = """update meetingRegistry
		set publishedTranscript = %s
		where globalId = %s"""
    return utilities.dbExecution(updateSql, (transcriptUrl, globalId))
예제 #7
0
    def get(self):
        """Write one meeting's registry fields to the response as JSON.

        Reads the ``gId`` request parameter, selects the matching
        ``meetingRegistry`` row and writes a JSON object with one key per
        selected column.  If the lookup raises (for example because no row
        matched), every key is still present with a ``None`` value.

        Response caching is disabled through the ``Cache-Control`` and
        ``Expires`` headers.
        """
        self.response.headers["Content-Type"] = "application/json"
        self.response.headers.add_header(
            "Cache-Control", "no-cache, no-store, must-revalidate, max-age=0")
        self.response.headers.add_header("Expires", "0")

        # Single source of truth: these names are both the selected columns
        # (in this order) and the keys of the JSON response.
        fieldNames = (
            "videoName",
            "beenTranscribed",
            "beenTranscoded",
            "videoDownloaded",
            "videoLink",
            "orgIdentifier",
            "prodTranscript",
            "meetingDate",
            "meetingDesc",
            "beenIndexed",
            "youtubeId",
            "meetingId",
            "prodTranscode",
            "urlIdentifier",
        )
        sqlCmd = ("select " + ", ".join(fieldNames) +
                  " from meetingRegistry where globalId = %s")

        try:
            globalId = self.request.get("gId")
            # A real one-element tuple; "(globalId)" is just the bare string.
            sqlData = (globalId,)
            resultList = utilities.dbExecution(sqlCmd, sqlData)
            rowObj = resultList[2][0]
            fieldValues = [rowObj[colIdx] for colIdx in range(len(fieldNames))]
        except Exception:
            # Best effort: any lookup failure yields an all-None payload.
            fieldValues = [None] * len(fieldNames)

        resultObj = {}
        for fieldName, fieldValue in zip(fieldNames, fieldValues):
            resultObj[fieldName] = fieldValue

        self.response.out.write(ujson.dumps(resultObj))
예제 #8
0
def meetingCount(orgIdentifier):
    """Count an organisation's meetings that have a YouTube id.

    Args:
        orgIdentifier: Value matched against ``meetingRegistry.orgIdentifier``.

    Returns:
        The ``count(*)`` value from the first result row.
    """
    sqlCmd = """select
		count(*) from meetingRegistry
		where orgIdentifier = %s
		and youtubeId is not NULL"""
    # A real one-element tuple; "(orgIdentifier)" is just the bare value.
    sqlData = (orgIdentifier,)
    resultList = utilities.dbExecution(sqlCmd, sqlData)

    return resultList[2][0][0]
예제 #9
0
def lookupFiles(urlIdentifier):
    """Fetch the stored segment JSON for a meeting.

    Args:
        urlIdentifier: Value matched against ``videoSegments.urlIdentifier``.

    Returns:
        The ``segmentJson`` column of the first matching row.

    Raises:
        IndexError: Presumably when no row matches, since the first row is
            indexed unconditionally.
    """
    sqlCmd = """select
		segmentJson from videoSegments
		where urlIdentifier = %s"""
    # A real one-element tuple; "(urlIdentifier)" is just the bare value.
    sqlData = (urlIdentifier,)
    resultList = utilities.dbExecution(sqlCmd, sqlData)

    segmentJson = resultList[2][0][0]

    return segmentJson
def lookupFiles(urlIdentifier):
    """List the related files attached to a meeting.

    Resolves ``urlIdentifier`` to the meeting's ``globalId`` through
    ``meetingRegistry``, then selects that meeting's ``relatedFiles`` rows
    ordered by ``fileName``.

    Args:
        urlIdentifier: Value matched against ``meetingRegistry.urlIdentifier``.

    Returns:
        The result rows, each holding ``fileId``, ``fileName``, ``mimeType``,
        ``webViewLink``, ``thumbnailLink`` and ``pageIndex`` in that order.

    Raises:
        IndexError: Presumably when no meeting matches, since the first row
            of the lookup is indexed unconditionally.
    """
    sqlCmd = """select
		globalId from meetingRegistry
		where urlIdentifier = %s"""
    # Real one-element tuples are used for both queries; a parenthesised
    # name on its own is just the bare value.
    sqlData = (urlIdentifier,)
    resultList = utilities.dbExecution(sqlCmd, sqlData)

    globalId = str(resultList[2][0][0])

    sqlCmd = """select
		fileId,
		fileName,
		mimeType,
		webViewLink,
		thumbnailLink,
		pageIndex from relatedFiles
		where globalId = %s
		order by fileName ASC"""
    sqlData = (globalId,)
    resultList = utilities.dbExecution(sqlCmd, sqlData)

    return resultList[2]
예제 #11
0
def main():
    """Submit the next unprocessed speech job, if there is one.

    Selects the ``speechJobs`` row with ``beenProcessed = 0`` and the lowest
    ``jobId``, prints a plain-text content-type header, and either echoes the
    job's ``globalId``, location and ``jobId`` before passing them to
    ``runCycle`` or reports that there is nothing to do.

    Index 2 of the value returned by ``utilities.dbExecution`` is read as the
    sequence of result rows.
    """
    pendingSql = """select globalId, jobId, gcsLoc from speechJobs where beenProcessed = %s order by jobId limit 1"""
    pendingResp = utilities.dbExecution(pendingSql, [0])

    print("Content-type: text/plain; charset=UTF-8\n\n")

    jobRows = pendingResp[2]
    if not jobRows:
        print("No jobs to run.")
        return

    globalId, jobId, storedLoc = jobRows[0][:3]
    # Strip any single quotes embedded in the stored location.
    gcsLoc = storedLoc.replace("'", "")

    print(globalId)
    print(gcsLoc)
    print(jobId)
    runCycle(gcsLoc, jobId, globalId)
예제 #12
0
def runCycle(jobId, apiName, gcsLoc, globalId, batchId):
    """Check a long-running Speech operation and export its result when done.

    Fetches the operation named ``apiName``.  When its
    ``metadata.progressPercent`` equals 100, the whole operation response is
    uploaded as JSON to the ``bucketName`` bucket and the ``speechJobs`` row
    for ``jobId`` is marked ``respExported = 1``.

    The upload path is derived from ``gcsLoc``: single quotes and the
    ``gs://<bucketName>/`` prefix are removed, ``.flac`` becomes ``.json``,
    ``transcodes/`` is dropped, and ``/<globalId>/`` is replaced by
    ``/<globalId>/transcripts/<batchId>-``.

    Args:
        jobId: Key of the ``speechJobs`` row to update.
        apiName: Name of the operation to query.
        gcsLoc: Location of the source audio, used to derive the upload path.
        globalId: Meeting identifier appearing in the storage path.
        batchId: Prefix added to the exported file name.

    Returns:
        A status string: ``"... job <jobId> finished"``,
        ``"... job <percent>% complete"`` or ``"... job queued"``.

    Raises:
        Whatever the credential, client-building or operation-fetch calls
        raise; only the steps after the fetch are guarded.
    """
    credentialsJson = "__Credential_JSON_File_Name__"

    scopesList = ["https://www.googleapis.com/auth/cloud-platform"]
    credentialsObj = ServiceAccountCredentials.from_json_keyfile_name(
        credentialsJson, scopes=scopesList)

    httpObj = credentialsObj.authorize(httplib2.Http())
    serviceObj = build(serviceName="speech",
                       version="v1p1beta1",
                       http=httpObj,
                       developerKey="__Google_Speech_API_Key__")

    reqObj = serviceObj.operations().get(name=apiName).execute()

    try:
        progressPct = reqObj["metadata"]["progressPercent"]
        if progressPct != 100:
            return "... job " + str(progressPct) + "% complete"

        clientObj = storage.Client()
        bucketObj = clientObj.get_bucket(bucketName)

        # Turn the audio location into a bucket-relative transcript path.
        bucketPrefix = "gs://" + bucketName + "/"
        cloudPath = gcsLoc.replace("'", "")
        cloudPath = cloudPath.replace(".flac", ".json")
        cloudPath = cloudPath.replace(bucketPrefix, "")
        cloudPath = cloudPath.replace("transcodes/", "")

        globalDir = "/" + str(globalId) + "/"
        transDir = globalDir + "transcripts/" + str(batchId) + "-"
        newPath = cloudPath.replace(globalDir, transDir)

        blobObj = bucketObj.blob(newPath)
        blobObj.upload_from_string(ujson.dumps(reqObj))

        sqlCmd = """update speechJobs set respExported = %s where jobId = %s"""
        sqlData = [1, jobId]
        utilities.dbExecution(sqlCmd, sqlData)

        return "... job " + str(jobId) + " finished"
    except Exception:
        # A response without progress metadata ends up here and is reported
        # as queued.
        # NOTE(review): upload and database failures are reported as
        # "queued" too, because the guard spans the whole export.
        return "... job queued"
예제 #13
0
    def get(self):
        """Flag a meeting's transcript as errored and report the result.

        Reads the ``gId`` request parameter and sets ``transcriptErr = 1`` on
        the matching ``meetingRegistry`` row.  The JSON response has a single
        ``response`` key holding ``str()`` of the value returned by
        ``utilities.dbExecution``, or ``None`` if the update raised.

        Response caching is disabled through the ``Cache-Control`` and
        ``Expires`` headers.
        """
        self.response.headers["Content-Type"] = "application/json"
        self.response.headers.add_header(
            "Cache-Control", "no-cache, no-store, must-revalidate, max-age=0")
        self.response.headers.add_header("Expires", "0")

        try:
            globalId = self.request.get("gId")
            sqlCmd = "update meetingRegistry set transcriptErr = %s where globalId = %s"
            sqlData = (1, globalId)
            resultList = utilities.dbExecution(sqlCmd, sqlData)
            outputStr = str(resultList)
        except Exception:
            # Best effort: report a null response rather than failing the
            # request, but let interpreter-exit signals propagate.
            outputStr = None

        resultObj = {"response": outputStr}

        self.response.out.write(ujson.dumps(resultObj))
예제 #14
0
def lookupMeeting(urlIdentifier):
    """Fetch the published details of one meeting by its URL identifier.

    Only meetings with a non-NULL ``youtubeId`` are considered, and at most
    one row is selected.

    Args:
        urlIdentifier: Value matched against ``meetingRegistry.urlIdentifier``.

    Returns:
        The first result row, holding ``meetingDesc``, ``meetingDate``,
        ``youtubeId``, ``wordCloud``, ``publishedVideo``,
        ``publishedTranscript``, ``orgIdentifier``, ``publishedAgenda``,
        ``urlIdentifier`` and ``hasSegments`` in that order.

    Raises:
        IndexError: Presumably when no meeting matches, since the first row
            is indexed unconditionally.
    """
    sqlCmd = """select
		meetingDesc,
		meetingDate,
		youtubeId,
		wordCloud,
		publishedVideo,
		publishedTranscript,
		orgIdentifier,
		publishedAgenda,
		urlIdentifier,
		hasSegments from meetingRegistry
		where urlIdentifier = %s
		and youtubeId is not NULL
		limit 1"""
    # A real one-element tuple; "(urlIdentifier)" is just the bare value.
    sqlData = (urlIdentifier,)
    resultList = utilities.dbExecution(sqlCmd, sqlData)

    return resultList[2][0]
def getMeetings(qryLimit, qryOffset, orgIdentifier):
    """Return one page of an organisation's meetings, keyed for date sorting.

    Selects meetings with a non-NULL ``youtubeId`` for ``orgIdentifier``,
    newest ``meetingDate`` first, applying ``qryLimit`` and ``qryOffset``
    (both converted with ``int``).

    Each dictionary key is ``str(meetingDate)`` followed by a three-digit,
    zero-padded counter.  The counter increases while consecutive rows share
    a date and restarts at zero when the date changes, so meetings on the
    same day get distinct keys.

    Args:
        qryLimit: Maximum number of rows to select.
        qryOffset: Number of rows to skip.
        orgIdentifier: Value matched against ``meetingRegistry.orgIdentifier``.

    Returns:
        A dict mapping each key to a dict with ``desc``, ``date``, ``dow``,
        ``meetingId`` and ``urlIdentifier``.  ``date`` and ``dow`` are the
        two values returned by ``formatDate(meetingDate)``.
    """
    sqlCmd = """select
		meetingDesc,
		meetingDate,
		urlIdentifier,
		globalId from meetingRegistry
		where orgIdentifier = %s
		and youtubeId is not NULL
		order by meetingDate DESC
		limit %s
		offset %s"""
    sqlData = (orgIdentifier, int(qryLimit), int(qryOffset))
    resultList = utilities.dbExecution(sqlCmd, sqlData)

    lastDate = None
    dateCnt = 0

    meetingDict = {}
    for eachEntry in resultList[2]:
        meetingDate = eachEntry[1]

        # Number meetings that share a date so their keys stay unique.
        if lastDate == meetingDate:
            dateCnt = dateCnt + 1
        else:
            dateCnt = 0
        dateIncr = str(meetingDate) + str(dateCnt).zfill(3)

        formattedDate, weekDay = formatDate(meetingDate)

        meetingDict[dateIncr] = {
            "desc": eachEntry[0],
            "date": formattedDate,
            "dow": weekDay,
            "meetingId": eachEntry[3],
            "urlIdentifier": eachEntry[2],
        }
        lastDate = meetingDate

    return meetingDict
def lookupMeeting(globalId):
    """Look up a meeting's transcode name and organisation.

    Args:
        globalId: Value matched against ``meetingRegistry.globalId``.

    Returns:
        A ``(prodTranscode, orgIdentifier)`` pair taken from the first
        result row.
    """
    lookupSql = """select prodTranscode, orgIdentifier from meetingRegistry where globalId = %s"""
    firstRow = utilities.dbExecution(lookupSql, [globalId])[2][0]

    return firstRow[0], firstRow[1]