def main():
    # Command-line entry point for face recognition on a video.
    # Positional arguments (sys.argv[1:7]): root_dir, input_video,
    # training_photos, reuse_trained, tolerance, amp_faces.
    # The recognition result is written to amp_faces with
    # mgm_utils.write_json_file.
    (root_dir, input_video, training_photos, reuse_trained, tolerance,
     amp_faces) = sys.argv[1:7]

    # The logger is keyed on the output filename (amp_faces) rather than the
    # input filename; presumably the output is unique per job while the same
    # input may be shared by several jobs -- TODO confirm.
    # From here on everything written to stdout/stderr goes to the logger.
    logger = MgmLogger(root_dir, "face_recognition", amp_faces)
    sys.stdout = logger
    sys.stderr = logger

    # if tolerance is not specified in command, use the default value
    # NOTE(review): as written, the float() conversion is inside the
    # `if not tolerance` branch, so a tolerance given on the command line
    # stays a string -- confirm whether an `else:` is intended here.
    if not tolerance:
        tolerance = FR_DEFAULT_TOLERANCE
        tolerance = float(tolerance)

    # initialize training results
    known_names = []
    known_faces = []

    # if reuse_trained is set to true (case-insensitive), retrieve previous training results
    # NOTE(review): the call below is left unclosed in this listing; its
    # arguments are not visible here.
    if reuse_trained.lower() == "true":
        known_names, known_faces = train.retrieve_trained_results(

    # if no valid previous trained results is available, do the training
    if (known_names == [] or known_faces == []):
        known_names, known_faces = train.train_faces(training_photos, root_dir)

    # run face recognition on the given video using the trained results at the given tolerance level
    # NOTE(review): this call is also left unclosed in this listing.
    fr_result = recognize_faces(input_video, known_names, known_faces,

    # save the recognized_faces in the standard AMP Face JSON file
    mgm_utils.write_json_file(fr_result, amp_faces)
def main():
    # Command-line entry point: remove silence from a media file and record
    # which segments were kept.
    # Positional arguments (sys.argv[1:6]): input_file,
    # input_segmentation_json, remove_type, output_file, kept_segments_file.
    (input_file, input_segmentation_json, remove_type, output_file,
     kept_segments_file) = sys.argv[1:6]

    # Turn segmentation json file into segmentation object
    with open(input_segmentation_json, 'r') as file:
        seg_data = Segmentation().from_json(json.load(file))

    # Remove silence and get a list of kept segments
    # NOTE(review): the call below is left unclosed in this listing; the
    # remaining argument(s) are not visible here.
    kept_segments = remove_silence(remove_type, seg_data, input_file,

    # Write kept segments to json file
    mgm_utils.write_json_file(kept_segments, kept_segments_file)
def main():
    """Detect shots in a video and write them to a JSON file.

    Reads four positional arguments from ``sys.argv[1:5]``:

    * ``input_file``  -- video handed to ``find_shots`` and ``convert_to_json``.
    * ``threshold``   -- detection threshold; it must parse as an integer,
      otherwise the default of 30 is used and a message is printed.
    * ``output_json`` -- path handed to ``mgm_utils.write_json_file``.
    * ``output_csv``  -- path handed to ``find_shots``.
    """
    (input_file, threshold, output_json, output_csv) = sys.argv[1:5]

    # Command-line arguments are always strings, so the value has to be
    # parsed. An isinstance(..., int) check on the raw argument can never
    # succeed and would silently discard every user-supplied threshold.
    try:
        threshold = int(threshold)
    except (TypeError, ValueError):
        threshold = 30
        print("Setting threshold to default because it wasn't a valid integer")

    # Get a list of scenes as tuples (start, end)
    shots = find_shots(input_file, output_csv, threshold)

    # Print for debugging purposes
    for shot in shots:
        print("start: " + str(shot[0]) + "  end: " + str(shot[1]))

    # Convert the result to json,
    shots_dict = convert_to_json(shots, input_file)

    # save the output json file
    mgm_utils.write_json_file(shots_dict, output_json)
def main():
    # Command-line entry point: build a list of time adjustments from the
    # kept-segment data and apply them to every segment of a segmentation
    # via adjust_segment, then write the segmentation to output_json.
    # Positional arguments (sys.argv[1:4]): segmentation_json, adj_json,
    # output_json.

    (segmentation_json, adj_json, output_json) = sys.argv[1:4]

    # Load the adjustment data; below it is iterated as a mapping whose keys
    # are segment start times (as strings) and whose values are end times
    with open(adj_json, 'r') as file:
        adj_data = json.load(file)

    # Turn segmentation json into objects
    with open(segmentation_json, 'r') as file:
        seg = Segmentation().from_json(json.load(file))

    # List of adjustments (start, end, adjustment)
    offset_adj = []
    # Last ending position for iterating through kept segments
    last_end = 0.00
    # Running tally of removed segment lengths
    current_adj = 0.00

    # For each segment that was kept, keep track of the gaps to know how much to adjust
    for kept_segment in adj_data:
        print(kept_segment + ":" + str(adj_data[kept_segment]))
        start = float(kept_segment)
        end = adj_data[kept_segment]
        # If the start of this segment is at or after the last end, we have a gap
        # (the gap length is zero when the two are equal)
        if (start >= last_end):
            # Keep track of the gap in segments
            current_adj = current_adj + (start - last_end)
            # Add it to a list of adjustments
            # NOTE(review): the line below starts mid-expression at a deeper
            # indent and is left unclosed in this listing; nothing visible
            # here appends to offset_adj.
                Adjustment(start - current_adj, end - current_adj,
        # Keep track of the last segment end
        last_end = end
    # Print every adjustment for debugging
    for adj in offset_adj:
        print(str(adj.start) + ":" + str(adj.end) + ":" + str(adj.adjustment))
    # For each segment, find and apply the corresponding adjustment
    for segment in seg.segments:
        adjust_segment(segment, offset_adj)

    # Write the resulting json
    mgm_utils.write_json_file(seg, output_json)
def main():
    # Command-line entry point: combine an Azure video index JSON and an
    # Azure OCR artifact JSON into an AMP Video OCR object and write it out.
    # Positional arguments (sys.argv[1:5]): input_video, azure_video_index,
    # azure_artifact_ocr, amp_vocr (output path).
    (input_video, azure_video_index, azure_artifact_ocr,
     amp_vocr) = sys.argv[1:5]

    # You must initialize logging, otherwise you'll not see debug output.
    # NOTE(review): no logging setup follows this comment in this listing.

    # Get Azure video index json
    with open(azure_video_index, 'r') as azure_index_file:
        azure_index_json = json.load(azure_index_file)

    # Get Azure artifact OCR json
    with open(azure_artifact_ocr, 'r') as azure_ocr_file:
        azure_ocr_json = json.load(azure_ocr_file)

    # Create AMP Video OCR object
    # NOTE(review): the call below is left unclosed in this listing; the
    # remaining argument(s) are not visible here.
    amp_vocr_obj = create_amp_ocr(input_video, azure_index_json,

    # write AMP Video OCR JSON file
    mgm_utils.write_json_file(amp_vocr_obj, amp_vocr)
def convert(media_file, kaldi_file, kaldi_transcript_file, output_json_file):
    """Convert Kaldi output into a speech-to-text JSON file.

    Args:
        media_file: media path recorded in the ``SpeechToTextMedia`` object.
        kaldi_file: path to the Kaldi JSON file; it must contain a
            ``"words"`` list whose entries have ``"time"``, ``"duration"``
            and ``"word"`` keys.
        kaldi_transcript_file: path to the plain-text Kaldi transcript.
        output_json_file: destination passed to ``mgm_utils.write_json_file``.

    Raises:
        Exception: if ``kaldi_transcript_file`` does not exist.
    """
    if not os.path.exists(kaldi_transcript_file):
        raise Exception(
            "Exception: File " + kaldi_transcript_file +
            " doesn't exist, the previous command generating it must have failed."
        )
    results = SpeechToTextResult()

    # Open the kaldi json
    with open(kaldi_file) as json_file:
        data = json.load(json_file)

    # Get the kaldi transcript; the context manager closes the file handle,
    # which was previously left open.
    with open(kaldi_transcript_file, "r") as transcript_file:
        results.transcript = transcript_file.read()

    # Get a list of words
    words = data["words"]
    duration = 0.00

    # For each word, add a word to our results
    for w in words:
        # Named `start` rather than `time` so the `time` module name is not
        # shadowed inside this function.
        start = float(w["time"])
        end = start + float(w["duration"])
        # The largest end time seen so far is used as the media duration
        if end > duration:
            duration = end
        results.addWord("", start, end, w["word"], None, None)

    # Create the media object
    media = SpeechToTextMedia(duration, media_file)

    # Create the final object
    outputFile = SpeechToText(media, results)

    # Write the output
    mgm_utils.write_json_file(outputFile, output_json_file)
def main():
    # Command-line entry point: extract frames from a video with ffmpeg,
    # run Tesseract OCR (pytesseract) on each frame, and write the result
    # to a JSON file.
    # Positional arguments (sys.argv[1:3]): input_file, output_name.
    # Frames are written to a temporary directory under /tmp that is removed
    # when the `with` block exits.
    with tempfile.TemporaryDirectory(dir="/tmp") as tmpdir:
        (input_file, output_name) = sys.argv[1:3]
        dateTimeObj = datetime.now()

        #ffmpeg extracts the frames from the video input (fps=2, audio disabled with -an)
        # NOTE(review): the command is built by string concatenation and run
        # with shell=True; input_file is inserted unquoted, so paths with
        # spaces or shell metacharacters will be interpreted by the shell.
        command = "ffmpeg -i " + input_file + " -an -vf fps=2 '" + tmpdir + "/frame_%05d_" + str(
            dateTimeObj) + ".jpg'"
        subprocess.call(command, shell=True)

        #Tesseract runs the ocr on frames extracted
        script_start = time.time()
        #output_name =  input_file[:-4]+ "-ocr_"+str(dateTimeObj)+".json"

        # Get some stats on the video
        (dim, frameRate, numFrames) = findVideoMetada(input_file)

        # NOTE(review): the dict literal below is missing its closing braces
        # in this listing.
        output = {
            "media": {
                "filename": input_file,
                "frameRate": frameRate,
                "numFrames": numFrames,
                "resolution": {
                    "width": int(dim[0]),
                    "height": int(dim[1])
            "frames": []

        #for every saved frame, in sorted filename order
        start_time = 0
        for num, img in enumerate(sorted(os.listdir(tmpdir))):
            # Unary plus: this assigns 0.5 * num (frames are extracted at
            # fps=2, i.e. 0.5 s apart); it is not an in-place `+=`.
            start_time = +(.5 * num)
            frameList = {"start": str(start_time), "objects": []}

            #Run OCR
            # NOTE(review): the call below is left unclosed in this listing.
            result = pytesseract.image_to_data(Image.open(tmpdir + "/" + img),

            #For every result, make a box & add it to the list of boxes for this frame, called frameList
            for i in range(len(result["text"])):
                if result["text"][i].strip(
                ):  #if the text isn't empty/whitespace
                    # NOTE(review): the box literal below is incomplete in
                    # this listing (missing keys, divisors and closing
                    # braces), and no visible line adds it to frameList.
                    box = {
                        "text": result["text"][i],
                        "score": {
                            "type": "confidence",
                            "scoreValue": result["conf"][i]
                        # relative coords
                        "vertices": {
                            result["left"][i] /
                            result["top"][i] /
                            "xmax": (result["left"][i] + result["width"][i]) /
                            "ymax": (result["top"][i] + result["height"][i]) /

                #save frame if it had text
                # NOTE(review): the `if` below has no body in this listing.
            if len(frameList["objects"]) > 0:

        # save the output json file
        mgm_utils.write_json_file(output, output_name)
def main():
    # Command-line entry point: upload a video to S3, submit it to the Azure
    # Video Indexer API, poll until indexing leaves the "Uploaded" /
    # "Processing" states, then write the index JSON (and optionally the OCR
    # artifact) and delete the S3 copy.
    # Positional arguments (sys.argv[1:7]): input_file, include_ocr,
    # location, root_dir, index_file, ocr_file.
    apiUrl = "https://api.videoindexer.ai"

    (input_file, include_ocr, location, root_dir, index_file,
     ocr_file) = sys.argv[1:7]

    # NOTE(review): the import below is indented and followed by `except`,
    # but no matching `try:` line is visible in this listing.
        import http.client as http_client
    except ImportError:
        # Python 2
        import httplib as http_client

    # Azure settings are read from the 'azure' section of the config
    config = read_config(root_dir)
    s3_bucket = config['azure']['s3Bucket']
    accountId = config['azure']['accountId']
    apiKey = config['azure']['apiKey']

    # You must initialize logging, otherwise you'll not see debug output.
    # NOTE(review): no logging setup follows this comment in this listing.

    # Turn on HTTP debugging here
    http_client.HTTPConnection.debuglevel = 1

    s3_path = upload_to_s3(input_file, s3_bucket)
    print("S3 path " + s3_path)

    # Get an authorization token for subsequent requests
    auth_token = get_auth_token(apiUrl, location, accountId, apiKey)

    # The S3 region (us-east-2) is hard-coded in this URL
    video_url = "https://" + s3_bucket + ".s3.us-east-2.amazonaws.com/" + s3_path

    # Upload the video and get the ID to reference for indexing status and results
    # NOTE(review): the call below is left unclosed in this listing.
    videoId = upload_video(apiUrl, location, accountId, auth_token, input_file,

    # Get the auth token associated with this video
    # video_auth_token = get_video_auth_token(apiUrl, location, accountId, apiKey, videoId)

    # Check on the indexing status
    while True:
        # The token expires after an hour.  Let's just refresh every iteration
        video_auth_token = get_video_auth_token(apiUrl, location, accountId,
                                                apiKey, videoId)

        # NOTE(review): the call below is left unclosed in this listing.
        state = get_processing_status(apiUrl, location, accountId, videoId,

        # We have a status other than uploaded or processing, it is complete
        # NOTE(review): the `if` below has no body in this listing, so no
        # loop exit is visible.
        if state != "Uploaded" and state != "Processing":

        # Wait a bit before checking again
        # NOTE(review): no wait/sleep call is visible in this listing.

    # Turn on HTTP debugging here
    # (debuglevel was already set to 1 above; this assigns 1 again)
    http_client.HTTPConnection.debuglevel = 1

    # Get the simple video index json, using a freshly requested auth token
    auth_token = get_auth_token(apiUrl, location, accountId, apiKey)
    index_json = get_video_index_json(apiUrl, location, accountId, videoId,
                                      auth_token, apiKey)
    mgm_utils.write_json_file(index_json, index_file)

    # Get the advanced OCR json via the artifact URL if requested
    # (include_ocr is compared case-insensitively against 'true')
    if include_ocr.lower() == 'true':
        artifacts_url = get_artifacts_url(apiUrl, location, accountId, videoId,
                                          auth_token, 'ocr')
        download_artifacts(artifacts_url, ocr_file)
    # TODO otherwise do we need to generate a dummy file so the output is not empty and cause error?

    # Remove the uploaded copy from S3 once the results have been written
    delete_from_s3(s3_path, s3_bucket)
def main():
	# Command-line entry point: convert an AWS Transcribe JSON output into
	# a speech-to-text JSON file and a segmentation (diarization) JSON file.
	# Positional arguments (sys.argv[1:5]): media_file, transcribe_file,
	# output_stt_json_file, output_seg_json_file.
	(media_file, transcribe_file, output_stt_json_file, output_seg_json_file) = sys.argv[1:5]

	# Open the transcribe output
	with open(transcribe_file) as json_file:
		data = json.load(json_file)
	amp_results = SpeechToTextResult()

	# Fail if we don't have results
	# NOTE(review): the `if` below has no body in this listing.
	if "results" not in data.keys():

	aws_results = data["results"]

	# NOTE(review): the `if` below has no body in this listing.
	if "transcripts" not in aws_results.keys():

	# Parse transcript: concatenate the text of every transcript entry
	transcripts = aws_results["transcripts"]
	for t in transcripts:
		amp_results.transcript = amp_results.transcript + t["transcript"]

	# Fail if we don't have any items
	# NOTE(review): the `if` below has no body in this listing.
	if "items" not in aws_results.keys():

	# Parse items (words)
	items = aws_results["items"]
	duration = 0.00
	# For each item, get the necessary parts and store as a word
	for i in items:
		alternatives = i["alternatives"]
		# Choose an alternative
		max_confidence = 0.00
		text = ""

		# Each word is stored as an "alternative".  Get the one with the maximum confidence
		# (`>=` means that on a tie the later alternative wins)
		for a in alternatives:
			if float(a["confidence"]) >= max_confidence:
				max_confidence = float(a["confidence"])
				text = a["content"]

		# -1 is kept as the start/end time for items that are not "pronunciation"
		end_time = -1
		start_time = -1

		# Two types (punctuation, pronunciation).  Only keep times for pronunciation
		if i["type"] == "pronunciation":
			end_time = float(i["end_time"])
			start_time = float(i["start_time"])

			# If this is the greatest end time, store it as duration
			if end_time > duration:
				duration = end_time
		# Add the word to the results
		amp_results.addWord(i["type"], start_time, end_time, text, "confidence", max_confidence)
	# Create the media object
	media = SpeechToTextMedia(duration, media_file)

	# Create the final object
	outputFile = SpeechToText(media, amp_results)

	# Write the output
	mgm_utils.write_json_file(outputFile, output_stt_json_file)

	# Start segmentation schema with diarization data
	# Create a segmentation object to serialize
	seg_schema = Segmentation()

	# Create the media object
	segMedia = SegmentationMedia(duration, media_file)
	seg_schema.media = segMedia
	# Diarization segments are only added when the input has "speaker_labels";
	# the segmentation file is written either way
	if "speaker_labels" in aws_results.keys():
		speakerLabels = aws_results["speaker_labels"]
		seg_schema.numSpeakers = speakerLabels["speakers"]

		# For each segment, get the start time, end time and speaker label
		segments = speakerLabels["segments"]
		for segment in segments:
			seg_schema.addDiarizationSegment(float(segment["start_time"]), float(segment["end_time"]), segment["speaker_label"])
	# Write the output
	mgm_utils.write_json_file(seg_schema, output_seg_json_file)
def main():
    # Command-line entry point: run an AWS Comprehend job on the transcript
    # of a speech-to-text JSON file and write the extracted entities, with
    # time offsets taken from the speech-to-text words, to a JSON file.
    # Positional arguments (sys.argv[1:5]): input_file, json_file,
    # bucketName, dataAccessRoleArn; an optional sixth argument (sys.argv[5])
    # lists entity categories to ignore.
    (input_file, json_file, bucketName, dataAccessRoleArn) = sys.argv[1:5]

    # Read a list of categories to ignore when outputting entity list
    ignore_cats_list = list()

    if len(sys.argv) > 5:
        print("ignore cats:" + sys.argv[5])
        ignore_cats_list = split_ignore_list(sys.argv[5])

    # Variable declaration
    # The job name is timestamped to the second and doubles as the S3 object
    # key of the uploaded transcript
    outputS3Uri = 's3://' + bucketName + '/'
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    jobName = 'AwsComprehend-' + timestamp + ".json"
    inputS3Uri = outputS3Uri + jobName

    # Get the transcript text from the input file
    with open(input_file, 'r') as file:
        stt = SpeechToText().from_json(json.load(file))

    # Create the ner object
    ner = EntityExtraction()

    # Add the media information
    # NOTE(review): as written, both assignments are inside the `if`, so the
    # len() call runs exactly when stt or stt.results is None, and
    # mediaLength is never set otherwise -- confirm whether an `else:` is
    # intended before the second assignment.
    if stt is None or stt.results is None:
        mediaLength = 0
        mediaLength = len(stt.results.transcript)

    # If we have a blank file, don't error.  Create another blank json file to pass to the next process
    # NOTE(review): no early exit is visible after the write below, so
    # execution continues into the Comprehend job in this listing.
    if mediaLength == 0:
        ner.media = EntityExtractionMedia(mediaLength, input_file)
        mgm_utils.write_json_file(ner, json_file)

    # Create a temp file to upload to S3
    tmpfile = create_temp_transcript_file(jobName, stt.results.transcript)

    # Copy the temporary text file to S3
    copy_to_s3(tmpfile.name, bucketName, jobName)

    # Make call to aws comprehend
    # NOTE(review): the call below is left unclosed in this listing.
    output_uri = run_comprehend_job(jobName, inputS3Uri, outputS3Uri,

    uncompressed_file = download_from_s3(output_uri, outputS3Uri, bucketName)

    # NOTE(review): the `if` below has no body in this listing.
    if uncompressed_file is None:

    comprehend_data = read_comprehend_response(uncompressed_file)

    ner.media = EntityExtractionMedia(mediaLength, input_file)

    # Variables for filling time offsets based on speech to text
    lastPos = 0  # Iterator to keep track of location in STT word
    sttWords = len(stt.results.words)  # Number of STT words

    if 'Entities' in comprehend_data.keys():
        for entity in comprehend_data["Entities"]:
            entity_type = entity["Type"]
            # Start and end time offsets
            start = None
            end = None
            text = entity["Text"]

            # Split the entity into an array of words based on whitespace
            entityParts = text.split()

            # For each word in the entity, find the corresponding word in the STT word list
            # The search starts at lastPos rather than at the beginning of the list
            foundWordPos = None
            for entityPart in entityParts:
                for wordPos in range(lastPos, sttWords):
                    # If it matches, set the time offset.
                    word = stt.results.words[wordPos]
                    if clean_entity_word(
                            word.text) == clean_entity_word(entityPart):
                        # Keep track of last position to save iterations
                        foundWordPos = wordPos
                        # Set start if we haven't set it yet
                        if start is None:
                            start = word.start
                        end = word.end
                        # NOTE(review): as written, the three resets below
                        # run right after a match and discard it -- a
                        # `break` / `else:` appears to be missing in this
                        # listing.
                        start = None
                        end = None
                        foundWordPos = None

            # NOTE(review): as written, the message below is printed when a
            # start WAS found; an `else:` appears to be missing.
            if start is not None:
                lastPos = foundWordPos
                print("Could not find word")
            # Keep the entity only when its category is not ignored and a start time was found
            # NOTE(review): the body below starts mid-call in this listing;
            # the opening of the call is not visible.
            if clean_text(
                    entity_type) not in ignore_cats_list and start is not None:
                    entity_type, text, None, None, "relevance",
                    float(entity["Score"]), start,
                    None)  #AMP-636 removed startOffset=endOffset=end=None

    #Write the json file
    mgm_utils.write_json_file(ner, json_file)

    #Cleanup temp files
    # NOTE(review): no cleanup code follows this comment in this listing.
def main():
    # NOTE(review): the next line reads like docstring text but has no
    # quotes around it in this listing; main.__doc__ is used below as the
    # argparse description.
    Submit a job to run ina speech segmenter on HPC
    parser = argparse.ArgumentParser(description=main.__doc__)
    # NOTE(review): the line below is the tail of an add_argument call whose
    # opening is not visible here; args.debug is read further down.
                        help="Turn on debugging")
    parser.add_argument("root_dir", help="Galaxy root directory")
    parser.add_argument("input", help="input audio file")
    parser.add_argument("segments", help="INA Speech Segmenter output")
    parser.add_argument("amp_segments", help="AMP Segmentation Schema output")
    parser.add_argument("hpc_timestamps", help="HPC Timestamps output")
    args = parser.parse_args()

    # set up logging (DEBUG level when args.debug is set, INFO otherwise)
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO,
                        format="%(asctime)s %(levelname)s %(message)s")

    # The HPC dropbox setting comes from the "hpc" section of the config
    config = mgm_utils.get_config(args.root_dir)
    dropbox = config["hpc"]["dropbox"]

    # job parameters
    # NOTE(review): the dict literal below is missing its closing braces in
    # this listing.
    job = {
        'script': 'ina_speech_segmenter',
        'input_map': {
            'input': args.input
        'output_map': {
            'segments': args.segments
    print("Submitting job to HPC")
    # `job` is rebound to the value returned by submit_and_wait
    job = hpc_submit.submit_and_wait(dropbox, job)

    print("Checking job status: " + job['job']['status'])
    # NOTE(review): the `if` below has no body in this listing.
    if job['job']['status'] != 'ok':

    print("Reading TSV into list of tuples")
    # Each tab-separated row becomes one tuple of column strings
    with open(args.segments, 'r') as csvin:
        data = [tuple(line) for line in csv.reader(csvin, delimiter='\t')]

    print("Converting ina output  to segmentation schema")
    # Convert the resulting list of tuples to an object for serialization
    seg_schema = convert_to_segmentation_schema(args.input, data)

    print("Writing output json")
    # Serialize the json and write it to destination file
    mgm_utils.write_json_file(seg_schema, args.amp_segments)

    print("Job output:")

    # Write the hpc timestamps output, only when the job reports both "start" and "end"
    # NOTE(review): the dict literal below is incomplete in this listing
    # (no keys, unbalanced parentheses, no closing brace).
    if "start" in job['job'].keys() and "end" in job['job'].keys():
        ts_output = {
            (datetime.strptime(job['job']["end"], '%Y-%m-%d %H:%M:%S.%f') -
                               '%Y-%m-%d %H:%M:%S.%f')).total_seconds()
        mgm_utils.write_json_file(ts_output, args.hpc_timestamps)

def main():
    # Command-line entry point: convert a corrected DraftJs JSON file back
    # into a speech-to-text transcript JSON, copying confidence scores from
    # the original transcript for words that are unchanged.
    # Positional arguments (sys.argv[1:5]): root_dir, from_draftjs,
    # original_transcript, to_transcript.
    (root_dir, from_draftjs, original_transcript,
     to_transcript) = sys.argv[1:5]

    # The logger is keyed on the output filename (to_transcript) rather than
    # the input filename; presumably the output is unique per job while the
    # same input may be shared by several jobs -- TODO confirm.
    # From here on everything written to stdout/stderr goes to the logger.
    logger = MgmLogger(root_dir, "hmgm_transcript", to_transcript)
    sys.stdout = logger
    sys.stderr = logger

    # NOTE(review): the lines below are indented one level deeper and end
    # with an `except`, but no matching `try:` line is visible in this
    # listing.
        # if from_draftjs is in error raise exception to notify HMGM job runner to fail the job
        # otherwise if from_draftjs doesn't exist yet, exit 1 to keep waiting
        # NOTE(review): the print call below is left unclosed in this listing.
        print("Converting DraftJs " + from_draftjs + " to Transcript " +

        with open(from_draftjs) as json_file:
            d = json.load(json_file)
            # NOTE(review): eval() is applied to JSON text here. JSON
            # true/false/null are not Python names, so eval would fail on
            # input containing them, and eval on file content is a security
            # risk -- confirm why `d` is not used directly.
            data = eval(json.dumps(d))

        #read original file for extracting only the confidence score of each word
        # NOTE(review): this file handle is not closed in the visible code.
        original_input = open(original_transcript)
        original_json = json.loads(original_input.read())
        original_items = original_json["results"]["words"]

        #print("the data in editor output is:",data)
        results = SpeechToTextResult()
        word_type = text = ''
        confidence = start_time = end_time = -1
        duration = 0.0

        # draftJS input file here always came from converted and corrected AMP Transcript,
        # so it should always contain 'entityMap', otherwise error should occur
        #Standardising draft js format
        #         if "entityMap" in data.keys():
        transcript = ''
        entityMap = data["entityMap"]
        # Entities are looked up by the string form of their index: "0", "1", ...
        for i in range(0, len(entityMap.keys())):
            punctuation = ''
            # NOTE(review): the `if` below has no body in this listing.
            if str(i) not in entityMap.keys():
            entity = entityMap[str(i)]
            if "data" in entity:
                if "text" in entity["data"].keys():
                    text = entity["data"]["text"]
                    transcript += entity["data"]["text"] + " "
                    # Split one trailing punctuation character off the word so
                    # it can be added as a separate 'punctuation' word below
                    if text[-1] in string.punctuation:  #[',','.','!','?']:
                        punctuation = text[-1]
                        text = text[0:-1]

                if "type" in entity:
                    entity_type = entity["type"]
                    if entity_type == "WORD":
                        word_type = "pronunciation"
                        if "start" in entity["data"]:
                            start_time = float(entity["data"]["start"])

                        if "end" in entity["data"]:
                            end_time = float(entity["data"]["end"])

                        # The largest end time seen so far is used as the duration
                        if end_time > duration:
                            duration = end_time
                        # NOTE(review): as written, this overwrites the
                        # "pronunciation" value set above; an `else:`
                        # appears to be missing in this listing.
                        word_type = entity_type

            results.addWord(word_type, start_time, end_time, text,
                            "confidence", confidence)
            if len(punctuation) > 0:
                results.addWord('punctuation', None, None, punctuation,
                                "confidence", 0.0)

        results.transcript = transcript
        words = results.words
        #Now retrieving the confidence values from the original input file and assigning them to 'results'
        list_items = []
        list_result = []
        # NOTE(review): the two loops below have no body in this listing, so
        # nothing visible fills list_items / list_result.
        for i in range(0, len(original_items)):

        for j in range(0, len(words)):

        # Diff the two lists; difflib.Differ prefixes lines with "- " (only in
        # the first sequence), "+ " (only in the second) or "  " (in both)
        d = difflib.Differ()
        res = list(d.compare(list_items, list_result))
        # i indexes original_items, j indexes words
        i = j = 0
        word_count = len(words)
        original_item_count = len(original_items)
        print("original item count: " + str(original_item_count))
        print("word count: " + str(word_count))
        for ele in res:
            # NOTE(review): the `if` below has no body in this listing.
            if j >= word_count or i >= original_item_count:
            elif ele.startswith("- "):
                # Only in the original: skip that original item
                i += 1
            elif len(ele) > 2 and ele[0:2] == "+ ":
                # Only in the edited result: score defaults to 1.0
                words[j].score.scoreValue = 1.0
                j += 1
            elif ele[0:1] == " " and words[j].text == original_items[i]["text"]:
                # NOTE(review): the assignment below is left unclosed and the
                # following line starts mid-expression in this listing.
                if ("score" in original_items[i]):
                    words[j].score.scoreValue = float(
                        j].score.scoreValue = 1.0  # default score to 1.0 if not existing originally
                i += 1
                j += 1
            print("i: " + str(i) + " j:" + str(j))

        # Create the media object
        media = SpeechToTextMedia(duration, original_transcript)

        # Create the final object
        stt = SpeechToText(media, results)

        # Write the output
        mgm_utils.write_json_file(stt, to_transcript)
        print("Successfully converted from DraftJs " + from_draftjs +
              " to Transcript " + to_transcript)
        # as the last command in HMGM, implicitly exit 0 here to let the whole job complete in success
    except Exception as e:
        # as the last command in HMGM, exit -1 to let the whole job fail
        # NOTE(review): the lines below are the arguments of a call whose
        # opening is not visible in this listing.
            "Failed to convert from DraftJs " + from_draftjs +
            " to Transcript " + to_transcript, e)
def write_amp_json(temp_gentle_output, original_transcript, amp_transcript_output):
	# Build an AMP transcript dict from a Gentle alignment output and write
	# it to amp_transcript_output.
	#   temp_gentle_output:    path to the Gentle JSON file (opened and parsed here)
	#   original_transcript:   already-loaded transcript mapping; its "media"
	#                          and "results" entries are read below
	#   amp_transcript_output: destination passed to mgm_utils.write_json_file
	# Create the amp transcript
	output = dict()
	with open(temp_gentle_output, "r") as gentle_output_file:
		gentle_output = json.load(gentle_output_file)
		# Media info and transcript text are copied from the original transcript
		output["media"] = original_transcript["media"]
		output["results"] = dict()
		output["results"]["transcript"] = original_transcript["results"]["transcript"]
		output["results"]["words"] = list()
		# End time of the most recently handled word
		previous_end = 0
		# Index recorded at the end of the previous loop iteration
		last_success_index = 0
		for word in gentle_output["words"]:
			# Make sure we have all the data
			if word["case"] == 'success':
				previous_end = word["end"]
				# NOTE(review): the entries below are the inside of a dict
				# literal whose opening (and the call that stores it) is not
				# visible in this listing; the closing braces are missing too.
							"type": "pronunciation", 
							"start": word["start"], 
							"end": word["end"], 
							"text": word["word"],
							"score": {
									"type": "confidence", 
									"scoreValue": 1.0
				# NOTE(review): the lines below read like the handling of a
				# word that was NOT aligned; an `else:` appears to be missing
				# in this listing.
				# list.index returns the first equal entry, so duplicate word
				# dicts would resolve to the earliest index
				word_index = gentle_output["words"].index(word)
				next_success_index = find_next_success(gentle_output, word_index)
				avg_time = 0
				# If we found another success
				if(next_success_index > word_index):
					# Average the times based on how many words in between
					next_success_word = gentle_output["words"][next_success_index]
					skips_ahead = (next_success_index - last_success_index)
					avg_time = (next_success_word["start"] - previous_end)/skips_ahead
					print("Averaging time from next success")
					# NOTE(review): as written, the end-of-file averaging
					# below overwrites the values computed just above; an
					# `else:` appears to be missing in this listing.
					duration = original_transcript["results"]["duration"]
					skips_ahead = (len(gentle_output["words"]) - word_index) + 1
					avg_time = (duration - previous_end)/skips_ahead
					print("Averaging time from end of file")
				# From the previous words end (last recorded), skip time ahead
				time = previous_end + avg_time
				previous_end = time
				print(word["word"]  + " at index " + str(word_index))
				print("Avg_time " + str(avg_time)  + " Skips ahead " + str(skips_ahead))

				# Add the word to the results
				# (start and end are both set to the estimated time)
				# NOTE(review): as above, the dict opening and closing braces
				# are not visible in this listing.
						"type": "pronunciation", 
						"start": time, 
						"end": time, 
						"text": word["word"],
						"score": {
								"type": "confidence", 
								"scoreValue": 1.0
			# Record this word's index for the next iteration
			last_success_index = gentle_output["words"].index(word)
		mgm_utils.write_json_file(output, amp_transcript_output)