Python get_paragraph_text示例，intent_parser.utils.intent_parser_utils.get_paragraph_text Python示例

示例#1

0

显示文件

文件： intent_parser.py 项目： SD2E/experimental-intent-parser-mw

 def generate_displayId_from_selection(self, start_paragraph, start_offset, end_offset):
     """
     Extract the selected text from one paragraph and derive a display id from it.

     start_paragraph indexes into the document's paragraphs; start_offset and
     end_offset are character positions within that paragraph's text, with
     end_offset treated as inclusive.

     Returns a tuple of (selected text with surrounding whitespace removed,
     the result of self.sbh.sanitize_name_to_display_id for that text).
     """
     paragraph = self.lab_experiment.paragraphs()[start_paragraph]
     text = intent_parser_utils.get_paragraph_text(paragraph)
     # end_offset is inclusive, hence the + 1 on the slice bound
     selected = text[start_offset:end_offset + 1].strip()
     display_id = self.sbh.sanitize_name_to_display_id(selected)
     return selected, display_id

示例#2

0

显示文件

文件： intent_parser.py 项目： SD2E/experimental-intent-parser-mw

    def update_experimental_results(self):
        """
        Build the 'updateExperimentResults' action for the current document.

        Experiments are queried from SBH and kept when their request document
        matches this document's Google Docs URI. The document's paragraphs are
        then scanned for the "Experiment Results" header and for the first
        non-blank paragraph after it.

        Returns:
            dict with the keys:
              'action'     - always 'updateExperimentResults'
              'headerIdx'  - index of the "Experiment Results" paragraph, or -1
              'contentIdx' - index of the first non-blank paragraph after the
                             header, or -1 if none was found
              'expData'    - one tuple of display text fragments per matching
                             experiment, or a single placeholder message when
                             no experiment matches
              'expLinks'   - one tuple of link targets per matching experiment,
                             positionally aligned with the 'expData' fragments
        """
        # For test documents, replace doc id with corresponding production doc
        doc_id = self.lab_experiment.document_id()
        source_doc_uri = 'https://docs.google.com/document/d/' + self._test_doc_id_map.get(doc_id, doc_id)

        # Search SBH to get data
        target_collection = '%s/user/%s/experiment_test/experiment_test_collection/1' % (
            self.sbh.get_sbh_url(), self.sbh.get_sbh_collection_user())
        exp_collection = self.sbh.query_experiments(self.sbh, target_collection)

        # Keyed by experiment URI, so a URI reported twice keeps its last entry
        data = {}
        for exp in exp_collection:
            exp_uri = exp['uri']
            timestamp = exp['timestamp']
            title = exp['title']
            request_doc = self.sbh.query_experiment_request(exp_uri)
            if source_doc_uri == request_doc:
                # Get the reference to the source document with lab data
                source_uri = self.sbh.query_experiment_source(exp_uri)
                data[exp_uri] = {'timestamp': timestamp, 'agave': source_uri[0], 'title': title}

        exp_data = []
        exp_links = []
        for exp_uri, info in data.items():
            exp_data.append((info['title'], ' updated on ', info['timestamp'], ', ', 'Agave link', '\n'))
            exp_links.append((exp_uri, '', '', '', info['agave'], ''))

        # exp_data is a list, so it must be tested for emptiness; the previous
        # comparison against '' could never be true and the placeholder was
        # never emitted.
        if not exp_data:
            exp_data = ['No currently run experiments.']

        paragraphs = self.lab_experiment.paragraphs()

        header_idx = -1
        content_idx = -1
        for para_idx, paragraph in enumerate(paragraphs):
            para_text = intent_parser_utils.get_paragraph_text(paragraph)
            if para_text == "Experiment Results\n":
                header_idx = para_idx
            elif header_idx >= 0 and para_text != '\n':
                # First non-blank paragraph after the header holds the content
                content_idx = para_idx
                break

        if header_idx >= 0 and content_idx == -1:
            self.logger.error("ERROR: Couldn't find a content paragraph index for experiment results!")

        return {
            'action': 'updateExperimentResults',
            'headerIdx': header_idx,
            'contentIdx': content_idx,
            'expData': exp_data,
            'expLinks': exp_links,
        }

示例#3

0

显示文件

def get_currently_selected_text(testcase, ips, doc_id, doc_content):
    """
    Return the text covered by the current spelling result's selection.

    The current spelling result is looked up in ips.client_state_map[doc_id];
    its 'select_start' and 'select_end' dicts give the paragraph index and the
    cursor positions of the selection. The end cursor index is inclusive.
    A selection spanning more than one paragraph is reported through
    testcase.fail.
    """
    client_state = ips.client_state_map[doc_id]
    current_result = client_state['spelling_results'][client_state['spelling_index']]
    start = current_result['select_start']
    end = current_result['select_end']

    if start['paragraph_index'] != end['paragraph_index']:
        testcase.fail('Selection starting and ending paragraphs differ! Not supported!')

    selected_paragraph = ips.get_paragraphs(doc_content)[start['paragraph_index']]
    text = table_utils.get_paragraph_text(selected_paragraph)
    first = start['cursor_index']
    last = end['cursor_index']
    # The end cursor is inclusive, so extend the slice by one character
    return text[first:last + 1]

示例#4

0

显示文件

def detect_new_measurement_table(table):
    """
    Report whether a table's header row has every column of a new-style measurements table.

    The header row is read from the second entry of table['table']['tableRows'].
    Only the first content element of each header cell is inspected, and its
    text is compared after stripping surrounding whitespace.

    Returns True only when the replicate, strains, measurement-type and
    file-type header values are all present.
    """
    header_cells = table['table']['tableRows'][1]['tableCells']
    header_texts = {
        intent_parser_utils.get_paragraph_text(cell['content'][0]['paragraph']).strip()
        for cell in header_cells
    }
    # A header row without any cells cannot contain a required title
    if not header_texts:
        return False

    required_titles = (
        intent_parser_constants.HEADER_REPLICATE_VALUE,
        intent_parser_constants.HEADER_STRAINS_VALUE,
        intent_parser_constants.HEADER_MEASUREMENT_TYPE_VALUE,
        intent_parser_constants.HEADER_FILE_TYPE_VALUE,
    )
    return all(title in header_texts for title in required_titles)

示例#5

0

显示文件

文件： intent_parser.py 项目： SD2E/experimental-intent-parser-mw

    def calculate_samples(self):
        """
        Compute the sample count of every data row in each new-style measurement table.

        Tables rejected by table_utils.detect_new_measurement_table are skipped.
        For the remaining tables the first row is the header. The sample count
        of a data row is the product of one factor per counted column:
          - replicate column: the integer value of the cell text
          - any other counted column: the number of comma-separated entries
        The measurement-type, notes and samples columns contribute no factor.

        Returns:
            dict with the keys:
              'action'        - always 'calculateSamples'
              'tableIds'      - index (within the document's tables) of each
                                processed table
              'sampleIndices' - per table, the column index of the samples
                                header, or -1 when the table has none
              'sampleValues'  - per table, the list of per-row sample counts

        Raises:
            ValueError: if a replicate cell does not hold an integer.
        """
        def cell_text(cell):
            # Join the stripped text of every paragraph in the cell with spaces
            return ' '.join(
                intent_parser_utils.get_paragraph_text(element['paragraph']).strip()
                for element in cell['content']).strip()

        table_ids = []
        sample_indices = []
        samples_values = []
        for table_idx, table in enumerate(self.lab_experiment.tables()):
            if not table_utils.detect_new_measurement_table(table):
                continue

            rows = table['tableRows']
            # Header text is the same for every data row, so extract it once
            # per table instead of once per row and column.
            header_texts = [
                intent_parser_utils.get_paragraph_text(cell['content'][0]['paragraph']).strip()
                for cell in rows[0]['tableCells']
            ]
            num_cols = len(header_texts)

            # Column of the samples header; the last match wins, -1 when absent
            samples_col = -1
            for col_idx, header_txt in enumerate(header_texts):
                if header_txt == intent_parser_constants.COL_HEADER_SAMPLES:
                    samples_col = col_idx

            # Index of the first measurement-type column. Every column to its
            # left is counted by comma-separated entries regardless of its
            # header. Without such a column all columns are treated that way.
            type_col = next(
                (col_idx for col_idx, header_txt in enumerate(header_texts)
                 if header_txt == intent_parser_constants.COL_HEADER_MEASUREMENT_TYPE),
                num_cols)

            # Certain columns don't contain info about samples
            skipped_headers = (
                intent_parser_constants.COL_HEADER_MEASUREMENT_TYPE,
                intent_parser_constants.COL_HEADER_NOTES,
                intent_parser_constants.COL_HEADER_SAMPLES,
            )

            table_samples = []
            # Scrape data for each row
            for row in rows[1:]:
                comp_count = []

                # Process reagents
                for col_idx in range(type_col):
                    reagent_txt = cell_text(row['tableCells'][col_idx])
                    comp_count.append(len(reagent_txt.split(sep=',')))

                # Process the rest of the columns
                for col_idx in range(type_col + 1, num_cols):
                    header_txt = header_texts[col_idx]
                    if header_txt in skipped_headers:
                        continue

                    value_txt = cell_text(row['tableCells'][col_idx])
                    if header_txt == intent_parser_constants.COL_HEADER_REPLICATE:
                        comp_count.append(int(value_txt))
                    else:
                        comp_count.append(len(value_txt.split(sep=',')))

                table_samples.append(int(np.prod(comp_count)))

            table_ids.append(table_idx)
            sample_indices.append(samples_col)
            samples_values.append(table_samples)

        return {
            'action': 'calculateSamples',
            'tableIds': table_ids,
            'sampleIndices': sample_indices,
            'sampleValues': samples_values,
        }