Python Preprocessing.conflate примеры использования

Язык программирования: Python

Класс/Тип: Preprocessing

Метод/Функция: conflate

Примеров на hotexamples.com: 3

Python Preprocessing.conflate - 3 примера найдено. Это лучшие примеры Python кода для Preprocessing.conflate из пакета espresso, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Preprocessing(30)

updateProgressBar(4)

conflate(3)

Preprocessor(3)

KaiserFil(2)

Prepare_Model_Input_Data(2)

PCA(2)

parseName(2)

Clean(2)

DataPreparation(2)

encryptFingerprint(1)

Preprocesser(1)

Preprocessiming(1)

PreprocessingForFeatureBasedApproach(1)

PreprocessingForSpectrogramApproach(1)

Preprocessing_df(1)

skeletonization(1)

Prospective(1)

createKeyPoints(1)

ReadCSVFile(1)

ReadNDrop(1)

Recover_classes_all_labels(1)

RedundantClean(1)

SignalsIntake(1)

improveImage(1)

Read_Metadata_File(1)

ArtifactRemoval(1)

PSD(1)

Garbages(1)

CleanDataHoles(1)

DAPS(1)

DatasetConstruct(1)

Distortion(1)

EncodeDataset(1)

FeatureEncoding(1)

FeatureScaling(1)

FreqDist(1)

GetData(1)

Balance(1)

Histogram(1)

Hyperparameter_selection(1)

Impute(1)

LBH_Norm(1)

Lematize(1)

MissingClean(1)

MissingCount(1)

NLTKTokenizer(1)

Normalise(1)

OutlierRaw(1)

Пример #1

Показать файл

Файл: collateOS_1.0.py Проект: obdurodon/CollateOS

def createJsonRepresentation(app):
    """Build a JSON-compatible witness/token structure from a DOM element.

    Every element child of ``app`` is treated as one witness.  For each
    witness, every descendant ``<w>`` element that has at least one direct
    text-node child contributes one token dict with the keys:

    * ``'t'`` -- a slice of the element's serialized XML (see inline note),
    * ``'n'`` -- the result of ``Preprocessing.conflate`` for the element,
    * ``'u'`` -- the value returned by ``getUnit(app)``.

    :param app: DOM node exposing ``childNodes``; its element children must
        support ``getAttribute`` and ``getElementsByTagName``.
    :returns: ``{'witnesses': [{'id': ..., 'tokens': [...]}, ...]}`` after a
        ``json.dumps``/``json.loads`` round-trip.
    """
    unit = getUnit(app)
    allWits = []
    # nodeType 1 is the DOM ELEMENT_NODE constant: ignore text/comment children.
    rdgs = [el for el in app.childNodes if el.nodeType == 1]
    for rdg in rdgs:
        witId = rdg.getAttribute('wit')
        tokenList = []
        for w in rdg.getElementsByTagName('w'):
            # nodeType 3 is the DOM TEXT_NODE constant: skip <w> elements
            # that have no direct text child.
            if 3 not in [child.nodeType for child in w.childNodes]:
                continue
            tokenList.append({
                # Drops the first 8 + len(n) and the last 4 characters of the
                # serialized element -- presumably the '<w n="...">' opening
                # tag and the '</w>' closing tag.  Assumes 'n' is the only
                # attribute on <w> -- TODO confirm.
                't': w.toxml()[8 + len(w.getAttribute('n')):-4],
                'n': Preprocessing.conflate(w),
                'u': unit,
            })
        allWits.append({'id': witId, 'tokens': tokenList})
    root = {'witnesses': allWits}
    # The dumps/loads round-trip is kept from the original so the returned
    # structure contains exactly what the json module produces.
    return json.loads(json.dumps(root))

Пример #2

Показать файл

Файл: XMLStoJSON.py Проект: obdurodon/CollateOS

    # Fragment of a per-file loop: docLevel, afile, debug, html, path, unit,
    # alldocs, root, jsonFileName and startTimeX2J are defined outside the
    # visible lines.
    # The current file name is used as the witness id.
    docLevel['id'] = afile
    tokenList = []
    if debug:
        # Debug output: a heading and the start of an Original/Conflated table.
        html.write('<h2>' + afile + '</h2><table border = "1"><th>Original<th>Conflated</th>')
    # Collect every <w> element of the parsed XML file.
    ws = minidom.parse(os.path.join(path, afile)).getElementsByTagName('w')
    # Conflated forms, in order; appended to below but not read in this fragment.
    words = []
    for w in range(len(ws)):
        if not 3 in [child.nodeType for child in ws[w].childNodes]: #checking presence of text nodes inside the w
            continue
        currentWord = ws[w]
        previousWord = ''
        # NOTE(review): for w == 0 this evaluates ws[-1]. On a list-like
        # sequence that is the LAST element rather than an IndexError, so the
        # '' fallback would never be used and the first word would be compared
        # against the last one -- confirm whether that is intended.
        # NOTE(review): ws[w-1] is taken even if that element was skipped above
        # for lacking a text node -- confirm.
        try:
            previousWord = ws[w-1]
        except IndexError:
            pass
        token = {}
        # Drops the first 8 + len(n) and the last 4 characters of the
        # serialized element -- presumably the '<w n="...">' opening tag and
        # the '</w>' closing tag; assumes 'n' is the only attribute -- TODO confirm.
        token['t'] = currentWord.toxml()[8+len(currentWord.getAttribute('n')):-4]
        c = Preprocessing.conflate(currentWord)
        if c == Preprocessing.conflate(previousWord):
            c += '1' # tag '1' to the end of a word that we suspect is repeated in the manuscript.
        token['n'] = c
        token['u'] = unit
        words.append(c)
        tokenList.append(token)
    docLevel['tokens'] = tokenList
    alldocs.append(docLevel)
# After the loop: wrap all collected documents and write them out as UTF-8 JSON.
root['witnesses'] = alldocs
with open(os.path.join(path, jsonFileName), 'w') as Json:
    Json.write(json.dumps(root, ensure_ascii=False).encode('utf-8'))
# Python 2 print statement: reports the elapsed time since startTimeX2J.
print 'Took', datetime.datetime.now()-startTimeX2J, 'to execute XMLsToJSON.py'

Пример #3

Показать файл

Файл: XMLtoJSON.py Проект: obdurodon/CollateOS

    # Fragment of an enclosing scope that is not visible here: path, afile and
    # unit are defined outside the visible lines.
    root = {}
    alldocs = []
    # Every element in the parsed file whose local name is 'lem' or 'rdg';
    # each one is handled as a separate witness below.
    rdgs = [el for el in minidom.parse(os.path.join(path, afile)).getElementsByTagName('*') if el.localName in ['lem', 'rdg']]
    for rdg in rdgs:
        docLevel = {}
        # The 'wit' attribute of the reading is used as the witness id.
        docLevel['id'] = rdg.getAttribute('wit')
        tokenList = []
        ws = rdg.getElementsByTagName('w')
        # Conflated forms, in order; appended to below but not read in this fragment.
        words = []
        for w in range(len(ws)):
            if not 3 in [child.nodeType for child in ws[w].childNodes]: #checking presence of text nodes inside the w
                continue
            currentWord = ws[w]
            # NOTE(review): previousWord is assigned but not read anywhere in
            # the visible lines. Also, for w == 0 the lookup below is ws[-1],
            # which on a list-like sequence returns the last element instead
            # of raising IndexError -- confirm whether this code is still needed.
            previousWord = ''
            try:
                previousWord = ws[w-1]
            except IndexError:
                pass
            token = {}
            # Drops the first 8 + len(n) and the last 4 characters of the
            # serialized element -- presumably the '<w n="...">' opening tag
            # and the '</w>' closing tag; assumes 'n' is the only attribute --
            # TODO confirm.
            token['t'] = currentWord.toxml()[8 + len(ws[w].getAttribute('n')):-4]
            c = Preprocessing.conflate(currentWord)
            token['n'] = c
            token['u'] = unit
            words.append(c)
            tokenList.append(token)
        docLevel['tokens'] = tokenList
        alldocs.append(docLevel)
    root['witnesses'] = alldocs
    # Output path: the input name with its last three characters replaced by
    # 'json' (presumably an 'xml' extension -- TODO confirm).
    with open(os.path.join(path, afile[:-3] + 'json'), 'w') as Json:
        Json.write(json.dumps(root, ensure_ascii=False).encode('utf-8'))