Python Txt2JSON.__init__示例

编程语言: Python

命名空间/包名称: txt2json

类/类型: Txt2JSON

方法/功能: __init__

hotexamples.com的示例: 10

Python Txt2JSON.__init__ - 已找到10个示例。这些是从开源项目中提取的、最受好评的txt2json.Txt2JSON.__init__真实Python示例。您可以对示例进行评分，以帮助我们提高示例质量。

常用方法

显示隐藏

__init__(10)

process_corpus(4)

示例#1

显示文件

 def __init__(self, settingsDir='conf'):
     """
     Initialise the converter for 'exb' source files.

     Runs the base Txt2JSON initialiser with the given settingsDir,
     creates a MediaCutter from self.corpusSettings and resets the
     per-conversion state (time labels, segment counter, glosses).

     settingsDir is passed through unchanged to Txt2JSON.__init__.
     """
     # self.corpusSettings is not assigned in this method; presumably it is
     # populated by the base initialiser -- confirm.
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.mc = MediaCutter(settings=self.corpusSettings)
     self.srcExt = 'exb'  # extension of the source files to be converted
     self.tlis = {}  # time labels (id -> {'n': number, 'time': time value})
     self.pID = 0  # id of last aligned segment
     self.glosses = set()

示例#2

显示文件

 def __init__(self, settingsDir='conf'):
     """
     Initialise the converter for 'yaml' source files.

     After the base Txt2JSON initialiser has run, the two punctuation
     patterns stored in self.corpusSettings under 'punc_space_before'
     and 'punc_space_after' are compiled into regular expressions.

     settingsDir is handed over unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)

     # Compile each pattern right after looking it up, so a missing key or
     # an invalid pattern surfaces in the same order as before.
     patternBefore = self.corpusSettings['punc_space_before']
     self.rxPuncSpaceBefore = re.compile(patternBefore)
     patternAfter = self.corpusSettings['punc_space_after']
     self.rxPuncSpaceAfter = re.compile(patternAfter)

     self.srcExt = 'yaml'
     # Counter holding the id of the most recently aligned segment.
     self.pID = 0

示例#3

显示文件

文件： xml_flex2json.py 项目： mansi-team/mansi_corpus

 def __init__(self, settingsDir='conf'):
     """
     Initialise the converter for 'xml' source files.

     Runs the base Txt2JSON initialiser with the given settingsDir,
     creates empty gloss / rule containers and then calls
     self.load_rules().

     settingsDir is passed through unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.srcExt = 'xml'
     self.pID = 0        # id of last aligned segment
     self.glosses = []
     self.grammRules = []
     self.posRules = {}
     # NOTE(review): load_rules() is not visible here; presumably it fills
     # the containers created above -- confirm. It runs before POSTags and
     # rxStemGlosses are assigned below.
     self.load_rules()
     self.POSTags = set()    # All POS tags encountered in the XML
     # Placeholder pattern consisting of anchors only; presumably replaced
     # elsewhere once the real stem glosses are known -- confirm.
     self.rxStemGlosses = re.compile('^$')

示例#4

显示文件

文件： eaf2json.py 项目： LingConLab/Bashkir_corpus

 def __init__(self, settingsDir='conf'):
     """
     Initialise the converter for 'eaf' source files.

     Runs the base Txt2JSON initialiser with the given settingsDir,
     stores the result of self.load_speaker_meta(), creates a
     MediaCutter from self.corpusSettings and resets the per-conversion
     state (time labels, segment counter, glosses, participants and
     segment lookup tables).

     settingsDir is passed through unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     # load_speaker_meta() is defined outside this block; its return value
     # is kept as-is.
     self.speakerMeta = self.load_speaker_meta()
     self.mc = MediaCutter(settings=self.corpusSettings)
     self.srcExt = 'eaf'
     self.tlis = {}  # time labels
     self.pID = 0  # id of last aligned segment
     self.glosses = set()
     self.participants = {}  # main tier ID -> participant ID
     self.segmentTree = {}  # aID -> (contents, parent aID, tli1, tli2)
     self.segmentChildren = {}  # (aID, child tier type) -> [child aID]

示例#5

显示文件

 def __init__(self, settingsDir='conf'):
     """
     Initialise the converter for 'xml' source files.

     Once the base Txt2JSON initialiser has run, a MediaCutter is built
     from self.corpusSettings, every lookup table starts out empty, and
     POS rules are loaded from 'conf/posRules.txt' located under the
     'corpus_dir' entry of the corpus settings.

     settingsDir is handed over unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.mc = MediaCutter(settings=self.corpusSettings)

     # Only source files carrying this extension are converted.
     self.srcExt = 'xml'

     # Maps participant ID to a dictionary of that participant's properties.
     self.participants = {}
     # Time labels: id -> {'n': number, 'time': time value}.
     self.tlis = {}
     # Maps word ID to the word object.
     self.wordsByID = {}
     # Maps morph ID to (word ID, position in the word).
     self.morph2wordID = {}
     # Id of the most recently aligned segment.
     self.pID = 0
     # Maps ids of <seg> tags to parallel IDs of the corresponding sentences.
     self.seg2pID = {}
     # Ordered word/punctuation/incident IDs; required for resolving
     # ranges such as "w13 to inc2".
     self.wordIDseq = []

     self.glosses = set()
     self.posRules = {}

     posRulesPath = os.path.join(self.corpusSettings['corpus_dir'],
                                 'conf/posRules.txt')
     self.load_pos_rules(posRulesPath)

示例#6

显示文件

 def __init__(self, settingsDir='conf_conversion'):
     """
     Initialise the converter for 'xml' source files.

     Once the base Txt2JSON initialiser has run, the gloss / rule
     containers are created empty, self.load_rules() is called, and two
     optional corpus settings are read:

     'main_gloss_language'    -> self.mainGlossLang (falls back to 'en')
     'bad_analysis_languages' -> self.badAnalysisLangs (falls back to [])

     settingsDir is handed over unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.srcExt = 'xml'
     # Id of the most recently aligned segment.
     self.pID = 0
     self.glosses = []
     self.grammRules = []
     self.posRules = {}
     self.load_rules()
     # Collects every POS tag seen in the XML.
     self.POSTags = set()
     self.rxStemGlosses = re.compile('^$')

     # Optional settings: use the configured value when the key is present,
     # otherwise the built-in fallback.
     settings = self.corpusSettings
     self.mainGlossLang = settings.get('main_gloss_language', 'en')
     self.badAnalysisLangs = settings.get('bad_analysis_languages', [])

示例#7

显示文件

文件： eaf2json.py 项目： timarkh/tsakorpus_wc

 def __init__(self, settingsDir='conf_conversion'):
     """
     Initialise the converter for 'eaf' source files.

     Runs the base Txt2JSON initialiser with the given settingsDir,
     stores the result of self.load_speaker_meta(), creates a
     MediaCutter from self.corpusSettings, resets all per-conversion
     lookup tables and calls self.set_ignore_tokens().

     settingsDir is passed through unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     # load_speaker_meta() is defined outside this block; its return value
     # is kept as-is.
     self.speakerMeta = self.load_speaker_meta()
     self.mc = MediaCutter(settings=self.corpusSettings)
     self.srcExt = 'eaf'
     self.tlis = {}  # time labels
     self.pID = 0  # id of last aligned segment
     self.glosses = set()
     self.participants = {}  # main tier ID -> participant ID
     self.segmentTree = {}  # aID -> (contents, parent aID, tli1, tli2)
     self.segmentChildren = {}  # (aID, child tier type) -> [child aID]
     self.spanAnnoTiers = {}  # span annotation tier type -> {tier ID -> [(tli1, tli2, contents)]}
     self.alignedSpanAnnoTiers = {}  # aID of a segment -> {span annotation tier ID -> contents}
     self.additionalWordFields = []  # names of additional word-level fields associated with some analysis tiers
     self.privacySegments = {}  # segments (start_ms, end_ms) that should be beeped out, one list per source file
     # NOTE(review): the None placeholder is set before set_ignore_tokens()
     # runs; presumably that method replaces it when the settings ask for
     # it -- confirm.
     self.rxIgnoreTokens = None
     self.set_ignore_tokens()
     self.usedMediaFiles = set()  # filenames of media fragments referenced in the JSONs

示例#8

显示文件

文件： img_csv2json.py 项目： mansi-team/mansi_corpus

 def __init__(self, settingsDir='conf'):
     """
     Initialise the converter for 'csv' source files.

     Runs the base Txt2JSON initialiser with the given settingsDir, then
     sets the source extension and resets the segment counter.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.srcExt = 'csv'
     self.pID = 0  # id of last aligned segment

示例#9

显示文件

文件： social_networks2json.py 项目： mansi-team/mansi_corpus

 def __init__(self, settingsDir='conf'):
     """
     Initialise the converter for 'json' source files.

     Runs the base Txt2JSON initialiser with the given settingsDir, then
     sets the source extension and starts with an empty set of glosses.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.srcExt = 'json'
     self.glosses = set()

示例#10

显示文件

 def __init__(self, settingsDir='conf_conversion'):
     """
     Initialise the converter for 'xml' source files.

     Runs the base Txt2JSON initialiser with the given settingsDir, then
     sets the source extension and resets the segment counter.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.srcExt = 'xml'
     self.pID = 0        # id of last aligned segment

Python Txt2JSON.__init__示例

Python Txt2JSON.__init__示例