Python Txt2JSON 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: txt2json

클래스/타입: Txt2JSON

hotexamples.com에서의 예제들: 9

Python Txt2JSON - 9개의 예제를 찾았습니다. 오픈소스 프로젝트에서 추출한, 평점이 가장 높은 txt2json.Txt2JSON의 실제 사용 Python 예제들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

__init__(10)

process_corpus(4)

자주 사용되는 메소드들

__init__ (10)

process_corpus (4)

예제 #1

파일 보기

 def process_corpus(self, cutMedia=True):
     """
     Convert the corpus, then optionally cut its media files.

     The conversion itself is delegated to Txt2JSON.process_corpus
     (described by the original docstring as: take every eaf file from
     the source directory subtree, turn it into a parsed json and store
     it in the target directory).

     Unless cutMedia is false, the directory <corpus_dir>/<srcExt> is
     then walked. Every file name found is printed, and every file whose
     lowercased extension is listed in self.mediaExtensions is passed to
     self.mc.cut_media as an absolute path.
     """
     Txt2JSON.process_corpus(self)
     if cutMedia:
         srcRoot = os.path.join(self.corpusSettings['corpus_dir'],
                                self.srcExt)
         for curDir, _subdirs, fileNames in os.walk(srcRoot):
             for curName in fileNames:
                 print(curName)
                 # Compare extensions case-insensitively.
                 _, ext = os.path.splitext(curName.lower())
                 if ext not in self.mediaExtensions:
                     continue
                 mediaPath = os.path.abspath(os.path.join(curDir, curName))
                 print('Cutting media file', mediaPath)
                 self.mc.cut_media(mediaPath)

예제 #2

파일 보기

파일: xml_flex2json.py 프로젝트: timarkh/tsakorpus_wc

 def __init__(self, settingsDir='conf_conversion'):
     """
     Set up the converter state for XML source files.

     Runs Txt2JSON.__init__ with the given settings directory, creates
     the empty gloss/rule containers, calls self.load_rules(), and then
     reads two optional keys from self.corpusSettings:
     'main_gloss_language' (default 'en') and 'bad_analysis_languages'
     (default: empty list).
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.srcExt = 'xml'
     # id of last aligned segment
     self.pID = 0
     self.glosses = []
     self.grammRules = []
     self.posRules = {}
     # The rule containers above exist before load_rules() is called.
     self.load_rules()
     # All POS tags encountered in the XML
     self.POSTags = set()
     # Placeholder pattern that only matches the empty string.
     self.rxStemGlosses = re.compile('^$')
     settings = self.corpusSettings
     self.mainGlossLang = (settings['main_gloss_language']
                           if 'main_gloss_language' in settings
                           else 'en')
     self.badAnalysisLangs = (settings['bad_analysis_languages']
                              if 'bad_analysis_languages' in settings
                              else [])

예제 #3

파일 보기

 def process_corpus(self):
     """
     Convert the corpus and cut its media files.

     This is the main function of the class. The conversion is delegated
     to Txt2JSON.process_corpus (described by the original docstring as:
     take every Exmaralda file from the source directory subtree, turn
     it into a parsed json and store it in the target directory).

     Afterwards the directory <corpus_dir>/<srcExt> is walked and every
     file whose lowercased extension is listed in self.mediaExtensions
     is passed to self.mc.cut_media as an absolute path. Per the
     original description this splits the media into overlapping chunks
     of small duration; the cutter itself is not visible here.
     """
     Txt2JSON.process_corpus(self)
     mediaRoot = os.path.join(self.corpusSettings['corpus_dir'],
                              self.srcExt)
     for dirPath, _dirNames, fileNames in os.walk(mediaRoot):
         for fileName in fileNames:
             # Compare extensions case-insensitively.
             extension = os.path.splitext(fileName.lower())[1]
             if extension in self.mediaExtensions:
                 absPath = os.path.abspath(os.path.join(dirPath, fileName))
                 print('Cutting media file', absPath)
                 self.mc.cut_media(absPath)

예제 #4

파일 보기

 def __init__(self, settingsDir='conf'):
     """
     Set up the converter state for XML source files with media.

     Runs Txt2JSON.__init__ with the given settings directory, creates a
     MediaCutter from self.corpusSettings, initializes the bookkeeping
     containers and finally loads the POS rules from
     <corpus_dir>/conf/posRules.txt via self.load_pos_rules.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.mc = MediaCutter(settings=self.corpusSettings)
     # extension of the source files to be converted
     self.srcExt = 'xml'
     # participant ID -> dictionary of properties
     self.participants = {}
     # time labels (id -> {'n': number, 'time': time value})
     self.tlis = {}
     # word ID -> word object
     self.wordsByID = {}
     # morph ID -> (word ID, position in the word)
     self.morph2wordID = {}
     # id of last aligned segment
     self.pID = 0
     # ids of <seg> tags -> parallel IDs of corresponding sentences
     self.seg2pID = {}
     # sequence of word/punctuation/incident IDs
     # (needed to understand ranges such as "w13 to inc2")
     self.wordIDseq = []
     self.glosses = set()
     self.posRules = {}
     posRulesPath = os.path.join(self.corpusSettings['corpus_dir'],
                                 'conf/posRules.txt')
     self.load_pos_rules(posRulesPath)

예제 #5

파일 보기

파일: eaf2json.py 프로젝트: timarkh/tsakorpus_wc

 def __init__(self, settingsDir='conf_conversion'):
     """
     Set up the converter state for eaf source files.

     Runs Txt2JSON.__init__ with the given settings directory, loads the
     speaker metadata via self.load_speaker_meta(), creates a
     MediaCutter from self.corpusSettings, initializes the bookkeeping
     containers and calls self.set_ignore_tokens() after resetting
     self.rxIgnoreTokens to None.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.speakerMeta = self.load_speaker_meta()
     self.mc = MediaCutter(settings=self.corpusSettings)
     self.srcExt = 'eaf'
     # time labels
     self.tlis = {}
     # id of last aligned segment
     self.pID = 0
     self.glosses = set()
     # main tier ID -> participant ID
     self.participants = {}
     # aID -> (contents, parent aID, tli1, tli2)
     self.segmentTree = {}
     # (aID, child tier type) -> [child aID]
     self.segmentChildren = {}
     # span annotation tier type
     #     -> {tier ID -> [(tli1, tli2, contents)]}
     self.spanAnnoTiers = {}
     # aID of a segment -> {span annotation tier ID -> contents}
     self.alignedSpanAnnoTiers = {}
     # names of additional word-level fields associated with some
     # analysis tiers
     self.additionalWordFields = []
     # segments (start_ms, end_ms) that should be beeped out,
     # one list per source file
     self.privacySegments = {}
     self.rxIgnoreTokens = None
     self.set_ignore_tokens()
     # filenames of media fragments referenced in the JSONs
     self.usedMediaFiles = set()

예제 #6

파일 보기

파일: img_csv2json.py 프로젝트: mansi-team/mansi_corpus

 def __init__(self, settingsDir='conf'):
     """
     Set up the converter state for csv source files.

     Runs Txt2JSON.__init__ with the given settings directory, then
     resets the aligned-segment counter and sets the source extension.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     # id of last aligned segment
     self.pID = 0
     self.srcExt = 'csv'

예제 #7

파일 보기

파일: social_networks2json.py 프로젝트: mansi-team/mansi_corpus

 def __init__(self, settingsDir='conf'):
     """
     Set up the converter state for json source files.

     Runs Txt2JSON.__init__ with the given settings directory, then
     creates an empty gloss set and sets the source extension.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.glosses = set()
     self.srcExt = 'json'

예제 #8

파일 보기

 def __init__(self, settingsDir='conf_conversion'):
     """
     Set up the converter state for xml source files.

     Runs Txt2JSON.__init__ with the given settings directory, then
     resets the aligned-segment counter and sets the source extension.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     # id of last aligned segment
     self.pID = 0
     self.srcExt = 'xml'

예제 #9

파일 보기

파일: morphy_yaml2json.py 프로젝트: timarkh/tsakorpus_wc

 def __init__(self, settingsDir='conf_conversion'):
     """
     Set up the converter state for yaml source files.

     Runs Txt2JSON.__init__ with the given settings directory, then
     compiles the 'punc_space_before' and 'punc_space_after' patterns
     from self.corpusSettings. Both keys are required; a missing key
     raises KeyError.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     settings = self.corpusSettings
     puncBefore = settings['punc_space_before']
     self.rxPuncSpaceBefore = re.compile(puncBefore)
     puncAfter = settings['punc_space_after']
     self.rxPuncSpaceAfter = re.compile(puncAfter)
     self.srcExt = 'yaml'
     # id of last aligned segment
     self.pID = 0