示例#1
0
 def __init__(self, row, transcript_metadata):
     """
     Build an utterance from one CSV row plus its transcript's metadata.

     Each name in Utterance.header becomes an attribute on this instance,
     filled from the cell at the same position in `row` (whitespace
     stripped).  A row shorter than the header leaves the remaining
     attributes as None, except 'trees' and 'ptb_treenumbers', which
     default to an empty list.

     Arguments:
     row (list) -- a row from one of the corpus CSV files
     transcript_metadata (dict) -- a Metadata value based on the current conversation_no

     Raises:
     ValueError -- if a numeric column holds text that int() cannot parse
     KeyError -- if transcript_metadata lacks an expected 'from_*'/'to_*' key
     """
     ##################################################
     # Utterance data:
     int_fields = ('conversation_no', 'transcript_index', 'utterance_index', 'subutterance_index')
     for i, att_name in enumerate(Utterance.header):
         # Rows may be shorter than the header; missing cells are None.
         row_value = row[i].strip() if i < len(row) else None
         # Special handling of non-string values.
         if att_name == "trees":
             # NOTE(review): the "|||"-separated pieces are passed as the
             # children of a single Tree whose first argument is None;
             # confirm against the Tree class that this is the intended
             # construction (an earlier version built one Tree per piece).
             if row_value:
                 row_value = Tree(None, row_value.split("|||"))
             else:
                 row_value = []
         elif att_name == "ptb_treenumbers":
             # A list comprehension (rather than map) so the attribute is a
             # real list on both Python 2 and Python 3.
             if row_value:
                 row_value = [int(number) for number in row_value.split("|||")]
             else:
                 row_value = []
         elif att_name == 'act_tag':
             # Conjoined tags are deliberately kept as one string: the docs
             # suggest they are single tags, so they are not split.
             # ``Transcription errors (typos, obvious mistranscriptions) are
             # marked with a "*" after the discourse tag.''
             # These markers are removed for this version.
             if row_value is not None:
                 row_value = row_value.replace("*", "")
         elif att_name in int_fields:
             # Leave a missing cell as None instead of failing on int(None).
             if row_value is not None:
                 row_value = int(row_value)
         # Add the attribute.
         setattr(self, att_name, row_value)
     ##################################################
     # Caller data:
     # The transcript metadata stores each speaker field twice, under a
     # 'from_' and a 'to_' prefix; a caller value ending in "B" selects the
     # 'to_' entries, anything else the 'from_' entries.
     prefix = 'to_' if self.caller.endswith("B") else 'from_'
     for key in ('caller_sex', 'caller_education', 'caller_birth_year', 'caller_dialect_area'):
         setattr(self, key, transcript_metadata[prefix + key])