예제 #1
0
def processWordCSV(csvPath, dicIndex, dstDir):
    """Split a words CSV into one JSON file per word inside ``dstDir``.

    Every row must have exactly 7 columns. A row whose first column is
    ``'db_id'`` is treated as the header and skipped. Of the remaining
    columns this function uses ``row[2]`` (looked up as a key of
    ``dicIndex``), ``row[4]`` (the word; its lower-cased form is the output
    file name) and ``row[6]`` (the text stored for that word).

    Each output file holds a JSON list of ``[row[2], text]`` pairs. When the
    file for a word already exists, the new pair is appended to its list.
    When ``dicIndex[row[2]][0]`` is ``'zh'`` the text is passed through
    ``jtof`` before being stored.

    Args:
        csvPath: path of the words CSV file to read.
        dicIndex: mapping from dictionary id to a sequence whose first item
            is a language code.
        dstDir: existing directory that receives the per-word JSON files.

    Raises:
        Exception: if a row does not have exactly 7 columns.
        KeyError: if ``row[2]`` is not a key of ``dicIndex``.
    """
    with open(csvPath, "r") as wordsCsvfile:
        wordreader = csv.reader(wordsCsvfile, delimiter=',', quotechar='"')
        for row in wordreader:

            if len(row) != 7:
                raise Exception('len(row) != 7')

            if row[0] == 'db_id':
                continue

            word = row[4]
            # csv yields byte strings on Python 2 but text on Python 3, and
            # text has no .decode(); only decode when we really got bytes.
            if isinstance(word, bytes):
                word = word.decode('utf-8')

            path = os.path.join(dstDir, '%s' % word.lower())
            print(path)

            if os.path.exists(path):
                # append new data to existing data
                with open(path, 'r') as f:
                    data = json.loads(f.read())
            else:
                # start a new data file
                data = []

            if dicIndex[row[2]][0] == 'zh':
                # convert simplified chinese to traditional chinese
                data.append([row[2], jtof(row[6])])
            else:
                data.append([row[2], row[6]])

            # NOTE(review): the file is re-read and re-written once per row;
            # kept as-is so partial progress stays on disk if a later row fails.
            with open(path, 'w') as f:
                f.write(json.dumps(data))
예제 #2
0
def processWordCSV(csvPath, dicIndex, dstDir):
  """Write the rows of a words CSV into per-word JSON files in ``dstDir``.

  Rows must have exactly 7 columns; the header row (first column equal to
  ``'db_id'``) is skipped. ``row[4]`` is the word and its lower-cased form
  names the output file, ``row[2]`` is looked up in ``dicIndex`` and
  ``row[6]`` is the text stored for the word.

  Each output file contains a JSON list of ``[row[2], text]`` pairs; a pair
  for a word whose file already exists is appended to that file's list. If
  ``dicIndex[row[2]][0]`` equals ``'zh'`` the text goes through ``jtof``
  before it is stored.

  Args:
    csvPath: path of the words CSV file to read.
    dicIndex: mapping from dictionary id to a sequence whose first item is a
      language code.
    dstDir: existing directory in which the JSON files are created.

  Raises:
    Exception: if a row does not have exactly 7 columns.
    KeyError: if ``row[2]`` is not a key of ``dicIndex``.
  """
  with open(csvPath, "r") as wordsCsvfile:
    wordreader = csv.reader(wordsCsvfile, delimiter=',', quotechar='"')
    for row in wordreader:

      if len(row) != 7:
        raise Exception('len(row) != 7')

      if row[0] == 'db_id':
        continue

      # Python 2's csv returns byte strings, Python 3's returns text that
      # has no .decode(); decode only in the byte-string case.
      word = row[4]
      if isinstance(word, bytes):
        word = word.decode('utf-8')

      path = os.path.join(dstDir, '%s' % word.lower())
      print(path)

      # start from the existing entries when the word already has a file
      data = []
      if os.path.exists(path):
        with open(path, 'r') as f:
          data = json.loads(f.read())

      if dicIndex[row[2]][0] == 'zh':
        # convert simplified chinese to traditional chinese
        data.append([row[2], jtof(row[6])])
      else:
        data.append([row[2], row[6]])

      with open(path, 'w') as f:
        f.write(json.dumps(data))
예제 #3
0
def processDictionariesBooks():
  """Build an index of the dictionary books listed in the books CSV.

  Reads the CSV at ``getDictBooksCSVPath()``. Every row must have exactly 4
  columns; the header row (first column equal to ``'b_lang'``) is skipped.
  In the other rows column 0 is a language-group marker and column 1 is the
  id of the dictionary.

  Returns:
    dict mapping each dictionary id to a 4-item list
    ``[language code, separator, third item, fourth item]``. The language
    code is one of ``'ja'``, ``'zh'``, ``'vi'``, ``'my'`` or ``'en'``. For
    the two Japanese dictionaries the last two items are hard-coded; for
    Chinese ones they are CSV columns 2 and 3 passed through ``jtof``;
    otherwise they are CSV columns 2 and 3 unchanged (presumably the book's
    name and author -- confirm against the CSV).

  Raises:
    Exception: if a row does not have exactly 4 columns.
  """
  # Japanese dictionary ids and the hard-coded last item of their entries
  jaLastItem = {
    'A': '増補改訂パーリ語辞典  水野弘元著',
    'S': 'パーリ語辞典  水野弘元著',
  }
  # ids whose separator differs from the default '。'
  zhSeparators = {'D': '~', 'H': ' -', 'T': ' -'}
  enSeparators = {'N': '<br>', 'C': '<br>', 'P': '<i>'}
  # ids of the Vietnamese and Burmese(Myanmar) dictionaries
  viIds = ('U', 'Q', 'E')
  myIds = ('B', 'K', 'O', 'R')

  dicIndex = {}
  with open(getDictBooksCSVPath(), "r") as booksCsvfile:
    for row in csv.reader(booksCsvfile, delimiter=',', quotechar='"'):
      if len(row) != 4:
        raise Exception('len(row) != 4')

      group, bookId, third, fourth = row
      if group == 'b_lang':
        continue

      if group == 'C':
        # Chinese and Japanese dictionaries
        if bookId in jaLastItem:
          entry = ['ja', ' -', '《パーリ語辞典》', jaLastItem[bookId]]
        else:
          entry = ['zh', zhSeparators.get(bookId, '。'),
                   jtof(third), jtof(fourth)]
      elif bookId in viIds:
        # FIXME: is '。' correct separator?
        entry = ['vi', '。', third, fourth]
      elif bookId in myIds:
        # FIXME: is '。' correct separator?
        entry = ['my', '。', third, fourth]
      else:
        entry = ['en', enSeparators.get(bookId, '。'), third, fourth]

      dicIndex[bookId] = entry

  return dicIndex
예제 #4
0
def processDictionariesBooks():
    """Read the books CSV and return a per-dictionary index.

    The CSV path comes from ``getDictBooksCSVPath()``. Each row must have
    exactly 4 columns. The row whose first column is ``'b_lang'`` is the
    header and is skipped. Column 0 of a data row is a language-group marker
    (``'C'`` covers the Chinese and Japanese dictionaries) and column 1 is
    the id of the dictionary.

    Returns:
        dict keyed by dictionary id. Each value is a list of four items:
        a language code (``'ja'``, ``'zh'``, ``'vi'``, ``'my'`` or ``'en'``),
        a separator string, and two more strings. Those two are hard-coded
        for the Japanese dictionaries, ``jtof``-converted CSV columns 2 and
        3 for Chinese ones, and the raw CSV columns 2 and 3 for all others.

    Raises:
        Exception: if a row does not have exactly 4 columns.
    """
    index = {}
    with open(getDictBooksCSVPath(), "r") as booksCsvfile:
        records = csv.reader(booksCsvfile, delimiter=',', quotechar='"')
        for record in records:
            if len(record) != 4:
                raise Exception('len(row) != 4')

            langGroup = record[0]
            dictId = record[1]
            if langGroup == 'b_lang':
                continue

            if langGroup == 'C' and dictId in ('A', 'S'):
                # Japanese dictionaries: everything but the id is hard-coded
                if dictId == 'A':
                    lastItem = '増補改訂パーリ語辞典  水野弘元著'
                else:
                    lastItem = 'パーリ語辞典  水野弘元著'
                index[dictId] = ['ja', ' -', '《パーリ語辞典》', lastItem]
                continue

            if langGroup == 'C':
                # Chinese dictionaries: both text columns go through jtof
                if dictId == 'D':
                    separator = '~'
                elif dictId in ('H', 'T'):
                    separator = ' -'
                else:
                    separator = '。'
                index[dictId] = [
                    'zh', separator, jtof(record[2]), jtof(record[3])]
                continue

            # English, Vietnam, Myanmar dictionaries
            if dictId in ('U', 'Q', 'E'):
                langCode = 'vi'
                # FIXME: is '。' correct separator?
                separator = '。'
            elif dictId in ('B', 'K', 'O', 'R'):
                langCode = 'my'
                # FIXME: is '。' correct separator?
                separator = '。'
            else:
                langCode = 'en'
                if dictId in ('N', 'C'):
                    separator = '<br>'
                elif dictId == 'P':
                    separator = '<i>'
                else:
                    separator = '。'

            index[dictId] = [langCode, separator, record[2], record[3]]

    return index