示例#1
0
# Program: build a TF-IDF model


import functionPython
from collections import Counter
import xlrd
import posTagger
import heapq
import xlwt
from xlwt import Workbook
import openpyxl
import numpy as np

from bnltk.stemmer import BanglaStemmer
from bnltk.tokenize import Tokenizers
# Shared bnltk tokenizer instance for this script.
t = Tokenizers()
# Bengali sentence terminator (danda, U+0964). The previous literal "ред" was
# the mis-decoded (cp866) form of this character's UTF-8 bytes; being three
# characters long, it would not match the danda found in Bengali text.
fullStop = "।"

# Load main data: open the Cricket workbook and select its first worksheet.
# NOTE(review): xlrd 2.0+ reads only legacy .xls files; opening this .xlsx
# requires xlrd < 2.0 -- confirm the pinned version.
loc = "data/main-data/Cricket.xlsx"
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)

# The returned value is discarded.
# NOTE(review): presumably a sanity check that cell (0, 0) exists -- confirm.
sheet.cell_value(0, 0)

# Load dataset: positive-word lexicon for the Cricket domain.
dataParameter = "data/Lexicon Dictionary Data/Cricket/correctPositive.txt"
listOfPositiveWord = functionPython.LoadData(dataParameter)
示例#2
0
# Program to extract a particular row value
import functionPython
import xlrd
import posTagger

import xlwt
from xlwt import Workbook
import openpyxl

from bnltk.stemmer import BanglaStemmer
from bnltk.tokenize import Tokenizers

# Shared bnltk tokenizer instance for this script.
t = Tokenizers()
# Bengali sentence terminator (danda, U+0964). The previous literal "ред" was
# the mis-decoded (cp866) form of this character's UTF-8 bytes; being three
# characters long, it would not match the danda found in Bengali text.
fullStop = "।"
# Both question-mark constants currently hold the same ASCII character.
questionMarkBN = "?"
questionMarkEN = "?"

# Load dataset: each lexicon is read through the project's functionPython
# helpers. dataParameter is reused as the path holder and ends up pointing at
# the last file loaded.
dataParameter = "data/Lexicon Dictionary Data/Cricket/correctPositive.txt"
listOfPositiveWord = functionPython.LoadData(dataParameter)
dataParameter = "data/Lexicon Dictionary Data/Cricket/correctNegative.txt"
listOfNegativeWord = functionPython.LoadData(dataParameter)
dataParameter = "data/negative-word/neg.txt"
listOfNegWord = functionPython.LoadData(dataParameter)
dataParameter = "data/CCD-CCS/CCID.xlsx"
listOfcCDcCSWord = functionPython.LoadExcle(dataParameter)
dataParameter = "data/Adjective-Adverb/exel/jj-jq.xlsx"
listOfJJJQCSWord = functionPython.LoadExcle(dataParameter)
# print(listOfNegWord)
示例#3
0
import functionPython
import posTagger
from bnltk.stemmer import BanglaStemmer
import pandas as pd
import xlrd
import openpyxl

from bnltk.tokenize import Tokenizers
# Shared bnltk tokenizer instance for this script.
t = Tokenizers()

# Path of the main workbook.
# NOTE(review): this points at the Restaurant data while every lexicon below
# comes from the Cricket folder -- confirm the mix is intended.
excel_file = "data/main-data/Restaurant.xlsx"
#listOfPositiveWord = functionPython.LoadData(dataParameter)

#print(listOfSentence)

#listOfNuetralData = functionPython.LoadExcle(loc)

#listOfNuetral = [list(ele) for ele in listOfNuetralData]
# Path of the neutral-word lexicon; it is loaded further down via LoadData.
loc = "data/Lexicon Dictionary Data/Cricket/neutral.txt"

# Load the positive and negative lexicons. dataParameter is reused as the
# path holder and keeps the negative-lexicon path afterwards.
dataParameter = "data/Lexicon Dictionary Data/Cricket/correctPositive.txt"
listOfPositiveWord = functionPython.LoadData(dataParameter)
dataParameter = "data/Lexicon Dictionary Data/Cricket/correctNegative.txt"
listOfNegativeWord = functionPython.LoadData(dataParameter)

#print(listOfNegativeWord)
# Load the neutral lexicon from the text file named by loc.
listOfNuetralData = functionPython.LoadData(loc)

# Combined lexicon: positive entries followed by negative entries.
# NOTE(review): assumes LoadData returns a type that supports "+"
# concatenation (e.g. list) -- confirm against functionPython.
listOfTotalWord = listOfPositiveWord + listOfNegativeWord

# Empty accumulator; it is filled by code outside this excerpt.
newList = []
示例#4
0
#
# Program: build a TF-IDF model

import functionPython
from collections import Counter
import xlrd
import posTagger
import heapq
import xlwt
from xlwt import Workbook
import openpyxl
import numpy as np

from bnltk.stemmer import BanglaStemmer
from bnltk.tokenize import Tokenizers
# Shared bnltk tokenizer instance for this script.
t = Tokenizers()
# Bengali sentence terminator (danda, U+0964). The previous literal "ред" was
# the mis-decoded (cp866) form of this character's UTF-8 bytes; being three
# characters long, it would not match the danda found in Bengali text.
fullStop = "।"

# Load main data: open the Restaurant workbook and select its first worksheet.
# NOTE(review): xlrd 2.0+ reads only legacy .xls files; opening this .xlsx
# requires xlrd < 2.0 -- confirm the pinned version.
loc = "data/main-data/Restaurant.xlsx"
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)

# The returned value is discarded.
# NOTE(review): presumably a sanity check that cell (0, 0) exists -- confirm.
sheet.cell_value(0, 0)

# Load dataset: Restaurant-domain lexicons.
# NOTE(review): the variable named for positive words is filled from
# correctNegative.txt and the one named for negative words from
# correctPositive.txt -- confirm whether this swap is intentional.
dataParameter = "data/Lexicon Dictionary Data/Restaurant/correctNegative.txt"
listOfPositiveWord = functionPython.LoadData(dataParameter)
dataParameter = "data/Lexicon Dictionary Data/Restaurant/correctPositive.txt"
listOfNegativeWord = functionPython.LoadData(dataParameter)
示例#5
0
"""
print("Hello World")

for x in range(4):
    for y in range(3):
        print(f'({x},{y})')
    print("")

numbers=[5,2,5,2,2,2]
"""

from bnltk.stemmer import BanglaStemmer
from bnltk.tokenize import Tokenizers
# Tokenize one sample Bengali sentence and print the resulting tokens.
t = Tokenizers()
# Earlier sample, kept for reference:
# print(t.bn_word_tokenizer(' আমার সোনার বাংলা। , আমি তোমাকে ভালোবাসি ।'))
extract = t.bn_word_tokenizer("আবরার হত্যায় নির্ভুল অভিযোগ পত্র দেওয়ার চেস্টা করেছি! ")
print(extract)

# Disabled experiment: compare a fixed word against the second token.
# test = 'চট্টগ্রাম'
# if test == extract[1]:
#     print("yes match")
# else:
#     print("no match")

# Whole-string equality check between a lone "!" and a phrase ending in "!".
test = "!"
print("yes match" if test == "বাংলাদেশ !" else "no match")
示例#6
0
import posTagger
from bnltk.stemmer import BanglaStemmer
from bnltk.tokenize import Tokenizers
# Shared bnltk tokenizer instance for this script.
t = Tokenizers()

#from bnltk.pos_tagger import PosTagger
#p_tagger = PosTagger()
#p_tagger.loader()

punctuation = "!"
# Bengali sentence terminator (danda, U+0964). The previous literal "ред" was
# the mis-decoded (cp866) form of this character's UTF-8 bytes; as a
# three-character string it could never equal a single character of `data`,
# so the loop below never split anything.
fullStop = "।"

# Read the whole adverb file; the with-block closes the handle on exit, so no
# explicit close() call is needed afterwards.
with open('data/Adjective-Adverb/minimal-degree-adverb.txt',
          encoding="utf8") as myfile:
    data = myfile.read()
#print(data)

# Split the text into sentences on the danda.
#   listOfSentence - completed sentences, terminator excluded
#   sentence       - text accumulated since the last terminator
#   res            - index of the last terminator seen, or None if there was none
sentence = ""
listOfSentence = []

res = None
for i, ch in enumerate(data):
    if ch == fullStop:
        res = i
        listOfSentence.append(sentence)
        sentence = ""
    else:
        sentence += ch
示例#7
0
import random
import nltk
from collections import Counter
from bnltk.tokenize import Tokenizers
t = Tokenizers()

import functionPython
from collections import Counter
import xlrd
import posTagger
import heapq
import xlwt
from xlwt import Workbook
import openpyxl
import numpy as np

from bnltk.stemmer import BanglaStemmer
from bnltk.tokenize import Tokenizers
# Shared bnltk tokenizer instance for this script.
t = Tokenizers()
# Bengali sentence terminator (danda, U+0964). The previous literal "ред" was
# the mis-decoded (cp866) form of this character's UTF-8 bytes; being three
# characters long, it would not match the danda found in Bengali text.
fullStop = "।"

# Load main data: open the Restaurant test workbook and select its first
# worksheet.
# NOTE(review): xlrd 2.0+ reads only legacy .xls files; opening this .xlsx
# requires xlrd < 2.0 -- confirm the pinned version.
loc = "data/main-data/Restaurant_Test.xlsx"
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)

# The returned value is discarded.
# NOTE(review): presumably a sanity check that cell (0, 0) exists -- confirm.
sheet.cell_value(0, 0)

# Load dataset: path of the first Restaurant-domain lexicon to read.
dataParameter = "data/Lexicon Dictionary Data/Restaurant/correctNegative.txt"