def __init__(self,
                 json_folder,
                 lemmatized=True,
                 num_filter=0,
                 no_below=1,
                 no_above=1,
                 word_freq_file=None,
                 pos_tag_file=None,
                 sense_freq_file=None,
                 word_hypernyms_file=None,
                 abstraction_only=False,
                 get_sense=False,
                 sense_filter=None):
        """Load every character file in ``json_folder`` and build the corpora.

        Each file in the folder is parsed as JSON into a ``Character``: the
        personality scores, their z-scores, gender, salience and valence are
        read, and the words listed under each mode (``agent``, ``patient``,
        ``mod``, ``poss``) are collected into the character's persona. One
        document per successfully loaded character is stored in
        ``self.documents``, and a ``Dictionary`` (presumably
        ``gensim.corpora.Dictionary`` -- confirm against the file's imports)
        is built per document group in ``self.corpora`` and pruned with
        ``filter_extremes``.

        A file that fails at any step (unreadable, invalid JSON, missing key,
        word absent from the POS/frequency tables, ...) is reported with
        ``print`` and skipped; it contributes no documents.

        Args:
            json_folder: Directory whose entries are the character files.
            lemmatized: Not used by this method; kept for interface
                compatibility.
            num_filter: Minimum ``self.word_freq`` value a word needs to be
                kept.
            no_below: Forwarded to ``Dictionary.filter_extremes``.
            no_above: Forwarded to ``Dictionary.filter_extremes``.
            word_freq_file: Forwarded to ``self.initialize_pos_freq``.
            pos_tag_file: Forwarded to ``self.initialize_pos_freq``.
            sense_freq_file: Forwarded to ``self.initialize_pos_freq``.
            word_hypernyms_file: Forwarded to ``self.initialize_pos_freq``.
            abstraction_only: If true, a noun is added to the mode persona
                only when ``self.get_hypernyms`` returns a non-empty result
                that does not contain ``'physical_entity_sense'``.
            get_sense: If true, also collect hypernym ("sense") features per
                part of speech and build the ``a``/``n``/``v``/``sense_all``
                dictionaries.
            sense_filter: Mapping from ``'a'``/``'n'``/``'v'`` to the minimum
                ``self.sense_freq`` value a hypernym needs to be kept. A
                missing key (or ``None``) means a threshold of 0.
        """
        # Avoid a mutable default argument: the mapping is stored on the
        # instance, so a shared default dict would be shared by all instances.
        if sense_filter is None:
            sense_filter = {}

        self.json_folder = json_folder
        self.modes = ['agent', 'patient', 'mod', 'poss']
        sense_keys = ['a', 'n', 'v']
        group_keys = self.modes + ['all'] + sense_keys + ['sense_all']

        self.corpora = {key: None for key in group_keys}
        self.documents = {key: [] for key in group_keys}

        self.sense_filter = sense_filter
        self.get_sense = get_sense
        self.no_above = no_above
        self.no_below = no_below

        self.character_list = {}

        self.pos_tag = {}
        self.word_freq = {}
        # total sense frequency in all documents
        self.sense_freq = {'a': {}, 'n': {}, 'v': {}}
        self.word_hypernyms = {'a': {}, 'n': {}, 'v': {}}

        json_files = os.listdir(json_folder)

        self.initialize_pos_freq(json_folder, word_freq_file, pos_tag_file,
                                 sense_freq_file, word_hypernyms_file)
        print(len(self.sense_freq['v']))

        # One list of documents (one document per character) per group.
        mode_docs = {m: [] for m in self.modes}
        sense_docs = {k: [] for k in sense_keys}

        # (character attribute, JSON key) pairs that are read as floats.
        float_fields = (
            ('E', 'extroversion'),
            ('A', 'agreeableness'),
            ('C', 'conscientiousness'),
            ('S', 'stability'),
            ('O', 'openness'),
            ('zE', 'z_extroversion'),
            ('zA', 'z_agreeableness'),
            ('zC', 'z_conscientiousness'),
            ('zS', 'z_stability'),
            ('zO', 'z_openness'),
        )
        # Attributes read as ints; the JSON key equals the attribute name.
        int_fields = ('gender', 'salience', 'valence')

        # POS-tag fragment -> sense key. Checked in this order and only the
        # first match is used (noun, then adjective, then verb).
        pos_to_sense = (('NN', 'n'), ('JJ', 'a'), ('VB', 'v'))

        for filename in json_files:
            # Drop the last five characters of the file name
            # (assumed to be the '.json' extension).
            name = filename[:-5]
            c = Character(name)
            path = os.path.join(json_folder, filename)

            try:
                # The context manager closes the handle even if parsing fails.
                with open(path, 'r') as f:
                    data = json.load(f)

                for attr, key in float_fields:
                    setattr(c, attr, float(data[key]))
                for attr in int_fields:
                    setattr(c, attr, int(data[attr]))

                for m in self.modes:
                    for word in data[m]:
                        w = word[1].lower()
                        pos = self.pos_tag[w]

                        if self.word_freq[w] < num_filter:
                            continue

                        # -----------------------------
                        # if we use the sense features
                        # -----------------------------
                        if get_sense:
                            for tag, key in pos_to_sense:
                                if tag not in pos:
                                    continue

                                threshold = sense_filter.get(key, 0)
                                # Every hypernym that passes the threshold is
                                # kept, not only the most frequent one.
                                for h in self.word_hypernyms[key][w]:
                                    if self.sense_freq[key][h] < threshold:
                                        continue
                                    if key == 'n':
                                        # h[:-6] strips the last six
                                        # characters (presumably a '_sense'
                                        # suffix -- TODO confirm). Noun senses
                                        # that fall under 'person_sense' are
                                        # skipped.
                                        higher = self.get_hypernyms(
                                            h[:-6], 'n', higher=True)
                                        if 'person_sense' in higher:
                                            continue
                                    c.persona[key].append(h)
                                break

                        # -----------------------------
                        # if we filter out physical entity
                        # -----------------------------
                        if abstraction_only and 'NN' in pos:
                            hypernyms = self.get_hypernyms(w,
                                                           'n',
                                                           higher=True)
                            if (len(hypernyms) > 0 and
                                    'physical_entity_sense' not in hypernyms):
                                c.persona[m].append(w)
                        else:
                            c.persona[m].append(w)

                for m in self.modes:
                    mode_docs[m].append(c.persona[m])
                for key in sense_keys:
                    sense_docs[key].append(c.persona[key])

                self.character_list[name] = c

            except Exception as e:
                # Deliberate best effort: report the file and move on.
                print(path, e)

        agent, patient, mod, poss = (mode_docs[m] for m in self.modes)
        sense_a, sense_n, sense_v = (sense_docs[k] for k in sense_keys)
        aall = agent + patient + mod + poss

        self.documents['agent'] = agent
        self.documents['patient'] = patient
        self.documents['mod'] = mod
        self.documents['poss'] = poss
        # Store the combined document list (previously the builtin function
        # ``all`` was stored here by mistake).
        self.documents['all'] = aall
        self.documents['a'] = sense_a
        self.documents['v'] = sense_v
        self.documents['n'] = sense_n
        # NOTE(review): the documents are concatenated as v + a + n here but
        # the 'sense_all' dictionary below is built from v + n + a. Both
        # orders are kept as they were; confirm whether the difference is
        # intentional.
        self.documents['sense_all'] = sense_v + sense_a + sense_n

        if get_sense:
            for key in sense_keys:
                self.corpora[key] = Dictionary(sense_docs[key])
            self.corpora['sense_all'] = Dictionary(sense_v + sense_n + sense_a)

        for m in self.modes:
            self.corpora[m] = Dictionary(mode_docs[m])
        self.corpora['all'] = Dictionary(aall)

        # Prune every dictionary that was built; the sense dictionaries stay
        # None when get_sense is false.
        for dictionary in self.corpora.values():
            if dictionary is not None:
                dictionary.filter_extremes(no_below=no_below,
                                           no_above=no_above)