Пример #1
0
#!/usr/bin/python
import os;
import helper;

#author: maksym hontar
# Usage: run from this script's folder in a terminal: ./index_vocabulary_build_matrix.py

# Load the incident matrix from its file. The `with` block guarantees the
# handle is closed even if helper.readIncidentMatrixFromFile raises, which the
# previous explicit open()/close() pair did not.
with open(helper.getFilePath(helper.incident_matrix_file_name), 'r') as incident_matrix_file:
	incident_matrix = helper.readIncidentMatrixFromFile(incident_matrix_file)

# Hard-coded boolean query, read as: word1 <boolExpression> word2.
word1, boolExpression, word2 = 'man', 'AND', 'stupid'

# Evaluate the query `word1 <boolExpression> word2` against incident_matrix.
# Nothing happens unless the matrix and all three query parts are non-empty.
if len(incident_matrix):
	if len(word1) and len(word2) and len(boolExpression) :
		# res collects one combined value per position of the two rows.
		res = [];
		
		# Both words must be keys of the matrix; otherwise res stays empty.
		if word1 in incident_matrix and word2 in incident_matrix:
			word_list1 = incident_matrix[word1];
			word_list2 = incident_matrix[word2];

			if len(word_list1) and len(word_list2):
				# NOTE(review): word_list2 is indexed by the positions of
				# word_list1, so both rows are assumed to have the same length;
				# a shorter word_list2 would raise IndexError -- TODO confirm.
				for i in xrange(0,len(word_list1)):
					# Entries are combined with bitwise & / | (presumably 0/1
					# presence flags -- verify against the matrix file format).
					# Any operator other than 'AND'/'OR' appends nothing.
					if (boolExpression == 'AND') :
						res.append(word_list1[i] & word_list2[i]);
					elif (boolExpression == 'OR') :
						res.append(word_list1[i] | word_list2[i]);
				# presumably filled from res by the code that follows -- verify.
				books_names = [];
				for i in xrange(0,len(res)) :
Пример #2
0
# 	return file.read().split(new_line_char);

# def fileSize(filePath):
# 	statinfo = os.stat(filePath);
# 	return str(statinfo.st_size);

# #instantiate an array
# file_paths = [];

# for a_file_name in file_names:
# 	file_paths.append(getFilePath(a_file_name))

# Process the input files only when helper.file_paths is non-empty.
if len(helper.file_paths):
	# Open the vocabulary file and load its current entries, split on
	# helper.new_line_char.

	# NOTE(review): the handle is closed manually; if read() raises, close()
	# is never reached.
	voc_file = open(helper.getFilePath(helper.voc_file_name), 'r');
	vocabulary = voc_file.read().split(helper.new_line_char);
	print 'vocabulary count before reading: ' + str(len(vocabulary));
	voc_file.close();

	# Read each listed file completely into memory, one at a time.
	for a_file_path in helper.file_paths:
		# NOTE(review): `file` shadows the Python 2 builtin of the same name.
		file = open(a_file_path, 'r');
		# NOTE(review): open() raises IOError on failure instead of returning
		# a falsy value, so this check looks always true -- confirm.
		if file:
			# File-reading block: report the path and size, then read the
			# whole content. helper.fileSize's result is concatenated
			# directly, so it is expected to return a string.
			print "files is being read " + a_file_path + '. file size: '+ helper.fileSize(a_file_path);
			file_content = file.read();
			file.close();

			# Replace every helper.new_line_char with helper.space_char
			# (i.e. turn newlines into spaces, per the original note).
			file_content = file_content.replace(helper.new_line_char,helper.space_char);