def __init__(self, selector, sock, addr):
    """Initialise the per-connection state for one client socket.

    Args:
        selector: Selector object this connection is tracked by.
        sock: The client socket.
        addr: Address of the peer; stored for use by the other methods.
    """
    # Connection plumbing.
    self.selector, self.sock, self.addr = selector, sock, addr

    # Raw byte queues for inbound and outbound data.
    self._recv_buffer = b""
    self._send_buffer = b""

    # Request-parsing progress: nothing has been decoded yet.
    self._jsonheader_len = None
    self.jsonheader = None
    self.request = None

    # Response progress.
    self.response_created = None
    self.filename = None

    # File-transfer bookkeeping; the file handle and size are filled in later.
    self.file = None
    self.file_size = None
    self.file_buffer = b""
    self.file_bytes_sent = 0
    self.sending_file = False

    # Database handle owned by this connection.
    self.db = Database()
def generate_response_header(self, byteorder, content_type, content_encoding, content_length, filename):
    """Build the JSON header dict that precedes the response payload.

    The arguments are also recorded on the instance (``self.byteorder``,
    ``self.content_type``, ``self.content_encoding``, ``self.content_length``
    and ``self.filename``).

    Args:
        byteorder: Byte order to advertise in the header.
        content_type: Value for the ``content-type`` field.
        content_encoding: Value for the ``content-encoding`` field.
        content_length: Payload size in bytes; forced to 0 when there is
            nothing to send.
        filename: Name of the file to send, or ``0`` when the client id was
            not found or the server has nothing to send.

    Returns:
        dict: Always contains ``byteorder``, ``content-type``,
        ``content-encoding``, ``content-length`` and ``file-name``.  When a
        file is being sent it additionally contains ``extension`` (text after
        the last dot, ``""`` if the name has no dot) and ``md-5-hash``.
    """
    self.byteorder = byteorder
    self.content_type = content_type
    self.content_encoding = content_encoding
    self.filename = filename
    # With no file to send the advertised length is always zero, whatever
    # the caller passed in.
    self.content_length = 0 if self.filename == 0 else content_length

    json_header = {
        # Honour the caller-supplied byte order instead of silently
        # substituting sys.byteorder.
        "byteorder": self.byteorder,
        "content-type": self.content_type,
        "content-encoding": self.content_encoding,
        "content-length": self.content_length,
        "file-name": self.filename,
    }
    if self.filename == 0:
        return json_header

    # Take the text after the LAST dot so "a.tar.gz" yields "gz", and fall
    # back to "" for dot-less names instead of raising IndexError.
    _, dot, suffix = self.filename.rpartition(".")
    json_header["extension"] = suffix if dot else ""
    # Reuse the connection's database handle rather than opening another one.
    json_header["md-5-hash"] = self.db.findFileHash(self.filename)
    return json_header
# NOTE(review): the original indentation of this fragment was lost and it reads
# TotalCountList and data before the assignments further down, so the first
# part is presumably the tail of code whose beginning is outside this view --
# confirm the real nesting before relying on this layout.

# Split every entry of TotalCountList into three parallel lists, read by
# position: [0] -> affiliation_list, [1] -> token_list, [2] -> tf_value_list.
token_list = []
tf_value_list = []
affiliation_list = []
for temp_tuple in TotalCountList:
    affiliation_list.append(str(temp_tuple[0]))
    token_list.append(temp_tuple[1].strip())
    tf_value_list.append(str(temp_tuple[2]))
# Each list is sent as one comma-joined string; the literal ',' is passed as
# the third argument of insertAffiliationToken.
token_str = ",".join(token_list)
tf_str = ",".join(tf_value_list)
affiliation_str= ",".join(affiliation_list)
data.insertAffiliationToken(affiliation_str, token_str,',', tf_str)
print("inserted %s tokens into database" % str(len(token_list)))

# Load the English stop-word list and the rows returned by getAffiliation().
data = Database()
stopwords = nltk.corpus.stopwords.words('english')
synopses = data.getAffiliation()
affiliation_list=[]
TotalCountList = []
count=0
# Each row is read as i[0] (kept as affiliation_id) and i[1] (the text that is
# normalised, tokenised/stemmed, stop-word filtered and then counted).
for i in synopses:
    count += 1
    affiliation_id = i[0]
    content = normalizeString(i[1])
    allwords_stemmed = tokenize_and_stem(content)
    #print(allwords_stemmed)
    # Drop stop words after stemming.
    allwords_stemmed = [word for word in allwords_stemmed if word not in stopwords]
    #print(allwords_stemmed)
    count_list = count_words(allwords_stemmed)
# NOTE(review): the replace chain and `return content` below are the tail of a
# definition whose `def` line is above this view (presumably normalizeString,
# which the loop further down calls) -- confirm; the original indentation was
# lost, so these lines belong inside that function body.
# Brackets, punctuation and operator symbols are replaced by a single space.
content = content.replace('(', ' ')
content = content.replace(')', ' ')
content = content.replace('%', ' ')
content = content.replace(':', ' ')
content = content.replace('.', ' ')
content = content.replace('∙', ' ')
# The ring-like symbol is folded to the degree sign.
content = content.replace('˚', '°')
content = content.replace('~', ' ')
content = content.replace('=', ' ')
content = content.replace('≤', ' ')
# The typographic minus sign becomes an ASCII hyphen.
content = content.replace('−', '-')
# Greek letters are folded to Latin ones.
content = content.replace('α', 'a')
# NOTE(review): delta is mapped to 'b' here; this may have been meant for
# beta -- confirm.
content = content.replace('δ', 'b')
return content

# Load the English stop-word list and the abstract rows for one therapeutic
# area.
data = Database()
stopwords = nltk.corpus.stopwords.words('english')
synopses = data.getAbstractByTherapeuticArea('Family Medicine & Internal Medicine')
abstract_list=[]
TotalCountList = []
# Each row is read as i[0] (the abstract id, collected as a string) and i[1]
# (the text that is normalised, tokenised/stemmed, filtered and counted).
for i in synopses:
    abstract_id = i[0]
    abstract_list.append(str(abstract_id))
    content = normalizeString(i[1])
    allwords_stemmed = tokenize_and_stem(content)
    # Drop stop words after stemming.
    allwords_stemmed = [word for word in allwords_stemmed if word not in stopwords]
    count_list = count_words(allwords_stemmed)
    #print(tuple_count)
class Message:
    """Server-side handler for one client connection.

    The handler reads a 2-byte big-endian length prefix followed by a JSON
    header from the client, asks the database whether a file is pending for
    the header's ``client_id``, replies with a length-prefixed JSON header
    and, when a file is pending, streams that file's bytes before closing the
    connection.
    """

    def __init__(self, selector, sock, addr):
        """Store the connection objects and reset all transfer state.

        Args:
            selector: Selector the socket is tracked by.
            sock: The client socket.
            addr: Address of the peer, used in log output.
        """
        self.selector = selector
        self.sock = sock
        self.addr = addr
        self.jsonheader = None
        self.request = None
        self.response_created = None
        self.filename = None
        self._recv_buffer = b""
        self._send_buffer = b""
        self._jsonheader_len = None
        self.db = Database()
        self.file_size = None
        self.sending_file = False
        self.file_bytes_sent = 0
        self.file_buffer = b""
        self.file = None

    def _set_selector_events_mask(self, mode):
        """Set selector to listen for events: mode is 'r', 'w', or 'rw'.

        Raises:
            ValueError: If ``mode`` is not one of the three accepted strings.
        """
        if mode == "r":
            events = selectors.EVENT_READ
        elif mode == "w":
            events = selectors.EVENT_WRITE
        elif mode == "rw":
            events = selectors.EVENT_READ | selectors.EVENT_WRITE
        else:
            raise ValueError(f"Invalid events mask mode {repr(mode)}.")
        # Modifying selector event mask; this object travels as the key's data.
        self.selector.modify(self.sock, events, data=self)

    def _read(self):
        """Append up to 4096 received bytes to the receive buffer.

        Raises:
            RuntimeError: If the peer closed the connection (empty read).
        """
        try:
            data = self.sock.recv(4096)
        except BlockingIOError:
            # Resource temporarily unavailable (errno EWOULDBLOCK)
            pass
        else:
            if data:
                self._recv_buffer += data
            else:
                raise RuntimeError("Peer closed.")

    def _write(self):
        """Send as much of the header buffer as the socket accepts.

        Once the buffer is fully drained the handler either switches to file
        streaming or, when there is no file to send, closes the connection.
        """
        if self._send_buffer:
            try:
                sent = self.sock.send(self._send_buffer)
            except BlockingIOError:
                # Resource temporarily unavailable (errno EWOULDBLOCK)
                pass
            else:
                self._send_buffer = self._send_buffer[sent:]
                # when this buffer is drained we will send the file
                if sent and not self._send_buffer:
                    if self.filename != 0:
                        self.sending_file = True
                    else:
                        self.close()

    def send_file(self):
        """Stream the next piece of the open file to the client.

        The file buffer is topped up to at most 1024 bytes per call.  When
        every byte has been sent the client's database entry is erased and
        the connection is closed.
        """
        # fill up file buffer if empty
        space = 1024 - len(self.file_buffer)
        self.file_buffer += self.file.read(space)
        if self.file_bytes_sent < self.file_size:
            try:
                sent = self.sock.send(self.file_buffer)
            except BlockingIOError:
                # Resource temporarily unavailable; retry on the next event.
                pass
            else:
                self.file_buffer = self.file_buffer[sent:]
                self.file_bytes_sent += sent
        # Close when the buffer is drained. The response has been sent.
        # Checked outside the send branch so a zero-length file also finishes.
        if self.file_bytes_sent == self.file_size:
            client_no = self.jsonheader["client_id"]
            self.db.erase_entry(client_no)
            self.close()

    # Decodes bytes into JSON
    def _json_decode(self, json_bytes, encoding):
        """Return the object parsed from ``json_bytes`` using ``encoding``."""
        tiow = io.TextIOWrapper(io.BytesIO(json_bytes), encoding=encoding, newline="")
        obj = json.load(tiow)
        tiow.close()
        return obj

    # Encode JSON into bytes
    def _json_encode(self, obj, encoding):
        """Return ``obj`` serialised as JSON and encoded with ``encoding``."""
        return json.dumps(obj, ensure_ascii=False).encode(encoding)

    def process_events(self, mask):
        """Dispatch a selector event mask to ``read()`` and/or ``write()``."""
        if selectors.EVENT_READ & mask:
            self.read()
        if selectors.EVENT_WRITE & mask:
            self.write()

    def read(self):
        """Read from the socket and advance request parsing as far as possible."""
        self._read()
        if self._jsonheader_len is None:
            self.process_protoheader()
        if self._jsonheader_len is not None:
            if self.jsonheader is None:
                self.process_jsonheader()
        if self.jsonheader:
            self.process_request()

    def write(self):
        """Create the response on first use, then send header or file bytes."""
        if self.request:
            if not self.response_created:
                self.create_response()
        if self.sending_file:
            self.send_file()
        else:
            self._write()

    def process_protoheader(self):
        """Parse the 2-byte big-endian JSON-header length, once it has arrived."""
        hdrlen = 2
        if len(self._recv_buffer) >= hdrlen:
            self._jsonheader_len = struct.unpack(">H", self._recv_buffer[:hdrlen])[0]
            self._recv_buffer = self._recv_buffer[hdrlen:]

    def process_jsonheader(self):
        """Decode the JSON header once enough bytes have been buffered.

        Raises:
            ValueError: If a required header field is missing.
        """
        hdrlen = self._jsonheader_len
        if len(self._recv_buffer) >= hdrlen:
            self.jsonheader = self._json_decode(self._recv_buffer[:hdrlen], "utf-8")
            self._recv_buffer = self._recv_buffer[hdrlen:]
            # Checking if the header has all fields
            for reqhdr in ("content-type", "client_id", "content-length"):
                if reqhdr not in self.jsonheader:
                    raise ValueError(f'Missing required header "{reqhdr}".')

    def process_request(self):
        """
        At this point we have the json header. We need to check for updates and respond.
        This is called by the read() function and after this function executes we only write.
        """
        client_no = self.jsonheader["client_id"]
        filename = self.db.checkUpdate(client_no)
        if filename == 0:
            self.filename = 0
            print('no file to send')
        else:
            self.filename = filename
            print(f'sending {self.filename} to {self.addr}')
        # Set selector to listen for write events, we're done reading.
        self.request = 1
        print('done reading....')
        print(self.jsonheader)
        self._set_selector_events_mask("w")

    def create_response(self):
        """
        This function opens the file to be sent (if any) and queues the
        length-prefixed JSON response header in _send_buffer.
        """
        if self.filename == 0:
            self.file_size = 0
        else:
            self.file = open(f'./serverFiles/{self.filename}', "rb")
            # Size the handle that was actually opened, so the advertised
            # length cannot refer to a different file than the one streamed.
            self.file_size = os.fstat(self.file.fileno()).st_size
        content_encoding = "utf-8"
        jsonheader = self.generate_response_header(sys.byteorder, 'text/json', content_encoding, self.file_size, self.filename)
        jsonheader_bytes = self._json_encode(jsonheader, "utf-8")
        message_hdr = struct.pack(">H", len(jsonheader_bytes))
        message = message_hdr + jsonheader_bytes
        self.response_created = True
        self._send_buffer += message

    def generate_response_header(self, byteorder, content_type, content_encoding, content_length, filename):
        """
        This function creates the jsonHeader to be sent.
        The jsonHeader contains important info such as file name, file length, md5 hash etc.

        The arguments are also recorded on the instance.  ``filename`` is 0
        when the client id was not found or the server has nothing to send;
        the returned header then has "content-length" 0 and "file-name" 0 and
        carries no "extension" or "md-5-hash" field.
        """
        self.byteorder = byteorder
        self.content_type = content_type
        self.content_encoding = content_encoding
        self.filename = filename
        # With no file to send the advertised length is always zero.
        self.content_length = 0 if self.filename == 0 else content_length

        json_header = {
            # Honour the caller-supplied byte order instead of silently
            # substituting sys.byteorder.
            "byteorder": self.byteorder,
            "content-type": self.content_type,
            "content-encoding": self.content_encoding,
            "content-length": self.content_length,
            "file-name": self.filename,
        }
        if self.filename == 0:
            return json_header

        # Text after the LAST dot: "a.tar.gz" -> "gz"; dot-less names give ""
        # instead of raising IndexError.
        _, dot, suffix = self.filename.rpartition(".")
        json_header["extension"] = suffix if dot else ""
        # Reuse this connection's database handle rather than opening another.
        json_header["md-5-hash"] = self.db.findFileHash(self.filename)
        return json_header

    def close(self):
        """Release the file handle, unregister the socket and close it.

        Failures while unregistering or closing are reported with ``print``
        and do not propagate.
        """
        print("closing connection to", self.addr)
        # The file opened by create_response() must not outlive the
        # connection; without this every transfer leaks a descriptor.
        if self.file is not None:
            try:
                self.file.close()
            except OSError as e:
                print(
                    "error: file.close() exception for",
                    f"{self.addr}: {repr(e)}",
                )
            finally:
                self.file = None
        try:
            self.selector.unregister(self.sock)
        except Exception as e:
            print(
                "error: selector.unregister() exception for",
                f"{self.addr}: {repr(e)}",
            )
        try:
            self.sock.close()
        except OSError as e:
            print(
                "error: socket.close() exception for",
                f"{self.addr}: {repr(e)}",
            )
        finally:
            # Delete reference to socket object for garbage collection
            self.sock = None
from databaseConnection import Database

# Fetch whatever Database.getAffiliationToken() returns and print it as-is.
data = Database()
affiliation = data.getAffiliationToken()
print(affiliation)
from databaseConnection import Database
import math

# Compute a base-10 IDF value for every affiliation token and push the values
# to the database in comma-joined batches of 1000 tokens.
# NOTE(review): the original indentation was lost; the two progress prints are
# assumed to belong to the batch-flush branch -- confirm.
# NOTE(review): no flush of a final partial batch (< 1000 tokens) is visible in
# this fragment -- check whether it happens further down the file.
data = Database()
affiliation_count = data.getAffiliationCount()
total_token_data = data.loadAffiliationTokens()

token_list = []
idf_list = []
for temp_tuple in total_token_data:
    token, df = temp_tuple[0], temp_tuple[1]
    # Round to four decimals and never go below 0.0001.
    idf = max(round(math.log(affiliation_count / df, 10), 4), 0.0001)
    token_list.append(str(token))
    idf_list.append(str(idf))
    if len(token_list) != 1000:
        continue
    # A full batch is ready: send it and start collecting the next one.
    token_str = ",".join(token_list)
    idf_str = ",".join(idf_list)
    data.updateAffiliationIdfValue(token_str, idf_str)
    token_list, idf_list = [], []
    print('update %s idf_value as: %s' % (token, str(idf)))
    print('-------------------')
# NOTE(review): the replace chain and `return content` below are the tail of a
# definition whose `def` line is above this view (presumably normalizeString,
# which the loop further down calls) -- confirm; the original indentation was
# lost, so these lines belong inside that function body.
# Punctuation and operator symbols are replaced by a single space.
content = content.replace('%', ' ')
content = content.replace(':', ' ')
content = content.replace('.', ' ')
content = content.replace('∙', ' ')
# The ring-like symbol is folded to the degree sign.
content = content.replace('˚', '°')
content = content.replace('~', ' ')
content = content.replace('=', ' ')
content = content.replace('≤', ' ')
# The typographic minus sign becomes an ASCII hyphen.
content = content.replace('−', '-')
# Greek letters are folded to Latin ones.
content = content.replace('α', 'a')
# NOTE(review): delta is mapped to 'b' here; this may have been meant for
# beta -- confirm.
content = content.replace('δ', 'b')
return content

# Get the row data from the database, plus the English stop-word list.
data = Database()
stopwords = nltk.corpus.stopwords.words('english')
synopses = data.getAbstract()
# Initialise the abstract-id list and the accumulator for the counts.
abstract_list=[]
TotalCountList = []
# Each row is read as i[0] (the abstract id, collected as a string) and i[1]
# (the text that is normalised, tokenised/stemmed, filtered and counted).
for i in synopses:
    abstract_id = i[0]
    abstract_list.append(str(abstract_id))
    content = normalizeString(i[1])
    allwords_stemmed = tokenize_and_stem(content)
    # Drop stop words after stemming.
    allwords_stemmed = [word for word in allwords_stemmed if word not in stopwords]
    count_list = count_words(allwords_stemmed)
from databaseConnection import Database
import math

# Compute a base-10 IDF value for every token row loaded for one therapeutic
# area, collecting tokens and values in batches of 1000.
# NOTE(review): the original indentation was lost and the fragment appears to
# be cut off -- the joined batch strings are not consumed by any visible
# statement; confirm what follows in the full file.
data = Database()
total_abstract_count = data.getDistinctAbstractNumberForTA('Family Medicine & Internal Medicine')
token_df_info = data.loadTokenTable(total_abstract_count, 0.2, 0.001)

token_list = []
idf_list = []
count = 0
for count, temp_tuple in enumerate(token_df_info, start=1):
    token_list.append(str(temp_tuple[0]))
    df = temp_tuple[1]
    # Round to four decimals and never go below 0.0001.
    idf = max(round(math.log(total_abstract_count / df, 10), 4), 0.0001)
    idf_list.append(str(idf))
    if len(token_list) != 1000:
        continue
    # A full batch is ready: build its comma-joined strings, then start over.
    idf_str = ",".join(idf_list)
    token_str = ",".join(token_list)
    token_list, idf_list = [], []