示例#1
0
 def __init__(self, selector, sock, addr):
     """Record the connection handles and reset all per-connection state.

     Args:
         selector: Selector object stored on the instance as ``selector``.
         sock: Socket object stored on the instance as ``sock``.
         addr: Peer address stored on the instance as ``addr``.
     """
     # Handles supplied by the caller.
     self.selector, self.sock, self.addr = selector, sock, addr

     # Request/response progress: nothing parsed or built yet.
     self.jsonheader = self.request = self.response_created = None
     self.filename = None
     self._jsonheader_len = None

     # Raw byte buffers for inbound and outbound traffic (bytes are
     # immutable, so sharing the initial empty literal is safe).
     self._recv_buffer = self._send_buffer = b""

     # Database handle owned by this instance.
     self.db = Database()

     # File-transfer bookkeeping.
     self.file = self.file_size = None
     self.sending_file = False
     self.file_bytes_sent = 0
     self.file_buffer = b""
示例#2
0
    def generate_response_header(self, byteorder, content_type,
                                 content_encoding, content_length, filename):
        """
        Build the JSON header dict that precedes the response payload.

        The arguments are also stored on the instance (``byteorder``,
        ``content_type``, ``content_encoding``, ``content_length``,
        ``filename``).

        Args:
            byteorder: Stored on the instance only; the header itself always
                reports ``sys.byteorder``.
            content_type: Value for the "content-type" field.
            content_encoding: Value for the "content-encoding" field.
            content_length: Value for the "content-length" field (forced to 0
                when there is no file).
            filename: Name of the file to send, or ``0`` when there is
                nothing to send.

        Returns:
            dict: The header. With ``filename == 0`` it holds only the five
            base fields and "file-name" is 0; otherwise it additionally holds
            "extension" and "md-5-hash".
        """
        self.byteorder = byteorder
        self.content_type = content_type
        self.content_encoding = content_encoding
        self.content_length = content_length
        self.filename = filename

        # If client_id is not found or the server has nothing to send, the
        # header carries "file-name" = 0 and a zero content length.
        if self.filename == 0:
            self.content_length = 0

        jsonHeader = {
            "byteorder": sys.byteorder,
            "content-type": self.content_type,
            "content-encoding": self.content_encoding,
            "content-length": self.content_length,
            "file-name": self.filename
        }
        if self.filename == 0:
            return jsonHeader

        db = Database()
        md_5_hash = db.findFileHash(self.filename)

        # Take the text after the LAST dot so multi-dot names ("a.tar.gz")
        # yield the real extension, and names without a dot yield "" instead
        # of raising IndexError.
        _, dot, suffix = self.filename.rpartition('.')
        jsonHeader["extension"] = suffix if dot else ""
        jsonHeader["md-5-hash"] = md_5_hash
        return jsonHeader
    token_list = []
    tf_value_list = []
    affiliation_list = []
    for temp_tuple in TotalCountList:
        affiliation_list.append(str(temp_tuple[0]))
        token_list.append(temp_tuple[1].strip())
        tf_value_list.append(str(temp_tuple[2]))
    token_str = ",".join(token_list)
    tf_str = ",".join(tf_value_list)
    affiliation_str= ",".join(affiliation_list)
    data.insertAffiliationToken(affiliation_str, token_str,',', tf_str)
    print("inserted %s tokens into database" % str(len(token_list)))



# Script setup: create the database handle, load NLTK's English stop-word
# list, and fetch the affiliation rows to process.
data = Database()
stopwords = nltk.corpus.stopwords.words('english')
synopses = data.getAffiliation()

affiliation_list=[]
TotalCountList = []
count=0  # number of rows visited so far
for i in synopses:
    count += 1
    # NOTE(review): rows look like (affiliation id, text) -- confirm against
    # Database.getAffiliation().
    affiliation_id = i[0]
    # Normalize the text, then tokenize and stem it.
    content = normalizeString(i[1])
    allwords_stemmed = tokenize_and_stem(content)
    #print(allwords_stemmed)
    # Drop stop words from the stemmed tokens.
    allwords_stemmed = [word for word in allwords_stemmed if word not in stopwords]
    #print(allwords_stemmed)
    # Count the remaining tokens for this row.
    count_list = count_words(allwords_stemmed)
    content = content.replace('(', ' ')
    content = content.replace(')', ' ')
    content = content.replace('%', ' ')
    content = content.replace(':', ' ')
    content = content.replace('.', ' ')
    content = content.replace('∙', ' ')
    content = content.replace('˚', '°')
    content = content.replace('~', ' ')
    content = content.replace('=', ' ')
    content = content.replace('≤', ' ')
    content = content.replace('−', '-')
    content = content.replace('α', 'a')
    content = content.replace('δ', 'b')
    return content

# Script setup: create the database handle, load NLTK's English stop-word
# list, and fetch the abstracts for one therapeutic area.
data = Database()
stopwords = nltk.corpus.stopwords.words('english')
synopses = data.getAbstractByTherapeuticArea('Family Medicine & Internal Medicine')

abstract_list=[]  # abstract ids, stored as strings
TotalCountList = []

for i in synopses:
    # NOTE(review): rows look like (abstract id, text) -- confirm against
    # Database.getAbstractByTherapeuticArea().
    abstract_id = i[0]
    abstract_list.append(str(abstract_id))

    # Normalize the text, tokenize and stem it, drop stop words, then count
    # the remaining tokens.
    content = normalizeString(i[1])
    allwords_stemmed = tokenize_and_stem(content)
    allwords_stemmed = [word for word in allwords_stemmed if word not in stopwords]
    count_list = count_words(allwords_stemmed)
    #print(tuple_count)
示例#5
0
class Message:
    """Server-side handler for a single client connection.

    Lifecycle, as driven by ``process_events``:

    1. Read a 2-byte big-endian length prefix, then a UTF-8 JSON header of
       that length (must contain "content-type", "client_id" and
       "content-length").
    2. Ask the database whether a file is pending for that client.
    3. Reply with a length-prefixed JSON header and, if a file is pending,
       stream the file bytes afterwards.
    4. Close the connection (and erase the client's database entry once a
       file has been fully sent).
    """

    # Upper bound on file bytes buffered in memory between socket sends.
    _FILE_CHUNK_SIZE = 1024

    def __init__(self, selector, sock, addr):
        """Store the selector, socket and peer address and reset all state."""
        self.selector = selector
        self.sock = sock
        self.addr = addr
        self.jsonheader = None
        self.request = None
        self.response_created = None
        self.filename = None
        self._recv_buffer = b""
        self._send_buffer = b""
        self._jsonheader_len = None
        self.db = Database()
        self.file_size = None
        self.sending_file = False
        self.file_bytes_sent = 0
        self.file_buffer = b""
        self.file = None

    def _set_selector_events_mask(self, mode):
        """Switch the selector registration to 'r', 'w' or 'rw' events.

        Raises:
            ValueError: If ``mode`` is not one of the three accepted strings.
        """
        if mode == "r":
            events = selectors.EVENT_READ
        elif mode == "w":
            events = selectors.EVENT_WRITE
        elif mode == "rw":
            events = selectors.EVENT_READ | selectors.EVENT_WRITE
        else:
            raise ValueError(f"Invalid events mask mode {repr(mode)}.")

        self.selector.modify(self.sock, events, data=self)

    def _read(self):
        """Append up to 4096 received bytes to the receive buffer.

        Raises:
            RuntimeError: If the peer closed the connection (empty read).
        """
        try:
            data = self.sock.recv(4096)
        except BlockingIOError:
            # Resource temporarily unavailable (errno EWOULDBLOCK)
            pass
        else:
            if data:
                self._recv_buffer += data
            else:
                raise RuntimeError("Peer closed.")

    def _write(self):
        """Send as much of the header buffer as the socket accepts.

        Once the buffer is drained, either switch to file streaming or, when
        there is no file (``filename == 0``), close the connection.
        """
        if not self._send_buffer:
            return

        try:
            sent = self.sock.send(self._send_buffer)
        except BlockingIOError:
            # Resource temporarily unavailable (errno EWOULDBLOCK)
            return

        self._send_buffer = self._send_buffer[sent:]
        # When this buffer is drained we move on to the file itself.
        if sent and not self._send_buffer:
            if self.filename != 0:
                self.sending_file = True
            else:
                self.close()

    def send_file(self):
        """Send the next chunk of the open file and finish when done.

        Reads are capped at the advertised ``file_size`` so the client never
        receives more than "content-length" bytes. Completion is checked on
        every call, so a zero-byte file finishes immediately instead of
        leaving the connection open forever.
        """
        if self.file_bytes_sent < self.file_size:
            # Top up the in-memory chunk, never reading past file_size.
            pending = len(self.file_buffer)
            unread = self.file_size - self.file_bytes_sent - pending
            want = min(self._FILE_CHUNK_SIZE - pending, unread)
            if want > 0:
                self.file_buffer += self.file.read(want)

            try:
                sent = self.sock.send(self.file_buffer)
            except BlockingIOError:
                # Resource is unavailable; retry on the next write event.
                return

            self.file_buffer = self.file_buffer[sent:]
            self.file_bytes_sent += sent

        # The whole file has been sent: clear the client's entry and close.
        if self.file_bytes_sent >= self.file_size:
            client_no = self.jsonheader["client_id"]
            self.db.erase_entry(client_no)
            self.close()

    def _json_decode(self, json_bytes, encoding):
        """Decode ``json_bytes`` with ``encoding`` and parse it as JSON."""
        return json.loads(json_bytes.decode(encoding))

    def _json_encode(self, obj, encoding):
        """Serialize ``obj`` to JSON (non-ASCII kept as-is) and encode it."""
        return json.dumps(obj, ensure_ascii=False).encode(encoding)

    def process_events(self, mask):
        """Dispatch a selector event mask to ``read`` and/or ``write``."""
        if selectors.EVENT_READ & mask:
            self.read()
        if selectors.EVENT_WRITE & mask:
            self.write()

    def read(self):
        """Receive bytes and advance header parsing as far as possible."""
        self._read()

        if self._jsonheader_len is None:
            self.process_protoheader()

        if self._jsonheader_len is not None and self.jsonheader is None:
            self.process_jsonheader()

        if self.jsonheader:
            self.process_request()

    def write(self):
        """Build the response on first use, then send header or file bytes."""
        if self.request and not self.response_created:
            self.create_response()

        if self.sending_file:
            self.send_file()
        else:
            self._write()

    def process_protoheader(self):
        """Parse the 2-byte big-endian JSON-header length, if available."""
        hdrlen = 2

        if len(self._recv_buffer) >= hdrlen:
            self._jsonheader_len = struct.unpack(
                ">H", self._recv_buffer[:hdrlen])[0]
            self._recv_buffer = self._recv_buffer[hdrlen:]

    def process_jsonheader(self):
        """Parse the JSON header once enough bytes have arrived.

        Raises:
            ValueError: If a required header field is missing.
        """
        hdrlen = self._jsonheader_len

        if len(self._recv_buffer) >= hdrlen:
            self.jsonheader = self._json_decode(self._recv_buffer[:hdrlen],
                                                "utf-8")
            self._recv_buffer = self._recv_buffer[hdrlen:]

            # Checking if the header has all fields
            for reqhdr in ("content-type", "client_id", "content-length"):
                if reqhdr not in self.jsonheader:
                    raise ValueError(f'Missing required header "{reqhdr}".')

    def process_request(self):
        """
        Look up the pending file for the client and switch to writing.

        At this point the JSON header is available. ``db.checkUpdate`` returns
        0 when there is nothing to send; otherwise its result is used as the
        file name. After this call the selector only listens for write events.
        """
        client_no = self.jsonheader["client_id"]
        filename = self.db.checkUpdate(client_no)

        if filename == 0:
            self.filename = 0
            print('no file to send')
        else:
            self.filename = filename
            print(f'sending {self.filename} to {self.addr}')

        # Set selector to listen for write events, we're done reading.
        self.request = 1
        print('done reading....')
        print(self.jsonheader)
        self._set_selector_events_mask("w")

    def create_response(self):
        """
        Open the file to send (if any) and queue the response header.

        The header is the JSON header prefixed with its 2-byte big-endian
        length; the file bytes themselves are streamed later by ``send_file``.
        """
        if self.filename == 0:
            self.file_size = 0
        else:
            self.file = open(f'./serverFiles/{self.filename}', "rb")
            # Size the descriptor we actually opened rather than re-resolving
            # the path, so the size matches the file being streamed.
            self.file_size = os.fstat(self.file.fileno()).st_size

        content_encoding = "utf-8"

        jsonheader = self.generate_response_header(sys.byteorder, 'text/json',
                                                   content_encoding,
                                                   self.file_size,
                                                   self.filename)

        jsonheader_bytes = self._json_encode(jsonheader, "utf-8")
        message_hdr = struct.pack(">H", len(jsonheader_bytes))
        message = message_hdr + jsonheader_bytes

        self.response_created = True
        self._send_buffer += message

    def generate_response_header(self, byteorder, content_type,
                                 content_encoding, content_length, filename):
        """
        Build the JSON header dict that precedes the response payload.

        The arguments are also stored on the instance. The "byteorder" field
        of the header always reports ``sys.byteorder``; the ``byteorder``
        argument is only stored.

        Returns:
            dict: With ``filename == 0`` the five base fields, with
            "content-length" forced to 0 and "file-name" 0. Otherwise the
            base fields plus "extension" (text after the last dot, "" if
            there is none) and "md-5-hash" (from ``db.findFileHash``).
        """
        self.byteorder = byteorder
        self.content_type = content_type
        self.content_encoding = content_encoding
        self.content_length = content_length
        self.filename = filename

        # If client_id is not found or the server has nothing to send, the
        # header carries "file-name" = 0 and a zero content length.
        if self.filename == 0:
            self.content_length = 0

        jsonHeader = {
            "byteorder": sys.byteorder,
            "content-type": self.content_type,
            "content-encoding": self.content_encoding,
            "content-length": self.content_length,
            "file-name": self.filename
        }
        if self.filename == 0:
            return jsonHeader

        # Reuse the connection's database handle instead of opening another.
        md_5_hash = self.db.findFileHash(self.filename)

        # Text after the LAST dot: "a.tar.gz" -> "gz", "README" -> "" (the
        # previous split('.')[1] raised IndexError or picked the wrong part).
        _, dot, suffix = self.filename.rpartition('.')
        jsonHeader["extension"] = suffix if dot else ""
        jsonHeader["md-5-hash"] = md_5_hash
        return jsonHeader

    def close(self):
        """Release the open file, unregister the socket and close it.

        Errors from each step are printed rather than raised so that the
        remaining cleanup steps still run.
        """
        print("closing connection to", self.addr)

        # Release the file handle opened by create_response(), if any.
        if self.file is not None:
            try:
                self.file.close()
            except OSError as e:
                print(
                    "error: file.close() exception for",
                    f"{self.addr}: {repr(e)}",
                )
            finally:
                self.file = None

        try:
            self.selector.unregister(self.sock)
        except Exception as e:
            print(
                "error: selector.unregister() exception for",
                f"{self.addr}: {repr(e)}",
            )

        try:
            self.sock.close()
        except OSError as e:
            print(
                "error: socket.close() exception for",
                f"{self.addr}: {repr(e)}",
            )
        finally:
            # Delete reference to socket object for garbage collection
            self.sock = None
from databaseConnection import Database

# Fetch the affiliation tokens from the database and print them as returned.
data = Database()
affiliation = data.getAffiliationToken()

print(affiliation)
from databaseConnection import Database
import math


# Compute an IDF value per affiliation token and write the values back to the
# database in batches of 1000 tokens.
data = Database()
affiliation_count = data.getAffiliationCount()
total_token_data = data.loadAffiliationTokens()

token_list=[]  # tokens in the current batch (as strings)
idf_list=[]    # matching IDF values in the current batch (as strings)
for temp_tuple in total_token_data:

    # NOTE(review): rows look like (token, document frequency) -- confirm
    # against Database.loadAffiliationTokens().
    token = temp_tuple[0]
    df = temp_tuple[1]
    # idf = log10(affiliation_count / df), rounded to 4 decimals and floored
    # at 0.0001 so it never reaches zero.
    idf = math.log(affiliation_count / df, 10)
    idf = round(idf, 4)
    if (idf < 0.0001):
        idf = 0.0001
    token_list.append(str(token))
    idf_list.append(str(idf))
    # Flush a full batch: pass both lists as comma-joined strings, then reset.
    # NOTE(review): a final partial batch (< 1000 tokens) is not flushed
    # inside this loop -- confirm it is handled after the loop.
    if(len(token_list) == 1000):
        token_str = ",".join(token_list)
        idf_str = ",".join(idf_list)
        data.updateAffiliationIdfValue(token_str, idf_str)
        token_list = []
        idf_list = []
        # Progress output: shows the last token/IDF pair of the flushed batch.
        print('update %s idf_value as: %s' % (token, str(idf)))
        print('-------------------')
    content = content.replace('%', ' ')
    content = content.replace(':', ' ')
    content = content.replace('.', ' ')
    content = content.replace('∙', ' ')
    content = content.replace('˚', '°')
    content = content.replace('~', ' ')
    content = content.replace('=', ' ')
    content = content.replace('≤', ' ')
    content = content.replace('−', '-')
    content = content.replace('α', 'a')
    content = content.replace('δ', 'b')
    return content


# Script setup: create the database handle, load NLTK's English stop-word
# list, and fetch the abstract rows to process.
data = Database()
stopwords = nltk.corpus.stopwords.words('english')
synopses = data.getAbstract()

# Accumulators: abstract ids (as strings) and the overall count list.
abstract_list=[]
TotalCountList = []

for i in synopses:
    # NOTE(review): rows look like (abstract id, text) -- confirm against
    # Database.getAbstract().
    abstract_id = i[0]
    abstract_list.append(str(abstract_id))

    # Normalize the text, tokenize and stem it, drop stop words, then count
    # the remaining tokens.
    content = normalizeString(i[1])
    allwords_stemmed = tokenize_and_stem(content)
    allwords_stemmed = [word for word in allwords_stemmed if word not in stopwords]
    count_list = count_words(allwords_stemmed)
from databaseConnection import Database
import math

# Compute an IDF value per token for one therapeutic area, collecting the
# results in batches of 1000 tokens.
data = Database()
total_abstract_count = data.getDistinctAbstractNumberForTA('Family Medicine & Internal Medicine')

# NOTE(review): 0.2 and 0.001 are presumably upper/lower document-frequency
# cut-offs -- confirm against Database.loadTokenTable().
token_df_info =  data.loadTokenTable(total_abstract_count, 0.2, 0.001)
#print("total abstract count: " + str(total_abstract_count))

token_list = []  # tokens in the current batch (as strings)
idf_list = []    # matching IDF values in the current batch (as strings)
count=0          # number of rows visited so far

for temp_tuple in token_df_info:
    count += 1
    token_list.append(str(temp_tuple[0]))

    # idf = log10(total_abstract_count / df), rounded to 4 decimals and
    # floored at 0.0001 so it never reaches zero.
    df = temp_tuple[1]
    idf = math.log(total_abstract_count/df, 10)
    idf = round(idf, 4)
    if(idf <0.0001):
        idf = 0.0001
    idf_list.append(str(idf))

    # A full batch is joined into comma-separated strings and the lists reset.
    # NOTE(review): nothing visible here consumes idf_str/token_str before the
    # reset -- confirm whether a database update call is missing.
    if (len(token_list) == 1000):
        idf_str = ",".join(idf_list)
        token_str = ",".join(token_list)
        #print(idf_str)
        #print(token_str)
        token_list = []
        idf_list = []