Example #1
import cx_Oracle

# G_file_no_dict, G_path_prefix and the helpers get_connection(),
# close_connection(), get_content_from_lob() and convert() are defined
# elsewhere in the surrounding module.


def get_file_content():
    if len(G_file_no_dict) == 0:
        print convert("索引号列表为空")  # "the index-number list is empty"
        return 0
    conn = get_connection()
    if conn == 0:
        return 0
    cursor = conn.cursor()

    # File names on this system are cp936 (GBK) encoded.
    result_file_name = (G_path_prefix + r"result.txt").decode('utf-8').encode('cp936')
    result_file = open(result_file_name, "a+")
    for file_uniq_no in G_file_no_dict.keys():
        sql = ("select d.mr_content from jhmr_file_content_TEXT d "
               "where d.file_unique_id = '%s'" % file_uniq_no)
        try:
            cursor.execute(sql)
            content_list = get_content_from_lob(cursor)
        except cx_Oracle.DatabaseError:
            print convert("获取病历内容失败")  # "failed to fetch the record content"
            result_file.close()
            close_connection(conn)
            return 0

        for item in content_list:
            print G_file_no_dict[file_uniq_no]
            result_file.write(G_file_no_dict[file_uniq_no])
            result_file.write("\r\n")
            # Re-encode the LOB content from cp936 to UTF-8 before writing.
            item = item.decode('cp936').encode('utf-8')
            result_file.write(item)
            result_file.write("\r\n")

    close_connection(conn)
    result_file.close()
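The snippet relies on a get_content_from_lob() helper that this page does not show. A minimal sketch of what it plausibly does, assuming each fetched row holds a single cx_Oracle LOB locator:

def get_content_from_lob(cursor):
    # LOB.read() pulls the whole value into a str; this is a guess at
    # the missing helper, not its actual definition.
    return [row[0].read() for row in cursor.fetchall()]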
Example #2

import re
import time

# anagramsFile (input path) and anagrams (output dict) are module globals.


def loadAnagrams():
    # Parses the file written by precomputeFromNLTK() in Example #4: each
    # line is "sortedletters:set(['word1', 'word2', ...])", a Python 2
    # set repr.
    print("Loading Anagrams...")
    stime = time.time()
    f = open(anagramsFile, "r")
    for line in f:
        words = re.split("[^a-zA-Z0-9]", line)
        n = len(words)
        key = words[0]
        entry = set()
        # words[1] is the literal "set" token of the repr, so real words
        # start at index 2; empty split tokens are filtered out below.
        for i in xrange(2, n):
            entry.add(words[i])
        anagrams[key] = filter(None, entry)
    f.close()
    print("Anagrams loaded: %d" % len(anagrams))
    print("Time taken: %s" % (time.time() - stime))
Example #3
import os
import tempfile
import zipfile
import StringIO

from django.http import HttpResponse

# build_library_color_ramp() and utils belong to the surrounding module.


def export_ramp_color_library(library, library_rules):
    dir_path = tempfile.mkdtemp(suffix='', prefix='tmp-library-')

    # Write each color ramp of each rule to its own .rmf file.
    for library_rule in library_rules:
        resource_path = os.path.join(dir_path, library_rule)
        os.makedirs(resource_path)
        for i, color_ramp in enumerate(library_rules[library_rule]):
            ramp_file = open(
                os.path.join(resource_path,
                             "color_ramp-%s-%d.rmf" % (color_ramp.name, i)),
                'w+')
            ramp_file.write(
                build_library_color_ramp(color_ramp.name,
                                         color_ramp.definition))
            ramp_file.close()

    # Pack the temporary directory into an in-memory zip.
    buf = StringIO.StringIO()
    z = zipfile.ZipFile(buf, "w")
    relroot = dir_path
    for root, dirs, files in os.walk(dir_path):
        rel_path = os.path.relpath(root, relroot)
        if rel_path != ".":
            z.write(root, rel_path)
        for name in files:
            filename = os.path.join(root, name)
            if os.path.isfile(filename):
                arcname = os.path.join(rel_path, name)
                z.write(filename, arcname)
    z.close()
    buf.seek(0)

    # Stream the archive back as a download, then clean up the temporaries.
    response = HttpResponse(content_type='application/zip; charset=utf-8')
    response['Content-Disposition'] = \
        'attachment; filename=' + library.name + '.zip'
    response.write(buf.read())

    utils.__delete_temporaries(dir_path)

    return response
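The walk-and-zip part is a reusable pattern on its own. A minimal standalone sketch of it, same Python 2 StringIO approach as above (the function name is ours):

import os
import zipfile
import StringIO

def zip_dir_to_buffer(dir_path):
    # Store every file under its path relative to dir_path.
    buf = StringIO.StringIO()
    z = zipfile.ZipFile(buf, "w")
    for root, dirs, files in os.walk(dir_path):
        for name in files:
            filename = os.path.join(root, name)
            z.write(filename, os.path.relpath(filename, dir_path))
    z.close()
    buf.seek(0)
    return buf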
Example #4

from nltk.corpus import words

# nltkHashMap (dict) and nltkAnagramsFile (output path) are module globals.


def precomputeFromNLTK():
    """
    Precompute with nltk's corpus as the word base.
    """
    language = set()
    print(len(words.words()))
    for word in words.words():
        word = word.lower()
        sortW = "".join(char for char in sorted(word))
        if sortW[0] >= "a" and sortW[0] <= "z":
            word = word + ":" + sortW
            language.add(word)
    print("Loaded %d words from NLTK wordnet" % (len(language)))
    # Bucket entries by word length: "word:sorted" is 2*len+1 characters,
    # so Python 2 integer division by 2 recovers the length. Assumes no
    # word of 25 letters or more.
    buckets = [set() for x in xrange(25)]
    for word in language:
        buckets[len(word) / 2].add(word)
    count = 0
    for word in language:
        if count % 1000 == 0:
            print("Done for %d words" % count)
        count += 1
        sortedW = word.split(":")[1]
        if sortedW not in nltkHashMap:
            nltkHashMap[sortedW] = set()
            # Only words of equal length can be anagrams, so only the
            # matching bucket has to be scanned.
            for word2 in buckets[len(sortedW)]:
                sortedW2 = word2.split(":")[1]
                if sortedW == sortedW2:
                    nltkHashMap[sortedW].add(word2.split(":")[0])
    out = open(nltkAnagramsFile, "w")
    count = 0
    for signature, anagramList in nltkHashMap.items():
        if count % 1000 == 0:
            print("%d anagram lists written" % count)
            out.flush()
        count += 1
        # Written as "signature:set([...])", the format loadAnagrams()
        # in Example #2 parses.
        out.write("%s:%s\n" % (signature, anagramList))
    out.close()
    print("Precomputation with NLTK done")
Example #5
# A method from a Python 2 `file` subclass that keeps a class-level
# registry of its open instances.
def close(self):
    file.close(self)
    self.__class__.open_files.remove(self)
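A sketch of the enclosing class this method implies; everything beyond close() and open_files is our assumption:

class TrackedFile(file):
    # Instances opened and not yet closed.
    open_files = []

    def __init__(self, *args, **kwargs):
        file.__init__(self, *args, **kwargs)
        self.__class__.open_files.append(self)

    def close(self):
        file.close(self)
        self.__class__.open_files.remove(self)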
Example #6
yesterday_emails = list()

# Gather yesterday's log lines; log_path, yesterday_date, today_date and
# tmp_path are defined earlier in the script.

log_file = open(log_path, 'r')
for line in log_file:
    date = line.split(" ")[0].strip()

    if date == yesterday_date.strip():
        yesterday_emails.append(line)

    if date == today_date.strip():
        break

log_file.close()

# Write yesterday's emails to a file.

yesterday_emails_path = tmp_path + '_yesterday_mails'
out_file = open(yesterday_emails_path, 'w')
for line in yesterday_emails:
    out_file.write(line)  # log lines already end in "\n"

out_file.close()

# Now it's time to find spammers: count the cwd field of yesterday's
# lines, most frequent first. The pipeline reads the file written above
# rather than a hardcoded /tmp path.

possible_spammers_file_path = tmp_path + '_spammers'
cmd = ("awk '$3 ~ /^cwd/{print $3}' " + yesterday_emails_path +
       " | sort | uniq -c | sed \"s|^ *||g\" | sort -nr ")
Example #7
# Scrapy pipeline hook: finish the export and close the per-spider file
# opened in spider_opened().
def spider_closed(self, spider):
    self.exporter.finish_exporting()
    out = self.files.pop(spider)
    out.close()
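The spider_opened() counterpart this hook implies, sketched from the usual Scrapy exporter-pipeline pattern (the CSV exporter and file name are our assumption):

from scrapy.exporters import CsvItemExporter

def spider_opened(self, spider):
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.start_exporting()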