Пример #1
0
    def write_eval_result_matrix_to_file(self, file_path):
        """Serialize this eval result matrix to a json file at file_path.

        Missing entries are stored as the string "None" so the nested
        structure stays json-compatible.
        """
        json_compatible_eval_data = {
            elem_thresh: {
                m_thresh: {
                    d_thresh: ("None" if self.is_none_entry(
                        elem_thresh, m_thresh, d_thresh) else self._eval_data[
                            elem_thresh][m_thresh][d_thresh])
                    for d_thresh in self.file_level_threshs
                }
                for m_thresh in self.maj_threshs
            }
            for elem_thresh in self.elem_threshs
        }

        complete_dict = {
            self.EVAL_DATA: json_compatible_eval_data,
            self.SOL_MATRIX_SIZE: self._sol_matrix_size,
            self.ELEM_THRESHS: self.elem_threshs,
            self.MAJ_TRHESHS: self.maj_threshs,
            self.FILE_LEVEL_TRHESHS: self.file_level_threshs,
        }

        FileUtil.write_dict_to_json(file_path, complete_dict)
Пример #2
0
    def __init__(self, file_name=None):
        """Build a DriversDict of Driver objects, optionally from a file.

        Requires: if given, file_name is a str naming a .txt file whose
        lines describe drivers as in the general specification (omitted
        here for the sake of readability).

        Ensures: a DriversDict mapping driver name -> Driver for every
        driver listed in the file, or an empty dict when file_name is
        None.
        """
        UserDict.__init__(self)

        if file_name is None:
            return

        for line in FileUtil(file_name).getContent():
            fields = line.rstrip().split(", ")
            name = fields.pop(DriversDict.INDEXDriverName)
            entry_time, accum_time = fields
            self[name] = Driver(name, Time(entry_time), Time(accum_time))
Пример #3
0
    def _precalculate_spacy_lemmatizer(cls, spacy_lemmatizer, dataset_tuple,
                                       output_path):
        """Precompute a word -> lemma map over all dataset files and write
        it to output_path as a csv (one row per word, lemma column).

        spacy_lemmatizer: callable returning spacy tokens (with .lemma_)
        for a given word.
        dataset_tuple: iterable of (dataset, code_pre, code_tok, req_pre,
        req_tok) tuples; req and code folders of each dataset are scanned.
        """
        word_to_lemma_map = {}

        def iterate_files(tokenizer, preprocessor, folder):
            # Tokenize and preprocess every file, then lemmatize each token.
            for file in FileUtil.get_files_in_directory(folder, True):
                file_representation = tokenizer.tokenize(file)
                file_representation.preprocess(preprocessor)
                for word in file_representation.token_list:
                    lemma = [token.lemma_ for token in spacy_lemmatizer(word)]
                    if len(lemma) > 1:
                        log.info(
                            f"More than one lemma {lemma} for \"{word}\". Using \"{''.join(lemma)}\" as lemma"
                        )
                    lemma = "".join(lemma)
                    if word in word_to_lemma_map:
                        if not word_to_lemma_map[word] == lemma:
                            # Bug fix: this message referenced
                            # word_to_lemma_dataframe, which is not defined
                            # until after the loops -> NameError on any
                            # conflicting duplicate.
                            log.info(
                                f"Different Duplicate Lemma for {word}: {word_to_lemma_map[word]} <-> {lemma}"
                            )
                    else:
                        word_to_lemma_map[word] = lemma

        for dataset, code_pre, code_tok, req_pre, req_tok in dataset_tuple:
            iterate_files(req_tok, req_pre, dataset.req_folder())
            iterate_files(code_tok, code_pre, dataset.code_folder())

        word_to_lemma_dataframe = pandas.DataFrame.from_dict(
            word_to_lemma_map, orient="index", columns=[cls.COLUMN_LEMMA])
        FileUtil.write_dataframe_to_csv(word_to_lemma_dataframe, output_path)
Пример #4
0
def copyImage(target_path, microsoft_path, image_files):
    """Copy Microsoft wallpaper files into per-resolution folders under
    target_path, skipping files that already exist there.

    image_files maps file name -> resolution sub-folder name.
    """
    print('\n---------------------- start 比较和copy ----------------------')
    if not os.path.isdir(target_path):
        print("%s文件夹未创建" % target_path)
        FileUtil.mkdir(target_path)

    num = 0
    for key, value in image_files.items():
        # Target path + resolution sub-folder for this file.
        now_path = target_path + '\\' + value

        # Create the resolution sub-folder when it does not exist yet.
        if not os.path.isdir(now_path):
            FileUtil.mkdir(now_path)

        # Copy only files that are not already present in the target.
        if key + '.png' in os.listdir(now_path):
            print(key, '文件已存在')
        else:
            num += 1
            print(str(num) + '. ' + key + '文件不存在-copy文件')
            shutil.copyfile(microsoft_path + '\\' + key,
                            now_path + '\\' + key + '.png')

    print('---------------------- end 比较和copy ----------------------\n')
Пример #5
0
def main():
    """Render every datapoint as an image, printing progress every 10
    datapoints as a percentage of the hard-coded expected total (170000).
    """
    index = 0
    for dp in FileUtil.get_all_datapoints():
        FileUtil.save_dp_as_image(dp, PROCESSED_WAVE_DIRECTORY, dp.filename)
        index += 1
        if not index % 10:
            # Bug fix: "%%" in a plain (non-%-formatted) string printed a
            # literal double percent sign; a single "%" is intended.
            print(100 * index / 170000, "% done")
Пример #6
0
 def __init__(self):
     '''
     Load the one-hot-encoded train and test datasets from csv files.

     train_x/train_y come from ./Dataset/train.csv and test_x from
     ./Dataset/test.csv, both read through FileUtil.readCSV.
     '''
     self.train_x, self.train_y = FileUtil.readCSV('train',
                                                   './Dataset/train.csv')
     self.test_x = FileUtil.readCSV('test', './Dataset/test.csv')
Пример #7
0
def checkPreConditions(nextPeriod, driversFileName, vehiclesFileName,
                       servicesFileName, reservationsFileName):
    """Checks the preconditions.

    Requires: the same as update (omitted here to avoid redundancy).
    Ensures: returns bool value False if some of the conditions are not
    met and True otherwise.
    """
    drivers_header = FileUtil(driversFileName).getHeader()
    vehicles_header = FileUtil(vehiclesFileName).getHeader()
    services_header = FileUtil(servicesFileName).getHeader()
    reservations_header = FileUtil(reservationsFileName).getHeader()

    previousPeriod = Time().getPreviousPeriod(nextPeriod)
    # File headers spell periods as "HH:00 - HH:00"; build that form here.
    next_period_header = nextPeriod[0:2] + ":00 - " + nextPeriod[2:4] + ":00"
    previous_period_header = (previousPeriod[0:2] + ":00 - " +
                              previousPeriod[2:4] + ":00")

    # nextPeriod must be one of the two-hour slots 0911, 1113, ..., 1921.
    if nextPeriod not in ['0911', '1113', '1315', '1517', '1719', '1921']:
        return False

    # All four files must concern the same company and the same day.
    company_and_date = drivers_header[INDEXCompany:INDEXDate + 1]
    if not (company_and_date == vehicles_header[INDEXCompany:INDEXDate + 1]
            and company_and_date ==
            services_header[INDEXCompany:INDEXDate + 1]
            and company_and_date ==
            reservations_header[INDEXCompany:INDEXDate + 1]):
        return False

    # The reservations file must concern the period given by nextPeriod.
    if reservations_header[INDEXPeriod].strip() != next_period_header:
        return False

    # Drivers, vehicles and services files must concern the period
    # immediately preceding nextPeriod.
    if not (drivers_header[INDEXPeriod].strip() == previous_period_header
            and vehicles_header[INDEXPeriod].strip() ==
            previous_period_header
            and services_header[INDEXPeriod].strip() ==
            previous_period_header):
        return False

    # The reservations file name must end (before .txt) with nextPeriod.
    if reservationsFileName[-8:-4] != nextPeriod:
        return False

    # The other three file names must end (before .txt) with the period
    # immediately preceding nextPeriod, from the set 0709, 0911, ..., 1719.
    if not (driversFileName[-8:-4] == previousPeriod
            and vehiclesFileName[-8:-4] == previousPeriod
            and servicesFileName[-8:-4] == previousPeriod):
        return False

    return True
Пример #8
0
    def precalculate_tracelinks(self, output_precalculated_req_filename,
                                output_precalculated_code_filename,
                                req_embedding_creator, code_embedding_creator):
        """Create req/code embeddings, persist them as json files and load
        the precalculated result.

        Falsy output filenames are replaced by defaults derived from the
        corresponding embedding creator class name.
        """
        # NOTE(review): the original kept commented-out code (defaulting the
        # embedding creators from self._word_emb_creator) inside its
        # docstring; removed as dead code.
        if not output_precalculated_req_filename:
            output_precalculated_req_filename = self.default_precalculated_filename(
                req_embedding_creator.__class__.__name__)
        if not output_precalculated_code_filename:
            output_precalculated_code_filename = self.default_precalculated_filename(
                code_embedding_creator.__class__.__name__)

        req_embeddings = self._create_req_embeddings(req_embedding_creator)
        code_embeddings = self._create_code_embeddings(code_embedding_creator)

        FileUtil.write_dict_to_json(
            output_precalculated_req_filename,
            [req_emb.to_json() for req_emb in req_embeddings])
        FileUtil.write_dict_to_json(
            output_precalculated_code_filename,
            [code_emb.to_json() for code_emb in code_embeddings])

        self.build_precalculated_name_and_load(
            req_embedding_creator.__class__.__name__,
            code_embedding_creator.__class__.__name__)
Пример #9
0
    def _process_eval_results(self,
                              eval_result_matrix: EvalMatrix,
                              output_file_suffix=""):
        """Evaluate the single configured threshold combination and write
        a recall/MAP csv; logs an error when the matrix has no entry for
        that combination.

        Requires exactly one elem, one majority and one file-level
        threshold in the run config (asserted below).
        """
        # Typo fix in the log message: "Generationg" -> "Generating".
        log.info("Generating csv...: ")
        assert len(
            self._run_config.elem_thresholds
        ) == 1, "Elem threshold needs to be a single threshold value"
        assert len(
            self._run_config.majority_thresholds
        ) == 1, "majority threshold needs to be a single threshold value"
        assert len(
            self._run_config.file_level_thresholds
        ) == 1, "file level threshold needs to be a single threshold value"

        e_thresh = self._run_config.elem_thresholds[0]
        m_thresh = self._run_config.majority_thresholds[0]
        f_thresh = self._run_config.file_level_thresholds[0]
        if not eval_result_matrix.is_none_entry(e_thresh, m_thresh, f_thresh):
            all_links = eval_result_matrix.all_trace_links(
                e_thresh, m_thresh, f_thresh)
            recall_map_dict = Evaluator.evaluateMAPRecall(
                all_links, self._trace_link_processor._dataset,
                self._trace_link_processor._run_config.reverse_compare)

            output_file_name = csv_recall_map_filename(
                self._trace_link_processor._dataset, output_file_suffix)
            FileUtil.write_recall_precision_csv(recall_map_dict,
                                                output_file_name)

            log.info("... Done: ")
        else:
            log.error(
                f"No trace links for e{e_thresh} m{m_thresh} f{f_thresh}")
Пример #10
0
    def _process_eval_results(self,
                              eval_result_matrix: EvalMatrix,
                              output_file_suffix=""):
        # Collect (recall -> precision) pairs from the eval matrix and
        # write them to a recall/precision csv file.
        log.info("Generationg csv...: ")
        for elem_thresh in eval_result_matrix.elem_threshs:
            recall_prec_dict = {
            }  # use this to override duplicate recall values
            for m_thresh in eval_result_matrix.maj_threshs:
                for f_thresh in eval_result_matrix.file_level_threshs:
                    if not eval_result_matrix.is_none_entry(
                            elem_thresh, m_thresh, f_thresh):
                        recall = eval_result_matrix.recall(
                            elem_thresh, m_thresh, f_thresh)
                        prec = eval_result_matrix.precision(
                            elem_thresh, m_thresh, f_thresh)
                        # Skip the degenerate all-zero point.
                        if recall == 0 and prec == 0:
                            continue
                        recall_prec_dict[recall] = prec

            #threshold_name = "_e{}m{}_".format(elem_thresh, self._run_config.majority_print[m_thresh])
        # NOTE(review): this write sits OUTSIDE the elem_thresh loop, so
        # only the dict of the last elem threshold is written (earlier ones
        # are discarded), and it raises NameError when elem_threshs is
        # empty. The commented line above suggests the write was meant to
        # happen once per elem_thresh — confirm intent before changing.
        threshold_name = ""
        output_file_name = csv_recall_precision_filename(self._trace_link_processor._dataset, self._trace_link_processor.output_prefix() \
                                                                           + threshold_name + output_file_suffix)
        FileUtil.write_recall_precision_csv(recall_prec_dict, output_file_name)
        #FileUtil.write_dict_to_json(str(Paths.ROOT / output_file_suffix) + ".json", recall_prec_dict)
        log.info("... Done: ")
Пример #11
0
    def __init__(self, file_name=None):
        """Build a ReservationsList of Reservation objects, optionally
        from a file.

        Requires: if given, file_name is a str naming a .txt file whose
        lines describe reservations as in the general specification
        (omitted here for the sake of readability).

        Ensures: a list with one Reservation per reservation listed in
        the file, or an empty list when file_name is None.
        """
        UserList.__init__(self)

        if file_name is None:
            return

        for line in FileUtil(file_name).getContent():
            fields = line.rstrip().split(", ")
            reservation = Reservation(
                fields[ReservationsList.INDEXClientNameInReservation],
                Time(fields[ReservationsList.INDEXRequestedStartHour]),
                Time(fields[ReservationsList.INDEXRequestedEndHour]),
                fields[ReservationsList.INDEXCircuitInReservation],
                fields[ReservationsList.INDEXCircuitKmsInReservation])
            self.append(reservation)
Пример #12
0
  def __init__(self, args):
    """Set up the warehouse MySQL connection and Oracle ETL input paths.

    args: dict of job configuration values keyed by Constant.* keys.
    """
    self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)

    # Warehouse DB connection settings.
    username = args[Constant.WH_DB_USERNAME_KEY]
    password = args[Constant.WH_DB_PASSWORD_KEY]
    JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
    JDBC_URL = args[Constant.WH_DB_URL_KEY]

    self.db_id = args[Constant.JOB_REF_ID_KEY]
    self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
    self.conn_mysql = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
    self.conn_cursor = self.conn_mysql.cursor()

    # Optionally relax the InnoDB lock wait timeout for long-running loads.
    if Constant.INNODB_LOCK_WAIT_TIMEOUT in args:
      lock_wait_time = args[Constant.INNODB_LOCK_WAIT_TIMEOUT]
      self.conn_cursor.execute("SET innodb_lock_wait_timeout = %s;" % lock_wait_time)

    # Input csv files produced by the preceding Oracle extract step.
    temp_dir = FileUtil.etl_temp_dir(args, "ORACLE")
    self.input_table_file = os.path.join(temp_dir, args[Constant.ORA_SCHEMA_OUTPUT_KEY])
    self.input_field_file = os.path.join(temp_dir, args[Constant.ORA_FIELD_OUTPUT_KEY])
    self.input_sample_file = os.path.join(temp_dir, args[Constant.ORA_SAMPLE_OUTPUT_KEY])

    # Sample data loading is opt-in via configuration.
    self.collect_sample = False
    if Constant.ORA_LOAD_SAMPLE in args:
      self.collect_sample = FileUtil.parse_bool(args[Constant.ORA_LOAD_SAMPLE], False)

    self.logger.info("Load Oracle Metadata into {}, db_id {}, wh_exec_id {}"
                     .format(JDBC_URL, self.db_id, self.wh_etl_exec_id))
Пример #13
0
  def __init__(self, args):
    """Set up the warehouse MySQL connection and Oracle ETL input paths.

    args: dict of job configuration values keyed by Constant.* keys.
    Near-duplicate of the sibling loader; this variant takes its db id
    from Constant.DB_ID_KEY.
    """
    self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)

    # Warehouse DB connection settings.
    username = args[Constant.WH_DB_USERNAME_KEY]
    password = args[Constant.WH_DB_PASSWORD_KEY]
    JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
    JDBC_URL = args[Constant.WH_DB_URL_KEY]

    self.db_id = args[Constant.DB_ID_KEY]
    self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
    self.conn_mysql = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
    self.conn_cursor = self.conn_mysql.cursor()

    # Optionally relax the InnoDB lock wait timeout for long-running loads.
    if Constant.INNODB_LOCK_WAIT_TIMEOUT in args:
      lock_wait_time = args[Constant.INNODB_LOCK_WAIT_TIMEOUT]
      self.conn_cursor.execute("SET innodb_lock_wait_timeout = %s;" % lock_wait_time)

    # Input csv files produced by the preceding Oracle extract step.
    temp_dir = FileUtil.etl_temp_dir(args, "ORACLE")
    self.input_table_file = os.path.join(temp_dir, args[Constant.ORA_SCHEMA_OUTPUT_KEY])
    self.input_field_file = os.path.join(temp_dir, args[Constant.ORA_FIELD_OUTPUT_KEY])
    self.input_sample_file = os.path.join(temp_dir, args[Constant.ORA_SAMPLE_OUTPUT_KEY])

    # Sample data loading is opt-in via configuration.
    self.collect_sample = False
    if Constant.ORA_LOAD_SAMPLE in args:
      self.collect_sample = FileUtil.parse_bool(args[Constant.ORA_LOAD_SAMPLE], False)

    self.logger.info("Load Oracle Metadata into {}, db_id {}, wh_exec_id {}"
                     .format(JDBC_URL, self.db_id, self.wh_etl_exec_id))
Пример #14
0
    def __init__(self, file_name=None):
        """Creates a VehiclesDict composed by Vehicle objects,
        from a file with a list of vehicles.

        Requires: If given, file_name is str with the name of a .txt file
        containing a list of vehicles organized as in the examples
        provided in the general specification (omitted here for the sake
        of readability).
        Ensures:
        if file_name is given:
            a VehiclesDict, composed by objects of class Vehicle that
            correspond to the vehicles listed in the file.
        if file_name is None:
            an empty VehiclesDict.
        """
        UserDict.__init__(self)

        # Bug fix: file_name defaults to None and the docstring promises
        # an empty dict in that case, but the original unconditionally
        # opened the file; guard it like the sibling DriversDict does.
        if file_name is not None:
            inFile = FileUtil(file_name)
            for line in inFile.getContent():
                vehicleData = line.rstrip().split(", ")
                vehiclePlate = vehicleData.pop(VehiclesDict.INDEXVehiclePlate)
                vehicleModel, vehicleAutonomy, vehicleKms = vehicleData
                newVehicle = Vehicle(vehiclePlate, vehicleModel,
                                     vehicleAutonomy, vehicleKms)

                self[vehiclePlate] = newVehicle
Пример #15
0
def switch_4g(main_obj):
    """Configure eth0 networking and optionally bring up the 4G modem.

    main_obj: object owning start_server_thread(), or None to skip
    starting the server thread. Behavior is driven by Constant.* values.
    """
    print("配置MAC:" + str(Constant.MAC))
    mac_cmd = "ifconfig eth0 hw ether " + Constant.MAC + " & ifconfig lo up"
    os.system(mac_cmd)
    time.sleep(2)
    cmd = "ifconfig eth0 " + str(Constant.IP) + " netmask " + str(
        Constant.IPMASK)
    print(cmd)
    os.system(cmd)
    if main_obj is not None:
        main_obj.start_server_thread()
    if "1" == Constant.G4EN:
        Constant.network = 2
        print("开启4G")
        # Pulse the modem GPIO, then wait for the modem to boot before
        # issuing the AT dial command and bringing the interface up.
        Constant.wtire_gpio(1, 0)
        time.sleep(3)
        Constant.wtire_gpio(1, 1)
        time.sleep(30)
        os.system('echo "AT\$QCRMCALL=1,1" > /dev/ttyUSB2')
        time.sleep(10)
        os.system("ifconfig wwan0 up")
        time.sleep(2)
        os.system("udhcpc -i wwan0 &")
        print("启用4G完成")
    FileUtil.write_dns(Constant.NAMESERVER_PATH, Constant.DNS1, Constant.DNS2)
    cmd = "route add default gw " + Constant.GATEWAY
    print(cmd)
    # Fix: reuse the already-built command instead of rebuilding the same
    # string inline (the original duplicated the concatenation).
    os.system(cmd)
    print("网络配置完成")
Пример #16
0
def process_case3(as_file, dir_in, out_file_path):
    # For every 'God Component' smell row in <project>_ArchSmells.csv,
    # extract the reason (either a class count or a LOC figure), count the
    # 'Insufficient Modularization' design smells in the same namespace,
    # and append one csv row per smell to out_file_path.
    project, *rest = as_file.split("_ArchSmells.csv")
    if os.path.isfile(os.path.join(dir_in, project + "_DesignSmells.csv")):
        ds_file = os.path.join(dir_in, project + "_DesignSmells.csv")
        with open(os.path.join(dir_in, as_file)) as asf:
            for line in asf:
                smell, aproject, namespace, cause, *rest = line.split(",")
                if (smell == 'God Component'):
                    reason = ""
                    # reason_class=True -> reason is a class count;
                    # False -> reason is a LOC figure.
                    reason_class = True
                    for m in re.finditer(r'component are: (\d+)', cause,
                                         re.IGNORECASE):
                        reason = m.group(1)
                        reason_class = True
                    # Fall back to the LOC-based reason when no class
                    # count was found in the cause text.
                    if reason == "":
                        for m in re.finditer(r'LOC of the component: (\d+)',
                                             cause, re.IGNORECASE):
                            reason = m.group(1)
                            reason_class = False
                    insuff_abs = 0
                    # NOTE(review): the design-smell file is re-read for
                    # every God Component line (O(n*m)); consider caching
                    # the per-namespace counts if this becomes slow.
                    with open(ds_file) as dsf:
                        for ds_line in dsf:
                            dsmell, dnamespace, *drest = ds_line.split(",")
                            if namespace == dnamespace:
                                if dsmell == "Insufficient Modularization":
                                    insuff_abs += 1
                    # Column layout differs: class-count reason goes in the
                    # 4th column, LOC reason in the 5th.
                    if reason_class:
                        FileUtil.writeFile(
                            out_file_path, aproject + "," + namespace + ",1," +
                            str(reason) + ",," + str(insuff_abs))
                    else:
                        FileUtil.writeFile(
                            out_file_path, aproject + "," + namespace +
                            ",1,," + str(reason) + "," + str(insuff_abs))
Пример #17
0
def process_GC(line, OUT_FILE_PATH, dir):
    """Extract the 'God Component' smell reason (class count) from a csv
    line and append a row to smellsInfo_GC.csv under OUT_FILE_PATH."""
    smell, project, namespace, cause, *rest = line.split(",")
    if smell != 'God Component':
        return
    for match in re.finditer(r'component are: (\d+)', cause, re.IGNORECASE):
        reason = match.group(1)
        row = (dir + "," + smell + "," + project + "," + namespace + "," +
               reason)
        FileUtil.writeFile(os.path.join(OUT_FILE_PATH, "smellsInfo_GC.csv"),
                           row)
Пример #18
0
 def __init__(self,
              file_level_similarity_csv_file,
              reverse_similarity=False,
              req_file_ext=None,
              code_file_ext=None):
     """Load a file-level similarity matrix from a csv file.

     All similarities have to be between 0 and 1.
     reverse_similarity=True means the smaller the better.
     Optional req_file_ext/code_file_ext rewrite the row/column file
     names to carry the given extensions.
     """
     self._similarity_dataframe = FileUtil.read_csv_to_dataframe(
         file_level_similarity_csv_file)
     self._file_path = file_level_similarity_csv_file
     self._reverse_similarity = reverse_similarity

     if req_file_ext is not None:
         renamed_rows = {
             req: FileUtil.set_extension(req, req_file_ext)
             for req in self.all_req_files()
         }
         self._similarity_dataframe.rename(index=renamed_rows,
                                           inplace=True)

     if code_file_ext is not None:
         renamed_cols = {
             code: FileUtil.set_extension(code, code_file_ext)
             for code in self.all_code_files()
         }
         self._similarity_dataframe.rename(columns=renamed_cols,
                                           inplace=True)
Пример #19
0
 def __init__(self, file_path, vector=None, sub_vectors=None):
     """Hold a file's embedding vector plus optional sub vectors.

     file_path: path of the represented file; file-name variants are
     derived from it via FileUtil.
     """
     self.file_path = file_path
     self.vector = vector
     # Bug fix: the original used a mutable default argument
     # (sub_vectors=[]) shared across every instance created without the
     # argument; use a fresh list per instance instead.
     self.sub_vectors = [] if sub_vectors is None else sub_vectors
     self.file_name = FileUtil.get_filename_from_path(self.file_path)
     self.file_name_without_extension = FileUtil.get_filename_without_extension__from_path(
         self.file_path)
Пример #20
0
    def __init__(self, file_name=None):
        """Creates a VehiclesDict composed by Vehicle objects,
        from a file with a list of vehicles.

        Requires: If given, file_name is str with the name of a .txt file
        containing a list of vehicles organized as in the examples
        provided in the general specification (omitted here for the sake
        of readability).
        Ensures:
        if file_name is given:
            a VehiclesDict, composed by objects of class Vehicle that
            correspond to the vehicles listed in the file.
        if file_name is None:
            an empty VehiclesDict.
        """
        UserDict.__init__(self)

        # Bug fix: file_name defaults to None and the docstring promises
        # an empty dict in that case, but the original unconditionally
        # opened the file; guard it like the sibling DriversDict does.
        if file_name is not None:
            inFile = FileUtil(file_name)
            for line in inFile.getContent():
                vehicleData = line.rstrip().split(", ")
                vehiclePlate = vehicleData.pop(VehiclesDict.INDEXVehiclePlate)
                vehicleModel, vehicleAutonomy, vehicleKms = vehicleData
                newVehicle = Vehicle(vehiclePlate, vehicleModel,
                                     vehicleAutonomy, vehicleKms)

                self[vehiclePlate] = newVehicle
Пример #21
0
def getEssay():
    # Scrape essays 84..99 from luoo.net: save each essay's text to
    # ./<title>/<title>.txt and download every inline image next to it.
    # (Python 2 code: print statements, urllib2/urllib.)
    print 'about to get essay'
    baseurlLoo = 'http://www.luoo.net/essay/';
    for x in range(84,100):
        print x
        content = urllib2.urlopen(baseurlLoo+str(x)).read();
        soup = BeautifulSoup(''.join(content));
        # Pages that render an error box do not exist; skip them.
        if soup.findAll('div','error-msg'):
            continue;
        else :
            title = soup.find('h1','essay-title').text

            essayCont = soup.find('div','essay-content').text;

            FileUtil.mkDir('./'+title)
            FileUtil.saveFile('./'+title+'/'+title+'.txt',essayCont)
            Essay = soup.find('div','essay-content');

            picUrls = Essay.findAll('img')

            for div in picUrls:

                picUrl = dict(div.attrs)['src']
                # Image file name = url part after the last '/'.
                last = picUrl.rfind('/')

                picName = picUrl[last+1:]
                urllib.urlretrieve(picUrl,'./'+title+'/'+title+picName)
    print 'essay get over'
Пример #22
0
def _write_dronology_trace_matrix(requirement_dict, file):
    """Write requirement -> linked class names as a trace matrix file.

    Each requirement with at least one linked class produces one line of
    the form "req:class1 class2 ..."; requirements without links are
    skipped.
    """
    all_trace_links_string = []
    for req_name, class_names in requirement_dict.items():
        if class_names:
            # Idiom fix: join the names directly; the original wrapped
            # them in a redundant identity list comprehension.
            all_trace_links_string.append(req_name + ":" +
                                          " ".join(class_names))
    FileUtil.write_file(file, "\n".join(all_trace_links_string))
Пример #23
0
def UpgradeDependency(addon_id, currentVersion):
    """Refresh a bundled dependency addon unless the installed copy
    already reports currentVersion."""
    if not os.path.exists(os.path.join(__lib__, addon_id)):
        return
    if os.path.exists(os.path.join(__AddonPath__, addon_id)):
        installed = xbmcaddon.Addon(addon_id).getAddonInfo('version')
        if installed == currentVersion:
            # Already up to date; nothing to copy.
            return
    FileUtil.TargetFileUpdate(addon_id, __AddonPath__, isFolder=True)
Пример #24
0
def process_UD(line, OUT_FILE_PATH, dir):
    """Extract the 'Unstable Dependency' smell reason from a csv line and
    append a row to smellsInfo_UD.csv under OUT_FILE_PATH."""
    smell, project, namespace, cause, *rest = line.split(",")
    if smell != 'Unstable Dependency':
        return
    pattern = r'less stable component\(s\): ((\w|\.)*)'
    for match in re.finditer(pattern, cause, re.IGNORECASE):
        row = (dir + "," + smell + "," + project + "," + namespace + "," +
               match.group(1))
        FileUtil.writeFile(os.path.join(OUT_FILE_PATH, "smellsInfo_UD.csv"),
                           row)
Пример #25
0
 def _write_code_entries(self, chosen_entries, remaining_entries):
     """Persist the chosen and remaining code entries of a split to their
     respective csv files (split percent vs. its complement)."""
     chosen_file = code_csv_filename(self._split_percent, self._dataset,
                                     self._tracelink_type)
     FileUtil.write_rows_to_csv_file(chosen_file, chosen_entries)
     remaining_file = code_csv_filename(
         Util.complement(self._split_percent), self._dataset,
         self._tracelink_type)
     FileUtil.write_rows_to_csv_file(remaining_file, remaining_entries)
Пример #26
0
 def constructFromFile(self):
     """Rebuild the board and populate it with the ships stored in
     maps/<mapName>.battlefield under the program directory.
     (Python 2 code: print statements.)"""
     self.constructNewBoard()
     try:
         self.file = FileUtil.openForRead(os.path.join(FileUtil.getProgramDirectory(), "maps", self.mapName + ".battlefield"))
     except IOError:
         # Bug fix: the original fell through after a failed open and then
         # crashed using the missing file attribute; bail out after
         # reporting the error.
         print "Error loading map"
         return
     ships = Formatter.stripShips(Formatter.convertMatrix(self.file))
     self.board.addShips(ships)
Пример #27
0
def do():
    """Export a modified-line-count report sheet for every configured
    project/type combination (local repositories)."""
    # Column headers of the generated sheet (file path, lines added,
    # lines deleted, lines modified, change count, total lines).
    headers = ['文件路径', '总增加行数', '总删除行数', '总修改行数', '修改次数', '总行数']
    for project in conf.projects:
        for change_type in conf.type:
            result = local_modify_rows.getResult(project, change_type)
            FileUtil.import_local_rows_excel(headers, result, project,
                                             change_type)
Пример #28
0
    def precalculate_tracelinks(self,
                                output_precalculated_req_filename,
                                output_precalculated_code_filename,
                                req_embedding_creator=None,
                                code_embedding_creator=None,
                                output_suffix=""):
        """Create req/code embeddings, attach per-method and per-non-cg
        cosine similarities against every requirement to each code
        embedding, and persist both embedding lists as json files.

        Falsy creators and output filenames fall back to the configured
        defaults; output_suffix is appended to default filenames.
        """
        if not req_embedding_creator:
            req_embedding_creator = self.default_req_emb_creator(
                self._word_emb_creator)
        if not code_embedding_creator:
            code_embedding_creator = self.default_code_emb_creator(
                self._word_emb_creator)
        if not output_precalculated_req_filename:
            output_precalculated_req_filename = self.default_precalculated_filename(
                req_embedding_creator.__class__.__name__, output_suffix)
        if not output_precalculated_code_filename:
            output_precalculated_code_filename = self.default_precalculated_filename(
                code_embedding_creator.__class__.__name__, output_suffix)

        req_embeddings = self._create_req_embeddings(req_embedding_creator)
        code_embeddings = self._create_code_embeddings(code_embedding_creator)

        # For every code embedding, compute cosine similarities of each
        # of its call-graph methods (and non-call-graph entries) against
        # every requirement's chosen vector parts.
        for cg_emb in code_embeddings:
            assert isinstance(cg_emb, MethodCallGraphEmbeddingMultipleSims)
            for method_name_key in cg_emb.methods_dict:
                for req_file in req_embeddings:
                    assert isinstance(req_file, RequirementEmbedding)
                    req_parts = self._choose_req_part(
                        req_file
                    )  # choose if using partial vectors or whole vector
                    sims_of_all_parts = [
                        Util.calculate_cos_sim(
                            req_vector,
                            cg_emb.get_method_vector(method_name_key))
                        for req_vector in req_parts
                    ]
                    cg_emb.add_method_sim(method_name_key, sims_of_all_parts,
                                          req_file.file_name)
            # Same similarity computation for entries outside the call
            # graph (e.g. fields or unresolved elements).
            for other_key in cg_emb.non_cg_dict:
                for req_file in req_embeddings:
                    assert isinstance(req_file, RequirementEmbedding)
                    req_parts = self._choose_req_part(
                        req_file
                    )  # choose if using partial vectors or whole vector
                    sims_of_all_parts = [
                        Util.calculate_cos_sim(
                            req_vector, cg_emb.get_non_cg_vector(other_key))
                        for req_vector in req_parts
                    ]
                    cg_emb.add_non_cg_sim(other_key, sims_of_all_parts,
                                          req_file.file_name)

        FileUtil.write_dict_to_json(
            output_precalculated_req_filename,
            [req_emb.to_json() for req_emb in req_embeddings])
        FileUtil.write_dict_to_json(
            output_precalculated_code_filename,
            [code_emb.to_json() for code_emb in code_embeddings])
Пример #29
0
    def __init__(self, ital=False):
        """Load the code stopword list (Italian when ital=True) and keep
        it as a list of words, one per line of the stopword file."""
        stopword_path = (ITAL_CODE_STOPWORD_FILEPATH
                         if ital else CODE_STOPWORD_FILEPATH)
        stopwords_as_string = FileUtil.read_textfile_into_string(
            stopword_path)
        self._stop_words = stopwords_as_string.split("\n")
Пример #30
0
def btnsave():
    """Flask handler: persist the posted json form to the config file.

    Returns json {'s': 0|1, 'randomNum': randomNum}: 0 on success, 1 when
    writing the config file failed.
    """
    # Bug fix: json.loads() no longer accepts an `encoding` keyword
    # (deprecated in 3.1, removed in Python 3.9 -> TypeError); the request
    # body is already decoded explicitly via str(..., encoding="utf-8").
    form = json.loads(str(request.data, encoding="utf-8"))
    print(form)
    try:
        FileUtil.write_json_data(Constant.CONF_FILE_PATH, form)
        return jsonify({'s': 0, "randomNum": randomNum})
    except Exception as e:
        print(str(e))
        return jsonify({'s': 1, "randomNum": randomNum})
Пример #31
0
def main(resources_file_path, base_url, scratch_func):
    """Scrape *base_url* and append any fresh entries to the resources file.

    The previously stored data is handed to *scratch_func* so it can
    de-duplicate; new results are date-stamped, appended to the file and
    inserted into MongoDB. Logs a message when there is nothing new.
    """
    previous = FileUtil.read(resources_file_path)
    scraped = scratch_func(base_url, previous)
    if not scraped:
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '----', getattr(scratch_func, '__name__'), ": nothing to update ")
        return
    stamped = "//" + datetime.now().strftime('%Y-%m-%d') + "\n" + "\n".join(scraped) + "\n"
    FileUtil.append(resources_file_path, stamped)
    MongoUtil.insert(resources_file_path, stamped)
Пример #32
0
 def loadImage(self, path):
     """Load every "number" sample as a grayscale IMAGE_SIZE square image.

     Returns the resized images together with the label counts and the
     directory names reported by FileUtil.
     NOTE(review): *path* is unused here; kept for interface compatibility.
     """
     helper = CvHelp()
     (files, counts, dirs) = FileUtil().getSamplesLabelsDirnames("number")
     imgs = [cv2.resize(helper.openGray(sample), (IMAGE_SIZE, IMAGE_SIZE))
             for sample in files]
     return imgs, counts, dirs
Пример #33
0
def process_DS(line, OUT_FILE_PATH, dir):
    """Append a row to smellsInfo_DS.csv for a 'Dense Structure' smell line.

    *line* is a CSV record of the form ``smell,project,namespace,cause,...``;
    the average degree is extracted from *cause* and written as the reason.
    Non-matching smells or causes produce no output.
    """
    smell, project, namespace, cause, *rest = line.split(",")
    if smell == 'Dense Structure':
        # The dot in the pattern was previously unescaped ('\d+.\d+'),
        # which would also match e.g. '12x34'; escape it so only a real
        # decimal number is captured.
        for m in re.finditer(r'Average degree = (\d+\.\d+)', cause,
                             re.IGNORECASE):
            reason = m.group(1)
            FileUtil.writeFile(
                os.path.join(OUT_FILE_PATH, "smellsInfo_DS.csv"),
                dir + "," + smell + "," + reason)
Пример #34
0
    def getList(self):
        """Build, cache and return the cleaned word list derived from rawStr.

        Tokenising rawStr is slow and the result is needed often, so it
        is stored on the instance for reuse.
        """
        tokens = FileUtil.cutWords(self.rawStr)       # de-noise and segment
        self.wordList = FileUtil.rmStopwords(tokens)  # drop stop words
        return self.wordList
Пример #35
0
 def __init__(self,
              dataset,
              direction: RelationDirection,
              inheritance_graph_path=None):
     """Load the implements/inheritance graph and the classifier-to-file map.

     Falls back to the dataset's default inheritance graph location when
     no explicit path is supplied.
     """
     graph_path = inheritance_graph_path or Paths.inheritance_graph_filename(dataset)
     self._implements_inheritance_dict = FileUtil.read_dict_from_json(graph_path)
     self._direction = direction
     self._class2file_map = FileUtil.read_dict_from_json(
         Paths.classifier_to_file_map_filename(dataset))
Пример #36
0
  def __init__(self, args):
    # Configure the Hive metastore extractor from the job properties in
    # *args* (dict-like, keyed by Constant.* names) and open the initial
    # metastore connection. Jython/Python 2 code.
    self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)

    # connection
    self.username = args[Constant.HIVE_METASTORE_USERNAME]
    self.password = args[Constant.HIVE_METASTORE_PASSWORD]
    self.jdbc_driver = args[Constant.HIVE_METASTORE_JDBC_DRIVER]
    self.jdbc_url = args[Constant.HIVE_METASTORE_JDBC_URL]
    # Seconds after which db_connect() re-opens the metastore connection.
    self.connection_interval = int(args[Constant.HIVE_METASTORE_RECONNECT_TIME])
    self.logger.info("DB re-connection interval: %d" % self.connection_interval)

    # Database white/black lists default to the literal "''" (an SQL
    # empty-string literal) when not configured.
    self.db_whitelist = args[Constant.HIVE_DATABASE_WHITELIST_KEY] if Constant.HIVE_DATABASE_WHITELIST_KEY in args else "''"
    self.db_blacklist = args[Constant.HIVE_DATABASE_BLACKLIST_KEY] if Constant.HIVE_DATABASE_BLACKLIST_KEY in args else "''"
    self.logger.info("DB whitelist: " + self.db_whitelist)
    self.logger.info("DB blacklist: " + self.db_blacklist)

    self.conn_hms = None
    self.connect_time = None
    # Force the first connection (init=True skips the staleness check).
    self.db_connect(True)

    # Optional Kerberos/HDFS settings for the schema URL helper.
    hdfs_namenode_ipc_uri = args.get(Constant.HDFS_NAMENODE_IPC_URI_KEY, None)
    kerberos_principal = args.get(Constant.KERBEROS_PRINCIPAL_KEY, None)
    keytab_file = args.get(Constant.KERBEROS_KEYTAB_FILE_KEY, None)

    kerberos_auth = False
    if Constant.KERBEROS_AUTH_KEY in args:
      kerberos_auth = FileUtil.parse_bool(args[Constant.KERBEROS_AUTH_KEY], False)

    # Table-level white/black listing is off unless explicitly enabled.
    self.table_whitelist_enabled = False
    if Constant.HIVE_TABLE_WHITELIST_ENABLED in args:
      self.table_whitelist_enabled = FileUtil.parse_bool(args[Constant.HIVE_TABLE_WHITELIST_ENABLED], False)

    self.table_blacklist_enabled = False
    if Constant.HIVE_TABLE_BLACKLIST_ENABLED in args:
      self.table_blacklist_enabled = FileUtil.parse_bool(args[Constant.HIVE_TABLE_BLACKLIST_ENABLED], False)

    self.schema_url_helper = SchemaUrlHelper.SchemaUrlHelper(hdfs_namenode_ipc_uri, kerberos_auth, kerberos_principal, keytab_file)

    # global variables
    self.databases = None
    self.db_dict = {}  # name : index
    self.table_dict = {}  # fullname : index
    self.dataset_dict = {}  # name : index
    self.instance_dict = {}  # name : index
    self.serde_param_columns = []
    # counting statistics
    self.external_url = 0
    self.hdfs_count = 0
    self.schema_registry_count = 0
Пример #37
0
def parseCreatingLine(testExecutableDirectoryName, line):
    """Parse a gcov "creating" line into source/gcov file information.

    Expects *line* shaped like ``<source>:creating '<gcov file>'``; both
    paths are resolved relative to *testExecutableDirectoryName*.

    Returns a dict with ``fileName``, ``gcovFileName`` and ``gcovLines``
    (the gcov file's contents), or None when *line* does not match.
    """
    search = re.search("(.*):creating '(.*)'", line, re.IGNORECASE)
    if not search:
        # The result dict was previously built unconditionally, which
        # raised NameError on non-matching lines; bail out early instead.
        return None

    fileName = os.path.abspath(testExecutableDirectoryName + "/" + search.group(1))
    gcovFileName = os.path.abspath(testExecutableDirectoryName + "/" + search.group(2))
    return {
        "fileName": fileName,
        "gcovFileName": gcovFileName,
        "gcovLines": FileUtil.readFileLines(gcovFileName),
    }
Пример #38
0
def readVideo():
	"""Load the recipe-id -> video-id map from recipe.video.txt.

	Each useful line holds at least two whitespace-separated columns: a
	recipe id (a trailing 'L' suffix is stripped) and a video id. The map
	is keyed by both the string and the int form of the id.
	"""
	d = {}
	# A missing quote previously fused the last two search directories
	# into the single string "./readDB/,/home/data/recomData_tmp", so
	# neither location was ever searched; split them apart.
	search_dirs = ["../readDB/", "./", "./readDB/", "/home/data/recomData_tmp"]
	for line in FileUtil.openUncertainDirFile(search_dirs, "recipe.video.txt"):
		cols = line.strip().split()
		if len(cols) < 2:
			continue
		if cols[0].endswith("L"):
			cols[0] = cols[0][0:-1]
		d[cols[0]] = cols[1]
		d[int(cols[0])] = cols[1]
	return d
Пример #39
0
 def __init__(self, fileName, exemplarCommand, exemplarConfig):
     """Score holder for one file to be checked with an exemplar tool.

     Reads the file's contents and counts its lines; compliance counters
     start at zero. NOTE: Python 2 syntax (``except IOError, e``,
     ``print >>``) — this module is not Python 3 compatible as written.
     """
     self.fileName = fileName
     self.nonCompliantLines = 0
     self.score = 0
     self.exemplarCommand = exemplarCommand
     self.exemplarConfig = exemplarConfig
     try:
         self.fileData = FileUtil.readFileString(self.fileName)
         self.totalLines = len(self.fileData.splitlines())
     except IOError, e:
         # An unreadable input file is fatal: report to stderr and exit.
         print >> sys.stderr, e
         sys.exit(1)
Пример #40
0
    def __init__(self, args):
        """Build version/notice metadata for a generated source banner.

        Reads the mini/short/long notice files named in *args* and derives
        the ``about`` and ``what`` quoted-C-string constants from them,
        stamped with the current UTC build date.
        """
        self.prefix = args.prefix
        self.name = args.name
        self.version = validateArgument(args.version)
        self.about = None
        self.what = None

        self.args = args

        # The notices were previously initialised to "" and immediately
        # overwritten; read them directly instead.
        self.miniNotice = FileUtil.readFileString(args.miniNotice)
        self.shortNotice = FileUtil.readFileString(args.shortNotice)
        self.longNotice = FileUtil.readFileString(args.longNotice)

        self.buildDate = datetime.datetime.utcnow().isoformat()

        if self.version is None:
            # Placeholder token substituted at release time.
            self.version = " RELEASE_VERSION "

        if self.about is None:
            self.about = createQuotedCString("%s " % (self.name)) + \
                self.version + \
                createQuotedCString(" %s" % (self.buildDate)) + " " + \
                createQuotedCString("\n%s" % (self.miniNotice))

        if self.what is None:
            if self.miniNotice is not None:
                # Indent every notice line (dropping the trailing empty
                # split element) for the embedded "what" string.
                notice = "\n".join(map(lambda line: "\t" + line, self.miniNotice.split("\n")[:-1]))
            else:
                notice = ""
            self.what = createQuotedCString(whatLineToken) + " " + \
                createQuotedCString(self.name + " ") + " " + \
                self.version + " " + \
                createQuotedCString(" " + self.buildDate) + "\n" + \
                createQuotedCString(whatLineToken) + " " + \
                createQuotedCString(notice)
Пример #41
0
    def __init__(self, file_name=None):
        """Create a ServicesList, optionally populated from a .txt file.

        Requires: file_name, when given, is the name of a .txt file listing
        services in the format used in the examples of the general
        specification (omitted here for the sake of readability).

        Ensures:
        if file_name is given:
            a ServicesList of Service objects corresponding to the services
            in the file; drivers terminating their services earlier have
            priority over the ones terminating later, with lexicographic
            order of drivers' names deciding eventual ties.
        if file_name is None:
            an empty ServicesList.
        """
        UserList.__init__(self)

        if file_name is None:
            return

        # Populate self with one Service per line of the input file.
        source = FileUtil(file_name)
        for raw_line in source.getContent():
            fields = raw_line.rstrip().split(", ")
            service = Service(
                fields[ServicesList.INDEXDriverName],
                fields[ServicesList.INDEXVehiclePlate],
                fields[ServicesList.INDEXClientName],
                Time(fields[ServicesList.INDEXDepartureHour]),
                Time(fields[ServicesList.INDEXArrivalHour]),
                fields[ServicesList.INDEXCircuitId],
                fields[ServicesList.INDEXCircuitKms],
                fields[ServicesList.INDEXDriverStatus])
            self.append(service)
Пример #42
0
  def __init__(self, wh_etl_exec_id='0'):
    # Set up the WhereHows MySQL connection and resolve the Hive input
    # file paths for this ETL execution. Jython/Python 2 code.
    # NOTE(review): reads the module-level ``args`` mapping — presumably
    # the job properties supplied by the surrounding launcher; confirm.
    self.logger = LoggerFactory.getLogger("%s[%s]" % (self.__class__.__name__, wh_etl_exec_id))

    # set up connection
    username = args[Constant.WH_DB_USERNAME_KEY]
    password = args[Constant.WH_DB_PASSWORD_KEY]
    JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
    JDBC_URL = args[Constant.WH_DB_URL_KEY]
    self.conn_mysql = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
    self.conn_cursor = self.conn_mysql.cursor()

    # Optionally shorten InnoDB's lock wait timeout for this session.
    if Constant.INNODB_LOCK_WAIT_TIMEOUT in args:
      lock_wait_time = args[Constant.INNODB_LOCK_WAIT_TIMEOUT]
      self.conn_cursor.execute("SET innodb_lock_wait_timeout = %s;" % lock_wait_time)

    # Input CSVs produced by the extract step, under the HIVE temp dir.
    temp_dir = FileUtil.etl_temp_dir(args, "HIVE")
    self.input_schema_file = os.path.join(temp_dir, args[Constant.HIVE_SCHEMA_CSV_FILE_KEY])
    self.input_field_file = os.path.join(temp_dir, args[Constant.HIVE_FIELD_METADATA_KEY])
    self.input_instance_file = os.path.join(temp_dir, args[Constant.HIVE_INSTANCE_CSV_FILE_KEY])
    self.input_dependency_file = os.path.join(temp_dir, args[Constant.HIVE_DEPENDENCY_CSV_FILE_KEY])

    self.db_id = args[Constant.JOB_REF_ID_KEY]
    self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
Пример #43
0
# Entry point: configure an OracleExtract from job properties, set up the
# Oracle session, then run the extraction.
# NOTE(review): ``args`` is sys.argv[1] yet is indexed by Constant keys —
# presumably a dict-like properties object in the Jython launcher; confirm.
if __name__ == "__main__":
  args = sys.argv[1]

  # connection
  username = args[Constant.ORA_DB_USERNAME_KEY]
  password = args[Constant.ORA_DB_PASSWORD_KEY]
  JDBC_DRIVER = args[Constant.ORA_DB_DRIVER_KEY]
  JDBC_URL = args[Constant.ORA_DB_URL_KEY]

  e = OracleExtract()
  e.conn_db = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)

  # Comma-separated database names to skip (empty entries filtered out).
  exclude_databases = filter(bool, args[Constant.ORA_EXCLUDE_DATABASES_KEY].split(','))
  collect_sample = False
  if Constant.ORA_LOAD_SAMPLE in args:
    collect_sample = FileUtil.parse_bool(args[Constant.ORA_LOAD_SAMPLE], False)

  # Output files go under the per-job ORACLE temp directory.
  temp_dir = FileUtil.etl_temp_dir(args, "ORACLE")
  table_output_file = os.path.join(temp_dir, args[Constant.ORA_SCHEMA_OUTPUT_KEY])
  field_output_file = os.path.join(temp_dir, args[Constant.ORA_FIELD_OUTPUT_KEY])
  sample_output_file = os.path.join(temp_dir, args[Constant.ORA_SAMPLE_OUTPUT_KEY])

  try:
    # Fix session time zone / date format and tag the session for DBAs.
    e.conn_db.cursor().execute("ALTER SESSION SET TIME_ZONE = 'US/Pacific'")
    e.conn_db.cursor().execute("ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD HH24:MI:SS'")
    e.conn_db.cursor().execute("CALL dbms_application_info.set_module('%s','%d')" %
                               ('WhereHows (Jython)', os.getpid()))
    e.conn_db.commit()

    # NOTE(review): this excerpt is truncated in the middle of the
    # e.run(...) call; the remaining arguments are outside this view.
    e.run(exclude_databases,
          None,
Пример #44
0
    curs.execute(sql)
    #self.debug(sql)
    rows = curs.fetchall()
    curs.close()
    return rows

  def db_connect(self, init=False):
    """(Re)open the Hive metastore JDBC connection.

    Connects when *init* is true, or when the existing connection is
    older than the configured reconnect interval; otherwise a no-op.
    """
    if not init:
      # connect_time is only consulted when not forcing a connect, which
      # preserves the original short-circuit behaviour.
      elapsed = (datetime.now() - self.connect_time).total_seconds()
      if elapsed <= self.connection_interval:
        return
    if self.conn_hms:
      self.conn_hms.close()
    self.conn_hms = zxJDBC.connect(self.jdbc_url, self.username, self.password, self.jdbc_driver)
    self.logger.info("Connected to Hive metadata-store DB")
    self.connect_time = datetime.now()


# Entry point: build a HiveExtract from the job properties, resolve the
# output file locations, run the extraction and always close the
# metastore connection afterwards.
if __name__ == "__main__":
  args = sys.argv[1]

  extractor = HiveExtract(args)

  # Output files are placed under the directory from FileUtil.etl_temp_dir.
  staging_dir = FileUtil.etl_temp_dir(args, "HIVE")
  schema_json_file = os.path.join(staging_dir, args[Constant.HIVE_SCHEMA_JSON_FILE_KEY])
  hdfs_map_csv_file = os.path.join(staging_dir, args[Constant.HIVE_HDFS_MAP_CSV_FILE_KEY])

  try:
    extractor.databases = extractor.get_all_databases(extractor.db_whitelist, extractor.db_blacklist)
    extractor.run(schema_json_file, None, hdfs_map_csv_file)
  finally:
    if extractor.conn_hms:
      extractor.conn_hms.close()
Пример #45
0
      self.conn_cursor.execute(cmd)
      self.conn_mysql.commit()


# Entry point: configure a TeradataLoad from the job properties and open
# its WhereHows MySQL connection.
if __name__ == "__main__":
  args = sys.argv[1]

  loader = TeradataLoad()

  # Target MySQL (WhereHows) connection settings.
  username = args[Constant.WH_DB_USERNAME_KEY]
  password = args[Constant.WH_DB_PASSWORD_KEY]
  JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
  JDBC_URL = args[Constant.WH_DB_URL_KEY]

  # Input files produced by the extract step, under the TERADATA temp dir.
  temp_dir = FileUtil.etl_temp_dir(args, "TERADATA")
  loader.input_file = os.path.join(temp_dir, args[Constant.TD_METADATA_KEY])
  loader.input_field_file = os.path.join(temp_dir, args[Constant.TD_FIELD_METADATA_KEY])
  loader.input_sampledata_file = os.path.join(temp_dir, args[Constant.TD_SAMPLE_OUTPUT_KEY])

  # Sample data is loaded only when enabled AND today is a configured day.
  do_sample = False
  if Constant.TD_LOAD_SAMPLE in args:
    do_sample = FileUtil.parse_bool(args[Constant.TD_LOAD_SAMPLE], False)
  if datetime.datetime.now().strftime('%a') not in args[Constant.TD_COLLECT_SAMPLE_DATA_DAYS]:
    do_sample = False

  loader.db_id = args[Constant.JOB_REF_ID_KEY]
  loader.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
  loader.conn_mysql = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
  loader.conn_cursor = loader.conn_mysql.cursor()
Пример #46
0
import ExcelUtil
import FileUtil

# Export overdue-repayment rows from the spreadsheet as JSON records and
# append them to the output text file.
# NOTE: the rows variable was previously named ``dict``, shadowing the
# builtin; commented-out debug code has been removed.
rows = ExcelUtil.get(r"C:\Users\zhaoyiwei\Desktop\超时还款记录\超时还款记录0604.xlsx",1,1)
result = []
# Template for one repayment JSON record ({{ and }} are literal braces).
target = '{{"repayOuterNo":"{0}","loanOuterNo":"{1}","repayTime":"{2}","status":{3},"outterMessage":"","repaySource":{4},"repayPlanListString":"[{{\"repayPlanId\":{5},\"principal\":{6},\"breach\":0,\"charge\":0,\"interest\":{7},\"earlyRepayBreach\":0,\"penalty\":0,\"installmentNo\":{8}}}]" }}'
for line in rows.values():
    print(line)
    record = target.format(line[0].strip(), line[1].strip(), line[3].strip(), round(line[4]), round(line[5]), line[6], line[7], line[10], line[13])
    print(record)
    result.append(record)

FileUtil.write(r"C:\Users\zhaoyiwei\Desktop\123\666.txt", "a+", result)
Пример #47
0
import sys
sys.path.append("./")
sys.path.append("./util")

import FileUtil
TMP_DATA_DIR="/home/data/recomeData_tmp/"

from column import *
from boundedQueue import *

# NOTE(review): presumably an upper bound on co-occurrence entries kept
# per item — its actual use lies outside this excerpt; confirm.
MAX_COOCUUR_NUM=15

# Build the set of "quality" recipe ids (id -> 1) from quality.rid, located
# via FileUtil.openUncertainDirFile in "./" or the temp data directory.
rs={}
for line in FileUtil.openUncertainDirFile(["./",TMP_DATA_DIR],"quality.rid"):
	rs[line.strip()]=1

def coOccur():
	users={}
	userTags={}
	for line in sys.stdin:
		cols=line.split("\t")
		if len(cols) < APP_LOG_COLUMNS:
			#print line.strip()
			#print len(cols)
			continue
		userid=uuid(cols)
		#cols[USER_CID]
		#if userid == '0':
		#	userid=cols[IP_CID]
		rid=getRid(cols)
		#sys.stdout.write(line+"\n")