def compute_p_word_given_class(data_paths, vocab_size):
    """
    Return a dictionary of word probabilities, P(word | class). All datapaths belong to the same class.
    Incorporate Laplacian Smoothing with k=1 here. p_word_given_class should include the probability of UNKNOWN_WORD, 
        any word that doesn't appear in the training set
    """
    p_word_given_class = dict()
    # compute number of words in the given class
    class_size = 0
    for path in data_paths:
        message = open_file(path)
        words = get_words(message)
        class_size += len(words)

    # add elements to dictionary
    for path in data_paths:
        message = open_file(path)
        words = get_words(message)
        for word in words:
            if word in p_word_given_class:
                p_word_given_class[word] += 1 / (class_size + vocab_size + 1)
            else:
                p_word_given_class[word] = 2 / (class_size + vocab_size + 1)
    p_word_given_class['UNKNOWN_WORD'] = 1 / (class_size + vocab_size + 1)
    return p_word_given_class
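
A minimal usage sketch, assuming the same open_file/get_words helpers and hypothetical spam_paths/ham_paths lists of training files (class priors are ignored for brevity): the per-class dictionaries combine into a log-probability naive Bayes score, with UNKNOWN_WORD as the fallback for unseen words.

import math

# Hypothetical training inputs: one list of file paths per class, plus a vocabulary size.
p_word_spam = compute_p_word_given_class(spam_paths, vocab_size)
p_word_ham = compute_p_word_given_class(ham_paths, vocab_size)

def log_score(words, p_word_given_class):
    # Sum log P(word | class); unseen words fall back to the UNKNOWN_WORD probability.
    unknown = p_word_given_class['UNKNOWN_WORD']
    return sum(math.log(p_word_given_class.get(word, unknown)) for word in words)

words = get_words(open_file('some_message.txt'))
label = 'spam' if log_score(words, p_word_spam) > log_score(words, p_word_ham) else 'ham'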
Example #2
def main():
  """Prints a diff between two files."""
  parser = optparse.OptionParser(usage='usage: %prog file-a file-b [options]')
  parser.add_option('--text-is-case-insensitive', action='store_false',
                    dest='text_is_case_sensitive',  default=True,
                    help='<pfif:full_name>Jane</pfif:full_name> is the same as '
                    '<pfif:full_name>JANE</pfif:full_name>')
  parser.add_option('--no-grouping', action='store_false',
                    default=True, dest='group_by_record_id',
                    help='Rather than grouping all differences pertaining to '
                    'the same record together, every difference will be '
                    'displayed individually.')
  parser.add_option('--ignore-field', action='append', dest='ignore_fields',
                    default=[], help='--ignore-field photo_url will mean that '
                    'there will be no messages for photo_url fields that are '
                    'added, removed, or changed.  To specify multiple fields '
                    'to ignore, use this flag multiple times.')
  parser.add_option('--omit-blank-fields', action='store_true', default=False,
                    help='Normally, a blank field (ie, <foo></foo>) will count '
                    'as a difference against a file that does not have that '
                    'field at all.  If you pass this flag, a blank field will '
                    'count as an omitted field.')
  (options, args) = parser.parse_args()

  assert len(args) >= 2, 'Must provide two files to diff.'
  messages = pfif_file_diff(
      utils.open_file(args[0]), utils.open_file(args[1]),
      text_is_case_sensitive=options.text_is_case_sensitive,
      ignore_fields=options.ignore_fields,
      omit_blank_fields=options.omit_blank_fields)
  print(utils.MessagesOutput.generate_message_summary(messages))
  if options.group_by_record_id:
    print(utils.MessagesOutput.messages_to_str_by_id(messages))
  else:
    print(utils.MessagesOutput.messages_to_str(messages))
Example #3
 def btn_click(self, event):
     if not os.path.exists(self.icon_path):
         return
     ext = str(Path(self.icon_path).suffix)
     save_path = self.icon_save_path + ext
     shutil.copy(self.icon_path, save_path)
     utils.open_file(save_path, True)
def dfc2018_loader(folder):
        img = open_file(folder + '2018_IEEE_GRSS_DFC_HSI_TR.HDR')[:,:,:-2]
        gt = open_file(folder + '2018_IEEE_GRSS_DFC_GT_TR.tif')
        gt = gt.astype('uint8')

        rgb_bands = (47, 31, 15)

        label_values = ["Unclassified",
                        "Healthy grass",
                        "Stressed grass",
                        "Artificial turf",
                        "Evergreen trees",
                        "Deciduous trees",
                        "Bare earth",
                        "Water",
                        "Residential buildings",
                        "Non-residential buildings",
                        "Roads",
                        "Sidewalks",
                        "Crosswalks",
                        "Major thoroughfares",
                        "Highways",
                        "Railways",
                        "Paved parking lots",
                        "Unpaved parking lots",
                        "Cars",
                        "Trains",
                        "Stadium seats"]
        ignored_labels = [0]

        palette = None      # added manually, otherwise an error is raised
        return img, gt, rgb_bands, ignored_labels, label_values, palette
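
A minimal usage sketch, assuming the folder contains the DFC2018 files named above; it unpacks the loader's return values and counts labelled pixels per class by name.

import numpy as np

folder = 'Datasets/DFC2018/'  # hypothetical dataset location
img, gt, rgb_bands, ignored_labels, label_values, palette = dfc2018_loader(folder)

# Count labelled pixels per class, skipping the ignored "Unclassified" label.
labelled = ~np.isin(gt, ignored_labels)
for class_id in np.unique(gt[labelled]):
    print(label_values[class_id], int(np.sum(gt == class_id)))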
Example #5
 def __init__(self, src_path, tgt_path=None):
     super(NMTDataset, self).__init__()
     self.src = open_file(src_path)
     self.tgt = None
     if tgt_path is not None:
         self.tgt = open_file(tgt_path)
     self.size = len(self.src)
def read_features(pos_fname, neg_fname, chrom_lengths, bin_size):
    echo('Reading features:', pos_fname, neg_fname)
    features = dict(
        (chrom, [SKIP] * chrom_lengths[chrom]) for chrom in chrom_lengths)

    with open_file(pos_fname) as in_f:
        for l in in_f:
            chrom, start, end = l.strip().split('\t')[:3]
            start_bin = int(start) / bin_size
            end_bin = int(end) / bin_size

            if chrom not in features:
                features[chrom] = []

            for bin_i in xrange(start_bin, end_bin + 1):
                features[chrom][bin_i] = POS

    if neg_fname is not None:
        with open_file(neg_fname) as in_f:
            for l in in_f:
                chrom, start, end = l.strip().split('\t')[:3]
                start_bin = int(start) / bin_size
                end_bin = int(end) / bin_size

                if chrom not in features:
                    features[chrom] = []

                for bin_i in xrange(start_bin, end_bin + 1):
                    features[chrom][bin_i] = NEG
    else:
        for chrom in features:
            for bin_i in xrange(len(features[chrom])):
                if features[chrom][bin_i] != POS:
                    features[chrom][bin_i] = NEG
    return features
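
Note that read_features is Python 2 code (xrange, and int(start) / bin_size relies on integer division). A minimal Python 3 sketch of the same binning step, written as a hypothetical helper:

def mark_bins(features, chrom, start, end, bin_size, value):
    # Python 3 equivalent of the binning step: // keeps the bins integral, range replaces xrange.
    start_bin = int(start) // bin_size
    end_bin = int(end) // bin_size
    for bin_i in range(start_bin, end_bin + 1):
        features[chrom][bin_i] = value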
Example #7
def main():
    print()
    model = int(sys.argv[1])
    file_name = sys.argv[2]
    n_classes = int(sys.argv[3])
    class_list = text_retrieve('class_list.txt')
    val_phrase = open_file('data/sign-to-gloss/cleaned/split-files/' +
                           file_name + '-phrase-' + str(n_classes))
    dataset_info = open_file(
        'data/sign-to-gloss/cleaned/split-files/dataset-info-' +
        str(n_classes))
    tar_lines, pred_lines = [], []
    for i in range(0, len(val_phrase)):
        print(i)
        inp, tar = create_batch([val_phrase[i]], dataset_info, n_classes)
        pred = translate(inp, model, n_classes)
        tar, pred = convert_tar_pred(list(tar[0][1:-1]), pred, class_list)
        print('Target phrase: ', tar)
        print('Predict phrase: ', pred)
        print()
        tar_lines.append(tar)
        pred_lines.append(pred)
    tar_text = lines_to_text(tar_lines, '\n')
    pred_text = lines_to_text(pred_lines, '\n')
    text_save(
        tar_text,
        str(n_classes) + '/luong/model_' + str(model) + '/predictions/' +
        file_name + '_tar.txt')
    text_save(
        pred_text,
        str(n_classes) + '/luong/model_' + str(model) + '/predictions/' +
        file_name + '_pred.txt')
Example #8
 def edit_dataset(src_path, tgt_path):
     x = open_file(src_path)
     y = open_file(tgt_path)
     i = 0
     residual = []
     with open("./nep_dataset/src.txt", "w", encoding="utf8") as f:
         for sent in x:
             i += 1
             if len(sent.strip()) < 6:
                 residual.append(i)
                 continue
             if sent.strip()[-1] not in ['।', '?', '!']:
                 f.write(sent + '।\n')
             else:
                 f.write(sent + '\n')
     j = 0
     with open("./nep_dataset/tgt.txt", "w", encoding="utf8") as f:
         for sent in y:
             j += 1
             if j in residual:
                 continue
             if sent.strip()[-1] not in ['.', '!', '?']:
                 f.write(sent + '.\n')
             else:
                 f.write(sent + '\n')
Example #9
    def __get_login_qrcode(self):
        """获取登录二维码并自动打开
        https://qr.m.jd.com/show?appid=133&size=147&t=1611304511060
        return: boolean
        """
        url = "https://qr.m.jd.com/show"
        payload = {
            "appid": 133,
            "size": 147,
            "t": utils.get_current_json_timestamp()
        }

        r = self.session.get(url=url, params=payload, headers=self.headers)

        try:
            r.raise_for_status()
        except requests.HTTPError:
            logging.error(f"登录二维码获取({r.status_code}):{r.text}")
            return False

        qrcode_name = path("qrcode.png").abs_path_str()
        utils.dumps_bytes_to_file(r, qrcode_name)
        utils.open_file(qrcode_name)
        logging.info(f"登录二维码已生成:{qrcode_name}")
        logging.info(f"登录二维码已弹出,请打开京东 APP 扫码登录!")
        return True
Example #10
def get_fixed_sets(run, folder_name, dataset_name, mode='both'):
    """
    Load fixed training and test sets from file.

    Arguments:
    run - number of the current run
    folder_name - folder containing the files
    dataset_name - name of the dataset to load
    mode - 'both': train and test set,
           'train': only the training set,
           'test': only the test set
    """
    if mode in ('both', 'train'):
        train_gt = open_file(folder_name + dataset_name + '/train_gt_' + str(run) + '.npz')
        if mode == 'train':
            return train_gt

    if mode in ('both', 'test'):
        test_gt = open_file(folder_name +  dataset_name + '/test_gt_' + str(run) + '.npz')
        if mode == 'test':
            return test_gt
        else:
            return train_gt, test_gt

    else:
        print('Wrong value for the mode parameter!')
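
A minimal usage sketch with hypothetical folder and dataset names:

# Load both ground-truth splits for run 0, or just one of them via mode='train' / mode='test'.
train_gt, test_gt = get_fixed_sets(0, 'Datasets/splits/', 'PaviaU', mode='both')
train_gt_only = get_fixed_sets(0, 'Datasets/splits/', 'PaviaU', mode='train')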
Example #11
def main():
    print()
    n_classes = int(sys.argv[1])
    model = int(sys.argv[2])
    dataset_info = open_file(
        'data/sign-to-gloss/cleaned/split-files/dataset-info-' +
        str(n_classes))
    print('Dataset Info set size: ', len(dataset_info.keys()))
    print()
    train_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/train-phrase-' +
        str(n_classes))
    val_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/val-phrase-' + str(n_classes))
    test_phrase = open_file(
        'data/sign-to-gloss/cleaned/split-files/test-phrase-' + str(n_classes))
    print('Training Phrase set size: ', len(train_phrase))
    print('Validation Phrase set size: ', len(val_phrase))
    print('Testing Phrase set size: ', len(test_phrase))
    print()
    batch_size = 50
    vocab_size = n_classes + 2
    parameters = {
        'tar_vocab_size': vocab_size,
        'emb_size': 512,
        'rnn_size': 512,
        'batch_size': batch_size,
        'epochs': 20,
        'train_steps_per_epoch': len(train_phrase) // batch_size,
        'rate': 0.3,
        'val_steps_per_epoch': len(val_phrase) // batch_size,
        'test_steps': len(test_phrase) // batch_size,
        'model': model
    }
    save_file(
        parameters, 'results/sign-to-gloss/wlasl-' + str(n_classes) +
        '/luong/model_' + str(model) + '/utils/parameters')
    print()
    print('No. of Training steps per epoch: ',
          parameters['train_steps_per_epoch'])
    print('No. of Validation steps per epoch: ',
          parameters['val_steps_per_epoch'])
    print('No. of Testing steps: ', parameters['test_steps'])
    print()
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_phrase)).shuffle(len(train_phrase))
    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (val_phrase)).shuffle(len(val_phrase))
    val_dataset = val_dataset.batch(batch_size, drop_remainder=True)
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_phrase)).shuffle(len(test_phrase))
    test_dataset = test_dataset.batch(batch_size, drop_remainder=True)
    print('Model Training started')
    print()
    #model_training(train_dataset, val_dataset, dataset_info, parameters)
    print('Model Testing started')
    print()
    model_testing(test_dataset, dataset_info, parameters)
Example #12
File: svm.py, Project: junyenle/l90prac2
def convertData(data_files, modelname, posorneg):
    toRet = []
    toRet2 = []
    if modelname == "naive":
        for filename in data_files:
            vector = []
            raw_data = open_file(filename)
            tokenized_data = word_tokenize(raw_data.lower())
            length = 1 * len(tokenized_data) - 1
            for word in bowvectorsource:
                #presence
                if word in tokenized_data:
                    vector.append(1)
                else:
                    vector.append(0)
                #normalized frequency
                sum = 0
                #for tokword in tokenized_data:
                #    if word == tokword:
                #        sum += 1 / length
                #vector.append(sum)
            for i, word in enumerate(bowvectorsource):
                if word in tokenized_data:
                    vector.append(1)
                else:
                    vector.append(0)
                #for tokword in tokenized_data:
                #    if word == tokword:
                #        sum += 1 / length
            toRet.append(vector)
            toRet2.append(posorneg)  
    elif modelname[:6] == "concat":
        epoch = modelname[6:]
        model1 = Doc2Vec.load("dbow_" + epoch + "e.model")
        model2 = Doc2Vec.load("dm_" + epoch + "e.model")
        for filename in data_files:
            raw_data = open_file(filename)
            #print(raw_data)
            tokenized_data = word_tokenize(raw_data.lower())
            #print(tokenized_data)
            vector1 = model1.infer_vector(tokenized_data)
            vector2 = model2.infer_vector(tokenized_data)
            toRet.append(vector1 + vector2)
            toRet2.append(posorneg)
    else:
        model= Doc2Vec.load(modelname)
        for filename in data_files:
            raw_data = open_file(filename)
            #print(raw_data)
            tokenized_data = word_tokenize(raw_data.lower())
            #print(tokenized_data)
            vector = model.infer_vector(tokenized_data)
            toRet.append(vector)
            toRet2.append(posorneg)
    return toRet, toRet2
Example #13
def build_file(filename, outfilename, root=u'.', create_dir=True):
    filepath = os.path.join(root, filename)
    with utils.open_file(filepath) as infile:
        try:
            output = tags.render(infile.read(), filename=filename, rootdir=root)
        except templatelang.ParseBaseException as e:
            utils.print_parse_exception(e, filename)
            return

    with utils.open_file(outfilename, "w", create_dir=create_dir) as outfile:
        outfile.write(output)
    def gen_anchors(self, output_path, num_clusters):
        """
            Generate anchors, which tell YOLO in which scale objects are, which need to be trained and detected.

            :param output_path: path, where YOLO output files should be stored
            :param num_clusters: number of clusters need to be found
            :return: string of all generated anchors rounded to 2 decimals
        """
        np.seterr(divide='ignore', invalid='ignore')
        train_txt_path = os.path.join(output_path, self.TRAIN_TXT_FILENAME)
        f = utils.open_file(train_txt_path)

        lines = [line.rstrip('\n') for line in f.readlines()]

        annotation_dims = []

        for line in lines:
            line = line.replace('JPEGImages', 'labels')
            line = line.replace('.jpg', '.txt')
            line = line.replace('.png', '.txt')
            yolo_img_txt_filename = os.path.join(
                output_path,
                line.split('/')[-1])  # extract filenames out of 'train.txt'
            f2 = utils.open_file(yolo_img_txt_filename)
            for line in f2.readlines():
                line = line.rstrip('\n')
                w, h = line.split(' ')[3:]
                annotation_dims.append(tuple(map(float, (w, h))))
        annotation_dims = np.array(annotation_dims)

        if num_clusters == 0:
            for num_clusters_ in range(1, 11):  # we make 1 through 10 clusters
                anchors_str = None
                # kmeans returns None, if no anchors are found, then try it with new random indices
                while anchors_str is None:
                    indices = [
                        random.randrange(annotation_dims.shape[0])
                        for i in range(num_clusters_)
                    ]
                    centroids = annotation_dims[indices]
                    anchors_str = self.kmeans(annotation_dims, centroids)
        else:
            anchors_str = None
            # kmeans returns None, if no anchors are found, then try it with new random indices
            while anchors_str is None:
                indices = [
                    random.randrange(annotation_dims.shape[0])
                    for i in range(num_clusters)
                ]
                centroids = annotation_dims[indices]
                anchors_str = self.kmeans(annotation_dims, centroids)

        return anchors_str
Example #15
    def write(self, outfilepath=None, mode="w"):
        """Write objects in file to disk.
        Either you can write to a new file (if outfilepath is given),
        or you can append what's in storelist to the current filepath.
        """
        # If outfilepath is specified, iterate over every object in self
        # (which might come from something else) and every object in
        # self.storelist and write them out to disk
        if outfilepath:
            with open_file(outfilepath, mode) as outfile:
                logging.info("Writing to disk...")
                outfile.write(self.get_header())
                for obj in self.source:
                    line = self.obj_to_str(obj)
                    outfile.write(line)
                for obj in self.storelist:
                    line = self.obj_to_str(obj)
                    outfile.write(line)
            # Clear storelist now that they've been written to disk
            self.clear_storelist()
            # Update file attributes (in case of new or converted file)
            self.source = self
            self.filepath = outfilepath
            self.is_new = False

        # If outfilepath is not specified, simply iterate over every
        # object in self.storelist and append them to the file on disk.
        else:
            # If the file is new and the path already exist, do not append
            if self.is_new and os.path.exists(self.filepath):
                raise CancerApiException(
                    "Output file already exists: {}".format(self.filepath))
            with open_file(self.filepath, "a+") as outfile:
                logging.info("Writing to disk...")
                # If the file is new, start with header
                if self.is_new:
                    outfile.write(self.get_header())
                # If file is new and source is not self (i.e., converted file),
                # iterate over source
                if self.is_new and self.source is not self:
                    for obj in self.source:
                        line = self.obj_to_str(obj)
                        outfile.write(line)
                # Proceed with iterating over storelist
                for obj in self.storelist:
                    line = self.obj_to_str(obj)
                    outfile.write(line)
            # Clear storelist now that they've been written to disk
            self.clear_storelist()
            # Update file attributes (in case of new or converted file)
            self.source = self
            self.is_new = False
Example #16
    def write(self, outfilepath=None, mode="w"):
        """Write objects in file to disk.
        Either you can write to a new file (if outfilepath is given),
        or you can append what's in storelist to the current filepath.
        """
        # If outfilepath is specified, iterate over every object in self
        # (which might come from something else) and every object in
        # self.storelist and write them out to disk
        if outfilepath:
            with open_file(outfilepath, mode) as outfile:
                logging.info("Writing to disk...")
                outfile.write(self.get_header())
                for obj in self.source:
                    line = self.obj_to_str(obj)
                    outfile.write(line)
                for obj in self.storelist:
                    line = self.obj_to_str(obj)
                    outfile.write(line)
            # Clear storelist now that they've been written to disk
            self.clear_storelist()
            # Update file attributes (in case of new or converted file)
            self.source = self
            self.filepath = outfilepath
            self.is_new = False

        # If outfilepath is not specified, simply iterate over every
        # object in self.storelist and append them to the file on disk.
        else:
            # If the file is new and the path already exist, do not append
            if self.is_new and os.path.exists(self.filepath):
                raise CancerApiException("Output file already exists: {}".format(self.filepath))
            with open_file(self.filepath, "a+") as outfile:
                logging.info("Writing to disk...")
                # If the file is new, start with header
                if self.is_new:
                    outfile.write(self.get_header())
                # If file is new and source is not self (i.e., converted file),
                # iterate over source
                if self.is_new and self.source is not self:
                    for obj in self.source:
                        line = self.obj_to_str(obj)
                        outfile.write(line)
                # Proceed with iterating over storelist
                for obj in self.storelist:
                    line = self.obj_to_str(obj)
                    outfile.write(line)
            # Clear storelist now that they've been written to disk
            self.clear_storelist()
            # Update file attributes (in case of new or converted file)
            self.source = self
            self.is_new = False
    def check_mentions(self):
        """Checks mentions for sign up's via email or twitter
           via "Sign up / Sign up [email]"""

        try:
            mentions = self.api.mentions_timeline(count=3)

            for mention in mentions:
                if "stop" in mention.text.lower():
                    # Unsubscribe for email
                    if len(mention.text.split()) == 3:
                        email = mention.text.split()[2]
                        email_list = utils.open_file(EMAILS).split()

                        if email in email_list:
                            email_list.remove(email)
                            utils.write_to_file(EMAILS, ' '.join(email_list))

                    # Unsubscribe for Twitter handle
                    else:
                        twitter_name = mention.user.screen_name
                        twitter_name_list = utils.open_file(
                            TWITTER_NAMES).split()

                        if twitter_name in twitter_name_list:
                            twitter_name_list.remove(twitter_name)
                            utils.write_to_file(TWITTER_NAMES,
                                                ' '.join(twitter_name_list))

                elif "sign up" in mention.text.lower():
                    # Email sign up
                    if len(mention.text.split()) > 3:
                        email = mention.text.split()[3]
                        email_list = utils.open_file(EMAILS).split()

                        if email not in email_list:
                            email_list.append(email)
                            utils.append_to_file(EMAILS, email)

                    # Twitter handle sign up
                    else:
                        twitter_name = mention.user.screen_name
                        twitter_name_list = utils.open_file(
                            TWITTER_NAMES).split()

                        if twitter_name not in twitter_name_list:
                            twitter_name_list.append(twitter_name)
                            utils.append_to_file(TWITTER_NAMES, twitter_name)

        except tweepy.TweepError as error:
            utils.write_to_log(f'Error checking mentions: {error}')
Example #18
def main():
    """Prints a diff between two files."""
    parser = optparse.OptionParser(
        usage='usage: %prog file-a file-b [options]')
    parser.add_option(
        '--text-is-case-insensitive',
        action='store_false',
        dest='text_is_case_sensitive',
        default=True,
        help='<pfif:full_name>Jane</pfif:full_name> is the same as '
        '<pfif:full_name>JANE</pfif:full_name>')
    parser.add_option(
        '--no-grouping',
        action='store_false',
        default=True,
        dest='group_by_record_id',
        help='Rather than grouping all differences pertaining to '
        'the same record together, every difference will be '
        'displayed individually.')
    parser.add_option(
        '--ignore-field',
        action='append',
        dest='ignore_fields',
        default=[],
        help='--ignore-field photo_url will mean that '
        'there will be no messages for photo_url fields that are '
        'added, removed, or changed.  To specify multiple fields '
        'to ignore, use this flag multiple times.')
    parser.add_option(
        '--omit-blank-fields',
        action='store_true',
        default=False,
        help='Normally, a blank field (ie, <foo></foo>) will count '
        'as a difference against a file that does not have that '
        'field at all.  If you pass this flag, a blank field will '
        'count as an omitted field.')
    (options, args) = parser.parse_args()

    assert len(args) >= 2, 'Must provide two files to diff.'
    messages = pfif_file_diff(
        utils.open_file(args[0]),
        utils.open_file(args[1]),
        text_is_case_sensitive=options.text_is_case_sensitive,
        ignore_fields=options.ignore_fields,
        omit_blank_fields=options.omit_blank_fields)
    print utils.MessagesOutput.generate_message_summary(messages,
                                                        is_html=False)
    if options.group_by_record_id:
        print utils.MessagesOutput.messages_to_str_by_id(messages)
    else:
        print utils.MessagesOutput.messages_to_str(messages)
Example #19
def match(t_file, dm_file):
    """
    Open files, convert the text to list and clean it. Check for identical content.
    :param t_file: Full path to file 1, as string
    :param dm_file: Full path to file 2, as string
    :return: Three sets = matching content, not matched from file 1, not matched from file 2.
    """
    legacy = utils.open_file(t_file)
    ontology = utils.open_file(dm_file)
    legacy_occupations = utils.string_to_list(legacy)
    ontology_occupations = utils.string_to_list(ontology)
    cleaned_legacy = utils.clean_text(legacy_occupations)
    cleaned_ontology = utils.clean_text(ontology_occupations)
    return match_strings(cleaned_ontology, cleaned_legacy)
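
A minimal usage sketch with hypothetical file paths; the three returned sets are, per the docstring, the matching content and the unmatched items from each file.

matched, unmatched_a, unmatched_b = match('resources/legacy_occupations.txt',
                                          'resources/ontology_taxonomy.txt')
print(len(matched), 'matching occupations')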
Example #20
def main():
    model = 1
    file_name = 'train'
    n_classes = 100
    val_phrase = open_file('data/sign-to-gloss/cleaned/split-files/' +
                           file_name + '-phrase-' + str(n_classes))
    val_info = open_file('data/sign-to-gloss/cleaned/split-files/' +
                         file_name + '-info-' + str(n_classes))
    inp_lines, tar_lines, pred_lines = [], [], []
    for i in range(10, 11):
        inp, tar = create_batch([val_phrase[i]], val_info, n_classes)
        translate(inp, model)
        print(tar)
        """print('Input sentence: ', preprocess_inp_tar(inp))
def OwnData(folder):
    img = open_file(folder + 'OwnData.mat')['Data']
    gt = open_file(folder + 'OwnData_gt.mat')['temp']
    # gt = gt.astype('uint8')

    rgb_bands = (47, 31, 15)        # unchanged from the original loader

    label_values = ["Background",
                    "Class_1",
                    "Class_2"]
    ignored_labels = [0]

    palette = None  # added manually, otherwise an error is raised
    return img, gt, rgb_bands, ignored_labels, label_values, palette
Example #22
def _calculate_total_lengths(self):
    msg = "Calculating enrichment in regions"
    if self.counts_file: 
        self.sorted_region_path = self.counts_file
        if (not self.total_reads_a or not self.total_reads_b or (not self.total_reads_replica and self.use_replica)) and not self.use_MA:
            self.logger.info("... counting from counts file...")
            self.total_reads_a = 0
            self.total_reads_b = 0
            if self.total_reads_replica:
                self.total_reads_replica = 0
            else:
                self.total_reads_replica = 1
            for line in open(self.counts_file):
                try:
                    enrich = dict(zip(enrichment_keys, line.split()))
                    self.total_reads_a += float(enrich["signal_a"])
                    self.total_reads_b += float(enrich["signal_b"])
                    if self.use_replica:
                        self.total_reads_replica += float(enrich["signal_prime_2"])
                except ValueError:
                    self.logger.debug("(Counting) skip header...")


    else:
        self.logger.info("... counting number of lines in files...")
        if not self.total_reads_a:
            if self.experiment_format == BAM:
                self.total_reads_a = bam.size(self.current_experiment_path)
            else:
                self.total_reads_a = sum(1 for line in utils.open_file(self.current_experiment_path, self.experiment_format, logger=self.logger))
        if not self.total_reads_b:
            if self.experiment_format == BAM:
                self.total_reads_b = bam.size(self.current_control_path)
            else:
                self.total_reads_b = sum(1 for line in utils.open_file(self.current_control_path, self.control_format, logger=self.logger))
        if self.use_replica and not self.total_reads_replica:
            if self.experiment_format  == BAM:
                self.total_reads_replica = bam.size(self.replica_path)
            else:
                self.total_reads_replica = sum(1 for line in utils.open_file(self.replica_path, self.experiment_format, logger=self.logger))

        self.logger.debug("Number lines in experiment A: %s Experiment B: %s"%(self.total_reads_a, self.total_reads_b))
        if self.use_replica:
            msg = "%s using replicas..."%msg
        else:
            msg = "%s using swap..."%msg

        self.logger.info(msg)

    self.average_total_reads = (self.total_reads_a+self.total_reads_b)/2        
def delete_question_by_id(question_id):

    question_lines = []
    questions = utils.open_file(questions_data, QUESTION_HEADER)
    for row in questions:
        if row['id'] != question_id:
            question_lines.append(row)
    utils.write_to_file(questions_data, question_lines, QUESTION_HEADER)

    answer_lines = []
    answers = utils.open_file(answers_data, ANSWER_HEADER)
    for row in answers:
        if row['question_id'] != question_id:
            answer_lines.append(row)
    utils.write_to_file(answers_data, answer_lines, ANSWER_HEADER)
Example #24
 def _create_examples(self):
     """Given a file with feature bundle for each term and a list of positive and
     negative seeds, create a file with the vectors for the known positive
     and negative examples. Also create the model from the vectors."""
     print('Reading feature vectors and extracting pos and neg examples...')
     with open_file(self.features_file) as feats, \
          open_file(self.vectors_file, 'w') as vectors:
         for line in feats:
             try:
                 term = line.split('\t')[2]
                 label = self._get_label(term)
                 if label in ('y', 'n'):
                     vectors.write("%s\t%s" % (label, line))
             except Exception as e:
                 print('ERROR:', e)
Example #25
    def include(self, in_relpath, context):
        #print("including: " + in_relpath);
        output_str = "";
        context["blog"] = self.blog;
        def include2(relpath):
            return self.include(relpath, context);

        context["include"] = include2;

        fd = utils.open_file(self.in_base_path + "/" + in_relpath, "rb");
        lines = collections.deque(fd.readlines());
        while (len(lines) > 0):
            line = lines.popleft();
            # XXX: this is not recursive
            if (re.search("#{for_each_post}", line)):
                template = "";
                while (len(lines) > 0):
                    line = lines.popleft();
                    if (re.search("#{done}", line)):
                        for p in self.posts:
                            context["post"] = p;
                            output_str += self.replace_commands(template, context);
                        del context["post"];
                        break;
                    template += line;
            
            else:
                 output_str += self.replace_commands(line, context);

        #print("include returned'" + output_str + "'");
        return output_str;
Example #26
File: queue.py, Project: fatman2021/dak
def edit_note(note, upload, session, trainee=False):
    # Write the current data to a temporary file
    (fd, temp_filename) = utils.temp_filename()
    editor = os.environ.get("EDITOR","vi")
    answer = 'E'
    while answer == 'E':
        os.system("%s %s" % (editor, temp_filename))
        temp_file = utils.open_file(temp_filename)
        newnote = temp_file.read().rstrip()
        temp_file.close()
        print "New Note:"
        print utils.prefix_multi_line_string(newnote,"  ")
        prompt = "[D]one, Edit, Abandon, Quit ?"
        answer = "XXX"
        while prompt.find(answer) == -1:
            answer = utils.our_raw_input(prompt)
            m = re_default_answer.search(prompt)
            if answer == "":
                answer = m.group(1)
            answer = answer[:1].upper()
    os.unlink(temp_filename)
    if answer == 'A':
        return
    elif answer == 'Q':
        return 0

    comment = NewComment()
    comment.policy_queue = upload.policy_queue
    comment.package = upload.changes.source
    comment.version = upload.changes.version
    comment.comment = newnote
    comment.author  = utils.whoami()
    comment.trainee = trainee
    session.add(comment)
    session.commit()
def get_all_quetions_by_latest(order=False, by='submission_time'):
    data = []
    questions = utils.open_file(questions_data, QUESTION_HEADER)
    for row in questions:
        data.append(row)
    data = sorted(data, key=lambda i: i[by], reverse=order)
    return data
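
A minimal usage sketch (newest first by submission time):

latest_questions = get_all_quetions_by_latest(order=True, by='submission_time')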
Example #28
File: urgencylog.py, Project: abhi11/dak
    def __init__(self, *args, **kwargs):
        self.__dict__ = self.__shared_state

        if not getattr(self, 'initialised', False):
            self.initialised = True

            self.timestamp = time.strftime("%Y%m%d%H%M%S")

            cnf = Config()
            if cnf.has_key("Dir::UrgencyLog"):
                # Create the log directory if it doesn't exist
                self.log_dir = cnf["Dir::UrgencyLog"]

                if not os.path.exists(self.log_dir) or not os.access(self.log_dir, os.W_OK):
                    warn("UrgencyLog directory %s does not exist or is not writeable, using /srv/ftp.debian.org/tmp/ instead" % (self.log_dir))
                    self.log_dir = '/srv/ftp.debian.org/tmp/'

                # Open the logfile
                self.log_filename = "%s/.install-urgencies-%s.new" % (self.log_dir, self.timestamp)
                self.log_file = open_file(self.log_filename, 'w')

            else:
                self.log_dir = None
                self.log_filename = None
                self.log_file = None

            self.writes = 0
Example #29
def prod_maintainer(notes, upload):
    cnf = Config()
    changes = upload.changes
    whitelists = [upload.target_suite.mail_whitelist]

    # Here we prepare an editor and get them ready to prod...
    (fd, temp_filename) = utils.temp_filename()
    temp_file = os.fdopen(fd, 'w')
    temp_file.write("\n\n=====\n\n".join([note.comment for note in notes]))
    temp_file.close()
    editor = os.environ.get("EDITOR", "vi")
    answer = 'E'
    while answer == 'E':
        os.system("%s %s" % (editor, temp_filename))
        temp_fh = utils.open_file(temp_filename)
        prod_message = "".join(temp_fh.readlines())
        temp_fh.close()
        print "Prod message:"
        print utils.prefix_multi_line_string(prod_message,
                                             "  ",
                                             include_blank_lines=1)
        prompt = "[P]rod, Edit, Abandon, Quit ?"
        answer = "XXX"
        while prompt.find(answer) == -1:
            answer = utils.our_raw_input(prompt)
            m = re_default_answer.search(prompt)
            if answer == "":
                answer = m.group(1)
            answer = answer[:1].upper()
    os.unlink(temp_filename)
    if answer == 'A':
        return
    elif answer == 'Q':
        return 0
    # Otherwise, do the proding...
    user_email_address = utils.whoami() + " <%s>" % (
        cnf["Dinstall::MyAdminAddress"])

    changed_by = changes.changedby or changes.maintainer
    maintainer = changes.maintainer
    maintainer_to = utils.mail_addresses_for_upload(maintainer, changed_by,
                                                    changes.fingerprint)

    Subst = {
        '__SOURCE__': upload.changes.source,
        '__CHANGES_FILENAME__': upload.changes.changesname,
        '__MAINTAINER_TO__': ", ".join(maintainer_to),
    }

    Subst["__FROM_ADDRESS__"] = user_email_address
    Subst["__PROD_MESSAGE__"] = prod_message
    Subst["__CC__"] = "Cc: " + cnf["Dinstall::MyEmailAddress"]

    prod_mail_message = utils.TemplateSubst(
        Subst, cnf["Dir::Templates"] + "/process-new.prod")

    # Send the prod mail
    utils.send_mail(prod_mail_message, whitelists=whitelists)

    print "Sent prodding message"
Example #30
def main():
  """Runs all validations on the provided PFIF XML file"""
  assert len(sys.argv) == 2, 'Usage: python pfif_validator.py my-pfif-xml-file'
  validator = PfifValidator(utils.open_file(sys.argv[1], 'r'))
  messages = validator.run_validations()
  print(utils.MessagesOutput.generate_message_summary(messages))
  print(validator.validator_messages_to_str(messages))
Example #31
File: genomebrowser.py, Project: rdcwuyy/gb
 def addSequence(self, fastafile):
     '''Add sequences on Fasta format to genome browser.
 
 Arguments:
   fastafile -- a string or iterable representing the input Fasta file(s) to be added in the genome browser.
 '''
     directory = self.__directory__
     try:
         os.mkdir(os.path.join(directory, 'sequences'))
     except:
         pass
     if isinstance(fastafile, str):
         fastafile = (fastafile, )
     for i in fastafile:
         con = open_file(i)
         seq = ''
         for line in con:
             try:
                 line = line.decode('utf-8')
             except:
                 pass
             line = line.rstrip()
             if line.startswith('>'):
                 if not isinstance(seq, str):
                     seq.close()
                 chrom = re.split(' |\|', re.sub('^>', '', line))[0]
                 seq = open(
                     os.path.join(directory, 'sequences', chrom + '.fa'),
                     'w')
                 seq.write(">" + chrom + "\n")
             else:
                 seq.write(line)
         seq.close()
         con.close()
Example #32
    def __init__(self, *args, **kwargs):
        self.__dict__ = self.__shared_state

        if not getattr(self, 'initialised', False):
            self.initialised = True

            self.timestamp = time.strftime("%Y%m%d%H%M%S")

            cnf = Config()
            if "Dir::UrgencyLog" in cnf:
                # Create the log directory if it doesn't exist
                self.log_dir = cnf["Dir::UrgencyLog"]

                if not os.path.exists(self.log_dir) or not os.access(
                        self.log_dir, os.W_OK):
                    warn(
                        "UrgencyLog directory %s does not exist or is not writeable, using /srv/ftp.debian.org/tmp/ instead"
                        % (self.log_dir))
                    self.log_dir = '/srv/ftp.debian.org/tmp/'

                # Open the logfile
                self.log_filename = "%s/.install-urgencies-%s.new" % (
                    self.log_dir, self.timestamp)
                self.log_file = open_file(self.log_filename, 'w')

            else:
                self.log_dir = None
                self.log_filename = None
                self.log_file = None

            self.writes = 0
Example #33
    def create_train_test_valid_data(src_path, tgt_path, base_path):

        x = open_file(src_path)
        y = open_file(tgt_path)
        src_train, src, tgt_train, tgt = train_test_split(x,
                                                          y,
                                                          test_size=0.2,
                                                          shuffle=True)
        src_valid, src_test, tgt_valid, tgt_test = train_test_split(
            src, tgt, test_size=0.5, shuffle=True)
        NMTDataset.save_file(src_train, base_path + "src_train.txt")
        NMTDataset.save_file(tgt_train, base_path + "tgt_train.txt")
        NMTDataset.save_file(src_valid, base_path + "src_valid.txt")
        NMTDataset.save_file(tgt_valid, base_path + "tgt_valid.txt")
        NMTDataset.save_file(src_test, base_path + "src_test.txt")
        NMTDataset.save_file(tgt_test, base_path + "tgt_test.txt")
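
A minimal call sketch with hypothetical paths, assuming create_train_test_valid_data is exposed as a static method on NMTDataset; six split files are written under base_path.

NMTDataset.create_train_test_valid_data('./nep_dataset/src.txt',
                                        './nep_dataset/tgt.txt',
                                        './nep_dataset/')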
Example #34
    def infer(self, model_path, data_path, output):
        test_reader = paddle.batch(paddle.reader.buffered(
            reader.create_reader(data_path, self.settings),
            size=self.conf.batch_size * 1000),
                                   batch_size=self.conf.batch_size)

        # load the trained models
        parameters = paddle.parameters.Parameters.from_tar(
            utils.open_file(model_path, "r"))
        inferer = paddle.inference.Inference(output_layer=self.tags_layer,
                                             parameters=parameters)

        def count_evi_ids(test_batch):
            num = 0
            for sample in test_batch:
                num += len(sample[reader.E_IDS])
            return num

        for test_batch in test_reader():
            tags = inferer.infer(input=test_batch,
                                 field=["id"],
                                 feeding=network.feeding)
            evi_ids_num = count_evi_ids(test_batch)
            assert len(tags) == evi_ids_num
            print >> output, ";\n".join(str(tag) for tag in tags) + ";"
Example #35
    def right_click_call(self, _):
        """
        Right-click handler for the action button
        """
        fp = self.txt['text']
        if not fp:
            utils.showinfo('You have not selected a file/directory yet')
            return
        if not os.path.exists(fp):
            utils.showinfo('File/directory does not exist: "{}"'.format(fp))
            return

        if self.is_folder:
            utils.open_dir(fp)
        else:
            utils.open_file(fp, True)
def get_taxonomy():
    a_file = open_file("resources/occupations_from_legacy_taxonomy.txt")
    strings = string_to_list(a_file)
    cleaned_strings = clean_text(strings)
    split_strings = map(split_slash, cleaned_strings)
    flatten_list = flatten(split_strings)
    return flatten_list
Example #37
def edit_note(note, upload, session, trainee=False):
    # Write the current data to a temporary file
    (fd, temp_filename) = utils.temp_filename()
    editor = os.environ.get("EDITOR", "vi")
    answer = 'E'
    while answer == 'E':
        os.system("%s %s" % (editor, temp_filename))
        temp_file = utils.open_file(temp_filename)
        newnote = temp_file.read().rstrip()
        temp_file.close()
        print "New Note:"
        print utils.prefix_multi_line_string(newnote, "  ")
        prompt = "[D]one, Edit, Abandon, Quit ?"
        answer = "XXX"
        while prompt.find(answer) == -1:
            answer = utils.our_raw_input(prompt)
            m = re_default_answer.search(prompt)
            if answer == "":
                answer = m.group(1)
            answer = answer[:1].upper()
    os.unlink(temp_filename)
    if answer == 'A':
        return
    elif answer == 'Q':
        return 0

    comment = NewComment()
    comment.policy_queue = upload.policy_queue
    comment.package = upload.changes.source
    comment.version = upload.changes.version
    comment.comment = newnote
    comment.author = utils.whoami()
    comment.trainee = trainee
    session.add(comment)
    session.commit()
Example #38
def filter(alias2ent, freq_chat, th_chat, freq_general, th_general, freq_lex, th_lex, filter_set, complex_filter_set, exceptions):
    entities = []
    for filename in filter_set:
        t_entities = load_from_file(filename, full = True)
        entities.extend(t_entities)
    entities = set(entities)

    complex_entities = []
    for filename in complex_filter_set:
        t_entities = load_from_file(filename, full = True)
        complex_entities.extend(t_entities)
    complex_entities = set(complex_entities)

    keys = alias2ent.keys()
    exceptions = set(exceptions)
    fout = utils.open_file('filter.txt', 'w')
    for alias in keys:
        if alias in exceptions: continue
        # if len(alias) <=1 or alias in entities:
        if len(alias) <= 1 or alias in entities or decouple(complex_entities, alias, True):
            alias2ent[alias] = []
            continue
        if freq_chat[alias] > th_chat or freq_general[alias] > th_general or freq_lex[alias] > th_lex:
            alias2ent[alias] = []
            fout.write(u"{}\t{}\t{}\t{}\n".format(alias, freq_chat[alias], freq_general[alias], freq_lex[alias]))
    fout.close()
Example #39
    def generate(self, in_base_path, out_base_path):
        self.in_base_path = in_base_path;
        self.out_base_path = out_base_path;
        
        utils.makedirs(out_base_path);                
        imgutils.init(in_base_path);
        utils.init(in_base_path);
        
        self.blog = Struct(json.load(utils.open_file(self.in_base_path + "/blog.json")));

        # copy static content
        cmd = "cp -rf " + in_base_path + "/static/* " + out_base_path;
        print("copy static content: " + cmd)
        proc = utils.execute_shell(cmd);
        
        # 'dynamic' content
        for c in ["sticky", "posts"]:
            setattr(self, c, []);
            self.generate_content(c);        
        
        # home page
        self.generate_home();

        # feed
        self.generate_feed();
Example #40
File: queue.py, Project: fatman2021/dak
def prod_maintainer(notes, upload):
    cnf = Config()
    changes = upload.changes
    whitelists = [ upload.target_suite.mail_whitelist ]

    # Here we prepare an editor and get them ready to prod...
    (fd, temp_filename) = utils.temp_filename()
    temp_file = os.fdopen(fd, 'w')
    temp_file.write("\n\n=====\n\n".join([note.comment for note in notes]))
    temp_file.close()
    editor = os.environ.get("EDITOR","vi")
    answer = 'E'
    while answer == 'E':
        os.system("%s %s" % (editor, temp_filename))
        temp_fh = utils.open_file(temp_filename)
        prod_message = "".join(temp_fh.readlines())
        temp_fh.close()
        print "Prod message:"
        print utils.prefix_multi_line_string(prod_message,"  ",include_blank_lines=1)
        prompt = "[P]rod, Edit, Abandon, Quit ?"
        answer = "XXX"
        while prompt.find(answer) == -1:
            answer = utils.our_raw_input(prompt)
            m = re_default_answer.search(prompt)
            if answer == "":
                answer = m.group(1)
            answer = answer[:1].upper()
    os.unlink(temp_filename)
    if answer == 'A':
        return
    elif answer == 'Q':
        return 0
    # Otherwise, do the proding...
    user_email_address = utils.whoami() + " <%s>" % (
        cnf["Dinstall::MyAdminAddress"])

    changed_by = changes.changedby or changes.maintainer
    maintainer = changes.maintainer
    maintainer_to = utils.mail_addresses_for_upload(maintainer, changed_by, changes.fingerprint)

    Subst = {
        '__SOURCE__': upload.changes.source,
        '__CHANGES_FILENAME__': upload.changes.changesname,
        '__MAINTAINER_TO__': ", ".join(maintainer_to),
        }

    Subst["__FROM_ADDRESS__"] = user_email_address
    Subst["__PROD_MESSAGE__"] = prod_message
    Subst["__CC__"] = "Cc: " + cnf["Dinstall::MyEmailAddress"]

    prod_mail_message = utils.TemplateSubst(
        Subst,cnf["Dir::Templates"]+"/process-new.prod")

    # Send the prod mail
    utils.send_mail(prod_mail_message, whitelists=whitelists)

    print "Sent prodding message"
Example #41
def isValid(text):
	"""
	Return True if the text contains one of the known salutations.
	"""
	text = utils.escape_query(text)
	content = utils.open_file(os.path.join(os.getcwd(),"glaucobot/assets/salutations.txt"))
	for line in content:
		if bool(re.search(r'%s' %(line.lower()), text, re.IGNORECASE)):
			return True
	return False
Example #42
File: changes.py, Project: abhi11/dak
    def file_summary(self):
        # changes["distribution"] may not exist in corner cases
        # (e.g. unreadable changes files)
        if not self.changes.has_key("distribution") or not \
               isinstance(self.changes["distribution"], dict):
            self.changes["distribution"] = {}

        byhand = False
        new = False
        summary = ""
        override_summary = ""

        for name, entry in sorted(self.files.items()):
            if entry.has_key("byhand"):
                byhand = True
                summary += name + " byhand\n"

            elif entry.has_key("new"):
                new = True
                summary += "(new) %s %s %s\n" % (name, entry["priority"], entry["section"])

                if entry.has_key("othercomponents"):
                    summary += "WARNING: Already present in %s distribution.\n" % (entry["othercomponents"])

                if entry["type"] == "deb":
                    deb_fh = open_file(name)
                    summary += TagSection(deb_extract_control(deb_fh))["Description"] + '\n'
                    deb_fh.close()

            else:
                entry["pool name"] = poolify(self.changes.get("source", ""), entry["component"])
                destination = entry["pool name"] + name
                summary += name + "\n  to " + destination + "\n"

                if not entry.has_key("type"):
                    entry["type"] = "unknown"

                if entry["type"] in ["deb", "udeb", "dsc"]:
                    # (queue/unchecked), there we have override entries already, use them
                    # (process-new), there we dont have override entries, use the newly generated ones.
                    override_prio = entry.get("override priority", entry["priority"])
                    override_sect = entry.get("override section", entry["section"])
                    override_summary += "%s - %s %s\n" % (name, override_prio, override_sect)

        return (byhand, new, summary, override_summary)
Example #43
def new_site(root=u'.', force=False):
    try:
        os.stat(os.path.join(root, 'index.html'))
        if not force:
            msg = "Oops, there's already an index.html file in the source \n"+\
                  "folder. If you want to overwrite this folder with a new \n"+\
                  "site, use the --force option."
            print(msg)
            sys.exit(1)
    except OSError:
        pass

    print("Creating new site in '{0}'.".format(root))

    for fname, text in NEW_SITE.items():
        fpath = os.path.join(root, fname)
        with utils.open_file(fpath, "w", create_dir=True) as afile:
            afile.write(text)
Example #44
File: daklog.py, Project: abhi11/dak
    def _open_log(self, debug):
        # Create the log directory if it doesn't exist
        from daklib.config import Config
        logdir = Config()["Dir::Log"]
        if not os.path.exists(logdir):
            umask = os.umask(00000)
            os.makedirs(logdir, 0o2775)
            os.umask(umask)

        # Open the logfile
        logfilename = "%s/%s" % (logdir, time.strftime("%Y-%m"))
        logfile = None

        if debug:
            logfile = sys.stderr
        else:
            umask = os.umask(0o0002)
            logfile = utils.open_file(logfilename, 'a')
            os.umask(umask)

        self.logfile = logfile
Example #45
    def processHeader(self):
        fd = utils.open_file(self.page_path, "rb");
        header_json="";
        m="";
        in_header = True;
        for line in fd:
            if (in_header):
                if (line == "\n"):
                    in_header = False;
                header_json += line;
            else:
                m += line;
        try:
            self.header = json.loads(header_json);
        except ValueError as e:
            utils.fatal("malformed content header in " + self.page_path + ":\n" + str(e));
                        
        for k in self.header:
            setattr(self, k, self.header[k]);

        self.html = markdown.markdown(m);
Example #46
 def open_tag_file(self):
     tag_file = "{}.tag.txt".format(self.acquisition_path.encode("utf-8"))
     open_file(tag_file)
Example #47
 def process_file(self, in_relpath, out_relfpath, context):
     print("=== " + out_relfpath + " ===");
     out_fpath = self.out_base_path + "/" + out_relfpath;
     utils.makedirs(os.path.dirname(out_fpath));
     utils.open_file(out_fpath, "wb").write(self.include(in_relpath, context));
Example #48
 def open_rr_file(self):
     rr_file = "{}.rr.txt".format(self.acquisition_path.encode("utf-8"))
     open_file(rr_file)
Example #49
 def _open(self):
     """Use the open_file function on self.source.filepath in 'r' mode"""
     return open_file(self.source.filepath)
Example #50
    def mc_study(self):
        """
        perform pseudo-experiments by generating random
        data and models (using statistical bin uncertainty)

        pseudo data: 
            - sum all model hists
            - then randomise
        model: 
            - randomise each input hist individually
        """
        samples = self.samples

        ntrials = 10000
        if self.plot_toy_fits: ntrials = 10
        
        ## save options before mc study
        tag = self.tag 
        quiet = self.quiet
        self.quiet = True

        toy_arrays = {} 
        for s in samples: 
            if not s in toy_arrays: toy_arrays[s] = {}
            toy_arrays[s]['mean']  = []
            toy_arrays[s]['error'] = []
            toy_arrays[s]['pull']  = []
            toy_arrays[s]['diff']  = []
            toy_arrays[s]['mc']  = []
        data_array = []

        ## initialise toy fitter
        ## VERY important to prepare hists first
        ## before initialising, so TFractionFitter
        ## is not initialise with the real data
        self.prepare_toy_hists()
        self.init_fitter(toy=True)

        for i in xrange(ntrials):
            if i%100==0: print 'trial ',i
            
            #print
            #print 'trial%d summary: ' % i

            self.tag = '%s_trial%d'% (tag,i)
            self.randomise_hists()
            self.toy_fit()
            if not self.fit_status == 0: continue
            if self.plot_toy_fits: self.plot()

           
            temp_means = {}
            temp_errors = {}
            temp_pulls = {} 
            temp_diffs = {} 
            temp_mcs = {} 
            has_zero = False
            n_tot_mc = self.ntot_orig()
            #print 'n_tot_mc: %.1f, h_orig_tot.int: %.1f'% (n_tot_mc,self.h_orig_total.Integral())

            n_tot_fit = self.ntot_fit()
            #print 'n_tot_fit: %.1f, n_data: %.1f' % (n_tot_fit,self.ndata_curr())
            for s in samples: 
                n_mc   = self.nsamp_orig(s)
                n_fit  = self.nsamp_fit(s)
                en_fit = self.ensamp_fit(s)
                pull   = (n_fit-n_mc)/en_fit if en_fit else 0.0
                diff   = n_fit - n_mc
                temp_means[s] = n_fit
                temp_errors[s] = en_fit
                temp_pulls[s] = pull
                temp_diffs[s] = diff
                temp_mcs[s] = self.nsamp_curr(s)
                f_fit = self.fsamp_fit(s)
                #if f_fit == 0.: has_zero = True 
                if f_fit < 0.0000001: has_zero = True            
                f_mc = n_mc / n_tot_mc if n_tot_mc else 0.0
                f_fit = n_fit / n_tot_fit if n_tot_fit else 0.0
                #print '%s, mc: %.1f, fit: %.1f, fmc: %.4f, ffit: %.4f' % (s,n_mc,n_fit,f_mc,f_fit)

            #print 'data, mc: %.1f, fit: %.1f' % (n_tot_mc,n_tot_fit)
            ## remove cases where any component is fit to 0
            ## argument is that we would not take this 
            ## result if we got it in data
            ## probably should try to do something 
            ## better in future
            if not has_zero:      
                for s in samples: 
                    toy_arrays[s]['mean'].append(temp_means[s])
                    toy_arrays[s]['error'].append(temp_errors[s])
                    toy_arrays[s]['pull'].append(temp_pulls[s])
                    toy_arrays[s]['diff'].append(temp_diffs[s])
                    toy_arrays[s]['mc'].append(temp_mcs[s])
                    data_array.append(self.ndata_curr())
            else:
                print 'ERROR - component fit to zero'


        ## restore to original state before toys 
        self.reset_hists()
        self.tag = tag
        self.quiet = quiet

        ## set corrections from toy study
        filename = 'toy_%s.root'%(self.tag)
        for s in samples: 
            a_mean  = toy_arrays[s]['mean']
            a_error = toy_arrays[s]['error']
            a_pull  = toy_arrays[s]['pull']
            a_diff  = toy_arrays[s]['diff']
            a_mc    = toy_arrays[s]['mc']
 
            if not s in self.toy_results: self.toy_results[s] = {}
            self.toy_results[s]['meanm']  = numpy.mean(a_mean)
            self.toy_results[s]['meane']  = numpy.std(a_mean)
            self.toy_results[s]['errorm']  = numpy.mean(a_error)
            self.toy_results[s]['errore']  = numpy.std(a_error)
            self.toy_results[s]['pullm']  = numpy.mean(a_pull)
            self.toy_results[s]['pulle']  = numpy.std(a_pull)
            
            ## create plots
            h_mean  = create_mean_hist(s)
            h_error = create_error_hist(s)
            h_pull  = create_pull_hist(s)
            h_diff  = create_diff_hist(s)
            h_mc    = create_mc_hist(s)
            for v in a_mean: h_mean.Fill(v)
            for v in a_error: h_error.Fill(v)
            for v in a_pull: h_pull.Fill(v)
            for v in a_diff: h_diff.Fill(v)
            for v in a_mc  : h_mc.Fill(v)
            utils.save_object(h_mean,filename)
            utils.save_object(h_error,filename)
            utils.save_object(h_pull,filename)
            utils.save_object(h_diff,filename)
            utils.save_object(h_mc,filename)

        h_mc_data    = create_mc_hist('data')
        for v in data_array: h_mc_data.Fill(v)
        utils.save_object(h_mc_data,filename)


        for isamp in xrange(len(samples)):
            s1 = samples[isamp]
            for isamp2 in xrange(len(samples)):
                if not isamp2 < isamp: continue
                s2 = samples[isamp2]
                h = create_2d_mean_hist(s1,s2)
                for ns1,ns2 in zip(toy_arrays[s1]['mean'],
                                   toy_arrays[s2]['mean']):
                    h.Fill(ns1,ns2)
                utils.save_object(h,filename)
       
        for s in samples:
            h = create_2d_mean_hist('%s_mc'%s,'%s_fit'%s)
            for ns1,ns2 in zip(toy_arrays[s]['mc'],
                               toy_arrays[s]['mean']):
                h.Fill(ns1,ns2)
            utils.save_object(h,filename)



        f = utils.open_file(filename)
        f.Close()
Example #51
File: episode.py, Project: ksaua/tvmojo
 def view(self): 
     utils.open_file(self.localfile)
     self.watched = True
Example #52
def handle(text):
	content = utils.open_file(os.path.join(os.getcwd(),"glaucobot/assets/affirmatives.txt"))
	return random.choice(content)