Example #1
def split_into_chunks_by_size(df, column, max_number_of_rows):
    """
    Split a dataframe into multiple data frames, each with max_number of rows
    
    Takes a dataframe, the column on whose value to split, and maximum 
    size of the resulting dataframes.
    
    Returns two aligned lists
    * list of data frames which partition the or
    * list of sets of `column` values
    """
    
    if column not in df.columns:
        raise ValueError('Splitting column must be in dataframe')
    
    sizes = df.groupby(column).size()
    
    # There may be a more efficient bin packing solver,
    # or if there are sufficiently many groups, one could simply
    # sample N/average_grain_size times without replacement
    # and hope to end up with fairly even groups by virtue of the central limit theorem
    from binpacking import to_constant_volume
    bins = to_constant_volume(sizes.to_dict(), max_number_of_rows)
    
    allframes = []
    allindices = []
    for modelbin in bins:
        minidf = df[df[column].isin(set(modelbin.keys()))]
        allframes.append(minidf)
        allindices.append(set(modelbin.keys()))
        
    return allframes, allindices
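
A minimal usage sketch for the function above; the DataFrame contents and column name are invented for illustration, and it assumes pandas and the binpacking package are installed:

import pandas as pd

df = pd.DataFrame({'city': ['a', 'a', 'b', 'b', 'b', 'c'],
                   'value': [1, 2, 3, 4, 5, 6]})
frames, groups = split_into_chunks_by_size(df, 'city', max_number_of_rows=3)
# Each chunk keeps whole 'city' groups together; for the data above, groups
# might come back as [{'b'}, {'a', 'c'}].
for chunk, cities in zip(frames, groups):
    print(cities, len(chunk))
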
Example #2
def bpck(groups, node, parent, bin_len):
    # Perform bin packing on a single node:
    # pack the clusters in groups[node] and add them to groups[parent].
    # If node and parent are equal, the root was reached.
    at_root = node == parent

    # If there is only one cluster, there is no need to pack
    if groups[node].get_cluster_count() == 1:
        if not at_root:  # transfer cluster to parent if not root
            groups[parent].merge(groups[node])
            del groups[node]
    else:
        # Perform bin packing
        clusters = binpacking.to_constant_volume(
            groups[node].get_clusters_to_bpck(), bin_len, weight_pos=1)
        if clusters:
            if not at_root:
                # Parse clustered results into parent node and remove actual node
                groups[parent].add_clusters_from_bpck(
                    clusters, leaves=groups[node].get_leaves())
                del groups[node]
            else:  # at the root
                # Parse clustered results into same node (clear it before)
                groups[parent].clear_clusters()
                groups[parent].add_clusters_from_bpck(clusters)
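
The weight_pos=1 argument tells to_constant_volume to read each item's weight from index 1 of its tuple, which is presumably what get_clusters_to_bpck() returns. A standalone sketch of that call pattern with made-up cluster data:

import binpacking

clusters = [('cluster_a', 700), ('cluster_b', 300), ('cluster_c', 250), ('cluster_d', 200)]
bins = binpacking.to_constant_volume(clusters, 800, weight_pos=1)
# Each bin is a list of the original tuples whose weights sum to at most 800, e.g.
# [[('cluster_a', 700)], [('cluster_b', 300), ('cluster_c', 250), ('cluster_d', 200)]]
print(bins)
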
Example #3
 def _get_binpacked_resources(self, resource_size, fragments):
     # type: (ResourceSize, Iterable[Fragment]) -> List[Dict[Fragment, FragmentSize]]
     return binpacking.to_constant_volume(
         {f: f.fragment_size
          for f in fragments},
         resource_size,
         upper_bound=resource_size + 1)
Example #4
def initial_solution_bp(r):
    # Demand column of the coordinates/demands array; drop the first row (the depot)
    dmd = r['coords_demands'][:, 2]
    dmd1 = dmd[1:]
    indexes = np.arange(1, dmd1.size + 1)
    # Pack customers into bins of size r['capacity'] (one bin per vehicle)
    bpsol = bp.to_constant_volume(
        {key: value
         for key, value in zip(indexes, dmd1)}, r['capacity'])
    # Prepend the depot (node 0) to each bin and flatten into a single route array
    ll = [[0] + list(d.keys()) for d in bpsol]
    sol = np.array(list(itertools.chain(*ll)))
    return sol
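
A hedged usage sketch of initial_solution_bp. The structure of r is inferred from the code (rows of 'coords_demands' hold [x, y, demand], with the depot in row 0), and the aliases np, bp and itertools are assumed to be numpy, binpacking and the standard itertools module:

import itertools
import numpy as np
import binpacking as bp

r = {
    'coords_demands': np.array([[0.0, 0.0, 0.0],   # depot; its demand is ignored
                                [1.0, 2.0, 4.0],
                                [3.0, 1.0, 3.0],
                                [2.0, 5.0, 5.0]]),
    'capacity': 7,
}
print(initial_solution_bp(r))
# One possible result: [0 3 0 1 2] -- each 0 marks the start of a new vehicle route.
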
Example #5
def wood_pack_1d(type, *args, **kwargs):
    values = []

    # The single keyword argument names the dimension to pack on (e.g. a length):
    # its key selects the field from the BOM table, its value is the stock size per bin.
    key = list(kwargs.keys())[0]

    for k, v in wood_bom_table.items():
        if v["type"] == type:
            values += [v[key]] * v["count"]

    return binpacking.to_constant_volume(values, kwargs[key])
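
wood_pack_1d reads from a module-level wood_bom_table that is not shown above; a hypothetical table and call, purely for illustration (packing 2x4 part lengths into 96-unit stock boards):

import binpacking

wood_bom_table = {
    'legs':  {'type': '2x4', 'length': 30, 'count': 4},
    'rails': {'type': '2x4', 'length': 48, 'count': 2},
    'slats': {'type': '1x3', 'length': 60, 'count': 6},
}
print(wood_pack_1d('2x4', length=96))
# e.g. [[48, 48], [30, 30, 30], [30]] -- each inner list is one stock board.
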
Example #6
def printBins(timingdata, distro, expectfail):
    inputs = dict([(t, v["duration"]) for (t, v) in timingdata[distro].items() if v["success"] != expectfail])
    bins = binpacking.to_constant_volume(inputs, MAXBINDURATION)

    for b in bins:
        tools = " ".join(sorted(b.keys()))
        duration = sum(b.values())
        if expectfail:
            print("- DISTRO='{}' EXPECTFAIL=1 TOOL='{}' # estimated {} seconds".format(distro, tools, duration))
        else:
            print("- DISTRO='{}' TOOL='{}' # estimated {} seconds".format(distro, tools, duration))
Example #7
def bpck(d, bin_len):
    # Only one bin, no need to pack
    if len(d) == 1:
        return d
    else:
        ret = []
        for bin in binpacking.to_constant_volume(d, bin_len, weight_pos=0):
            # Skip empty bins: they can appear when the bin packing algorithm
            # cannot divide larger sequences
            if bin:
                # Convert the bin's list output to tuple format
                sum_length, ids = sum_tuple_ids(bin)
                ret.append((sum_length,) + tuple(ids))  # i.e. ret.append((sum_length, *ids))

        return ret
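
bpck expects a list of tuples whose first element is a length, and it delegates to a sum_tuple_ids helper that is not shown in this snippet; a plausible stand-in is defined here purely so the sketch runs:

import binpacking   # the snippet above assumes this module-level import

def sum_tuple_ids(bin):
    # Hypothetical helper: sum the lengths (position 0) and collect the
    # remaining ids from every tuple in the bin.
    total = sum(entry[0] for entry in bin)
    ids = [x for entry in bin for x in entry[1:]]
    return total, ids

sequences = [(900, 'seq1'), (400, 'seq2'), (350, 'seq3'), (120, 'seq4')]
print(bpck(sequences, bin_len=1000))
# e.g. [(900, 'seq1'), (870, 'seq2', 'seq3', 'seq4')]
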
Example #8
def generate(criteria, database_location, path_folder):
    import sqlite3
    import binpacking
    import random
    from latex import build_pdf
    import hashlib
    import time

    db = sqlite3.connect(database_location)

    cursor = db.cursor()

    criteria = criteria.split(",")
    sql_statement = '''
                    SELECT question, mark, skill, type, year, paper, question_no
                    FROM questions_only_wip 
                    WHERE question IN (SELECT question FROM questions_only_wip ORDER BY RANDOM() LIMIT 100)
                    AND skill LIKE "%" ||?|| "%" 
                    '''
    if criteria[1:]:
        # Add one extra OR clause per additional criterion; note the leading
        # space so consecutive clauses do not run together.
        for _ in criteria[1:]:
            sql_statement += ' OR skill LIKE "%" || ? || "%" '

    cursor.execute(sql_statement, criteria)

    questions = cursor.fetchall()

    questionpack = binpacking.to_constant_volume(questions,
                                                 20,
                                                 weight_pos=1,
                                                 lower_bound=None,
                                                 upper_bound=None)
    with open("assets/latex_preamble.txt", "r") as inF:
        lines = inF.readlines()
    # Implement direct latex parsing
    latex_code = ""
    latex_code += ''.join(lines)
    for p in questionpack[random.randint(0, len(questionpack) - 1)]:
        currentQ = p[0].split("\n")
        finalQ = ""
        for i in range(len(currentQ)):
            if currentQ[i].strip() != "":
                finalQ += currentQ[i]
        # Append the question followed by its [type/year/paper/question_no] tag
        latex_code += finalQ + "\n\n" + r" \hfill{} " + \
            "[" + str(p[3]) + "/" + str(p[4]) + "/" + \
            str(p[5]) + "/" + str(p[6]) + "]\n\n"
    latex_code += r"\end{enumerate} \end{document}"
    db.close()
    pdf = build_pdf(latex_code)
    saved_path = f"{path_folder}/{hash(pdf)}.{time.time_ns()}.pdf"
    pdf.save_to(saved_path)
    return saved_path
Example #9
 def _chunk_items(items, chunk_size):
     """
     Use binpacking library to chunk a set of files/directories into relatively constant volume bins.
     :param items: A dict formatted as {filepath: size in bytes}
     :param chunk_size: The optimal size (in bytes) to target for each bin.
     :return: A list of lists, each sublist representing a bin of roughly constant size.
     """
     result = []
     print('Packing bins...')
     bins = binpacking.to_constant_volume(items, chunk_size)
     for item in bins:
         result.append(list(item.keys()))
     return result
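
A quick sketch of how _chunk_items might be driven; the paths and sizes are invented, and chunk_size is in bytes as the docstring describes:

import binpacking   # assumed by _chunk_items above

files = {
    '/data/a.bin': 700 * 1024,
    '/data/b.bin': 300 * 1024,
    '/data/c.bin': 250 * 1024,
    '/data/d.bin': 200 * 1024,
}
for chunk in _chunk_items(files, chunk_size=800 * 1024):
    print(chunk)
# Each printed chunk is a list of paths whose combined size stays at or
# below roughly 800 KiB.
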
Example #10
def calc(tasks, ixlst, summin, cores, machines, grids):
    dfc = {}
    for i in tasks:
        dfc[i] = ixlst[tasks.index(i)]
    bins = binpacking.to_constant_volume(dfc, cores)
    if len(bins) <= machines:
        sumlst = []
        for b in bins:
            summachn = 0
            for key, item in b.items():
                t = grids.iat[tasks.index(key), item - 1]
                summachn = max(summachn, t)
            sumlst.append(summachn)
        tmp = max(sumlst)
        if tmp <= summin:
            return ixlst, tmp, bins
    return [], summin, []
Example #11
    def binpack_1d(self, t, *args, **kwargs):
        if isinstance(t, type):
            t = t.__name__

        values = []

        key = list(kwargs.keys())[0]

        parts = self.get_bom_parts()

        for v in parts:
            if v["type"] == t:
                values += [v[key]]

        if len(values) == 0:
            return []

        return binpacking.to_constant_volume(values, kwargs[key])
Example #12
def do_binpack_constvol(bp_dict, max_vol=100*1024):
    """
    Takes in a dictionary mapping filenames to filesizes (in bytes).
    Takes in the maximum filesize of each bin. Default: 100*1024 bytes (100 KiB).

    Determines a packing of filenames into X bins, where X is
    minimized and each of the X bins is constrained to hold
    at most max_vol bytes.

    Returns a list of X dictionaries. The pairwise
    intersection of the dictionaries returned is empty, and their union
    is the entirety of bp_dict.

    Note: requires max_vol >= the size of the largest file
    in the list (otherwise that file cannot be placed in any bin).
    """
    assert max_vol >= max(bp_dict.values()), (
        "Maximum volume for binpacking must be at least as large as the "
        "largest file ({} bytes)".format(max(bp_dict.values())))

    bins = binpacking.to_constant_volume(bp_dict, max_vol)
    return bins
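
A small illustration of the contract described in the docstring, with made-up file sizes:

import binpacking   # assumed by do_binpack_constvol above

sizes = {'a.log': 40 * 1024, 'b.log': 70 * 1024, 'c.log': 30 * 1024, 'd.log': 55 * 1024}
bins = do_binpack_constvol(sizes, max_vol=100 * 1024)
# The bins partition the input: every filename appears in exactly one bin,
# and no bin exceeds max_vol.
assert sum(len(b) for b in bins) == len(sizes)
assert all(sum(b.values()) <= 100 * 1024 for b in bins)
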
Example #13
seconds = time.time()

# Aggregate a mapping from filename to file size.
filesizes = {}
for entry in os.scandir(directory):
    filename = entry.name

    if not entry.is_file():
        continue
    if extension and not filename.lower().endswith('.' + extension.lower()):
        continue

    filesize = entry.stat().st_size if bundleunit else 1
    filesizes[filename] = filesize

# Find an efficient packing of files into bundles of the requested size.
multiplier = UNITS[bundleunit]
bundles = binpacking.to_constant_volume(filesizes, bundlesize * multiplier)

# Create the output directory if it does not exist
if not os.path.exists(output):
    os.mkdir(output)

# Make and write zip to file
for index, bundle in enumerate(tqdm(bundles)):
    unique = datetime.fromtimestamp(seconds + index).strftime('%Y%m%d%H%M%S')
    with zipfile.ZipFile(f'{output}/{bundlename}{unique}.zip', 'w') as z:
        for filename in bundle.keys():
            filepath = os.path.join(directory, filename)
            z.write(os.path.abspath(filepath), filename)
Example #14
#cursor.execute('''SELECT question, mark
#                  FROM questions_only_wip
#                  WHERE question
#                  IN (SELECT question FROM questions_only_wip ORDER BY RANDOM() LIMIT 50)''')
#                   WHERE topic LIKE "%DIFFERENTIATION%" ''')
cursor.execute(sql_statement)

#cursor.execute('''SELECT question
#                   FROM ALVL_Math_Question_Bank
#                   WHERE mark BETWEEN 10 AND 20
#                   ''')
questions = cursor.fetchall()

questionpack = binpacking.to_constant_volume(questions,
                                             20,
                                             weight_pos=1,
                                             lower_bound=None,
                                             upper_bound=None)

inF = open("C:/Users/Dropbox/2019/latex_preamble.txt", "r")
lines = inF.readlines()

outF = open("C:/Users/Dropbox/2019/latex_recommender.txt", "w")
outF.writelines(lines)

#outF = open("E:/Dropbox/myOutFile.txt", "w")
#for p in questions:# write line to output file
#    currentQ = p[0].split("\n")
#    finalQ = ""
#    for i in range(len(currentQ)):
#        if currentQ[i].strip() != "":
Example #15
    def find_subtours(self, checkonly, solution, variable_dict):
        """ find subtours in the current solution """

        graph = nx.Graph()

        variables = list(self.solver.variable_dict.values())
        keys = list(self.solver.variable_dict.keys())
        solution = [
            self.model.getSolVal(solution, variable) for variable in variables
        ]
        self.solver.solutions.append(solution)
        indices = [num for num, item in enumerate(solution) if item > 0]
        nonzero_edges = [
            mip_utils.get_variable_tuple(variables[index].name)
            for index in indices
        ]
        graph.add_edges_from(nonzero_edges)

        if np.sum(solution) > 0 and np.sum(
                solution) < self.solver.vehicle_num + len(
                    self.solver.vertices):
            cycles = []
            while len(graph.edges) > 0:
                edges = nx.find_cycle(graph)
                cycles.append(edges)
                graph.remove_edges_from(edges)

                components = [set(np.unique(item)) for item in cycles]
        else:
            components = nx.connected_components(graph)

        components = [
            item for item in components
            if len(item.intersection(set(self.solver.depots))) == 0
            or np.sum([self.solver.demands[k]
                       for k in item]) > self.solver.get_capacity()
        ]

        if len(components) == 1 or self.round_counter >= self.max_rounds:
            return False
        elif checkonly:
            return True

        subtour_selector = subtourStrategy(
            cut_strategy=self.solver.cut_strategy)

        for S in subtour_selector.get_subtours(components):
            min_vehicles = len(
                bp.to_constant_volume(
                    {item: self.solver.demands[item]
                     for item in S}, self.solver.get_capacity()))
            if self.solver._is_symmetric:
                varnames = [
                    "x_{},{}".format(i, j) for i in S for j in S
                    if j > i and "x_{},{}".format(i, j) in keys
                ]
            if self.solver._is_asymmetric:
                varnames = [
                    "x_{},{}".format(i, j) for i in S for j in S
                    if "x_{},{}".format(i, j) in keys
                ]
            self.model.addCons(
                quicksum(self.solver.variable_dict[name]
                         for name in varnames) <= len(S) - min_vehicles)
        self.round_counter += 1
        self.solver.counter += 1
        return True
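
The bp.to_constant_volume call above is used only for its bin count: packing a subtour's demands into capacity-sized bins estimates the minimum number of vehicles that subtour needs, which then tightens the added constraint. A standalone illustration of that idea with invented demands and capacity:

import binpacking as bp

demands = {1: 4, 2: 3, 3: 5, 4: 2}   # customer -> demand, made up for illustration
capacity = 7
min_vehicles = len(bp.to_constant_volume(demands, capacity))
print(min_vehicles)  # 2 -- a total demand of 14 cannot fit in one vehicle of capacity 7
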
Example #16
 async def _do_work_transfer_request(self, lta_rc: RestClient,
                                     tr: TransferRequestType) -> None:
     self.logger.info(f"Processing TransferRequest: {tr}")
     # configure a RestClient to talk to the File Catalog
     fc_rc = RestClient(self.file_catalog_rest_url,
                        token=self.file_catalog_rest_token,
                        timeout=self.work_timeout_seconds,
                        retries=self.work_retries)
     # figure out which files need to go
     source = tr["source"]
     dest = tr["dest"]
     path = tr["path"]
     # query the file catalog for the source files
     self.logger.info(
         f"Asking the File Catalog about files in {source}:{path}")
     query_dict = {
         "locations.site": {
             "$eq": source
         },
         "locations.path": {
             "$regex": f"^{path}"
         },
         "logical_name": {
             "$regex": f"^{path}"
         },
     }
     query_json = json.dumps(query_dict)
     page_start = 0
     catalog_files = []
     fc_response = await fc_rc.request(
         'GET',
         f'/api/files?query={query_json}&keys=uuid&limit={self.file_catalog_page_size}&start={page_start}'
     )
     num_files = len(fc_response["files"])
     self.logger.info(
         f'File Catalog returned {num_files} file(s) to process.')
     catalog_files.extend(fc_response["files"])
     while num_files == self.file_catalog_page_size:
         self.logger.info(f'Paging File Catalog. start={page_start}')
         page_start += num_files
         fc_response = await fc_rc.request(
             'GET',
             f'/api/files?query={query_json}&keys=uuid&limit={self.file_catalog_page_size}&start={page_start}'
         )
         num_files = len(fc_response["files"])
         self.logger.info(
             f'File Catalog returned {num_files} file(s) to process.')
         catalog_files.extend(fc_response["files"])
     # if we didn't get any files, this is bad mojo
     if not catalog_files:
         await self._quarantine_transfer_request(
             lta_rc, tr,
             "File Catalog returned zero files for the TransferRequest")
         return
     # create a packing list by querying the File Catalog for size information
     num_catalog_files = len(catalog_files)
     self.logger.info(
         f'Processing {num_catalog_files} UUIDs returned by the File Catalog.'
     )
     packing_list = []
     for catalog_file in catalog_files:
         catalog_file_uuid = catalog_file["uuid"]
         catalog_record = await fc_rc.request(
             'GET', f'/api/files/{catalog_file_uuid}')
         file_size = catalog_record["file_size"]
         #                    0: uuid            1: size
         packing_list.append((catalog_file_uuid, file_size))
     # divide the packing list into an array of packing specifications
     packing_spec = to_constant_volume(packing_list, self.max_bundle_size,
                                       1)  # 1: size
     # for each packing list, we create a bundle in the LTA DB
     self.logger.info(
         f"Creating {len(packing_spec)} new Bundles in the LTA DB.")
     for spec in packing_spec:
         self.logger.info(
             f"Packing specification contains {len(spec)} files.")
         bundle_uuid = await self._create_bundle(
             lta_rc,
             {
                 "type": "Bundle",
                 # "uuid": unique_id(),  # provided by LTA DB
                 "status": self.output_status,
                 "reason": "",
                 # "create_timestamp": right_now,  # provided by LTA DB
                 # "update_timestamp": right_now,  # provided by LTA DB
                 "request": tr["uuid"],
                 "source": source,
                 "dest": dest,
                 "path": path,
                 "file_count": len(spec),
             })
         await self._create_metadata_mapping(lta_rc, spec, bundle_uuid)
Example #17
# TODO: check to make sure the folders match this format:
# FLPTY0

# These should be in order.
floppies = subfolders(floppies_dir)

# Keys are file locations.
# Values are file sizes.
samples = {}
wavs = glob.glob(samples_dir + "/*.wav")
for wav in wavs:
    samples[wav] = os.stat(wav).st_size  #size in bytes.

#binpack to bins of maximum size 1440kb
bins = binpacking.to_constant_volume(samples, 1440000, None, None, 1440000)
print(bins)
itr = 1

#copy all the files.
for b in bins:
    if itr == 100:
        break
    for sample in b:
        dest = floppies_dir + "/" + floppies[itr] + "/" + os.path.basename(
            sample)
        print(dest)
        copyfile(sample, dest)
    itr = itr + 1

msgbox("Done!", "SUCCESS!")
Example #18
 def to_spot_volume(b):
     bins = binpacking.to_constant_volume(b, spot_size)
     return bins
Example #19
    def _flush_resource_appending(self, empty=False):
        """
        fetches smallest resource payload-wise and tries to append fragments to reach the percentage fill level
        will not flush fragments, that are not "appendable" because they are too big.
        """
        with self._mutex:
            abort_appending = False
            while not abort_appending:
                smallest_resource = self.meta.getSmallestResource(
                    ignore=self.resource_reuse_blacklist)
                # check if we have fragments with size smaller than append_max_resource_size

                if smallest_resource:
                    append_max_resource_size = self.resource_size - smallest_resource.resource_payloadsize
                    having_appendable_fragments = any(
                        (f.fragment_size <= append_max_resource_size
                         for _, f in self.fragment_cache.values()))
                else:
                    append_max_resource_size = self.resource_size
                    having_appendable_fragments = False
                if having_appendable_fragments:
                    fragments_dict = {
                        f: f.fragment_size
                        for _, f in self.fragment_cache.values()
                    }
                    resources = binpacking.to_constant_volume(
                        fragments_dict,
                        append_max_resource_size,
                        upper_bound=append_max_resource_size + 1)
                    if not resources:
                        raise NotImplementedError
                    if len(resources) == 0 or len(resources[0]) == 0:
                        self._flush_percentage_filled(empty=empty)
                        abort_appending = True
                        continue

                    resources = sorted(resources,
                                       key=lambda r: sum(r.values()),
                                       reverse=True)
                    packed_fragments_size_dict = resources[0]
                    uploadable_fragment_hashes = [
                        f.fragment_hash
                        for f in packed_fragments_size_dict.keys()
                    ]
                    if self.debug:
                        resource_payload_size = sum(
                            packed_fragments_size_dict.values())
                        assert resource_payload_size <= append_max_resource_size, repr(
                            resource_payload_size) + ' ' + repr(
                                append_max_resource_size) + ' ' + repr(
                                    packed_fragments_size_dict)
                    resource_fragments = self.loadFragmentsOfResource(
                        smallest_resource)
                    for fragment, data in resource_fragments:
                        if fragment.fragment_hash not in self.fragment_cache:
                            self.fragment_cache[fragment.fragment_hash] = (
                                data, fragment)
                            self.cache_total_fragmentsize += len(data)
                            if self.debug:
                                assert fragment.fragment_hash not in uploadable_fragment_hashes
                            uploadable_fragment_hashes.append(
                                fragment.fragment_hash)
                            if self.debug:
                                assert self.cache_total_fragmentsize == sum(
                                    (f.fragment_size
                                     for _, f in self.fragment_cache.values()
                                     )), repr([
                                         self.cache_total_fragmentsize,
                                         sum((f.fragment_size for _, f in
                                              self.fragment_cache.values()))
                                     ])
                        else:
                            if self.debug:
                                assert self.fragment_cache[
                                    fragment.fragment_hash][0] == data
                                assert self.fragment_cache[
                                    fragment.fragment_hash][
                                        1].fragment_hash == fragment.fragment_hash
                                assert self.fragment_cache[
                                    fragment.fragment_hash][
                                        1].fragment_size == fragment.fragment_size

                    _old_len = len(self.fragment_cache)
                    self._upload_and_map_fragments(uploadable_fragment_hashes,
                                                   update=smallest_resource)
                    if self.debug:
                        assert len(self.fragment_cache) < _old_len
                else:
                    self._flush_percentage_filled(empty=empty)
                    abort_appending = True