Source code for distr_data

###############################################################################
# mpi-sppy: MPI-based Stochastic Programming in PYthon
#
# Copyright (c) 2024, Lawrence Livermore National Security, LLC, Alliance for
# Sustainable Energy, LLC, The Regents of the University of California, et al.
# All rights reserved. Please see the files COPYRIGHT.md and LICENSE.md for
# full copyright and license information.
###############################################################################
# Our data: region_dict_creator gives the constraints inside each region,
# and inter_region_dict_creator gives the constraints amongst the different regions.
# The data differs slightly depending on the number of regions (num_scens); it is defined for 2, 3 or 4 regions.

### This file creates the data through the region_dict and inter_region_dict
# First there is a hard wired data_set, then there is a scalable dataset

# Hardwired data sets
import json
import re
import numpy as np


def inter_region_dict_creator(num_scens):
    """Creates the oriented arcs between the regions, with their capacities and costs.

    This dictionary represents the inter-region constraints and flows.
    It indicates where to add dummy nodes.

    The hardwired data sets are nested: the 2-region data is the first arc of
    the 3-region data, which is itself the first six arcs of the 4-region data.

    Args:
        num_scens (int): select the number of scenarios (regions) wanted.
            Data exists for 2, 3 and 4; any other value yields an empty dict.

    Returns:
        dict: with keys "arcs", "costs" and "capacities".
            Each arc is presented as a pair (source, target) with source and
            target containing (scenario_name, node_name).
            The arcs are used as keys for the dictionaries of costs and capacities.
    """
    # One row per oriented arc: (source, target, cost, capacity)
    arc_table = [
        (("Region1", "DC1"), ("Region2", "DC2"), 100, 70),
        (("Region2", "DC2"), ("Region1", "DC1"), 50, 100),
        (("Region1", "DC1"), ("Region3", "DC3_1"), 200, 50),
        (("Region3", "DC3_1"), ("Region1", "DC1"), 200, 50),
        (("Region2", "DC2"), ("Region3", "DC3_2"), 200, 50),
        (("Region3", "DC3_2"), ("Region2", "DC2"), 200, 50),
        (("Region1", "DC1"), ("Region4", "DC4"), 30, 100),
        (("Region4", "DC4"), ("Region1", "DC1"), 50, 60),
        (("Region4", "DC4"), ("Region2", "DC2"), 100, 20),
        (("Region2", "DC2"), ("Region4", "DC4"), 70, 40),
    ]

    # Number of leading rows of arc_table that belong to each supported size
    rows_wanted = 0
    for known_size, row_count in ((2, 1), (3, 6), (4, 10)):
        if num_scens == known_size:
            rows_wanted = row_count
            break

    if not rows_wanted:
        # no hardwired data for this number of regions
        return {}

    selected = arc_table[:rows_wanted]
    return {
        "arcs": [(source, target) for source, target, _, _ in selected],
        "costs": {(source, target): cost for source, target, cost, _ in selected},
        "capacities": {(source, target): capacity for source, target, _, capacity in selected},
    }
def region_dict_creator(scenario_name):
    """Create a scenario for the inter-region max profit distribution example.

    The convention for node names is:
    Symbol + number of the region (+ _ + number of the example if needed),
    with symbols DC for distribution centers, F for factory nodes, B for buyer nodes.
    For instance: F3_1 is the 1st factory node of region 3.

    Args:
        scenario_name (str): Name of the scenario to construct
            ("Region1", "Region2", "Region3" or "Region4").

    Returns:
        region_dict (dict): contains all the information in the given region to create the model.
            It is composed of:
            "name" (str): the scenario name
            "nodes" (list of str): all the nodes. The following subsets are also nodes:
            "factory nodes", "buyer nodes", "distribution center nodes",
            "arcs" (list of 2 tuples of str) : (node, node) pairs
            "supply" (dict[n] of float): supply; keys are nodes (negative for demand)
            "production costs" (dict of float): at each factory node
            "revenues" (dict of float): at each buyer node
            "flow costs" (dict[a] of float) : costs per unit flow on each arc
            "flow capacities" (dict[a] of floats) : upper bound capacities of each arc
            (None where no capacity is given)

    Raises:
        RuntimeError: if scenario_name is not one of the hardwired regions.
    """
    def _is_partition(L, *lists):
        # The sublists must cover exactly the elements of L ...
        if set().union(*lists) != set(L):
            return False
        # ... and every element of L must belong to exactly one sublist
        return all(
            sum(1 for sublist in lists if item in sublist) == 1
            for item in L
        )

    if scenario_name not in ("Region1", "Region2", "Region3", "Region4"):
        raise RuntimeError (f"unknown Region name {scenario_name}")

    if scenario_name == "Region1":
        # Creates data for Region1
        arcs = [("F1_1","DC1"), ("F1_2","DC1"), ("DC1","B1_1"),
                ("DC1","B1_2"), ("F1_1","B1_1"), ("F1_2","B1_2")]
        # most capacities are 50; the direct factory->buyer arcs have none
        uncapped = (("F1_1","B1_1"), ("F1_2","B1_2"))
        region_dict = {
            "name": "Region1",
            "nodes": ["F1_1", "F1_2", "DC1", "B1_1", "B1_2"],
            "factory nodes": ["F1_1", "F1_2"],
            "buyer nodes": ["B1_1", "B1_2"],
            "distribution center nodes": ["DC1"],
            "supply": {"F1_1": 80, "F1_2": 70, "B1_1": -60, "B1_2": -90, "DC1": 0},
            "arcs": arcs,
            "production costs": {"F1_1": 50, "F1_2": 80},
            "revenues": {"B1_1": 800, "B1_2": 900},
            "flow capacities": {arc: (None if arc in uncapped else 50) for arc in arcs},
            "flow costs": {("F1_1","DC1"): 300, ("F1_2","DC1"): 500,
                           ("DC1","B1_1"): 200, ("DC1","B1_2"): 400,
                           ("F1_1","B1_1"): 700, ("F1_2","B1_2"): 1000},
        }
    elif scenario_name == "Region2":
        # Creates data for Region2 (no factory in this region)
        region_dict = {
            "name": "Region2",
            "nodes": ["DC2", "B2_1", "B2_2", "B2_3"],
            "factory nodes": [],
            "buyer nodes": ["B2_1", "B2_2", "B2_3"],
            "distribution center nodes": ["DC2"],
            "supply": {"B2_1": -200, "B2_2": -150, "B2_3": -100, "DC2": 0},
            "arcs": [("DC2","B2_1"), ("DC2","B2_2"), ("DC2","B2_3")],
            "production costs": {},
            "revenues": {"B2_1": 900, "B2_2": 800, "B2_3": 1200},
            "flow capacities": {("DC2","B2_1"): 200, ("DC2","B2_2"): 150, ("DC2","B2_3"): 100},
            "flow costs": {("DC2","B2_1"): 100, ("DC2","B2_2"): 200, ("DC2","B2_3"): 300},
        }
    elif scenario_name == "Region3":
        # Creates data for Region3 (two distribution centers)
        region_dict = {
            "name": "Region3",
            "nodes": ["F3_1", "F3_2", "DC3_1", "DC3_2", "B3_1", "B3_2"],
            "factory nodes": ["F3_1", "F3_2"],
            "buyer nodes": ["B3_1", "B3_2"],
            "distribution center nodes": ["DC3_1", "DC3_2"],
            "supply": {"F3_1": 80, "F3_2": 60, "B3_1": -100, "B3_2": -100,
                       "DC3_1": 0, "DC3_2": 0},
            "arcs": [("F3_1","DC3_1"), ("F3_2","DC3_2"), ("DC3_1","B3_1"),
                     ("DC3_2","B3_2"), ("DC3_1","DC3_2"), ("DC3_2","DC3_1")],
            "production costs": {"F3_1": 50, "F3_2": 50},
            "revenues": {"B3_1": 900, "B3_2": 700},
            "flow capacities": {("F3_1","DC3_1"): 80, ("F3_2","DC3_2"): 60,
                                ("DC3_1","B3_1"): 100, ("DC3_2","B3_2"): 100,
                                ("DC3_1","DC3_2"): 70, ("DC3_2","DC3_1"): 50},
            "flow costs": {("F3_1","DC3_1"): 100, ("F3_2","DC3_2"): 100,
                           ("DC3_1","B3_1"): 201, ("DC3_2","B3_2"): 200,
                           ("DC3_1","DC3_2"): 100, ("DC3_2","DC3_1"): 100},
        }
    else:
        # Creates data for Region4
        region_dict = {
            "name": "Region4",
            "nodes": ["F4_1", "F4_2", "DC4", "B4_1", "B4_2"],
            "factory nodes": ["F4_1", "F4_2"],
            "buyer nodes": ["B4_1", "B4_2"],
            "distribution center nodes": ["DC4"],
            "supply": {"F4_1": 200, "F4_2": 30, "B4_1": -100, "B4_2": -100, "DC4": 0},
            "arcs": [("F4_1","DC4"), ("F4_2","DC4"), ("DC4","B4_1"), ("DC4","B4_2")],
            "production costs": {"F4_1": 50, "F4_2": 50},
            "revenues": {"B4_1": 900, "B4_2": 700},
            "flow capacities": {("F4_1","DC4"): 80, ("F4_2","DC4"): 60,
                                ("DC4","B4_1"): 100, ("DC4","B4_2"): 100},
            "flow costs": {("F4_1","DC4"): 100, ("F4_2","DC4"): 80,
                           ("DC4","B4_1"): 90, ("DC4","B4_2"): 70},
        }

    # sanity check on the hardwired data: every node has exactly one type
    assert _is_partition(region_dict["nodes"],
                         region_dict["factory nodes"],
                         region_dict["buyer nodes"],
                         region_dict["distribution center nodes"])

    return region_dict
def _jsonable(obj):
    """Return a copy of obj that the json module can serialize.

    The data dictionaries use tuples (arcs) as keys, which json.dump rejects.
    Tuple keys are replaced by their str() representation (which
    ast.literal_eval can turn back into the tuple); tuples used as values
    become lists, as json would do anyway.

    Args:
        obj: nested structure of dicts, lists, tuples and scalars

    Returns:
        the converted structure; obj itself is left untouched
    """
    if isinstance(obj, dict):
        return {
            (str(key) if isinstance(key, tuple) else key): _jsonable(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [_jsonable(item) for item in obj]
    return obj


if __name__ == "__main__":
    # creating the json files
    for num_scens in range(2, 5):
        inter_region_dict_path = f'json_dicts.inter_region_dict{num_scens}.json'
        data = inter_region_dict_creator(num_scens)

        # Write the data to the JSON file (arcs are tuple keys, hence _jsonable)
        with open(inter_region_dict_path, 'w') as json_file:
            json.dump(_jsonable(data), json_file, indent=4)

    for i in range(1, 5):
        region_dict_path = f'json_dicts.region{i}_dict.json'
        data = region_dict_creator(f"Region{i}")

        # Write the data to the JSON file (arcs are tuple keys, hence _jsonable)
        with open(region_dict_path, 'w') as json_file:
            json.dump(_jsonable(data), json_file, indent=4)


########################################################################################################################
# Scalable datasets

def parse_node_name(name):
    """Decomposes the name, for example "DC1_2" gives "DC", 1, 2

    When the node number is omitted (e.g. "DC1") it defaults to 1.

    Args:
        name (str): name of the node

    Returns:
        triplet (str, int, int): type of node ("DC", "F" or "B"), number of the region, and number of the node

    Raises:
        RuntimeError: if the name does not have the shape letters + digits (+ _ + digits)
    """
    # letters, then the region number, then optionally "_" and the node number
    pattern = r'^([A-Za-z]+)(\d+)(?:_(\d+))?$'

    match = re.match(pattern, name)
    if not match:
        raise RuntimeError (f"the node {name} can't be well decomposed")

    prefix, first_number, second_number = match.groups()
    if second_number is None:
        second_number = '1'
    assert prefix in ["DC", "F", "B"]
    return prefix, int(first_number), int(second_number)


def _node_num(max_node_per_region, node_type, region_num, count):
    """Maps a node to an integer computed from its type, region and rank.

    Args:
        max_node_per_region (int): maximum number of node per region per type
        node_type (str): "DC", "F" or "B"
        region_num (int): number of the region
        count (int): number of the node within its region and type

    Returns:
        int: a number specific to the node. This allows to have unrelated seeds
    """
    node_types = ["DC", "F", "B"]  # no need to include the dummy nodes as they are added automatically
    return (max_node_per_region * region_num + count) * len(node_types) + node_types.index(node_type)


def _pseudo_random_arc(node_1, node_2, prob, cfg, intra=True):
    """Decides pseudo-randomly whether an arc will be created based on the two nodes

    The draw is seeded from the (unordered) pair of nodes, so it is
    reproducible and gives the same random number for (node_1, node_2) and
    (node_2, node_1). This reseeds numpy's global random state.

    Args:
        node_1 (str): name of the source node
        node_2 (str): name of the target node
        prob (float): probability that the arc is created
        cfg (pyomo config): the config arguments (reads cfg.mnpr and cfg.num_scens)
        intra (bool, optional): True if the arcs are inside a region, false if the arc is between different regions. Defaults to True.

    Returns:
        bool: True if the arc is to be created. Always False when intra is
            False and both nodes are in the same region.
    """
    max_node_per_region = cfg.mnpr
    node_type1, region_num1, count1 = parse_node_name(node_1)
    node_type2, region_num2, count2 = parse_node_name(node_2)
    node_num1 = _node_num(max_node_per_region, node_type1, region_num1, count1)
    node_num2 = _node_num(max_node_per_region, node_type2, region_num2, count2)

    if intra:
        assert region_num1 == region_num2, f"supposed to happen in a region ({intra=}), but {region_num1, region_num2=}"
    elif region_num1 == region_num2:
        # inside a region, however intra=False so no connexion
        return False

    # maximum number possible
    max_node_num = _node_num(max_node_per_region, "B", cfg.num_scens + 1, max_node_per_region + 1)
    # min/max make the seed symmetrical in the two nodes
    pair_seed = min(node_num1, node_num2) * max_node_num + max(node_num1, node_num2)
    # np.random.seed only accepts values below 2**32 (same reduction as in _intra_arc_creator)
    np.random.seed(pair_seed % 2**32)
    random_number = np.random.rand()
    # Determine if the event occurs
    return random_number < prob


def _intra_arc_creator(my_dict, node_1, node_2, cfg, arc, arc_params, my_seed, intra=True):
    """If the arc is chosen randomly to be constructed, it is added to the dictionary with its cost and capacity

    my_dict is modified in place; nothing is returned. This reseeds numpy's
    global random state.

    Args:
        my_dict (dict): either a region_dict if intra=True, otherwise the inter_region_dict
        node_1 (str): name of the source node
        node_2 (str): name of the target node
        cfg (pyomo config): the config arguments
        arc (pair): key under which the arc is stored; (node_source, node_target) inside a region,
            ((region_source, node_source), (region_target, node_target)) between regions
        arc_params (dict): parameters for the random draws, with keys
            "prob", "mean cost", "cv cost", "mean capacity" and "cv capacity"
        my_seed (int): unique number used as seed
        intra (bool, optional): True if the arcs are inside a region, false if the arc is between different regions. Defaults to True.
    """
    prob = arc_params["prob"]
    mean_cost = arc_params["mean cost"]
    cv_cost = arc_params["cv cost"]
    mean_capacity = arc_params["mean capacity"]
    cv_capacity = arc_params["cv capacity"]

    if not _pseudo_random_arc(node_1, node_2, prob, cfg, intra=intra):
        return

    my_dict["arcs"].append(arc)
    # not the same names used in a region_dict and in the inter_region_dict
    if intra:
        cost_name, capacity_name = "flow costs", "flow capacities"
    else:
        cost_name, capacity_name = "costs", "capacities"

    # NOTE(review): the "cv" values are passed as the scale (standard deviation)
    # of the normal draws, not multiplied by the mean -- confirm this is intended.
    np.random.seed(my_seed % 2**32)
    my_dict[cost_name][arc] = max(np.random.normal(mean_cost, cv_cost), 0)
    # shifted seed so that the capacity is unrelated to the cost
    np.random.seed((2**31 + my_seed) % 2**32)
    my_dict[capacity_name][arc] = max(np.random.normal(mean_capacity, cv_capacity), 0)


def scalable_inter_region_dict_creator(all_DC_nodes, cfg, data_params):
    """Same as inter_region_dict_creator but the scalable version

    Every ordered pair of distinct distribution centers is a candidate arc;
    candidates joining two nodes of the same region are never kept.

    Args:
        all_DC_nodes (list of str): names of the distribution center nodes of all the regions
        cfg (pyomo config): the config arguments
        data_params (nested dict): data_params["inter_region_arc"] holds the arc parameters

    Returns:
        dict: with keys "arcs", "costs" and "capacities"; arcs are pairs
            ((region_source, node_source), (region_target, node_target))
    """
    inter_region_arc_params = data_params["inter_region_arc"]
    inter_region_dict = {"arcs": [], "costs": {}, "capacities": {}}
    count = 0  # seed of the candidate arc
    for node_1 in all_DC_nodes:
        for node_2 in all_DC_nodes:
            if node_1 == node_2:
                continue
            _, region_num1, _ = parse_node_name(node_1)
            _, region_num2, _ = parse_node_name(node_2)
            source = f"Region{region_num1}", node_1
            target = f"Region{region_num2}", node_2
            arc = source, target
            _intra_arc_creator(inter_region_dict, node_1, node_2, cfg, arc, inter_region_arc_params, count, intra=False)
            count += 1
    return inter_region_dict


def all_nodes_dict_creator(cfg, data_params):
    """Creates pseudo-randomly the nodes of every region with their node data

    All draws are seeded from the node numbers, so the result is reproducible.
    This reseeds numpy's global random state.

    Args:
        cfg (pyomo config): configuration arguments (reads cfg.num_scens and cfg.mnpr)
        data_params (nested dict): allows to construct the random probabilities

    Returns:
        (dict of dict): the keys are regions ("Region1", ...). Each value holds
            "nodes", "distribution center nodes", "factory nodes", "buyer nodes",
            "production costs", "revenues" and "supply".
    """
    num_scens = cfg.num_scens
    max_node_per_region = cfg.mnpr  # maximum number of nodes of a certain type in any region
    all_nodes_dict = {}

    production_costs_mean = data_params["production costs mean"]
    production_costs_cv = data_params["production costs cv"]  # coefficient of variation
    revenues_mean = data_params["revenues mean"]
    revenues_cv = data_params["revenues cv"]
    supply_factory_mean = data_params["supply factory mean"]
    supply_factory_cv = data_params["supply factory cv"]
    supply_buyer_mean = data_params["supply buyer mean"]
    supply_buyer_cv = data_params["supply buyer cv"]

    node_types = ["DC", "F", "B"]
    association_types = {"DC": "distribution center nodes", "F": "factory nodes", "B": "buyer nodes"}

    for i in range(1, num_scens + 1):
        region_name = f"Region{i}"
        region = {"nodes": [], "production costs": {}, "revenues": {}, "supply": {}}
        all_nodes_dict[region_name] = region

        for node_type in node_types:
            # the first node that will be created will have this number
            node_base_num = _node_num(max_node_per_region, node_type, i, 1)
            # That helps us to have a seed, thanks to that we choose an integer
            # which will be the number of nodes of this type
            np.random.seed(node_base_num)
            if node_type in ("F", "B"):
                # possibly no node at all of this type
                m = np.random.randint(0, max_node_per_region)
            else:
                # at least one distribution center
                m = np.random.randint(1, int(np.sqrt(max_node_per_region)) + 1)

            typed_nodes = [node_type + str(i) + "_" + str(j) for j in range(1, m + 1)]
            region[association_types[node_type]] = typed_nodes
            region["nodes"] += typed_nodes

            if node_type == "F":
                for count, node_name in enumerate(typed_nodes, start=1):
                    node_seed = _node_num(max_node_per_region, node_type, i, count)
                    np.random.seed(node_seed + 2**28)
                    region["production costs"][node_name] = max(0, np.random.normal(production_costs_mean, production_costs_cv))
                    np.random.seed(node_seed + 2 * 2**28)
                    region["supply"][node_name] = max(0, np.random.normal(supply_factory_mean, supply_factory_cv))  # positive
            elif node_type == "B":
                for count, node_name in enumerate(typed_nodes, start=1):
                    node_seed = _node_num(max_node_per_region, node_type, i, count)
                    np.random.seed(node_seed + 2**28)
                    region["revenues"][node_name] = min(max(0, np.random.normal(revenues_mean, revenues_cv)), data_params["max revenue"])
                    np.random.seed(node_seed + 2 * 2**28)
                    region["supply"][node_name] = - max(0, np.random.normal(supply_buyer_mean, supply_buyer_cv))  # negative
            else:
                # distribution centers neither produce nor consume
                for node_name in typed_nodes:
                    region["supply"][node_name] = 0

    return all_nodes_dict


def scalable_region_dict_creator(scenario_name, all_nodes_dict=None, cfg=None, data_params=None):
    """Same as region_dict_creator but the scalable version

    Candidate arcs are the ordered pairs of distinct nodes of the region whose
    types are (F, DC), (DC, B), (F, B) or (DC, DC).

    Note that all_nodes_dict[scenario_name] is completed in place (with
    "name", "arcs", "flow costs" and "flow capacities") and is the returned object.

    Args:
        scenario_name (str): name of the region, a key of all_nodes_dict
        all_nodes_dict (dict of dict): the nodes of every region, see all_nodes_dict_creator
        cfg (pyomo config): the config arguments
        data_params (nested dict): holds the arc parameters under the keys
            "arc_F_DC", "arc_DC_B", "arc_F_B" and "arc_DC_DC"

    Returns:
        region_dict (dict): the data of the region, with the same keys as in region_dict_creator
    """
    assert all_nodes_dict is not None
    assert cfg is not None
    assert data_params is not None

    region_dict = all_nodes_dict[scenario_name]
    region_dict["name"] = scenario_name
    region_dict["arcs"] = []
    region_dict["flow costs"] = {}
    region_dict["flow capacities"] = {}

    # parameters of the arcs depending on the types of (source, target)
    arcs_association = {("F", "DC"): data_params["arc_F_DC"],
                        ("DC", "B"): data_params["arc_DC_B"],
                        ("F", "B"): data_params["arc_F_B"],
                        ("DC", "DC"): data_params["arc_DC_DC"]}

    count = 2**30  # to have unrelated data with the production_costs
    for node_1 in region_dict["nodes"]:
        for node_2 in region_dict["nodes"]:
            if node_1 == node_2:
                continue
            node_type1, _, _ = parse_node_name(node_1)
            node_type2, _, _ = parse_node_name(node_2)
            arc_type = (node_type1, node_type2)
            if arc_type in arcs_association:
                arc_params = arcs_association[arc_type]
                arc = (node_1, node_2)
                _intra_arc_creator(region_dict, node_1, node_2, cfg, arc, arc_params, my_seed=count, intra=True)
                count += 1
    return region_dict