Source code for gsnn.simulate.datasets

import networkx as nx 
import numpy as np 
import torch 
from gsnn.simulate.simulate import simulate, simulate_sde

def simulate_3_in_3_out(n_train, n_test, noise_scale=0.1, device='cpu', zscorey=False):
    """Build a small acyclic 3-input / 3-output graph and simulate data on it.

    The graph has three input nodes, five function nodes arranged in two
    tiers (``func0``-``func2`` feeding ``func3``/``func4``) and three output
    nodes. Samples are produced by ``simulate`` and converted to float32
    tensors on ``device``.

    Args:
        n_train: Number of training samples, forwarded to ``simulate``.
        n_test: Number of test samples, forwarded to ``simulate``.
        noise_scale: Noise scale, forwarded to ``simulate``.
        device: Device the returned tensors are moved to.
        zscorey: If true, standardise ``y_train`` and ``y_test`` per column
            using the mean and standard deviation of ``y_train``.

    Returns:
        Tuple ``(G, pos, x_train, x_test, y_train, y_test, input_nodes,
        function_nodes, output_nodes)`` where ``G`` is the ``nx.DiGraph`` and
        ``pos`` maps each node name to an ``(x, y)`` plotting coordinate.
    """
    input_nodes = ['in0', 'in1', 'in2']
    function_nodes = ['func0', 'func1', 'func2', 'func3', 'func4']
    output_nodes = ['out0', 'out1', 'out2']

    # Edges are listed tier by tier; the order fixes node/edge insertion
    # order in the graph, so it is kept stable.
    edges = [
        # inputs -> first function tier
        ('in0', 'func0'), ('in1', 'func1'), ('in2', 'func2'),
        # first function tier -> second function tier
        ('func0', 'func3'), ('func1', 'func4'), ('func2', 'func3'),
        # second function tier -> outputs
        ('func3', 'out0'), ('func4', 'out1'), ('func3', 'out2'),
    ]
    G = nx.DiGraph()
    G.add_edges_from(edges)

    # Plot layout: one row per tier, given as (y, ((node, x), ...)).
    layout_rows = (
        (2, (('in0', -2), ('in1', 0), ('in2', 2))),
        (1, (('func0', -2), ('func1', 0), ('func2', 2))),
        (0, (('func3', -1), ('func4', 1))),
        (-1, (('out0', -2), ('out1', 0), ('out2', 2))),
    )
    pos = {name: (x_coord, y_coord)
           for y_coord, row in layout_rows
           for name, x_coord in row}

    # Node-specific functions; func4 has no entry here.
    def neg_mean(x):
        return -np.mean(x)

    def sum_of_exp(x):
        return np.sum([np.exp(xx) for xx in x])

    def mean_squared_offset(x):
        return np.mean([(xx - 1) ** 2 for xx in x])

    def sign_gated_mean(x):
        # Negate the mean only when every incoming value is positive.
        if all([xx > 0 for xx in x]):
            return -np.mean(x)
        return np.mean(x)

    special_functions = {
        'func1': neg_mean,
        'func2': sum_of_exp,
        'func0': mean_squared_offset,
        'func3': sign_gated_mean,
    }

    x_train, x_test, y_train, y_test = simulate(
        G,
        n_train=n_train,
        n_test=n_test,
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        noise_scale=noise_scale,
        special_functions=special_functions,
    )

    # Convert every split to a float32 tensor on the requested device.
    x_train, x_test, y_train, y_test = (
        torch.tensor(split, dtype=torch.float32).to(device)
        for split in (x_train, x_test, y_train, y_test)
    )

    if zscorey:
        # Statistics come from the training targets only; the epsilon guards
        # against division by zero for constant columns.
        y_mu = y_train.mean(0)
        y_scale = y_train.std(0) + 1e-8
        y_train = (y_train - y_mu) / y_scale
        y_test = (y_test - y_mu) / y_scale

    return (G, pos, x_train, x_test, y_train, y_test,
            input_nodes, function_nodes, output_nodes)
def simulate_10_in_25_func_10_out_cyclic(n_train, n_test, noise_scale=0.1, device='cpu', zscorey=False, dt=0.01, t_final=10.0, seed=None):
    """Build a cyclic graph with 10 inputs, 25 function nodes and 10 outputs.

    The function nodes are split into three groups (``func0``-``func7``,
    ``func8``-``func15``, ``func16``-``func24``) wired input -> group 1 ->
    group 2 -> group 3 -> output. Extra edges add 2- and 3-cycles inside
    each group plus three feedback edges from later groups to earlier ones.
    Data is generated with ``simulate_sde`` and returned as float32 tensors.

    Args:
        n_train (int): Number of training samples.
        n_test (int): Number of test samples.
        noise_scale (float): Noise scale passed to ``simulate_sde``.
        device (str): Device the returned tensors are moved to.
        zscorey (bool): If true, standardise ``y_train`` and ``y_test`` per
            column using the mean and standard deviation of ``y_train``.
        dt (float): Time step passed to ``simulate_sde``.
        t_final (float): Final time passed to ``simulate_sde``.
        seed (int): Random seed passed to ``simulate_sde``.

    Returns:
        Tuple ``(G, pos, x_train, x_test, y_train, y_test, input_nodes,
        function_nodes, output_nodes)`` where ``G`` is the ``nx.DiGraph`` and
        ``pos`` maps each node name to an ``(x, y)`` plotting coordinate.
    """
    G = nx.DiGraph()

    input_nodes = [f'in{k}' for k in range(10)]
    function_nodes = [f'func{k}' for k in range(25)]
    output_nodes = [f'out{k}' for k in range(10)]

    # Function nodes are partitioned into three consecutive groups:
    #   group 1: func0-func7   (8 nodes)
    #   group 2: func8-func15  (8 nodes)
    #   group 3: func16-func24 (9 nodes)
    layer1_funcs = function_nodes[:8]
    layer2_funcs = function_nodes[8:16]
    layer3_funcs = function_nodes[16:]

    def fan_out(sources, targets, n_wide, wide_offsets, narrow_offsets):
        """Connect sources[i] to targets[(i + offset) % len(targets)].

        The first ``n_wide`` sources use ``wide_offsets``; the remaining
        sources use ``narrow_offsets``.
        """
        n_targets = len(targets)
        for idx, src in enumerate(sources):
            offsets = wide_offsets if idx < n_wide else narrow_offsets
            G.add_edges_from(
                [(src, targets[(idx + off) % n_targets]) for off in offsets]
            )

    # Inputs -> group 1: two targets each, three for the first two inputs.
    fan_out(input_nodes, layer1_funcs, 2, (0, 1, 2), (0, 1))
    # Group 1 -> group 2: two targets each, three for the first four nodes.
    fan_out(layer1_funcs, layer2_funcs, 4, (0, 1, 2), (0, 1))
    # Group 2 -> group 3: one target each, two for the first five nodes.
    fan_out(layer2_funcs, layer3_funcs, 5, (0, 1), (0,))

    # Cycles inside each function group.
    group1_cycles = [
        ('func0', 'func2'), ('func2', 'func4'), ('func4', 'func0'),  # 3-cycle
        ('func1', 'func3'), ('func3', 'func1'),  # 2-cycle
        ('func5', 'func7'), ('func7', 'func6'), ('func6', 'func5'),  # 3-cycle
    ]
    group2_cycles = [
        ('func8', 'func10'), ('func10', 'func8'),  # 2-cycle
        ('func9', 'func11'), ('func11', 'func13'), ('func13', 'func9'),  # 3-cycle
        ('func12', 'func14'), ('func14', 'func15'), ('func15', 'func12'),  # 3-cycle
    ]
    group3_cycles = [
        ('func16', 'func18'), ('func18', 'func16'),  # 2-cycle
        ('func17', 'func19'), ('func19', 'func21'), ('func21', 'func17'),  # 3-cycle
        ('func20', 'func22'), ('func22', 'func24'), ('func24', 'func20'),  # 3-cycle
    ]
    # Feedback edges from later groups to earlier ones (longer cycles).
    feedback_edges = [
        ('func15', 'func1'),  # group 2 back to group 1
        ('func23', 'func9'),  # group 3 back to group 2
        ('func24', 'func2'),  # group 3 back to group 1
    ]
    for edge_group in (group1_cycles, group2_cycles, group3_cycles, feedback_edges):
        G.add_edges_from(edge_group)

    # Group 3 -> outputs: one output each; the first five nodes also feed
    # the output five positions further along.
    fan_out(layer3_funcs, output_nodes, 5, (0, 5), (0,))

    # Layered plotting layout: (nodes, horizontal shift, row height),
    # listed from the top row (inputs) down to the bottom row (outputs).
    layout_rows = (
        (input_nodes, 4.5, 5),
        (layer1_funcs, 3.5, 4),
        (layer2_funcs, 3.5, 3),
        (layer3_funcs, 4, 2),
        (output_nodes, 4.5, 1),
    )
    pos = {}
    for row_nodes, x_shift, y_coord in layout_rows:
        for col, node in enumerate(row_nodes):
            pos[node] = (col - x_shift, y_coord)

    # Nonlinear node functions for a handful of function nodes.
    special_functions = {
        # tanh of the summed inputs
        'func0': lambda x: np.tanh(np.sum(x)),
        # Gaussian-like bump: exp(-mean(x**2))
        'func5': lambda x: np.exp(-np.sum(np.array(x)**2) / len(x)),
        # mean of cubes
        'func10': lambda x: np.sum([xx**3 for xx in x]) / len(x),
        # sine of the summed inputs
        'func15': lambda x: np.sin(np.sum(x)),
        # saturating: s / (1 + |s|) with s = sum(x)
        'func20': lambda x: np.sum(x) / (1 + np.abs(np.sum(x))),
        # sum of |x|**1.5, since x * sign(x) == |x|
        'func24': lambda x: np.sum([xx * np.sign(xx) * np.sqrt(np.abs(xx)) for xx in x]),
    }

    # NOTE(review): the unpack order here is (x_train, y_train, x_test,
    # y_test), whereas simulate_3_in_3_out unpacks simulate() as (x_train,
    # x_test, y_train, y_test) - confirm this matches simulate_sde's return.
    x_train, y_train, x_test, y_test = simulate_sde(
        G,
        n_train=n_train,
        n_test=n_test,
        input_nodes=input_nodes,
        output_nodes=output_nodes,
        noise_scale=noise_scale,
        dt=dt,
        t_final=t_final,
        special_functions=special_functions,
        seed=seed,
    )

    # Convert every split to a float32 tensor on the requested device.
    x_train, x_test, y_train, y_test = (
        torch.tensor(split, dtype=torch.float32).to(device)
        for split in (x_train, x_test, y_train, y_test)
    )

    if zscorey:
        # Statistics come from the training targets only; the epsilon guards
        # against division by zero for constant columns.
        y_mu = y_train.mean(0)
        y_scale = y_train.std(0) + 1e-8
        y_train = (y_train - y_mu) / y_scale
        y_test = (y_test - y_mu) / y_scale

    return (G, pos, x_train, x_test, y_train, y_test,
            input_nodes, function_nodes, output_nodes)