diff --git a/docs/advanced/model_optimization.rst b/docs/advanced/model_optimization.rst new file mode 100644 index 0000000000000000000000000000000000000000..9465353c2e18a836dc86557a277d5258f9502f5e --- /dev/null +++ b/docs/advanced/model_optimization.rst @@ -0,0 +1,127 @@ +======================== +hls4ml Optimization API +======================== + +Pruning and weight sharing are effective techniques to reduce model footprint and computational requirements. The hls4ml Optimization API introduces hardware-aware pruning and weight sharing. +By defining custom objectives, the algorithm solves a Knapsack optimization problem aimed at maximizing model performance, while keeping the target resource(s) at a minimum. Out-of-the box objectives include network sparsity, GPU FLOPs, Vivado DSPs, memory utilization etc. + +The code block below showcases three use cases of the hls4ml Optimization API - network sparsity (unstructured pruning), GPU FLOPs (structured pruning) and Vivado DSP utilization (pattern pruning). First, we start with unstructured pruning: + +.. code-block:: Python + from sklearn.metrics import accuracy_score + from tensorflow.keras.optimizers import Adam + from tensorflow.keras.metrics import CategoricalAccuracy + from tensorflow.keras.losses import CategoricalCrossentropy + from hls4ml.optimization.keras import optimize_model + from hls4ml.optimization.keras.utils import get_model_sparsity + from hls4ml.optimization.attributes import get_attributes_from_keras_model + from hls4ml.optimization.objectives import ParameterEstimator + from hls4ml.optimization.scheduler import PolynomialScheduler + # Define baseline model and load data + # X_train, y_train = ... + # X_val, y_val = ... + # X_test, y_test = ... + # baseline_model = ... 
+ # Evaluate baseline model + y_baseline = baseline_model.predict(X_test) + acc_base = accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_baseline, axis=1)) + sparsity, layers = get_model_sparsity(baseline_model) + print(f'Baseline Keras accuracy: {acc_base}') + print(f'Baseline Keras sparsity, overall: {sparsity}') + print(f'Baseline Keras sparsity, per-layer: {layers}') + # Defining training parameters + # Epochs refers to the number of maximum epochs to train a model, after imposing some sparsity + # If the model is pre-trained, a good rule of thumb is to use between a 1/3 and 1/2 of the number of epochs used to train baseline model + epochs = 10 + batch_size = 128 + metric = 'accuracy' + optimizer = Adam() + loss_fn = CategoricalCrossentropy(from_logits=True) + + # Define the metric to monitor, as well as if it is increasing or decreasing + # This distinction allows us to optimize both regression and classification models + # In regression, e.g.
maximize accuracy + metric, increasing = CategoricalAccuracy(), True + # Relative tolerance (rtol) is the the relative loss in metric the optimized model is allowed to incur + rtol = 0.975 + + # A scheduler defines how the sparsity is incremented at each step + # In this case, the maximum sparsity is 50% and it will be applied at a polynomially decreasing rate, for 10 steps + # If the final sparsity is unspecified, it is set to 100% + # The optimization algorithm stops either when (i) the relative drop in performance is below threshold or (ii) final sparsity reached + scheduler = PolynomialScheduler(5, final_sparsity=0.5) + # Get model attributes + model_attributes = get_attributes_from_keras_model(baseline_model) + + # Optimize model + # ParameterEstimator is the objective and, in this case, the objective is to minimize the total number of parameters + optimized_model = optimize_model( + baseline_model, model_attributes, ParameterEstimator, scheduler, + X_train, y_train, X_val, y_val, batch_size, epochs, optimizer, loss_fn, metric, increasing, rtol + ) + # Evaluate optimized model + y_optimized = optimized_model.predict(X_test) + acc_optimized = accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_optimized, axis=1)) + sparsity, layers = get_model_sparsity(optimized_model) + print(f'Optimized Keras accuracy: {acc_optimized}') + print(f'Optimized Keras sparsity, overall: {sparsity}') + print(f'Opimized Keras sparsity, per-layer: {layers}') + +In a similar manner, it is possible to target GPU FLOPs or Vivado DSPs. However, in that case, sparsity is not equivalent to model sparsity. +Instead, it is the sparsity of the target resource. As an example: Starting with a network utilizing 512 DSPs and a final sparsity of 50%; the optimized network will use 256 DSPs. + +To optimize GPU FLOPs, the code is similar to above: +.. 
code-block:: Python + from hls4ml.optimization.objectives.gpu_objectives import GPUFLOPEstimator + + # Optimize model + # Note the change from ParameterEstimator to GPUFLOPEstimator + optimized_model = optimize_model( + baseline_model, model_attributes, GPUFLOPEstimator, scheduler, + X_train, y_train, X_val, y_val, batch_size, epochs, optimizer, loss_fn, metric, increasing, rtol + ) + + # Evaluate optimized model + y_optimized = optimized_model.predict(X_test) + acc_optimized = accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_optimized, axis=1)) + print(f'Optimized Keras accuracy: {acc_optimized}') + # Note the difference in total number of parameters + # Optimizing GPU FLOPs is equivalent to removing entire structures (filters, neurons) from the network + print(baseline_model.summary()) + print(optimized_model.summary()) + +Finally, optimizing Vivado DSPs is possible, given a hls4ml config: +.. code-block:: Python + from hls4ml.utils.config import config_from_keras_model + from hls4ml.optimization.objectives.vivado_objectives import VivadoDSPEstimator + + # Note the change from optimize_model to optimize_keras_for_hls4ml + # The function optimize_keras_for_hls4ml acts as a wrapper for the function, parsing hls4ml config to model attributes + from hls4ml.optimization import optimize_keras_for_hls4ml + + # Create hls4ml config + default_reuse_factor = 4 + default_precision = 'ac_fixed<16, 6>' + hls_config = config_from_keras_model(baseline_model, granularity='name', default_precision=default_precision, default_reuse_factor=default_reuse_factor) + hls_config['IOType'] = 'io_parallel' + hls_config['Model']['Strategy'] = 'Resource' # Strategy must be present for optimisation + + # Optimize model + # Note the change from ParameterEstimator to VivadoDSPEstimator + optimized_model = optimize_keras_for_hls4ml( + baseline_model, model_attributes, VivadoDSPEstimator, scheduler, + X_train, y_train, X_val, y_val, batch_size, epochs, + optimizer, loss_fn, metric, 
increasing, rtol + ) + + # Evaluate optimized model + y_optimized = optimized_model.predict(X_test) + acc_optimized = accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_optimized, axis=1)) + print(f'Optimized Keras accuracy: {acc_optimized}') + +There are two more Vivado "optimizers" - VivadoFFEstimator, aimed at reducing register utilisation and VivadoMultiObjectiveEstimator, aimed at optimising BRAM and DSP utilisation. +Note, to ensure DSPs are optimized, "unrolled" Dense multiplication must be used before synthesing HLS, by modifying the config: +.. code-block:: Python + hls_config = config_from_keras_model(optimized_model) + hls_config['Model']['DenseResourceImplementation'] = 'Unrolled' + # Any addition hls4ml config, such as strategy, reuse factor etc... diff --git a/hls4ml/optimization/__init__.py b/hls4ml/optimization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d8301f58925ce028bbba15a356e2ef04196f4d27 --- /dev/null +++ b/hls4ml/optimization/__init__.py @@ -0,0 +1,107 @@ +import numpy as np + +from hls4ml.optimization.attributes import get_attributes_from_keras_model_and_hls4ml_config +from hls4ml.optimization.keras import optimize_model + +default_regularization_range = np.logspace(-6, -2, num=16).tolist() + + +def optimize_keras_for_hls4ml( + keras_model, + hls_config, + objective, + scheduler, + X_train, + y_train, + X_val, + y_val, + batch_size, + epochs, + optimizer, + loss_fn, + validation_metric, + increasing, + rtol, + callbacks=None, + ranking_metric='l1', + local=False, + verbose=False, + rewinding_epochs=1, + cutoff_bad_trials=3, + directory='hls4ml-optimization', + tuner='Bayesian', + knapsack_solver='CBC_MIP', + regularization_range=default_regularization_range, +): + ''' + Top-level function for optimizing a Keras model, given hls4ml config and a hardware objective(s) + + Args: + - keras_model (keras.Model): Model to be optimized + - hls_config (dict): hls4ml configuration, obtained from 
hls4ml.utils.config.config_from_keras_model(...) + - objective (hls4ml.optimization.objectives.ObjectiveEstimator): + Parameter, hardware or user-defined objective of optimization + - scheduler (hls4ml.optimization.scheduler.OptimizationScheduler): + Sparsity scheduler, choose between constant, polynomial and binary + - X_train (np.array): Training inputs + - y_train (np.array): Training labels + - X_val (np.array): Validation inputs + - y_val (np.array): Validation labels + - batch_size (int): Batch size during training + - epochs (int): Maximum number of epochs to fine-tune model, in one iteration of pruning + - optimizer (keras.optimizers.Optimizer or equivalent-string description): Optimizer used during training + - loss_fn (keras.losses.Loss or equivalent loss description): Loss function used during training + - validation_metric (keras.metrics.Metric or equivalent loss description): Validation metric, used as a baseline + - increasing (boolean): If the metric improves with increased values; + e.g.
accuracy -> increasing = True, MSE -> increasing = False + - rtol (float): Relative tolerance; + pruning stops when pruned_validation_metric < (or >) rtol * baseline_validation_metric + + Kwargs: + - callbacks (list of keras.callbacks.Callback) Currently not supported, developed in future versions + - ranking_metric (string): Metric used for rannking weights and structures; + currently supported l1, l2, saliency and Oracle + - local (boolean): Layer-wise or global pruning + - verbose (boolean): Display debug logs during model optimization + - rewinding_epochs (int): Number of epochs to retrain model without weight freezing, + allows regrowth of previously pruned weights + - cutoff_bad_trials (int): After how many bad trials (performance below threshold), + should model pruning / weight sharing stop + - directory (string): Directory to store temporary results + - tuner (str): Tuning alogorithm, choose between Bayesian, Hyperband and None + - knapsack_solver (str): Algorithm to solve Knapsack problem when optimizing; + default usually works well; for very large networks, greedy algorithm might be more suitable + - regularization_range (list): List of suitable hyperparameters for weight decay + ''' + + # Extract model attributes + model_attributes = get_attributes_from_keras_model_and_hls4ml_config(keras_model, hls_config) + + # Optimize model + return optimize_model( + keras_model, + model_attributes, + objective, + scheduler, + X_train, + y_train, + X_val, + y_val, + batch_size, + epochs, + optimizer, + loss_fn, + validation_metric, + increasing, + rtol, + callbacks=callbacks, + ranking_metric=ranking_metric, + local=local, + verbose=verbose, + rewinding_epochs=rewinding_epochs, + cutoff_bad_trials=cutoff_bad_trials, + directory=directory, + tuner=tuner, + knapsack_solver=knapsack_solver, + regularization_range=regularization_range, + ) diff --git a/hls4ml/optimization/attributes.py b/hls4ml/optimization/attributes.py new file mode 100644 index 
0000000000000000000000000000000000000000..672e2706a0c33d79b6f704c334335d52b30409ed --- /dev/null +++ b/hls4ml/optimization/attributes.py @@ -0,0 +1,246 @@ +import numpy as np + +import hls4ml +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType +from hls4ml.optimization.config import SUPPORTED_STRUCTURES +from hls4ml.optimization.keras.config import SUPPORTED_LAYERS + + +class hls4mlAttributes: + ''' + A class for storing hls4ml information of a single layer + + Args: + - n_in (int): Number of inputs (rows) for Dense matrix multiplication + - n_out (int): Number of outputs (cols) for Dense matrix multiplication + - io_type (string): io_parallel or io_stream + - strategy (string): Resource or Latency + - weight_precision (FixedPrecisionType): Layer weight precision + - output_precision (FixedPrecisionType): Layer output precision + - reuse_factor (int): Layer reuse factor + - parallelization_factor (int): Layer parallelization factor - [applicable to io_parallel Conv2D] + ''' + + def __init__( + self, n_in, n_out, io_type, strategy, weight_precision, output_precision, reuse_factor, parallelization_factor=1 + ): + if not isinstance(weight_precision, (FixedPrecisionType, IntegerPrecisionType)): + raise Exception('Layer weight precision is not in valid format') + + if not isinstance(output_precision, (FixedPrecisionType, IntegerPrecisionType)): + raise Exception('Layer weight precision is not in valid format') + + if strategy not in ('Latency', 'latency', 'Resource', 'resource'): + raise Exception('Unknown layer strategy') + + if io_type not in ('io_parallel', 'io_stream'): + raise Exception('Unknown IO type') + + self.n_in = n_in + self.n_out = n_out + self.io_type = io_type + self.strategy = strategy + self.weight_precision = weight_precision + self.output_precision = output_precision + self.reuse_factor = reuse_factor + self.parallelization_factor = parallelization_factor + + +class OptimizationAttributes: + ''' + A class for storing layer 
optimization attributes + + Args: + - structure_type (enum): Targeted structure - unstructured, structured, pattern, block + - pruning (boolean): Should pruning be applied to the layer + - weight_sharing (boolean): Should weight sharing be applied to the layer + - block_shape (tuple): Block shape if structure_type == block + - pattern_offset (int): Length of each pattern if structure_type == pattern + - consecutive_patterns (int): How many consecutive patterns are grouped together if structure_type == pattern + + Notes: + - In the case of hls4ml, pattern_offset is equivalent to the number of weights processed in parallel + - The pattern_offset is n_in * n_out / reuse_factor; default case (=1) is equivalent to no unrolling + ''' + + def __init__( + self, + structure_type=SUPPORTED_STRUCTURES.UNSTRUCTURED, + pruning=False, + weight_sharing=False, + block_shape=(1, 1), + pattern_offset=1, + consecutive_patterns=1, + ): + if not isinstance(structure_type, SUPPORTED_STRUCTURES): + raise Exception(f'{self.__class__.__name__} unknown structure type') + + self.structure_type = structure_type + self.pruning = pruning + self.weight_sharing = weight_sharing + self.block_shape = block_shape + self.pattern_offset = pattern_offset + self.consecutive_patterns = consecutive_patterns + + +class LayerAttributes: + ''' + A class for storing layer information + + Args: + - name (string): Layer name + - layer_type (keras.Layer): Layer type (e.g. Dense, Conv2D etc.) + - inbound_layers (list): List of parent nodes, identified by name + - weight_shape (tuple): Layer weight shape + - input_shape (tuple): Layer input shape + - output_shape (tuple): Layer output shape + - optimizable (bool): Should optimizations (pruning, weight sharing) be applied to this layer + - optimization_attributes (OptimizationAttributes): Type of optimization, + pruning or weight sharing, block shape and pattern offset + - args (dict): Additional information, + e.g. 
hls4mlAttributes; dictionary so it can be generic enough for different platforms + ''' + + def __init__( + self, + name, + layer_type, + inbound_layers, + weight_shape, + input_shape, + output_shape, + optimizable, + optimization_attributes, + args, + ): + self.name = name + self.layer_type = layer_type + self.inbound_layers = inbound_layers + self.weight_shape = weight_shape + self.input_shape = input_shape + self.output_shape = output_shape + self.optimizable = optimizable + self.optimization_attributes = optimization_attributes + self.args = args + + def update_args(self, updates): + self.args.update(updates) + + def __str__(self): + return ( + f'name: {self.name}, ' + f'layer_type: {self.layer_type}, ' + f'inbound_layers: {self.inbound_layers}, ' + f'weight_shape: {self.weight_shape}, ' + f'input_shape: {self.input_shape}, ' + f'output_shape: {self.output_shape}, ' + f'optimizable: {self.optimizable}, ' + f'optimization_attributes: {self.optimization_attributes}, ' + f'args: {self.args}, ' + ) + + +def get_attributes_from_keras_model(model): + ''' + Given a Keras model, builds a dictionary of class attributes + Additional arguments (e.g. 
reuse factor), depend on the target hardware platform and are inserted later + Per-layer pruning sype (structured, pattern etc.), depend on the pruning objective and are inserted later + + Args: + - model (keras.model): Model to extract attributes from + + Return: + - model_attributes (dict): Each key corresponds to a layer name, values are instances of LayerAttribute + ''' + is_sequential = model.__class__.__name__ == 'Sequential' + model_attributes = {} + + for i, layer in enumerate(model.layers): + inbound_layers = [] + if is_sequential and i > 0: + inbound_layers.append(model.layers[i - 1]) + elif not is_sequential: + nodes = model.get_config()['layers'][i]['inbound_nodes'] + if len(nodes) > 0: + inbound_layers.append(node[0] for node in nodes[0]) + + layer_weights = layer.get_weights() + weight_shape = layer_weights[0].shape if len(layer_weights) > 0 else () + + model_attributes[layer.name] = LayerAttributes( + layer.name, + layer.__class__, + inbound_layers, + weight_shape, + layer.input_shape[1:], + layer.output_shape[1:], + False, + OptimizationAttributes(), + {}, + ) + + return model_attributes + + +def get_attributes_from_keras_model_and_hls4ml_config(model, config): + ''' + Given a Keras model and hls4ml configuration, builds a dictionary of class attributes + Per-layer pruning sype (structured, pruning etc.), depend on the pruning objective and are inserted later + + Args: + - model (keras.model): Model to extract attributes from + - config (dict): hls4ml dictionary + + Return: + - model_attributes (dict): Each key corresponds to a layer name, values are LayerAttribute instances + ''' + + # Extract Keras attributes + model_attributes = get_attributes_from_keras_model(model) + + # Extract hls4ml attributes + io_type = config['IOType'] + default_reuse_factor = config['Model']['ReuseFactor'] + default_strategy = config['Model']['Strategy'] + default_precision = config['Model']['Precision'] + + # Build dictionary + for layer in model_attributes: + if 
model_attributes[layer].layer_type in SUPPORTED_LAYERS: + n_in, n_out = __get_layer_mult_size(model_attributes[layer]) + layer_config = config['LayerName'][layer] if layer in config['LayerName'] else {} + reuse_factor = layer_config['ReuseFactor'] if 'ReuseFactor' in layer_config else default_reuse_factor + parallelization_factor = layer_config['ParallelizationFactor'] if 'ParallelizationFactor' in layer_config else 1 + strategy = layer_config['Strategy'] if 'Strategy' in layer_config else default_strategy + weight_precision = ( + layer_config['Precision']['weight'] if 'weight' in layer_config['Precision'] else default_precision + ) + weight_precision = hls4ml.backends.fpga.fpga_backend.FPGABackend.convert_precision_string(weight_precision) + output_precision = ( + layer_config['Precision']['result'] if 'result' in layer_config['Precision'] else default_precision + ) + output_precision = hls4ml.backends.fpga.fpga_backend.FPGABackend.convert_precision_string(output_precision) + + hls4ml_attributes = hls4mlAttributes( + n_in, n_out, io_type, strategy, weight_precision, output_precision, reuse_factor, parallelization_factor + ) + model_attributes[layer].update_args({'hls4ml_attributes': hls4ml_attributes}) + + return model_attributes + + +def __get_layer_mult_size(attributes): + ''' + Helper function to calculate layer multiplication size + ''' + if 'Dense' in attributes.layer_type.__name__: + n_in = np.prod(attributes.input_shape) + n_out = np.prod(attributes.output_shape) + return n_in, n_out + + if 'Conv' in attributes.layer_type.__name__: + n_in = np.prod(attributes.weight_shape[0:-2]) + n_out = attributes.weight_shape[-1] + return n_in, n_out + + raise Exception(f'Cannot get mult size for layer {attributes.name}') diff --git a/hls4ml/optimization/config.py b/hls4ml/optimization/config.py new file mode 100644 index 0000000000000000000000000000000000000000..0879c47f6259a1c601674157720eae64d6f572d9 --- /dev/null +++ b/hls4ml/optimization/config.py @@ -0,0 +1,47 @@ 
+from enum import Enum + +''' +A list of currently supported structures when optimizing (pruning, weight sharing) +For more information, see attributes.py + +1. Unstructured: + - Pruning: Y + - Weight sharing: N + - Description: Removes (zeroes out) individual weights + - Supports: All layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) + +2. Structured: + - Pruning: Y + - Weight sharing: Y + - Description: Zeroes out or quantizes all the weights in a structure: + - Dense: Neurons, determined by their outgoing connections (columns in Keras weight tensors) + - Conv2D: Filters (structures of size filt_width x filt_height x n_chan) + - Notes: + - For Dense, it was also possible to optimize by incoming connections (rows); + However, removing zero neurons becomes harder because of Keras Surgeon + - For Conv2D, significant literature explored pruning channels; currently not supported + - Supports: All layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) + +3. Pattern: + - Pruning: Y + - Weight sharing: Y + - Description: Zeroes out or quantizes all the weights in a group + Groups are determined by a variable, n, and every n-th weight in the flattened, + Transposed (Resource) weight tensor is collected and stored in the same group + Equivalent to pruning/quantizing weight processed by the same DSP in hls4ml + - Supports: All layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) + +4. Block: + - Pruning: Y + - Weight sharing: Y + - Description: Zeroes out or quantizes all the weights in a block of size (w, h) + - Supports: All rank-2 (e.g.
Dense, but not Conv2D) layers in SUPPORTED_LAYERS (hls4ml.optimization.keras) + +''' + + +class SUPPORTED_STRUCTURES(Enum): + UNSTRUCTURED = 'unstructured' + STRUCTURED = 'structured' + PATTERN = 'pattern' + BLOCK = 'block' diff --git a/hls4ml/optimization/keras/__init__.py b/hls4ml/optimization/keras/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a07f0adac7c83c8e49cc0d083c49a2964711e205 --- /dev/null +++ b/hls4ml/optimization/keras/__init__.py @@ -0,0 +1,359 @@ +import os +import time + +import numpy as np +import tensorflow as tf + +# Enables printing of loss tensors during custom training loop +from tensorflow.python.ops.numpy_ops import np_config + +import hls4ml.optimization.keras.utils as utils +from hls4ml.optimization.config import SUPPORTED_STRUCTURES +from hls4ml.optimization.keras.builder import build_optimizable_model, remove_custom_regularizers +from hls4ml.optimization.keras.config import SUPPORTED_LAYERS, SUPPORTED_METRICS, TMP_DIRECTORY +from hls4ml.optimization.keras.masking import get_model_masks +from hls4ml.optimization.keras.reduction import reduce_model +from hls4ml.optimization.scheduler import OptimizationScheduler + +np_config.enable_numpy_behavior() +default_regularization_range = np.logspace(-6, -2, num=16).tolist() + + +def optimize_model( + model, + model_attributes, + objective, + scheduler, + X_train, + y_train, + X_val, + y_val, + batch_size, + epochs, + optimizer, + loss_fn, + validation_metric, + increasing, + rtol, + callbacks=None, + ranking_metric='l1', + local=False, + verbose=False, + rewinding_epochs=1, + cutoff_bad_trials=1, + directory=TMP_DIRECTORY, + tuner='Bayesian', + knapsack_solver='CBC_MIP', + regularization_range=default_regularization_range, +): + ''' + Top-level function for optimizing a Keras model, given objectives + + Args: + - model (keras.Model): Model to be optimized + - model_attributes (dict): Layer-wise model attributes, + obtained from 
hls4ml.optimization.get_attributes_from_keras_model(...) + - objective (hls4ml.optimization.objectives.ObjectiveEstimator): + Parameter, hardware or user-defined objective of optimization + - scheduler (hls4ml.optimization.schduler.OptimizationScheduler): + Sparsity scheduler, choose between constant, polynomial and binary + - X_train (np.array): Training inputs + - y_train (np.array): Training labels + - X_val (np.array): Validation inputs + - y_val (np.array): Validation labels + - batch_size (int): Batch size during training + - epochs (int): Maximum number of epochs to fine-tune model, in one iteration of pruning + - optimizer (keras.optimizers.Optimizer or equivalent-string description): + Optimizer used during training + - loss_fn (keras.losses.Loss or equivalent loss description): + Loss function used during training + - validation_metric (keras.metrics.Metric or equivalent loss description): + Validation metric, used as a baseline + - increasing (boolean): If the metric improves with increased values; + e.g. 
accuracy -> increasing = True, MSE -> increasing = False + - rtol (float): Relative tolerance; + pruning stops when pruned_validation_metric < (or >) rtol * baseline_validation_metric + + Kwargs: + - callbacks (list of keras.callbacks.Callback) Currently not supported, developed in future versions + - ranking_metric (string): Metric used for rannking weights and structures; + currently supported l1, l2, saliency and Oracle + - local (boolean): Layer-wise or global pruning + - verbose (boolean): Display debug logs during model optimization + - rewinding_epochs (int): Number of epochs to retrain model without weight freezing, + allows regrowth of previously pruned weights + - cutoff_bad_trials (int): After how many bad trials (performance below threshold), + should model pruning / weight sharing stop + - directory (string): Directory to store temporary results + - tuner (str): Tuning alogorithm, choose between Bayesian, Hyperband and None + - knapsack_solver (str): Algorithm to solve Knapsack problem when optimizing; + default usually works well; for very large networks, greedy algorithm might be more suitable + - regularization_range (list): List of suitable hyperparameters for weight decay + ''' + + if not isinstance(scheduler, OptimizationScheduler): + raise Exception( + 'Scheduler must be an instance of from hls4ml.optimization.scheduler.OptimizationScheduler' + 'If you provided string description (e.g. \'constant\')' + 'Please use an object instance (i.e. ConstantScheduler()).' + 'For a full list of supported schedulers, refer to hls4ml.optimization.scheduler.' + ) + + if epochs <= rewinding_epochs: + raise Exception( + 'Please increase the number of epochs. 
\ + The current epoch number is too small to perform effective pruning & weight rewinding' + ) + + if ranking_metric not in SUPPORTED_METRICS: + raise Exception('Unknown metric for ranking weights') + + # Loss function needs to be converted to a function, string description cannot be used during custom training loop + if isinstance(loss_fn, str): + loss_fn = tf.keras.losses.get(loss_fn) + + # Split data set into batches + train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)) + train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size) + validation_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)) + validation_dataset = validation_dataset.shuffle(buffer_size=1024).batch(batch_size) + + # Evaluate baseline performance + # Use built-in function, and return as list - the metric is the second element (loss if first) + model.compile(optimizer, loss_fn, metrics=[validation_metric]) + baseline_performance = model.evaluate(validation_dataset, verbose=0, return_dict=False)[-1] + if verbose: + print(f'Baseline performance on validation set: {baseline_performance}') + + # Save best weights + # Always save weights to a file, to reduce memory utilization + if not os.path.isdir(directory): + os.mkdir(directory) + if not os.path.isdir(f'{directory}/optimization'): + os.mkdir(f'{directory}/optimization') + model.save_weights(f'{directory}/optimization/best_weights.h5') + + # Identify optimizable layers, given the current objective + last_optimizable_layer = utils.get_last_layer_with_weights(model) + for i, layer in enumerate(model.layers): + if isinstance(layer, SUPPORTED_LAYERS): + optimizable, optimization_attributes = objective.is_layer_optimizable(model_attributes[layer.name]) + model_attributes[layer.name].optimizable = optimizable + model_attributes[layer.name].optimization_attributes = optimization_attributes + + # In the last layer, structured pruning can't be applied, as it removes output labels + # Weight sharing, as well as all 
other types of pruning (unstructured, block etc.) are applicable + if ( + i >= last_optimizable_layer + and optimization_attributes.structure_type == SUPPORTED_STRUCTURES.STRUCTURED + and optimization_attributes.pruning + ): + model_attributes[layer.name].optimization_attributes.pruning = False + model_attributes[layer.name].optimizable = model_attributes[ + layer.name + ].optimization_attributes.weight_sharing + else: + model_attributes[layer.name].optimizable = False + model_attributes[layer.name].optimization_attributes = None + + # Add regularization loss to optimizable layers + optimizable_model = build_optimizable_model( + model, + model_attributes, + optimizer, + loss_fn, + validation_metric, + increasing, + train_dataset, + validation_dataset, + batch_size, + epochs // 2, + verbose=verbose, + directory=directory, + tuner=tuner, + regularization_range=regularization_range, + ) + + # Create class for masked backprop (weight freezing) + masked_backprop = MaskedBackprop(optimizable_model, loss_fn, model_attributes) + + # In certain cases, the model might underperform at the current sparsity level, but perform better at a higher sparsity + # Therefore, monitor the models performance over several sparsity levels and + # Only stop pruning after high loss over several trials + bad_trials = 0 + sparsity_conditions = True + target_sparsity = scheduler.get_sparsity() + + while sparsity_conditions: + # TODO - This might cause OOM issues on large models / data sets, since it is not done in batches + gradients = ( + utils.get_model_gradients(optimizable_model, loss_fn, X_train, y_train) if ranking_metric == 'gradients' else {} + ) + hessians = ( + utils.get_model_hessians(optimizable_model, loss_fn, X_train, y_train) if ranking_metric == 'saliency' else {} + ) + + # Mask weights + masks, offsets = get_model_masks( + optimizable_model, + model_attributes, + target_sparsity, + objective, + metric=ranking_metric, + local=local, + gradients=gradients, + hessians=hessians, + 
knapsack_solver=knapsack_solver, + ) + for layer in optimizable_model.layers: + if isinstance(layer, SUPPORTED_LAYERS) and model_attributes[layer.name].optimizable: + layer_weights = layer.get_weights() + layer_weights[0] = np.multiply(layer_weights[0], masks[layer.name]) + offsets[layer.name] + layer.set_weights(layer_weights) + + # Mask gradients + # Before training the model at the next sparsity level, reset internal states + # Furthemore, modern optimizers (e.g. Adam) accumulate gradients during backprop + # Therefore, even if the gradient for a weight is zero, it might be updated, due to previous gradients + # Avoid this by resetting the internal variables of an optimizer + optimizable_model.reset_metrics() + optimizable_model.reset_states() + for x in optimizable_model.optimizer.variables(): + x.assign(tf.zeros_like(x)) + masked_backprop.update_masks(masks) + + # Train model with weight freezing [pruning] + if verbose: + print(f'Pruning with a target sparsity of {target_sparsity * 100.0}% [relative to objective]') + for epoch in range(epochs - rewinding_epochs): + start_time = time.time() + epoch_loss_avg = tf.keras.metrics.Mean() + + # Masked backprop + for X, y in train_dataset: + loss_value = masked_backprop(tf.convert_to_tensor(X), tf.convert_to_tensor(y), target_sparsity) + epoch_loss_avg.update_state(loss_value) + + # Evaluate on validation set and print epoch summary + if verbose: + val_res = optimizable_model.evaluate(validation_dataset, verbose=0, return_dict=False) + t = time.time() - start_time + avg_loss = round(epoch_loss_avg.result(), 3) + print(f'Epoch: {epoch + 1} - Time: {t}s - Average training loss: {avg_loss}') + print(f'Epoch: {epoch + 1} - learning_rate: {optimizable_model.optimizer.learning_rate.numpy()}') + print(f'Epoch: {epoch + 1} - Validation loss: {val_res[0]} - Performance on validation set: {val_res[1]}') + + # Check if model works after pruning + pruned_performance = optimizable_model.evaluate(validation_dataset, verbose=0, 
return_dict=False)[-1] + if verbose: + print(f'Optimized model performance on validation set, after fine-tuning: {pruned_performance}') + + if __compare__(pruned_performance, rtol * baseline_performance, not increasing): + bad_trials = 0 + sparsity_conditions, target_sparsity = scheduler.update_step() + optimizable_model.save_weights(f'{directory}/optimization/best_weights.h5') + else: + bad_trials += 1 + sparsity_conditions, target_sparsity = scheduler.repair_step() + + # If the model performed poorly over several sparsity levels, stop optimization [maximum sparsity reached] + if bad_trials > cutoff_bad_trials: + break + + # Train model without weight freezing [rewinding] + if verbose: + print(f'Starting weight rewinding for {rewinding_epochs} epochs') + optimizable_model.fit( + train_dataset, + validation_data=validation_dataset, + batch_size=batch_size, + epochs=rewinding_epochs, + callbacks=callbacks, + verbose=verbose, + ) + + # Load best weights + optimizable_model.load_weights(f'{directory}/optimization/best_weights.h5') + + # Remove regularizers and save best model + optimizable_model = remove_custom_regularizers(optimizable_model) + optimizable_model.compile(optimizer, loss_fn, metrics=[validation_metric]) + + # In GPU FLOP Optimization, remove structures to achieve speed-up & fine-tune the smaller architecture + # TODO - Extend for Resource strategy in hls4ml FF optimisation + if objective.__name__ in ('GPUFLOPEstimator'): + optimizable_model = reduce_model(optimizable_model) + optimizable_model.compile(optimizer, loss_fn, metrics=[validation_metric]) + optimizable_model.fit( + train_dataset, + validation_data=validation_dataset, + batch_size=batch_size, + epochs=int(1.5 * epochs), + callbacks=callbacks, + ) + + # Evaluate final optimized model [purely for debugging / informative purposes] + if verbose: + pruned_performance = optimizable_model.evaluate(validation_dataset, verbose=0, return_dict=False)[-1] + print(f'Optimized model performance on 
class MaskedBackprop:
    '''
    A helper class to perform masked backprop (training with frozen weights).

    The important function is __call__, as it masks gradients based on frozen weights.
    While this function could exist without a class, taking the masks as an input
    argument would deplete memory: a new graph would be traced for every call,
    causing a large run-time. The trick is to store the masks, model etc. as class
    attributes and pass only the sparsity - as the sparsity changes, a new graph
    of the function is traced.
    '''

    def __init__(self, model, loss_fn, attributes):
        # model (keras.Model): Model being optimized
        # loss_fn (keras.losses.Loss): Loss function used during training
        # attributes (dict): Layer-wise dictionary of LayerAttributes classes
        self.model = model
        self.loss_fn = loss_fn
        self.attributes = attributes
        self.masks = {}

    def update_masks(self, masks):
        # Store the current layer-wise binary masks; called once per sparsity level
        self.masks = masks

    @tf.function
    def __call__(self, X, y, s):
        '''
        Helper function performing a single step of masked backprop

        Args:
            - X (tf.Tensor): Input data
            - y (tf.Tensor): Output data
            - s (float): Sparsity - not used in the computation itself, but passing it
              forces a re-trace of the tf.function whenever the sparsity level changes

        Return:
            - loss (tf.Variable): Model loss with input X and output y
        '''
        grads = []
        # A persistent tape is required, since tape.gradient(...) is invoked once per layer
        with tf.GradientTape(persistent=True) as tape:
            output = self.model(X, training=True)
            loss = self.loss_fn(y, output)
            # model.losses holds the regularization penalties; guard the sum, since
            # tf.add_n raises on an empty list (e.g. a model without regularizers)
            if self.model.losses:
                loss += tf.add_n(self.model.losses)
        for layer in self.model.layers:
            if layer.trainable_weights:
                grad = tape.gradient(loss, layer.trainable_weights)
                if self.attributes[layer.name].optimizable:
                    # Zero the gradients of frozen (masked) weights, so they are not updated
                    grad[0] = tf.multiply(grad[0], self.masks[layer.name])
                grads += grad
        # Explicitly release the persistent tape, so its resources are freed every step
        del tape
        self.model.optimizer.apply_gradients(zip(grads, self.model.trainable_weights))
        return loss


def __compare__(x, y, leq=False):
    '''
    Helper function for comparing two values, x & y

    Sometimes, we use the >= sign - e.g. pruned_accuracy >= tolerance * baseline_accuracy [0 <= tolerance <= 1]
    Other times, use the <= sign - e.g. pruned_mse <= tolerance * baseline_mse [tolerance >= 1]

    Args:
        - x (float): First value to compare
        - y (float): Second value to compare
        - leq (bool): If True, compare with x <= y; otherwise with x >= y
    '''
    return x <= y if leq else x >= y
default_regularization_range = np.logspace(-6, -2, num=16).tolist()


def _set_layer_regularizer(layer, layer_attributes, alpha, beta):
    '''
    Attach a custom regularizer (DenseRegularizer or Conv2DRegularizer) to a single
    optimizable Dense / Conv2D (or QKeras equivalent) layer, in-place.

    Args:
        - layer (keras.layers.Layer): Layer to be regularized
        - layer_attributes (LayerAttributes): Attributes of this layer, including optimization attributes
        - alpha (float): Pruning regularization weight
        - beta (float): Weight-sharing regularization weight
    '''
    opt_attributes = layer_attributes.optimization_attributes
    if isinstance(layer, (Dense, QDense)):
        layer.kernel_regularizer = DenseRegularizer(
            alpha,
            beta,
            norm=1,
            structure_type=opt_attributes.structure_type,
            block_shape=opt_attributes.block_shape,
            pattern_offset=opt_attributes.pattern_offset,
            consecutive_patterns=opt_attributes.consecutive_patterns,
        )
    elif isinstance(layer, (Conv2D, QConv2D)):
        # Conv2D regularizers do not take a block shape
        layer.kernel_regularizer = Conv2DRegularizer(
            alpha,
            beta,
            norm=1,
            structure_type=opt_attributes.structure_type,
            pattern_offset=opt_attributes.pattern_offset,
            consecutive_patterns=opt_attributes.consecutive_patterns,
        )


def build_optimizable_model(
    model,
    attributes,
    optimizer,
    loss_fn,
    validation_metric,
    increasing,
    train_dataset,
    validation_dataset,
    batch_size,
    epochs,
    verbose=False,
    directory=TMP_DIRECTORY,
    tuner='Bayesian',
    regularization_range=default_regularization_range,
):
    '''
    Function identifying optimizable layers and adding a regularization loss

    Args:
        - model (keras.Model): Model to be optimized
        - attributes (dict): Layer-wise model attributes, obtained from hls4ml.optimization.get_attributes_from_keras_model(...)
        - optimizer (keras.optimizers.Optimizer): Optimizer used during training
        - loss_fn (keras.losses.Loss): Loss function used during training
        - validation_metric (keras.metrics.Metric): Validation metric, used as a baseline
        - increasing (bool): If True, a higher validation metric is better (e.g. accuracy);
          if False, a lower one is better (e.g. MSE)
        - train_dataset (tf.Dataset): Training inputs and labels, in the form of an iterable TF Dataset
        - validation_dataset (tf.Dataset): Validation inputs and labels, in the form of an iterable TF Dataset
        - batch_size (int): Batch size during training
        - epochs (int): Maximum number of epochs to fine-tune model, in one iteration of pruning

    Kwargs:
        - verbose (bool): Whether to log tuner outputs to the console
        - directory (string): Directory to store tuning results
        - tuner (str): Tuning algorithm; choose between Bayesian, Hyperband and Manual
        - regularization_range (list): List of suitable hyperparameters for weight decay

    Raises:
        - ValueError: If an unknown tuner is requested

    Notes:
        - In general, the regularization range does not need to be provided, as the
          implementation sets a generic enough range. If the user has an idea of the
          possible hyperparameter ranges, the tuning will complete faster.
        - The default tuner is Bayesian & when coupled with the correct ranges of hyperparameters,
          it performs quite well, fast. However, older versions of Keras Tuner had a crashing bug with it.
        - In general, the directory does not need to be specified. However, if pruning several models
          simultaneously, to avoid conflicting intermediate results, it is useful to specify directory.
    '''
    # User provided manual hyper-parameters for regularisation loss
    # TODO - Maybe we could extend this to be hyper-parameters per layer? or layer-type?
    # Currently, the same (manually-set) hyper-parameter is set for every layer
    if tuner == 'Manual':
        model_to_prune = tf.keras.models.clone_model(model)
        for layer in model_to_prune.layers:
            if isinstance(layer, SUPPORTED_LAYERS) and attributes[layer.name].optimizable:
                pruning = attributes[layer.name].optimization_attributes.pruning
                weight_sharing = attributes[layer.name].optimization_attributes.weight_sharing
                alpha = regularization_range[0] if pruning else 0
                beta = regularization_range[0] if weight_sharing else 0
                _set_layer_regularizer(layer, attributes[layer.name], alpha, beta)

        # Rebuild model graph, so the newly attached regularizers take effect
        model_to_prune = tf.keras.models.model_from_json(model_to_prune.to_json(), custom_objects=co)
        model_to_prune.set_weights(model.get_weights())
        model_to_prune.compile(optimizer=optimizer, loss=loss_fn, metrics=[validation_metric])
        return model_to_prune

    # User opted for hyper-parameter tuning
    objective_direction = 'max' if increasing else 'min'
    if isinstance(validation_metric, str):
        objective_name = validation_metric
    else:
        # Convert e.g. CategoricalAccuracy -> categorical_accuracy, as reported in Keras logs
        objective_name = re.sub(r'(?<!^)(?=[A-Z])', '_', validation_metric.__class__.__name__).lower()

    hypermodel = HyperOptimizationModel(model, attributes, optimizer, loss_fn, validation_metric, regularization_range)
    objective = kt.Objective(objective_name, objective_direction)
    # Avoid shadowing the `tuner` parameter with the tuner instance
    if tuner == 'Bayesian':
        search = kt.BayesianOptimization(
            hypermodel=hypermodel,
            objective=objective,
            max_trials=10,
            overwrite=True,
            directory=directory + '/tuning',
        )
    elif tuner == 'Hyperband':
        search = kt.Hyperband(
            hypermodel=hypermodel,
            objective=objective,
            max_epochs=epochs,
            factor=3,
            hyperband_iterations=1,
            overwrite=True,
            directory=directory + '/tuning',
        )
    else:
        raise ValueError('Unknown tuner; possible options are Bayesian and Hyperband')

    if verbose:
        search.search_space_summary()

    search.search(
        train_dataset,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=validation_dataset,
        callbacks=[EarlyStopping(monitor='val_loss', patience=1)],
    )

    if verbose:
        search.results_summary()

    return search.get_best_models(num_models=1)[0]
model.set_weights(weights) + return model diff --git a/hls4ml/optimization/keras/config.py b/hls4ml/optimization/keras/config.py new file mode 100644 index 0000000000000000000000000000000000000000..d3ade5933d063859feb63013f59186d6544a7cc6 --- /dev/null +++ b/hls4ml/optimization/keras/config.py @@ -0,0 +1,26 @@ +from qkeras import QConv2D, QDense +from tensorflow.keras.layers import Conv2D, Dense + +''' +Optimizable layers in Keras / QKeras +Any new layers need to be registered here first +Additional logic in the source files may need to be written (e.g. recurrent layers should also optimize recurrent kernels) +''' +SUPPORTED_LAYERS = (Dense, Conv2D, QDense, QConv2D) + + +''' +Supported ranking metrics, for classifying redundant (groups of) weights + +1. l1 - groups of weights are ranked by their l1 norm +2. l2 - groups of weights are ranked by their l2 norm +3. oracle - abs(dL / dw * w), introduced by Molchanov et al. (2016) + Pruning Convolutional Neural Networks for Resource Efficient Inference +4. saliency - (d^2L / dw^2 * w)^2, introduced by Lecun et al. (1989) Optimal Brain Damage +''' +SUPPORTED_METRICS = ('l1', 'l2', 'oracle', 'saliency') + +''' +Temporary directory for storing best models, tuning results etc. 
def get_model_masks(
    keras_model,
    model_attributes,
    sparsity,
    objective,
    metric='l1',
    local=False,
    gradients=None,
    hessians=None,
    knapsack_solver='CBC_MIP',
):
    '''
    Function calculating a binary mask for all optimizable layers

    Entries equal to one correspond to the weight being updated during the training
    Entries equal to zero correspond to the weight being frozen during the training

    Masking is such that:
        * resource_utilization <= (1 - sparsity) * baseline_utilization OR
        * resource_saving > sparsity * baseline_utilization [equivalent formulation]

    Offsets are used for weight sharing - in the case of weight sharing, the mask is set to zero
    Therefore, the weights will be frozen during training; however, they still need to be the mean of the group
    Offsets represent the mean of each weight-shared group - therefore, it is important to have offsets only for
    frozen weights; that is where the corresponding entry in the mask tensor is zero

    If a layer supports both weight sharing and pruning, both the norm and variance of the group are calculated
    And the smaller one is considered; so if the norm is smaller, the group will be considered for pruning
    Otherwise, the group will be considered for weight sharing.
    Both the norm and variance are normalized, to avoid magnitude biases.

    Args:
        - keras_model (keras.Model): Model to be masked
        - model_attributes (dict): A layer-wise dictionary of LayerAttributes classes
        - sparsity (float): Desired sparsity, with respect to the objective
        - objective (ObjectiveEstimator): Objective to be minimized (e.g. DSP, FLOPs etc.)
        - metric (string): Weight ranking metric - l1, l2, oracle, saliency
        - local (boolean): Equal layer-wise sparsity
        - gradients (dict): A layer-wise dictionary of weight gradients (needed for oracle ranking)
        - hessians (dict): A layer-wise dictionary of second gradients (needed for saliency ranking)
        - knapsack_solver (str): Algorithm for solving the Knapsack problem; recommended is to use the default.
          Unless dealing with highly dimensional problems, in which case greedy is better.

    Raises:
        - ValueError: If the ranking metric is unsupported, or the auxiliary data it requires is missing

    Return:
        - masks (dict): Layer-wise dictionary of binary tensors
        - offsets (dict): Layer-wise dictionary of offsets for every weight
    '''
    if metric not in SUPPORTED_METRICS:
        raise ValueError(f'Unknown metric for ranking weights: {metric}; supported metrics are: {SUPPORTED_METRICS}')

    if metric == 'oracle' and gradients is None:
        raise ValueError('Oracle ranking requires the gradient of the loss with respect to model weights')

    if metric == 'saliency' and hessians is None:
        raise ValueError('Saliency ranking requires second order derivatives')

    # Local masking enforces the same sparsity in every layer;
    # global masking distributes the sparsity budget across layers via a Knapsack problem
    mask_fn = __get_masks_local if local else __get_masks_global
    return mask_fn(keras_model, model_attributes, sparsity, objective, metric, gradients, hessians, knapsack_solver)
masks = {} + offsets = {} + + for layer in keras_model.layers: + # Care needs to be taken if layer_savings = 0 + # As long as the default attributes are used (from is_layer_optimizable(...)), this should never happen + # However, if the optimization attributes are manually changed, + # It would be possible to select the structure type such that savings = 0 + # In this case, the goal is to keep resource utilization under a certain threshold; + # Savings are equivalent to resources per single group + # Therefore, in the knapsack solver, zero-saving would remain unmasked; + # As it has a "weight" of zero, it is always stored in the knapsack + # So not masking a group without saving is as expected; + # However, if solved through greedy knaspack an exception will be thrown (division by zero) + if isinstance(layer, SUPPORTED_LAYERS) and model_attributes[layer.name].optimizable: + layer_savings = objective.layer_savings(model_attributes[layer.name]) + layer_resources = objective.layer_resources(model_attributes[layer.name]) + target_resources = ((1 - sparsity) * np.array(layer_resources)).astype(int) + structure_type = model_attributes[layer.name].optimization_attributes.structure_type + + # All the groups (structures, patterns, blocks) have the same resource utilisation in one layer + # So we can greedily prune the groups with the lowest "loss" (magnitude, saliency etc.) + # Greedily pruning the groups with the lowest loss is a special case of the Knapsack problem with equal weights + value = layer.get_weights()[0] + if metric == 'oracle': + value = np.abs(np.multiply(value, gradients[layer.name])) + norm = 1 + elif metric == 'saliency': + value = np.multiply(np.square(value), hessians[layer.name]) + norm = 1 + elif metric == 'l1': + norm = 1 + else: + norm = 2 + + if structure_type == SUPPORTED_STRUCTURES.UNSTRUCTURED: + if model_attributes[layer.name].optimization_attributes.weight_sharing: + logging.warn('Weight sharing not suitable for unstructured pruning. 
Ignoring....') + + if model_attributes[layer.name].optimization_attributes.pruning: + # Since no norm is taken, calculate absolute value [avoids removing large negative weights] + value = np.abs(value) + + # Get all posible indices in the weight tensor + indices = np.indices(value.shape).reshape(value.ndim, -1).T + + # Find weights with the lowest loss + groups = [] + for i in indices: + groups.append(__WeightGroups__(value[tuple(i)], layer_savings, tuple(i))) + _, selected = solve_knapsack( + np.array([g.value for g in groups]), + np.array([g.resources for g in groups]).T, + target_resources, + implementation=knapsack_solver, + ) + + # Selected weights are not masked + mask = np.zeros(value.shape, dtype=value.dtype) + for i in selected: + mask[groups[i].layer_position] = 1 + + # Offsets are always zero (weight sharing not applicable to unstructured) + masks[layer.name] = mask + offsets[layer.name] = np.zeros(value.shape, dtype=value.dtype) + + if structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + # Dense -> Masking neurons (columns) + if isinstance(layer, (Dense, QDense)): + # If pruning enabled, find cost associated with pruning each neuron + if model_attributes[layer.name].optimization_attributes.pruning: + vals_norm = np.linalg.norm(value, axis=0, ord=norm) + else: + vals_norm = np.full((value.shape[1]), sys.float_info.max, dtype=value.dtype) + + # If weight sharing enabled, find cost asociated with quantizing nerons to their mean + if model_attributes[layer.name].optimization_attributes.weight_sharing: + vals_var = np.var(value, axis=0) + else: + vals_var = np.full((value.shape[1]), sys.float_info.max, dtype=value.dtype) + + # Choose min(pruning, weight sharing) + groups = [] + for i in range(vals_norm.shape[0]): + if vals_norm[i] <= vals_var[i]: + groups.append(__WeightGroups__(vals_norm[i], layer_savings, i, optimization_type='pruning')) + else: + groups.append(__WeightGroups__(vals_var[i], layer_savings, i, optimization_type='sharing')) + + # Select 
neurons with the lowest loss + _, selected = solve_knapsack( + np.array([g.value for g in groups]), + np.array([g.resources for g in groups]).T, + target_resources, + implementation=knapsack_solver, + ) + + # Selected neurons are not masked + mask = np.zeros(value.shape, value.dtype) + for i in selected: + mask[:, groups[i].layer_position] = 1 + + # Masked neurons can either be pruned or quantized + # If quantized, add the corresponding offset + offset = np.zeros(value.shape, value.dtype) + zeros = np.where(~np.all(mask, axis=0))[0] + for i in zeros: + if groups[i].optimization_type == 'sharing': + offset[:, i] = np.mean(layer.get_weights()[0][:, i]) + masks[layer.name] = mask + offsets[layer.name] = offset + + # Conv2D -> Masking filters (W x H x C) + elif isinstance(layer, (Conv2D, QConv2D)): + if model_attributes[layer.name].optimization_attributes.pruning: + vals_norm = np.linalg.norm(np.linalg.norm(value, axis=(0, 1), ord='fro'), axis=0, ord=norm) + else: + vals_norm = np.full((value.shape[3]), sys.float_info.max, dtype=value.dtype) + + if model_attributes[layer.name].optimization_attributes.weight_sharing: + vals_var = np.var(np.linalg.norm(value, axis=(0, 1), ord='fro'), axis=0) + else: + vals_var = np.full((value.shape[3]), sys.float_info.max, dtype=value.dtype) + + groups = [] + for i in range(vals_norm.shape[0]): + if vals_norm[i] <= vals_var[i]: + groups.append(__WeightGroups__(vals_norm[i], layer_savings, i, optimization_type='pruning')) + else: + groups.append(__WeightGroups__(vals_var[i], layer_savings, i, optimization_type='sharing')) + + _, selected = solve_knapsack( + np.array([g.value for g in groups]), + np.array([g.resources for g in groups]).T, + target_resources, + implementation=knapsack_solver, + ) + + mask = np.zeros(value.shape, value.dtype) + for i in selected: + mask[:, :, :, groups[i].layer_position] = 1 + + offset = np.zeros(value.shape, value.dtype) + zeros = np.where(~np.all(mask, axis=(0, 1, 2)))[0] + for i in zeros: + if 
groups[i].optimization_type == 'sharing': + offset[:, :, :, i] = np.mean(layer.get_weights()[0][:, :, :, i]) + + masks[layer.name] = mask + offsets[layer.name] = offset + + if structure_type == SUPPORTED_STRUCTURES.PATTERN: + pattern_offset = model_attributes[layer.name].optimization_attributes.pattern_offset + consecutive_patterns = model_attributes[layer.name].optimization_attributes.consecutive_patterns + + if (np.prod(value.shape)) % pattern_offset != 0: + raise Exception('Pattern offset needs to be a factor of matrix size') + + if pattern_offset % consecutive_patterns != 0: + raise Exception('Consecutive patterns need to be a factor of matrix size') + + # Transpose, as done in hls4ml Resource strategy + if isinstance(layer, (Dense, QDense)): + value = value.T + transposed_shape = value.shape + elif isinstance(layer, (Conv2D, QConv2D)): + value = np.transpose(value, axes=[3, 0, 1, 2]) + transposed_shape = value.shape + + # Reshape weight matrix into [number_of_patterns, pattern_offset] + # Note, swapping the axis will mess up the weight order + # In the case of hls4ml, number_of_patterns is equivalent to reuse factor + # And, pattern_offset, is the number of multiplications done in parallel + number_of_patterns = np.prod(transposed_shape) // pattern_offset + target_shape = (pattern_offset, number_of_patterns) + reshaped = np.reshape(value, target_shape) + + # Group consecutive patterns (rows) into blocks and reshape + total_blocks = pattern_offset // consecutive_patterns + blocks = np.reshape( + tf.image.extract_patches( + np.expand_dims(np.expand_dims(reshaped, 2), 0), + [1, consecutive_patterns, number_of_patterns, 1], + [1, consecutive_patterns, number_of_patterns, 1], + [1, 1, 1, 1], + 'SAME', + ).numpy(), + (total_blocks, -1), + ) + + # If pruning enabled, find cost associated with pruning each neuron + if model_attributes[layer.name].optimization_attributes.pruning: + vals_norm = np.linalg.norm(blocks, axis=1, ord=norm) + else: + vals_norm = 
np.full((blocks.shape[0],), sys.float_info.max, dtype=value.dtype) + + # If weight sharing enabled, find cost asociated with quantizing nerons to their mean + if model_attributes[layer.name].optimization_attributes.weight_sharing: + vals_var = np.var(blocks, axis=1) + else: + vals_var = np.full((blocks.shape[0],), sys.float_info.max, dtype=value.dtype) + + # Choose min(pruning, weight sharing) + groups = [] + for i in range(vals_norm.shape[0]): + if vals_norm[i] <= vals_var[i]: + groups.append(__WeightGroups__(vals_norm[i], layer_savings, i, optimization_type='pruning')) + else: + groups.append(__WeightGroups__(vals_var[i], layer_savings, i, optimization_type='sharing')) + + # Select groups with highest importance + _, selected = solve_knapsack( + np.array([g.value for g in groups]), + np.array([g.resources for g in groups]).T, + target_resources, + implementation=knapsack_solver, + ) + + # Decode masked groups into transposed shape and set selected groups to one + mask = np.zeros((np.prod(transposed_shape),), value.dtype) + for i in selected: + pos = i * number_of_patterns * consecutive_patterns + for j in range(pos, pos + number_of_patterns * consecutive_patterns, number_of_patterns): + mask[range(j, j + number_of_patterns)] = 1 + + # Decode offset + not_selected = [i for i in range(len(groups)) if i not in selected] + offset = np.zeros((np.prod(transposed_shape),), value.dtype) + for i in not_selected: + if groups[i].optimization_type == 'sharing': + mean = np.mean(blocks[i, :]) + pos = i * number_of_patterns * consecutive_patterns + for j in range(pos, pos + number_of_patterns * consecutive_patterns, number_of_patterns): + offset[range(j, j + number_of_patterns)] = mean + + # Reshape into original shape and store result + if isinstance(layer, (Dense, QDense)): + mask = np.reshape(mask, transposed_shape).T + offset = np.reshape(offset, transposed_shape).T + elif isinstance(layer, (Conv2D, QConv2D)): + mask = np.transpose(np.reshape(mask, transposed_shape), (1, 
2, 3, 0)) + offset = np.transpose(np.reshape(offset, transposed_shape), (1, 2, 3, 0)) + masks[layer.name] = mask + offsets[layer.name] = offset + + if structure_type == SUPPORTED_STRUCTURES.BLOCK: + if len(value.shape) != 2: + raise Exception('Block pruning is supported for 2-dimensional weight matrices') + + block_shape = model_attributes[layer.name].optimization_attributes.block_shape + if (value.shape[0] % block_shape[0]) != 0 or (value.shape[1] % block_shape[1] != 0): + raise Exception('Block sizes need to be fators of weight matrix dimensions') + + # TensorFlow has a built-in method for exctracting sub-tensors of given shape and stride + # This method is commonly used to perform im2col, + # Docs: https://www.tensorflow.org/api_docs/python/tf/image/extract_patches + total_blocks = (value.shape[0] * value.shape[1]) // (block_shape[0] * block_shape[1]) + blocks_in_row = value.shape[1] // block_shape[1] + blocks = np.reshape( + tf.image.extract_patches( + np.expand_dims(np.expand_dims(value, 2), 0), + [1, block_shape[0], block_shape[1], 1], + [1, block_shape[0], block_shape[1], 1], + [1, 1, 1, 1], + 'SAME', + ).numpy(), + (total_blocks, block_shape[0] * block_shape[1]), + ) + + # If pruning enabled, find cost associated with pruning each neuron + if model_attributes[layer.name].optimization_attributes.pruning: + vals_norm = np.linalg.norm(blocks, axis=1, ord=norm) + else: + vals_norm = np.full((blocks.shape[0],), sys.float_info.max, dtype=value.dtype) + + # If weight sharing enabled, find cost asociated with quantizing nerons to their mean + if model_attributes[layer.name].optimization_attributes.weight_sharing: + vals_var = np.var(blocks, axis=1) + else: + vals_var = np.full((blocks.shape[0],), sys.float_info.max, dtype=value.dtype) + + # Choose min(pruning, weight sharing) + groups = [] + for i in range(vals_norm.shape[0]): + if vals_norm[i] <= vals_var[i]: + groups.append(__WeightGroups__(vals_norm[i], layer_savings, i, optimization_type='pruning')) + else: + 
groups.append(__WeightGroups__(vals_var[i], layer_savings, i, optimization_type='sharing')) + + # Select groups with highest importance + _, selected = solve_knapsack( + np.array([g.value for g in groups]), + np.array([g.resources for g in groups]).T, + target_resources, + implementation=knapsack_solver, + ) + + # Decode position of masked weights and set selected weights to one + mask = np.zeros(value.shape, value.dtype) + for i in selected: + row = block_shape[0] * (i // blocks_in_row) + col = block_shape[1] * (i % blocks_in_row) + cols = np.linspace(col, col + block_shape[1], block_shape[1], endpoint=False, dtype=np.int32) + rows = np.linspace(row, row + block_shape[0], block_shape[0], endpoint=False, dtype=np.int32) + zeros = np.array(np.meshgrid(rows, cols)).T.reshape(-1, 2) + mask[zeros[:, 0], zeros[:, 1]] = 1 + + # Calculate offset + not_selected = [i for i in range(len(groups)) if i not in selected] + offset = np.zeros(value.shape, value.dtype) + for i in not_selected: + if groups[i].optimization_type == 'sharing': + mean = np.mean(blocks[i, :]) + row = block_shape[0] * (i // blocks_in_row) + col = block_shape[1] * (i % blocks_in_row) + cols = np.linspace(col, col + block_shape[1], block_shape[1], endpoint=False, dtype=np.int32) + rows = np.linspace(row, row + block_shape[0], block_shape[0], endpoint=False, dtype=np.int32) + pos = np.array(np.meshgrid(rows, cols)).T.reshape(-1, 2) + offset[pos[:, 0], pos[:, 1]] = mean + + masks[layer.name] = mask + offsets[layer.name] = offset + + return masks, offsets + + +def __get_masks_global(keras_model, model_attributes, sparsity, objective, metric, gradients, hessians, knapsack_solver): + ''' + Function calculating a layer-wise binary mask for all optimizable layers + Global masking, with layers of different sparsity; masks are calculated by solving a Knapsack problem + Most of the logic remains similar to local masking; comments describing implementation are given in the function above + ''' + groups = [] + 
total_resources = [] + + # Iterate through all layers and create a list of all the optimizable groups (single weight, structure, pattern, block) + # Each entry contains the value associated with the group, + # Alongside the layer it belongs to and its location in the layer + # The values is normalised w.r.t to the to largest element in the group, to avoid bias towards large layers + # We also keep track of total model resources, with respect to the objective + # A detailed comment in the local masking function is given for + # Considerations on exception of layer savings and how to address them + for layer in keras_model.layers: + # Optimizable should be always enabled if either pruning or weight sharing are enabled + # However, if the objectives are implemented incorrectly, + # It is possible to have optimizatons enabled without any types of optimization (pruning, weight sharing) enabled + layer_optimizable = model_attributes[layer.name].optimizable and ( + model_attributes[layer.name].optimization_attributes.weight_sharing + or model_attributes[layer.name].optimization_attributes.pruning + ) + if isinstance(layer, SUPPORTED_LAYERS) and layer_optimizable: + value = layer.get_weights()[0] + structure_type = model_attributes[layer.name].optimization_attributes.structure_type + layer_savings = objective.layer_savings(model_attributes[layer.name]) + total_resources.append(objective.layer_resources(model_attributes[layer.name])) + + if metric == 'oracle': + value = np.abs(np.multiply(value, gradients[layer.name])) + norm = 1 + elif metric == 'saliency': + value = np.multiply(np.square(value), hessians[layer.name]) + norm = 1 + elif metric == 'l1': + norm = 1 + else: + norm = 2 + + if structure_type == SUPPORTED_STRUCTURES.UNSTRUCTURED: + if model_attributes[layer.name].optimization_attributes.weight_sharing: + logging.warn('Weight sharing not suitable for unstructured pruning. 
Ignoring....') + + if model_attributes[layer.name].optimization_attributes.pruning: + value = np.abs(value) + value = value / np.max(value) + indices = np.indices(value.shape).reshape(value.ndim, -1).T + for i in indices: + group = __WeightGroups__( + value[tuple(i)], layer_savings, tuple(i), structure_type, layer.name, 'pruning' + ) + groups.append(group) + + if structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + if isinstance(layer, (Dense, QDense)): + if model_attributes[layer.name].optimization_attributes.pruning: + vals_norm = np.linalg.norm(value, axis=0, ord=norm) + vals_norm = vals_norm / np.max(vals_norm) + else: + vals_norm = np.full((value.shape[1]), sys.float_info.max, dtype=value.dtype) + + if model_attributes[layer.name].optimization_attributes.weight_sharing: + vals_var = np.var(value, axis=0) + else: + vals_var = np.full((value.shape[1]), sys.float_info.max, dtype=value.dtype) + + for i in range(vals_norm.shape[0]): + if vals_norm[i] <= vals_var[i]: + groups.append( + __WeightGroups__( + vals_norm[i], layer_savings, i, structure_type, layer.name, optimization_type='pruning' + ) + ) + else: + groups.append( + __WeightGroups__( + vals_var[i], layer_savings, i, structure_type, layer.name, optimization_type='sharing' + ) + ) + + elif isinstance(layer, (Conv2D, QConv2D)): + if model_attributes[layer.name].optimization_attributes.pruning: + vals_norm = np.linalg.norm(np.linalg.norm(value, axis=(0, 1), ord='fro'), axis=0, ord=norm) + vals_norm = vals_norm / np.max(vals_norm) + else: + vals_norm = np.full((value.shape[3]), sys.float_info.max, dtype=value.dtype) + + if model_attributes[layer.name].optimization_attributes.weight_sharing: + vals_var = np.var(np.linalg.norm(value, axis=(0, 1), ord='fro'), axis=0) + else: + vals_var = np.full((value.shape[3]), sys.float_info.max, dtype=value.dtype) + + for i in range(vals_norm.shape[0]): + if vals_norm[i] <= vals_var[i]: + groups.append( + __WeightGroups__( + vals_norm[i], layer_savings, i, structure_type, 
layer.name, optimization_type='pruning' + ) + ) + else: + groups.append( + __WeightGroups__( + vals_var[i], layer_savings, i, structure_type, layer.name, optimization_type='sharing' + ) + ) + + if structure_type == SUPPORTED_STRUCTURES.PATTERN: + pattern_offset = model_attributes[layer.name].optimization_attributes.pattern_offset + consecutive_patterns = model_attributes[layer.name].optimization_attributes.consecutive_patterns + if (np.prod(value.shape)) % pattern_offset != 0: + raise Exception('Pattern offset needs to be a factor of matrix size') + + if pattern_offset % consecutive_patterns != 0: + raise Exception('Consecutive patterns need to be a factor of matrix size') + + # Transpose, as done in hls4ml Resource strategy + if isinstance(layer, (Dense, QDense)): + value = value.T + transposed_shape = value.shape + elif isinstance(layer, (Conv2D, QConv2D)): + value = np.transpose(value, axes=[3, 0, 1, 2]) + transposed_shape = value.shape + + # Reshape weight matrix into [number_of_patterns, pattern_offset] + # Note, swapping the axis will mess up the weight order + # In the case of hls4ml, number_of_patterns is equivalent to reuse factor + # And, pattern_offset, is the number of multiplications done in parallel + number_of_patterns = np.prod(transposed_shape) // pattern_offset + target_shape = (pattern_offset, number_of_patterns) + reshaped = np.reshape(value, target_shape) + + # Group consecutive patterns (rows) into blocks and reshape + total_blocks = pattern_offset // consecutive_patterns + blocks = np.reshape( + tf.image.extract_patches( + np.expand_dims(np.expand_dims(reshaped, 2), 0), + [1, consecutive_patterns, number_of_patterns, 1], + [1, consecutive_patterns, number_of_patterns, 1], + [1, 1, 1, 1], + 'SAME', + ).numpy(), + (total_blocks, -1), + ) + + # If pruning enabled, find cost associated with pruning each neuron + if model_attributes[layer.name].optimization_attributes.pruning: + vals_norm = np.linalg.norm(blocks, axis=1, ord=norm) + else: + 
vals_norm = np.full((blocks.shape[0],), sys.float_info.max, dtype=value.dtype) + + # If weight sharing enabled, find cost asociated with quantizing nerons to their mean + if model_attributes[layer.name].optimization_attributes.weight_sharing: + vals_var = np.var(blocks, axis=1) + else: + vals_var = np.full((blocks.shape[0],), sys.float_info.max, dtype=value.dtype) + + # Choose min(pruning, weight sharing) + for i in range(vals_norm.shape[0]): + if vals_norm[i] <= vals_var[i]: + groups.append( + __WeightGroups__( + vals_norm[i], layer_savings, i, structure_type, layer.name, optimization_type='pruning' + ) + ) + else: + groups.append( + __WeightGroups__( + vals_var[i], layer_savings, i, structure_type, layer.name, optimization_type='sharing' + ) + ) + + if structure_type == SUPPORTED_STRUCTURES.BLOCK: + if len(value.shape) != 2: + raise Exception('Block pruning is supported for 2-dimensional weight matrices') + + block_shape = model_attributes[layer.name].optimization_attributes.block_shape + if (value.shape[0] % block_shape[0]) != 0 or (value.shape[1] % block_shape[1] != 0): + raise Exception('Block sizes need to be fators of weight matrix dimensions') + + total_blocks = (value.shape[0] * value.shape[1]) // (block_shape[0] * block_shape[1]) + blocks = np.reshape( + tf.image.extract_patches( + np.expand_dims(np.expand_dims(value, 2), 0), + [1, block_shape[0], block_shape[1], 1], + [1, block_shape[0], block_shape[1], 1], + [1, 1, 1, 1], + 'SAME', + ).numpy(), + (total_blocks, block_shape[0] * block_shape[1]), + ) + + if model_attributes[layer.name].optimization_attributes.pruning: + vals_norm = np.linalg.norm(blocks, axis=1, ord=norm) + vals_norm = vals_norm / np.max(vals_norm) + else: + vals_norm = np.full((blocks.shape[0]), sys.float_info.max, dtype=value.dtype) + + if model_attributes[layer.name].optimization_attributes.weight_sharing: + vals_var = np.var(blocks, axis=1) + else: + vals_var = np.full((blocks.shape[0]), sys.float_info.max, dtype=value.dtype) + + for 
i in range(vals_norm.shape[0]): + if vals_norm[i] <= vals_var[i]: + groups.append( + __WeightGroups__( + vals_norm[i], layer_savings, i, structure_type, layer.name, optimization_type='pruning' + ) + ) + else: + groups.append( + __WeightGroups__( + vals_var[i], layer_savings, i, structure_type, layer.name, optimization_type='sharing' + ) + ) + + # The goal is to maximize network accuracy (values) subject to resorces (objective) staying under some threshold + # This is a Knapsack problem; several implementations are provided in the helper functions + # The selected values correspond to weight / groups being kept in the network; the rest are pruned / weight shared + total_resources = np.sum(np.array(total_resources), axis=0) + target_resources = ((1 - sparsity) * np.array(total_resources)).astype(int) + _, selected = solve_knapsack( + np.array([s.value for s in groups]), + np.array([s.resources for s in groups]).T, + target_resources, + implementation=knapsack_solver, + ) + # Update masks and offsets + masks = {} + offsets = {} + + for layer in keras_model.layers: + if isinstance(layer, SUPPORTED_LAYERS) and model_attributes[layer.name].optimizable: + structure_type = model_attributes[layer.name].optimization_attributes.structure_type + selected_layer = [i for i in selected if groups[i].layer_name == layer.name] + not_selected_layer = [ + i for i in range(len(groups)) if groups[i].layer_name == layer.name and i not in selected_layer + ] + + if structure_type == SUPPORTED_STRUCTURES.UNSTRUCTURED: + mask = np.zeros(model_attributes[layer.name].weight_shape, layer.get_weights()[0].dtype) + for i in selected_layer: + mask[groups[i].layer_position] = 1 + masks[layer.name] = mask + offsets[layer.name] = np.zeros(model_attributes[layer.name].weight_shape, layer.get_weights()[0].dtype) + + if structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + mask = np.zeros(model_attributes[layer.name].weight_shape, layer.get_weights()[0].dtype) + offset = 
np.zeros(model_attributes[layer.name].weight_shape, layer.get_weights()[0].dtype) + if isinstance(layer, (Dense, QDense)): + for i in selected_layer: + mask[:, groups[i].layer_position] = 1 + for i in not_selected_layer: + if groups[i].optimization_type == 'sharing': + offset[:, groups[i].layer_position] = np.mean(layer.get_weights()[0][:, i]) + if isinstance(layer, (Conv2D, QConv2D)): + for i in selected_layer: + mask[:, :, :, groups[i].layer_position] = 1 + for i in not_selected_layer: + if groups[i].optimization_type == 'sharing': + offset[ + :, + :, + :, + groups[i].layer_position, + ] = np.mean(layer.get_weights()[0][:, :, :, i]) + masks[layer.name] = mask + offsets[layer.name] = offset + + if structure_type == SUPPORTED_STRUCTURES.PATTERN: + pattern_offset = model_attributes[layer.name].optimization_attributes.pattern_offset + consecutive_patterns = model_attributes[layer.name].optimization_attributes.consecutive_patterns + number_of_patterns = np.prod(model_attributes[layer.name].weight_shape) // pattern_offset + + # Transpose shape, as done in hls4ml Resource strategy + # We need the weights to recalculate the block means + weight_shape = model_attributes[layer.name].weight_shape + if isinstance(layer, (Dense, QDense)): + value = layer.get_weights()[0].T + transposed_shape = (weight_shape[1], weight_shape[0]) + elif isinstance(layer, (Conv2D, QConv2D)): + value = np.transpose(layer.get_weights()[0], (3, 0, 1, 2)) + transposed_shape = (weight_shape[3], weight_shape[0], weight_shape[1], weight_shape[2]) + + # Decode masks + mask = np.zeros((np.prod(transposed_shape),), layer.get_weights()[0].dtype) + for i in selected_layer: + pos = groups[i].layer_position * number_of_patterns * consecutive_patterns + for j in range(pos, pos + number_of_patterns * consecutive_patterns, number_of_patterns): + mask[range(j, j + number_of_patterns)] = 1 + + # Decode offsets + offset = np.zeros((np.prod(transposed_shape),), layer.get_weights()[0].dtype) + + # Re-calculate the 
blocks, they are needed to calculate block means + target_shape = (pattern_offset, number_of_patterns) + reshaped = np.reshape(value, target_shape) + + total_blocks = pattern_offset // consecutive_patterns + blocks = np.reshape( + tf.image.extract_patches( + np.expand_dims(np.expand_dims(reshaped, 2), 0), + [1, consecutive_patterns, number_of_patterns, 1], + [1, consecutive_patterns, number_of_patterns, 1], + [1, 1, 1, 1], + 'SAME', + ).numpy(), + (total_blocks, -1), + ) + + for i in not_selected_layer: + if groups[i].optimization_type == 'sharing': + mean = np.mean(blocks[groups[i].layer_position, :]) + pos = groups[i].layer_position * number_of_patterns * consecutive_patterns + for j in range(pos, pos + number_of_patterns * consecutive_patterns, number_of_patterns): + offset[range(j, j + number_of_patterns)] = mean + + # Reshape into original shape and store result + if isinstance(layer, (Dense, QDense)): + mask = np.reshape(mask, transposed_shape).T + offset = np.reshape(offset, transposed_shape).T + elif isinstance(layer, (Conv2D, QConv2D)): + mask = np.transpose(np.reshape(mask, transposed_shape), (1, 2, 3, 0)) + offset = np.transpose(np.reshape(offset, transposed_shape), (1, 2, 3, 0)) + masks[layer.name] = mask + offsets[layer.name] = offset + + if structure_type == SUPPORTED_STRUCTURES.BLOCK: + block_shape = model_attributes[layer.name].optimization_attributes.block_shape + total_blocks = np.prod(model_attributes[layer.name].weight_shape) // np.prod(block_shape) + blocks_in_row = model_attributes[layer.name].weight_shape[1] // block_shape[1] + blocks = np.reshape( + tf.image.extract_patches( + np.expand_dims(np.expand_dims(layer.get_weights()[0], 2), 0), + [1, block_shape[0], block_shape[1], 1], + [1, block_shape[0], block_shape[1], 1], + [1, 1, 1, 1], + 'SAME', + ).numpy(), + (total_blocks, block_shape[0] * block_shape[1]), + ) + + mask = np.zeros(model_attributes[layer.name].weight_shape, layer.get_weights()[0].dtype) + for i in selected_layer: + row = 
block_shape[0] * (groups[i].layer_position // blocks_in_row) + col = block_shape[1] * (groups[i].layer_position % blocks_in_row) + cols = np.linspace(col, col + block_shape[1], block_shape[1], endpoint=False, dtype=np.int32) + rows = np.linspace(row, row + block_shape[0], block_shape[0], endpoint=False, dtype=np.int32) + zeros = np.array(np.meshgrid(rows, cols)).T.reshape(-1, 2) + mask[zeros[:, 0], zeros[:, 1]] = 1 + + offset = np.zeros(model_attributes[layer.name].weight_shape, layer.get_weights()[0].dtype) + for i in not_selected_layer: + if groups[i].optimization_type == 'sharing': + mean = np.mean(blocks[groups[i].layer_position, :]) + row = block_shape[0] * (groups[i].layer_position // blocks_in_row) + col = block_shape[1] * (groups[i].layer_position % blocks_in_row) + cols = np.linspace(col, col + block_shape[1], block_shape[1], endpoint=False, dtype=np.int32) + rows = np.linspace(row, row + block_shape[0], block_shape[0], endpoint=False, dtype=np.int32) + pos = np.array(np.meshgrid(rows, cols)).T.reshape(-1, 2) + offset[pos[:, 0], pos[:, 1]] = mean + + masks[layer.name] = mask + offsets[layer.name] = offset + + return masks, offsets + + +class __WeightGroups__: + ''' + A helper class containing information about a group of weights + ''' + + def __init__(self, value, resources, layer_position, structure_type=None, layer_name=None, optimization_type=None): + self.value = value + self.resources = resources + self.layer_position = layer_position + self.structure_type = structure_type + self.layer_name = layer_name + self.optimization_type = optimization_type diff --git a/hls4ml/optimization/keras/reduction.py b/hls4ml/optimization/keras/reduction.py new file mode 100644 index 0000000000000000000000000000000000000000..bd4621ed76274fbaabc56a464e08955f3dcf912a --- /dev/null +++ b/hls4ml/optimization/keras/reduction.py @@ -0,0 +1,63 @@ +import numpy as np +from tensorflow.keras.layers import Conv2D, Dense +from tensorflow.keras.models import Sequential + +from 
hls4ml.optimization.keras.utils import get_last_layer_with_weights + +''' +Function for removing zero neurons & filters from a model and rewiring the model graph +This function is built on top of Keras Surgeon available at: https://github.com/BenWhetton/keras-surgeon +Keras Surgeon is no longer under active development and does not work for TensorFlow 2.3+ and QKeras +The baseline version was forked and updated, available at: https://github.com/bo3z/keras-surgeon + +Args: + - model (keras.model): Input model + +Return: + - reduced (keras.model): Modified model, with redundant structures removed + + ''' + + +def reduce_model(model): + try: + from kerassurgeon import Surgeon + except ModuleNotFoundError: + raise Exception( + 'Keras Surgeon not installed. Unable to reduce model footprint ' + 'Please install up-to-date Keras Surgeon compatible wit TensorFlow 2.3+ and QKeras ' + 'Installation from git: https://github.com/bo3z/keras-surgeon' + ) + + # Initiate surgeon + surgeon = Surgeon(model) + + # Iterate through layers and identify neurons (columns) and filters (tensors, W x H x C) to be removed + last_idx = get_last_layer_with_weights(model) + for idx, layer in enumerate(model.layers): + # Last layer with weights cannot be removed, as it maps to data set labels + if idx == last_idx: + break + + # Currently supported Dense and Conv2D; these two can be combined in a single if-statement + # Keras Surgeon has a full range of support for Conv1D / Conv3D, reucurrent etc. 
- might extend in the future + if isinstance(layer, Dense): + weights = layer.get_weights()[0] + zeros = np.where(~weights.any(axis=0))[0].tolist() + surgeon.add_job('delete_channels', layer, channels=zeros) + + elif isinstance(layer, Conv2D): + weights = layer.get_weights()[0] + zeros = np.where(~weights.reshape(-1, weights.shape[-1]).any(axis=0))[0].tolist() + surgeon.add_job('delete_channels', layer, channels=zeros) + + # Reduce model + reduced = surgeon.operate() + + # By default, Keras surgeon returns a Functional model + # If the original was a Sequential, convert back + is_sequential = model.__class__.__name__ == 'Sequential' + if is_sequential: + return Sequential(layers=reduced.layers) + else: + return reduced diff --git a/hls4ml/optimization/keras/regularizers.py b/hls4ml/optimization/keras/regularizers.py new file mode 100644 index 0000000000000000000000000000000000000000..fd8a2f3656f604077b5066e2dab0d126286ea2ef --- /dev/null +++ b/hls4ml/optimization/keras/regularizers.py @@ -0,0 +1,255 @@ +import numpy as np +import tensorflow as tf + +from hls4ml.optimization.config import SUPPORTED_STRUCTURES + + +@tf.keras.utils.register_keras_serializable(name='DenseRegularizer') +class DenseRegularizer(tf.keras.regularizers.Regularizer): + ''' + A flexible regularizer for Dense layers, simultaneously penalizing high values and variance + + Args: + - alpha (float): Sparse penalty; a higher value pushes more weights towards zero + - beta (float): Variance penalty; a higer value reduces variance between a group of weights + - norm (int): Norm type (l1 or l2) + - structure_type (string): Type of regularisation - unstructured, structured, pattern, block + - block_shape (tuple): Block shape if structure_type == block + - pattern_offset (int): Length of each pattern if structure_type == pattern + - consecutive_patterns (int): How many consecutive patterns should be considered + - weights (tf.Variable): Two-dimensional layer weight tensor, dimensionality (M x N) + + 
Return: + - Regularizer penalty (tf.Variable): Penalty associated with layer weights + + Examples: + - structure_type = unstructured: unstructured weight regularisation + - structure_type = structured: neuron regularization + (group weights by row) + - structure_type = pattern: regularization on groups of every n-th weight + (e.g. grouping by reuse factor in hls4ml) + - structure_type = block: regularisation on blocks within weight matrix + (e.g. 4x4, 8x1 for certain SIMD processors) + + - consecutive_patterns is commonly encountered with optimization of BRAM utilization - + e.g. while it is true that each DSP pattern consumes one DSP, + They likely use less than one BRAM block (e.g. if the BRAM width is 36 bit and weight width is 16) + In that case, we need to group several patterns together, + So the entire block of patterns can be removed, thus saving DSP and BRAM + ''' + + def __init__( + self, + alpha, + beta=0, + norm=1, + structure_type=SUPPORTED_STRUCTURES.UNSTRUCTURED, + block_shape=(1, 1), + pattern_offset=1, + consecutive_patterns=1, + ): + if norm != 1 and norm != 2: + raise Exception(f'{self.__class__.__name__} currently supports l1- and l2-based regularization') + + if isinstance(structure_type, str): + structure_type = SUPPORTED_STRUCTURES(structure_type) + + if not isinstance(structure_type, SUPPORTED_STRUCTURES): + raise Exception(f'{self.__class__.__name__} unknown regularization type') + + self.alpha = alpha + self.beta = beta + self.norm = norm + self.structure_type = structure_type + self.block_shape = block_shape + self.pattern_offset = pattern_offset + self.consecutive_patterns = consecutive_patterns + + @tf.function + def __call__(self, weights): + if self.structure_type == SUPPORTED_STRUCTURES.UNSTRUCTURED: + sparse_penalty = self.alpha * tf.norm(weights, ord=self.norm) + variance_penalty = self.beta * tf.math.reduce_variance(weights) + return sparse_penalty + variance_penalty + + if self.structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + 
sparse_penalty = self.alpha * tf.norm(tf.norm(weights, axis=0, ord=2), ord=self.norm) + variance_penalty = self.beta * tf.norm(tf.math.reduce_variance(weights, axis=0), ord=self.norm) + return sparse_penalty + variance_penalty + + if self.structure_type == SUPPORTED_STRUCTURES.PATTERN: + # This is equivalent to penalising all the weights processed by the same DSP block in hls4ml. + # The matrix is transposed, according to Resource strategy and reshaped into (pattern_offset, pattern_number) + # Pattern offset corresponds to the number of patterns is equivalent to RF + if (np.prod(weights.shape)) % self.pattern_offset != 0: + print(np.prod(weights.shape), self.pattern_offset) + raise Exception(f'{self.__class__.__name__}: pattern offset needs to be a factor of matrix size') + + if self.pattern_offset % self.consecutive_patterns != 0: + raise Exception(f'{self.__class__.__name__}: consecutive patterns need to be a factor of pattern offset') + + # Reshape weight matrix into [number_of_patterns, pattern_offset] + number_of_patterns = np.prod(weights.shape) // self.pattern_offset + target_shape = (self.pattern_offset, number_of_patterns) + reshaped = tf.reshape(tf.transpose(weights), target_shape) + # Group consecutive patterns (columns) into blocks and reshape + # Docs for the functions to extract blocks are below [block regularization] + total_blocks = self.pattern_offset // self.consecutive_patterns + blocks = tf.reshape( + tf.image.extract_patches( + tf.expand_dims(tf.expand_dims(reshaped, 2), 0), + [1, self.consecutive_patterns, number_of_patterns, 1], + [1, self.consecutive_patterns, number_of_patterns, 1], + [1, 1, 1, 1], + 'SAME', + ), + (total_blocks, -1), + ) + + # Calculate penalty + sparse_penalty = self.alpha * tf.norm(tf.norm(blocks, axis=1, ord=2), ord=self.norm) + variance_penalty = self.beta * tf.norm(tf.math.reduce_variance(blocks, axis=1), ord=self.norm) + return sparse_penalty + variance_penalty + + if self.structure_type == 
SUPPORTED_STRUCTURES.BLOCK: + if (weights.shape[0] % self.block_shape[0]) != 0 or (weights.shape[1] % self.block_shape[1] != 0): + raise Exception(f'{self.__class__.__name__}: block sizes need to be fators of weight matrix dimensions') + + # TensorFlow has a built-in method for exctracting sub-tensors of given shape and stride + # This method is commonly used to perform im2col, + # Docs: https://www.tensorflow.org/api_docs/python/tf/image/extract_patches + total_blocks = (weights.shape[0] * weights.shape[1]) // (self.block_shape[0] * self.block_shape[1]) + blocks = tf.reshape( + tf.image.extract_patches( + tf.expand_dims(tf.expand_dims(weights, 2), 0), + [1, self.block_shape[0], self.block_shape[1], 1], + [1, self.block_shape[0], self.block_shape[1], 1], + [1, 1, 1, 1], + 'SAME', + ), + (total_blocks, self.block_shape[0] * self.block_shape[1]), + ) + + sparse_penalty = self.alpha * tf.norm(tf.norm(blocks, axis=1, ord=2), ord=self.norm) + variance_penalty = self.beta * tf.norm(tf.math.reduce_variance(blocks, axis=1), ord=self.norm) + return sparse_penalty + variance_penalty + + def get_config(self): + return { + 'alpha': self.alpha, + 'beta': self.beta, + 'norm': self.norm, + 'structure_type': self.structure_type, + 'block_shape': self.block_shape, + 'pattern_offset': self.pattern_offset, + 'consecutive_patterns': self.consecutive_patterns, + } + + +@tf.keras.utils.register_keras_serializable(name='Conv2DRegularizer') +class Conv2DRegularizer(tf.keras.regularizers.Regularizer): + ''' + A flexible regulariser for Conv2D layers, simultaneously performing pruning and clustering + + Args: + - alpha (float): Sparse penalty; a higher value pushes more weights towards zero + - beta (float): Variance penalty; a higer value reduces variance between a group of weights + - norm (int): Norm type (l1 or l2) + - structure_type (string): Type of regularisation - unstructured, structured, pattern + - pattern_offset (int): Length of each pattern if structure_type == pattern + - 
weights (tf.Variable): Four-dimensional layer weight tensor, dimensionality + (filter_width x filter_height x n_chan x n_filt) + + Return: + - Regularizer penalty (tf.Variable): Penalty associated with layer weights + + Example use cases: + - structure_type = unstructured: unstructured weight regularisation + - structure_type = structured: filter regularization + (group weights of dimensionality filt_width x filt_height x n_chan) + - structure_type = pattern: regularization on groups of every n-th weight in flattened array + (e.g. grouping by reuse factor in hls4ml) + ''' + + def __init__( + self, + alpha, + beta=0, + norm=1, + structure_type=SUPPORTED_STRUCTURES.UNSTRUCTURED, + pattern_offset=1, + consecutive_patterns=1, + ): + if norm != 1 and norm != 2: + raise Exception(f'{self.__class__.__name__} currently supports l1- and l2-based regularization') + + if isinstance(structure_type, str): + structure_type = SUPPORTED_STRUCTURES(structure_type) + + if not isinstance(structure_type, SUPPORTED_STRUCTURES): + raise Exception(f'{self.__class__.__name__} unknown regularization type') + + # Block pruning is only supported for Dense and QDense layers + if structure_type == SUPPORTED_STRUCTURES.BLOCK: + raise Exception('Block pruning is supported for 2-dimensional weight matrices') + + self.alpha = alpha + self.beta = beta + self.norm = norm + self.structure_type = structure_type + self.pattern_offset = pattern_offset + self.consecutive_patterns = consecutive_patterns + + @tf.function + def __call__(self, weights): + if len(weights.shape) != 4: + raise Exception(f'{self.__class__.__name__} regularizes Conv2D layers; weight matrix is not 4-dimensional') + + if self.structure_type == SUPPORTED_STRUCTURES.UNSTRUCTURED: + sparse_penalty = self.alpha * tf.norm(weights, ord=self.norm) + variance_penalty = self.beta * tf.math.reduce_variance(weights) + return sparse_penalty + variance_penalty + + if self.structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + sparse_penalty = 
self.alpha * tf.norm( + tf.reduce_sum(tf.norm(weights, axis=(0, 1), ord='fro'), axis=0), ord=self.norm + ) + variance_penalty = self.beta * tf.norm(tf.math.reduce_variance(weights, axis=(0, 1, 2)), ord=self.norm) + return sparse_penalty + variance_penalty + + if self.structure_type == SUPPORTED_STRUCTURES.PATTERN: + if (np.prod(weights.shape)) % self.pattern_offset != 0: + raise Exception(f'{self.__class__.__name__}: pattern offset needs to be a factor of matrix size') + + if self.pattern_offset % self.consecutive_patterns != 0: + raise Exception(f'{self.__class__.__name__}: consecutive patterns need to be a factor of pattern offset') + + number_of_patterns = np.prod(weights.shape) // self.pattern_offset + target_shape = (self.pattern_offset, number_of_patterns) + reshaped = tf.reshape(tf.transpose(weights, (3, 0, 1, 2)), target_shape) + + total_blocks = self.pattern_offset // self.consecutive_patterns + blocks = tf.reshape( + tf.image.extract_patches( + tf.expand_dims(tf.expand_dims(reshaped, 2), 0), + [1, self.consecutive_patterns, number_of_patterns, 1], + [1, self.consecutive_patterns, number_of_patterns, 1], + [1, 1, 1, 1], + 'SAME', + ), + (total_blocks, -1), + ) + + sparse_penalty = self.alpha * tf.norm(tf.norm(blocks, axis=1, ord=2), ord=self.norm) + variance_penalty = self.beta * tf.norm(tf.math.reduce_variance(blocks, axis=1), ord=self.norm) + return sparse_penalty + variance_penalty + + def get_config(self): + return { + 'alpha': self.alpha, + 'beta': self.beta, + 'norm': self.norm, + 'structure_type': self.structure_type, + 'pattern_offset': self.pattern_offset, + 'consecutive_patterns': self.consecutive_patterns, + } diff --git a/hls4ml/optimization/keras/utils.py b/hls4ml/optimization/keras/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..eaf34f41fc4213d6a2eac651c83cd7f5833deaf3 --- /dev/null +++ b/hls4ml/optimization/keras/utils.py @@ -0,0 +1,107 @@ +import numpy as np +import tensorflow as tf + + +@tf.function +def 
get_model_gradients(model, loss_fn, X, y): + ''' + Calculate model gradients with respect to weights + + Args: + - model (keras.model): Input model + - loss_fn (keras.losses.Loss): Model loss function + - X (np.array): Input data + - y (np.array): Output data + + Return: + - grads (dict): Per-layer gradients of loss with respect to weights + ''' + grads = {} + # While a persistent GradientTape slows down execution, + # it is faster than performing a forward pass with a non-persistent tape for every layer + with tf.GradientTape(persistent=True) as tape: + output = model(X, training=True) + loss_value = loss_fn(y, output) + + for layer in model.layers: + if len(layer.trainable_weights) > 0: + grads[layer.name] = tape.gradient(loss_value, layer.kernel) + + return grads + + +@tf.function +def get_model_hessians(model, loss_fn, X, y): + ''' + Calculate the second derivatives of the loss with respect to model weights + Note, only diagonal elements of the Hessian are computed + + Args: + - model (keras.model): Input model + - loss_fn (keras.losses.Loss): Model loss function + - X (np.array): Input data + - y (np.array): Output data + + Return: + - grads (dict): Per-layer second derivatives of loss with respect to weights + ''' + grads = {} + with tf.GradientTape(persistent=True) as tape: + output = model(X, training=False) + loss_value = loss_fn(y, output) + + for layer in model.layers: + if hasattr(layer, 'kernel'): + grads[layer.name] = tape.gradient(tape.gradient(loss_value, layer.kernel), layer.kernel) + + return grads + + +def get_model_sparsity(model): + ''' + Calculate total and per-layer model sparsity + + Args: + - model (keras.model): Model to be evaluated + + Return: + - sparsity (float): Model sparsity, as a percentage of zero weights w.r.t. the total number of model weights + - layers (dict): Key-value dictionary; each key is a layer name and the associated value is the layer's sparsity + + TODO - Extend support for recurrent layers (recurrent_kernel) + ''' + + total_weights 
= 0 + zero_weights = 0 + layer_sparsity = {} + + for layer in model.layers: + if hasattr(layer, 'kernel'): + weights = layer.get_weights()[0].flatten() + total_weights = total_weights + len(weights) + zero_weights = zero_weights + len(weights) - np.count_nonzero(weights) + layer_sparsity[layer.name] = 1.0 - np.count_nonzero(weights) / len(weights) + + try: + return zero_weights / total_weights, layer_sparsity + except ZeroDivisionError: + return 0.0, layer_sparsity + + +# TODO - Does this work for non-linear models (e.g. skip connections) ? +def get_last_layer_with_weights(model): + ''' + Finds the last layer with weights + The last layer with weights determined the output shape, so, pruning is sometimes not applicable to it + As an example, consider a network with 16 - 32 - 5 neurons - the last layer's neuron (5) cannot be removed + Since they map to the data labels + Args: + - model (keras.model): Input model + + Return: + - idx (int): Index location of last layer with params + ''' + for idx, layer in reversed(list(enumerate(model.layers))): + if hasattr(layer, 'kernel'): + return idx + return len(model.layers) diff --git a/hls4ml/optimization/knapsack.py b/hls4ml/optimization/knapsack.py new file mode 100644 index 0000000000000000000000000000000000000000..e198945b1b5c18f1fff711264b3638551d5a8fa8 --- /dev/null +++ b/hls4ml/optimization/knapsack.py @@ -0,0 +1,262 @@ +import sys +import time + +import numpy as np + + +def solve_knapsack(values, weights, capacity, implementation='CBC_MIP', **kwargs): + ''' + A function for solving the Knapsack problem + + Args: + - values (np.array, float): A one-dimensional array, where each entry is the value of an item + - weights (np.array, int): An matrix, each row represents the weights of every item, in a given knapsack + - capacity (np.array, int): A one-dimensional array, each entry is the maximum weights of a Knapsack + - implementation (string): Algorithm to solve Knapsack problem - dynamic programming, greedy, branch and 
bound + + Kwargs: + - time_limit (float): Limit (in seconds) after which the CBC or Branch & Bound should + stop looking for a solution and return optimal so far + - scaling_factor (float): Scaling factor for floating points values in CBC or B&B + + Return: + - optimal_value (float): The optimal values of elements in the knapsack + - selected_items (list): A list of indices, corresponding to the selected elements + + Notes: + - The general formulation of the Knapsack problem for N items and M knapsacks is: + max v.T @ x + s.t. A @ x <= W + v ~ (N, 1) x ~ (N, 1) A ~ (M, N) W ~ (M, 1) + x_{i, j} = {0, 1} and <= is the generalized, element-wise inequlaity for vectors + + - Supported implementations: + - Dynamic programming: + - Optimal solution + - Time complexity: O(nW) + - Suitable for single-dimensional constraints and a medium number of items, with integer weights + - Branch and bound: + - Optimal + - Solved using Google OR-Tools + - Suitable for multi-dimensional constraints and a large number of items + - Branch and bound: + - Solution sub-optimal, but often better than greeedy + - Solved using Google OR-Tools, with the CBC MIP Solver + - Suitable for multi-dimensional constraints and a very high number of items + - Greedy: + - Solution sub-optimal + - Time complexity: O(mn) + - Suitable for highly dimensional constraints or a very high number of items + + - Most implementations require integer values of weights and capacities; + For pruning & weight sharing this is never a problem + In case non-integer weights and capacities are requires, + All of the values should be scaled by an appropriate scaling factor + ''' + if implementation not in ('dynamic', 'greedy', 'branch_bound', 'CBC_MIP'): + raise Exception('Unknown algorithm for solving Knapsack') + + if len(values.shape) != 1: + raise Exception( + 'Current implementations of Knapsack optimization support single-objective problems. 
\ + Values must be one-dimensional' + ) + + if len(weights.shape) != 2: + raise Exception( + 'Current implementation of Knapsack assumes weight vector is 2-dimensional,' + 'to allow for multi-dimensional Knapsack problem. \ + If solving a one-dimensional Knapsack problem, extend dimensions of weights to a one-row matrix' + ) + + if values.shape[0] != weights.shape[1]: + raise Exception('Uneven number of items and weights') + + if not (np.all(values >= 0) and np.all(weights >= 0)): + raise Exception('Current implementation of Knapsack problem requires non-negative values and weights') + + if not np.all(np.equal(np.mod(capacity, 1), 0)) or not np.all(np.equal(np.mod(weights, 1), 0)): + raise Exception('Current implementation of Knapsack problem requires integer weights and capacities') + + print(f'Starting to solve Knapsack problem with {values.shape[0]} variables and a weight constraint of {capacity}') + start = time.time() + + # Special case, empty list + if values.shape[0] == 0: + return 0, [] + + # Special case, the sum of all weights is less than al the capacity constraints + if np.all(np.sum(weights, axis=1) <= capacity): + return np.sum(values), list(range(0, values.shape[0])) + + # Special case, all the item weights per knapsack are equal, so we can greedily select the ones with the highest value + if np.all([weights[i, :] == weights[i, 0] for i in range(weights.shape[0])]): + return __solve_knapsack_equal_weights(values, weights, capacity) + + # General cases + if implementation == 'dynamic': + if weights.shape[0] == 1: + optimal_value, selected_items = __solve_1d_knapsack_dp(values, weights[0], capacity[0]) + else: + raise Exception('Solving Knapsack with dynamic programming requires single-dimensional constraints') + elif implementation == 'branch_bound': + optimal_value, selected_items = __solve_knapsack_branch_and_bound(values, weights, capacity, **kwargs) + elif implementation == 'CBC_MIP': + optimal_value, selected_items = 
__solve_knapsack_cbc_mip(values, weights, capacity, **kwargs) + else: + optimal_value, selected_items = __solve_knapsack_greedy(values, weights, capacity) + + print(f'Time taken to solve Knapsack {time.time() - start}s') + return optimal_value, selected_items + + +def __solve_1d_knapsack_dp(values, weights, capacity): + ''' + Helper function to solve the 1-dimensional Knapsack problem exactly through dynamic programming + The dynamic programming approach is only suitable for one-dimensional weight constraints + Furthermore, it has a high computational complexity and it is not suitable for highly-dimensional arrays + NOTE: The weights and corresponding weight constraint need to be integers; + If not, the they should be scaled and rounded beforehand + ''' + assert len(weights.shape) == 1 + + # Build look-up table in bottom-up approach + N = values.shape[0] + K = [[0 for w in range(capacity + 1)] for i in range(N + 1)] + for i in range(1, N + 1): + for w in range(1, capacity + 1): + if weights[i - 1] <= w: + K[i][w] = max(values[i - 1] + K[i - 1][w - weights[i - 1]], K[i - 1][w]) + else: + K[i][w] = K[i - 1][w] + + # Reverse Knapsack to find selected groups + i = N + w = capacity + res = K[N][capacity] + selected = [] + while i >= 0 and res > 0: + if res == K[i - 1][w]: + pass + else: + selected.append(i - 1) + res = res - values[i - 1] + w = w - weights[i - 1] + i = i - 1 + + return K[N][capacity], selected + + +def __solve_knapsack_greedy(values, weights, capacity): + ''' + Helper function that solves the n-dimensional Knapsack algorithm with a greedy algorithm + The greedy approach should only be used for problems with many items or highly dimensional weights + The solution can [and often will] be sub-optimal; otherwise, dynamic programming, branch & bound etc. 
should be used + ''' + + # For each item, calculate the value per weight ratio (this can be thought of as item efficiency) + # The weights are scaled for every dimension, to avoid inherent bias towards large weights in a single dimension + weights_rescaled = weights / np.max(weights, axis=1)[:, np.newaxis] + ratios = values / weights_rescaled.sum(axis=0) + indices = np.argsort(ratios) + + # Greedily select item with the highest ratio (efficiency) + optimal = 0 + selected = [] + accum = np.zeros_like(capacity) + for i in reversed(indices): + if np.all((accum + weights[:, i]) <= capacity): + selected.append(i) + optimal += values[i] + accum += weights[:, i] + else: + break + + # The greedy algorithm can be sub-optimal; + # However, selecting the above elements or the next element that could not fit into the knapsack + # Will lead to solution that is at most (1/2) of the optimal solution; + # Therefore, take whichever is higher and satisfies the constraints + if values[i] > optimal and np.all(weights[:, i]) <= capacity: + return values[i], [i] + else: + return optimal, selected + + +def __solve_knapsack_branch_and_bound(values, weights, capacity, time_limit=sys.float_info.max, scaling_factor=10e4): + ''' + Helper function to solve Knapsack problem using Branch and Bound; + Implemented using Google OR-Tools [weights & capacities need to be integers] + The algorithm explores the search space (a tree of all the posible combinations, 2^N nodes), + But discards infeasible & sub-optimal solutions + + Additional args: + - time_limit - Time limit in seconds + After which B&B search should stop and return a sub-optimal solution + - scaling_factor - Factor to scale floats in values arrays; + OR-Tools requires all values & weights to be integers; + ''' + try: + from ortools.algorithms import pywrapknapsack_solver + except ModuleNotFoundError: + raise Exception('OR-Tools not found. 
Please insteal Google OR-Tools from pip.') + + solver = pywrapknapsack_solver.KnapsackSolver( + pywrapknapsack_solver.KnapsackSolver.KNAPSACK_MULTIDIMENSION_BRANCH_AND_BOUND_SOLVER, 'BB' + ) + solver.set_time_limit(time_limit) + solver.Init((values * scaling_factor).astype(int).tolist(), weights.astype(int).tolist(), capacity.astype(int).tolist()) + optimal = solver.Solve() + selected = [i for i in range(values.shape[0]) if solver.BestSolutionContains(i)] + return optimal / scaling_factor, selected + + +def __solve_knapsack_cbc_mip(values, weights, capacity, time_limit=sys.float_info.max, scaling_factor=10e4): + ''' + Helper function to solve Knapsack problem using the CBC MIP solver using Google OR-Tools + + Additional args: + - time_limit - Time limit (seconds) after which CBC solver should stop and return a sub-optimal solution + - scaling_factor - Factor to scale floats in values arrays; + OR-Tools requires all values & weights to be integers; + So all of the values are scaled by a large number + ''' + try: + from ortools.algorithms import pywrapknapsack_solver + except ModuleNotFoundError: + raise Exception('OR-Tools not found. Please insteal Google OR-Tools from pip.') + + solver = pywrapknapsack_solver.KnapsackSolver( + pywrapknapsack_solver.KnapsackSolver.KNAPSACK_MULTIDIMENSION_CBC_MIP_SOLVER, 'CBC' + ) + solver.set_time_limit(time_limit) + solver.Init((values * scaling_factor).astype(int).tolist(), weights.astype(int).tolist(), capacity.astype(int).tolist()) + optimal = solver.Solve() + selected = [i for i in range(values.shape[0]) if solver.BestSolutionContains(i)] + return optimal / scaling_factor, selected + + +def __solve_knapsack_equal_weights(values, weights, capacity): + ''' + Helper function that solves the n-dimensional Knapsack algorithm with a greedy algorithm + The assumption is that all the items have the same weight; while this seems a bit artificial + It occurs often in pruning - e.g. 
in pattern pruning, each DSP block saves one DSP; however, as a counter-example + In structured pruning, each structure can save a different amount of FLOPs (Conv2D filter vs Dense neuron) + ''' + assert np.all([weights[i, :] == weights[i, 0] for i in range(weights.shape[0])]) + + # Find items with the highest value + indices = np.argsort(values) + + # Greedily select item with the highest ratio + optimal = 0 + selected = [] + accum = np.zeros_like(capacity) + for i in reversed(indices): + if np.all((accum + weights[:, i]) <= capacity): + selected.append(i) + optimal += values[i] + accum += weights[:, i] + else: + break + + return optimal, selected diff --git a/hls4ml/optimization/objectives/__init__.py b/hls4ml/optimization/objectives/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..05d916d85fdb9b9ac9151661756b34d872cefd8a --- /dev/null +++ b/hls4ml/optimization/objectives/__init__.py @@ -0,0 +1,142 @@ +import logging +from abc import ABC, abstractmethod + +import numpy as np + +from hls4ml.optimization.attributes import OptimizationAttributes +from hls4ml.optimization.config import SUPPORTED_STRUCTURES + +''' +Pruning & weight sharing are formulated as an optimization problem, with the aim of minimising some metric +Metrics can include: total number of weights, DSP utilization, latency, FLOPs etc. +''' + + +class ObjectiveEstimator(ABC): + ''' + Abstract class with methods for estimating the utilization and savings of a certain layer, with respect to some objective + For each objective, an inherited class is written with the correct implementaton of the below methods + The objectives can be multi-dimensional, e.g. 
DSPs and BRAM + Care needs to be taken when optimizing several objectives, especially if conflicting + ''' + + @abstractmethod + def is_layer_optimizable(self, layer_attributes): + ''' + For a given layer, checks whether optimizations make sense, with respect to the given objective(s) + Furthermore, it returns the type of optimization (structured, unstructured etc.) + Most suitable for minimising the objective(s). + + Args: + - layer_attributes (hls4ml.optimiation.attributes.LayerAttributes) + + Return: + - optimizable (boolean) - can optimizations be applied to this layer + - optimization_attributes (hls4ml.optimiation.attributes.OptimizationAttributes) - + Most suitable approach for optimization + + Examples: + - Metric = Total weights, Layer = Dense, shape = (4, 4) -> return True, unstructured + - Metric = DSP, Layer = Dense, Precision = ap_fixed<8, 0> -> return False + (Vivado doesn't use DSP when precision < 9) + - Metric = DSP, Layer = Dense, Precision = ap_fixed<16, 6> -> + return True, pattern structure, both pruning and weight sharing + ''' + pass + + @abstractmethod + def layer_resources(self, layer_attributes): + ''' + For a given layer, how many units of the metric are used, given a generic weight matrix + + Args: + - layer_attributes (hls4ml.optimiation.attributes.LayerAttributes) + + Return: + - resources (list, int) - total resources (w.r.t every dimension of the objective) used + + Example: Metric = Total weights, Layer = Dense, shape = (4, 4) -> return [16] [regardless of layer sparsity] + ''' + pass + + @abstractmethod + def layer_savings(self, layer_attributes): + ''' + For a given layer, how many units of the metric are saved, when optimizing one structure + The structure type, alongside its parameters (e.g. 
block shape) are stored in layer attributes + For best results, OptimizationAttributes in layer_attribtues should be obtained from is_layer_optimizable + + Args: + - layer_attributes (hls4ml.optimiation.attributes.LayerAttributes) + + Return: + - savings (list, int) - savings achieved (one for every dimenson of objective) + With OptimizationAttributes from layer_attributes + + Example: Metric = Total weights, Layer = Dense, shape = (4, 4): + - structure_type == unstructured -> return [1] + - structure_type == structured -> return [4] + ''' + pass + + +''' +A class containing objective estimation with the goal of minimizing +The number of non-zero weights in a layer [corresponds to unstructured pruning] +''' + + +class ParameterEstimator(ObjectiveEstimator): + @classmethod + def is_layer_optimizable(self, layer_attributes): + if not layer_attributes.weight_shape: + return False, None + else: + return True, OptimizationAttributes(SUPPORTED_STRUCTURES.UNSTRUCTURED, pruning=True, weight_sharing=False) + + @classmethod + def layer_resources(self, layer_attributes): + if not layer_attributes.weight_shape: + return [0] + else: + return [np.prod(layer_attributes.weight_shape)] + + @classmethod + def layer_savings(self, layer_attributes): + if not layer_attributes.weight_shape: + return [0] + + structure_type = layer_attributes.optimization_attributes.structure_type + pruning = layer_attributes.optimization_attributes.pruning + weight_sharing = layer_attributes.optimization_attributes.weight_sharing + + if weight_sharing: + logging.warn( + 'Weight sharing does not decrease the number of parameters. \ + It is recommened to use the default attributes, returned from is_layer_optimizable(...)' + ) + return [0] + + if not pruning: + logging.warn( + 'Pruning needs to be enabled to decrease the number of parameters. 
\ + It is recommened to use the default attributes, returned from is_layer_optimizable(...)' + ) + return [0] + + # In this case, pruning = True and weight_sharing = False, + # So calculate savings incurred by removing a group of weights + if structure_type == SUPPORTED_STRUCTURES.UNSTRUCTURED: + return [1] + elif structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + if 'Dense' in layer_attributes.layer_type.__name__ or 'Conv2D' in layer_attributes.layer_type.__name__: + return [np.prod(layer_attributes.weight_shape[:-1])] + else: + raise Exception('Unknown layer encountered when estimating parameter savings.') + elif structure_type == SUPPORTED_STRUCTURES.PATTERN: + number_of_patterns = ( + np.prod(layer_attributes.weight_shape) // layer_attributes.optimization_attributes.pattern_offset + ) + return [number_of_patterns * layer_attributes.optimization_attributes.consecutive_patterns] + elif structure_type == SUPPORTED_STRUCTURES.BLOCK: + return [np.prod(layer_attributes.optimization_attributes.block_shape)] diff --git a/hls4ml/optimization/objectives/gpu_objectives.py b/hls4ml/optimization/objectives/gpu_objectives.py new file mode 100644 index 0000000000000000000000000000000000000000..8528a318398cb365b4a856e6c82e8c961846d13d --- /dev/null +++ b/hls4ml/optimization/objectives/gpu_objectives.py @@ -0,0 +1,81 @@ +import logging + +import numpy as np + +from hls4ml.optimization.attributes import OptimizationAttributes +from hls4ml.optimization.config import SUPPORTED_STRUCTURES +from hls4ml.optimization.objectives import ObjectiveEstimator + + +class GPUFLOPEstimator(ObjectiveEstimator): + @classmethod + def is_layer_optimizable(self, layer_attributes): + if not layer_attributes.weight_shape: + return False, None + else: + return True, OptimizationAttributes( + structure_type=SUPPORTED_STRUCTURES.STRUCTURED, pruning=True, weight_sharing=False + ) + + @classmethod + def layer_resources(self, layer_attributes): + if not layer_attributes.weight_shape: + return [0] + else: 
+ if 'Dense' in layer_attributes.layer_type.__name__: + return [2 * np.prod(layer_attributes.weight_shape) + layer_attributes.weight_shape[1]] + elif 'Conv2D' in layer_attributes.layer_type.__name__: + return [ + 2 + * np.prod(layer_attributes.weight_shape) + * layer_attributes.output_shape[0] + * layer_attributes.output_shape[1] + + layer_attributes.weight_shape[3] + ] + else: + raise Exception('Unknown layer encountered when estimating FLOP utilization.') + + @classmethod + def layer_savings(self, layer_attributes): + if not layer_attributes.weight_shape: + return [0] + + structure_type = layer_attributes.optimization_attributes.structure_type + pruning = layer_attributes.optimization_attributes.pruning + weight_sharing = layer_attributes.optimization_attributes.weight_sharing + + if weight_sharing: + logging.warn( + 'Weight sharing does not decrease FLOPs. \ + It is recommened to use the default attributes, returned from is_layer_optimizable(...)' + ) + return [0] + + if not pruning: + logging.warn( + 'Pruning needs to be enabled to decrease FLOPs. \ + It is recommened to use the default attributes, returned from is_layer_optimizable(...)' + ) + return [0] + + # TODO - The below formulas underestimate FLOP savings + # Removing a filter in a layer removes channels / neurons in subsequent layers + if structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + if 'Dense' in layer_attributes.layer_type.__name__: + return [2 * layer_attributes.weight_shape[0] + 1] + elif 'Conv2D' in layer_attributes.layer_type.__name__: + return [ + 2 + * np.prod(layer_attributes.weight_shape[0:3]) + * layer_attributes.output_shape[0] + * layer_attributes.output_shape[1] + + 1 + ] + else: + raise Exception('Unknown layer encountered when estimating FLOP savings.') + else: + logging.warn( + 'FLOP savings occur with structured pruning. 
\ + It is recommened to use the default attributes, returned from is_layer_optimizable(...)' + ) + return [0] diff --git a/hls4ml/optimization/objectives/vivado_objectives.py b/hls4ml/optimization/objectives/vivado_objectives.py new file mode 100644 index 0000000000000000000000000000000000000000..c0c0c33e09df8ab6e95920236b97e19c6a91d838 --- /dev/null +++ b/hls4ml/optimization/objectives/vivado_objectives.py @@ -0,0 +1,372 @@ +import logging +import math + +import numpy as np + +from hls4ml.optimization.attributes import OptimizationAttributes +from hls4ml.optimization.config import SUPPORTED_STRUCTURES +from hls4ml.optimization.objectives import ObjectiveEstimator + + +# Optimizes DSP utilisation for Vivado backend +class VivadoDSPEstimator(ObjectiveEstimator): + @classmethod + def is_layer_optimizable(self, layer_attributes): + if not layer_attributes.weight_shape or layer_attributes.args['hls4ml_attributes'].weight_precision.width < 9: + return False, None + else: + if layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'resource': + return True, OptimizationAttributes( + SUPPORTED_STRUCTURES.PATTERN, + pruning=True, + weight_sharing=False, + pattern_offset=np.prod(layer_attributes.weight_shape) + // layer_attributes.args['hls4ml_attributes'].reuse_factor, + consecutive_patterns=1, + ) + else: + return True, OptimizationAttributes(SUPPORTED_STRUCTURES.UNSTRUCTURED, pruning=True, weight_sharing=False) + + @classmethod + def layer_resources(self, layer_attributes): + if not layer_attributes.weight_shape or layer_attributes.args['hls4ml_attributes'].weight_precision.width < 9: + return [0] + else: + # TOOD - Extend for parallelisation factor + return [np.prod(layer_attributes.weight_shape) // layer_attributes.args['hls4ml_attributes'].reuse_factor] + + @classmethod + def layer_savings(self, layer_attributes): + if not layer_attributes.weight_shape or layer_attributes.args['hls4ml_attributes'].weight_precision.width < 9: + return [0] + + # TODO - Once we 
know how to implement constant coefficient multiplication via LUT, enable for weight sharing + pruning = layer_attributes.optimization_attributes.pruning + if not pruning: + logging.warn( + 'Pruning needs to be enabled to decrease the number of DSPs used. \ + It is recommened to use the default attributes, returned from is_layer_optimizable(...)' + ) + return [0] + + structure_type = layer_attributes.optimization_attributes.structure_type + if layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'latency': + if layer_attributes.args['hls4ml_attributes'].reuse_factor == 1: + return [1] + else: + return [0] + else: + if structure_type == SUPPORTED_STRUCTURES.UNSTRUCTURED: + if layer_attributes.args['hls4ml_attributes'].reuse_factor == 1: + return [1] + else: + return [0] + elif structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + if ( + layer_attributes.args['hls4ml_attributes'].reuse_factor + == layer_attributes.args['hls4ml_attributes'].n_in + ): + return [1] + else: + return [0] + elif structure_type == SUPPORTED_STRUCTURES.PATTERN: + pattern_offset = layer_attributes.optimization_attributes.pattern_offset + number_of_patterns = np.prod(layer_attributes.weight_shape) // pattern_offset + + if number_of_patterns == layer_attributes.args['hls4ml_attributes'].reuse_factor: + return [layer_attributes.optimization_attributes.consecutive_patterns] + else: + return [0] + elif structure_type == SUPPORTED_STRUCTURES.BLOCK: + logging.warn('hls4ml does not support block sparsity patterns...setting layer savings to zero') + return [0] + + +# Optimizes BRAM and DSP for Vivado backend +class VivadoMultiObjectiveEstimator(ObjectiveEstimator): + @classmethod + def is_layer_optimizable(self, layer_attributes): + if not layer_attributes.weight_shape: + return False, None + + if ( + layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'resource' + and layer_attributes.args['hls4ml_attributes'].reuse_factor > 1 + ): + if 36 % 
layer_attributes.args['hls4ml_attributes'].weight_precision.width == 0: + consecutive_patterns = int(36 // layer_attributes.args['hls4ml_attributes'].weight_precision.width) + else: + consecutive_patterns = int( + math.ceil(2 * 36 / layer_attributes.args['hls4ml_attributes'].weight_precision.width) + ) + + return True, OptimizationAttributes( + SUPPORTED_STRUCTURES.PATTERN, + pruning=True, + weight_sharing=False, + pattern_offset=int( + np.prod(layer_attributes.weight_shape) // layer_attributes.args['hls4ml_attributes'].reuse_factor + ), + consecutive_patterns=consecutive_patterns, + ) + else: + return True, OptimizationAttributes(SUPPORTED_STRUCTURES.UNSTRUCTURED, pruning=True, weight_sharing=False) + + @classmethod + def layer_resources(self, layer_attributes): + if not layer_attributes.weight_shape: + return [0] + + # TOOD - Extend for parallelisation factor + if layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'latency': + return [ + int(np.prod(layer_attributes.weight_shape) // layer_attributes.args['hls4ml_attributes'].reuse_factor), + 0, + ] + else: + # Resource strategy, RF == 1 is similar to Latency strategy (but slower) + if layer_attributes.args['hls4ml_attributes'].reuse_factor == 1: + return [ + int(np.prod(layer_attributes.weight_shape) // layer_attributes.args['hls4ml_attributes'].reuse_factor), + 0, + ] + else: + # For RF > 1, BRAM utilised by weights can be estimated by (bit_width * n_in * n_out) / (RF * 36) + return [ + int(np.prod(layer_attributes.weight_shape) // layer_attributes.args['hls4ml_attributes'].reuse_factor), + int( + math.ceil( + np.prod(layer_attributes.weight_shape) + * layer_attributes.args['hls4ml_attributes'].weight_precision.width + / (layer_attributes.args['hls4ml_attributes'].reuse_factor * 36) + ) + ), + ] + + @classmethod + def layer_savings(self, layer_attributes): + if not layer_attributes.weight_shape: + return [0] + + # TODO - Once we know how to implement constant coefficient multiplication via LUT, 
enable for weight sharing + pruning = layer_attributes.optimization_attributes.pruning + if not pruning: + logging.warn( + 'Pruning needs to be enabled to decrease the number of DSPs used. \ + It is recommened to use the default attributes, returned from is_layer_optimizable(...)' + ) + return [0] + + structure_type = layer_attributes.optimization_attributes.structure_type + if layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'latency': + if layer_attributes.args['hls4ml_attributes'].reuse_factor == 1: + return [1, 0] + else: + return [0, 0] + else: + if ( + layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'resource' + and layer_attributes.args['hls4ml_attributes'].reuse_factor == 1 + ): + return [1, 0] + else: + if structure_type == SUPPORTED_STRUCTURES.PATTERN: + pattern_offset = layer_attributes.optimization_attributes.pattern_offset + consecutive_patterns = layer_attributes.optimization_attributes.consecutive_patterns + weight_precision = layer_attributes.args['hls4ml_attributes'].weight_precision.width + + number_of_patterns = np.prod(layer_attributes.weight_shape) // pattern_offset + saved_one_bram_block = ( + 36 == consecutive_patterns * weight_precision and 36 % weight_precision == 0 + ) or (72 == consecutive_patterns * weight_precision) + + if ( + number_of_patterns == layer_attributes.args['hls4ml_attributes'].reuse_factor + and saved_one_bram_block + ): + return [consecutive_patterns, 1] + else: + logging.warn('Support for multi-objective optimisation is not fully implemented yet....') + return [0, 0] + else: + logging.warn('Support for multi-objective optimisation is not fully implemented yet....') + return [0, 0] + + +class VivadoFFEstimator(ObjectiveEstimator): + @classmethod + def is_layer_optimizable(self, layer_attributes): + if not layer_attributes.weight_shape: + return False, None + + # Resource strategy and I/O type io_stream store both weights and activation tensors in BRAM; skipping + if ( + 
layer_attributes.args['hls4ml_attributes'].io_type == 'io_stream' + and layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'resource' + ): + logging.warn('FFs are at minimum utilization with io_stream and Resource strategy') + return False, None + + # With io_stream in Latency, weight are stored in FFs, so unstructured pruning will benefit the most + if ( + layer_attributes.args['hls4ml_attributes'].io_type == 'io_stream' + and layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'latency' + ): + return True, OptimizationAttributes(SUPPORTED_STRUCTURES.UNSTRUCTURED, pruning=True, weight_sharing=False) + + # In io_parallel with Resource, weights are stored in BRAM but activation tensors in FFs, + # So structured pruning is the most suitable, it reduces the size out output before compile-time + if ( + layer_attributes.args['hls4ml_attributes'].io_type == 'io_parallel' + and layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'resource' + ): + return True, OptimizationAttributes(SUPPORTED_STRUCTURES.STRUCTURED, pruning=True, weight_sharing=False) + + # In io_parallel with Latency, weights and activation tensors are all stored in FFs, + # So it is equivalent to unstructured, high sparsity pruning + if ( + layer_attributes.args['hls4ml_attributes'].io_type == 'io_parallel' + and layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'latency' + ): + return True, OptimizationAttributes(SUPPORTED_STRUCTURES.UNSTRUCTURED, pruning=True, weight_sharing=False) + + # TODO - This method is inaccurate (accross all cases); in general, estimating FFs is hard, + # But as long as it is consistent(ly wrong), it should not matter for the pruning + @classmethod + def layer_resources(self, layer_attributes): + if not layer_attributes.weight_shape: + return [0] + + # Resource strategy and I/O type io_stream store both weights and activation tensors in BRAM; minimal FF utilization + if ( + layer_attributes.args['hls4ml_attributes'].io_type == 
'io_stream' + and layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'resource' + ): + return [0] + + # With io_stream in Latency, weight are stored in FFs, so FF ~ number_of_weights x weight_precision + if ( + layer_attributes.args['hls4ml_attributes'].io_type == 'io_stream' + and layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'latency' + ): + return [ + np.prod(layer_attributes.weight_shape) * layer_attributes.args['hls4ml_attributes'].weight_precision.width + ] + + # In io_parallel with Resource, weights are stored in BRAM but activation tensors in FFs, + # So FF ~ number_of_outputs x weight_precision + if ( + layer_attributes.args['hls4ml_attributes'].io_type == 'io_parallel' + and layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'resource' + ): + return [ + np.prod(layer_attributes.output_shape) * layer_attributes.args['hls4ml_attributes'].output_precision.width + ] + + # In io_parallel with Latency, weights and latency are all stored in FFs, + # So it is equivalent to the sum of the above two cases + if ( + layer_attributes.args['hls4ml_attributes'].io_type == 'io_parallel' + and layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'latency' + ): + return [ + np.prod(layer_attributes.weight_shape) * layer_attributes.args['hls4ml_attributes'].weight_precision.width + + np.prod(layer_attributes.output_shape) * layer_attributes.args['hls4ml_attributes'].output_precision.width + ] + + @classmethod + def layer_savings(self, layer_attributes): + if not layer_attributes.weight_shape: + return [0] + + structure_type = layer_attributes.optimization_attributes.structure_type + pruning = layer_attributes.optimization_attributes.pruning + weight_sharing = layer_attributes.optimization_attributes.weight_sharing + + if weight_sharing: + logging.warn( + 'Weight sharing does not decrease the number of parameters. 
\ + It is recommened to use the default attributes, returned from is_layer_optimizable(...)' + ) + return [0] + + if not pruning: + logging.warn( + 'Pruning needs to be enabled to decrease the number of parameters. \ + It is recommened to use the default attributes, returned from is_layer_optimizable(...)' + ) + return [0] + + # Resource strategy and I/O type io_stream store both weights and activation tensors in BRAM; minimal FF utilization + if ( + layer_attributes.args['hls4ml_attributes'].io_type == 'io_stream' + and layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'resource' + ): + return [0] + + # With io_stream in Latency, weight are stored in FFs, so any type of pruning will help: + if ( + layer_attributes.args['hls4ml_attributes'].io_type == 'io_stream' + and layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'latency' + ): + if structure_type == SUPPORTED_STRUCTURES.UNSTRUCTURED: + return [layer_attributes.args['hls4ml_attributes'].weight_precision.width] + elif structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + return [ + layer_attributes.args['hls4ml_attributes'].n_in + * layer_attributes.args['hls4ml_attributes'].weight_precision.width + ] + elif structure_type == SUPPORTED_STRUCTURES.PATTERN: + number_of_patterns = ( + np.prod(layer_attributes.weight_shape) // layer_attributes.optimization_attributes.pattern_offset + ) + return [ + number_of_patterns + * layer_attributes.optimization_attributes.consecutive_patterns + * layer_attributes.args['hls4ml_attributes'].weight_precision.width + ] + elif structure_type == SUPPORTED_STRUCTURES.BLOCK: + return [ + np.prod(layer_attributes.optimization_attributes.block_shape) + * layer_attributes.args['hls4ml_attributes'].weight_precision.width + ] + + # In io_parallel with Resource, weights are stored in BRAM but activation tensors in FFs, + # So only structured pruning helps + if ( + layer_attributes.args['hls4ml_attributes'].io_type == 'io_parallel' + and 
layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'resource' + ): + if structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + return [layer_attributes.args['hls4ml_attributes'].output_precision.width] + else: + return [0] + + # In io_parallel with Latency, weights and latency are all stored in FFs, so any type of pruning helps + if ( + layer_attributes.args['hls4ml_attributes'].io_type == 'io_parallel' + and layer_attributes.args['hls4ml_attributes'].strategy.lower() == 'latency' + ): + # This is a significant under-estimate, as some savings are incurred due to less intermediate results + if structure_type == SUPPORTED_STRUCTURES.UNSTRUCTURED: + return [layer_attributes.args['hls4ml_attributes'].weight_precision.width] + elif structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + return [ + layer_attributes.args['hls4ml_attributes'].n_in + * layer_attributes.args['hls4ml_attributes'].weight_precision.width + + layer_attributes.args['hls4ml_attributes'].output_precision.width + ] + elif structure_type == SUPPORTED_STRUCTURES.PATTERN: + number_of_patterns = ( + np.prod(layer_attributes.weight_shape) // layer_attributes.optimization_attributes.pattern_offset + ) + return [ + number_of_patterns + * layer_attributes.optimization_attributes.consecutive_patterns + * layer_attributes.args['hls4ml_attributes'].weight_precision.width + ] + elif structure_type == SUPPORTED_STRUCTURES.BLOCK: + return [ + np.prod(layer_attributes.optimization_attributes.block_shape) + * layer_attributes.args['hls4ml_attributes'].weight_precision.width + ] diff --git a/hls4ml/optimization/scheduler.py b/hls4ml/optimization/scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..347427d5dd9f27511d4342f8f1303417360b5a50 --- /dev/null +++ b/hls4ml/optimization/scheduler.py @@ -0,0 +1,149 @@ +from abc import ABC, abstractmethod + + +class OptimizationScheduler(ABC): + ''' + Baseline class handling logic regarding target sparsity and its updates at every step + ''' 
+
+    def __init__(self, initial_sparsity=0, final_sparsity=1):
+        '''
+        initial_sparsity and final_sparsity are between 0.0 and 1.0, NOT 0% and 100%
+        '''
+        if initial_sparsity < 0 or initial_sparsity > 1:
+            raise Exception('initial_sparsity must be between 0.0 and 1.0')
+        if final_sparsity < 0 or final_sparsity > 1:
+            raise Exception('final_sparsity must be between 0.0 and 1.0')
+        self.sparsity = initial_sparsity
+        self.lower_bound = initial_sparsity
+        self.upper_bound = final_sparsity
+
+    @abstractmethod
+    def update_step(self):
+        '''
+        Increments the current sparsity, according to the rule, examples:
+            - ConstantScheduler, sparsity = 0.5, increment = 0.05 -> sparsity = 0.55
+            - BinaryScheduler, sparsity = 0.5, target = 1.0 -> sparsity = 0.75
+
+        Return:
+            - updated (boolean) - Has the sparsity changed? If not, the optimization algorithm can stop
+            - sparsity (float) - Updated sparsity
+        '''
+        pass
+
+    @abstractmethod
+    def repair_step(self):
+        '''
+        Method used when the neural architecture does not satisfy the performance requirement for a given sparsity
+        Then, the target sparsity is decreased according to the rule, examples:
+            - ConstantScheduler, sparsity = 0.5, increment = 0.05 -> sparsity = 0.55 [see ConstantScheduler for explanation]
+            - BinaryScheduler, sparsity = 0.75, target = 1.0, previous = 0.5 -> sparsity = (0.5 + 0.75) / 2 = 0.625
+
+        Return:
+            - updated (boolean) - Has the sparsity changed? If not, the optimization algorithm can stop
+            - sparsity (float) - Updated sparsity
+        '''
+        pass
+
+    def get_sparsity(self):
+        return self.sparsity
+
+
+class ConstantScheduler(OptimizationScheduler):
+    '''
+    Sparsity updated by a constant term, until
+        (i) sparsity target reached OR
+        (ii) optimization algorithm stops requesting state updates
+    '''
+
+    def __init__(self, initial_sparsity=0, final_sparsity=1.0, update_step=0.05):
+        self.increment = update_step
+        super().__init__(initial_sparsity, final_sparsity)
+
+    def update_step(self):
+        if self.sparsity + self.increment <= self.upper_bound:
+            self.sparsity += self.increment
+            return True, self.sparsity
+        else:
+            return False, self.sparsity
+
+    '''
+    In certain cases, a model might underperform at the current sparsity level,
+    But perform better at a higher sparsity
+    In this case, constant sparsity (since it increments by a small amount every time),
+    Will simply jump to the next sparsity level
+    The model's performance over several sparsity levels optimization is tracked and
+    Stopped after high loss over several trials (see top level pruning/optimization function)
+
+    '''
+
+    def repair_step(self):
+        return self.update_step()
+
+
+class BinaryScheduler(OptimizationScheduler):
+    '''
+    Sparsity updated by binary halving the search space; constantly updates lower and upper bounds
+    In the update step, sparsity is incremented, as the midpoint between previous sparsity and target sparsity (upper bound)
+    In the repair step, sparsity is decremented, as the midpoint between the lower bound and previous sparsity
+    '''
+
+    def __init__(self, initial_sparsity=0, final_sparsity=1.0, threshold=0.01):
+        self.threshold = threshold
+        super().__init__(initial_sparsity, final_sparsity)
+
+    def update_step(self):
+        if self.upper_bound - self.sparsity >= self.threshold:
+            self.lower_bound = self.sparsity
+            self.sparsity = 0.5 * (self.lower_bound + self.upper_bound)
+            return True, self.sparsity
+        else:
+            self.lower_bound = self.sparsity
+            return False, self.sparsity
+
+    def repair_step(self):
+        if self.sparsity - self.lower_bound >= self.threshold:
+            self.upper_bound = self.sparsity
+            self.sparsity = 0.5 * (self.lower_bound + self.upper_bound)
+            return True, self.sparsity
+        else:
+            self.upper_bound = self.sparsity
+            return False, self.sparsity
+
+
+class PolynomialScheduler(OptimizationScheduler):
+    '''
+    Sparsity updated by a polynomial decay, until
+        (i) sparsity target reached OR
+        (ii) optimization algorithm stops requesting state updates
+    For more information, see Zhu & Gupta (2016) -
+    'To prune, or not to prune: exploring the efficacy of pruning for model compression'
+    Note, the implementation is slightly different,
+    Since TensorFlow Prune API depends on the total number of epochs and update frequency
+    '''
+
+    def __init__(self, maximum_steps, initial_sparsity=0, final_sparsity=1.0, decay_power=3):
+        self.decay_power = decay_power
+        self.current_step = 0
+        self.maximum_steps = maximum_steps
+        super().__init__(initial_sparsity, final_sparsity)
+
+    def update_step(self):
+        if self.current_step < self.maximum_steps:
+            self.current_step += 1
+            self.sparsity = self.upper_bound + (self.lower_bound - self.upper_bound) * (
+                (1 - self.current_step / self.maximum_steps) ** self.decay_power
+            )
+            return True, self.sparsity
+        else:
+            return False, self.sparsity
+
+    '''
+    In certain cases, a model might underperform at the current sparsity level, but perform better at a higher sparsity
+    In this case, polynomial sparsity, will simply jump to the next sparsity level
+    The model's performance over several sparsity levels optimization is tracked and
+    stopped after high loss over several trials (see top level pruning/optimization function)
+    '''
+
+    def repair_step(self):
+        return self.update_step()
diff --git a/setup.cfg b/setup.cfg
index d442412c622a8ce452a0a200f411ff7d2d167491..24e78e4bd680493e7aaceb6d6825a1670e710818 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -40,6 +40,10 @@
pytest_randomly.random_seeder = hls4ml = hls4ml:reseed [options.extras_require] +optimization = + keras-surgeon@git+https://github.com/fastmachinelearning/keras-surgeon.git + ortools==9.4.1874 + packaging profiling = matplotlib pandas diff --git a/test/pytest/ci-template.yml b/test/pytest/ci-template.yml index bbe8df4944f892031bda1767545afb13cf0a8173..5477da933a3af62cefbb488be27d89831b9348f9 100644 --- a/test/pytest/ci-template.yml +++ b/test/pytest/ci-template.yml @@ -7,7 +7,7 @@ - source ~/.bashrc - if [ $EXAMPLEMODEL == 1 ]; then git submodule init; git submodule update; fi - conda activate hls4ml-testing - - pip install .[testing,sr] + - pip install .[testing,sr,optimization] script: - cd test/pytest - pytest $PYTESTFILE -rA --cov-report xml --cov-report term --cov=hls4ml --junitxml=report.xml --randomly-seed=42 --randomly-dont-reorganize --randomly-dont-reset-seed diff --git a/test/pytest/generate_ci_yaml.py b/test/pytest/generate_ci_yaml.py index 6d816a7abbef94fd8fa629b8c1026e019de4fa18..46c1fa9e5b8c2f6e77798ef49dfcedd07f630ab7 100644 --- a/test/pytest/generate_ci_yaml.py +++ b/test/pytest/generate_ci_yaml.py @@ -32,5 +32,26 @@ for test in tests: else: yml.update(new_yml) +# hls4ml Optimization API +tests = glob.glob('test_optimization/test_*.py') +for test in tests: + name = test.replace('test_optimization/', '').replace('test_', '').replace('.py', '') + new_yml = yaml.safe_load(template.format(name, f'test_optimization/test_{name}.py', int(uses_example_model(test)))) + if yml is None: + yml = new_yml + else: + yml.update(new_yml) + +tests = glob.glob('test_optimization/test_keras/test_*.py') +for test in tests: + name = test.replace('test_optimization/test_keras/', '').replace('test_', '').replace('.py', '') + new_yml = yaml.safe_load( + template.format(name, f'test_optimization/test_keras/test_{name}.py', int(uses_example_model(test))) + ) + if yml is None: + yml = new_yml + else: + yml.update(new_yml) + yamlfile = open('pytests.yml', 'w') 
yaml.safe_dump(yml, yamlfile) diff --git a/test/pytest/test_optimization/test_attributes.py b/test/pytest/test_optimization/test_attributes.py new file mode 100644 index 0000000000000000000000000000000000000000..2669321e09d5e3853ef44a0596029ec3c3407533 --- /dev/null +++ b/test/pytest/test_optimization/test_attributes.py @@ -0,0 +1,96 @@ +from tensorflow.keras.layers import Conv2D, Dense, Flatten, ReLU +from tensorflow.keras.models import Sequential + +from hls4ml.optimization.attributes import get_attributes_from_keras_model_and_hls4ml_config +from hls4ml.utils.config import config_from_keras_model + + +def test_attributes(): + dense_units = 16 + conv_filters = 6 + conv_channels = 3 + conv_shape = (3, 3) + input_shape = (8, 8) + io_type = 'io_parallel' + strategy = 'Resource' + + model = Sequential() + model.add( + Conv2D( + conv_filters, + input_shape=(*input_shape, conv_channels), + kernel_size=conv_shape, + name='conv2d', + padding='same', + kernel_initializer='ones', + ) + ) + model.add(Flatten(name='flatten')) + model.add(Dense(dense_units, name='dense', kernel_initializer='ones')) + model.add(ReLU(name='relu')) + + default_reuse_factor = 2 + default_precision = 'ap_fixed<8, 0>' + cfg = config_from_keras_model( + model, granularity='name', default_precision=default_precision, default_reuse_factor=default_reuse_factor + ) + cfg['IOType'] = io_type + cfg['Model']['Strategy'] = strategy + cfg['LayerName']['dense']['ReuseFactor'] = 1 + + # Verify correct information for every layer + model_attributes = get_attributes_from_keras_model_and_hls4ml_config(model, cfg) + assert len(model_attributes) == 4 + + # conv2d + assert model_attributes['conv2d'].name == 'conv2d' + assert model_attributes['conv2d'].layer_type.__name__ == 'Conv2D' + assert model_attributes['conv2d'].inbound_layers == [] + assert model_attributes['conv2d'].weight_shape == (3, 3, 3, 6) + assert model_attributes['conv2d'].input_shape == (8, 8, 3) + assert model_attributes['conv2d'].output_shape == (8, 
8, 6) + assert not model_attributes['conv2d'].optimizable + assert model_attributes['conv2d'].args['hls4ml_attributes'].n_in == 9 + assert model_attributes['conv2d'].args['hls4ml_attributes'].n_out == 6 + assert model_attributes['conv2d'].args['hls4ml_attributes'].io_type == io_type + assert model_attributes['conv2d'].args['hls4ml_attributes'].strategy == strategy + assert model_attributes['conv2d'].args['hls4ml_attributes'].reuse_factor == default_reuse_factor + assert model_attributes['conv2d'].args['hls4ml_attributes'].weight_precision.width == 8 + assert model_attributes['conv2d'].args['hls4ml_attributes'].parallelization_factor == 1 + + # flatten + assert model_attributes['flatten'].name == 'flatten' + assert model_attributes['flatten'].layer_type.__name__ == 'Flatten' + assert model_attributes['flatten'].weight_shape == () + assert model_attributes['flatten'].input_shape == (8, 8, 6) + assert model_attributes['flatten'].output_shape == (384,) + assert not model_attributes['flatten'].optimizable + + # Flatten is not optimizable so hls4mlAttributes (n_in, n_out, reuse factor etc.) 
will not be stored for it + assert 'hls4ml_attributes' not in model_attributes['flatten'].args + + # dense + assert model_attributes['dense'].name == 'dense' + assert model_attributes['dense'].layer_type.__name__ == 'Dense' + assert model_attributes['dense'].weight_shape == (384, 16) + assert model_attributes['dense'].input_shape == (384,) + assert model_attributes['dense'].output_shape == (16,) + assert not model_attributes['dense'].optimizable + assert model_attributes['dense'].args['hls4ml_attributes'].n_in == 384 + assert model_attributes['dense'].args['hls4ml_attributes'].n_out == 16 + assert model_attributes['dense'].args['hls4ml_attributes'].io_type == io_type + assert model_attributes['dense'].args['hls4ml_attributes'].strategy == strategy + assert model_attributes['dense'].args['hls4ml_attributes'].reuse_factor == 1 + assert model_attributes['dense'].args['hls4ml_attributes'].output_precision.width == 8 + assert model_attributes['dense'].args['hls4ml_attributes'].parallelization_factor == 1 + + # relu + assert model_attributes['relu'].name == 'relu' + assert model_attributes['relu'].layer_type.__name__ == 'ReLU' + assert model_attributes['relu'].weight_shape == () + assert model_attributes['relu'].input_shape == (16,) + assert model_attributes['relu'].output_shape == (16,) + assert not model_attributes['relu'].optimizable + + # ReLU is not optimizable so hls4mlAttributes (n_in, n_out, reuse factor etc.) 
will not be stored for it + assert 'hls4ml_attributes' not in model_attributes['relu'].args diff --git a/test/pytest/test_optimization/test_keras/test_masking.py b/test/pytest/test_optimization/test_keras/test_masking.py new file mode 100644 index 0000000000000000000000000000000000000000..5c5e60aca76bea4e6d003f247283aef0680ad19a --- /dev/null +++ b/test/pytest/test_optimization/test_keras/test_masking.py @@ -0,0 +1,451 @@ +import numpy as np +import pytest +from qkeras import QConv2D, QDense +from tensorflow.keras.layers import Conv2D, Dense, Flatten +from tensorflow.keras.models import Sequential + +from hls4ml.optimization.attributes import get_attributes_from_keras_model +from hls4ml.optimization.config import SUPPORTED_STRUCTURES +from hls4ml.optimization.keras.masking import get_model_masks +from hls4ml.optimization.objectives import ParameterEstimator + +''' +In all the tests, an artifical network with one Dense/Conv2D layer and pre-determined weights is created +Then, the tests assert zeros occur in the correct places, based on the masking structure (unstructured, block etc.) 
+Furthermore, tests assert the masks are binary, so only zeros and ones occur +Masking is such that: + * non_zero_params <= (1 - sparsity) * total_params OR + * zero_params > sparsity * total_params +Since the targetted objective is ParameterEstimator, weight sharing is not suitable [does not decrease the number of weights] +Therefore, all the test verify offsets are zero +''' +sparsity = 0.33 +local_masking = [True, False] +dense_layers = [Dense, QDense] +conv2d_layers = [Conv2D, QConv2D] + + +# Create a Dense layer with artificial weights, so that (1, 1) and (2, 3) (matrix indexing) are pruned +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('dense', dense_layers) +def test_dense_masking_unstructured(local_masking, dense): + weight_shape = (2, 3) + model = Sequential() + model.add(dense(weight_shape[1], input_shape=(weight_shape[0],), name='dense')) + model.add(Dense(1, name='out')) + + weights = model.layers[0].get_weights() + weights[0][0, 0] = 1e-6 + weights[0][1, 2] = 1e-6 + model.layers[0].set_weights(weights) + + model_attributes = get_attributes_from_keras_model(model) + model_attributes['dense'].optimizable = True + model_attributes['dense'].optimization_attributes.pruning = True + model_attributes['dense'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.UNSTRUCTURED + + # 33% sparsity - zero 2 out of 6 blocks with lowest norm [0.33 * 6 = 1.98 -> next largest int is 2] + masks, offsets = get_model_masks(model, model_attributes, sparsity, ParameterEstimator, metric='l1', local=local_masking) + zeros = np.array([[0, 0], [1, 2]], dtype=np.int32) + nonzeros = np.stack(np.where(masks['dense'] != 0), axis=1) + + assert not np.any(offsets['dense']) + assert not np.any(masks['dense'][zeros[:, 0], zeros[:, 1]]) + assert (weight_shape[0] * weight_shape[1]) == (zeros.shape[0] + nonzeros.shape[0]) + + +# Create a Dense layer with artificial weights, so that the 1st and 3rd column (neuron) are pruned 
+@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('dense', dense_layers) +def test_dense_masking_structured(local_masking, dense): + weight_shape = (3, 6) + model = Sequential() + model.add(dense(weight_shape[1], input_shape=(weight_shape[0],), name='dense')) + model.add(Dense(1, name='out')) + + weights = model.layers[0].get_weights() + weights[0][:, 0] = 1e-6 + weights[0][:, 2] = 1e-6 + model.layers[0].set_weights(weights) + + model_attributes = get_attributes_from_keras_model(model) + model_attributes['dense'].optimizable = True + model_attributes['dense'].optimization_attributes.pruning = True + model_attributes['dense'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.STRUCTURED + + # 33% sparsity - zero 2 out of 6 blocks with lowest norm [0.33 * 6 = 1.98 -> next largest int is 2] + masks, offsets = get_model_masks(model, model_attributes, sparsity, ParameterEstimator, metric='l1', local=local_masking) + zeros = np.array( + [ + [0, 0], + [1, 0], + [2, 0], # First neuron + [0, 2], + [1, 2], + [2, 2], # Third neuron + ], + dtype=np.int32, + ) + nonzeros = np.stack(np.where(masks['dense'] != 0), axis=1) + + assert not np.any(offsets['dense']) + assert not np.any(masks['dense'][zeros[:, 0], zeros[:, 1]]) + assert (weight_shape[0] * weight_shape[1]) == (zeros.shape[0] + nonzeros.shape[0]) + + +# Create a Dense layer with artificial weights, so that some patterns are pruned +# Set pattern offset to 4, which is equivalent to RF = 3 [4 * 3 / 4] +# In this case consecutive patterns are one, so pruning per DSP block +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('dense', dense_layers) +def test_dense_masking_pattern(local_masking, dense): + weight_shape = (3, 4) + model = Sequential() + model.add(dense(weight_shape[1], input_shape=(weight_shape[0],), name='dense')) + model.add(Dense(1, name='out')) + + weights = model.layers[0].get_weights() + + # Set 1st block low + weights[0][0, 0] = 
1e-6 + weights[0][1, 0] = 1e-6 + weights[0][2, 0] = 1e-6 + + # Set 3rd block low + weights[0][0, 2] = 1e-6 + weights[0][1, 2] = 1e-6 + weights[0][2, 2] = 1e-6 + + model.layers[0].set_weights(weights) + + model_attributes = get_attributes_from_keras_model(model) + model_attributes['dense'].optimizable = True + model_attributes['dense'].optimization_attributes.pruning = True + model_attributes['dense'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.PATTERN + model_attributes['dense'].optimization_attributes.pattern_offset = 4 + model_attributes['dense'].optimization_attributes.consecutive_patterns = 1 + + # 33% sparsity - zero 4 from 12 weights, group by pattern [0.33 * 12 = 3.96] - so will select 2 patterns, 6 weights (>=) + masks, offsets = get_model_masks(model, model_attributes, sparsity, ParameterEstimator, metric='l1', local=local_masking) + zeros = np.array([[0, 0], [1, 0], [2, 0], [0, 2], [1, 2], [2, 2]], dtype=np.int32) + nonzeros = np.stack(np.where(masks['dense'] != 0), axis=1) + + assert not np.any(offsets['dense']) + assert not np.any(masks['dense'][zeros[:, 0], zeros[:, 1]]) + assert (weight_shape[0] * weight_shape[1]) == (zeros.shape[0] + nonzeros.shape[0]) + + +# Create a Dense layer with artificial weights, so that the 1st and 4th block are pruned +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('dense', dense_layers) +@pytest.mark.skip( + reason='Currently disabled as no benefits from block pruning are achieved for hls4ml.' 
+) # TODO - Enable when fully tested +def test_dense_masking_block(local_masking, dense): + weight_shape = (4, 6) + model = Sequential() + model.add(dense(weight_shape[1], input_shape=(weight_shape[0],), name='dense')) + model.add(Dense(1, name='out')) + + weights = model.layers[0].get_weights() + + # Set 1st block low + weights[0][0, 0] = 1e-6 + weights[0][0, 1] = 1e-6 + weights[0][1, 0] = 1e-6 + weights[0][1, 2] = 1e-6 + + # Set 4th block low + weights[0][2, 2] = 1e-6 + weights[0][2, 3] = 1e-6 + weights[0][3, 2] = 1e-6 + weights[0][3, 3] = 1e-6 + + model.layers[0].set_weights(weights) + + model_attributes = get_attributes_from_keras_model(model) + model_attributes['dense'].optimizable = True + model_attributes['dense'].optimization_attributes.pruning = True + model_attributes['dense'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.BLOCK + model_attributes['dense'].optimization_attributes.block_shape = (2, 2) + + # 33% sparsity - zero 2 out of 6 blocks with lowest norm + # The first block is the smallest, the fourth block is set to zero + masks, offsets = get_model_masks(model, model_attributes, sparsity, ParameterEstimator, metric='l1', local=local_masking) + zeros = np.array([[0, 0], [0, 1], [1, 0], [1, 1], [2, 2], [2, 3], [3, 2], [3, 3]], dtype=np.int32) + nonzeros = np.stack(np.where(masks['dense'] != 0), axis=1) + + assert not np.any(offsets['dense']) + assert not np.any(masks['dense'][zeros[:, 0], zeros[:, 1]]) + assert (weight_shape[0] * weight_shape[1]) == (zeros.shape[0] + nonzeros.shape[0]) + + +# Create a Conv2D layer with artificial weights and mask some small weights +# Target sparsity is 0.33, so there should be <= (1 - 0.33) * 16 = 10.72 non-zero params +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('conv2d', conv2d_layers) +def test_conv2d_masking_unstructured(local_masking, conv2d): + filt_width = 2 + filt_height = 2 + n_channels = 2 + n_filters = 2 + + model = Sequential() + 
model.add(conv2d(n_filters, input_shape=(8, 8, n_channels), kernel_size=(filt_width, filt_height), name='conv2d')) + model.add(Flatten()) + model.add(Dense(1, name='out')) + + weights = model.layers[0].get_weights() + weights[0][0, 0, 0, 0] = 1e-6 + weights[0][1, 0, 0, 0] = 1e-6 + weights[0][0, 1, 0, 0] = 1e-6 + weights[0][0, 0, 1, 0] = 1e-6 + weights[0][0, 0, 0, 1] = 1e-6 + weights[0][1, 1, 1, 1] = 1e-6 + model.layers[0].set_weights(weights) + + model_attributes = get_attributes_from_keras_model(model) + model_attributes['conv2d'].optimizable = True + model_attributes['conv2d'].optimization_attributes.pruning = True + model_attributes['conv2d'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.UNSTRUCTURED + + masks, offsets = get_model_masks(model, model_attributes, sparsity, ParameterEstimator, metric='l1', local=local_masking) + zeros = np.array([[0, 0, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], [1, 1, 1, 1]], dtype=np.int32) + nonzeros = np.stack(np.where(masks['conv2d'] != 0), axis=1) + + assert not np.any(offsets['conv2d']) + assert not np.any(masks['conv2d'][zeros[:, 0], zeros[:, 1], zeros[:, 2], zeros[:, 3]]) + assert (filt_width * filt_height * n_channels * n_filters) == (zeros.shape[0] + nonzeros.shape[0]) + + +# Create a Conv2D layer with artificial weights, so that second and last filter are pruned +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('conv2d', conv2d_layers) +def test_conv2d_masking_structured(local_masking, conv2d): + filt_width = 3 + filt_height = 3 + n_channels = 4 + n_filters = 6 + + model = Sequential() + model.add(conv2d(n_filters, input_shape=(8, 8, n_channels), kernel_size=(filt_width, filt_height), name='conv2d')) + model.add(Flatten()) + model.add(Dense(1, name='out')) + + weights = model.layers[0].get_weights() + weights[0][:, :, :, 1] = 1e-3 + weights[0][:, :, :, 5] = 1e-3 + model.layers[0].set_weights(weights) + + model_attributes = 
get_attributes_from_keras_model(model) + model_attributes['conv2d'].optimizable = True + model_attributes['conv2d'].optimization_attributes.pruning = True + model_attributes['conv2d'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.STRUCTURED + + # 33% sparsity - zero 2 out of 6 filters with lowest norm + # Generate all possible combinations of width and height pixels with channel using np.meshgrid() + # This represents all the positions for a single filter; then append filter position to the last columns + masks, offsets = get_model_masks(model, model_attributes, sparsity, ParameterEstimator, metric='l1', local=local_masking) + width_pixels = np.array(range(0, filt_width)) + height_pixels = np.array(range(0, filt_height)) + channels = np.array(range(0, n_channels)) + combinations = np.array(np.meshgrid(width_pixels, height_pixels, channels)).T.reshape(-1, 3) + zeros = np.array( + np.append(combinations, np.full((filt_width * filt_height * n_channels, 1), 1), axis=1).tolist() + + np.append(combinations, np.full((filt_width * filt_height * n_channels, 1), 5), axis=1).tolist(), + dtype=np.int32, + ) + nonzeros = np.stack(np.where(masks['conv2d'] != 0), axis=1) + + assert not np.any(offsets['conv2d']) + assert not np.any(masks['conv2d'][zeros[:, 0], zeros[:, 1], zeros[:, 2], zeros[:, 3]]) + assert (filt_width * filt_height * n_channels * n_filters) == (zeros.shape[0] + nonzeros.shape[0]) + + +# Create a Conv2D layer with artificial weights, so that the first and second pattern are pruned +# Set pattern offset to 4, which is equivalent to RF = 2 [2 * 2 * 2 / 4] +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('conv2d', conv2d_layers) +def test_conv2d_masking_pattern(local_masking, conv2d): + filt_width = 2 + filt_height = 1 + n_channels = 2 + n_filters = 2 + + model = Sequential() + model.add(conv2d(n_filters, input_shape=(8, 8, n_channels), kernel_size=(filt_width, filt_height), name='conv2d')) + model.add(Flatten()) + 
model.add(Dense(1, name='out')) + + weights = model.layers[0].get_weights() + + # Set the first DSP block to be approximately zero + weights[0][0, 0, 0, 0] = 1e-6 + weights[0][0, 0, 1, 0] = 1e-6 + + # Set the third DSP block to be approximately zero + weights[0][0, 0, 0, 1] = 1e-6 + weights[0][0, 0, 1, 1] = 1e-6 + + model.layers[0].set_weights(weights) + + model_attributes = get_attributes_from_keras_model(model) + model_attributes['conv2d'].optimizable = True + model_attributes['conv2d'].optimization_attributes.pruning = True + model_attributes['conv2d'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.PATTERN + model_attributes['conv2d'].optimization_attributes.pattern_offset = 4 + + # 33% sparsity - zero out the two of the lowest groups + masks, offsets = get_model_masks(model, model_attributes, sparsity, ParameterEstimator, metric='l1', local=local_masking) + print(masks['conv2d'].shape) + print(weights[0].shape) + zeros = np.array([[0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], [0, 0, 1, 1]], dtype=np.int32) + nonzeros = np.stack(np.where(masks['conv2d'] != 0), axis=1) + + assert not np.any(offsets['conv2d']) + assert not np.any(masks['conv2d'][zeros[:, 0], zeros[:, 1], zeros[:, 2], zeros[:, 3]]) + assert (filt_width * filt_height * n_channels * n_filters) == (zeros.shape[0] + nonzeros.shape[0]) + + +# Block pruning is only allowed for 2-dimensional matrices, so assert a correct exception is raised when pruning with Conv2D +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('conv2d', conv2d_layers) +def test_conv2d_block_masking_raises_exception(local_masking, conv2d): + model = Sequential() + model.add(conv2d(4, input_shape=(8, 8, 3), kernel_size=(3, 3), name='conv2d')) + model.add(Flatten()) + model.add(Dense(1, name='out')) + + model_attributes = get_attributes_from_keras_model(model) + model_attributes['conv2d'].optimizable = True + model_attributes['conv2d'].optimization_attributes.pruning = True + 
model_attributes['conv2d'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.BLOCK + + try: + get_model_masks(model, model_attributes, sparsity, ParameterEstimator, metric='l1', local=local_masking) + except Exception: + assert True + return + assert not True + + +# Test edge cases: 0% and 100% sparsity +# Test 50% sparsity with two layers +@pytest.mark.parametrize('s', [0, 0.5, 1]) +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize( + 'type', + [ + SUPPORTED_STRUCTURES.UNSTRUCTURED, + SUPPORTED_STRUCTURES.STRUCTURED, + SUPPORTED_STRUCTURES.PATTERN, + SUPPORTED_STRUCTURES.BLOCK, + ], +) +def test_multi_layer_masking(s, local_masking, type): + dense_units = 16 + conv_filters = 6 + conv_channels = 4 + conv_shape = (2, 2) # Using (2, 2) instead of (3, 3) as it's an even number of weights + input_shape = (8, 8) + + # Simple model, Conv2D weight shape (2, 2, 4, 6) and Dense weight shape (384, 16) + model = Sequential() + model.add( + Conv2D( + conv_filters, + input_shape=(*input_shape, conv_channels), + kernel_size=conv_shape, + name='conv2d', + padding='same', + kernel_initializer='ones', + ) + ) + model.add(Flatten()) + model.add(Dense(dense_units, name='dense', kernel_initializer='ones')) + + # Make 'dense' and 'conv2d' optimizable + model_attributes = get_attributes_from_keras_model(model) + model_attributes['dense'].optimizable = True + model_attributes['dense'].optimization_attributes.pruning = True + model_attributes['dense'].optimization_attributes.structure_type = type + model_attributes['dense'].optimization_attributes.pattern_offset = 1024 # Equivalent to RF = 6 (384 * 16 / 1024) + + model_attributes['conv2d'].optimizable = True + model_attributes['conv2d'].optimization_attributes.pruning = True + model_attributes['conv2d'].optimization_attributes.structure_type = ( + type if type != SUPPORTED_STRUCTURES.BLOCK else SUPPORTED_STRUCTURES.UNSTRUCTURED + ) + 
model_attributes['conv2d'].optimization_attributes.pattern_offset = 4 # Equivalent to RF = 4 (2 * 2 * 4 * 6 / 4) + + masks, offsets = get_model_masks(model, model_attributes, s, ParameterEstimator, metric='l1', local=local_masking) + if s == 1: # 100% sparsity - all masks are zero + print(np.count_nonzero(masks['dense'].flatten())) + assert not np.any(masks['dense']) + assert not np.any(masks['conv2d']) + elif s == 0.5: + conv2d_weights = conv_channels * conv_filters * np.prod(conv_shape) + dense_weights = dense_units * np.prod(input_shape) * conv_filters + if local_masking: + assert np.count_nonzero(masks['conv2d']) == int((1 - s) * conv2d_weights) + assert np.count_nonzero(masks['dense']) == int((1 - s) * dense_weights) + else: + # Less than or equal to, since Knapsack problem imposes a hard constrain on the active resources (ones) + assert np.count_nonzero(masks['conv2d']) + np.count_nonzero(masks['dense']) <= int( + (1 - s) * (conv2d_weights + dense_weights) + ) + elif s == 0: # 0% sparsity - all masks are one + assert np.all(masks['dense']) + assert np.all(masks['conv2d']) + + assert not np.any(offsets['dense']) + assert not np.any(offsets['conv2d']) + + +# Create a Dense layer with artificial weights, so that some consecutive patterns are pruned +# Set consecutive patterns to 2, so that the 1st block [1st and 2nd pattern] are pruned +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('dense', dense_layers) +def test_consecutive_pattern_masking(local_masking, dense): + weight_shape = (3, 4) + model = Sequential() + model.add(dense(weight_shape[1], input_shape=(weight_shape[0],), name='dense')) + model.add(Flatten()) + model.add(Dense(1, name='out')) + weights = model.layers[0].get_weights() + + weights[0] = np.arange(np.prod(weight_shape)).reshape(weight_shape) + + # Set 1st and 2nd pattern low + weights[0][0, 0] = 1e-6 + weights[0][1, 0] = 1e-6 + weights[0][2, 0] = 1e-6 + weights[0][0, 1] = 1e-4 + weights[0][1, 1] = 1e-4 + 
weights[0][2, 1] = 1e-4
+
+    # Set 4th pattern lower than second
+    # This pattern should still remain unmasked [even if it has a lower value than the 2nd pattern],
+    # As its neighbouring block has a larger value than the 2nd pattern
+    weights[0][0, 3] = 1e-6
+    weights[0][1, 3] = 1e-6
+    weights[0][2, 3] = 1e-6
+
+    print(weights)
+    model.layers[0].set_weights(weights)
+
+    model_attributes = get_attributes_from_keras_model(model)
+    model_attributes['dense'].optimizable = True
+    model_attributes['dense'].optimization_attributes.pruning = True
+    model_attributes['dense'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.PATTERN
+    model_attributes['dense'].optimization_attributes.pattern_offset = 4
+    model_attributes['dense'].optimization_attributes.consecutive_patterns = 2
+
+    # 33% sparsity - zero 4 out of 12 weights, group by pattern [0.33 * 12 = 3.96]
+    masks, offsets = get_model_masks(model, model_attributes, sparsity, ParameterEstimator, metric='l1', local=local_masking)
+    zeros = np.array([[0, 0], [1, 0], [2, 0], [0, 1], [1, 1], [2, 1]], dtype=np.int32)
+    nonzeros = np.stack(np.where(masks['dense'] != 0), axis=1)
+
+    assert not np.any(offsets['dense'])
+    assert not np.any(masks['dense'][zeros[:, 0], zeros[:, 1]])
+    assert (weight_shape[0] * weight_shape[1]) == (zeros.shape[0] + nonzeros.shape[0])
diff --git a/test/pytest/test_optimization/test_keras/test_reduction.py b/test/pytest/test_optimization/test_keras/test_reduction.py
new file mode 100644
index 0000000000000000000000000000000000000000..e07cbab656becec2c7558704da208d01bc929032
--- /dev/null
+++ b/test/pytest/test_optimization/test_keras/test_reduction.py
@@ -0,0 +1,144 @@
+import keras
+import pytest
+from packaging import version
+from qkeras import QActivation, QConv2D, QDense, quantized_bits
+from tensorflow.keras.layers import AveragePooling2D, BatchNormalization, Conv2D, Dense, Flatten, MaxPooling2D, ReLU, Softmax
+from tensorflow.keras.models import Sequential
+
+from 
hls4ml.optimization.keras.reduction import reduce_model
+from hls4ml.optimization.keras.utils import get_model_sparsity
+
+'''
+Set some neurons / filters to zero and verify that these are removed
+Even if some neurons (columns) in the output layer are zero, these should not be removed (to match data set labels)
+Tests verify the above property, by setting some zeros in the last layer and verifying these remain in place
+'''
+
+
+@pytest.mark.skipif(
+    version.parse(keras.__version__) > version.parse('2.12.0'), reason='Keras Surgeon only works until Keras 2.12'
+)
+def test_keras_model_reduction():
+    model = Sequential()
+    model.add(Conv2D(8, (3, 3), input_shape=(64, 64, 1), name='conv2d_1', padding='same'))
+    model.add(MaxPooling2D())
+    model.add(BatchNormalization())
+    model.add(ReLU())
+    model.add(Conv2D(32, (5, 5), padding='same', name='conv2d_2'))
+    model.add(AveragePooling2D())
+    model.add(BatchNormalization())
+    model.add(ReLU())
+    model.add(Flatten())
+    model.add(Dense(32, input_shape=(16,), name='dense_1', activation='relu'))
+    model.add(BatchNormalization())
+    model.add(Dense(14, name='dense_2', activation='relu'))
+    model.add(BatchNormalization())
+    model.add(Dense(5, name='dense_3'))
+    model.add(Softmax())
+
+    indices = {
+        'conv2d_1': [2, 4, 7],
+        'conv2d_2': [0, 1, 2, 3, 4, 5],
+        'dense_1': [0, 5, 17, 28],
+        'dense_2': [1, 9, 4],
+        'dense_3': [3],
+    }
+    for layer in model.layers:
+        if isinstance(layer, Dense):
+            weights = layer.get_weights()
+            weights[0][:, indices[layer.name]] = 0
+            layer.set_weights(weights)
+        if isinstance(layer, Conv2D):
+            weights = layer.get_weights()
+            weights[0][:, :, :, indices[layer.name]] = 0
+            layer.set_weights(weights)
+
+    sparsity, _ = get_model_sparsity(model)
+    assert sparsity > 0
+
+    reduced = reduce_model(model)
+    assert reduced.get_layer('conv2d_1').get_weights()[0].shape == (3, 3, 1, 5)
+    assert reduced.get_layer('conv2d_2').get_weights()[0].shape == (5, 5, 5, 26)
+    assert 
reduced.get_layer('dense_1').get_weights()[0].shape == (6656, 28) + assert reduced.get_layer('dense_2').get_weights()[0].shape == (28, 11) + assert reduced.get_layer('dense_3').get_weights()[0].shape == (11, 5) + + _, layer_sparsity = get_model_sparsity(reduced) + assert layer_sparsity['conv2d_1'] == 0 + assert layer_sparsity['conv2d_2'] == 0 + assert layer_sparsity['dense_1'] == 0 + assert layer_sparsity['dense_2'] == 0 + assert layer_sparsity['dense_3'] > 0 + + +@pytest.mark.skipif( + version.parse(keras.__version__) > version.parse('2.12.0'), reason='Keras Surgeon only works until Keras 2.12' +) +def test_qkeras_model_reduction(): + bits = 8 + activation = 'quantized_relu(4)' + quantizer = quantized_bits(bits, 0) + + model = Sequential() + model.add(QConv2D(8, (3, 3), input_shape=(64, 64, 1), name='qconv2d_1', padding='same', kernel_quantizer=quantizer)) + model.add(MaxPooling2D()) + model.add(BatchNormalization()) + model.add(QActivation(activation, name='qrelu_1')) + model.add(QConv2D(32, (5, 5), padding='same', name='qconv2d_2', kernel_quantizer=quantizer)) + model.add(AveragePooling2D()) + model.add(BatchNormalization()) + model.add(QActivation(activation, name='qrelu_2')) + model.add(Flatten()) + model.add(QDense(32, input_shape=(16,), name='qdense_1', kernel_quantizer=quantizer)) + model.add(QActivation(activation, name='qrelu_3')) + model.add(BatchNormalization()) + model.add(QDense(14, name='qdense_2', kernel_quantizer=quantizer)) + model.add(QActivation(activation, name='qrelu_4')) + model.add(BatchNormalization()) + model.add(QDense(5, name='qdense_3', kernel_quantizer=quantizer)) + model.add(Softmax()) + + indices = { + 'qconv2d_1': [2, 4, 7], + 'qconv2d_2': [0, 1, 2, 3, 4, 5], + 'qdense_1': [0, 5, 17, 28], + 'qdense_2': [1, 9, 4], + 'qdense_3': [3], + } + for layer in model.layers: + if isinstance(layer, QDense): + weights = layer.get_weights() + weights[0][:, indices[layer.name]] = 0 + layer.set_weights(weights) + if isinstance(layer, QConv2D): + 
weights = layer.get_weights() + weights[0][:, :, :, indices[layer.name]] = 0 + layer.set_weights(weights) + + sparsity, _ = get_model_sparsity(model) + assert sparsity > 0 + + reduced = reduce_model(model) + assert reduced.get_layer('qconv2d_1').get_weights()[0].shape == (3, 3, 1, 5) + assert reduced.get_layer('qconv2d_2').get_weights()[0].shape == (5, 5, 5, 26) + assert reduced.get_layer('qdense_1').get_weights()[0].shape == (6656, 28) + assert reduced.get_layer('qdense_2').get_weights()[0].shape == (28, 11) + assert reduced.get_layer('qdense_3').get_weights()[0].shape == (11, 5) + + _, layer_sparsity = get_model_sparsity(reduced) + assert layer_sparsity['qconv2d_1'] == 0 + assert layer_sparsity['qconv2d_2'] == 0 + assert layer_sparsity['qdense_1'] == 0 + assert layer_sparsity['qdense_2'] == 0 + assert layer_sparsity['qdense_3'] > 0 + + # Verify network surgery has no impact on quantization + assert isinstance(reduced.get_layer('qrelu_1'), QActivation) + assert isinstance(reduced.get_layer('qrelu_2'), QActivation) + assert isinstance(reduced.get_layer('qrelu_3'), QActivation) + assert isinstance(reduced.get_layer('qrelu_4'), QActivation) + assert reduced.get_layer('qconv2d_1').kernel_quantizer['config']['bits'] == bits + assert reduced.get_layer('qconv2d_2').kernel_quantizer['config']['bits'] == bits + assert reduced.get_layer('qdense_1').kernel_quantizer['config']['bits'] == bits + assert reduced.get_layer('qdense_2').kernel_quantizer['config']['bits'] == bits + assert reduced.get_layer('qdense_3').kernel_quantizer['config']['bits'] == bits diff --git a/test/pytest/test_optimization/test_keras/test_regularizers.py b/test/pytest/test_optimization/test_keras/test_regularizers.py new file mode 100644 index 0000000000000000000000000000000000000000..9fe518caae8e539bef232286f6a5603e17cddb57 --- /dev/null +++ b/test/pytest/test_optimization/test_keras/test_regularizers.py @@ -0,0 +1,176 @@ +import numpy as np +import pytest +import tensorflow as tf +from qkeras import 
QConv2D, QDense +from tensorflow.keras.layers import Conv2D, Dense, Flatten +from tensorflow.keras.models import Sequential +from tensorflow.keras.optimizers import Adam + +from hls4ml.optimization.config import SUPPORTED_STRUCTURES +from hls4ml.optimization.keras.builder import remove_custom_regularizers +from hls4ml.optimization.keras.regularizers import Conv2DRegularizer, DenseRegularizer + +# Constants +pattern_offset = 4 +consecutive_patterns = 2 +block_shape = (4, 4) + +dense_layers = [Dense, QDense] +conv2d_layers = [Conv2D, QConv2D] + + +# Sets the loss due to data to zero; train model only on regularization loss +def zero_loss(y_true, y_pred): + return tf.reduce_mean(0 * tf.square(y_true - y_pred), axis=-1) + + +# Helper function, calculates the group norm and variance for a single layer +def get_norm_and_variance(weights, structure_type, layer='dense'): + if structure_type == SUPPORTED_STRUCTURES.UNSTRUCTURED: + norm = np.linalg.norm(weights.flatten(), ord=1) + var = np.var(weights) + return norm, var + + if structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + if layer == 'conv2d': + norm = np.linalg.norm(np.sum(np.linalg.norm(weights, axis=(0, 1), ord='fro'), axis=0), ord=1) + var = np.linalg.norm(np.var(weights, axis=(0, 1, 2)), ord=1) + else: + norm = np.linalg.norm(np.linalg.norm(weights, axis=0, ord=2), ord=1) + var = np.linalg.norm(np.var(weights, axis=0), ord=1) + + return norm, var + + if structure_type == SUPPORTED_STRUCTURES.PATTERN: + number_of_patterns = np.prod(weights.shape) // pattern_offset + target_shape = (number_of_patterns, pattern_offset) + reshaped = np.reshape(weights, target_shape) + total_blocks = pattern_offset // consecutive_patterns + blocks = np.reshape( + tf.image.extract_patches( + np.expand_dims(np.expand_dims(reshaped, 2), 0), + [1, number_of_patterns, consecutive_patterns, 1], + [1, number_of_patterns, consecutive_patterns, 1], + [1, 1, 1, 1], + 'SAME', + ).numpy(), + (total_blocks, number_of_patterns * 
consecutive_patterns), + ) + norm = np.linalg.norm(np.linalg.norm(blocks, axis=1, ord=2), ord=1) + var = np.linalg.norm(np.var(blocks, axis=1), ord=1) + return norm, var + + if structure_type == SUPPORTED_STRUCTURES.BLOCK: + total_blocks = (weights.shape[0] * weights.shape[1]) // (block_shape[0] * block_shape[1]) + blocks = np.reshape( + tf.image.extract_patches( + np.expand_dims(np.expand_dims(weights, 2), 0), + [1, block_shape[0], block_shape[1], 1], + [1, block_shape[0], block_shape[1], 1], + [1, 1, 1, 1], + 'SAME', + ).numpy(), + (total_blocks, block_shape[0] * block_shape[1]), + ) + + norm = np.linalg.norm(np.linalg.norm(blocks, axis=1, ord=2), ord=1) + var = np.linalg.norm(np.var(blocks, axis=1), ord=1) + return norm, var + + +@pytest.mark.parametrize('dense', dense_layers) +@pytest.mark.parametrize( + 'structure_type', + [ + SUPPORTED_STRUCTURES.UNSTRUCTURED, + SUPPORTED_STRUCTURES.STRUCTURED, + SUPPORTED_STRUCTURES.PATTERN, + SUPPORTED_STRUCTURES.BLOCK, + ], +) +def test_dense_regularizer(structure_type, dense): + epochs = 10 + data_points = 10 + input_shape = (32,) + output_shape = (16,) + X = np.random.rand(data_points, *input_shape) + y = np.random.rand(data_points, *output_shape) + w = np.random.rand(input_shape[0], output_shape[0]) + + # First, fit a model without regularization + model = Sequential() + model.add(dense(output_shape[0], input_shape=input_shape)) + dense_weights = model.layers[0].get_weights() + dense_weights[0] = w + model.layers[0].set_weights(dense_weights) + model.compile(loss=zero_loss, optimizer=Adam(1.0)) + model.fit(X, y, epochs=epochs) + norm, var = get_norm_and_variance(model.layers[0].get_weights()[0], structure_type) + + # Now, fit a model with strong regularization, starting with the same initial weights + dense_weights = model.layers[0].get_weights() + dense_weights[0] = w + model.layers[0].set_weights(dense_weights) + regularizer = DenseRegularizer( + alpha=0.5, beta=0.5, structure_type=structure_type, 
pattern_offset=pattern_offset, block_shape=block_shape + ) + model.layers[0].add_loss(lambda layer=model.layers[0]: regularizer(layer.kernel)) + model.compile(loss=zero_loss, optimizer=Adam(1.0)) + model.fit(X, y, epochs=epochs) + reg_norm, reg_var = get_norm_and_variance(model.layers[0].get_weights()[0], structure_type) + + # Verify regularization decreased weight magnitude and variance + assert reg_norm < norm + assert reg_var < var + + +@pytest.mark.parametrize('conv2d', conv2d_layers) +@pytest.mark.parametrize( + 'structure_type', [SUPPORTED_STRUCTURES.UNSTRUCTURED, SUPPORTED_STRUCTURES.STRUCTURED, SUPPORTED_STRUCTURES.PATTERN] +) +def test_conv2d_regularizer(structure_type, conv2d): + epochs = 10 + data_points = 10 + input_shape = (16, 16, 3) + num_filters = 4 + X = np.random.rand(data_points, *input_shape) + y = np.random.rand(data_points, 1) + + # First, fit a model without regularization + model = Sequential() + model.add(conv2d(num_filters, (3, 3), input_shape=input_shape)) + model.add(Flatten()) + model.add(Dense(1)) + conv_weights = model.layers[0].get_weights() + model.compile(loss=zero_loss, optimizer=Adam()) + model.fit(X, y, epochs=epochs, verbose=True) + norm, var = get_norm_and_variance(model.layers[0].get_weights()[0], structure_type, 'conv2d') + + # Now, fit a model with strong regularization, starting with the same initial weights + model.layers[0].set_weights(conv_weights) + regularizer = Conv2DRegularizer(alpha=0.5, beta=0.5, structure_type=structure_type, pattern_offset=pattern_offset) + model.layers[0].add_loss(lambda layer=model.layers[0]: regularizer(layer.kernel)) + model.compile(loss=zero_loss, optimizer=Adam()) + model.fit(X, y, epochs=epochs, verbose=True) + reg_norm, reg_var = get_norm_and_variance(model.layers[0].get_weights()[0], structure_type, 'conv2d') + + # Verify regularization decreased weight magnitude and variance + assert reg_norm < norm + assert reg_var < var + + +def test_removal_of_custom_regularizer(): + model = 
Sequential() + model.add(Conv2D(8, (3, 3), input_shape=(16, 16, 3), kernel_regularizer=Conv2DRegularizer(1e-3))) + model.add(Flatten()) + model.add(Dense(1, kernel_regularizer=DenseRegularizer(1e-3))) + weights = model.get_weights() + + assert isinstance(model.layers[0].kernel_regularizer, Conv2DRegularizer) + assert isinstance(model.layers[2].kernel_regularizer, DenseRegularizer) + + model = remove_custom_regularizers(model) + + assert model.layers[0].kernel_regularizer is None + for i in range(len(weights)): + assert np.all(weights[i] == model.get_weights()[i]) diff --git a/test/pytest/test_optimization/test_keras/test_weight_sharing.py b/test/pytest/test_optimization/test_keras/test_weight_sharing.py new file mode 100644 index 0000000000000000000000000000000000000000..c274a84da83612d88f9059449cc12f37cdae63fe --- /dev/null +++ b/test/pytest/test_optimization/test_keras/test_weight_sharing.py @@ -0,0 +1,155 @@ +import numpy as np +import pytest +from qkeras import QDense +from tensorflow.keras.layers import Dense +from tensorflow.keras.models import Sequential + +from hls4ml.optimization.attributes import get_attributes_from_keras_model +from hls4ml.optimization.config import SUPPORTED_STRUCTURES +from hls4ml.optimization.keras.masking import get_model_masks +from hls4ml.optimization.objectives import ObjectiveEstimator + +# Similar tests in test_masking.py, weight sharing instead of pruning +sparsity = 0.33 +local_masking = [True, False] +dense_layers = [Dense, QDense] + +''' +A mock objective class for weight sharing +When a group of weights is quantized to the mean value, resource savings are equal to the number of weights quantized +This is similar to ParameterEstimator, but instead of pruning, weight sharing is performed and +No savings are incurred with unstructured type (unstructured weight sharing doesn't make sense) +''' + + +class MockWeightSharingEstimator(ObjectiveEstimator): + @classmethod + def layer_resources(self, layer_attributes): + if not 
layer_attributes.weight_shape: + return [0] + else: + return [np.prod(layer_attributes.weight_shape)] + + @classmethod + def layer_savings(self, layer_attributes): + if not layer_attributes.weight_shape: + return [0] + + structure_type = layer_attributes.optimization_attributes.structure_type + + if layer_attributes.optimization_attributes.weight_sharing: + if structure_type == SUPPORTED_STRUCTURES.UNSTRUCTURED: + return [0] + elif structure_type == SUPPORTED_STRUCTURES.STRUCTURED: + if 'Dense' in layer_attributes.layer_type.__name__: + return [layer_attributes.weight_shape[1]] + elif structure_type == SUPPORTED_STRUCTURES.PATTERN: + number_of_patterns = ( + np.prod(layer_attributes.weight_shape) // layer_attributes.optimization_attributes.pattern_offset + ) + return [number_of_patterns * layer_attributes.optimization_attributes.consecutive_patterns] + elif structure_type == SUPPORTED_STRUCTURES.BLOCK: + return [np.prod(layer_attributes.optimization_attributes.block_shape)] + return [0] + + +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('dense', dense_layers) +def test_weight_sharing_structured(local_masking, dense): + weight_shape = (4, 3) + + model = Sequential() + model.add(dense(weight_shape[1], input_shape=(weight_shape[0],), name='dense')) + weights = model.layers[0].get_weights() + + weights[0][:, 1] = 0.5 + weights[0][0, 1] -= 1e-4 + weights[0][2, 1] += 1e-4 + + model.layers[0].set_weights(weights) + + model_attributes = get_attributes_from_keras_model(model) + model_attributes['dense'].optimizable = True + model_attributes['dense'].optimization_attributes.pruning = False + model_attributes['dense'].optimization_attributes.weight_sharing = True + model_attributes['dense'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.STRUCTURED + + masks, offsets = get_model_masks( + model, model_attributes, sparsity, MockWeightSharingEstimator, metric='l1', local=local_masking + ) + frozen = np.array([[0, 1], [1, 1], [2, 
1]], dtype=np.int32) + + assert not np.any(masks['dense'][frozen[:, 0], frozen[:, 1]]) + assert np.all(offsets['dense'][frozen[:, 0], frozen[:, 1]] == 0.5) + + +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('dense', dense_layers) +def test_weight_sharing_pattern(local_masking, dense): + weight_shape = (3, 4) + + model = Sequential() + model.add(dense(weight_shape[1], input_shape=(weight_shape[0],), name='dense')) + weights = model.layers[0].get_weights() + weights[0][0, 0] = 0.5 + 1e-4 + weights[0][1, 0] = 0.5 - 1e-4 + weights[0][2, 0] = 0.5 + + weights[0][0, 2] = 0.5 + 1e-4 + weights[0][1, 2] = 0.5 - 1e-4 + weights[0][2, 2] = 0.5 + + model.layers[0].set_weights(weights) + + model_attributes = get_attributes_from_keras_model(model) + model_attributes['dense'].optimizable = True + model_attributes['dense'].optimization_attributes.pruning = False + model_attributes['dense'].optimization_attributes.weight_sharing = True + model_attributes['dense'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.PATTERN + model_attributes['dense'].optimization_attributes.pattern_offset = 4 + model_attributes['dense'].optimization_attributes.consecutive_patterns = 1 + + masks, offsets = get_model_masks( + model, model_attributes, sparsity, MockWeightSharingEstimator, metric='l1', local=local_masking + ) + frozen = np.array([[0, 0], [1, 0], [2, 0], [0, 2], [1, 2], [2, 2]], dtype=np.int32) + + assert not np.any(masks['dense'][frozen[:, 0], frozen[:, 1]]) + assert np.all(offsets['dense'][frozen[:, 0], frozen[:, 1]] == 0.5) + + +@pytest.mark.parametrize('local_masking', local_masking) +@pytest.mark.parametrize('dense', dense_layers) +def test_weight_sharing_block(local_masking, dense): + weight_shape = (4, 6) + + model = Sequential() + model.add(dense(weight_shape[1], input_shape=(weight_shape[0],), name='dense')) + weights = model.layers[0].get_weights() + + weights[0][0, 0] = 0.5 + 1e-3 + weights[0][0, 1] = 0.5 + 1e-3 + weights[0][1, 0] = 
0.5 - 1e-3 + weights[0][1, 1] = 0.5 - 1e-3 + + weights[0][2, 2] = 0.5 + 1e-3 + weights[0][2, 3] = 0.5 + 1e-3 + weights[0][3, 2] = 0.5 - 1e-3 + weights[0][3, 3] = 0.5 - 1e-3 + + model.layers[0].set_weights(weights) + + model_attributes = get_attributes_from_keras_model(model) + model_attributes['dense'].optimizable = True + model_attributes['dense'].optimization_attributes.pruning = False + model_attributes['dense'].optimization_attributes.weight_sharing = True + model_attributes['dense'].optimization_attributes.structure_type = SUPPORTED_STRUCTURES.BLOCK + model_attributes['dense'].optimization_attributes.block_shape = (2, 2) + + masks, offsets = get_model_masks( + model, model_attributes, sparsity, MockWeightSharingEstimator, metric='l1', local=local_masking + ) + frozen = np.array([[0, 0], [0, 1], [1, 0], [1, 1], [2, 2], [2, 3], [3, 2], [3, 3]], dtype=np.int32) + + assert not np.any(masks['dense'][frozen[:, 0], frozen[:, 1]]) + assert np.all(offsets['dense'][frozen[:, 0], frozen[:, 1]] == 0.5) diff --git a/test/pytest/test_optimization/test_knapsack.py b/test/pytest/test_optimization/test_knapsack.py new file mode 100644 index 0000000000000000000000000000000000000000..a4145c00d03e799e4136fce47306f2b7313bdcdc --- /dev/null +++ b/test/pytest/test_optimization/test_knapsack.py @@ -0,0 +1,52 @@ +import numpy as np +import pytest + +from hls4ml.optimization.knapsack import solve_knapsack + + +# In the simple case below, both implementations give the optimal answer +# In general, the greedy algorithm will not give the optimal solution +@pytest.mark.parametrize('implementation', ['dynamic', 'greedy', 'branch_bound', 'CBC_MIP']) +def test_knapsack_1d(implementation): + values = np.array([4, 5, 6, 8, 3]) + weights = np.array([[2, 5, 3, 2, 5]]) + capacity = np.array([8]) + + optimal, selected = solve_knapsack(values, weights, capacity, implementation=implementation) + assert optimal == 18 + assert 0 in selected + assert 2 in selected + assert 3 in selected + + 
+@pytest.mark.parametrize('implementation', ['greedy', 'branch_bound', 'CBC_MIP']) +def test_multidimensional_knapsack(implementation): + values = np.array([10, 2, 6, 12, 3]) + weights = np.array([[3, 1, 4, 5, 5], [3, 2, 4, 1, 2]]) + capacity = np.array([8, 7]) + + optimal, selected = solve_knapsack(values, weights, capacity, implementation=implementation) + assert optimal == 22 + assert 0 in selected + assert 3 in selected + + +def test_knapsack_equal_weights(): + values = np.array([10, 2, 6, 8, 3]) + weights = np.array([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3]]) + capacity = np.array([7, 7]) + + optimal, selected = solve_knapsack(values, weights, capacity) + assert optimal == 18 + assert 0 in selected + assert 3 in selected + + +def test_knapsack_all_elements_fit(): + values = np.array([10, 2, 6, 12, 3]) + weights = np.array([[3, 1, 4, 5, 5], [3, 2, 4, 1, 2]]) + capacity = np.array([19, 12]) + + optimal, selected = solve_knapsack(values, weights, capacity) + assert optimal == 33 + assert selected == list(range(0, values.shape[0])) diff --git a/test/pytest/test_optimization/test_objectives.py b/test/pytest/test_optimization/test_objectives.py new file mode 100644 index 0000000000000000000000000000000000000000..a7d81befe603b0fe3be0874822935ca21f0bc566 --- /dev/null +++ b/test/pytest/test_optimization/test_objectives.py @@ -0,0 +1,57 @@ +import numpy as np +from tensorflow.keras.layers import Conv2D, Dense, Flatten +from tensorflow.keras.models import Sequential + +from hls4ml.optimization.attributes import get_attributes_from_keras_model +from hls4ml.optimization.objectives import ParameterEstimator + + +# Test attempts to verify one of the estimators (parameter) is correctly declared, the functions are static etc. 
+def test_parameter_objective(): + # Model parameters + dense_units = 16 + conv_filters = 6 + conv_channels = 3 + conv_shape = (3, 3) + input_shape = (8, 8) + + model = Sequential() + model.add( + Conv2D( + conv_filters, + input_shape=(*input_shape, conv_channels), + kernel_size=conv_shape, + name='conv2d', + padding='same', + kernel_initializer='ones', + ) + ) + model.add(Flatten(name='flatten')) + model.add(Dense(dense_units, name='dense', kernel_initializer='ones')) + model_attributes = get_attributes_from_keras_model(model) + + # Identify optimizable layers and the suitable structure + for layer in model.layers: + optimizable, optimization_attributes = ParameterEstimator.is_layer_optimizable(model_attributes[layer.name]) + model_attributes[layer.name].optimizable = optimizable + model_attributes[layer.name].optimization_attributes = optimization_attributes + + # Verify conv2d and dense are optimizable, flatten is not + assert model_attributes['conv2d'].optimizable + assert not model_attributes['flatten'].optimizable + assert model_attributes['dense'].optimizable + + # Verify layer resources (number of parameters) are correct + assert [conv_filters * conv_channels * np.prod(conv_shape)] == ParameterEstimator.layer_resources( + model_attributes['conv2d'] + ) + assert [0] == ParameterEstimator.layer_resources(model_attributes['flatten']) + assert [conv_filters * np.prod(input_shape) * dense_units] == ParameterEstimator.layer_resources( + model_attributes['dense'] + ) + + # Verify layer savings are correct - is_layer_optimizable should have returned UNSTRUCTURED as the pruning type + # Since it wasn't overwritten, each pruning step saves one parameter + assert [1] == ParameterEstimator.layer_savings(model_attributes['conv2d']) + assert [0] == ParameterEstimator.layer_savings(model_attributes['flatten']) + assert [1] == ParameterEstimator.layer_savings(model_attributes['dense']) diff --git a/test/pytest/test_optimization/test_scheduler.py 
b/test/pytest/test_optimization/test_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..2dc7642bf639ad9a54f643afdbaaabe1ff4f49eb --- /dev/null +++ b/test/pytest/test_optimization/test_scheduler.py @@ -0,0 +1,80 @@ +import numpy as np # Use np.testing.assert_allclose due to floating point rounding errors + +from hls4ml.optimization.scheduler import BinaryScheduler, ConstantScheduler, PolynomialScheduler + + +def test_constant_scheduler(): + initial_sparsity = 0.25 + update_step = 0.10 + target_sparsity = initial_sparsity + 2.5 * update_step + + # Assert initial sparsity correct + scheduler = ConstantScheduler(initial_sparsity=initial_sparsity, final_sparsity=target_sparsity, update_step=update_step) + np.testing.assert_allclose(scheduler.get_sparsity(), initial_sparsity) + + # Assert update step is correct + np.testing.assert_allclose(scheduler.update_step(), (True, initial_sparsity + update_step)) + + # Assert repair step is correct + np.testing.assert_allclose(scheduler.repair_step(), (True, initial_sparsity + 2 * update_step)) + + # Assert cannot update again, since it would go over target sparsity + np.testing.assert_allclose(scheduler.update_step(), (False, initial_sparsity + 2 * update_step)) + + # Assert final (achievable) sparsity is correct + np.testing.assert_allclose(scheduler.get_sparsity(), initial_sparsity + 2 * update_step) + + +def test_binary_scheduler(): + initial_sparsity = 0.25 + target_sparsity = 0.5 + threshold = 0.05 + + # Assert initial sparsity correct + scheduler = BinaryScheduler(initial_sparsity=initial_sparsity, final_sparsity=target_sparsity, threshold=threshold) + np.testing.assert_allclose(scheduler.get_sparsity(), initial_sparsity) + + # Assert 1st update step is correct + s1 = 0.5 * (initial_sparsity + target_sparsity) + np.testing.assert_allclose(scheduler.update_step(), (True, s1)) + + # Assert 1st repair step is correct + s2 = 0.5 * (initial_sparsity + s1) + 
np.testing.assert_allclose(scheduler.repair_step(), (True, s2))
+
+    # Assert 2nd update step is correct
+    s3 = 0.5 * (s2 + s1)
+    np.testing.assert_allclose(scheduler.update_step(), (True, s3))
+
+    # Assert 2nd repair step does not take place, difference < threshold
+    np.testing.assert_allclose(scheduler.repair_step(), (False, s3))
+
+    # Assert final (achievable) sparsity is correct
+    np.testing.assert_allclose(scheduler.get_sparsity(), s3)
+
+
+def test_polynomial_scheduler():
+    decay_power = 2
+    maximum_steps = 2
+    initial_sparsity = 0.25
+    target_sparsity = 0.5
+
+    # Assert initial sparsity correct
+    scheduler = PolynomialScheduler(
+        maximum_steps, initial_sparsity=initial_sparsity, final_sparsity=target_sparsity, decay_power=decay_power
+    )
+    np.testing.assert_allclose(scheduler.get_sparsity(), initial_sparsity)
+
+    # Assert 1st update step is correct
+    s1 = target_sparsity + (initial_sparsity - target_sparsity) * ((1 - 1 / maximum_steps) ** decay_power)
+    np.testing.assert_allclose(scheduler.update_step(), (True, s1))
+
+    # Assert 1st repair step is correct
+    s2 = target_sparsity + (initial_sparsity - target_sparsity) * ((1 - 2 / maximum_steps) ** decay_power)
+    np.testing.assert_allclose(scheduler.repair_step(), (True, s2))
+
+    # Assert 2nd update step does not occur, since current_step = maximum_steps
+    np.testing.assert_allclose(scheduler.update_step(), (False, s2))
+
+    # Assert final (achievable) sparsity is correct
+    np.testing.assert_allclose(scheduler.get_sparsity(), target_sparsity)