Skip to content
Snippets Groups Projects
Commit 39f63ddf authored by Georg Streich's avatar Georg Streich
Browse files

Add partition map decorator

parent fe592d5e
No related branches found
No related tags found
No related merge requests found
Showing
with 456 additions and 451 deletions
......@@ -118,15 +118,17 @@ RUN pip install tokenize-rt==4.2.1
RUN pip install tclwrapper==0.0.1
# install accl dependencies
RUN apt-get install -y
cmake
libjsoncpp-dev
libtclap-dev
libopenmpi-dev
xvfb
RUN git clone https://github.com/zeromq/zmqpp
RUN cd zmqpp && make && make install
RUN apt-get install -y \
cmake \
libjsoncpp-dev \
libtclap-dev \
libopenmpi-dev \
xvfb \
libzmq3-dev
RUN git clone https://github.com/zeromq/zmqpp && \
cd zmqpp && \
make && make install
# extra environment variables for FINN compiler
ENV VIVADO_IP_CACHE "/tmp/vivado_ip_cache"
......
......@@ -1545,7 +1545,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.12"
}
},
"nbformat": 4,
......
......@@ -27,6 +27,8 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
set -e
RED='\033[0;31m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color
......@@ -113,7 +115,7 @@ elif [ "$1" = "notebook" ]; then
else
JUPYTER_PASSWD_ARG="--NotebookApp.password='$JUPYTER_PASSWD_HASH'"
fi
DOCKER_CMD="jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --port $JUPYTER_PORT $JUPYTER_PASSWD_ARG notebooks"
DOCKER_CMD="jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --port $JUPYTER_PORT $JUPYTER_PASSWD_ARG finn-examples/finn_examples/notebooks"
FINN_DOCKER_EXTRA+="-e JUPYTER_PORT=$JUPYTER_PORT "
FINN_DOCKER_EXTRA+="-e NETRON_PORT=$NETRON_PORT "
FINN_DOCKER_EXTRA+="-p $JUPYTER_PORT:$JUPYTER_PORT "
......
......@@ -188,7 +188,6 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
print("Completed successfully")
return 0
def build_dataflow_directory(path_to_cfg_dir: str):
"""Best-effort build a dataflow accelerator from the specified directory.
......
......@@ -139,6 +139,7 @@ estimate_only_dataflow_steps = [
"step_streamline",
"step_convert_to_hls",
"step_create_dataflow_partition",
"step_make_distributed",
"step_target_fps_parallelization",
"step_apply_folding_config",
"step_minimize_bit_width",
......@@ -351,6 +352,9 @@ class DataflowBuildConfig:
#: rtlsim, otherwise they will be replaced by HLS implementations.
rtlsim_use_vivado_comps: Optional[bool] = True
#: If specified the model will be partitioned across the given number of boards
num_boards: Optional[int] = None
def _resolve_hls_clk_period(self):
if self.hls_clk_period_ns is None:
# use same clk for synth and hls if not explicitly specified
......
......@@ -85,8 +85,10 @@ from finn.transformation.fpgadataflow.derive_characteristic import (
DeriveFIFOSizes,
)
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.insert_accl import InsertACCL
from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
from finn.transformation.fpgadataflow.make_distributed import MakeDistributed
from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
from finn.transformation.fpgadataflow.minimize_accumulator_width import (
......@@ -125,11 +127,11 @@ from finn.util.pyverilator import verilator_fifosim
from finn.util.test import execute_parent
def verify_step(
def verify_model(
model: ModelWrapper,
cfg: DataflowBuildConfig,
step_name: str,
need_parent: bool,
rtlsim_pre_hook=None,
):
print("Running verification for " + step_name)
......@@ -144,40 +146,22 @@ def verify_step(
for b in range(bsize_in):
in_npy = np.expand_dims(in_npy_all[b], axis=0)
exp_out_npy = np.expand_dims(exp_out_npy_all[b], axis=0)
if need_parent:
assert cfg.save_intermediate_models, "Enable save_intermediate_models for verification"
parent_model_fn = intermediate_models_dir + "/dataflow_parent.onnx"
child_model_fn = intermediate_models_dir + "/verify_%s.onnx" % step_name
model.save(child_model_fn)
parent_model = ModelWrapper(parent_model_fn)
out_tensor_name = parent_model.graph.output[0].name
exp_ishape = parent_model.get_tensor_shape(parent_model.graph.input[0].name)
if in_npy.shape != exp_ishape:
print(
"Verification input has shape %s while model expects %s"
% (str(in_npy.shape), str(exp_ishape))
)
print("Attempting to force model shape on verification input")
in_npy = in_npy.reshape(exp_ishape)
out_dict = execute_parent(parent_model_fn, child_model_fn, in_npy, return_full_ctx=True)
out_npy = out_dict[out_tensor_name]
inp_tensor_name = model.graph.input[0].name
out_tensor_name = model.graph.output[0].name
exp_ishape = model.get_tensor_shape(inp_tensor_name)
if in_npy.shape != exp_ishape:
print(
"Verification input has shape %s while model expects %s"
% (str(in_npy.shape), str(exp_ishape))
)
print("Attempting to force model shape on verification input")
in_npy = in_npy.reshape(exp_ishape)
inp_dict = {inp_tensor_name: in_npy}
if rtlsim_pre_hook is not None:
out_dict = rtlsim_exec(model, inp_dict, pre_hook=rtlsim_pre_hook)
else:
inp_tensor_name = model.graph.input[0].name
out_tensor_name = model.graph.output[0].name
exp_ishape = model.get_tensor_shape(inp_tensor_name)
if in_npy.shape != exp_ishape:
print(
"Verification input has shape %s while model expects %s"
% (str(in_npy.shape), str(exp_ishape))
)
print("Attempting to force model shape on verification input")
in_npy = in_npy.reshape(exp_ishape)
inp_dict = {inp_tensor_name: in_npy}
if rtlsim_pre_hook is not None:
out_dict = rtlsim_exec(model, inp_dict, pre_hook=rtlsim_pre_hook)
else:
out_dict = execute_onnx(model, inp_dict, True)
out_npy = out_dict[out_tensor_name]
out_dict = execute_onnx(model, inp_dict, True)
out_npy = out_dict[out_tensor_name]
exp_oshape = exp_out_npy.shape
if out_npy.shape != exp_oshape:
print(
......@@ -213,6 +197,59 @@ def verify_step(
print("Verification for %s : %s" % (step_name, res_to_str[all_res]))
def dataflow_partition_map(step):
def f(model: ModelWrapper, cfg: DataflowBuildConfig):
d_nodes = model.get_nodes_by_op_type("DistributedDataflow")
if d_nodes:
assert len(d_nodes) == 1, "Only one distributed dataflow node is supported"
d_model_file = getCustomOp(d_nodes[0]).get_nodeattr("model")
d_model = ModelWrapper(d_model_file)
p_nodes = d_model.get_nodes_by_op_type("StreamingDataflowPartition")
else:
p_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition")
for p_node in p_nodes:
p_node_inst = getCustomOp(p_node)
partition_id = p_node_inst.get_nodeattr("partition_id")
p_node_file = p_node_inst.get_nodeattr("model")
p_model = ModelWrapper(p_node_file)
if len(p_nodes) > 1:
child_cfg = deepcopy(cfg)
child_cfg.output_dir = cfg.output_dir + f"/{partition_id}"
else:
child_cfg = cfg
os.makedirs(child_cfg.output_dir, exist_ok=True)
p_model = step(p_model, child_cfg)
p_model.save(p_node_file)
return model
f.__name__ = step.__name__
return f
def verify_step(step_type):
def decorator(step):
def f(model: ModelWrapper, cfg: DataflowBuildConfig):
model = step(model, cfg)
if step_type in cfg._resolve_verification_steps():
verify_model(model, cfg, step.__name__)
return model
f.__name__ = step.__name__
return f
return decorator
def prepare_for_stitched_ip_rtlsim(verify_model, cfg):
if not cfg.rtlsim_use_vivado_comps:
need_restitch = False
......@@ -257,6 +294,7 @@ def prepare_for_stitched_ip_rtlsim(verify_model, cfg):
return verify_model
@verify_step(VerificationStepType.QONNX_TO_FINN_PYTHON)
def step_qonnx_to_finn(model: ModelWrapper, cfg: DataflowBuildConfig):
"""
This step will only execute if QONNX nodes are found.
......@@ -288,6 +326,7 @@ def step_qonnx_to_finn(model: ModelWrapper, cfg: DataflowBuildConfig):
return model
@verify_step(VerificationStepType.TIDY_UP_PYTHON)
def step_tidy_up(model: ModelWrapper, cfg: DataflowBuildConfig):
"""Run the tidy-up step on given model. This includes shape and datatype
inference, constant folding, and giving nodes and tensors better names.
......@@ -300,12 +339,10 @@ def step_tidy_up(model: ModelWrapper, cfg: DataflowBuildConfig):
model = model.transform(InferDataTypes())
model = model.transform(RemoveStaticGraphInputs())
if VerificationStepType.TIDY_UP_PYTHON in cfg._resolve_verification_steps():
verify_step(model, cfg, "initial_python", need_parent=False)
return model
@verify_step(VerificationStepType.STREAMLINED_PYTHON)
def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
"""Run streamlining on given model. Streamlining involves moving floating point
scale/shift parameters around, collapsing adjacent ones into a single parameter,
......@@ -330,9 +367,6 @@ def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
model = model.transform(InferDataLayouts())
model = model.transform(RemoveUnusedTensors())
if VerificationStepType.STREAMLINED_PYTHON in cfg._resolve_verification_steps():
verify_step(model, cfg, "streamlined_python", need_parent=False)
return model
......@@ -376,20 +410,24 @@ def step_create_dataflow_partition(model: ModelWrapper, cfg: DataflowBuildConfig
parent_model = model.transform(
CreateDataflowPartition(
partition_model_dir=cfg.output_dir + "/intermediate_models/supported_op_partitions"
partition_model_dir=cfg.output_dir + "/intermediate_models/supported_op_partitions",
)
)
sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
assert len(sdp_nodes) == 1, "Only a single StreamingDataflowPartition supported."
sdp_node = sdp_nodes[0]
sdp_node = getCustomOp(sdp_node)
dataflow_model_filename = sdp_node.get_nodeattr("model")
if cfg.save_intermediate_models:
parent_model.save(cfg.output_dir + "/intermediate_models/dataflow_parent.onnx")
model = ModelWrapper(dataflow_model_filename)
return model
return parent_model
def step_make_distributed(model: ModelWrapper, cfg: DataflowBuildConfig):
model = model.transform(
MakeDistributed(cfg.synth_clk_period_ns, cfg.board, cfg.num_boards)
)
d_nodes = model.get_nodes_by_op_type("DistributedDataflow")
assert len(d_nodes) == 1, "Only a single DistributedDataflow node supported"
return model
@dataflow_partition_map
def step_target_fps_parallelization(model: ModelWrapper, cfg: DataflowBuildConfig):
"""If target_fps was specified, use the SetFolding transformation to determine
parallelization attributes. The auto-generated config will be saved under
......@@ -420,6 +458,7 @@ def step_target_fps_parallelization(model: ModelWrapper, cfg: DataflowBuildConfi
return model
@dataflow_partition_map
def step_apply_folding_config(model: ModelWrapper, cfg: DataflowBuildConfig):
"""Apply the folding configuration file onto the model to set folding (parallelization)
and other attributes, if config file is specified."""
......@@ -428,15 +467,24 @@ def step_apply_folding_config(model: ModelWrapper, cfg: DataflowBuildConfig):
model = model.transform(GiveUniqueNodeNames())
model = model.transform(ApplyConfig(cfg.folding_config_file))
return model
@verify_step(VerificationStepType.FOLDED_HLS_CPPSIM)
@dataflow_partition_map
def step_insert_accl_comms(model: ModelWrapper, cfg: DataflowBuildConfig):
model = model.transform(InsertACCL)
if VerificationStepType.FOLDED_HLS_CPPSIM in cfg._resolve_verification_steps():
# prepare cppsim
model = model.transform(PrepareCppSim())
model = model.transform(CompileCppSim())
model = model.transform(SetExecMode("cppsim"))
verify_step(model, cfg, "folded_hls_cppsim", need_parent=True)
return model
@dataflow_partition_map
def step_generate_estimate_reports(model: ModelWrapper, cfg: DataflowBuildConfig):
"Generate per-layer resource and cycle estimates using analytical models."
......@@ -472,6 +520,7 @@ def step_generate_estimate_reports(model: ModelWrapper, cfg: DataflowBuildConfig
return model
@dataflow_partition_map
def step_minimize_bit_width(model: ModelWrapper, cfg: DataflowBuildConfig):
"""Tighten the weight and accumulator bit widths for each layer."""
if cfg.minimize_bit_width:
......@@ -817,7 +866,6 @@ def step_deployment_package(model: ModelWrapper, cfg: DataflowBuildConfig):
copy_tree(driver_dir, deploy_dir + "/driver")
return model
#: map step name strings to step functions
build_dataflow_step_lookup = {
"step_qonnx_to_finn": step_qonnx_to_finn,
......@@ -825,6 +873,7 @@ build_dataflow_step_lookup = {
"step_streamline": step_streamline,
"step_convert_to_hls": step_convert_to_hls,
"step_create_dataflow_partition": step_create_dataflow_partition,
"step_make_distributed": step_make_distributed,
"step_target_fps_parallelization": step_target_fps_parallelization,
"step_apply_folding_config": step_apply_folding_config,
"step_minimize_bit_width": step_minimize_bit_width,
......
......@@ -39,6 +39,7 @@ from finn.custom_op.fpgadataflow.convolutioninputgenerator1d import (
from finn.custom_op.fpgadataflow.convolutioninputgenerator_rtl import (
ConvolutionInputGenerator_rtl,
)
from finn.custom_op.fpgadataflow.distributed_dataflow import DistributedDataflow
from finn.custom_op.fpgadataflow.downsampler import DownSampler
from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch
from finn.custom_op.fpgadataflow.eltwise import StreamingEltwise
......@@ -63,7 +64,7 @@ from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker
from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour_Batch
from finn.custom_op.fpgadataflow.vectorvectoractivation import VectorVectorActivation
from finn.custom_op.fpgadataflow.accl import AcclIn, AcclOut
from finn.custom_op.fpgadataflow.accl import ACCLIn, ACCLOut
custom_op = dict()
......@@ -95,5 +96,6 @@ custom_op["StreamingConcat"] = StreamingConcat
custom_op["CheckSum"] = CheckSum
custom_op["StreamingEltwise"] = StreamingEltwise
custom_op["FMPadding_rtl"] = FMPadding_rtl
custom_op["AcclIn"] = AcclIn
custom_op["AcclOut"] = AcclOut
custom_op["DistributedDataflow"] = DistributedDataflow
custom_op["ACCLIn"] = ACCLIn
custom_op["ACCLOut"] = ACCLOut
......@@ -38,7 +38,7 @@ from IPython.core.debugger import set_trace
import subprocess
import os
class AcclOp(HLSCustomOp):
class ACCLOp(HLSCustomOp):
def get_nodeattr_types(self):
my_attrs = {
"NumChannels": ("i", True, 0),
......@@ -153,7 +153,7 @@ class AcclOp(HLSCustomOp):
def verify_node(self):
...
class AcclOut(AcclOp):
class ACCLOut(ACCLOp):
def get_instream_width(self, ind=0):
return self.get_nodeattr("streamWidth")
......@@ -245,7 +245,7 @@ class AcclOut(AcclOp):
def blackboxfunction(self):
pass
class AcclIn(AcclOp):
class ACCLIn(ACCLOp):
def get_instream_width(self, ind=0):
return self.get_nodeattr("intfWidth")
......
from qonnx.custom_op.base import CustomOp
class DistributedDataflow(CustomOp):
def get_nodeattr_types(self):
return {
"model": ("s", True, ""),
"instance_name": ("s", False, ""),
"return_full_exec_context": ("i", False, 0),
"world_size": ("i", True, -1),
}
def make_shape_compatible_op(self, model):
pass
def infer_node_datatype(self, model):
pass
def verify_node(self):
pass
def execute_node():
...
......@@ -43,15 +43,13 @@ class CreateDataflowPartition(Transformation):
that indicates the filename for the second graph that only contains
dataflow nodes. No action is taken if there are no dataflow nodes."""
def __init__(self, partition_model_dir=None, num_devices=1):
def __init__(self, partition_model_dir=None):
super().__init__()
if partition_model_dir is None:
self.partition_model_dir = make_build_dir("dataflow_partition_")
else:
self.partition_model_dir = partition_model_dir
self.num_devices = num_devices
def apply(self, model):
def filter_fc_extw(x):
if x.op_type == "IODMA":
......@@ -64,23 +62,6 @@ class CreateDataflowPartition(Transformation):
if len(extw_dma_nodes) > 0:
model = model.transform(ExternalizeParams())
node_stats = dict()
def compute_node_stats(node):
if node.name not in node_stats:
num_nodes_up_to = 1
predecessors = model.find_direct_predecessors(node)
if predecessors:
for pred in predecessors:
compute_node_stats(pred)
num_nodes_up_to += node_stats[pred.name]
node_stats[node.name] = num_nodes_up_to
for node in model.graph.node:
compute_node_stats(node)
total_nodes = max(node_stats.values())
def assign_partition_id(node):
if node.op_type in ["GenericPartition", "StreamingDataflowPartition"]:
return -1
......@@ -91,7 +72,7 @@ class CreateDataflowPartition(Transformation):
if assigned_partition is not None:
return assigned_partition.i
else:
return int(node_stats[node.name] / (total_nodes + 1) * self.num_devices)
return 0
else:
return -1
......@@ -134,9 +115,4 @@ class CreateDataflowPartition(Transformation):
new_p_node_inst.set_nodeattr("slr", slr)
new_p_node_inst.set_nodeattr("mem_port", mem_port)
p_model.set_metadata_prop("accl_world_size", str(self.num_devices))
p_model.set_metadata_prop("accl_rank", str(partition_ind))
p_model.save(node_model_filename)
return (parent_model, False)
......@@ -36,7 +36,7 @@ from qonnx.transformation.general import SortGraph
from qonnx.util.basic import get_by_name
class InsertAccl(Transformation):
class InsertACCL(Transformation):
def __init__(self, max_intfwidth=512):
self.max_intfwidth = 512
......@@ -59,7 +59,7 @@ class InsertAccl(Transformation):
graph_in_names = [x.name for x in model.graph.input]
for graph_in_name in graph_in_names:
first_node = model.find_consumer(graph_in_name)
if first_node.op_type == "AcclIn":
if first_node.op_type == "ACCLIn":
continue
else:
in_shape = model.get_tensor_shape(graph_in_name)
......@@ -91,7 +91,7 @@ class InsertAccl(Transformation):
first_node.input[0] = first_node_in.name
accl_node = oh.make_node(
"AcclIn",
"ACCLIn",
[graph_in_name],
[first_node_in.name],
numInputVectors=in_folded_shape[:-1],
......@@ -111,7 +111,7 @@ class InsertAccl(Transformation):
graph_out_names = [x.name for x in model.graph.output]
for graph_out_name in graph_out_names:
final_node = model.find_producer(graph_out_name)
if final_node.op_type == "AcclOut":
if final_node.op_type == "ACCLOut":
continue
else:
out_shape = model.get_tensor_shape(graph_out_name)
......@@ -144,7 +144,7 @@ class InsertAccl(Transformation):
# FIXME: currently always using 8-bit dtypes to work around the
# padding problems for i/o DMA
dma_node = oh.make_node(
"AcclOut",
"ACCLOut",
[final_node_out.name],
[graph_out_name],
numInputVectors=out_folded_shape[:-1],
......
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.base import Transformation
from qonnx.transformation.create_generic_partitions import PartitionFromLambda
from qonnx.util.basic import get_by_name
from finn.util.basic import make_build_dir
from finnexperimental.analysis.partitioning import partition
class MakeDistributed(Transformation):
def __init__(self, target_clk_ns, target_platform, ndevices):
self.target_clk_ns = target_clk_ns
self.target_platform = target_platform
self.ndevices = ndevices
self.partition_model_dir = make_build_dir("distributed_partitions_")
def apply(self, model):
child_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition")
assert len(child_nodes) == 1
child_node = child_nodes[0]
child_node_inst = getCustomOp(child_node)
child_model = ModelWrapper(child_node_inst.get_nodeattr("model"))
floorplan = partition(child_model, self.target_clk_ns, self.target_platform, self.ndevices)[0]
def assign_partition_id(node):
if node.op_type in ["GenericPartition", "StreamingDataflowPartition"]:
return -1
else:
backend = get_by_name(node.attribute, "backend")
if backend is not None and backend.s.decode("UTF-8") == "fpgadataflow":
assigned_partition = get_by_name(node.attribute, "partition_id")
if assigned_partition is not None:
return assigned_partition.i
else:
floorplan[node.name]["device_id"]
else:
return -1
distr_model = child_model.transform(
PartitionFromLambda(
partitioning=assign_partition_id, partition_dir=self.partition_model_dir
)
)
p_nodes = distr_model.get_nodes_by_op_type("GenericPartition")
print(floorplan)
for partition_ind, p_node in enumerate(p_nodes):
# done, change node type and add info in parent graph
p_node.op_type = "StreamingDataflowPartition"
p_node.domain = "finn.custom_op.fpgadataflow"
new_p_node_inst = getCustomOp(p_node)
new_p_node_inst.set_nodeattr("partition_id", partition_ind)
child_node.op_type = "DistributedDataflow"
new_child_node_inst = getCustomOp(child_node)
new_child_node_inst.set_nodeattr("world_size", len(p_nodes))
distr_model_file = self.partition_model_dir + "/distributed_dataflow.onnx"
distr_model.save(distr_model_file)
new_child_node_inst.set_nodeattr("model", distr_model_file)
return (model, False)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment