[mpact][benchmark] add more benchmarks (#27)
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 4256292..e473f9e 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt
@@ -18,8 +18,26 @@ utils/*.py ) -add_mlir_python_modules(MPACTBenchmarkPythonPythonModules +add_mlir_python_modules(MPACTBenchmarkPythonModules ROOT_PREFIX "${MPACT_PYTHON_PACKAGES_DIR}/mpact/mpact_benchmark" INSTALL_PREFIX "python_packages/mpact/mpact_benchmark" DECLARED_SOURCES MPACTBenchmarkPythonSources ) + +add_custom_target(build-benchmark-mpact) +add_dependencies(build-benchmark-mpact MPACTPythonModules MPACTBenchmarkPythonModules) + +add_custom_target(benchmark-mpact) +add_dependencies(benchmark-mpact build-benchmark-mpact) +file(GLOB PYTHON_FILES "${CMAKE_CURRENT_SOURCE_DIR}/python/benchmarks/*.py") + +# Loop over each matched .py file and create a custom command to run it. +foreach(PY_FILE IN LISTS PYTHON_FILES) + add_custom_command( + TARGET benchmark-mpact + COMMAND cmake -E echo "Running ${PY_FILE}" + COMMAND python ${PY_FILE} + DEPENDS ${PY_FILE} + USES_TERMINAL + ) +endforeach()
diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 0000000..4749547 --- /dev/null +++ b/benchmark/README.md
@@ -0,0 +1,26 @@
+### Run benchmarks
+
+To run all benchmarks:
+
+```shell
+cmake --build build --target benchmark-mpact
+```
+
+To run selected benchmarks, build the benchmark modules first:
+
+```shell
+cmake --build build --target build-benchmark-mpact
+```
+
+Then run the benchmark file you are interested in:
+
+```shell
+python path/to/the/<name>_benchmark.py
+```
+
+To run only selected kernels in `kernels_benchmark.py`, pass their names
+(comma-separated) with the `--benchmark-filter` flag, for example:
+
+```shell
+python path/to/the/kernels_benchmark.py --benchmark-filter=add
+```
diff --git a/benchmark/python/benchmarks/gcn_benchmark.py b/benchmark/python/benchmarks/gcn_benchmark.py old mode 100644 new mode 100755 index e69de29..ef7a3bd --- a/benchmark/python/benchmarks/gcn_benchmark.py +++ b/benchmark/python/benchmarks/gcn_benchmark.py
@@ -0,0 +1,37 @@
+import torch
+import numpy as np
+from mpact.models.gcn import GraphConv
+from mpact_benchmark.utils.benchmark_utils import benchmark, Backends
+
+
+@benchmark(
+    [
+        {
+            "name": f"{fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": fmt,
+            "dtype": dtype,
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+            "backends": list(Backends),
+        }
+        for shape in [
+            [[128, 128], [128, 128]],
+            [[512, 512], [512, 512]],
+            [[1024, 1024], [1024, 1024]],
+        ]
+        for fmt in [["dense", "csr"]]
+        for dtype in [np.float32]
+    ]
+)
+def GCN() -> type[torch.nn.Module]:
+    """Graph Convolution Network.
+
+    Returns the GraphConv class itself, not an instance: the benchmark
+    wrapper instantiates it from the first input shape of each test case.
+    """
+    return GraphConv
+
+
+if __name__ == "__main__":
+    GCN()
diff --git a/benchmark/python/benchmarks/kernels_benchmark.py b/benchmark/python/benchmarks/kernels_benchmark.py index e69de29..3ad9cce 100644 --- a/benchmark/python/benchmarks/kernels_benchmark.py +++ b/benchmark/python/benchmarks/kernels_benchmark.py
@@ -0,0 +1,170 @@
+import torch
+import argparse
+import numpy as np
+from mpact_benchmark.utils.benchmark_utils import benchmark, Backends
+
+
+def _binary_cases(shapes, rhs_formats=("dense", "csr")):
+    """Build the test cases for a kernel taking two operands.
+
+    Produces one case per (shape, lhs format, rhs format) combination; the
+    lhs is always tried as both dense and csr.
+    """
+    return [
+        {
+            "name": f"{lhs_fmt}_{rhs_fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": (lhs_fmt, rhs_fmt),
+            "dtype": dtype,
+            "backends": list(Backends),
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+        }
+        for shape in shapes
+        for lhs_fmt in ["dense", "csr"]
+        for rhs_fmt in rhs_formats
+        for dtype in [np.float64]
+    ]
+
+
+@benchmark(_binary_cases([([2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)]))
+def matmul() -> torch.nn.Module:
+    """Matrix multiplication."""
+
+    class Net(torch.nn.Module):
+        def forward(self, x, y):
+            return torch.matmul(x, y)
+
+    return Net()
+
+
+@benchmark(
+    _binary_cases(
+        [([2**i, 2**i], [2**i]) for i in range(5, 8)],
+        # torch.mv only supports dense vector for now.
+        rhs_formats=["dense"],
+    )
+)
+def matvec() -> torch.nn.Module:
+    """Matrix-vector multiplication."""
+
+    class Net(torch.nn.Module):
+        def forward(self, x, y):
+            return torch.mv(x, y)
+
+    return Net()
+
+
+# Sizes stop at 128x128: 512x512 crashes runtime.
+@benchmark(_binary_cases([([2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)]))
+def add() -> torch.nn.Module:
+    """Element-wise addition."""
+
+    class Net(torch.nn.Module):
+        def forward(self, x, y):
+            return torch.add(x, y)
+
+    return Net()
+
+
+@benchmark(_binary_cases([([2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)]))
+def elt_mul() -> torch.nn.Module:
+    """Element-wise multiplication."""
+
+    class Net(torch.nn.Module):
+        def forward(self, x, y):
+            return torch.mul(x, y)
+
+    return Net()
+
+
+@benchmark(
+    [
+        {
+            "name": f"{fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": (fmt,),
+            "dtype": dtype,
+            "backends": list(Backends),
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+        }
+        for shape in [([2**i, 2**i],) for i in range(2, 3)]
+        for fmt in ["dense", "csr"]
+        for dtype in [np.float64]
+    ]
+)
+def nop() -> torch.nn.Module:
+    """Returns matrix unmodified (speed of light)."""
+
+    class Net(torch.nn.Module):
+        def forward(self, x):
+            return x
+
+    return Net()
+
+
+@benchmark(
+    [
+        {
+            "name": f"{sample_fmt}_sample_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": (sample_fmt, "dense", "dense"),
+            "dtype": dtype,
+            "backends": list(Backends),
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+        }
+        for shape in [
+            ([2**i, 2**i], [2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)
+        ]
+        for sample_fmt in ["dense", "csr"]
+        for dtype in [np.float64]
+    ]
+)
+def sddmm() -> torch.nn.Module:
+    """Sampled dense-dense matrix multiplication (SDDMM): C = S ○ (A X B)."""
+
+    class Net(torch.nn.Module):
+        def forward(self, x, y, z):
+            return torch.mul(x, torch.matmul(y, z))
+
+    return Net()
+
+
+if __name__ == "__main__":
+    # Registry of runnable kernels, listed in the default run order.
+    benchmarks = {
+        "nop": nop,
+        "add": add,
+        "matmul": matmul,
+        "matvec": matvec,
+        "elt_mul": elt_mul,
+        "sddmm": sddmm,
+    }
+
+    parser = argparse.ArgumentParser(
+        prog="pytorch_kernel_benchmarks",
+        description="Run a set of given PyTorch kernel benchmarks",
+    )
+    parser.add_argument(
+        "--benchmark-filter",
+        type=str,
+        default="",
+        required=False,
+        help="comma-separated names of the kernels to run (default: all)",
+    )
+    arguments = parser.parse_args()
+
+    benchmark_list = list(benchmarks)
+    if arguments.benchmark_filter:
+        requested = arguments.benchmark_filter.split(",")
+        benchmark_list = [name.strip() for name in requested if name.strip()]
+        unknown = [name for name in benchmark_list if name not in benchmarks]
+        if unknown:
+            # Fail with a usage message instead of a KeyError traceback.
+            parser.error(f"unknown benchmark(s): {', '.join(unknown)}")
+
+    # Run selected benchmarks.
+    for benchmark_name in benchmark_list:
+        benchmarks[benchmark_name]()
diff --git a/benchmark/python/benchmarks/lif_benchmark.py b/benchmark/python/benchmarks/lif_benchmark.py index 5c789ea..bd5f0c1 100644 --- a/benchmark/python/benchmarks/lif_benchmark.py +++ b/benchmark/python/benchmarks/lif_benchmark.py
@@ -9,7 +9,7 @@ { "name": f"{fmt}_{shape}_{dtype.__name__}", "shape": shape, - "formats": [fmt], + "formats": fmt, "dtype": dtype, # Simulate batch normalization. "drange": (-1, 1), @@ -31,13 +31,13 @@ [[32, 3, 64, 64, 1]], [[16, 3, 224, 224, 1]], ] - for fmt in ["dense"] + for fmt in [["dense"]] for dtype in [np.float64] ] ) def SNN() -> torch.nn.Module: """Spiking Neural Network.""" - return Block + return Block() if __name__ == "__main__":
diff --git a/benchmark/python/benchmarks/resnet_benchmark.py b/benchmark/python/benchmarks/resnet_benchmark.py index e69de29..b455ef0 100644 --- a/benchmark/python/benchmarks/resnet_benchmark.py +++ b/benchmark/python/benchmarks/resnet_benchmark.py
@@ -0,0 +1,36 @@
+import torch
+import numpy as np
+from mpact.models.resnet import resnet20
+from mpact_benchmark.utils.benchmark_utils import benchmark, Backends
+
+
+@benchmark(
+    [
+        {
+            "name": f"{fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": fmt,
+            "dtype": dtype,
+            "drange": (1, 100),
+            "sparsity": [0.5, 0.9],
+            # TODO: Torch inductor requires lower precision with larger input size,
+            # such as [8, 3, 32, 32].
+            "precision": 1e-3,
+            "backends": list(Backends),
+        }
+        for shape in [
+            [[1, 3, 16, 16]],
+        ]
+        for fmt in [["dense"]]
+        for dtype in [np.float32]
+    ]
+)
+def resnet() -> torch.nn.Module:
+    """ResNet-20 model, switched out of training mode."""
+    resnet_model = resnet20()
+    resnet_model.eval()
+    return resnet_model
+
+
+if __name__ == "__main__":
+    resnet()
diff --git a/benchmark/python/utils/benchmark_utils.py b/benchmark/python/utils/benchmark_utils.py index 275853b..ec29309 100644 --- a/benchmark/python/utils/benchmark_utils.py +++ b/benchmark/python/utils/benchmark_utils.py
@@ -123,9 +123,18 @@ ) ) case Backends.MPACT_SPARSE: - output.append( - torch.from_numpy(mpact_jit(torch_net, *sparse_inputs)) - ) + sp_out = mpact_jit(torch_net, *sparse_inputs) + # Construct sparse csr tensor if the output type is csr. + # TODO: return sparse tensor directly instead of a tuple of arrays. + if type(sp_out) is tuple: + # torch.sparse_csr_tensor could deduce the size incorrectly, + # so pass the dense_out's shape explicitly. + dense_out = mpact_jit(torch_net, *dense_inputs) + output.append( + torch.sparse_csr_tensor(*sp_out, size=dense_out.shape) + ) + else: + output.append(torch.from_numpy(sp_out)) invoker, f = mpact_jit_compile(torch_net, *sparse_inputs) compile_time_results.append( timer( @@ -167,7 +176,11 @@ # Sanity check. if output: - assert all(output[0].to_dense().allclose(out.to_dense()) for out in output) + rtol = variables["precision"] if "precision" in variables else 1e-5 + assert all( + torch.allclose(output[0].to_dense(), out.to_dense(), rtol=rtol) + for out in output + ) def benchmark(*args: Any) -> Callable: @@ -176,9 +189,9 @@ def decorator(func): @functools.wraps(func) def wrapper(test_cases=args[0]): - net = func() runtime_results = [] compile_time_results = [] + torch_net = net = func() for test_case in test_cases: label = func.__name__ for sparsity in test_case["sparsity"]: @@ -190,10 +203,11 @@ test_case["dtype"], test_case["drange"], ) + if "GCN" in label: torch_net = net(*test_case["shape"][0]) - else: - torch_net = net() + if "precision" in test_case: + precision = test_case["precision"] run_benchmark( sparse_inputs,
diff --git a/setup.py b/setup.py index 4208772..929ab41 100644 --- a/setup.py +++ b/setup.py
@@ -96,7 +96,7 @@ f".", f"--target", f"MPACTPythonModules", - f"MPACTBenchmarkPythonPythonModules", + f"MPACTBenchmarkPythonModules", ] try: