[mpact][benchmark] add more benchmarks (#27)
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index 4256292..e473f9e 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -18,8 +18,26 @@
utils/*.py
)
-add_mlir_python_modules(MPACTBenchmarkPythonPythonModules
+add_mlir_python_modules(MPACTBenchmarkPythonModules
ROOT_PREFIX "${MPACT_PYTHON_PACKAGES_DIR}/mpact/mpact_benchmark"
INSTALL_PREFIX "python_packages/mpact/mpact_benchmark"
DECLARED_SOURCES MPACTBenchmarkPythonSources
)
+
+add_custom_target(build-benchmark-mpact)
+add_dependencies(build-benchmark-mpact MPACTPythonModules MPACTBenchmarkPythonModules)
+
+add_custom_target(benchmark-mpact)
+add_dependencies(benchmark-mpact build-benchmark-mpact)
+file(GLOB PYTHON_FILES "${CMAKE_CURRENT_SOURCE_DIR}/python/benchmarks/*.py")
+
+# Loop over each matched .py file and create a custom command to run it.
+foreach(PY_FILE IN LISTS PYTHON_FILES)
+ add_custom_command(
+ TARGET benchmark-mpact
+ COMMAND cmake -E echo "Running ${PY_FILE}"
+ COMMAND python ${PY_FILE}
+ DEPENDS ${PY_FILE}
+ USES_TERMINAL
+ )
+endforeach()
diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 0000000..4749547
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,26 @@
+### Run benchmarks
+
+To run all benchmarks:
+
+```shell
+cmake --build build --target benchmark-mpact
+```
+
+To run selected benchmarks, build the benchmark modules first:
+
+```shell
+cmake --build build --target build-benchmark-mpact
+```
+
+And then run the benchmark file:
+
+```shell
+python path/to/the/<name>_benchmark.py
+```
+
+To run only selected kernels from kernels_benchmark.py, pass a comma-separated
+list of kernel names to the `--benchmark-filter` flag, for example:
+
+```shell
+python path/to/the/kernels_benchmark.py --benchmark-filter=add
+```
diff --git a/benchmark/python/benchmarks/gcn_benchmark.py b/benchmark/python/benchmarks/gcn_benchmark.py
old mode 100644
new mode 100755
index e69de29..ef7a3bd
--- a/benchmark/python/benchmarks/gcn_benchmark.py
+++ b/benchmark/python/benchmarks/gcn_benchmark.py
@@ -0,0 +1,33 @@
+import torch
+import numpy as np
+from mpact.models.gcn import GraphConv
+from mpact_benchmark.utils.benchmark_utils import benchmark, Backends
+
+
+@benchmark(
+ [
+ {
+ "name": f"{fmt}_{shape}_{dtype.__name__}",
+ "shape": shape,
+ "formats": fmt,
+ "dtype": dtype,
+ "drange": (1, 100),
+ "sparsity": [0, 0.5, 0.9, 0.99],
+ "backends": [b for b in Backends],
+ }
+ for shape in [
+ [[128, 128], [128, 128]],
+ [[512, 512], [512, 512]],
+ [[1024, 1024], [1024, 1024]],
+ ]
+ for fmt in [["dense", "csr"]]
+ for dtype in [np.float32]
+ ]
+)
+def GCN() -> torch.nn.Module:
+ """Graph Convolution Network."""
+ return GraphConv
+
+
+if __name__ == "__main__":
+ GCN()
diff --git a/benchmark/python/benchmarks/kernels_benchmark.py b/benchmark/python/benchmarks/kernels_benchmark.py
index e69de29..3ad9cce 100644
--- a/benchmark/python/benchmarks/kernels_benchmark.py
+++ b/benchmark/python/benchmarks/kernels_benchmark.py
@@ -0,0 +1,210 @@
+import torch
+import argparse
+import numpy as np
+from mpact_benchmark.utils.benchmark_utils import benchmark, Backends
+
+
+@benchmark(
+ [
+ {
+ "name": f"{lhs_fmt}_{rhs_fmt}_{shape}_{dtype.__name__}",
+ "shape": shape,
+ "formats": (lhs_fmt, rhs_fmt),
+ "dtype": dtype,
+ "backends": [b for b in Backends],
+ "drange": (1, 100),
+ "sparsity": [0, 0.5, 0.9, 0.99],
+ }
+ for shape in [([2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)]
+ for lhs_fmt in ["dense", "csr"]
+ for rhs_fmt in ["dense", "csr"]
+ for dtype in [np.float64]
+ ]
+)
+def matmul() -> torch.nn.Module:
+ """Matrix multiplication."""
+
+ class Net(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, x, y):
+ return torch.matmul(x, y)
+
+ return Net()
+
+
+@benchmark(
+ [
+ {
+ "name": f"{lhs_fmt}_{rhs_fmt}_{shape}_{dtype.__name__}",
+ "shape": shape,
+ "formats": (lhs_fmt, rhs_fmt),
+ "dtype": dtype,
+ "backends": [b for b in Backends],
+ "drange": (1, 100),
+ "sparsity": [0, 0.5, 0.9, 0.99],
+ }
+ for shape in [([2**i, 2**i], [2**i]) for i in range(5, 8)]
+ for lhs_fmt in ["dense", "csr"]
+ for rhs_fmt in ["dense"] # torch.mv only supports dense vector for now.
+ for dtype in [np.float64]
+ ]
+)
+def matvec() -> torch.nn.Module:
+ """Matrix-vector multiplication."""
+
+ class Net(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, x, y):
+ return torch.mv(x, y)
+
+ return Net()
+
+
+@benchmark(
+ [
+ {
+ "name": f"{lhs_fmt}_{rhs_fmt}_{shape}_{dtype.__name__}",
+ "shape": shape,
+ "formats": (lhs_fmt, rhs_fmt),
+ "dtype": dtype,
+ "backends": [b for b in Backends],
+ "drange": (1, 100),
+ "sparsity": [0, 0.5, 0.9, 0.99],
+ }
+ for shape in [
+ ([2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)
+ ] # 512x512 crashes runtime.
+ for lhs_fmt in ["dense", "csr"]
+ for rhs_fmt in ["dense", "csr"]
+ for dtype in [np.float64]
+ ]
+)
+def add() -> torch.nn.Module:
+ """Element-wise addition."""
+
+ class Net(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, x, y):
+ return torch.add(x, y)
+
+ return Net()
+
+
+@benchmark(
+ [
+ {
+ "name": f"{lhs_fmt}_{rhs_fmt}_{shape}_{dtype.__name__}",
+ "shape": shape,
+ "formats": (lhs_fmt, rhs_fmt),
+ "dtype": dtype,
+ "backends": [b for b in Backends],
+ "drange": (1, 100),
+ "sparsity": [0, 0.5, 0.9, 0.99],
+ }
+ for shape in [([2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)]
+ for lhs_fmt in ["dense", "csr"]
+ for rhs_fmt in ["dense", "csr"]
+ for dtype in [np.float64]
+ ]
+)
+def elt_mul() -> torch.nn.Module:
+ """Element-wise multiplication."""
+
+ class Net(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, x, y):
+ return torch.mul(x, y)
+
+ return Net()
+
+
+@benchmark(
+ [
+ {
+ "name": f"{fmt}_{shape}_{dtype.__name__}",
+ "shape": shape,
+ "formats": (fmt,),
+ "dtype": dtype,
+ "backends": [b for b in Backends],
+ "drange": (1, 100),
+ "sparsity": [0, 0.5, 0.9, 0.99],
+ }
+ for shape in [([2**i, 2**i],) for i in range(2, 3)]
+ for fmt in ["dense", "csr"]
+ for dtype in [np.float64]
+ ]
+)
+def nop() -> torch.nn.Module:
+ """Returns matrix unmodified (speed of light)."""
+
+ class Net(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, x):
+ return x
+
+ return Net()
+
+
+@benchmark(
+ [
+ {
+ "name": f"{sample_fmt}_sample_{shape}_{dtype.__name__}",
+ "shape": shape,
+ "formats": (sample_fmt, "dense", "dense"),
+ "dtype": dtype,
+ "backends": [b for b in Backends],
+ "drange": (1, 100),
+ "sparsity": [0, 0.5, 0.9, 0.99],
+ }
+ for shape in [
+ ([2**i, 2**i], [2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)
+ ]
+ for sample_fmt in ["dense", "csr"]
+ for dtype in [np.float64]
+ ]
+)
+def sddmm() -> torch.nn.Module:
+ """SDDMM: C = S ○ (A X B) Sampled dense-dense matrix-matrix multiplication."""
+
+ class Net(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, x, y, z):
+ return torch.mul(x, torch.matmul(y, z))
+
+ return Net()
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ prog="pytorch_kernel_benchmarks",
+ description="Run a set of given PyTorch kernel benchmarks",
+ )
+ parser.add_argument("--benchmark-filter", type=str, default="", required=False)
+ arguments = parser.parse_args()
+
+ benchmark_list = [
+ "nop",
+ "add",
+ "matmul",
+ "matvec",
+ "elt_mul",
+ "sddmm",
+ ]
+ if arguments.benchmark_filter:
+ benchmark_list = arguments.benchmark_filter.split(",")
+
+ # Run selected benchmarks.
+ for benchmark_name in benchmark_list:
+ globals()[benchmark_name]()
diff --git a/benchmark/python/benchmarks/lif_benchmark.py b/benchmark/python/benchmarks/lif_benchmark.py
index 5c789ea..bd5f0c1 100644
--- a/benchmark/python/benchmarks/lif_benchmark.py
+++ b/benchmark/python/benchmarks/lif_benchmark.py
@@ -9,7 +9,7 @@
{
"name": f"{fmt}_{shape}_{dtype.__name__}",
"shape": shape,
- "formats": [fmt],
+ "formats": fmt,
"dtype": dtype,
# Simulate batch normalization.
"drange": (-1, 1),
@@ -31,13 +31,13 @@
[[32, 3, 64, 64, 1]],
[[16, 3, 224, 224, 1]],
]
- for fmt in ["dense"]
+ for fmt in [["dense"]]
for dtype in [np.float64]
]
)
def SNN() -> torch.nn.Module:
"""Spiking Neural Network."""
- return Block
+ return Block()
if __name__ == "__main__":
diff --git a/benchmark/python/benchmarks/resnet_benchmark.py b/benchmark/python/benchmarks/resnet_benchmark.py
index e69de29..b455ef0 100644
--- a/benchmark/python/benchmarks/resnet_benchmark.py
+++ b/benchmark/python/benchmarks/resnet_benchmark.py
@@ -0,0 +1,36 @@
+import torch
+import numpy as np
+from mpact.models.resnet import resnet20
+from mpact_benchmark.utils.benchmark_utils import benchmark, Backends
+
+
+@benchmark(
+ [
+ {
+ "name": f"{fmt}_{shape}_{dtype.__name__}",
+ "shape": shape,
+ "formats": fmt,
+ "dtype": dtype,
+ "drange": (1, 100),
+ "sparsity": [0.5, 0.9],
+ # TODO: Torch inductor requires lower precision with larger input size,
+ # such as [8, 3, 32, 32].
+ "precision": 1e-3,
+ "backends": [b for b in Backends],
+ }
+ for shape in [
+ [[1, 3, 16, 16]],
+ ]
+ for fmt in [["dense"]]
+ for dtype in [np.float32]
+ ]
+)
+def resnet() -> torch.nn.Module:
+ """ResNet20 model."""
+ resnet_model = resnet20()
+ resnet_model.train(False)
+ return resnet_model
+
+
+if __name__ == "__main__":
+ resnet()
diff --git a/benchmark/python/utils/benchmark_utils.py b/benchmark/python/utils/benchmark_utils.py
index 275853b..ec29309 100644
--- a/benchmark/python/utils/benchmark_utils.py
+++ b/benchmark/python/utils/benchmark_utils.py
@@ -123,9 +123,18 @@
)
)
case Backends.MPACT_SPARSE:
- output.append(
- torch.from_numpy(mpact_jit(torch_net, *sparse_inputs))
- )
+ sp_out = mpact_jit(torch_net, *sparse_inputs)
+ # Construct sparse csr tensor if the output type is csr.
+ # TODO: return sparse tensor directly instead of a tuple of arrays.
+ if type(sp_out) is tuple:
+ # torch.sparse_csr_tensor could deduce the size incorrectly,
+ # so pass the dense_out's shape explicitly.
+ dense_out = mpact_jit(torch_net, *dense_inputs)
+ output.append(
+ torch.sparse_csr_tensor(*sp_out, size=dense_out.shape)
+ )
+ else:
+ output.append(torch.from_numpy(sp_out))
invoker, f = mpact_jit_compile(torch_net, *sparse_inputs)
compile_time_results.append(
timer(
@@ -167,7 +176,11 @@
# Sanity check.
if output:
- assert all(output[0].to_dense().allclose(out.to_dense()) for out in output)
+ rtol = variables["precision"] if "precision" in variables else 1e-5
+ assert all(
+ torch.allclose(output[0].to_dense(), out.to_dense(), rtol=rtol)
+ for out in output
+ )
def benchmark(*args: Any) -> Callable:
@@ -176,9 +189,9 @@
def decorator(func):
@functools.wraps(func)
def wrapper(test_cases=args[0]):
- net = func()
runtime_results = []
compile_time_results = []
+ torch_net = net = func()
for test_case in test_cases:
label = func.__name__
for sparsity in test_case["sparsity"]:
@@ -190,10 +203,11 @@
test_case["dtype"],
test_case["drange"],
)
+
if "GCN" in label:
torch_net = net(*test_case["shape"][0])
- else:
- torch_net = net()
+ if "precision" in test_case:
+ precision = test_case["precision"]
run_benchmark(
sparse_inputs,
diff --git a/setup.py b/setup.py
index 4208772..929ab41 100644
--- a/setup.py
+++ b/setup.py
@@ -96,7 +96,7 @@
f".",
f"--target",
f"MPACTPythonModules",
- f"MPACTBenchmarkPythonPythonModules",
+ f"MPACTBenchmarkPythonModules",
]
try: