[mpact][benchmark] manual sum of squares benchmark (#65)
* [mpact][benchmark] manual sum of squares benchmark
This introduces a "manual" benchmark directory where we can put
benchmarking code without adding extra load to the running
time of the regular benchmark suite.
* use 4K instead of 1K
* lint
* undo edits
diff --git a/benchmark/python/manual/README.md b/benchmark/python/manual/README.md
new file mode 100644
index 0000000..7064502
--- /dev/null
+++ b/benchmark/python/manual/README.md
@@ -0,0 +1,9 @@
+### Benchmarks run by hand
+
+These benchmarks are not run as part of MPACT's regular testing or benchmarking.
+To run an individual benchmark, build the MPACT compiler, `cd` into this
+directory, and then run the benchmark as follows:
+
+```shell
+python <benchmark-name>.py
+```
diff --git a/benchmark/python/manual/sum_of_sq.py b/benchmark/python/manual/sum_of_sq.py
new file mode 100644
index 0000000..717259e
--- /dev/null
+++ b/benchmark/python/manual/sum_of_sq.py
@@ -0,0 +1,59 @@
+import torch
+import numpy as np
+import time
+
+from mpact.mpactbackend import mpact_jit_compile, mpact_jit_run
+from mpact_benchmark.utils.tensor_generator import generate_tensor
+
+
+def runbench_eager(tag, sp, net, x, num_iters=1000): # time direct net(x) calls and print the mean time per call
+ net(x) # warmup: one untimed call before the clock starts
+ checksum = 0 # running sum of every result, printed next to the timing
+ start = time.time()
+ for i in range(num_iters):
+ res = net(x).item() # .item() extracts a plain Python number from the result
+ checksum = checksum + res
+ end = time.time()
+ time_ms = (end - start) * 1000 / num_iters # seconds -> milliseconds, averaged over num_iters
+ print("%s : %.2f : %8.4f ms. : checksum=%d" % (tag, sp, time_ms, checksum)) # columns: tag, sp, ms per iteration, checksum (%d prints it as an integer)
+
+
+def runbench_mpact(tag, sp, net, x, num_iters=1000): # time runs of the MPACT-compiled net and print the mean time per run
+ invoker, fn = mpact_jit_compile(net, x) # compiled once, outside the timed region
+ mpact_jit_run(invoker, fn, x) # warmup: one untimed run before the clock starts
+ checksum = 0 # running sum of every result, printed next to the timing
+ start = time.time()
+ for i in range(num_iters):
+ res = mpact_jit_run(invoker, fn, x) # NOTE(review): no .item() here, unlike runbench_eager; presumably the result is already scalar-like, confirm
+ checksum = checksum + res
+ end = time.time()
+ time_ms = (end - start) * 1000 / num_iters # seconds -> milliseconds, averaged over num_iters
+ print("%s : %.2f : %8.4f ms. : checksum=%d" % (tag, sp, time_ms, checksum)) # columns: tag, sp, ms per iteration, checksum (%d prints it as an integer)
+
+
+class SqSumNet(torch.nn.Module): # benchmarked module: sum of squares of its input
+ def forward(self, x): # returns the sum of the elementwise product x * x
+ # TODO: make this work too: return (x ** 2).sum()
+ return (x * x).sum()
+
+
+net = SqSumNet() # the single module instance used for every run below
+h = 1024 * 4 # input height: 4096
+w = 1024 * 4 # input width: 4096
+
+for d in range(0, 101, 10): # d = 0, 10, ..., 100; it is the complement of sparsity, in percent
+ sparsity = 1.0 - (d / 100.0) # sweeps from 1.0 down to 0.0
+ x = generate_tensor( # presumably returns a dense torch tensor (to_sparse_csr() is called on it below); confirm
+ seed=0, shape=(h, w), sparsity=sparsity, dtype=np.float32, drange=(1.0, 1.0)
+ )
+
+ # Note: PyTorch has no binary-valued sparse tensors,
+ # so we are using CSR. For now, we have to hack the
+ # "explicitVal=1.0:f32"
+ # into the MLIR sparse tensor type to optimize it fully.
+ s = x.to_sparse_csr() # CSR copy of the same data, used for the sparse runs
+
+ runbench_eager("PyTorch (dense) ", sparsity, net, x) # four runs per sparsity level: eager and MPACT, dense then sparse
+ runbench_mpact("MPACT (dense) ", sparsity, net, x)
+ runbench_eager("PyTorch (sparse)", sparsity, net, s)
+ runbench_mpact("MPACT (sparse)", sparsity, net, s)