initial commit
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
new file mode 100644
index 0000000..423d705
--- /dev/null
+++ b/.github/workflows/build-and-test.yml
@@ -0,0 +1,66 @@
+name: Build and test
+
+on:
+  pull_request:
+    branches: [ "main" ]
+
+concurrency:
+  # Use PR number as key for a pull request or the commit hash otherwise. This cancels
+  # queued and in-progress runs for the same PR (presubmit) or commit
+  # (postsubmit).
+  group: ci-build-test-cpp-linux-${{ github.event.number || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    env:
+      CACHE_DIR: ${{ github.workspace }}/.ccache    
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: recursive  # torch-mlir lives under externals/ as a submodule
+        token: ${{ secrets.GITHUB_TOKEN }}
+
+    - name: Setup Python Version
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.11" # Quoted so YAML keeps it a string, not a float
+
+    - name: Set PYTHONPATH  # via GITHUB_ENV: a plain `export` is lost when the step ends
+      run: echo "PYTHONPATH=${GITHUB_WORKSPACE}/build/tools/mpact/python_packages/mpact" >> "$GITHUB_ENV"
+      shell: bash
+
+    - name: Set up ccache
+      uses: hendrikmuhs/ccache-action@v1.2
+
+    - name: Install requirements
+      run: |
+           export CCACHE_DIR=${{ env.CACHE_DIR }}
+           python -m pip install --upgrade pip
+           python -m pip install -r externals/torch-mlir/requirements.txt
+           python -m pip install -r externals/torch-mlir/torchvision-requirements.txt
+   
+    - name: Create build directory
+      run: mkdir build
+
+    - name: Configure CMake
+      run: >
+        cmake -GNinja -Bbuild
+        -DCMAKE_BUILD_TYPE=Release
+        -DLLVM_ENABLE_PROJECTS=mlir
+        -DLLVM_ENABLE_ASSERTIONS=ON
+        -DLLVM_EXTERNAL_PROJECTS="torch-mlir;mpact"
+        -DLLVM_EXTERNAL_TORCH_MLIR_SOURCE_DIR="${PWD}/externals/torch-mlir"
+        -DLLVM_EXTERNAL_MPACT_SOURCE_DIR="${PWD}"
+        -DLLVM_TARGETS_TO_BUILD=host
+        -DMLIR_ENABLE_BINDINGS_PYTHON=ON
+        -DCMAKE_C_COMPILER_LAUNCHER=ccache
+        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+        -DCMAKE_C_COMPILER=clang
+        -DCMAKE_CXX_COMPILER=clang++
+        "externals/torch-mlir/externals/llvm-project/llvm"
+
+    - name: Build
+      run: cmake --build build --target check-mpact
+
diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
new file mode 100644
index 0000000..8b1a835
--- /dev/null
+++ b/.github/workflows/format.yml
@@ -0,0 +1,93 @@
+name: "Check code formatting"
+
+permissions:
+  contents: read
+
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  code_formatter:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Fetch sources
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      - name: Checkout through merge base
+        uses: rmacklin/fetch-through-merge-base@v0
+        with:
+          base_ref: ${{ github.event.pull_request.base.ref }}
+          head_ref: ${{ github.event.pull_request.head.sha }}
+          deepen_length: 500
+
+      - name: Get changed files
+        id: changed-files
+        uses: tj-actions/changed-files@v41
+        with:
+          separator: ","
+          skip_initial_fetch: true
+
+      # We need to pull the script from the main branch, so that we ensure
+      # we get the latest version of this script.
+      - name: Fetch code formatting utils
+        uses: actions/checkout@v4
+        with:
+          repository: llvm/llvm-project
+          ref: refs/heads/main
+          sparse-checkout: |
+            llvm/utils/git/requirements_formatting.txt
+            llvm/utils/git/code-format-helper.py
+          sparse-checkout-cone-mode: false
+          path: code-format-tools
+
+      - name: "Listed files"
+        env:
+          CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
+        run: |
+          echo "Formatting files:"
+          echo "$CHANGED_FILES"
+      - name: Install clang-format
+        uses: aminya/setup-cpp@v1
+        with:
+          clangformat: 18.1.1
+
+      - name: Setup Python env  # same setup-python major version as the other workflows
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+          cache-dependency-path: 'code-format-tools/llvm/utils/git/requirements_formatting.txt'
+
+      - name: Install python dependencies
+        run: pip install -r code-format-tools/llvm/utils/git/requirements_formatting.txt
+
+      - name: Run code formatter
+        env:
+          GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
+          START_REV: ${{ github.event.pull_request.base.sha }}
+          END_REV: ${{ github.event.pull_request.head.sha }}
+          CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
+        # TODO: clang-format 18 is installed above, so we should be able
+        # to take advantage of the new --diff_from_common_commit option
+        # explicitly in code-format-helper.py and not have to diff starting at
+        # the merge base.
+        # Create an empty comments file so the pr-write job doesn't fail.
+        run: |
+          echo "[]" > comments &&
+          python ./code-format-tools/llvm/utils/git/code-format-helper.py \
+            --write-comment-to-file \
+            --token ${{ secrets.GITHUB_TOKEN }} \
+            --issue-number $GITHUB_PR_NUMBER \
+            --start-rev $(git merge-base $START_REV $END_REV) \
+            --end-rev $END_REV \
+            --changed-files "$CHANGED_FILES"
+      - uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
+        if: always()
+        with:
+          name: workflow-args
+          path: |
+            comments
diff --git a/.github/workflows/regression-benchmark.yml b/.github/workflows/regression-benchmark.yml
new file mode 100644
index 0000000..c7a0970
--- /dev/null
+++ b/.github/workflows/regression-benchmark.yml
@@ -0,0 +1,89 @@
+name: Regression benchmark
+
+on:
+  workflow_run:
+    workflows: [Build and test]
+    types: [completed]
+    # branches: [main]
+
+permissions:
+  contents: write
+  deployments: write
+
+jobs:
+  benchmark:
+    name: Performance regression check
+    # Benchmark only when the triggering "Build and test" run succeeded.
+    if: ${{ github.event.workflow_run.conclusion == 'success' }}
+    runs-on: ubuntu-latest
+    env:
+      CACHE_DIR: ${{ github.workspace }}/.ccache
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: recursive
+        token: ${{ secrets.GITHUB_TOKEN }}
+
+    - name: Setup Python Version
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.11" # Install the python version needed
+
+    - name: Set PYTHONPATH
+      # A plain `export` is lost when this step ends; GITHUB_ENV persists the
+      # variable for the benchmark step below.
+      run: echo "PYTHONPATH=${GITHUB_WORKSPACE}/build/tools/mpact/python_packages/mpact" >> "$GITHUB_ENV"
+      shell: bash
+
+    - name: Set up ccache
+      uses: hendrikmuhs/ccache-action@v1.2
+
+    # The build itself needs the torch-mlir Python requirements, so install
+    # everything before configuring.
+    - name: Install requirements
+      run: |
+           export CCACHE_DIR=${{ env.CACHE_DIR }}
+           python -m pip install --upgrade pip
+           python -m pip install -r externals/torch-mlir/requirements.txt
+           python -m pip install -r externals/torch-mlir/torchvision-requirements.txt
+           python -m pip install pytest pytest-benchmark
+
+    # This workflow starts from a fresh checkout, so the build tree has to be
+    # configured here (same options as the "Build and test" workflow).
+    - name: Configure CMake
+      run: >
+        cmake -GNinja -Bbuild
+        -DCMAKE_BUILD_TYPE=Release
+        -DLLVM_ENABLE_PROJECTS=mlir
+        -DLLVM_ENABLE_ASSERTIONS=ON
+        -DLLVM_EXTERNAL_PROJECTS="torch-mlir;mpact"
+        -DLLVM_EXTERNAL_TORCH_MLIR_SOURCE_DIR="${PWD}/externals/torch-mlir"
+        -DLLVM_EXTERNAL_MPACT_SOURCE_DIR="${PWD}"
+        -DLLVM_TARGETS_TO_BUILD=host
+        -DMLIR_ENABLE_BINDINGS_PYTHON=ON
+        -DCMAKE_C_COMPILER_LAUNCHER=ccache
+        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+        -DCMAKE_C_COMPILER=clang
+        -DCMAKE_CXX_COMPILER=clang++
+        "externals/torch-mlir/externals/llvm-project/llvm"
+
+    - name: Build
+      run: cmake --build build --target build-benchmark-mpact
+
+    - name: Run benchmark
+      run: pytest benchmark/python/benchmarks/regression_benchmark.py --benchmark-json output.json
+
+    - name: Store benchmark result
+      uses: benchmark-action/github-action-benchmark@v1
+      with:
+        tool: 'pytest'
+        output-file-path: output.json
+        fail-on-alert: true
+        # GitHub API token to make a commit comment
+        github-token: ${{ secrets.GITHUB_TOKEN }}
+        # Enable alert commit comment
+        comment-on-alert: true
+        # Mention the benchmark owner in the commit comment
+        alert-comment-cc-users: '@yinying-lisa-li'
+        # Push and deploy GitHub pages branch automatically
+        auto-push: true
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a6d9b9f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+*_venv/
+__pycache__
+/build*/
+
+# lsp files
+.cache/
+compile_commands.json
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..28810d3
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,6 @@
+[submodule "torch-mlir"]
+	path = externals/torch-mlir
+	url = https://github.com/llvm/torch-mlir.git
+[submodule "externals/Enzyme"]
+	path = externals/Enzyme
+	url = https://github.com/EnzymeAD/Enzyme.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..e09570e
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,73 @@
+#-------------------------------------------------------------------------------
+# The MPACT Compiler
+#-------------------------------------------------------------------------------
+
+cmake_minimum_required(VERSION 3.12)
+
+project(mpact VERSION 1.0 LANGUAGES CXX C)
+
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
+
+set(MPACT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
+set(MPACT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
+message(STATUS "Building the MPACT compiler at ${MPACT_SOURCE_DIR} (into ${MPACT_BINARY_DIR})")
+
+set(MPACT_PYTHON_PACKAGES_DIR "${MPACT_BINARY_DIR}/python_packages")
+
+#-------------------------------------------------------------------------------
+# Configure out-of-tree vs in-tree build
+#-------------------------------------------------------------------------------
+
+if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+  message(STATUS "MPACT out-of-tree build.")
+  message(FATAL_ERROR "MPACT out-of-tree builds are not supported yet; build in-tree via LLVM_EXTERNAL_PROJECTS (see README.md).")
+else()
+  message(STATUS "MPACT in-tree build.")
+  # In-tree build with LLVM_EXTERNAL_PROJECTS=mpact: derive the MLIR paths from the LLVM tree.
+  option(MLIR_ENABLE_BINDINGS_PYTHON "Enables MLIR Python Bindings" OFF)
+  set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir)
+  set(MLIR_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include)
+  set(MLIR_GENERATED_INCLUDE_DIR ${LLVM_BINARY_DIR}/tools/mlir/include)
+  set(MLIR_INCLUDE_DIRS "${MLIR_INCLUDE_DIR};${MLIR_GENERATED_INCLUDE_DIR}")
+endif()
+
+include_directories(${LLVM_INCLUDE_DIRS})
+include_directories(${MLIR_INCLUDE_DIRS})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
+include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
+
+# Needed to build TorchMLIRExtensions.
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/externals/torch-mlir/include)
+
+# Add MLIR + MPACT include dirs (build interface only) to target and obj.<target>.
+function(mpact_target_includes target)
+  set(_mpact_includes
+    $<BUILD_INTERFACE:${MLIR_INCLUDE_DIRS}>
+    $<BUILD_INTERFACE:${MPACT_SOURCE_DIR}/include>
+    $<BUILD_INTERFACE:${MPACT_BINARY_DIR}/include>)
+  target_include_directories(${target} PUBLIC ${_mpact_includes})
+  if(TARGET obj.${target})
+    target_include_directories(obj.${target} PRIVATE ${_mpact_includes})
+  endif()
+endfunction()
+
+list(APPEND CMAKE_MODULE_PATH ${MLIR_MAIN_SRC_DIR}/cmake/modules)
+list(APPEND CMAKE_MODULE_PATH ${LLVM_MAIN_SRC_DIR}/cmake)
+list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build_tools/cmake)
+
+include(TableGen)
+include(AddLLVM)
+include(AddMLIR)
+include(AddMLIRPython)
+
+include(MLIRDetectPythonEnv)
+mlir_configure_python_dev_packages()
+
+add_subdirectory(include)
+add_subdirectory(lib)
+add_subdirectory(tools)
+
+add_subdirectory(benchmark)
+add_subdirectory(python)
+add_subdirectory(test)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9c764b7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,118 @@
+# The MPACT Project
+
+## Introduction
+
+The MPACT project's main objective is to dramatically reduce the effort
+required to create highly optimizing HPC and ML compilers for a large
+class of architectures using LLVM and MLIR. We do this by providing
+a declarative language-based mechanism for collecting and expressing
+critical aspects of a target architecture in a way that can be reasoned
+about and leveraged by all passes in both MLIR and LLVM.
+
+## Building the MPACT compiler
+
+To build and run the MPACT compiler from source (for developers),
+please follow the steps below.
+
+### Check out code and sync submodules
+
+Use the following commands to clone the MPACT compiler repository.
+
+```shell
+git clone https://github.com/MPACT-ORG/mpact-compiler.git
+cd mpact-compiler
+git submodule update --init --recursive --progress
+```
+
+To always get updated submodules through `git pull`, set the following flag:
+
+```shell
+git config --global submodule.recurse true
+```
+
+NOTE: All following commands assume you remain in the `mpact-compiler` directory.
+
+### Set up Python virtual environment
+
+The following commands initialize a virtual environment under bash/sh/etc. For other shells, see Note 1, [below](README.md#notes).
+
+```shell
+python3.11 -m venv mpact_venv   # one time set up
+source mpact_venv/bin/activate  # MUST BE REPEATED FOR EVERY SESSION
+```
+
+Next, set the Python paths as follows; for shells not in the bash/sh family, see Note 2, [below](README.md#notes).
+```shell
+export PYTHONPATH=`pwd`/build/tools/mpact/python_packages/mpact
+```
+
+### Install build requirements
+
+Note that currently we rely on `torch-mlir` requirements defined in that
+submodule to ensure all the build requirements are consistent.
+
+```shell
+python -m pip install --upgrade pip
+python -m pip install -r externals/torch-mlir/requirements.txt
+python -m pip install -r externals/torch-mlir/torchvision-requirements.txt
+```
+For shells not in the bash/sh family, see Note 3, [below](README.md#notes).
+
+### Building the MPACT compiler in-tree
+
+The following command generates configuration files to build the MPACT compiler
+project completely *in-tree*, which means that both LLVM as well as torch-mlir
+are built from source.
+
+```shell
+cmake -GNinja -Bbuild \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DPython3_FIND_VIRTUALENV=ONLY \
+  -DLLVM_ENABLE_PROJECTS=mlir \
+  -DLLVM_EXTERNAL_PROJECTS="torch-mlir;mpact" \
+  -DLLVM_EXTERNAL_TORCH_MLIR_SOURCE_DIR="${PWD}/externals/torch-mlir" \
+  -DLLVM_EXTERNAL_MPACT_SOURCE_DIR="${PWD}" \
+  -DLLVM_TARGETS_TO_BUILD=host \
+  -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
+  externals/torch-mlir/externals/llvm-project/llvm
+```
+
+To speed up the build process, you can set up [ccache](https://ccache.dev/download.html) and add the following flags to the command above:
+
+```shell
+-DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+```
+
+Run the following to ensure the MPACT compiler builds and runs correctly.
+
+```shell
+cmake --build build --target check-mpact
+```
+
+And the following to run all benchmarks
+(see [Benchmarks](benchmark/README.md) for more details).
+
+```shell
+cmake --build build --target benchmark-mpact
+```
+
+## Notes
+
+1. Shells other than bash/sh/etc. require a different `activate` script, as shown below. Because the Python environment has to be activated in every session, we recommend adding the `source` command to your shell's startup file (e.g. `.cshrc`).
+   - For csh/tcsh/etc.:
+     ```shell
+         source `pwd`/mpact_venv/bin/activate.csh
+     ```
+   - For fish/etc.:
+     ```shell
+         source <path_to_mpact_compiler>/mpact_venv/bin/activate.fish
+     ```
+2. Shells other than bash/sh/etc. set their environment variables differently:
+   - For csh/tcsh/etc.:
+     ```shell
+         setenv PYTHONPATH `pwd`/build/tools/mpact/python_packages/mpact
+     ```
+3. If using csh/tcsh/etc., run the following command before trying to build the compiler:
+   ```shell
+   rehash
+   ```
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
new file mode 100644
index 0000000..f52d4bb
--- /dev/null
+++ b/benchmark/CMakeLists.txt
@@ -0,0 +1,47 @@
+#-------------------------------------------------------------------------------
+# The MPACT Compiler Python Benchmarks
+#-------------------------------------------------------------------------------
+
+declare_mlir_python_sources(MPACTBenchmarkPythonSources)
+
+declare_mlir_python_sources(MPACTBenchmarkPythonSources.BenchmarkSuite
+  ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/python"
+  ADD_TO_PARENT MPACTBenchmarkPythonSources
+  SOURCES_GLOB
+    benchmarks/*.py
+)
+
+declare_mlir_python_sources(MPACTBenchmarkPythonSources.BenchmarkUtils
+  ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/python"
+  ADD_TO_PARENT MPACTBenchmarkPythonSources
+  SOURCES_GLOB
+    utils/*.py
+)
+
+#-------------------------------------------------------------------------------
+# Python Modules
+#-------------------------------------------------------------------------------
+
+add_mlir_python_modules(MPACTBenchmarkPythonModules
+  ROOT_PREFIX "${MPACT_PYTHON_PACKAGES_DIR}/mpact/mpact_benchmark"
+  INSTALL_PREFIX "python_packages/mpact/mpact_benchmark"
+  DECLARED_SOURCES MPACTBenchmarkPythonSources
+)
+
+add_custom_target(build-benchmark-mpact)
+add_dependencies(build-benchmark-mpact MPACTPythonModules MPACTBenchmarkPythonModules)
+
+add_custom_target(benchmark-mpact)
+add_dependencies(benchmark-mpact build-benchmark-mpact)
+file(GLOB PYTHON_FILES "${CMAKE_CURRENT_SOURCE_DIR}/python/benchmarks/*.py")
+
+# Run every benchmark script as a POST_BUILD step of the benchmark-mpact target.
+foreach(PY_FILE IN LISTS PYTHON_FILES)
+    # The TARGET signature takes no DEPENDS; ordering comes from the target's dependencies.
+    add_custom_command(
+        TARGET benchmark-mpact POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E echo "Running ${PY_FILE}"
+        COMMAND python ${PY_FILE}
+        USES_TERMINAL
+    )
+endforeach()
diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 0000000..4749547
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,26 @@
+### Run benchmarks
+
+To run all benchmarks:
+
+```shell
+cmake --build build --target benchmark-mpact
+```
+
+To run selected benchmarks, build the benchmark modules first:
+
+```shell
+cmake --build build --target build-benchmark-mpact
+```
+
+And then run the benchmark file:
+
+```shell
+python path/to/the/<name>_benchmark.py
+```
+
+If you would like to run only selected kernels in kernels_benchmark.py,
+use the `--benchmark-filter` flag, as in the following example:
+
+```shell
+python path/to/the/kernels_benchmark.py --benchmark-filter=add
+```
diff --git a/benchmark/python/benchmarks/gcn_benchmark.py b/benchmark/python/benchmarks/gcn_benchmark.py
new file mode 100755
index 0000000..ef7a3bd
--- /dev/null
+++ b/benchmark/python/benchmarks/gcn_benchmark.py
@@ -0,0 +1,33 @@
+import torch
+import numpy as np
+from mpact.models.gcn import GraphConv
+from mpact_benchmark.utils.benchmark_utils import benchmark, Backends
+
+
+# Benchmark configurations for GCN: one entry per (shape, format, dtype) below.
+@benchmark(
+    [
+        {
+            "name": f"{fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": fmt,
+            "dtype": dtype,
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+            "backends": list(Backends),
+        }
+        # Both shapes in a configuration are size x size.
+        for shape in ([[size, size], [size, size]] for size in (128, 512, 1024))
+        for fmt in (["dense", "csr"],)
+        for dtype in (np.float32,)
+    ]
+)
+def GCN() -> torch.nn.Module:
+    """Graph Convolution Network."""
+    # NOTE(review): this returns the GraphConv class itself, not an instance;
+    # confirm that the `benchmark` decorator expects that.
+    return GraphConv
+
+
+if __name__ == "__main__":
+    GCN()
diff --git a/benchmark/python/benchmarks/kernels_benchmark.py b/benchmark/python/benchmarks/kernels_benchmark.py
new file mode 100644
index 0000000..4dee945
--- /dev/null
+++ b/benchmark/python/benchmarks/kernels_benchmark.py
@@ -0,0 +1,225 @@
+import torch
+import argparse
+import numpy as np
+from mpact.models.kernels import *
+from mpact_benchmark.utils.benchmark_utils import benchmark, Backends
+
+
+@benchmark(
+    [
+        {
+            "name": f"{lhs_fmt}_{rhs_fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": (lhs_fmt, rhs_fmt),
+            "dtype": dtype,
+            "backends": [b for b in Backends],
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+        }
+        for shape in [([2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)]
+        for lhs_fmt in ["dense", "csr"]
+        for rhs_fmt in ["dense", "csr"]
+        for dtype in [np.float64]
+    ]
+)
+def matmul() -> torch.nn.Module:
+    """Matrix multiplication."""
+    return MMNet()
+
+
+@benchmark(
+    [
+        {
+            "name": f"{lhs_fmt}_{rhs_fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": (lhs_fmt, rhs_fmt),
+            "dtype": dtype,
+            "backends": [b for b in Backends],
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+        }
+        for shape in [([2**i, 2**i], [2**i]) for i in range(5, 8)]
+        for lhs_fmt in ["dense", "csr"]
+        for rhs_fmt in ["dense"]  # torch.mv only supports dense vector for now.
+        for dtype in [np.float64]
+    ]
+)
+def matvec() -> torch.nn.Module:
+    """Matrix-vector multiplication."""
+    return MVNet()
+
+
+@benchmark(
+    [
+        {
+            "name": f"{lhs_fmt}_{rhs_fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": (lhs_fmt, rhs_fmt),
+            "dtype": dtype,
+            "backends": [b for b in Backends],
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+        }
+        for shape in [
+            ([2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)
+        ]  # 512x512 crashes runtime.
+        for lhs_fmt in ["dense", "csr"]
+        for rhs_fmt in ["dense", "csr"]
+        for dtype in [np.float64]
+    ]
+)
+def add() -> torch.nn.Module:
+    """Element-wise addition."""
+    return AddNet()
+
+
+@benchmark(
+    [
+        {
+            "name": f"{lhs_fmt}_{rhs_fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": (lhs_fmt, rhs_fmt),
+            "dtype": dtype,
+            "backends": [b for b in Backends],
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+        }
+        for shape in [([2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)]
+        for lhs_fmt in ["dense", "csr"]
+        for rhs_fmt in ["dense", "csr"]
+        for dtype in [np.float64]
+    ]
+)
+def elt_mul() -> torch.nn.Module:
+    """Element-wise multiplication."""
+    return MulNet()
+
+
+@benchmark(
+    [
+        {
+            "name": f"{fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": (fmt,),
+            "dtype": dtype,
+            "backends": [b for b in Backends],
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+        }
+        for shape in [([2**i, 2**i],) for i in range(2, 3)]
+        for fmt in ["dense", "csr"]
+        for dtype in [np.float64]
+    ]
+)
+def nop() -> torch.nn.Module:
+    """Returns matrix unmodified (speed of light)."""
+    return SelfNet()
+
+
+@benchmark(
+    [
+        {
+            "name": f"{sample_fmt}_sample_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": (sample_fmt, "dense", "dense"),
+            "dtype": dtype,
+            "backends": [b for b in Backends],
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+        }
+        for shape in [
+            ([2**i, 2**i], [2**i, 2**i], [2**i, 2**i]) for i in range(5, 8)
+        ]
+        for sample_fmt in ["dense", "csr"]
+        for dtype in [np.float64]
+    ]
+)
+def sddmm() -> torch.nn.Module:
+    """SDDMM: C = S ○ (A X B) Sampled dense-dense matrix-matrix multiplication."""
+    return SDDMMNet()
+
+
+@benchmark(
+    [
+        {
+            "name": f"{fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": (fmt,),
+            "dtype": dtype,
+            # TODO: add mpact and torch inductor once they work.
+            "backends": [
+                b
+                for b in Backends
+                if b.value
+                in (
+                    Backends.TORCH_SPARSE_EAGER.value,
+                    Backends.TORCH_DENSE_EAGER.value,
+                )
+            ],
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+        }
+        for shape in [([2**i, 2**i],) for i in range(5, 8)]
+        for fmt in ["dense"]
+        for dtype in [np.float64]
+    ]
+)
+def feature_scale() -> torch.nn.Module:
+    """Scales feature matrix in GNN."""
+    return FeatureScale()
+
+
+@benchmark(
+    [
+        {
+            "name": f"{fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": (fmt,),
+            "dtype": dtype,
+            # TODO: add mpact and torch inductor once they work.
+            "backends": [
+                b
+                for b in Backends
+                if b.value
+                in (
+                    Backends.TORCH_SPARSE_EAGER.value,
+                    Backends.TORCH_DENSE_EAGER.value,
+                )
+            ],
+            "drange": (1, 100),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+        }
+        for shape in [([2**i, 2**i],) for i in range(5, 8)]
+        for fmt in ["dense"]
+        for dtype in [np.float64]
+    ]
+)
+def normalization() -> torch.nn.Module:
+    """Normalizes adjacency matrix in GNN."""
+    return Normalization()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        prog="pytorch_kernel_benchmarks",
+        description="Run a set of given PyTorch kernel benchmarks",
+    )
+    parser.add_argument("--benchmark-filter", type=str, default="", required=False)
+    arguments = parser.parse_args()
+    # By default, run every kernel benchmark defined above.
+    benchmark_list = [
+        "nop",
+        "add",
+        "matmul",
+        "matvec",
+        "elt_mul",
+        "sddmm",
+        "feature_scale",
+        "normalization",
+    ]
+    if arguments.benchmark_filter:
+        # Tolerate spaces around the commas, e.g. "add, matmul".
+        benchmark_list = [s.strip() for s in arguments.benchmark_filter.split(",")]
+    # Run selected benchmarks.
+    for benchmark_name in benchmark_list:
+        globals()[benchmark_name]()
diff --git a/benchmark/python/benchmarks/lif_benchmark.py b/benchmark/python/benchmarks/lif_benchmark.py
new file mode 100644
index 0000000..3fe9784
--- /dev/null
+++ b/benchmark/python/benchmarks/lif_benchmark.py
@@ -0,0 +1,44 @@
+import torch
+import numpy as np
+from mpact.models.lif import LIFSumOfSq
+from mpact_benchmark.utils.benchmark_utils import benchmark, Backends
+
+
+@benchmark(
+    [
+        {
+            "name": f"{fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": fmt,
+            "dtype": dtype,
+            # Simulate batch normalization.
+            "drange": (-1, 1),
+            "sparsity": [0, 0.5, 0.9, 0.99],
+            # to_dense() in LIF prop hack is not supported in torch inductor.
+            # TODO: add torch inductor once prop hack is no longer needed.
+            "backends": [
+                b
+                for b in Backends
+                if b.value
+                not in (
+                    Backends.TORCH_SPARSE_INDUCTOR.value,
+                    Backends.TORCH_DENSE_INDUCTOR.value,
+                )
+            ],
+        }
+        for shape in [
+            [[64, 3, 32, 32, 1]],
+            [[32, 3, 64, 64, 1]],
+            [[16, 3, 224, 224, 1]],
+        ]
+        for fmt in [["dense"]]
+        for dtype in [np.float64]
+    ]
+)
+def LifSumOfSq() -> torch.nn.Module:
+    """LIF feeding into sum of squares."""
+    return LIFSumOfSq()
+
+
+if __name__ == "__main__":
+    LifSumOfSq()
diff --git a/benchmark/python/benchmarks/regression_benchmark.py b/benchmark/python/benchmarks/regression_benchmark.py
new file mode 100644
index 0000000..6f5196e
--- /dev/null
+++ b/benchmark/python/benchmarks/regression_benchmark.py
@@ -0,0 +1,51 @@
+import pytest
+from mpact.models.kernels import *
+from mpact_benchmark.utils.tensor_generator import generate_tensor
+
+SHAPE = (1024, 1024)
+SPARSITY = 0.8
+
+dense_tensor1 = generate_tensor(0, SHAPE, SPARSITY)
+dense_tensor2 = generate_tensor(1, SHAPE, SPARSITY)
+dense_tensor3 = generate_tensor(2, SHAPE, SPARSITY)
+dense_vector = generate_tensor(1, (SHAPE[0],), SPARSITY)
+
+sparse_tensor1 = dense_tensor1.to_sparse_csr()
+sparse_tensor2 = dense_tensor2.to_sparse_csr()
+sparse_tensor3 = dense_tensor3.to_sparse_csr()
+
+def test_mv_dense(benchmark):
+    benchmark(MVNet(), dense_tensor1, dense_vector)
+
+def test_mm_dense(benchmark):
+    benchmark(MMNet(), dense_tensor1, dense_tensor2)
+
+def test_add_dense(benchmark):
+    benchmark(AddNet(), dense_tensor1, dense_tensor2)
+
+def test_mul_dense(benchmark):
+    benchmark(MulNet(), dense_tensor1, dense_tensor2)
+
+def test_nop_dense(benchmark):
+    benchmark(SelfNet(), dense_tensor1)
+
+def test_sddmm_dense(benchmark):
+    benchmark(SDDMMNet(), dense_tensor1, dense_tensor2, dense_tensor3)
+
+def test_mv_sparse(benchmark):
+    benchmark(MVNet(), sparse_tensor1, dense_vector)
+
+def test_mm_sparse(benchmark):
+    benchmark(MMNet(), sparse_tensor1, sparse_tensor2)
+
+def test_add_sparse(benchmark):
+    benchmark(AddNet(), sparse_tensor1, sparse_tensor2)
+
+def test_mul_sparse(benchmark):
+    benchmark(MulNet(), sparse_tensor1, sparse_tensor2)
+
+def test_nop_sparse(benchmark):
+    benchmark(SelfNet(), sparse_tensor1)
+
+def test_sddmm_sparse(benchmark):
+    benchmark(SDDMMNet(), sparse_tensor1, dense_tensor2, dense_tensor3)
\ No newline at end of file
diff --git a/benchmark/python/benchmarks/resnet_benchmark.py b/benchmark/python/benchmarks/resnet_benchmark.py
new file mode 100644
index 0000000..89b317e
--- /dev/null
+++ b/benchmark/python/benchmarks/resnet_benchmark.py
@@ -0,0 +1,36 @@
+import torch
+import numpy as np
+from mpact.models.resnet import resnet_20
+from mpact_benchmark.utils.benchmark_utils import benchmark, Backends
+
+
+@benchmark(
+    [
+        {
+            "name": f"{fmt}_{shape}_{dtype.__name__}",
+            "shape": shape,
+            "formats": fmt,
+            "dtype": dtype,
+            "drange": (1, 100),
+            "sparsity": [0.5, 0.9],
+            # TODO: Torch inductor requires lower precision with larger input size,
+            # such as [8, 3, 32, 32].
+            "precision": 1e-3,
+            "backends": [b for b in Backends],
+        }
+        for shape in [
+            [[1, 3, 16, 16]],
+        ]
+        for fmt in [["dense"]]
+        for dtype in [np.float32]
+    ]
+)
+def resnet() -> torch.nn.Module:
+    """ResNet20 model."""
+    resnet_model = resnet_20()
+    resnet_model.train(False)  # Disable training mode.
+    return resnet_model
+
+
+if __name__ == "__main__":
+    resnet()
diff --git a/benchmark/python/utils/benchmark_utils.py b/benchmark/python/utils/benchmark_utils.py
new file mode 100644
index 0000000..ec29309
--- /dev/null
+++ b/benchmark/python/utils/benchmark_utils.py
@@ -0,0 +1,231 @@
+import functools
+import torch
+from enum import Enum
+from typing import Any, Callable
+from torch.utils import benchmark as torch_benchmark
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+from mpact_benchmark.utils.tensor_generator import generate_inputs
+
+
+class Backends(Enum):
+    TORCH_SPARSE_EAGER = 1
+    TORCH_DENSE_EAGER = 2
+    TORCH_SPARSE_INDUCTOR = 3
+    TORCH_DENSE_INDUCTOR = 4
+    MPACT_SPARSE = 5
+    MPACT_DENSE = 6
+
+
+def timer(stmt: str, description: str, setup: str = "", **kwargs: Any) -> Any:
+    """Times `stmt` on one thread and returns the adaptive_autorange() result."""
+    return torch_benchmark.Timer(
+        stmt=stmt,
+        globals=kwargs["variables"],  # Required kwarg; also holds label/sub_label.
+        setup=setup,
+        num_threads=1,
+        label=kwargs["variables"]["label"],
+        sub_label=kwargs["variables"]["sub_label"],
+        description=description,
+    ).adaptive_autorange()
+
+
+def get_dynamo_compile_time(sub_label: str, label: str, description: str) -> Any:
+    """Get compile time from dynamo and create a benchmark measurement object.
+
+    Returns None (after printing a notice) when no compile time can be read.
+    """
+    try:
+        # Take row [1][0] of the CSV report and parse its last comma-separated
+        # field as the compile time.
+        last_time = torch._dynamo.utils.compile_times(repr="csv")[1][0].split(",")[-1]
+        return torch_benchmark.Measurement(
+            1,
+            [float(last_time.strip())],
+            torch_benchmark.TaskSpec(
+                sub_label,
+                None,
+                description=description,
+                label=label,
+            ),
+        )
+    except (ValueError, IndexError):
+        # IndexError: the report has no rows; ValueError: the field is not a float.
+        print(f"No compilation happened for {description}: {sub_label}.")
+        return None
+
+
+def run_benchmark(
+    sparse_inputs: tuple[torch.Tensor, ...],
+    dense_inputs: tuple[torch.Tensor, ...],
+    torch_net: torch.nn.Module,
+    variables: dict[str, Any],
+    backends: tuple[Backends, ...],
+    runtime_results: list[torch_benchmark.Measurement],
+    compile_time_results: list[torch_benchmark.Measurement],
+):
+    """Run benchmark with specified backends."""
+    output = []
+
+    with torch.no_grad():
+        for backend in backends:
+            match backend:
+                case Backends.TORCH_SPARSE_EAGER:
+                    output.append(torch_net(*sparse_inputs))
+                    runtime_results.append(
+                        timer(
+                            "torch_net(*sparse_inputs)",
+                            "torch-sparse-eager",
+                            variables=variables,
+                        )
+                    )
+                case Backends.TORCH_DENSE_EAGER:
+                    output.append(torch_net(*dense_inputs))
+                    runtime_results.append(
+                        timer(
+                            "torch_net(*dense_inputs)",
+                            "torch-dense-eager",
+                            variables=variables,
+                        )
+                    )
+                case Backends.TORCH_SPARSE_INDUCTOR:
+                    torch_inductor = torch.compile(torch_net)
+                    torch_out = torch_inductor(*sparse_inputs)
+                    output.append(torch_out)
+                    compile_time = get_dynamo_compile_time(
+                        variables["sub_label"],
+                        variables["label"],
+                        "torch-sparse-inductor-compile",
+                    )
+                    if compile_time:
+                        compile_time_results.append(compile_time)
+                    runtime_results.append(
+                        timer(
+                            "torch_inductor(*sparse_inputs)",
+                            "torch-sparse-inductor-runtime",
+                            variables=dict(variables, **locals()),
+                        )
+                    )
+                case Backends.TORCH_DENSE_INDUCTOR:
+                    torch_inductor = torch.compile(torch_net)
+                    output.append(torch_inductor(*dense_inputs))
+                    compile_time = get_dynamo_compile_time(
+                        variables["sub_label"],
+                        variables["label"],
+                        "torch-dense-inductor-compile",
+                    )
+                    if compile_time:
+                        compile_time_results.append(compile_time)
+                    runtime_results.append(
+                        timer(
+                            "torch_inductor(*dense_inputs)",
+                            "torch-dense-inductor-runtime",
+                            variables=dict(variables, **locals()),
+                        )
+                    )
+                case Backends.MPACT_SPARSE:
+                    sp_out = mpact_jit(torch_net, *sparse_inputs)
+                    # Construct sparse csr tensor if the output type is csr.
+                    # TODO: return sparse tensor directly instead of a tuple of arrays.
+                    if type(sp_out) is tuple:
+                        # torch.sparse_csr_tensor could deduce the size incorrectly,
+                        # so pass the dense_out's shape explicitly.
+                        dense_out = mpact_jit(torch_net, *dense_inputs)
+                        output.append(
+                            torch.sparse_csr_tensor(*sp_out, size=dense_out.shape)
+                        )
+                    else:
+                        output.append(torch.from_numpy(sp_out))
+                    invoker, f = mpact_jit_compile(torch_net, *sparse_inputs)
+                    compile_time_results.append(
+                        timer(
+                            "mpact_jit_compile(torch_net, *sparse_inputs)",
+                            "mpact-sparse-compile",
+                            "from mpact.mpactbackend import mpact_jit_compile",
+                            variables=dict(variables, **locals()),
+                        )
+                    )
+                    runtime_results.append(
+                        timer(
+                            "mpact_jit_run(invoker, f, *sparse_inputs)",
+                            "mpact-sparse-runtime",
+                            "from mpact.mpactbackend import mpact_jit_run",
+                            variables=dict(variables, **locals()),
+                        )
+                    )
+                case Backends.MPACT_DENSE:
+                    output.append(torch.from_numpy(mpact_jit(torch_net, *dense_inputs)))
+                    invoker, f = mpact_jit_compile(torch_net, *dense_inputs)
+                    compile_time_results.append(
+                        timer(
+                            "mpact_jit_compile(torch_net, *dense_inputs)",
+                            "mpact-dense-compile",
+                            "from mpact.mpactbackend import mpact_jit_compile",
+                            variables=dict(variables, **locals()),
+                        )
+                    )
+                    runtime_results.append(
+                        timer(
+                            "mpact_jit_run(invoker, f, *dense_inputs)",
+                            "mpact-dense-runtime",
+                            "from mpact.mpactbackend import mpact_jit_run",
+                            variables=dict(variables, **locals()),
+                        )
+                    )
+                case _:
+                    print(f"{backend} is not supported yet.")
+
+    # Sanity check.
+    if output:
+        rtol = variables["precision"] if "precision" in variables else 1e-5
+        assert all(
+            torch.allclose(output[0].to_dense(), out.to_dense(), rtol=rtol)
+            for out in output
+        )
+
+
+def benchmark(*args: Any) -> Callable:
+    """Wrapper for benchmark."""
+
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(test_cases=args[0]):
+            runtime_results = []
+            compile_time_results = []
+            torch_net = net = func()
+            for test_case in test_cases:
+                label = func.__name__
+                for sparsity in test_case["sparsity"]:
+                    sub_label = f"{test_case['name']}_{sparsity}"
+                    dense_inputs, sparse_inputs = generate_inputs(
+                        test_case["shape"],
+                        sparsity,
+                        test_case["formats"],
+                        test_case["dtype"],
+                        test_case["drange"],
+                    )
+
+                    if "GCN" in label:
+                        torch_net = net(*test_case["shape"][0])
+                    if "precision" in test_case:
+                        precision = test_case["precision"]
+
+                    run_benchmark(
+                        sparse_inputs,
+                        dense_inputs,
+                        torch_net,
+                        locals(),
+                        test_case["backends"],
+                        runtime_results,
+                        compile_time_results,
+                    )
+
+            compare1 = torch_benchmark.Compare(runtime_results)
+            compare1.print()
+            compare2 = torch_benchmark.Compare(compile_time_results)
+            compare2.print()
+
+            return func
+
+        return wrapper
+
+    return decorator
diff --git a/benchmark/python/utils/tensor_generator.py b/benchmark/python/utils/tensor_generator.py
new file mode 100644
index 0000000..5f9d304
--- /dev/null
+++ b/benchmark/python/utils/tensor_generator.py
@@ -0,0 +1,74 @@
+import torch
+import math
+import numpy as np
+from typing import Any
+
+
+def generate_inputs(
+    shapes: tuple[Any, ...],
+    sparsity: float,
+    formats: tuple[str, ...],
+    dtype: Any = np.float64,
+    drange: tuple[Any, ...] = (1, 100),
+) -> tuple[tuple[torch.Tensor, ...], tuple[torch.Tensor, ...]]:
+    """Generates dense and sparse tensor inputs.
+
+    Args:
+      shapes: Shape for each input.
+      sparsity: Sparsity level for the inputs.
+      formats: Sparsity format for each input.
+      dtype: Data type of the generated inputs. Default is np.float64.
+      drange: Data range of the non-zero values. Default is (1, 100).
+
+    Returns:
+      dense_inputs: all dense tensors.
+      sparse_inputs: inputs are of the specified sparsity format, such as CSR.
+    """
+    dense_inputs = []
+    sparse_inputs = []
+    # Each input has a different seed.
+    for seed, shape in enumerate(shapes):
+        dense_inputs.append(generate_tensor(seed, shape, sparsity, dtype, drange))
+    for idx, dense_input in enumerate(dense_inputs):
+        if formats[idx] == "dense":
+            sparse_inputs.append(dense_input)
+        else:
+            # TODO: support more sparsity formats.
+            sparse_inputs.append(dense_input.to_sparse_csr())
+    return dense_inputs, sparse_inputs
+
+
+def generate_tensor(
+    seed: int,
+    shape: tuple[Any, ...],
+    sparsity: float,
+    dtype: Any = np.float64,
+    drange: tuple[Any, ...] = (1, 100),
+) -> torch.Tensor:
+    """Generates a tensor given sparsity level, shape and data type.
+
+    Args:
+        seed: Seed value for np.random.
+        shape: A tuple for the shape of tensor.
+        sparsity: Sparsity level in the range of [0, 1].
+        dtype: Data type of the generated tensor. Default is np.float64.
+        drange: Data range of the non-zero values. Default is (1, 100).
+
+    Returns:
+        A dense torch tensor with the specified shape, sparsity level and type.
+
+    Note: the tensor generated doesn't guarantee each batch will have the same
+    number of specified elements. Therefore, for batched CSR, torch.cat can be
+    used to concatenate generated tensors in the specified dimension.
+    """
+    np.random.seed(seed)
+    size = math.prod(shape)
+    nse = size - int(math.ceil(sparsity * size))
+
+    flat_output = np.zeros(size)
+    indices = np.random.choice(size, nse, replace=False)
+    values = np.random.uniform(drange[0], drange[1], nse)
+    flat_output[indices] = values
+
+    result = np.reshape(flat_output, shape).astype(dtype)
+    return torch.from_numpy(result)
diff --git a/externals/Enzyme b/externals/Enzyme
new file mode 160000
index 0000000..cf89592
--- /dev/null
+++ b/externals/Enzyme
@@ -0,0 +1 @@
+Subproject commit cf89592eb10c2e94352954a127fc8697aef40953
diff --git a/externals/torch-mlir b/externals/torch-mlir
new file mode 160000
index 0000000..c7d52f6
--- /dev/null
+++ b/externals/torch-mlir
@@ -0,0 +1 @@
+Subproject commit c7d52f63b482b2c30f4efb435ce0cc2efeab25d9
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
new file mode 100644
index 0000000..711b39d
--- /dev/null
+++ b/include/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(mpact)
diff --git a/include/mpact-c/Registration.h b/include/mpact-c/Registration.h
new file mode 100644
index 0000000..877b06c
--- /dev/null
+++ b/include/mpact-c/Registration.h
@@ -0,0 +1,27 @@
+/*===-- mpact-c/Registration.h - Registration functions  -----*- C -*-===*\
+|*                                                                            *|
+|* Part of the MPACT Project, under the Apache License v2.0 with LLVM         *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef MPACT_C_REGISTRATION_H
+#define MPACT_C_REGISTRATION_H
+
+#include "mlir-c/IR.h"
+#include "mlir-c/Support.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Registers all passes for symbolic access with the global registry. */
+MLIR_CAPI_EXPORTED void mpactRegisterAllPasses(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MPACT_C_REGISTRATION_H
diff --git a/include/mpact/CMakeLists.txt b/include/mpact/CMakeLists.txt
new file mode 100644
index 0000000..e31af32
--- /dev/null
+++ b/include/mpact/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(Transforms)
diff --git a/include/mpact/Transforms/CMakeLists.txt b/include/mpact/Transforms/CMakeLists.txt
new file mode 100644
index 0000000..17587b1
--- /dev/null
+++ b/include/mpact/Transforms/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls)
+add_public_tablegen_target(MPACTTransformsPassIncGen)
+
+add_mlir_doc(Passes MPACTTransformsPass ./ -gen-pass-doc)
diff --git a/include/mpact/Transforms/Passes.h b/include/mpact/Transforms/Passes.h
new file mode 100644
index 0000000..389184b
--- /dev/null
+++ b/include/mpact/Transforms/Passes.h
@@ -0,0 +1,22 @@
+//===------------------------------------------------------------*- C++ -*-===//
+//
+// Part of the MPACT Project, under the Apache License v2.0 with LLVM
+// Exceptions. See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// Also available under a BSD-style license. See LICENSE.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MPACT_TRANSFORMS_PASSES_H
+#define MPACT_TRANSFORMS_PASSES_H
+
+namespace mlir {
+namespace mpact {
+
+/// Registers all mpact transform passes.
+void registerTransformPasses();
+
+} // namespace mpact
+} // namespace mlir
+
+#endif // MPACT_TRANSFORMS_PASSES_H
diff --git a/include/mpact/Transforms/Passes.td b/include/mpact/Transforms/Passes.td
new file mode 100644
index 0000000..c83f5d4
--- /dev/null
+++ b/include/mpact/Transforms/Passes.td
@@ -0,0 +1,51 @@
+//===-- Passes.td - Transforms pass definition file --------*- tablegen -*-===//
+//
+// Part of the MPACT Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions for passes within the Transforms/ directory.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MPACT_TRANSFORMS_PASSES
+#define MPACT_TRANSFORMS_PASSES
+
+include "mlir/Pass/PassBase.td"
+
+def SparseEncodingPropagation : Pass<"sparse-encoding-propagation", "func::FuncOp"> {
+  let summary = "Propagate sparse tensor encodings";
+  let description = [{
+    A pass that propagates sparse tensor encodings.
+
+    Background: To avoid introducing repetitive operations, sparse tensors
+    in MLIR try to reuse tensor operations whenever available. However, most
+    tensor operations are canonicalized/transformed without the knowledge
+    of sparsity. The pass tries to propagate missing sparse encodings.
+
+    For example:
+    ```mlir
+    %s = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1]
+       : tensor<2x3xf32, #sparse> to tensor<2x1xf32, #sparse>
+
+    // After rank reducing (by tensor dialect transformation)
+    %t = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1]
+       : tensor<2x3xf32, #sparse> to tensor<2xf32>
+    %s = tensor.expand_shape [[0, 1]] %t
+       : tensor<2xf32> to tensor<2x1xf32, #sparse>
+
+    // After sparsity propagation
+    %t = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1]
+       : tensor<2x3xf32, #sparse> to tensor<2xf32, #sparse1>
+    %s = tensor.expand_shape [[0, 1]] %t
+       : tensor<2xf32, #sparse1> to tensor<2x1xf32, #sparse>
+    ```
+  }];
+
+  let constructor = "mlir::mpact::createSparseEncodingPropagationPass()";
+  let dependentDialects = [];
+}
+
+#endif // MPACT_TRANSFORMS_PASSES
diff --git a/include/mpact/Transforms/Sparsity/SparseEncodingPropagate.h b/include/mpact/Transforms/Sparsity/SparseEncodingPropagate.h
new file mode 100644
index 0000000..a829400
--- /dev/null
+++ b/include/mpact/Transforms/Sparsity/SparseEncodingPropagate.h
@@ -0,0 +1,24 @@
+//===------------------------------------------------------------*- C++ -*-===//
+//
+// Part of the MPACT Project, under the Apache License v2.0 with LLVM
+// Exceptions. See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// Also available under a BSD-style license. See LICENSE.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MPACT_TRANSFORMS_SPARSITY_SPARSEENCODINGPROPAGATE_H
+#define MPACT_TRANSFORMS_SPARSITY_SPARSEENCODINGPROPAGATE_H
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Pass/Pass.h"
+
+namespace mlir {
+namespace mpact {
+std::unique_ptr<OperationPass<func::FuncOp>>
+createSparseEncodingPropagationPass();
+} // namespace mpact
+} // namespace mlir
+
+#endif // MPACT_TRANSFORMS_SPARSITY_SPARSEENCODINGPROPAGATE_H
diff --git a/lib/CAPI/CMakeLists.txt b/lib/CAPI/CMakeLists.txt
new file mode 100644
index 0000000..f45c469
--- /dev/null
+++ b/lib/CAPI/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_mlir_public_c_api_library(MPACTCAPI
+  Registration.cpp
+
+  ENABLE_AGGREGATION
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+  MLIRSupport
+  MPACTTransformPasses
+)
+
+mpact_target_includes(MPACTCAPI)
diff --git a/lib/CAPI/Registration.cpp b/lib/CAPI/Registration.cpp
new file mode 100644
index 0000000..51234fb
--- /dev/null
+++ b/lib/CAPI/Registration.cpp
@@ -0,0 +1,20 @@
+//===- Registration.cpp - C Interface for MLIR Registration ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// Also available under a BSD-style license. See LICENSE.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mpact-c/Registration.h"
+
+#include "mlir/CAPI/IR.h"
+#include "mlir/Conversion/Passes.h"
+#include "mlir/Dialect/Linalg/Passes.h"
+#include "mlir/Transforms/Passes.h"
+#include "mpact/Transforms/Passes.h"
+
+MLIR_CAPI_EXPORTED void mpactRegisterAllPasses() {
+  mlir::mpact::registerTransformPasses();
+}
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
new file mode 100644
index 0000000..9e7dcd1
--- /dev/null
+++ b/lib/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(CAPI)
+add_subdirectory(Transforms)
diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt
new file mode 100644
index 0000000..edd737f
--- /dev/null
+++ b/lib/Transforms/CMakeLists.txt
@@ -0,0 +1,13 @@
+add_subdirectory(Sparsity)  # defines MPACTSparsityPropagation
+
+set(linked_libs MPACTSparsityPropagation)  # pass libraries aggregated below
+
+add_mlir_library(MPACTTransformPasses
+  Passes.cpp
+
+  DEPENDS
+  MPACTTransformsPassIncGen  # NOTE(review): presumably generates Passes.h.inc - confirm
+
+  LINK_LIBS PUBLIC
+  ${linked_libs}
+)
diff --git a/lib/Transforms/Passes.cpp b/lib/Transforms/Passes.cpp
new file mode 100644
index 0000000..2363ac1
--- /dev/null
+++ b/lib/Transforms/Passes.cpp
@@ -0,0 +1,22 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the MPACT Project, under the Apache License v2.0 with LLVM
+// Exceptions. See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// Also available under a BSD-style license. See LICENSE.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mpact/Transforms/Passes.h"
+#include "mpact/Transforms/Sparsity/SparseEncodingPropagate.h"
+
+//===----------------------------------------------------------------------===//
+// Pass registration
+//===----------------------------------------------------------------------===//
+
+namespace { // Keeps the generated registration helpers local to this file.
+#define GEN_PASS_REGISTRATION
+#include "mpact/Transforms/Passes.h.inc"
+} // end namespace
+// Public entry point: forwards to the registerPasses() included above.
+void mlir::mpact::registerTransformPasses() { ::registerPasses(); }
diff --git a/lib/Transforms/Sparsity/CMakeLists.txt b/lib/Transforms/Sparsity/CMakeLists.txt
new file mode 100644
index 0000000..9323021
--- /dev/null
+++ b/lib/Transforms/Sparsity/CMakeLists.txt
@@ -0,0 +1,15 @@
+# NOTE(review): this library holds a pass, not a dialect conversion - confirm the
+# conversion-library macro is intended here.
+add_mlir_conversion_library(MPACTSparsityPropagation
+  SparseEncodingPropagate.cpp
+  ADDITIONAL_HEADER_DIRS
+  ${PROJECT_SOURCE_DIR}/include/mpact/Transforms/Sparsity
+
+  DEPENDS
+  MPACTTransformsPassIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+  MLIRPass
+)
+mpact_target_includes(MPACTSparsityPropagation)
diff --git a/lib/Transforms/Sparsity/SparseEncodingPropagate.cpp b/lib/Transforms/Sparsity/SparseEncodingPropagate.cpp
new file mode 100644
index 0000000..f42db3b
--- /dev/null
+++ b/lib/Transforms/Sparsity/SparseEncodingPropagate.cpp
@@ -0,0 +1,35 @@
+//===- SparseEncodingPropagate.cpp ---------------------------------------===//
+//
+// Part of the MPACT Project, under the Apache License v2.0 with LLVM
+// Exceptions. See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mpact/Transforms/Sparsity/SparseEncodingPropagate.h"
+
+namespace mlir {
+#define GEN_PASS_DEF_SPARSEENCODINGPROPAGATION
+#include "mpact/Transforms/Passes.h.inc"
+} // namespace mlir
+
+using namespace mlir;
+
+// -----------------------------------------------------------------------------
+// The pass
+// -----------------------------------------------------------------------------
+
+namespace {
+struct SparseEncodingPropagation
+    : public impl::SparseEncodingPropagationBase<SparseEncodingPropagation> {
+  SparseEncodingPropagation() = default;
+  SparseEncodingPropagation(const SparseEncodingPropagation &pass) = default;
+
+  void runOnOperation() override {} // No-op: the body is empty.
+};
+} // namespace
+
+std::unique_ptr<OperationPass<func::FuncOp>>
+mlir::mpact::createSparseEncodingPropagationPass() {
+  return std::make_unique<SparseEncodingPropagation>(); // New instance per call.
+}
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
new file mode 100644
index 0000000..aab4f9a
--- /dev/null
+++ b/python/CMakeLists.txt
@@ -0,0 +1,78 @@
+#-------------------------------------------------------------------------------
+# The MPACT Compiler Python Modules
+#-------------------------------------------------------------------------------
+
+# Disables generation of "version soname" (i.e. libFoo.so.<version>).
+set(CMAKE_PLATFORM_NO_VERSIONED_SONAME ON)
+
+# The directory at which the Python import tree begins.
+set(MPACT_PYTHON_ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mpact")
+
+# We vendor our own MLIR instance in the `mpact` namespace.
+add_compile_definitions("MLIR_PYTHON_PACKAGE_PREFIX=mpact.")
+
+declare_mlir_python_sources(MPACTPythonSources)     # parent group for Python sources
+declare_mlir_python_sources(MPACTPythonExtensions)  # parent group for extensions
+
+declare_mlir_python_sources(MPACTPythonSources.PublicAPI
+  ROOT_DIR "${MPACT_PYTHON_ROOT_DIR}"
+  ADD_TO_PARENT MPACTPythonSources
+  SOURCES
+    mpactbackend.py
+)
+
+declare_mlir_python_sources(MPACTPythonSources.SampleModels
+  ROOT_DIR "${MPACT_PYTHON_ROOT_DIR}"
+  ADD_TO_PARENT MPACTPythonSources
+  SOURCES_GLOB
+    models/*.py  # every Python file under mpact/models
+)
+
+#-------------------------------------------------------------------------------
+# Extensions
+#-------------------------------------------------------------------------------
+
+declare_mlir_python_extension(MPACTPythonExtensions.Main
+  MODULE_NAME _mpact  # same name as PYBIND11_MODULE(_mpact, ...) in MPACTModule.cpp
+  ADD_TO_PARENT MPACTPythonExtensions
+  SOURCES
+    MPACTModule.cpp
+  EMBED_CAPI_LINK_LIBS
+    MPACTCAPI  # exports mpactRegisterAllPasses()
+  PRIVATE_LINK_LIBS
+    LLVMSupport
+)
+
+#-------------------------------------------------------------------------------
+# Python Modules
+#-------------------------------------------------------------------------------
+
+set(_source_components  # consumed by both targets declared after this list
+  MLIRPythonSources
+  MLIRPythonExtension.Core
+  MLIRPythonExtension.RegisterEverything
+
+  # We need the FxImporter from torch-mlir
+  TorchMLIRPythonSources.Importers
+  TorchMLIRPythonSources.Dialects
+  TorchMLIRPythonExtensions
+
+  MPACTPythonSources     # this project's Python sources
+  MPACTPythonExtensions  # this project's native extension (_mpact)
+)
+
+add_mlir_python_common_capi_library(MPACTAggregateCAPI
+  INSTALL_COMPONENT MPACTPythonModules
+  INSTALL_DESTINATION python_packages/mpact/mpact/_mlir_libs
+  OUTPUT_DIRECTORY "${MPACT_PYTHON_PACKAGES_DIR}/mpact/mpact/_mlir_libs"
+  RELATIVE_INSTALL_ROOT ".."
+  DECLARED_SOURCES ${_source_components}
+)
+
+add_mlir_python_modules(MPACTPythonModules
+  ROOT_PREFIX "${MPACT_PYTHON_PACKAGES_DIR}/mpact/mpact"  # build-tree package root
+  INSTALL_PREFIX "python_packages/mpact/mpact"
+  DECLARED_SOURCES ${_source_components}
+  COMMON_CAPI_LINK_LIBS
+    MPACTAggregateCAPI  # the aggregate library declared just above
+)
diff --git a/python/MPACTModule.cpp b/python/MPACTModule.cpp
new file mode 100644
index 0000000..5751287
--- /dev/null
+++ b/python/MPACTModule.cpp
@@ -0,0 +1,17 @@
+//===-- MPACTModule.cpp ------------------------------------*- cpp -*-===//
+//
+// Part of the MPACT Project, under the Apache License v2.0 with LLVM
+// Exceptions. See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// Also available under a BSD-style license. See LICENSE.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Bindings/Python/PybindAdaptors.h"
+#include "mpact-c/Registration.h"
+
+PYBIND11_MODULE(_mpact, m) {
+  mpactRegisterAllPasses(); // Runs when the extension module is imported.
+
+  m.doc() = "mpact main python extension";
+}
diff --git a/python/mpact/models/gat.py b/python/mpact/models/gat.py
new file mode 100644
index 0000000..b8ab229
--- /dev/null
+++ b/python/mpact/models/gat.py
@@ -0,0 +1,87 @@
+import torch
+import torch.nn.functional as F
+
+
+class GraphAttentionLayer(torch.nn.Module):
+    def __init__(
+        self,
+        in_features: int,
+        out_features: int,
+        n_heads: int,
+        dropout: float = 0.4,
+        leaky_relu_slope: float = 0.2,
+    ):
+        super().__init__()
+        self.n_heads = n_heads
+        self.dropout = dropout
+        self.n_hidden = out_features
+        # One projection matrix holds the weights of all heads side by side.
+        weight_shape = (in_features, self.n_hidden * n_heads)
+        self.W = torch.nn.Parameter(torch.empty(size=weight_shape))
+        self.a = torch.nn.Parameter(torch.empty(size=(n_heads, 2 * self.n_hidden, 1)))
+        self.leakyrelu = torch.nn.LeakyReLU(leaky_relu_slope)
+        self.softmax = torch.nn.Softmax(dim=1)
+        torch.nn.init.ones_(self.W)
+        torch.nn.init.ones_(self.a)
+
+    def forward(self, h: torch.Tensor, adj_mat: torch.Tensor):
+        num_nodes = h.shape[0]
+        projected = torch.mm(h, self.W)
+        projected = F.dropout(projected, self.dropout, training=self.training)
+        # Split the projection per head: (n_heads, num_nodes, n_hidden).
+        per_head = projected.view(num_nodes, self.n_heads, self.n_hidden)
+        per_head = per_head.permute(1, 0, 2)
+        scores = self._get_attention_scores(per_head)
+        # Entries with adj_mat <= 0 get a huge negative score before softmax.
+        floor = -9e16 * torch.ones_like(scores)
+        scores = torch.where(adj_mat > 0, scores, floor)
+        weights = F.softmax(scores, dim=-1)
+        weights = F.dropout(weights, self.dropout, training=self.training)
+        return torch.matmul(weights, per_head).mean(dim=0)
+
+    def _get_attention_scores(self, h_transformed: torch.Tensor):
+        # The first n_hidden rows of `a` score sources, the remaining rows targets.
+        src_scores = torch.matmul(h_transformed, self.a[:, : self.n_hidden, :])
+        dst_scores = torch.matmul(h_transformed, self.a[:, self.n_hidden :, :])
+        return self.leakyrelu(src_scores + dst_scores.mT)
+
+
+class GAT(torch.nn.Module):
+    """
+    Two-layer Graph Attention Network, see <https://arxiv.org/pdf/1710.10903.pdf>.
+    """
+
+    def __init__(
+        self,
+        in_features,
+        n_hidden,
+        n_heads,
+        num_classes,
+        dropout=0.4,
+        leaky_relu_slope=0.2,
+    ):
+        super().__init__()
+        # Settings shared by both attention layers.
+        shared = {"dropout": dropout, "leaky_relu_slope": leaky_relu_slope}
+        self.gat1 = GraphAttentionLayer(
+            in_features=in_features,
+            out_features=n_hidden,
+            n_heads=n_heads,
+            **shared,
+        )
+        self.gat2 = GraphAttentionLayer(
+            in_features=n_hidden,
+            out_features=num_classes,
+            n_heads=1,
+            **shared,
+        )
+
+    def forward(self, input_tensor: torch.Tensor, adj_mat: torch.Tensor):
+        # gat1 -> ELU -> gat2, then log-probabilities along dim 1.
+        hidden = F.elu(self.gat1(input_tensor, adj_mat))
+        logits = self.gat2(hidden, adj_mat)
+        return F.log_softmax(logits, dim=1)
+
+
+def gat_4_64_8_3():  # name lists in_features, n_hidden, n_heads, num_classes
+    return GAT(in_features=4, n_hidden=64, n_heads=8, num_classes=3)
diff --git a/python/mpact/models/gcn.py b/python/mpact/models/gcn.py
new file mode 100644
index 0000000..c41e6d9
--- /dev/null
+++ b/python/mpact/models/gcn.py
@@ -0,0 +1,47 @@
+import torch
+import torch.nn.functional as F
+
+
+class GraphConv(torch.nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        # Kernel and bias are both filled with ones after allocation.
+        self.kernel = torch.nn.Parameter(torch.Tensor(input_dim, output_dim))
+        torch.nn.init.ones_(self.kernel)
+        self.bias = torch.nn.Parameter(torch.Tensor(output_dim))
+        torch.nn.init.ones_(self.bias)
+
+    def forward(self, inp, adj_mat):
+        # Project the input: inp @ kernel.
+        projected = torch.mm(inp, self.kernel)
+        # Multiply by the (sparse) adjacency matrix.
+        aggregated = torch.spmm(adj_mat, projected)
+        # The bias vector is added to every row.
+        return aggregated + self.bias
+
+
+class GCN(torch.nn.Module):
+    """
+    Two-layer Graph Convolutional Network, see <https://arxiv.org/pdf/1609.02907.pdf>.
+    """
+
+    def __init__(self, input_dim, hidden_dim, output_dim, dropout_p=0.1):
+        super().__init__()
+        self.gc1 = GraphConv(input_dim, hidden_dim)
+        self.gc2 = GraphConv(hidden_dim, output_dim)
+        self.dropout = torch.nn.Dropout(dropout_p)
+
+    def forward(self, input_tensor, adj_mat):
+        # gc1 -> ReLU -> dropout -> gc2, then log-probabilities along dim 1.
+        hidden = F.relu(self.gc1(input_tensor, adj_mat))
+        hidden = self.dropout(hidden)
+        logits = self.gc2(hidden, adj_mat)
+        return F.log_softmax(logits, dim=1)
+
+
+def graphconv_4_4():  # single GraphConv layer, 4 inputs -> 4 outputs
+    return GraphConv(input_dim=4, output_dim=4)
+
+
+def gcn_4_16_4():  # name lists input_dim, hidden_dim, output_dim
+    return GCN(input_dim=4, hidden_dim=16, output_dim=4)
diff --git a/python/mpact/models/kernels.py b/python/mpact/models/kernels.py
new file mode 100644
index 0000000..d18d88a
--- /dev/null
+++ b/python/mpact/models/kernels.py
@@ -0,0 +1,54 @@
+import torch
+
+
+class MVNet(torch.nn.Module):
+    def forward(self, x, v):  # matrix-vector product via torch.mv
+        return torch.mv(x, v)
+
+
+class MMNet(torch.nn.Module):
+    def forward(self, x, v):  # matrix-matrix product via torch.mm
+        return torch.mm(x, v)
+
+
+class AddNet(torch.nn.Module):
+    def forward(self, x, v):  # torch.add of the two inputs
+        return torch.add(x, v)
+
+
+class MulNet(torch.nn.Module):
+    def forward(self, x, v):  # torch.mul of the two inputs
+        return torch.mul(x, v)
+
+
+class SelfNet(torch.nn.Module):
+    def forward(self, x):  # returns the input object unchanged
+        return x
+
+
+class SDDMMNet(torch.nn.Module):
+    def forward(self, x, y, z):  # torch.mul of x with the product mm(y, z)
+        return torch.mul(x, torch.mm(y, z))
+
+
+class SqSum(torch.nn.Module):
+    def forward(self, x):  # sum over all elements of x * x
+        return (x * x).sum()
+
+
+class FeatureScale(torch.nn.Module):
+    def forward(self, F):
+        row_sums = torch.sum(F, dim=1)  # one sum per row
+        inverse = 1 / row_sums
+        inverse[inverse == float("inf")] = 0  # +inf reciprocals become 0
+        row_scaling = torch.diag(inverse).to_sparse()  # sparse diagonal matrix
+        return row_scaling @ F
+
+
+class Normalization(torch.nn.Module):
+    def forward(self, A):
+        row_sums = torch.sum(A, dim=1)  # one sum per row
+        inverse = 1 / row_sums
+        inverse[inverse == float("inf")] = 0  # +inf reciprocals become 0
+        diag_scaling = torch.diag(inverse).to_sparse()  # sparse diagonal matrix
+        return diag_scaling @ A @ diag_scaling
diff --git a/python/mpact/models/lif.py b/python/mpact/models/lif.py
new file mode 100644
index 0000000..fcb5a55
--- /dev/null
+++ b/python/mpact/models/lif.py
@@ -0,0 +1,58 @@
+import torch
+
+
+def spike(input):  # 1.0 where input >= 0, 0.0 elsewhere
+    return (input >= 0).float()
+
+
+def sqSum(input):  # sum over all elements of input * input
+    return (input * input).sum()
+
+
+class LIF(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.thresh = 1.0
+        self.decay = 0.5
+        self.act = spike
+
+    def forward(self, X):
+        """Emits a 0/1 spike tensor with one slice per step of X's last axis."""
+        membrane = 0
+        fired_per_step = []
+        for step in range(X.size(-1)):
+            # Decay the stored potential, then add the current input slice.
+            membrane = membrane * self.decay + X[..., step]
+            fired = self.act(membrane - self.thresh)
+            fired = fired.to_sparse().to_dense()  # prop hack
+            # Zero the potential wherever a spike fired.
+            membrane = membrane * (1.0 - fired)
+            fired_per_step.append(fired)
+        return torch.stack(fired_per_step, dim=-1)
+
+
+class tdLayer(torch.nn.Module):
+    def __init__(self, layer):
+        super().__init__()
+        self.layer = layer
+
+    def forward(self, X):
+        # Apply the wrapped layer to every slice along the last axis, in
+        # order, then stack the per-step results along a new trailing axis.
+        # The layer is called exactly once per step.
+        num_steps = X.size(-1)
+        per_step = [self.layer(X[..., t]) for t in range(num_steps)]
+        stacked = torch.stack(per_step, dim=-1)
+        return stacked
+
+
+class LIFSumOfSq(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.spike = LIF()
+        self.layer = tdLayer(sqSum)
+
+    def forward(self, X):
+        # Spike tensor first, then sqSum applied to each step of it.
+        spikes = self.spike(X)
+        return self.layer(spikes)
diff --git a/python/mpact/models/resnet.py b/python/mpact/models/resnet.py
new file mode 100644
index 0000000..2556597
--- /dev/null
+++ b/python/mpact/models/resnet.py
@@ -0,0 +1,255 @@
+import torch
+import numpy as np
+
+
+def spike(input):  # 1.0 where input >= 0, 0.0 elsewhere
+    return (input >= 0).float()
+
+
+class Straight(torch.nn.Module):
+    def forward(self, input):  # pass-through: returns the input unchanged
+        return input
+
+
+class tdLayer(torch.nn.Module):
+    def __init__(self, layer, bn=None):
+        super().__init__()
+        self.layer = layer
+        self.bn = Straight() if bn is None else bn
+
+    def forward(self, X):
+        # Apply the wrapped layer to every slice along the last axis, in
+        # order, stack the results along a new trailing axis, and finally
+        # pass the whole stack through `bn` (a pass-through if none was given).
+        num_steps = X.size(-1)
+        per_step = [self.layer(X[..., t]) for t in range(num_steps)]
+        stacked = torch.stack(per_step, dim=-1)
+        normalized = self.bn(stacked)
+        return normalized
+
+
+class LIF(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.thresh = 1.0
+        self.decay = 0.5
+        self.act = spike
+        self.gama = 1.0
+
+    def forward(self, X, gama=1):
+        # NOTE: neither the `gama` argument nor self.gama is read here.
+        # Produces one 0/1 spike slice per step of X's last axis.
+        membrane = 0
+        fired_per_step = []
+        for step in range(X.size(-1)):
+            membrane = membrane * self.decay + X[..., step]
+            fired = self.act(membrane - self.thresh)
+            membrane = membrane * (1.0 - fired)
+            fired_per_step.append(fired)
+        return torch.stack(fired_per_step, dim=-1)
+
+
+class tdBatchNorm(torch.nn.BatchNorm2d):
+    def __init__(
+        self,
+        num_features,
+        eps=1e-05,
+        momentum=0.1,
+        alpha=1,  # extra scale applied to the normalized input in forward()
+        affine=True,
+        track_running_stats=True,
+    ):
+        super(tdBatchNorm, self).__init__(
+            num_features, eps, momentum, affine, track_running_stats
+        )
+        self.alpha = alpha
+
+    def forward(self, input):
+        # Always normalizes with the stored running statistics, whatever the mode.
+        mean = self.running_mean
+        var = self.running_var
+        input = (  # the indexing broadcasts per-channel vectors over a 5-D input
+            self.alpha
+            * (input - mean[None, :, None, None, None])
+            / (torch.sqrt(var[None, :, None, None, None] + self.eps))
+        )
+        if self.affine:
+            input = (
+                input * self.weight[None, :, None, None, None]
+                + self.bias[None, :, None, None, None]
+            )
+        return input
+
+
+def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+    # 3x3 convolution without bias; the padding is set equal to the dilation.
+    options = dict(
+        kernel_size=3,
+        stride=stride,
+        padding=dilation,
+        groups=groups,
+        bias=False,
+        dilation=dilation,
+    )
+    return torch.nn.Conv2d(in_planes, out_planes, **options)
+
+
+def conv1x1(in_planes, out_planes, stride=1):
+    # 1x1 convolution without a bias term.
+    options = dict(kernel_size=1, stride=stride, bias=False)
+    return torch.nn.Conv2d(in_planes, out_planes, **options)
+
+
+class BasicBlock(torch.nn.Module):
+    expansion = 1
+
+    def __init__(
+        self,
+        inplanes,
+        planes,
+        stride=1,
+        downsample=None,
+        groups=1,
+        base_width=64,
+        dilation=1,
+        norm_layer=None,
+    ):
+        super().__init__()
+        # Fall back to the time-distributed batch norm when none is given.
+        if norm_layer is None:
+            norm_layer = tdBatchNorm
+        if not (groups == 1 and base_width == 64):
+            raise ValueError("BasicBlock only supports groups=1 and base_width=64")
+        if dilation > 1:
+            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+        # When stride != 1, conv1 and the downsample layer both downsample the input.
+        self.conv1 = conv3x3(inplanes, planes, stride)
+        self.bn1 = norm_layer(planes)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = norm_layer(planes)
+        self.downsample = downsample
+        self.stride = stride
+        self.conv1_s = tdLayer(self.conv1, self.bn1)
+        self.conv2_s = tdLayer(self.conv2, self.bn2)
+        self.spike1 = LIF()
+        self.spike2 = LIF()
+
+    def forward(self, x):
+        # Main branch: conv+bn, spike, conv+bn.
+        main = self.conv1_s(x)
+        main = self.spike1(main)
+        main = self.conv2_s(main)
+
+        # Shortcut: the raw input unless a downsample module was supplied.
+        shortcut = x
+        if self.downsample is not None:
+            shortcut = self.downsample(x)
+
+        # Accumulate in place, then apply the output spike.
+        main += shortcut
+        return self.spike2(main)
+
+
+class ResNety(torch.nn.Module):
+    def __init__(
+        self,
+        block,
+        layers,
+        num_classes=10,
+        zero_init_residual=False,  # not read anywhere in this class
+        groups=1,
+        width_per_group=64,
+        replace_stride_with_dilation=None,  # not read anywhere in this class
+        norm_layer=None,
+    ):
+        super(ResNety, self).__init__()
+        if norm_layer is None:
+            norm_layer = tdBatchNorm
+            # norm_layer = nn.BatchNorm2d
+        self._norm_layer = norm_layer
+        self.inplanes = 64  # running channel count, updated by _make_layer
+        self.dilation = 1
+        self.groups = groups
+        self.base_width = width_per_group
+        self.pre = torch.nn.Sequential(  # stem: conv+norm per time step, then LIF
+            tdLayer(
+                layer=torch.nn.Conv2d(
+                    3, self.inplanes, kernel_size=(3, 3), stride=(1, 1)
+                ),
+                bn=self._norm_layer(self.inplanes),
+            ),
+            LIF(),
+        )
+        self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.avgpool = tdLayer(torch.nn.AdaptiveAvgPool2d((1, 1)))
+        self.fc = tdLayer(torch.nn.Linear(256, num_classes))
+        self.T = 6  # number of time steps used by _forward_impl
+        for m in self.modules():  # re-initialize every Conv2d weight
+            if isinstance(m, torch.nn.Conv2d):
+                torch.nn.init.kaiming_normal_(
+                    m.weight, mode="fan_out", nonlinearity="relu"
+                )
+
+    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+        norm_layer = self._norm_layer
+        downsample = None
+        previous_dilation = self.dilation
+        if dilate:  # trade the stride for dilation
+            self.dilation *= stride
+            stride = 1
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = tdLayer(  # projection so the shortcut matches the main branch
+                conv1x1(self.inplanes, planes * block.expansion, stride),
+                norm_layer(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(  # only the first block gets the stride and the downsample
+            block(
+                self.inplanes,
+                planes,
+                stride,
+                downsample,
+                self.groups,
+                self.base_width,
+                previous_dilation,
+                norm_layer,
+            )
+        )
+        self.inplanes = planes * block.expansion  # input width for the next blocks
+        for _ in range(1, blocks):
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    groups=self.groups,
+                    base_width=self.base_width,
+                    dilation=self.dilation,
+                    norm_layer=norm_layer,
+                )
+            )
+
+        return torch.nn.Sequential(*layers)
+
+    def _forward_impl(self, input):
+        out = []
+        input = input.unsqueeze(-1).repeat(1, 1, 1, 1, self.T)  # add a time axis of length T
+        x = self.pre(input)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.avgpool(x)
+        x = x.view(x.size(0), -1, x.size(-1))  # flatten all dims between first and last
+        x = self.fc(x)
+        for t in range(self.T):  # move the time axis from last position to dim 1
+            out.append(x[..., t])
+        return torch.stack(out, dim=1)
+
+    def forward(self, x):
+        return self._forward_impl(x)
+
+
+def resnet_20():  # ResNety with BasicBlock, layers [2, 2, 2], 10 classes
+    return ResNety(block=BasicBlock, layers=[2, 2, 2], num_classes=10)
diff --git a/python/mpact/mpactbackend.py b/python/mpact/mpactbackend.py
new file mode 100644
index 0000000..3e8ae4b
--- /dev/null
+++ b/python/mpact/mpactbackend.py
@@ -0,0 +1,530 @@
+# Initialize mpact python extension.
+import mpact._mlir_libs._mpact
+
+import ctypes
+from io import StringIO
+import numpy as np
+import os
+import sys
+import tempfile
+import torch
+
+from typing import Any, Callable, Optional, Tuple, Dict, TypeVar, Union
+
+from mpact import ir
+from mpact.ir import Module
+from mpact.dialects import torch as torch_d
+from mpact.execution_engine import *
+from mpact.extras.fx_importer import FxImporter, SparsityMeta
+from mpact.ir import *
+from mpact.passmanager import *
+from mpact.runtime import *
+
+# One time set up of support library.
+SUPPORT_LIB = os.getenv("SUPPORT_LIB", default=None)
+SHARED_LIBS = [] if SUPPORT_LIB is None else [SUPPORT_LIB]
+
+# The result of MPACT compile() and input to load().
+MpactCompiledArtifact = TypeVar("MpactCompiledArtifact")
+
+
+def get_module_name_for_debug_dump(module):
+    """Returns the module's debug name (not guaranteed to be unique), or a
+    fixed placeholder when the module carries no debug-name attribute.
+    """
+    attrs = module.operation.attributes
+    if "torch.debug_module_name" in attrs:
+        return StringAttr(attrs["torch.debug_module_name"]).value
+    return "UnnammedModule"
+
+
+class MPACTCompilerError(Exception):  # raised with a repro report when a pipeline fails
+    pass
+
+
+def run_pipeline_with_repro_report(
+    module, pipeline: str, description: str, enable_ir_printing: bool = False
+):
+    """Runs `pipeline` on `module`, with a nice repro report if it fails."""
+    module_name = get_module_name_for_debug_dump(module)
+    original_stderr = sys.stderr  # restored in the `finally` clause
+    try:
+        sys.stderr = StringIO()  # captured text is echoed in the error report
+        asm_for_error_report = module.operation.get_asm(  # snapshot taken before the run
+            large_elements_limit=10, enable_debug_info=True
+        )
+        # Lower module in place to make it ready for compiler backends.
+        with module.context as ctx:
+            pm = PassManager.parse(pipeline)
+            if enable_ir_printing:
+                ctx.enable_multithreading(False)
+                pm.enable_ir_printing()
+            pm.run(module.operation)
+    except Exception as e:
+        # TODO: More robust.
+        # - don't arbitrarily clutter up /tmp. When a test suite has many
+        #   tests, this can be a big disk cost (also, /tmp/ is frequently a
+        #   RAM fs, which increases worries about capacity).
+        # - don't have colliding filenames (hard to do without cluttering
+        #   up /tmp)
+        # - if we do have colliding filenames, writes should at least
+        #   avoid being racy.
+        filename = os.path.join(tempfile.gettempdir(), module_name + ".mlir")
+        with open(filename, "w") as f:
+            f.write(asm_for_error_report)
+        debug_options = "-mlir-print-ir-after-all -mlir-disable-threading"
+        # Put something descriptive here even if description is empty.
+        description = description or f"{module_name} compile"
+
+        message = f"""\
+            {description} failed with the following diagnostics:
+            {sys.stderr.getvalue()}
+
+            python exception: {e}
+
+            The error can be reproduced with:
+            $ mpact-opt -pass-pipeline='{pipeline}' {filename}
+            Add '{debug_options}' to get the IR dump for debugging purpose.
+            """
+        trimmed_message = "\n".join([m.lstrip() for m in message.split("\n")])
+        raise MPACTCompilerError(trimmed_message) from None  # hides the original traceback
+    finally:
+        sys.stderr = original_stderr
+
+
+def assert_arg_type_is_supported(ty):
+    # Allow-list of numpy scalar types accepted as argument dtypes.
+    allowed = [
+        np.float16,
+        np.float32,
+        np.float64,
+        np.uint8,
+        np.int8,
+        np.int32,
+        np.int64,
+        np.bool_,
+        np.complex64,
+        np.complex128,
+    ]
+    message = f"Only numpy arrays with dtypes in {allowed} are supported, but got {ty}"
+    assert ty in allowed, message
+
+
+memref_type_to_np_dtype = {
+    "mrf16": np.float16,
+    "mrf32": np.float32,
+    "mrf64": np.float64,
+    "mri1": np.bool_,
+    "mri8": np.int8,
+    "mri32": np.int32,
+    "mri64": np.int64,
+    "mrc32": np.complex64,
+    "mrc64": np.complex128,
+}
+elemental_type_to_ctype = {  # scalar return-type tag -> ctypes type of the callback arg
+    "i1": ctypes.c_bool,
+    "i8": ctypes.c_byte,
+    "i64": ctypes.c_int64,  # c_int is only 32 bits wide on common platforms
+    "f32": ctypes.c_float,
+    "f64": ctypes.c_double,
+}
+
+CONSUME_RETURN_FUNC_PREFIX = "refbackend_consume_func_return_"
+
+SPARSE_LAYOUTS = [
+    torch.sparse_coo,
+    torch.sparse_csr,
+    torch.sparse_csc,
+    torch.sparse_bsr,
+    torch.sparse_bsc,
+]
+
+
+def get_return_funcs(module):
+    # Names of every function in the module body that starts with
+    # CONSUME_RETURN_FUNC_PREFIX, in module order.
+    matches = []
+    with module.context:
+        for op in module.body:
+            # str() of the attribute is quoted (`"refbackend.."`); drop the quotes.
+            symbol = str(op.attributes["sym_name"]).replace('"', "")
+            if symbol.startswith(CONSUME_RETURN_FUNC_PREFIX):
+                matches.append(symbol)
+    return matches
+
+
+def get_ctype_func(func_name):
+    # The text after the prefix is a "_"-separated list of return type tags.
+    ret_types = func_name[len(CONSUME_RETURN_FUNC_PREFIX) :].split("_")
+    signature = [None]  # first CFUNCTYPE entry is the result type: None
+    for tag in ret_types:
+        if tag in elemental_type_to_ctype:
+            signature.append(elemental_type_to_ctype[tag])
+        elif tag in memref_type_to_np_dtype:
+            signature.append(ctypes.POINTER(UnrankedMemRefDescriptor))
+        else:
+            assert False, f"Not supported type: {tag}"
+
+    return ctypes.CFUNCTYPE(*signature), ret_types
+
+
+class MpactBackendInvoker:
+    """Runs functions of a compiled module through an ExecutionEngine."""
+
+    def __init__(self, module, opt_level):
+        self.ee = ExecutionEngine(module, opt_level=opt_level, shared_libs=SHARED_LIBS)
+        self.result = None  # filled in by the return-consumer callbacks below
+
+        for ret_func in get_return_funcs(module):
+            ctype_wrapper, ret_types = get_ctype_func(ret_func)
+
+            def consume_return_funcs(*args, ret_types=ret_types):
+                # `ret_types` is bound as a default so that each callback keeps
+                # the type list of its own return function instead of sharing
+                # the value left over from the last loop iteration.
+                values = tuple(
+                    (
+                        arg
+                        if ty in elemental_type_to_ctype
+                        else unranked_memref_to_numpy(arg, memref_type_to_np_dtype[ty])
+                    )
+                    for arg, ty in zip(args, ret_types)
+                )
+                # A single return value is stored bare, several as a tuple.
+                single = len(values) == 1
+                self.result = values[0] if single else values
+
+            self.ee.register_runtime(ret_func, ctype_wrapper(consume_return_funcs))
+
+    def __getattr__(self, function_name: str):  # any unknown attribute is a function name
+        def invoke(*args):
+            ffi_args = []
+            for arg in args:
+                assert_arg_type_is_supported(arg.dtype)
+                ffi_args.append(
+                    ctypes.pointer(ctypes.pointer(get_unranked_memref_descriptor(arg)))
+                )
+
+            self.ee.invoke(function_name, *ffi_args)
+            result = self.result
+            assert result is not None, "Invocation didn't produce a result"
+            self.result = None  # reset so a stale value is never returned later
+            return result
+
+        return invoke
+
+
+LOWERING_PIPELINE_TEMPLATE = (
+    "builtin.module("
+    + ",".join(
+        [
+            "func.func(linalg-generalize-named-ops)",
+            "func.func(linalg-fuse-elementwise-ops)",
+            "convert-shape-to-std",
+            # Propagate sparse encodings before sparsifier mini-pipeline.
+            # TODO: the following pass currently contains no patterns; they
+            # will be added as needed.
+            "func.func(sparse-encoding-propagation)",
+            # MLIR Sparsifier mini-pipeline:
+            #   use the PyTorch assembler conventions
+            #   enable vectorization with VL=16 (more or less assumes AVX512 for float)
+            #   allow 32-bit index optimizations (unsafe for very large dimensions)
+            "sparse-assembler{{direct-out}}",
+            "sparsification-and-bufferization{{{sp_options}}}",
+            "sparse-storage-specifier-to-llvm",
+            # Buffer deallocation pass does not know how to handle realloc.
+            "func.func(expand-realloc)",
+            # Generalize pad and concat after sparse compiler, as they are handled
+            # differently when the operations involve sparse operands.
+            "func.func(refback-generalize-tensor-pad)",
+            "func.func(refback-generalize-tensor-concat)",
+            # Bufferize.
+            "func.func(tm-tensor-bufferize)",
+            "one-shot-bufferize{{copy-before-write bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map}}",
+            "refback-mlprogram-bufferize",
+            "func.func(finalizing-bufferize)",
+            "func.func(buffer-deallocation)",
+            # Inline sparse helper methods where useful (but after dealloc).
+            "inline",
+            "refback-munge-calling-conventions",
+            "func.func(tm-tensor-to-loops)",
+            "func.func(refback-munge-memref-copy)",
+            "func.func(convert-linalg-to-loops)",
+            "func.func(lower-affine)",
+            "convert-scf-to-cf",
+            "func.func(refback-expand-ops-for-llvm)",
+            "func.func(arith-expand)",
+            "func.func(convert-math-to-llvm)",
+            "convert-math-to-libm",
+            "expand-strided-metadata",
+            "finalize-memref-to-llvm",
+            "lower-affine",
+            "convert-bufferization-to-memref",
+            "finalize-memref-to-llvm",
+            "func.func(convert-arith-to-llvm)",
+            # Vector code (SIMD):
+            #   allow fp reductions to reassociate
+            #   allow 32-bit index optimizations (unsafe for very large dimensions)
+            #   assume we are running on a good ol' Intel X86 (disable for ARM/other)
+            "convert-vector-to-llvm{{reassociate-fp-reductions force-32bit-vector-indices enable-x86vector}}",
+            "convert-func-to-llvm",
+            "convert-cf-to-llvm",
+            "convert-complex-to-llvm",
+            "reconcile-unrealized-casts",
+        ]
+    )
+    + ")"
+)
+
+
+class MpactBackendCompiler:
+    """Main entry-point for the MPACT backend compiler."""
+
+    def __init__(self, opt_level, use_sp_it):
+        self.opt_level = opt_level
+        self.use_sp_it = use_sp_it
+
+    def compile(self, imported_module: Module) -> MpactCompiledArtifact:
+        """Compiles an imported module, with a flat list of functions.
+        The module is expected to be in linalg-on-tensors + scalar code form.
+
+        Args:
+          imported_module: The MLIR module to lower; the pipeline runs on it.
+        Returns:
+          The same module after lowering, which can be passed to `load`.
+        """
+        sp_options = (
+            "sparse-emit-strategy=sparse-iterator"
+            if self.use_sp_it
+            else "vl=16 enable-simd-index32"
+        )
+        pipeline = LOWERING_PIPELINE_TEMPLATE.format(sp_options=sp_options)
+        run_pipeline_with_repro_report(
+            imported_module,
+            pipeline,
+            "Lowering Linalg-on-Tensors IR to LLVM with MpactBackendCompiler",
+            enable_ir_printing=False,
+        )
+        return imported_module
+
+    def load(self, module: MpactCompiledArtifact) -> MpactBackendInvoker:
+        """Loads a compiled artifact into the runtime.
+
+        Args:
+          module: The result of a previous call to `compile`.
+        Returns:
+          MPactInvoker to call a compiled method (viz `invoker.foo(...)`).
+        """
+        return MpactBackendInvoker(module, self.opt_level)
+
+
+def sparse_metadata(a: torch.Tensor) -> SparsityMeta:
+    """
+    Returns a meta data tuple for the given sparse tensor.
+
+    The tuple records the layout, the number of batch, sparse, and dense
+    dimensions, the block size (`None` unless the layout is BSR or BSC),
+    and the dtypes of the two index arrays that belong to the layout.
+
+    NOTE: this will be fully replaced by fx graph SparseTensorMetadata
+
+    Args:
+      a: The sparse tensor to describe.
+    Returns:
+      The `SparsityMeta` tuple for `a`.
+    Raises:
+      RuntimeError: if `a` has none of the COO, CSR, CSC, BSR, BSC layouts.
+    """
+    layout = a.layout
+    sparse_dim = a.sparse_dim()
+    dense_dim = a.dense_dim()
+    # Dimensions that are neither sparse nor dense are counted as batch.
+    batch_dim = a.ndim - dense_dim - sparse_dim
+
+    # Select the dtypes of the two index arrays that belong to the layout.
+    if layout is torch.sparse_coo:
+        # COO keeps all indices in one tensor, so both entries share its dtype.
+        first_dtype = second_dtype = a._indices().dtype
+    elif layout is torch.sparse_csr or layout is torch.sparse_bsr:
+        first_dtype = a.crow_indices().dtype
+        second_dtype = a.col_indices().dtype
+    elif layout is torch.sparse_csc or layout is torch.sparse_bsc:
+        first_dtype = a.ccol_indices().dtype
+        second_dtype = a.row_indices().dtype
+    else:
+        raise RuntimeError(f"Unsupported sparse layout for {a}")
+
+    # Only the blocked layouts carry a block size. It is taken from the
+    # shape of the values tensor, as the slice [batch_dim + 1, batch_dim + 3).
+    blocksize = None
+    if layout is torch.sparse_bsr or layout is torch.sparse_bsc:
+        blocksize = a.values().shape[batch_dim + 1 : batch_dim + 3]
+
+    # Same positional order for every layout.
+    return SparsityMeta(
+        layout, batch_dim, sparse_dim, dense_dim, blocksize, first_dtype, second_dtype
+    )
+
+
+def sparse_arg(args, i):
+    """Returns the "sparsity" meta of `args[i]` if it is an FX node, else None."""
+    arg = args[i]
+    return arg.meta.get("sparsity") if isinstance(arg, torch.fx.node.Node) else None
+
+
+def sparse_export(
+    f: Callable, args: Tuple[Any, ...], kwargs: Optional[Dict[str, Any]] = None
+) -> torch.export.ExportedProgram:
+    """
+    This is a ***temporary*** wrapper around `torch.export.export`
+    that eventually should be removed and simply replaced by the
+    standard API for exporting traced graphs.
+
+    But until issue
+
+      https://github.com/pytorch/pytorch/pull/117907
+
+    is addressed, this wrapper provides support for the sparse
+    tensor types by first converting all operands to dense tensors,
+    building the traced graph as for the dense case, then annotating
+    sparse parameters with their actual sparse layout attributes,
+    followed by some simple propagation rules. This temporary solution
+    accelerates testing torch-mlir with PyTorch sparse tensors until
+    the issue is resolved upstream.
+    """
+    # Record which arguments are sparse, then convert all of them to dense.
+    mask = [a.layout in SPARSE_LAYOUTS for a in args]
+    dargs = tuple(a.to_dense() if sp else a for a, sp in zip(args, mask))
+    # Build the regular FX traced graph with only dense arguments
+    # (the current version would crash otherwise, see issue above).
+    prog = torch.export.export(f, dargs, kwargs)
+    # Annotate sparse arguments in the graph and apply some very
+    # basic propagation rules for sparsity.
+    specs = prog.graph_signature.input_specs
+    # NOTE: `specs[i]` below relies on the placeholders leading the node list.
+    k = 0
+    for i, node in enumerate(prog.graph.nodes):
+        if node.op == "placeholder":
+            # Argument.
+            spec = specs[i]
+            if spec.kind is torch.export.graph_signature.InputKind.USER_INPUT:
+                if mask[k]:
+                    node.meta["sparsity"] = sparse_metadata(args[k])
+                k = k + 1
+        elif node.op == "call_function":
+            # TODO: use upstream _opname implementation when available
+            opname = node.target._schema.name.split("::")[1]
+            # Zero preserving elt-wise unary op.
+            if opname in {"abs", "neg", "relu", "sin"}:
+                node.meta["sparsity"] = sparse_arg(node.args, 0)
+            # Some simplistic rules for preserving sparsity. Soon
+            # to be replaced by proper FX graph propagation.
+            elif opname in {"mul"}:
+                m0 = sparse_arg(node.args, 0)
+                m1 = sparse_arg(node.args, 1)
+                if m0 is not None:
+                    node.meta["sparsity"] = m0
+                elif m1 is not None:
+                    node.meta["sparsity"] = m1
+            elif opname in {"add", "mm"}:
+                m0 = sparse_arg(node.args, 0)
+                m1 = sparse_arg(node.args, 1)
+                if m0 is not None and m1 is not None:
+                    node.meta["sparsity"] = m0
+            elif opname == "_to_sparse" or opname == "to_sparse":
+                dim = len(node.meta.get("val").shape)
+                node.meta["sparsity"] = SparsityMeta(
+                    torch.sparse_coo, 0, dim, 0, None, torch.int64, torch.int64
+                )
+            # TODO: Uncomment this to hack sparsity into the network.
+            # elif opname == "_to_dense" or opname == "to_dense":
+            #     # hack (assumes we never really want the to_dense for now)
+            #     node.meta["sparsity"] = sparse_arg(node.args, 0)
+            elif opname == "select" and sparse_arg(node.args, 0):
+                dim = len(node.meta.get("val").shape)
+                node.meta["sparsity"] = SparsityMeta(
+                    torch.sparse_coo, 0, dim, 0, None, torch.int64, torch.int64
+                )
+            elif opname == "stack" and sparse_arg(node.args[0], 0):
+                dim = len(node.meta.get("val").shape)
+                node.meta["sparsity"] = SparsityMeta(
+                    torch.sparse_coo, 0, dim - 1, 1, None, torch.int64, torch.int64
+                )
+    return prog
+
+
+def export_and_import(f, *args, **kwargs):
+    """This method implements Stella's importer, stripped down to essentials."""
+    context = ir.Context()
+    torch_d.register_dialect(context)
+    fx_importer = FxImporter(context=context)
+    prog = sparse_export(f, args, kwargs)
+    fx_importer.import_frozen_program(prog)
+    return fx_importer.module
+
+
+def mpact_jit_compile(f, *args, opt_level=2, use_sp_it=False, **kwargs):
+    """This method compiles the given callable using the MPACT backend."""
+    # Import module and lower into Linalg IR.
+    module = export_and_import(f, *args, **kwargs)
+    run_pipeline_with_repro_report(
+        module,
+        (
+            "builtin.module("
+            "func.func(torch-decompose-complex-ops),"
+            "torch-backend-to-linalg-on-tensors-backend-pipeline)"
+        ),
+        "Lowering TorchFX IR -> Linalg IR",
+        enable_ir_printing=False,
+    )
+    # Compile with MPACT backend compiler.
+    backend = MpactBackendCompiler(opt_level=opt_level, use_sp_it=use_sp_it)
+    compiled = backend.compile(module)
+    invoker = backend.load(compiled)
+    return invoker, f
+
+
+def mpact_jit_run(invoker, f, *args, **kwargs):
+    """This method runs the given callable using the given MPACT invoker."""
+    xargs = []
+    # Prepare all the named buffer parameters (assume all dense).
+    # All scalar arguments are filtered out since they appear inline.
+    params = dict(f.named_buffers(remove_duplicate=True))
+    params_flat, params_spec = torch.utils._pytree.tree_flatten(params)
+    for p in params_flat:
+        if len(p.shape) > 0:
+            xargs.append(p.numpy())
+    # Prepare input parameters. Sparse input tensors are split into
+    # their composite tensors. All PyTorch tensors are converted
+    # to their backing numpy arrays. Note that the output consists
+    # of numpy arrays as well, which can trivially be reconstructed
+    # into PyTorch tensors (dense and sparse).
+    for a in args:
+        if a.layout is torch.sparse_coo:
+            # Construct the additional position array required by MLIR with data
+            # array([0, nnz]). The COO format always uses int64 indices.
+            xargs.append(np.array([0, a._nnz()], dtype=np.int64))
+            # Transform a tensor<ndim x nnz> into ndim x tensor<nnz> to conform
+            # to the MLIR SoA COO representation.
+            for idx in a._indices():
+                xargs.append(idx.numpy())
+            xargs.append(a._values().numpy())
+        elif a.layout is torch.sparse_csr or a.layout is torch.sparse_bsr:
+            xargs.append(a.crow_indices().numpy())
+            xargs.append(a.col_indices().numpy())
+            xargs.append(a.values().numpy())
+        elif a.layout is torch.sparse_csc or a.layout is torch.sparse_bsc:
+            xargs.append(a.ccol_indices().numpy())
+            xargs.append(a.row_indices().numpy())
+            xargs.append(a.values().numpy())
+        else:
+            xargs.append(a.numpy())
+    # Invoke.
+    return invoker.main(*xargs)
+
+
+# Convenience wrapper.
+def mpact_jit(f, *args, **kwargs):
+    """This method compiles and runs the given callable using the MPACT backend."""
+    invoker, fn = mpact_jit_compile(f, *args, **kwargs)
+    return mpact_jit_run(invoker, fn, *args, **kwargs)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..17bdc0b
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,187 @@
+# Script for generating the mpact wheel.
+# ```
+# $ python setup.py bdist_wheel
+# ```
+# Environment variables you are probably interested in:
+#
+#   CMAKE_BUILD_TYPE:
+#       specify the build type: DEBUG/RelWithDebInfo/Release
+#
+#   MPACT_CMAKE_ALREADY_BUILT:
+#       the `MPACT_CMAKE_BUILD_DIR` directory has already been compiled,
+#       and the CMake compilation process will not be executed again.
+#       On CIs, it is often advantageous to re-use/control the CMake build directory.
+#
+# It is recommended to build with Ninja and ccache. To do so, set environment
+# variables by prefixing to above invocations:
+# ```
+# CMAKE_GENERATOR=Ninja CMAKE_C_COMPILER_LAUNCHER=ccache CMAKE_CXX_COMPILER_LAUNCHER=ccache
+# ```
+#
+# Implementation notes:
+# The contents of the wheel are just the contents of the `python_packages`
+# directory that our CMake build produces. We go through quite a bit of effort
+# on the CMake side to organize that directory already, so we avoid duplicating
+# that here, and just package up its contents.
+
+import os
+import pathlib
+import shutil
+import subprocess
+import sys
+
+from datetime import date
+from distutils.command.build import build as _build
+from setuptools import setup, Extension
+from setuptools.command.build_ext import build_ext
+from setuptools.command.build_py import build_py
+
+
+def _check_env_flag(name: str, default=None) -> bool:
+    return str(os.getenv(name, default)).upper() in ["ON", "1", "YES", "TRUE", "Y"]
+
+
+PACKAGE_VERSION = "".join(str(date.today()).split("-"))
+SRC_DIR = pathlib.Path(__file__).parent.absolute()
+CMAKE_BUILD_TYPE = os.getenv("CMAKE_BUILD_TYPE", "Release")
+MPACT_CMAKE_ALREADY_BUILT = _check_env_flag("MPACT_CMAKE_ALREADY_BUILT", False)
+MPACT_CMAKE_BUILD_DIR = os.path.join(SRC_DIR, "build")
+
+
+# Build phase discovery is unreliable. Just tell it what phases to run.
+class CustomBuild(_build):
+    def initialize_options(self):
+        _build.initialize_options(self)
+        # Make setuptools not steal the build directory name,
+        # because the mlir c++ developers are quite
+        # used to having build/ be for cmake
+        self.build_base = "setup_build"
+
+    def run(self):
+        self.run_command("build_py")
+        self.run_command("build_ext")
+        self.run_command("build_scripts")
+
+
+class CMakeBuild(build_py):
+    def cmake_build(self, cmake_build_dir):
+        """Configures and builds the Python modules with CMake in `cmake_build_dir`."""
+        import shlex  # Only used to echo the commands in copy-pasteable form.
+        llvm_dir = str(SRC_DIR / "externals/torch-mlir/externals/llvm-project/llvm")
+        # No shell is involved below, so the values must not carry shell quotes.
+        cmake_config_args = [
+            "cmake",
+            "-GNinja",
+            f"-DCMAKE_BUILD_TYPE={CMAKE_BUILD_TYPE}",
+            "-DPython3_FIND_VIRTUALENV=ONLY",
+            "-DLLVM_ENABLE_PROJECTS=mlir",
+            "-DLLVM_EXTERNAL_PROJECTS=torch-mlir;mpact",
+            f"-DLLVM_EXTERNAL_TORCH_MLIR_SOURCE_DIR={SRC_DIR}/externals/torch-mlir",
+            f"-DLLVM_EXTERNAL_MPACT_SOURCE_DIR={SRC_DIR}",
+            "-DLLVM_TARGETS_TO_BUILD=host",
+            "-DMLIR_ENABLE_BINDINGS_PYTHON=ON",
+            # Optimization options for building wheels.
+            "-DCMAKE_VISIBILITY_INLINES_HIDDEN=ON",
+            "-DCMAKE_C_VISIBILITY_PRESET=hidden",
+            "-DCMAKE_CXX_VISIBILITY_PRESET=hidden",
+            llvm_dir,
+        ]
+        cmake_build_args = [
+            "cmake",
+            "--build",
+            ".",
+            "--target",
+            "MPACTPythonModules",
+            "MPACTBenchmarkPythonModules",
+        ]
+
+        try:
+            subprocess.check_call(cmake_config_args, cwd=cmake_build_dir)
+            subprocess.check_call(cmake_build_args, cwd=cmake_build_dir)
+        except subprocess.CalledProcessError as e:
+            print("cmake build failed with\n", e)
+            print("debug by follow cmake command:")
+            sys.exit(e.returncode)
+        finally:
+            print(f"cmake config: {shlex.join(cmake_config_args)}")
+            print(f"cmake build: {shlex.join(cmake_build_args)}")
+            print(f"cmake workspace: {cmake_build_dir}")
+            print(SRC_DIR)
+
+    def run(self):
+        target_dir = self.build_lib
+        cmake_build_dir = MPACT_CMAKE_BUILD_DIR
+        if not cmake_build_dir:
+            cmake_build_dir = os.path.abspath(os.path.join(target_dir, "..", "build"))
+
+        python_package_dir = os.path.join(
+            cmake_build_dir, "tools", "mpact", "python_packages", "mpact"
+        )
+        if not MPACT_CMAKE_ALREADY_BUILT:
+            os.makedirs(cmake_build_dir, exist_ok=True)
+            cmake_cache_file = os.path.join(cmake_build_dir, "CMakeCache.txt")
+            if os.path.exists(cmake_cache_file):
+                os.remove(cmake_cache_file)
+            # NOTE: With repeated builds for different Python versions, the
+            # prior version binaries will continue to accumulate. Here we just
+            # delete the directory where we build native extensions to keep
+            # this from happening but still take advantage of most of the
+            # build cache.
+            mlir_libs_dir = os.path.join(python_package_dir, "mpact", "_mlir_libs")
+            if os.path.exists(mlir_libs_dir):
+                print(f"Removing _mlir_libs dir to force rebuild: {mlir_libs_dir}")
+                shutil.rmtree(mlir_libs_dir)
+            else:
+                print(f"Not removing _mlir_libs dir (does not exist): {mlir_libs_dir}")
+            self.cmake_build(cmake_build_dir)
+
+        if os.path.exists(target_dir):
+            shutil.rmtree(target_dir, ignore_errors=False, onerror=None)
+
+        shutil.copytree(python_package_dir, target_dir, symlinks=False)
+
+
+class CMakeExtension(Extension):
+    def __init__(self, name, sourcedir=""):
+        Extension.__init__(self, name, sources=[])
+        self.sourcedir = os.path.abspath(sourcedir)
+
+
+class NoopBuildExtension(build_ext):
+    def build_extension(self, ext):
+        pass
+
+
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
+
+# Requires and extension modules depend on whether building PyTorch
+# extensions.
+INSTALL_REQUIRES = [
+    "numpy",
+    "packaging",
+]
+EXT_MODULES = [
+    CMakeExtension("mpact._mlir_libs._mpact"),
+]
+
+setup(
+    name="mpact",
+    version=PACKAGE_VERSION,
+    author="Reid Tatge",
+    author_email="tatge@google.com",
+    description="MPACT retargetable ML compiler",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    include_package_data=True,
+    cmdclass={
+        "build": CustomBuild,
+        "build_ext": NoopBuildExtension,
+        "build_py": CMakeBuild,
+    },
+    ext_modules=EXT_MODULES,
+    python_requires=">=3.8",
+    install_requires=INSTALL_REQUIRES,
+    zip_safe=False,
+)
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000..019d820
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,25 @@
+#-------------------------------------------------------------------------------
+# The MPACT Compiler Tests
+#-------------------------------------------------------------------------------
+
+configure_lit_site_cfg(
+        ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
+        ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
+        MAIN_CONFIG
+        ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
+)
+
+set(MPACT_TEST_DEPENDS
+        FileCheck count not
+        MPACTPythonModules
+        TorchMLIRPythonModules
+        torch-mlir-opt
+        )
+
+add_lit_testsuite(check-mpact "Running the MPACT regression tests"
+        ${CMAKE_CURRENT_BINARY_DIR}
+        DEPENDS ${MPACT_TEST_DEPENDS}
+        )
+set_target_properties(check-mpact PROPERTIES FOLDER "Tests")
+
+add_lit_testsuites(MPACT ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${MPACT_TEST_DEPENDS})
diff --git a/test/lit.cfg.py b/test/lit.cfg.py
new file mode 100644
index 0000000..910acca
--- /dev/null
+++ b/test/lit.cfg.py
@@ -0,0 +1,79 @@
+#-------------------------------------------------------------------------------
+# The MPACT Compiler LIT Configuration
+#-------------------------------------------------------------------------------
+
+import os
+import platform
+import re
+import subprocess
+import tempfile
+
+import lit.formats
+import lit.util
+
+from lit.llvm import llvm_config
+from lit.llvm.subst import ToolSubst
+from lit.llvm.subst import FindTool
+
+# The name of this test suite.
+config.name = "MPACT"
+
+# The test format.
+config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
+
+# A list of file extensions to treat as test files.
+config.suffixes = [".py"]
+
+# A list of files to exclude from the test suite.
+config.excludes = [
+    "CMakeLists.txt",
+    "README.txt",
+    "LICENSE.txt",
+    "lit.cfg.py",
+    "lit.site.cfg.py",
+]
+
+# The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
+
+# The root path where tests should be run.
+config.test_exec_root = os.path.join(config.mpact_obj_root, "test")
+config.standalone_tools_dir = os.path.join(config.mpact_obj_root, "bin")
+
+# Substitutions.
+config.substitutions.append(("%PATH%", config.environment["PATH"]))
+config.substitutions.append(("%shlibext", config.llvm_shlib_ext))
+
+# Tweak the PATH to include the tools dir.
+llvm_config.with_environment("PATH", config.llvm_tools_dir, append_path=True)
+llvm_config.with_environment(
+    "PATH", os.path.join(config.llvm_build_dir, "bin"), append_path=True
+)
+llvm_config.with_system_environment(["HOME", "INCLUDE", "LIB", "TMP", "TEMP"])
+
+# On Windows the path to python could contain spaces in which case it needs to
+# be provided in quotes. This is the equivalent of how %python is setup in
+# llvm/utils/lit/lit/llvm/config.py.
+if "Windows" in config.host_os:
+    config.python_executable = '"%s"' % (config.python_executable)
+
+# Tools.
+tool_dirs = [
+    config.standalone_tools_dir,
+    config.llvm_tools_dir,
+    config.mpact_obj_root,
+]
+tools = [
+    "mpact-opt",
+    ToolSubst("%PYTHON", config.python_executable, unresolved="ignore"),
+]
+
+llvm_config.add_tool_substitutions(tools, tool_dirs)
+
+llvm_config.with_environment(
+    "PYTHONPATH",
+    [
+        os.path.join(config.mpact_obj_root, "python_packages/mpact"),
+    ],
+    append_path=True,
+)
diff --git a/test/lit.site.cfg.py.in b/test/lit.site.cfg.py.in
new file mode 100644
index 0000000..01f3d01
--- /dev/null
+++ b/test/lit.site.cfg.py.in
@@ -0,0 +1,23 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+import sys
+
+config.host_os = "@HOST_OS@"
+config.mpact_src_root = "@MPACT_SOURCE_DIR@"
+config.mpact_obj_root = "@MPACT_BINARY_DIR@"
+config.torch_mlir_obj_root = "@LLVM_BINARY_DIR@/tools/torch-mlir"
+config.llvm_src_root = "@LLVM_SOURCE_DIR@"
+config.llvm_obj_root = "@LLVM_BINARY_DIR@"
+config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
+config.llvm_build_dir = "@CMAKE_BINARY_DIR@"
+config.llvm_lib_dir = "@LLVM_LIBS_DIR@"
+config.llvm_shlib_dir = "@SHLIBDIR@"
+config.llvm_shlib_ext = "@SHLIBEXT@"
+config.llvm_exe_ext = "@EXEEXT@"
+config.python_executable = "@Python3_EXECUTABLE@"
+
+import lit.llvm
+lit.llvm.initialize(lit_config, config)
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@MPACT_SOURCE_DIR@/test/lit.cfg.py")
diff --git a/test/python/add.py b/test/python/add.py
new file mode 100644
index 0000000..00d4d62
--- /dev/null
+++ b/test/python/add.py
@@ -0,0 +1,89 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+import numpy as np
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.kernels import AddNet
+
+
+def print_sparse(res):
+    print(res[0])
+    print(res[1])
+    print(res[2])
+
+
+net = AddNet()
+
+# Construct dense and sparse matrices.
+X = torch.arange(0, 16, dtype=torch.float32).view(4, 4)
+Y = torch.arange(16, 32, dtype=torch.float32).view(4, 4)
+A = torch.tensor(
+    [
+        [0.0, 1.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 2.0],
+        [0.0, 0.0, 0.0, 0.0],
+        [3.0, 0.0, 0.0, 0.0],
+    ],
+    dtype=torch.float32,
+)
+S = A.to_sparse_csr()
+
+#
+# CHECK: pytorch
+# CHECK:   tensor({{\[}}[16., 18., 20., 22.],
+# CHECK:                [24., 26., 28., 30.],
+# CHECK:                [32., 34., 36., 38.],
+# CHECK:                [40., 42., 44., 46.]{{\]}})
+# CHECK:  tensor({{\[}}[16., 18., 18., 19.],
+# CHECK:               [20., 21., 22., 25.],
+# CHECK:               [24., 25., 26., 27.],
+# CHECK:               [31., 29., 30., 31.]{{\]}})
+# CHECK:  tensor({{\[}}[ 0.,  2.,  2.,  3.],
+# CHECK:               [ 4.,  5.,  6.,  9.],
+# CHECK:               [ 8.,  9., 10., 11.],
+# CHECK:               [15., 13., 14., 15.]{{\]}})
+# CHECK:  tensor(crow_indices=tensor([0, 1, 2, 2, 3]),
+# CHECK:         col_indices=tensor([1, 3, 0]),
+# CHECK:         values=tensor([2., 4., 6.]), size=(4, 4), nnz=3,
+# CHECK:         layout=torch.sparse_csr)
+# CHECK: mpact
+# CHECK:   {{\[}}[16. 18. 20. 22.]
+# CHECK:         [24. 26. 28. 30.]
+# CHECK:         [32. 34. 36. 38.]
+# CHECK:         [40. 42. 44. 46.]{{\]}}
+# CHECK:   {{\[}}[16. 18. 18. 19.]
+# CHECK:         [20. 21. 22. 25.]
+# CHECK:         [24. 25. 26. 27.]
+# CHECK:         [31. 29. 30. 31.]{{\]}}
+# CHECK:   {{\[}}[ 0.  2.  2.  3.]
+# CHECK:         [ 4.  5.  6.  9.]
+# CHECK:         [ 8.  9. 10. 11.]
+# CHECK:         [15. 13. 14. 15.]{{\]}}
+# CHECK:  [0 1 2 2 3]
+# CHECK:  [1 3 0]
+# CHECK:  [2. 4. 6.]
+#
+
+# Run it with PyTorch.
+print("pytorch")
+res = net(X, Y)
+print(res)
+res = net(S, Y)
+print(res)
+res = net(X, S)
+print(res)
+res = net(S, S)
+print(res)
+
+# Run it with MPACT.
+print("mpact")
+res = mpact_jit(net, X, Y)
+print(res)
+res = mpact_jit(net, S, Y)
+print(res)
+res = mpact_jit(net, X, S)
+print(res)
+res = mpact_jit(net, S, S)
+print_sparse(res)
diff --git a/test/python/gat.py b/test/python/gat.py
new file mode 100644
index 0000000..283c36f
--- /dev/null
+++ b/test/python/gat.py
@@ -0,0 +1,49 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+import numpy as np
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.gat import gat_4_64_8_3
+
+net = gat_4_64_8_3()
+net.eval()  # Switch to inference.
+
+# Sparse input.
+idx = torch.tensor([[0, 0, 1, 2], [0, 2, 3, 1]], dtype=torch.int64)
+val = torch.tensor([14.0, 3.0, -8.0, 11.0], dtype=torch.float32)
+S = torch.sparse_coo_tensor(idx, val, size=[4, 4])
+
+# Construct adjacency matrix.
+V = 4
+edges = np.array([[0, 1], [0, 2], [1, 2], [1, 3], [2, 3]], dtype=np.int32)
+E = edges.shape[0]
+adj_mat = torch.sparse_coo_tensor(edges.T, torch.ones(E), (V, V), dtype=torch.int64)
+adj_mat = (
+    torch.eye(V) + adj_mat
+)  # Add self-loops to the adjacency matrix (becomes dense)
+
+
+#
+# CHECK: pytorch gat
+# CHECK:   tensor({{\[}}[-1.0986, -1.0986, -1.0986],
+# CHECK:                [-1.0986, -1.0986, -1.0986],
+# CHECK:                [-1.0986, -1.0986, -1.0986],
+# CHECK:                [-1.0986, -1.0986, -1.0986]{{\]}}
+# CHECK: mpact gat
+# CHECK:   {{\[}}[-1.0986123 -1.0986123 -1.0986123]
+# CHECK:         [-1.0986123 -1.0986123 -1.0986123]
+# CHECK:         [-1.0986123 -1.0986123 -1.0986123]
+# CHECK:         [-1.0986123 -1.0986123 -1.0986123]{{\]}}
+#
+with torch.no_grad():
+    # Run it with PyTorch.
+    print("pytorch gat")
+    res = net(S, adj_mat)
+    print(res)
+
+    # Run it with MPACT.
+    print("mpact gat")
+    res = mpact_jit(net, S, adj_mat)
+    print(res)
diff --git a/test/python/gcn.py b/test/python/gcn.py
new file mode 100644
index 0000000..0453f83
--- /dev/null
+++ b/test/python/gcn.py
@@ -0,0 +1,95 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.gcn import graphconv_4_4, gcn_4_16_4
+
+net = graphconv_4_4()
+net.eval()  # Switch to inference.
+
+# Get random (but reproducible) matrices.
+torch.manual_seed(0)
+inp = torch.rand(4, 4)
+adj_mat = torch.rand(4, 4).to_sparse()
+
+#
+# CHECK: pytorch
+# CHECK:   tensor({{\[}}[4.4778, 4.4778, 4.4778, 4.4778],
+# CHECK:                [5.7502, 5.7502, 5.7502, 5.7502],
+# CHECK:                [4.6980, 4.6980, 4.6980, 4.6980],
+# CHECK:                [3.6407, 3.6407, 3.6407, 3.6407]{{\]}})
+# CHECK: mpact compile and run
+# CHECK:   {{\[}}[4.477828  4.477828  4.477828  4.477828 ]
+# CHECK:         [5.7501717 5.7501717 5.7501717 5.7501717]
+# CHECK:         [4.697952  4.697952  4.697952  4.697952 ]
+# CHECK:         [3.640687  3.640687  3.640687  3.640687 ]{{\]}}
+# CHECK: mpact compile
+# CHECK: mpact run
+# CHECK:   {{\[}}[4.477828  4.477828  4.477828  4.477828 ]
+# CHECK:         [5.7501717 5.7501717 5.7501717 5.7501717]
+# CHECK:         [4.697952  4.697952  4.697952  4.697952 ]
+# CHECK:         [3.640687  3.640687  3.640687  3.640687 ]{{\]}}
+# CHECK: mpact compile opt=3
+# CHECK: mpact run
+# CHECK:   {{\[}}[4.477828  4.477828  4.477828  4.477828 ]
+# CHECK:         [5.7501717 5.7501717 5.7501717 5.7501717]
+# CHECK:         [4.697952  4.697952  4.697952  4.697952 ]
+#
+with torch.no_grad():
+    # Run it with PyTorch.
+    print("pytorch")
+    res = net(inp, adj_mat)
+    print(res)
+
+    # Run it with MPACT (compile and run at once).
+    print("mpact compile and run")
+    res = mpact_jit(net, inp, adj_mat)
+    print(res)
+
+    # Run it with MPACT (with separate compile and run steps).
+    print("mpact compile")
+    invoker, fn = mpact_jit_compile(net, inp, adj_mat)
+    print("mpact run")
+    res = mpact_jit_run(invoker, fn, inp, adj_mat)
+    print(res)
+
+    # Run it with MPACT (with separate compile and run steps, given opt_level).
+    print("mpact compile opt=3")
+    invoker, fn = mpact_jit_compile(net, inp, adj_mat, opt_level=3)
+    print("mpact run")
+    res = mpact_jit_run(invoker, fn, inp, adj_mat)
+    print(res)
+
+net = gcn_4_16_4()
+net.eval()  # Switch to inference.
+
+
+# Sparse input.
+idx = torch.tensor([[0, 0, 1, 2], [0, 2, 3, 1]], dtype=torch.int64)
+val = torch.tensor([14.0, 3.0, -8.0, 11.0], dtype=torch.float32)
+S = torch.sparse_coo_tensor(idx, val, size=[4, 4])
+
+#
+# CHECK: pytorch gcn
+# CHECK:   tensor({{\[}}[-1.3863, -1.3863, -1.3863, -1.3863],
+# CHECK:                [-1.3863, -1.3863, -1.3863, -1.3863],
+# CHECK:                [-1.3863, -1.3863, -1.3863, -1.3863],
+# CHECK:                [-1.3863, -1.3863, -1.3863, -1.3863]])
+# CHECK: mpact gcn
+# CHECK:   {{\[}}[-1.3862944 -1.3862944 -1.3862944 -1.3862944]
+# CHECK:         [-1.3862944 -1.3862944 -1.3862944 -1.3862944]
+# CHECK:         [-1.3862944 -1.3862944 -1.3862944 -1.3862944]
+# CHECK:         [-1.3862944 -1.3862944 -1.3862944 -1.3862944]{{\]}}
+#
+with torch.no_grad():
+    # Run it with PyTorch.
+    print("pytorch gcn")
+    res = net(S, adj_mat)
+    print(res)
+
+    # Run it with MPACT.
+    print("mpact gcn")
+    res = mpact_jit(net, S, adj_mat)
+    print(res)
diff --git a/test/python/lif.py b/test/python/lif.py
new file mode 100644
index 0000000..7dc797a
--- /dev/null
+++ b/test/python/lif.py
@@ -0,0 +1,31 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.lif import LIFSumOfSq
+
+net = LIFSumOfSq()
+
+# Get a random (but reproducible) input, so that a
+# general sparse tensor appears after LIF.
+torch.manual_seed(0)
+x = torch.rand(2, 3, 8, 8)
+
+#
+# CHECK: pytorch
+# CHECK:   tensor([ 0., 11.,  9., 11., 13., 11., 10., 12.])
+# CHECK: mpact
+# CHECK:   [ 0. 11.  9. 11. 13. 11. 10. 12.]
+#
+
+# Run it with PyTorch.
+print("pytorch")
+res = net(x)
+print(res)
+
+# Run it with MPACT.
+print("mpact")
+res = mpact_jit(net, x)
+print(res)
diff --git a/test/python/mm.py b/test/python/mm.py
new file mode 100644
index 0000000..3c51c37
--- /dev/null
+++ b/test/python/mm.py
@@ -0,0 +1,89 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+import numpy as np
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.kernels import MMNet
+
+
+def print_sparse(res):
+    """Print the first three components of `res`, one per line."""
+    for pos in range(3):
+        print(res[pos])
+
+
+net = MMNet()
+
+# Operands: two dense 4x4 matrices, and a 4x4 matrix with three nonzeros
+# that is converted to CSR to act as the sparse operand.
+X = torch.arange(0, 16, dtype=torch.float32).view(4, 4)
+Y = torch.arange(16, 32, dtype=torch.float32).view(4, 4)
+S = torch.tensor(
+    [
+        [0.0, 1.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 2.0],
+        [0.0, 0.0, 0.0, 0.0],
+        [3.0, 0.0, 0.0, 0.0],
+    ],
+    dtype=torch.float32,
+).to_sparse_csr()
+
+# Expected output for (X, Y), (S, Y), (X, S), (S, S): PyTorch, then MPACT.
+# CHECK: pytorch
+# CHECK:   tensor({{\[}}[ 152.,  158.,  164.,  170.],
+# CHECK:                [ 504.,  526.,  548.,  570.],
+# CHECK:                [ 856.,  894.,  932.,  970.],
+# CHECK:                [1208., 1262., 1316., 1370.]{{\]}})
+# CHECK:  tensor({{\[}}[20., 21., 22., 23.],
+# CHECK:               [56., 58., 60., 62.],
+# CHECK:               [ 0.,  0.,  0.,  0.],
+# CHECK:               [48., 51., 54., 57.]{{\]}})
+# CHECK:  tensor({{\[}}[ 9.,  0.,  0.,  2.],
+# CHECK:               [21.,  4.,  0., 10.],
+# CHECK:               [33.,  8.,  0., 18.],
+# CHECK:               [45., 12.,  0., 26.]{{\]}})
+# CHECK:  tensor(crow_indices=tensor([0, 1, 2, 2, 3]),
+# CHECK:         col_indices=tensor([3, 0, 1]),
+# CHECK:         values=tensor([2., 6., 3.]), size=(4, 4), nnz=3,
+# CHECK:         layout=torch.sparse_csr)
+# CHECK: mpact
+# CHECK:   {{\[}}[ 152.  158.  164.  170.]
+# CHECK:         [ 504.  526.  548.  570.]
+# CHECK:         [ 856.  894.  932.  970.]
+# CHECK:         [1208. 1262. 1316. 1370.]{{\]}}
+# CHECK:   {{\[}}[20. 21. 22. 23.]
+# CHECK:         [56. 58. 60. 62.]
+# CHECK:         [ 0.  0.  0.  0.]
+# CHECK:         [48. 51. 54. 57.]{{\]}}
+# CHECK:   {{\[}}[ 9.  0.  0.  2.]
+# CHECK:         [21.  4.  0. 10.]
+# CHECK:         [33.  8.  0. 18.]
+# CHECK:         [45. 12.  0. 26.]{{\]}}
+# CHECK:  [0 1 2 2 3]
+# CHECK:  [3 0 1]
+# CHECK:  [2. 6. 3.]
+#
+
+# Every dense/sparse pairing of the operands, in the order checked above.
+operand_pairs = [
+    (X, Y),
+    (S, Y),
+    (X, S),
+    (S, S),
+]
+
+# Run it with PyTorch.
+print("pytorch")
+for lhs, rhs in operand_pairs:
+    print(net(lhs, rhs))
+
+# Run it with MPACT; only the sparse-sparse result prints by component.
+print("mpact")
+for lhs, rhs in operand_pairs:
+    res = mpact_jit(net, lhs, rhs)
+    if lhs is S and rhs is S:
+        print_sparse(res)
+    else:
+        print(res)
diff --git a/test/python/mul.py b/test/python/mul.py
new file mode 100644
index 0000000..fd8692f
--- /dev/null
+++ b/test/python/mul.py
@@ -0,0 +1,87 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+import numpy as np
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.kernels import MulNet
+
+
+def print_sparse(res):
+    """Print the first three components of `res`, one per line."""
+    for pos in range(3):
+        print(res[pos])
+
+
+net = MulNet()
+
+# Operands: two dense 4x4 matrices, and a 4x4 matrix with three nonzeros
+# that is converted to CSR to act as the sparse operand.
+X = torch.arange(0, 16, dtype=torch.float32).view(4, 4)
+Y = torch.arange(16, 32, dtype=torch.float32).view(4, 4)
+S = torch.tensor(
+    [
+        [0.0, 1.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 2.0],
+        [0.0, 0.0, 0.0, 0.0],
+        [3.0, 0.0, 0.0, 0.0],
+    ],
+    dtype=torch.float32,
+).to_sparse_csr()
+
+# Expected output for (X, Y), (S, Y), (X, S), (S, S): PyTorch, then MPACT.
+# CHECK: pytorch
+# CHECK: tensor({{\[}}[  0.,  17.,  36.,  57.],
+# CHECK:              [ 80., 105., 132., 161.],
+# CHECK:              [192., 225., 260., 297.],
+# CHECK:              [336., 377., 420., 465.]{{\]}})
+# CHECK: tensor(crow_indices=tensor([0, 1, 2, 2, 3]),
+# CHECK:        col_indices=tensor([1, 3, 0]),
+# CHECK:        values=tensor([17., 46., 84.]), size=(4, 4), nnz=3,
+# CHECK:        layout=torch.sparse_csr)
+# CHECK: tensor(crow_indices=tensor([0, 1, 2, 2, 3]),
+# CHECK:        col_indices=tensor([1, 3, 0]),
+# CHECK:        values=tensor([ 1., 14., 36.]), size=(4, 4), nnz=3,
+# CHECK:        layout=torch.sparse_csr)
+# CHECK: tensor(crow_indices=tensor([0, 1, 2, 2, 3]),
+# CHECK:        col_indices=tensor([1, 3, 0]),
+# CHECK:        values=tensor([1., 4., 9.]), size=(4, 4), nnz=3,
+# CHECK:        layout=torch.sparse_csr)
+# CHECK: mpact
+# CHECK:   {{\[}}[  0.  17.  36.  57.]
+# CHECK:         [ 80. 105. 132. 161.]
+# CHECK:         [192. 225. 260. 297.]
+# CHECK:         [336. 377. 420. 465.]{{\]}}
+# CHECK:  [0 1 2 2 3]
+# CHECK:  [1 3 0]
+# CHECK:  [17. 46. 84.]
+# CHECK:  [0 1 2 2 3]
+# CHECK:  [1 3 0]
+# CHECK:  [ 1. 14. 36.]
+# CHECK:  [0 1 2 2 3]
+# CHECK:  [1 3 0]
+# CHECK:  [1. 4. 9.]
+#
+
+# Every dense/sparse pairing of the operands, in the order checked above.
+operand_pairs = [
+    (X, Y),
+    (S, Y),
+    (X, S),
+    (S, S),
+]
+
+# Run it with PyTorch.
+print("pytorch")
+for lhs, rhs in operand_pairs:
+    print(net(lhs, rhs))
+
+# Run it with MPACT; results with a sparse operand print by component.
+print("mpact")
+for lhs, rhs in operand_pairs:
+    res = mpact_jit(net, lhs, rhs)
+    if lhs is S or rhs is S:
+        print_sparse(res)
+    else:
+        print(res)
diff --git a/test/python/norm.py b/test/python/norm.py
new file mode 100644
index 0000000..f589e46
--- /dev/null
+++ b/test/python/norm.py
@@ -0,0 +1,45 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+import numpy as np
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.kernels import Normalization
+
+net = Normalization()
+
+# Construct the adjacency matrix: V nodes, one unit entry per listed edge.
+V = 8
+edges = np.array([[0, 1], [0, 4], [1, 4], [3, 4], [4, 3]], dtype=np.int32)
+E = edges.shape[0]
+sparse_adj = torch.sparse_coo_tensor(edges.T, torch.ones(E), (V, V), dtype=torch.int64)
+# Add self-loops by summing with the identity; this makes the matrix
+# dense.
+adj_mat = torch.eye(V) + sparse_adj
+
+#
+# CHECK: pytorch
+# CHECK:   tensor({{\[}}[0.1111, 0.1667, 0.0000, 0.0000, 0.1667, 0.0000, 0.0000, 0.0000],
+# CHECK:                [0.0000, 0.2500, 0.0000, 0.0000, 0.2500, 0.0000, 0.0000, 0.0000],
+# CHECK:                [0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
+# CHECK:                [0.0000, 0.0000, 0.0000, 0.2500, 0.2500, 0.0000, 0.0000, 0.0000],
+# CHECK:                [0.0000, 0.0000, 0.0000, 0.2500, 0.2500, 0.0000, 0.0000, 0.0000],
+# CHECK:                [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000],
+# CHECK:                [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000],
+# CHECK:                [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000]{{\]}})
+# CHECK: mpact
+#
+
+# Run it with PyTorch.
+print("pytorch")
+print(net(adj_mat))
+
+# Run it with MPACT.
+#
+# TODO: make this work, crashes in TORCH-MLIR. Until then only the banner
+# is printed, which is all the expected output above asks for.
+#
+print("mpact")
+# res = mpact_jit(net, adj_mat)
+# print(res)
diff --git a/test/python/resnet.py b/test/python/resnet.py
new file mode 100644
index 0000000..7ac317b
--- /dev/null
+++ b/test/python/resnet.py
@@ -0,0 +1,37 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+import numpy as np
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.resnet import resnet_20
+
+model = resnet_20()
+model.eval()  # Inference mode.
+
+# Unseeded random input, laid out as
+#   B x RGB x H x W
+image = torch.rand(1, 3, 16, 16)
+
+# Only the banners are matched; the numbers differ from run to run.
+# CHECK: pytorch
+# CHECK: mpact
+# CHECK: passed
+#
+
+with torch.no_grad():
+    # Run it with PyTorch.
+    print("pytorch")
+    torch_out = model(image)
+    print(torch_out)
+
+    # Run it with MPACT.
+    print("mpact")
+    mpact_out = mpact_jit(model, image)
+    print(mpact_out)
+
+# Inputs and weights change on every run, so no fixed values can be
+# matched; instead compare the two results against each other.
+np.testing.assert_allclose(torch_out.numpy(), mpact_out, rtol=1e-5, atol=0)
+print("passed")
diff --git a/test/python/scale.py b/test/python/scale.py
new file mode 100644
index 0000000..880fdf2
--- /dev/null
+++ b/test/python/scale.py
@@ -0,0 +1,39 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+import numpy as np
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.kernels import FeatureScale
+
+net = FeatureScale()
+
+# Get a random (but reproducible) 7x7 feature matrix.
+torch.manual_seed(0)
+features = torch.rand(7, 7)
+
+#
+# CHECK: pytorch
+# CHECK:   tensor({{\[}}[0.1702, 0.2634, 0.0303, 0.0453, 0.1054, 0.2174, 0.1680],
+# CHECK:                [0.3064, 0.1557, 0.2161, 0.1192, 0.1373, 0.0076, 0.0577],
+# CHECK:                [0.0856, 0.1510, 0.2031, 0.2329, 0.0469, 0.0822, 0.1984],
+# CHECK:                [0.2207, 0.0957, 0.2108, 0.1011, 0.1333, 0.2297, 0.0087],
+# CHECK:                [0.0774, 0.1561, 0.1275, 0.3896, 0.0735, 0.1128, 0.0630],
+# CHECK:                [0.0093, 0.0611, 0.2731, 0.2124, 0.2180, 0.1546, 0.0716],
+# CHECK:                [0.2026, 0.0115, 0.0481, 0.0839, 0.2826, 0.2749, 0.0964]{{\]}})
+# CHECK: mpact
+#
+
+# Run it with PyTorch.
+print("pytorch")
+print(net(features))
+
+# Run it with MPACT.
+#
+# TODO: make this work, crashes in TORCH-MLIR. Until then only the banner
+# is printed, which is all the expected output above asks for.
+#
+print("mpact")
+# res = mpact_jit(net, features)
+# print(res)
diff --git a/test/python/sddmm.py b/test/python/sddmm.py
new file mode 100644
index 0000000..a0890c7
--- /dev/null
+++ b/test/python/sddmm.py
@@ -0,0 +1,65 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+import numpy as np
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.kernels import MMNet, SDDMMNet
+
+
+def print_sparse(res):
+    """Print the first four components of `res`, one per line."""
+    # Index explicitly rather than iterating: only entries 0-3 are shown.
+    for pos in range(4):
+        print(res[pos])
+
+
+mmnet = MMNet()
+sddmmnet = SDDMMNet()
+
+# A 5x5 COO matrix holding just two nonzeros, at (0, 0) and (4, 4).
+coords = torch.tensor([[0, 4], [0, 4]], dtype=torch.int64)
+entries = torch.tensor([2.0, 3.0], dtype=torch.float64)
+S = torch.sparse_coo_tensor(coords, entries, size=[5, 5])
+
+# Trivial dense inputs: consecutive integers laid out as 5x5 matrices.
+A = torch.arange(0, 25, dtype=torch.float32).view(5, 5)
+B = torch.arange(25, 50, dtype=torch.float32).view(5, 5)
+
+# Expected output: the dense result, then the sparse one, per backend.
+# CHECK: pytorch
+# CHECK: tensor({{\[}}[ 400.,  410.,  420.,  430.,  440.],
+# CHECK:              [1275., 1310., 1345., 1380., 1415.],
+# CHECK:              [2150., 2210., 2270., 2330., 2390.],
+# CHECK:              [3025., 3110., 3195., 3280., 3365.],
+# CHECK:              [3900., 4010., 4120., 4230., 4340.]{{\]}})
+# CHECK:   tensor(indices=tensor({{\[}}[0, 4],
+# CHECK:                               [0, 4]{{\]}}),
+# CHECK:          values=tensor([  800., 13020.]),
+# CHECK:          size=(5, 5), nnz=2, dtype=torch.float64, layout=torch.sparse_coo)
+# CHECK: mpact
+# CHECK:   {{\[}}[ 400.  410.  420.  430.  440.]
+# CHECK:         [1275. 1310. 1345. 1380. 1415.]
+# CHECK:         [2150. 2210. 2270. 2330. 2390.]
+# CHECK:         [3025. 3110. 3195. 3280. 3365.]
+# CHECK:         [3900. 4010. 4120. 4230. 4340.]{{\]}}
+# CHECK:   [0 2]
+# CHECK:   [0 4]
+# CHECK:   [0 4]
+# CHECK:   [  800. 13020.]
+#
+
+# Run it with PyTorch: the dense network on (A, B) first, then the
+# SDDMM network on (S, A, B).
+print("pytorch")
+print(mmnet(A, B))
+sampled = sddmmnet(S, A, B)
+print(sampled)
+
+# Run it with MPACT. The dense result prints directly; the sparse result
+# is printed one component at a time.
+print("mpact")
+print(mpact_jit(mmnet, A, B))
+sampled = mpact_jit(sddmmnet, S, A, B)
+print_sparse(sampled)
diff --git a/test/python/spmv.py b/test/python/spmv.py
new file mode 100644
index 0000000..4f52ea0
--- /dev/null
+++ b/test/python/spmv.py
@@ -0,0 +1,31 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.kernels import MVNet
+
+net = MVNet()
+
+# Fixed inputs: a length-10 vector and a 10x10 matrix stored as 2x2-block BSR.
+dense_vector = torch.arange(1, 11, dtype=torch.float32)
+sparse_matrix = (
+    torch.arange(1, 101, dtype=torch.float32).view(10, 10)
+    .to_sparse_bsr(blocksize=(2, 2))
+)
+
+# Expected output; both runs print the same ten values.
+# CHECK: pytorch
+# CHECK:   tensor([ 385.,  935., 1485., 2035., 2585., 3135., 3685., 4235., 4785., 5335.])
+# CHECK: mpact
+# CHECK:   [ 385.  935. 1485. 2035. 2585. 3135. 3685. 4235. 4785. 5335.]
+#
+
+# Run it with PyTorch.
+print("pytorch")
+print(net(sparse_matrix, dense_vector))
+
+# Run it with MPACT.
+print("mpact")
+print(mpact_jit(net, sparse_matrix, dense_vector))
diff --git a/test/python/sqsum.py b/test/python/sqsum.py
new file mode 100644
index 0000000..2d21204
--- /dev/null
+++ b/test/python/sqsum.py
@@ -0,0 +1,35 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import torch
+import numpy as np
+
+from mpact.mpactbackend import mpact_jit, mpact_jit_compile, mpact_jit_run
+
+from mpact.models.kernels import SqSum
+
+net = SqSum()
+
+# Sparse V x V adjacency matrix with one unit entry per listed edge.
+V = 8
+edges = np.array([[0, 1], [0, 4], [1, 4], [3, 4], [4, 3]], dtype=np.int32)
+E = edges.shape[0]
+adj_mat = torch.sparse_coo_tensor(
+    edges.T, torch.ones(E), (V, V), dtype=torch.int64
+)
+
+# Expected output: five unit entries, so both runs print 5.
+# CHECK: pytorch
+# CHECK:   tensor(5)
+# CHECK: mpact
+# CHECK:   5
+
+# Run it with PyTorch.
+print("pytorch")
+print(net(adj_mat))
+
+# Run it with MPACT.
+print("mpact")
+# TODO: make this work, expose `sparse-emit-strategy=sparse-iterator` to
+# mini-pipeline.
+# res = mpact_jit(net, adj_mat, use_sp_it=True)
+print(mpact_jit(net, adj_mat))
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
new file mode 100644
index 0000000..b7c3ca5
--- /dev/null
+++ b/tools/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(mpact-opt)  # The mpact-opt optimizer driver tool.
diff --git a/tools/mpact-opt/CMakeLists.txt b/tools/mpact-opt/CMakeLists.txt
new file mode 100644
index 0000000..a26b6ca
--- /dev/null
+++ b/tools/mpact-opt/CMakeLists.txt
@@ -0,0 +1,17 @@
+# The mpact-opt driver executable.
+add_llvm_executable(mpact-opt mpact_opt.cpp)
+
+# StablehloRegister is linked only when TORCH_MLIR_ENABLE_STABLEHLO is set.
+set(mpact_opt_optional_libs)
+if(TORCH_MLIR_ENABLE_STABLEHLO)
+  list(APPEND mpact_opt_optional_libs StablehloRegister)
+endif()
+
+target_link_libraries(mpact-opt PRIVATE
+  MLIROptLib
+  MLIRTransforms
+  TorchMLIRInitAll
+  TorchMLIRTorchDialect
+  TorchMLIRTorchPasses
+  MPACTTransformPasses
+  ${mpact_opt_optional_libs})
diff --git a/tools/mpact-opt/mpact_opt.cpp b/tools/mpact-opt/mpact_opt.cpp
new file mode 100644
index 0000000..87d88c3
--- /dev/null
+++ b/tools/mpact-opt/mpact_opt.cpp
@@ -0,0 +1,49 @@
+//===- mpact_opt.cpp - MLIR Optimizer Driver ------------------------------===//
+//
+// Part of the MPACT Project, under the Apache License v2.0 with LLVM
+// Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// Also available under a BSD-style license. See LICENSE.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Tools/mlir-opt/MlirOptMain.h"
+#include "mlir/Transforms/Passes.h"
+#include "mpact/Transforms/Passes.h"
+#include "torch-mlir/InitAll.h"
+
+#ifdef TORCH_MLIR_ENABLE_STABLEHLO
+#include "stablehlo/dialect/Register.h"
+#endif
+
+using namespace mlir;
+
+int main(int argc, char **argv) {
+  // Passes contributed by MPACT itself and by torch-mlir.
+  mlir::mpact::registerTransformPasses();
+  mlir::torch::registerAllPasses();
+
+  // Core transforms from upstream MLIR, registered one by one.
+  mlir::registerCanonicalizerPass();
+  mlir::registerCSEPass();
+  mlir::registerInlinerPass();
+  mlir::registerLocationSnapshotPass();
+  mlir::registerLoopInvariantCodeMotionPass();
+  mlir::registerPrintOpStatsPass();
+  mlir::registerViewOpGraphPass();
+  mlir::registerStripDebugInfoPass();
+  mlir::registerSymbolDCEPass();
+
+  // Dialect registry handed to the opt driver below.
+  mlir::DialectRegistry dialects;
+  mlir::torch::registerAllDialects(dialects);
+  mlir::torch::registerOptionalInputDialects(dialects);
+#ifdef TORCH_MLIR_ENABLE_STABLEHLO
+  mlir::stablehlo::registerAllDialects(dialects);
+#endif
+  return mlir::asMainReturnCode(mlir::MlirOptMain(
+      argc, argv, "MLIR modular optimizer driver\n", dialects));
+}