Publications

2025

LUT Tensor Core: Lookup Table Enables Efficient Low-Bit LLM Inference Acceleration

Zhiwen Mo, Lei Wang, Jianyu Wei, Zhichen Zeng, Shijie Cao, Lingxiao Ma, Naifeng Jing, Ting Cao, Jilong Xue, Fan Yang, Mao Yang

2025 52nd IEEE/ACM Annual International Symposium on Computer Architecture, Jun 2025

arXiv
% arXiv preprint (arXiv:2408.06003). Stored as misc with eprint fields because
% it has no journal; only the acronyms and the system name are brace-protected.
@misc{Mo2024LUT,
	author = {Mo, Zhiwen and Wang, Lei and Wei, Jianyu and Zeng, Zhichen and Cao, Shijie and Ma, Lingxiao and Jing, Naifeng and Cao, Ting and Xue, Jilong and Yang, Fan and Yang, Mao},
	title = {{LUT} {Tensor} {Core}: Lookup Table Enables Efficient Low-Bit {LLM} Inference Acceleration},
	year = {2024},
	eprint = {2408.06003},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2408.06003},
	url = {https://arxiv.org/abs/2408.06003},
}

Exploring the Performance Improvement of Tensor Processing Engines through Transformation in the Bit-weight Dimension of MACs

Qizhe Wu, Huawen Liang, Yuchen Gui, Zhichen Zeng, Zerong He, Linfeng Tao, Xiaotian Wang, Letian Zhao, Zhaoxi Zeng, Wei Yuan, Wei Wu, Xi Jin

2025 IEEE International Symposium on High-Performance Computer Architecture, Mar 2025

arXiv
% arXiv preprint (arXiv:2503.06342). Stored as misc with eprint fields because
% it has no journal; only the acronym in the title is brace-protected.
@misc{Wu2025Exploring,
	author = {Wu, Qizhe and Liang, Huawen and Gui, Yuchen and Zeng, Zhichen and He, Zerong and Tao, Linfeng and Wang, Xiaotian and Zhao, Letian and Zeng, Zhaoxi and Yuan, Wei and Wu, Wei and Jin, Xi},
	title = {Exploring the Performance Improvement of Tensor Processing Engines through Transformation in the Bit-weight Dimension of {MACs}},
	year = {2025},
	eprint = {2503.06342},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2503.06342},
	url = {https://arxiv.org/abs/2503.06342},
}

Tactic: Adaptive Sparse Attention with Clustering and Distribution Fitting for Long-Context LLMs

Kan Zhu, Tian Tang, Qinyu Xu, Yile Gu, Zhichen Zeng, Rohan Kadekodi, Liangyu Zhao, Ang Li, Arvind Krishnamurthy, Baris Kasikci

arXiv
% arXiv preprint (arXiv:2502.12216). Stored as misc with eprint fields because
% it has no journal; only the acronym in the title is brace-protected.
@misc{Zhu2025Tactic,
	author = {Zhu, Kan and Tang, Tian and Xu, Qinyu and Gu, Yile and Zeng, Zhichen and Kadekodi, Rohan and Zhao, Liangyu and Li, Ang and Krishnamurthy, Arvind and Kasikci, Baris},
	title = {Tactic: Adaptive Sparse Attention with Clustering and Distribution Fitting for Long-Context {LLMs}},
	year = {2025},
	eprint = {2502.12216},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2502.12216},
	url = {https://arxiv.org/abs/2502.12216},
}

2024

EN-T: Optimizing Tensor Computing Engines Performance via Encoder-Based Methodology

Qizhe Wu, Yuchen Gui, Zhichen Zeng, Xiaotian Wang, Huawen Liang, Xi Jin

2024 IEEE 42nd International Conference on Computer Design (ICCD), Nov 2024

DOI
% Conference paper (ICCD 2024). Month uses the standard BibTeX macro; the
% system name EN-T is brace-protected so sentence-casing styles keep it intact.
@inproceedings{Wu2024EN,
	author = {Wu, Qizhe and Gui, Yuchen and Zeng, Zhichen and Wang, Xiaotian and Liang, Huawen and Jin, Xi},
	title = {{EN-T}: Optimizing Tensor Computing Engines Performance via Encoder-Based Methodology},
	booktitle = {2024 {IEEE} 42nd International Conference on Computer Design ({ICCD})},
	pages = {608--615},
	year = {2024},
	month = nov,
	publisher = {IEEE},
	doi = {10.1109/iccd63220.2024.00097},
	url = {https://doi.org/10.1109/ICCD63220.2024.00097},
}

Fusion-3D: Integrated Acceleration for Instant 3D Reconstruction and Real-Time Rendering

Sixu Li, Yang Zhao, Chaojian Li, Bowei Guo, Jingqun Zhang, Wenbo Zhu, Zhifan Ye, Cheng Wan, Yingyan "Celine" Lin

2024 57th IEEE/ACM International Symposium on Microarchitecture, Nov 2024

DOI
% Conference paper (MICRO 2024). Month uses the standard BibTeX macro; "3D" is
% brace-protected so sentence-casing styles do not turn it into "3d".
@inproceedings{Li2024Fusion,
	author = {Li, Sixu and Zhao, Yang and Li, Chaojian and Guo, Bowei and Zhang, Jingqun and Zhu, Wenbo and Ye, Zhifan and Wan, Cheng and Lin, Yingyan Celine},
	title = {{Fusion-3D}: Integrated Acceleration for Instant {3D} Reconstruction and Real-Time Rendering},
	booktitle = {2024 57th {IEEE}/{ACM} International Symposium on Microarchitecture ({MICRO})},
	pages = {78--91},
	year = {2024},
	month = nov,
	publisher = {IEEE},
	doi = {10.1109/micro61859.2024.00016},
	url = {https://doi.org/10.1109/MICRO61859.2024.00016},
}

SeerAttention: Learning Intrinsic Sparse Attention in Your LLMs

Yizhao Gao, Zhichen Zeng, Dayou Du, Shijie Cao, Peiyuan Zhou, Jiaxing Qi, Junjie Lai, Hayden Kwok-Hay So, Ting Cao, Fan Yang, Mao Yang

arXiv
% arXiv preprint (arXiv:2410.13276). Stored as misc with eprint fields because
% it has no journal; the camelCase name and the acronym are brace-protected.
@misc{Gao2024SeerAttention,
	author = {Gao, Yizhao and Zeng, Zhichen and Du, Dayou and Cao, Shijie and Zhou, Peiyuan and Qi, Jiaxing and Lai, Junjie and So, Hayden Kwok-Hay and Cao, Ting and Yang, Fan and Yang, Mao},
	title = {{SeerAttention}: Learning Intrinsic Sparse Attention in Your {LLMs}},
	year = {2024},
	eprint = {2410.13276},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2410.13276},
	url = {https://arxiv.org/abs/2410.13276},
}

Allo: A Programming Model for Composable Accelerator Design

Hongzheng Chen, Niansong Zhang, Shaojie Xiang, Zhichen Zeng, Mengjia Dai, Zhiru Zhang

Proceedings of the ACM on Programming Languages, Jun 2024

DOI
% Journal article (Proceedings of the ACM on Programming Languages, vol. 8).
% Month uses the standard BibTeX macro; the URL uses the https://doi.org resolver.
@article{Chen2024Allo,
	author = {Chen, Hongzheng and Zhang, Niansong and Xiang, Shaojie and Zeng, Zhichen and Dai, Mengjia and Zhang, Zhiru},
	title = {Allo: A Programming Model for Composable Accelerator Design},
	journal = {Proceedings of the {ACM} on Programming Languages},
	volume = {8},
	pages = {593--620},
	year = {2024},
	month = jun,
	publisher = {Association for Computing Machinery (ACM)},
	issn = {2475-1421},
	doi = {10.1145/3656401},
	url = {https://doi.org/10.1145/3656401},
}

Highly stable and fast response photodetector based on double perovskite Cs₂AgBiCl₆ crystals

Zhengyu Han, Mengjia Dai, Zhichen Zeng, Chunhui Ye, Rucheng Dai, Zhongping Wang, Xiaoyu Sun, Zengming Zhang

Journal of Physics D: Applied Physics, Feb 2024

DOI
% Journal article (Journal of Physics D: Applied Physics, vol. 57, no. 21).
% The pages field holds the article number; the chemical formula is braced as
% one unit so case-changing styles leave it untouched.
@article{Han2024Highly,
	author = {Han, Zhengyu and Dai, Mengjia and Zeng, Zhichen and Ye, Chunhui and Dai, Rucheng and Wang, Zhongping and Sun, Xiaoyu and Zhang, Zengming},
	title = {Highly stable and fast response photodetector based on double perovskite {Cs\textsubscript{2}AgBiCl\textsubscript{6}} crystals},
	journal = {Journal of Physics D: Applied Physics},
	volume = {57},
	number = {21},
	pages = {215102},
	year = {2024},
	month = feb,
	publisher = {IOP Publishing},
	issn = {0022-3727},
	doi = {10.1088/1361-6463/ad291a},
	url = {https://doi.org/10.1088/1361-6463/ad291a},
}

2023

Instant-NeRF: Instant On-Device Neural Radiance Field Training via Algorithm-Accelerator Co-Designed Near-Memory Processing

Yang Katie Zhao, Shang Wu, Jingqun Zhang, Sixu Li, Chaojian Li, Yingyan "Celine" Lin

2023 60th ACM/IEEE Design Automation Conference (DAC), Jul 2023

DOI
% Conference paper (DAC 2023). Month uses the standard BibTeX macro; the
% system name Instant-NeRF is brace-protected against sentence-casing.
@inproceedings{Zhao2023Instant,
	author = {Zhao, Yang Katie and Wu, Shang and Zhang, Jingqun and Li, Sixu and Li, Chaojian and Lin, Yingyan Celine},
	title = {{Instant-NeRF}: Instant On-Device Neural Radiance Field Training via Algorithm-Accelerator Co-Designed Near-Memory Processing},
	booktitle = {2023 60th {ACM}/{IEEE} Design Automation Conference ({DAC})},
	pages = {1--6},
	year = {2023},
	month = jul,
	publisher = {IEEE},
	doi = {10.1109/dac56929.2023.10247710},
	url = {https://doi.org/10.1109/DAC56929.2023.10247710},
}

Open-Source FPGA on Silicon: Case Studies on PRGA, an Open-Source Framework for Building & Programming Custom FPGAs

Ang Li, Ting-Jung Chang, Fei Gao, David Wentzlaff

2023 Open-Source Computer Architecture Research, Jun 2023

Evaluating Shared Memory Heterogeneous Systems Using Traverse-Compute Workloads

Yanwen Xu, Ang Li, Tyler Sorensen

2023 Open-Source Computer Architecture Research, Jun 2023

CIFER: A Cache-Coherent 12nm 16mm² SoC with Four 64-Bit RISC-V Application Cores, 18 32-Bit RISC-V Compute Cores, and a 1541 LUT6/mm² Synthesizable eFPGA

Ang Li, Ting-Jung Chang, Fei Gao, Tuan Ta, Georgios Tziantzioulis, Yanghui Ou, Moyang Wang, Jinzheng Tu, Kaifeng Xu, Paul Jackson, August Ning, Grigory Chirkov, Marcelo Orenes-Vera, Shady Agwa, Xiaoyu Yan, Eric Tang, Jonathan Balkind, Christopher Batten, David Wentzlaff

IEEE Solid-State Circuits Letters, Jun 2023

% Journal article (IEEE Solid-State Circuits Letters, vol. 6). Acronyms and
% product names in the title are brace-protected so that sentence-casing
% styles do not render them as "Cifer", "Soc", "Risc-v", etc.
@article{Li2023CIFER,
	author = {Li, Ang and Chang, Ting-Jung and Gao, Fei and Ta, Tuan and Tziantzioulis, Georgios and Ou, Yanghui and Wang, Moyang and Tu, Jinzheng and Xu, Kaifeng and Jackson, Paul and Ning, August and Chirkov, Grigory and Orenes-Vera, Marcelo and Agwa, Shady and Yan, Xiaoyu and Tang, Eric and Balkind, Jonathan and Batten, Christopher and Wentzlaff, David},
	title = {{CIFER}: A Cache-Coherent 12-nm 16-mm\textsuperscript{2} {SoC} With Four 64-Bit {RISC-V} Application Cores, 18 32-Bit {RISC-V} Compute Cores, and a 1541 {LUT6}/mm\textsuperscript{2} Synthesizable {eFPGA}},
	journal = {{IEEE} Solid-State Circuits Letters},
	volume = {6},
	pages = {229--232},
	year = {2023},
	publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
	issn = {2573-9603},
	doi = {10.1109/lssc.2023.3303111},
	url = {https://doi.org/10.1109/LSSC.2023.3303111},
}

DECADES: A 67mm², 1.46TOPS, 55 Giga Cache-Coherent 64-bit RISC-V Instructions per second, Heterogeneous Manycore SoC with 109 Tiles including Accelerators, Intelligent Storage, and eFPGA in 12nm FinFET

Fei Gao, Ting-Jung Chang, Ang Li, Marcelo Orenes-Vera, Davide Giri, Paul Jackson, August Ning, Georgios Tziantzioulis, Joseph Zuckerman, Jinzheng Tu, Kaifeng Xu, Grigory Chirkov, Gabriele Tombesi, Jonathan Balkind, Margaret Martonosi, Luca Carloni, David Wentzlaff

2023 IEEE Custom Integrated Circuits Conference, Apr 2023

% Conference paper (CICC 2023). Month uses the standard BibTeX macro instead of
% a bare number; acronyms in the title are brace-protected against
% sentence-casing.
@inproceedings{Gao2023DECADES,
	author = {Gao, Fei and Chang, Ting-Jung and Li, Ang and Orenes-Vera, Marcelo and Giri, Davide and Jackson, Paul J. and Ning, August and Tziantzioulis, Georgios and Zuckerman, Joseph and Tu, Jinzheng and Xu, Kaifeng and Chirkov, Grigory and Tombesi, Gabriele and Balkind, Jonathan and Martonosi, Margaret and Carloni, Luca and Wentzlaff, David},
	title = {{DECADES}: A 67mm\textsuperscript{2}, {1.46TOPS}, 55 Giga Cache-Coherent 64-bit {RISC-V} Instructions per second, Heterogeneous Manycore {SoC} with 109 Tiles including Accelerators, Intelligent Storage, and {eFPGA} in 12nm {FinFET}},
	booktitle = {2023 {IEEE} Custom Integrated Circuits Conference ({CICC})},
	pages = {1--2},
	year = {2023},
	month = apr,
	publisher = {IEEE},
	doi = {10.1109/cicc57935.2023.10121257},
	url = {https://doi.org/10.1109/CICC57935.2023.10121257},
}

Redwood: Flexible and Portable Heterogeneous Tree Traversal Workloads

Yanwen Xu, Ang Li, Tyler Sorensen

2023 IEEE International Symposium on Performance Analysis of Systems and Software, Apr 2023

% Conference paper (ISPASS 2023). Month uses the standard BibTeX macro instead
% of a bare number; the URL uses the https://doi.org resolver.
@inproceedings{Xu2023Redwood,
	author = {Xu, Yanwen and Li, Ang and Sorensen, Tyler},
	title = {Redwood: Flexible and Portable Heterogeneous Tree Traversal Workloads},
	booktitle = {2023 {IEEE} International Symposium on Performance Analysis of Systems and Software ({ISPASS})},
	pages = {201--213},
	year = {2023},
	month = apr,
	publisher = {IEEE},
	doi = {10.1109/ispass57527.2023.00028},
	url = {https://doi.org/10.1109/ISPASS57527.2023.00028},
}

CIFER: A 12nm, 16mm², 22-Core SoC with a 1541 LUT6/mm², 1.92 MOPS/LUT, Fully Synthesizable, Cache-Coherent, Embedded FPGA

Ting-Jung Chang, Ang Li, Fei Gao, Tuan Ta, Georgios Tziantzioulis, Yanghui Ou, Moyang Wang, Jinzheng Tu, Kaifeng Xu, Paul Jackson, August Ning, Grigory Chirkov, Marcelo Orenes-Vera, Shady Agwa, Xiaoyu Yan, Eric Tang, Jonathan Balkind, Christopher Batten, David Wentzlaff

2023 IEEE Custom Integrated Circuits Conference, Apr 2023

% Conference paper (CICC 2023). The title restores the comma after
% "LUT6/mm^2" and the hyphen in "Cache-Coherent", matching the title shown in
% the publication list; acronyms are brace-protected against sentence-casing.
@inproceedings{Chang2023CIFER,
	author = {Chang, Ting-Jung and Li, Ang and Gao, Fei and Ta, Tuan and Tziantzioulis, Georgios and Ou, Yanghui and Wang, Moyang and Tu, Jinzheng and Xu, Kaifeng and Jackson, Paul J. and Ning, August and Chirkov, Grigory and Orenes-Vera, Marcelo and Agwa, Shady and Yan, Xiaoyu and Tang, Eric and Balkind, Jonathan and Batten, Christopher and Wentzlaff, David},
	title = {{CIFER}: A 12nm, 16mm\textsuperscript{2}, 22-Core {SoC} with a 1541 {LUT6}/mm\textsuperscript{2}, 1.92 {MOPS}/{LUT}, Fully Synthesizable, Cache-Coherent, Embedded {FPGA}},
	booktitle = {2023 {IEEE} Custom Integrated Circuits Conference ({CICC})},
	pages = {1--2},
	year = {2023},
	month = apr,
	publisher = {IEEE},
	doi = {10.1109/cicc57935.2023.10121294},
	url = {https://doi.org/10.1109/CICC57935.2023.10121294},
}

Duet: Creating Harmony between Processors and Embedded FPGAs

Ang Li, August Ning, David Wentzlaff

2023 IEEE International Symposium on High-Performance Computer Architecture, Feb 2023

DOI | Authors' Copy | arXiv | GitHub
% Conference paper (HPCA 2023). Month uses the standard BibTeX macro instead of
% a bare number; the URL uses the https://doi.org resolver.
@inproceedings{Li2023Duet,
	author = {Li, Ang and Ning, August and Wentzlaff, David},
	title = {Duet: Creating Harmony between Processors and Embedded {FPGAs}},
	booktitle = {2023 {IEEE} International Symposium on High-Performance Computer Architecture ({HPCA})},
	pages = {745--758},
	year = {2023},
	month = feb,
	publisher = {IEEE},
	doi = {10.1109/hpca56546.2023.10070989},
	url = {https://doi.org/10.1109/HPCA56546.2023.10070989},
}

% arXiv preprint (arXiv:2301.02785) of the Duet paper. It carries its own key
% because BibTeX rejects two entries with an identical key as a repeated entry;
% stored as misc with eprint fields because it has no journal.
@misc{Li2023DuetArXiv,
	author = {Li, Ang and Ning, August and Wentzlaff, David},
	title = {Duet: Creating Harmony between Processors and Embedded {FPGAs}},
	year = {2023},
	eprint = {2301.02785},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2301.02785},
	url = {https://arxiv.org/abs/2301.02785},
}

2021

PRGA: An Open-Source FPGA Research and Prototyping Framework

Ang Li, David Wentzlaff

The 2021 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays, Feb 2021

% Conference paper (ACM/SIGDA FPGA symposium, 2021). Month uses the standard
% BibTeX macro; acronyms in the title are brace-protected against
% sentence-casing.
@inproceedings{Li2021PRGA,
	author = {Li, Ang and Wentzlaff, David},
	title = {{PRGA}: An Open-Source {FPGA} Research and Prototyping Framework},
	booktitle = {The 2021 {ACM}/{SIGDA} International Symposium on Field-Programmable Gate Arrays},
	pages = {127--137},
	year = {2021},
	month = feb,
	publisher = {ACM},
	doi = {10.1145/3431920.3439294},
	url = {https://doi.org/10.1145/3431920.3439294},
}

2020

Automated Design of FPGAs Facilitated by Cycle-Free Routing

Ang Li, Ting-Jung Chang, David Wentzlaff

2020 30th International Conference on Field-Programmable Logic and Applications, Aug 2020

% Conference paper (FPL 2020). Month uses the standard BibTeX macro instead of
% a bare number; the URL uses the https://doi.org resolver.
@inproceedings{Li2020Automated,
	author = {Li, Ang and Chang, Ting-Jung and Wentzlaff, David},
	title = {Automated Design of {FPGAs} Facilitated by Cycle-Free Routing},
	booktitle = {2020 30th International Conference on Field-Programmable Logic and Applications ({FPL})},
	pages = {208--213},
	year = {2020},
	month = aug,
	publisher = {IEEE},
	doi = {10.1109/fpl50879.2020.00042},
	url = {https://doi.org/10.1109/FPL50879.2020.00042},
}

OpenPiton at 5: A Nexus for Open and Agile Hardware Design

Jonathan Balkind, Ting-Jung Chang, Paul Jackson, Georgios Tziantzioulis, Ang Li, Fei Gao, Alexey Lavrov, Grigory Chirkov, Jinzheng Tu, Mohammad Shahrad, David Wentzlaff

IEEE Micro, Jul 2020

% Journal article (IEEE Micro, vol. 40, no. 4). Month uses the standard BibTeX
% macro; the camelCase name OpenPiton is brace-protected against sentence-casing.
@article{Balkind2020OpenPiton,
	author = {Balkind, Jonathan and Chang, Ting-Jung and Jackson, Paul J. and Tziantzioulis, Georgios and Li, Ang and Gao, Fei and Lavrov, Alexey and Chirkov, Grigory and Tu, Jinzheng and Shahrad, Mohammad and Wentzlaff, David},
	title = {{OpenPiton} at 5: A Nexus for Open and Agile Hardware Design},
	journal = {{IEEE} Micro},
	volume = {40},
	number = {4},
	pages = {22--31},
	year = {2020},
	month = jul,
	publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
	issn = {0272-1732},
	doi = {10.1109/mm.2020.2997706},
	url = {https://doi.org/10.1109/MM.2020.2997706},
}

BYOC: A “Bring Your Own Core” Framework for Heterogeneous-ISA Research

Jonathan Balkind, Katie Lim, Michael Schaffner, Fei Gao, Grigory Chirkov, Ang Li, Alexey Lavrov, Tri Nguyen, Yaosheng Fu, Florian Zaruba, Kunal Gulati, Luca Benini, David Wentzlaff

Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems, Mar 2020

% Conference paper (ASPLOS 2020). The title is given in full rather than just
% the acronym, matching the title shown in the publication list; month uses
% the standard BibTeX macro.
@inproceedings{Balkind2020BYOC,
	author = {Balkind, Jonathan and Lim, Katie and Schaffner, Michael and Gao, Fei and Chirkov, Grigory and Li, Ang and Lavrov, Alexey and Nguyen, Tri M. and Fu, Yaosheng and Zaruba, Florian and Gulati, Kunal and Benini, Luca and Wentzlaff, David},
	title = {{BYOC}: A {``Bring Your Own Core''} Framework for {Heterogeneous-ISA} Research},
	booktitle = {Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems},
	pages = {699--714},
	year = {2020},
	month = mar,
	publisher = {ACM},
	doi = {10.1145/3373376.3378479},
	url = {https://doi.org/10.1145/3373376.3378479},
}

Cycle-Free FPGA Routing Graphs

Ang Li, David Wentzlaff

Proceedings of the 2020 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays, Feb 2020

% Conference paper (ACM/SIGDA FPGA symposium, 2020). A single page is written
% as one number rather than a degenerate range; month uses the standard
% BibTeX macro.
@inproceedings{Li2020Cycle,
	author = {Li, Ang and Wentzlaff, David},
	title = {Cycle-Free {FPGA} Routing Graphs},
	booktitle = {Proceedings of the 2020 {ACM}/{SIGDA} International Symposium on Field-Programmable Gate Arrays},
	pages = {322},
	year = {2020},
	month = feb,
	publisher = {ACM},
	doi = {10.1145/3373087.3375354},
	url = {https://doi.org/10.1145/3373087.3375354},
}

2019

PRGA: An Open-source Framework for Building and Using Custom FPGAs

Ang Li, David Wentzlaff

The 1st Workshop on Open Source Design Automation, Mar 2019

2018

OpenPiton: An Emerging Standard for Open-Source EDA Tool Development

Jonathan Balkind, Alexey Lavrov, Michael McKeown, Yaosheng Fu, Tri Nguyen, Mohammad Shahrad, Ang Li, Katie Lim, Yanqi Zhou, Ting-Jung Chang, Paul Jackson, Adi Fuchs, Samuel Payne, Xiaohua Liang, Matthew Matl, David Wentzlaff

The 1st Workshop on Open-Source EDA Technology, Nov 2018

2015

Leveraging Emerging Nonvolatile Memory in High-Level Synthesis with Loop Transformations

Shuangchen Li, Ang Li, Yuan Zhe, Yongpan Liu, Peng Li, Guangyu Sun, Yu Wang, Huazhong Yang, Yuan Xie

2015 IEEE/ACM International Symposium on Low Power Electronics and Design, Jul 2015

DOI
% Conference paper (ISLPED 2015). Every author is in "Last, First" form: a
% fully braced name would be parsed as a single surname. Month uses the
% standard BibTeX macro; the title is stored in title case as shown in the
% publication list.
@inproceedings{Li2015Leveraging,
	author = {Li, Shuangchen and Li, Ang and Zhe, Yuan and Liu, Yongpan and Li, Peng and Sun, Guangyu and Wang, Yu and Yang, Huazhong and Xie, Yuan},
	title = {Leveraging Emerging Nonvolatile Memory in High-Level Synthesis with Loop Transformations},
	booktitle = {2015 {IEEE}/{ACM} International Symposium on Low Power Electronics and Design ({ISLPED})},
	pages = {61--66},
	year = {2015},
	month = jul,
	publisher = {IEEE},
	doi = {10.1109/islped.2015.7273491},
	url = {https://doi.org/10.1109/ISLPED.2015.7273491},
}

Nonvolatile Memory Allocation and Hierarchy Optimization for High-Level Synthesis

Shuangchen Li, Ang Li, Yongpan Liu, Yuan Xie, Huazhong Yang

The 20th Asia and South Pacific Design Automation Conference, Jan 2015

DOI
% Conference paper (ASP-DAC 2015). Every author is in "Last, First" form: a
% fully braced name would be parsed as a single surname. Month uses the
% standard BibTeX macro; the title is stored in title case as shown in the
% publication list.
@inproceedings{Li2015Nonvolatile,
	author = {Li, Shuangchen and Li, Ang and Liu, Yongpan and Xie, Yuan and Yang, Huazhong},
	title = {Nonvolatile Memory Allocation and Hierarchy Optimization for High-Level Synthesis},
	booktitle = {The 20th Asia and South Pacific Design Automation Conference},
	pages = {166--171},
	year = {2015},
	month = jan,
	publisher = {IEEE},
	doi = {10.1109/aspdac.2015.7058999},
	url = {https://doi.org/10.1109/ASPDAC.2015.7058999},
}

PᴺCEL member

Equal contribution