DICE: Enabling Efficient General-Purpose SIMT Execution with Statically Scheduled Coarse-Grained Reconfigurable Arrays
Jiayi Wang, Darren Lu, Zhichen Zeng, Ang Li
The 53rd IEEE/ACM International Symposium on Computer Architecture, Jun 2026
% arXiv preprint of the DICE paper (arXiv:2605.05496).
@misc{Wang2026DICE,
  author        = {Wang, Jiayi and Lu, Darren and Zeng, Zhichen and Li, Ang},
  title         = {{DICE}: Enabling {Efficient} {General}-{Purpose} {SIMT} {Execution} with {Statically} {Scheduled} {Coarse}-{Grained} {Reconfigurable} {Arrays}},
  year          = {2026},
  publisher     = {arXiv},
  eprint        = {2605.05496},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2605.05496},
  url           = {https://arxiv.org/abs/2605.05496},
}
DisagMoE: Computation-Communication overlapped MoE Training via Disaggregated AF-Pipe Parallelism
Zhichen Zeng, Chi-Chih Chang, Jiayi Wang, Zezhou Wang, Ningxin Zheng, Zheng Zhong, Cesar A. Stuardo, Dongyang Wang, Mohamed S. Abdelfattah, Haibin Lin, Banghua Zhu, Ang Li, Ziheng Jiang
% arXiv preprint of the DisagMoE paper (arXiv:2605.11005).
@misc{Zeng2026DisagMoE,
  author        = {Zeng, Zhichen and Chang, Chi-Chih and Wang, Jiayi and Wang, Zezhou and Zheng, Ningxin and Zhong, Zheng and Stuardo, Cesar A. and Wang, Dongyang and Abdelfattah, Mohamed S. and Lin, Haibin and Zhu, Banghua and Li, Ang and Jiang, Ziheng},
  title         = {{DisagMoE}: Computation-{Communication} overlapped {MoE} {Training} via {Disaggregated} {AF}-{Pipe} {Parallelism}},
  year          = {2026},
  publisher     = {arXiv},
  eprint        = {2605.11005},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2605.11005},
  url           = {https://arxiv.org/abs/2605.11005},
}
SeerAttention: Learning Intrinsic Sparse Attention in Your LLMs
Yizhao Gao, Zhichen Zeng, Dayou Du, Shijie Cao, Peiyuan Zhou, Jiaxing Qi, Junjie Lai, Hayden Kwok-Hay So, Ting Cao, Fan Yang, Mao Yang
The Thirty-ninth Annual Conference on Neural Information Processing Systems, Dec 2025
% arXiv preprint of the SeerAttention paper (arXiv:2410.13276).
@misc{Gao2024SeerAttention,
  author        = {Gao, Yizhao and Zeng, Zhichen and Du, Dayou and Cao, Shijie and Zhou, Peiyuan and Qi, Jiaxing and Lai, Junjie and So, Hayden Kwok-Hay and Cao, Ting and Yang, Fan and Yang, Mao},
  title         = {{SeerAttention}: Learning {Intrinsic} {Sparse} {Attention} in {Your} {LLMs}},
  year          = {2024},
  publisher     = {arXiv},
  eprint        = {2410.13276},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2410.13276},
  url           = {https://arxiv.org/abs/2410.13276},
}
LUT Tensor Core: Lookup Table Enables Efficient Low-Bit LLM Inference Acceleration
Zhiwen Mo, Lei Wang, Jianyu Wei, Zhichen Zeng, Shijie Cao, Lingxiao Ma, Naifeng Jing, Ting Cao, Jilong Xue, Fan Yang, Mao Yang
2025 52nd IEEE/ACM Annual International Symposium on Computer Architecture, Jun 2025
% Proceedings version of the LUT Tensor Core paper (DOI 10.1145/3695053.3731057).
@inproceedings{Mo2025LUT,
  author       = {Mo, Zhiwen and Wang, Lei and Wei, Jianyu and Zeng, Zhichen and Cao, Shijie and Ma, Lingxiao and Jing, Naifeng and Cao, Ting and Xue, Jilong and Yang, Fan and Yang, Mao},
  title        = {{LUT} {Tensor} {Core}: A {Software}-{Hardware} {Co}-{Design} for {LUT}-{Based} {Low}-{Bit} {LLM} {Inference}},
  booktitle    = {Proceedings of the 52nd {Annual} {International} {Symposium} on {Computer} {Architecture}},
  organization = {ACM},
  year         = {2025},
  month        = jun,
  day          = {20},
  pages        = {514--528},
  doi          = {10.1145/3695053.3731057},
  url          = {https://doi.org/10.1145/3695053.3731057},
}
% arXiv preprint of the LUT Tensor Core paper (arXiv:2408.06003).
@misc{Mo2024LUT,
  author        = {Mo, Zhiwen and Wang, Lei and Wei, Jianyu and Zeng, Zhichen and Cao, Shijie and Ma, Lingxiao and Jing, Naifeng and Cao, Ting and Xue, Jilong and Yang, Fan and Yang, Mao},
  title         = {{LUT} {Tensor} {Core}: A {Software}-{Hardware} {Co}-{Design} for {LUT}-{Based} {Low}-{Bit} {LLM} {Inference}},
  year          = {2024},
  publisher     = {arXiv},
  eprint        = {2408.06003},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2408.06003},
  url           = {https://arxiv.org/abs/2408.06003},
}
Tactic: Adaptive Sparse Attention with Clustering and Distribution Fitting for Long-Context LLMs
Kan Zhu, Tian Tang, Qinyu Xu, Yile Gu, Zhichen Zeng, Rohan Kadekodi, Liangyu Zhao, Ang Li, Arvind Krishnamurthy, Baris Kasikci
The Fourteenth International Conference on Learning Representations, Apr 2026
% arXiv preprint of the Tactic paper (arXiv:2502.12216).
@misc{Zhu2025Tactic,
  author        = {Zhu, Kan and Tang, Tian and Xu, Qinyu and Gu, Yile and Zeng, Zhichen and Kadekodi, Rohan and Zhao, Liangyu and Li, Ang and Krishnamurthy, Arvind and Kasikci, Baris},
  title         = {Tactic: Adaptive {Sparse} {Attention} with {Clustering} and {Distribution} {Fitting} for {Long}-{Context} {LLMs}},
  year          = {2025},
  publisher     = {arXiv},
  eprint        = {2502.12216},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2502.12216},
  url           = {https://arxiv.org/abs/2502.12216},
}
Local Linear Attention: An Optimal Interpolation of Linear and Softmax Attention For Test-Time Regression
Yifei Zuo, Yutong Yin, Zhichen Zeng, Ang Li, Banghua Zhu, Zhaoran Wang
The Fourteenth International Conference on Learning Representations, Apr 2026
% arXiv preprint of the Local Linear Attention paper (arXiv:2510.01450).
@misc{Zuo2025Local,
  author        = {Zuo, Yifei and Yin, Yutong and Zeng, Zhichen and Li, Ang and Zhu, Banghua and Wang, Zhaoran},
  title         = {Local {Linear} {Attention}: An {Optimal} {Interpolation} of {Linear} and {Softmax} {Attention} {For} {Test}-{Time} {Regression}},
  year          = {2025},
  publisher     = {arXiv},
  eprint        = {2510.01450},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2510.01450},
  url           = {https://arxiv.org/abs/2510.01450},
}
Exploring the Performance Improvement of Tensor Processing Engines through Transformation in the Bit-weight Dimension of MACs
Qizhe Wu, Huawen Liang, Yuchen Gui, Zhichen Zeng, Zerong He, Linfeng Tao, Xiaotian Wang, Letian Zhao, Zhaoxi Zeng, Wei Yuan, Wei Wu, Xi Jin
2025 IEEE International Symposium on High-Performance Computer Architecture, Mar 2025
% Proceedings version (DOI 10.1109/hpca61900.2025.00058).
@inproceedings{Wu2025Exploring,
  author       = {Wu, Qizhe and Liang, Huawen and Gui, Yuchen and Zeng, Zhichen and He, Zerong and Tao, Linfeng and Wang, Xiaotian and Zhao, Letian and Zeng, Zhaoxi and Yuan, Wei and Wu, Wei and Jin, Xi},
  title        = {Exploring the {Performance} {Improvement} of {Tensor} {Processing} {Engines} through {Transformation} in the {Bit}-weight {Dimension} of {MACs}},
  booktitle    = {2025 {IEEE} {International} {Symposium} on {High} {Performance} {Computer} {Architecture} ({HPCA})},
  organization = {IEEE},
  year         = {2025},
  month        = mar,
  day          = {1},
  pages        = {685--700},
  doi          = {10.1109/hpca61900.2025.00058},
  url          = {https://doi.org/10.1109/HPCA61900.2025.00058},
}
% arXiv preprint (arXiv:2503.06342). The key carries an "ArXiv" suffix so that
% it is unique within this file.
@misc{Wu2025ExploringArXiv,
  author        = {Wu, Qizhe and Liang, Huawen and Gui, Yuchen and Zeng, Zhichen and He, Zerong and Tao, Linfeng and Wang, Xiaotian and Zhao, Letian and Zeng, Zhaoxi and Yuan, Wei and Wu, Wei and Jin, Xi},
  title         = {Exploring the {Performance} {Improvement} of {Tensor} {Processing} {Engines} through {Transformation} in the {Bit}-weight {Dimension} of {MACs}},
  year          = {2025},
  publisher     = {arXiv},
  eprint        = {2503.06342},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2503.06342},
  url           = {https://arxiv.org/abs/2503.06342},
}
EN-T: Optimizing Tensor Computing Engines Performance via Encoder-Based Methodology
Qizhe Wu, Yuchen Gui, Zhichen Zeng, Xiaotian Wang, Huawen Liang, Xi Jin
2024 IEEE 42nd International Conference on Computer Design (ICCD), Nov 2024
% Proceedings version of the EN-T paper (DOI 10.1109/iccd63220.2024.00097).
@inproceedings{Wu2024EN,
  author       = {Wu, Qizhe and Gui, Yuchen and Zeng, Zhichen and Wang, Xiaotian and Liang, Huawen and Jin, Xi},
  title        = {{EN-T}: Optimizing {Tensor} {Computing} {Engines} {Performance} via {Encoder}-{Based} {Methodology}},
  booktitle    = {2024 {IEEE} 42nd {International} {Conference} on {Computer} {Design} ({ICCD})},
  organization = {IEEE},
  year         = {2024},
  month        = nov,
  day          = {18},
  pages        = {608--615},
  doi          = {10.1109/iccd63220.2024.00097},
  url          = {https://doi.org/10.1109/ICCD63220.2024.00097},
}
Allo: A Programming Model for Composable Accelerator Design
Hongzheng Chen, Niansong Zhang, Shaojie Xiang, Zhichen Zeng, Mengjia Dai, Zhiru Zhang
Proceedings of the ACM on Programming Languages, Jun 2024
% Journal article in Proceedings of the ACM on Programming Languages, volume 8.
@article{Chen2024Allo,
  author    = {Chen, Hongzheng and Zhang, Niansong and Xiang, Shaojie and Zeng, Zhichen and Dai, Mengjia and Zhang, Zhiru},
  title     = {Allo: A {Programming} {Model} for {Composable} {Accelerator} {Design}},
  journal   = {Proceedings of the ACM on Programming Languages},
  volume    = {8},
  pages     = {593--620},
  year      = {2024},
  month     = jun,
  day       = {20},
  publisher = {Association for Computing Machinery (ACM)},
  issn      = {2475-1421},
  doi       = {10.1145/3656401},
  url       = {https://doi.org/10.1145/3656401},
}
Highly stable and fast response photodetector based on double perovskite Cs2AgBiCl6 crystals
Zhengyu Han, Mengjia Dai, Zhichen Zeng, Chunhui Ye, Rucheng Dai, Zhongping Wang, Xiaoyu Sun, Zengming Zhang
Journal of Physics D: Applied Physics, Feb 2024
% Journal article; `pages` holds the value 215102 given in the source record.
@article{Han2024Highly,
  author    = {Han, Zhengyu and Dai, Mengjia and Zeng, Zhichen and Ye, Chunhui and Dai, Rucheng and Wang, Zhongping and Sun, Xiaoyu and Zhang, Zengming},
  title     = {Highly stable and fast response photodetector based on double perovskite {Cs\textsubscript{2}AgBiCl\textsubscript{6}} crystals},
  journal   = {Journal of Physics D: Applied Physics},
  volume    = {57},
  number    = {21},
  pages     = {215102},
  year      = {2024},
  month     = feb,
  day       = {27},
  publisher = {IOP Publishing},
  issn      = {0022-3727},
  doi       = {10.1088/1361-6463/ad291a},
  url       = {https://doi.org/10.1088/1361-6463/ad291a},
}
PᴺCEL member
Equal contribution