Full Publication List: Zhichen Zeng

2026

DICE: Enabling Efficient General-Purpose SIMT Execution with Statically Scheduled Coarse-Grained Reconfigurable Arrays

Jiayi Wang, Darren Lu, Zhichen Zeng, Ang Li

The 53rd IEEE/ACM International Symposium on Computer Architecture, Jun 2026

% arXiv preprint of the DICE paper. Stored as a misc entry because a preprint
% has no journal; the arXiv identifier is carried in eprint/archiveprefix.
% NOTE(review): the second author was exported as "Da Lu, Ang" (surname "Da Lu").
% The publication list names this author "Darren Lu", so the surname is assumed
% to be "Lu" with given names "Ang Da" -- confirm against the paper.
@misc{Wang2026DICE,
	author = {Wang, Jiayi and Lu, Ang Da and Zeng, Zhichen and Li, Ang},
	title = {{DICE}: Enabling {Efficient} {General}-{Purpose} {SIMT} {Execution} with {Statically} {Scheduled} {Coarse}-{Grained} {Reconfigurable} {Arrays}},
	year = {2026},
	publisher = {arXiv},
	eprint = {2605.05496},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2605.05496},
	url = {https://arxiv.org/abs/2605.05496},
}

DisagMoE: Computation-Communication overlapped MoE Training via Disaggregated AF-Pipe Parallelism

Zhichen Zeng, Chi-Chih Chang, Jiayi Wang, Zezhou Wang, Ningxin Zheng, Zheng Zhong, Cesar A. Stuardo, Dongyang Wang, Mohamed S. Abdelfattah, Haibin Lin, Banghua Zhu, Ang Li, Ziheng Jiang

arXiv

% arXiv preprint of the DisagMoE paper. Stored as a misc entry because a
% preprint has no journal; the arXiv identifier is carried in eprint/archiveprefix.
% The camel-case name "DisagMoE" is braced so sentence-casing styles keep it intact.
@misc{Zeng2026DisagMoE,
	author = {Zeng, Zhichen and Chang, Chi-Chih and Wang, Jiayi and Wang, Zezhou and Zheng, Ningxin and Zhong, Zheng and Stuardo, Cesar A. and Wang, Dongyang and Abdelfattah, Mohamed S. and Lin, Haibin and Zhu, Banghua and Li, Ang and Jiang, Ziheng},
	title = {{DisagMoE}: Computation-{Communication} overlapped {MoE} {Training} via {Disaggregated} {AF}-{Pipe} {Parallelism}},
	year = {2026},
	publisher = {arXiv},
	eprint = {2605.11005},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2605.11005},
	url = {https://arxiv.org/abs/2605.11005},
}

2025

SeerAttention: Learning Intrinsic Sparse Attention in Your LLMs

NeurIPS

Yizhao Gao, Zhichen Zeng, Dayou Du, Shijie Cao, Peiyuan Zhou, Jiaxing Qi, Junjie Lai, Hayden Kwok-Hay So, Ting Cao, Fan Yang, Mao Yang

The Thirty-ninth Annual Conference on Neural Information Processing Systems, Dec 2025

DOI arXiv

% arXiv preprint (2024) of the SeerAttention paper. Stored as a misc entry
% because a preprint has no journal; the arXiv identifier is carried in
% eprint/archiveprefix. "SeerAttention" is braced to survive sentence casing.
@misc{Gao2024SeerAttention,
	author = {Gao, Yizhao and Zeng, Zhichen and Du, Dayou and Cao, Shijie and Zhou, Peiyuan and Qi, Jiaxing and Lai, Junjie and So, Hayden Kwok-Hay and Cao, Ting and Yang, Fan and Yang, Mao},
	title = {{SeerAttention}: Learning {Intrinsic} {Sparse} {Attention} in {Your} {LLMs}},
	year = {2024},
	publisher = {arXiv},
	eprint = {2410.13276},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2410.13276},
	url = {https://arxiv.org/abs/2410.13276},
}

LUT Tensor Core: Lookup Table Enables Efficient Low-Bit LLM Inference Acceleration

ISCA

Zhiwen Mo, Lei Wang, Jianyu Wei, Zhichen Zeng, Shijie Cao, Lingxiao Ma, Naifeng Jing, Ting Cao, Jilong Xue, Fan Yang, Mao Yang

2025 52nd IEEE/ACM Annual International Symposium on Computer Architecture, Jun 2025

DOI arXiv

% Proceedings version (52nd ISCA, 2025) of the LUT Tensor Core paper.
% month uses the standard three-letter macro; the day of month is kept in its
% own field. ACM is recorded as publisher, and the acronym "LUT" is braced so
% sentence-casing styles do not turn it into "Lut".
@inproceedings{Mo2025LUT,
	author = {Mo, Zhiwen and Wang, Lei and Wei, Jianyu and Zeng, Zhichen and Cao, Shijie and Ma, Lingxiao and Jing, Naifeng and Cao, Ting and Xue, Jilong and Yang, Fan and Yang, Mao},
	title = {{LUT} {Tensor} {Core}: A {Software}-{Hardware} {Co}-{Design} for {LUT}-{Based} {Low}-{Bit} {LLM} {Inference}},
	booktitle = {Proceedings of the 52nd {Annual} {International} {Symposium} on {Computer} {Architecture}},
	year = {2025},
	month = jun,
	day = {20},
	pages = {514--528},
	publisher = {ACM},
	doi = {10.1145/3695053.3731057},
	url = {https://doi.org/10.1145/3695053.3731057},
}

% arXiv preprint (2024) of the LUT Tensor Core paper. Stored as a misc entry
% because a preprint has no journal; the arXiv identifier is carried in
% eprint/archiveprefix. The acronym "LUT" is braced to survive sentence casing.
@misc{Mo2024LUT,
	author = {Mo, Zhiwen and Wang, Lei and Wei, Jianyu and Zeng, Zhichen and Cao, Shijie and Ma, Lingxiao and Jing, Naifeng and Cao, Ting and Xue, Jilong and Yang, Fan and Yang, Mao},
	title = {{LUT} {Tensor} {Core}: A {Software}-{Hardware} {Co}-{Design} for {LUT}-{Based} {Low}-{Bit} {LLM} {Inference}},
	year = {2024},
	publisher = {arXiv},
	eprint = {2408.06003},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2408.06003},
	url = {https://arxiv.org/abs/2408.06003},
}

Tactic: Adaptive Sparse Attention with Clustering and Distribution Fitting for Long-Context LLMs

ICLR

Kan Zhu, Tian Tang, Qinyu Xu, Yile Gu, Zhichen Zeng, Rohan Kadekodi, Liangyu Zhao, Ang Li, Arvind Krishnamurthy, Baris Kasikci

The Fourteenth International Conference on Learning Representations, Apr 2025

DOI arXiv

% arXiv preprint of the Tactic paper. Stored as a misc entry because a preprint
% has no journal (an article entry without one triggers a missing-field
% warning); the arXiv identifier is carried in eprint/archiveprefix.
@misc{Zhu2025Tactic,
	author = {Zhu, Kan and Tang, Tian and Xu, Qinyu and Gu, Yile and Zeng, Zhichen and Kadekodi, Rohan and Zhao, Liangyu and Li, Ang and Krishnamurthy, Arvind and Kasikci, Baris},
	title = {Tactic: Adaptive {Sparse} {Attention} with {Clustering} and {Distribution} {Fitting} for {Long}-{Context} {LLMs}},
	year = {2025},
	publisher = {arXiv},
	eprint = {2502.12216},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2502.12216},
	url = {https://arxiv.org/abs/2502.12216},
}

Local Linear Attention: An Optimal Interpolation of Linear and Softmax Attention For Test-Time Regression

ICLR

Yifei Zuo, Yutong Yin, Zhichen Zeng, Ang Li, Banghua Zhu, Zhaoran Wang

The Fourteenth International Conference on Learning Representations, Apr 2026

DOI arXiv

% arXiv preprint of the Local Linear Attention paper. Stored as a misc entry
% because a preprint has no journal (an article entry without one triggers a
% missing-field warning); the arXiv identifier is carried in eprint/archiveprefix.
@misc{Zuo2025Local,
	author = {Zuo, Yifei and Yin, Yutong and Zeng, Zhichen and Li, Ang and Zhu, Banghua and Wang, Zhaoran},
	title = {Local {Linear} {Attention}: An {Optimal} {Interpolation} of {Linear} and {Softmax} {Attention} {For} {Test}-{Time} {Regression}},
	year = {2025},
	publisher = {arXiv},
	eprint = {2510.01450},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2510.01450},
	url = {https://arxiv.org/abs/2510.01450},
}

Exploring the Performance Improvement of Tensor Processing Engines through Transformation in the Bit-weight Dimension of MACs

HPCA

Qizhe Wu, Huawen Liang, Yuchen Gui, Zhichen Zeng, Zerong He, Linfeng Tao, Xiaotian Wang, Letian Zhao, Zhaoxi Zeng, Wei Yuan, Wei Wu, Xi Jin

2025 IEEE International Symposium on High-Performance Computer Architecture, Mar 2025

DOI arXiv

% Proceedings version (HPCA 2025) of the bit-weight-dimension MAC paper.
% month uses the standard three-letter macro; the day of month is kept in its
% own field. IEEE is recorded as publisher, and the DOI is written in the same
% case as the resolver URL.
@inproceedings{Wu2025Exploring,
	author = {Wu, Qizhe and Liang, Huawen and Gui, Yuchen and Zeng, Zhichen and He, Zerong and Tao, Linfeng and Wang, Xiaotian and Zhao, Letian and Zeng, Zhaoxi and Yuan, Wei and Wu, Wei and Jin, Xi},
	title = {Exploring the {Performance} {Improvement} of {Tensor} {Processing} {Engines} through {Transformation} in the {Bit}-weight {Dimension} of {MACs}},
	booktitle = {2025 {IEEE} {International} {Symposium} on {High} {Performance} {Computer} {Architecture} ({HPCA})},
	year = {2025},
	month = mar,
	day = {1},
	pages = {685--700},
	publisher = {IEEE},
	doi = {10.1109/HPCA61900.2025.00058},
	url = {https://doi.org/10.1109/HPCA61900.2025.00058},
}

% arXiv preprint of the paper whose proceedings version is keyed Wu2025Exploring.
% The key carries an "ArXiv" suffix because citation keys must be unique: with
% the same key as the proceedings entry, BibTeX reports a repeated entry and
% discards this one. Stored as a misc entry because a preprint has no journal.
@misc{Wu2025ExploringArXiv,
	author = {Wu, Qizhe and Liang, Huawen and Gui, Yuchen and Zeng, Zhichen and He, Zerong and Tao, Linfeng and Wang, Xiaotian and Zhao, Letian and Zeng, Zhaoxi and Yuan, Wei and Wu, Wei and Jin, Xi},
	title = {Exploring the {Performance} {Improvement} of {Tensor} {Processing} {Engines} through {Transformation} in the {Bit}-weight {Dimension} of {MACs}},
	year = {2025},
	publisher = {arXiv},
	eprint = {2503.06342},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2503.06342},
	url = {https://arxiv.org/abs/2503.06342},
}

2024

EN-T: Optimizing Tensor Computing Engines Performance via Encoder-Based Methodology

ICCD

Qizhe Wu, Yuchen Gui, Zhichen Zeng, Xiaotian Wang, Huawen Liang, Xi Jin

2024 IEEE 42nd International Conference on Computer Design (ICCD), Nov 2024

DOI

% Proceedings version (ICCD 2024) of the EN-T paper.
% The whole name "EN-T" is braced so sentence-casing styles do not render it as
% "En-T". month uses the standard three-letter macro; the day of month is kept
% in its own field. IEEE is recorded as publisher.
@inproceedings{Wu2024EN,
	author = {Wu, Qizhe and Gui, Yuchen and Zeng, Zhichen and Wang, Xiaotian and Liang, Huawen and Jin, Xi},
	title = {{EN-T}: Optimizing {Tensor} {Computing} {Engines} {Performance} via {Encoder}-{Based} {Methodology}},
	booktitle = {2024 {IEEE} 42nd {International} {Conference} on {Computer} {Design} ({ICCD})},
	year = {2024},
	month = nov,
	day = {18},
	pages = {608--615},
	publisher = {IEEE},
	doi = {10.1109/ICCD63220.2024.00097},
	url = {https://doi.org/10.1109/ICCD63220.2024.00097},
}

Allo: A Programming Model for Composable Accelerator Design

PLDI

Hongzheng Chen, Niansong Zhang, Shaojie Xiang, Zhichen Zeng, Mengjia Dai, Zhiru Zhang

Proceedings of the ACM on Programming Languages, Jun 2024

DOI

% Journal version of the Allo paper (Proceedings of the ACM on Programming
% Languages, volume 8). month uses the standard three-letter macro; the day of
% month is kept in its own field.
% NOTE(review): number = PLDI follows the venue tag shown in the publication
% list; this journal labels issues by conference name -- confirm against the
% publisher record.
@article{Chen2024Allo,
	author = {Chen, Hongzheng and Zhang, Niansong and Xiang, Shaojie and Zeng, Zhichen and Dai, Mengjia and Zhang, Zhiru},
	title = {Allo: A {Programming} {Model} for {Composable} {Accelerator} {Design}},
	journal = {Proceedings of the ACM on Programming Languages},
	volume = {8},
	number = {PLDI},
	pages = {593--620},
	year = {2024},
	month = jun,
	day = {20},
	publisher = {Association for Computing Machinery (ACM)},
	issn = {2475-1421},
	doi = {10.1145/3656401},
	url = {https://doi.org/10.1145/3656401},
}

Highly stable and fast response photodetector based on double perovskite Cs₂AgBiCl₆ crystals

Journal of Physics D

Zhengyu Han, Mengjia Dai, Zhichen Zeng, Chunhui Ye, Rucheng Dai, Zhongping Wang, Xiaoyu Sun, Zengming Zhang

Journal of Physics D: Applied Physics, Feb 2024

DOI

% Journal article in Journal of Physics D: Applied Physics, volume 57, issue 21.
% The pages field holds the single identifier 215102 rather than a page range.
% The chemical formula is wrapped in one brace group so case-changing styles
% leave both the element symbols and the subscript commands untouched.
% month uses the standard three-letter macro; the day of month is kept in its
% own field.
@article{Han2024Highly,
	author = {Han, Zhengyu and Dai, Mengjia and Zeng, Zhichen and Ye, Chunhui and Dai, Rucheng and Wang, Zhongping and Sun, Xiaoyu and Zhang, Zengming},
	title = {Highly stable and fast response photodetector based on double perovskite {Cs\textsubscript{2}AgBiCl\textsubscript{6}} crystals},
	journal = {Journal of Physics D: Applied Physics},
	volume = {57},
	number = {21},
	pages = {215102},
	year = {2024},
	month = feb,
	day = {27},
	publisher = {IOP Publishing},
	issn = {0022-3727},
	doi = {10.1088/1361-6463/ad291a},
	url = {https://doi.org/10.1088/1361-6463/ad291a},
}