Personal Statement

This person is busy changing the world...

Selected Publications

Full publication list

LUT Tensor Core: Lookup Table Enables Efficient Low-Bit LLM Inference Acceleration

Zhiwen Mo, Lei Wang, Jianyu Wei, Zhichen Zeng, Shijie Cao, Lingxiao Ma, Naifeng Jing, Ting Cao, Jilong Xue, Fan Yang, Mao Yang

2025 52nd IEEE/ACM Annual International Symposium on Computer Architecture, Jun 2025

arXiv
@misc{Mo2024LUT,
	author = {Mo, Zhiwen and Wang, Lei and Wei, Jianyu and Zeng, Zhichen and Cao, Shijie and Ma, Lingxiao and Jing, Naifeng and Cao, Ting and Xue, Jilong and Yang, Fan and Yang, Mao},
	title = {{LUT} {Tensor} {Core}: Lookup {Table} {Enables} {Efficient} {Low}-{Bit} {LLM} {Inference} {Acceleration}},
	year = {2024},
	eprint = {2408.06003},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2408.06003},
	url = {https://arxiv.org/abs/2408.06003},
	publisher = {arXiv},
}

Exploring the Performance Improvement of Tensor Processing Engines through Transformation in the Bit-weight Dimension of MACs

Qizhe Wu, Huawen Liang, Yuchen Gui, Zhichen Zeng, Zerong He, Linfeng Tao, Xiaotian Wang, Letian Zhao, Zhaoxi Zeng, Wei Yuan, Wei Wu, Xi Jin

2025 IEEE International Symposium on High-Performance Computer Architecture, Mar 2025

arXiv
@misc{Wu2025Exploring,
	author = {Wu, Qizhe and Liang, Huawen and Gui, Yuchen and Zeng, Zhichen and He, Zerong and Tao, Linfeng and Wang, Xiaotian and Zhao, Letian and Zeng, Zhaoxi and Yuan, Wei and Wu, Wei and Jin, Xi},
	title = {Exploring the {Performance} {Improvement} of {Tensor} {Processing} {Engines} through {Transformation} in the {Bit}-weight {Dimension} of {MACs}},
	year = {2025},
	eprint = {2503.06342},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2503.06342},
	url = {https://arxiv.org/abs/2503.06342},
	publisher = {arXiv},
}

Tactic: Adaptive Sparse Attention with Clustering and Distribution Fitting for Long-Context LLMs

Kan Zhu, Tian Tang, Qinyu Xu, Yile Gu, Zhichen Zeng, Rohan Kadekodi, Liangyu Zhao, Ang Li, Arvind Krishnamurthy, Baris Kasikci

arXiv
@misc{Zhu2025Tactic,
	author = {Zhu, Kan and Tang, Tian and Xu, Qinyu and Gu, Yile and Zeng, Zhichen and Kadekodi, Rohan and Zhao, Liangyu and Li, Ang and Krishnamurthy, Arvind and Kasikci, Baris},
	title = {Tactic: Adaptive {Sparse} {Attention} with {Clustering} and {Distribution} {Fitting} for {Long}-{Context} {LLMs}},
	year = {2025},
	eprint = {2502.12216},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2502.12216},
	url = {https://arxiv.org/abs/2502.12216},
	publisher = {arXiv},
}

EN-T: Optimizing Tensor Computing Engines Performance via Encoder-Based Methodology

Qizhe Wu, Yuchen Gui, Zhichen Zeng, Xiaotian Wang, Huawen Liang, Xi Jin

2024 IEEE 42nd International Conference on Computer Design (ICCD), Nov 2024

DOI
@inproceedings{Wu2024EN,
	author = {Wu, Qizhe and Gui, Yuchen and Zeng, Zhichen and Wang, Xiaotian and Liang, Huawen and Jin, Xi},
	title = {{EN-T}: Optimizing {Tensor} {Computing} {Engines} {Performance} via {Encoder}-{Based} {Methodology}},
	booktitle = {2024 {IEEE} 42nd {International} {Conference} on {Computer} {Design} ({ICCD})},
	organization = {IEEE},
	year = {2024},
	month = nov,
	day = {18},
	pages = {608--615},
	doi = {10.1109/ICCD63220.2024.00097},
	url = {https://doi.org/10.1109/ICCD63220.2024.00097},
}

SeerAttention: Learning Intrinsic Sparse Attention in Your LLMs

Yizhao Gao, Zhichen Zeng, Dayou Du, Shijie Cao, Peiyuan Zhou, Jiaxing Qi, Junjie Lai, Hayden Kwok-Hay So, Ting Cao, Fan Yang, Mao Yang

arXiv
@misc{Gao2024SeerAttention,
	author = {Gao, Yizhao and Zeng, Zhichen and Du, Dayou and Cao, Shijie and Zhou, Peiyuan and Qi, Jiaxing and Lai, Junjie and So, Hayden Kwok-Hay and Cao, Ting and Yang, Fan and Yang, Mao},
	title = {{SeerAttention}: Learning {Intrinsic} {Sparse} {Attention} in {Your} {LLMs}},
	year = {2024},
	eprint = {2410.13276},
	archiveprefix = {arXiv},
	doi = {10.48550/ARXIV.2410.13276},
	url = {https://arxiv.org/abs/2410.13276},
	publisher = {arXiv},
}

Allo: A Programming Model for Composable Accelerator Design

Hongzheng Chen, Niansong Zhang, Shaojie Xiang, Zhichen Zeng, Mengjia Dai, Zhiru Zhang

Proceedings of the ACM on Programming Languages, Jun 2024

DOI
@article{Chen2024Allo,
	author = {Chen, Hongzheng and Zhang, Niansong and Xiang, Shaojie and Zeng, Zhichen and Dai, Mengjia and Zhang, Zhiru},
	title = {Allo: A {Programming} {Model} for {Composable} {Accelerator} {Design}},
	journal = {Proceedings of the ACM on Programming Languages},
	volume = {8},
	pages = {593--620},
	year = {2024},
	month = jun,
	day = {20},
	publisher = {Association for Computing Machinery (ACM)},
	issn = {2475-1421},
	doi = {10.1145/3656401},
	url = {https://doi.org/10.1145/3656401},
}

Highly stable and fast response photodetector based on double perovskite Cs2AgBiCl6 crystals

Zhengyu Han, Mengjia Dai, Zhichen Zeng, Chunhui Ye, Rucheng Dai, Zhongping Wang, Xiaoyu Sun, Zengming Zhang

Journal of Physics D: Applied Physics, Feb 2024

DOI
@article{Han2024Highly,
	author = {Han, Zhengyu and Dai, Mengjia and Zeng, Zhichen and Ye, Chunhui and Dai, Rucheng and Wang, Zhongping and Sun, Xiaoyu and Zhang, Zengming},
	title = {Highly stable and fast response photodetector based on double perovskite {Cs\textsubscript{2}AgBiCl\textsubscript{6}} crystals},
	journal = {Journal of Physics D: Applied Physics},
	volume = {57},
	number = {21},
	pages = {215102},
	year = {2024},
	month = feb,
	day = {27},
	publisher = {IOP Publishing},
	issn = {0022-3727},
	doi = {10.1088/1361-6463/ad291a},
	url = {https://doi.org/10.1088/1361-6463/ad291a},
}

PᴺCEL member

Equal contribution