This person is busy changing the world...
LUT Tensor Core: Lookup Table Enables Efficient Low-Bit LLM Inference Acceleration
Zhiwen Mo, Lei Wang, Jianyu Wei, Zhichen Zeng, Shijie Cao, Lingxiao Ma, Naifeng Jing, Ting Cao, Jilong Xue, Fan Yang, Mao Yang
2025 52nd IEEE/ACM Annual International Symposium on Computer Architecture, Jun 2025
@comment{arXiv preprint. Stored as misc with eprint fields because the record has no journal, which the article type requires. Only acronyms and the design name are brace-protected in the title so styles can apply their own casing.}
@misc{Mo2024LUT,
  author        = {Mo, Zhiwen and Wang, Lei and Wei, Jianyu and Zeng, Zhichen and Cao, Shijie and Ma, Lingxiao and Jing, Naifeng and Cao, Ting and Xue, Jilong and Yang, Fan and Yang, Mao},
  title         = {{LUT} {Tensor} {Core}: Lookup Table Enables Efficient Low-Bit {LLM} Inference Acceleration},
  year          = {2024},
  publisher     = {arXiv},
  eprint        = {2408.06003},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2408.06003},
  url           = {https://arxiv.org/abs/2408.06003},
}
Exploring the Performance Improvement of Tensor Processing Engines through Transformation in the Bit-weight Dimension of MACs
Qizhe Wu, Huawen Liang, Yuchen Gui, Zhichen Zeng, Zerong He, Linfeng Tao, Xiaotian Wang, Letian Zhao, Zhaoxi Zeng, Wei Yuan, Wei Wu, Xi Jin
2025 IEEE International Symposium on High-Performance Computer Architecture, Mar 2025
@comment{arXiv preprint. Stored as misc with eprint fields because the record has no journal, which the article type requires. Only the acronym in the title is brace-protected so styles can apply their own casing.}
@misc{Wu2025Exploring,
  author        = {Wu, Qizhe and Liang, Huawen and Gui, Yuchen and Zeng, Zhichen and He, Zerong and Tao, Linfeng and Wang, Xiaotian and Zhao, Letian and Zeng, Zhaoxi and Yuan, Wei and Wu, Wei and Jin, Xi},
  title         = {Exploring the Performance Improvement of Tensor Processing Engines through Transformation in the Bit-weight Dimension of {MACs}},
  year          = {2025},
  publisher     = {arXiv},
  eprint        = {2503.06342},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2503.06342},
  url           = {https://arxiv.org/abs/2503.06342},
}
Tactic: Adaptive Sparse Attention with Clustering and Distribution Fitting for Long-Context LLMs
Kan Zhu, Tian Tang, Qinyu Xu, Yile Gu, Zhichen Zeng, Rohan Kadekodi, Liangyu Zhao, Ang Li, Arvind Krishnamurthy, Baris Kasikci
@comment{arXiv preprint. Stored as misc with eprint fields because the record has no journal, which the article type requires. Only the system name and the acronym are brace-protected in the title so styles can apply their own casing.}
@misc{Zhu2025Tactic,
  author        = {Zhu, Kan and Tang, Tian and Xu, Qinyu and Gu, Yile and Zeng, Zhichen and Kadekodi, Rohan and Zhao, Liangyu and Li, Ang and Krishnamurthy, Arvind and Kasikci, Baris},
  title         = {{Tactic}: Adaptive Sparse Attention with Clustering and Distribution Fitting for Long-Context {LLMs}},
  year          = {2025},
  publisher     = {arXiv},
  eprint        = {2502.12216},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2502.12216},
  url           = {https://arxiv.org/abs/2502.12216},
}
EN-T: Optimizing Tensor Computing Engines Performance via Encoder-Based Methodology
Qizhe Wu, Yuchen Gui, Zhichen Zeng, Xiaotian Wang, Huawen Liang, Xi Jin
2024 IEEE 42nd International Conference on Computer Design (ICCD), Nov 2024
@comment{Conference paper in the ICCD 2024 proceedings. The source record dated it nov 18: the month now uses the standard macro and the day is kept in its own field. The DOI is written in one casing in both doi and url.}
@inproceedings{Wu2024EN,
  author       = {Wu, Qizhe and Gui, Yuchen and Zeng, Zhichen and Wang, Xiaotian and Liang, Huawen and Jin, Xi},
  title        = {{EN-T}: Optimizing Tensor Computing Engines Performance via Encoder-Based Methodology},
  booktitle    = {2024 {IEEE} 42nd International Conference on Computer Design ({ICCD})},
  organization = {IEEE},
  year         = {2024},
  month        = nov,
  day          = {18},
  pages        = {608--615},
  doi          = {10.1109/ICCD63220.2024.00097},
  url          = {https://doi.org/10.1109/ICCD63220.2024.00097},
}
SeerAttention: Learning Intrinsic Sparse Attention in Your LLMs
Yizhao Gao, Zhichen Zeng, Dayou Du, Shijie Cao, Peiyuan Zhou, Jiaxing Qi, Junjie Lai, Hayden Kwok-Hay So, Ting Cao, Fan Yang, Mao Yang
@comment{arXiv preprint. Stored as misc with eprint fields because the record has no journal, which the article type requires. The camelCase system name and the acronym are brace-protected so sentence-casing styles keep their capitals.}
@misc{Gao2024SeerAttention,
  author        = {Gao, Yizhao and Zeng, Zhichen and Du, Dayou and Cao, Shijie and Zhou, Peiyuan and Qi, Jiaxing and Lai, Junjie and So, Hayden Kwok-Hay and Cao, Ting and Yang, Fan and Yang, Mao},
  title         = {{SeerAttention}: Learning Intrinsic Sparse Attention in Your {LLMs}},
  year          = {2024},
  publisher     = {arXiv},
  eprint        = {2410.13276},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2410.13276},
  url           = {https://arxiv.org/abs/2410.13276},
}
Allo: A Programming Model for Composable Accelerator Design
Hongzheng Chen, Niansong Zhang, Shaojie Xiang, Zhichen Zeng, Mengjia Dai, Zhiru Zhang
Proceedings of the ACM on Programming Languages, Jun 2024
@comment{Journal article. The source record dated it jun 20: the month now uses the standard macro and the day is kept in its own field. Only the system name is brace-protected in the title so styles can apply their own casing.}
@article{Chen2024Allo,
  author    = {Chen, Hongzheng and Zhang, Niansong and Xiang, Shaojie and Zeng, Zhichen and Dai, Mengjia and Zhang, Zhiru},
  title     = {{Allo}: A Programming Model for Composable Accelerator Design},
  journal   = {Proceedings of the ACM on Programming Languages},
  volume    = {8},
  pages     = {593--620},
  year      = {2024},
  month     = jun,
  day       = {20},
  publisher = {Association for Computing Machinery (ACM)},
  issn      = {2475-1421},
  doi       = {10.1145/3656401},
  url       = {https://doi.org/10.1145/3656401},
}
Highly stable and fast response photodetector based on double perovskite Cs2AgBiCl6 crystals
Zhengyu Han, Mengjia Dai, Zhichen Zeng, Chunhui Ye, Rucheng Dai, Zhongping Wang, Xiaoyu Sun, Zengming Zhang
Journal of Physics D: Applied Physics, Feb 2024
@comment{Journal article. The source record dated it feb 27: the month now uses the standard macro and the day is kept in its own field. The chemical formula is protected as a single brace group so its casing survives any style.}
@article{Han2024Highly,
  author    = {Han, Zhengyu and Dai, Mengjia and Zeng, Zhichen and Ye, Chunhui and Dai, Rucheng and Wang, Zhongping and Sun, Xiaoyu and Zhang, Zengming},
  title     = {Highly stable and fast response photodetector based on double perovskite {Cs\textsubscript{2}AgBiCl\textsubscript{6}} crystals},
  journal   = {Journal of Physics D: Applied Physics},
  volume    = {57},
  number    = {21},
  pages     = {215102},
  year      = {2024},
  month     = feb,
  day       = {27},
  publisher = {IOP Publishing},
  issn      = {0022-3727},
  doi       = {10.1088/1361-6463/ad291a},
  url       = {https://doi.org/10.1088/1361-6463/ad291a},
}
PᴺCEL member
Equal contribution