@inproceedings{zhang-etal-2025-agentcpm,
title = "{A}gent{CPM}-{GUI}: Building Mobile-Use Agents with Reinforcement Fine-Tuning",
author = "Zhang, Zhong and
Lu, Yaxi and
Fu, Yikun and
Huo, Yupeng and
Yang, Shenzhi and
Wu, Yesai and
Si, Han and
Cong, Xin and
Chen, Haotian and
Lin, Yankai and
Xie, Jie and
Zhou, Wei and
Xu, Wang and
Zhang, Yuanheng and
Su, Zhou and
Zhai, Zhongwu and
Liu, Xiaoming and
Mei, Yudong and
Xu, Jianming and
Tian, Hongyan and
Wang, Chongyi and
Chen, Chi and
Yao, Yuan and
Liu, Zhiyuan and
Sun, Maosong",
editor = {Habernal, Ivan and
Schulam, Peter and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-demos.12/",
doi = "10.18653/v1/2025.emnlp-demos.12",
pages = "155--180",
ISBN = "979-8-89176-334-0",
abstract = "Large language model agents have enabled GUI-based automation, particularly for mobile devices. However, deployment remains limited by noisy data, poor generalization, and lack of support for non-English GUIs. In this work, we present AgentCPM-GUI, an 8B-parameter GUI agent built for robust and efficient on-device GUI interaction. Our training pipeline includes grounding-aware pre-training to enhance perception, supervised fine-tuning on high-quality Chinese and English trajectories to imitate human-like actions, and reinforcement fine-tuning with GRPO to improve reasoning capability. AgentCPM-GUI achieves promising performance on five public benchmarks and our proposed Chinese benchmark CAGUI. To facilitate reproducibility and further research, we publicly release all code, model checkpoint, and evaluation data at: https://github.com/OpenBMB/AgentCPM-GUI"
}
@inproceedings{zhang-etal-2023-fine,
title = "Fine-tuning Happens in Tiny Subspaces: Exploring Intrinsic Task-specific Subspaces of Pre-trained Language Models",
author = "Zhang, Zhong and
Liu, Bang and
Shao, Junming",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-long.95/",
doi = "10.18653/v1/2023.acl-long.95",
pages = "1701--1713",
abstract = "Pre-trained language models (PLMs) are known to be overly parameterized and have significant redundancy, indicating a small degree of freedom of the PLMs. Motivated by the observation, in this paper, we study the problem of re-parameterizing and fine-tuning PLMs from a new perspective: Discovery of intrinsic task-specific subspace. Specifically, by exploiting the dynamics of the fine-tuning process for a given task, the parameter optimization trajectory is learned to uncover its intrinsic task-specific subspace. A key finding is that PLMs can be effectively fine-tuned in the subspace with a small number of free parameters. Beyond, we observe some outlier dimensions emerging during fine-tuning in the subspace. Disabling these dimensions degrades the model performance significantly. This suggests that these dimensions are crucial to induce task-specific knowledge to downstream tasks."
}
@inproceedings{chen2025learning,
title={Learning Evolving Tools for Large Language Models},
author={Guoxin Chen and Zhong Zhang and Xin Cong and Fangda Guo and Yesai Wu and Yankai Lin and Wenzheng Feng and Yasheng Wang},
booktitle={The Thirteenth International Conference on Learning Representations},
year={2025},
url={https://openreview.net/forum?id=wtrDLMFU9v}
}
@inproceedings{lu-etal-2025-learning,
title = "Learning to Generate Structured Output with Schema Reinforcement Learning",
author = "Lu, Yaxi and
Li, Haolun and
Cong, Xin and
Zhang, Zhong and
Wu, Yesai and
Lin, Yankai and
Liu, Zhiyuan and
Liu, Fangming and
Sun, Maosong",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.243/",
doi = "10.18653/v1/2025.acl-long.243",
pages = "4905--4918",
ISBN = "979-8-89176-251-0",
abstract = "This study investigates the structured generation capabilities of large language models (LLMs), focusing on producing valid JSON outputs against a given schema. Despite the widespread use of JSON in integrating language models with programs, there is a lack of comprehensive analysis and benchmarking of these capabilities. We explore various aspects of JSON generation, such as structure understanding, escaping, and natural language description, to determine how to assess and enable LLMs to generate valid responses. Building upon this, we propose SchemaBench, which features around 40K different JSON schemas to obtain and assess models' abilities in generating valid JSON. We find that the latest LLMs are still struggling to generate a valid JSON string. Moreover, we demonstrate that incorporating reinforcement learning with a Fine-grained Schema Validator can further enhance models' understanding of JSON schema, leading to improved performance. Our models demonstrate significant improvement in both generating JSON outputs and downstream tasks."
}
@inproceedings{luo-etal-2024-repoagent,
title = "{R}epo{A}gent: An {LLM}-Powered Open-Source Framework for Repository-level Code Documentation Generation",
author = "Luo, Qinyu and
Ye, Yining and
Liang, Shihao and
Zhang, Zhong and
Qin, Yujia and
Lu, Yaxi and
Wu, Yesai and
Cong, Xin and
Lin, Yankai and
Zhang, Yingli and
Che, Xiaoyin and
Liu, Zhiyuan and
Sun, Maosong",
editor = "Hernandez Farias, Delia Irazu and
Hope, Tom and
Li, Manling",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-demo.46/",
doi = "10.18653/v1/2024.emnlp-demo.46",
pages = "436--464",
abstract = "Generative models have demonstrated considerable potential in software engineering, particularly in tasks such as code generation and debugging. However, their utilization in the domain of code documentation generation remains underexplored. To this end, we introduce RepoAgent, a large language model powered open-source framework aimed at proactively generating, maintaining, and updating code documentation. Through both qualitative and quantitative evaluations, we have validated the effectiveness of our approach, showing that RepoAgent excels in generating high-quality repository-level documentation. The code and results are publicly accessible at https://github.com/OpenBMB/RepoAgent."
}
@article{ZHANG2022108075,
title = {Mixhead: Breaking the low-rank bottleneck in multi-head attention language models},
journal = {Knowledge-Based Systems},
volume = {240},
pages = {108075},
year = {2022},
issn = {0950-7051},
doi = {10.1016/j.knosys.2021.108075},
url = {https://www.sciencedirect.com/science/article/pii/S0950705121011503},
author = {Zhong Zhang and Nian Shao and Chongming Gao and Rui Miao and Qinli Yang and Junming Shao},
keywords = {Language model, Multi-head attention, Low-rank bottleneck},
abstract = {The Transformer-based models have achieved significant advances in language modeling, while the multi-head attention mechanism in Transformers plays an indispensable part in their success. However, the too-small head size caused by the multi-head mechanism will lead to one problem called the low-rank bottleneck, which means that the rank of the attention weight matrix is too small to represent any desired attention. Naively increasing the head size is insufficient to solve the problem because it leads to severe parameter explosion and overfitting. To tackle this problem, we propose a mix-head attention (Mixhead) which mixes multiple attention heads by learnable mixing weights to improve the expressive power of the model. In contrast, Mixhead achieves a higher rank of the attention weight matrix while introducing a negligible number of parameters. Furthermore, Mixhead is quite general and can be easily adopted to most multi-head attention based models. We conduct extensive experiments including language modeling, machine translation, and finetuning BERT to demonstrate the effectiveness of our method.}
}
@inproceedings{zhang-etal-2020-revisiting,
title = "Revisiting Representation Degeneration Problem in Language Modeling",
author = "Zhang, Zhong and
Gao, Chongming and
Xu, Cong and
Miao, Rui and
Yang, Qinli and
Shao, Junming",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.46/",
doi = "10.18653/v1/2020.findings-emnlp.46",
pages = "518--527",
abstract = "Weight tying is now a common setting in many language generation tasks such as language modeling and machine translation. However, a recent study reveals that there is a potential flaw in weight tying. They find that the learned word embeddings are likely to degenerate and lie in a narrow cone when training a language model. They call it the representation degeneration problem and propose a cosine regularization to solve it. Nevertheless, we prove that the cosine regularization is insufficient to solve the problem, as the degeneration is still likely to happen under certain conditions. In this paper, we revisit the representation degeneration problem and theoretically analyze the limitations of the previously proposed solution. Afterward, we propose an alternative regularization method called Laplacian regularization to tackle the problem. Experiments on language modeling demonstrate the effectiveness of the proposed Laplacian regularization."
}
@inproceedings{10.1007/978-3-030-18576-3_17,
author="Zhang, Zhong
and Gao, Chongming
and Liu, Chongzhi
and Yang, Qinli
and Shao, Junming",
editor="Li, Guoliang
and Yang, Jun
and Gama, Joao
and Natwichai, Juggapong
and Tong, Yongxin",
title="Towards Robust Arbitrarily Oriented Subspace Clustering",
booktitle="Database Systems for Advanced Applications",
year="2019",
publisher="Springer International Publishing",
address="Cham",
pages="276--291",
abstract="Clustering high-dimensional data is challenging since meaningful clusters usually hide in the arbitrarily oriented subspaces, and classical clustering algorithms like k-means tend to fail in such case. Subspace clustering has thus attracted growing attention in the last decade and many algorithms have been proposed such as ORCLUS and 4C. However, existing approaches are usually sensitive to global and/or local noisy points, and the overlapping subspace clusters are little explored. Beyond, these approaches usually involve the exhaustive local search for correlated points or subspaces, which is infeasible in some cases. To deal with these problems, in this paper, we introduce a new subspace clustering algorithm called RAOSC, which formulates the Robust Arbitrarily Oriented Subspace Clustering as a group structure low-rank optimization problem. RAOSC is able to recover subspace clusters from a sea of noise while noise and overlapping points can be naturally identified during the optimization process. Unlike existing low-rank based subspace clustering methods, RAOSC can explicitly produce the subspaces of clusters without any prior knowledge of subspace dimensionality. Furthermore, RAOSC does not need a post-processing procedure to obtain the clustering result. Extensive experiments on both synthetic and real-world data sets have demonstrated that RAOSC allows yielding high-quality clusterings and outperforms many state-of-the-art algorithms.",
isbn="978-3-030-18576-3"
}
@inproceedings{10.1007/978-3-030-18590-9_45,
author="Zhang, Zhong
and Kang, Didi
and Gao, Chongming
and Shao, Junming",
editor="Li, Guoliang
and Yang, Jun
and Gama, Joao
and Natwichai, Juggapong
and Tong, Yongxin",
title="SemiSync: Semi-supervised Clustering by Synchronization",
booktitle="Database Systems for Advanced Applications",
year="2019",
publisher="Springer International Publishing",
address="Cham",
pages="358--362",
abstract="In this paper, we consider the semi-supervised clustering problem, where the prior knowledge is formalized as the Cannot-Link (CL) and Must-Link (ML) pairwise constraints. We propose an algorithm called SemiSync that tackles this problem from a novel perspective: synchronization. The basic idea is to regard the data points as a set of (constrained) phase oscillators, and simulate their dynamics to form clusters automatically. SemiSync allows dynamically propagating the constraints to unlabelled data points driven by their local data distributions, which effectively boosts the clustering performance even if little prior knowledge is available. We experimentally demonstrate the effectiveness of the proposed method.",
isbn="978-3-030-18590-9"
}
@inproceedings{10.1007/978-3-319-91458-9_33,
author="Zhang, Zhong
and Qin, Zhili
and Li, Peiyan
and Yang, Qinli
and Shao, Junming",
editor="Pei, Jian
and Manolopoulos, Yannis
and Sadiq, Shazia
and Li, Jianxin",
title="Multi-view Discriminative Learning via Joint Non-negative Matrix Factorization",
booktitle="Database Systems for Advanced Applications",
year="2018",
publisher="Springer International Publishing",
address="Cham",
pages="542--557",
abstract="Multi-view learning attempts to generate a classifier with a better performance by exploiting relationship among multiple views. Existing approaches often focus on learning the consistency and/or complementarity among different views. However, not all consistent or complementary information is useful for learning, instead, only class-specific discriminative information is essential. In this paper, we propose a new robust multi-view learning algorithm, called DICS, by exploring the Discriminative and non-discriminative Information existing in Common and view-Specific parts among different views via joint non-negative matrix factorization. The basic idea is to learn a latent common subspace and view-specific subspaces, and more importantly, discriminative and non-discriminative information from all subspaces are further extracted to support a better classification. Empirical extensive experiments on seven real-world data sets have demonstrated the effectiveness of DICS, and show its superiority over many state-of-the-art algorithms.",
isbn="978-3-319-91458-9"
}
@inproceedings{10.1145/3539618.3591636,
author = {Gao, Chongming and Huang, Kexin and Chen, Jiawei and Zhang, Yuan and Li, Biao and Jiang, Peng and Wang, Shiqi and Zhang, Zhong and He, Xiangnan},
title = {Alleviating Matthew Effect of Offline Reinforcement Learning in Interactive Recommendation},
year = {2023},
isbn = {9781450394086},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3539618.3591636},
doi = {10.1145/3539618.3591636},
abstract = {Offline reinforcement learning (RL), a technology that offline learns a policy from logged data without the need to interact with online environments, has become a favorable choice in decision-making processes like interactive recommendation. Offline RL faces the value overestimation problem. To address it, existing methods employ conservatism, e.g., by constraining the learned policy to be close to behavior policies or punishing the rarely visited state-action pairs. However, when applying such offline RL to recommendation, it will cause a severe Matthew effect, i.e., the rich get richer and the poor get poorer, by promoting popular items or categories while suppressing the less popular ones. It is a notorious issue that needs to be addressed in practical recommender systems.In this paper, we aim to alleviate the Matthew effect in offline RL-based recommendation. Through theoretical analyses, we find that the conservatism of existing methods fails in pursuing users' long-term satisfaction. It inspires us to add a penalty term to relax the pessimism on states with high entropy of the logging policy and indirectly penalizes actions leading to less diverse states. This leads to the main technical contribution of the work: Debiased model-based Offline RL (DORL) method. Experiments show that DORL not only captures user interests well but also alleviates the Matthew effect. The implementation is available via https://github.com/chongminggao/DORL-codes},
booktitle = {Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {238--248},
numpages = {11},
keywords = {interactive recommendation, matthew effect, offline reinforcement learning},
location = {Taipei, Taiwan},
series = {SIGIR '23}
}
@inproceedings{ijcai2019p469,
title = {Community Detection and Link Prediction via Cluster-driven Low-rank Matrix Completion},
author = {Shao, Junming and Zhang, Zhong and Yu, Zhongjing and Wang, Jun and Zhao, Yi and Yang, Qinli},
booktitle = {Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, {IJCAI-19}},
publisher = {International Joint Conferences on Artificial Intelligence Organization},
pages = {3382--3388},
year = {2019},
month = {7},
doi = {10.24963/ijcai.2019/469},
url = {https://doi.org/10.24963/ijcai.2019/469},
}
@article{liang2019classification,
title={Classification of first-episode schizophrenia using multimodal brain features: a combined structural and diffusion imaging study},
author={Liang, Sugai and Li, Yinfei and Zhang, Zhong and Kong, Xiangzhen and Wang, Qiang and Deng, Wei and Li, Xiaojing and Zhao, Liansheng and Li, Mingli and Meng, Yajing and others},
journal={Schizophrenia Bulletin},
volume={45},
number={3},
pages={591--599},
year={2019},
publisher={Oxford University Press US}
}
@misc{zhang2025hcrreasonersynergizinglargelanguage,
title={HCR-Reasoner: Synergizing Large Language Models and Theory for Human-like Causal Reasoning},
author={Yanxi Zhang and Xin Cong and Zhong Zhang and Xiao Liu and Dongyan Zhao and Yesai Wu},
year={2025},
eprint={2505.08750},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2505.08750},
}
Bio
I am currently a postdoctoral researcher in the "Shuimu Tsinghua Scholar Program 水木学者" at the THUNLP Lab, Department of Computer Science and Technology, Tsinghua University, advised by Assoc. Prof. Zhiyuan Liu (刘知远). I received my B.E. (2016) and Ph.D. (2023) from the University of Electronic Science and Technology of China, advised by Prof. Junming Shao (邵俊明). I was a visiting scholar at the Université de Montréal in 2022, advised by Assoc. Prof. Bang Liu (刘邦).
I am working on Large Language Models and LLM-based Agents, particularly GUI Agents and Tool Learning. My work has appeared at top-tier venues including ACL, ICLR, EMNLP, SIGIR, and IJCAI.
👋👋👋 We are continuously seeking highly motivated interns who are interested in LLM-based Agent research. See more details here.
News
- 2025.11: 🏠🏠🏠 My new homepage is online!
Selected Publications
See full publications at Google Scholar
AgentCPM-GUI: Building Mobile-Use Agents with Reinforcement Fine-Tuning
Zhong Zhang*, Yaxi Lu*, Yikun Fu, Yupeng Huo, Shenzhi Yang, Yesai Wu, Han Si, Xin Cong, Haotian Chen, Yankai Lin✉︎, Jie Xie, Wei Zhou, Wang Xu, Yuanheng Zhang, Zhou Su, Zhongwu Zhai, Xiaoming Liu, Yudong Mei, Jianming Xu, Hongyan Tian, Chongyi Wang, Chi Chen, Yuan Yao, Zhiyuan Liu✉︎, Maosong Sun✉︎
- AgentCPM-GUI is a GUI agent built upon MiniCPM-V 2.6 (latest update here), delivering strong GUI perception and enhanced reasoning for autonomously operating mobile phones. A minimal usage sketch follows this list.
- ICLR 2025 Learning Evolving Tools for Large Language Models. Guoxin Chen, Zhong Zhang✉︎, Xin Cong✉︎, Fangda Guo, Yesai Wu, Yankai Lin, Wenzheng Feng, Yasheng Wang

- ACL 2025 Learning to Generate Structured Output with Schema Reinforcement Learning. Yaxi Lu*, Haolun Li*, Xin Cong, Zhong Zhang✉︎, Yesai Wu, Yankai Lin, Zhiyuan Liu, Fangming Liu✉︎, Maosong Sun (a toy reward sketch follows this list)

- EMNLP Demo 2024 RepoAgent: An LLM-Powered Open-Source Framework for Repository-level Code Documentation Generation. Qinyu Luo*, Yining Ye*, Shihao Liang, Zhong Zhang✉︎, Yujia Qin, Yaxi Lu, Yesai Wu, Xin Cong, Yankai Lin, Yingli Zhang, Xiaoyin Che, Zhiyuan Liu✉︎, Maosong Sun

- ACL 2023 Fine-tuning Happens in Tiny Subspaces: Exploring Intrinsic Task-specific Subspaces of Pre-trained Language Models. Zhong Zhang, Bang Liu✉︎, Junming Shao✉︎
- Knowledge-Based Systems 2022 Mixhead: Breaking the low-rank bottleneck in multi-head attention language models. Zhong Zhang, Nian Shao, Chongming Gao, Rui Miao, Qinli Yang, Junming Shao✉︎ (a formula sketch follows this list)
- EMNLP Findings 2020 Revisiting Representation Degeneration Problem in Language Modeling. Zhong Zhang, Chongming Gao, Cong Xu, Rui Miao, Qinli Yang, Junming Shao✉︎
- SIGIR 2023 Alleviating Matthew Effect of Offline Reinforcement Learning in Interactive Recommendation. Chongming Gao, Kexin Huang, Jiawei Chen✉︎, Yuan Zhang, Biao Li, Peng Jiang, Shiqi Wang, Zhong Zhang, Xiangnan He✉︎

- DASFAA 2019 Towards Robust Arbitrarily Oriented Subspace Clustering. Zhong Zhang, Chongming Gao, Chongzhi Liu, Qinli Yang, Junming Shao✉︎

- DASFAA 2019 SemiSync: Semi-supervised Clustering by Synchronization. Zhong Zhang, Didi Kang, Chongming Gao, Junming Shao✉︎
- IJCAI 2019 Community Detection and Link Prediction via Cluster-driven Low-rank Matrix Completion. Junming Shao✉︎, Zhong Zhang, Zhongjing Yu, Jun Wang, Yi Zhao, Qinli Yang
- Schizophrenia Bulletin 2019 Classification of first-episode schizophrenia using multimodal brain features: a combined structural and diffusion imaging study. Sugai Liang, Yinfei Li, Zhong Zhang, Xiangzhen Kong, Qiang Wang, Wei Deng, Xiaojing Li, Liansheng Zhao, Mingli Li, Yajing Meng, Feng Huang, Xiaohong Ma, Xin-Min Li, Andrew J Greenshaw, Junming Shao, Tao Li✉︎
- DASFAA 2018 Multi-view Discriminative Learning via Joint Non-negative Matrix Factorization. Zhong Zhang, Zhili Qin, Peiyan Li, Qinli Yang, Junming Shao✉︎
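For AgentCPM-GUI, here is a minimal usage sketch in Python. It is a sketch, not the official quickstart: it assumes the released checkpoint is loadable through Hugging Face transformers under the repo id openbmb/AgentCPM-GUI and that it keeps the MiniCPM-V-style chat interface; the GitHub repository linked above is the authoritative reference.

```python
# Minimal sketch (assumptions: the "openbmb/AgentCPM-GUI" repo id and the
# MiniCPM-V-style `chat` API). See https://github.com/OpenBMB/AgentCPM-GUI
# for the officially supported usage.
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

MODEL_ID = "openbmb/AgentCPM-GUI"  # assumed Hugging Face repo id

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
).eval()

# One agent step: current screenshot + task instruction -> next GUI action.
screenshot = Image.open("screenshot.png").convert("RGB")
msgs = [{"role": "user", "content": "Open the Settings app and turn on Wi-Fi."}]

# The exact chat signature may differ between releases; this mirrors MiniCPM-V.
action = model.chat(image=screenshot, msgs=msgs, tokenizer=tokenizer)
print(action)  # typically a compact JSON action, e.g. a tap with coordinates
```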
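For the schema reinforcement learning paper, a toy sketch of the core reward signal under my own simplification: the policy earns reward only when its output parses as JSON and validates against the target schema. The paper's fine-grained schema validator is richer than this binary check, and the name schema_reward is illustrative.

```python
# Toy illustration (my simplification, not the paper's validator): a binary
# schema-conformance reward for reinforcement fine-tuning on JSON generation.
import json
from jsonschema import Draft202012Validator

def schema_reward(output_text: str, schema: dict) -> float:
    """Return 1.0 iff output_text is valid JSON that satisfies `schema`."""
    try:
        obj = json.loads(output_text)
    except json.JSONDecodeError:
        return 0.0  # malformed JSON earns nothing
    errors = list(Draft202012Validator(schema).iter_errors(obj))
    return 1.0 if not errors else 0.0

schema = {
    "type": "object",
    "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
    "required": ["name"],
}
print(schema_reward('{"name": "Ada", "age": 36}', schema))  # 1.0
print(schema_reward('{"age": "thirty-six"}', schema))       # 0.0
```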
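And for Mixhead, a rough LaTeX rendering of the idea as I read the abstract (notation mine, not the paper's): each effective attention matrix is a learnable mixture of all h per-head attention matrices, which can raise its rank while adding only an h-by-h weight matrix.

```latex
% Sketch of mix-head attention, paraphrased from the abstract (notation mine).
% A_j is the j-th softmax attention matrix; w_{ij} are learnable mixing weights.
\[
A_j = \operatorname{softmax}\!\left(\frac{Q_j K_j^{\top}}{\sqrt{d_k}}\right),
\qquad
\tilde{A}_i = \sum_{j=1}^{h} w_{ij} A_j,
\qquad
\mathrm{head}_i = \tilde{A}_i V_i .
\]
% The mixture can exceed the per-head rank bound behind the low-rank
% bottleneck, at the cost of only the h x h mixing weights w_{ij}.
```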
Research Grants
- 2025-2026, PI, China Postdoctoral Science Foundation, General Program.
- 2024-2026, PI, China Postdoctoral Science Foundation, Postdoctoral Fellowship Program.
Invited Talks
- 2025.11, GUI Agents Tutorial, Huawei Talk.
- 2025.08, Getting Started with LLM Research, CCL 2025.
- 2025.06, AgentCPM-GUI: Building Mobile-Use Agents with Reinforcement Fine-Tuning, Wisemodel Talk.
- 2024.08, Large Language Model-driven Autonomous Agents, MLNLP 2024.
- 2024.08, Latest Advances in LLMs, InfiniTensor 2024.
Professional Services
Teaching
Teaching Assistant
- Spring 2020, Big Data Analysis and Mining (English), for Graduate Students
- Spring 2019, Data Mining and Big Data Analysis, for Undergraduate Students
- Spring 2017, Big Data Analysis and Mining (English), for Graduate Students
- Autumn 2016, Probability and Statistics (English), for Undergraduate Students