% ACL 2025 conference paper (Tjuatja and Neubig); the url field points to the arXiv preprint.
% Braces inside title/booktitle protect case-sensitive words from style recasing.
@inproceedings{tjuatja25acl,
  author    = {Tjuatja, Lindia and Neubig, Graham},
  title     = {{BehaviorBox}: Automated Discovery of Fine-Grained Performance Differences Between Language Models},
  booktitle = {Annual Meeting of the Association for Computational Linguistics ({ACL})},
  address   = {Vienna, Austria},
  month     = jul,
  year      = {2025},
  url       = {https://arxiv.org/abs/2506.02204},
}