@online{Wrobel2602.06613,
  title      = {{DAVE}: Distribution-aware Attribution via {ViT} Gradient Decomposition},
  author     = {Wr{\'o}bel, Adam and Gairola, Siddhartha and Tabor, Jacek and Schiele, Bernt and Zieli{\'n}ski, Bartosz and Rymarczyk, Dawid},
  language   = {eng},
  url        = {https://arxiv.org/abs/2602.06613},
  eprint     = {2602.06613},
  eprinttype = {arXiv},
  year       = {2026},
  abstract   = {Vision Transformers (ViTs) have become a dominant architecture in computer vision, yet producing stable and high-resolution attribution maps for these models remains challenging. Architectural components such as patch embeddings and attention routing often introduce structured artifacts in pixel-level explanations, causing many existing methods to rely on coarse patch-level attributions. We introduce DAVE \textit{(\underline{D}istribution-aware \underline{A}ttribution via \underline{V}iT Gradient D\underline{E}composition)}, a mathematically grounded attribution method for ViTs based on a structured decomposition of the input gradient. By exploiting architectural properties of ViTs, DAVE isolates locally equivariant and stable components of the effective input--output mapping. It separates these from architecture-induced artifacts and other sources of instability.},
}