Last Year

Article
Ansari, N., Alizadeh-Mousavi, O., Seidel, H.-P., and Babaei, V. 2020. Mixed Integer Ink Selection for Spectral Reproduction. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Bemana, M., Myszkowski, K., Seidel, H.-P., and Ritschel, T. 2020a. X-Fields: Implicit Neural View-, Light- and Time-Image Interpolation. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Çoğalan, U. and Akyüz, A.O. 2020. Deep Joint Deinterlacing and Denoising for Single Shot Dual-ISO HDR Reconstruction. IEEE Transactions on Image Processing 29.
Cucerca, S., Didyk, P., Seidel, H.-P., and Babaei, V. 2020. Computational Image Marking on Metals via Laser Induced Heating. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2020) 39, 4.
Egger, B., Smith, W.A.P., Tewari, A., et al. 2020. 3D Morphable Face Models - Past, Present and Future. ACM Transactions on Graphics 39, 5.
Elgharib, M., Mendiratta, M., Thies, J., et al. 2020. Egocentric Videoconferencing. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Günther, F., Jiang, C., and Pottmann, H. 2020. Smooth Polyhedral Surfaces. Advances in Mathematics 363.
(arXiv: 1703.05318)
Abstract
Polyhedral surfaces are fundamental objects in architectural geometry and industrial design. Whereas closeness of a given mesh to a smooth reference surface and its suitability for numerical simulations were already studied extensively, the aim of our work is to find and to discuss suitable assessments of smoothness of polyhedral surfaces that only take the geometry of the polyhedral surface itself into account. Motivated by analogies to classical differential geometry, we propose a theory of smoothness of polyhedral surfaces including suitable notions of normal vectors, tangent planes, asymptotic directions, and parabolic curves that are invariant under projective transformations. It is remarkable that seemingly mild conditions significantly limit the shapes of faces of a smooth polyhedral surface. Besides being of theoretical interest, we believe that smoothness of polyhedral surfaces is of interest in the architectural context, where vertices and edges of polyhedral surfaces are highly visible.
Liu, L., Xu, W., Habermann, M., et al. 2020a. Neural Human Video Rendering by Learning Dynamic Textures and Rendering-to-Video Translation. IEEE Transactions on Visualization and Computer Graphics.
Mehta, D., Sotnychenko, O., Mueller, F., et al. 2020. XNect: Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2020) 39, 4.
Meka, A., Pandey, R., Häne, C., et al. 2020. Deep Relightable Textures: Volumetric Performance Capture with Neural Rendering. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Mlakar, D., Winter, M., Stadlbauer, P., Seidel, H.-P., Steinberger, M., and Zayer, R. 2020. Subdivision-Specialized Linear Algebra Kernels for Static and Dynamic Mesh Connectivity on the GPU. Computer Graphics Forum (Proc. EUROGRAPHICS 2020) 39, 2.
Piovarči, M., Foshey, M., Babaei, V., Rusinkiewicz, S., Matusik, W., and Didyk, P. 2020. Towards Spatially Varying Gloss Reproduction for 3D Printing. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Saberpour, A., Hersch, R.D., Fang, J., Zayer, R., Seidel, H.-P., and Babaei, V. 2020. Fabrication of Moiré on Curved Surfaces. Optics Express 28, 13.
Serrano, A., Martin, D., Gutierrez, D., Myszkowski, K., and Masia, B. 2020. Imperceptible Manipulation of Lateral Camera Motion for Improved Virtual Reality Applications. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Shahmirzadi, A.A., Babaei, V., and Seidel, H.-P. 2020. A Multispectral Dataset of Oil and Watercolor Paints. Electronic Imaging.
Shimada, S., Golyanik, V., Xu, W., and Theobalt, C. 2020a. PhysCap: Physically Plausible Monocular 3D Motion Capture in Real Time. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Singh, G., Subr, K., Coeurjolly, D., Ostromoukhov, V., and Jarosz, W. 2020. Fourier Analysis of Correlated Monte Carlo Importance Sampling. Computer Graphics Forum 39, 1.
Stadlbauer, P., Mlakar, D., Seidel, H.-P., Steinberger, M., and Zayer, R. 2020. Interactive Modeling of Cellular Structures on Surfaces with Application to Additive Manufacturing. Computer Graphics Forum (Proc. EUROGRAPHICS 2020) 39, 2.
Sultan, A.S., Elgharib, M., Tavares, T., Jessri, M., and Basile, J.R. 2020. The Use of Artificial Intelligence, Machine Learning and Deep Learning in Oncologic Histopathology. Journal of Oral Pathology & Medicine 49, 9.
Tewari, A., Zollhöfer, M., Bernard, F., et al. 2020a. High-Fidelity Monocular Face Reconstruction based on an Unsupervised Model-based Face Autoencoder. IEEE Transactions on Pattern Analysis and Machine Intelligence 42, 2.
Tewari, A., Fried, O., Thies, J., et al. 2020b. State of the Art on Neural Rendering. Computer Graphics Forum (Proc. EUROGRAPHICS 2020) 39, 2.
Tewari, A., Elgharib, M., Mallikarjun B R, et al. 2020c. PIE: Portrait Image Embedding for Semantic Control. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Tong, X., Myszkowski, K., and Huang, J. 2020. Foreword to the Special Section on the International Conference on Computer-Aided Design and Computer Graphics (CAD/Graphics) 2019. Computers and Graphics 86.
Wang, J., Mueller, F., Bernard, F., et al. 2020a. RGB2Hands: Real-Time Tracking of 3D Hand Interactions from Monocular RGB Video. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Wang, P., Liu, L., Chen, N., Chu, H.-K., Theobalt, C., and Wang, W. 2020b. Vid2Curve: Simultaneous Camera Motion Estimation and Thin Structure Reconstruction from an RGB Video. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2020) 39, 4.
Zheng, Q., Babaei, V., Wetzstein, G., Seidel, H.-P., Zwicker, M., and Singh, G. 2020. Neural Light Field 3D Printing. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Conference Paper
Bernard, F., Suri, Z.K., and Theobalt, C. 2020a. MINA: Convex Mixed-Integer Programming for Non-Rigid Shape Alignment. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Bhatnagar, B.L., Sminchisescu, C., Theobalt, C., and Pons-Moll, G. 2020a. Combining Implicit Function Learning and Parametric Models for 3D Human Reconstruction. Computer Vision – ECCV 2020, Springer.
Bhatnagar, B.L., Sminchisescu, C., Theobalt, C., and Pons-Moll, G. 2020b. LoopReg: Self-supervised Learning of Implicit Surface Correspondences, Pose and Shape for 3D Human Mesh Registration. Advances in Neural Information Processing Systems 33 (NIPS 2020), Curran Associates, Inc.
Božič, A., Zollhöfer, M., Theobalt, C., and Nießner, M. 2020. DeepDeform: Learning Non-Rigid RGB-D Reconstruction With Semi-Supervised Data. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Dunn, D., Tursun, O., Yu, H., Didyk, P., Myszkowski, K., and Fuchs, H. 2020. Stimulating the Human Visual System Beyond Real World Performance in Future Augmented Reality Displays. IEEE International Symposium on Mixed and Augmented Reality (ISMAR 2020), IEEE.
Golyanik, V. and Theobalt, C. 2020. A Quantum Computational Approach to Correspondence Problems on Point Sets. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Golyanik, V., Shimada, S., and Theobalt, C. 2020a. Fast Simultaneous Gravitational Alignment of Multiple Point Sets. International Conference on 3D Vision, IEEE.
Golyanik, V., Jonas, A., Stricker, D., and Theobalt, C. 2020b. Intrinsic Dynamic Shape Prior for Dense Non-Rigid Structure from Motion. International Conference on 3D Vision, IEEE.
Habermann, M., Xu, W., Zollhöfer, M., Pons-Moll, G., and Theobalt, C. 2020a. DeepCap: Monocular Human Performance Capture Using Weak Supervision. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Huang, L., Gao, C., Zhou, Y., et al. 2020. Universal Physical Camouflage Attacks on Object Detectors. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{Huang_CVPR2020, TITLE = {Universal Physical Camou{fl}age Attacks on Object Detectors}, AUTHOR = {Huang, Lifeng and Gao, Chengying and Zhou, Yuyin and Xie, Cihang and Yuille, Alan and Zou, Changqing and Liu, Ning}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00080}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {717--726}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Huang, Lifeng %A Gao, Chengying %A Zhou, Yuyin %A Xie, Cihang %A Yuille, Alan %A Zou, Changqing %A Liu, Ning %+ External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Universal Physical Camouflage Attacks on Object Detectors : %G eng %U http://hdl.handle.net/21.11116/0000-0006-09F0-1 %R 10.1109/CVPR42600.2020.00080 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 717 - 726 %I IEEE %@ 978-1-7281-7168-5
Liu, L., Gu, J., Lin, K.Z., Chua, T.-S., and Theobalt, C. 2020b. Neural Sparse Voxel Fields. Advances in Neural Information Processing Systems 33 (NeurIPS 2020), Curran Associates, Inc.
Export
BibTeX
@inproceedings{LiuNeural20, TITLE = {Neural Sparse Voxel Fields}, AUTHOR = {Liu, Lingjie and Gu, Jiatao and Lin, Kyaw Zaw and Chua, Tat-Seng and Theobalt, Christian}, LANGUAGE = {eng}, PUBLISHER = {Curran Associates, Inc.}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {Advances in Neural Information Processing Systems 33 (NeurIPS 2020)}, EDITOR = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M. F. and Lin, H.}, ADDRESS = {Virtual Event}, }
Endnote
%0 Conference Proceedings %A Liu, Lingjie %A Gu, Jiatao %A Lin, Kyaw Zaw %A Chua, Tat-Seng %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Sparse Voxel Fields : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D437-C %D 2020 %B 34th Conference on Neural Information Processing Systems %Z date of event: 2020-12-06 - 2020-12-12 %C Virtual Event %B Advances in Neural Information Processing Systems 33 %E Larochelle, H.; Ranzato, M.; Hadsell, R.; Balcan, M. F.; Lin, H. %I Curran Associates, Inc. %U https://proceedings.neurips.cc/paper/2020/file/b4b758962f17808746e9bb832a6fa4b8-Paper.pdf
Long, X., Liu, L., Theobalt, C., and Wang, W. 2020a. Occlusion-Aware Depth Estimation with Adaptive Normal Constraints. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Long_ECCV20, TITLE = {Occlusion-Aware Depth Estimation with Adaptive Normal Constraints}, AUTHOR = {Long, Xiaoxiao and Liu, Lingjie and Theobalt, Christian and Wang, Wenping}, LANGUAGE = {eng}, ISBN = {978-3-030-58544-0}, DOI = {10.1007/978-3-030-58545-7_37}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {640--657}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12354}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Long, Xiaoxiao %A Liu, Lingjie %A Theobalt, Christian %A Wang, Wenping %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Occlusion-Aware Depth Estimation with Adaptive Normal Constraints : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D42B-A %R 10.1007/978-3-030-58545-7_37 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 640 - 657 %I Springer %@ 978-3-030-58544-0 %B Lecture Notes in Computer Science %N 12354
Malik, J., Abdelaziz, I., Elhayek, A., et al. 2020a. HandVoxNet: Deep Voxel-Based Network for 3D Hand Shape and Pose Estimation From a Single Depth Map. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{Malik_CVPR2020, TITLE = {{HandVoxNet}: {D}eep Voxel-Based Network for {3D} Hand Shape and Pose Estimation From a Single Depth Map}, AUTHOR = {Malik, Jameel and Abdelaziz, Ibrahim and Elhayek, Ahmed and Shimada, Soshi and Ali, Sk Aziz and Golyanik, Vladislav and Theobalt, Christian and Stricker, Didier}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00714}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {7111--7120}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Malik, Jameel %A Abdelaziz, Ibrahim %A Elhayek, Ahmed %A Shimada, Soshi %A Ali, Sk Aziz %A Golyanik, Vladislav %A Theobalt, Christian %A Stricker, Didier %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T HandVoxNet: Deep Voxel-Based Network for 3D Hand Shape and Pose Estimation From a Single Depth Map : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CFCA-D %R 10.1109/CVPR42600.2020.00714 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 7111 - 7120 %I IEEE %@ 978-1-7281-7168-5
Meng, X., Zheng, Q., Varshney, A., Singh, G., and Zwicker, M. 2020. Real-time Monte Carlo Denoising with the Neural Bilateral Grid. Rendering 2020 - DL-only Track (Eurographics Symposium on Rendering 2020), The Eurographics Association.
Export
BibTeX
@inproceedings{Meng_EGRendering20, TITLE = {Real-time {Monte Carlo} Denoising with the Neural Bilateral Grid}, AUTHOR = {Meng, Xiaoxu and Zheng, Quan and Varshney, Amitabh and Singh, Gurprit and Zwicker, Matthias}, LANGUAGE = {eng}, ISBN = {978-3-03868-117-5}, URL = {https://diglib.eg.org:443/handle/10.2312/sr20201133}, DOI = {10.2312/sr.20201133}, PUBLISHER = {The Eurographics Association}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {Rendering 2020 -- DL-only Track (Eurographics Symposium on Rendering 2020)}, EDITOR = {Dachsbacher, Carsten and Pharr, Matt}, PAGES = {1--12}, ADDRESS = {London, UK}, }
Endnote
%0 Conference Proceedings %A Meng, Xiaoxu %A Zheng, Quan %A Varshney, Amitabh %A Singh, Gurprit %A Zwicker, Matthias %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Real-time Monte Carlo Denoising with the Neural Bilateral Grid : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CEC2-6 %R 10.2312/sr.20201133 %U https://diglib.eg.org:443/handle/10.2312/sr20201133 %D 2020 %B 31st Eurographics Symposium on Rendering %Z date of event: 2020-06-29 - 2020-07-02 %C London, UK %B Rendering 2020 - DL-only Track %E Dachsbacher, Carsten; Pharr, Matt %P 1 - 12 %I The Eurographics Association %@ 978-3-03868-117-5
Qian, N., Wang, J., Mueller, F., Bernard, F., Golyanik, V., and Theobalt, C. 2020a. HTML: A Parametric Hand Texture Model for 3D Hand Reconstruction and Personalization. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Qian_ECCV20, TITLE = {{HTML}: {A} Parametric Hand Texture Model for {3D} Hand Reconstruction and Personalization}, AUTHOR = {Qian, Neng and Wang, Jiayi and Mueller, Franziska and Bernard, Florian and Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-58621-8}, DOI = {10.1007/978-3-030-58621-8_4}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {54--71}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12356}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Qian, Neng %A Wang, Jiayi %A Mueller, Franziska %A Bernard, Florian %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T HTML: A Parametric Hand Texture Model for 3D Hand Reconstruction and Personalization : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D062-F %R 10.1007/978-3-030-58621-8_4 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 54 - 71 %I Springer %@ 978-3-030-58621-8 %B Lecture Notes in Computer Science %N 12356
Sarkar, K., Mehta, D., Xu, W., Golyanik, V., and Theobalt, C. 2020. Neural Re-rendering of Humans from a Single Image. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Sarkar_ECCV20, TITLE = {Neural Re-rendering of Humans from a Single Image}, AUTHOR = {Sarkar, Kripasindhu and Mehta, Dushyant and Xu, Weipeng and Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-58621-8}, DOI = {10.1007/978-3-030-58621-8_35}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {596--613}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12356}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Sarkar, Kripasindhu %A Mehta, Dushyant %A Xu, Weipeng %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Re-rendering of Humans from a Single Image : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D0A4-4 %R 10.1007/978-3-030-58621-8_35 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 596 - 613 %I Springer %@ 978-3-030-58621-8 %B Lecture Notes in Computer Science %N 12356
Seelbach Benkner, M., Golyanik, V., Theobalt, C., and Moeller, M. 2020. Adiabatic Quantum Graph Matching with Permutation Matrix Constraints. International Conference on 3D Vision, IEEE.
Export
BibTeX
@inproceedings{SeelbachBenkner2020, TITLE = {Adiabatic Quantum Graph Matching with Permutation Matrix Constraints}, AUTHOR = {Seelbach Benkner, Marcel and Golyanik, Vladislav and Theobalt, Christian and Moeller, Michael}, LANGUAGE = {eng}, ISBN = {978-1-7281-8128-8}, DOI = {10.1109/3DV50981.2020.00068}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {International Conference on 3D Vision}, PAGES = {583--592}, ADDRESS = {Fukuoka, Japan (Virtual Event)}, }
Endnote
%0 Conference Proceedings %A Seelbach Benkner, Marcel %A Golyanik, Vladislav %A Theobalt, Christian %A Moeller, Michael %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Adiabatic Quantum Graph Matching with Permutation Matrix Constraints : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0362-6 %R 10.1109/3DV50981.2020.00068 %D 2020 %B International Conference on 3D Vision %Z date of event: 2020-11-25 - 2020-11-28 %C Fukuoka, Japan (Virtual Event) %B International Conference on 3D Vision %P 583 - 592 %I IEEE %@ 978-1-7281-8128-8
Sidhu, V., Tretschk, E., Golyanik, V., Agudo, A., and Theobalt, C. 2020. Neural Dense Non-Rigid Structure from Motion with Latent Space Constraints. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Sidhu_ECCV20, TITLE = {Neural Dense Non-Rigid Structure from Motion with Latent Space Constraints}, AUTHOR = {Sidhu, Vikramjit and Tretschk, Edgar and Golyanik, Vladislav and Agudo, Antonio and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-58516-7}, DOI = {10.1007/978-3-030-58517-4_13}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {204--222}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12361}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Sidhu, Vikramjit %A Tretschk, Edgar %A Golyanik, Vladislav %A Agudo, Antonio %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Dense Non-Rigid Structure from Motion with Latent Space Constraints : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D080-C %R 10.1007/978-3-030-58517-4_13 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 204 - 222 %I Springer %@ 978-3-030-58516-7 %B Lecture Notes in Computer Science %N 12361
Tewari, A., Elgharib, M., Bharaj, G., et al. 2020d. StyleRig: Rigging StyleGAN for 3D Control Over Portrait Images. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{Tewari_CVPR2020, TITLE = {{StyleRig}: {R}igging {StyleGAN} for {3D} Control Over Portrait Images}, AUTHOR = {Tewari, Ayush and Elgharib, Mohamed and Bharaj, Gaurav and Bernard, Florian and Seidel, Hans-Peter and P{\'e}rez, Patrick and Zollh{\"o}fer, Michael and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00618}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {6141--6150}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Tewari, Ayush %A Elgharib, Mohamed %A Bharaj, Gaurav %A Bernard, Florian %A Seidel, Hans-Peter %A Pérez, Patrick %A Zollhöfer, Michael %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T StyleRig: Rigging StyleGAN for 3D Control Over Portrait Images : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B0E7-D %R 10.1109/CVPR42600.2020.00618 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 6141 - 6150 %I IEEE %@ 978-1-7281-7168-5
Thies, J., Zollhöfer, M., Theobalt, C., Stamminger, M., and Nießner, M. 2020a. Image-guided Neural Object Rendering. International Conference on Learning Representations (ICLR 2020), OpenReview.net.
Export
BibTeX
@inproceedings{Thies_ICLR2020, TITLE = {Image-guided Neural Object Rendering}, AUTHOR = {Thies, Justus and Zollh{\"o}fer, Michael and Theobalt, Christian and Stamminger, Marc and Nie{\ss}ner, Matthias}, LANGUAGE = {eng}, URL = {https://openreview.net/forum?id=Hyg9anEFPS; https://iclr.cc/Conferences/2020}, PUBLISHER = {OpenReview.net}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {International Conference on Learning Representations (ICLR 2020)}, ADDRESS = {Addis Ababa, Ethiopia}, }
Endnote
%0 Conference Proceedings %A Thies, Justus %A Zollhöfer, Michael %A Theobalt, Christian %A Stamminger, Marc %A Nießner, Matthias %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Image-guided Neural Object Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D431-2 %U https://openreview.net/forum?id=Hyg9anEFPS %D 2020 %B 8th International Conference on Learning Representations %Z date of event: 2020-04-26 - 2020-04-30 %C Addis Ababa, Ethiopia %B International Conference on Learning Representations %I OpenReview.net %U https://openreview.net/forum?id=Hyg9anEFPS
Thies, J., Elgharib, M., Tewari, A., Theobalt, C., and Nießner, M. 2020b. Neural Voice Puppetry: Audio-Driven Facial Reenactment. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Thies_ECCV20, TITLE = {Neural Voice Puppetry: {A}udio-Driven Facial Reenactment}, AUTHOR = {Thies, Justus and Elgharib, Mohamed and Tewari, Ayush and Theobalt, Christian and Nie{\ss}ner, Matthias}, LANGUAGE = {eng}, ISBN = {978-3-030-58516-7}, DOI = {10.1007/978-3-030-58517-4_42}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {716--731}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12361}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Thies, Justus %A Elgharib, Mohamed %A Tewari, Ayush %A Theobalt, Christian %A Nießner, Matthias %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Neural Voice Puppetry: Audio-Driven Facial Reenactment : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D42F-6 %R 10.1007/978-3-030-58517-4_42 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 716 - 731 %I Springer %@ 978-3-030-58516-7 %B Lecture Notes in Computer Science %N 12361
Tretschk, E., Tewari, A., Golyanik, V., Zollhöfer, M., Stoll, C., and Theobalt, C. 2020a. PatchNets: Patch-Based Generalizable Deep Implicit 3D Shape Representations. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Tretschk_ECCV20a, TITLE = {{PatchNets}: {P}atch-Based Generalizable Deep Implicit {3D} Shape Representations}, AUTHOR = {Tretschk, Edgar and Tewari, Ayush and Golyanik, Vladislav and Zollh{\"o}fer, Michael and Stoll, Carsten and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-58516-7}, DOI = {10.1007/978-3-030-58517-4_18}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {293--309}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12361}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Tretschk, Edgar %A Tewari, Ayush %A Golyanik, Vladislav %A Zollhöfer, Michael %A Stoll, Carsten %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T PatchNets: Patch-Based Generalizable Deep Implicit 3D Shape Representations : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D095-5 %R 10.1007/978-3-030-58517-4_18 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 293 - 309 %I Springer %@ 978-3-030-58516-7 %B Lecture Notes in Computer Science %N 12361
Tretschk, E., Tewari, A., Zollhöfer, M., Golyanik, V., and Theobalt, C. 2020b. DEMEA: Deep Mesh Autoencoders for Non-rigidly Deforming Objects. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Tretschk_ECCV20, TITLE = {{DEMEA}: {D}eep Mesh Autoencoders for Non-rigidly Deforming Objects}, AUTHOR = {Tretschk, Edgar and Tewari, Ayush and Zollh{\"o}fer, Michael and Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-58516-7}, DOI = {10.1007/978-3-030-58548-8_35}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {601--617}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12349}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Tretschk, Edgar %A Tewari, Ayush %A Zollhöfer, Michael %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T DEMEA: Deep Mesh Autoencoders for Non-rigidly Deforming Objects : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D425-0 %R 10.1007/978-3-030-58548-8_35 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 601 - 617 %I Springer %@ 978-3-030-58516-7 %B Lecture Notes in Computer Science %N 12349
Xu, L., Xu, W., Golyanik, V., Habermann, M., Fang, L., and Theobalt, C. 2020a. EventCap: Monocular 3D Capture of High-Speed Human Motions Using an Event Camera. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{Xu_CVPR2020, TITLE = {{EventCap}: {M}onocular {3D} Capture of High-Speed Human Motions Using an Event Camera}, AUTHOR = {Xu, Lan and Xu, Weipeng and Golyanik, Vladislav and Habermann, Marc and Fang, Lu and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00502}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {4967--4977}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Xu, Lan %A Xu, Weipeng %A Golyanik, Vladislav %A Habermann, Marc %A Fang, Lu %A Theobalt, Christian %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T EventCap: Monocular 3D Capture of High-Speed Human Motions Using an Event Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CF57-F %R 10.1109/CVPR42600.2020.00502 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 4967 - 4977 %I IEEE %@ 978-1-7281-7168-5
Xu, Y., Fan, T., Yuan, Y., and Singh, G. 2020b. Ladybird: Quasi-Monte Carlo Sampling for Deep Implicit Field Based 3D Reconstruction with Symmetry. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Xu_ECCV20, TITLE = {Ladybird: {Quasi-Monte Carlo} Sampling for Deep Implicit Field Based {3D} Reconstruction with Symmetry}, AUTHOR = {Xu, Yifan and Fan, Tianqi and Yuan, Yi and Singh, Gurprit}, LANGUAGE = {eng}, ISBN = {978-3-030-58451-1}, DOI = {10.1007/978-3-030-58452-8_15}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {248--263}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12346}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Xu, Yifan %A Fan, Tianqi %A Yuan, Yi %A Singh, Gurprit %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Ladybird: Quasi-Monte Carlo Sampling for Deep Implicit Field Based 3D Reconstruction with Symmetry : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CEBE-C %R 10.1007/978-3-030-58452-8_15 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 248 - 263 %I Springer %@ 978-3-030-58451-1 %B Lecture Notes in Computer Science %N 12346
Yu, T., Zheng, Z., Zhong, Y., et al. 2020a. SimulCap : Single-View Human Performance Capture with Cloth Simulation. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
Export
BibTeX
@inproceedings{SimulCap19, TITLE = {{SimulCap} : {S}ingle-View Human Performance Capture with Cloth Simulation}, AUTHOR = {Yu, Tao and Zheng, Zerong and Zhong, Yuan and Zhao, Jianhui and Dai, Qionghai and Pons-Moll, Gerard and Liu, Yebin}, ISBN = {978-1-7281-3293-8}, DOI = {10.1109/CVPR.2019.00565}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, PAGES = {5499--5509}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Yu, Tao %A Zheng, Zerong %A Zhong, Yuan %A Zhao, Jianhui %A Dai, Qionghai %A Pons-Moll, Gerard %A Liu, Yebin %+ External Organizations External Organizations External Organizations External Organizations External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T SimulCap : Single-View Human Performance Capture with Cloth Simulation : %U http://hdl.handle.net/21.11116/0000-0003-651E-B %R 10.1109/CVPR.2019.00565 %D 2020 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-16 - 2019-06-20 %C Long Beach, CA, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 5499 - 5509 %I IEEE %@ 978-1-7281-3293-8
Yu, Y., Meka, A., Elgharib, M., Seidel, H.-P., Theobalt, C., and Smith, W.A.P. 2020b. Self-supervised Outdoor Scene Relighting. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{yu_ECCV20, TITLE = {Self-supervised Outdoor Scene Relighting}, AUTHOR = {Yu, Ye and Meka, Abhimitra and Elgharib, Mohamed and Seidel, Hans-Peter and Theobalt, Christian and Smith, William A. P.}, LANGUAGE = {eng}, ISBN = {978-3-030-58541-9}, DOI = {10.1007/978-3-030-58542-6_6}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {84--101}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12367}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Yu, Ye %A Meka, Abhimitra %A Elgharib, Mohamed %A Seidel, Hans-Peter %A Theobalt, Christian %A Smith, William A. P. %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Self-supervised Outdoor Scene Relighting : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B0F6-C %R 10.1007/978-3-030-58542-6_6 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 84 - 101 %I Springer %@ 978-3-030-58541-9 %B Lecture Notes in Computer Science %N 12367
Zhou, Y., Habermann, M., Xu, W., Habibie, I., Theobalt, C., and Xu, F. 2020a. Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{zhou2019monocular, TITLE = {Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data}, AUTHOR = {Zhou, Yuxiao and Habermann, Marc and Xu, Weipeng and Habibie, Ikhsanul and Theobalt, Christian and Xu, Feng}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00539}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {5345--5354}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Zhou, Yuxiao %A Habermann, Marc %A Xu, Weipeng %A Habibie, Ikhsanul %A Theobalt, Christian %A Xu, Feng %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data : %G eng %U http://hdl.handle.net/21.11116/0000-0006-A89E-B %R 10.1109/CVPR42600.2020.00539 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 5345 - 5354 %I IEEE %@ 978-1-7281-7168-5
Paper
Ali, S.A., Kahraman, K., Theobalt, C., Stricker, D., and Golyanik, V. 2020. Fast Gravitational Approach for Rigid Point Set Registration with Ordinary Differential Equations. https://arxiv.org/abs/2009.14005.
(arXiv: 2009.14005)
Abstract
This article introduces a new physics-based method for rigid point set alignment called Fast Gravitational Approach (FGA). In FGA, the source and target point sets are interpreted as rigid particle swarms with masses interacting in a globally multiply-linked manner while moving in a simulated gravitational force field. The optimal alignment is obtained by explicit modeling of forces acting on the particles as well as their velocities and displacements with second-order ordinary differential equations of motion. Additional alignment cues (point-based or geometric features, and other boundary conditions) can be integrated into FGA through particle masses. We propose a smooth-particle mass function for point mass initialization, which improves robustness to noise and structural discontinuities. To avoid prohibitive quadratic complexity of all-to-all point interactions, we adapt a Barnes-Hut tree for accelerated force computation and achieve quasilinear computational complexity. We show that the new method class has characteristics not found in previous alignment methods such as efficient handling of partial overlaps, inhomogeneous point sampling densities, and coping with large point clouds with reduced runtime compared to the state of the art. Experiments show that our method performs on par with or outperforms all compared competing non-deep-learning-based and general-purpose techniques (which do not assume the availability of training data and a scene prior) in resolving transformations for LiDAR data and gains state-of-the-art accuracy and speed when coping with different types of data disturbances.
Export
BibTeX
@online{Ali_2009.14005, TITLE = {Fast Gravitational Approach for Rigid Point Set Registration with Ordinary Differential Equations}, AUTHOR = {Ali, Sk Aziz and Kahraman, Kerem and Theobalt, Christian and Stricker, Didier and Golyanik, Vladislav}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2009.14005}, EPRINT = {2009.14005}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {This article introduces a new physics-based method for rigid point set alignment called Fast Gravitational Approach (FGA). In FGA, the source and target point sets are interpreted as rigid particle swarms with masses interacting in a globally multiply-linked manner while moving in a simulated gravitational force field. The optimal alignment is obtained by explicit modeling of forces acting on the particles as well as their velocities and displacements with second-order ordinary differential equations of motion. Additional alignment cues (point-based or geometric features, and other boundary conditions) can be integrated into FGA through particle masses. We propose a smooth-particle mass function for point mass initialization, which improves robustness to noise and structural discontinuities. To avoid prohibitive quadratic complexity of all-to-all point interactions, we adapt a Barnes-Hut tree for accelerated force computation and achieve quasilinear computational complexity. We show that the new method class has characteristics not found in previous alignment methods such as efficient handling of partial overlaps, inhomogeneous point sampling densities, and coping with large point clouds with reduced runtime compared to the state of the art. Experiments show that our method performs on par with or outperforms all compared competing non-deep-learning-based and general-purpose techniques (which do not assume the availability of training data and a scene prior) in resolving transformations for LiDAR data and gains state-of-the-art accuracy and speed when coping with different types of data disturbances.}, }
Endnote
%0 Report %A Ali, Sk Aziz %A Kahraman, Kerem %A Theobalt, Christian %A Stricker, Didier %A Golyanik, Vladislav %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Fast Gravitational Approach for Rigid Point Set Registration with Ordinary Differential Equations : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E8FA-A %U https://arxiv.org/abs/2009.14005 %D 2020 %X This article introduces a new physics-based method for rigid point set alignment called Fast Gravitational Approach (FGA). In FGA, the source and target point sets are interpreted as rigid particle swarms with masses interacting in a globally multiply-linked manner while moving in a simulated gravitational force field. The optimal alignment is obtained by explicit modeling of forces acting on the particles as well as their velocities and displacements with second-order ordinary differential equations of motion. Additional alignment cues (point-based or geometric features, and other boundary conditions) can be integrated into FGA through particle masses. We propose a smooth-particle mass function for point mass initialization, which improves robustness to noise and structural discontinuities. To avoid prohibitive quadratic complexity of all-to-all point interactions, we adapt a Barnes-Hut tree for accelerated force computation and achieve quasilinear computational complexity. We show that the new method class has characteristics not found in previous alignment methods such as efficient handling of partial overlaps, inhomogeneous point sampling densities, and coping with large point clouds with reduced runtime compared to the state of the art. Experiments show that our method performs on par with or outperforms all compared competing non-deep-learning-based and general-purpose techniques (which do not assume the availability of training data and a scene prior) in resolving transformations for LiDAR data and gains state-of-the-art accuracy and speed when coping with different types of data disturbances. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,eess.IV
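The FGA abstract above describes rigid alignment driven by simulated gravitational forces integrated with second-order equations of motion. As a rough, illustrative sketch only (not the authors' implementation), the Python snippet below performs one explicit integration step with naive all-pairs forces, where the paper instead uses a Barnes-Hut tree and a smooth-particle mass initialization, and then projects the displaced points back onto the nearest rigid transform with a standard Kabsch step; every name and parameter here is a placeholder.

import numpy as np

def gravitational_step(source, target, masses, velocity=None, dt=0.1, drag=0.9, softening=1e-3):
    # One explicit integration step of a toy gravitational alignment.
    # source: (N, 3) moving points, target: (M, 3) fixed points, masses: (M,).
    # Forces are naive all-pairs O(N*M); the paper accelerates this with a Barnes-Hut tree.
    if velocity is None:
        velocity = np.zeros_like(source)
    diff = target[None, :, :] - source[:, None, :]                  # (N, M, 3)
    dist2 = np.sum(diff ** 2, axis=-1) + softening                  # (N, M), softened squared distances
    forces = np.sum(masses[None, :, None] * diff / dist2[..., None] ** 1.5, axis=1)
    velocity = drag * velocity + dt * forces                        # explicit Euler on the 2nd-order ODE, with viscous drag
    displaced = source + dt * velocity
    # Project the free-particle displacement onto the nearest rigid transform (Kabsch).
    mu_s, mu_d = source.mean(axis=0), displaced.mean(axis=0)
    H = (source - mu_s).T @ (displaced - mu_d)
    U, _, Vt = np.linalg.svd(H)
    D = np.diag([1.0, 1.0, np.sign(np.linalg.det(Vt.T @ U.T))])     # guard against reflections
    R = Vt.T @ D @ U.T
    aligned = (source - mu_s) @ R.T + mu_d
    return aligned, velocity

# usage: iterate the step until the points stop moving appreciably
rng = np.random.default_rng(0)
target = rng.standard_normal((200, 3))
source = target[:150] + np.array([0.5, 0.0, 0.0])    # partially overlapping, offset copy
velocity = None
for _ in range(50):
    source, velocity = gravitational_step(source, target, np.ones(len(target)), velocity)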
Bemana, M., Myszkowski, K., Seidel, H.-P., and Ritschel, T. 2020b. X-Fields: Implicit Neural View-, Light- and Time-Image Interpolation. https://arxiv.org/abs/2010.00450.
(arXiv: 2010.00450)
Abstract
We suggest to represent an X-Field (a set of 2D images taken across different view, time or illumination conditions, i.e., video, light field, reflectance fields or combinations thereof) by learning a neural network (NN) to map their view, time or light coordinates to 2D images. Executing this NN at new coordinates results in joint view, time or light interpolation. The key idea to make this workable is a NN that already knows the "basic tricks" of graphics (lighting, 3D projection, occlusion) in a hard-coded and differentiable form. The NN represents the input to that rendering as an implicit map, that for any view, time, or light coordinate and for any pixel can quantify how it will move if view, time or light coordinates change (Jacobian of pixel position with respect to view, time, illumination, etc.). Our X-Field representation is trained for one scene within minutes, leading to a compact set of trainable parameters and hence real-time navigation in view, time and illumination.
Export
BibTeX
@online{Bemana_arXiv2010.00450, TITLE = {X-Fields: Implicit Neural View-, Light- and Time-Image Interpolation}, AUTHOR = {Bemana, Mojtaba and Myszkowski, Karol and Seidel, Hans-Peter and Ritschel, Tobias}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2010.00450}, EPRINT = {2010.00450}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We suggest to represent an X-Field -a set of 2D images taken across different view, time or illumination conditions, i.e., video, light field, reflectance fields or combinations thereof-by learning a neural network (NN) to map their view, time or light coordinates to 2D images. Executing this NN at new coordinates results in joint view, time or light interpolation. The key idea to make this workable is a NN that already knows the "basic tricks" of graphics (lighting, 3D projection, occlusion) in a hard-coded and differentiable form. The NN represents the input to that rendering as an implicit map, that for any view, time, or light coordinate and for any pixel can quantify how it will move if view, time or light coordinates change (Jacobian of pixel position with respect to view, time, illumination, etc.). Our X-Field representation is trained for one scene within minutes, leading to a compact set of trainable parameters and hence real-time navigation in view, time and illumination.}, }
Endnote
%0 Report %A Bemana, Mojtaba %A Myszkowski, Karol %A Seidel, Hans-Peter %A Ritschel, Tobias %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T X-Fields: Implicit Neural View-, Light- and Time-Image Interpolation : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B6EC-2 %U https://arxiv.org/abs/2010.00450 %D 2020 %X We suggest to represent an X-Field -a set of 2D images taken across different view, time or illumination conditions, i.e., video, light field, reflectance fields or combinations thereof-by learning a neural network (NN) to map their view, time or light coordinates to 2D images. Executing this NN at new coordinates results in joint view, time or light interpolation. The key idea to make this workable is a NN that already knows the "basic tricks" of graphics (lighting, 3D projection, occlusion) in a hard-coded and differentiable form. The NN represents the input to that rendering as an implicit map, that for any view, time, or light coordinate and for any pixel can quantify how it will move if view, time or light coordinates change (Jacobian of pixel position with respect to view, time, illumination, etc.). Our X-Field representation is trained for one scene within minutes, leading to a compact set of trainable parameters and hence real-time navigation in view, time and illumination. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
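To make the coordinate-to-image idea in the abstract above concrete, here is a deliberately naive sketch: a tiny numpy MLP regresses flattened images from a scalar capture coordinate and is then queried at unseen coordinates. This is only a stand-in under simplifying assumptions; the paper predicts warping Jacobians through hard-coded differentiable graphics operators rather than regressing pixels, and all function names and hyperparameters below are invented for illustration.

import numpy as np

def train_xfield_mlp(coords, images, hidden=64, lr=1e-2, epochs=2000, seed=0):
    # Toy 'implicit' interpolation: fit a tiny MLP mapping a scalar capture
    # coordinate (e.g. time) to a flattened image, then query new coordinates.
    rng = np.random.default_rng(seed)
    X = np.asarray(coords, dtype=float).reshape(-1, 1)              # (B, 1) coordinates
    Y = np.asarray(images, dtype=float).reshape(len(images), -1)    # (B, P) flattened images
    W1 = rng.standard_normal((1, hidden)) * 0.5
    b1 = np.zeros(hidden)
    W2 = rng.standard_normal((hidden, Y.shape[1])) * 0.1
    b2 = np.zeros(Y.shape[1])
    for _ in range(epochs):
        H = np.tanh(X @ W1 + b1)                  # forward pass
        err = H @ W2 + b2 - Y                     # residual; gradients below use 0.5 * mean squared error
        gW2 = H.T @ err / len(X)
        gb2 = err.mean(axis=0)
        gH = (err @ W2.T) * (1.0 - H ** 2)        # backprop through tanh
        gW1 = X.T @ gH / len(X)
        gb1 = gH.mean(axis=0)
        W1 -= lr * gW1
        b1 -= lr * gb1
        W2 -= lr * gW2
        b2 -= lr * gb2
    def predict(t):
        h = np.tanh(np.atleast_2d(t).astype(float).T @ W1 + b1)
        return (h @ W2 + b2).reshape((-1,) + np.asarray(images).shape[1:])
    return predict

# usage: two 8x8 'captures' at t = 0 and t = 1, queried at an unseen coordinate
imgs = np.stack([np.zeros((8, 8)), np.ones((8, 8))])
predict = train_xfield_mlp(np.array([0.0, 1.0]), imgs)
mid = predict(0.5)   # shape (1, 8, 8); a smooth in-between image, not ground truth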
Bernard, F., Suri, Z.K., and Theobalt, C. 2020b. MINA: Convex Mixed-Integer Programming for Non-Rigid Shape Alignment. https://arxiv.org/abs/2002.12623.
(arXiv: 2002.12623)
Abstract
We present a convex mixed-integer programming formulation for non-rigid shape matching. To this end, we propose a novel shape deformation model based on an efficient low-dimensional discrete model, so that finding a globally optimal solution is tractable in (most) practical cases. Our approach combines several favourable properties: it is independent of the initialisation, it is much more efficient to solve to global optimality compared to analogous quadratic assignment problem formulations, and it is highly flexible in terms of the variants of matching problems it can handle. Experimentally we demonstrate that our approach outperforms existing methods for sparse shape matching, that it can be used for initialising dense shape matching methods, and we showcase its flexibility on several examples.
Export
BibTeX
@online{Bernard_arXiv2002.12623, TITLE = {MINA: {C}onvex Mixed-Integer Programming for Non-Rigid Shape Alignment}, AUTHOR = {Bernard, Florian and Suri, Zeeshan Khan and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2002.12623}, EPRINT = {2002.12623}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a convex mixed-integer programming formulation for non-rigid shape matching. To this end, we propose a novel shape deformation model based on an efficient low-dimensional discrete model, so that finding a globally optimal solution is tractable in (most) practical cases. Our approach combines several favourable properties: it is independent of the initialisation, it is much more efficient to solve to global optimality compared to analogous quadratic assignment problem formulations, and it is highly flexible in terms of the variants of matching problems it can handle. Experimentally we demonstrate that our approach outperforms existing methods for sparse shape matching, that it can be used for initialising dense shape matching methods, and we showcase its flexibility on several examples.}, }
Endnote
%0 Report %A Bernard, Florian %A Suri, Zeeshan Khan %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T MINA: Convex Mixed-Integer Programming for Non-Rigid Shape Alignment : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E00C-F %U https://arxiv.org/abs/2002.12623 %D 2020 %X We present a convex mixed-integer programming formulation for non-rigid shape matching. To this end, we propose a novel shape deformation model based on an efficient low-dimensional discrete model, so that finding a globally optimal solution is tractable in (most) practical cases. Our approach combines several favourable properties: it is independent of the initialisation, it is much more efficient to solve to global optimality compared to analogous quadratic assignment problem formulations, and it is highly flexible in terms of the variants of matching problems it can handle. Experimentally we demonstrate that our approach outperforms existing methods for sparse shape matching, that it can be used for initialising dense shape matching methods, and we showcase its flexibility on several examples. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG,Mathematics, Optimization and Control, math.OC
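The MINA abstract above rests on expressing correspondences as integer variables inside a convex program. The toy sketch below, which assumes SciPy >= 1.9 for scipy.optimize.milp, solves only the bare assignment part: binary variables under permutation (row/column sum) constraints with a linear point-distance cost. The paper's actual formulation additionally couples such variables with a low-dimensional deformation model; everything here is an illustrative simplification.

import numpy as np
from scipy.optimize import Bounds, LinearConstraint, milp
from scipy.spatial.distance import cdist

def mip_point_matching(X, Y):
    # Toy one-to-one matching of two equally sized point sets as a MILP:
    # minimize sum_ij dist(X_i, Y_j) * z_ij with z a binary permutation matrix.
    n = len(X)
    c = cdist(X, Y).ravel()                                # objective over z.ravel()
    row_sums = np.kron(np.eye(n), np.ones((1, n)))         # each source point matched exactly once
    col_sums = np.kron(np.ones((1, n)), np.eye(n))         # each target point matched exactly once
    constraints = LinearConstraint(np.vstack([row_sums, col_sums]), lb=1, ub=1)
    res = milp(c, constraints=constraints,
               integrality=np.ones_like(c), bounds=Bounds(0, 1))
    return res.x.reshape(n, n).round().astype(int)

# usage: recover a random shuffling of a small point cloud
X = np.random.default_rng(1).random((5, 3))
perm = np.random.default_rng(2).permutation(5)
Z = mip_point_matching(X, X[perm])
print(Z.argmax(axis=1))        # equals np.argsort(perm), the inverse permutation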
Bhatnagar, B.L., Sminchisescu, C., Theobalt, C., and Pons-Moll, G. 2020c. Combining Implicit Function Learning and Parametric Models for 3D Human Reconstruction. https://arxiv.org/abs/2007.11432.
(arXiv: 2007.11432)
Abstract
Implicit functions represented as deep learning approximations are powerful for reconstructing 3D surfaces. However, they can only produce static surfaces that are not controllable, which provides limited ability to modify the resulting model by editing its pose or shape parameters. Nevertheless, such features are essential in building flexible models for both computer graphics and computer vision. In this work, we present methodology that combines detail-rich implicit functions and parametric representations in order to reconstruct 3D models of people that remain controllable and accurate even in the presence of clothing. Given sparse 3D point clouds sampled on the surface of a dressed person, we use an Implicit Part Network (IP-Net) to jointly predict the outer 3D surface of the dressed person, the inner body surface, and the semantic correspondences to a parametric body model. We subsequently use correspondences to fit the body model to our inner surface and then non-rigidly deform it (under a parametric body + displacement model) to the outer surface in order to capture garment, face and hair detail. In quantitative and qualitative experiments with both full body data and hand scans we show that the proposed methodology generalizes, and is effective even given incomplete point clouds collected from single-view depth images. Our models and code can be downloaded from http://virtualhumans.mpi-inf.mpg.de/ipnet.
Export
BibTeX
@online{Bhatnagar_2007.11432, TITLE = {Combining Implicit Function Learning and Parametric Models for {3D} Human Reconstruction}, AUTHOR = {Bhatnagar, Bharat Lal and Sminchisescu, Cristian and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2007.11432}, EPRINT = {2007.11432}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Implicit functions represented as deep learning approximations are powerful for reconstructing 3D surfaces. However, they can only produce static surfaces that are not controllable, which provides limited ability to modify the resulting model by editing its pose or shape parameters. Nevertheless, such features are essential in building flexible models for both computer graphics and computer vision. In this work, we present methodology that combines detail-rich implicit functions and parametric representations in order to reconstruct 3D models of people that remain controllable and accurate even in the presence of clothing. Given sparse 3D point clouds sampled on the surface of a dressed person, we use an Implicit Part Network (IP-Net)to jointly predict the outer 3D surface of the dressed person, the and inner body surface, and the semantic correspondences to a parametric body model. We subsequently use correspondences to fit the body model to our inner surface and then non-rigidly deform it (under a parametric body + displacement model) to the outer surface in order to capture garment, face and hair detail. In quantitative and qualitative experiments with both full body data and hand scans we show that the proposed methodology generalizes, and is effective even given incomplete point clouds collected from single-view depth images. Our models and code can be downloaded from http://virtualhumans.mpi-inf.mpg.de/ipnet.}, }
Endnote
%0 Report %A Bhatnagar, Bharat Lal %A Sminchisescu, Cristian %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T Combining Implicit Function Learning and Parametric Models for 3D Human Reconstruction : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E8A0-E %U https://arxiv.org/abs/2007.11432 %D 2020 %X Implicit functions represented as deep learning approximations are powerful for reconstructing 3D surfaces. However, they can only produce static surfaces that are not controllable, which provides limited ability to modify the resulting model by editing its pose or shape parameters. Nevertheless, such features are essential in building flexible models for both computer graphics and computer vision. In this work, we present methodology that combines detail-rich implicit functions and parametric representations in order to reconstruct 3D models of people that remain controllable and accurate even in the presence of clothing. Given sparse 3D point clouds sampled on the surface of a dressed person, we use an Implicit Part Network (IP-Net)to jointly predict the outer 3D surface of the dressed person, the and inner body surface, and the semantic correspondences to a parametric body model. We subsequently use correspondences to fit the body model to our inner surface and then non-rigidly deform it (under a parametric body + displacement model) to the outer surface in order to capture garment, face and hair detail. In quantitative and qualitative experiments with both full body data and hand scans we show that the proposed methodology generalizes, and is effective even given incomplete point clouds collected from single-view depth images. Our models and code can be downloaded from http://virtualhumans.mpi-inf.mpg.de/ipnet. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Bhatnagar, B.L., Sminchisescu, C., Theobalt, C., and Pons-Moll, G. 2020d. LoopReg: Self-supervised Learning of Implicit Surface Correspondences, Pose and Shape for 3D Human Mesh Registration. https://arxiv.org/abs/2010.12447.
(arXiv: 2010.12447)
Abstract
We address the problem of fitting 3D human models to 3D scans of dressed humans. Classical methods optimize both the data-to-model correspondences and the human model parameters (pose and shape), but are reliable only when initialized close to the solution. Some methods initialize the optimization based on fully supervised correspondence predictors, which is not differentiable end-to-end, and can only process a single scan at a time. Our main contribution is LoopReg, an end-to-end learning framework to register a corpus of scans to a common 3D human model. The key idea is to create a self-supervised loop. A backward map, parameterized by a Neural Network, predicts the correspondence from every scan point to the surface of the human model. A forward map, parameterized by a human model, transforms the corresponding points back to the scan based on the model parameters (pose and shape), thus closing the loop. Formulating this closed loop is not straightforward because it is not trivial to force the output of the NN to be on the surface of the human model - outside this surface the human model is not even defined. To this end, we propose two key innovations. First, we define the canonical surface implicitly as the zero level set of a distance field in R3, which in contrast to more common UV parameterizations, does not require cutting the surface, does not have discontinuities, and does not induce distortion. Second, we diffuse the human model to the 3D domain R3. This allows to map the NN predictions forward, even when they slightly deviate from the zero level set. Results demonstrate that we can train LoopReg mainly self-supervised - following a supervised warm-start, the model becomes increasingly more accurate as additional unlabelled raw scans are processed. Our code and pre-trained models can be downloaded for research.
Export
BibTeX
@online{Bhatnagar_2010.12447, TITLE = {{LoopReg}: {S}elf-supervised Learning of Implicit Surface Correspondences, Pose and Shape for {3D} Human Mesh Registration}, AUTHOR = {Bhatnagar, Bharat Lal and Sminchisescu, Cristian and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2010.12447}, EPRINT = {2010.12447}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We address the problem of fitting 3D human models to 3D scans of dressed humans. Classical methods optimize both the data-to-model correspondences and the human model parameters (pose and shape), but are reliable only when initialized close to the solution. Some methods initialize the optimization based on fully supervised correspondence predictors, which is not differentiable end-to-end, and can only process a single scan at a time. Our main contribution is LoopReg, an end-to-end learning framework to register a corpus of scans to a common 3D human model. The key idea is to create a self-supervised loop. A backward map, parameterized by a Neural Network, predicts the correspondence from every scan point to the surface of the human model. A forward map, parameterized by a human model, transforms the corresponding points back to the scan based on the model parameters (pose and shape), thus closing the loop. Formulating this closed loop is not straightforward because it is not trivial to force the output of the NN to be on the surface of the human model -- outside this surface the human model is not even defined. To this end, we propose two key innovations. First, we define the canonical surface implicitly as the zero level set of a distance field in R3, which in contrast to morecommon UV parameterizations, does not require cutting the surface, does not have discontinuities, and does not induce distortion. Second, we diffuse the human model to the 3D domain R3. This allows to map the NN predictions forward,even when they slightly deviate from the zero level set. Results demonstrate that we can train LoopRegmainly self-supervised -- following a supervised warm-start, the model becomes increasingly more accurate as additional unlabelled raw scans are processed. Our code and pre-trained models can be downloaded for research.}, }
Endnote
%0 Report %A Bhatnagar, Bharat Lal %A Sminchisescu, Cristian %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T LoopReg: Self-supervised Learning of Implicit Surface Correspondences, Pose and Shape for 3D Human Mesh Registration : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E91C-4 %U https://arxiv.org/abs/2010.12447 %D 2020 %X We address the problem of fitting 3D human models to 3D scans of dressed humans. Classical methods optimize both the data-to-model correspondences and the human model parameters (pose and shape), but are reliable only when initialized close to the solution. Some methods initialize the optimization based on fully supervised correspondence predictors, which is not differentiable end-to-end, and can only process a single scan at a time. Our main contribution is LoopReg, an end-to-end learning framework to register a corpus of scans to a common 3D human model. The key idea is to create a self-supervised loop. A backward map, parameterized by a Neural Network, predicts the correspondence from every scan point to the surface of the human model. A forward map, parameterized by a human model, transforms the corresponding points back to the scan based on the model parameters (pose and shape), thus closing the loop. Formulating this closed loop is not straightforward because it is not trivial to force the output of the NN to be on the surface of the human model - outside this surface the human model is not even defined. To this end, we propose two key innovations. First, we define the canonical surface implicitly as the zero level set of a distance field in R3, which in contrast to morecommon UV parameterizations, does not require cutting the surface, does not have discontinuities, and does not induce distortion. Second, we diffuse the human model to the 3D domain R3. This allows to map the NN predictions forward,even when they slightly deviate from the zero level set. Results demonstrate that we can train LoopRegmainly self-supervised - following a supervised warm-start, the model becomes increasingly more accurate as additional unlabelled raw scans are processed. Our code and pre-trained models can be downloaded for research. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
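The LoopReg abstract above centres on a self-supervised loop: a backward map predicts correspondences from scan points to a canonical model, and a forward parametric map transforms them back, so the loop-closure error can supervise both. The snippet below only evaluates such a loop-closure objective, with a stand-in forward map (a global rigid transform of a template) instead of a human body model and precomputed soft correspondence weights instead of a trained network; it sketches the loss structure, not the method.

import numpy as np

def loop_closure_loss(scan_points, corr_weights, template, forward_map, params):
    # Toy loop-closure objective in the spirit of LoopReg (structure only).
    # corr_weights: (N, V) soft correspondences from a 'backward' predictor,
    #               rows sum to 1, mapping each scan point to template vertices.
    # forward_map:  callable(template, params) -> posed vertices; a stand-in for
    #               the parametric human body model used in the paper.
    posed = forward_map(template, params)            # (V, 3) model surface in scan space
    reconstructed = corr_weights @ posed             # (N, 3) scan points predicted by the loop
    return np.mean(np.sum((reconstructed - scan_points) ** 2, axis=-1))

def rigid_forward(template, params):
    # Stand-in forward map: a global rotation + translation of the template.
    R, t = params
    return template @ R.T + t

# usage: with perfect correspondences and the true transform, the loss is ~0
rng = np.random.default_rng(0)
template = rng.standard_normal((100, 3))
R, t = np.eye(3), np.array([0.2, -0.1, 0.4])
scan = rigid_forward(template, (R, t))
corr = np.eye(100)                                   # hard one-to-one correspondences
print(loop_closure_loss(scan, corr, template, rigid_forward, (R, t)))   # 0.0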
Chizhov, V., Georgiev, I., Myszkowski, K., and Singh, G. 2020. Perceptual Error Optimization for Monte Carlo Rendering. https://arxiv.org/abs/2012.02344.
(arXiv: 2012.02344)
Abstract
Realistic image synthesis involves computing high-dimensional light transport integrals which in practice are numerically estimated using Monte Carlo integration. The error of this estimation manifests itself in the image as visually displeasing aliasing or noise. To ameliorate this, we develop a theoretical framework for optimizing screen-space error distribution. Our model is flexible and works for arbitrary target error power spectra. We focus on perceptual error optimization by leveraging models of the human visual system's (HVS) point spread function (PSF) from halftoning literature. This results in a specific optimization problem whose solution distributes the error as visually pleasing blue noise in image space. We develop a set of algorithms that provide a trade-off between quality and speed, showing substantial improvements over prior state of the art. We perform evaluations using both quantitative and perceptual error metrics to support our analysis, and provide extensive supplemental material to help evaluate the perceptual improvements achieved by our methods.
Export
BibTeX
@online{Chizhov_arXiv2012.02344, TITLE = {Perceptual Error Optimization for {Monte Carlo} Rendering}, AUTHOR = {Chizhov, Vassillen and Georgiev, Iliyan and Myszkowski, Karol and Singh, Gurprit}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.02344}, EPRINT = {2012.02344}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Realistic image synthesis involves computing high-dimensional light transport integrals which in practice are numerically estimated using Monte Carlo integration. The error of this estimation manifests itself in the image as visually displeasing aliasing or noise. To ameliorate this, we develop a theoretical framework for optimizing screen-space error distribution. Our model is flexible and works for arbitrary target error power spectra. We focus on perceptual error optimization by leveraging models of the human visual system's (HVS) point spread function (PSF) from halftoning literature. This results in a specific optimization problem whose solution distributes the error as visually pleasing blue noise in image space. We develop a set of algorithms that provide a trade-off between quality and speed, showing substantial improvements over prior state of the art. We perform evaluations using both quantitative and perceptual error metrics to support our analysis, and provide extensive supplemental material to help evaluate the perceptual improvements achieved by our methods.}, }
Endnote
%0 Report %A Chizhov, Vassillen %A Georgiev, Iliyan %A Myszkowski, Karol %A Singh, Gurprit %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Perceptual Error Optimization for Monte Carlo Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CEB7-3 %U https://arxiv.org/abs/2012.02344 %D 2020 %X Realistic image synthesis involves computing high-dimensional light transport integrals which in practice are numerically estimated using Monte Carlo integration. The error of this estimation manifests itself in the image as visually displeasing aliasing or noise. To ameliorate this, we develop a theoretical framework for optimizing screen-space error distribution. Our model is flexible and works for arbitrary target error power spectra. We focus on perceptual error optimization by leveraging models of the human visual system's (HVS) point spread function (PSF) from halftoning literature. This results in a specific optimization problem whose solution distributes the error as visually pleasing blue noise in image space. We develop a set of algorithms that provide a trade-off between quality and speed, showing substantial improvements over prior state of the art. We perform evaluations using both quantitative and perceptual error metrics to support our analysis, and provide extensive supplemental material to help evaluate the perceptual improvements achieved by our methods. %K Computer Science, Graphics, cs.GR
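The screen-space objective described in this entry can be illustrated with a small sketch, assuming NumPy/SciPy. A Gaussian low-pass filter is a crude stand-in for the HVS point spread function, and random pixel swaps redistribute a fixed error budget so that the filtered (perceived) error shrinks, pushing the error spectrum towards blue noise; this is purely illustrative and not the paper's optimization algorithms.

import numpy as np
from scipy.ndimage import gaussian_filter

def perceptual_error(error_image, sigma=1.5):
    # || g * e ||^2 : error measured after low-pass filtering by the "eye"
    return np.sum(gaussian_filter(error_image, sigma) ** 2)

rng = np.random.default_rng(0)
reference = np.zeros((64, 64))
estimate = rng.normal(0.0, 0.1, size=(64, 64))     # toy per-pixel Monte Carlo error
error = estimate - reference

# Greedy pixel swaps: the same error values, redistributed so the perceived
# (filtered) error decreases.
for _ in range(2000):
    (y0, x0), (y1, x1) = rng.integers(0, 64, size=(2, 2))
    swapped = error.copy()
    swapped[y0, x0], swapped[y1, x1] = error[y1, x1], error[y0, x0]
    if perceptual_error(swapped) < perceptual_error(error):
        error = swapped

print("perceived error after redistribution:", perceptual_error(error))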
Çoğalan, U., Bemana, M., Myszkowski, K., Seidel, H.-P., and Ritschel, T. 2020. HDR Denoising and Deblurring by Learning Spatio-temporal Distortion Models. https://arxiv.org/abs/2012.12009.
(arXiv: 2012.12009)
Abstract
We seek to reconstruct sharp and noise-free high-dynamic range (HDR) video from a dual-exposure sensor that records different low-dynamic range (LDR) information in different pixel columns: Odd columns provide low-exposure, sharp, but noisy information; even columns complement this with less noisy, high-exposure, but motion-blurred data. Previous LDR work learns to deblur and denoise (DISTORTED->CLEAN) supervised by pairs of CLEAN and DISTORTED images. Regrettably, capturing DISTORTED sensor readings is time-consuming; moreover, there is a lack of CLEAN HDR videos. We suggest a method to overcome those two limitations. First, we learn a different function instead: CLEAN->DISTORTED, which generates samples containing correlated pixel noise, and row and column noise, as well as motion blur from a low number of CLEAN sensor readings. Second, as there is not enough CLEAN HDR video available, we devise a method to learn from LDR video instead. Our approach compares favorably to several strong baselines, and can boost existing methods when they are re-trained on our data. Combined with spatial and temporal super-resolution, it enables applications such as relighting with low noise or blur.

Export
BibTeX
@online{Cogalan_arXiv2012.12009, TITLE = {{HDR} Denoising and Deblurring by Learning Spatio-temporal Distortion Model}, AUTHOR = {{\c C}o{\u g}alan, U{\u g}ur and Bemana, Mojtaba and Myszkowski, Karol and Seidel, Hans-Peter and Ritschel, Tobias}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.12009}, EPRINT = {2012.12009}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We seek to reconstruct sharp and noise-free high-dynamic range (HDR) video from a dual-exposure sensor that records different low-dynamic range (LDR) information in different pixel columns: Odd columns provide low-exposure, sharp, but noisy information; even columns complement this with less noisy, high-exposure, but motion-blurred data. Previous LDR work learns to deblur and denoise (DISTORTED->CLEAN) supervised by pairs of CLEAN and DISTORTED images. Regrettably, capturing DISTORTED sensor readings is time-consuming; as well, there is a lack of CLEAN HDR videos. We suggest a method to overcome those two limitations. First, we learn a different function instead: CLEAN->DISTORTED, which generates samples containing correlated pixel noise, and row and column noise, as well as motion blur from a low number of CLEAN sensor readings. Second, as there is not enough CLEAN HDR video available, we devise a method to learn from LDR video in-stead. Our approach compares favorably to several strong baselines, and can boost existing methods when they are re-trained on our data. Combined with spatial and temporal super-resolution, it enables applications such as re-lighting with low noise or blur.}, }
Endnote
%0 Report %A Çoğalan, Uğur %A Bemana, Mojtaba %A Myszkowski, Karol %A Seidel, Hans-Peter %A Ritschel, Tobias %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T HDR Denoising and Deblurring by Learning Spatio-temporal Distortion Models : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B721-5 %U https://arxiv.org/abs/2012.12009 %D 2020 %X We seek to reconstruct sharp and noise-free high-dynamic range (HDR) video from a dual-exposure sensor that records different low-dynamic range (LDR) information in different pixel columns: Odd columns provide low-exposure, sharp, but noisy information; even columns complement this with less noisy, high-exposure, but motion-blurred data. Previous LDR work learns to deblur and denoise (DISTORTED->CLEAN) supervised by pairs of CLEAN and DISTORTED images. Regrettably, capturing DISTORTED sensor readings is time-consuming; as well, there is a lack of CLEAN HDR videos. We suggest a method to overcome those two limitations. First, we learn a different function instead: CLEAN->DISTORTED, which generates samples containing correlated pixel noise, and row and column noise, as well as motion blur from a low number of CLEAN sensor readings. Second, as there is not enough CLEAN HDR video available, we devise a method to learn from LDR video in-stead. Our approach compares favorably to several strong baselines, and can boost existing methods when they are re-trained on our data. Combined with spatial and temporal super-resolution, it enables applications such as re-lighting with low noise or blur. %K eess.IV,Computer Science, Computer Vision and Pattern Recognition, cs.CV
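The CLEAN->DISTORTED direction described in this entry can be sketched as follows, assuming NumPy: starting from a clean frame, simulate the dual-exposure readout with noisy short-exposure odd columns and motion-blurred long-exposure even columns. The noise and blur parameters are illustrative placeholders, not the learned spatio-temporal distortion model of the paper.

import numpy as np

def clean_to_distorted(clean, exposure_ratio=4.0, read_noise=0.02, blur=5, rng=None):
    rng = rng or np.random.default_rng()
    distorted = np.empty_like(clean)
    # odd columns: short exposure -> darker and noisier
    short = clean / exposure_ratio
    distorted[:, 1::2] = short[:, 1::2] + rng.normal(0, read_noise, short[:, 1::2].shape)
    # even columns: long exposure -> horizontal motion blur (box filter)
    kernel = np.ones(blur) / blur
    blurred = np.apply_along_axis(lambda r: np.convolve(r, kernel, mode="same"), 1, clean)
    distorted[:, 0::2] = np.clip(blurred[:, 0::2], 0.0, 1.0)
    return distorted

clean = np.random.default_rng(1).random((128, 128))
pair = (clean_to_distorted(clean), clean)   # synthetic training pair for DISTORTED->CLEAN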
Fox, G., Liu, W., Kim, H., Seidel, H.-P., Elgharib, M., and Theobalt, C. 2020. VideoForensicsHQ: Detecting High-quality Manipulated Face Videos. https://arxiv.org/abs/2005.10360.
(arXiv: 2005.10360)
Abstract
New approaches to synthesize and manipulate face videos at very high quality have paved the way for new applications in computer animation, virtual and augmented reality, or face video analysis. However, there are concerns that they may be used in a malicious way, e.g. to manipulate videos of public figures, politicians or reporters, to spread false information. The research community therefore developed techniques for automated detection of modified imagery, and assembled benchmark datasets showing manipulations by state-of-the-art techniques. In this paper, we contribute to this initiative in two ways: First, we present a new audio-visual benchmark dataset. It shows some of the highest quality visual manipulations available today. Human observers find them significantly harder to identify as forged than videos from other benchmarks. Furthermore, we propose a new family of deep-learning-based fake detectors, demonstrating that existing detectors are not well-suited for detecting fakes of a quality as high as presented in our dataset. Our detectors examine spatial and temporal features. This allows them to outperform existing approaches both in terms of detection accuracy and generalization to unseen fake generation methods and unseen identities.
Export
BibTeX
@online{Fox_2005.10360, TITLE = {{VideoForensicsHQ}: {D}etecting High-quality Manipulated Face Videos}, AUTHOR = {Fox, Gereon and Liu, Wentao and Kim, Hyeongwoo and Seidel, Hans-Peter and Elgharib, Mohamed and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2005.10360}, EPRINT = {2005.10360}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {New approaches to synthesize and manipulate face videos at very high quality have paved the way for new applications in computer animation, virtual and augmented reality, or face video analysis. However, there are concerns that they may be used in a malicious way, e.g. to manipulate videos of public figures, politicians or reporters, to spread false information. The research community therefore developed techniques for automated detection of modified imagery, and assembled benchmark datasets showing manipulatons by state-of-the-art techniques. In this paper, we contribute to this initiative in two ways: First, we present a new audio-visual benchmark dataset. It shows some of the highest quality visual manipulations available today. Human observers find them significantly harder to identify as forged than videos from other benchmarks. Furthermore we propose new family of deep-learning-based fake detectors, demonstrating that existing detectors are not well-suited for detecting fakes of a quality as high as presented in our dataset. Our detectors examine spatial and temporal features. This allows them to outperform existing approaches both in terms of high detection accuracy and generalization to unseen fake generation methods and unseen identities.}, }
Endnote
%0 Report %A Fox, Gereon %A Liu, Wentao %A Kim, Hyeongwoo %A Seidel, Hans-Peter %A Elgharib, Mohamed %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T VideoForensicsHQ: Detecting High-quality Manipulated Face Videos : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B109-7 %U https://arxiv.org/abs/2005.10360 %D 2020 %X New approaches to synthesize and manipulate face videos at very high quality have paved the way for new applications in computer animation, virtual and augmented reality, or face video analysis. However, there are concerns that they may be used in a malicious way, e.g. to manipulate videos of public figures, politicians or reporters, to spread false information. The research community therefore developed techniques for automated detection of modified imagery, and assembled benchmark datasets showing manipulatons by state-of-the-art techniques. In this paper, we contribute to this initiative in two ways: First, we present a new audio-visual benchmark dataset. It shows some of the highest quality visual manipulations available today. Human observers find them significantly harder to identify as forged than videos from other benchmarks. Furthermore we propose new family of deep-learning-based fake detectors, demonstrating that existing detectors are not well-suited for detecting fakes of a quality as high as presented in our dataset. Our detectors examine spatial and temporal features. This allows them to outperform existing approaches both in terms of high detection accuracy and generalization to unseen fake generation methods and unseen identities. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
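As a rough illustration of the kind of spatio-temporal detector this entry alludes to, the sketch below (assuming PyTorch) applies a small 3D CNN to a short clip of face crops and outputs a real/fake logit. The architecture and sizes are illustrative only and unrelated to the detectors actually proposed in the paper.

import torch
import torch.nn as nn

detector = nn.Sequential(
    nn.Conv3d(3, 16, kernel_size=(3, 3, 3), padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool3d(1), nn.Flatten(),
    nn.Linear(16, 1))                          # logit: fake vs. real

clip = torch.randn(2, 3, 8, 64, 64)            # (batch, RGB, frames, H, W) of face crops
fake_logit = detector(clip)                    # spatial and temporal cues combined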
Habermann, M., Xu, W., Zollhöfer, M., Pons-Moll, G., and Theobalt, C. 2020b. DeepCap: Monocular Human Performance Capture Using Weak Supervision. https://arxiv.org/abs/2003.08325.
(arXiv: 2003.08325)
Abstract
Human performance capture is a highly important computer vision problem with many applications in movie production and virtual/augmented reality. Many previous performance capture approaches either required expensive multi-view setups or did not recover dense space-time coherent geometry with frame-to-frame correspondences. We propose a novel deep learning approach for monocular dense human performance capture. Our method is trained in a weakly supervised manner based on multi-view supervision completely removing the need for training data with 3D ground truth annotations. The network architecture is based on two separate networks that disentangle the task into a pose estimation and a non-rigid surface deformation step. Extensive qualitative and quantitative evaluations show that our approach outperforms the state of the art in terms of quality and robustness.
Export
BibTeX
@online{Habermann2003.08325, TITLE = {{DeepCap}: {M}onocular Human Performance Capture Using Weak Supervision}, AUTHOR = {Habermann, Marc and Xu, Weipeng and Zollh{\"o}fer, Michael and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2003.08325}, EPRINT = {2003.08325}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Human performance capture is a highly important computer vision problem with many applications in movie production and virtual/augmented reality. Many previous performance capture approaches either required expensive multi-view setups or did not recover dense space-time coherent geometry with frame-to-frame correspondences. We propose a novel deep learning approach for monocular dense human performance capture. Our method is trained in a weakly supervised manner based on multi-view supervision completely removing the need for training data with 3D ground truth annotations. The network architecture is based on two separate networks that disentangle the task into a pose estimation and a non-rigid surface deformation step. Extensive qualitative and quantitative evaluations show that our approach outperforms the state of the art in terms of quality and robustness.}, }
Endnote
%0 Report %A Habermann, Marc %A Xu, Weipeng %A Zollhöfer, Michael %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T DeepCap: Monocular Human Performance Capture Using Weak Supervision : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E010-9 %U https://arxiv.org/abs/2003.08325 %D 2020 %X Human performance capture is a highly important computer vision problem with many applications in movie production and virtual/augmented reality. Many previous performance capture approaches either required expensive multi-view setups or did not recover dense space-time coherent geometry with frame-to-frame correspondences. We propose a novel deep learning approach for monocular dense human performance capture. Our method is trained in a weakly supervised manner based on multi-view supervision completely removing the need for training data with 3D ground truth annotations. The network architecture is based on two separate networks that disentangle the task into a pose estimation and a non-rigid surface deformation step. Extensive qualitative and quantitative evaluations show that our approach outperforms the state of the art in terms of quality and robustness. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
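A minimal sketch of the two-network disentanglement described in this entry (pose estimation followed by non-rigid surface deformation), assuming PyTorch. The tiny MLPs and the random template are placeholders for the paper's architectures and body template, and the weakly supervised multi-view 2D losses are omitted.

import torch
import torch.nn as nn

class PoseNet(nn.Module):
    def __init__(self, n_joints=24):
        super().__init__()
        self.backbone = nn.Sequential(nn.Flatten(), nn.Linear(3 * 64 * 64, 256),
                                      nn.ReLU(), nn.Linear(256, n_joints * 3))
    def forward(self, image):                  # (B, 3, 64, 64) -> (B, J, 3) joint angles
        return self.backbone(image).view(-1, 24, 3)

class DefNet(nn.Module):
    def __init__(self, n_vertices=500):
        super().__init__()
        self.backbone = nn.Sequential(nn.Flatten(), nn.Linear(3 * 64 * 64, 256),
                                      nn.ReLU(), nn.Linear(256, n_vertices * 3))
    def forward(self, image):                  # per-vertex non-rigid displacements
        return self.backbone(image).view(-1, 500, 3)

image = torch.randn(2, 3, 64, 64)
template = torch.randn(500, 3)                 # rest-pose template vertices (toy)
pose = PoseNet()(image)                        # stage 1: skeletal pose
verts = template + DefNet()(image)             # stage 2: non-rigid surface deformation
# In training, both stages would be supervised only by multi-view 2D losses.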
Kappel, M., Golyanik, V., Elgharib, M., et al. 2020. High-Fidelity Neural Human Motion Transfer from Monocular Video. https://arxiv.org/abs/2012.10974.
(arXiv: 2012.10974)
Abstract
Video-based human motion transfer creates video animations of humans following a source motion. Current methods show remarkable results for tightly-clad subjects. However, the lack of temporally consistent handling of plausible clothing dynamics, including fine and high-frequency details, significantly limits the attainable visual quality. We address these limitations for the first time in the literature and present a new framework which performs high-fidelity and temporally-consistent human motion transfer with natural pose-dependent non-rigid deformations, for several types of loose garments. In contrast to the previous techniques, we perform image generation in three subsequent stages, synthesizing human shape, structure, and appearance. Given a monocular RGB video of an actor, we train a stack of recurrent deep neural networks that generate these intermediate representations from 2D poses and their temporal derivatives. Splitting the difficult motion transfer problem into subtasks that are aware of the temporal motion context helps us to synthesize results with plausible dynamics and pose-dependent detail. It also allows artistic control of results by manipulation of individual framework stages. In the experimental results, we significantly outperform the state-of-the-art in terms of video realism. Our code and data will be made publicly available.
Export
BibTeX
@online{Kappel_arXiv2012.10974, TITLE = {High-Fidelity Neural Human Motion Transfer from Monocular Video}, AUTHOR = {Kappel, Moritz and Golyanik, Vladislav and Elgharib, Mohamed and Henningson, Jann-Ole and Seidel, Hans-Peter and Castillo, Susana and Theobalt, Christian and Magnor, Marcus A.}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.10974}, EPRINT = {2012.10974}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Video-based human motion transfer creates video animations of humans following a source motion. Current methods show remarkable results for tightly-clad subjects. However, the lack of temporally consistent handling of plausible clothing dynamics, including fine and high-frequency details, significantly limits the attainable visual quality. We address these limitations for the first time in the literature and present a new framework which performs high-fidelity and temporally-consistent human motion transfer with natural pose-dependent non-rigid deformations, for several types of loose garments. In contrast to the previous techniques, we perform image generation in three subsequent stages, synthesizing human shape, structure, and appearance. Given a monocular RGB video of an actor, we train a stack of recurrent deep neural networks that generate these intermediate representations from 2D poses and their temporal derivatives. Splitting the difficult motion transfer problem into subtasks that are aware of the temporal motion context helps us to synthesize results with plausible dynamics and pose-dependent detail. It also allows artistic control of results by manipulation of individual framework stages. In the experimental results, we significantly outperform the state-of-the-art in terms of video realism. Our code and data will be made publicly available.}, }
Endnote
%0 Report %A Kappel, Moritz %A Golyanik, Vladislav %A Elgharib, Mohamed %A Henningson, Jann-Ole %A Seidel, Hans-Peter %A Castillo, Susana %A Theobalt, Christian %A Magnor, Marcus A. %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T High-Fidelity Neural Human Motion Transfer from Monocular Video : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B715-3 %U https://arxiv.org/abs/2012.10974 %D 2020 %X Video-based human motion transfer creates video animations of humans following a source motion. Current methods show remarkable results for tightly-clad subjects. However, the lack of temporally consistent handling of plausible clothing dynamics, including fine and high-frequency details, significantly limits the attainable visual quality. We address these limitations for the first time in the literature and present a new framework which performs high-fidelity and temporally-consistent human motion transfer with natural pose-dependent non-rigid deformations, for several types of loose garments. In contrast to the previous techniques, we perform image generation in three subsequent stages, synthesizing human shape, structure, and appearance. Given a monocular RGB video of an actor, we train a stack of recurrent deep neural networks that generate these intermediate representations from 2D poses and their temporal derivatives. Splitting the difficult motion transfer problem into subtasks that are aware of the temporal motion context helps us to synthesize results with plausible dynamics and pose-dependent detail. It also allows artistic control of results by manipulation of individual framework stages. In the experimental results, we significantly outperform the state-of-the-art in terms of video realism. Our code and data will be made publicly available. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG
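The three-stage generation order described in this entry (shape, then structure, then appearance) can be sketched as follows, assuming PyTorch. Each stage here is a tiny GRU over per-frame 2D pose vectors, standing in for the recurrent image-to-image networks of the paper; the input and output dimensions are arbitrary toy sizes.

import torch
import torch.nn as nn

class Stage(nn.Module):
    def __init__(self, in_dim, out_dim, hidden=64):
        super().__init__()
        self.rnn = nn.GRU(in_dim, hidden, batch_first=True)
        self.head = nn.Linear(hidden, out_dim)
    def forward(self, x):                       # (B, T, in_dim) -> (B, T, out_dim)
        h, _ = self.rnn(x)
        return self.head(h)

poses = torch.randn(1, 30, 2 * 25)              # 30 frames of 2D joint positions
shape_stage = Stage(2 * 25, 16)                 # stage 1: human shape code
structure_stage = Stage(2 * 25 + 16, 32)        # stage 2: garment structure code
appearance_stage = Stage(2 * 25 + 32, 3 * 8)    # stage 3: appearance code (toy size)

shape = shape_stage(poses)
structure = structure_stage(torch.cat([poses, shape], dim=-1))
appearance = appearance_stage(torch.cat([poses, structure], dim=-1))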
Liu, L., Gu, J., Lin, K.Z., Chua, T.-S., and Theobalt, C. 2020c. Neural Sparse Voxel Fields. https://arxiv.org/abs/2007.11571.
(arXiv: 2007.11571)
Abstract
Photo-realistic free-viewpoint rendering of real-world scenes using classical computer graphics techniques is challenging, because it requires the difficult step of capturing detailed appearance and geometry models. Recent studies have demonstrated promising results by learning scene representations that implicitly encode both geometry and appearance without 3D supervision. However, existing approaches in practice often show blurry renderings caused by the limited network capacity or the difficulty in finding accurate intersections of camera rays with the scene geometry. Synthesizing high-resolution imagery from these representations often requires time-consuming optical ray marching. In this work, we introduce Neural Sparse Voxel Fields (NSVF), a new neural scene representation for fast and high-quality free-viewpoint rendering. NSVF defines a set of voxel-bounded implicit fields organized in a sparse voxel octree to model local properties in each cell. We progressively learn the underlying voxel structures with a differentiable ray-marching operation from only a set of posed RGB images. With the sparse voxel octree structure, rendering novel views can be accelerated by skipping the voxels containing no relevant scene content. Our method is typically over 10 times faster than the state-of-the-art (namely, NeRF(Mildenhall et al., 2020)) at inference time while achieving higher quality results. Furthermore, by utilizing an explicit sparse voxel representation, our method can easily be applied to scene editing and scene composition. We also demonstrate several challenging tasks, including multi-scene learning, free-viewpoint rendering of a moving human, and large-scale scene rendering. Code and data are available at our website: https://github.com/facebookresearch/NSVF.
Export
BibTeX
@online{Liu_2007.11571, TITLE = {Neural Sparse Voxel Fields}, AUTHOR = {Liu, Lingjie and Gu, Jiatao and Lin, Kyaw Zaw and Chua, Tat-Seng and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2007.11571}, EPRINT = {2007.11571}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Photo-realistic free-viewpoint rendering of real-world scenes using classical computer graphics techniques is challenging, because it requires the difficult step of capturing detailed appearance and geometry models. Recent studies have demonstrated promising results by learning scene representations that implicitly encode both geometry and appearance without 3D supervision. However, existing approaches in practice often show blurry renderings caused by the limited network capacity or the difficulty in finding accurate intersections of camera rays with the scene geometry. Synthesizing high-resolution imagery from these representations often requires time-consuming optical ray marching. In this work, we introduce Neural Sparse Voxel Fields (NSVF), a new neural scene representation for fast and high-quality free-viewpoint rendering. NSVF defines a set of voxel-bounded implicit fields organized in a sparse voxel octree to model local properties in each cell. We progressively learn the underlying voxel structures with a differentiable ray-marching operation from only a set of posed RGB images. With the sparse voxel octree structure, rendering novel views can be accelerated by skipping the voxels containing no relevant scene content. Our method is typically over 10 times faster than the state-of-the-art (namely, NeRF(Mildenhall et al., 2020)) at inference time while achieving higher quality results. Furthermore, by utilizing an explicit sparse voxel representation, our method can easily be applied to scene editing and scene composition. We also demonstrate several challenging tasks, including multi-scene learning, free-viewpoint rendering of a moving human, and large-scale scene rendering. Code and data are available at our website: https://github.com/facebookresearch/NSVF.}, }
Endnote
%0 Report %A Liu, Lingjie %A Gu, Jiatao %A Lin, Kyaw Zaw %A Chua, Tat-Seng %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Sparse Voxel Fields : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E8B2-A %U https://arxiv.org/abs/2007.11571 %D 2020 %X Photo-realistic free-viewpoint rendering of real-world scenes using classical computer graphics techniques is challenging, because it requires the difficult step of capturing detailed appearance and geometry models. Recent studies have demonstrated promising results by learning scene representations that implicitly encode both geometry and appearance without 3D supervision. However, existing approaches in practice often show blurry renderings caused by the limited network capacity or the difficulty in finding accurate intersections of camera rays with the scene geometry. Synthesizing high-resolution imagery from these representations often requires time-consuming optical ray marching. In this work, we introduce Neural Sparse Voxel Fields (NSVF), a new neural scene representation for fast and high-quality free-viewpoint rendering. NSVF defines a set of voxel-bounded implicit fields organized in a sparse voxel octree to model local properties in each cell. We progressively learn the underlying voxel structures with a differentiable ray-marching operation from only a set of posed RGB images. With the sparse voxel octree structure, rendering novel views can be accelerated by skipping the voxels containing no relevant scene content. Our method is typically over 10 times faster than the state-of-the-art (namely, NeRF(Mildenhall et al., 2020)) at inference time while achieving higher quality results. Furthermore, by utilizing an explicit sparse voxel representation, our method can easily be applied to scene editing and scene composition. We also demonstrate several challenging tasks, including multi-scene learning, free-viewpoint rendering of a moving human, and large-scale scene rendering. Code and data are available at our website: https://github.com/facebookresearch/NSVF. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG
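The empty-space skipping behind the speedup described in this entry can be sketched as follows, assuming NumPy. A dense boolean occupancy grid stands in for the sparse voxel octree and query_field is a hypothetical toy density; in the paper, the field inside each occupied voxel is a learned, voxel-bounded implicit function and the marching is differentiable.

import numpy as np

GRID = 32
occupancy = np.zeros((GRID, GRID, GRID), dtype=bool)
occupancy[12:20, 12:20, 12:20] = True           # toy "scene content"

def query_field(point):
    return np.exp(-np.sum((point - 0.5) ** 2) * 40.0)   # toy density field

def march(origin, direction, step=0.01, n_steps=200):
    transmittance, color = 1.0, 0.0
    for i in range(n_steps):
        p = origin + i * step * direction
        voxel = np.floor(p * GRID).astype(int)
        if np.any(voxel < 0) or np.any(voxel >= GRID):
            continue
        if not occupancy[tuple(voxel)]:
            continue                            # skip empty voxels: no field evaluation
        density = query_field(p)
        alpha = 1.0 - np.exp(-density * step)
        color += transmittance * alpha          # toy white emitter
        transmittance *= 1.0 - alpha
    return color

print(march(np.array([0.0, 0.5, 0.5]), np.array([1.0, 0.0, 0.0])))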
Li, Y., Habermann, M., Thomaszewski, B., Coros, S., Beeler, T., and Theobalt, C. 2020. Deep Physics-aware Inference of Cloth Deformation for Monocular Human Performance Capture. https://arxiv.org/abs/2011.12866.
(arXiv: 2011.12866)
Abstract
Recent monocular human performance capture approaches have shown compelling dense tracking results of the full body from a single RGB camera. However, existing methods either do not estimate clothing at all or model cloth deformation with simple geometric priors instead of taking into account the underlying physical principles. This leads to noticeable artifacts in their reconstructions, such as baked-in wrinkles, implausible deformations that seemingly defy gravity, and intersections between cloth and body. To address these problems, we propose a person-specific, learning-based method that integrates a finite element-based simulation layer into the training process to provide for the first time physics supervision in the context of weakly-supervised deep monocular human performance capture. We show how integrating physics into the training process improves the learned cloth deformations, allows modeling clothing as a separate piece of geometry, and largely reduces cloth-body intersections. Relying only on weak 2D multi-view supervision during training, our approach leads to a significant improvement over current state-of-the-art methods and is thus a clear step towards realistic monocular capture of the entire deforming surface of a clothed human.
Export
BibTeX
@online{Li_2011.12866, TITLE = {Deep Physics-aware Inference of Cloth Deformation for Monocular Human Performance Capture}, AUTHOR = {Li, Yue and Habermann, Marc and Thomaszewski,, Bernhard and Coros, Stelian and Beeler, Thabo and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2011.12866}, EPRINT = {2011.12866}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Recent monocular human performance capture approaches have shown compelling dense tracking results of the full body from a single RGB camera. However, existing methods either do not estimate clothing at all or model cloth deformation with simple geometric priors instead of taking into account the underlying physical principles. This leads to noticeable artifacts in their reconstructions, such as baked-in wrinkles, implausible deformations that seemingly defy gravity, and intersections between cloth and body. To address these problems, we propose a person-specific, learning-based method that integrates a finite element-based simulation layer into the training process to provide for the first time physics supervision in the context of weakly-supervised deep monocular human performance capture. We show how integrating physics into the training process improves the learned cloth deformations, allows modeling clothing as a separate piece of geometry, and largely reduces cloth-body intersections. Relying only on weak 2D multi-view supervision during training, our approach leads to a significant improvement over current state-of-the-art methods and is thus a clear step towards realistic monocular capture of the entire deforming surface of a clothed human.}, }
Endnote
%0 Report %A Li, Yue %A Habermann, Marc %A Thomaszewski,, Bernhard %A Coros, Stelian %A Beeler, Thabo %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Deep Physics-aware Inference of Cloth Deformation for Monocular Human Performance Capture : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E9D0-7 %U https://arxiv.org/abs/2011.12866 %D 2020 %X Recent monocular human performance capture approaches have shown compelling dense tracking results of the full body from a single RGB camera. However, existing methods either do not estimate clothing at all or model cloth deformation with simple geometric priors instead of taking into account the underlying physical principles. This leads to noticeable artifacts in their reconstructions, such as baked-in wrinkles, implausible deformations that seemingly defy gravity, and intersections between cloth and body. To address these problems, we propose a person-specific, learning-based method that integrates a finite element-based simulation layer into the training process to provide for the first time physics supervision in the context of weakly-supervised deep monocular human performance capture. We show how integrating physics into the training process improves the learned cloth deformations, allows modeling clothing as a separate piece of geometry, and largely reduces cloth-body intersections. Relying only on weak 2D multi-view supervision during training, our approach leads to a significant improvement over current state-of-the-art methods and is thus a clear step towards realistic monocular capture of the entire deforming surface of a clothed human. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
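Adding a differentiable physics term to the training loss, as described in this entry, can be sketched as follows, assuming PyTorch. A mass-spring stretch energy stands in for the paper's finite element simulation layer, and the 2D data term is a toy placeholder for the weak multi-view supervision.

import torch

def stretch_energy(verts, edges, rest_len, k=10.0):
    # penalize deviation of edge lengths from their rest lengths
    d = verts[edges[:, 0]] - verts[edges[:, 1]]
    return (k * (d.norm(dim=-1) - rest_len) ** 2).mean()

verts = torch.randn(100, 3, requires_grad=True)         # predicted cloth vertices
edges = torch.stack([torch.arange(99), torch.arange(1, 100)], dim=1)  # toy connectivity
rest_len = torch.full((99,), 0.1)
target_2d = torch.randn(100, 2)                         # toy 2D supervision

data_loss = ((verts[:, :2] - target_2d) ** 2).mean()    # stand-in reprojection term
physics_loss = stretch_energy(verts, edges, rest_len)   # differentiable physics term
loss = data_loss + 0.1 * physics_loss
loss.backward()                                         # physics gradients reach the vertices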
Long, X., Liu, L., Theobalt, C., and Wang, W. 2020b. Occlusion-Aware Depth Estimation with Adaptive Normal Constraints. ECCV 2020. Lecture Notes in Computer Science, vol 12354. Springer, Cham. https://arxiv.org/abs/2004.00845.
(arXiv: 2004.00845)
Abstract
We present a new learning-based method for multi-frame depth estimation from a color video, which is a fundamental problem in scene understanding, robot navigation or handheld 3D reconstruction. While recent learning-based methods estimate depth at high accuracy, 3D point clouds exported from their depth maps often fail to preserve important geometric features (e.g., corners, edges, planes) of man-made scenes. Widely-used pixel-wise depth errors do not specifically penalize inconsistency on these features. These inaccuracies are particularly severe when subsequent depth reconstructions are accumulated in an attempt to scan a full environment containing man-made objects exhibiting such features. Our depth estimation algorithm therefore introduces a Combined Normal Map (CNM) constraint, which is designed to better preserve high-curvature features and global planar regions. In order to further improve the depth estimation accuracy, we introduce a new occlusion-aware strategy that aggregates initial depth predictions from multiple adjacent views into one final depth map and one occlusion probability map for the current reference view. Our method outperforms the state-of-the-art in terms of depth estimation accuracy, and preserves essential geometric features of man-made indoor scenes much better than other algorithms.
Export
BibTeX
@online{Long2004.00845, TITLE = {Occlusion-Aware Depth Estimation with Adaptive Normal Constraints}, AUTHOR = {Long, Xiaoxiao and Liu, Lingjie and Theobalt, Christian and Wang, Wenping}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2004.00845}, EPRINT = {2004.00845}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a new learning-based method for multi-frame depth estimation from a color video, which is a fundamental problem in scene understanding, robot navigation or handheld 3D reconstruction. While recent learning-based methods estimate depth at high accuracy, 3D point clouds exported from their depth maps often fail to preserve important geometric feature (e.g., corners, edges, planes) of man-made scenes. Widely-used pixel-wise depth errors do not specifically penalize inconsistency on these features. These inaccuracies are particularly severe when subsequent depth reconstructions are accumulated in an attempt to scan a full environment with man-made objects with this kind of features. Our depth estimation algorithm therefore introduces a Combined Normal Map (CNM) constraint, which is designed to better preserve high-curvature features and global planar regions. In order to further improve the depth estimation accuracy, we introduce a new occlusion-aware strategy that aggregates initial depth predictions from multiple adjacent views into one final depth map and one occlusion probability map for the current reference view. Our method outperforms the state-of-the-art in terms of depth estimation accuracy, and preserves essential geometric features of man-made indoor scenes much better than other algorithms.}, JOURNAL = {ECCV 2020. Lecture Notes in Computer Science, vol 12354. Springer, Cham}, }
Endnote
%0 Report %A Long, Xiaoxiao %A Liu, Lingjie %A Theobalt, Christian %A Wang, Wenping %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Occlusion-Aware Depth Estimation with Adaptive Normal Constraints : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E0E9-5 %U https://arxiv.org/abs/2004.00845 %D 2020 %X We present a new learning-based method for multi-frame depth estimation from a color video, which is a fundamental problem in scene understanding, robot navigation or handheld 3D reconstruction. While recent learning-based methods estimate depth at high accuracy, 3D point clouds exported from their depth maps often fail to preserve important geometric feature (e.g., corners, edges, planes) of man-made scenes. Widely-used pixel-wise depth errors do not specifically penalize inconsistency on these features. These inaccuracies are particularly severe when subsequent depth reconstructions are accumulated in an attempt to scan a full environment with man-made objects with this kind of features. Our depth estimation algorithm therefore introduces a Combined Normal Map (CNM) constraint, which is designed to better preserve high-curvature features and global planar regions. In order to further improve the depth estimation accuracy, we introduce a new occlusion-aware strategy that aggregates initial depth predictions from multiple adjacent views into one final depth map and one occlusion probability map for the current reference view. Our method outperforms the state-of-the-art in terms of depth estimation accuracy, and preserves essential geometric features of man-made indoor scenes much better than other algorithms. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV %J ECCV 2020. Lecture Notes in Computer Science, vol 12354. Springer, Cham
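The normal-consistency idea behind the Combined Normal Map (CNM) constraint described in this entry can be sketched as follows, assuming NumPy: normals derived from a predicted depth map by finite differences are compared against a reference normal map (in the paper, a learned combination of local surface normals and global planar-region normals). The intrinsics and the loss form are illustrative approximations.

import numpy as np

def depth_to_normals(depth, fx=500.0, fy=500.0):
    # approximate surface normals from depth gradients (n ~ (-dz/dx, -dz/dy, 1))
    dzdx = np.gradient(depth, axis=1) * fx
    dzdy = np.gradient(depth, axis=0) * fy
    n = np.stack([-dzdx, -dzdy, np.ones_like(depth)], axis=-1)
    return n / np.linalg.norm(n, axis=-1, keepdims=True)

def normal_loss(pred_depth, ref_normals):
    pred_normals = depth_to_normals(pred_depth)
    cos = np.sum(pred_normals * ref_normals, axis=-1)
    return np.mean(1.0 - cos)                   # 0 when normals agree everywhere

depth = np.linspace(1.0, 2.0, 64)[None, :].repeat(64, axis=0)   # a tilted plane
ref = depth_to_normals(depth)
noisy = depth + 0.01 * np.random.default_rng(0).normal(size=depth.shape)
print(normal_loss(noisy, ref))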
Long, X., Liu, L., Li, W., Theobalt, C., and Wang, W. 2020c. Multi-view Depth Estimation using Epipolar Spatio-Temporal Networks. https://arxiv.org/abs/2011.13118.
(arXiv: 2011.13118)
Abstract
We present a novel method for multi-view depth estimation from a single video, which is a critical task in various applications, such as perception, reconstruction and robot navigation. Although previous learning-based methods have demonstrated compelling results, most works estimate depth maps of individual video frames independently, without taking into consideration the strong geometric and temporal coherence among the frames. Moreover, current state-of-the-art (SOTA) models mostly adopt a fully 3D convolutional network for cost regularization and therefore incur a high computational cost, thus limiting their deployment in real-world applications. Our method achieves temporally coherent depth estimation results by using a novel Epipolar Spatio-Temporal (EST) transformer to explicitly associate geometric and temporal correlation with multiple estimated depth maps. Furthermore, to reduce the computational cost, inspired by recent Mixture-of-Experts models, we design a compact hybrid network consisting of a 2D context-aware network and a 3D matching network which learn 2D context information and 3D disparity cues separately. Extensive experiments demonstrate that our method achieves higher depth estimation accuracy and a significant speedup over the SOTA methods.
Export
BibTeX
@online{Long_2011.13118, TITLE = {Multi-view Depth Estimation using Epipolar Spatio-Temporal Networks}, AUTHOR = {Long, Xiaoxiao and Liu, Lingjie and Li, Wei and Theobalt, Christian and Wang, Wenping}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2011.13118}, EPRINT = {2011.13118}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a novel method for multi-view depth estimation from a single video, which is a critical task in various applications, such as perception, reconstruction and robot navigation. Although previous learning-based methods have demonstrated compelling results, most works estimate depth maps of individual video frames independently, without taking into consideration the strong geometric and temporal coherence among the frames. Moreover, current state-of-the-art (SOTA) models mostly adopt a fully 3D convolution network for cost regularization and therefore require high computational cost, thus limiting their deployment in real-world applications. Our method achieves temporally coherent depth estimation results by using a novel Epipolar Spatio-Temporal (EST) transformer to explicitly associate geometric and temporal correlation with multiple estimated depth maps. Furthermore, to reduce the computational cost, inspired by recent Mixture-of-Experts models, we design a compact hybrid network consisting of a 2D context-aware network and a 3D matching network which learn 2D context information and 3D disparity cues separately. Extensive experiments demonstrate that our method achieves higher accuracy in depth estimation and significant speedup than the SOTA methods.}, }
Endnote
%0 Report %A Long, Xiaoxiao %A Liu, Lingjie %A Li, Wei %A Theobalt, Christian %A Wang, Wenping %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Multi-view Depth Estimation using Epipolar Spatio-Temporal Networks : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E9EA-B %U https://arxiv.org/abs/2011.13118 %D 2020 %X We present a novel method for multi-view depth estimation from a single video, which is a critical task in various applications, such as perception, reconstruction and robot navigation. Although previous learning-based methods have demonstrated compelling results, most works estimate depth maps of individual video frames independently, without taking into consideration the strong geometric and temporal coherence among the frames. Moreover, current state-of-the-art (SOTA) models mostly adopt a fully 3D convolution network for cost regularization and therefore require high computational cost, thus limiting their deployment in real-world applications. Our method achieves temporally coherent depth estimation results by using a novel Epipolar Spatio-Temporal (EST) transformer to explicitly associate geometric and temporal correlation with multiple estimated depth maps. Furthermore, to reduce the computational cost, inspired by recent Mixture-of-Experts models, we design a compact hybrid network consisting of a 2D context-aware network and a 3D matching network which learn 2D context information and 3D disparity cues separately. Extensive experiments demonstrate that our method achieves higher accuracy in depth estimation and significant speedup than the SOTA methods. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
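A coarse sketch of the hybrid design described in this entry, assuming PyTorch: a 2D network extracts per-view context features while a small 3D network regularizes a plane-sweep cost volume, from which depth is read off with a soft argmax and then refined with the context features. The epipolar spatio-temporal transformer that links depth maps across frames is omitted, and all shapes and layer sizes are illustrative.

import torch
import torch.nn as nn

context_net = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
                            nn.Conv2d(16, 16, 3, padding=1))        # 2D context branch
matching_net = nn.Sequential(nn.Conv3d(1, 8, 3, padding=1), nn.ReLU(),
                             nn.Conv3d(8, 1, 3, padding=1))         # 3D matching branch
refine_net = nn.Conv2d(16 + 1, 1, 3, padding=1)                     # fuse context and depth

image = torch.randn(1, 3, 64, 64)
cost_volume = torch.randn(1, 1, 32, 64, 64)    # (B, C, planes, H, W), e.g. from plane-sweep warping

context = context_net(image)                                        # (1, 16, 64, 64)
prob = torch.softmax(matching_net(cost_volume).squeeze(1), dim=1)   # per-pixel depth distribution
planes = torch.arange(32, dtype=torch.float32).view(1, 32, 1, 1)
depth = (prob * planes).sum(dim=1, keepdim=True)                    # soft argmax over planes
depth = depth + refine_net(torch.cat([context, depth], dim=1))      # context-aware refinement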
Malik, J., Abdelaziz, I., Elhayek, A., et al. 2020b. HandVoxNet: Deep Voxel-Based Network for 3D Hand Shape and Pose Estimation from a Single Depth Map. https://arxiv.org/abs/2004.01588.
(arXiv: 2004.01588)
Abstract
3D hand shape and pose estimation from a single depth map is a new and challenging computer vision problem with many applications. The state-of-the-art methods directly regress 3D hand meshes from 2D depth images via 2D convolutional neural networks, which leads to artefacts in the estimations due to perspective distortions in the images. In contrast, we propose a novel architecture with 3D convolutions trained in a weakly-supervised manner. The input to our method is a 3D voxelized depth map, and we rely on two hand shape representations. The first one is the 3D voxelized grid of the shape which is accurate but does not preserve the mesh topology and the number of mesh vertices. The second representation is the 3D hand surface which is less accurate but does not suffer from the limitations of the first representation. We combine the advantages of these two representations by registering the hand surface to the voxelized hand shape. In the extensive experiments, the proposed approach improves over the state of the art by 47.8% on the SynHand5M dataset. Moreover, our augmentation policy for voxelized depth maps further enhances the accuracy of 3D hand pose estimation on real data. Our method produces visually more reasonable and realistic hand shapes on NYU and BigHand2.2M datasets compared to the existing approaches.
Export
BibTeX
@online{Malik2004.01588, TITLE = {{HandVoxNet}: {D}eep Voxel-Based Network for {3D} Hand Shape and Pose Estimation from a Single Depth Map}, AUTHOR = {Malik, Jameel and Abdelaziz, Ibrahim and Elhayek, Ahmed and Shimada, Soshi and Ali, Sk Aziz and Golyanik, Vladislav and Theobalt, Christian and Stricker, Didier}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2004.01588}, EPRINT = {2004.01588}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {3D hand shape and pose estimation from a single depth map is a new and challenging computer vision problem with many applications. The state-of-the-art methods directly regress 3D hand meshes from 2D depth images via 2D convolutional neural networks, which leads to artefacts in the estimations due to perspective distortions in the images. In contrast, we propose a novel architecture with 3D convolutions trained in a weakly-supervised manner. The input to our method is a 3D voxelized depth map, and we rely on two hand shape representations. The first one is the 3D voxelized grid of the shape which is accurate but does not preserve the mesh topology and the number of mesh vertices. The second representation is the 3D hand surface which is less accurate but does not suffer from the limitations of the first representation. We combine the advantages of these two representations by registering the hand surface to the voxelized hand shape. In the extensive experiments, the proposed approach improves over the state of the art by 47.8% on the SynHand5M dataset. Moreover, our augmentation policy for voxelized depth maps further enhances the accuracy of 3D hand pose estimation on real data. Our method produces visually more reasonable and realistic hand shapes on NYU and BigHand2.2M datasets compared to the existing approaches.}, }
Endnote
%0 Report %A Malik, Jameel %A Abdelaziz, Ibrahim %A Elhayek, Ahmed %A Shimada, Soshi %A Ali, Sk Aziz %A Golyanik, Vladislav %A Theobalt, Christian %A Stricker, Didier %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T HandVoxNet: Deep Voxel-Based Network for 3D Hand Shape and Pose Estimation from a Single Depth Map : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E0FF-D %U https://arxiv.org/abs/2004.01588 %D 2020 %X 3D hand shape and pose estimation from a single depth map is a new and challenging computer vision problem with many applications. The state-of-the-art methods directly regress 3D hand meshes from 2D depth images via 2D convolutional neural networks, which leads to artefacts in the estimations due to perspective distortions in the images. In contrast, we propose a novel architecture with 3D convolutions trained in a weakly-supervised manner. The input to our method is a 3D voxelized depth map, and we rely on two hand shape representations. The first one is the 3D voxelized grid of the shape which is accurate but does not preserve the mesh topology and the number of mesh vertices. The second representation is the 3D hand surface which is less accurate but does not suffer from the limitations of the first representation. We combine the advantages of these two representations by registering the hand surface to the voxelized hand shape. In the extensive experiments, the proposed approach improves over the state of the art by 47.8% on the SynHand5M dataset. Moreover, our augmentation policy for voxelized depth maps further enhances the accuracy of 3D hand pose estimation on real data. Our method produces visually more reasonable and realistic hand shapes on NYU and BigHand2.2M datasets compared to the existing approaches. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
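The voxelized depth-map input described in this entry can be sketched as follows, assuming NumPy: the depth image is back-projected to a point cloud and scattered into a binary 3D occupancy grid that a 3D CNN would consume. The intrinsics, grid size, and centering are placeholder values, not those of the paper.

import numpy as np

def voxelize_depth(depth, fx=475.0, fy=475.0, cx=None, cy=None, grid=64, bound=0.3):
    h, w = depth.shape
    cx = cx if cx is not None else w / 2
    cy = cy if cy is not None else h / 2
    v, u = np.mgrid[0:h, 0:w]
    z = depth
    x = (u - cx) * z / fx
    y = (v - cy) * z / fy
    pts = np.stack([x, y, z - z[depth > 0].mean()], axis=-1)[depth > 0]  # center on the hand
    idx = np.floor((pts + bound) / (2 * bound) * grid).astype(int)
    idx = idx[np.all((idx >= 0) & (idx < grid), axis=1)]
    vox = np.zeros((grid, grid, grid), dtype=np.float32)
    vox[idx[:, 0], idx[:, 1], idx[:, 2]] = 1.0
    return vox                                   # input volume for the 3D CNN

depth = np.random.default_rng(0).uniform(0.4, 0.6, (120, 160))
print(voxelize_depth(depth).sum())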
Mallikarjun B R, Tewari, A., Oh, T.-H., et al. 2020a. Monocular Reconstruction of Neural Face Reflectance Fields. https://arxiv.org/abs/2008.10247.
(arXiv: 2008.10247)
Abstract
The reflectance field of a face describes the reflectance properties responsible for complex lighting effects including diffuse, specular, inter-reflection and self shadowing. Most existing methods for estimating the face reflectance from a monocular image assume faces to be diffuse, with very few approaches adding a specular component. This still leaves out important perceptual aspects of reflectance, as higher-order global illumination effects and self-shadowing are not modeled. We present a new neural representation for face reflectance where we can estimate all components of the reflectance responsible for the final appearance from a single monocular image. Instead of modeling each component of the reflectance separately using parametric models, our neural representation allows us to generate a basis set of faces in a geometric deformation-invariant space, parameterized by the input light direction, viewpoint and face geometry. We learn to reconstruct this reflectance field of a face just from a monocular image, which can be used to render the face from any viewpoint in any light condition. Our method is trained on a light-stage training dataset, which captures 300 people illuminated with 150 light conditions from 8 viewpoints. We show that our method outperforms existing monocular reflectance reconstruction methods in terms of photorealism, due to better capturing of physical primitives such as sub-surface scattering, specularities, self-shadows and other higher-order effects.
Export
BibTeX
@online{Mallikarjun_2008.10247, TITLE = {Monocular Reconstruction of Neural Face Reflectance Fields}, AUTHOR = {Mallikarjun B R, and Tewari, Ayush and Oh, Tae-Hyun and Weyrich, Tim and Bickel, Bernd and Seidel, Hans-Peter and Pfister, Hanspeter and Matusik, Wojciech and Elgharib, Mohamed and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2008.10247}, EPRINT = {2008.10247}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {The reflectance field of a face describes the reflectance properties responsible for complex lighting effects including diffuse, specular, inter-reflection and self shadowing. Most existing methods for estimating the face reflectance from a monocular image assume faces to be diffuse with very few approaches adding a specular component. This still leaves out important perceptual aspects of reflectance as higher-order global illumination effects and self-shadowing are not modeled. We present a new neural representation for face reflectance where we can estimate all components of the reflectance responsible for the final appearance from a single monocular image. Instead of modeling each component of the reflectance separately using parametric models, our neural representation allows us to generate a basis set of faces in a geometric deformation-invariant space, parameterized by the input light direction, viewpoint and face geometry. We learn to reconstruct this reflectance field of a face just from a monocular image, which can be used to render the face from any viewpoint in any light condition. Our method is trained on a light-stage training dataset, which captures 300 people illuminated with 150 light conditions from 8 viewpoints. We show that our method outperforms existing monocular reflectance reconstruction methods, in terms of photorealism due to better capturing of physical premitives, such as sub-surface scattering, specularities, self-shadows and other higher-order effects.}, }
Endnote
%0 Report %A Mallikarjun B R, %A Tewari, Ayush %A Oh, Tae-Hyun %A Weyrich, Tim %A Bickel, Bernd %A Seidel, Hans-Peter %A Pfister, Hanspeter %A Matusik, Wojciech %A Elgharib, Mohamed %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Monocular Reconstruction of Neural Face Reflectance Fields : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B110-E %U https://arxiv.org/abs/2008.10247 %D 2020 %X The reflectance field of a face describes the reflectance properties responsible for complex lighting effects including diffuse, specular, inter-reflection and self shadowing. Most existing methods for estimating the face reflectance from a monocular image assume faces to be diffuse with very few approaches adding a specular component. This still leaves out important perceptual aspects of reflectance as higher-order global illumination effects and self-shadowing are not modeled. We present a new neural representation for face reflectance where we can estimate all components of the reflectance responsible for the final appearance from a single monocular image. Instead of modeling each component of the reflectance separately using parametric models, our neural representation allows us to generate a basis set of faces in a geometric deformation-invariant space, parameterized by the input light direction, viewpoint and face geometry. We learn to reconstruct this reflectance field of a face just from a monocular image, which can be used to render the face from any viewpoint in any light condition. Our method is trained on a light-stage training dataset, which captures 300 people illuminated with 150 light conditions from 8 viewpoints. We show that our method outperforms existing monocular reflectance reconstruction methods, in terms of photorealism due to better capturing of physical premitives, such as sub-surface scattering, specularities, self-shadows and other higher-order effects. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG
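Querying a neural face reflectance field of the kind described in this entry can be sketched as follows, assuming PyTorch: an MLP maps a per-pixel geometry descriptor plus light and view directions to reflectance, and relighting sums its response over a set of directional lights. The network, descriptor, and shading loop are illustrative placeholders for the learned representation in the paper.

import torch
import torch.nn as nn

reflectance_net = nn.Sequential(nn.Linear(32 + 3 + 3, 128), nn.ReLU(),
                                nn.Linear(128, 128), nn.ReLU(),
                                nn.Linear(128, 3))      # outputs RGB reflectance

pixels = 64 * 64
geom_feat = torch.randn(pixels, 32)                     # per-pixel geometry code (toy)
view_dir = torch.tensor([[0.0, 0.0, 1.0]]).expand(pixels, 3)

def render(light_dirs, light_colors):
    # relight by summing the field's response over a set of directional lights
    image = torch.zeros(pixels, 3)
    for l, c in zip(light_dirs, light_colors):
        inp = torch.cat([geom_feat, l.expand(pixels, 3), view_dir], dim=-1)
        image += torch.sigmoid(reflectance_net(inp)) * c
    return image.view(64, 64, 3)

img = render(light_dirs=[torch.tensor([0.5, 0.5, 0.7])],
             light_colors=[torch.tensor([1.0, 0.9, 0.8])])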
Mallikarjun B R, Tewari, A., Seidel, H.-P., Elgharib, M., and Theobalt, C. 2020b. Learning Complete 3D Morphable Face Models from Images and Videos. https://arxiv.org/abs/2010.01679.
(arXiv: 2010.01679)
Abstract
Most 3D face reconstruction methods rely on 3D morphable models, which disentangle the space of facial deformations into identity geometry, expressions and skin reflectance. These models are typically learned from a limited number of 3D scans and thus do not generalize well across different identities and expressions. We present the first approach to learn complete 3D models of face identity geometry, albedo and expression just from images and videos. The virtually endless collection of such data, in combination with our self-supervised learning-based approach allows for learning face models that generalize beyond the span of existing approaches. Our network design and loss functions ensure a disentangled parameterization of not only identity and albedo, but also, for the first time, an expression basis. Our method also allows for in-the-wild monocular reconstruction at test time. We show that our learned models better generalize and lead to higher quality image-based reconstructions than existing approaches.
Export
BibTeX
@online{Mallikarjun_arXiv2010.01679, TITLE = {Learning Complete {3D} Morphable Face Models from Images and Videos}, AUTHOR = {Mallikarjun B R, and Tewari, Ayush and Seidel, Hans-Peter and Elgharib, Mohamed and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2010.01679}, EPRINT = {2010.01679}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Most 3D face reconstruction methods rely on 3D morphable models, which disentangle the space of facial deformations into identity geometry, expressions and skin reflectance. These models are typically learned from a limited number of 3D scans and thus do not generalize well across different identities and expressions. We present the first approach to learn complete 3D models of face identity geometry, albedo and expression just from images and videos. The virtually endless collection of such data, in combination with our self-supervised learning-based approach allows for learning face models that generalize beyond the span of existing approaches. Our network design and loss functions ensure a disentangled parameterization of not only identity and albedo, but also, for the first time, an expression basis. Our method also allows for in-the-wild monocular reconstruction at test time. We show that our learned models better generalize and lead to higher quality image-based reconstructions than existing approaches.}, }
Endnote
%0 Report %A Mallikarjun B R, %A Tewari, Ayush %A Seidel, Hans-Peter %A Elgharib, Mohamed %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Learning Complete 3D Morphable Face Models from Images and Videos : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B6FB-1 %U https://arxiv.org/abs/2010.01679 %D 2020 %X Most 3D face reconstruction methods rely on 3D morphable models, which disentangle the space of facial deformations into identity geometry, expressions and skin reflectance. These models are typically learned from a limited number of 3D scans and thus do not generalize well across different identities and expressions. We present the first approach to learn complete 3D models of face identity geometry, albedo and expression just from images and videos. The virtually endless collection of such data, in combination with our self-supervised learning-based approach allows for learning face models that generalize beyond the span of existing approaches. Our network design and loss functions ensure a disentangled parameterization of not only identity and albedo, but also, for the first time, an expression basis. Our method also allows for in-the-wild monocular reconstruction at test time. We show that our learned models better generalize and lead to higher quality image-based reconstructions than existing approaches. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG,Computer Science, Multimedia, cs.MM
Rao, S., Stutz, D., and Schiele, B. 2020. Adversarial Training against Location-Optimized Adversarial Patches. https://arxiv.org/abs/2005.02313.
(arXiv: 2005.02313)
Abstract
Deep neural networks have been shown to be susceptible to adversarial examples -- small, imperceptible changes constructed to cause mis-classification in otherwise highly accurate image classifiers. As a practical alternative, recent work proposed so-called adversarial patches: clearly visible, but adversarially crafted rectangular patches in images. These patches can easily be printed and applied in the physical world. While defenses against imperceptible adversarial examples have been studied extensively, robustness against adversarial patches is poorly understood. In this work, we first devise a practical approach to obtain adversarial patches while actively optimizing their location within the image. Then, we apply adversarial training on these location-optimized adversarial patches and demonstrate significantly improved robustness on CIFAR10 and GTSRB. Additionally, in contrast to adversarial training on imperceptible adversarial examples, our adversarial patch training does not reduce accuracy.
Export
BibTeX
@online{Rao_arXiv2005.02313, TITLE = {Adversarial Training against Location-Optimized Adversarial Patches}, AUTHOR = {Rao, Sukrut and Stutz, David and Schiele, Bernt}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2005.02313}, EPRINT = {2005.02313}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Deep neural networks have been shown to be susceptible to adversarial examples -- small, imperceptible changes constructed to cause mis-classification in otherwise highly accurate image classifiers. As a practical alternative, recent work proposed so-called adversarial patches: clearly visible, but adversarially crafted rectangular patches in images. These patches can easily be printed and applied in the physical world. While defenses against imperceptible adversarial examples have been studied extensively, robustness against adversarial patches is poorly understood. In this work, we first devise a practical approach to obtain adversarial patches while actively optimizing their location within the image. Then, we apply adversarial training on these location-optimized adversarial patches and demonstrate significantly improved robustness on CIFAR10 and GTSRB. Additionally, in contrast to adversarial training on imperceptible adversarial examples, our adversarial patch training does not reduce accuracy.}, }
Endnote
%0 Report %A Rao, Sukrut %A Stutz, David %A Schiele, Bernt %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T Adversarial Training against Location-Optimized Adversarial Patches : %G eng %U http://hdl.handle.net/21.11116/0000-0007-80D0-C %U https://arxiv.org/abs/2005.02313 %D 2020 %X Deep neural networks have been shown to be susceptible to adversarial examples -- small, imperceptible changes constructed to cause mis-classification in otherwise highly accurate image classifiers. As a practical alternative, recent work proposed so-called adversarial patches: clearly visible, but adversarially crafted rectangular patches in images. These patches can easily be printed and applied in the physical world. While defenses against imperceptible adversarial examples have been studied extensively, robustness against adversarial patches is poorly understood. In this work, we first devise a practical approach to obtain adversarial patches while actively optimizing their location within the image. Then, we apply adversarial training on these location-optimized adversarial patches and demonstrate significantly improved robustness on CIFAR10 and GTSRB. Additionally, in contrast to adversarial training on imperceptible adversarial examples, our adversarial patch training does not reduce accuracy. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Cryptography and Security, cs.CR,Computer Science, Learning, cs.LG,Statistics, Machine Learning, stat.ML
Rudnev, V., Golyanik, V., Wang, J., et al. 2020. EventHands: Real-Time Neural 3D Hand Reconstruction from an Event Stream. https://arxiv.org/abs/2012.06475.
(arXiv: 2012.06475)
Abstract
3D hand pose estimation from monocular videos is a long-standing and challenging problem, which is now seeing a strong upturn. In this work, we address it for the first time using a single event camera, i.e., an asynchronous vision sensor reacting on brightness changes. Our EventHands approach has characteristics previously not demonstrated with a single RGB or depth camera such as high temporal resolution at low data throughputs and real-time performance at 1000 Hz. Due to the different data modality of event cameras compared to classical cameras, existing methods cannot be directly applied to and re-trained for event streams. We thus design a new neural approach which accepts a new event stream representation suitable for learning, which is trained on newly-generated synthetic event streams and can generalise to real data. Experiments show that EventHands outperforms recent monocular methods using a colour (or depth) camera in terms of accuracy and its ability to capture hand motions of unprecedented speed. Our method, the event stream simulator and the dataset will be made publicly available.
Export
BibTeX
@online{Rudnev_arXiv2012.06475, TITLE = {{EventHands}: {R}eal-Time Neural {3D} Hand Reconstruction from an Event Stream}, AUTHOR = {Rudnev, Viktor and Golyanik, Vladislav and Wang, Jiayi and Seidel, Hans-Peter and Mueller, Franziska and Elgharib, Mohamed and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.06475}, EPRINT = {2012.06475}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {3D hand pose estimation from monocular videos is a long-standing and challenging problem, which is now seeing a strong upturn. In this work, we address it for the first time using a single event camera, i.e., an asynchronous vision sensor reacting on brightness changes. Our EventHands approach has characteristics previously not demonstrated with a single RGB or depth camera such as high temporal resolution at low data throughputs and real-time performance at 1000 Hz. Due to the different data modality of event cameras compared to classical cameras, existing methods cannot be directly applied to and re-trained for event streams. We thus design a new neural approach which accepts a new event stream representation suitable for learning, which is trained on newly-generated synthetic event streams and can generalise to real data. Experiments show that EventHands outperforms recent monocular methods using a colour (or depth) camera in terms of accuracy and its ability to capture hand motions of unprecedented speed. Our method, the event stream simulator and the dataset will be made publicly available.}, }
Endnote
%0 Report %A Rudnev, Viktor %A Golyanik, Vladislav %A Wang, Jiayi %A Seidel, Hans-Peter %A Mueller, Franziska %A Elgharib, Mohamed %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T EventHands: Real-Time Neural 3D Hand Reconstruction from an Event Stream : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B709-1 %U https://arxiv.org/abs/2012.06475 %D 2020 %X 3D hand pose estimation from monocular videos is a long-standing and challenging problem, which is now seeing a strong upturn. In this work, we address it for the first time using a single event camera, i.e., an asynchronous vision sensor reacting on brightness changes. Our EventHands approach has characteristics previously not demonstrated with a single RGB or depth camera such as high temporal resolution at low data throughputs and real-time performance at 1000 Hz. Due to the different data modality of event cameras compared to classical cameras, existing methods cannot be directly applied to and re-trained for event streams. We thus design a new neural approach which accepts a new event stream representation suitable for learning, which is trained on newly-generated synthetic event streams and can generalise to real data. Experiments show that EventHands outperforms recent monocular methods using a colour (or depth) camera in terms of accuracy and its ability to capture hand motions of unprecedented speed. Our method, the event stream simulator and the dataset will be made publicly available. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Shimada, S., Golyanik, V., Xu, W., and Theobalt, C. 2020b. PhysCap: Physically Plausible Monocular 3D Motion Capture in Real Time. https://arxiv.org/abs/2008.08880.
(arXiv: 2008.08880)
Abstract
Marker-less 3D human motion capture from a single colour camera has seen significant progress. However, it is a very challenging and severely ill-posed problem. In consequence, even the most accurate state-of-the-art approaches have significant limitations. Purely kinematic formulations on the basis of individual joints or skeletons, and the frequent frame-wise reconstruction in state-of-the-art methods greatly limit 3D accuracy and temporal stability compared to multi-view or marker-based motion capture. Further, captured 3D poses are often physically incorrect and biomechanically implausible, or exhibit implausible environment interactions (floor penetration, foot skating, unnatural body leaning and strong shifting in depth), which is problematic for any use case in computer graphics. We, therefore, present PhysCap, the first algorithm for physically plausible, real-time and marker-less human 3D motion capture with a single colour camera at 25 fps. Our algorithm first captures 3D human poses purely kinematically. To this end, a CNN infers 2D and 3D joint positions, and subsequently, an inverse kinematics step finds space-time coherent joint angles and global 3D pose. Next, these kinematic reconstructions are used as constraints in a real-time physics-based pose optimiser that accounts for environment constraints (e.g., collision handling and floor placement), gravity, and biophysical plausibility of human postures. Our approach employs a combination of ground reaction force and residual force for plausible root control, and uses a trained neural network to detect foot contact events in images. Our method captures physically plausible and temporally stable global 3D human motion, without physically implausible postures, floor penetrations or foot skating, from video in real time and in general scenes. The video is available at http://gvv.mpi-inf.mpg.de/projects/PhysCap
Export
BibTeX
@online{Shimada_2008.08880, TITLE = {{PhysCap}: {P}hysically Plausible Monocular {3D} Motion Capture in Real Time}, AUTHOR = {Shimada, Soshi and Golyanik, Vladislav and Xu, Weipeng and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2008.08880}, EPRINT = {2008.08880}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Marker-less 3D human motion capture from a single colour camera has seen significant progress. However, it is a very challenging and severely ill-posed problem. In consequence, even the most accurate state-of-the-art approaches have significant limitations. Purely kinematic formulations on the basis of individual joints or skeletons, and the frequent frame-wise reconstruction in state-of-the-art methods greatly limit 3D accuracy and temporal stability compared to multi-view or marker-based motion capture. Further, captured 3D poses are often physically incorrect and biomechanically implausible, or exhibit implausible environment interactions (floor penetration, foot skating, unnatural body leaning and strong shifting in depth), which is problematic for any use case in computer graphics. We, therefore, present PhysCap, the first algorithm for physically plausible, real-time and marker-less human 3D motion capture with a single colour camera at 25 fps. Our algorithm first captures 3D human poses purely kinematically. To this end, a CNN infers 2D and 3D joint positions, and subsequently, an inverse kinematics step finds space-time coherent joint angles and global 3D pose. Next, these kinematic reconstructions are used as constraints in a real-time physics-based pose optimiser that accounts for environment constraints (e.g., collision handling and floor placement), gravity, and biophysical plausibility of human postures. Our approach employs a combination of ground reaction force and residual force for plausible root control, and uses a trained neural network to detect foot contact events in images. Our method captures physically plausible and temporally stable global 3D human motion, without physically implausible postures, floor penetrations or foot skating, from video in real time and in general scenes. The video is available at http://gvv.mpi-inf.mpg.de/projects/PhysCap}, }
Endnote
%0 Report %A Shimada, Soshi %A Golyanik, Vladislav %A Xu, Weipeng %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T PhysCap: Physically Plausible Monocular 3D Motion Capture in Real Time : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E8F3-1 %U https://arxiv.org/abs/2008.08880 %D 2020 %X Marker-less 3D human motion capture from a single colour camera has seen significant progress. However, it is a very challenging and severely ill-posed problem. In consequence, even the most accurate state-of-the-art approaches have significant limitations. Purely kinematic formulations on the basis of individual joints or skeletons, and the frequent frame-wise reconstruction in state-of-the-art methods greatly limit 3D accuracy and temporal stability compared to multi-view or marker-based motion capture. Further, captured 3D poses are often physically incorrect and biomechanically implausible, or exhibit implausible environment interactions (floor penetration, foot skating, unnatural body leaning and strong shifting in depth), which is problematic for any use case in computer graphics. We, therefore, present PhysCap, the first algorithm for physically plausible, real-time and marker-less human 3D motion capture with a single colour camera at 25 fps. Our algorithm first captures 3D human poses purely kinematically. To this end, a CNN infers 2D and 3D joint positions, and subsequently, an inverse kinematics step finds space-time coherent joint angles and global 3D pose. Next, these kinematic reconstructions are used as constraints in a real-time physics-based pose optimiser that accounts for environment constraints (e.g., collision handling and floor placement), gravity, and biophysical plausibility of human postures. Our approach employs a combination of ground reaction force and residual force for plausible root control, and uses a trained neural network to detect foot contact events in images. Our method captures physically plausible and temporally stable global 3D human motion, without physically implausible postures, floor penetrations or foot skating, from video in real time and in general scenes. The video is available at http://gvv.mpi-inf.mpg.de/projects/PhysCap %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Tewari, A., Elgharib, M., Bharaj, G., et al. 2020e. StyleRig: Rigging StyleGAN for 3D Control over Portrait Images. https://arxiv.org/abs/2004.00121.
(arXiv: 2004.00121)
Abstract
StyleGAN generates photorealistic portrait images of faces with eyes, teeth, hair and context (neck, shoulders, background), but lacks a rig-like control over semantic face parameters that are interpretable in 3D, such as face pose, expressions, and scene illumination. Three-dimensional morphable face models (3DMMs) on the other hand offer control over the semantic parameters, but lack photorealism when rendered and only model the face interior, not other parts of a portrait image (hair, mouth interior, background). We present the first method to provide a face rig-like control over a pretrained and fixed StyleGAN via a 3DMM. A new rigging network, RigNet is trained between the 3DMM's semantic parameters and StyleGAN's input. The network is trained in a self-supervised manner, without the need for manual annotations. At test time, our method generates portrait images with the photorealism of StyleGAN and provides explicit control over the 3D semantic parameters of the face.
Export
BibTeX
@online{Tewari_2004.00121, TITLE = {{StyleRig}: Rigging {StyleGAN} for {3D} Control over Portrait Images}, AUTHOR = {Tewari, Ayush and Elgharib, Mohamed and Bharaj, Gaurav and Bernard, Florian and Seidel, Hans-Peter and P{\'e}rez, Patrick and Zollh{\"o}fer, Michael and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2004.00121}, EPRINT = {2004.00121}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {StyleGAN generates photorealistic portrait images of faces with eyes, teeth, hair and context (neck, shoulders, background), but lacks a rig-like control over semantic face parameters that are interpretable in 3D, such as face pose, expressions, and scene illumination. Three-dimensional morphable face models (3DMMs) on the other hand offer control over the semantic parameters, but lack photorealism when rendered and only model the face interior, not other parts of a portrait image (hair, mouth interior, background). We present the first method to provide a face rig-like control over a pretrained and fixed StyleGAN via a 3DMM. A new rigging network, RigNet is trained between the 3DMM's semantic parameters and StyleGAN's input. The network is trained in a self-supervised manner, without the need for manual annotations. At test time, our method generates portrait images with the photorealism of StyleGAN and provides explicit control over the 3D semantic parameters of the face.}, }
Endnote
%0 Report %A Tewari, Ayush %A Elgharib, Mohamed %A Bharaj, Gaurav %A Bernard, Florian %A Seidel, Hans-Peter %A Pérez, Patrick %A Zollhöfer, Michael %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T StyleRig: Rigging StyleGAN for 3D Control over Portrait Images : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B0FC-6 %U https://arxiv.org/abs/2004.00121 %D 2020 %X StyleGAN generates photorealistic portrait images of faces with eyes, teeth, hair and context (neck, shoulders, background), but lacks a rig-like control over semantic face parameters that are interpretable in 3D, such as face pose, expressions, and scene illumination. Three-dimensional morphable face models (3DMMs) on the other hand offer control over the semantic parameters, but lack photorealism when rendered and only model the face interior, not other parts of a portrait image (hair, mouth interior, background). We present the first method to provide a face rig-like control over a pretrained and fixed StyleGAN via a 3DMM. A new rigging network, RigNet is trained between the 3DMM's semantic parameters and StyleGAN's input. The network is trained in a self-supervised manner, without the need for manual annotations. At test time, our method generates portrait images with the photorealism of StyleGAN and provides explicit control over the 3D semantic parameters of the face. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Tewari, A., Elgharib, M., Mallikarjun B R, et al. 2020f. PIE: Portrait Image Embedding for Semantic Control. https://arxiv.org/abs/2009.09485.
(arXiv: 2009.09485)
Abstract
Editing of portrait images is a very popular and important research topic with a large variety of applications. For ease of use, control should be provided via a semantically meaningful parameterization that is akin to computer animation controls. The vast majority of existing techniques do not provide such intuitive and fine-grained control, or only enable coarse editing of a single isolated control parameter. Very recently, high-quality semantically controlled editing has been demonstrated, however only on synthetically created StyleGAN images. We present the first approach for embedding real portrait images in the latent space of StyleGAN, which allows for intuitive editing of the head pose, facial expression, and scene illumination in the image. Semantic editing in parameter space is achieved based on StyleRig, a pretrained neural network that maps the control space of a 3D morphable face model to the latent space of the GAN. We design a novel hierarchical non-linear optimization problem to obtain the embedding. An identity preservation energy term allows spatially coherent edits while maintaining facial integrity. Our approach runs at interactive frame rates and thus allows the user to explore the space of possible edits. We evaluate our approach on a wide set of portrait photos, compare it to the current state of the art, and validate the effectiveness of its components in an ablation study.
Export
BibTeX
@online{Tewari_2009.09485, TITLE = {{PIE}: {P}ortrait Image Embedding for Semantic Control}, AUTHOR = {Tewari, Ayush and Elgharib, Mohamed and Mallikarjun B R, and Bernard, Florian and Seidel, Hans-Peter and P{\'e}rez, Patrick and Zollh{\"o}fer, Michael and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2009.09485}, EPRINT = {2009.09485}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Editing of portrait images is a very popular and important research topic with a large variety of applications. For ease of use, control should be provided via a semantically meaningful parameterization that is akin to computer animation controls. The vast majority of existing techniques do not provide such intuitive and fine-grained control, or only enable coarse editing of a single isolated control parameter. Very recently, high-quality semantically controlled editing has been demonstrated, however only on synthetically created StyleGAN images. We present the first approach for embedding real portrait images in the latent space of StyleGAN, which allows for intuitive editing of the head pose, facial expression, and scene illumination in the image. Semantic editing in parameter space is achieved based on StyleRig, a pretrained neural network that maps the control space of a 3D morphable face model to the latent space of the GAN. We design a novel hierarchical non-linear optimization problem to obtain the embedding. An identity preservation energy term allows spatially coherent edits while maintaining facial integrity. Our approach runs at interactive frame rates and thus allows the user to explore the space of possible edits. We evaluate our approach on a wide set of portrait photos, compare it to the current state of the art, and validate the effectiveness of its components in an ablation study.}, }
Endnote
%0 Report %A Tewari, Ayush %A Elgharib, Mohamed %A Mallikarjun B R, %A Bernard, Florian %A Seidel, Hans-Peter %A Pérez, Patrick %A Zollhöfer, Michael %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T PIE: Portrait Image Embedding for Semantic Control : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B117-7 %U https://arxiv.org/abs/2009.09485 %D 2020 %X Editing of portrait images is a very popular and important research topic with a large variety of applications. For ease of use, control should be provided via a semantically meaningful parameterization that is akin to computer animation controls. The vast majority of existing techniques do not provide such intuitive and fine-grained control, or only enable coarse editing of a single isolated control parameter. Very recently, high-quality semantically controlled editing has been demonstrated, however only on synthetically created StyleGAN images. We present the first approach for embedding real portrait images in the latent space of StyleGAN, which allows for intuitive editing of the head pose, facial expression, and scene illumination in the image. Semantic editing in parameter space is achieved based on StyleRig, a pretrained neural network that maps the control space of a 3D morphable face model to the latent space of the GAN. We design a novel hierarchical non-linear optimization problem to obtain the embedding. An identity preservation energy term allows spatially coherent edits while maintaining facial integrity. Our approach runs at interactive frame rates and thus allows the user to explore the space of possible edits. We evaluate our approach on a wide set of portrait photos, compare it to the current state of the art, and validate the effectiveness of its components in an ablation study. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Tewari, A., Fried, O., Thies, J., et al. 2020g. State of the Art on Neural Rendering. https://arxiv.org/abs/2004.03805.
(arXiv: 2004.03805)
Abstract
Efficient rendering of photo-realistic virtual worlds is a long standing effort of computer graphics. Modern graphics techniques have succeeded in synthesizing photo-realistic images from hand-crafted scene representations. However, the automatic generation of shape, materials, lighting, and other aspects of scenes remains a challenging problem that, if solved, would make photo-realistic computer graphics more widely accessible. Concurrently, progress in computer vision and machine learning have given rise to a new approach to image synthesis and editing, namely deep generative models. Neural rendering is a new and rapidly emerging field that combines generative machine learning techniques with physical knowledge from computer graphics, e.g., by the integration of differentiable rendering into network training. With a plethora of applications in computer graphics and vision, neural rendering is poised to become a new area in the graphics community, yet no survey of this emerging field exists. This state-of-the-art report summarizes the recent trends and applications of neural rendering. We focus on approaches that combine classic computer graphics techniques with deep generative models to obtain controllable and photo-realistic outputs. Starting with an overview of the underlying computer graphics and machine learning concepts, we discuss critical aspects of neural rendering approaches. This state-of-the-art report is focused on the many important use cases for the described algorithms such as novel view synthesis, semantic photo manipulation, facial and body reenactment, relighting, free-viewpoint video, and the creation of photo-realistic avatars for virtual and augmented reality telepresence. Finally, we conclude with a discussion of the social implications of such technology and investigate open research problems.
Export
BibTeX
@online{Tewari2004.03805, TITLE = {State of the Art on Neural Rendering}, AUTHOR = {Tewari, Ayush and Fried, Ohad and Thies, Justus and Sitzmann, Vincent and Lombardi, Stephen and Sunkavalli, Kalyan and Martin-Brualla, Ricardo and Simon, Tomas and Saragih, Jason and Nie{\ss}ner, Matthias and Pandey, Rohit and Fanello, Sean and Wetzstein, Gordon and Zhu, Jun-Yan and Theobalt, Christian and Agrawala, Maneesh and Shechtman, Eli and Goldman, Dan B and Zollh{\"o}fer, Michael}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2004.03805}, EPRINT = {2004.03805}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Efficient rendering of photo-realistic virtual worlds is a long standing effort of computer graphics. Modern graphics techniques have succeeded in synthesizing photo-realistic images from hand-crafted scene representations. However, the automatic generation of shape, materials, lighting, and other aspects of scenes remains a challenging problem that, if solved, would make photo-realistic computer graphics more widely accessible. Concurrently, progress in computer vision and machine learning have given rise to a new approach to image synthesis and editing, namely deep generative models. Neural rendering is a new and rapidly emerging field that combines generative machine learning techniques with physical knowledge from computer graphics, e.g., by the integration of differentiable rendering into network training. With a plethora of applications in computer graphics and vision, neural rendering is poised to become a new area in the graphics community, yet no survey of this emerging field exists. This state-of-the-art report summarizes the recent trends and applications of neural rendering. We focus on approaches that combine classic computer graphics techniques with deep generative models to obtain controllable and photo-realistic outputs. Starting with an overview of the underlying computer graphics and machine learning concepts, we discuss critical aspects of neural rendering approaches. This state-of-the-art report is focused on the many important use cases for the described algorithms such as novel view synthesis, semantic photo manipulation, facial and body reenactment, relighting, free-viewpoint video, and the creation of photo-realistic avatars for virtual and augmented reality telepresence. Finally, we conclude with a discussion of the social implications of such technology and investigate open research problems.}, }
Endnote
%0 Report %A Tewari, Ayush %A Fried, Ohad %A Thies, Justus %A Sitzmann, Vincent %A Lombardi, Stephen %A Sunkavalli, Kalyan %A Martin-Brualla, Ricardo %A Simon, Tomas %A Saragih, Jason %A Nießner, Matthias %A Pandey, Rohit %A Fanello, Sean %A Wetzstein, Gordon %A Zhu, Jun-Yan %A Theobalt, Christian %A Agrawala, Maneesh %A Shechtman, Eli %A Goldman, Dan B %A Zollhöfer, Michael %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations %T State of the Art on Neural Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E114-4 %U https://arxiv.org/abs/2004.03805 %D 2020 %X Efficient rendering of photo-realistic virtual worlds is a long standing effort of computer graphics. Modern graphics techniques have succeeded in synthesizing photo-realistic images from hand-crafted scene representations. However, the automatic generation of shape, materials, lighting, and other aspects of scenes remains a challenging problem that, if solved, would make photo-realistic computer graphics more widely accessible. Concurrently, progress in computer vision and machine learning have given rise to a new approach to image synthesis and editing, namely deep generative models. Neural rendering is a new and rapidly emerging field that combines generative machine learning techniques with physical knowledge from computer graphics, e.g., by the integration of differentiable rendering into network training. With a plethora of applications in computer graphics and vision, neural rendering is poised to become a new area in the graphics community, yet no survey of this emerging field exists. This state-of-the-art report summarizes the recent trends and applications of neural rendering. We focus on approaches that combine classic computer graphics techniques with deep generative models to obtain controllable and photo-realistic outputs. Starting with an overview of the underlying computer graphics and machine learning concepts, we discuss critical aspects of neural rendering approaches. This state-of-the-art report is focused on the many important use cases for the described algorithms such as novel view synthesis, semantic photo manipulation, facial and body reenactment, relighting, free-viewpoint video, and the creation of photo-realistic avatars for virtual and augmented reality telepresence. Finally, we conclude with a discussion of the social implications of such technology and investigate open research problems. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Thies, J., Zollhöfer, M., Stamminger, M., Theobalt, C., and Nießner, M. 2020c. Face2Face: Real-time Face Capture and Reenactment of RGB Videos. https://arxiv.org/abs/2007.14808.
(arXiv: 2007.14808)
Abstract
We present Face2Face, a novel approach for real-time facial reenactment of a monocular target video sequence (e.g., Youtube video). The source sequence is also a monocular video stream, captured live with a commodity webcam. Our goal is to animate the facial expressions of the target video by a source actor and re-render the manipulated output video in a photo-realistic fashion. To this end, we first address the under-constrained problem of facial identity recovery from monocular video by non-rigid model-based bundling. At run time, we track facial expressions of both source and target video using a dense photometric consistency measure. Reenactment is then achieved by fast and efficient deformation transfer between source and target. The mouth interior that best matches the re-targeted expression is retrieved from the target sequence and warped to produce an accurate fit. Finally, we convincingly re-render the synthesized target face on top of the corresponding video stream such that it seamlessly blends with the real-world illumination. We demonstrate our method in a live setup, where Youtube videos are reenacted in real time.
Export
BibTeX
@online{Thies_2007.14808, TITLE = {{Face2Face}: {R}eal-time Face Capture and Reenactment of {RGB} Videos}, AUTHOR = {Thies, Justus and Zollh{\"o}fer, Michael and Stamminger, Marc and Theobalt, Christian and Nie{\ss}ner, Matthias}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2007.14808}, EPRINT = {2007.14808}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present Face2Face, a novel approach for real-time facial reenactment of a monocular target video sequence (e.g., Youtube video). The source sequence is also a monocular video stream, captured live with a commodity webcam. Our goal is to animate the facial expressions of the target video by a source actor and re-render the manipulated output video in a photo-realistic fashion. To this end, we first address the under-constrained problem of facial identity recovery from monocular video by non-rigid model-based bundling. At run time, we track facial expressions of both source and target video using a dense photometric consistency measure. Reenactment is then achieved by fast and efficient deformation transfer between source and target. The mouth interior that best matches the re-targeted expression is retrieved from the target sequence and warped to produce an accurate fit. Finally, we convincingly re-render the synthesized target face on top of the corresponding video stream such that it seamlessly blends with the real-world illumination. We demonstrate our method in a live setup, where Youtube videos are reenacted in real time.}, }
Endnote
%0 Report %A Thies, Justus %A Zollhöfer, Michael %A Stamminger, Marc %A Theobalt, Christian %A Nießner, Matthias %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Face2Face: Real-time Face Capture and Reenactment of RGB Videos : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E8E9-D %U https://arxiv.org/abs/2007.14808 %D 2020 %X We present Face2Face, a novel approach for real-time facial reenactment of a monocular target video sequence (e.g., Youtube video). The source sequence is also a monocular video stream, captured live with a commodity webcam. Our goal is to animate the facial expressions of the target video by a source actor and re-render the manipulated output video in a photo-realistic fashion. To this end, we first address the under-constrained problem of facial identity recovery from monocular video by non-rigid model-based bundling. At run time, we track facial expressions of both source and target video using a dense photometric consistency measure. Reenactment is then achieved by fast and efficient deformation transfer between source and target. The mouth interior that best matches the re-targeted expression is retrieved from the target sequence and warped to produce an accurate fit. Finally, we convincingly re-render the synthesized target face on top of the corresponding video stream such that it seamlessly blends with the real-world illumination. We demonstrate our method in a live setup, where Youtube videos are reenacted in real time. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Tretschk, E., Tewari, A., Golyanik, V., Zollhöfer, M., Stoll, C., and Theobalt, C. 2020c. PatchNets: Patch-Based Generalizable Deep Implicit 3D Shape Representations. https://arxiv.org/abs/2008.01639.
(arXiv: 2008.01639)
Abstract
Implicit surface representations, such as signed-distance functions, combined with deep learning have led to impressive models which can represent detailed shapes of objects with arbitrary topology. Since a continuous function is learned, the reconstructions can also be extracted at any arbitrary resolution. However, large datasets such as ShapeNet are required to train such models. In this paper, we present a new mid-level patch-based surface representation. At the level of patches, objects across different categories share similarities, which leads to more generalizable models. We then introduce a novel method to learn this patch-based representation in a canonical space, such that it is as object-agnostic as possible. We show that our representation trained on one category of objects from ShapeNet can also well represent detailed shapes from any other category. In addition, it can be trained using much fewer shapes, compared to existing approaches. We show several applications of our new representation, including shape interpolation and partial point cloud completion. Due to explicit control over positions, orientations and scales of patches, our representation is also more controllable compared to object-level representations, which enables us to deform encoded shapes non-rigidly.
Export
BibTeX
@online{Tretschk_2008.01639, TITLE = {{PatchNets}: {P}atch-Based Generalizable Deep Implicit {3D} Shape Representations}, AUTHOR = {Tretschk, Edgar and Tewari, Ayush and Golyanik, Vladislav and Zollh{\"o}fer, Michael and Stoll, Carsten and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2008.01639}, EPRINT = {2008.01639}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Implicit surface representations, such as signed-distance functions, combined with deep learning have led to impressive models which can represent detailed shapes of objects with arbitrary topology. Since a continuous function is learned, the reconstructions can also be extracted at any arbitrary resolution. However, large datasets such as ShapeNet are required to train such models. In this paper, we present a new mid-level patch-based surface representation. At the level of patches, objects across different categories share similarities, which leads to more generalizable models. We then introduce a novel method to learn this patch-based representation in a canonical space, such that it is as object-agnostic as possible. We show that our representation trained on one category of objects from ShapeNet can also well represent detailed shapes from any other category. In addition, it can be trained using much fewer shapes, compared to existing approaches. We show several applications of our new representation, including shape interpolation and partial point cloud completion. Due to explicit control over positions, orientations and scales of patches, our representation is also more controllable compared to object-level representations, which enables us to deform encoded shapes non-rigidly.}, }
Endnote
%0 Report %A Tretschk, Edgar %A Tewari, Ayush %A Golyanik, Vladislav %A Zollhöfer, Michael %A Stoll, Carsten %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T PatchNets: Patch-Based Generalizable Deep Implicit 3D Shape Representations : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E8ED-9 %U https://arxiv.org/abs/2008.01639 %D 2020 %X Implicit surface representations, such as signed-distance functions, combined with deep learning have led to impressive models which can represent detailed shapes of objects with arbitrary topology. Since a continuous function is learned, the reconstructions can also be extracted at any arbitrary resolution. However, large datasets such as ShapeNet are required to train such models. In this paper, we present a new mid-level patch-based surface representation. At the level of patches, objects across different categories share similarities, which leads to more generalizable models. We then introduce a novel method to learn this patch-based representation in a canonical space, such that it is as object-agnostic as possible. We show that our representation trained on one category of objects from ShapeNet can also well represent detailed shapes from any other category. In addition, it can be trained using much fewer shapes, compared to existing approaches. We show several applications of our new representation, including shape interpolation and partial point cloud completion. Due to explicit control over positions, orientations and scales of patches, our representation is also more controllable compared to object-level representations, which enables us to deform encoded shapes non-rigidly. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Tretschk, E., Tewari, A., Golyanik, V., Zollhöfer, M., Lassner, C., and Theobalt, C. 2020d. Non-Rigid Neural Radiance Fields: Reconstruction and Novel View Synthesis of a Deforming Scene from Monocular Video. https://arxiv.org/abs/2012.12247.
(arXiv: 2012.12247)
Abstract
In this tech report, we present the current state of our ongoing work on reconstructing Neural Radiance Fields (NERF) of general non-rigid scenes via ray bending. Non-rigid NeRF (NR-NeRF) takes RGB images of a deforming object (e.g., from a monocular video) as input and then learns a geometry and appearance representation that not only allows to reconstruct the input sequence but also to re-render any time step into novel camera views with high fidelity. In particular, we show that a consumer-grade camera is sufficient to synthesize convincing bullet-time videos of short and simple scenes. In addition, the resulting representation enables correspondence estimation across views and time, and provides rigidity scores for each point in the scene. We urge the reader to watch the supplemental videos for qualitative results. We will release our code.
Export
BibTeX
@online{Tretschk_2012.12247, TITLE = {Non-Rigid Neural Radiance Fields: Reconstruction and Novel View Synthesis of a Deforming Scene from Monocular Video}, AUTHOR = {Tretschk, Edgar and Tewari, Ayush and Golyanik, Vladislav and Zollh{\"o}fer, Michael and Lassner, Christoph and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.12247}, EPRINT = {2012.12247}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {In this tech report, we present the current state of our ongoing work on reconstructing Neural Radiance Fields (NERF) of general non-rigid scenes via ray bending. Non-rigid NeRF (NR-NeRF) takes RGB images of a deforming object (e.g., from a monocular video) as input and then learns a geometry and appearance representation that not only allows to reconstruct the input sequence but also to re-render any time step into novel camera views with high fidelity. In particular, we show that a consumer-grade camera is sufficient to synthesize convincing bullet-time videos of short and simple scenes. In addition, the resulting representation enables correspondence estimation across views and time, and provides rigidity scores for each point in the scene. We urge the reader to watch the supplemental videos for qualitative results. We will release our code.}, }
Endnote
%0 Report %A Tretschk, Edgar %A Tewari, Ayush %A Golyanik, Vladislav %A Zollhöfer, Michael %A Lassner, Christoph %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Non-Rigid Neural Radiance Fields: Reconstruction and Novel View Synthesis of a Deforming Scene from Monocular Video : %G eng %U http://hdl.handle.net/21.11116/0000-0007-EA00-1 %U https://arxiv.org/abs/2012.12247 %D 2020 %X In this tech report, we present the current state of our ongoing work on reconstructing Neural Radiance Fields (NERF) of general non-rigid scenes via ray bending. Non-rigid NeRF (NR-NeRF) takes RGB images of a deforming object (e.g., from a monocular video) as input and then learns a geometry and appearance representation that not only allows to reconstruct the input sequence but also to re-render any time step into novel camera views with high fidelity. In particular, we show that a consumer-grade camera is sufficient to synthesize convincing bullet-time videos of short and simple scenes. In addition, the resulting representation enables correspondence estimation across views and time, and provides rigidity scores for each point in the scene. We urge the reader to watch the supplemental videos for qualitative results. We will release our code. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Wang, J., Mueller, F., Bernard, F., and Theobalt, C. 2020c. Generative Model-Based Loss to the Rescue: A Method to Overcome Annotation Errors for Depth-Based Hand Pose Estimation. https://arxiv.org/abs/2007.03073.
(arXiv: 2007.03073)
Abstract
We propose to use a model-based generative loss for training hand pose estimators on depth images based on a volumetric hand model. This additional loss allows training of a hand pose estimator that accurately infers the entire set of 21 hand keypoints while only using supervision for 6 easy-to-annotate keypoints (fingertips and wrist). We show that our partially-supervised method achieves results that are comparable to those of fully-supervised methods which enforce articulation consistency. Moreover, for the first time we demonstrate that such an approach can be used to train on datasets that have erroneous annotations, i.e. "ground truth" with notable measurement errors, while obtaining predictions that explain the depth images better than the given "ground truth".
Export
BibTeX
@online{Wang_2007.03073, TITLE = {Generative Model-Based Loss to the Rescue: {A} Method to Overcome Annotation Errors for Depth-Based Hand Pose Estimation}, AUTHOR = {Wang, Jiayi and Mueller, Franziska and Bernard, Florian and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2007.03073}, EPRINT = {2007.03073}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We propose to use a model-based generative loss for training hand pose estimators on depth images based on a volumetric hand model. This additional loss allows training of a hand pose estimator that accurately infers the entire set of 21 hand keypoints while only using supervision for 6 easy-to-annotate keypoints (fingertips and wrist). We show that our partially-supervised method achieves results that are comparable to those of fully-supervised methods which enforce articulation consistency. Moreover, for the first time we demonstrate that such an approach can be used to train on datasets that have erroneous annotations, i.e. "ground truth" with notable measurement errors, while obtaining predictions that explain the depth images better than the given "ground truth".}, }
Endnote
%0 Report %A Wang, Jiayi %A Mueller, Franziska %A Bernard, Florian %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Generative Model-Based Loss to the Rescue: A Method to Overcome Annotation Errors for Depth-Based Hand Pose Estimation : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E89A-6 %U https://arxiv.org/abs/2007.03073 %D 2020 %X We propose to use a model-based generative loss for training hand pose estimators on depth images based on a volumetric hand model. This additional loss allows training of a hand pose estimator that accurately infers the entire set of 21 hand keypoints while only using supervision for 6 easy-to-annotate keypoints (fingertips and wrist). We show that our partially-supervised method achieves results that are comparable to those of fully-supervised methods which enforce articulation consistency. Moreover, for the first time we demonstrate that such an approach can be used to train on datasets that have erroneous annotations, i.e. "ground truth" with notable measurement errors, while obtaining predictions that explain the depth images better than the given "ground truth". %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Wang, P., Liu, L., Chen, N., Chu, H.-K., Theobalt, C., and Wang, W. 2020d. Vid2Curve: Simultaneous Camera Motion Estimation and Thin Structure Reconstruction from an RGB Video. https://arxiv.org/abs/2005.03372.
(arXiv: 2005.03372)
Abstract
Thin structures, such as wire-frame sculptures, fences, cables, power lines, and tree branches, are common in the real world. It is extremely challenging to acquire their 3D digital models using traditional image-based or depth-based reconstruction methods because thin structures often lack distinct point features and have severe self-occlusion. We propose the first approach that simultaneously estimates camera motion and reconstructs the geometry of complex 3D thin structures in high quality from a color video captured by a handheld camera. Specifically, we present a new curve-based approach to estimate accurate camera poses by establishing correspondences between featureless thin objects in the foreground in consecutive video frames, without requiring visual texture in the background scene to lock on. Enabled by this effective curve-based camera pose estimation strategy, we develop an iterative optimization method with tailored measures on geometry, topology as well as self-occlusion handling for reconstructing 3D thin structures. Extensive validations on a variety of thin structures show that our method achieves accurate camera pose estimation and faithful reconstruction of 3D thin structures with complex shape and topology at a level that has not been attained by other existing reconstruction methods.
Export
BibTeX
@online{Wang2005.03372, TITLE = {{Vid2Curve}: {S}imultaneous Camera Motion Estimation and Thin Structure Reconstruction from an {RGB} Video}, AUTHOR = {Wang, Peng and Liu, Lingjie and Chen, Nenglun and Chu, Hung-Kuo and Theobalt, Christian and Wang, Wenping}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2005.03372}, EPRINT = {2005.03372}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Thin structures, such as wire-frame sculptures, fences, cables, power lines, and tree branches, are common in the real world. It is extremely challenging to acquire their 3D digital models using traditional image-based or depth-based reconstruction methods because thin structures often lack distinct point features and have severe self-occlusion. We propose the first approach that simultaneously estimates camera motion and reconstructs the geometry of complex 3D thin structures in high quality from a color video captured by a handheld camera. Specifically, we present a new curve-based approach to estimate accurate camera poses by establishing correspondences between featureless thin objects in the foreground in consecutive video frames, without requiring visual texture in the background scene to lock on. Enabled by this effective curve-based camera pose estimation strategy, we develop an iterative optimization method with tailored measures on geometry, topology as well as self-occlusion handling for reconstructing 3D thin structures. Extensive validations on a variety of thin structures show that our method achieves accurate camera pose estimation and faithful reconstruction of 3D thin structures with complex shape and topology at a level that has not been attained by other existing reconstruction methods.}, }
Endnote
%0 Report %A Wang, Peng %A Liu, Lingjie %A Chen, Nenglun %A Chu, Hung-Kuo %A Theobalt, Christian %A Wang, Wenping %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Vid2Curve: Simultaneous Camera Motion Estimation and Thin Structure Reconstruction from an RGB Video : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E122-4 %U https://arxiv.org/abs/2005.03372 %D 2020 %X Thin structures, such as wire-frame sculptures, fences, cables, power lines, and tree branches, are common in the real world. It is extremely challenging to acquire their 3D digital models using traditional image-based or depth-based reconstruction methods because thin structures often lack distinct point features and have severe self-occlusion. We propose the first approach that simultaneously estimates camera motion and reconstructs the geometry of complex 3D thin structures in high quality from a color video captured by a handheld camera. Specifically, we present a new curve-based approach to estimate accurate camera poses by establishing correspondences between featureless thin objects in the foreground in consecutive video frames, without requiring visual texture in the background scene to lock on. Enabled by this effective curve-based camera pose estimation strategy, we develop an iterative optimization method with tailored measures on geometry, topology as well as self-occlusion handling for reconstructing 3D thin structures. Extensive validations on a variety of thin structures show that our method achieves accurate camera pose estimation and faithful reconstruction of 3D thin structures with complex shape and topology at a level that has not been attained by other existing reconstruction methods. %K Computer Science, Graphics, cs.GR,Computer Science, Computer Vision and Pattern Recognition, cs.CV,eess.IV
Xu, Y., Fan, T., Yuan, Y., and Singh, G. 2020c. Ladybird: Quasi-Monte Carlo Sampling for Deep Implicit Field Based 3D Reconstruction with Symmetry. https://arxiv.org/abs/2007.13393.
(arXiv: 2007.13393)
Abstract
Deep implicit field regression methods are effective for 3D reconstruction from single-view images. However, the impact of different sampling patterns on the reconstruction quality is not well-understood. In this work, we first study the effect of point set discrepancy on the network training. Based on Farthest Point Sampling algorithm, we propose a sampling scheme that theoretically encourages better generalization performance, and results in fast convergence for SGD-based optimization algorithms. Secondly, based on the reflective symmetry of an object, we propose a feature fusion method that alleviates issues due to self-occlusions which makes it difficult to utilize local image features. Our proposed system Ladybird is able to create high quality 3D object reconstructions from a single input image. We evaluate Ladybird on a large scale 3D dataset (ShapeNet) demonstrating highly competitive results in terms of Chamfer distance, Earth Mover's distance and Intersection Over Union (IoU).
Export
BibTeX
@online{Xu_arXiv2007.13393, TITLE = {Ladybird: {Quasi-Monte Carlo} Sampling for Deep Implicit Field Based {3D} Reconstruction with Symmetry}, AUTHOR = {Xu, Yifan and Fan, Tianqi and Yuan, Yi and Singh, Gurprit}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2007.13393}, EPRINT = {2007.13393}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Deep implicit field regression methods are effective for 3D reconstruction from single-view images. However, the impact of different sampling patterns on the reconstruction quality is not well-understood. In this work, we first study the effect of point set discrepancy on the network training. Based on Farthest Point Sampling algorithm, we propose a sampling scheme that theoretically encourages better generalization performance, and results in fast convergence for SGD-based optimization algorithms. Secondly, based on the reflective symmetry of an object, we propose a feature fusion method that alleviates issues due to self-occlusions which makes it difficult to utilize local image features. Our proposed system Ladybird is able to create high quality 3D object reconstructions from a single input image. We evaluate Ladybird on a large scale 3D dataset (ShapeNet) demonstrating highly competitive results in terms of Chamfer distance, Earth Mover's distance and Intersection Over Union (IoU).}, }
Endnote
%0 Report %A Xu, Yifan %A Fan, Tianqi %A Yuan, Yi %A Singh, Gurprit %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Ladybird: Quasi-Monte Carlo Sampling for Deep Implicit Field Based 3D Reconstruction with Symmetry : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CECA-E %U https://arxiv.org/abs/2007.13393 %D 2020 %X Deep implicit field regression methods are effective for 3D reconstruction from single-view images. However, the impact of different sampling patterns on the reconstruction quality is not well-understood. In this work, we first study the effect of point set discrepancy on the network training. Based on Farthest Point Sampling algorithm, we propose a sampling scheme that theoretically encourages better generalization performance, and results in fast convergence for SGD-based optimization algorithms. Secondly, based on the reflective symmetry of an object, we propose a feature fusion method that alleviates issues due to self-occlusions which makes it difficult to utilize local image features. Our proposed system Ladybird is able to create high quality 3D object reconstructions from a single input image. We evaluate Ladybird on a large scale 3D dataset (ShapeNet) demonstrating highly competitive results in terms of Chamfer distance, Earth Mover's distance and Intersection Over Union (IoU). %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
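The Ladybird entry above bases its sampling scheme on farthest point sampling (FPS). For orientation only, here is a minimal NumPy sketch of the generic greedy FPS rule; it is not the authors' implementation, and the function name, array shapes, and random start point are assumptions.

import numpy as np

def farthest_point_sampling(points: np.ndarray, k: int, seed: int = 0) -> np.ndarray:
    """Greedily pick k mutually distant points from an (N, 3) array; returns their indices."""
    rng = np.random.default_rng(seed)
    n = points.shape[0]
    selected = np.empty(k, dtype=np.int64)
    selected[0] = rng.integers(n)                 # arbitrary first sample
    # Distance from every point to the nearest selected point so far.
    dist = np.linalg.norm(points - points[selected[0]], axis=1)
    for i in range(1, k):
        selected[i] = int(np.argmax(dist))        # farthest from the current set
        new_dist = np.linalg.norm(points - points[selected[i]], axis=1)
        dist = np.minimum(dist, new_dist)         # keep nearest-selected distances
    return selected

Each iteration adds the point farthest from the already selected set, which spreads samples evenly and keeps the discrepancy of the point set low; this is the property the abstract links to better generalization and fast convergence of SGD-based training.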
Yenamandra, T., Tewari, A., Bernard, F., et al. 2020. i3DMM: Deep Implicit 3D Morphable Model of Human Heads. https://arxiv.org/abs/2011.14143.
(arXiv: 2011.14143)
Abstract
We present the first deep implicit 3D morphable model (i3DMM) of full heads. Unlike earlier morphable face models it not only captures identity-specific geometry, texture, and expressions of the frontal face, but also models the entire head, including hair. We collect a new dataset consisting of 64 people with different expressions and hairstyles to train i3DMM. Our approach has the following favorable properties: (i) It is the first full head morphable model that includes hair. (ii) In contrast to mesh-based models it can be trained on merely rigidly aligned scans, without requiring difficult non-rigid registration. (iii) We design a novel architecture to decouple the shape model into an implicit reference shape and a deformation of this reference shape. With that, dense correspondences between shapes can be learned implicitly. (iv) This architecture allows us to semantically disentangle the geometry and color components, as color is learned in the reference space. Geometry is further disentangled as identity, expressions, and hairstyle, while color is disentangled as identity and hairstyle components. We show the merits of i3DMM using ablation studies, comparisons to state-of-the-art models, and applications such as semantic head editing and texture transfer. We will make our model publicly available.
Export
BibTeX
@online{Yenamandra_arXiv2011.14143, TITLE = {i{3D}MM: Deep Implicit {3D} Morphable Model of Human Heads}, AUTHOR = {Yenamandra, Tarun and Tewari, Ayush and Bernard, Florian and Seidel, Hans-Peter and Elgharib, Mohamed and Cremers, Daniel and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2011.14143}, EPRINT = {2011.14143}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present the first deep implicit 3D morphable model (i3DMM) of full heads. Unlike earlier morphable face models it not only captures identity-specific geometry, texture, and expressions of the frontal face, but also models the entire head, including hair. We collect a new dataset consisting of 64 people with different expressions and hairstyles to train i3DMM. Our approach has the following favorable properties: (i) It is the first full head morphable model that includes hair. (ii) In contrast to mesh-based models it can be trained on merely rigidly aligned scans, without requiring difficult non-rigid registration. (iii) We design a novel architecture to decouple the shape model into an implicit reference shape and a deformation of this reference shape. With that, dense correspondences between shapes can be learned implicitly. (iv) This architecture allows us to semantically disentangle the geometry and color components, as color is learned in the reference space. Geometry is further disentangled as identity, expressions, and hairstyle, while color is disentangled as identity and hairstyle components. We show the merits of i3DMM using ablation studies, comparisons to state-of-the-art models, and applications such as semantic head editing and texture transfer. We will make our model publicly available.}, }
Endnote
%0 Report %A Yenamandra, Tarun %A Tewari, Ayush %A Bernard, Florian %A Seidel, Hans-Peter %A Elgharib, Mohamed %A Cremers, Daniel %A Theobalt, Christian %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T i3DMM: Deep Implicit 3D Morphable Model of Human Heads : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B702-8 %U https://arxiv.org/abs/2011.14143 %D 2020 %X We present the first deep implicit 3D morphable model (i3DMM) of full heads. Unlike earlier morphable face models it not only captures identity-specific geometry, texture, and expressions of the frontal face, but also models the entire head, including hair. We collect a new dataset consisting of 64 people with different expressions and hairstyles to train i3DMM. Our approach has the following favorable properties: (i) It is the first full head morphable model that includes hair. (ii) In contrast to mesh-based models it can be trained on merely rigidly aligned scans, without requiring difficult non-rigid registration. (iii) We design a novel architecture to decouple the shape model into an implicit reference shape and a deformation of this reference shape. With that, dense correspondences between shapes can be learned implicitly. (iv) This architecture allows us to semantically disentangle the geometry and color components, as color is learned in the reference space. Geometry is further disentangled as identity, expressions, and hairstyle, while color is disentangled as identity and hairstyle components. We show the merits of i3DMM using ablation studies, comparisons to state-of-the-art models, and applications such as semantic head editing and texture transfer. We will make our model publicly available. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG
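The i3DMM entry above decouples the implicit head model into a reference shape and a deformation of that reference, with color defined in the reference space. The following PyTorch sketch only illustrates that decomposition; the class names, layer sizes, latent-code handling, and conditioning are assumptions, not the paper's architecture.

import torch
import torch.nn as nn

class MLP(nn.Module):
    """Small fully connected network reused for all sub-fields."""
    def __init__(self, in_dim: int, out_dim: int, width: int = 256, depth: int = 4):
        super().__init__()
        layers, d = [], in_dim
        for _ in range(depth - 1):
            layers += [nn.Linear(d, width), nn.ReLU()]
            d = width
        layers.append(nn.Linear(d, out_dim))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

class ImplicitHeadModel(nn.Module):
    """A deformation field warps query points into a shared reference space,
    where the signed-distance and color fields are evaluated."""
    def __init__(self, latent_dim: int = 64):
        super().__init__()
        self.deform = MLP(3 + latent_dim, 3)     # shape-code-conditioned offset
        self.ref_sdf = MLP(3, 1)                 # signed distance in reference space
        self.ref_color = MLP(3 + latent_dim, 3)  # color lives in reference space

    def forward(self, x, z_shape, z_color):
        # x: (B, 3) query points; z_shape, z_color: (B, latent_dim) latent codes.
        x_ref = x + self.deform(torch.cat([x, z_shape], dim=-1))
        sdf = self.ref_sdf(x_ref)
        rgb = torch.sigmoid(self.ref_color(torch.cat([x_ref, z_color], dim=-1)))
        return sdf, rgb

Because every identity and expression is expressed as a warp of one shared reference field, query points that land at the same reference location correspond across shapes, which is how dense correspondences can be learned implicitly.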
Yoon, J.S., Liu, L., Golyanik, V., Sarkar, K., Park, H.S., and Theobalt, C. 2020. Pose-Guided Human Animation from a Single Image in the Wild. https://arxiv.org/abs/2012.03796.
(arXiv: 2012.03796)
Abstract
We present a new pose transfer method for synthesizing a human animation from a single image of a person controlled by a sequence of body poses. Existing pose transfer methods exhibit significant visual artifacts when applying to a novel scene, resulting in temporal inconsistency and failures in preserving the identity and textures of the person. To address these limitations, we design a compositional neural network that predicts the silhouette, garment labels, and textures. Each modular network is explicitly dedicated to a subtask that can be learned from the synthetic data. At the inference time, we utilize the trained network to produce a unified representation of appearance and its labels in UV coordinates, which remains constant across poses. The unified representation provides an incomplete yet strong guidance to generating the appearance in response to the pose change. We use the trained network to complete the appearance and render it with the background. With these strategies, we are able to synthesize human animations that can preserve the identity and appearance of the person in a temporally coherent way without any fine-tuning of the network on the testing scene. Experiments show that our method outperforms the state-of-the-arts in terms of synthesis quality, temporal coherence, and generalization ability.
Export
BibTeX
@online{Yoon_2012.03796, TITLE = {Pose-Guided Human Animation from a Single Image in the Wild}, AUTHOR = {Yoon, Jae Shin and Liu, Lingjie and Golyanik, Vladislav and Sarkar, Kripasindhu and Park, Hyun Soo and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.03796}, EPRINT = {2012.03796}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a new pose transfer method for synthesizing a human animation from a single image of a person controlled by a sequence of body poses. Existing pose transfer methods exhibit significant visual artifacts when applying to a novel scene, resulting in temporal inconsistency and failures in preserving the identity and textures of the person. To address these limitations, we design a compositional neural network that predicts the silhouette, garment labels, and textures. Each modular network is explicitly dedicated to a subtask that can be learned from the synthetic data. At the inference time, we utilize the trained network to produce a unified representation of appearance and its labels in UV coordinates, which remains constant across poses. The unified representation provides an incomplete yet strong guidance to generating the appearance in response to the pose change. We use the trained network to complete the appearance and render it with the background. With these strategies, we are able to synthesize human animations that can preserve the identity and appearance of the person in a temporally coherent way without any fine-tuning of the network on the testing scene. Experiments show that our method outperforms the state-of-the-arts in terms of synthesis quality, temporal coherence, and generalization ability.}, }
Endnote
%0 Report %A Yoon, Jae Shin %A Liu, Lingjie %A Golyanik, Vladislav %A Sarkar, Kripasindhu %A Park, Hyun Soo %A Theobalt, Christian %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Pose-Guided Human Animation from a Single Image in the Wild : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E9F3-0 %U https://arxiv.org/abs/2012.03796 %D 2020 %X We present a new pose transfer method for synthesizing a human animation from a single image of a person controlled by a sequence of body poses. Existing pose transfer methods exhibit significant visual artifacts when applying to a novel scene, resulting in temporal inconsistency and failures in preserving the identity and textures of the person. To address these limitations, we design a compositional neural network that predicts the silhouette, garment labels, and textures. Each modular network is explicitly dedicated to a subtask that can be learned from the synthetic data. At the inference time, we utilize the trained network to produce a unified representation of appearance and its labels in UV coordinates, which remains constant across poses. The unified representation provides an incomplete yet strong guidance to generating the appearance in response to the pose change. We use the trained network to complete the appearance and render it with the background. With these strategies, we are able to synthesize human animations that can preserve the identity and appearance of the person in a temporally coherent way without any fine-tuning of the network on the testing scene. Experiments show that our method outperforms the state-of-the-arts in terms of synthesis quality, temporal coherence, and generalization ability. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Zhou, Y., Habermann, M., Xu, W., Habibie, I., Theobalt, C., and Xu, F. 2020b. Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data. https://arxiv.org/abs/2003.09572.
(arXiv: 2003.09572)
Abstract
We present a novel method for monocular hand shape and pose estimation at unprecedented runtime performance of 100fps and at state-of-the-art accuracy. This is enabled by a new learning based architecture designed such that it can make use of all the sources of available hand training data: image data with either 2D or 3D annotations, as well as stand-alone 3D animations without corresponding image data. It features a 3D hand joint detection module and an inverse kinematics module which regresses not only 3D joint positions but also maps them to joint rotations in a single feed-forward pass. This output makes the method more directly usable for applications in computer vision and graphics compared to only regressing 3D joint positions. We demonstrate that our architectural design leads to a significant quantitative and qualitative improvement over the state of the art on several challenging benchmarks. Our model is publicly available for future research.
Export
BibTeX
@online{Zhou2003.09572, TITLE = {Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data}, AUTHOR = {Zhou, Yuxiao and Habermann, Marc and Xu, Weipeng and Habibie, Ikhsanul and Theobalt, Christian and Xu, Feng}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2003.09572}, EPRINT = {2003.09572}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a novel method for monocular hand shape and pose estimation at unprecedented runtime performance of 100fps and at state-of-the-art accuracy. This is enabled by a new learning based architecture designed such that it can make use of all the sources of available hand training data: image data with either 2D or 3D annotations, as well as stand-alone 3D animations without corresponding image data. It features a 3D hand joint detection module and an inverse kinematics module which regresses not only 3D joint positions but also maps them to joint rotations in a single feed-forward pass. This output makes the method more directly usable for applications in computer vision and graphics compared to only regressing 3D joint positions. We demonstrate that our architectural design leads to a significant quantitative and qualitative improvement over the state of the art on several challenging benchmarks. Our model is publicly available for future research.}, }
Endnote
%0 Report %A Zhou, Yuxiao %A Habermann, Marc %A Xu, Weipeng %A Habibie, Ikhsanul %A Theobalt, Christian %A Xu, Feng %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E0D3-D %U https://arxiv.org/abs/2003.09572 %D 2020 %X We present a novel method for monocular hand shape and pose estimation at unprecedented runtime performance of 100fps and at state-of-the-art accuracy. This is enabled by a new learning based architecture designed such that it can make use of all the sources of available hand training data: image data with either 2D or 3D annotations, as well as stand-alone 3D animations without corresponding image data. It features a 3D hand joint detection module and an inverse kinematics module which regresses not only 3D joint positions but also maps them to joint rotations in a single feed-forward pass. This output makes the method more directly usable for applications in computer vision and graphics compared to only regressing 3D joint positions. We demonstrate that our architectural design leads to a significant quantitative and qualitative improvement over the state of the art on several challenging benchmarks. Our model is publicly available for future research. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
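The entry above describes a 3D joint detection module followed by an inverse kinematics module that maps joint positions to joint rotations in a single feed-forward pass. The PyTorch sketch below is only a schematic of that two-module layout; the backbone, feature sizes, and axis-angle output parameterization are hypothetical and do not reproduce the published network.

import torch
import torch.nn as nn

class KeypointsToRotations(nn.Module):
    """Schematic pipeline: keypoint regression, then a learned IK head
    that converts 3D joint positions into per-joint rotations (axis-angle)."""
    def __init__(self, n_joints: int = 21, feat_dim: int = 128):
        super().__init__()
        self.n_joints = n_joints
        # Stand-in image encoder; a real system would use a CNN backbone.
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
            nn.Linear(64, feat_dim), nn.ReLU())
        self.joint_head = nn.Linear(feat_dim, n_joints * 3)        # 3D joint positions
        self.ik_head = nn.Sequential(                              # per-joint rotations
            nn.Linear(n_joints * 3, 128), nn.ReLU(), nn.Linear(128, n_joints * 3))

    def forward(self, image):
        # image: (B, 3, H, W) color crop of the hand.
        feat = self.encoder(image)
        joints = self.joint_head(feat).view(-1, self.n_joints, 3)
        rotations = self.ik_head(joints.flatten(1)).view(-1, self.n_joints, 3)
        return joints, rotations

Regressing rotations rather than only positions is what makes the output directly usable for driving rigged hand models, which is the usability point the abstract makes.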
Zhou, Y., Habermann, M., Habibie, I., Tewari, A., Theobalt, C., and Xu, F. 2020c. Monocular Real-time Full Body Capture with Inter-part Correlations. https://arxiv.org/abs/2012.06087.
(arXiv: 2012.06087)
Abstract
We present the first method for real-time full body capture that estimates shape and motion of body and hands together with a dynamic 3D face model from a single color image. Our approach uses a new neural network architecture that exploits correlations between body and hands at high computational efficiency. Unlike previous works, our approach is jointly trained on multiple datasets focusing on hand, body or face separately, without requiring data where all the parts are annotated at the same time, which is much more difficult to create at sufficient variety. The possibility of such multi-dataset training enables superior generalization ability. In contrast to earlier monocular full body methods, our approach captures more expressive 3D face geometry and color by estimating the shape, expression, albedo and illumination parameters of a statistical face model. Our method achieves competitive accuracy on public benchmarks, while being significantly faster and providing more complete face reconstructions.
Export
BibTeX
@online{Zhou_2012.06087, TITLE = {Monocular Real-time Full Body Capture with Inter-part Correlations}, AUTHOR = {Zhou, Yuxiao and Habermann, Marc and Habibie, Ikhsanul and Tewari, Ayush and Theobalt, Christian and Xu, Feng}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.06087}, EPRINT = {2012.06087}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present the first method for real-time full body capture that estimates shape and motion of body and hands together with a dynamic 3D face model from a single color image. Our approach uses a new neural network architecture that exploits correlations between body and hands at high computational efficiency. Unlike previous works, our approach is jointly trained on multiple datasets focusing on hand, body or face separately, without requiring data where all the parts are annotated at the same time, which is much more difficult to create at sufficient variety. The possibility of such multi-dataset training enables superior generalization ability. In contrast to earlier monocular full body methods, our approach captures more expressive 3D face geometry and color by estimating the shape, expression, albedo and illumination parameters of a statistical face model. Our method achieves competitive accuracy on public benchmarks, while being significantly faster and providing more complete face reconstructions.}, }
Endnote
%0 Report %A Zhou, Yuxiao %A Habermann, Marc %A Habibie, Ikhsanul %A Tewari, Ayush %A Theobalt, Christian %A Xu, Feng %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Monocular Real-time Full Body Capture with Inter-part Correlations : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E9FB-8 %U https://arxiv.org/abs/2012.06087 %D 2020 %X We present the first method for real-time full body capture that estimates shape and motion of body and hands together with a dynamic 3D face model from a single color image. Our approach uses a new neural network architecture that exploits correlations between body and hands at high computational efficiency. Unlike previous works, our approach is jointly trained on multiple datasets focusing on hand, body or face separately, without requiring data where all the parts are annotated at the same time, which is much more difficult to create at sufficient variety. The possibility of such multi-dataset training enables superior generalization ability. In contrast to earlier monocular full body methods, our approach captures more expressive 3D face geometry and color by estimating the shape, expression, albedo and illumination parameters of a statistical face model. Our method achieves competitive accuracy on public benchmarks, while being significantly faster and providing more complete face reconstructions. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Report
Qian, N., Wang, J., Mueller, F., Bernard, F., Golyanik, V., and Theobalt, C. 2020b. Parametric Hand Texture Model for 3D Hand Reconstruction and Personalization. Max-Planck-Institut für Informatik, Saarbrücken.
Abstract
3D hand reconstruction from image data is a widely-studied problem in computer vision and graphics, and has a particularly high relevance for virtual and augmented reality. Although several 3D hand reconstruction approaches leverage hand models as a strong prior to resolve ambiguities and achieve a more robust reconstruction, most existing models account only for the hand shape and poses and do not model the texture. To fill this gap, in this work we present the first parametric texture model of human hands. Our model spans several dimensions of hand appearance variability (e.g., related to gender, ethnicity, or age) and only requires a commodity camera for data acquisition. Experimentally, we demonstrate that our appearance model can be used to tackle a range of challenging problems such as 3D hand reconstruction from a single monocular image. Furthermore, our appearance model can be used to define a neural rendering layer that enables training with a self-supervised photometric loss. We make our model publicly available.
Export
BibTeX
@techreport{Qian_report2020, TITLE = {Parametric Hand Texture Model for {3D} Hand Reconstruction and Personalization}, AUTHOR = {Qian, Neng and Wang, Jiayi and Mueller, Franziska and Bernard, Florian and Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0946-011X}, NUMBER = {MPI-I-2020-4-001}, INSTITUTION = {Max-Planck-Institut f{\"u}r Informatik}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {3D hand reconstruction from image data is a widely-studied problem in computer vision and graphics, and has a particularly high relevance for virtual and augmented reality. Although several 3D hand reconstruction approaches leverage hand models as a strong prior to resolve ambiguities and achieve a more robust reconstruction, most existing models account only for the hand shape and poses and do not model the texture. To fill this gap, in this work we present the first parametric texture model of human hands. Our model spans several dimensions of hand appearance variability (e.g., related to gender, ethnicity, or age) and only requires a commodity camera for data acquisition. Experimentally, we demonstrate that our appearance model can be used to tackle a range of challenging problems such as 3D hand reconstruction from a single monocular image. Furthermore, our appearance model can be used to define a neural rendering layer that enables training with a self-supervised photometric loss. We make our model publicly available.}, TYPE = {Research Report}, }
Endnote
%0 Report %A Qian, Neng %A Wang, Jiayi %A Mueller, Franziska %A Bernard, Florian %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Parametric Hand Texture Model for 3D Hand Reconstruction and Personalization : %G eng %U http://hdl.handle.net/21.11116/0000-0006-9128-9 %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2020 %P 37 p. %X 3D hand reconstruction from image data is a widely-studied problem in computer vision and graphics, and has a particularly high relevance for virtual and augmented reality. Although several 3D hand reconstruction approaches leverage hand models as a strong prior to resolve ambiguities and achieve a more robust reconstruction, most existing models account only for the hand shape and poses and do not model the texture. To fill this gap, in this work we present the first parametric texture model of human hands. Our model spans several dimensions of hand appearance variability (e.g., related to gender, ethnicity, or age) and only requires a commodity camera for data acquisition. Experimentally, we demonstrate that our appearance model can be used to tackle a range of challenging problems such as 3D hand reconstruction from a single monocular image. Furthermore, our appearance model can be used to define a neural rendering layer that enables training with a self-supervised photometric loss. We make our model publicly available. %K hand texture model, appearance modeling, hand tracking, 3D hand reconstruction %B Research Report %@ false
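The report above introduces a parametric texture model of human hands. A common way to build such an appearance model is a linear (PCA-style) basis over registered textures; the sketch below shows only that generic construction, with hypothetical class name and dimensions, and is not necessarily the report's exact parameterization.

import numpy as np

class LinearTextureModel:
    """Linear appearance model: texture(w) = mean + basis @ (w * stddev)."""
    def __init__(self, textures: np.ndarray, n_components: int = 10):
        # textures: (num_scans, num_texels * 3) flattened per-texel RGB values.
        self.mean = textures.mean(axis=0)
        centered = textures - self.mean
        # SVD of the centered data yields the principal appearance components.
        _, s, vt = np.linalg.svd(centered, full_matrices=False)
        self.basis = vt[:n_components].T                        # (num_texels * 3, n_components)
        self.stddev = s[:n_components] / np.sqrt(len(textures) - 1)

    def synthesize(self, weights: np.ndarray) -> np.ndarray:
        """Reconstruct a full texture from low-dimensional weights."""
        return self.mean + self.basis @ (weights * self.stddev)

    def project(self, texture: np.ndarray) -> np.ndarray:
        """Least-squares fit of model weights to an observed texture."""
        return (self.basis.T @ (texture - self.mean)) / self.stddev

With such a low-dimensional parameterization, fitting an appearance reduces to optimizing a handful of weights, for instance against the self-supervised photometric loss mentioned in the abstract.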
Thesis
Meka, A. 2020. Live inverse rendering. Universität des Saarlandes, Saarbrücken.
Export
BibTeX
@phdthesis{Meka_2019, TITLE = {Live inverse rendering}, AUTHOR = {Meka, Abhimitra}, LANGUAGE = {eng}, DOI = {10.22028/D291-30206}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, }
Endnote
%0 Thesis %A Meka, Abhimitra %Y Theobalt, Christian %A referee: Drettakis, George %+ Computer Graphics, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Live inverse rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0007-715A-5 %R 10.22028/D291-30206 %I Universität des Saarlandes %C Saarbrücken %D 2020 %P 189 p. %V phd %9 phd %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/28721
Mueller, F. 2020. Real-time 3D Hand Reconstruction in Challenging Scenes from a Single Color or Depth Camera. Universität des Saarlandes, Saarbrücken.
Export
BibTeX
@phdthesis{MuellerFDiss_2020, TITLE = {Real-time 3{D} Hand Reconstruction in Challenging Scenes from a Single Color or Depth Camera}, AUTHOR = {Mueller, Franziska}, LANGUAGE = {eng}, DOI = {10.22028/D291-32846}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, }
Endnote
%0 Thesis %A Mueller, Franziska %Y Theobalt, Christian %A referee: Seidel, Hans-Peter %A referee: Izadi, Shahram %+ Computer Graphics, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Real-time 3D Hand Reconstruction in Challenging Scenes from a Single Color or Depth Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D8C7-5 %R 10.22028/D291-32846 %I Universität des Saarlandes %C Saarbrücken %D 2020 %P 155 p. %V phd %9 phd %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/30313