D4: Computer Graphics

Publications

2021
Ali, J., Lahoti, P., and Gummadi, K.P. Accounting for Model Uncertainty in Algorithmic Discrimination. Fourth AAAI/ACM Conference on Artificial Intelligence, Ethics and Society.
(Accepted/in press)
Chen, B., Wang, C., Piovarči, M., et al. 2021. The Effect of Geometry and Illumination on Appearance Perception of Different Material Categories. The Visual Computer.
Delanoy, J., Serrano, A., Masia, B., and Gutierrez, D. 2021. Perception of Material Appearance: A Comparison between Painted and Rendered Images. Journal of Vision 21, 5.
Elek, O., Zhang, R., Sumin, D., et al. 2021. Robust and Practical Measurement of Volume Transport Parameters in Solid Photo-polymer Materials for 3D Printing. Optics Express 29, 5.
Fox, G., Liu, W., Kim, H., Seidel, H.-P., Elgharib, M., and Theobalt, C. 2021. VideoForensicsHQ: Detecting High-quality Manipulated Face Videos. IEEE International Conference on Multimedia and Expo (ICME 2021), IEEE.
Hladký, J., Seidel, H.-P., and Steinberger, M. 2021. SnakeBinning: Efficient Temporally Coherent Triangle Packing for Shading Streaming. Computer Graphics Forum (Proc. EUROGRAPHICS 2021) 40, 2.
Kappel, M., Golyanik, V., Elgharib, M., et al. High-Fidelity Neural Human Motion Transfer from Monocular Video. IEEE/CVF Conference on Computer Vision and Pattern Recognition (Oral) (CVPR 2021), IEEE.
(Accepted/in press)
Lagunas, M., Serrano, A., Gutierrez, D., and Masia, B. 2021. The Joint Role of Geometry and Illumination on Material Recognition. Journal of Vision 21, 2.
Mallikarjun B R, Tewari, A., Seidel, H.-P., Elgharib, M., and Theobalt, C. Learning Complete 3D Morphable Face Models from Images and Videos. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2021), IEEE.
(Accepted/in press)
Mallikarjun B R, Tewari, A., Oh, T.-H., et al. Monocular Reconstruction of Neural Face Reflectance Fields. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2021), IEEE.
(Accepted/in press)
Mallikarjun B R, Tewari, A., Dib, A., et al. 2021. PhotoApp: Photorealistic Appearance Editing of Head Portraits. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2021) 40, 4.
Martin, D., Malpica, S., Gutierrez, D., Masia, B., and Serrano, A. 2021. Multimodality in VR: A Survey. https://arxiv.org/abs/2101.07906.
(arXiv: 2101.07906)
Abstract
Virtual reality has the potential to change the way we create and consume content in our everyday life. Entertainment, training, design and manufacturing, communication, or advertising are all applications that already benefit from this new medium reaching consumer level. VR is inherently different from traditional media: it offers a more immersive experience, and has the ability to elicit a sense of presence through the place and plausibility illusions. It also gives the user unprecedented capabilities to explore their environment, in contrast with traditional media. In VR, like in the real world, users integrate the multimodal sensory information they receive to create a unified perception of the virtual world. Therefore, the sensory cues that are available in a virtual environment can be leveraged to enhance the final experience. This may include increasing realism, or the sense of presence; predicting or guiding the attention of the user through the experience; or increasing their performance if the experience involves the completion of certain tasks. In this state-of-the-art report, we survey the body of work addressing multimodality in virtual reality, its role and benefits in the final user experience. The works here reviewed thus encompass several fields of research, including computer graphics, human computer interaction, or psychology and perception. Additionally, we give an overview of different applications that leverage multimodal input in areas such as medicine, training and education, or entertainment; we include works in which the integration of multiple sensory information yields significant improvements, demonstrating how multimodality can play a fundamental role in the way VR systems are designed, and VR experiences created and consumed.
Masia, B., Camon, J., Gutierrez, D., and Serrano, A. 2021. Influence of Directional Sound Cues on Users’ Exploration Across 360° Movie Cuts. IEEE Computer Graphics and Applications 41, 4.
Meka, A., Shafiei, M., Zollhöfer, M., Richardt, C., and Theobalt, C. Real-time Global Illumination Decomposition of Videos. ACM Transactions on Graphics.
(Accepted/in press)
Nehvi, J., Golyanik, V., Mueller, F., Seidel, H.-P., Elgharib, M., and Theobalt, C. Differentiable Event Stream Simulator for Non-Rigid 3D Tracking. Third International Workshop on Event-Based Vision (CVPR 2021).
(Accepted/in press)
Rao, S., Stutz, D., and Schiele, B. 2021. Adversarial Training Against Location-Optimized Adversarial Patches. Computer Vision – ECCV 2020 Workshops, Springer.
Rittig, T., Sumin, D., Babaei, V., et al. 2021. Neural Acceleration of Scattering-Aware Color 3D Printing. Computer Graphics Forum (Proc. EUROGRAPHICS 2021) 40, 2.
Sarkar, K., Mehta, D., Xu, W., Golyanik, V., and Theobalt, C. 2021. Neural Re-Rendering of Humans from a Single Image. https://arxiv.org/abs/2101.04104.
(arXiv: 2101.04104)
Abstract
Human re-rendering from a single image is a starkly under-constrained problem, and state-of-the-art algorithms often exhibit undesired artefacts, such as over-smoothing, unrealistic distortions of the body parts and garments, or implausible changes of the texture. To address these challenges, we propose a new method for neural re-rendering of a human under a novel user-defined pose and viewpoint, given one input image. Our algorithm represents body pose and shape as a parametric mesh which can be reconstructed from a single image and easily reposed. Instead of a colour-based UV texture map, our approach further employs a learned high-dimensional UV feature map to encode appearance. This rich implicit representation captures detailed appearance variation across poses, viewpoints, person identities and clothing styles better than learned colour texture maps. The body model with the rendered feature maps is fed through a neural image-translation network that creates the final rendered colour image. The above components are combined in an end-to-end-trained neural network architecture that takes as input a source person image, and images of the parametric body model in the source pose and desired target pose. Experimental evaluation demonstrates that our approach produces higher quality single image re-rendering results than existing methods.
Serrano, A., Chen, B., Wang, C., et al. 2021. The Effect of Shape and Illumination on Material Perception: Model and Applications. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2021) 40, 4.
Van Onzenoodt, C., Singh, G., Ropinski, T., and Ritschel, T. 2021a. Blue Noise Plots. Computer Graphics Forum (Proc. EUROGRAPHICS 2021) 40, 2.
Van Onzenoodt, C., Singh, G., Ropinski, T., and Ritschel, T. 2021b. Blue Noise Plots. https://arxiv.org/abs/2102.04072.
(arXiv: 2102.04072)
Abstract
We propose Blue Noise Plots, two-dimensional dot plots that depict data points of univariate data sets. While often one-dimensional strip plots are used to depict such data, one of their main problems is visual clutter which results from overlap. To reduce this overlap, jitter plots were introduced, whereby an additional, non-encoding plot dimension is introduced, along which the dots representing the data points are randomly perturbed. Unfortunately, this randomness can suggest non-existent clusters, and often leads to visually unappealing plots, in which overlap might still occur. To overcome these shortcomings, we introduce Blue Noise Plots, where random jitter along the non-encoding plot dimension is replaced by optimizing all dots to keep a minimum distance in 2D, i.e., blue noise. We evaluate the effectiveness as well as the aesthetics of Blue Noise Plots through both a quantitative and a qualitative user study.
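
To make the stated idea concrete, here is a minimal editorial sketch (not code from the paper): the data-encoding x positions stay fixed, and the positions on the non-encoding axis are relaxed, starting from random jitter, until all dots approximately keep a minimum 2D distance. Function and parameter names are hypothetical.

import numpy as np

def blue_noise_jitter(x, d_min=0.05, height=1.0, iters=200, seed=0):
    # Toy relaxation in the spirit of Blue Noise Plots (illustrative only).
    # x: data values on the encoding axis (kept fixed); returns positions on
    # the non-encoding axis so dots approximately keep a minimum 2D distance.
    rng = np.random.default_rng(seed)
    x = np.asarray(x, dtype=float)
    y = rng.uniform(0.0, height, size=x.shape)   # start from random jitter
    for _ in range(iters):
        dx = x[:, None] - x[None, :]             # pairwise offsets
        dy = y[:, None] - y[None, :]
        d = np.hypot(dx, dy)
        np.fill_diagonal(d, np.inf)              # ignore self-distances
        # push dots that are closer than d_min apart, along the y axis only
        push = np.sign(dy) * np.maximum(d_min - d, 0.0)
        y = np.clip(y + 0.5 * push.sum(axis=1), 0.0, height)
    return y

# usage: y = blue_noise_jitter(data); plot (data, y) instead of random jitter

Relaxing only the non-encoding axis mirrors the premise above that the data-encoding axis must not be altered.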
Yenamandra, T., Tewari, A., Bernard, F., et al. i3DMM: Deep Implicit 3D Morphable Model of Human Heads. IEEE/CVF Conference on Computer Vision and Pattern Recognition (Oral) (CVPR 2021), IEEE.
(Accepted/in press)
2020
Ali, S.A., Kahraman, K., Theobalt, C., Stricker, D., and Golyanik, V. 2020. Fast Gravitational Approach for Rigid Point Set Registration with Ordinary Differential Equations. https://arxiv.org/abs/2009.14005.
(arXiv: 2009.14005)
Abstract
This article introduces a new physics-based method for rigid point set alignment called Fast Gravitational Approach (FGA). In FGA, the source and target point sets are interpreted as rigid particle swarms with masses interacting in a globally multiply-linked manner while moving in a simulated gravitational force field. The optimal alignment is obtained by explicit modeling of forces acting on the particles as well as their velocities and displacements with second-order ordinary differential equations of motion. Additional alignment cues (point-based or geometric features, and other boundary conditions) can be integrated into FGA through particle masses. We propose a smooth-particle mass function for point mass initialization, which improves robustness to noise and structural discontinuities. To avoid prohibitive quadratic complexity of all-to-all point interactions, we adapt a Barnes-Hut tree for accelerated force computation and achieve quasilinear computational complexity. We show that the new method class has characteristics not found in previous alignment methods such as efficient handling of partial overlaps, inhomogeneous point sampling densities, and coping with large point clouds with reduced runtime compared to the state of the art. Experiments show that our method performs on par with or outperforms all compared competing non-deep-learning-based and general-purpose techniques (which do not assume the availability of training data and a scene prior) in resolving transformations for LiDAR data and gains state-of-the-art accuracy and speed when coping with different types of data disturbances.
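
As a rough editorial illustration of the physics-based idea (not the authors' implementation), the sketch below integrates a translation-only variant with explicit Euler steps: every source point feels a softened inverse-square pull toward every target point, and the averaged force drives the motion of the rigid swarm. Rotation, particle masses, feature cues, and the Barnes-Hut acceleration mentioned above are all omitted; names and parameters are hypothetical.

import numpy as np

def toy_gravitational_align(src, tgt, steps=500, dt=0.01, damping=0.9, eps=1e-3):
    # Translation-only toy in the spirit of FGA: each source point is pulled
    # by every target point with a softened inverse-square force, and the
    # averaged force drives a rigid translation integrated with Euler steps.
    t = np.zeros(src.shape[1])   # current translation of the source swarm
    v = np.zeros(src.shape[1])   # its velocity
    for _ in range(steps):
        diff = tgt[None, :, :] - (src + t)[:, None, :]          # (Ns, Nt, D)
        dist2 = (diff ** 2).sum(axis=-1) + eps                   # softened r^2
        forces = (diff / dist2[..., None] ** 1.5).sum(axis=1)    # per-point pull
        v = damping * v + dt * forces.mean(axis=0)
        t = t + dt * v
    return t

# usage (hypothetical data): src = np.random.rand(200, 3)
# tgt = src + np.array([0.3, -0.1, 0.2]); toy_gravitational_align(src, tgt)
# should return a vector close to the true offset.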
Ansari, N., Alizadeh-Mousavi, O., Seidel, H.-P., and Babaei, V. 2020. Mixed Integer Ink Selection for Spectral Reproduction. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Bemana, M., Myszkowski, K., Seidel, H.-P., and Ritschel, T. 2020a. X-Fields: Implicit Neural View-, Light- and Time-Image Interpolation. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Bemana, M., Myszkowski, K., Seidel, H.-P., and Ritschel, T. 2020b. X-Fields: Implicit Neural View-, Light- and Time-Image Interpolation. https://arxiv.org/abs/2010.00450.
(arXiv: 2010.00450)
Abstract
We suggest representing an X-Field (a set of 2D images taken across different view, time, or illumination conditions, i.e., video, light field, reflectance fields, or combinations thereof) by learning a neural network (NN) to map their view, time, or light coordinates to 2D images. Executing this NN at new coordinates results in joint view, time, or light interpolation. The key idea to make this workable is a NN that already knows the "basic tricks" of graphics (lighting, 3D projection, occlusion) in a hard-coded and differentiable form. The NN represents the input to that rendering as an implicit map that, for any view, time, or light coordinate and for any pixel, can quantify how it will move if view, time, or light coordinates change (Jacobian of pixel position with respect to view, time, illumination, etc.). Our X-Field representation is trained for one scene within minutes, leading to a compact set of trainable parameters and hence real-time navigation in view, time, and illumination.
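
Purely as an editorial sketch of the "coordinate in, image out" interface (the method described above predicts how pixels move, i.e., flows/Jacobians, and warps the captured images rather than regressing pixels directly), a minimal PyTorch stand-in could look as follows; the architecture and all names are hypothetical.

import torch
import torch.nn as nn

class CoordToImage(nn.Module):
    # Simplified stand-in: regress an image directly from the capture
    # coordinate. X-Fields instead predicts pixel motion and warps the
    # captured images, which generalizes far better between observations.
    def __init__(self, height=32, width=32, hidden=256):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(1, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, 3 * height * width),
        )
        self.out_shape = (3, height, width)

    def forward(self, coord):                  # coord: (batch, 1) in [0, 1]
        return self.net(coord).view(-1, *self.out_shape)

def fit(images, coords, epochs=2000, lr=1e-3):
    # images: (N, 3, H, W) captured views; coords: (N, 1) view/time/light values
    model = CoordToImage(images.shape[2], images.shape[3])
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    for _ in range(epochs):
        opt.zero_grad()
        loss = ((model(coords) - images) ** 2).mean()
        loss.backward()
        opt.step()
    return model

# After fitting, model(torch.tensor([[0.5]])) interpolates an image at an
# unobserved coordinate between the captured ones.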
Bernard, F., Suri, Z.K., and Theobalt, C. 2020a. MINA: Convex Mixed-Integer Programming for Non-Rigid Shape Alignment. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{Bernard_CVPR2020, TITLE = {{MINA}: {C}onvex Mixed-Integer Programming for Non-Rigid Shape Alignment}, AUTHOR = {Bernard, Florian and Suri, Zeeshan Khan and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.01384}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {13823--13832}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Bernard, Florian %A Suri, Zeeshan Khan %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T MINA: Convex Mixed-Integer Programming for Non-Rigid Shape Alignment : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D059-A %R 10.1109/CVPR42600.2020.01384 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 13823 - 13832 %I IEEE %@ 978-1-7281-7168-5
Bernard, F., Suri, Z.K., and Theobalt, C. 2020b. MINA: Convex Mixed-Integer Programming for Non-Rigid Shape Alignment. https://arxiv.org/abs/2002.12623.
(arXiv: 2002.12623)
Abstract
We present a convex mixed-integer programming formulation for non-rigid shape matching. To this end, we propose a novel shape deformation model based on an efficient low-dimensional discrete model, so that finding a globally optimal solution is tractable in (most) practical cases. Our approach combines several favourable properties: it is independent of the initialisation, it is much more efficient to solve to global optimality compared to analogous quadratic assignment problem formulations, and it is highly flexible in terms of the variants of matching problems it can handle. Experimentally we demonstrate that our approach outperforms existing methods for sparse shape matching, that it can be used for initialising dense shape matching methods, and we showcase its flexibility on several examples.
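For intuition only, the Python sketch below solves a three-point toy instance of the matching problem by brute force over permutations; it is not the paper's mixed-integer program, and the landmark coordinates are arbitrary, but it makes concrete what a globally optimal one-to-one correspondence means in the sparse setting.

import itertools
import numpy as np

src = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])  # toy source landmarks
tgt = np.array([[0.1, 1.0], [0.0, 0.1], [1.1, 0.0]])  # toy target landmarks

cost = np.linalg.norm(src[:, None, :] - tgt[None, :, :], axis=-1)  # pairwise distances
best = min(itertools.permutations(range(3)),
           key=lambda p: sum(cost[i, p[i]] for i in range(3)))     # exhaustive global optimum
print("globally optimal matching:", best)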
Export
BibTeX
@online{Bernard_arXiv2002.12623, TITLE = {MINA: {C}onvex Mixed-Integer Programming for Non-Rigid Shape Alignment}, AUTHOR = {Bernard, Florian and Suri, Zeeshan Khan and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2002.12623}, EPRINT = {2002.12623}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a convex mixed-integer programming formulation for non-rigid shape matching. To this end, we propose a novel shape deformation model based on an efficient low-dimensional discrete model, so that finding a globally optimal solution is tractable in (most) practical cases. Our approach combines several favourable properties: it is independent of the initialisation, it is much more efficient to solve to global optimality compared to analogous quadratic assignment problem formulations, and it is highly flexible in terms of the variants of matching problems it can handle. Experimentally we demonstrate that our approach outperforms existing methods for sparse shape matching, that it can be used for initialising dense shape matching methods, and we showcase its flexibility on several examples.}, }
Endnote
%0 Report %A Bernard, Florian %A Suri, Zeeshan Khan %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T MINA: Convex Mixed-Integer Programming for Non-Rigid Shape Alignment : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E00C-F %U https://arxiv.org/abs/2002.12623 %D 2020 %X We present a convex mixed-integer programming formulation for non-rigid shape matching. To this end, we propose a novel shape deformation model based on an efficient low-dimensional discrete model, so that finding a globally optimal solution is tractable in (most) practical cases. Our approach combines several favourable properties: it is independent of the initialisation, it is much more efficient to solve to global optimality compared to analogous quadratic assignment problem formulations, and it is highly flexible in terms of the variants of matching problems it can handle. Experimentally we demonstrate that our approach outperforms existing methods for sparse shape matching, that it can be used for initialising dense shape matching methods, and we showcase its flexibility on several examples. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG,Mathematics, Optimization and Control, math.OC
Bhatnagar, B.L., Sminchisescu, C., Theobalt, C., and Pons-Moll, G. 2020a. Combining Implicit Function Learning and Parametric Models for 3D Human Reconstruction. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{bhatnagar2020ipnet, TITLE = {Combining Implicit Function Learning and Parametric Models for {3D} Human Reconstruction}, AUTHOR = {Bhatnagar, Bharat Lal and Sminchisescu, Cristian and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, ISBN = {978-3-030-58535-8}, DOI = {10.1007/978-3-030-58536-5_19}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {311--329}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12347}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Bhatnagar, Bharat Lal %A Sminchisescu, Cristian %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T Combining Implicit Function Learning and Parametric Models for 3D Human Reconstruction : %G eng %U http://hdl.handle.net/21.11116/0000-0006-E89E-3 %R 10.1007/978-3-030-58536-5_19 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 311 - 329 %I Springer %@ 978-3-030-58535-8 %B Lecture Notes in Computer Science %N 12347
Bhatnagar, B.L., Sminchisescu, C., Theobalt, C., and Pons-Moll, G. 2020b. LoopReg: Self-supervised Learning of Implicit Surface Correspondences, Pose and Shape for 3D Human Mesh Registration. Advances in Neural Information Processing Systems 33 (NeurIPS 2020), Curran Associates, Inc.
Export
BibTeX
@inproceedings{bhatnagar2020loopreg, TITLE = {{LoopReg}: Self-supervised Learning of Implicit Surface Correspondences, Pose and Shape for {3D} Human Mesh Registration}, AUTHOR = {Bhatnagar, Bharat Lal and Sminchisescu, Cristian and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, PUBLISHER = {Curran Associates, Inc.}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {Advances in Neural Information Processing Systems 33 (NeurIPS 2020)}, EDITOR = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M. F. and Lin, H.}, ADDRESS = {Virtual Event}, }
Endnote
%0 Conference Proceedings %A Bhatnagar, Bharat Lal %A Sminchisescu, Cristian %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T LoopReg: Self-supervised Learning of Implicit Surface Correspondences, Pose and Shape for 3D Human Mesh Registration : %G eng %U http://hdl.handle.net/21.11116/0000-0007-6FD1-1 %D 2020 %B 34th Conference on Neural Information Processing Systems %Z date of event: 2020-12-06 - 2020-12-12 %C Virtual Event %B Advances in Neural Information Processing Systems 33 %E Larochelle, H.; Ranzato, M.; Hadsell, R.; Balcan, M. F.; Lin, H. %I Curran Associates, Inc. %U https://papers.nips.cc/paper/2020/file/970af30e481057c48f87e101b61e6994-Paper.pdf
Bhatnagar, B.L., Sminchisescu, C., Theobalt, C., and Pons-Moll, G. 2020c. Combining Implicit Function Learning and Parametric Models for 3D Human Reconstruction. https://arxiv.org/abs/2007.11432.
(arXiv: 2007.11432)
Abstract
Implicit functions represented as deep learning approximations are powerful for reconstructing 3D surfaces. However, they can only produce static surfaces that are not controllable, which provides limited ability to modify the resulting model by editing its pose or shape parameters. Nevertheless, such features are essential in building flexible models for both computer graphics and computer vision. In this work, we present methodology that combines detail-rich implicit functions and parametric representations in order to reconstruct 3D models of people that remain controllable and accurate even in the presence of clothing. Given sparse 3D point clouds sampled on the surface of a dressed person, we use an Implicit Part Network (IP-Net) to jointly predict the outer 3D surface of the dressed person, the inner body surface, and the semantic correspondences to a parametric body model. We subsequently use correspondences to fit the body model to our inner surface and then non-rigidly deform it (under a parametric body + displacement model) to the outer surface in order to capture garment, face and hair detail. In quantitative and qualitative experiments with both full body data and hand scans we show that the proposed methodology generalizes, and is effective even given incomplete point clouds collected from single-view depth images. Our models and code can be downloaded from http://virtualhumans.mpi-inf.mpg.de/ipnet.
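As a rough illustration of the fitting stage (not the authors' code), the Python sketch below registers a template to scan points using predicted per-point correspondences. The "body model" is reduced to a single global affine map, and the template, correspondences, and scan are synthetic placeholders; the actual method fits pose and shape parameters plus non-rigid displacements.

import numpy as np

template = np.random.rand(100, 3)                  # stand-in for body-model vertices
correspondences = np.random.randint(0, 100, 500)   # hypothetical per-point vertex predictions
scan = template[correspondences] @ np.diag([1.1, 0.9, 1.0]) + 0.05  # synthetic scan points

A, *_ = np.linalg.lstsq(
    np.c_[template[correspondences], np.ones(500)],  # homogeneous template points
    scan, rcond=None)                                # least-squares affine map (4x3)
registered = np.c_[template, np.ones(100)] @ A       # template deformed towards the scan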
Export
BibTeX
@online{Bhatnagar_2007.11432, TITLE = {Combining Implicit Function Learning and Parametric Models for {3D} Human Reconstruction}, AUTHOR = {Bhatnagar, Bharat Lal and Sminchisescu, Cristian and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2007.11432}, EPRINT = {2007.11432}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Implicit functions represented as deep learning approximations are powerful for reconstructing 3D surfaces. However, they can only produce static surfaces that are not controllable, which provides limited ability to modify the resulting model by editing its pose or shape parameters. Nevertheless, such features are essential in building flexible models for both computer graphics and computer vision. In this work, we present methodology that combines detail-rich implicit functions and parametric representations in order to reconstruct 3D models of people that remain controllable and accurate even in the presence of clothing. Given sparse 3D point clouds sampled on the surface of a dressed person, we use an Implicit Part Network (IP-Net)to jointly predict the outer 3D surface of the dressed person, the and inner body surface, and the semantic correspondences to a parametric body model. We subsequently use correspondences to fit the body model to our inner surface and then non-rigidly deform it (under a parametric body + displacement model) to the outer surface in order to capture garment, face and hair detail. In quantitative and qualitative experiments with both full body data and hand scans we show that the proposed methodology generalizes, and is effective even given incomplete point clouds collected from single-view depth images. Our models and code can be downloaded from http://virtualhumans.mpi-inf.mpg.de/ipnet.}, }
Endnote
%0 Report %A Bhatnagar, Bharat Lal %A Sminchisescu, Cristian %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T Combining Implicit Function Learning and Parametric Models for 3D Human Reconstruction : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E8A0-E %U https://arxiv.org/abs/2007.11432 %D 2020 %X Implicit functions represented as deep learning approximations are powerful for reconstructing 3D surfaces. However, they can only produce static surfaces that are not controllable, which provides limited ability to modify the resulting model by editing its pose or shape parameters. Nevertheless, such features are essential in building flexible models for both computer graphics and computer vision. In this work, we present methodology that combines detail-rich implicit functions and parametric representations in order to reconstruct 3D models of people that remain controllable and accurate even in the presence of clothing. Given sparse 3D point clouds sampled on the surface of a dressed person, we use an Implicit Part Network (IP-Net)to jointly predict the outer 3D surface of the dressed person, the and inner body surface, and the semantic correspondences to a parametric body model. We subsequently use correspondences to fit the body model to our inner surface and then non-rigidly deform it (under a parametric body + displacement model) to the outer surface in order to capture garment, face and hair detail. In quantitative and qualitative experiments with both full body data and hand scans we show that the proposed methodology generalizes, and is effective even given incomplete point clouds collected from single-view depth images. Our models and code can be downloaded from http://virtualhumans.mpi-inf.mpg.de/ipnet. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Bhatnagar, B.L., Sminchisescu, C., Theobalt, C., and Pons-Moll, G. 2020d. LoopReg: Self-supervised Learning of Implicit Surface Correspondences, Pose and Shape for 3D Human Mesh Registration. https://arxiv.org/abs/2010.12447.
(arXiv: 2010.12447)
Abstract
We address the problem of fitting 3D human models to 3D scans of dressed humans. Classical methods optimize both the data-to-model correspondences and the human model parameters (pose and shape), but are reliable only when initialized close to the solution. Some methods initialize the optimization based on fully supervised correspondence predictors, which is not differentiable end-to-end, and can only process a single scan at a time. Our main contribution is LoopReg, an end-to-end learning framework to register a corpus of scans to a common 3D human model. The key idea is to create a self-supervised loop. A backward map, parameterized by a Neural Network, predicts the correspondence from every scan point to the surface of the human model. A forward map, parameterized by a human model, transforms the corresponding points back to the scan based on the model parameters (pose and shape), thus closing the loop. Formulating this closed loop is not straightforward because it is not trivial to force the output of the NN to be on the surface of the human model - outside this surface the human model is not even defined. To this end, we propose two key innovations. First, we define the canonical surface implicitly as the zero level set of a distance field in R^3, which, in contrast to more common UV parameterizations, does not require cutting the surface, does not have discontinuities, and does not induce distortion. Second, we diffuse the human model to the 3D domain R^3. This allows us to map the NN predictions forward, even when they slightly deviate from the zero level set. Results demonstrate that we can train LoopReg mainly self-supervised: following a supervised warm-start, the model becomes increasingly more accurate as additional unlabelled raw scans are processed. Our code and pre-trained models can be downloaded for research.
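The self-supervised loop can be sketched in a few lines of PyTorch. This is a minimal illustration under strong simplifications (the human model is replaced by a single learnable translation, and the scan is random data), not the LoopReg implementation.

import torch

backward_map = torch.nn.Sequential(        # predicts canonical coordinates per scan point
    torch.nn.Linear(3, 64), torch.nn.ReLU(), torch.nn.Linear(64, 3))
pose = torch.nn.Parameter(torch.zeros(3))  # stand-in for pose/shape parameters (translation only)

scan = torch.rand(1024, 3)                 # unlabelled scan points
optimizer = torch.optim.Adam(list(backward_map.parameters()) + [pose], lr=1e-3)

for _ in range(100):
    canonical = backward_map(scan)         # backward map: scan -> canonical surface
    reposed = canonical + pose             # forward map: canonical -> scan space
    loss = ((reposed - scan) ** 2).mean()  # self-supervised loop-closure loss
    optimizer.zero_grad(); loss.backward(); optimizer.step()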
Export
BibTeX
@online{Bhatnagar_2010.12447, TITLE = {{LoopReg}: {S}elf-supervised Learning of Implicit Surface Correspondences, Pose and Shape for {3D} Human Mesh Registration}, AUTHOR = {Bhatnagar, Bharat Lal and Sminchisescu, Cristian and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2010.12447}, EPRINT = {2010.12447}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We address the problem of fitting 3D human models to 3D scans of dressed humans. Classical methods optimize both the data-to-model correspondences and the human model parameters (pose and shape), but are reliable only when initialized close to the solution. Some methods initialize the optimization based on fully supervised correspondence predictors, which is not differentiable end-to-end, and can only process a single scan at a time. Our main contribution is LoopReg, an end-to-end learning framework to register a corpus of scans to a common 3D human model. The key idea is to create a self-supervised loop. A backward map, parameterized by a Neural Network, predicts the correspondence from every scan point to the surface of the human model. A forward map, parameterized by a human model, transforms the corresponding points back to the scan based on the model parameters (pose and shape), thus closing the loop. Formulating this closed loop is not straightforward because it is not trivial to force the output of the NN to be on the surface of the human model -- outside this surface the human model is not even defined. To this end, we propose two key innovations. First, we define the canonical surface implicitly as the zero level set of a distance field in R3, which in contrast to morecommon UV parameterizations, does not require cutting the surface, does not have discontinuities, and does not induce distortion. Second, we diffuse the human model to the 3D domain R3. This allows to map the NN predictions forward,even when they slightly deviate from the zero level set. Results demonstrate that we can train LoopRegmainly self-supervised -- following a supervised warm-start, the model becomes increasingly more accurate as additional unlabelled raw scans are processed. Our code and pre-trained models can be downloaded for research.}, }
Endnote
%0 Report %A Bhatnagar, Bharat Lal %A Sminchisescu, Cristian %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T LoopReg: Self-supervised Learning of Implicit Surface Correspondences, Pose and Shape for 3D Human Mesh Registration : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E91C-4 %U https://arxiv.org/abs/2010.12447 %D 2020 %X We address the problem of fitting 3D human models to 3D scans of dressed humans. Classical methods optimize both the data-to-model correspondences and the human model parameters (pose and shape), but are reliable only when initialized close to the solution. Some methods initialize the optimization based on fully supervised correspondence predictors, which is not differentiable end-to-end, and can only process a single scan at a time. Our main contribution is LoopReg, an end-to-end learning framework to register a corpus of scans to a common 3D human model. The key idea is to create a self-supervised loop. A backward map, parameterized by a Neural Network, predicts the correspondence from every scan point to the surface of the human model. A forward map, parameterized by a human model, transforms the corresponding points back to the scan based on the model parameters (pose and shape), thus closing the loop. Formulating this closed loop is not straightforward because it is not trivial to force the output of the NN to be on the surface of the human model - outside this surface the human model is not even defined. To this end, we propose two key innovations. First, we define the canonical surface implicitly as the zero level set of a distance field in R3, which in contrast to morecommon UV parameterizations, does not require cutting the surface, does not have discontinuities, and does not induce distortion. Second, we diffuse the human model to the 3D domain R3. This allows to map the NN predictions forward,even when they slightly deviate from the zero level set. Results demonstrate that we can train LoopRegmainly self-supervised - following a supervised warm-start, the model becomes increasingly more accurate as additional unlabelled raw scans are processed. Our code and pre-trained models can be downloaded for research. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Božič, A., Zollhöfer, M., Theobalt, C., and Nießner, M. 2020. DeepDeform: Learning Non-Rigid RGB-D Reconstruction With Semi-Supervised Data. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{Bozic_CVPR2020, TITLE = {{DeepDeform}: {L}earning Non-Rigid {RGB}-{D} Reconstruction With Semi-Supervised Data}, AUTHOR = {Bo{\v z}i{\v c}, Alja{\v z} and Zollh{\"o}fer, Michael and Theobalt, Christian and Nie{\ss}ner, Matthias}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00703}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {7000--7010}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Božič, Aljaž %A Zollhöfer, Michael %A Theobalt, Christian %A Nießner, Matthias %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T DeepDeform: Learning Non-Rigid RGB-D Reconstruction With Semi-Supervised Data : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CF63-1 %R 10.1109/CVPR42600.2020.00703 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 7000 - 7010 %I IEEE %@ 978-1-7281-7168-5
Chizhov, V., Georgiev, I., Myszkowski, K., and Singh, G. 2020. Perceptual Error Optimization for Monte Carlo Rendering. https://arxiv.org/abs/2012.02344.
(arXiv: 2012.02344)
Abstract
Realistic image synthesis involves computing high-dimensional light transport integrals which in practice are numerically estimated using Monte Carlo integration. The error of this estimation manifests itself in the image as visually displeasing aliasing or noise. To ameliorate this, we develop a theoretical framework for optimizing screen-space error distribution. Our model is flexible and works for arbitrary target error power spectra. We focus on perceptual error optimization by leveraging models of the human visual system's (HVS) point spread function (PSF) from halftoning literature. This results in a specific optimization problem whose solution distributes the error as visually pleasing blue noise in image space. We develop a set of algorithms that provide a trade-off between quality and speed, showing substantial improvements over prior state of the art. We perform evaluations using both quantitative and perceptual error metrics to support our analysis, and provide extensive supplemental material to help evaluate the perceptual improvements achieved by our methods.
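A toy version of this objective is sketched below in Python/SciPy: the perceived error is modelled by convolving the error image with a Gaussian standing in for the HVS point spread function, and pixel estimates are swapped whenever the perceived error decreases. The kernel width, image size, and swap schedule are illustrative choices, not the algorithms from the paper.

import numpy as np
from scipy.ndimage import gaussian_filter

rng = np.random.default_rng(0)
reference = np.zeros((64, 64))                                # toy ground-truth image
estimate = reference + rng.normal(0.0, 1.0, reference.shape)  # noisy Monte Carlo estimate

def perceived_error(est):
    # Energy of the error image after low-pass filtering by a stand-in HVS PSF.
    return np.sum(gaussian_filter(est - reference, sigma=1.5) ** 2)

for _ in range(2000):                                         # greedy vertical-neighbour swaps
    y, x = rng.integers(0, 64, size=2)
    y2 = (y + 1) % 64
    before = perceived_error(estimate)
    estimate[y, x], estimate[y2, x] = estimate[y2, x], estimate[y, x]
    if perceived_error(estimate) > before:                    # keep only improving swaps
        estimate[y, x], estimate[y2, x] = estimate[y2, x], estimate[y, x]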
Export
BibTeX
@online{Chizhov_arXiv2012.02344, TITLE = {Perceptual Error Optimization for {Monte Carlo} Rendering}, AUTHOR = {Chizhov, Vassillen and Georgiev, Iliyan and Myszkowski, Karol and Singh, Gurprit}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.02344}, EPRINT = {2012.02344}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Realistic image synthesis involves computing high-dimensional light transport integrals which in practice are numerically estimated using Monte Carlo integration. The error of this estimation manifests itself in the image as visually displeasing aliasing or noise. To ameliorate this, we develop a theoretical framework for optimizing screen-space error distribution. Our model is flexible and works for arbitrary target error power spectra. We focus on perceptual error optimization by leveraging models of the human visual system's (HVS) point spread function (PSF) from halftoning literature. This results in a specific optimization problem whose solution distributes the error as visually pleasing blue noise in image space. We develop a set of algorithms that provide a trade-off between quality and speed, showing substantial improvements over prior state of the art. We perform evaluations using both quantitative and perceptual error metrics to support our analysis, and provide extensive supplemental material to help evaluate the perceptual improvements achieved by our methods.}, }
Endnote
%0 Report %A Chizhov, Vassillen %A Georgiev, Iliyan %A Myszkowski, Karol %A Singh, Gurprit %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Perceptual Error Optimization for Monte Carlo Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CEB7-3 %U https://arxiv.org/abs/2012.02344 %D 2020 %X Realistic image synthesis involves computing high-dimensional light transport integrals which in practice are numerically estimated using Monte Carlo integration. The error of this estimation manifests itself in the image as visually displeasing aliasing or noise. To ameliorate this, we develop a theoretical framework for optimizing screen-space error distribution. Our model is flexible and works for arbitrary target error power spectra. We focus on perceptual error optimization by leveraging models of the human visual system's (HVS) point spread function (PSF) from halftoning literature. This results in a specific optimization problem whose solution distributes the error as visually pleasing blue noise in image space. We develop a set of algorithms that provide a trade-off between quality and speed, showing substantial improvements over prior state of the art. We perform evaluations using both quantitative and perceptual error metrics to support our analysis, and provide extensive supplemental material to help evaluate the perceptual improvements achieved by our methods. %K Computer Science, Graphics, cs.GR
Çoğalan, U. and Akyüz, A.O. 2020. Deep Joint Deinterlacing and Denoising for Single Shot Dual-ISO HDR Reconstruction. IEEE Transactions on Image Processing 29.
Export
BibTeX
@article{Cogalan2020, TITLE = {Deep Joint Deinterlacing and Denoising for Single Shot Dual-{ISO HDR} Reconstruction}, AUTHOR = {{\c C}o{\u g}alan, U{\u g}ur and Aky{\"u}z, Ahmet O{\u g}uz}, LANGUAGE = {eng}, ISSN = {1057-7149}, DOI = {10.1109/TIP.2020.3004014}, PUBLISHER = {IEEE}, ADDRESS = {Piscataway, NJ}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, JOURNAL = {IEEE Transactions on Image Processing}, VOLUME = {29}, PAGES = {7511--7524}, }
Endnote
%0 Journal Article %A Çoğalan, Uğur %A Akyüz , Ahmet Oğuz %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Deep Joint Deinterlacing and Denoising for Single Shot Dual-ISO HDR Reconstruction : %G eng %U http://hdl.handle.net/21.11116/0000-0006-DCA7-6 %R 10.1109/TIP.2020.3004014 %7 2020 %D 2020 %J IEEE Transactions on Image Processing %V 29 %& 7511 %P 7511 - 7524 %I IEEE %C Piscataway, NJ %@ false
Çoğalan, U., Bemana, M., Myszkowski, K., Seidel, H.-P., and Ritschel, T. 2020. HDR Denoising and Deblurring by Learning Spatio-temporal Distortion Models. https://arxiv.org/abs/2012.12009.
(arXiv: 2012.12009)
Abstract
We seek to reconstruct sharp and noise-free high-dynamic range (HDR) video from a dual-exposure sensor that records different low-dynamic range (LDR) information in different pixel columns: Odd columns provide low-exposure, sharp, but noisy information; even columns complement this with less noisy, high-exposure, but motion-blurred data. Previous LDR work learns to deblur and denoise (DISTORTED->CLEAN) supervised by pairs of CLEAN and DISTORTED images. Regrettably, capturing DISTORTED sensor readings is time-consuming; as well, there is a lack of CLEAN HDR videos. We suggest a method to overcome those two limitations. First, we learn a different function instead: CLEAN->DISTORTED, which generates samples containing correlated pixel noise, and row and column noise, as well as motion blur from a low number of CLEAN sensor readings. Second, as there is not enough CLEAN HDR video available, we devise a method to learn from LDR video instead. Our approach compares favorably to several strong baselines, and can boost existing methods when they are re-trained on our data. Combined with spatial and temporal super-resolution, it enables applications such as re-lighting with low noise or blur.
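The CLEAN->DISTORTED direction can be made concrete with a small Python/SciPy sketch. It is not the learned distortion model from the paper, and the exposure ratio, noise level, and blur length are invented values, but it shows how training pairs can be synthesized from clean frames alone.

import numpy as np
from scipy.ndimage import uniform_filter1d

def degrade(clean, exposure_ratio=4.0, read_noise=0.02, blur=9, rng=np.random.default_rng()):
    distorted = clean.copy()
    distorted[:, 1::2] = clean[:, 1::2] / exposure_ratio                        # short exposure: darker...
    distorted[:, 1::2] += rng.normal(0, read_noise, distorted[:, 1::2].shape)   # ...and noisier
    blurred = uniform_filter1d(clean, size=blur, axis=1)                        # long exposure: motion blur
    distorted[:, 0::2] = blurred[:, 0::2]
    return distorted

clean = np.random.rand(128, 128)   # stand-in for a clean training frame
pair = (clean, degrade(clean))     # training pair generated without capturing distorted data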
Export
BibTeX
@online{Cogalan_arXiv2012.12009, TITLE = {{HDR} Denoising and Deblurring by Learning Spatio-temporal Distortion Model}, AUTHOR = {{\c C}o{\u g}alan, U{\u g}ur and Bemana, Mojtaba and Myszkowski, Karol and Seidel, Hans-Peter and Ritschel, Tobias}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.12009}, EPRINT = {2012.12009}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We seek to reconstruct sharp and noise-free high-dynamic range (HDR) video from a dual-exposure sensor that records different low-dynamic range (LDR) information in different pixel columns: Odd columns provide low-exposure, sharp, but noisy information; even columns complement this with less noisy, high-exposure, but motion-blurred data. Previous LDR work learns to deblur and denoise (DISTORTED->CLEAN) supervised by pairs of CLEAN and DISTORTED images. Regrettably, capturing DISTORTED sensor readings is time-consuming; as well, there is a lack of CLEAN HDR videos. We suggest a method to overcome those two limitations. First, we learn a different function instead: CLEAN->DISTORTED, which generates samples containing correlated pixel noise, and row and column noise, as well as motion blur from a low number of CLEAN sensor readings. Second, as there is not enough CLEAN HDR video available, we devise a method to learn from LDR video in-stead. Our approach compares favorably to several strong baselines, and can boost existing methods when they are re-trained on our data. Combined with spatial and temporal super-resolution, it enables applications such as re-lighting with low noise or blur.}, }
Endnote
%0 Report %A Çoğalan, Uğur %A Bemana, Mojtaba %A Myszkowski, Karol %A Seidel, Hans-Peter %A Ritschel, Tobias %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T HDR Denoising and Deblurring by Learning Spatio-temporal Distortion Models : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B721-5 %U https://arxiv.org/abs/2012.12009 %D 2020 %X We seek to reconstruct sharp and noise-free high-dynamic range (HDR) video from a dual-exposure sensor that records different low-dynamic range (LDR) information in different pixel columns: Odd columns provide low-exposure, sharp, but noisy information; even columns complement this with less noisy, high-exposure, but motion-blurred data. Previous LDR work learns to deblur and denoise (DISTORTED->CLEAN) supervised by pairs of CLEAN and DISTORTED images. Regrettably, capturing DISTORTED sensor readings is time-consuming; as well, there is a lack of CLEAN HDR videos. We suggest a method to overcome those two limitations. First, we learn a different function instead: CLEAN->DISTORTED, which generates samples containing correlated pixel noise, and row and column noise, as well as motion blur from a low number of CLEAN sensor readings. Second, as there is not enough CLEAN HDR video available, we devise a method to learn from LDR video in-stead. Our approach compares favorably to several strong baselines, and can boost existing methods when they are re-trained on our data. Combined with spatial and temporal super-resolution, it enables applications such as re-lighting with low noise or blur. %K eess.IV,Computer Science, Computer Vision and Pattern Recognition, cs.CV
Cucerca, S., Didyk, P., Seidel, H.-P., and Babaei, V. 2020. Computational Image Marking on Metals via Laser Induced Heating. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2020) 39, 4.
Export
BibTeX
@article{Cucerca_SIGGRAPH2020, TITLE = {Computational Image Marking on Metals via Laser Induced Heating}, AUTHOR = {Cucerca, Sebastian and Didyk, Piotr and Seidel, Hans-Peter and Babaei, Vahid}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3386569.3392423}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {39}, NUMBER = {4}, EID = {70}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2020}, }
Endnote
%0 Journal Article %A Cucerca, Sebastian %A Didyk, Piotr %A Seidel, Hans-Peter %A Babaei, Vahid %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Computational Image Marking on Metals via Laser Induced Heating : %G eng %U http://hdl.handle.net/21.11116/0000-0007-9664-F %R 10.1145/3386569.3392423 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 4 %Z sequence number: 70 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2020 %O ACM SIGGRAPH 2020 Virtual Conference ; 2020, 17-28 August
Dunn, D., Tursun, O., Yu, H., Didyk, P., Myszkowski, K., and Fuchs, H. 2020. Stimulating the Human Visual System Beyond Real World Performance in Future Augmented Reality Displays. IEEE International Symposium on Mixed and Augmented Reality (ISMAR 2020), IEEE.
Export
BibTeX
@inproceedings{Dunn2020, TITLE = {Stimulating the Human Visual System Beyond Real World Performance in Future Augmented Reality Displays}, AUTHOR = {Dunn, David and Tursun, Okan and Yu, Hyeonseung and Didyk, Piotr and Myszkowski, Karol and Fuchs, Henry}, LANGUAGE = {eng}, ISBN = {978-1-7281-8508-8}, DOI = {10.1109/ISMAR50242.2020.00029}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {IEEE International Symposium on Mixed and Augmented Reality (ISMAR 2020)}, PAGES = {90--100}, ADDRESS = {Recife/Porto de Galinhas, Brazil (Virtual Conference)}, }
Endnote
%0 Conference Proceedings %A Dunn, David %A Tursun, Okan %A Yu, Hyeonseung %A Didyk, Piotr %A Myszkowski, Karol %A Fuchs, Henry %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Stimulating the Human Visual System Beyond Real World Performance in Future Augmented Reality Displays : %G eng %U http://hdl.handle.net/21.11116/0000-0006-FBDF-5 %R 10.1109/ISMAR50242.2020.00029 %D 2020 %B International Symposium on Mixed and Augmented Reality %Z date of event: 2020-11-09 - 2020-11-13 %C Recife/Porto de Galinhas, Brazil (Virtual Conference) %B IEEE International Symposium on Mixed and Augmented Reality %P 90 - 100 %I IEEE %@ 978-1-7281-8508-8
Egger, B., Smith, W.A.P., Tewari, A., et al. 2020. 3D Morphable Face Models - Past, Present and Future. ACM Transactions on Graphics 39, 5.
Export
BibTeX
@article{Egger_TOG2020, TITLE = {{3D} Morphable Face Models -- Past, Present and Future}, AUTHOR = {Egger, Bernhard and Smith, William A. P. and Tewari, Ayush and Wuhrer, Stefanie and Zollh{\"o}fer, Michael and Beeler, Thabo and Bernard, Florian and Bolkart, Timo and Kortylewski, Adam and Romdhani, Sami and Theobalt, Christian and Blanz, Volker and Vetter, Thomas}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3395208}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics}, VOLUME = {39}, NUMBER = {5}, EID = {157}, }
Endnote
%0 Journal Article %A Egger, Bernhard %A Smith, William A. P. %A Tewari, Ayush %A Wuhrer, Stefanie %A Zollhöfer, Michael %A Beeler, Thabo %A Bernard, Florian %A Bolkart, Timo %A Kortylewski, Adam %A Romdhani, Sami %A Theobalt, Christian %A Blanz, Volker %A Vetter, Thomas %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T 3D Morphable Face Models -Past, Present and Future : %G eng %U http://hdl.handle.net/21.11116/0000-0007-1CF5-6 %R 10.1145/3395208 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 5 %Z sequence number: 157 %I ACM %C New York, NY %@ false
Elgharib, M., Mendiratta, M., Thies, J., et al. 2020. Egocentric Videoconferencing. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Export
BibTeX
@article{Elgharib_ToG2020, TITLE = {Egocentric Videoconferencing}, AUTHOR = {Elgharib, Mohamed and Mendiratta, Mohit and Thies, Justus and Nie{\ss}ner, Matthias and Seidel, Hans-Peter and Tewari, Ayush and Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3414685.3417808}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia)}, VOLUME = {39}, NUMBER = {6}, EID = {268}, BOOKTITLE = {Proceedings of ACM SIGGRAPH Asia 2020}, EDITOR = {Myszkowski, Karol}, }
Endnote
%0 Journal Article %A Elgharib, Mohamed %A Mendiratta, Mohit %A Thies, Justus %A Nießner, Matthias %A Seidel, Hans-Peter %A Tewari, Ayush %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Egocentric Videoconferencing : %G eng %U http://hdl.handle.net/21.11116/0000-0007-9B36-E %R 10.1145/3414685.3417808 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 6 %Z sequence number: 268 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH Asia 2020 %O ACM SIGGRAPH Asia 2020 SA'20 SA 2020
Fox, G., Liu, W., Kim, H., Seidel, H.-P., Elgharib, M., and Theobalt, C. 2020. VideoForensicsHQ: Detecting High-quality Manipulated Face Videos. https://arxiv.org/abs/2005.10360.
(arXiv: 2005.10360)
Abstract
New approaches to synthesize and manipulate face videos at very high quality have paved the way for new applications in computer animation, virtual and augmented reality, or face video analysis. However, there are concerns that they may be used in a malicious way, e.g. to manipulate videos of public figures, politicians or reporters, to spread false information. The research community therefore developed techniques for automated detection of modified imagery, and assembled benchmark datasets showing manipulations by state-of-the-art techniques. In this paper, we contribute to this initiative in two ways: First, we present a new audio-visual benchmark dataset. It shows some of the highest quality visual manipulations available today. Human observers find them significantly harder to identify as forged than videos from other benchmarks. Furthermore, we propose a new family of deep-learning-based fake detectors, demonstrating that existing detectors are not well-suited for detecting fakes of a quality as high as presented in our dataset. Our detectors examine spatial and temporal features. This allows them to outperform existing approaches both in terms of high detection accuracy and generalization to unseen fake generation methods and unseen identities.
Export
BibTeX
@online{Fox_2005.10360, TITLE = {{Video\-Foren\-sics\-HQ}: {D}etecting High-quality Manipulated Face Videos}, AUTHOR = {Fox, Gereon and Liu, Wentao and Kim, Hyeongwoo and Seidel, Hans-Peter and Elgharib, Mohamed and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2005.10360}, EPRINT = {2005.10360}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {New approaches to synthesize and manipulate face videos at very high quality have paved the way for new applications in computer animation, virtual and augmented reality, or face video analysis. However, there are concerns that they may be used in a malicious way, e.g. to manipulate videos of public figures, politicians or reporters, to spread false information. The research community therefore developed techniques for automated detection of modified imagery, and assembled benchmark datasets showing manipulatons by state-of-the-art techniques. In this paper, we contribute to this initiative in two ways: First, we present a new audio-visual benchmark dataset. It shows some of the highest quality visual manipulations available today. Human observers find them significantly harder to identify as forged than videos from other benchmarks. Furthermore we propose new family of deep-learning-based fake detectors, demonstrating that existing detectors are not well-suited for detecting fakes of a quality as high as presented in our dataset. Our detectors examine spatial and temporal features. This allows them to outperform existing approaches both in terms of high detection accuracy and generalization to unseen fake generation methods and unseen identities.}, }
Endnote
%0 Report %A Fox, Gereon %A Liu, Wentao %A Kim, Hyeongwoo %A Seidel, Hans-Peter %A Elgharib, Mohamed %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T VideoForensicsHQ: Detecting High-quality Manipulated Face Videos : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B109-7 %U https://arxiv.org/abs/2005.10360 %D 2020 %X New approaches to synthesize and manipulate face videos at very high quality have paved the way for new applications in computer animation, virtual and augmented reality, or face video analysis. However, there are concerns that they may be used in a malicious way, e.g. to manipulate videos of public figures, politicians or reporters, to spread false information. The research community therefore developed techniques for automated detection of modified imagery, and assembled benchmark datasets showing manipulatons by state-of-the-art techniques. In this paper, we contribute to this initiative in two ways: First, we present a new audio-visual benchmark dataset. It shows some of the highest quality visual manipulations available today. Human observers find them significantly harder to identify as forged than videos from other benchmarks. Furthermore we propose new family of deep-learning-based fake detectors, demonstrating that existing detectors are not well-suited for detecting fakes of a quality as high as presented in our dataset. Our detectors examine spatial and temporal features. This allows them to outperform existing approaches both in terms of high detection accuracy and generalization to unseen fake generation methods and unseen identities. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Golyanik, V. and Theobalt, C. 2020. A Quantum Computational Approach to Correspondence Problems on Point Sets. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{Golyanik_CVPR2020, TITLE = {A Quantum Computational Approach to Correspondence Problems on Point Sets}, AUTHOR = {Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00920}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {9179--9188}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T A Quantum Computational Approach to Correspondence Problems on Point Sets : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D053-0 %R 10.1109/CVPR42600.2020.00920 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 9179 - 9188 %I IEEE %@ 978-1-7281-7168-5
Golyanik, V., Shimada, S., and Theobalt, C. 2020a. Fast Simultaneous Gravitational Alignment of Multiple Point Sets. International Conference on 3D Vision, IEEE.
Export
BibTeX
@inproceedings{Golyanik_MBGA2020, TITLE = {Fast Simultaneous Gravitational Alignment of Multiple Point Sets}, AUTHOR = {Golyanik, Vladislav and Shimada, Soshi and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-8128-8}, DOI = {10.1109/3DV50981.2020.00019}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {International Conference on 3D Vision}, PAGES = {91--100}, ADDRESS = {Fukuoka, Japan (Virtual Event)}, }
Endnote
%0 Conference Proceedings %A Golyanik, Vladislav %A Shimada, Soshi %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Fast Simultaneous Gravitational Alignment of Multiple Point Sets : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0358-2 %R 10.1109/3DV50981.2020.00019 %D 2020 %B International Conference on 3D Vision %Z date of event: 2020-11-25 - 2020-11-28 %C Fukuoka, Japan (Virtual Event) %B International Conference on 3D Vision %P 91 - 100 %I IEEE %@ 978-1-7281-8128-8
Golyanik, V., Jonas, A., Stricker, D., and Theobalt, C. 2020b. Intrinsic Dynamic Shape Prior for Dense Non-Rigid Structure from Motion. International Conference on 3D Vision, IEEE.
Export
BibTeX
@inproceedings{Golyanik2020DSPR, TITLE = {Intrinsic Dynamic Shape Prior for Dense Non-Rigid Structure from Motion}, AUTHOR = {Golyanik, Vladislav and Jonas, Andr{\'e} and Stricker, Didier and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-8128-8}, DOI = {10.1109/3DV50981.2020.00079}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {International Conference on 3D Vision}, PAGES = {692--701}, ADDRESS = {Fukuoka, Japan (Virtual Event)}, }
Endnote
%0 Conference Proceedings %A Golyanik, Vladislav %A Jonas, André %A Stricker, Didier %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Intrinsic Dynamic Shape Prior for Dense Non-Rigid Structure from Motion : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0364-4 %R 10.1109/3DV50981.2020.00079 %D 2020 %B International Conference on 3D Vision %Z date of event: 2020-11-25 - 2020-11-28 %C Fukuoka, Japan (Virtual Event) %B International Conference on 3D Vision %P 692 - 701 %I IEEE %@ 978-1-7281-8128-8
Günther, F., Jiang, C., and Pottmann, H. 2020. Smooth Polyhedral Surfaces. Advances in Mathematics 363.
(arXiv: 1703.05318)
Abstract
Polyhedral surfaces are fundamental objects in architectural geometry and industrial design. Whereas closeness of a given mesh to a smooth reference surface and its suitability for numerical simulations were already studied extensively, the aim of our work is to find and to discuss suitable assessments of smoothness of polyhedral surfaces that only take the geometry of the polyhedral surface itself into account. Motivated by analogies to classical differential geometry, we propose a theory of smoothness of polyhedral surfaces including suitable notions of normal vectors, tangent planes, asymptotic directions, and parabolic curves that are invariant under projective transformations. It is remarkable that seemingly mild conditions significantly limit the shapes of faces of a smooth polyhedral surface. Besides being of theoretical interest, we believe that smoothness of polyhedral surfaces is of interest in the architectural context, where vertices and edges of polyhedral surfaces are highly visible.
Export
BibTeX
@article{Guenther2020, TITLE = {Smooth Polyhedral Surfaces}, AUTHOR = {G{\"u}nther, Felix and Jiang, Caigui and Pottmann, Helmut}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1703.05318}, DOI = {10.1016/j.aim.2020.107004}, EPRINT = {1703.05318}, EPRINTTYPE = {arXiv}, PUBLISHER = {Elsevier}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, ABSTRACT = {Polyhedral surfaces are fundamental objects in architectural geometry and industrial design. Whereas closeness of a given mesh to a smooth reference surface and its suitability for numerical simulations were already studied extensively, the aim of our work is to find and to discuss suitable assessments of smoothness of polyhedral surfaces that only take the geometry of the polyhedral surface itself into account. Motivated by analogies to classical differential geometry, we propose a theory of smoothness of polyhedral surfaces including suitable notions of normal vectors, tangent planes, asymptotic directions, and parabolic curves that are invariant under projective transformations. It is remarkable that seemingly mild conditions significantly limit the shapes of faces of a smooth polyhedral surface. Besides being of theoretical interest, we believe that smoothness of polyhedral surfaces is of interest in the architectural context, where vertices and edges of polyhedral surfaces are highly visible.}, JOURNAL = {Advances in Mathematics}, VOLUME = {363}, EID = {107004}, }
Endnote
%0 Journal Article %A Günther, Felix %A Jiang, Caigui %A Pottmann, Helmut %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Smooth Polyhedral Surfaces : %G eng %U http://hdl.handle.net/21.11116/0000-0006-9760-3 %U http://arxiv.org/abs/1703.05318 %R 10.1016/j.aim.2020.107004 %D 2020 %* Review method: peer-reviewed %X Polyhedral surfaces are fundamental objects in architectural geometry and industrial design. Whereas closeness of a given mesh to a smooth reference surface and its suitability for numerical simulations were already studied extensively, the aim of our work is to find and to discuss suitable assessments of smoothness of polyhedral surfaces that only take the geometry of the polyhedral surface itself into account. Motivated by analogies to classical differential geometry, we propose a theory of smoothness of polyhedral surfaces including suitable notions of normal vectors, tangent planes, asymptotic directions, and parabolic curves that are invariant under projective transformations. It is remarkable that seemingly mild conditions significantly limit the shapes of faces of a smooth polyhedral surface. Besides being of theoretical interest, we believe that smoothness of polyhedral surfaces is of interest in the architectural context, where vertices and edges of polyhedral surfaces are highly visible. %K Mathematics, Metric Geometry, Mathematics, Differential Geometry %J Advances in Mathematics %O Adv. Math. %V 363 %Z sequence number: 107004 %I Elsevier
Habermann, M., Xu, W., Zollhöfer, M., Pons-Moll, G., and Theobalt, C. 2020a. DeepCap: Monocular Human Performance Capture Using Weak Supervision. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{deepcap2020, TITLE = {{DeepCap}: {M}onocular Human Performance Capture Using Weak Supervision}, AUTHOR = {Habermann, Marc and Xu, Weipeng and Zollh{\"o}fer, Michael and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00510}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {5051--5062}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Habermann, Marc %A Xu, Weipeng %A Zollhöfer, Michael %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T DeepCap: Monocular Human Performance Capture Using Weak Supervision : %G eng %U http://hdl.handle.net/21.11116/0000-0006-A895-4 %R 10.1109/CVPR42600.2020.00510 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 5051 - 5062 %I IEEE %@ 978-1-7281-7168-5
Habermann, M., Xu, W., Zollhöfer, M., Pons-Moll, G., and Theobalt, C. 2020b. DeepCap: Monocular Human Performance Capture Using Weak Supervision. https://arxiv.org/abs/2003.08325.
(arXiv: 2003.08325)
Abstract
Human performance capture is a highly important computer vision problem with many applications in movie production and virtual/augmented reality. Many previous performance capture approaches either required expensive multi-view setups or did not recover dense space-time coherent geometry with frame-to-frame correspondences. We propose a novel deep learning approach for monocular dense human performance capture. Our method is trained in a weakly supervised manner based on multi-view supervision completely removing the need for training data with 3D ground truth annotations. The network architecture is based on two separate networks that disentangle the task into a pose estimation and a non-rigid surface deformation step. Extensive qualitative and quantitative evaluations show that our approach outperforms the state of the art in terms of quality and robustness.
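A minimal PyTorch sketch of the weak multi-view supervision is given below. The cameras, 2D detections, and the single optimized 3D point are synthetic placeholders, and the actual method trains pose and deformation networks rather than free 3D points, but the reprojection loss is the mechanism that removes the need for 3D ground-truth annotations.

import torch

P = torch.rand(4, 3, 4)                       # hypothetical projection matrices for 4 views
point3d = torch.nn.Parameter(torch.rand(3))   # stand-in for a network's 3D prediction
target2d = torch.rand(4, 2)                   # 2D detections in each view

def project(P, X):
    Xh = torch.cat([X, torch.ones(1)])        # homogeneous coordinates
    x = P @ Xh                                # project into all views at once
    return x[:, :2] / x[:, 2:3]               # perspective divide

optimizer = torch.optim.Adam([point3d], lr=1e-2)
for _ in range(200):
    loss = ((project(P, point3d) - target2d) ** 2).mean()  # multi-view reprojection loss
    optimizer.zero_grad(); loss.backward(); optimizer.step()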
Export
BibTeX
@online{Habermann2003.08325, TITLE = {{DeepCap}: {M}onocular Human Performance Capture Using Weak Supervision}, AUTHOR = {Habermann, Marc and Xu, Weipeng and Zollh{\"o}fer, Michael and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2003.08325}, EPRINT = {2003.08325}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Human performance capture is a highly important computer vision problem with many applications in movie production and virtual/augmented reality. Many previous performance capture approaches either required expensive multi-view setups or did not recover dense space-time coherent geometry with frame-to-frame correspondences. We propose a novel deep learning approach for monocular dense human performance capture. Our method is trained in a weakly supervised manner based on multi-view supervision completely removing the need for training data with 3D ground truth annotations. The network architecture is based on two separate networks that disentangle the task into a pose estimation and a non-rigid surface deformation step. Extensive qualitative and quantitative evaluations show that our approach outperforms the state of the art in terms of quality and robustness.}, }
Endnote
%0 Report %A Habermann, Marc %A Xu, Weipeng %A Zollhöfer, Michael %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T DeepCap: Monocular Human Performance Capture Using Weak Supervision : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E010-9 %U https://arxiv.org/abs/2003.08325 %D 2020 %X Human performance capture is a highly important computer vision problem with many applications in movie production and virtual/augmented reality. Many previous performance capture approaches either required expensive multi-view setups or did not recover dense space-time coherent geometry with frame-to-frame correspondences. We propose a novel deep learning approach for monocular dense human performance capture. Our method is trained in a weakly supervised manner based on multi-view supervision completely removing the need for training data with 3D ground truth annotations. The network architecture is based on two separate networks that disentangle the task into a pose estimation and a non-rigid surface deformation step. Extensive qualitative and quantitative evaluations show that our approach outperforms the state of the art in terms of quality and robustness. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Huang, L., Gao, C., Zhou, Y., et al. 2020. Universal Physical Camouflage Attacks on Object Detectors. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{Huang_CVPR2020, TITLE = {Universal Physical Camou{fl}age Attacks on Object Detectors}, AUTHOR = {Huang, Lifeng and Gao, Chengying and Zhou, Yuyin and Xie, Cihang and Yuille, Alan and Zou, Changqing and Liu, Ning}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00080}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {717--726}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Huang, Lifeng %A Gao, Chengying %A Zhou, Yuyin %A Xie, Cihang %A Yuille, Alan %A Zou, Changqing %A Liu, Ning %+ External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Universal Physical Camouflage Attacks on Object Detectors : %G eng %U http://hdl.handle.net/21.11116/0000-0006-09F0-1 %R 10.1109/CVPR42600.2020.00080 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 717 - 726 %I IEEE %@ 978-1-7281-7168-5
Kappel, M., Golyanik, V., Elgharib, M., et al. 2020. High-Fidelity Neural Human Motion Transfer from Monocular Video. https://arxiv.org/abs/2012.10974.
(arXiv: 2012.10974)
Abstract
Video-based human motion transfer creates video animations of humans following a source motion. Current methods show remarkable results for tightly-clad subjects. However, the lack of temporally consistent handling of plausible clothing dynamics, including fine and high-frequency details, significantly limits the attainable visual quality. We address these limitations for the first time in the literature and present a new framework which performs high-fidelity and temporally-consistent human motion transfer with natural pose-dependent non-rigid deformations, for several types of loose garments. In contrast to the previous techniques, we perform image generation in three subsequent stages, synthesizing human shape, structure, and appearance. Given a monocular RGB video of an actor, we train a stack of recurrent deep neural networks that generate these intermediate representations from 2D poses and their temporal derivatives. Splitting the difficult motion transfer problem into subtasks that are aware of the temporal motion context helps us to synthesize results with plausible dynamics and pose-dependent detail. It also allows artistic control of results by manipulation of individual framework stages. In the experimental results, we significantly outperform the state-of-the-art in terms of video realism. Our code and data will be made publicly available.
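A minimal sketch of the staged, recurrent generation idea (shape, then structure, then appearance, each conditioned on 2D poses and their temporal derivatives); the use of GRUs, the joint count and the feature sizes are assumptions for illustration, not the authors' network.
# Staged recurrent pipeline in the spirit of the abstract
# (shape -> structure -> appearance); sizes and modules are hypothetical.
import torch
import torch.nn as nn

class Stage(nn.Module):
    """One recurrent stage: consumes 2D poses (and their temporal derivatives)
    plus the previous stage's output, and emits an intermediate representation."""
    def __init__(self, in_dim, hidden=256, out_dim=128):
        super().__init__()
        self.rnn = nn.GRU(in_dim, hidden, batch_first=True)
        self.head = nn.Linear(hidden, out_dim)

    def forward(self, x):            # x: (B, T, in_dim)
        h, _ = self.rnn(x)
        return self.head(h)          # (B, T, out_dim)

pose_dim = 2 * 25                    # 25 hypothetical 2D joints
shape_stage      = Stage(in_dim=2 * pose_dim)             # poses + derivatives
structure_stage  = Stage(in_dim=2 * pose_dim + 128)
appearance_stage = Stage(in_dim=2 * pose_dim + 128, out_dim=3 * 64 * 64)

poses  = torch.randn(1, 30, pose_dim)                     # a 30-frame clip
vel    = poses.diff(dim=1, prepend=poses[:, :1])          # temporal derivative
x      = torch.cat([poses, vel], dim=-1)
shape     = shape_stage(x)
structure = structure_stage(torch.cat([x, shape], dim=-1))
frames    = appearance_stage(torch.cat([x, structure], dim=-1))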
Export
BibTeX
@online{Kappel_arXiv2012.10974, TITLE = {High-Fidelity Neural Human Motion Transfer from Monocular Video}, AUTHOR = {Kappel, Moritz and Golyanik, Vladislav and Elgharib, Mohamed and Henningson, Jann-Ole and Seidel, Hans-Peter and Castillo, Susana and Theobalt, Christian and Magnor, Marcus A.}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.10974}, EPRINT = {2012.10974}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Video-based human motion transfer creates video animations of humans following a source motion. Current methods show remarkable results for tightly-clad subjects. However, the lack of temporally consistent handling of plausible clothing dynamics, including fine and high-frequency details, significantly limits the attainable visual quality. We address these limitations for the first time in the literature and present a new framework which performs high-fidelity and temporally-consistent human motion transfer with natural pose-dependent non-rigid deformations, for several types of loose garments. In contrast to the previous techniques, we perform image generation in three subsequent stages, synthesizing human shape, structure, and appearance. Given a monocular RGB video of an actor, we train a stack of recurrent deep neural networks that generate these intermediate representations from 2D poses and their temporal derivatives. Splitting the difficult motion transfer problem into subtasks that are aware of the temporal motion context helps us to synthesize results with plausible dynamics and pose-dependent detail. It also allows artistic control of results by manipulation of individual framework stages. In the experimental results, we significantly outperform the state-of-the-art in terms of video realism. Our code and data will be made publicly available.}, }
Endnote
%0 Report %A Kappel, Moritz %A Golyanik, Vladislav %A Elgharib, Mohamed %A Henningson, Jann-Ole %A Seidel, Hans-Peter %A Castillo, Susana %A Theobalt, Christian %A Magnor, Marcus A. %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T High-Fidelity Neural Human Motion Transfer from Monocular Video : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B715-3 %U https://arxiv.org/abs/2012.10974 %D 2020 %X Video-based human motion transfer creates video animations of humans following a source motion. Current methods show remarkable results for tightly-clad subjects. However, the lack of temporally consistent handling of plausible clothing dynamics, including fine and high-frequency details, significantly limits the attainable visual quality. We address these limitations for the first time in the literature and present a new framework which performs high-fidelity and temporally-consistent human motion transfer with natural pose-dependent non-rigid deformations, for several types of loose garments. In contrast to the previous techniques, we perform image generation in three subsequent stages, synthesizing human shape, structure, and appearance. Given a monocular RGB video of an actor, we train a stack of recurrent deep neural networks that generate these intermediate representations from 2D poses and their temporal derivatives. Splitting the difficult motion transfer problem into subtasks that are aware of the temporal motion context helps us to synthesize results with plausible dynamics and pose-dependent detail. It also allows artistic control of results by manipulation of individual framework stages. In the experimental results, we significantly outperform the state-of-the-art in terms of video realism. Our code and data will be made publicly available. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG
Liu, L., Gu, J., Lin, K.Z., Chua, T.-S., and Theobalt, C. 2020a. Neural Sparse Voxel Fields. Advances in Neural Information Processing Systems 33 (NeurIPS 2020), Curran Associates, Inc.
Export
BibTeX
@inproceedings{LiuNeural20, TITLE = {Neural Sparse Voxel Fields}, AUTHOR = {Liu, Lingjie and Gu, Jiatao and Lin, Kyaw Zaw and Chua, Tat-Seng and Theobalt, Christian}, LANGUAGE = {eng}, PUBLISHER = {Curran Associates, Inc.}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {Advances in Neural Information Processing Systems 33 (NeurIPS 2020)}, EDITOR = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M. F. and Lin, H.}, ADDRESS = {Virtual Event}, }
Endnote
%0 Conference Proceedings %A Liu, Lingjie %A Gu, Jiatao %A Lin, Kyaw Zaw %A Chua, Tat-Seng %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Sparse Voxel Fields : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D437-C %D 2020 %B 34th Conference on Neural Information Processing Systems %Z date of event: 2020-12-06 - 2020-12-12 %C Virtual Event %B Advances in Neural Information Processing Systems 33 %E Larochelle, H.; Ranzato, M.; Hadsell, R.; Balcan, M. F.; Lin, H. %I Curran Associates, Inc. %U https://proceedings.neurips.cc/paper/2020/file/b4b758962f17808746e9bb832a6fa4b8-Paper.pdf
Liu, L., Gu, J., Lin, K.Z., Chua, T.-S., and Theobalt, C. 2020b. Neural Sparse Voxel Fields. https://arxiv.org/abs/2007.11571.
(arXiv: 2007.11571)
Abstract
Photo-realistic free-viewpoint rendering of real-world scenes using classical computer graphics techniques is challenging, because it requires the difficult step of capturing detailed appearance and geometry models. Recent studies have demonstrated promising results by learning scene representations that implicitly encode both geometry and appearance without 3D supervision. However, existing approaches in practice often show blurry renderings caused by the limited network capacity or the difficulty in finding accurate intersections of camera rays with the scene geometry. Synthesizing high-resolution imagery from these representations often requires time-consuming optical ray marching. In this work, we introduce Neural Sparse Voxel Fields (NSVF), a new neural scene representation for fast and high-quality free-viewpoint rendering. NSVF defines a set of voxel-bounded implicit fields organized in a sparse voxel octree to model local properties in each cell. We progressively learn the underlying voxel structures with a differentiable ray-marching operation from only a set of posed RGB images. With the sparse voxel octree structure, rendering novel views can be accelerated by skipping the voxels containing no relevant scene content. Our method is typically over 10 times faster than the state-of-the-art (namely, NeRF(Mildenhall et al., 2020)) at inference time while achieving higher quality results. Furthermore, by utilizing an explicit sparse voxel representation, our method can easily be applied to scene editing and scene composition. We also demonstrate several challenging tasks, including multi-scene learning, free-viewpoint rendering of a moving human, and large-scale scene rendering. Code and data are available at our website: https://github.com/facebookresearch/NSVF.
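The central acceleration idea, evaluating the radiance network only inside occupied voxels along each ray, can be illustrated with the toy marcher below; a uniform boolean grid and a fixed step count stand in for the sparse voxel octree and the differentiable ray marching of the paper.
# Toy illustration of skipping empty space during ray marching with a sparse
# voxel grid; the uniform grid, step size and MLP query are simplifications.
import torch

def march_sparse(origin, direction, occupancy, radiance_mlp,
                 bounds=(-1.0, 1.0), n_steps=256):
    """occupancy: (R,R,R) bool grid; radiance_mlp: f(points) -> (N,4) rgb+sigma."""
    R = occupancy.shape[0]
    t = torch.linspace(0.0, 1.0, n_steps)
    pts = origin + t[:, None] * direction              # sample points along the ray
    lo, hi = bounds
    idx = ((pts - lo) / (hi - lo) * R).long().clamp(0, R - 1)
    keep = occupancy[idx[:, 0], idx[:, 1], idx[:, 2]]  # skip empty voxels
    if keep.sum() == 0:
        return torch.zeros(3)
    rgb_sigma = radiance_mlp(pts[keep])                # evaluate occupied cells only
    rgb, sigma = rgb_sigma[:, :3], rgb_sigma[:, 3]
    alpha = 1.0 - torch.exp(-sigma * (1.0 / n_steps))
    weights = alpha * torch.cumprod(
        torch.cat([torch.ones(1), 1.0 - alpha + 1e-10])[:-1], dim=0)
    return (weights[:, None] * rgb).sum(dim=0)         # composited ray colour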
Export
BibTeX
@online{Liu_2007.11571, TITLE = {Neural Sparse Voxel Fields}, AUTHOR = {Liu, Lingjie and Gu, Jiatao and Lin, Kyaw Zaw and Chua, Tat-Seng and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2007.11571}, EPRINT = {2007.11571}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Photo-realistic free-viewpoint rendering of real-world scenes using classical computer graphics techniques is challenging, because it requires the difficult step of capturing detailed appearance and geometry models. Recent studies have demonstrated promising results by learning scene representations that implicitly encode both geometry and appearance without 3D supervision. However, existing approaches in practice often show blurry renderings caused by the limited network capacity or the difficulty in finding accurate intersections of camera rays with the scene geometry. Synthesizing high-resolution imagery from these representations often requires time-consuming optical ray marching. In this work, we introduce Neural Sparse Voxel Fields (NSVF), a new neural scene representation for fast and high-quality free-viewpoint rendering. NSVF defines a set of voxel-bounded implicit fields organized in a sparse voxel octree to model local properties in each cell. We progressively learn the underlying voxel structures with a differentiable ray-marching operation from only a set of posed RGB images. With the sparse voxel octree structure, rendering novel views can be accelerated by skipping the voxels containing no relevant scene content. Our method is typically over 10 times faster than the state-of-the-art (namely, NeRF(Mildenhall et al., 2020)) at inference time while achieving higher quality results. Furthermore, by utilizing an explicit sparse voxel representation, our method can easily be applied to scene editing and scene composition. We also demonstrate several challenging tasks, including multi-scene learning, free-viewpoint rendering of a moving human, and large-scale scene rendering. Code and data are available at our website: https://github.com/facebookresearch/NSVF.}, }
Endnote
%0 Report %A Liu, Lingjie %A Gu, Jiatao %A Lin, Kyaw Zaw %A Chua, Tat-Seng %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Sparse Voxel Fields : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E8B2-A %U https://arxiv.org/abs/2007.11571 %D 2020 %X Photo-realistic free-viewpoint rendering of real-world scenes using classical computer graphics techniques is challenging, because it requires the difficult step of capturing detailed appearance and geometry models. Recent studies have demonstrated promising results by learning scene representations that implicitly encode both geometry and appearance without 3D supervision. However, existing approaches in practice often show blurry renderings caused by the limited network capacity or the difficulty in finding accurate intersections of camera rays with the scene geometry. Synthesizing high-resolution imagery from these representations often requires time-consuming optical ray marching. In this work, we introduce Neural Sparse Voxel Fields (NSVF), a new neural scene representation for fast and high-quality free-viewpoint rendering. NSVF defines a set of voxel-bounded implicit fields organized in a sparse voxel octree to model local properties in each cell. We progressively learn the underlying voxel structures with a differentiable ray-marching operation from only a set of posed RGB images. With the sparse voxel octree structure, rendering novel views can be accelerated by skipping the voxels containing no relevant scene content. Our method is typically over 10 times faster than the state-of-the-art (namely, NeRF(Mildenhall et al., 2020)) at inference time while achieving higher quality results. Furthermore, by utilizing an explicit sparse voxel representation, our method can easily be applied to scene editing and scene composition. We also demonstrate several challenging tasks, including multi-scene learning, free-viewpoint rendering of a moving human, and large-scale scene rendering. Code and data are available at our website: https://github.com/facebookresearch/NSVF. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG
Liu, L., Xu, W., Habermann, M., et al. 2020c. Neural Human Video Rendering by Learning Dynamic Textures and Rendering-to-Video Translation. IEEE Transactions on Visualization and Computer Graphics.
Export
BibTeX
@article{liu2020NeuralHumanRendering, TITLE = {Neural Human Video Rendering by Learning Dynamic Textures and Rendering-to-Video Translation}, AUTHOR = {Liu, Lingjie and Xu, Weipeng and Habermann, Marc and Zollh{\"o}fer, Michael and Bernard, Florian and Kim, Hyeongwoo and Wang, Wenping and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {1077-2626}, DOI = {10.1109/TVCG.2020.2996594}, PUBLISHER = {IEEE}, ADDRESS = {Piscataway, NJ}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {IEEE Transactions on Visualization and Computer Graphics}, }
Endnote
%0 Journal Article %A Liu, Lingjie %A Xu, Weipeng %A Habermann, Marc %A Zollhöfer, Michael %A Bernard, Florian %A Kim, Hyeongwoo %A Wang, Wenping %A Theobalt, Christian %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Human Video Rendering by Learning Dynamic Textures and Rendering-to-Video Translation : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0369-F %R 10.1109/TVCG.2020.2996594 %7 2020 %D 2020 %J IEEE Transactions on Visualization and Computer Graphics %I IEEE %C Piscataway, NJ %@ false
Li, Y., Habermann, M., Thomaszewski, B., Coros, S., Beeler, T., and Theobalt, C. 2020. Deep Physics-aware Inference of Cloth Deformation for Monocular Human Performance Capture. https://arxiv.org/abs/2011.12866.
(arXiv: 2011.12866)
Abstract
Recent monocular human performance capture approaches have shown compelling dense tracking results of the full body from a single RGB camera. However, existing methods either do not estimate clothing at all or model cloth deformation with simple geometric priors instead of taking into account the underlying physical principles. This leads to noticeable artifacts in their reconstructions, such as baked-in wrinkles, implausible deformations that seemingly defy gravity, and intersections between cloth and body. To address these problems, we propose a person-specific, learning-based method that integrates a finite element-based simulation layer into the training process to provide for the first time physics supervision in the context of weakly-supervised deep monocular human performance capture. We show how integrating physics into the training process improves the learned cloth deformations, allows modeling clothing as a separate piece of geometry, and largely reduces cloth-body intersections. Relying only on weak 2D multi-view supervision during training, our approach leads to a significant improvement over current state-of-the-art methods and is thus a clear step towards realistic monocular capture of the entire deforming surface of a clothed human.
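The following sketch illustrates how a differentiable physics term can be combined with the usual weak 2D supervision; a simple mass-spring energy and a crude penetration proxy stand in here for the finite-element simulation layer and the body-surface distance used in the paper.
# Conceptual sketch: physics-based regularisation added to a reprojection loss.
import torch

def spring_energy(verts, edges, rest_len, k=10.0):
    """verts: (V,3); edges: (E,2) long; rest_len: (E,) rest lengths."""
    d = verts[edges[:, 0]] - verts[edges[:, 1]]
    length = d.norm(dim=-1)
    return (k * (length - rest_len) ** 2).mean()

def training_loss(pred_cloth, pred_body, edges, rest_len,
                  reproj_loss, w_phys=0.1, w_pen=1.0):
    phys = spring_energy(pred_cloth, edges, rest_len)
    # Penalise cloth vertices that end up "inside" the body; a toy radial
    # proxy replaces the signed distance the real method would use.
    sdf_to_body = (pred_cloth - pred_body.mean(dim=0)).norm(dim=-1) - 0.3
    penetration = torch.relu(-sdf_to_body).mean()
    return reproj_loss + w_phys * phys + w_pen * penetration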
Export
BibTeX
@online{Li_2011.12866, TITLE = {Deep Physics-aware Inference of Cloth Deformation for Monocular Human Performance Capture}, AUTHOR = {Li, Yue and Habermann, Marc and Thomaszewski,, Bernhard and Coros, Stelian and Beeler, Thabo and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2011.12866}, EPRINT = {2011.12866}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Recent monocular human performance capture approaches have shown compelling dense tracking results of the full body from a single RGB camera. However, existing methods either do not estimate clothing at all or model cloth deformation with simple geometric priors instead of taking into account the underlying physical principles. This leads to noticeable artifacts in their reconstructions, such as baked-in wrinkles, implausible deformations that seemingly defy gravity, and intersections between cloth and body. To address these problems, we propose a person-specific, learning-based method that integrates a finite element-based simulation layer into the training process to provide for the first time physics supervision in the context of weakly-supervised deep monocular human performance capture. We show how integrating physics into the training process improves the learned cloth deformations, allows modeling clothing as a separate piece of geometry, and largely reduces cloth-body intersections. Relying only on weak 2D multi-view supervision during training, our approach leads to a significant improvement over current state-of-the-art methods and is thus a clear step towards realistic monocular capture of the entire deforming surface of a clothed human.}, }
Endnote
%0 Report %A Li, Yue %A Habermann, Marc %A Thomaszewski,, Bernhard %A Coros, Stelian %A Beeler, Thabo %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Deep Physics-aware Inference of Cloth Deformation for Monocular Human Performance Capture : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E9D0-7 %U https://arxiv.org/abs/2011.12866 %D 2020 %X Recent monocular human performance capture approaches have shown compelling dense tracking results of the full body from a single RGB camera. However, existing methods either do not estimate clothing at all or model cloth deformation with simple geometric priors instead of taking into account the underlying physical principles. This leads to noticeable artifacts in their reconstructions, such as baked-in wrinkles, implausible deformations that seemingly defy gravity, and intersections between cloth and body. To address these problems, we propose a person-specific, learning-based method that integrates a finite element-based simulation layer into the training process to provide for the first time physics supervision in the context of weakly-supervised deep monocular human performance capture. We show how integrating physics into the training process improves the learned cloth deformations, allows modeling clothing as a separate piece of geometry, and largely reduces cloth-body intersections. Relying only on weak 2D multi-view supervision during training, our approach leads to a significant improvement over current state-of-the-art methods and is thus a clear step towards realistic monocular capture of the entire deforming surface of a clothed human. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Long, X., Liu, L., Theobalt, C., and Wang, W. 2020a. Occlusion-Aware Depth Estimation with Adaptive Normal Constraints. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Long_ECCV20, TITLE = {Occlusion-Aware Depth Estimation with Adaptive Normal Constraints}, AUTHOR = {Long, Xiaoxiao and Liu, Lingjie and Theobalt, Christian and Wang, Wenping}, LANGUAGE = {eng}, ISBN = {978-3-030-58544-0}, DOI = {10.1007/978-3-030-58545-7_37}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {640--657}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12354}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Long, Xiaoxiao %A Liu, Lingjie %A Theobalt, Christian %A Wang, Wenping %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Occlusion-Aware Depth Estimation with Adaptive Normal Constraints : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D42B-A %R 10.1007/978-3-030-58545-7_37 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 640 - 657 %I Springer %@ 978-3-030-58544-0 %B Lecture Notes in Computer Science %N 12354
Long, X., Liu, L., Theobalt, C., and Wang, W. 2020b. Occlusion-Aware Depth Estimation with Adaptive Normal Constraints. ECCV 2020. Lecture Notes in Computer Science, vol 12354. Springer, Cham. https://arxiv.org/abs/2004.00845.
(arXiv: 2004.00845)
Abstract
We present a new learning-based method for multi-frame depth estimation from a color video, which is a fundamental problem in scene understanding, robot navigation or handheld 3D reconstruction. While recent learning-based methods estimate depth at high accuracy, 3D point clouds exported from their depth maps often fail to preserve important geometric feature (e.g., corners, edges, planes) of man-made scenes. Widely-used pixel-wise depth errors do not specifically penalize inconsistency on these features. These inaccuracies are particularly severe when subsequent depth reconstructions are accumulated in an attempt to scan a full environment with man-made objects with this kind of features. Our depth estimation algorithm therefore introduces a Combined Normal Map (CNM) constraint, which is designed to better preserve high-curvature features and global planar regions. In order to further improve the depth estimation accuracy, we introduce a new occlusion-aware strategy that aggregates initial depth predictions from multiple adjacent views into one final depth map and one occlusion probability map for the current reference view. Our method outperforms the state-of-the-art in terms of depth estimation accuracy, and preserves essential geometric features of man-made indoor scenes much better than other algorithms.
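A rough sketch of pairing a per-pixel depth error with a normal-consistency term, in the spirit of the Combined Normal Map constraint; the finite-difference normal derivation and the intrinsics are generic placeholders, not the paper's exact formulation.
# Depth loss plus a normal-consistency term derived from depth gradients.
import torch
import torch.nn.functional as F

def normals_from_depth(depth, fx=500.0, fy=500.0):
    """depth: (B,1,H,W) -> unit normals (B,3,H,W) via finite differences."""
    dzdx = depth[..., :, 1:] - depth[..., :, :-1]
    dzdy = depth[..., 1:, :] - depth[..., :-1, :]
    dzdx = F.pad(dzdx, (0, 1, 0, 0))
    dzdy = F.pad(dzdy, (0, 0, 0, 1))
    n = torch.cat([-dzdx * fx, -dzdy * fy, torch.ones_like(depth)], dim=1)
    return F.normalize(n, dim=1)

def depth_and_normal_loss(pred_depth, gt_depth, gt_normals, w_normal=0.5):
    l_depth = (pred_depth - gt_depth).abs().mean()
    pred_n = normals_from_depth(pred_depth)
    l_normal = (1.0 - (pred_n * gt_normals).sum(dim=1)).mean()  # cosine term
    return l_depth + w_normal * l_normal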
Export
BibTeX
@online{Long2004.00845, TITLE = {Occlusion-Aware Depth Estimation with Adaptive Normal Constraints}, AUTHOR = {Long, Xiaoxiao and Liu, Lingjie and Theobalt, Christian and Wang, Wenping}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2004.00845}, EPRINT = {2004.00845}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a new learning-based method for multi-frame depth estimation from a color video, which is a fundamental problem in scene understanding, robot navigation or handheld 3D reconstruction. While recent learning-based methods estimate depth at high accuracy, 3D point clouds exported from their depth maps often fail to preserve important geometric feature (e.g., corners, edges, planes) of man-made scenes. Widely-used pixel-wise depth errors do not specifically penalize inconsistency on these features. These inaccuracies are particularly severe when subsequent depth reconstructions are accumulated in an attempt to scan a full environment with man-made objects with this kind of features. Our depth estimation algorithm therefore introduces a Combined Normal Map (CNM) constraint, which is designed to better preserve high-curvature features and global planar regions. In order to further improve the depth estimation accuracy, we introduce a new occlusion-aware strategy that aggregates initial depth predictions from multiple adjacent views into one final depth map and one occlusion probability map for the current reference view. Our method outperforms the state-of-the-art in terms of depth estimation accuracy, and preserves essential geometric features of man-made indoor scenes much better than other algorithms.}, JOURNAL = {ECCV 2020. Lecture Notes in Computer Science, vol 12354. Springer, Cham}, }
Endnote
%0 Report %A Long, Xiaoxiao %A Liu, Lingjie %A Theobalt, Christian %A Wang, Wenping %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Occlusion-Aware Depth Estimation with Adaptive Normal Constraints : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E0E9-5 %U https://arxiv.org/abs/2004.00845 %D 2020 %X We present a new learning-based method for multi-frame depth estimation from a color video, which is a fundamental problem in scene understanding, robot navigation or handheld 3D reconstruction. While recent learning-based methods estimate depth at high accuracy, 3D point clouds exported from their depth maps often fail to preserve important geometric feature (e.g., corners, edges, planes) of man-made scenes. Widely-used pixel-wise depth errors do not specifically penalize inconsistency on these features. These inaccuracies are particularly severe when subsequent depth reconstructions are accumulated in an attempt to scan a full environment with man-made objects with this kind of features. Our depth estimation algorithm therefore introduces a Combined Normal Map (CNM) constraint, which is designed to better preserve high-curvature features and global planar regions. In order to further improve the depth estimation accuracy, we introduce a new occlusion-aware strategy that aggregates initial depth predictions from multiple adjacent views into one final depth map and one occlusion probability map for the current reference view. Our method outperforms the state-of-the-art in terms of depth estimation accuracy, and preserves essential geometric features of man-made indoor scenes much better than other algorithms. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV %J ECCV 2020. Lecture Notes in Computer Science, vol 12354. Springer, Cham
Long, X., Liu, L., Li, W., Theobalt, C., and Wang, W. 2020c. Multi-view Depth Estimation using Epipolar Spatio-Temporal Networks. https://arxiv.org/abs/2011.13118.
(arXiv: 2011.13118)
Abstract
We present a novel method for multi-view depth estimation from a single video, which is a critical task in various applications, such as perception, reconstruction and robot navigation. Although previous learning-based methods have demonstrated compelling results, most works estimate depth maps of individual video frames independently, without taking into consideration the strong geometric and temporal coherence among the frames. Moreover, current state-of-the-art (SOTA) models mostly adopt a fully 3D convolution network for cost regularization and therefore require high computational cost, thus limiting their deployment in real-world applications. Our method achieves temporally coherent depth estimation results by using a novel Epipolar Spatio-Temporal (EST) transformer to explicitly associate geometric and temporal correlation with multiple estimated depth maps. Furthermore, to reduce the computational cost, inspired by recent Mixture-of-Experts models, we design a compact hybrid network consisting of a 2D context-aware network and a 3D matching network which learn 2D context information and 3D disparity cues separately. Extensive experiments demonstrate that our method achieves higher accuracy in depth estimation and significant speedup than the SOTA methods.
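A deliberately simplified sketch of the compact hybrid design, with a 2D context branch and a small 3D matching branch operating on a plane-sweep cost volume; the epipolar spatio-temporal transformer itself is omitted and all sizes are hypothetical.
# Hybrid 2D-context / 3D-matching network sketch (illustrative only).
import torch
import torch.nn as nn

class Hybrid(nn.Module):
    def __init__(self, n_depth_planes=64):
        super().__init__()
        self.context2d = nn.Sequential(                 # 2D context features
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(),
            nn.Conv2d(32, 32, 3, padding=1))
        self.match3d = nn.Sequential(                   # 3D disparity cues
            nn.Conv3d(1, 8, 3, padding=1), nn.ReLU(),
            nn.Conv3d(8, 1, 3, padding=1))
        self.planes = n_depth_planes

    def forward(self, image, cost_volume):
        # cost_volume: (B, 1, D, H, W) from plane-sweep matching, D == planes
        ctx = self.context2d(image)                     # (B, 32, H, W)
        cost = self.match3d(cost_volume).squeeze(1)     # (B, D, H, W)
        prob = torch.softmax(-cost, dim=1)              # per-pixel depth distribution
        depths = torch.linspace(0.5, 10.0, self.planes).view(1, -1, 1, 1)
        return (prob * depths).sum(dim=1, keepdim=True), ctx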
Export
BibTeX
@online{Long_2011.13118, TITLE = {Multi-view Depth Estimation using Epipolar Spatio-Temporal Networks}, AUTHOR = {Long, Xiaoxiao and Liu, Lingjie and Li, Wei and Theobalt, Christian and Wang, Wenping}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2011.13118}, EPRINT = {2011.13118}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a novel method for multi-view depth estimation from a single video, which is a critical task in various applications, such as perception, reconstruction and robot navigation. Although previous learning-based methods have demonstrated compelling results, most works estimate depth maps of individual video frames independently, without taking into consideration the strong geometric and temporal coherence among the frames. Moreover, current state-of-the-art (SOTA) models mostly adopt a fully 3D convolution network for cost regularization and therefore require high computational cost, thus limiting their deployment in real-world applications. Our method achieves temporally coherent depth estimation results by using a novel Epipolar Spatio-Temporal (EST) transformer to explicitly associate geometric and temporal correlation with multiple estimated depth maps. Furthermore, to reduce the computational cost, inspired by recent Mixture-of-Experts models, we design a compact hybrid network consisting of a 2D context-aware network and a 3D matching network which learn 2D context information and 3D disparity cues separately. Extensive experiments demonstrate that our method achieves higher accuracy in depth estimation and significant speedup than the SOTA methods.}, }
Endnote
%0 Report %A Long, Xiaoxiao %A Liu, Lingjie %A Li, Wei %A Theobalt, Christian %A Wang, Wenping %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Multi-view Depth Estimation using Epipolar Spatio-Temporal Networks : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E9EA-B %U https://arxiv.org/abs/2011.13118 %D 2020 %X We present a novel method for multi-view depth estimation from a single video, which is a critical task in various applications, such as perception, reconstruction and robot navigation. Although previous learning-based methods have demonstrated compelling results, most works estimate depth maps of individual video frames independently, without taking into consideration the strong geometric and temporal coherence among the frames. Moreover, current state-of-the-art (SOTA) models mostly adopt a fully 3D convolution network for cost regularization and therefore require high computational cost, thus limiting their deployment in real-world applications. Our method achieves temporally coherent depth estimation results by using a novel Epipolar Spatio-Temporal (EST) transformer to explicitly associate geometric and temporal correlation with multiple estimated depth maps. Furthermore, to reduce the computational cost, inspired by recent Mixture-of-Experts models, we design a compact hybrid network consisting of a 2D context-aware network and a 3D matching network which learn 2D context information and 3D disparity cues separately. Extensive experiments demonstrate that our method achieves higher accuracy in depth estimation and significant speedup than the SOTA methods. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Malik, J., Abdelaziz, I., Elhayek, A., et al. 2020a. HandVoxNet: Deep Voxel-Based Network for 3D Hand Shape and Pose Estimation From a Single Depth Map. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{Malik_CVPR2020, TITLE = {{HandVoxNet}: {D}eep Voxel-Based Network for {3D} Hand Shape and Pose Estimation From a Single Depth Map}, AUTHOR = {Malik, Jameel and Abdelaziz, Ibrahim and Elhayek, Ahmed and Shimada, Soshi and Ali, Sk Aziz and Golyanik, Vladislav and Theobalt, Christian and Stricker, Didier}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00714}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {7111--7120}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Malik, Jameel %A Abdelaziz, Ibrahim %A Elhayek, Ahmed %A Shimada, Soshi %A Ali, Sk Aziz %A Golyanik, Vladislav %A Theobalt, Christian %A Stricker, Didier %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T HandVoxNet: Deep Voxel-Based Network for 3D Hand Shape and Pose Estimation From a Single Depth Map : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CFCA-D %R 10.1109/CVPR42600.2020.00714 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 7111 - 7120 %I IEEE %@ 978-1-7281-7168-5
Malik, J., Abdelaziz, I., Elhayek, A., et al. 2020b. HandVoxNet: Deep Voxel-Based Network for 3D Hand Shape and Pose Estimation from a Single Depth Map. https://arxiv.org/abs/2004.01588.
(arXiv: 2004.01588)
Abstract
3D hand shape and pose estimation from a single depth map is a new and challenging computer vision problem with many applications. The state-of-the-art methods directly regress 3D hand meshes from 2D depth images via 2D convolutional neural networks, which leads to artefacts in the estimations due to perspective distortions in the images. In contrast, we propose a novel architecture with 3D convolutions trained in a weakly-supervised manner. The input to our method is a 3D voxelized depth map, and we rely on two hand shape representations. The first one is the 3D voxelized grid of the shape which is accurate but does not preserve the mesh topology and the number of mesh vertices. The second representation is the 3D hand surface which is less accurate but does not suffer from the limitations of the first representation. We combine the advantages of these two representations by registering the hand surface to the voxelized hand shape. In the extensive experiments, the proposed approach improves over the state of the art by 47.8% on the SynHand5M dataset. Moreover, our augmentation policy for voxelized depth maps further enhances the accuracy of 3D hand pose estimation on real data. Our method produces visually more reasonable and realistic hand shapes on NYU and BigHand2.2M datasets compared to the existing approaches.
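A small illustration of the first step the abstract mentions, turning a depth map into a voxelized occupancy grid that serves as network input; the intrinsics, resolution and hand-centred bounds below are hypothetical values, not those used by HandVoxNet.
# Voxelizing a depth map into an occupancy grid around the hand (toy example).
import torch

def voxelize_depth(depth, fx=475.0, fy=475.0, cx=None, cy=None,
                   res=64, bounds=(-0.15, 0.15)):
    """depth: (H,W) in metres -> (res,res,res) occupancy grid around the hand."""
    H, W = depth.shape
    cx = W / 2 if cx is None else cx
    cy = H / 2 if cy is None else cy
    v, u = torch.meshgrid(torch.arange(H), torch.arange(W), indexing="ij")
    z = depth
    x = (u - cx) * z / fx                                  # back-project pixels
    y = (v - cy) * z / fy
    pts = torch.stack([x, y, z - z[z > 0].mean()], dim=-1).reshape(-1, 3)
    pts = pts[depth.reshape(-1) > 0]                       # valid pixels only
    lo, hi = bounds
    idx = ((pts - lo) / (hi - lo) * res).long()
    keep = ((idx >= 0) & (idx < res)).all(dim=-1)
    grid = torch.zeros(res, res, res)
    grid[idx[keep, 0], idx[keep, 1], idx[keep, 2]] = 1.0   # mark occupied voxels
    return grid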
Export
BibTeX
@online{Malik2004.01588, TITLE = {{HandVoxNet}: {D}eep Voxel-Based Network for {3D} Hand Shape and Pose Estimation from a Single Depth Map}, AUTHOR = {Malik, Jameel and Abdelaziz, Ibrahim and Elhayek, Ahmed and Shimada, Soshi and Ali, Sk Aziz and Golyanik, Vladislav and Theobalt, Christian and Stricker, Didier}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2004.01588}, EPRINT = {2004.01588}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {3D hand shape and pose estimation from a single depth map is a new and challenging computer vision problem with many applications. The state-of-the-art methods directly regress 3D hand meshes from 2D depth images via 2D convolutional neural networks, which leads to artefacts in the estimations due to perspective distortions in the images. In contrast, we propose a novel architecture with 3D convolutions trained in a weakly-supervised manner. The input to our method is a 3D voxelized depth map, and we rely on two hand shape representations. The first one is the 3D voxelized grid of the shape which is accurate but does not preserve the mesh topology and the number of mesh vertices. The second representation is the 3D hand surface which is less accurate but does not suffer from the limitations of the first representation. We combine the advantages of these two representations by registering the hand surface to the voxelized hand shape. In the extensive experiments, the proposed approach improves over the state of the art by 47.8% on the SynHand5M dataset. Moreover, our augmentation policy for voxelized depth maps further enhances the accuracy of 3D hand pose estimation on real data. Our method produces visually more reasonable and realistic hand shapes on NYU and BigHand2.2M datasets compared to the existing approaches.}, }
Endnote
%0 Report %A Malik, Jameel %A Abdelaziz, Ibrahim %A Elhayek, Ahmed %A Shimada, Soshi %A Ali, Sk Aziz %A Golyanik, Vladislav %A Theobalt, Christian %A Stricker, Didier %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T HandVoxNet: Deep Voxel-Based Network for 3D Hand Shape and Pose Estimation from a Single Depth Map : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E0FF-D %U https://arxiv.org/abs/2004.01588 %D 2020 %X 3D hand shape and pose estimation from a single depth map is a new and challenging computer vision problem with many applications. The state-of-the-art methods directly regress 3D hand meshes from 2D depth images via 2D convolutional neural networks, which leads to artefacts in the estimations due to perspective distortions in the images. In contrast, we propose a novel architecture with 3D convolutions trained in a weakly-supervised manner. The input to our method is a 3D voxelized depth map, and we rely on two hand shape representations. The first one is the 3D voxelized grid of the shape which is accurate but does not preserve the mesh topology and the number of mesh vertices. The second representation is the 3D hand surface which is less accurate but does not suffer from the limitations of the first representation. We combine the advantages of these two representations by registering the hand surface to the voxelized hand shape. In the extensive experiments, the proposed approach improves over the state of the art by 47.8% on the SynHand5M dataset. Moreover, our augmentation policy for voxelized depth maps further enhances the accuracy of 3D hand pose estimation on real data. Our method produces visually more reasonable and realistic hand shapes on NYU and BigHand2.2M datasets compared to the existing approaches. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Mallikarjun B R, Tewari, A., Oh, T.-H., et al. 2020a. Monocular Reconstruction of Neural Face Reflectance Fields. https://arxiv.org/abs/2008.10247.
(arXiv: 2008.10247)
Abstract
The reflectance field of a face describes the reflectance properties responsible for complex lighting effects including diffuse, specular, inter-reflection and self shadowing. Most existing methods for estimating the face reflectance from a monocular image assume faces to be diffuse with very few approaches adding a specular component. This still leaves out important perceptual aspects of reflectance as higher-order global illumination effects and self-shadowing are not modeled. We present a new neural representation for face reflectance where we can estimate all components of the reflectance responsible for the final appearance from a single monocular image. Instead of modeling each component of the reflectance separately using parametric models, our neural representation allows us to generate a basis set of faces in a geometric deformation-invariant space, parameterized by the input light direction, viewpoint and face geometry. We learn to reconstruct this reflectance field of a face just from a monocular image, which can be used to render the face from any viewpoint in any light condition. Our method is trained on a light-stage training dataset, which captures 300 people illuminated with 150 light conditions from 8 viewpoints. We show that our method outperforms existing monocular reflectance reconstruction methods, in terms of photorealism due to better capturing of physical primitives, such as sub-surface scattering, specularities, self-shadows and other higher-order effects.
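One way to picture such a representation is an MLP queried per surface point and conditioned on light and view directions, as in the bare-bones sketch below; the architecture and dimensions are illustrative assumptions, not the paper's network.
# Querying a neural face reflectance field per surface point (sketch only).
import torch
import torch.nn as nn
import torch.nn.functional as F

class FaceReflectanceField(nn.Module):
    def __init__(self, hidden=256):
        super().__init__()
        # input: 3D surface point + light direction + view direction (9 dims)
        self.net = nn.Sequential(
            nn.Linear(9, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, 3), nn.Sigmoid())           # RGB radiance

    def forward(self, points, light_dir, view_dir):
        x = torch.cat([points, light_dir, view_dir], dim=-1)
        return self.net(x)

field = FaceReflectanceField()
pts = torch.rand(1024, 3)                                 # points on the face mesh
l = F.normalize(torch.randn(1024, 3), dim=-1)
v = F.normalize(torch.randn(1024, 3), dim=-1)
rgb = field(pts, l, v)                                    # relit appearance per point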
Export
BibTeX
@online{Mallikarjun_2008.10247, TITLE = {Monocular Reconstruction of Neural Face Reflectance Fields}, AUTHOR = {Mallikarjun B R, and Tewari, Ayush and Oh, Tae-Hyun and Weyrich, Tim and Bickel, Bernd and Seidel, Hans-Peter and Pfister, Hanspeter and Matusik, Wojciech and Elgharib, Mohamed and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2008.10247}, EPRINT = {2008.10247}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {The reflectance field of a face describes the reflectance properties responsible for complex lighting effects including diffuse, specular, inter-reflection and self shadowing. Most existing methods for estimating the face reflectance from a monocular image assume faces to be diffuse with very few approaches adding a specular component. This still leaves out important perceptual aspects of reflectance as higher-order global illumination effects and self-shadowing are not modeled. We present a new neural representation for face reflectance where we can estimate all components of the reflectance responsible for the final appearance from a single monocular image. Instead of modeling each component of the reflectance separately using parametric models, our neural representation allows us to generate a basis set of faces in a geometric deformation-invariant space, parameterized by the input light direction, viewpoint and face geometry. We learn to reconstruct this reflectance field of a face just from a monocular image, which can be used to render the face from any viewpoint in any light condition. Our method is trained on a light-stage training dataset, which captures 300 people illuminated with 150 light conditions from 8 viewpoints. We show that our method outperforms existing monocular reflectance reconstruction methods, in terms of photorealism due to better capturing of physical premitives, such as sub-surface scattering, specularities, self-shadows and other higher-order effects.}, }
Endnote
%0 Report %A Mallikarjun B R, %A Tewari, Ayush %A Oh, Tae-Hyun %A Weyrich, Tim %A Bickel, Bernd %A Seidel, Hans-Peter %A Pfister, Hanspeter %A Matusik, Wojciech %A Elgharib, Mohamed %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Monocular Reconstruction of Neural Face Reflectance Fields : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B110-E %U https://arxiv.org/abs/2008.10247 %D 2020 %X The reflectance field of a face describes the reflectance properties responsible for complex lighting effects including diffuse, specular, inter-reflection and self shadowing. Most existing methods for estimating the face reflectance from a monocular image assume faces to be diffuse with very few approaches adding a specular component. This still leaves out important perceptual aspects of reflectance as higher-order global illumination effects and self-shadowing are not modeled. We present a new neural representation for face reflectance where we can estimate all components of the reflectance responsible for the final appearance from a single monocular image. Instead of modeling each component of the reflectance separately using parametric models, our neural representation allows us to generate a basis set of faces in a geometric deformation-invariant space, parameterized by the input light direction, viewpoint and face geometry. We learn to reconstruct this reflectance field of a face just from a monocular image, which can be used to render the face from any viewpoint in any light condition. Our method is trained on a light-stage training dataset, which captures 300 people illuminated with 150 light conditions from 8 viewpoints. We show that our method outperforms existing monocular reflectance reconstruction methods, in terms of photorealism due to better capturing of physical premitives, such as sub-surface scattering, specularities, self-shadows and other higher-order effects. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG
Mallikarjun B R, Tewari, A., Seidel, H.-P., Elgharib, M., and Theobalt, C. 2020b. Learning Complete 3D Morphable Face Models from Images and Videos. https://arxiv.org/abs/2010.01679.
(arXiv: 2010.01679)
Abstract
Most 3D face reconstruction methods rely on 3D morphable models, which disentangle the space of facial deformations into identity geometry, expressions and skin reflectance. These models are typically learned from a limited number of 3D scans and thus do not generalize well across different identities and expressions. We present the first approach to learn complete 3D models of face identity geometry, albedo and expression just from images and videos. The virtually endless collection of such data, in combination with our self-supervised learning-based approach allows for learning face models that generalize beyond the span of existing approaches. Our network design and loss functions ensure a disentangled parameterization of not only identity and albedo, but also, for the first time, an expression basis. Our method also allows for in-the-wild monocular reconstruction at test time. We show that our learned models better generalize and lead to higher quality image-based reconstructions than existing approaches.
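The disentangled parameterization the abstract refers to can be written as linear identity, expression and albedo bases over a mean face, as in the short sketch below; the basis sizes here are arbitrary, whereas in the paper the bases themselves are learned from images and videos.
# Linear morphable-model decoding: mean face plus identity/expression/albedo bases.
import torch

V = 5000                                     # hypothetical vertex count
mean_shape   = torch.zeros(V, 3)
id_basis     = torch.randn(V * 3, 80)        # identity geometry basis
expr_basis   = torch.randn(V * 3, 64)        # expression basis
albedo_mean  = torch.full((V, 3), 0.5)
albedo_basis = torch.randn(V * 3, 80)

def decode(alpha_id, delta_expr, beta_albedo):
    geom = mean_shape.reshape(-1) + id_basis @ alpha_id + expr_basis @ delta_expr
    albedo = albedo_mean.reshape(-1) + albedo_basis @ beta_albedo
    return geom.reshape(V, 3), albedo.reshape(V, 3).clamp(0.0, 1.0)

verts, colors = decode(torch.randn(80), torch.randn(64), torch.randn(80))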
Export
BibTeX
@online{Mallikarjun_arXiv2010.01679, TITLE = {Learning Complete {3D} Morphable Face Models from Images and Videos}, AUTHOR = {Mallikarjun B R, and Tewari, Ayush and Seidel, Hans-Peter and Elgharib, Mohamed and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2010.01679}, EPRINT = {2010.01679}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Most 3D face reconstruction methods rely on 3D morphable models, which disentangle the space of facial deformations into identity geometry, expressions and skin reflectance. These models are typically learned from a limited number of 3D scans and thus do not generalize well across different identities and expressions. We present the first approach to learn complete 3D models of face identity geometry, albedo and expression just from images and videos. The virtually endless collection of such data, in combination with our self-supervised learning-based approach allows for learning face models that generalize beyond the span of existing approaches. Our network design and loss functions ensure a disentangled parameterization of not only identity and albedo, but also, for the first time, an expression basis. Our method also allows for in-the-wild monocular reconstruction at test time. We show that our learned models better generalize and lead to higher quality image-based reconstructions than existing approaches.}, }
Endnote
%0 Report %A Mallikarjun B R, %A Tewari, Ayush %A Seidel, Hans-Peter %A Elgharib, Mohamed %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Learning Complete 3D Morphable Face Models from Images and Videos : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B6FB-1 %U https://arxiv.org/abs/2010.01679 %D 2020 %X Most 3D face reconstruction methods rely on 3D morphable models, which disentangle the space of facial deformations into identity geometry, expressions and skin reflectance. These models are typically learned from a limited number of 3D scans and thus do not generalize well across different identities and expressions. We present the first approach to learn complete 3D models of face identity geometry, albedo and expression just from images and videos. The virtually endless collection of such data, in combination with our self-supervised learning-based approach allows for learning face models that generalize beyond the span of existing approaches. Our network design and loss functions ensure a disentangled parameterization of not only identity and albedo, but also, for the first time, an expression basis. Our method also allows for in-the-wild monocular reconstruction at test time. We show that our learned models better generalize and lead to higher quality image-based reconstructions than existing approaches. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG,Computer Science, Multimedia, cs.MM
Mehta, D., Sotnychenko, O., Mueller, F., et al. 2020. XNect: Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2020)39, 4.
Export
BibTeX
@article{Mehta_TOG2020, TITLE = {{XNect}: {R}eal-time Multi-person {3D} Human Pose Estimation with a Single {RGB} Camera}, AUTHOR = {Mehta, Dushyant and Sotnychenko, Oleksandr and Mueller, Franziska and Xu, Weipeng and Elgharib, Mohamed and Fua, Pascal and Seidel, Hans-Peter and Rhodin, Helge and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3386569.3392410}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {39}, NUMBER = {4}, EID = {82}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2020}, }
Endnote
%0 Journal Article %A Mehta, Dushyant %A Sotnychenko, Oleksandr %A Mueller, Franziska %A Xu, Weipeng %A Elgharib, Mohamed %A Fua, Pascal %A Seidel, Hans-Peter %A Rhodin, Helge %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T XNect: Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0007-832D-3 %R 10.1145/3386569.3392410 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 4 %Z sequence number: 82 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2020 %O ACM SIGGRAPH 2020 Virtual Conference ; 2020, 17-28 August
Meka, A. 2020. Live inverse rendering. PhD thesis, Universität des Saarlandes, Saarbrücken.
Export
BibTeX
@phdthesis{Meka_2019, TITLE = {Live inverse rendering}, AUTHOR = {Meka, Abhimitra}, LANGUAGE = {eng}, DOI = {http://dx.doi.org/10.22028/D291-30206}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, }
Endnote
%0 Thesis %A Meka, Abhimitra %Y Theobalt, Christian %A referee: Drettakis, George %+ Computer Graphics, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Live inverse rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0007-715A-5 %R http://dx.doi.org/10.22028/D291-30206 %I Universität des Saarlandes %C Saarbrücken %D 2020 %P 189 p. %V phd %9 phd %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/28721
Meka, A., Pandey, R., Häne, C., et al. 2020. Deep Relightable Textures: Volumetric Performance Capture with Neural Rendering. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020)39, 6.
Export
BibTeX
@article{Meka_ToG2020, TITLE = {Deep Relightable Textures Volumetric Performance Capture with Neural Rendering}, AUTHOR = {Meka, Abhimitra and Pandey, Rohit and H{\"a}ne, Christian and Orts-Escolano, Sergio and Barnum, Peter and David-Son, Philip and Erickson, Daniel and Zhang, Yinda and Taylor, Jonathan and Bouaziz, Sofien and Legendre, Chloe and Ma, Wan-Chun and Overbeck, Ryan and Beeler, Thabo and Debevec, Paul and Izadi, Shahram and Theobalt, Christian and Rhemann, Christoph and Fanello, Sean}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3414685.3417814}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia)}, VOLUME = {39}, NUMBER = {6}, EID = {259}, BOOKTITLE = {Proceedings of ACM SIGGRAPH Asia 2020}, EDITOR = {Myszkowski, Karol}, }
Endnote
%0 Journal Article %A Meka, Abhimitra %A Pandey, Rohit %A Häne, Christian %A Orts-Escolano, Sergio %A Barnum, Peter %A David-Son, Philip %A Erickson, Daniel %A Zhang, Yinda %A Taylor, Jonathan %A Bouaziz, Sofien %A Legendre, Chloe %A Ma, Wan-Chun %A Overbeck, Ryan %A Beeler, Thabo %A Debevec, Paul %A Izadi, Shahram %A Theobalt, Christian %A Rhemann, Christoph %A Fanello, Sean %+ External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Deep Relightable Textures Volumetric Performance Capture with Neural Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0007-A6FA-4 %R 10.1145/3414685.3417814 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 6 %Z sequence number: 259 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH Asia 2020 %O ACM SIGGRAPH Asia 2020 SA'20 SA 2020 %U https://dl.acm.org/doi/pdf/10.1145/3414685.3417814
Meng, X., Zheng, Q., Varshney, A., Singh, G., and Zwicker, M. 2020. Real-time Monte Carlo Denoising with the Neural Bilateral Grid. Rendering 2020 - DL-only Track (Eurographics Symposium on Rendering 2020), The Eurographics Association.
Export
BibTeX
@inproceedings{Meng_EGRendering20, TITLE = {Real-time {Monte Carlo} Denoising with the Neural Bilateral Grid}, AUTHOR = {Meng, Xiaoxu and Zheng, Quan and Varshney, Amitabh and Singh, Gurprit and Zwicker, Matthias}, LANGUAGE = {eng}, ISBN = {978-3-03868-117-5}, URL = {https://diglib.eg.org:443/handle/10.2312/sr20201133}, DOI = {10.2312/sr.20201133}, PUBLISHER = {The Eurographics Association}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {Rendering 2020 -- DL-only Track (Eurographics Symposium on Rendering 2020)}, EDITOR = {Dachsbacher, Carsten and Pharr, Matt}, PAGES = {1--12}, ADDRESS = {London, UK}, }
Endnote
%0 Conference Proceedings %A Meng, Xiaoxu %A Zheng, Quan %A Varshney, Amitabh %A Singh, Gurprit %A Zwicker, Matthias %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Real-time Monte Carlo Denoising with the Neural Bilateral Grid : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CEC2-6 %R 10.2312/sr.20201133 %U https://diglib.eg.org:443/handle/10.2312/sr20201133 %D 2020 %B 31st Eurographics Symposium on Rendering %Z date of event: 2020-06-29 - 2020-07-02 %C London, UK %B Rendering 2020 - DL-only Track %E Dachsbacher, Carsten; Pharr, Matt %P 1 - 12 %I The Eurographics Association %@ 978-3-03868-117-5
Mlakar, D., Winter, M., Stadlbauer, P., Seidel, H.-P., Steinberger, M., and Zayer, R. 2020. Subdivision-Specialized Linear Algebra Kernels for Static and Dynamic Mesh Connectivity on the GPU. Computer Graphics Forum (Proc. EUROGRAPHICS 2020)39, 2.
Export
BibTeX
@article{Mlakar_EG2020, TITLE = {Subdivision-Specialized Linear Algebra Kernels for Static and Dynamic Mesh Connectivity on the {GPU}}, AUTHOR = {Mlakar, Daniel and Winter, M. and Stadlbauer, Pascal and Seidel, Hans-Peter and Steinberger, Markus and Zayer, Rhaleb}, LANGUAGE = {eng}, ISSN = {0167-7055}, DOI = {10.1111/cgf.13934}, PUBLISHER = {Blackwell-Wiley}, ADDRESS = {Oxford}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, JOURNAL = {Computer Graphics Forum (Proc. EUROGRAPHICS)}, VOLUME = {39}, NUMBER = {2}, PAGES = {335--349}, BOOKTITLE = {The European Association for Computer Graphics 41st Annual Conference (EUROGRAPHICS 2020)}, EDITOR = {Panozzo, Daniele and Assarsson, Ulf}, }
Endnote
%0 Journal Article %A Mlakar, Daniel %A Winter, M. %A Stadlbauer, Pascal %A Seidel, Hans-Peter %A Steinberger, Markus %A Zayer, Rhaleb %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Subdivision-Specialized Linear Algebra Kernels for Static and Dynamic Mesh Connectivity on the GPU : %G eng %U http://hdl.handle.net/21.11116/0000-0006-DB80-2 %R 10.1111/cgf.13934 %7 2020 %D 2020 %J Computer Graphics Forum %O Computer Graphics Forum : journal of the European Association for Computer Graphics Comput. Graph. Forum %V 39 %N 2 %& 335 %P 335 - 349 %I Blackwell-Wiley %C Oxford %@ false %B The European Association for Computer Graphics 41st Annual Conference %O EUROGRAPHICS 2020 EG 2020 The European Association for Computer Graphics 41st Annual Conference ; Norrköping, Sweden, May 25 – 29, 2020
Mueller, F. 2020. Real-time 3D Hand Reconstruction in Challenging Scenes from a Single Color or Depth Camera. PhD Thesis, Universität des Saarlandes, Saarbrücken.
Export
BibTeX
@phdthesis{MuellerFDiss_2020, TITLE = {Real-time 3{D} Hand Reconstruction in Challenging Scenes from a Single Color or Depth Camera}, AUTHOR = {Mueller, Franziska}, LANGUAGE = {eng}, DOI = {10.22028/D291-32846}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, }
Endnote
%0 Thesis %A Mueller, Franziska %Y Theobalt, Christian %A referee: Seidel, Hans-Peter %A referee: Izadi, Shahram %+ Computer Graphics, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Real-time 3D Hand Reconstruction in Challenging Scenes from a Single Color or Depth Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D8C7-5 %R 10.22028/D291-32846 %I Universität des Saarlandes %C Saarbrücken %D 2020 %P 155 p. %V phd %9 phd %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/30313
Piovarči, M., Foshey, M., Babaei, V., Rusinkiewicz, S., Matusik, W., and Didyk, P. 2020. Towards Spatially Varying Gloss Reproduction for 3D Printing. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020)39, 6.
Export
BibTeX
@article{Piovarci_ToG2020, TITLE = {Towards Spatially Varying Gloss Reproduction for {3D} Printing}, AUTHOR = {Piovar{\v c}i, Michal and Foshey, Michael and Babaei, Vahid and Rusinkiewicz, Szymon and Matusik, Wojciech and Didyk, Piotr}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3414685.3417850}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia)}, VOLUME = {39}, NUMBER = {6}, EID = {206}, BOOKTITLE = {Proceedings of ACM SIGGRAPH Asia 2020}, EDITOR = {Myszkowski, Karol}, }
Endnote
%0 Journal Article %A Piovarči, Michal %A Foshey, Michael %A Babaei, Vahid %A Rusinkiewicz, Szymon %A Matusik, Wojciech %A Didyk, Piotr %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T Towards Spatially Varying Gloss Reproduction for 3D Printing : %G eng %U http://hdl.handle.net/21.11116/0000-0007-A6FE-0 %R 10.1145/3414685.3417850 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 6 %Z sequence number: 206 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH Asia 2020 %O ACM SIGGRAPH Asia 2020 SA'20 SA 2020
Qian, N., Wang, J., Mueller, F., Bernard, F., Golyanik, V., and Theobalt, C. 2020a. Parametric Hand Texture Model for 3D Hand Reconstruction and Personalization. Max-Planck-Institut für Informatik, Saarbrücken.
Abstract
3D hand reconstruction from image data is a widely-studied problem in computer vision and graphics, and has a particularly high relevance for virtual and augmented reality. Although several 3D hand reconstruction approaches leverage hand models as a strong prior to resolve ambiguities and achieve a more robust reconstruction, most existing models account only for the hand shape and poses and do not model the texture. To fill this gap, in this work we present the first parametric texture model of human hands. Our model spans several dimensions of hand appearance variability (e.g., related to gender, ethnicity, or age) and only requires a commodity camera for data acquisition. Experimentally, we demonstrate that our appearance model can be used to tackle a range of challenging problems such as 3D hand reconstruction from a single monocular image. Furthermore, our appearance model can be used to define a neural rendering layer that enables training with a self-supervised photometric loss. We make our model publicly available.
Export
BibTeX
@techreport{Qian_report2020, TITLE = {Parametric Hand Texture Model for {3D} Hand Reconstruction and Personalization}, AUTHOR = {Qian, Neng and Wang, Jiayi and Mueller, Franziska and Bernard, Florian and Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0946-011X}, NUMBER = {MPI-I-2020-4-001}, INSTITUTION = {Max-Planck-Institut f{\"u}r Informatik}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {3D hand reconstruction from image data is a widely-studied problem in com- puter vision and graphics, and has a particularly high relevance for virtual and augmented reality. Although several 3D hand reconstruction approaches leverage hand models as a strong prior to resolve ambiguities and achieve a more robust reconstruction, most existing models account only for the hand shape and poses and do not model the texture. To {fi}ll this gap, in this work we present the {fi}rst parametric texture model of human hands. Our model spans several dimensions of hand appearance variability (e.g., related to gen- der, ethnicity, or age) and only requires a commodity camera for data acqui- sition. Experimentally, we demonstrate that our appearance model can be used to tackle a range of challenging problems such as 3D hand reconstruc- tion from a single monocular image. Furthermore, our appearance model can be used to de{fi}ne a neural rendering layer that enables training with a self-supervised photometric loss. We make our model publicly available.}, TYPE = {Research Report}, }
Endnote
%0 Report %A Qian, Neng %A Wang, Jiayi %A Mueller, Franziska %A Bernard, Florian %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Parametric Hand Texture Model for 3D Hand Reconstruction and Personalization : %G eng %U http://hdl.handle.net/21.11116/0000-0006-9128-9 %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2020 %P 37 p. %X 3D hand reconstruction from image data is a widely-studied problem in com- puter vision and graphics, and has a particularly high relevance for virtual and augmented reality. Although several 3D hand reconstruction approaches leverage hand models as a strong prior to resolve ambiguities and achieve a more robust reconstruction, most existing models account only for the hand shape and poses and do not model the texture. To fill this gap, in this work we present the first parametric texture model of human hands. Our model spans several dimensions of hand appearance variability (e.g., related to gen- der, ethnicity, or age) and only requires a commodity camera for data acqui- sition. Experimentally, we demonstrate that our appearance model can be used to tackle a range of challenging problems such as 3D hand reconstruc- tion from a single monocular image. Furthermore, our appearance model can be used to define a neural rendering layer that enables training with a self-supervised photometric loss. We make our model publicly available. %K hand texture model, appearance modeling, hand tracking, 3D hand recon- struction %B Research Report %@ false
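The abstract of the report above describes a low-dimensional parametric model of hand texture. The following minimal Python sketch shows one common way such a linear (PCA-style) appearance model can be built and sampled; the data layout, component count, and function names are illustrative assumptions, not the authors' released model or code.

import numpy as np

def fit_texture_model(textures, n_components=5):
    # textures: (num_subjects, num_texels * 3) flattened RGB hand textures
    mean = textures.mean(axis=0)
    centered = textures - mean
    # SVD of the centered data gives the principal appearance directions
    _, s, vt = np.linalg.svd(centered, full_matrices=False)
    basis = vt[:n_components]                       # (n_components, num_texels * 3)
    stddev = s[:n_components] / np.sqrt(len(textures))
    return mean, basis, stddev

def synthesize(mean, basis, stddev, alpha):
    # reconstruct a texture from low-dimensional appearance parameters alpha
    return mean + (alpha * stddev) @ basis

# toy usage with random data standing in for captured hand textures
rng = np.random.default_rng(0)
data = rng.random((20, 64 * 64 * 3))
mean, basis, stddev = fit_texture_model(data, n_components=5)
texture = synthesize(mean, basis, stddev, alpha=rng.standard_normal(5))

In a reconstruction pipeline, the low-dimensional parameters alpha would be the quantity optimized against a photometric loss, which is what makes such a model attractive as a prior.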
Qian, N., Wang, J., Mueller, F., Bernard, F., Golyanik, V., and Theobalt, C. 2020b. HTML: A Parametric Hand Texture Model for 3D Hand Reconstruction and Personalization. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Qian_ECCV20, TITLE = {{HTML}: {A} Parametric Hand Texture Model for {3D} Hand Reconstruction and Personalization}, AUTHOR = {Qian, Neng and Wang, Jiayi and Mueller, Franziska and Bernard, Florian and Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-58621-8}, DOI = {10.1007/978-3-030-58621-8_4}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {54--71}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12356}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Qian, Neng %A Wang, Jiayi %A Mueller, Franziska %A Bernard, Florian %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T HTML: A Parametric Hand Texture Model for 3D Hand Reconstruction and Personalization : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D062-F %R 10.1007/978-3-030-58621-8_4 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 54 - 71 %I Springer %@ 978-3-030-58621-8 %B Lecture Notes in Computer Science %N 12356
Rao, S., Stutz, D., and Schiele, B. 2020. Adversarial Training against Location-Optimized Adversarial Patches. https://arxiv.org/abs/2005.02313.
(arXiv: 2005.02313)
Abstract
Deep neural networks have been shown to be susceptible to adversarial examples -- small, imperceptible changes constructed to cause mis-classification in otherwise highly accurate image classifiers. As a practical alternative, recent work proposed so-called adversarial patches: clearly visible, but adversarially crafted rectangular patches in images. These patches can easily be printed and applied in the physical world. While defenses against imperceptible adversarial examples have been studied extensively, robustness against adversarial patches is poorly understood. In this work, we first devise a practical approach to obtain adversarial patches while actively optimizing their location within the image. Then, we apply adversarial training on these location-optimized adversarial patches and demonstrate significantly improved robustness on CIFAR10 and GTSRB. Additionally, in contrast to adversarial training on imperceptible adversarial examples, our adversarial patch training does not reduce accuracy.
Export
BibTeX
@online{Rao_arXiv2005.02313, TITLE = {Adversarial Training against Location-Optimized Adversarial Patches}, AUTHOR = {Rao, Sukrut and Stutz, David and Schiele, Bernt}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2005.02313}, EPRINT = {2005.02313}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Deep neural networks have been shown to be susceptible to adversarial examples -- small, imperceptible changes constructed to cause mis-classification in otherwise highly accurate image classifiers. As a practical alternative, recent work proposed so-called adversarial patches: clearly visible, but adversarially crafted rectangular patches in images. These patches can easily be printed and applied in the physical world. While defenses against imperceptible adversarial examples have been studied extensively, robustness against adversarial patches is poorly understood. In this work, we first devise a practical approach to obtain adversarial patches while actively optimizing their location within the image. Then, we apply adversarial training on these location-optimized adversarial patches and demonstrate significantly improved robustness on CIFAR10 and GTSRB. Additionally, in contrast to adversarial training on imperceptible adversarial examples, our adversarial patch training does not reduce accuracy.}, }
Endnote
%0 Report %A Rao, Sukrut %A Stutz, David %A Schiele, Bernt %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T Adversarial Training against Location-Optimized Adversarial Patches : %G eng %U http://hdl.handle.net/21.11116/0000-0007-80D0-C %U https://arxiv.org/abs/2005.02313 %D 2020 %X Deep neural networks have been shown to be susceptible to adversarial examples -- small, imperceptible changes constructed to cause mis-classification in otherwise highly accurate image classifiers. As a practical alternative, recent work proposed so-called adversarial patches: clearly visible, but adversarially crafted rectangular patches in images. These patches can easily be printed and applied in the physical world. While defenses against imperceptible adversarial examples have been studied extensively, robustness against adversarial patches is poorly understood. In this work, we first devise a practical approach to obtain adversarial patches while actively optimizing their location within the image. Then, we apply adversarial training on these location-optimized adversarial patches and demonstrate significantly improved robustness on CIFAR10 and GTSRB. Additionally, in contrast to adversarial training on imperceptible adversarial examples, our adversarial patch training does not reduce accuracy. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Cryptography and Security, cs.CR,Computer Science, Learning, cs.LG,Statistics, Machine Learning, stat.ML
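The abstract above couples adversarial patch crafting with optimization of the patch location. The sketch below illustrates that inner loop on a toy linear classifier: candidate locations are sampled, the loss-maximizing one is kept, and the patch content is updated by a signed gradient step. The model, loss, and update rule are illustrative assumptions, not the paper's exact procedure, and the outer adversarial-training loop over the classifier is omitted.

import numpy as np

rng = np.random.default_rng(0)
H = W = 32
w = rng.standard_normal((H, W, 3))             # weights of a toy linear "classifier"

def loss(image, label):
    score = float((image * w).sum())            # toy logit
    return -label * score                       # surrogate classification loss

def apply_patch(image, patch, top, left):
    out = image.copy()
    ph, pw, _ = patch.shape
    out[top:top + ph, left:left + pw] = patch
    return out

def best_location(image, patch, label, n_trials=16):
    # sample candidate locations and keep the one that maximizes the loss
    ph, pw, _ = patch.shape
    cands = [(rng.integers(0, H - ph), rng.integers(0, W - pw)) for _ in range(n_trials)]
    return max(cands, key=lambda tl: loss(apply_patch(image, patch, *tl), label))

def craft_patch(image, label, patch_size=8, steps=10, step_size=0.05):
    patch = rng.random((patch_size, patch_size, 3))
    for _ in range(steps):
        top, left = best_location(image, patch, label)
        # analytic gradient of the toy loss w.r.t. the patch pixels it covers
        grad = -label * w[top:top + patch_size, left:left + patch_size]
        patch = np.clip(patch + step_size * np.sign(grad), 0.0, 1.0)
    return patch

patch = craft_patch(rng.random((H, W, 3)), label=1.0)

Adversarial patch training would then feed images with such crafted patches back into the classifier's training set, which is the step the abstract reports as improving robustness.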
Rudnev, V., Golyanik, V., Wang, J., et al. 2020. EventHands: Real-Time Neural 3D Hand Reconstruction from an Event Stream. https://arxiv.org/abs/2012.06475.
(arXiv: 2012.06475)
Abstract
3D hand pose estimation from monocular videos is a long-standing and challenging problem, which is now seeing a strong upturn. In this work, we address it for the first time using a single event camera, i.e., an asynchronous vision sensor reacting on brightness changes. Our EventHands approach has characteristics previously not demonstrated with a single RGB or depth camera such as high temporal resolution at low data throughputs and real-time performance at 1000 Hz. Due to the different data modality of event cameras compared to classical cameras, existing methods cannot be directly applied to and re-trained for event streams. We thus design a new neural approach which accepts a new event stream representation suitable for learning, which is trained on newly-generated synthetic event streams and can generalise to real data. Experiments show that EventHands outperforms recent monocular methods using a colour (or depth) camera in terms of accuracy and its ability to capture hand motions of unprecedented speed. Our method, the event stream simulator and the dataset will be made publicly available.
Export
BibTeX
@online{Rudnev_arXiv2012.06475, TITLE = {{EventHands}: {R}eal-Time Neural {3D} Hand Reconstruction from an Event Stream}, AUTHOR = {Rudnev, Viktor and Golyanik, Vladislav and Wang, Jiayi and Seidel, Hans-Peter and Mueller, Franziska and Elgharib, Mohamed and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.06475}, EPRINT = {2012.06475}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {3D hand pose estimation from monocular videos is a long-standing and challenging problem, which is now seeing a strong upturn. In this work, we address it for the first time using a single event camera, i.e., an asynchronous vision sensor reacting on brightness changes. Our EventHands approach has characteristics previously not demonstrated with a single RGB or depth camera such as high temporal resolution at low data throughputs and real-time performance at 1000 Hz. Due to the different data modality of event cameras compared to classical cameras, existing methods cannot be directly applied to and re-trained for event streams. We thus design a new neural approach which accepts a new event stream representation suitable for learning, which is trained on newly-generated synthetic event streams and can generalise to real data. Experiments show that EventHands outperforms recent monocular methods using a colour (or depth) camera in terms of accuracy and its ability to capture hand motions of unprecedented speed. Our method, the event stream simulator and the dataset will be made publicly available.}, }
Endnote
%0 Report %A Rudnev, Viktor %A Golyanik, Vladislav %A Wang, Jiayi %A Seidel, Hans-Peter %A Mueller, Franziska %A Elgharib, Mohamed %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T EventHands: Real-Time Neural 3D Hand Reconstruction from an Event Stream : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B709-1 %U https://arxiv.org/abs/2012.06475 %D 2020 %X 3D hand pose estimation from monocular videos is a long-standing and challenging problem, which is now seeing a strong upturn. In this work, we address it for the first time using a single event camera, i.e., an asynchronous vision sensor reacting on brightness changes. Our EventHands approach has characteristics previously not demonstrated with a single RGB or depth camera such as high temporal resolution at low data throughputs and real-time performance at 1000 Hz. Due to the different data modality of event cameras compared to classical cameras, existing methods cannot be directly applied to and re-trained for event streams. We thus design a new neural approach which accepts a new event stream representation suitable for learning, which is trained on newly-generated synthetic event streams and can generalise to real data. Experiments show that EventHands outperforms recent monocular methods using a colour (or depth) camera in terms of accuracy and its ability to capture hand motions of unprecedented speed. Our method, the event stream simulator and the dataset will be made publicly available. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
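The EventHands abstract above hinges on converting an asynchronous event stream into a representation a neural network can consume. The following Python sketch builds a simple per-polarity, window-normalized event frame; the exact representation is an assumption for illustration and is not necessarily the paper's formulation.

import numpy as np

def events_to_frame(events, height, width, t_start, t_end):
    # events: array of (t, x, y, polarity) rows with polarity in {-1, +1}.
    # Returns a (2, height, width) map holding the normalized timestamp of the
    # most recent event per pixel and polarity inside [t_start, t_end).
    frame = np.zeros((2, height, width), dtype=np.float32)
    for t, x, y, p in events:
        if not (t_start <= t < t_end):
            continue
        channel = 0 if p > 0 else 1
        frame[channel, int(y), int(x)] = (t - t_start) / (t_end - t_start)
    return frame

# toy usage: 1000 random events over a 1 ms window on a 240 x 180 sensor
rng = np.random.default_rng(0)
evts = np.stack([rng.uniform(0.0, 1e-3, 1000),
                 rng.integers(0, 240, 1000),
                 rng.integers(0, 180, 1000),
                 rng.choice([-1.0, 1.0], 1000)], axis=1)
tensor = events_to_frame(evts, height=180, width=240, t_start=0.0, t_end=1e-3)

Because each frame only covers a short time window, such representations can be produced at very high rates, which is what enables the real-time, high-temporal-resolution operation the abstract emphasizes.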
Saberpour, A., Hersch, R.D., Fang, J., Zayer, R., Seidel, H.-P., and Babaei, V. 2020. Fabrication of Moiré on Curved Surfaces. Optics Express28, 13.
Export
BibTeX
@article{Saberpour2020, TITLE = {Fabrication of Moir{\'e} on Curved Surfaces}, AUTHOR = {Saberpour, Artin and Hersch, Roger D. and Fang, Jiajing and Zayer, Rhaleb and Seidel, Hans-Peter and Babaei, Vahid}, LANGUAGE = {eng}, ISSN = {1094-4087}, DOI = {10.1364/OE.393843}, PUBLISHER = {Optical Society of America}, ADDRESS = {Washington, DC}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, JOURNAL = {Optics Express}, VOLUME = {28}, NUMBER = {13}, PAGES = {19413--19427}, }
Endnote
%0 Journal Article %A Saberpour, Artin %A Hersch, Roger D. %A Fang, Jiajing %A Zayer, Rhaleb %A Seidel, Hans-Peter %A Babaei, Vahid %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Fabrication of Moiré on Curved Surfaces : %G eng %U http://hdl.handle.net/21.11116/0000-0006-D39D-B %R 10.1364/OE.393843 %7 2020 %D 2020 %J Optics Express %O Opt. Express %V 28 %N 13 %& 19413 %P 19413 - 19427 %I Optical Society of America %C Washington, DC %@ false
Sarkar, K., Mehta, D., Xu, W., Golyanik, V., and Theobalt, C. 2020. Neural Re-rendering of Humans from a Single Image. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Sarkar_ECCV20, TITLE = {Neural Re-rendering of Humans from a Single Image}, AUTHOR = {Sarkar, Kripasindhu and Mehta, Dushyant and Xu, Weipeng and Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-58621-8}, DOI = {10.1007/978-3-030-58621-8_35}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {596--613}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12356}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Sarkar, Kripasindhu %A Mehta, Dushyant %A Xu, Weipeng %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Re-rendering of Humans from a Single Image : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D0A4-4 %R 10.1007/978-3-030-58621-8_35 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 596 - 613 %I Springer %@ 978-3-030-58621-8 %B Lecture Notes in Computer Science %N 12356
Seelbach Benkner, M., Golyanik, V., Theobalt, C., and Moeller, M. 2020. Adiabatic Quantum Graph Matching with Permutation Matrix Constraints. International Conference on 3D Vision, IEEE.
Export
BibTeX
@inproceedings{SeelbachBenkner2020, TITLE = {Adiabatic Quantum Graph Matching with Permutation Matrix Constraints}, AUTHOR = {Seelbach Benkner, Marcel and Golyanik, Vladislav and Theobalt, Christian and Moeller, Michael}, LANGUAGE = {eng}, ISBN = {978-1-7281-8128-8}, DOI = {10.1109/3DV50981.2020.00068}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {International Conference on 3D Vision}, PAGES = {583--592}, ADDRESS = {Fukuoka, Japan (Virtual Event)}, }
Endnote
%0 Conference Proceedings %A Seelbach Benkner, Marcel %A Golyanik, Vladislav %A Theobalt, Christian %A Moeller, Michael %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Adiabatic Quantum Graph Matching with Permutation Matrix Constraints : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0362-6 %R 10.1109/3DV50981.2020.00068 %D 2020 %B International Conference on 3D Vision %Z date of event: 2020-11-25 - 2020-11-28 %C Fukuoka, Japan (Virtual Event) %B International Conference on 3D Vision %P 583 - 592 %I IEEE %@ 978-1-7281-8128-8
Serrano, A., Martin, D., Gutierrez, D., Myszkowski, K., and Masia, B. 2020. Imperceptible Manipulation of Lateral Camera Motion for Improved Virtual Reality Applications. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020)39, 6.
Export
BibTeX
@article{Serrano2020, TITLE = {Imperceptible Manipulation of Lateral Camera Motion for Improved Virtual Reality Applications}, AUTHOR = {Serrano, Ana and Martin, Daniel and Gutierrez, Diego and Myszkowski, Karol and Masia, Belen}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3414685.3417773}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia)}, VOLUME = {39}, NUMBER = {6}, EID = {268}, BOOKTITLE = {Proceedings of ACM SIGGRAPH Asia 2020}, EDITOR = {Myszkowski, Karol}, }
Endnote
%0 Journal Article %A Serrano, Ana %A Martin, Daniel %A Gutierrez, Diego %A Myszkowski, Karol %A Masia, Belen %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Imperceptible Manipulation of Lateral Camera Motion for Improved Virtual Reality Applications : %G eng %U http://hdl.handle.net/21.11116/0000-0006-FBE8-A %R 10.1145/3414685.3417773 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 6 %Z sequence number: 268 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH Asia 2020 %O ACM SIGGRAPH Asia 2020 SA'20 SA 2020
Shahmirzadi, A.A., Babaei, V., and Seidel, H.-P. 2020. A Multispectral Dataset of Oil and Watercolor Paints. Electronic Imaging.
Export
BibTeX
@article{shahmirzadi2020multispectral, TITLE = {A Multispectral Dataset of Oil and Watercolor Paints}, AUTHOR = {Shahmirzadi, Azadeh Asadi and Babaei, Vahid and Seidel, Hans-Peter}, LANGUAGE = {eng}, DOI = {10.2352/ISSN.2470-1173.2020.5.MAAP-107}, PUBLISHER = {IS\&T}, ADDRESS = {Springfield, VA}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {Electronic Imaging}, PAGES = {1--4}, EID = {107}, }
Endnote
%0 Journal Article %A Shahmirzadi, Azadeh Asadi %A Babaei, Vahid %A Seidel, Hans-Peter %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T A Multispectral Dataset of Oil and Watercolor Paints : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F064-9 %R 10.2352/ISSN.2470-1173.2020.5.MAAP-107 %7 2020 %D 2020 %J Electronic Imaging %& 1 %P 1 - 4 %Z sequence number: 107 %I IS&T %C Springfield, VA
Shimada, S., Golyanik, V., Xu, W., and Theobalt, C. 2020a. PhysCap: Physically Plausible Monocular 3D Motion Capture in Real Time. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020)39, 6.
Export
BibTeX
@article{Shimada_ToG2020, TITLE = {{PhysCap}: {P}hysically Plausible Monocular {3D} Motion Capture in Real Time}, AUTHOR = {Shimada, Soshi and Golyanik, Vladislav and Xu, Weipeng and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3414685.3417877}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia)}, VOLUME = {39}, NUMBER = {6}, EID = {235}, BOOKTITLE = {Proceedings of ACM SIGGRAPH Asia 2020}, EDITOR = {Myszkowski, Karol}, }
Endnote
%0 Journal Article %A Shimada, Soshi %A Golyanik, Vladislav %A Xu, Weipeng %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T PhysCap: Physically Plausible Monocular 3D Motion Capture in Real Time : %G eng %U http://hdl.handle.net/21.11116/0000-0007-A709-3 %R 10.1145/3414685.3417877 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 6 %Z sequence number: 235 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH Asia 2020 %O ACM SIGGRAPH Asia 2020 SA'20 SA 2020
Shimada, S., Golyanik, V., Xu, W., and Theobalt, C. 2020b. PhysCap: Physically Plausible Monocular 3D Motion Capture in Real Time. https://arxiv.org/abs/2008.08880.
(arXiv: 2008.08880)
Abstract
Marker-less 3D human motion capture from a single colour camera has seen significant progress. However, it is a very challenging and severely ill-posed problem. In consequence, even the most accurate state-of-the-art approaches have significant limitations. Purely kinematic formulations on the basis of individual joints or skeletons, and the frequent frame-wise reconstruction in state-of-the-art methods greatly limit 3D accuracy and temporal stability compared to multi-view or marker-based motion capture. Further, captured 3D poses are often physically incorrect and biomechanically implausible, or exhibit implausible environment interactions (floor penetration, foot skating, unnatural body leaning and strong shifting in depth), which is problematic for any use case in computer graphics. We, therefore, present PhysCap, the first algorithm for physically plausible, real-time and marker-less human 3D motion capture with a single colour camera at 25 fps. Our algorithm first captures 3D human poses purely kinematically. To this end, a CNN infers 2D and 3D joint positions, and subsequently, an inverse kinematics step finds space-time coherent joint angles and global 3D pose. Next, these kinematic reconstructions are used as constraints in a real-time physics-based pose optimiser that accounts for environment constraints (e.g., collision handling and floor placement), gravity, and biophysical plausibility of human postures. Our approach employs a combination of ground reaction force and residual force for plausible root control, and uses a trained neural network to detect foot contact events in images. Our method captures physically plausible and temporally stable global 3D human motion, without physically implausible postures, floor penetrations or foot skating, from video in real time and in general scenes. The video is available at http://gvv.mpi-inf.mpg.de/projects/PhysCap
Export
BibTeX
@online{Shimada_2008.08880, TITLE = {{PhysCap}: {P}hysically Plausible Monocular {3D} Motion Capture in Real Time}, AUTHOR = {Shimada, Soshi and Golyanik, Vladislav and Xu, Weipeng and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2008.08880}, EPRINT = {2008.08880}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Marker-less 3D human motion capture from a single colour camera has seen significant progress. However, it is a very challenging and severely ill-posed problem. In consequence, even the most accurate state-of-the-art approaches have significant limitations. Purely kinematic formulations on the basis of individual joints or skeletons, and the frequent frame-wise reconstruction in state-of-the-art methods greatly limit 3D accuracy and temporal stability compared to multi-view or marker-based motion capture. Further, captured 3D poses are often physically incorrect and biomechanically implausible, or exhibit implausible environment interactions (floor penetration, foot skating, unnatural body leaning and strong shifting in depth), which is problematic for any use case in computer graphics. We, therefore, present PhysCap, the first algorithm for physically plausible, real-time and marker-less human 3D motion capture with a single colour camera at 25 fps. Our algorithm first captures 3D human poses purely kinematically. To this end, a CNN infers 2D and 3D joint positions, and subsequently, an inverse kinematics step finds space-time coherent joint angles and global 3D pose. Next, these kinematic reconstructions are used as constraints in a real-time physics-based pose optimiser that accounts for environment constraints (e.g., collision handling and floor placement), gravity, and biophysical plausibility of human postures. Our approach employs a combination of ground reaction force and residual force for plausible root control, and uses a trained neural network to detect foot contact events in images. Our method captures physically plausible and temporally stable global 3D human motion, without physically implausible postures, floor penetrations or foot skating, from video in real time and in general scenes. The video is available at http://gvv.mpi-inf.mpg.de/projects/PhysCap}, }
Endnote
%0 Report %A Shimada, Soshi %A Golyanik, Vladislav %A Xu, Weipeng %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T PhysCap: Physically Plausible Monocular 3D Motion Capture in Real Time : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E8F3-1 %U https://arxiv.org/abs/2008.08880 %D 2020 %X Marker-less 3D human motion capture from a single colour camera has seen significant progress. However, it is a very challenging and severely ill-posed problem. In consequence, even the most accurate state-of-the-art approaches have significant limitations. Purely kinematic formulations on the basis of individual joints or skeletons, and the frequent frame-wise reconstruction in state-of-the-art methods greatly limit 3D accuracy and temporal stability compared to multi-view or marker-based motion capture. Further, captured 3D poses are often physically incorrect and biomechanically implausible, or exhibit implausible environment interactions (floor penetration, foot skating, unnatural body leaning and strong shifting in depth), which is problematic for any use case in computer graphics. We, therefore, present PhysCap, the first algorithm for physically plausible, real-time and marker-less human 3D motion capture with a single colour camera at 25 fps. Our algorithm first captures 3D human poses purely kinematically. To this end, a CNN infers 2D and 3D joint positions, and subsequently, an inverse kinematics step finds space-time coherent joint angles and global 3D pose. Next, these kinematic reconstructions are used as constraints in a real-time physics-based pose optimiser that accounts for environment constraints (e.g., collision handling and floor placement), gravity, and biophysical plausibility of human postures. Our approach employs a combination of ground reaction force and residual force for plausible root control, and uses a trained neural network to detect foot contact events in images. Our method captures physically plausible and temporally stable global 3D human motion, without physically implausible postures, floor penetrations or foot skating, from video in real time and in general scenes. The video is available at http://gvv.mpi-inf.mpg.de/projects/PhysCap %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
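The PhysCap abstract above describes a two-stage pipeline: purely kinematic per-frame pose estimation followed by a physics-based optimization that enforces plausibility (no floor penetration, stable root motion). The sketch below is a heavily simplified stand-in for that second stage, applying only floor-penetration removal and root smoothing to kinematic joint positions; the actual method solves a real-time physics-based optimization with contact and force terms.

import numpy as np

def plausibility_filter(joints, floor_y=0.0, root_index=0, smooth=0.8):
    # joints: (num_frames, num_joints, 3) kinematic estimates, y axis pointing up
    out = joints.copy()
    # 1) lift any frame whose lowest joint penetrates the floor plane
    for f in range(out.shape[0]):
        lowest = out[f, :, 1].min()
        if lowest < floor_y:
            out[f, :, 1] += floor_y - lowest
    # 2) exponentially smooth the root trajectory to suppress jitter in depth
    for f in range(1, out.shape[0]):
        out[f, root_index] = smooth * out[f - 1, root_index] + (1 - smooth) * out[f, root_index]
    return out

poses = np.random.default_rng(0).normal(size=(100, 17, 3))
stable = plausibility_filter(poses)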
Sidhu, V., Tretschk, E., Golyanik, V., Agudo, A., and Theobalt, C. 2020. Neural Dense Non-Rigid Structure from Motion with Latent Space Constraints. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Sidhu_ECCV20, TITLE = {Neural Dense Non-Rigid Structure from Motion with Latent Space Constraints}, AUTHOR = {Sidhu, Vikramjit and Tretschk, Edgar and Golyanik, Vladislav and Agudo, Antonio and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-58516-7}, DOI = {10.1007/978-3-030-58517-4_13}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {204--222}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12361}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Sidhu, Vikramjit %A Tretschk, Edgar %A Golyanik, Vladislav %A Agudo, Antonio %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Dense Non-Rigid Structure from Motion with Latent Space Constraints : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D080-C %R 10.1007/978-3-030-58517-4_13 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 204 - 222 %I Springer %@ 978-3-030-58516-7 %B Lecture Notes in Computer Science %N 12361
Singh, G., Subr, K., Coeurjolly, D., Ostromoukhov, V., and Jarosz, W. 2020. Fourier Analysis of Correlated Monte Carlo Importance Sampling. Computer Graphics Forum39, 1.
Export
BibTeX
@article{SinghCGF2020, TITLE = {Fourier Analysis of Correlated {Monte Carlo} Importance Sampling}, AUTHOR = {Singh, Gurprit and Subr, Kartic and Coeurjolly, David and Ostromoukhov, Victor and Jarosz, Wojciech}, LANGUAGE = {eng}, ISSN = {0167-7055}, DOI = {10.1111/cgf.13613}, PUBLISHER = {Blackwell-Wiley}, ADDRESS = {Oxford}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, JOURNAL = {Computer Graphics Forum}, VOLUME = {39}, NUMBER = {1}, PAGES = {7--19}, }
Endnote
%0 Journal Article %A Singh, Gurprit %A Subr, Kartic %A Coeurjolly, David %A Ostromoukhov, Victor %A Jarosz, Wojciech %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations %T Fourier Analysis of Correlated Monte Carlo Importance Sampling : %G eng %U http://hdl.handle.net/21.11116/0000-0006-978D-1 %R 10.1111/cgf.13613 %7 2020 %D 2020 %J Computer Graphics Forum %O Computer Graphics Forum : journal of the European Association for Computer Graphics Comput. Graph. Forum %V 39 %N 1 %& 7 %P 7 - 19 %I Blackwell-Wiley %C Oxford %@ false
Stadlbauer, P., Mlakar, D., Seidel, H.-P., Steinberger, M., and Zayer, R. 2020. Interactive Modeling of Cellular Structures on Surfaces with Application to Additive Manufacturing. Computer Graphics Forum (Proc. EUROGRAPHICS 2020)39, 2.
Export
BibTeX
@article{Stadlbauer_EG2020, TITLE = {Interactive Modeling of Cellular Structures on Surfaces with Application to Additive Manufacturing}, AUTHOR = {Stadlbauer, Pascal and Mlakar, Daniel and Seidel, Hans-Peter and Steinberger, Markus and Zayer, Rhaleb}, LANGUAGE = {eng}, ISSN = {0167-7055}, DOI = {10.1111/cgf.13929}, PUBLISHER = {Blackwell-Wiley}, ADDRESS = {Oxford}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, JOURNAL = {Computer Graphics Forum (Proc. EUROGRAPHICS)}, VOLUME = {39}, NUMBER = {2}, PAGES = {277--289}, BOOKTITLE = {The European Association for Computer Graphics 41st Annual Conference (EUROGRAPHICS 2020)}, EDITOR = {Panozzo, Daniele and Assarsson, Ulf}, }
Endnote
%0 Journal Article %A Stadlbauer, Pascal %A Mlakar, Daniel %A Seidel, Hans-Peter %A Steinberger, Markus %A Zayer, Rhaleb %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Interactive Modeling of Cellular Structures on Surfaces with Application to Additive Manufacturing : %G eng %U http://hdl.handle.net/21.11116/0000-0006-DB8A-8 %R 10.1111/cgf.13929 %7 2020 %D 2020 %J Computer Graphics Forum %O Computer Graphics Forum : journal of the European Association for Computer Graphics Comput. Graph. Forum %V 39 %N 2 %& 277 %P 277 - 289 %I Blackwell-Wiley %C Oxford %@ false %B The European Association for Computer Graphics 41st Annual Conference %O EUROGRAPHICS 2020 EG 2020 The European Association for Computer Graphics 41st Annual Conference ; Norrköping, Sweden, May 25 – 29, 2020
Sultan, A.S., Elgharib, M., Tavares, T., Jessri, M., and Basile, J.R. 2020. The Use of Artificial Intelligence, Machine Learning and Deep Learning in Oncologic Histopathology. Journal of Oral Pathology & Medicine49, 9.
Export
BibTeX
@article{Sultan2020, TITLE = {The Use of Artificial Intelligence, Machine Learning and Deep Learning in Oncologic Histopathology}, AUTHOR = {Sultan, Ahmed S. and Elgharib, Mohamed and Tavares, Tiffany and Jessri, Maryam and Basile, John R.}, LANGUAGE = {eng}, ISSN = {0904-2512}, DOI = {10.1111/jop.13042}, PUBLISHER = {Wiley-Blackwell}, ADDRESS = {Oxford}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {Journal of Oral Pathology \& Medicine}, VOLUME = {49}, NUMBER = {9}, PAGES = {849--856}, }
Endnote
%0 Journal Article %A Sultan, Ahmed S. %A Elgharib, Mohamed %A Tavares, Tiffany %A Jessri, Maryam %A Basile, John R. %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T The Use of Artificial Intelligence, Machine Learning and Deep Learning in Oncologic Histopathology : %G eng %U http://hdl.handle.net/21.11116/0000-0006-A2C9-0 %R 10.1111/jop.13042 %7 2020 %D 2020 %J Journal of Oral Pathology & Medicine %V 49 %N 9 %& 849 %P 849 - 856 %I Wiley-Blackwell %C Oxford %@ false
Tewari, A., Zollhöfer, M., Bernard, F., et al. 2020a. High-Fidelity Monocular Face Reconstruction based on an Unsupervised Model-based Face Autoencoder. IEEE Transactions on Pattern Analysis and Machine Intelligence42, 2.
Export
BibTeX
@article{8496850, TITLE = {High-Fidelity Monocular Face Reconstruction based on an Unsupervised Model-based Face Autoencoder}, AUTHOR = {Tewari, Ayush and Zollh{\"o}fer, Michael and Bernard, Florian and Garrido, Pablo and Kim, Hyeongwoo and P{\'e}rez, Patrick and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0162-8828}, DOI = {10.1109/TPAMI.2018.2876842}, PUBLISHER = {IEEE}, ADDRESS = {Piscataway, NJ}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, JOURNAL = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, VOLUME = {42}, NUMBER = {2}, PAGES = {357--370}, }
Endnote
%0 Journal Article %A Tewari, Ayush %A Zollhöfer, Michael %A Bernard, Florian %A Garrido, Pablo %A Kim, Hyeongwoo %A Pérez, Patrick %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T High-Fidelity Monocular Face Reconstruction based on an Unsupervised Model-based Face Autoencoder : %G eng %U http://hdl.handle.net/21.11116/0000-0002-EF5B-C %R 10.1109/TPAMI.2018.2876842 %7 2018 %D 2020 %J IEEE Transactions on Pattern Analysis and Machine Intelligence %O IEEE Trans. Pattern Anal. Mach. Intell. %V 42 %N 2 %& 357 %P 357 - 370 %I IEEE %C Piscataway, NJ %@ false
Tewari, A., Fried, O., Thies, J., et al. 2020b. State of the Art on Neural Rendering. Computer Graphics Forum (Proc. EUROGRAPHICS 2020)39, 2.
Export
BibTeX
@article{Tewari_EG2020, TITLE = {State of the Art on Neural Rendering}, AUTHOR = {Tewari, Ayush and Fried, Ohad and Thies, Justus and Sitzmann, Vincent and Lombardi, Stephen and Sunkavalli, Kalyan and Martin-Brualla, Ricardo and Simon, Tomas and Saragih, Jason and Nie{\ss}ner, Matthias and Pandey, Rohit and Fanello, Sean and Wetzstein, Gordon and Zhu, Jun-Yan and Theobalt, Christian and Agrawala, Maneesh and Shechtman, Eli and Goldman, Dan B. and Zollh{\"o}fer, Michael}, LANGUAGE = {eng}, ISSN = {0167-7055}, DOI = {10.1111/cgf.14022}, PUBLISHER = {Blackwell-Wiley}, ADDRESS = {Oxford}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, JOURNAL = {Computer Graphics Forum (Proc. EUROGRAPHICS)}, VOLUME = {39}, NUMBER = {2}, PAGES = {701--727}, BOOKTITLE = {The European Association for Computer Graphics 41st Annual Conference (EUROGRAPHICS 2020)}, EDITOR = {Panozzo, Daniele and Assarsson, Ulf}, }
Endnote
%0 Journal Article %A Tewari, Ayush %A Fried, Ohad %A Thies, Justus %A Sitzmann, Vincent %A Lombardi, Stephen %A Sunkavalli, Kalyan %A Martin-Brualla, Ricardo %A Simon, Tomas %A Saragih, Jason %A Nießner, Matthias %A Pandey, Rohit %A Fanello, Sean %A Wetzstein, Gordon %A Zhu, Jun-Yan %A Theobalt, Christian %A Agrawala, Maneesh %A Shechtman, Eli %A Goldman, Dan B. %A Zollhöfer, Michael %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations %T State of the Art on Neural Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0006-DB93-D %R 10.1111/cgf.14022 %7 2020 %D 2020 %J Computer Graphics Forum %O Computer Graphics Forum : journal of the European Association for Computer Graphics Comput. Graph. Forum %V 39 %N 2 %& 701 %P 701 - 727 %I Blackwell-Wiley %C Oxford %@ false %B The European Association for Computer Graphics 41st Annual Conference %O EUROGRAPHICS 2020 EG 2020 The European Association for Computer Graphics 41st Annual Conference ; Norrköping, Sweden, May 25 – 29, 2020
Tewari, A., Elgharib, M., Mallikarjun B R, et al. 2020c. PIE: Portrait Image Embedding for Semantic Control. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020)39, 6.
Export
BibTeX
@article{Tewari_ToG2020, TITLE = {{PIE}: {P}ortrait Image Embedding for Semantic Control}, AUTHOR = {Tewari, Ayush and Elgharib, Mohamed and Mallikarjun B R, and Bernard, Florian and Seidel, Hans-Peter and P{\'e}rez, Patrick and Zollh{\"o}fer, Michael and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3414685.3417803}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia)}, VOLUME = {39}, NUMBER = {6}, EID = {223}, BOOKTITLE = {Proceedings of ACM SIGGRAPH Asia 2020}, EDITOR = {Myszkowski, Karol}, }
Endnote
%0 Journal Article %A Tewari, Ayush %A Elgharib, Mohamed %A Mallikarjun B R, %A Bernard, Florian %A Seidel, Hans-Peter %A Pérez, Patrick %A Zollhöfer, Michael %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T PIE: Portrait Image Embedding for Semantic Control : %G eng %U http://hdl.handle.net/21.11116/0000-0007-9B0C-E %R 10.1145/3414685.3417803 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 6 %Z sequence number: 223 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH Asia 2020 %O ACM SIGGRAPH Asia 2020 SA'20 SA 2020
Tewari, A., Elgharib, M., Bharaj, G., et al. 2020d. StyleRig: Rigging StyleGAN for 3D Control over Portrait Images. https://arxiv.org/abs/2004.00121.
(arXiv: 2004.00121)
Abstract
StyleGAN generates photorealistic portrait images of faces with eyes, teeth, hair and context (neck, shoulders, background), but lacks a rig-like control over semantic face parameters that are interpretable in 3D, such as face pose, expressions, and scene illumination. Three-dimensional morphable face models (3DMMs) on the other hand offer control over the semantic parameters, but lack photorealism when rendered and only model the face interior, not other parts of a portrait image (hair, mouth interior, background). We present the first method to provide a face rig-like control over a pretrained and fixed StyleGAN via a 3DMM. A new rigging network, RigNet is trained between the 3DMM's semantic parameters and StyleGAN's input. The network is trained in a self-supervised manner, without the need for manual annotations. At test time, our method generates portrait images with the photorealism of StyleGAN and provides explicit control over the 3D semantic parameters of the face.
Export
BibTeX
@online{Tewari_2004.00121, TITLE = {{StyleRig}: Rigging {StyleGAN} for {3D} Control over Portrait Images}, AUTHOR = {Tewari, Ayush and Elgharib, Mohamed and Bharaj, Gaurav and Bernard, Florian and Seidel, Hans-Peter and P{\'e}rez, Patrick and Zollh{\"o}fer, Michael and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2004.00121}, EPRINT = {2004.00121}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {StyleGAN generates photorealistic portrait images of faces with eyes, teeth, hair and context (neck, shoulders, background), but lacks a rig-like control over semantic face parameters that are interpretable in 3D, such as face pose, expressions, and scene illumination. Three-dimensional morphable face models (3DMMs) on the other hand offer control over the semantic parameters, but lack photorealism when rendered and only model the face interior, not other parts of a portrait image (hair, mouth interior, background). We present the first method to provide a face rig-like control over a pretrained and fixed StyleGAN via a 3DMM. A new rigging network, RigNet is trained between the 3DMM's semantic parameters and StyleGAN's input. The network is trained in a self-supervised manner, without the need for manual annotations. At test time, our method generates portrait images with the photorealism of StyleGAN and provides explicit control over the 3D semantic parameters of the face.}, }
Endnote
%0 Report %A Tewari, Ayush %A Elgharib, Mohamed %A Bharaj, Gaurav %A Bernard, Florian %A Seidel, Hans-Peter %A Pérez, Patrick %A Zollhöfer, Michael %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T StyleRig: Rigging StyleGAN for 3D Control over Portrait Images : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B0FC-6 %U https://arxiv.org/abs/2004.00121 %D 2020 %X StyleGAN generates photorealistic portrait images of faces with eyes, teeth, hair and context (neck, shoulders, background), but lacks a rig-like control over semantic face parameters that are interpretable in 3D, such as face pose, expressions, and scene illumination. Three-dimensional morphable face models (3DMMs) on the other hand offer control over the semantic parameters, but lack photorealism when rendered and only model the face interior, not other parts of a portrait image (hair, mouth interior, background). We present the first method to provide a face rig-like control over a pretrained and fixed StyleGAN via a 3DMM. A new rigging network, RigNet is trained between the 3DMM's semantic parameters and StyleGAN's input. The network is trained in a self-supervised manner, without the need for manual annotations. At test time, our method generates portrait images with the photorealism of StyleGAN and provides explicit control over the 3D semantic parameters of the face. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
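The StyleRig abstract above centers on learning a mapping from interpretable 3DMM parameters to edits in a generator's latent space. As a toy, linear stand-in for that idea, the sketch below fits a least-squares map from semantic parameters to latent codes and uses it to turn a change of one parameter into a latent edit; dimensions, names, and the fitting procedure are illustrative assumptions, not the paper's RigNet or its self-supervised training.

import numpy as np

rng = np.random.default_rng(0)
n_pairs, dim_params, dim_latent = 500, 16, 512

# pretend we observed paired (3DMM parameters, latent code) examples
params = rng.standard_normal((n_pairs, dim_params))
latents = params @ rng.standard_normal((dim_params, dim_latent)) \
          + 0.05 * rng.standard_normal((n_pairs, dim_latent))

# least-squares "rig": latent ~= params @ M
M, *_ = np.linalg.lstsq(params, latents, rcond=None)

def edit(latent, param_index, delta):
    # shift one semantic parameter (e.g. head yaw) and map it to a latent edit
    change = np.zeros(dim_params)
    change[param_index] = delta
    return latent + change @ M

edited = edit(latents[0], param_index=3, delta=1.5)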
Tewari, A., Elgharib, M., Bharaj, G., et al. 2020e. StyleRig: Rigging StyleGAN for 3D Control Over Portrait Images. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{Tewari_CVPR2020, TITLE = {{StyleRig}: {R}igging {StyleGAN} for {3D} Control Over Portrait Images}, AUTHOR = {Tewari, Ayush and Elgharib, Mohamed and Bharaj, Gaurav and Bernard, Florian and Seidel, Hans-Peter and P{\'e}rez, Patrick and Zollh{\"o}fer, Michael and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00618}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {6141--6150}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Tewari, Ayush %A Elgharib, Mohamed %A Bharaj, Gaurav %A Bernard, Florian %A Seidel, Hans-Peter %A Pérez, Patrick %A Zollhöfer, Michael %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T StyleRig: Rigging StyleGAN for 3D Control Over Portrait Images : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B0E7-D %R 10.1109/CVPR42600.2020.00618 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 6141 - 6150 %I IEEE %@ 978-1-7281-7168-5
Tewari, A., Elgharib, M., Mallikarjun B R, et al. 2020f. PIE: Portrait Image Embedding for Semantic Control. https://arxiv.org/abs/2009.09485.
(arXiv: 2009.09485)
Abstract
Editing of portrait images is a very popular and important research topic with a large variety of applications. For ease of use, control should be provided via a semantically meaningful parameterization that is akin to computer animation controls. The vast majority of existing techniques do not provide such intuitive and fine-grained control, or only enable coarse editing of a single isolated control parameter. Very recently, high-quality semantically controlled editing has been demonstrated, however only on synthetically created StyleGAN images. We present the first approach for embedding real portrait images in the latent space of StyleGAN, which allows for intuitive editing of the head pose, facial expression, and scene illumination in the image. Semantic editing in parameter space is achieved based on StyleRig, a pretrained neural network that maps the control space of a 3D morphable face model to the latent space of the GAN. We design a novel hierarchical non-linear optimization problem to obtain the embedding. An identity preservation energy term allows spatially coherent edits while maintaining facial integrity. Our approach runs at interactive frame rates and thus allows the user to explore the space of possible edits. We evaluate our approach on a wide set of portrait photos, compare it to the current state of the art, and validate the effectiveness of its components in an ablation study.
Export
BibTeX
@online{Tewari_2009.09485, TITLE = {{PIE}: {P}ortrait Image Embedding for Semantic Control}, AUTHOR = {Tewari, Ayush and Elgharib, Mohamed and Mallikarjun B R, and Bernard, Florian and Seidel, Hans-Peter and P{\'e}rez, Patrick and Zollh{\"o}fer, Michael and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2009.09485}, EPRINT = {2009.09485}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Editing of portrait images is a very popular and important research topic with a large variety of applications. For ease of use, control should be provided via a semantically meaningful parameterization that is akin to computer animation controls. The vast majority of existing techniques do not provide such intuitive and fine-grained control, or only enable coarse editing of a single isolated control parameter. Very recently, high-quality semantically controlled editing has been demonstrated, however only on synthetically created StyleGAN images. We present the first approach for embedding real portrait images in the latent space of StyleGAN, which allows for intuitive editing of the head pose, facial expression, and scene illumination in the image. Semantic editing in parameter space is achieved based on StyleRig, a pretrained neural network that maps the control space of a 3D morphable face model to the latent space of the GAN. We design a novel hierarchical non-linear optimization problem to obtain the embedding. An identity preservation energy term allows spatially coherent edits while maintaining facial integrity. Our approach runs at interactive frame rates and thus allows the user to explore the space of possible edits. We evaluate our approach on a wide set of portrait photos, compare it to the current state of the art, and validate the effectiveness of its components in an ablation study.}, }
Endnote
%0 Report %A Tewari, Ayush %A Elgharib, Mohamed %A Mallikarjun B R, %A Bernard, Florian %A Seidel, Hans-Peter %A Pérez, Patrick %A Zollhöfer, Michael %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T PIE: Portrait Image Embedding for Semantic Control : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B117-7 %U https://arxiv.org/abs/2009.09485 %D 2020 %X Editing of portrait images is a very popular and important research topic with a large variety of applications. For ease of use, control should be provided via a semantically meaningful parameterization that is akin to computer animation controls. The vast majority of existing techniques do not provide such intuitive and fine-grained control, or only enable coarse editing of a single isolated control parameter. Very recently, high-quality semantically controlled editing has been demonstrated, however only on synthetically created StyleGAN images. We present the first approach for embedding real portrait images in the latent space of StyleGAN, which allows for intuitive editing of the head pose, facial expression, and scene illumination in the image. Semantic editing in parameter space is achieved based on StyleRig, a pretrained neural network that maps the control space of a 3D morphable face model to the latent space of the GAN. We design a novel hierarchical non-linear optimization problem to obtain the embedding. An identity preservation energy term allows spatially coherent edits while maintaining facial integrity. Our approach runs at interactive frame rates and thus allows the user to explore the space of possible edits. We evaluate our approach on a wide set of portrait photos, compare it to the current state of the art, and validate the effectiveness of its components in an ablation study. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Tewari, A., Fried, O., Thies, J., et al. 2020g. State of the Art on Neural Rendering. https://arxiv.org/abs/2004.03805.
(arXiv: 2004.03805)
Abstract
Efficient rendering of photo-realistic virtual worlds is a long standing effort of computer graphics. Modern graphics techniques have succeeded in synthesizing photo-realistic images from hand-crafted scene representations. However, the automatic generation of shape, materials, lighting, and other aspects of scenes remains a challenging problem that, if solved, would make photo-realistic computer graphics more widely accessible. Concurrently, progress in computer vision and machine learning have given rise to a new approach to image synthesis and editing, namely deep generative models. Neural rendering is a new and rapidly emerging field that combines generative machine learning techniques with physical knowledge from computer graphics, e.g., by the integration of differentiable rendering into network training. With a plethora of applications in computer graphics and vision, neural rendering is poised to become a new area in the graphics community, yet no survey of this emerging field exists. This state-of-the-art report summarizes the recent trends and applications of neural rendering. We focus on approaches that combine classic computer graphics techniques with deep generative models to obtain controllable and photo-realistic outputs. Starting with an overview of the underlying computer graphics and machine learning concepts, we discuss critical aspects of neural rendering approaches. This state-of-the-art report is focused on the many important use cases for the described algorithms such as novel view synthesis, semantic photo manipulation, facial and body reenactment, relighting, free-viewpoint video, and the creation of photo-realistic avatars for virtual and augmented reality telepresence. Finally, we conclude with a discussion of the social implications of such technology and investigate open research problems.
Export
BibTeX
@online{Tewari2004.03805, TITLE = {State of the Art on Neural Rendering}, AUTHOR = {Tewari, Ayush and Fried, Ohad and Thies, Justus and Sitzmann, Vincent and Lombardi, Stephen and Sunkavalli, Kalyan and Martin-Brualla, Ricardo and Simon, Tomas and Saragih, Jason and Nie{\ss}ner, Matthias and Pandey, Rohit and Fanello, Sean and Wetzstein, Gordon and Zhu, Jun-Yan and Theobalt, Christian and Agrawala, Maneesh and Shechtman, Eli and Goldman, Dan B and Zollh{\"o}fer, Michael}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2004.03805}, EPRINT = {2004.03805}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Efficient rendering of photo-realistic virtual worlds is a long standing effort of computer graphics. Modern graphics techniques have succeeded in synthesizing photo-realistic images from hand-crafted scene representations. However, the automatic generation of shape, materials, lighting, and other aspects of scenes remains a challenging problem that, if solved, would make photo-realistic computer graphics more widely accessible. Concurrently, progress in computer vision and machine learning have given rise to a new approach to image synthesis and editing, namely deep generative models. Neural rendering is a new and rapidly emerging field that combines generative machine learning techniques with physical knowledge from computer graphics, e.g., by the integration of differentiable rendering into network training. With a plethora of applications in computer graphics and vision, neural rendering is poised to become a new area in the graphics community, yet no survey of this emerging field exists. This state-of-the-art report summarizes the recent trends and applications of neural rendering. We focus on approaches that combine classic computer graphics techniques with deep generative models to obtain controllable and photo-realistic outputs. Starting with an overview of the underlying computer graphics and machine learning concepts, we discuss critical aspects of neural rendering approaches. This state-of-the-art report is focused on the many important use cases for the described algorithms such as novel view synthesis, semantic photo manipulation, facial and body reenactment, relighting, free-viewpoint video, and the creation of photo-realistic avatars for virtual and augmented reality telepresence. Finally, we conclude with a discussion of the social implications of such technology and investigate open research problems.}, }
Endnote
%0 Report %A Tewari, Ayush %A Fried, Ohad %A Thies, Justus %A Sitzmann, Vincent %A Lombardi, Stephen %A Sunkavalli, Kalyan %A Martin-Brualla, Ricardo %A Simon, Tomas %A Saragih, Jason %A Nießner, Matthias %A Pandey, Rohit %A Fanello, Sean %A Wetzstein, Gordon %A Zhu, Jun-Yan %A Theobalt, Christian %A Agrawala, Maneesh %A Shechtman, Eli %A Goldman, Dan B %A Zollhöfer, Michael %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations %T State of the Art on Neural Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E114-4 %U https://arxiv.org/abs/2004.03805 %D 2020 %X Efficient rendering of photo-realistic virtual worlds is a long standing effort of computer graphics. Modern graphics techniques have succeeded in synthesizing photo-realistic images from hand-crafted scene representations. However, the automatic generation of shape, materials, lighting, and other aspects of scenes remains a challenging problem that, if solved, would make photo-realistic computer graphics more widely accessible. Concurrently, progress in computer vision and machine learning have given rise to a new approach to image synthesis and editing, namely deep generative models. Neural rendering is a new and rapidly emerging field that combines generative machine learning techniques with physical knowledge from computer graphics, e.g., by the integration of differentiable rendering into network training. With a plethora of applications in computer graphics and vision, neural rendering is poised to become a new area in the graphics community, yet no survey of this emerging field exists. This state-of-the-art report summarizes the recent trends and applications of neural rendering. We focus on approaches that combine classic computer graphics techniques with deep generative models to obtain controllable and photo-realistic outputs. Starting with an overview of the underlying computer graphics and machine learning concepts, we discuss critical aspects of neural rendering approaches. This state-of-the-art report is focused on the many important use cases for the described algorithms such as novel view synthesis, semantic photo manipulation, facial and body reenactment, relighting, free-viewpoint video, and the creation of photo-realistic avatars for virtual and augmented reality telepresence. Finally, we conclude with a discussion of the social implications of such technology and investigate open research problems. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Thies, J., Zollhöfer, M., Theobalt, C., Stamminger, M., and Nießner, M. 2020a. Image-guided Neural Object Rendering. International Conference on Learning Representations (ICLR 2020), OpenReview.net.
Export
BibTeX
@inproceedings{Thies_ICLR2020, TITLE = {Image-guided Neural Object Rendering}, AUTHOR = {Thies, Justus and Zollh{\"o}fer, Michael and Theobalt, Christian and Stamminger, Marc and Nie{\ss}ner, Matthias}, LANGUAGE = {eng}, URL = {https://openreview.net/forum?id=Hyg9anEFPS; https://iclr.cc/Conferences/2020}, PUBLISHER = {OpenReview.net}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {International Conference on Learning Representations (ICLR 2020)}, ADDRESS = {Addis Ababa, Ethiopia}, }
Endnote
%0 Conference Proceedings %A Thies, Justus %A Zollhöfer, Michael %A Theobalt, Christian %A Stamminger, Marc %A Nießner, Matthias %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Image-guided Neural Object Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D431-2 %U https://openreview.net/forum?id=Hyg9anEFPS %D 2020 %B 8th International Conference on Learning Representations %Z date of event: 2020-04-26 - 2020-04-30 %C Addis Ababa, Ethiopia %B International Conference on Learning Representations %I OpenReview.net %U https://openreview.net/forum?id=Hyg9anEFPS
Thies, J., Elgharib, M., Tewari, A., Theobalt, C., and Nießner, M. 2020b. Neural Voice Puppetry: Audio-Driven Facial Reenactment. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Thies_ECCV20, TITLE = {Neural Voice Puppetry: {A}udio-Driven Facial Reenactment}, AUTHOR = {Thies, Justus and Elgharib, Mohamed and Tewari, Ayush and Theobalt, Christian and Nie{\ss}ner, Matthias}, LANGUAGE = {eng}, ISBN = {978-3-030-58516-7}, DOI = {10.1007/978-3-030-58517-4_42}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {716--731}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12361}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Thies, Justus %A Elgharib, Mohamed %A Tewari, Ayush %A Theobalt, Christian %A Nießner, Matthias %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Neural Voice Puppetry: Audio-Driven Facial Reenactment : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D42F-6 %R 10.1007/978-3-030-58517-4_42 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 716 - 731 %I Springer %@ 978-3-030-58516-7 %B Lecture Notes in Computer Science %N 12361
Thies, J., Zollhöfer, M., Stamminger, M., Theobalt, C., and Nießner, M. 2020c. Face2Face: Real-time Face Capture and Reenactment of RGB Videos. https://arxiv.org/abs/2007.14808.
(arXiv: 2007.14808)
Abstract
We present Face2Face, a novel approach for real-time facial reenactment of a monocular target video sequence (e.g., Youtube video). The source sequence is also a monocular video stream, captured live with a commodity webcam. Our goal is to animate the facial expressions of the target video by a source actor and re-render the manipulated output video in a photo-realistic fashion. To this end, we first address the under-constrained problem of facial identity recovery from monocular video by non-rigid model-based bundling. At run time, we track facial expressions of both source and target video using a dense photometric consistency measure. Reenactment is then achieved by fast and efficient deformation transfer between source and target. The mouth interior that best matches the re-targeted expression is retrieved from the target sequence and warped to produce an accurate fit. Finally, we convincingly re-render the synthesized target face on top of the corresponding video stream such that it seamlessly blends with the real-world illumination. We demonstrate our method in a live setup, where Youtube videos are reenacted in real time.
Export
BibTeX
@online{Thies_2007.14808, TITLE = {{Face2Face}: {R}eal-time Face Capture and Reenactment of {RGB} Videos}, AUTHOR = {Thies, Justus and Zollh{\"o}fer, Michael and Stamminger, Marc and Theobalt, Christian and Nie{\ss}ner, Matthias}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2007.14808}, EPRINT = {2007.14808}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present Face2Face, a novel approach for real-time facial reenactment of a monocular target video sequence (e.g., Youtube video). The source sequence is also a monocular video stream, captured live with a commodity webcam. Our goal is to animate the facial expressions of the target video by a source actor and re-render the manipulated output video in a photo-realistic fashion. To this end, we first address the under-constrained problem of facial identity recovery from monocular video by non-rigid model-based bundling. At run time, we track facial expressions of both source and target video using a dense photometric consistency measure. Reenactment is then achieved by fast and efficient deformation transfer between source and target. The mouth interior that best matches the re-targeted expression is retrieved from the target sequence and warped to produce an accurate fit. Finally, we convincingly re-render the synthesized target face on top of the corresponding video stream such that it seamlessly blends with the real-world illumination. We demonstrate our method in a live setup, where Youtube videos are reenacted in real time.}, }
Endnote
%0 Report %A Thies, Justus %A Zollhöfer, Michael %A Stamminger, Marc %A Theobalt, Christian %A Nießner, Matthias %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Face2Face: Real-time Face Capture and Reenactment of RGB Videos : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E8E9-D %U https://arxiv.org/abs/2007.14808 %D 2020 %X We present Face2Face, a novel approach for real-time facial reenactment of a monocular target video sequence (e.g., Youtube video). The source sequence is also a monocular video stream, captured live with a commodity webcam. Our goal is to animate the facial expressions of the target video by a source actor and re-render the manipulated output video in a photo-realistic fashion. To this end, we first address the under-constrained problem of facial identity recovery from monocular video by non-rigid model-based bundling. At run time, we track facial expressions of both source and target video using a dense photometric consistency measure. Reenactment is then achieved by fast and efficient deformation transfer between source and target. The mouth interior that best matches the re-targeted expression is retrieved from the target sequence and warped to produce an accurate fit. Finally, we convincingly re-render the synthesized target face on top of the corresponding video stream such that it seamlessly blends with the real-world illumination. We demonstrate our method in a live setup, where Youtube videos are reenacted in real time. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Tong, X., Myszkowski, K., and Huang, J. 2020. Foreword to the Special Section on the International Conference on Computer-Aided Design and Computer Graphics (CAD/Graphics) 2019. Computers and Graphics 86.
Export
BibTeX
@article{Tong_CAD19, TITLE = {Foreword to the Special Section on the {International Conference on Computer-Aided Design and Computer Graphics (CAD/Graphics)} 2019}, AUTHOR = {Tong, Xin and Myszkowski, Karol and Huang, Jin}, LANGUAGE = {eng}, ISSN = {0097-8493}, DOI = {10.1016/j.cag.2019.12.002}, PUBLISHER = {Elsevier}, ADDRESS = {Amsterdam}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, JOURNAL = {Computers and Graphics}, VOLUME = {86}, PAGES = {A5--A6}, }
Endnote
%0 Journal Article %A Tong, Xin %A Myszkowski, Karol %A Huang, Jin %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Foreword to the Special Section on the International Conference on Computer-Aided Design and Computer Graphics (CAD/Graphics) 2019 : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CEAF-D %R 10.1016/j.cag.2019.12.002 %7 2019 %D 2020 %J Computers and Graphics %V 86 %& A5 %P A5 - A6 %I Elsevier %C Amsterdam %@ false
Tretschk, E., Tewari, A., Golyanik, V., Zollhöfer, M., Stoll, C., and Theobalt, C. 2020a. PatchNets: Patch-Based Generalizable Deep Implicit 3D Shape Representations. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Tretschk_ECCV20a, TITLE = {{PatchNets}: {P}atch-Based Generalizable Deep Implicit {3D} Shape Representations}, AUTHOR = {Tretschk, Edgar and Tewari, Ayush and Golyanik, Vladislav and Zollh{\"o}fer, Michael and Stoll, Carsten and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-58516-7}, DOI = {10.1007/978-3-030-58517-4_18}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {293--309}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12361}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Tretschk, Edgar %A Tewari, Ayush %A Golyanik, Vladislav %A Zollhöfer, Michael %A Stoll, Carsten %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T PatchNets: Patch-Based Generalizable Deep Implicit 3D Shape Representations : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D095-5 %R 10.1007/978-3-030-58517-4_18 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 293 - 309 %I Springer %@ 978-3-030-58516-7 %B Lecture Notes in Computer Science %N 12361
Tretschk, E., Tewari, A., Zollhöfer, M., Golyanik, V., and Theobalt, C. 2020b. DEMEA: Deep Mesh Autoencoders for Non-rigidly Deforming Objects. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Tretschk_ECCV20, TITLE = {{DEMEA}: {D}eep Mesh Autoencoders for Non-rigidly Deforming Objects}, AUTHOR = {Tretschk, Edgar and Tewari, Ayush and Zollh{\"o}fer, Michael and Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-58516-7}, DOI = {10.1007/978-3-030-58548-8_35}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {601--617}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12349}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Tretschk, Edgar %A Tewari, Ayush %A Zollhöfer, Michael %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T DEMEA: Deep Mesh Autoencoders for Non-rigidly Deforming Objects : %G eng %U http://hdl.handle.net/21.11116/0000-0007-D425-0 %R 10.1007/978-3-030-58548-8_35 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 601 - 617 %I Springer %@ 978-3-030-58516-7 %B Lecture Notes in Computer Science %N 12349
Tretschk, E., Tewari, A., Golyanik, V., Zollhöfer, M., Stoll, C., and Theobalt, C. 2020c. PatchNets: Patch-Based Generalizable Deep Implicit 3D Shape Representations. https://arxiv.org/abs/2008.01639.
(arXiv: 2008.01639)
Abstract
Implicit surface representations, such as signed-distance functions, combined with deep learning have led to impressive models which can represent detailed shapes of objects with arbitrary topology. Since a continuous function is learned, the reconstructions can also be extracted at any arbitrary resolution. However, large datasets such as ShapeNet are required to train such models. In this paper, we present a new mid-level patch-based surface representation. At the level of patches, objects across different categories share similarities, which leads to more generalizable models. We then introduce a novel method to learn this patch-based representation in a canonical space, such that it is as object-agnostic as possible. We show that our representation trained on one category of objects from ShapeNet can also well represent detailed shapes from any other category. In addition, it can be trained using much fewer shapes, compared to existing approaches. We show several applications of our new representation, including shape interpolation and partial point cloud completion. Due to explicit control over positions, orientations and scales of patches, our representation is also more controllable compared to object-level representations, which enables us to deform encoded shapes non-rigidly.
Export
BibTeX
@online{Tretschk_2008.01639, TITLE = {{PatchNets}: {P}atch-Based Generalizable Deep Implicit {3D} Shape Representations}, AUTHOR = {Tretschk, Edgar and Tewari, Ayush and Golyanik, Vladislav and Zollh{\"o}fer, Michael and Stoll, Carsten and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2008.01639}, EPRINT = {2008.01639}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Implicit surface representations, such as signed-distance functions, combined with deep learning have led to impressive models which can represent detailed shapes of objects with arbitrary topology. Since a continuous function is learned, the reconstructions can also be extracted at any arbitrary resolution. However, large datasets such as ShapeNet are required to train such models. In this paper, we present a new mid-level patch-based surface representation. At the level of patches, objects across different categories share similarities, which leads to more generalizable models. We then introduce a novel method to learn this patch-based representation in a canonical space, such that it is as object-agnostic as possible. We show that our representation trained on one category of objects from ShapeNet can also well represent detailed shapes from any other category. In addition, it can be trained using much fewer shapes, compared to existing approaches. We show several applications of our new representation, including shape interpolation and partial point cloud completion. Due to explicit control over positions, orientations and scales of patches, our representation is also more controllable compared to object-level representations, which enables us to deform encoded shapes non-rigidly.}, }
Endnote
%0 Report %A Tretschk, Edgar %A Tewari, Ayush %A Golyanik, Vladislav %A Zollhöfer, Michael %A Stoll, Carsten %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T PatchNets: Patch-Based Generalizable Deep Implicit 3D Shape Representations : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E8ED-9 %U https://arxiv.org/abs/2008.01639 %D 2020 %X Implicit surface representations, such as signed-distance functions, combined with deep learning have led to impressive models which can represent detailed shapes of objects with arbitrary topology. Since a continuous function is learned, the reconstructions can also be extracted at any arbitrary resolution. However, large datasets such as ShapeNet are required to train such models. In this paper, we present a new mid-level patch-based surface representation. At the level of patches, objects across different categories share similarities, which leads to more generalizable models. We then introduce a novel method to learn this patch-based representation in a canonical space, such that it is as object-agnostic as possible. We show that our representation trained on one category of objects from ShapeNet can also well represent detailed shapes from any other category. In addition, it can be trained using much fewer shapes, compared to existing approaches. We show several applications of our new representation, including shape interpolation and partial point cloud completion. Due to explicit control over positions, orientations and scales of patches, our representation is also more controllable compared to object-level representations, which enables us to deform encoded shapes non-rigidly. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
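The PatchNets entries above represent a shape as a set of local implicit patches whose signed distances are blended into one global surface. The following minimal Python sketch only illustrates that general blending idea under simple assumptions (Gaussian weights around hypothetical patch centers and scales); it is not the authors' implementation.

```python
import numpy as np

def blended_sdf(x, patch_centers, patch_scales, patch_sdfs):
    """Evaluate a global signed distance at query points x (M, 3) as a
    weighted blend of local per-patch SDFs. Illustrative sketch only:
    the Gaussian weighting is an assumption, not the exact PatchNets scheme."""
    weights, values = [], []
    for c, s, sdf in zip(patch_centers, patch_scales, patch_sdfs):
        d2 = np.sum((x - c) ** 2, axis=1)
        weights.append(np.exp(-d2 / (2.0 * s ** 2)))  # patches mostly affect their neighborhood
        values.append(sdf(x))                          # each patch has its own local implicit function
    W, V = np.stack(weights), np.stack(values)         # both (P, M)
    return np.sum(W * V, axis=0) / (np.sum(W, axis=0) + 1e-8)

# Toy usage: two spherical "patches" approximating a surface
centers = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
scales = np.array([0.5, 0.5])
sdfs = [lambda p, c=c: np.linalg.norm(p - c, axis=1) - 0.5 for c in centers]
print(blended_sdf(np.array([[0.25, 0.0, 0.0]]), centers, scales, sdfs))
```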
Tretschk, E., Tewari, A., Golyanik, V., Zollhöfer, M., Lassner, C., and Theobalt, C. 2020d. Non-Rigid Neural Radiance Fields: Reconstruction and Novel View Synthesis of a Deforming Scene from Monocular Video. https://arxiv.org/abs/2012.12247.
(arXiv: 2012.12247)
Abstract
In this tech report, we present the current state of our ongoing work on reconstructing Neural Radiance Fields (NERF) of general non-rigid scenes via ray bending. Non-rigid NeRF (NR-NeRF) takes RGB images of a deforming object (e.g., from a monocular video) as input and then learns a geometry and appearance representation that not only allows to reconstruct the input sequence but also to re-render any time step into novel camera views with high fidelity. In particular, we show that a consumer-grade camera is sufficient to synthesize convincing bullet-time videos of short and simple scenes. In addition, the resulting representation enables correspondence estimation across views and time, and provides rigidity scores for each point in the scene. We urge the reader to watch the supplemental videos for qualitative results. We will release our code.
Export
BibTeX
@online{Tretschk_2012.12247, TITLE = {Non-Rigid Neural Radiance Fields: Reconstruction and Novel View Synthesis of a Deforming Scene from Monocular Video}, AUTHOR = {Tretschk, Edgar and Tewari, Ayush and Golyanik, Vladislav and Zollh{\"o}fer, Michael and Lassner, Christoph and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.12247}, EPRINT = {2012.12247}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {In this tech report, we present the current state of our ongoing work on reconstructing Neural Radiance Fields (NERF) of general non-rigid scenes via ray bending. Non-rigid NeRF (NR-NeRF) takes RGB images of a deforming object (e.g., from a monocular video) as input and then learns a geometry and appearance representation that not only allows to reconstruct the input sequence but also to re-render any time step into novel camera views with high fidelity. In particular, we show that a consumer-grade camera is sufficient to synthesize convincing bullet-time videos of short and simple scenes. In addition, the resulting representation enables correspondence estimation across views and time, and provides rigidity scores for each point in the scene. We urge the reader to watch the supplemental videos for qualitative results. We will release our code.}, }
Endnote
%0 Report %A Tretschk, Edgar %A Tewari, Ayush %A Golyanik, Vladislav %A Zollhöfer, Michael %A Lassner, Christoph %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Non-Rigid Neural Radiance Fields: Reconstruction and Novel View Synthesis of a Deforming Scene from Monocular Video : %G eng %U http://hdl.handle.net/21.11116/0000-0007-EA00-1 %U https://arxiv.org/abs/2012.12247 %D 2020 %X In this tech report, we present the current state of our ongoing work on reconstructing Neural Radiance Fields (NERF) of general non-rigid scenes via ray bending. Non-rigid NeRF (NR-NeRF) takes RGB images of a deforming object (e.g., from a monocular video) as input and then learns a geometry and appearance representation that not only allows to reconstruct the input sequence but also to re-render any time step into novel camera views with high fidelity. In particular, we show that a consumer-grade camera is sufficient to synthesize convincing bullet-time videos of short and simple scenes. In addition, the resulting representation enables correspondence estimation across views and time, and provides rigidity scores for each point in the scene. We urge the reader to watch the supplemental videos for qualitative results. We will release our code. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
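The NR-NeRF entry above centers on "ray bending": sample points along each camera ray are displaced by a learned, per-frame offset field into a canonical frame before a static radiance field is queried. The PyTorch fragment below is only a conceptual sketch of that idea; the tiny network sizes, the 8-dimensional per-frame latent code, and the module names are illustrative assumptions, not the paper's architecture.

```python
import torch
import torch.nn as nn

# Hypothetical tiny networks standing in for the learned fields.
ray_bender = nn.Sequential(nn.Linear(3 + 8, 64), nn.ReLU(), nn.Linear(64, 3))
canonical_field = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 4))  # density + RGB

def query_deformed(points, frame_latent):
    # Predict a per-point offset conditioned on the frame's latent code,
    # bend the samples into the canonical frame, then query the static field.
    latent = frame_latent.expand(points.shape[0], -1)
    offsets = ray_bender(torch.cat([points, latent], dim=-1))
    out = canonical_field(points + offsets)
    density, rgb = out[:, :1], torch.sigmoid(out[:, 1:])
    return density, rgb

density, rgb = query_deformed(torch.rand(128, 3), torch.zeros(1, 8))
```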
Wang, J., Mueller, F., Bernard, F., et al. 2020a. RGB2Hands: Real-Time Tracking of 3D Hand Interactions from Monocular RGB Video. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2020) 39, 6.
Export
BibTeX
@article{Wang_ToG2020, TITLE = {{RGB2Hands}: {R}eal-Time Tracking of {3D} Hand Interactions from Monocular {RGB} Video}, AUTHOR = {Wang, Jiayi and Mueller, Franziska and Bernard, Florian and Sorli, Suzanne and Sotnychenko, Oleksandr and Qian, Neng and Otaduy, Miguel A. and Casas, Dan and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3414685.3417852}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia)}, VOLUME = {39}, NUMBER = {6}, EID = {218}, BOOKTITLE = {Proceedings of ACM SIGGRAPH Asia 2020}, EDITOR = {Myszkowski, Karol}, }
Endnote
%0 Journal Article %A Wang, Jiayi %A Mueller, Franziska %A Bernard, Florian %A Sorli, Suzanne %A Sotnychenko, Oleksandr %A Qian, Neng %A Otaduy, Miguel A. %A Casas, Dan %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T RGB2Hands: Real-Time Tracking of 3D Hand Interactions from Monocular RGB Video : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CF20-C %R 10.1145/3414685.3417852 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 6 %Z sequence number: 218 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH Asia 2020 %O ACM SIGGRAPH Asia 2020 SA'20 SA 2020
Wang, J., Mueller, F., Bernard, F., and Theobalt, C. 2020b. Generative Model-Based Loss to the Rescue: A Method to Overcome Annotation Errors for Depth-Based Hand Pose Estimation. https://arxiv.org/abs/2007.03073.
(arXiv: 2007.03073)
Abstract
We propose to use a model-based generative loss for training hand pose estimators on depth images based on a volumetric hand model. This additional loss allows training of a hand pose estimator that accurately infers the entire set of 21 hand keypoints while only using supervision for 6 easy-to-annotate keypoints (fingertips and wrist). We show that our partially-supervised method achieves results that are comparable to those of fully-supervised methods which enforce articulation consistency. Moreover, for the first time we demonstrate that such an approach can be used to train on datasets that have erroneous annotations, i.e. "ground truth" with notable measurement errors, while obtaining predictions that explain the depth images better than the given "ground truth".
Export
BibTeX
@online{Wang_2007.03073, TITLE = {Generative Model-Based Loss to the Rescue: {A} Method to Overcome Annotation Errors for Depth-Based Hand Pose Estimation}, AUTHOR = {Wang, Jiayi and Mueller, Franziska and Bernard, Florian and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2007.03073}, EPRINT = {2007.03073}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We propose to use a model-based generative loss for training hand pose estimators on depth images based on a volumetric hand model. This additional loss allows training of a hand pose estimator that accurately infers the entire set of 21 hand keypoints while only using supervision for 6 easy-to-annotate keypoints (fingertips and wrist). We show that our partially-supervised method achieves results that are comparable to those of fully-supervised methods which enforce articulation consistency. Moreover, for the first time we demonstrate that such an approach can be used to train on datasets that have erroneous annotations, i.e. "ground truth" with notable measurement errors, while obtaining predictions that explain the depth images better than the given "ground truth".}, }
Endnote
%0 Report %A Wang, Jiayi %A Mueller, Franziska %A Bernard, Florian %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Generative Model-Based Loss to the Rescue: A Method to Overcome Annotation Errors for Depth-Based Hand Pose Estimation : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E89A-6 %U https://arxiv.org/abs/2007.03073 %D 2020 %X We propose to use a model-based generative loss for training hand pose estimators on depth images based on a volumetric hand model. This additional loss allows training of a hand pose estimator that accurately infers the entire set of 21 hand keypoints while only using supervision for 6 easy-to-annotate keypoints (fingertips and wrist). We show that our partially-supervised method achieves results that are comparable to those of fully-supervised methods which enforce articulation consistency. Moreover, for the first time we demonstrate that such an approach can be used to train on datasets that have erroneous annotations, i.e. "ground truth" with notable measurement errors, while obtaining predictions that explain the depth images better than the given "ground truth". %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Wang, J., Mueller, F., Bernard, F., and Theobalt, C. 2020c. Generative Model-Based Loss to the Rescue: A Method to Overcome Annotation Errors for Depth-Based Hand Pose Estimation. 15th IEEE International Conference on Automatic Face and Gesture Recognition (FG 2020), IEEE.
Export
BibTeX
@inproceedings{Wang_FG2020, TITLE = {Generative Model-Based Loss to the Rescue: {A} Method to Overcome Annotation Errors for Depth-Based Hand Pose Estimation}, AUTHOR = {Wang, Jiayi and Mueller, Franziska and Bernard, Florian and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-3079-8}, DOI = {10.1109/FG47880.2020.00013}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {15th IEEE International Conference on Automatic Face and Gesture Recognition (FG 2020)}, EDITOR = {{\v S}truc, Vitomir and G{\'o}mez-Fern{\'a}ndez, Francisco}, PAGES = {101--108}, ADDRESS = {Buenos Aires, Argentina}, }
Endnote
%0 Conference Proceedings %A Wang, Jiayi %A Mueller, Franziska %A Bernard, Florian %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Generative Model-Based Loss to the Rescue: A Method to Overcome Annotation Errors for Depth-Based Hand Pose Estimation : %G eng %U http://hdl.handle.net/21.11116/0000-0008-1687-7 %R 10.1109/FG47880.2020.00013 %D 2020 %B 15th IEEE International Conference on Automatic Face and Gesture Recognition %Z date of event: 2020-11-16 - 2020-11-20 %C Buenos Aires, Argentina %B 15th IEEE International Conference on Automatic Face and Gesture Recognition %E Štruc, Vitomir; Gómez-Fernández, Francisco %P 101 - 108 %I IEEE %@ 978-1-7281-3079-8
Wang, P., Liu, L., Chen, N., Chu, H.-K., Theobalt, C., and Wang, W. 2020d. Vid2Curve: Simultaneous Camera Motion Estimation and Thin Structure Reconstruction from an RGB Video. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2020) 39, 4.
Export
BibTeX
@article{Wang_SIGGRAPH2020, TITLE = {{Vid2Curve}: {S}imultaneous Camera Motion Estimation and Thin Structure Reconstruction from an {RGB} Video}, AUTHOR = {Wang, Peng and Liu, Lingjie and Chen, Nenglun and Chu, Hung-Kuo and Theobalt, Christian and Wang, Wenping}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3386569.3392476}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {39}, NUMBER = {4}, EID = {132}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2020}, }
Endnote
%0 Journal Article %A Wang, Peng %A Liu, Lingjie %A Chen, Nenglun %A Chu, Hung-Kuo %A Theobalt, Christian %A Wang, Wenping %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Vid2Curve: Simultaneous Camera Motion Estimation and Thin Structure Reconstruction from an RGB Video : %G eng %U http://hdl.handle.net/21.11116/0000-0007-9A74-9 %R 10.1145/3386569.3392476 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 4 %Z sequence number: 132 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2020 %O ACM SIGGRAPH 2020 Virtual Conference ; 2020, 17-28 August
Wang, P., Liu, L., Chen, N., Chu, H.-K., Theobalt, C., and Wang, W. 2020e. Vid2Curve: Simultaneous Camera Motion Estimation and Thin Structure Reconstruction from an RGB Video. https://arxiv.org/abs/2005.03372.
(arXiv: 2005.03372)
Abstract
Thin structures, such as wire-frame sculptures, fences, cables, power lines, and tree branches, are common in the real world. It is extremely challenging to acquire their 3D digital models using traditional image-based or depth-based reconstruction methods because thin structures often lack distinct point features and have severe self-occlusion. We propose the first approach that simultaneously estimates camera motion and reconstructs the geometry of complex 3D thin structures in high quality from a color video captured by a handheld camera. Specifically, we present a new curve-based approach to estimate accurate camera poses by establishing correspondences between featureless thin objects in the foreground in consecutive video frames, without requiring visual texture in the background scene to lock on. Enabled by this effective curve-based camera pose estimation strategy, we develop an iterative optimization method with tailored measures on geometry, topology as well as self-occlusion handling for reconstructing 3D thin structures. Extensive validations on a variety of thin structures show that our method achieves accurate camera pose estimation and faithful reconstruction of 3D thin structures with complex shape and topology at a level that has not been attained by other existing reconstruction methods.
Export
BibTeX
@online{Wang2005.03372, TITLE = {{Vid2Curve}: {S}imultaneous Camera Motion Estimation and Thin Structure Reconstruction from an {RGB} Video}, AUTHOR = {Wang, Peng and Liu, Lingjie and Chen, Nenglun and Chu, Hung-Kuo and Theobalt, Christian and Wang, Wenping}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2005.03372}, EPRINT = {2005.03372}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Thin structures, such as wire-frame sculptures, fences, cables, power lines, and tree branches, are common in the real world. It is extremely challenging to acquire their 3D digital models using traditional image-based or depth-based reconstruction methods because thin structures often lack distinct point features and have severe self-occlusion. We propose the first approach that simultaneously estimates camera motion and reconstructs the geometry of complex 3D thin structures in high quality from a color video captured by a handheld camera. Specifically, we present a new curve-based approach to estimate accurate camera poses by establishing correspondences between featureless thin objects in the foreground in consecutive video frames, without requiring visual texture in the background scene to lock on. Enabled by this effective curve-based camera pose estimation strategy, we develop an iterative optimization method with tailored measures on geometry, topology as well as self-occlusion handling for reconstructing 3D thin structures. Extensive validations on a variety of thin structures show that our method achieves accurate camera pose estimation and faithful reconstruction of 3D thin structures with complex shape and topology at a level that has not been attained by other existing reconstruction methods.}, }
Endnote
%0 Report %A Wang, Peng %A Liu, Lingjie %A Chen, Nenglun %A Chu, Hung-Kuo %A Theobalt, Christian %A Wang, Wenping %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Vid2Curve: Simultaneous Camera Motion Estimation and Thin Structure Reconstruction from an RGB Video : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E122-4 %U https://arxiv.org/abs/2005.03372 %D 2020 %X Thin structures, such as wire-frame sculptures, fences, cables, power lines, and tree branches, are common in the real world. It is extremely challenging to acquire their 3D digital models using traditional image-based or depth-based reconstruction methods because thin structures often lack distinct point features and have severe self-occlusion. We propose the first approach that simultaneously estimates camera motion and reconstructs the geometry of complex 3D thin structures in high quality from a color video captured by a handheld camera. Specifically, we present a new curve-based approach to estimate accurate camera poses by establishing correspondences between featureless thin objects in the foreground in consecutive video frames, without requiring visual texture in the background scene to lock on. Enabled by this effective curve-based camera pose estimation strategy, we develop an iterative optimization method with tailored measures on geometry, topology as well as self-occlusion handling for reconstructing 3D thin structures. Extensive validations on a variety of thin structures show that our method achieves accurate camera pose estimation and faithful reconstruction of 3D thin structures with complex shape and topology at a level that has not been attained by other existing reconstruction methods. %K Computer Science, Graphics, cs.GR,Computer Science, Computer Vision and Pattern Recognition, cs.CV,eess.IV
Xu, L., Xu, W., Golyanik, V., Habermann, M., Fang, L., and Theobalt, C. 2020a. EventCap: Monocular 3D Capture of High-Speed Human Motions Using an Event Camera. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{Xu_CVPR2020, TITLE = {{EventCap}: {M}onocular {3D} Capture of High-Speed Human Motions Using an Event Camera}, AUTHOR = {Xu, Lan and Xu, Weipeng and Golyanik, Vladislav and Habermann, Marc and Fang, Lu and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00502}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {4967--4977}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Xu, Lan %A Xu, Weipeng %A Golyanik, Vladislav %A Habermann, Marc %A Fang, Lu %A Theobalt, Christian %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T EventCap: Monocular 3D Capture of High-Speed Human Motions Using an Event Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CF57-F %R 10.1109/CVPR42600.2020.00502 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 4967 - 4977 %I IEEE %@ 978-1-7281-7168-5
Xu, Y., Fan, T., Yuan, Y., and Singh, G. 2020b. Ladybird: Quasi-Monte Carlo Sampling for Deep Implicit Field Based 3D Reconstruction with Symmetry. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{Xu_ECCV20, TITLE = {Ladybird: {Quasi-Monte Carlo} Sampling for Deep Implicit Field Based {3D} Reconstruction with Symmetry}, AUTHOR = {Xu, Yifan and Fan, Tianqi and Yuan, Yi and Singh, Gurprit}, LANGUAGE = {eng}, ISBN = {978-3-030-58451-1}, DOI = {10.1007/978-3-030-58452-8_15}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {248--263}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12346}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Xu, Yifan %A Fan, Tianqi %A Yuan, Yi %A Singh, Gurprit %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Ladybird: Quasi-Monte Carlo Sampling for Deep Implicit Field Based 3D Reconstruction with Symmetry : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CEBE-C %R 10.1007/978-3-030-58452-8_15 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 248 - 263 %I Springer %@ 978-3-030-58451-1 %B Lecture Notes in Computer Science %N 12346
Xu, Y., Fan, T., Yuan, Y., and Singh, G. 2020c. Ladybird: Quasi-Monte Carlo Sampling for Deep Implicit Field Based 3D Reconstruction with Symmetry. https://arxiv.org/abs/2007.13393.
(arXiv: 2007.13393)
Abstract
Deep implicit field regression methods are effective for 3D reconstruction from single-view images. However, the impact of different sampling patterns on the reconstruction quality is not well-understood. In this work, we first study the effect of point set discrepancy on the network training. Based on Farthest Point Sampling algorithm, we propose a sampling scheme that theoretically encourages better generalization performance, and results in fast convergence for SGD-based optimization algorithms. Secondly, based on the reflective symmetry of an object, we propose a feature fusion method that alleviates issues due to self-occlusions which makes it difficult to utilize local image features. Our proposed system Ladybird is able to create high quality 3D object reconstructions from a single input image. We evaluate Ladybird on a large scale 3D dataset (ShapeNet) demonstrating highly competitive results in terms of Chamfer distance, Earth Mover's distance and Intersection Over Union (IoU).
Export
BibTeX
@online{Xu_arXiv2007.13393, TITLE = {Ladybird: {Quasi-Monte Carlo} Sampling for Deep Implicit Field Based {3D} Reconstruction with Symmetry}, AUTHOR = {Xu, Yifan and Fan, Tianqi and Yuan, Yi and Singh, Gurprit}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2007.13393}, EPRINT = {2007.13393}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Deep implicit field regression methods are effective for 3D reconstruction from single-view images. However, the impact of different sampling patterns on the reconstruction quality is not well-understood. In this work, we first study the effect of point set discrepancy on the network training. Based on Farthest Point Sampling algorithm, we propose a sampling scheme that theoretically encourages better generalization performance, and results in fast convergence for SGD-based optimization algorithms. Secondly, based on the reflective symmetry of an object, we propose a feature fusion method that alleviates issues due to self-occlusions which makes it difficult to utilize local image features. Our proposed system Ladybird is able to create high quality 3D object reconstructions from a single input image. We evaluate Ladybird on a large scale 3D dataset (ShapeNet) demonstrating highly competitive results in terms of Chamfer distance, Earth Mover's distance and Intersection Over Union (IoU).}, }
Endnote
%0 Report %A Xu, Yifan %A Fan, Tianqi %A Yuan, Yi %A Singh, Gurprit %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Ladybird: Quasi-Monte Carlo Sampling for Deep Implicit Field Based 3D Reconstruction with Symmetry : %G eng %U http://hdl.handle.net/21.11116/0000-0007-CECA-E %U https://arxiv.org/abs/2007.13393 %D 2020 %X Deep implicit field regression methods are effective for 3D reconstruction from single-view images. However, the impact of different sampling patterns on the reconstruction quality is not well-understood. In this work, we first study the effect of point set discrepancy on the network training. Based on Farthest Point Sampling algorithm, we propose a sampling scheme that theoretically encourages better generalization performance, and results in fast convergence for SGD-based optimization algorithms. Secondly, based on the reflective symmetry of an object, we propose a feature fusion method that alleviates issues due to self-occlusions which makes it difficult to utilize local image features. Our proposed system Ladybird is able to create high quality 3D object reconstructions from a single input image. We evaluate Ladybird on a large scale 3D dataset (ShapeNet) demonstrating highly competitive results in terms of Chamfer distance, Earth Mover's distance and Intersection Over Union (IoU). %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
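The Ladybird entries above build their sampling scheme on the Farthest Point Sampling algorithm. As a small, hedged illustration of that standard algorithm (not the authors' training pipeline), the NumPy function below greedily selects k well-spread points from a point cloud; the function name and toy data are assumptions for the example.

```python
import numpy as np

def farthest_point_sampling(points, k, seed=0):
    """Greedy farthest point sampling: pick k points that are mutually well
    spread out. points is an (N, 3) array; returns indices of the selected
    subset. Generic illustration, not the Ladybird code."""
    rng = np.random.default_rng(seed)
    n = points.shape[0]
    selected = [rng.integers(n)]                        # start from a random point
    # distance of every point to the currently selected set
    dist = np.linalg.norm(points - points[selected[0]], axis=1)
    for _ in range(1, k):
        idx = int(np.argmax(dist))                      # farthest from the set so far
        selected.append(idx)
        dist = np.minimum(dist, np.linalg.norm(points - points[idx], axis=1))
    return np.array(selected)

# Example: subsample 1024 surface points from a dense cloud
cloud = np.random.rand(10000, 3)
subset = cloud[farthest_point_sampling(cloud, 1024)]
```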
Yenamandra, T., Tewari, A., Bernard, F., et al. 2020. i3DMM: Deep Implicit 3D Morphable Model of Human Heads. https://arxiv.org/abs/2011.14143.
(arXiv: 2011.14143)
Abstract
We present the first deep implicit 3D morphable model (i3DMM) of full heads. Unlike earlier morphable face models it not only captures identity-specific geometry, texture, and expressions of the frontal face, but also models the entire head, including hair. We collect a new dataset consisting of 64 people with different expressions and hairstyles to train i3DMM. Our approach has the following favorable properties: (i) It is the first full head morphable model that includes hair. (ii) In contrast to mesh-based models it can be trained on merely rigidly aligned scans, without requiring difficult non-rigid registration. (iii) We design a novel architecture to decouple the shape model into an implicit reference shape and a deformation of this reference shape. With that, dense correspondences between shapes can be learned implicitly. (iv) This architecture allows us to semantically disentangle the geometry and color components, as color is learned in the reference space. Geometry is further disentangled as identity, expressions, and hairstyle, while color is disentangled as identity and hairstyle components. We show the merits of i3DMM using ablation studies, comparisons to state-of-the-art models, and applications such as semantic head editing and texture transfer. We will make our model publicly available.
Export
BibTeX
@online{Yenamandra_arXiv2011.14143, TITLE = {i{3D}MM: Deep Implicit {3D} Morphable Model of Human Heads}, AUTHOR = {Yenamandra, Tarun and Tewari, Ayush and Bernard, Florian and Seidel, Hans-Peter and Elgharib, Mohamed and Cremers, Daniel and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2011.14143}, EPRINT = {2011.14143}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present the first deep implicit 3D morphable model (i3DMM) of full heads. Unlike earlier morphable face models it not only captures identity-specific geometry, texture, and expressions of the frontal face, but also models the entire head, including hair. We collect a new dataset consisting of 64 people with different expressions and hairstyles to train i3DMM. Our approach has the following favorable properties: (i) It is the first full head morphable model that includes hair. (ii) In contrast to mesh-based models it can be trained on merely rigidly aligned scans, without requiring difficult non-rigid registration. (iii) We design a novel architecture to decouple the shape model into an implicit reference shape and a deformation of this reference shape. With that, dense correspondences between shapes can be learned implicitly. (iv) This architecture allows us to semantically disentangle the geometry and color components, as color is learned in the reference space. Geometry is further disentangled as identity, expressions, and hairstyle, while color is disentangled as identity and hairstyle components. We show the merits of i3DMM using ablation studies, comparisons to state-of-the-art models, and applications such as semantic head editing and texture transfer. We will make our model publicly available.}, }
Endnote
%0 Report %A Yenamandra, Tarun %A Tewari, Ayush %A Bernard, Florian %A Seidel, Hans-Peter %A Elgharib, Mohamed %A Cremers, Daniel %A Theobalt, Christian %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T i3DMM: Deep Implicit 3D Morphable Model of Human Heads : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B702-8 %U https://arxiv.org/abs/2011.14143 %D 2020 %X We present the first deep implicit 3D morphable model (i3DMM) of full heads. Unlike earlier morphable face models it not only captures identity-specific geometry, texture, and expressions of the frontal face, but also models the entire head, including hair. We collect a new dataset consisting of 64 people with different expressions and hairstyles to train i3DMM. Our approach has the following favorable properties: (i) It is the first full head morphable model that includes hair. (ii) In contrast to mesh-based models it can be trained on merely rigidly aligned scans, without requiring difficult non-rigid registration. (iii) We design a novel architecture to decouple the shape model into an implicit reference shape and a deformation of this reference shape. With that, dense correspondences between shapes can be learned implicitly. (iv) This architecture allows us to semantically disentangle the geometry and color components, as color is learned in the reference space. Geometry is further disentangled as identity, expressions, and hairstyle, while color is disentangled as identity and hairstyle components. We show the merits of i3DMM using ablation studies, comparisons to state-of-the-art models, and applications such as semantic head editing and texture transfer. We will make our model publicly available. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG
Yoon, J.S., Liu, L., Golyanik, V., Sarkar, K., Park, H.S., and Theobalt, C. 2020. Pose-Guided Human Animation from a Single Image in the Wild. https://arxiv.org/abs/2012.03796.
(arXiv: 2012.03796)
Abstract
We present a new pose transfer method for synthesizing a human animation from a single image of a person controlled by a sequence of body poses. Existing pose transfer methods exhibit significant visual artifacts when applying to a novel scene, resulting in temporal inconsistency and failures in preserving the identity and textures of the person. To address these limitations, we design a compositional neural network that predicts the silhouette, garment labels, and textures. Each modular network is explicitly dedicated to a subtask that can be learned from the synthetic data. At the inference time, we utilize the trained network to produce a unified representation of appearance and its labels in UV coordinates, which remains constant across poses. The unified representation provides an incomplete yet strong guidance to generating the appearance in response to the pose change. We use the trained network to complete the appearance and render it with the background. With these strategies, we are able to synthesize human animations that can preserve the identity and appearance of the person in a temporally coherent way without any fine-tuning of the network on the testing scene. Experiments show that our method outperforms the state-of-the-arts in terms of synthesis quality, temporal coherence, and generalization ability.
Export
BibTeX
@online{Yoon_2012.03796, TITLE = {Pose-Guided Human Animation from a Single Image in the Wild}, AUTHOR = {Yoon, Jae Shin and Liu, Lingjie and Golyanik, Vladislav and Sarkar, Kripasindhu and Park, Hyun Soo and Theobalt, Christian}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.03796}, EPRINT = {2012.03796}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a new pose transfer method for synthesizing a human animation from a single image of a person controlled by a sequence of body poses. Existing pose transfer methods exhibit significant visual artifacts when applying to a novel scene, resulting in temporal inconsistency and failures in preserving the identity and textures of the person. To address these limitations, we design a compositional neural network that predicts the silhouette, garment labels, and textures. Each modular network is explicitly dedicated to a subtask that can be learned from the synthetic data. At the inference time, we utilize the trained network to produce a unified representation of appearance and its labels in UV coordinates, which remains constant across poses. The unified representation provides an incomplete yet strong guidance to generating the appearance in response to the pose change. We use the trained network to complete the appearance and render it with the background. With these strategies, we are able to synthesize human animations that can preserve the identity and appearance of the person in a temporally coherent way without any fine-tuning of the network on the testing scene. Experiments show that our method outperforms the state-of-the-arts in terms of synthesis quality, temporal coherence, and generalization ability.}, }
Endnote
%0 Report %A Yoon, Jae Shin %A Liu, Lingjie %A Golyanik, Vladislav %A Sarkar, Kripasindhu %A Park, Hyun Soo %A Theobalt, Christian %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Pose-Guided Human Animation from a Single Image in the Wild : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E9F3-0 %U https://arxiv.org/abs/2012.03796 %D 2020 %X We present a new pose transfer method for synthesizing a human animation from a single image of a person controlled by a sequence of body poses. Existing pose transfer methods exhibit significant visual artifacts when applying to a novel scene, resulting in temporal inconsistency and failures in preserving the identity and textures of the person. To address these limitations, we design a compositional neural network that predicts the silhouette, garment labels, and textures. Each modular network is explicitly dedicated to a subtask that can be learned from the synthetic data. At the inference time, we utilize the trained network to produce a unified representation of appearance and its labels in UV coordinates, which remains constant across poses. The unified representation provides an incomplete yet strong guidance to generating the appearance in response to the pose change. We use the trained network to complete the appearance and render it with the background. With these strategies, we are able to synthesize human animations that can preserve the identity and appearance of the person in a temporally coherent way without any fine-tuning of the network on the testing scene. Experiments show that our method outperforms the state-of-the-arts in terms of synthesis quality, temporal coherence, and generalization ability. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Yu, Y., Meka, A., Elgharib, M., Seidel, H.-P., Theobalt, C., and Smith, W.A.P. 2020. Self-supervised Outdoor Scene Relighting. Computer Vision -- ECCV 2020, Springer.
Export
BibTeX
@inproceedings{yu_ECCV20, TITLE = {Self-supervised Outdoor Scene Relighting}, AUTHOR = {Yu, Ye and Meka, Abhimitra and Elgharib, Mohamed and Seidel, Hans-Peter and Theobalt, Christian and Smith, William A. P.}, LANGUAGE = {eng}, ISBN = {978-3-030-58541-9}, DOI = {10.1007/978-3-030-58542-6_6}, PUBLISHER = {Springer}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, DATE = {2020}, BOOKTITLE = {Computer Vision -- ECCV 2020}, EDITOR = {Vedaldi, Andrea and Bischof, Horst and Brox, Thomas and Frahm, Jan-Michael}, PAGES = {84--101}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {12367}, ADDRESS = {Glasgow, UK}, }
Endnote
%0 Conference Proceedings %A Yu, Ye %A Meka, Abhimitra %A Elgharib, Mohamed %A Seidel, Hans-Peter %A Theobalt, Christian %A Smith, William A. P. %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Self-supervised Outdoor Scene Relighting : %G eng %U http://hdl.handle.net/21.11116/0000-0007-B0F6-C %R 10.1007/978-3-030-58542-6_6 %D 2020 %B 16th European Conference on Computer Vision %Z date of event: 2020-08-23 - 2020-08-28 %C Glasgow, UK %B Computer Vision -- ECCV 2020 %E Vedaldi, Andrea; Bischof, Horst; Brox, Thomas; Frahm, Jan-Michael %P 84 - 101 %I Springer %@ 978-3-030-58541-9 %B Lecture Notes in Computer Science %N 12367
Zheng, Q., Babaei, V., Wetzstein, G., Seidel, H.-P., Zwicker, M., and Singh, G. 2020. Neural Light Field 3D Printing. ACM Transactions on Graphics (Proc. SIGGRAPH Asia 2020) 39, 6.
Export
BibTeX
@article{Zheng_TOG2020, TITLE = {Neural Light Field {3D} Printing}, AUTHOR = {Zheng, Quan and Babaei, Vahid and Wetzstein, Gordon and Seidel, Hans-Peter and Zwicker, Matthias and Singh, Gurprit}, ISSN = {0730-0301}, DOI = {10.1145/3414685.3417879}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. SIGGRAPH Asia)}, VOLUME = {39}, NUMBER = {6}, EID = {207}, BOOKTITLE = {Proceedings of the SIGGRAPH Asia 2020}, EDITOR = {Myszkowski, Karol}, }
Endnote
%0 Journal Article %A Zheng, Quan %A Babaei, Vahid %A Wetzstein, Gordon %A Seidel, Hans-Peter %A Zwicker, Matthias %A Singh, Gurprit %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Light Field 3D Printing : %U http://hdl.handle.net/21.11116/0000-0007-9AA8-E %R 10.1145/3414685.3417879 %7 2020 %D 2020 %J ACM Transactions on Graphics %V 39 %N 6 %Z sequence number: 207 %I ACM %C New York, NY %@ false %B Proceedings of the SIGGRAPH Asia 2020 %O SIGGRAPH Asia 2020 SA'20 SA 2020
Zhou, Y., Habermann, M., Xu, W., Habibie, I., Theobalt, C., and Xu, F. 2020a. Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020), IEEE.
Export
BibTeX
@inproceedings{zhou2019monocular, TITLE = {Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data}, AUTHOR = {Zhou, Yuxiao and Habermann, Marc and Xu, Weipeng and Habibie, Ikhsanul and Theobalt, Christian and Xu, Feng}, LANGUAGE = {eng}, ISBN = {978-1-7281-7168-5}, DOI = {10.1109/CVPR42600.2020.00539}, PUBLISHER = {IEEE}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2020)}, PAGES = {5345--5354}, ADDRESS = {Seattle, WA, USA (Virtual)}, }
Endnote
%0 Conference Proceedings %A Zhou, Yuxiao %A Habermann, Marc %A Xu, Weipeng %A Habibie, Ikhsanul %A Theobalt, Christian %A Xu, Feng %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data : %G eng %U http://hdl.handle.net/21.11116/0000-0006-A89E-B %R 10.1109/CVPR42600.2020.00539 %D 2020 %B 33rd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2020-06-14 - 2020-06-19 %C Seattle, WA, USA (Virtual) %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 5345 - 5354 %I IEEE %@ 978-1-7281-7168-5
Zhou, Y., Habermann, M., Xu, W., Habibie, I., Theobalt, C., and Xu, F. 2020b. Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data. https://arxiv.org/abs/2003.09572.
(arXiv: 2003.09572)
Abstract
We present a novel method for monocular hand shape and pose estimation at unprecedented runtime performance of 100fps and at state-of-the-art accuracy. This is enabled by a new learning based architecture designed such that it can make use of all the sources of available hand training data: image data with either 2D or 3D annotations, as well as stand-alone 3D animations without corresponding image data. It features a 3D hand joint detection module and an inverse kinematics module which regresses not only 3D joint positions but also maps them to joint rotations in a single feed-forward pass. This output makes the method more directly usable for applications in computer vision and graphics compared to only regressing 3D joint positions. We demonstrate that our architectural design leads to a significant quantitative and qualitative improvement over the state of the art on several challenging benchmarks. Our model is publicly available for future research.
Export
BibTeX
@online{Zhou2003.09572, TITLE = {Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data}, AUTHOR = {Zhou, Yuxiao and Habermann, Marc and Xu, Weipeng and Habibie, Ikhsanul and Theobalt, Christian and Xu, Feng}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2003.09572}, EPRINT = {2003.09572}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a novel method for monocular hand shape and pose estimation at unprecedented runtime performance of 100fps and at state-of-the-art accuracy. This is enabled by a new learning based architecture designed such that it can make use of all the sources of available hand training data: image data with either 2D or 3D annotations, as well as stand-alone 3D animations without corresponding image data. It features a 3D hand joint detection module and an inverse kinematics module which regresses not only 3D joint positions but also maps them to joint rotations in a single feed-forward pass. This output makes the method more directly usable for applications in computer vision and graphics compared to only regressing 3D joint positions. We demonstrate that our architectural design leads to a significant quantitative and qualitative improvement over the state of the art on several challenging benchmarks. Our model is publicly available for future research.}, }
Endnote
%0 Report %A Zhou, Yuxiao %A Habermann, Marc %A Xu, Weipeng %A Habibie, Ikhsanul %A Theobalt, Christian %A Xu, Feng %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Monocular Real-time Hand Shape and Motion Capture using Multi-modal Data : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E0D3-D %U https://arxiv.org/abs/2003.09572 %D 2020 %X We present a novel method for monocular hand shape and pose estimation at unprecedented runtime performance of 100fps and at state-of-the-art accuracy. This is enabled by a new learning based architecture designed such that it can make use of all the sources of available hand training data: image data with either 2D or 3D annotations, as well as stand-alone 3D animations without corresponding image data. It features a 3D hand joint detection module and an inverse kinematics module which regresses not only 3D joint positions but also maps them to joint rotations in a single feed-forward pass. This output makes the method more directly usable for applications in computer vision and graphics compared to only regressing 3D joint positions. We demonstrate that our architectural design leads to a significant quantitative and qualitative improvement over the state of the art on several challenging benchmarks. Our model is publicly available for future research. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Zhou, Y., Habermann, M., Habibie, I., Tewari, A., Theobalt, C., and Xu, F. 2020c. Monocular Real-time Full Body Capture with Inter-part Correlations. https://arxiv.org/abs/2012.06087.
(arXiv: 2012.06087)
Abstract
We present the first method for real-time full body capture that estimates shape and motion of body and hands together with a dynamic 3D face model from a single color image. Our approach uses a new neural network architecture that exploits correlations between body and hands at high computational efficiency. Unlike previous works, our approach is jointly trained on multiple datasets focusing on hand, body or face separately, without requiring data where all the parts are annotated at the same time, which is much more difficult to create at sufficient variety. The possibility of such multi-dataset training enables superior generalization ability. In contrast to earlier monocular full body methods, our approach captures more expressive 3D face geometry and color by estimating the shape, expression, albedo and illumination parameters of a statistical face model. Our method achieves competitive accuracy on public benchmarks, while being significantly faster and providing more complete face reconstructions.
Export
BibTeX
@online{Zhou_2012.06087, TITLE = {Monocular Real-time Full Body Capture with Inter-part Correlations}, AUTHOR = {Zhou, Yuxiao and Habermann, Marc and Habibie, Ikhsanul and Tewari, Ayush and Theobalt, Christian and Xu, Feng}, LANGUAGE = {eng}, URL = {https://arxiv.org/abs/2012.06087}, EPRINT = {2012.06087}, EPRINTTYPE = {arXiv}, YEAR = {2020}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present the first method for real-time full body capture that estimates shape and motion of body and hands together with a dynamic 3D face model from a single color image. Our approach uses a new neural network architecture that exploits correlations between body and hands at high computational efficiency. Unlike previous works, our approach is jointly trained on multiple datasets focusing on hand, body or face separately, without requiring data where all the parts are annotated at the same time, which is much more difficult to create at sufficient variety. The possibility of such multi-dataset training enables superior generalization ability. In contrast to earlier monocular full body methods, our approach captures more expressive 3D face geometry and color by estimating the shape, expression, albedo and illumination parameters of a statistical face model. Our method achieves competitive accuracy on public benchmarks, while being significantly faster and providing more complete face reconstructions.}, }
Endnote
%0 Report %A Zhou, Yuxiao %A Habermann, Marc %A Habibie, Ikhsanul %A Tewari, Ayush %A Theobalt, Christian %A Xu, Feng %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Monocular Real-time Full Body Capture with Inter-part Correlations : %G eng %U http://hdl.handle.net/21.11116/0000-0007-E9FB-8 %U https://arxiv.org/abs/2012.06087 %D 2020 %X We present the first method for real-time full body capture that estimates shape and motion of body and hands together with a dynamic 3D face model from a single color image. Our approach uses a new neural network architecture that exploits correlations between body and hands at high computational efficiency. Unlike previous works, our approach is jointly trained on multiple datasets focusing on hand, body or face separately, without requiring data where all the parts are annotated at the same time, which is much more difficult to create at sufficient variety. The possibility of such multi-dataset training enables superior generalization ability. In contrast to earlier monocular full body methods, our approach captures more expressive 3D face geometry and color by estimating the shape, expression, albedo and illumination parameters of a statistical face model. Our method achieves competitive accuracy on public benchmarks, while being significantly faster and providing more complete face reconstructions. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
2019
Aharon, I., Chen, R., Zorin, D., and Weber, O. 2019. Bounded Distortion Tetrahedral Metric Interpolation. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2019) 38, 6.
Export
BibTeX
@article{Aharon2019, TITLE = {Bounded Distortion Tetrahedral Metric Interpolation}, AUTHOR = {Aharon, Ido and Chen, Renjie and Zorin, Denis and Weber, Ofir}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3355089.3356569}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia)}, VOLUME = {38}, NUMBER = {6}, EID = {182}, BOOKTITLE = {Proceedings of ACM SIGGRAPH Asia 2019}, }
Endnote
%0 Journal Article %A Aharon, Ido %A Chen, Renjie %A Zorin, Denis %A Weber, Ofir %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Bounded Distortion Tetrahedral Metric Interpolation : %G eng %U http://hdl.handle.net/21.11116/0000-0008-04BA-2 %R 10.1145/3355089.3356569 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 6 %Z sequence number: 182 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH Asia 2019 %O ACM SIGGRAPH Asia 2019 Brisbane, Australia, 17 - 20 November 2019 SA'19 SA 2019
Alldieck, T., Pons-Moll, G., Theobalt, C., and Magnor, M.A. 2019a. Tex2Shape: Detailed Full Human Body Geometry From a Single Image. http://arxiv.org/abs/1904.08645.
(arXiv: 1904.08645)
Abstract
We present a simple yet effective method to infer detailed full human body shape from only a single photograph. Our model can infer full-body shape including face, hair, and clothing including wrinkles at interactive frame-rates. Results feature details even on parts that are occluded in the input image. Our main idea is to turn shape regression into an aligned image-to-image translation problem. The input to our method is a partial texture map of the visible region obtained from off-the-shelf methods. From a partial texture, we estimate detailed normal and vector displacement maps, which can be applied to a low-resolution smooth body model to add detail and clothing. Despite being trained purely with synthetic data, our model generalizes well to real-world photographs. Numerous results demonstrate the versatility and robustness of our method.
Export
BibTeX
@online{Alldieck_arXiv1904.08645, TITLE = {{Tex2Shape}: Detailed Full Human Body Geometry From a Single Image}, AUTHOR = {Alldieck, Thiemo and Pons-Moll, Gerard and Theobalt, Christian and Magnor, Marcus A.}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1904.08645}, EPRINT = {1904.08645}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a simple yet effective method to infer detailed full human body shape from only a single photograph. Our model can infer full-body shape including face, hair, and clothing including wrinkles at interactive frame-rates. Results feature details even on parts that are occluded in the input image. Our main idea is to turn shape regression into an aligned image-to-image translation problem. The input to our method is a partial texture map of the visible region obtained from off-the-shelf methods. From a partial texture, we estimate detailed normal and vector displacement maps, which can be applied to a low-resolution smooth body model to add detail and clothing. Despite being trained purely with synthetic data, our model generalizes well to real-world photographs. Numerous results demonstrate the versatility and robustness of our method.}, }
Endnote
%0 Report %A Alldieck, Thiemo %A Pons-Moll, Gerard %A Theobalt, Christian %A Magnor, Marcus A. %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Tex2Shape: Detailed Full Human Body Geometry From a Single Image : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7CF6-B %U http://arxiv.org/abs/1904.08645 %D 2019 %X We present a simple yet effective method to infer detailed full human body shape from only a single photograph. Our model can infer full-body shape including face, hair, and clothing including wrinkles at interactive frame-rates. Results feature details even on parts that are occluded in the input image. Our main idea is to turn shape regression into an aligned image-to-image translation problem. The input to our method is a partial texture map of the visible region obtained from off-the-shelf methods. From a partial texture, we estimate detailed normal and vector displacement maps, which can be applied to a low-resolution smooth body model to add detail and clothing. Despite being trained purely with synthetic data, our model generalizes well to real-world photographs. Numerous results demonstrate the versatility and robustness of our method. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Alldieck, T., Pons-Moll, G., Theobalt, C., and Magnor, M.A. 2019b. Tex2Shape: Detailed Full Human Body Geometry from a Single Image. International Conference on Computer Vision (ICCV 2019), IEEE.
Abstract
We present a simple yet effective method to infer detailed full human body shape from only a single photograph. Our model can infer full-body shape including face, hair, and clothing including wrinkles at interactive frame-rates. Results feature details even on parts that are occluded in the input image. Our main idea is to turn shape regression into an aligned image-to-image translation problem. The input to our method is a partial texture map of the visible region obtained from off-the-shelf methods. From a partial texture, we estimate detailed normal and vector displacement maps, which can be applied to a low-resolution smooth body model to add detail and clothing. Despite being trained purely with synthetic data, our model generalizes well to real-world photographs. Numerous results demonstrate the versatility and robustness of our method.
Export
BibTeX
@inproceedings{Alldieck_ICCV2019, TITLE = {{Tex2Shape}: Detailed Full Human Body Geometry from a Single Image}, AUTHOR = {Alldieck, Thiemo and Pons-Moll, Gerard and Theobalt, Christian and Magnor, Marcus A.}, LANGUAGE = {eng}, ISBN = {978-1-7281-4803-8}, DOI = {10.1109/ICCV.2019.00238}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, ABSTRACT = {We present a simple yet effective method to infer detailed full human body shape from only a single photograph. Our model can infer full-body shape including face, hair, and clothing including wrinkles at interactive frame-rates. Results feature details even on parts that are occluded in the input image. Our main idea is to turn shape regression into an aligned image-to-image translation problem. The input to our method is a partial texture map of the visible region obtained from off-the-shelf methods. From a partial texture, we estimate detailed normal and vector displacement maps, which can be applied to a low-resolution smooth body model to add detail and clothing. Despite being trained purely with synthetic data, our model generalizes well to real-world photographs. Numerous results demonstrate the versatility and robustness of our method.}, BOOKTITLE = {International Conference on Computer Vision (ICCV 2019)}, PAGES = {2293--2303}, ADDRESS = {Seoul, Korea}, }
Endnote
%0 Conference Proceedings %A Alldieck, Thiemo %A Pons-Moll, Gerard %A Theobalt, Christian %A Magnor, Marcus A. %+ External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Tex2Shape: Detailed Full Human Body Geometry from a Single Image : %G eng %U http://hdl.handle.net/21.11116/0000-0003-ECBE-E %R 10.1109/ICCV.2019.00238 %D 2019 %B International Conference on Computer Vision %Z date of event: 2019-10-27 - 2019-11-02 %C Seoul, Korea %X We present a simple yet effective method to infer detailed full human body shape from only a single photograph. Our model can infer full-body shape including face, hair, and clothing including wrinkles at interactive frame-rates. Results feature details even on parts that are occluded in the input image. Our main idea is to turn shape regression into an aligned image-to-image translation problem. The input to our method is a partial texture map of the visible region obtained from off-the-shelf methods. From a partial texture, we estimate detailed normal and vector displacement maps, which can be applied to a low-resolution smooth body model to add detail and clothing. Despite being trained purely with synthetic data, our model generalizes well to real-world photographs. Numerous results demonstrate the versatility and robustness of our method. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV %B International Conference on Computer Vision %P 2293 - 2303 %I IEEE %@ 978-1-7281-4803-8
Alldieck, T., Magnor, M.A., Bhatnagar, B.L., Theobalt, C., and Pons-Moll, G. 2019c. Learning to Reconstruct People in Clothing from a Single RGB Camera. http://arxiv.org/abs/1903.05885.
(arXiv: 1903.05885)
Abstract
We present a learning-based model to infer the personalized 3D shape of people from a few frames (1-8) of a monocular video in which the person is moving, in less than 10 seconds with a reconstruction accuracy of 5mm. Our model learns to predict the parameters of a statistical body model and instance displacements that add clothing and hair to the shape. The model achieves fast and accurate predictions based on two key design choices. First, by predicting shape in a canonical T-pose space, the network learns to encode the images of the person into pose-invariant latent codes, where the information is fused. Second, based on the observation that feed-forward predictions are fast but do not always align with the input images, we predict using both, bottom-up and top-down streams (one per view) allowing information to flow in both directions. Learning relies only on synthetic 3D data. Once learned, the model can take a variable number of frames as input, and is able to reconstruct shapes even from a single image with an accuracy of 6mm. Results on 3 different datasets demonstrate the efficacy and accuracy of our approach.
Export
BibTeX
@online{Alldieck_arXiv1903.05885, TITLE = {Learning to Reconstruct People in Clothing from a Single {RGB} Camera}, AUTHOR = {Alldieck, Thiemo and Magnor, Marcus A. and Bhatnagar, Bharat Lal and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1903.05885}, EPRINT = {1903.05885}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a learning-based model to infer the personalized 3D shape of people from a few frames (1-8) of a monocular video in which the person is moving, in less than 10 seconds with a reconstruction accuracy of 5mm. Our model learns to predict the parameters of a statistical body model and instance displacements that add clothing and hair to the shape. The model achieves fast and accurate predictions based on two key design choices. First, by predicting shape in a canonical T-pose space, the network learns to encode the images of the person into pose-invariant latent codes, where the information is fused. Second, based on the observation that feed-forward predictions are fast but do not always align with the input images, we predict using both, bottom-up and top-down streams (one per view) allowing information to flow in both directions. Learning relies only on synthetic 3D data. Once learned, the model can take a variable number of frames as input, and is able to reconstruct shapes even from a single image with an accuracy of 6mm. Results on 3 different datasets demonstrate the efficacy and accuracy of our approach.}, }
Endnote
%0 Report %A Alldieck, Thiemo %A Magnor, Marcus A. %A Bhatnagar, Bharat Lal %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T Learning to Reconstruct People in Clothing from a Single RGB Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE01-E %U http://arxiv.org/abs/1903.05885 %D 2019 %X We present a learning-based model to infer the personalized 3D shape of people from a few frames (1-8) of a monocular video in which the person is moving, in less than 10 seconds with a reconstruction accuracy of 5mm. Our model learns to predict the parameters of a statistical body model and instance displacements that add clothing and hair to the shape. The model achieves fast and accurate predictions based on two key design choices. First, by predicting shape in a canonical T-pose space, the network learns to encode the images of the person into pose-invariant latent codes, where the information is fused. Second, based on the observation that feed-forward predictions are fast but do not always align with the input images, we predict using both, bottom-up and top-down streams (one per view) allowing information to flow in both directions. Learning relies only on synthetic 3D data. Once learned, the model can take a variable number of frames as input, and is able to reconstruct shapes even from a single image with an accuracy of 6mm. Results on 3 different datasets demonstrate the efficacy and accuracy of our approach. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Alldieck, T., Magnor, M.A., Bhatnagar, B.L., Theobalt, C., and Pons-Moll, G. 2019d. Learning to Reconstruct People in Clothing from a Single RGB Camera. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
Export
BibTeX
@inproceedings{alldieck19cvpr, TITLE = {Learning to Reconstruct People in Clothing from a Single {RGB} Camera}, AUTHOR = {Alldieck, Thiemo and Magnor, Marcus A. and Bhatnagar, Bharat Lal and Theobalt, Christian and Pons-Moll, Gerard}, ISBN = {978-1-7281-3293-8}, DOI = {10.1109/CVPR.2019.00127}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, PAGES = {1175--1186}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Alldieck, Thiemo %A Magnor, Marcus A. %A Bhatnagar, Bharat Lal %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T Learning to Reconstruct People in Clothing from a Single RGB Camera : %U http://hdl.handle.net/21.11116/0000-0003-5F97-9 %R 10.1109/CVPR.2019.00127 %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-15 - 2019-06-20 %C Long Beach, CA, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 1175 - 1186 %I IEEE %@ 978-1-7281-3293-8
Bemana, M., Myszkowski, K., Seidel, H.-P., and Ritschel, T. 2019a. Neural View-Interpolation for Sparse Light Field Video. http://arxiv.org/abs/1910.13921.
(arXiv: 1910.13921)
Abstract
We suggest representing light field (LF) videos as "one-off" neural networks (NN), i.e., a learned mapping from view-plus-time coordinates to high-resolution color values, trained on sparse views. Initially, this sounds like a bad idea for three main reasons: First, a NN LF will likely have less quality than a same-sized pixel basis representation. Second, only few training data, e.g., 9 exemplars per frame are available for sparse LF videos. Third, there is no generalization across LFs, but across view and time instead. Consequently, a network needs to be trained for each LF video. Surprisingly, these problems can turn into substantial advantages: Other than the linear pixel basis, a NN has to come up with a compact, non-linear i.e., more intelligent, explanation of color, conditioned on the sparse view and time coordinates. As observed for many NN however, this representation now is interpolatable: if the image output for sparse view coordinates is plausible, it is for all intermediate, continuous coordinates as well. Our specific network architecture involves a differentiable occlusion-aware warping step, which leads to a compact set of trainable parameters and consequently fast learning and fast execution.
Export
BibTeX
@online{Bemana_arXiv1910.13921, TITLE = {Neural View-Interpolation for Sparse Light Field Video}, AUTHOR = {Bemana, Mojtaba and Myszkowski, Karol and Seidel, Hans-Peter and Ritschel, Tobias}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1910.13921}, EPRINT = {1910.13921}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We suggest representing light field (LF) videos as "one-off" neural networks (NN), i.e., a learned mapping from view-plus-time coordinates to high-resolution color values, trained on sparse views. Initially, this sounds like a bad idea for three main reasons: First, a NN LF will likely have less quality than a same-sized pixel basis representation. Second, only few training data, e.g., 9 exemplars per frame are available for sparse LF videos. Third, there is no generalization across LFs, but across view and time instead. Consequently, a network needs to be trained for each LF video. Surprisingly, these problems can turn into substantial advantages: Other than the linear pixel basis, a NN has to come up with a compact, non-linear i.e., more intelligent, explanation of color, conditioned on the sparse view and time coordinates. As observed for many NN however, this representation now is interpolatable: if the image output for sparse view coordinates is plausible, it is for all intermediate, continuous coordinates as well. Our specific network architecture involves a differentiable occlusion-aware warping step, which leads to a compact set of trainable parameters and consequently fast learning and fast execution.}, }
Endnote
%0 Report %A Bemana, Mojtaba %A Myszkowski, Karol %A Seidel, Hans-Peter %A Ritschel, Tobias %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Neural View-Interpolation for Sparse Light Field Video : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7B16-9 %U http://arxiv.org/abs/1910.13921 %D 2019 %X We suggest representing light field (LF) videos as "one-off" neural networks (NN), i.e., a learned mapping from view-plus-time coordinates to high-resolution color values, trained on sparse views. Initially, this sounds like a bad idea for three main reasons: First, a NN LF will likely have less quality than a same-sized pixel basis representation. Second, only few training data, e.g., 9 exemplars per frame are available for sparse LF videos. Third, there is no generalization across LFs, but across view and time instead. Consequently, a network needs to be trained for each LF video. Surprisingly, these problems can turn into substantial advantages: Other than the linear pixel basis, a NN has to come up with a compact, non-linear i.e., more intelligent, explanation of color, conditioned on the sparse view and time coordinates. As observed for many NN however, this representation now is interpolatable: if the image output for sparse view coordinates is plausible, it is for all intermediate, continuous coordinates as well. Our specific network architecture involves a differentiable occlusion-aware warping step, which leads to a compact set of trainable parameters and consequently fast learning and fast execution. %K Computer Science, Graphics, cs.GR,Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Learning, cs.LG,eess.IV
Bemana, M., Keinert, J., Myszkowski, K., et al. 2019b. Learning to Predict Image-based Rendering Artifacts with Respect to a Hidden Reference Image. Computer Graphics Forum (Proc. Pacific Graphics 2019) 38, 7.
Export
BibTeX
@article{Bemana_PG2019, TITLE = {Learning to Predict Image-based Rendering Artifacts with Respect to a Hidden Reference Image}, AUTHOR = {Bemana, Mojtaba and Keinert, Joachim and Myszkowski, Karol and B{\"a}tz, Michel and Ziegler, Matthias and Seidel, Hans-Peter and Ritschel, Tobias}, LANGUAGE = {eng}, ISSN = {1467-8659}, DOI = {10.1111/cgf.13862}, PUBLISHER = {Wiley-Blackwell}, ADDRESS = {Oxford, UK}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Computer Graphics Forum (Proc. Pacific Graphics)}, VOLUME = {38}, NUMBER = {7}, PAGES = {579--589}, BOOKTITLE = {27th Annual International Conference on Computer Graphics and Applications (Pacific Graphics 2019)}, }
Endnote
%0 Journal Article %A Bemana, Mojtaba %A Keinert, Joachim %A Myszkowski, Karol %A Bätz, Michel %A Ziegler, Matthias %A Seidel, Hans-Peter %A Ritschel, Tobias %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Learning to Predict Image-based Rendering Artifacts with Respect to a Hidden Reference Image : %G eng %U http://hdl.handle.net/21.11116/0000-0004-9BC5-F %R 10.1111/cgf.13862 %7 2019 %D 2019 %J Computer Graphics Forum %V 38 %N 7 %& 579 %P 579 - 589 %I Wiley-Blackwell %C Oxford, UK %@ false %B 27th Annual International Conference on Computer Graphics and Applications %O Pacific Graphics 2019 PG 2019 Seoul, October 14-17, 2019
Bernard, F., Thunberg, J., Goncalves, J., and Theobalt, C. 2019a. Synchronisation of Partial Multi-Matchings via Non-negative Factorisations. Pattern Recognition 92.
Export
BibTeX
@article{Bernard2019, TITLE = {Synchronisation of Partial Multi-Matchings via Non-negative Factorisations}, AUTHOR = {Bernard, Florian and Thunberg, Johan and Goncalves, Jorge and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0031-3203}, DOI = {10.1016/j.patcog.2019.03.021}, PUBLISHER = {Pergamon}, ADDRESS = {Oxford}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Pattern Recognition}, VOLUME = {92}, PAGES = {146--155}, }
Endnote
%0 Journal Article %A Bernard, Florian %A Thunberg, Johan %A Goncalves, Jorge %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Synchronisation of Partial Multi-Matchings via Non-negative Factorisations : %G eng %U http://hdl.handle.net/21.11116/0000-0003-B2EC-A %R 10.1016/j.patcog.2019.03.021 %7 2019-03-23 %D 2019 %J Pattern Recognition %O Pattern Recognit. %V 92 %& 146 %P 146 - 155 %I Pergamon %C Oxford %@ false
Bernard, F., Thunberg, J., Swoboda, P., and Theobalt, C. 2019b. HiPPI: Higher-Order Projected Power Iterations for Scalable Multi-Matching. International Conference on Computer Vision (ICCV 2019), IEEE.
Export
BibTeX
@inproceedings{Bernard_ICCV2019, TITLE = {{HiPPI}: {H}igher-Order Projected Power Iterations for Scalable Multi-Matching}, AUTHOR = {Bernard, Florian and Thunberg, Johan and Swoboda, Paul and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-4803-8}, DOI = {10.1109/ICCV.2019.01038}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {International Conference on Computer Vision (ICCV 2019)}, PAGES = {10283--10292}, ADDRESS = {Seoul, Korea}, }
Endnote
%0 Conference Proceedings %A Bernard, Florian %A Thunberg, Johan %A Swoboda, Paul %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T HiPPI: Higher-Order Projected Power Iterations for Scalable Multi-Matching : %G eng %U http://hdl.handle.net/21.11116/0000-0006-DC81-0 %R 10.1109/ICCV.2019.01038 %D 2019 %B International Conference on Computer Vision %Z date of event: 2019-10-27 - 2019-11-02 %C Seoul, Korea %B International Conference on Computer Vision %P 10283 - 10292 %I IEEE %@ 978-1-7281-4803-8
Bhatnagar, B.L., Tiwari, G., Theobalt, C., and Pons-Moll, G. 2019a. Multi-Garment Net: Learning to Dress 3D People from Images. http://arxiv.org/abs/1908.06903.
(arXiv: 1908.06903)
Abstract
We present Multi-Garment Network (MGN), a method to predict body shape and clothing, layered on top of the SMPL model from a few frames (1-8) of a video. Several experiments demonstrate that this representation allows higher level of control when compared to single mesh or voxel representations of shape. Our model allows to predict garment geometry, relate it to the body shape, and transfer it to new body shapes and poses. To train MGN, we leverage a digital wardrobe containing 712 digital garments in correspondence, obtained with a novel method to register a set of clothing templates to a dataset of real 3D scans of people in different clothing and poses. Garments from the digital wardrobe, or predicted by MGN, can be used to dress any body shape in arbitrary poses. We will make publicly available the digital wardrobe, the MGN model, and code to dress SMPL with the garments.
Export
BibTeX
@online{Bhatnagar_arXiv1908.06903, TITLE = {Multi-Garment Net: {L}earning to Dress {3D} People from Images}, AUTHOR = {Bhatnagar, Bharat Lal and Tiwari, Garvita and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1908.06903}, EPRINT = {1908.06903}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present Multi-Garment Network (MGN), a method to predict body shape and clothing, layered on top of the SMPL model from a few frames (1-8) of a video. Several experiments demonstrate that this representation allows higher level of control when compared to single mesh or voxel representations of shape. Our model allows to predict garment geometry, relate it to the body shape, and transfer it to new body shapes and poses. To train MGN, we leverage a digital wardrobe containing 712 digital garments in correspondence, obtained with a novel method to register a set of clothing templates to a dataset of real 3D scans of people in different clothing and poses. Garments from the digital wardrobe, or predicted by MGN, can be used to dress any body shape in arbitrary poses. We will make publicly available the digital wardrobe, the MGN model, and code to dress SMPL with the garments.}, }
Endnote
%0 Report %A Bhatnagar, Bharat Lal %A Tiwari, Garvita %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T Multi-Garment Net: Learning to Dress 3D People from Images : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7D67-C %U http://arxiv.org/abs/1908.06903 %D 2019 %X We present Multi-Garment Network (MGN), a method to predict body shape and clothing, layered on top of the SMPL model from a few frames (1-8) of a video. Several experiments demonstrate that this representation allows higher level of control when compared to single mesh or voxel representations of shape. Our model allows to predict garment geometry, relate it to the body shape, and transfer it to new body shapes and poses. To train MGN, we leverage a digital wardrobe containing 712 digital garments in correspondence, obtained with a novel method to register a set of clothing templates to a dataset of real 3D scans of people in different clothing and poses. Garments from the digital wardrobe, or predicted by MGN, can be used to dress any body shape in arbitrary poses. We will make publicly available the digital wardrobe, the MGN model, and code to dress SMPL with the garments. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Bhatnagar, B.L., Tiwari, G., Theobalt, C., and Pons-Moll, G. 2019b. Multi-Garment Net: Learning to Dress 3D People from Images. International Conference on Computer Vision (ICCV 2019), IEEE.
Export
BibTeX
@inproceedings{bhatnagar_ICCV2019, TITLE = {Multi-Garment {N}et: {L}earning to Dress {3D} People from Images}, AUTHOR = {Bhatnagar, Bharat Lal and Tiwari, Garvita and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, ISBN = {978-1-7281-4803-8}, DOI = {10.1109/ICCV.2019.00552}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {International Conference on Computer Vision (ICCV 2019)}, PAGES = {5419--5429}, ADDRESS = {Seoul, Korea}, }
Endnote
%0 Conference Proceedings %A Bhatnagar, Bharat Lal %A Tiwari, Garvita %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T Multi-Garment Net: Learning to Dress 3D People from Images : %G eng %U http://hdl.handle.net/21.11116/0000-0004-89E8-C %R 10.1109/ICCV.2019.00552 %D 2019 %B International Conference on Computer Vision %Z date of event: 2019-10-27 - 2019-11-02 %C Seoul, Korea %B International Conference on Computer Vision %P 5419 - 5429 %I IEEE %@ 978-1-7281-4803-8
Bojja, A.K., Mueller, F., Malireddi, S.R., et al. 2019. HandSeg: An Automatically Labeled Dataset for Hand Segmentation from Depth Images. 16th Conference on Computer and Robot Vision (CRV 2019), IEEE.
Export
BibTeX
@inproceedings{Malireddi_CRV2019, TITLE = {{HandSeg}: {An Automatically Labeled Dataset for Hand Segmentation from Depth Images}}, AUTHOR = {Bojja, Abhishake Kumar and Mueller, Franziska and Malireddi, Sri Raghu and Oberweger, Markus and Lepetit, Vincent and Theobalt, Christian and Yi, Kwang Moo and Tagliasacchi, Andrea}, LANGUAGE = {eng}, ISBN = {978-1-7281-1838-3}, DOI = {10.1109/CRV.2019.00028}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {16th Conference on Computer and Robot Vision (CRV 2019)}, PAGES = {151--158}, ADDRESS = {Kingston, Canada}, }
Endnote
%0 Conference Proceedings %A Bojja, Abhishake Kumar %A Mueller, Franziska %A Malireddi, Sri Raghu %A Oberweger, Markus %A Lepetit, Vincent %A Theobalt, Christian %A Yi, Kwang Moo %A Tagliasacchi, Andrea %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T HandSeg: An Automatically Labeled Dataset for Hand Segmentation from Depth Images : %G eng %U http://hdl.handle.net/21.11116/0000-0005-6BC4-6 %R 10.1109/CRV.2019.00028 %D 2019 %B 16th Conference on Computer and Robot Vision %Z date of event: 2019-05-29 - 2019-05-31 %C Kingston, Canada %B 16th Conference on Computer and Robot Vision %P 151 - 158 %I IEEE %@ 978-1-7281-1838-3
Božič, A., Zollhöfer, M., Theobalt, C., and Nießner, M. 2019. DeepDeform: Learning Non-rigid RGB-D Reconstruction with Semi-supervised Data. http://arxiv.org/abs/1912.04302.
(arXiv: 1912.04302)
Abstract
Applying data-driven approaches to non-rigid 3D reconstruction has been difficult, which we believe can be attributed to the lack of a large-scale training corpus. One recent approach proposes self-supervision based on non-rigid reconstruction. Unfortunately, this method fails for important cases such as highly non-rigid deformations. We first address this problem of lack of data by introducing a novel semi-supervised strategy to obtain dense inter-frame correspondences from a sparse set of annotations. This way, we obtain a large dataset of 400 scenes, over 390,000 RGB-D frames, and 2,537 densely aligned frame pairs; in addition, we provide a test set along with several metrics for evaluation. Based on this corpus, we introduce a data-driven non-rigid feature matching approach, which we integrate into an optimization-based reconstruction pipeline. Here, we propose a new neural network that operates on RGB-D frames, while maintaining robustness under large non-rigid deformations and producing accurate predictions. Our approach significantly outperforms both existing non-rigid reconstruction methods that do not use learned data terms, as well as learning-based approaches that only use self-supervision.
Export
BibTeX
@online{Bozic_arXiv1912.04302, TITLE = {{DeepDeform}: Learning Non-rigid {RGB}-D Reconstruction with Semi-supervised Data}, AUTHOR = {Bo{\v z}i{\v c}, Alja{\v z} and Zollh{\"o}fer, Michael and Theobalt, Christian and Nie{\ss}ner, Matthias}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1912.04302}, EPRINT = {1912.04302}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Applying data-driven approaches to non-rigid 3D reconstruction has been difficult, which we believe can be attributed to the lack of a large-scale training corpus. One recent approach proposes self-supervision based on non-rigid reconstruction. Unfortunately, this method fails for important cases such as highly non-rigid deformations. We first address this problem of lack of data by introducing a novel semi-supervised strategy to obtain dense inter-frame correspondences from a sparse set of annotations. This way, we obtain a large dataset of 400 scenes, over 390,000 RGB-D frames, and 2,537 densely aligned frame pairs; in addition, we provide a test set along with several metrics for evaluation. Based on this corpus, we introduce a data-driven non-rigid feature matching approach, which we integrate into an optimization-based reconstruction pipeline. Here, we propose a new neural network that operates on RGB-D frames, while maintaining robustness under large non-rigid deformations and producing accurate predictions. Our approach significantly outperforms both existing non-rigid reconstruction methods that do not use learned data terms, as well as learning-based approaches that only use self-supervision.}, }
Endnote
%0 Report %A Božič, Aljaž %A Zollhöfer, Michael %A Theobalt, Christian %A Nießner, Matthias %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T DeepDeform: Learning Non-rigid RGB-D Reconstruction with Semi-supervised Data : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7DDE-6 %U http://arxiv.org/abs/1912.04302 %D 2019 %X Applying data-driven approaches to non-rigid 3D reconstruction has been difficult, which we believe can be attributed to the lack of a large-scale training corpus. One recent approach proposes self-supervision based on non-rigid reconstruction. Unfortunately, this method fails for important cases such as highly non-rigid deformations. We first address this problem of lack of data by introducing a novel semi-supervised strategy to obtain dense inter-frame correspondences from a sparse set of annotations. This way, we obtain a large dataset of 400 scenes, over 390,000 RGB-D frames, and 2,537 densely aligned frame pairs; in addition, we provide a test set along with several metrics for evaluation. Based on this corpus, we introduce a data-driven non-rigid feature matching approach, which we integrate into an optimization-based reconstruction pipeline. Here, we propose a new neural network that operates on RGB-D frames, while maintaining robustness under large non-rigid deformations and producing accurate predictions. Our approach significantly outperforms both existing non-rigid reconstruction methods that do not use learned data terms, as well as learning-based approaches that only use self-supervision. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Castelli Aleardi, L., Salihoglu, S., Singh, G., and Ovsjanikov, M. 2019. Spectral Measures of Distortion for Change Detection in Dynamic Graphs. Complex Networks and Their Applications VII, Springer.
Export
BibTeX
@inproceedings{Castelli_COMPLEX2018, TITLE = {Spectral Measures of Distortion for Change Detection in Dynamic Graphs}, AUTHOR = {Castelli Aleardi, Luca and Salihoglu, Semih and Singh, Gurprit and Ovsjanikov, Maks}, LANGUAGE = {eng}, ISBN = {978-3-030-05413-7; 978-3-030-05414-4}, DOI = {10.1007/978-3-030-05414-4_5}, PUBLISHER = {Springer}, YEAR = {2018}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {Complex Networks and Their Applications VII}, EDITOR = {Aiello, Luca Maria and Cherifi, Chantal and Cherifi, Hocine and Lambiotte, Renaud and Li{\'o}, Pietro and Rocha, Luis M.}, PAGES = {54--66}, SERIES = {Studies in Computational Intelligence}, VOLUME = {813}, ADDRESS = {Cambridge, UK}, }
Endnote
%0 Conference Proceedings %A Castelli Aleardi, Luca %A Salihoglu, Semih %A Singh, Gurprit %A Ovsjanikov, Maks %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Spectral Measures of Distortion for Change Detection in Dynamic Graphs : %G eng %U http://hdl.handle.net/21.11116/0000-0003-F1F9-4 %R 10.1007/978-3-030-05414-4_5 %D 2019 %B 7th International Conference on Complex Networks and Their Applications %Z date of event: 2018-12-11 - 2018-12-13 %C Cambridge, UK %B Complex Networks and Their Applications VII %E Aiello, Luca Maria; Cherifi, Chantal; Cherifi, Hocine; Lambiotte, Renaud; Lió, Pietro; Rocha, Luis M. %P 54 - 66 %I Springer %@ 978-3-030-05413-7 978-3-030-05414-4 %B Studies in Computational Intelligence %N 813
Díaz Barros, J.M., Golyanik, V., Varanasi, K., and Stricker, D. 2019. Face It!: A Pipeline for Real-Time Performance-Driven Facial Animation. IEEE International Conference on Image Processing (ICIP 2019), IEEE.
Export
BibTeX
@inproceedings{DiazBarros_ICIP2019, TITLE = {Face It!: {A} Pipeline for Real-Time Performance-Driven Facial Animation}, AUTHOR = {D{\'i}az Barros, Jilliam Mar{\'i}a and Golyanik, Vladislav and Varanasi, Kiran and Stricker, Didier}, LANGUAGE = {eng}, ISBN = {978-1-5386-6249-6}, DOI = {10.1109/ICIP.2019.8803330}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE International Conference on Image Processing (ICIP 2019)}, PAGES = {2209--2213}, ADDRESS = {Taipei, Taiwan}, }
Endnote
%0 Conference Proceedings %A Díaz Barros, Jilliam María %A Golyanik, Vladislav %A Varanasi, Kiran %A Stricker, Didier %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Face It!: A Pipeline for Real-Time Performance-Driven Facial Animation : %G eng %U http://hdl.handle.net/21.11116/0000-0005-982B-0 %R 10.1109/ICIP.2019.8803330 %D 2019 %B IEEE International Conference on Image Processing %Z date of event: 2019-09-22 - 2019-09-25 %C Taipei, Taiwan %B IEEE International Conference on Image Processing %P 2209 - 2213 %I IEEE %@ 978-1-5386-6249-6
Dokter, M., Hladký, J., Parger, M., Schmalstieg, D., Seidel, H.-P., and Steinberger, M. 2019. Hierarchical Rasterization of Curved Primitives for Vector Graphics Rendering on the GPU. Computer Graphics Forum (Proc. EUROGRAPHICS 2019) 38, 2.
Export
BibTeX
@article{Dokter_EG2019, TITLE = {Hierarchical Rasterization of Curved Primitives for Vector Graphics Rendering on the {GPU}}, AUTHOR = {Dokter, Mark and Hladk{\'y}, Jozef and Parger, Mathias and Schmalstieg, Dieter and Seidel, Hans-Peter and Steinberger, Markus}, LANGUAGE = {eng}, ISSN = {0167-7055}, DOI = {10.1111/cgf.13622}, PUBLISHER = {Wiley-Blackwell}, ADDRESS = {Oxford}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Computer Graphics Forum (Proc. EUROGRAPHICS)}, VOLUME = {38}, NUMBER = {2}, PAGES = {93--103}, BOOKTITLE = {EUROGRAPHICS 2019 STAR -- State of The Art Reports}, }
Endnote
%0 Journal Article %A Dokter, Mark %A Hladký, Jozef %A Parger, Mathias %A Schmalstieg, Dieter %A Seidel, Hans-Peter %A Steinberger, Markus %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Hierarchical Rasterization of Curved Primitives for Vector Graphics Rendering on the GPU : %G eng %U http://hdl.handle.net/21.11116/0000-0002-FC80-1 %R 10.1111/cgf.13622 %7 2019 %D 2019 %J Computer Graphics Forum %V 38 %N 2 %& 93 %P 93 - 103 %I Wiley-Blackwell %C Oxford %@ false %B EUROGRAPHICS 2019 STAR – State of The Art Reports %O EUROGRAPHICS 2019 The 40th Annual Conference of the European Association for Computer Graphics ; Genova, Italy, May 6-10, 2019 EG 2019
Egger, B., Smith, W., Theobalt, C., and Vetter, T., eds. 2019a. 3D Morphable Models. Schloss Dagstuhl.
Export
BibTeX
@proceedings{Egger_2019, TITLE = {3D Morphable Models}, EDITOR = {Egger, Bernhard and Smith, William and Theobalt, Christian and Vetter, Thomas}, LANGUAGE = {eng}, URL = {urn:nbn:de:0030-drops-112894}, DOI = {10.4230/DagRep.9.3.16}, PUBLISHER = {Schloss Dagstuhl}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, SERIES = {Dagstuhl Reports}, VOLUME = {9}, ISSUE = {3}, PAGES = {16--38}, ADDRESS = {Dagstuhl, Germany}, }
Endnote
%0 Conference Proceedings %E Egger, Bernhard %E Smith, William %E Theobalt, Christian %E Vetter, Thomas %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T 3D Morphable Models : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7DF9-7 %U urn:nbn:de:0030-drops-112894 %R 10.4230/DagRep.9.3.16 %I Schloss Dagstuhl %D 2019 %B Dagstuhl Seminar 17201 "3D Morphable Models" %Z date of event: - %C Dagstuhl, Germany %S Dagstuhl Reports %V 9 %P 16 - 38 %U http://drops.dagstuhl.de/opus/volltexte/2019/11289/
Egger, B., Smith, W.A.P., Tewari, A., et al. 2019b. 3D Morphable Face Models -- Past, Present and Future. http://arxiv.org/abs/1909.01815.
(arXiv: 1909.01815)
Abstract
In this paper, we provide a detailed survey of 3D Morphable Face Models over the 20 years since they were first proposed. The challenges in building and applying these models, namely capture, modeling, image formation, and image analysis, are still active research topics, and we review the state-of-the-art in each of these areas. We also look ahead, identifying unsolved challenges, proposing directions for future research and highlighting the broad range of current and future applications.
Export
BibTeX
@online{Egger_arXIv1909.01815, TITLE = {{3D} Morphable Face Models -- Past, Present and Future}, AUTHOR = {Egger, Bernhard and Smith, William A. P. and Tewari, Ayush and Wuhrer, Stefanie and Zollh{\"o}fer, Michael and Beeler, Thabo and Bernard, Florian and Bolkart, Timo and Kortylewski, Adam and Romdhani, Sami and Theobalt, Christian and Blanz, Volker and Vetter, Thomas}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1909.01815}, EPRINT = {1909.01815}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {In this paper, we provide a detailed survey of 3D Morphable Face Models over the 20 years since they were first proposed. The challenges in building and applying these models, namely capture, modeling, image formation, and image analysis, are still active research topics, and we review the state-of-the-art in each of these areas. We also look ahead, identifying unsolved challenges, proposing directions for future research and highlighting the broad range of current and future applications.}, }
Endnote
%0 Report %A Egger, Bernhard %A Smith, William A. P. %A Tewari, Ayush %A Wuhrer, Stefanie %A Zollhöfer, Michael %A Beeler, Thabo %A Bernard, Florian %A Bolkart, Timo %A Kortylewski, Adam %A Romdhani, Sami %A Theobalt, Christian %A Blanz, Volker %A Vetter, Thomas %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T 3D Morphable Face Models -- Past, Present and Future : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7D8E-0 %U http://arxiv.org/abs/1909.01815 %D 2019 %X In this paper, we provide a detailed survey of 3D Morphable Face Models over the 20 years since they were first proposed. The challenges in building and applying these models, namely capture, modeling, image formation, and image analysis, are still active research topics, and we review the state-of-the-art in each of these areas. We also look ahead, identifying unsolved challenges, proposing directions for future research and highlighting the broad range of current and future applications. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG
Elgharib, M., Mallikarjun B R, Tewari, A., et al. 2019. EgoFace: Egocentric Face Performance Capture and Videorealistic Reenactment. http://arxiv.org/abs/1905.10822.
(arXiv: 1905.10822)
Abstract
Face performance capture and reenactment techniques use multiple cameras and sensors, positioned at a distance from the face or mounted on heavy wearable devices. This limits their applications in mobile and outdoor environments. We present EgoFace, a radically new lightweight setup for face performance capture and front-view videorealistic reenactment using a single egocentric RGB camera. Our lightweight setup allows operations in uncontrolled environments, and lends itself to telepresence applications such as video-conferencing from dynamic environments. The input image is projected into a low dimensional latent space of the facial expression parameters. Through careful adversarial training of the parameter-space synthetic rendering, a videorealistic animation is produced. Our problem is challenging as the human visual system is sensitive to the smallest face irregularities that could occur in the final results. This sensitivity is even stronger for video results. Our solution is trained in a pre-processing stage, through a supervised manner without manual annotations. EgoFace captures a wide variety of facial expressions, including mouth movements and asymmetrical expressions. It works under varying illuminations, background, movements, handles people from different ethnicities and can operate in real time.
Export
BibTeX
@online{Elgharib_arXiv1905.10822, TITLE = {{EgoFace}: Egocentric Face Performance Capture and Videorealistic Reenactment}, AUTHOR = {Elgharib, Mohamed and Mallikarjun B R, and Tewari, Ayush and Kim, Hyeongwoo and Liu, Wentao and Seidel, Hans-Peter and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1905.10822}, EPRINT = {1905.10822}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Face performance capture and reenactment techniques use multiple cameras and sensors, positioned at a distance from the face or mounted on heavy wearable devices. This limits their applications in mobile and outdoor environments. We present EgoFace, a radically new lightweight setup for face performance capture and front-view videorealistic reenactment using a single egocentric RGB camera. Our lightweight setup allows operations in uncontrolled environments, and lends itself to telepresence applications such as video-conferencing from dynamic environments. The input image is projected into a low dimensional latent space of the facial expression parameters. Through careful adversarial training of the parameter-space synthetic rendering, a videorealistic animation is produced. Our problem is challenging as the human visual system is sensitive to the smallest face irregularities that could occur in the final results. This sensitivity is even stronger for video results. Our solution is trained in a pre-processing stage, through a supervised manner without manual annotations. EgoFace captures a wide variety of facial expressions, including mouth movements and asymmetrical expressions. It works under varying illuminations, background, movements, handles people from different ethnicities and can operate in real time.}, }
Endnote
%0 Report %A Elgharib, Mohamed %A Mallikarjun B R, %A Tewari, Ayush %A Kim, Hyeongwoo %A Liu, Wentao %A Seidel, Hans-Peter %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T EgoFace: Egocentric Face Performance Capture and Videorealistic Reenactment : %G eng %U http://hdl.handle.net/21.11116/0000-0003-F1E6-9 %U http://arxiv.org/abs/1905.10822 %D 2019 %X Face performance capture and reenactment techniques use multiple cameras and sensors, positioned at a distance from the face or mounted on heavy wearable devices. This limits their applications in mobile and outdoor environments. We present EgoFace, a radically new lightweight setup for face performance capture and front-view videorealistic reenactment using a single egocentric RGB camera. Our lightweight setup allows operations in uncontrolled environments, and lends itself to telepresence applications such as video-conferencing from dynamic environments. The input image is projected into a low dimensional latent space of the facial expression parameters. Through careful adversarial training of the parameter-space synthetic rendering, a videorealistic animation is produced. Our problem is challenging as the human visual system is sensitive to the smallest face irregularities that could occur in the final results. This sensitivity is even stronger for video results. Our solution is trained in a pre-processing stage, through a supervised manner without manual annotations. EgoFace captures a wide variety of facial expressions, including mouth movements and asymmetrical expressions. It works under varying illuminations, background, movements, handles people from different ethnicities and can operate in real time. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR %U http://gvv.mpi-inf.mpg.de/projects/EgoFace/
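As a rough, non-authoritative illustration of the core idea in the EgoFace abstract above (projecting an egocentric RGB crop into a low-dimensional space of facial expression parameters), the following PyTorch sketch shows a generic convolutional regressor. The class name, input resolution, layer sizes, and code dimension are assumptions made for this example and do not reproduce the authors' actual network.

import torch
import torch.nn as nn

class ExpressionEncoder(nn.Module):
    # Hypothetical stand-in: maps a 64x64 egocentric RGB crop to a small
    # vector of expression parameters (the "latent code" a renderer could consume).
    def __init__(self, code_dim=64):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),   # 64 -> 32
            nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(),  # 32 -> 16
            nn.Conv2d(64, 128, 3, stride=2, padding=1), nn.ReLU(), # 16 -> 8
            nn.AdaptiveAvgPool2d(1),                                # global pooling
        )
        self.head = nn.Linear(128, code_dim)

    def forward(self, x):
        return self.head(self.features(x).flatten(1))

# Usage sketch: codes = ExpressionEncoder()(torch.rand(2, 3, 64, 64))  # shape (2, 64)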
Fried, O., Tewari, A., Zollhöfer, M., et al. 2019a. Text-based Editing of Talking-head Video. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2019) 38, 4.
Export
BibTeX
@article{Fried_SIGGRAPH2019, TITLE = {Text-based Editing of Talking-head Video}, AUTHOR = {Fried, Ohad and Tewari, Ayush and Zollh{\"o}fer, Michael and Finkelstein, Adam and Shechtman, Eli and Goldman, Dan B. and Genova, Kyle and Jin, Zeyu and Theobalt, Christian and Agrawala, Maneesh}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3306346.3323028}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {38}, NUMBER = {4}, EID = {68}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2019}, }
Endnote
%0 Journal Article %A Fried, Ohad %A Tewari, Ayush %A Zollhöfer, Michael %A Finkelstein, Adam %A Shechtman, Eli %A Goldman, Dan B. %A Genova, Kyle %A Jin, Zeyu %A Theobalt, Christian %A Agrawala, Maneesh %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Text-based Editing of Talking-head Video : %G eng %U http://hdl.handle.net/21.11116/0000-0004-8458-4 %R 10.1145/3306346.3323028 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 4 %Z sequence number: 68 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2019 %O ACM SIGGRAPH 2019 Los Angeles, CA, USA, 28 July - 1 August
Fried, O., Tewari, A., Zollhöfer, M., et al. 2019b. Text-based Editing of Talking-head Video. http://arxiv.org/abs/1906.01524.
(arXiv: 1906.01524)
Abstract
Editing talking-head video to change the speech content or to remove filler words is challenging. We propose a novel method to edit talking-head video based on its transcript to produce a realistic output video in which the dialogue of the speaker has been modified, while maintaining a seamless audio-visual flow (i.e. no jump cuts). Our method automatically annotates an input talking-head video with phonemes, visemes, 3D face pose and geometry, reflectance, expression and scene illumination per frame. To edit a video, the user has to only edit the transcript, and an optimization strategy then chooses segments of the input corpus as base material. The annotated parameters corresponding to the selected segments are seamlessly stitched together and used to produce an intermediate video representation in which the lower half of the face is rendered with a parametric face model. Finally, a recurrent video generation network transforms this representation to a photorealistic video that matches the edited transcript. We demonstrate a large variety of edits, such as the addition, removal, and alteration of words, as well as convincing language translation and full sentence synthesis.
Export
BibTeX
@online{Fried_arXiv1906.01524, TITLE = {Text-based Editing of Talking-head Video}, AUTHOR = {Fried, Ohad and Tewari, Ayush and Zollh{\"o}fer, Michael and Finkelstein, Adam and Shechtman, Eli and Goldman, Dan B. and Genova, Kyle and Jin, Zeyu and Theobalt, Christian and Agrawala, Maneesh}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1906.01524}, EPRINT = {1906.01524}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Editing talking-head video to change the speech content or to remove filler words is challenging. We propose a novel method to edit talking-head video based on its transcript to produce a realistic output video in which the dialogue of the speaker has been modified, while maintaining a seamless audio-visual flow (i.e. no jump cuts). Our method automatically annotates an input talking-head video with phonemes, visemes, 3D face pose and geometry, reflectance, expression and scene illumination per frame. To edit a video, the user has to only edit the transcript, and an optimization strategy then chooses segments of the input corpus as base material. The annotated parameters corresponding to the selected segments are seamlessly stitched together and used to produce an intermediate video representation in which the lower half of the face is rendered with a parametric face model. Finally, a recurrent video generation network transforms this representation to a photorealistic video that matches the edited transcript. We demonstrate a large variety of edits, such as the addition, removal, and alteration of words, as well as convincing language translation and full sentence synthesis.}, }
Endnote
%0 Report %A Fried, Ohad %A Tewari, Ayush %A Zollhöfer, Michael %A Finkelstein, Adam %A Shechtman, Eli %A Goldman, Dan B. %A Genova, Kyle %A Jin, Zeyu %A Theobalt, Christian %A Agrawala, Maneesh %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Text-based Editing of Talking-head Video : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE15-8 %U http://arxiv.org/abs/1906.01524 %D 2019 %X Editing talking-head video to change the speech content or to remove filler words is challenging. We propose a novel method to edit talking-head video based on its transcript to produce a realistic output video in which the dialogue of the speaker has been modified, while maintaining a seamless audio-visual flow (i.e. no jump cuts). Our method automatically annotates an input talking-head video with phonemes, visemes, 3D face pose and geometry, reflectance, expression and scene illumination per frame. To edit a video, the user has to only edit the transcript, and an optimization strategy then chooses segments of the input corpus as base material. The annotated parameters corresponding to the selected segments are seamlessly stitched together and used to produce an intermediate video representation in which the lower half of the face is rendered with a parametric face model. Finally, a recurrent video generation network transforms this representation to a photorealistic video that matches the edited transcript. We demonstrate a large variety of edits, such as the addition, removal, and alteration of words, as well as convincing language translation and full sentence synthesis. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG
Golyanik, V., Jonas, A., Stricker, D., and Theobalt, C. 2019a. Intrinsic Dynamic Shape Prior for Fast, Sequential and Dense Non-Rigid Structure from Motion with Detection of Temporally-Disjoint Rigidity. http://arxiv.org/abs/1909.02468.
(arXiv: 1909.02468)
Abstract
While dense non-rigid structure from motion (NRSfM) has been extensively studied from the perspective of the reconstructability problem over the recent years, almost no attempts have been undertaken to bring it into the practical realm. The reasons for the slow dissemination are the severe ill-posedness, high sensitivity to motion and deformation cues and the difficulty to obtain reliable point tracks in the vast majority of practical scenarios. To fill this gap, we propose a hybrid approach that extracts prior shape knowledge from an input sequence with NRSfM and uses it as a dynamic shape prior for sequential surface recovery in scenarios with recurrence. Our Dynamic Shape Prior Reconstruction (DSPR) method can be combined with existing dense NRSfM techniques while its energy functional is optimised with stochastic gradient descent at real-time rates for new incoming point tracks. The proposed versatile framework with a new core NRSfM approach outperforms several other methods in the ability to handle inaccurate and noisy point tracks, provided we have access to a representative (in terms of the deformation variety) image sequence. Comprehensive experiments highlight convergence properties and the accuracy of DSPR under different disturbing effects. We also perform a joint study of tracking and reconstruction and show applications to shape compression and heart reconstruction under occlusions. We achieve state-of-the-art metrics (accuracy and compression ratios) in different scenarios.
Export
BibTeX
@online{Golyanik_arXiv1909.02468, TITLE = {Intrinsic Dynamic Shape Prior for Fast, Sequential and Dense Non-Rigid Structure from Motion with Detection of Temporally-Disjoint Rigidity}, AUTHOR = {Golyanik, Vladislav and Jonas, Andr{\'e} and Stricker, Didier and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1909.02468}, EPRINT = {1909.02468}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {While dense non-rigid structure from motion (NRSfM) has been extensively studied from the perspective of the reconstructability problem over the recent years, almost no attempts have been undertaken to bring it into the practical realm. The reasons for the slow dissemination are the severe ill-posedness, high sensitivity to motion and deformation cues and the difficulty to obtain reliable point tracks in the vast majority of practical scenarios. To fill this gap, we propose a hybrid approach that extracts prior shape knowledge from an input sequence with NRSfM and uses it as a dynamic shape prior for sequential surface recovery in scenarios with recurrence. Our Dynamic Shape Prior Reconstruction (DSPR) method can be combined with existing dense NRSfM techniques while its energy functional is optimised with stochastic gradient descent at real-time rates for new incoming point tracks. The proposed versatile framework with a new core NRSfM approach outperforms several other methods in the ability to handle inaccurate and noisy point tracks, provided we have access to a representative (in terms of the deformation variety) image sequence. Comprehensive experiments highlight convergence properties and the accuracy of DSPR under different disturbing effects. We also perform a joint study of tracking and reconstruction and show applications to shape compression and heart reconstruction under occlusions. We achieve state-of-the-art metrics (accuracy and compression ratios) in different scenarios.}, }
Endnote
%0 Report %A Golyanik, Vladislav %A Jonas, André %A Stricker, Didier %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Intrinsic Dynamic Shape Prior for Fast, Sequential and Dense Non-Rigid Structure from Motion with Detection of Temporally-Disjoint Rigidity : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7D9A-2 %U http://arxiv.org/abs/1909.02468 %D 2019 %X While dense non-rigid structure from motion (NRSfM) has been extensively studied from the perspective of the reconstructability problem over the recent years, almost no attempts have been undertaken to bring it into the practical realm. The reasons for the slow dissemination are the severe ill-posedness, high sensitivity to motion and deformation cues and the difficulty to obtain reliable point tracks in the vast majority of practical scenarios. To fill this gap, we propose a hybrid approach that extracts prior shape knowledge from an input sequence with NRSfM and uses it as a dynamic shape prior for sequential surface recovery in scenarios with recurrence. Our Dynamic Shape Prior Reconstruction (DSPR) method can be combined with existing dense NRSfM techniques while its energy functional is optimised with stochastic gradient descent at real-time rates for new incoming point tracks. The proposed versatile framework with a new core NRSfM approach outperforms several other methods in the ability to handle inaccurate and noisy point tracks, provided we have access to a representative (in terms of the deformation variety) image sequence. Comprehensive experiments highlight convergence properties and the accuracy of DSPR under different disturbing effects. We also perform a joint study of tracking and reconstruction and show applications to shape compression and heart reconstruction under occlusions. We achieve state-of-the-art metrics (accuracy and compression ratios) in different scenarios. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Golyanik, V. and Theobalt, C. 2019a. Optimising for Scale in Globally Multiply-Linked Gravitational Point Set Registration Leads to Singularities. International Conference on 3D Vision, IEEE.
Export
BibTeX
@inproceedings{Golyanik_3DV2019, TITLE = {Optimising for Scale in Globally Multiply-Linked Gravitational Point Set Registration Leads to Singularities}, AUTHOR = {Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-3131-3}, DOI = {10.1109/3DV.2019.00027}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {International Conference on 3D Vision}, PAGES = {164--172}, ADDRESS = {Qu{\'e}bec City, Canada}, }
Endnote
%0 Conference Proceedings %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Optimising for Scale in Globally Multiply-Linked Gravitational Point Set Registration Leads to Singularities : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7B4E-B %R 10.1109/3DV.2019.00027 %D 2019 %B International Conference on 3D Vision %Z date of event: 2019-09-16 - 2019-09-19 %C Québec City, Canada %B International Conference on 3D Vision %P 164 - 172 %I IEEE %@ 978-1-7281-3131-3
Golyanik, V., Jonas, A., and Stricker, D. 2019b. Consolidating Segmentwise Non-Rigid Structure from Motion. Proceedings of the Sixteenth International Conference on Machine Vision Applications (MVA 2019), IEEE.
Export
BibTeX
@inproceedings{Golyanik_MVA2019, TITLE = {Consolidating Segmentwise Non-Rigid Structure from Motion}, AUTHOR = {Golyanik, Vladislav and Jonas, Andr{\'e} and Stricker, Didier}, LANGUAGE = {eng}, ISBN = {978-4-901122-18-4}, DOI = {10.23919/MVA.2019.8757909}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {Proceedings of the Sixteenth International Conference on Machine Vision Applications (MVA 2019)}, PAGES = {1--6}, ADDRESS = {Tokyo, Japan}, }
Endnote
%0 Conference Proceedings %A Golyanik, Vladislav %A Jonas, André %A Stricker, Didier %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Consolidating Segmentwise Non-Rigid Structure from Motion : %G eng %U http://hdl.handle.net/21.11116/0000-0005-9823-8 %R 10.23919/MVA.2019.8757909 %D 2019 %B Sixteenth International Conference on Machine Vision Applications %Z date of event: 2019-05-27 - 2019-05-31 %C Tokyo, Japan %B Proceedings of the Sixteenth International Conference on Machine Vision Applications %P 1 - 6 %I IEEE %@ 978-4-901122-18-4
Golyanik, V., Theobalt, C., and Stricker, D. 2019c. Accelerated Gravitational Point Set Alignment with Altered Physical Laws. International Conference on Computer Vision (ICCV 2019), IEEE.
Export
BibTeX
@inproceedings{BHRGA2019, TITLE = {Accelerated Gravitational Point Set Alignment with Altered Physical Laws}, AUTHOR = {Golyanik, Vladislav and Theobalt, Christian and Stricker, Didier}, LANGUAGE = {eng}, ISBN = {978-1-7281-4803-8}, DOI = {10.1109/ICCV.2019.00217}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {International Conference on Computer Vision (ICCV 2019)}, PAGES = {2080--2089}, ADDRESS = {Seoul, Korea}, }
Endnote
%0 Conference Proceedings %A Golyanik, Vladislav %A Theobalt, Christian %A Stricker, Didier %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Accelerated Gravitational Point Set Alignment with Altered Physical Laws : %G eng %U http://hdl.handle.net/21.11116/0000-0005-9473-2 %R 10.1109/ICCV.2019.00217 %D 2019 %B International Conference on Computer Vision %Z date of event: 2019-10-27 - 2019-11-02 %C Seoul, Korea %B International Conference on Computer Vision %P 2080 - 2089 %I IEEE %@ 978-1-7281-4803-8
Golyanik, V. and Theobalt, C. 2019b. A Quantum Computational Approach to Correspondence Problems on Point Sets. http://arxiv.org/abs/1912.12296.
(arXiv: 1912.12296)
Abstract
Modern adiabatic quantum computers (AQC) are already used to solve difficult combinatorial optimisation problems in various domains of science. Currently, only a few applications of AQC in computer vision have been demonstrated. We review modern AQC and derive the first algorithm for transformation estimation and point set alignment suitable for AQC. Our algorithm has a subquadratic computational complexity of state preparation. We perform a systematic experimental analysis of the proposed approach and show several examples of successful point set alignment by simulated sampling. With this paper, we hope to boost the research on AQC for computer vision.
Export
BibTeX
@online{Golyanik_arXiv1912.12296, TITLE = {A Quantum Computational Approach to Correspondence Problems on Point Sets}, AUTHOR = {Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1912.12296}, EPRINT = {1912.12296}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Modern adiabatic quantum computers (AQC) are already used to solve difficult combinatorial optimisation problems in various domains of science. Currently, only a few applications of AQC in computer vision have been demonstrated. We review modern AQC and derive the first algorithm for transformation estimation and point set alignment suitable for AQC. Our algorithm has a subquadratic computational complexity of state preparation. We perform a systematic experimental analysis of the proposed approach and show several examples of successful point set alignment by simulated sampling. With this paper, we hope to boost the research on AQC for computer vision.}, }
Endnote
%0 Report %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T A Quantum Computational Approach to Correspondence Problems on Point Sets : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7DF0-0 %U http://arxiv.org/abs/1912.12296 %D 2019 %X Modern adiabatic quantum computers (AQC) are already used to solve difficult combinatorial optimisation problems in various domains of science. Currently, only a few applications of AQC in computer vision have been demonstrated. We review modern AQC and derive the first algorithm for transformation estimation and point set alignment suitable for AQC. Our algorithm has a subquadratic computational complexity of state preparation. We perform a systematic experimental analysis of the proposed approach and show several examples of successful point set alignment by simulated sampling. With this paper, we hope to boost the research on AQC for computer vision. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,cs.ET,Quantum Physics, quant-ph
Habermann, M., Xu, W., Rhodin, H., Zollhöfer, M., Pons-Moll, G., and Theobalt, C. 2019a. NRST: Non-rigid Surface Tracking from Monocular Video. Pattern Recognition (GCPR 2018), Springer.
Export
BibTeX
@inproceedings{Habermann_GVPR18, TITLE = {{NRST}: {N}on-rigid Surface Tracking from Monocular Video}, AUTHOR = {Habermann, Marc and Xu, Weipeng and Rhodin, Helge and Zollh{\"o}fer, Michael and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-12938-5}, DOI = {10.1007/978-3-030-12939-2_23}, PUBLISHER = {Springer}, YEAR = {2018}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {Pattern Recognition (GCPR 2018)}, EDITOR = {Brox, Thomas and Bruhn, Andr{\'e}s and Fritz, Mario}, PAGES = {335--348}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {11269}, ADDRESS = {Stuttgart, Germany}, }
Endnote
%0 Conference Proceedings %A Habermann, Marc %A Xu, Weipeng %A Rhodin, Helge %A Zollhöfer, Michael %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T NRST: Non-rigid Surface Tracking from Monocular Video : %G eng %U http://hdl.handle.net/21.11116/0000-0002-B94C-9 %R 10.1007/978-3-030-12939-2_23 %D 2019 %B 40th German Conference on Pattern Recognition %Z date of event: 2018-10-09 - 2018-10-12 %C Stuttgart, Germany %B Pattern Recognition %E Brox, Thomas; Bruhn, Andrés; Fritz, Mario %P 335 - 348 %I Springer %@ 978-3-030-12938-5 %B Lecture Notes in Computer Science %N 11269
Habermann, M., Xu, W., Zollhöfer, M., Pons-Moll, G., and Theobalt, C. 2019b. LiveCap: Real-time Human Performance Capture from Monocular Video. ACM Transactions on Graphics 38, 2.
Export
BibTeX
@article{Habermann_TOG19, TITLE = {{LiveCap}: {R}eal-time Human Performance Capture from Monocular Video}, AUTHOR = {Habermann, Marc and Xu, Weipeng and Zollh{\"o}fer, Michael and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3311970}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {ACM Transactions on Graphics}, VOLUME = {38}, NUMBER = {2}, EID = {14}, }
Endnote
%0 Journal Article %A Habermann, Marc %A Xu, Weipeng %A Zollhöfer, Michael %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T LiveCap: Real-time Human Performance Capture from Monocular Video : %G eng %U http://hdl.handle.net/21.11116/0000-0002-B947-E %R 10.1145/3311970 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 2 %Z sequence number: 14 %I ACM %C New York, NY %@ false
Habibie, I., Xu, W., Mehta, D., Pons-Moll, G., and Theobalt, C. 2019a. In the Wild Human Pose Estimation using Explicit 2D Features and Intermediate 3D Representations. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
Export
BibTeX
@inproceedings{habibieCVPR19, TITLE = {In the Wild Human Pose Estimation using Explicit {2D} Features and Intermediate {3D} Representations}, AUTHOR = {Habibie, Ikhsanul and Xu, Weipeng and Mehta, Dushyant and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-3293-8}, DOI = {10.1109/CVPR.2019.01116}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, PAGES = {10897--10906}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Habibie, Ikhsanul %A Xu, Weipeng %A Mehta, Dushyant %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T In the Wild Human Pose Estimation using Explicit 2D Features and Intermediate 3D Representations : %G eng %U http://hdl.handle.net/21.11116/0000-0003-6520-7 %R 10.1109/CVPR.2019.01116 %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-15 - 2019-06-20 %C Long Beach, CA, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 10897 - 10906 %I IEEE %@ 978-1-7281-3293-8
Habibie, I., Xu, W., Mehta, D., Pons-Moll, G., and Theobalt, C. 2019b. In the Wild Human Pose Estimation Using Explicit 2D Features and Intermediate 3D Representations. http://arxiv.org/abs/1904.03289.
(arXiv: 1904.03289)
Abstract
Convolutional Neural Network based approaches for monocular 3D human pose estimation usually require a large amount of training images with 3D pose annotations. While it is feasible to provide 2D joint annotations for large corpora of in-the-wild images with humans, providing accurate 3D annotations to such in-the-wild corpora is hardly feasible in practice. Most existing 3D labelled data sets are either synthetically created or feature in-studio images. 3D pose estimation algorithms trained on such data often have limited ability to generalize to real world scene diversity. We therefore propose a new deep learning based method for monocular 3D human pose estimation that shows high accuracy and generalizes better to in-the-wild scenes. It has a network architecture that comprises a new disentangled hidden space encoding of explicit 2D and 3D features, and uses supervision by a new learned projection model from predicted 3D pose. Our algorithm can be jointly trained on image data with 3D labels and image data with only 2D labels. It achieves state-of-the-art accuracy on challenging in-the-wild data.
Export
BibTeX
@online{Habibie_arXiv1904.03289, TITLE = {In the Wild Human Pose Estimation Using Explicit {2D} Features and Intermediate {3D} Representations}, AUTHOR = {Habibie, Ikhsanul and Xu, Weipeng and Mehta, Dushyant and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1904.03289}, EPRINT = {1904.03289}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Convolutional Neural Network based approaches for monocular 3D human pose estimation usually require a large amount of training images with 3D pose annotations. While it is feasible to provide 2D joint annotations for large corpora of in-the-wild images with humans, providing accurate 3D annotations to such in-the-wild corpora is hardly feasible in practice. Most existing 3D labelled data sets are either synthetically created or feature in-studio images. 3D pose estimation algorithms trained on such data often have limited ability to generalize to real world scene diversity. We therefore propose a new deep learning based method for monocular 3D human pose estimation that shows high accuracy and generalizes better to in-the-wild scenes. It has a network architecture that comprises a new disentangled hidden space encoding of explicit 2D and 3D features, and uses supervision by a new learned projection model from predicted 3D pose. Our algorithm can be jointly trained on image data with 3D labels and image data with only 2D labels. It achieves state-of-the-art accuracy on challenging in-the-wild data.}, }
Endnote
%0 Report %A Habibie, Ikhsanul %A Xu, Weipeng %A Mehta, Dushyant %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T In the Wild Human Pose Estimation Using Explicit 2D Features and Intermediate 3D Representations : %G eng %U http://hdl.handle.net/21.11116/0000-0003-F76E-C %U http://arxiv.org/abs/1904.03289 %D 2019 %X Convolutional Neural Network based approaches for monocular 3D human pose estimation usually require a large amount of training images with 3D pose annotations. While it is feasible to provide 2D joint annotations for large corpora of in-the-wild images with humans, providing accurate 3D annotations to such in-the-wild corpora is hardly feasible in practice. Most existing 3D labelled data sets are either synthetically created or feature in-studio images. 3D pose estimation algorithms trained on such data often have limited ability to generalize to real world scene diversity. We therefore propose a new deep learning based method for monocular 3D human pose estimation that shows high accuracy and generalizes better to in-the-wild scenes. It has a network architecture that comprises a new disentangled hidden space encoding of explicit 2D and 3D features, and uses supervision by a new learned projection model from predicted 3D pose. Our algorithm can be jointly trained on image data with 3D labels and image data with only 2D labels. It achieves state-of-the-art accuracy on challenging in-the-wild data. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
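The abstract above describes joint training on images with 3D pose labels and in-the-wild images with only 2D joint labels. The PyTorch function below is a minimal sketch of such mixed supervision, assuming per-sample boolean flags for 3D availability; the loss form, weights, and tensor shapes are illustrative assumptions and not the paper's exact objective (which additionally uses a learned projection model).

import torch
import torch.nn.functional as F

def mixed_supervision_loss(pred3d, pred2d, gt3d, gt2d, has3d, w2d=1.0, w3d=1.0):
    # pred3d/gt3d: (B, J, 3) joint positions; pred2d/gt2d: (B, J, 2) image-plane joints;
    # has3d: (B,) boolean mask marking samples that carry 3D annotations.
    loss2d = F.mse_loss(pred2d, gt2d)                    # 2D supervision on every sample
    if has3d.any():
        loss3d = F.mse_loss(pred3d[has3d], gt3d[has3d])  # 3D supervision where available
    else:
        loss3d = pred3d.new_zeros(())
    return w2d * loss2d + w3d * loss3d

# Usage sketch with a batch of two samples and 17 joints:
# loss = mixed_supervision_loss(torch.rand(2, 17, 3), torch.rand(2, 17, 2),
#                               torch.rand(2, 17, 3), torch.rand(2, 17, 2),
#                               torch.tensor([True, False]))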
Hladký, J., Seidel, H.-P., and Steinberger, M. 2019a. The Camera Offset Space: Real-time Potentially Visible Set Computations for Streaming Rendering. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2019) 38, 6.
Export
BibTeX
@article{Hladky_SA2019, TITLE = {The Camera Offset Space: Real-time Potentially Visible Set Computations for Streaming Rendering}, AUTHOR = {Hladk{\'y}, Jozef and Seidel, Hans-Peter and Steinberger, Markus}, LANGUAGE = {eng}, ISSN = {0730-0301}, ISBN = {978-1-4503-6008-1}, DOI = {10.1145/3355089.3356530}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia)}, VOLUME = {38}, NUMBER = {6}, EID = {231}, BOOKTITLE = {Proceedings of ACM SIGGRAPH Asia 2019}, }
Endnote
%0 Journal Article %A Hladký, Jozef %A Seidel, Hans-Peter %A Steinberger, Markus %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T The Camera Offset Space: Real-time Potentially Visible Set Computations for Streaming Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0005-4E4F-D %R 10.1145/3355089.3356530 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 6 %Z sequence number: 231 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH Asia 2019 %O ACM SIGGRAPH Asia 2019 Brisbane, Australia, 17 - 20 November 2019 SA'19 SA 2019 %@ 978-1-4503-6008-1
Hladký, J., Seidel, H.-P., and Steinberger, M. 2019b. Tessellated Shading Streaming. Computer Graphics Forum (Proc. Eurographics Symposium on Rendering 2019) 38, 4.
Export
BibTeX
@article{Hladky_EGSR2019, TITLE = {Tessellated Shading Streaming}, AUTHOR = {Hladk{\'y}, Jozef and Seidel, Hans-Peter and Steinberger, Markus}, LANGUAGE = {eng}, ISSN = {0167-7055}, URL = {https://diglib.eg.org/handle/10.1111/cgf13780}, DOI = {10.1111/cgf.13780}, PUBLISHER = {Wiley-Blackwell}, ADDRESS = {Oxford}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Computer Graphics Forum (Proc. Eurographics Symposium on Rendering)}, VOLUME = {38}, NUMBER = {4}, PAGES = {171--182}, BOOKTITLE = {Eurographics Symposium on Rendering 2019}, EDITOR = {Boubekeur, Tamy and Sen, Pradeep}, }
Endnote
%0 Journal Article %A Hladký, Jozef %A Seidel, Hans-Peter %A Steinberger, Markus %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Tessellated Shading Streaming : %G eng %U http://hdl.handle.net/21.11116/0000-0004-4897-1 %R 10.1111/cgf.13780 %U https://diglib.eg.org/handle/10.1111/cgf13780 %7 2019 %D 2019 %J Computer Graphics Forum %V 38 %N 4 %& 171 %P 171 - 182 %I Wiley-Blackwell %C Oxford %@ false %B Eurographics Symposium on Rendering 2019 %O Eurographics Symposium on Rendering 2019 EGSR 2019 Strasbourg, France, July 10 - 12, 2019
Jiang, C., Tang, C., Seidel, H.-P., Chen, R., and Wonka, P. 2019. Computational Design of Lightweight Trusses. http://arxiv.org/abs/1901.05637.
(arXiv: 1901.05637)
Abstract
Trusses are load-carrying light-weight structures consisting of bars connected at joints ubiquitously applied in a variety of engineering scenarios. Designing optimal trusses that satisfy functional specifications with a minimal amount of material has interested both theoreticians and practitioners for more than a century. In this paper, we introduce two main ideas to improve upon the state of the art. First, we formulate an alternating linear programming problem for geometry optimization. Second, we introduce two sets of complementary topological operations, including a novel subdivision scheme for global topology refinement inspired by Michell's famed theoretical study. Based on these two ideas, we build an efficient computational framework for the design of lightweight trusses. We illustrate our framework with a variety of functional specifications and extensions. We show that our method achieves trusses with smaller volumes and is over two orders of magnitude faster compared with recent state-of-the-art approaches.
Export
BibTeX
@online{Jiang_arXIv1901.05637, TITLE = {Computational Design of Lightweight Trusses}, AUTHOR = {Jiang, Caigui and Tang, Chengcheng and Seidel, Hans-Peter and Chen, Renjie and Wonka, Peter}, URL = {http://arxiv.org/abs/1901.05637}, EPRINT = {1901.05637}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Trusses are load-carrying light-weight structures consisting of bars connected at joints ubiquitously applied in a variety of engineering scenarios. Designing optimal trusses that satisfy functional specifications with a minimal amount of material has interested both theoreticians and practitioners for more than a century. In this paper, we introduce two main ideas to improve upon the state of the art. First, we formulate an alternating linear programming problem for geometry optimization. Second, we introduce two sets of complementary topological operations, including a novel subdivision scheme for global topology refinement inspired by Michell's famed theoretical study. Based on these two ideas, we build an efficient computational framework for the design of lightweight trusses. We illustrate our framework with a variety of functional specifications and extensions. We show that our method achieves trusses with smaller volumes and is over two orders of magnitude faster compared with recent state-of-the-art approaches.}, }
Endnote
%0 Report %A Jiang, Caigui %A Tang, Chengcheng %A Seidel, Hans-Peter %A Chen, Renjie %A Wonka, Peter %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Computational Design of Lightweight Trusses : %U http://hdl.handle.net/21.11116/0000-0003-A7E9-A %U http://arxiv.org/abs/1901.05637 %D 2019 %X Trusses are load-carrying light-weight structures consisting of bars connected at joints ubiquitously applied in a variety of engineering scenarios. Designing optimal trusses that satisfy functional specifications with a minimal amount of material has interested both theoreticians and practitioners for more than a century. In this paper, we introduce two main ideas to improve upon the state of the art. First, we formulate an alternating linear programming problem for geometry optimization. Second, we introduce two sets of complementary topological operations, including a novel subdivision scheme for global topology refinement inspired by Michell's famed theoretical study. Based on these two ideas, we build an efficient computational framework for the design of lightweight trusses. We illustrate our framework with a variety of functional specifications and extensions. We show that our method achieves trusses with smaller volumes and is over two orders of magnitude faster compared with recent state-of-the-art approaches. %K Computer Science, Graphics, cs.GR
Kim, H., Elgharib, M., Zollhöfer, M., et al. 2019. Neural Style-preserving Visual Dubbing. ACM Transactions on Graphics 38, 6.
Export
BibTeX
@article{Kim2019, TITLE = {Neural Style-preserving Visual Dubbing}, AUTHOR = {Kim, Hyeongwoo and Elgharib, Mohamed and Zollh{\"o}fer, Michael and Seidel, Hans-Peter and Beeler, Thabo and Richardt, Christian and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3355089.3356500}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {ACM Transactions on Graphics}, VOLUME = {38}, NUMBER = {6}, EID = {178}, }
Endnote
%0 Journal Article %A Kim, Hyeongwoo %A Elgharib, Mohamed %A Zollhöfer, Michael %A Seidel, Hans-Peter %A Beeler, Thabo %A Richardt, Christian %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Style-preserving Visual Dubbing : %G eng %U http://hdl.handle.net/21.11116/0000-0005-6AC0-B %R 10.1145/3355089.3356500 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 6 %Z sequence number: 178 %I ACM %C New York, NY %@ false
Kim, H. 2019. Learning-based Face Reconstruction and Editing. PhD thesis, Universität des Saarlandes, Saarbrücken.
Export
BibTeX
@phdthesis{Kim_2019, TITLE = {Learning-based Face Reconstruction and Editing}, AUTHOR = {Kim, Hyeongwoo}, LANGUAGE = {eng}, DOI = {10.22028/D291-32394}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, }
Endnote
%0 Thesis %A Kim, Hyeongwoo %Y Theobalt, Christian %A referee: Izadi, Shahram %A referee: Richardt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Learning-based Face Reconstruction and Editing : %G eng %U http://hdl.handle.net/21.11116/0000-0007-6FFA-4 %R 10.22028/D291-32394 %I Universität des Saarlandes %C Saarbrücken %D 2019 %P 124 p. %V phd %9 phd %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/29769
Kovalenko, O., Golyanik, V., Malik, J., Elhayek, A., and Stricker, D. 2019. Structure from Articulated Motion: Accurate and Stable Monocular 3D Reconstruction without Training Data. Sensors 19, 20.
Export
BibTeX
@article{Kovalenko2019, TITLE = {Structure from Articulated Motion: {A}ccurate and Stable Monocular {3D} Reconstruction without Training Data}, AUTHOR = {Kovalenko, Onorina and Golyanik, Vladislav and Malik, Jameel and Elhayek, Ahmed and Stricker, Didier}, LANGUAGE = {eng}, ISSN = {1424-8220}, DOI = {10.3390/s19204603}, PUBLISHER = {MDPI}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, JOURNAL = {Sensors}, VOLUME = {19}, NUMBER = {20}, EID = {4603}, }
Endnote
%0 Journal Article %A Kovalenko, Onorina %A Golyanik, Vladislav %A Malik, Jameel %A Elhayek, Ahmed %A Stricker, Didier %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T Structure from Articulated Motion: Accurate and Stable Monocular 3D Reconstruction without Training Data : %G eng %U http://hdl.handle.net/21.11116/0000-0005-5CB5-8 %R 10.3390/s19204603 %7 2019 %D 2019 %J Sensors %V 19 %N 20 %Z sequence number: 4603 %I MDPI %@ false
Leimkühler, T., Singh, G., Myszkowski, K., Seidel, H.-P., and Ritschel, T. 2019. Deep Point Correlation Design. ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia 2019) 38, 6.
Export
BibTeX
@article{Leimkuehler_SA2019, TITLE = {Deep Point Correlation Design}, AUTHOR = {Leimk{\"u}hler, Thomas and Singh, Gurprit and Myszkowski, Karol and Seidel, Hans-Peter and Ritschel, Tobias}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3355089.3356562}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH Asia)}, VOLUME = {38}, NUMBER = {6}, EID = {226}, BOOKTITLE = {Proceedings of ACM SIGGRAPH Asia 2019}, }
Endnote
%0 Journal Article %A Leimkühler, Thomas %A Singh, Gurprit %A Myszkowski, Karol %A Seidel, Hans-Peter %A Ritschel, Tobias %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Deep Point Correlation Design : %G eng %U http://hdl.handle.net/21.11116/0000-0004-9BF3-B %R 10.1145/3355089.3356562 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 6 %Z sequence number: 226 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH Asia 2019 %O ACM SIGGRAPH Asia 2019 Brisbane, Australia, 17 - 20 November 2019 SA'19 SA 2019
Leimkühler, T. 2019. Artificial Intelligence for Efficient Image-based View Synthesis. PhD thesis, Universität des Saarlandes, Saarbrücken.
Abstract
Synthesizing novel views from image data is a widely investigated topic in both computer graphics and computer vision, and has many applications like stereo or multi-view rendering for virtual reality, light field reconstruction, and image post-processing. While image-based approaches have the advantage of reduced computational load compared to classical model-based rendering, efficiency is still a major concern. This thesis demonstrates how concepts and tools from artificial intelligence can be used to increase the efficiency of image-based view synthesis algorithms. In particular it is shown how machine learning can help to generate point patterns useful for a variety of computer graphics tasks, how path planning can guide image warping, how sparsity-enforcing optimization can lead to significant speedups in interactive distribution effect rendering, and how probabilistic inference can be used to perform real-time 2D-to-3D conversion.
Export
BibTeX
@phdthesis{Leimphd2019, TITLE = {Artificial Intelligence for Efficient Image-based View Synthesis}, AUTHOR = {Leimk{\"u}hler, Thomas}, LANGUAGE = {eng}, DOI = {10.22028/D291-28379}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, ABSTRACT = {Synthesizing novel views from image data is a widely investigated topic in both computer graphics and computer vision, and has many applications like stereo or multi-view rendering for virtual reality, light field reconstruction, and image post-processing. While image-based approaches have the advantage of reduced computational load compared to classical model-based rendering, efficiency is still a major concern. This thesis demonstrates how concepts and tools from artificial intelligence can be used to increase the efficiency of image-based view synthesis algorithms. In particular it is shown how machine learning can help to generate point patterns useful for a variety of computer graphics tasks, how path planning can guide image warping, how sparsity-enforcing optimization can lead to significant speedups in interactive distribution effect rendering, and how probabilistic inference can be used to perform real-time 2D-to-3D conversion.}, }
Endnote
%0 Thesis %A Leimkühler, Thomas %Y Seidel, Hans-Peter %A referee: Ritschel, Tobias %A referee: Lensch, Hendrik %A referee: Drettakis, George %+ Computer Graphics, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Artificial Intelligence for Efficient Image-based View Synthesis : %G eng %U http://hdl.handle.net/21.11116/0000-0004-A589-7 %R 10.22028/D291-28379 %I Universität des Saarlandes %C Saarbrücken %D 2019 %P 136 p. %V phd %9 phd %X Synthesizing novel views from image data is a widely investigated topic in both computer graphics and computer vision, and has many applications like stereo or multi-view rendering for virtual reality, light field reconstruction, and image post-processing. While image-based approaches have the advantage of reduced computational load compared to classical model-based rendering, efficiency is still a major concern. This thesis demonstrates how concepts and tools from artificial intelligence can be used to increase the efficiency of image-based view synthesis algorithms. In particular it is shown how machine learning can help to generate point patterns useful for a variety of computer graphics tasks, how path planning can guide image warping, how sparsity-enforcing optimization can lead to significant speedups in interactive distribution effect rendering, and how probabilistic inference can be used to perform real-time 2D-to-3D conversion. %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/27664
Liu, L., Xu, W., Zollhöfer, M., et al. 2019. Neural Rendering and Reenactment of Human Actor Videos. ACM Transactions on Graphics 38, 5.
Export
BibTeX
@article{Liu_2019, TITLE = {Neural Rendering and Reenactment of Human Actor Videos}, AUTHOR = {Liu, Lingjie and Xu, Weipeng and Zollh{\"o}fer, Michael and Kim, Hyeongwoo and Bernard, Florian and Habermann, Marc and Wang, Wenping and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3333002}, PUBLISHER = {Association for Computing Machinery}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics}, VOLUME = {38}, NUMBER = {5}, EID = {139}, }
Endnote
%0 Journal Article %A Liu, Lingjie %A Xu, Weipeng %A Zollhöfer, Michael %A Kim, Hyeongwoo %A Bernard, Florian %A Habermann, Marc %A Wang, Wenping %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Rendering and Reenactment of Human Actor Videos : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7B28-5 %R 10.1145/3333002 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 5 %Z sequence number: 139 %I Association for Computing Machinery %C New York, NY %@ false
Mehta, D., Kim, K.I., and Theobalt, C. 2019a. On Implicit Filter Level Sparsity in Convolutional Neural Networks. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
Export
BibTeX
@inproceedings{Mehta_CVPR2019, TITLE = {On Implicit Filter Level Sparsity in Convolutional Neural Networks}, AUTHOR = {Mehta, Dushyant and Kim, Kwang In and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-3293-8}, DOI = {10.1109/CVPR.2019.00061}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, PAGES = {520--528}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Mehta, Dushyant %A Kim, Kwang In %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T On Implicit Filter Level Sparsity in Convolutional Neural Networks : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7CB8-1 %R 10.1109/CVPR.2019.00061 %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-16 - 2019-06-20 %C Long Beach, CA, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 520 - 528 %I IEEE %@ 978-1-7281-3293-8
Mehta, D., Kim, K.I., and Theobalt, C. 2019b. Implicit Filter Sparsification In Convolutional Neural Networks. http://arxiv.org/abs/1905.04967.
(arXiv: 1905.04967)
Abstract
We show that implicit filter-level sparsity manifests in convolutional neural networks (CNNs) which employ Batch Normalization and ReLU activation, and are trained with adaptive gradient descent techniques and L2 regularization or weight decay. Through an extensive empirical study (Mehta et al., 2019) we hypothesize the mechanism behind the sparsification process, and find surprising links to certain filter sparsification heuristics proposed in the literature. The emergence, and subsequent pruning, of selective features is observed to be one of the contributing mechanisms, leading to feature sparsity on par with or better than certain explicit sparsification/pruning approaches. In this workshop article we summarize our findings, and point out corollaries of selective-feature penalization which could also be employed as heuristics for filter pruning.
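As a concrete illustration of what "filter-level sparsity" means, the following minimal sketch (not the authors' evaluation protocol; the weight layout and the threshold are assumptions introduced here) counts convolutional filters whose weights have effectively vanished:

import numpy as np

def filter_sparsity(conv_weights, threshold=1e-3):
    # Fraction of output filters whose L2 norm falls below the threshold,
    # assuming a (out_channels, in_channels, k, k) weight layout.
    out_channels = conv_weights.shape[0]
    norms = np.linalg.norm(conv_weights.reshape(out_channels, -1), axis=1)
    return float(np.mean(norms < threshold))

# Toy example: 64 filters of shape 3x3x3, half of them zeroed out
# to mimic implicitly sparsified features.
w = np.random.randn(64, 3, 3, 3)
w[::2] = 0.0
print(filter_sparsity(w))  # prints 0.5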
Export
BibTeX
@online{Mehta_arXiv1905.04967, TITLE = {Implicit Filter Sparsification In Convolutional Neural Networks}, AUTHOR = {Mehta, Dushyant and Kim, Kwang In and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1905.04967}, EPRINT = {1905.04967}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We show implicit filter level sparsity manifests in convolutional neural networks (CNNs) which employ Batch Normalization and ReLU activation, and are trained with adaptive gradient descent techniques and L2 regularization or weight decay. Through an extensive empirical study (Mehta et al., 2019) we hypothesize the mechanism behind the sparsification process, and find surprising links to certain filter sparsification heuristics proposed in literature. Emergence of, and the subsequent pruning of selective features is observed to be one of the contributing mechanisms, leading to feature sparsity at par or better than certain explicit sparsification / pruning approaches. In this workshop article we summarize our findings, and point out corollaries of selective-featurepenalization which could also be employed as heuristics for filter pruning}, }
Endnote
%0 Report %A Mehta, Dushyant %A Kim, Kwang In %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Implicit Filter Sparsification In Convolutional Neural Networks : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE07-8 %U http://arxiv.org/abs/1905.04967 %D 2019 %X We show implicit filter level sparsity manifests in convolutional neural networks (CNNs) which employ Batch Normalization and ReLU activation, and are trained with adaptive gradient descent techniques and L2 regularization or weight decay. Through an extensive empirical study (Mehta et al., 2019) we hypothesize the mechanism behind the sparsification process, and find surprising links to certain filter sparsification heuristics proposed in literature. Emergence of, and the subsequent pruning of selective features is observed to be one of the contributing mechanisms, leading to feature sparsity at par or better than certain explicit sparsification / pruning approaches. In this workshop article we summarize our findings, and point out corollaries of selective-featurepenalization which could also be employed as heuristics for filter pruning %K Computer Science, Learning, cs.LG,Computer Science, Computer Vision and Pattern Recognition, cs.CV,Statistics, Machine Learning, stat.ML
Mehta, D., Sotnychenko, O., Mueller, F., et al. 2019c. XNect: Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera. http://arxiv.org/abs/1907.00837.
(arXiv: 1907.00837)
Abstract
We present a real-time approach for multi-person 3D motion capture at over 30 fps using a single RGB camera. It operates in generic scenes and is robust to difficult occlusions both by other people and objects. Our method operates in subsequent stages. The first stage is a convolutional neural network (CNN) that estimates 2D and 3D pose features along with identity assignments for all visible joints of all individuals. We contribute a new architecture for this CNN, called SelecSLS Net, that uses novel selective long and short range skip connections to improve the information flow allowing for a drastically faster network without compromising accuracy. In the second stage, a fully-connected neural network turns the possibly partial (on account of occlusion) 2D pose and 3D pose features for each subject into a complete 3D pose estimate per individual. The third stage applies space-time skeletal model fitting to the predicted 2D and 3D pose per subject to further reconcile the 2D and 3D pose, and enforce temporal coherence. Our method returns the full skeletal pose in joint angles for each subject. This is a further key distinction from previous work that neither extracted global body positions nor joint angle results of a coherent skeleton in real time for multi-person scenes. The proposed system runs on consumer hardware at a previously unseen speed of more than 30 fps given 512x320 images as input while achieving state-of-the-art accuracy, which we will demonstrate on a range of challenging real-world scenes.
Export
BibTeX
@online{Mehta_arXiv1907.00837, TITLE = {{XNect}: Real-time Multi-person {3D} Human Pose Estimation with a Single {RGB} Camera}, AUTHOR = {Mehta, Dushyant and Sotnychenko, Oleksandr and Mueller, Franziska and Xu, Weipeng and Elgharib, Mohamed and Fua, Pascal and Seidel, Hans-Peter and Rhodin, Helge and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1907.00837}, EPRINT = {1907.00837}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a real-time approach for multi-person 3D motion capture at over 30 fps using a single RGB camera. It operates in generic scenes and is robust to difficult occlusions both by other people and objects. Our method operates in subsequent stages. The first stage is a convolutional neural network (CNN) that estimates 2D and 3D pose features along with identity assignments for all visible joints of all individuals. We contribute a new architecture for this CNN, called SelecSLS Net, that uses novel selective long and short range skip connections to improve the information flow allowing for a drastically faster network without compromising accuracy. In the second stage, a fully-connected neural network turns the possibly partial (on account of occlusion) 2D pose and 3D pose features for each subject into a complete 3D pose estimate per individual. The third stage applies space-time skeletal model fitting to the predicted 2D and 3D pose per subject to further reconcile the 2D and 3D pose, and enforce temporal coherence. Our method returns the full skeletal pose in joint angles for each subject. This is a further key distinction from previous work that neither extracted global body positions nor joint angle results of a coherent skeleton in real time for multi-person scenes. The proposed system runs on consumer hardware at a previously unseen speed of more than 30 fps given 512x320 images as input while achieving state-of-the-art accuracy, which we will demonstrate on a range of challenging real-world scenes.}, }
Endnote
%0 Report %A Mehta, Dushyant %A Sotnychenko, Oleksandr %A Mueller, Franziska %A Xu, Weipeng %A Elgharib, Mohamed %A Fua, Pascal %A Seidel, Hans-Peter %A Rhodin, Helge %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T XNect: Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE21-A %U http://arxiv.org/abs/1907.00837 %D 2019 %X We present a real-time approach for multi-person 3D motion capture at over 30 fps using a single RGB camera. It operates in generic scenes and is robust to difficult occlusions both by other people and objects. Our method operates in subsequent stages. The first stage is a convolutional neural network (CNN) that estimates 2D and 3D pose features along with identity assignments for all visible joints of all individuals. We contribute a new architecture for this CNN, called SelecSLS Net, that uses novel selective long and short range skip connections to improve the information flow allowing for a drastically faster network without compromising accuracy. In the second stage, a fully-connected neural network turns the possibly partial (on account of occlusion) 2D pose and 3D pose features for each subject into a complete 3D pose estimate per individual. The third stage applies space-time skeletal model fitting to the predicted 2D and 3D pose per subject to further reconcile the 2D and 3D pose, and enforce temporal coherence. Our method returns the full skeletal pose in joint angles for each subject. This is a further key distinction from previous work that neither extracted global body positions nor joint angle results of a coherent skeleton in real time for multi-person scenes. The proposed system runs on consumer hardware at a previously unseen speed of more than 30 fps given 512x320 images as input while achieving state-of-the-art accuracy, which we will demonstrate on a range of challenging real-world scenes. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Mehta, D., Sotnychenko, O., Mueller, F., et al. 2019d. XNect Demo (v2): Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera. CVPR 2019 Demonstrations.
Export
BibTeX
@inproceedings{XNectDemoV2_CVPR2019, TITLE = {{XNect} Demo (v2): {R}eal-time Multi-person {3D} Human Pose Estimation with a Single {RGB} Camera}, AUTHOR = {Mehta, Dushyant and Sotnychenko, Oleksandr and Mueller, Franziska and Xu, Weipeng and Seidel, Hans-Peter and Fua, Pascal and Elgharib, Mohamed and Rhodin, Helge and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {CVPR 2019 Demonstrations}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Mehta, Dushyant %A Sotnychenko, Oleksandr %A Mueller, Franziska %A Xu, Weipeng %A Seidel, Hans-Peter %A Fua, Pascal %A Elgharib, Mohamed %A Rhodin, Helge %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T XNect Demo (v2): Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0004-71DB-6 %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-16 - 2019-06-20 %C Long Beach, CA, USA %B CVPR 2019 Demonstrations %U http://gvv.mpi-inf.mpg.de/projects/XNectDemoV2/
Meka, A., Hane, C., Pandey, R., et al. 2019. Deep Reflectance Fields: High-Quality Facial Reflectance Field Inference from Color Gradient Illumination. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2019) 38, 4.
Export
BibTeX
@article{Meka_SIGGRAPH2019, TITLE = {Deep Reflectance Fields High-Quality Facial Reflectance Field Inference from Color Gradient Illumination}, AUTHOR = {Meka, Abhimitra and Hane, Christian and Pandey, Rohit and Zollh{\"o}fer, Michael and Fanello, Sean and Fyffe, Graham and Kowdle, Adarsh and Yu, Xueming and Busch, Jay and Dour-Garian, Jason and Denny, Peter and Bouaziz, Sofien and Lincoln, Peter and Whalen, Matt and Harvey, Geoff and Taylor, Jonathan and Izadi, Shahram and Tagliasacchi, Andrea and Debevec, Paul and Theobalt, Christian and Valentin, Julien and Rhemann, Christoph}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3306346.3323027}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {38}, NUMBER = {4}, EID = {77}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2019}, }
Endnote
%0 Journal Article %A Meka, Abhimitra %A Hane, Christian %A Pandey, Rohit %A Zollhöfer, Michael %A Fanello, Sean %A Fyffe, Graham %A Kowdle, Adarsh %A Yu, Xueming %A Busch, Jay %A Dour-Garian, Jason %A Denny, Peter %A Bouaziz, Sofien %A Lincoln, Peter %A Whalen, Matt %A Harvey, Geoff %A Taylor, Jonathan %A Izadi, Shahram %A Tagliasacchi, Andrea %A Debevec, Paul %A Theobalt, Christian %A Valentin, Julien %A Rhemann, Christoph %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Deep Reflectance Fields High-Quality Facial Reflectance Field Inference from Color Gradient Illumination : %G eng %U http://hdl.handle.net/21.11116/0000-0004-8453-9 %R 10.1145/3306346.3323027 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 4 %Z sequence number: 77 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2019 %O ACM SIGGRAPH 2019 Los Angeles, CA, USA, 28 July - 1 August
Mueller, F., Davis, M., Bernard, F., et al. 2019. Real-time Pose and Shape Reconstruction of Two Interacting Hands With a Single Depth Camera. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2019) 38, 4.
Export
BibTeX
@article{MuellerTOG2019, TITLE = {Real-time Pose and Shape Reconstruction of Two Interacting Hands With a Single Depth Camera}, AUTHOR = {Mueller, Franziska and Davis, Micah and Bernard, Florian and Sotnychenko, Oleksandr and Verschoor, Mickeal and Otaduy, Miguel A. and Casas, Dan and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3306346.3322958}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {38}, NUMBER = {4}, EID = {49}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2019}, }
Endnote
%0 Journal Article %A Mueller, Franziska %A Davis, Micah %A Bernard, Florian %A Sotnychenko, Oleksandr %A Verschoor, Mickeal %A Otaduy, Miguel A. %A Casas, Dan %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Real-time Pose and Shape Reconstruction of Two Interacting Hands With a Single Depth Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0004-844A-4 %R 10.1145/3306346.3322958 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 4 %Z sequence number: 49 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2019 %O ACM SIGGRAPH 2019 Los Angeles, CA, USA, 28 July - 1 August
Robertini, N. 2019. Model-based Human Performance Capture in Outdoor Scenes. urn:nbn:de:bsz:291--ds-285887.
Abstract
Technologies for motion and performance capture of real actors have enabled the creation of realistic-looking virtual humans through detail and deformation transfer at the cost of extensive manual work and sophisticated in-studio marker-based systems. This thesis pushes the boundaries of performance capture by proposing automatic algorithms for robust 3D skeleton and detailed surface tracking in less constrained multi-view outdoor scenarios. Contributions include new multi-layered human body representations designed for effective model-based time-consistent reconstruction in complex dynamic environments with varying illumination, from a set of vision cameras. We design dense surface refinement approaches to enable smooth silhouette-free model-to-image alignment, as well as coarse-to-fine tracking techniques to enable joint estimation of skeleton motion and fine-scale surface deformations in complicated scenarios. High-quality results attained on challenging application scenarios confirm the contributions and show great potential for the automatic creation of personalized 3D virtual humans.
Export
BibTeX
@phdthesis{Robertini_PhD2019, TITLE = {Model-based Human Performance Capture in Outdoor Scenes}, AUTHOR = {Robertini, Nadia}, LANGUAGE = {eng}, URL = {urn:nbn:de:bsz:291--ds-285887}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, ABSTRACT = {Technologies for motion and performance capture of real actors have enabled the creation of realisticlooking virtual humans through detail and deformation transfer at the cost of extensive manual work and sophisticated in-studio marker-based systems. This thesis pushes the boundaries of performance capture by proposing automatic algorithms for robust 3D skeleton and detailed surface tracking in less constrained multi-view outdoor scenarios. Contributions include new multi-layered human body representations designed for effective model-based time-consistent reconstruction in complex dynamic environments with varying illumination, from a set of vision cameras. We design dense surface refinement approaches to enable smooth silhouette-free model-to-image alignment, as well as coarse-to-fine tracking techniques to enable joint estimation of skeleton motion and finescale surface deformations in complicated scenarios. High-quality results attained on challenging application scenarios confirm the contributions and show great potential for the automatic creation of personalized 3D virtual humans.}, }
Endnote
%0 Thesis %A Robertini, Nadia %Y Theobalt, Christian %A referee: Seidel, Hans-Peter %+ Computer Graphics, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Model-based Human Performance Capture in Outdoor Scenes : %G eng %U http://hdl.handle.net/21.11116/0000-0004-9B2E-B %U urn:nbn:de:bsz:291--ds-285887 %F OTHER: hdl:20.500.11880/27667 %I Universität des Saarlandes %C Saarbrücken %D 2019 %P XIX, 136, XI p. %V phd %9 phd %X Technologies for motion and performance capture of real actors have enabled the creation of realisticlooking virtual humans through detail and deformation transfer at the cost of extensive manual work and sophisticated in-studio marker-based systems. This thesis pushes the boundaries of performance capture by proposing automatic algorithms for robust 3D skeleton and detailed surface tracking in less constrained multi-view outdoor scenarios. Contributions include new multi-layered human body representations designed for effective model-based time-consistent reconstruction in complex dynamic environments with varying illumination, from a set of vision cameras. We design dense surface refinement approaches to enable smooth silhouette-free model-to-image alignment, as well as coarse-to-fine tracking techniques to enable joint estimation of skeleton motion and finescale surface deformations in complicated scenarios. High-quality results attained on challenging application scenarios confirm the contributions and show great potential for the automatic creation of personalized 3D virtual humans. %U https://scidok.sulb.uni-saarland.de/handle/20.500.11880/27667
Shekhar, S., Semmo, A., Trapp, M., et al. 2019. Consistent Filtering of Videos and Dense Light-Fields without Optic-Flow. Vision, Modeling and Visualization 2019 (VMV 2019), Eurographics Association.
Export
BibTeX
@inproceedings{Shekhar_VMV2019, TITLE = {Consistent Filtering of Videos and Dense Light-Fields without Optic-Flow}, AUTHOR = {Shekhar, Sumit and Semmo, Amir and Trapp, Matthias and Tursun, Okan Tarhan and Pasewaldt, Sebastian and Myszkowski, Karol and D{\"o}llner, J{\"u}rgen}, LANGUAGE = {eng}, ISBN = {978-3-03868-098-7}, DOI = {10.2312/vmv.20191326}, PUBLISHER = {Eurographics Association}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {Vision, Modeling and Visualization 2019 (VMV 2019)}, PAGES = {125--134}, ADDRESS = {Rostock, Germany}, }
Endnote
%0 Conference Proceedings %A Shekhar, Sumit %A Semmo, Amir %A Trapp, Matthias %A Tursun, Okan Tarhan %A Pasewaldt, Sebastian %A Myszkowski, Karol %A Döllner, Jürgen %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Consistent Filtering of Videos and Dense Light-Fields without Optic-Flow : %G eng %U http://hdl.handle.net/21.11116/0000-0004-9C10-A %R 10.2312/vmv.20191326 %D 2019 %B 24th International Symposium on Vision, Modeling, and Visualization %Z date of event: 2019-09-30 - 2019-10-02 %C Rostock, Germany %B Vision, Modeling and Visualization 2019 %P 125 - 134 %I Eurographics Association %@ 978-3-03868-098-7
Shimada, S., Golyanik, V., Theobalt, C., and Stricker, D. 2019a. IsMo-GAN: Adversarial Learning for Monocular Non-Rigid 3D Reconstruction. IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW 2019), Computer Vision Foundation.
Export
BibTeX
@inproceedings{Shimada_2019, TITLE = {{IsMo-GAN}: {A}dversarial Learning for Monocular Non-Rigid {3D} Reconstruction}, AUTHOR = {Shimada, Soshi and Golyanik, Vladislav and Theobalt, Christian and Stricker, Didier}, LANGUAGE = {eng}, PUBLISHER = {Computer Vision Foundation}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW 2019)}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Shimada, Soshi %A Golyanik, Vladislav %A Theobalt, Christian %A Stricker, Didier %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T IsMo-GAN: Adversarial Learning for Monocular Non-Rigid 3D Reconstruction : %G eng %U http://hdl.handle.net/21.11116/0000-0005-9410-1 %D 2019 %B Photogrammetric Computer Vision Workshop %Z date of event: 2019-06-17 - 2019-06-17 %C Long Beach, CA, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops %I Computer Vision Foundation
Shimada, S., Golyanik, V., Theobalt, C., and Stricker, D. 2019b. IsMo-GAN: Adversarial Learning for Monocular Non-Rigid 3D Reconstruction. http://arxiv.org/abs/1904.12144.
(arXiv: 1904.12144)
Abstract
The majority of the existing methods for non-rigid 3D surface regression from monocular 2D images require an object template or point tracks over multiple frames as an input, and are still far from real-time processing rates. In this work, we present the Isometry-Aware Monocular Generative Adversarial Network (IsMo-GAN) - an approach for direct 3D reconstruction from a single image, trained for the deformation model in an adversarial manner on a light-weight synthetic dataset. IsMo-GAN reconstructs surfaces from real images under varying illumination, camera poses, textures and shading at over 250 Hz. In multiple experiments, it consistently outperforms several approaches in the reconstruction accuracy, runtime, generalisation to unknown surfaces and robustness to occlusions. In comparison to the state-of-the-art, we reduce the reconstruction error by 10-30% including the textureless case and our surfaces evince fewer artefacts qualitatively.
Export
BibTeX
@online{Shimada_arXiv1904.12144, TITLE = {{IsMo}-{GAN}: Adversarial Learning for Monocular Non-Rigid {3D} Reconstruction}, AUTHOR = {Shimada, Soshi and Golyanik, Vladislav and Theobalt, Christian and Stricker, Didier}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1904.12144}, EPRINT = {1904.12144}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {The majority of the existing methods for non-rigid 3D surface regression from monocular 2D images require an object template or point tracks over multiple frames as an input, and are still far from real-time processing rates. In this work, we present the Isometry-Aware Monocular Generative Adversarial Network (IsMo-GAN) -- an approach for direct 3D reconstruction from a single image, trained for the deformation model in an adversarial manner on a light-weight synthetic dataset. IsMo-GAN reconstructs surfaces from real images under varying illumination, camera poses, textures and shading at over 250 Hz. In multiple experiments, it consistently outperforms several approaches in the reconstruction accuracy, runtime, generalisation to unknown surfaces and robustness to occlusions. In comparison to the state-of-the-art, we reduce the reconstruction error by 10-30% including the textureless case and our surfaces evince fewer artefacts qualitatively.}, }
Endnote
%0 Report %A Shimada, Soshi %A Golyanik, Vladislav %A Theobalt, Christian %A Stricker, Didier %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T IsMo-GAN: Adversarial Learning for Monocular Non-Rigid 3D Reconstruction : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE04-B %U http://arxiv.org/abs/1904.12144 %D 2019 %X The majority of the existing methods for non-rigid 3D surface regression from monocular 2D images require an object template or point tracks over multiple frames as an input, and are still far from real-time processing rates. In this work, we present the Isometry-Aware Monocular Generative Adversarial Network (IsMo-GAN) - an approach for direct 3D reconstruction from a single image, trained for the deformation model in an adversarial manner on a light-weight synthetic dataset. IsMo-GAN reconstructs surfaces from real images under varying illumination, camera poses, textures and shading at over 250 Hz. In multiple experiments, it consistently outperforms several approaches in the reconstruction accuracy, runtime, generalisation to unknown surfaces and robustness to occlusions. In comparison to the state-of-the-art, we reduce the reconstruction error by 10-30% including the textureless case and our surfaces evince fewer artefacts qualitatively. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Shimada, S., Golyanik, V., Tretschk, E., Stricker, D., and Theobalt, C. 2019c. DispVoxNets: Non-Rigid Point Set Alignment with Supervised Learning Proxies. International Conference on 3D Vision, IEEE.
Export
BibTeX
@inproceedings{Shimada_3DV2019, TITLE = {{DispVoxNets}: {N}on-Rigid Point Set Alignment with Supervised Learning Proxies}, AUTHOR = {Shimada, Soshi and Golyanik, Vladislav and Tretschk, Edgar and Stricker, Didier and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-3131-3}, DOI = {10.1109/3DV.2019.00013}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {International Conference on 3D Vision}, PAGES = {27--36}, ADDRESS = {Qu{\'e}bec City, Canada}, }
Endnote
%0 Conference Proceedings %A Shimada, Soshi %A Golyanik, Vladislav %A Tretschk, Edgar %A Stricker, Didier %A Theobalt, Christian %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T DispVoxNets: Non-Rigid Point Set Alignment with Supervised Learning Proxies : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7B48-1 %R 10.1109/3DV.2019.00013 %D 2019 %B International Conference on 3D Vision %Z date of event: 2019-09-16 - 2019-09-19 %C Québec City, Canada %B International Conference on 3D Vision %P 27 - 36 %I IEEE %@ 978-1-7281-3131-3
Shimada, S., Golyanik, V., Tretschk, E., Stricker, D., and Theobalt, C. 2019d. DispVoxNets: Non-Rigid Point Set Alignment with Supervised Learning Proxies. http://arxiv.org/abs/1907.10367.
(arXiv: 1907.10367)
Abstract
We introduce a supervised-learning framework for non-rigid point set alignment of a new kind - Displacements on Voxels Networks (DispVoxNets) - which abstracts away from the point set representation and regresses 3D displacement fields on regularly sampled proxy 3D voxel grids. Thanks to recently released collections of deformable objects with known intra-state correspondences, DispVoxNets learn a deformation model and further priors (e.g., weak point topology preservation) for different object categories such as cloths, human bodies and faces. DispVoxNets cope with large deformations, noise and clustered outliers more robustly than the state-of-the-art. At test time, our approach runs orders of magnitude faster than previous techniques. All properties of DispVoxNets are ascertained numerically and qualitatively in extensive experiments and comparisons to several previous methods.
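A minimal sketch of the voxel-grid displacement idea summarized above, not the authors' implementation: a displacement field stored on a regular proxy grid over the template's bounding box is applied to a point set by nearest-voxel lookup. The grid resolution and the nearest-neighbour interpolation are simplifying assumptions; in DispVoxNets the field itself would be regressed by a network.

import numpy as np

def apply_voxel_displacements(points, displacements, bbox_min, bbox_max):
    # points: (N, 3); displacements: (R, R, R, 3) displacement field on a voxel grid
    # spanning the axis-aligned box [bbox_min, bbox_max].
    res = displacements.shape[0]
    # Map each point into voxel indices of the proxy grid.
    t = (points - bbox_min) / (bbox_max - bbox_min + 1e-9)
    idx = np.clip((t * res).astype(int), 0, res - 1)
    return points + displacements[idx[:, 0], idx[:, 1], idx[:, 2]]

# Toy usage: a random point cloud shifted by a constant field stored on an 8^3 grid.
pts = np.random.rand(100, 3)
field = np.full((8, 8, 8, 3), 0.1)  # every voxel stores the same offset
out = apply_voxel_displacements(pts, field, np.zeros(3), np.ones(3))
assert np.allclose(out, pts + 0.1)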
Export
BibTeX
@online{Shimada_arXiv1907.10367, TITLE = {{DispVoxNets}: {N}on-Rigid Point Set Alignment with Supervised Learning Proxies}, AUTHOR = {Shimada, Soshi and Golyanik, Vladislav and Tretschk, Edgar and Stricker, Didier and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1907.10367}, EPRINT = {1907.10367}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We introduce a supervised-learning framework for non-rigid point set alignment of a new kind -- Displacements on Voxels Networks (DispVoxNets) - which abstracts away from the point set representation and regresses 3D displacement fields on regularly sampled proxy 3D voxel grids. Thanks to recently released collections of deformable objects with known intra-state correspondences, DispVoxNets learn a deformation model and further priors (e.g., weak point topology preservation) for different object categories such as cloths, human bodies and faces. DispVoxNets cope with large deformations, noise and clustered outliers more robustly than the state-of-the-art. At test time, our approach runs orders of magnitude faster than previous techniques. All properties of DispVoxNets are ascertained numerically and qualitatively in extensive experiments and comparisons to several previous methods.}, }
Endnote
%0 Report %A Shimada, Soshi %A Golyanik, Vladislav %A Tretschk, Edgar %A Stricker, Didier %A Theobalt, Christian %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T DispVoxNets: Non-Rigid Point Set Alignment with Supervised Learning Proxies : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7D04-B %U http://arxiv.org/abs/1907.10367 %D 2019 %X We introduce a supervised-learning framework for non-rigid point set alignment of a new kind - Displacements on Voxels Networks (DispVoxNets) - which abstracts away from the point set representation and regresses 3D displacement fields on regularly sampled proxy 3D voxel grids. Thanks to recently released collections of deformable objects with known intra-state correspondences, DispVoxNets learn a deformation model and further priors (e.g., weak point topology preservation) for different object categories such as cloths, human bodies and faces. DispVoxNets cope with large deformations, noise and clustered outliers more robustly than the state-of-the-art. At test time, our approach runs orders of magnitude faster than previous techniques. All properties of DispVoxNets are ascertained numerically and qualitatively in extensive experiments and comparisons to several previous methods. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Computational Geometry, cs.CG
Singh, G., Öztireli, C., Ahmed, A.G.M., et al. 2019. Analysis of Sample Correlations for Monte Carlo Rendering. Computer Graphics Forum (Proc. EUROGRAPHICS 2019) 38, 2.
Export
BibTeX
@article{Singh_EG2019STAR, TITLE = {Analysis of Sample Correlations for {Monte Carlo} Rendering}, AUTHOR = {Singh, Gurprit and {\"O}ztireli, Cengiz and Ahmed, Abdalla G.M. and Coeurjolly, David and Subr, Kartic and Ostromoukhov, Victor and Deussen, Oliver and Ramamoorthi, Ravi and Jarosz, Wojciech}, LANGUAGE = {eng}, ISSN = {0167-7055}, DOI = {10.1111/cgf.13653}, PUBLISHER = {Blackwell-Wiley}, ADDRESS = {Oxford}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Computer Graphics Forum (Proc. EUROGRAPHICS)}, VOLUME = {38}, NUMBER = {2}, PAGES = {473--491}, BOOKTITLE = {EUROGRAPHICS 2019 STAR -- State of The Art Reports}, }
Endnote
%0 Journal Article %A Singh, Gurprit %A Öztireli, Cengiz %A Ahmed, Abdalla G.M. %A Coeurjolly, David %A Subr, Kartic %A Ostromoukhov, Victor %A Deussen, Oliver %A Ramamoorthi, Ravi %A Jarosz, Wojciech %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations %T Analysis of Sample Correlations for Monte Carlo Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0002-F487-2 %R 10.1111/cgf.13653 %7 2019 %D 2019 %J Computer Graphics Forum %O Computer Graphics Forum : journal of the European Association for Computer Graphics Comput. Graph. Forum %V 38 %N 2 %& 473 %P 473 - 491 %I Blackwell-Wiley %C Oxford %@ false %B EUROGRAPHICS 2019 STAR – State of The Art Reports %O EUROGRAPHICS 2019 EG 2019 The 40th Annual Conference of the European Association for Computer Graphics ; Genova, Italy, May 6-10
Sumin, D., Rittig, T., Babaei, V., et al. 2019. Geometry-Aware Scattering Compensation for 3D Printing. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2019) 38, 4.
Export
BibTeX
@article{SuminRittig2019, TITLE = {Geometry-Aware Scattering Compensation for {3D} Printing}, AUTHOR = {Sumin, Denis and Rittig, Tobias and Babaei, Vahid and Nindel, Thomas and Wilkie, Alexander and Didyk, Piotr and Bickel, Bernd and K{\v r}iv{\'a}nek, Jaroslav and Myszkowski, Karol and Weyrich, Tim}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3306346.3322992}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {38}, NUMBER = {4}, EID = {111}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2019}, }
Endnote
%0 Journal Article %A Sumin, Denis %A Rittig, Tobias %A Babaei, Vahid %A Nindel, Thomas %A Wilkie, Alexander %A Didyk, Piotr %A Bickel, Bernd %A Křivánek, Jaroslav %A Myszkowski, Karol %A Weyrich, Tim %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Geometry-Aware Scattering Compensation for 3D Printing : %G eng %U http://hdl.handle.net/21.11116/0000-0003-7D65-0 %R 10.1145/3306346.3322992 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 4 %Z sequence number: 111 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2019 %O ACM SIGGRAPH 2019 Los Angeles, CA, USA, 28 July - 1 August
Su, Y., Golyanik, V., Minaskan, N., Ali, S.A., and Stricker, D. 2019. A Shape Completion Component for Monocular Non-Rigid SLAM. Adjunct Proceedings of the 2019 IEEE International Symposium on Mixed and Augmented Reality (ISMAR-Adjunct 2019), IEEE.
Export
BibTeX
@inproceedings{Su_ISMAR2019, TITLE = {A Shape Completion Component for Monocular Non-Rigid {SLAM}}, AUTHOR = {Su, Yongzhi and Golyanik, Vladislav and Minaskan, Nareg and Ali, Sk Aziz and Stricker, Didier}, LANGUAGE = {eng}, ISBN = {978-1-7281-4765-9}, DOI = {10.1109/ISMAR-Adjunct.2019.00-18}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {Adjunct Proceedings of the 2019 IEEE International Symposium on Mixed and Augmented Reality (ISMAR-Adjunct 2019)}, PAGES = {332--337}, ADDRESS = {Beijing, China}, }
Endnote
%0 Conference Proceedings %A Su, Yongzhi %A Golyanik, Vladislav %A Minaskan, Nareg %A Ali, Sk Aziz %A Stricker, Didier %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T A Shape Completion Component for Monocular Non-Rigid SLAM : %G eng %U http://hdl.handle.net/21.11116/0000-0005-9832-7 %R 10.1109/ISMAR-Adjunct.2019.00-18 %D 2019 %B IEEE International Symposium on Mixed and Augmented Reality %Z date of event: 2019-10-14 - 2019-10-18 %C Beijing, China %B Adjunct Proceedings of the 2019 IEEE International Symposium on Mixed and Augmented Reality %P 332 - 337 %I IEEE %@ 978-1-7281-4765-9
Swoboda, P., Kainmüller, D., Mokarian, A., Theobalt, C., and Bernard, F. 2019. A Convex Relaxation for Multi-Graph Matching. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
Export
BibTeX
@inproceedings{SwobodaCVPR2019a, TITLE = {A Convex Relaxation for Multi-Graph Matching}, AUTHOR = {Swoboda, Paul and Kainm{\"u}ller, Dagmar and Mokarian, Ashkan and Theobalt, Christian and Bernard, Florian}, LANGUAGE = {eng}, ISBN = {978-1-7281-3293-8}, DOI = {10.1109/CVPR.2019.01141}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, PAGES = {11156--11165}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Swoboda, Paul %A Kainmüller, Dagmar %A Mokarian, Ashkan %A Theobalt, Christian %A Bernard, Florian %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T A Convex Relaxation for Multi-Graph Matching : %G eng %U http://hdl.handle.net/21.11116/0000-0005-74B8-9 %R 10.1109/CVPR.2019.01141 %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-15 - 2019-06-20 %C Long Beach, CA, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 11156 - 11165 %I IEEE %@ 978-1-7281-3293-8
Tewari, A., Bernard, F., Garrido, P., et al. 2019. FML: Face Model Learning From Videos. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
Export
BibTeX
@inproceedings{TewariCVPR2019, TITLE = {{FML}: {F}ace Model Learning From Videos}, AUTHOR = {Tewari, Ayush and Bernard, Florian and Garrido, Pablo and Bharaj, Gaurav and Elgharib, Mohamed and Seidel, Hans-Peter and P{\'e}rez, Patrick and Zollh{\"o}fer, Michael and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-3293-8}, DOI = {10.1109/CVPR.2019.01107}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, PAGES = {10812--10822}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Tewari, Ayush %A Bernard, Florian %A Garrido, Pablo %A Bharaj, Gaurav %A Elgharib, Mohamed %A Seidel, Hans-Peter %A Pérez, Patrick %A Zollhöfer, Michael %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T FML: Face Model Learning From Videos : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7B0C-5 %R 10.1109/CVPR.2019.01107 %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-16 - 2019-06-20 %C Long Beach, CA, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 10812 - 10822 %I IEEE %@ 978-1-7281-3293-8
Thies, J., Elgharib, M., Tewari, A., Theobalt, C., and Nießner, M. 2019a. Neural Voice Puppetry: Audio-driven Facial Reenactment. http://arxiv.org/abs/1912.05566.
(arXiv: 1912.05566)
Abstract
We present Neural Voice Puppetry, a novel approach for audio-driven facial video synthesis. Given an audio sequence of a source person or digital assistant, we generate a photo-realistic output video of a target person that is in sync with the audio of the source input. This audio-driven facial reenactment is driven by a deep neural network that employs a latent 3D face model space. Through the underlying 3D representation, the model inherently learns temporal stability while we leverage neural rendering to generate photo-realistic output frames. Our approach generalizes across different people, allowing us to synthesize videos of a target actor with the voice of any unknown source actor or even synthetic voices that can be generated utilizing standard text-to-speech approaches. Neural Voice Puppetry has a variety of use-cases, including audio-driven video avatars, video dubbing, and text-driven video synthesis of a talking head. We demonstrate the capabilities of our method in a series of audio- and text-based puppetry examples. Our method is not only more general than existing works since we are generic to the input person, but we also show superior visual and lip sync quality compared to photo-realistic audio- and video-driven reenactment techniques.
Export
BibTeX
@online{Thies_arXiv1912.05566, TITLE = {Neural Voice Puppetry: Audio-driven Facial Reenactment}, AUTHOR = {Thies, Justus and Elgharib, Mohamed and Tewari, Ayush and Theobalt, Christian and Nie{\ss}ner, Matthias}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1912.05566}, EPRINT = {1912.05566}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present Neural Voice Puppetry, a novel approach for audio-driven facial video synthesis. Given an audio sequence of a source person or digital assistant, we generate a photo-realistic output video of a target person that is in sync with the audio of the source input. This audio-driven facial reenactment is driven by a deep neural network that employs a latent 3D face model space. Through the underlying 3D representation, the model inherently learns temporal stability while we leverage neural rendering to generate photo-realistic output frames. Our approach generalizes across different people, allowing us to synthesize videos of a target actor with the voice of any unknown source actor or even synthetic voices that can be generated utilizing standard text-to-speech approaches. Neural Voice Puppetry has a variety of use-cases, including audio-driven video avatars, video dubbing, and text-driven video synthesis of a talking head. We demonstrate the capabilities of our method in a series of audio- and text-based puppetry examples. Our method is not only more general than existing works since we are generic to the input person, but we also show superior visual and lip sync quality compared to photo-realistic audio- and video-driven reenactment techniques.}, }
Endnote
%0 Report %A Thies, Justus %A Elgharib, Mohamed %A Tewari, Ayush %A Theobalt, Christian %A Nießner, Matthias %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Neural Voice Puppetry: Audio-driven Facial Reenactment : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7DE3-F %U http://arxiv.org/abs/1912.05566 %D 2019 %X We present Neural Voice Puppetry, a novel approach for audio-driven facial video synthesis. Given an audio sequence of a source person or digital assistant, we generate a photo-realistic output video of a target person that is in sync with the audio of the source input. This audio-driven facial reenactment is driven by a deep neural network that employs a latent 3D face model space. Through the underlying 3D representation, the model inherently learns temporal stability while we leverage neural rendering to generate photo-realistic output frames. Our approach generalizes across different people, allowing us to synthesize videos of a target actor with the voice of any unknown source actor or even synthetic voices that can be generated utilizing standard text-to-speech approaches. Neural Voice Puppetry has a variety of use-cases, including audio-driven video avatars, video dubbing, and text-driven video synthesis of a talking head. We demonstrate the capabilities of our method in a series of audio- and text-based puppetry examples. Our method is not only more general than existing works since we are generic to the input person, but we also show superior visual and lip sync quality compared to photo-realistic audio- and video-driven reenactment techniques. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Thies, J., Zollhöfer, M., Stamminger, M., Theobalt, C., and Nießner, M. 2019b. Face2Face: Real-Time Face Capture and Reenactment of RGB Videos. Communications of the ACM 62, 1.
Export
BibTeX
@article{thies2019face, TITLE = {{Face2Face}: {R}eal-Time Face Capture and Reenactment of {RGB} Videos}, AUTHOR = {Thies, Justus and Zollh{\"o}fer, Michael and Stamminger, Marc and Theobalt, Christian and Nie{\ss}ner, Matthias}, LANGUAGE = {eng}, ISSN = {0001-0782}, DOI = {10.1145/3292039}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Communications of the ACM}, VOLUME = {62}, NUMBER = {1}, PAGES = {96--104}, }
Endnote
%0 Journal Article %A Thies, Justus %A Zollhöfer, Michael %A Stamminger, Marc %A Theobalt, Christian %A Nießner, Matthias %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Face2Face: Real-Time Face Capture and Reenactment of RGB Videos : %G eng %U http://hdl.handle.net/21.11116/0000-0002-C0A7-8 %R 10.1145/3292039 %7 2019 %D 2019 %J Communications of the ACM %V 62 %N 1 %& 96 %P 96 - 104 %I ACM %C New York, NY %@ false
Tretschk, E., Tewari, A., Zollhöfer, M., Golyanik, V., and Theobalt, C. 2019. DEMEA: Deep Mesh Autoencoders for Non-Rigidly Deforming Objects. http://arxiv.org/abs/1905.10290.
(arXiv: 1905.10290)
Abstract
Mesh autoencoders are commonly used for dimensionality reduction, sampling and mesh modeling. We propose a general-purpose DEep MEsh Autoencoder (DEMEA) which adds a novel embedded deformation layer to a graph-convolutional mesh autoencoder. The embedded deformation layer (EDL) is a differentiable deformable geometric proxy which explicitly models point displacements of non-rigid deformations in a lower dimensional space and serves as a local rigidity regularizer. DEMEA decouples the parameterization of the deformation from the final mesh resolution since the deformation is defined over a lower dimensional embedded deformation graph. We perform a large-scale study on four different datasets of deformable objects. Reasoning about the local rigidity of meshes using EDL allows us to achieve higher-quality results for highly deformable objects, compared to directly regressing vertex positions. We demonstrate multiple applications of DEMEA, including non-rigid 3D reconstruction from depth and shading cues, non-rigid surface tracking, as well as the transfer of deformations over different meshes.
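A minimal sketch of one embedded-deformation step as described above, not the authors' DEMEA layer: each vertex is deformed by a weighted blend of rigid transforms attached to a sparse set of graph nodes. Node positions, skinning weights, and per-node transforms are assumed given here; in DEMEA they are produced by the decoder.

import numpy as np

def embedded_deformation(vertices, nodes, rotations, translations, weights):
    # vertices:     (V, 3) rest-pose vertex positions
    # nodes:        (K, 3) deformation-graph node positions g_k
    # rotations:    (K, 3, 3) per-node rotation matrices R_k
    # translations: (K, 3) per-node translations t_k
    # weights:      (V, K) skinning weights (rows sum to 1)
    # Returns v' = sum_k w_vk [ R_k (v - g_k) + g_k + t_k ].
    local = vertices[:, None, :] - nodes[None, :, :]          # (V, K, 3)
    rotated = np.einsum('kij,vkj->vki', rotations, local)      # rotate about each node
    per_node = rotated + nodes[None, :, :] + translations[None, :, :]
    return np.einsum('vk,vki->vi', weights, per_node)

# Toy check: identity rotations and zero translations reproduce the input mesh.
V, K = 5, 3
verts = np.random.rand(V, 3)
g = np.random.rand(K, 3)
R = np.tile(np.eye(3), (K, 1, 1))
t = np.zeros((K, 3))
w = np.full((V, K), 1.0 / K)
assert np.allclose(embedded_deformation(verts, g, R, t, w), verts)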
Export
BibTeX
@online{Tretschk_arXIv1905.10290, TITLE = {{DEMEA}: Deep Mesh Autoencoders for Non-Rigidly Deforming Objects}, AUTHOR = {Tretschk, Edgar and Tewari, Ayush and Zollh{\"o}fer, Michael and Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1905.10290}, EPRINT = {1905.10290}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Mesh autoencoders are commonly used for dimensionality reduction, sampling and mesh modeling. We propose a general-purpose DEep MEsh Autoencoder (DEMEA) which adds a novel embedded deformation layer to a graph-convolutional mesh autoencoder. The embedded deformation layer (EDL) is a differentiable deformable geometric proxy which explicitly models point displacements of non-rigid deformations in a lower dimensional space and serves as a local rigidity regularizer. DEMEA decouples the parameterization of the deformation from the final mesh resolution since the deformation is defined over a lower dimensional embedded deformation graph. We perform a large-scale study on four different datasets of deformable objects. Reasoning about the local rigidity of meshes using EDL allows us to achieve higher-quality results for highly deformable objects, compared to directly regressing vertex positions. We demonstrate multiple applications of DEMEA, including non-rigid 3D reconstruction from depth and shading cues, non-rigid surface tracking, as well as the transfer of deformations over different meshes.}, }
Endnote
%0 Report %A Tretschk, Edgar %A Tewari, Ayush %A Zollhöfer, Michael %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T DEMEA: Deep Mesh Autoencoders for Non-Rigidly Deforming Objects : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE0C-3 %U http://arxiv.org/abs/1905.10290 %D 2019 %X Mesh autoencoders are commonly used for dimensionality reduction, sampling and mesh modeling. We propose a general-purpose DEep MEsh Autoencoder (DEMEA) which adds a novel embedded deformation layer to a graph-convolutional mesh autoencoder. The embedded deformation layer (EDL) is a differentiable deformable geometric proxy which explicitly models point displacements of non-rigid deformations in a lower dimensional space and serves as a local rigidity regularizer. DEMEA decouples the parameterization of the deformation from the final mesh resolution since the deformation is defined over a lower dimensional embedded deformation graph. We perform a large-scale study on four different datasets of deformable objects. Reasoning about the local rigidity of meshes using EDL allows us to achieve higher-quality results for highly deformable objects, compared to directly regressing vertex positions. We demonstrate multiple applications of DEMEA, including non-rigid 3D reconstruction from depth and shading cues, non-rigid surface tracking, as well as the transfer of deformations over different meshes. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Tursun, O.T., Arabadzhiyska, E., Wernikowski, M., et al. 2019. Luminance-Contrast-Aware Foveated Rendering. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2019) 38, 4.
Export
BibTeX
@article{Tursun2019Luminance, TITLE = {Luminance-Contrast-Aware Foveated Rendering}, AUTHOR = {Tursun, Okan Tarhan and Arabadzhiyska, Elena and Wernikowski, Marek and Mantiuk, Rados{\l}aw and Seidel, Hans-Peter and Myszkowski, Karol and Didyk, Piotr}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3306346.3322985}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {38}, NUMBER = {4}, EID = {98}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2019}, }
Endnote
%0 Journal Article %A Tursun, Okan Tarhan %A Arabadzhiyska, Elena %A Wernikowski, Marek %A Mantiuk, Radosław %A Seidel, Hans-Peter %A Myszkowski, Karol %A Didyk, Piotr %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Luminance-Contrast-Aware Foveated Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0003-75D5-9 %R 10.1145/3306346.3322985 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 4 %Z sequence number: 98 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2019 %O ACM SIGGRAPH 2019 Los Angeles, CA, USA, 28 July - 1 August
Winter, M., Mlakar, D., Zayer, R., Seidel, H.-P., and Steinberger, M. 2019. Adaptive Sparse Matrix-Matrix Multiplication on the GPU. PPoPP’19, 24th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, ACM.
Export
BibTeX
@inproceedings{PPOPP:2019:ASPMM, TITLE = {Adaptive Sparse Matrix-Matrix Multiplication on the {GPU}}, AUTHOR = {Winter, Martin and Mlakar, Daniel and Zayer, Rhaleb and Seidel, Hans-Peter and Steinberger, Markus}, LANGUAGE = {eng}, ISBN = {978-1-4503-6225-2}, DOI = {10.1145/3293883.3295701}, PUBLISHER = {ACM}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {PPoPP'19, 24th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming}, PAGES = {68--81}, ADDRESS = {Washington, DC, USA}, }
Endnote
%0 Conference Proceedings %A Winter, Martin %A Mlakar, Daniel %A Zayer, Rhaleb %A Seidel, Hans-Peter %A Steinberger, Markus %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Adaptive Sparse Matrix-Matrix Multiplication on the GPU : %G eng %U http://hdl.handle.net/21.11116/0000-0002-EFE9-B %R 10.1145/3293883.3295701 %D 2019 %B 24th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming %Z date of event: 2019-02-16 - 2019-02-20 %C Washington, DC, USA %B PPoPP'19 %P 68 - 81 %I ACM %@ 978-1-4503-6225-2
Wolski, K., Giunchi, D., Kinuwaki, S., et al. 2019. Selecting Texture Resolution Using a Task-specific Visibility Metric. Computer Graphics Forum (Proc. Pacific Graphics 2019) 38, 7.
Export
BibTeX
@article{Wolski_PG2019, TITLE = {Selecting Texture Resolution Using a Task-specific Visibility Metric}, AUTHOR = {Wolski, Krzysztof and Giunchi,, Daniele and Kinuwaki, Shinichi and Didyk, Piotr and Myszkowski, Karol and Mantiuk, Rafa{\l} K. and Anthony, Steed}, LANGUAGE = {eng}, ISSN = {1467-8659}, DOI = {10.1111/cgf.13871}, PUBLISHER = {Wiley-Blackwell}, ADDRESS = {Oxford, UK}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Computer Graphics Forum (Proc. Pacific Graphics)}, VOLUME = {38}, NUMBER = {7}, PAGES = {685--696}, BOOKTITLE = {27th Annual International Conference on Computer Graphics and Applications (Pacific Graphics 2019)}, }
Endnote
%0 Journal Article %A Wolski, Krzysztof %A Giunchi,, Daniele %A Kinuwaki, Shinichi %A Didyk, Piotr %A Myszkowski, Karol %A Mantiuk, Rafał K. %A Anthony, Steed %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Selecting Texture Resolution Using a Task-specific Visibility Metric : %G eng %U http://hdl.handle.net/21.11116/0000-0004-9BB3-3 %R 10.1111/cgf.13871 %7 2019 %D 2019 %J Computer Graphics Forum %V 38 %N 7 %& 685 %P 685 - 696 %I Wiley-Blackwell %C Oxford, UK %@ false %B 27th Annual International Conference on Computer Graphics and Applications %O Pacific Graphics 2019 PG 2019 Seoul, October 14-17, 2019
Xu, L., Xu, W., Golyanik, V., Habermann, M., Fang, L., and Theobalt, C. 2019a. EventCap: Monocular 3D Capture of High-Speed Human Motions using an Event Camera. http://arxiv.org/abs/1908.11505.
(arXiv: 1908.11505)
Abstract
The high frame rate is a critical requirement for capturing fast human motions. In this setting, existing markerless image-based methods are constrained by the lighting requirement, the high data bandwidth and the consequent high computation overhead. In this paper, we propose EventCap --- the first approach for 3D capturing of high-speed human motions using a single event camera. Our method combines model-based optimization and CNN-based human pose detection to capture high-frequency motion details and to reduce the drifting in the tracking. As a result, we can capture fast motions at millisecond resolution with significantly higher data efficiency than using high frame rate videos. Experiments on our new event-based fast human motion dataset demonstrate the effectiveness and accuracy of our method, as well as its robustness to challenging lighting conditions.
Export
BibTeX
@online{Xu_arXiv1908.11505, TITLE = {{EventCap}: Monocular {3D} Capture of High-Speed Human Motions using an Event Camera}, AUTHOR = {Xu, Lan and Xu, Weipeng and Golyanik, Vladislav and Habermann, Marc and Fang, Lu and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1908.11505}, EPRINT = {1908.11505}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {The high frame rate is a critical requirement for capturing fast human motions. In this setting, existing markerless image-based methods are constrained by the lighting requirement, the high data bandwidth and the consequent high computation overhead. In this paper, we propose EventCap --- the first approach for 3D capturing of high-speed human motions using a single event camera. Our method combines model-based optimization and CNN-based human pose detection to capture high-frequency motion details and to reduce the drifting in the tracking. As a result, we can capture fast motions at millisecond resolution with significantly higher data efficiency than using high frame rate videos. Experiments on our new event-based fast human motion dataset demonstrate the effectiveness and accuracy of our method, as well as its robustness to challenging lighting conditions.}, }
Endnote
%0 Report %A Xu, Lan %A Xu, Weipeng %A Golyanik, Vladislav %A Habermann, Marc %A Fang, Lu %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T EventCap: Monocular 3D Capture of High-Speed Human Motions using an Event Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7D7B-6 %U http://arxiv.org/abs/1908.11505 %D 2019 %X The high frame rate is a critical requirement for capturing fast human motions. In this setting, existing markerless image-based methods are constrained by the lighting requirement, the high data bandwidth and the consequent high computation overhead. In this paper, we propose EventCap --- the first approach for 3D capturing of high-speed human motions using a single event camera. Our method combines model-based optimization and CNN-based human pose detection to capture high-frequency motion details and to reduce the drifting in the tracking. As a result, we can capture fast motions at millisecond resolution with significantly higher data efficiency than using high frame rate videos. Experiments on our new event-based fast human motion dataset demonstrate the effectiveness and accuracy of our method, as well as its robustness to challenging lighting conditions. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
Xu, W., Chatterjee, A., Zollhöfer, M., et al. 2019b. Mo2Cap2: Real-time Mobile 3D Motion Capture with a Cap-mounted Fisheye Camera. IEEE Transactions on Visualization and Computer Graphics (Proc. IEEE VR 2019) 25, 5.
Export
BibTeX
@article{Xu2019Mo2Cap2, TITLE = {{Mo2Cap2}: Real-time Mobile {3D} Motion Capture with a Cap-mounted Fisheye Camera}, AUTHOR = {Xu, Weipeng and Chatterjee, Avishek and Zollh{\"o}fer, Michael and Rhodin, Helge and Fua, Pascal and Seidel, Hans-Peter and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {1077-2626}, DOI = {10.1109/TVCG.2019.2898650}, PUBLISHER = {IEEE}, ADDRESS = {Piscataway, NJ}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {IEEE Transactions on Visualization and Computer Graphics (Proc. IEEE VR)}, VOLUME = {25}, NUMBER = {5}, PAGES = {2093--2101}, BOOKTITLE = {Selected Proceedings IEEE Virtual Reality 2019 (IEEE VR 2019)}, }
Endnote
%0 Journal Article %A Xu, Weipeng %A Chatterjee, Avishek %A Zollhöfer, Michael %A Rhodin, Helge %A Fua, Pascal %A Seidel, Hans-Peter %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Mo2Cap2: Real-time Mobile 3D Motion Capture with a Cap-mounted Fisheye Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0002-F1DB-7 %R 10.1109/TVCG.2019.2898650 %7 2019 %D 2019 %J IEEE Transactions on Visualization and Computer Graphics %V 25 %N 5 %& 2093 %P 2093 - 2101 %I IEEE %C Piscataway, NJ %@ false %B Selected Proceedings IEEE Virtual Reality 2019 %O IEEE VR 2019 Osaka, Japan, March 23rd - 27th
Yenamandra, T., Bernard, F., Wang, J., Mueller, F., and Theobalt, C. 2019a. Convex Optimisation for Inverse Kinematics. http://arxiv.org/abs/1910.11016.
(arXiv: 1910.11016)
Abstract
We consider the problem of inverse kinematics (IK), where one wants to find the parameters of a given kinematic skeleton that best explain a set of observed 3D joint locations. The kinematic skeleton has a tree structure, where each node is a joint that has an associated geometric transformation that is propagated to all its child nodes. The IK problem has various applications in vision and graphics, for example for tracking or reconstructing articulated objects, such as human hands or bodies. Most commonly, the IK problem is tackled using local optimisation methods. A major downside of these approaches is that, due to the non-convex nature of the problem, such methods are prone to converge to unwanted local optima and therefore require a good initialisation. In this paper we propose a convex optimisation approach for the IK problem based on semidefinite programming, which admits a polynomial-time algorithm that globally solves (a relaxation of) the IK problem. Experimentally, we demonstrate that the proposed method significantly outperforms local optimisation methods using different real-world skeletons.
Export
BibTeX
@online{Yenamandra_arXiv1910.11016, TITLE = {Convex Optimisation for Inverse Kinematics}, AUTHOR = {Yenamandra, Tarum and Bernard, Florian and Wang, Jiayi and Mueller, Franziska and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1910.11016}, EPRINT = {1910.11016}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We consider the problem of inverse kinematics (IK), where one wants to find the parameters of a given kinematic skeleton that best explain a set of observed 3D joint locations. The kinematic skeleton has a tree structure, where each node is a joint that has an associated geometric transformation that is propagated to all its child nodes. The IK problem has various applications in vision and graphics, for example for tracking or reconstructing articulated objects, such as human hands or bodies. Most commonly, the IK problem is tackled using local optimisation methods. A major downside of these approaches is that, due to the non-convex nature of the problem, such methods are prone to converge to unwanted local optima and therefore require a good initialisation. In this paper we propose a convex optimisation approach for the IK problem based on semidefinite programming, which admits a polynomial-time algorithm that globally solves (a relaxation of) the IK problem. Experimentally, we demonstrate that the proposed method significantly outperforms local optimisation methods using different real-world skeletons.}, }
Endnote
%0 Report %A Yenamandra, Tarum %A Bernard, Florian %A Wang, Jiayi %A Mueller, Franziska %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Convex Optimisation for Inverse Kinematics : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7DA8-2 %U http://arxiv.org/abs/1910.11016 %D 2019 %X We consider the problem of inverse kinematics (IK), where one wants to find the parameters of a given kinematic skeleton that best explain a set of observed 3D joint locations. The kinematic skeleton has a tree structure, where each node is a joint that has an associated geometric transformation that is propagated to all its child nodes. The IK problem has various applications in vision and graphics, for example for tracking or reconstructing articulated objects, such as human hands or bodies. Most commonly, the IK problem is tackled using local optimisation methods. A major downside of these approaches is that, due to the non-convex nature of the problem, such methods are prone to converge to unwanted local optima and therefore require a good initialisation. In this paper we propose a convex optimisation approach for the IK problem based on semidefinite programming, which admits a polynomial-time algorithm that globally solves (a relaxation of) the IK problem. Experimentally, we demonstrate that the proposed method significantly outperforms local optimisation methods using different real-world skeletons. %K Computer Science, Learning, cs.LG,Computer Science, Computer Vision and Pattern Recognition, cs.CV,Statistics, Machine Learning, stat.ML
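The abstract above models the kinematic skeleton as a tree in which each joint's transform is propagated to its children. As a point of reference, the short sketch below (a hypothetical three-joint chain, rotations restricted to the z-axis for brevity) shows that forward-kinematics pass; an IK solver, whether a local method or the paper's semidefinite relaxation, searches for the joint parameters under which these predicted positions best explain the observed 3D joint locations.

import numpy as np

def rot_z(theta):
    """3x3 rotation about the z-axis."""
    c, s = np.cos(theta), np.sin(theta)
    return np.array([[c, -s, 0.0], [s, c, 0.0], [0.0, 0.0, 1.0]])

def forward_kinematics(parents, offsets, angles):
    """Propagate each joint's transform down the kinematic tree.

    parents[i] -- index of joint i's parent (-1 for the root)
    offsets[i] -- joint i's offset in its parent's frame
    angles[i]  -- joint i's rotation angle (about z, for brevity)
    Returns the world-space 3D position of every joint.
    """
    n = len(parents)
    world_R, world_t = [None] * n, [None] * n
    for i in range(n):                       # assumes parents precede children
        R_local, t_local = rot_z(angles[i]), offsets[i]
        if parents[i] < 0:
            world_R[i], world_t[i] = R_local, t_local
        else:
            p = parents[i]
            world_R[i] = world_R[p] @ R_local
            world_t[i] = world_R[p] @ t_local + world_t[p]
    return np.stack(world_t)

# Toy chain; an IK solver would adjust `angles` so the returned joint
# positions match a set of observed 3D joint locations.
parents = [-1, 0, 1]
offsets = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
angles = np.array([0.3, -0.2, 0.1])
print(forward_kinematics(parents, offsets, angles))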
Yenamandra, T., Bernard, F., Wang, J., Mueller, F., and Theobalt, C. 2019b. Convex Optimisation for Inverse Kinematics. International Conference on 3D Vision, IEEE.
Export
BibTeX
@inproceedings{Yenamandra_3DV2019, TITLE = {Convex Optimisation for Inverse Kinematics}, AUTHOR = {Yenamandra, Tarum and Bernard, Florian and Wang, Jiayi and Mueller, Franziska and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-7281-3131-3}, DOI = {10.1109/3DV.2019.00043}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {International Conference on 3D Vision}, PAGES = {318--327}, ADDRESS = {Qu{\'e}bec City, Canada}, }
Endnote
%0 Conference Proceedings %A Yenamandra, Tarum %A Bernard, Florian %A Wang, Jiayi %A Mueller, Franziska %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Convex Optimisation for Inverse Kinematics : %G eng %U http://hdl.handle.net/21.11116/0000-0005-7B63-2 %R 10.1109/3DV.2019.00043 %D 2019 %B International Conference on 3D Vision %Z date of event: 2019-09-16 - 2019-09-19 %C Québec City, Canada %B International Conference on 3D Vision %P 318 - 327 %I IEEE %@ 978-1-7281-3131-3
Ye, N., Wolski, K., and Mantiuk, R.K. 2019. Predicting Visible Image Differences under Varying Display Brightness and Viewing Distance. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
Export
BibTeX
@inproceedings{Ye19, TITLE = {Predicting Visible Image Differences under Varying Display Brightness and Viewing Distance}, AUTHOR = {Ye, Nanyang and Wolski, Krzysztof and Mantiuk, Rafa{\l} K.}, LANGUAGE = {eng}, ISBN = {978-1-7281-3293-8}, DOI = {10.1109/CVPR.2019.00558}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, PAGES = {5429--5437}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Ye, Nanyang %A Wolski, Krzysztof %A Mantiuk, Rafał K. %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Predicting Visible Image Differences under Varying Display Brightness and Viewing Distance : %G eng %U http://hdl.handle.net/21.11116/0000-0003-2748-1 %R 10.1109/CVPR.2019.00558 %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-16 - 2019-06-20 %C Long Beach, CA, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 5429 - 5437 %I IEEE %@ 978-1-7281-3293-8
Yu, H., Bemana, M., Wernikowski, M., et al. 2019a. A Perception-driven Hybrid Decomposition for Multi-layer Accommodative Displays. IEEE Transactions on Visualization and Computer Graphics (Proc. IEEE VR 2019) 25, 5.
Export
BibTeX
@article{Yu_VR2019, TITLE = {A Perception-driven Hybrid Decomposition for Multi-layer Accommodative Displays}, AUTHOR = {Yu, Hyeonseung and Bemana, Mojtaba and Wernikowski, Marek and Chwesiuk, Micha{\l} and Tursun, Okan Tarhan and Singh, Gurprit and Myszkowski, Karol and Mantiuk, Rados{\l}aw and Seidel, Hans-Peter and Didyk, Piotr}, LANGUAGE = {eng}, ISSN = {1077-2626}, DOI = {10.1109/TVCG.2019.2898821}, PUBLISHER = {IEEE Computer Society}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {IEEE Transactions on Visualization and Computer Graphics (Proc. IEEE VR)}, VOLUME = {25}, NUMBER = {5}, PAGES = {1940--1950}, BOOKTITLE = {Selected Proceedings IEEE Virtual Reality 2019 (IEEE VR 2019)}, EDITOR = {Thomas, Bruce and Welch, Greg and Kuhlen, Torsten and Johnson, Kyle}, }
Endnote
%0 Journal Article %A Yu, Hyeonseung %A Bemana, Mojtaba %A Wernikowski, Marek %A Chwesiuk, Michał %A Tursun, Okan Tarhan %A Singh, Gurprit %A Myszkowski, Karol %A Mantiuk, Radosław %A Seidel, Hans-Peter %A Didyk, Piotr %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T A Perception-driven Hybrid Decomposition for Multi-layer Accommodative Displays : %G eng %U http://hdl.handle.net/21.11116/0000-0002-DCB5-A %R 10.1109/TVCG.2019.2898821 %7 2019 %D 2019 %J IEEE Transactions on Visualization and Computer Graphics %V 25 %N 5 %& 1940 %P 1940 - 1950 %I IEEE Computer Society %C New York, NY %@ false %B Selected Proceedings IEEE Virtual Reality 2019 %O IEEE VR 2019 Osaka, Japan, 23rd - 27th March
Yu, T., Zheng, Z., Zhong, Y., et al. 2019b. SimulCap: Single-View Human Performance Capture with Cloth Simulation. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
Export
BibTeX
@inproceedings{SimulCap19, TITLE = {{SimulCap} : {S}ingle-View Human Performance Capture with Cloth Simulation}, AUTHOR = {Yu, Tao and Zheng, Zerong and Zhong, Yuan and Zhao, Jianhui and Quionhai, Dai and Pons-Moll, Gerard and Liu, Yebin}, LANGUAGE = {eng}, ISBN = {978-1-7281-3293-8}, DOI = {10.1109/CVPR.2019.00565}, PUBLISHER = {IEEE}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, PAGES = {5499--5509}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Yu, Tao %A Zheng, Zerong %A Zhong, Yuan %A Zhao, Jianhui %A Quionhai, Dai %A Pons-Moll, Gerard %A Liu, Yebin %+ External Organizations External Organizations External Organizations External Organizations External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T SimulCap : Single-View Human Performance Capture with Cloth Simulation : %G eng %U http://hdl.handle.net/21.11116/0000-0003-651E-B %R 10.1109/CVPR.2019.00565 %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-16 - 2019-06-20 %C Long Beach, CA, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 5499 - 5509 %I IEEE %@ 978-1-7281-3293-8
Ziegler, M., Bemana, M., Keinert, J., and Myszkowski, K. 2019. Near Real-time Light Field Reconstruction and Rendering for On-set Capture Quality Evaluation. European Light Field Imaging Workshop (ELFI 2019), EURASIP.
Export
BibTeX
@inproceedings{bemana2019near, TITLE = {Near Real-time Light Field Reconstruction and Rendering for On-set Capture Quality Evaluation}, AUTHOR = {Ziegler, Matthias and Bemana, Mojtaba and Keinert, Joachim and Myszkowski, Karol}, LANGUAGE = {eng}, URL = {https://www.eurasip.org/Proceedings/Ext/ELFI_2019/Proceedings.html}, PUBLISHER = {EURASIP}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {European Light Field Imaging Workshop (ELFI 2019)}, ADDRESS = {Borovets, Bulgaria}, }
Endnote
%0 Conference Proceedings %A Ziegler, Matthias %A Bemana, Mojtaba %A Keinert, Joachim %A Myszkowski, Karol %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Near Real-time Light Field Reconstruction and Rendering for On-set Capture Quality Evaluation : %G eng %U http://hdl.handle.net/21.11116/0000-0007-DDC0-7 %D 2019 %B European Light Field Imaging Workshop %Z date of event: 2019-06-04 - 2019-06-06 %C Borovets, Bulgaria %B European Light Field Imaging Workshop %I EURASIP %U https://www.eurasip.org/Proceedings/Ext/ELFI_2019/Proceedings.html
2018
Alldieck, T., Magnor, M.A., Xu, W., Theobalt, C., and Pons-Moll, G. 2018a. Video Based Reconstruction of 3D People Models. http://arxiv.org/abs/1803.04758.
(arXiv: 1803.04758)
Abstract
This paper describes how to obtain accurate 3D body models and texture of arbitrary people from a single, monocular video in which a person is moving. Based on a parametric body model, we present a robust processing pipeline achieving 3D model fits with 5mm accuracy also for clothed people. Our main contribution is a method to nonrigidly deform the silhouette cones corresponding to the dynamic human silhouettes, resulting in a visual hull in a common reference frame that enables surface reconstruction. This enables efficient estimation of a consensus 3D shape, texture and implanted animation skeleton based on a large number of frames. We present evaluation results for a number of test subjects and analyze overall performance. Requiring only a smartphone or webcam, our method enables everyone to create their own fully animatable digital double, e.g., for social VR applications or virtual try-on for online fashion shopping.
Export
BibTeX
@online{Alldieck_arXiv1803.04758, TITLE = {Video Based Reconstruction of {3D} People Models}, AUTHOR = {Alldieck, Thiemo and Magnor, Marcus A. and Xu, Weipeng and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1803.04758}, EPRINT = {1803.04758}, EPRINTTYPE = {arXiv}, YEAR = {2018}, ABSTRACT = {This paper describes how to obtain accurate 3D body models and texture of arbitrary people from a single, monocular video in which a person is moving. Based on a parametric body model, we present a robust processing pipeline achieving 3D model fits with 5mm accuracy also for clothed people. Our main contribution is a method to nonrigidly deform the silhouette cones corresponding to the dynamic human silhouettes, resulting in a visual hull in a common reference frame that enables surface reconstruction. This enables efficient estimation of a consensus 3D shape, texture and implanted animation skeleton based on a large number of frames. We present evaluation results for a number of test subjects and analyze overall performance. Requiring only a smartphone or webcam, our method enables everyone to create their own fully animatable digital double, e.g., for social VR applications or virtual try-on for online fashion shopping.}, }
Endnote
%0 Report %A Alldieck, Thiemo %A Magnor, Marcus A. %A Xu, Weipeng %A Theobalt, Christian %A Pons-Moll, Gerard %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society D2 Extern %T Video Based Reconstruction of 3D People Models : %G eng %U http://hdl.handle.net/21.11116/0000-0001-40CD-0 %U http://arxiv.org/abs/1803.04758 %D 2018 %X This paper describes how to obtain accurate 3D body models and texture of arbitrary people from a single, monocular video in which a person is moving. Based on a parametric body model, we present a robust processing pipeline achieving 3D model fits with 5mm accuracy also for clothed people. Our main contribution is a method to nonrigidly deform the silhouette cones corresponding to the dynamic human silhouettes, resulting in a visual hull in a common reference frame that enables surface reconstruction. This enables efficient estimation of a consensus 3D shape, texture and implanted animation skeleton based on a large number of frames. We present evaluation results for a number of test subjects and analyze overall performance. Requiring only a smartphone or webcam, our method enables everyone to create their own fully animatable digital double, e.g., for social VR applications or virtual try-on for online fashion shopping. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Alldieck, T., Magnor, M.A., Xu, W., Theobalt, C., and Pons-Moll, G. 2018b. Video Based Reconstruction of 3D People Models. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018), IEEE.
Export
BibTeX
@inproceedings{alldieck2018video, TITLE = {Video Based Reconstruction of {3D} People Models}, AUTHOR = {Alldieck, Thiemo and Magnor, Marcus A. and Xu, Weipeng and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, ISBN = {978-1-5386-6420-9}, DOI = {10.1109/CVPR.2018.00875}, PUBLISHER = {IEEE}, YEAR = {2018}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018)}, PAGES = {8387--8397}, ADDRESS = {Salt Lake City, UT, USA}, }
Endnote
%0 Conference Proceedings %A Alldieck, Thiemo %A Magnor, Marcus A. %A Xu, Weipeng %A Theobalt, Christian %A Pons-Moll, Gerard %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society %T Video Based Reconstruction of 3D People Models : %G eng %U http://hdl.handle.net/21.11116/0000-0001-1E24-6 %R 10.1109/CVPR.2018.00875 %D 2018 %B 31st IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2018-06-18 - 2018-06-22 %C Salt Lake City, UT, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 8387 - 8397 %I IEEE %@ 978-1-5386-6420-9
Alldieck, T., Magnor, M.A., Xu, W., Theobalt, C., and Pons-Moll, G. 2018c. Detailed Human Avatars from Monocular Video. 3DV 2018, International Conference on 3D Vision, IEEE.
Export
BibTeX
@inproceedings{Alldieck_3DV2018, TITLE = {Detailed Human Avatars from Monocular Video}, AUTHOR = {Alldieck, Thiemo and Magnor, Marcus A. and Xu, Weipeng and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, ISBN = {978-1-5386-8425-2 ; 978-1-5386-8426-9}, DOI = {10.1109/3DV.2018.00022}, PUBLISHER = {IEEE}, YEAR = {2018}, DATE = {2018}, BOOKTITLE = {3DV 2018 , International Conference on 3D Vision}, PAGES = {98--109}, ADDRESS = {Verona, Italy}, }
Endnote
%0 Conference Proceedings %A Alldieck, Thiemo %A Magnor, Marcus A. %A Xu, Weipeng %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society %T Detailed Human Avatars from Monocular Video : %G eng %U http://hdl.handle.net/21.11116/0000-0002-5C40-F %R 10.1109/3DV.2018.00022 %D 2018 %B International Conference on 3D Vision %Z date of event: 2018-09-05 - 2018-09-08 %C Verona, Italy %B 3DV 2018 %P 98 - 109 %I IEEE %@ 978-1-5386-8425-2 978-1-5386-8426-9
Alldieck, T., Magnor, M.A., Xu, W., Theobalt, C., and Pons-Moll, G. 2018d. Detailed Human Avatars from Monocular Video. http://arxiv.org/abs/1808.01338.
(arXiv: 1808.01338)
Abstract
We present a novel method for high detail-preserving human avatar creation from monocular video. A parameterized body model is refined and optimized to maximally resemble subjects from a video showing them from all sides. Our avatars feature a natural face, hairstyle, clothes with garment wrinkles, and high-resolution texture. Our paper contributes facial landmark and shading-based human body shape refinement, a semantic texture prior, and a novel texture stitching strategy, resulting in the most sophisticated-looking human avatars obtained from a single video to date. Numerous results show the robustness and versatility of our method. A user study illustrates its superiority over the state-of-the-art in terms of identity preservation, level of detail, realism, and overall user preference.
Export
BibTeX
@online{Alldieck_arXiv1808.01338, TITLE = {Detailed Human Avatars from Monocular Video}, AUTHOR = {Alldieck, Thiemo and Magnor, Marcus A. and Xu, Weipeng and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1808.01338}, EPRINT = {1808.01338}, EPRINTTYPE = {arXiv}, YEAR = {2018}, ABSTRACT = {We present a novel method for high detail-preserving human avatar creation from monocular video. A parameterized body model is refined and optimized to maximally resemble subjects from a video showing them from all sides. Our avatars feature a natural face, hairstyle, clothes with garment wrinkles, and high-resolution texture. Our paper contributes facial landmark and shading-based human body shape refinement, a semantic texture prior, and a novel texture stitching strategy, resulting in the most sophisticated-looking human avatars obtained from a single video to date. Numerous results show the robustness and versatility of our method. A user study illustrates its superiority over the state-of-the-art in terms of identity preservation, level of detail, realism, and overall user preference.}, }
Endnote
%0 Report %A Alldieck, Thiemo %A Magnor, Marcus A. %A Xu, Weipeng %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society %T Detailed Human Avatars from Monocular Video : %G eng %U http://hdl.handle.net/21.11116/0000-0002-5E0F-6 %U http://arxiv.org/abs/1808.01338 %D 2018 %X We present a novel method for high detail-preserving human avatar creation from monocular video. A parameterized body model is refined and optimized to maximally resemble subjects from a video showing them from all sides. Our avatars feature a natural face, hairstyle, clothes with garment wrinkles, and high-resolution texture. Our paper contributes facial landmark and shading-based human body shape refinement, a semantic texture prior, and a novel texture stitching strategy, resulting in the most sophisticated-looking human avatars obtained from a single video to date. Numerous results show the robustness and versatility of our method. A user study illustrates its superiority over the state-of-the-art in terms of identity preservation, level of detail, realism, and overall user preference. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Beigpour, S., Shekhar, S., Mansouryar, M., Myszkowski, K., and Seidel, H.-P. 2018. Light-Field Appearance Editing Based on Intrinsic Decomposition. Journal of Perceptual Imaging 1, 1.
Export
BibTeX
@article{Beigpour2018, TITLE = {Light-Field Appearance Editing Based on Intrinsic Decomposition}, AUTHOR = {Beigpour, Shida and Shekhar, Sumit and Mansouryar, Mohsen and Myszkowski, Karol and Seidel, Hans-Peter}, LANGUAGE = {eng}, DOI = {10.2352/J.Percept.Imaging.2018.1.1.010502}, YEAR = {2018}, JOURNAL = {Journal of Perceptual Imaging}, VOLUME = {1}, NUMBER = {1}, PAGES = {1--15}, EID = {10502}, }
Endnote
%0 Journal Article %A Beigpour, Shida %A Shekhar, Sumit %A Mansouryar, Mohsen %A Myszkowski, Karol %A Seidel, Hans-Peter %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Light-Field Appearance Editing Based on Intrinsic Decomposition : %G eng %U http://hdl.handle.net/21.11116/0000-0001-5F88-C %R 10.2352/J.Percept.Imaging.2018.1.1.010502 %7 2018 %D 2018 %J Journal of Perceptual Imaging %O JPI %V 1 %N 1 %& 1 %P 1 - 15 %Z sequence number: 10502
Bernard, F., Thunberg, J., Swoboda, P., and Theobalt, C. 2018a. Higher-order Projected Power Iterations for Scalable Multi-Matching. http://arxiv.org/abs/1811.10541.
(arXiv: 1811.10541)
Abstract
The matching of multiple objects (e.g. shapes or images) is a fundamental problem in vision and graphics. In order to robustly handle ambiguities, noise and repetitive patterns in challenging real-world settings, it is essential to take geometric consistency between points into account. Computationally, the multi-matching problem is difficult. It can be phrased as simultaneously solving multiple (NP-hard) quadratic assignment problems (QAPs) that are coupled via cycle-consistency constraints. The main limitations of existing multi-matching methods are that they either ignore geometric consistency and thus have limited robustness, or they are restricted to small-scale problems due to their (relatively) high computational cost. We address these shortcomings by introducing a Higher-order Projected Power Iteration method, which is (i) efficient and scales to tens of thousands of points, (ii) straightforward to implement, (iii) able to incorporate geometric consistency, and (iv) guarantees cycle-consistent multi-matchings. Experimentally we show that our approach is superior to existing methods.
Export
BibTeX
@online{Bernard_arXIv1811.10541, TITLE = {Higher-order Projected Power Iterations for Scalable Multi-Matching}, AUTHOR = {Bernard, Florian and Thunberg, Johan and Swoboda, Paul and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1811.10541}, EPRINT = {1811.10541}, EPRINTTYPE = {arXiv}, YEAR = {2018}, ABSTRACT = {The matching of multiple objects (e.g. shapes or images) is a fundamental problem in vision and graphics. In order to robustly handle ambiguities, noise and repetitive patterns in challenging real-world settings, it is essential to take geometric consistency between points into account. Computationally, the multi-matching problem is difficult. It can be phrased as simultaneously solving multiple (NP-hard) quadratic assignment problems (QAPs) that are coupled via cycle-consistency constraints. The main limitations of existing multi-matching methods are that they either ignore geometric consistency and thus have limited robustness, or they are restricted to small-scale problems due to their (relatively) high computational cost. We address these shortcomings by introducing a Higher-order Projected Power Iteration method, which is (i) efficient and scales to tens of thousands of points, (ii) straightforward to implement, (iii) able to incorporate geometric consistency, and (iv) guarantees cycle-consistent multi-matchings. Experimentally we show that our approach is superior to existing methods.}, }
Endnote
%0 Report %A Bernard, Florian %A Thunberg, Johan %A Swoboda, Paul %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Higher-order Projected Power Iterations for Scalable Multi-Matching : %G eng %U http://hdl.handle.net/21.11116/0000-0002-A8D0-5 %U http://arxiv.org/abs/1811.10541 %D 2018 %X The matching of multiple objects (e.g. shapes or images) is a fundamental problem in vision and graphics. In order to robustly handle ambiguities, noise and repetitive patterns in challenging real-world settings, it is essential to take geometric consistency between points into account. Computationally, the multi-matching problem is difficult. It can be phrased as simultaneously solving multiple (NP-hard) quadratic assignment problems (QAPs) that are coupled via cycle-consistency constraints. The main limitations of existing multi-matching methods are that they either ignore geometric consistency and thus have limited robustness, or they are restricted to small-scale problems due to their (relatively) high computational cost. We address these shortcomings by introducing a Higher-order Projected Power Iteration method, which is (i) efficient and scales to tens of thousands of points, (ii) straightforward to implement, (iii) able to incorporate geometric consistency, and (iv) guarantees cycle-consistent multi-matchings. Experimentally we show that our approach is superior to existing methods. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Statistics, Machine Learning, stat.ML
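For context on the entry above: a projected power iteration alternates a power-iteration step on an affinity matrix with a projection back towards the feasible set of matchings. The sketch below illustrates that principle for a single pairwise quadratic assignment relaxation with a greedy discretisation at the end; it is only a schematic of the projected-power-iteration idea, not the paper's higher-order, cycle-consistent multi-matching algorithm.

import numpy as np

def projected_power_iteration(W, n, iters=50):
    """Schematic projected power iteration for one pairwise matching.

    W -- (n*n, n*n) non-negative pairwise affinity matrix of a QAP relaxation
    n -- number of points per object
    Each step multiplies the flattened assignment by W and projects it back
    to the non-negative unit sphere; the final iterate is discretised
    greedily into a permutation matrix.
    """
    x = np.ones(n * n) / n
    for _ in range(iters):
        x = np.maximum(W @ x, 0.0)           # power-iteration step + non-negativity
        x /= np.linalg.norm(x) + 1e-12       # renormalise
    X = x.reshape(n, n).copy()
    P = np.zeros_like(X)
    for _ in range(n):                       # greedy projection to a permutation
        i, j = np.unravel_index(np.argmax(X), X.shape)
        P[i, j] = 1.0
        X[i, :], X[:, j] = -np.inf, -np.inf
    return P

# Toy usage with a random symmetric non-negative affinity matrix:
rng = np.random.default_rng(0)
W = rng.random((9, 9))
print(projected_power_iteration((W + W.T) / 2, 3))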
Bernard, F., Theobalt, C., and Moeller, M. 2018b. DS*: Tighter Lifting-Free Convex Relaxations for Quadratic Matching Problems. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018), IEEE.
Export
BibTeX
@inproceedings{BernardCVPR2018, TITLE = {DS*: {T}ighter Lifting-Free Convex Relaxations for Quadratic Matching Problems}, AUTHOR = {Bernard, Florian and Theobalt, Christian and Moeller, Michael}, LANGUAGE = {eng}, ISBN = {978-1-5386-6420-9}, DOI = {10.1109/CVPR.2018.00453}, PUBLISHER = {IEEE}, YEAR = {2018}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018)}, PAGES = {4310--4319}, ADDRESS = {Salt Lake City, UT, USA}, }
Endnote
%0 Conference Proceedings %A Bernard, Florian %A Theobalt, Christian %A Moeller, Michael %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T DS*: Tighter Lifting-Free Convex Relaxations for Quadratic Matching Problems : %G eng %U http://hdl.handle.net/21.11116/0000-0002-E92F-4 %R 10.1109/CVPR.2018.00453 %D 2018 %B 31st IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2018-06-18 - 2018-06-22 %C Salt Lake City, UT, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 4310 - 4319 %I IEEE %@ 978-1-5386-6420-9
Bernard, F., Thunberg, J., Goncalves, J., and Theobalt, C. 2018c. Synchronisation of Partial Multi-Matchings via Non-negative Factorisations. http://arxiv.org/abs/1803.06320.
(arXiv: 1803.06320)
Abstract
In this work we study permutation synchronisation for the challenging case of partial permutations, which plays an important role for the problem of matching multiple objects (e.g. images or shapes). The term synchronisation refers to the property that the set of pairwise matchings is cycle-consistent, i.e. in the full matching case all compositions of pairwise matchings over cycles must be equal to the identity. Motivated by clustering and matrix factorisation perspectives of cycle-consistency, we derive an algorithm to tackle the permutation synchronisation problem based on non-negative factorisations. In order to deal with the inherent non-convexity of the permutation synchronisation problem, we use an initialisation procedure based on a novel rotation scheme applied to the solution of the spectral relaxation. Moreover, this rotation scheme facilitates a convenient Euclidean projection to obtain a binary solution after solving our relaxed problem. In contrast to state-of-the-art methods, our approach is guaranteed to produce cycle-consistent results. We experimentally demonstrate the efficacy of our method and show that it achieves better results compared to existing methods.
Export
BibTeX
@online{Bernard_arXiv1803.06320, TITLE = {Synchronisation of Partial Multi-Matchings via Non-negative Factorisations}, AUTHOR = {Bernard, Florian and Thunberg, Johan and Goncalves, Jorge and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1803.06320}, EPRINT = {1803.06320}, EPRINTTYPE = {arXiv}, YEAR = {2018}, ABSTRACT = {In this work we study permutation synchronisation for the challenging case of partial permutations, which plays an important role for the problem of matching multiple objects (e.g. images or shapes). The term synchronisation refers to the property that the set of pairwise matchings is cycle-consistent, i.e. in the full matching case all compositions of pairwise matchings over cycles must be equal to the identity. Motivated by clustering and matrix factorisation perspectives of cycle-consistency, we derive an algorithm to tackle the permutation synchronisation problem based on non-negative factorisations. In order to deal with the inherent non-convexity of the permutation synchronisation problem, we use an initialisation procedure based on a novel rotation scheme applied to the solution of the spectral relaxation. Moreover, this rotation scheme facilitates a convenient Euclidean projection to obtain a binary solution after solving our relaxed problem. In contrast to state-of-the-art methods, our approach is guaranteed to produce cycle-consistent results. We experimentally demonstrate the efficacy of our method and show that it achieves better results compared to existing methods.}, }
Endnote
%0 Report %A Bernard, Florian %A Thunberg, Johan %A Goncalves, Jorge %A Theobalt, Christian %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Synchronisation of Partial Multi-Matchings via Non-negative Factorisations : %G eng %U http://hdl.handle.net/21.11116/0000-0001-40BC-3 %U http://arxiv.org/abs/1803.06320 %D 2018 %X In this work we study permutation synchronisation for the challenging case of partial permutations, which plays an important role for the problem of matching multiple objects (e.g. images or shapes). The term synchronisation refers to the property that the set of pairwise matchings is cycle-consistent, i.e. in the full matching case all compositions of pairwise matchings over cycles must be equal to the identity. Motivated by clustering and matrix factorisation perspectives of cycle-consistency, we derive an algorithm to tackle the permutation synchronisation problem based on non-negative factorisations. In order to deal with the inherent non-convexity of the permutation synchronisation problem, we use an initialisation procedure based on a novel rotation scheme applied to the solution of the spectral relaxation. Moreover, this rotation scheme facilitates a convenient Euclidean projection to obtain a binary solution after solving our relaxed problem. In contrast to state-of-the-art methods, our approach is guaranteed to produce cycle-consistent results. We experimentally demonstrate the efficacy of our method and show that it achieves better results compared to existing methods. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Mathematics, Optimization and Control, math.OC,Statistics, Machine Learning, stat.ML
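The factorisation view mentioned in the abstract above can be made concrete: if every object's points are mapped to a common universe, the induced pairwise matchings P_kl = A_k A_l^T are cycle-consistent by construction. The toy sketch below (hypothetical labels and helper names) illustrates that property for partial matchings; it is not the paper's non-negative factorisation algorithm, which instead recovers such object-to-universe maps from noisy, possibly inconsistent pairwise matchings.

import numpy as np

def synchronise_from_universe(universe_labels):
    """Build cycle-consistent pairwise matchings from object-to-universe maps.

    universe_labels[k][i] -- universe index of point i in object k,
    or -1 if the point is unmatched (partial matching).
    Returns a dict mapping (k, l) to the pairwise matching A_k @ A_l.T.
    """
    def to_matrix(labels, universe_size):
        A = np.zeros((len(labels), universe_size))
        for i, u in enumerate(labels):
            if u >= 0:
                A[i, u] = 1.0
        return A

    m = max(max(labels) for labels in universe_labels) + 1
    A = [to_matrix(labels, m) for labels in universe_labels]
    return {(a, b): A[a] @ A[b].T
            for a in range(len(A)) for b in range(len(A)) if a != b}

# Three toy objects with three points each; composing 0->1 and 1->2 never
# exceeds the direct matching 0->2 (and equals it when nothing passes
# through an unmatched point), which is cycle-consistency for partial maps.
P = synchronise_from_universe([[0, 1, 2], [1, 0, 2], [2, 0, -1]])
assert np.all(P[(0, 1)] @ P[(1, 2)] <= P[(0, 2)] + 1e-9)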
Castelli Aleardi, L., Salihoglu, S., Singh, G., and Ovsjanikov, M. 2018. Spectral Measures of Distortion for Change Detection in Dynamic Graphs. https://hal.archives-ouvertes.fr/hal-01864079v2.
Export
BibTeX
@misc{Castelli_hal-01864079v2, TITLE = {Spectral Measures of Distortion for Change Detection in Dynamic Graphs}, AUTHOR = {Castelli Aleardi, Luca and Salihoglu, Semih and Singh, Gurprit and Ovsjanikov, Maks}, LANGUAGE = {eng}, URL = {https://hal.archives-ouvertes.fr/hal-01864079v2}, PUBLISHER = {HAL}, YEAR = {2018}, }
Endnote
%0 Report %A Castelli Aleardi, Luca %A Salihoglu, Semih %A Singh, Gurprit %A Ovsjanikov, Maks %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Spectral Measures of Distortion for Change Detection in Dynamic Graphs : %G eng %U http://hdl.handle.net/21.11116/0000-0002-9F5D-4 %F OTHER: hal-01864079v2 %U https://hal.archives-ouvertes.fr/hal-01864079v2 %I HAL %D 2018 %U https://hal.archives-ouvertes.fr/hal-01864079/
Chen, R., Gotsman, C., and Hormann, K. 2018a. Path Planning with Divergence-Based Distance Functions. Computer Aided Geometric Design 66.
Export
BibTeX
@article{Chen_CAGD2018, TITLE = {Path Planning with Divergence-Based Distance Functions}, AUTHOR = {Chen, Renjie and Gotsman, Craig and Hormann, Kai}, LANGUAGE = {eng}, ISSN = {0167-8396}, DOI = {10.1016/j.cagd.2018.09.002}, PUBLISHER = {North-Holland}, ADDRESS = {Amsterdam}, YEAR = {2018}, DATE = {2018}, JOURNAL = {Computer Aided Geometric Design}, VOLUME = {66}, PAGES = {52--74}, }
Endnote
%0 Journal Article %A Chen, Renjie %A Gotsman, Craig %A Hormann, Kai %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Path Planning with Divergence-Based Distance Functions : %G eng %U http://hdl.handle.net/21.11116/0000-0002-72C1-3 %R 10.1016/j.cagd.2018.09.002 %7 2018 %D 2018 %J Computer Aided Geometric Design %V 66 %& 52 %P 52 - 74 %I North-Holland %C Amsterdam %@ false
Chen, R., Gotsman, C., and Hormann, K. 2018b. Efficient Path Generation with Reduced Coordinates. Computer Graphics Forum (Proc. Eurographics Symposium on Geometric Processing 2018) 37, 5.
Export
BibTeX
@article{ChenSGP2018, TITLE = {Efficient Path Generation with Reduced Coordinates}, AUTHOR = {Chen, Renjie and Gotsman, Craig and Hormann, Kai}, LANGUAGE = {eng}, ISSN = {0167-7055}, DOI = {10.1111/cgf.13489}, PUBLISHER = {Wiley-Blackwell}, ADDRESS = {Chichester}, YEAR = {2018}, DATE = {2018}, JOURNAL = {Computer Graphics Forum (Proc. Eurographics Symposium on Geometric Processing)}, VOLUME = {37}, NUMBER = {5}, PAGES = {37--48}, BOOKTITLE = {Symposium on Geometry Processing 2018 (Eurographics Symposium on Geometric Processing 2018)}, EDITOR = {Ju, Tao and Vaxman, Amir}, }
Endnote
%0 Journal Article %A Chen, Renjie %A Gotsman, Craig %A Hormann, Kai %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Efficient Path Generation with Reduced Coordinates : %G eng %U http://hdl.handle.net/21.11116/0000-0001-E6D6-A %R 10.1111/cgf.13489 %7 2018 %D 2018 %J Computer Graphics Forum %V 37 %N 5 %& 37 %P 37 - 48 %I Wiley-Blackwell %C Chichester %@ false %B Symposium on Geometry Processing 2018 %O Paris, France, July 7 – 11, 2018 SGP 2018 Eurographics Symposium on Geometric Processing 2018
Du, X., Liu, X., Yan, D.-M., Jiang, C., Ye, J., and Zhang, H. 2018. Field-Aligned Isotropic Surface Remeshing. Computer Graphics Forum 37, 6.
Export
BibTeX
@article{DuCGF2018, TITLE = {Field-Aligned Isotropic Surface Remeshing}, AUTHOR = {Du, Xingyi and Liu, Xiaohan and Yan, Dong-Ming and Jiang, Caigui and Ye, Juntao and Zhang, Hui}, LANGUAGE = {eng}, ISSN = {0167-7055}, DOI = {10.1111/cgf.13329}, PUBLISHER = {Blackwell-Wiley}, ADDRESS = {Oxford}, YEAR = {2018}, DATE = {2018}, JOURNAL = {Computer Graphics Forum}, VOLUME = {37}, NUMBER = {6}, PAGES = {343--357}, }
Endnote
%0 Journal Article %A Du, Xingyi %A Liu, Xiaohan %A Yan, Dong-Ming %A Jiang, Caigui %A Ye, Juntao %A Zhang, Hui %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Field-Aligned Isotropic Surface Remeshing : %G eng %U http://hdl.handle.net/21.11116/0000-0001-E209-6 %R 10.1111/cgf.13329 %7 2018 %D 2018 %J Computer Graphics Forum %O Computer Graphics Forum : journal of the European Association for Computer Graphics Comput. Graph. Forum %V 37 %N 6 %& 343 %P 343 - 357 %I Blackwell-Wiley %C Oxford %@ false
Golla, B., Seidel, H.-P., and Chen, R. 2018. Piecewise Linear Mapping Optimization Based on the Complex View. Computer Graphics Forum (Proc. Pacific Graphics 2018) 37, 7.
Export
BibTeX
@article{Golla_PG2018, TITLE = {Piecewise Linear Mapping Optimization Based on the Complex View}, AUTHOR = {Golla, Bj{\"o}rn and Seidel, Hans-Peter and Chen, Renjie}, LANGUAGE = {eng}, ISSN = {1467-8659}, DOI = {10.1111/cgf.13563}, PUBLISHER = {Wiley-Blackwell}, ADDRESS = {Oxford, UK}, YEAR = {2018}, DATE = {2018}, JOURNAL = {Computer Graphics Forum (Proc. Pacific Graphics)}, VOLUME = {37}, NUMBER = {7}, PAGES = {233--243}, BOOKTITLE = {The 26th Pacific Conference on Computer Graphics and Applications (Pacific Graphics 2018)}, }
Endnote
%0 Journal Article %A Golla, Björn %A Seidel, Hans-Peter %A Chen, Renjie %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Piecewise Linear Mapping Optimization Based on the Complex View : %G eng %U http://hdl.handle.net/21.11116/0000-0002-72CD-7 %R 10.1111/cgf.13563 %7 2018 %D 2018 %J Computer Graphics Forum %V 37 %N 7 %& 233 %P 233 - 243 %I Wiley-Blackwell %C Oxford, UK %@ false %B The 26th Pacific Conference on Computer Graphics and Applications %O Pacific Graphics 2018 PG 2018 Hong Kong, 8-11 October 2018
Hajipour, H. 2018. Weakly-supervised Surface Reconstruction Using Floating Radial Basis Functions. Master's thesis, Universität des Saarlandes, Saarbrücken.
Export
BibTeX
@mastersthesis{HajipourMSc2018, TITLE = {Weakly-supervised Surface Reconstruction Using Floating Radial Basis Functions}, AUTHOR = {Hajipour, Hossein}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2018}, DATE = {2018}, }
Endnote
%0 Thesis %A Hajipour, Hossein %Y Theobalt, Christian %A referee: Tewari, Ayush %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Weakly-supervised Surface Reconstruction Using Floating Radial Basis Functions : %G eng %U http://hdl.handle.net/21.11116/0000-0003-E013-A %I Universität des Saarlandes %C Saarbrücken %D 2018 %V master %9 master
Hladký, J. and Ďurikovič, R. 2018. Fire Simulation in 3D Computer Animation with Turbulence Dynamics including Fire Separation and Profile Modeling. International Journal of Networking and Computing 8, 2.
Export
BibTeX
@article{DBLP:journals/ijnc/HladkyD18, TITLE = {Fire Simulation in {3D} Computer Animation with Turbulence Dynamics including Fire Separation and Profile Modeling}, AUTHOR = {Hladk{\'y}, Jozef and {\v D}urikovi{\v c}, Roman}, LANGUAGE = {eng}, ISSN = {2185-2847}, URL = {http://www.ijnc.org/index.php/ijnc/article/view/180}, YEAR = {2018}, JOURNAL = {International Journal of Networking and Computing}, VOLUME = {8}, NUMBER = {2}, PAGES = {186--204}, }
Endnote
%0 Journal Article %A Hladký, Jozef %A Ďurikovič, Roman %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Fire Simulation in 3D Computer Animation with Turbulence Dynamics including Fire Separation and Profile Modeling : %G eng %U http://hdl.handle.net/21.11116/0000-0002-F652-C %U http://www.ijnc.org/index.php/ijnc/article/view/180 %7 2018 %D 2018 %J International Journal of Networking and Computing %V 8 %N 2 %& 186 %P 186 - 204 %@ false
Kenzel, M., Kerbl, B., Schmalstieg, D., and Steinberger, M. 2018. A High-Performance Software Graphics Pipeline Architecture for the GPU. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2018) 37, 4.
Export
BibTeX
@article{Kenzel_SIGGRAPH2018, TITLE = {A High-Performance Software Graphics Pipeline Architecture for the {GPU}}, AUTHOR = {Kenzel, Michael and Kerbl, Bernhard and Schmalstieg, Dieter and Steinberger, Markus}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3197517.3201374}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2018}, DATE = {2018}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {37}, NUMBER = {4}, PAGES = {1--15}, EID = {140}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2018}, }
Endnote
%0 Journal Article %A Kenzel, Michael %A Kerbl, Bernhard %A Schmalstieg, Dieter %A Steinberger, Markus %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T A High-Performance Software Graphics Pipeline Architecture for the GPU : %G eng %U http://hdl.handle.net/21.11116/0000-0002-72E1-F %R 10.1145/3197517.3201374 %7 2018 %D 2018 %J ACM Transactions on Graphics %V 37 %N 4 %& 1 %P 1 - 15 %Z sequence number: 140 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2018 %O ACM SIGGRAPH 2018 Vancouver, Canada , 12 - 16 August
Kim, H., Zollhöfer, M., Tewari, A., Thies, J., Richardt, C., and Theobalt, C. 2018a. InverseFaceNet: Deep Monocular Inverse Face Rendering. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018), IEEE.
Export
BibTeX
@inproceedings{kim2018inverse, TITLE = {{InverseFaceNet}: {D}eep Monocular Inverse Face Rendering}, AUTHOR = {Kim, Hyeongwoo and Zollh{\"o}fer, Michael and Tewari, Ayush and Thies, Justus and Richardt, Christian and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-5386-6420-9}, DOI = {10.1109/CVPR.2018.00486}, PUBLISHER = {IEEE}, YEAR = {2018}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018)}, PAGES = {4625--4634}, ADDRESS = {Salt Lake City, UT, USA}, }
Endnote
%0 Conference Proceedings %A Kim, Hyeongwoo %A Zollhöfer, Michael %A Tewari, Ayush %A Thies, Justus %A Richardt, Christian %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T InverseFaceNet: Deep Monocular Inverse Face Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0002-F476-6 %R 10.1109/CVPR.2018.00486 %D 2018 %B 31st IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2018-06-18 - 2018-06-22 %C Salt Lake City, UT, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 4625 - 4634 %I IEEE %@ 978-1-5386-6420-9
Kim, H., Garrido, P., Tewari, A., et al. 2018b. Deep Video Portraits. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2018) 37, 4.
Export
BibTeX
@article{Kim_SIGGRAPH2018, TITLE = {Deep Video Portraits}, AUTHOR = {Kim, Hyeongwoo and Garrido, Pablo and Tewari, Ayush and Xu, Weipeng and Thies, Justus and Nie{\ss}ner, Matthias and P{\'e}rez, Patrick and Richardt, Christian and Zollh{\"o}fer, Michael and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3197517.3201283}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2018}, DATE = {2018}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {37}, NUMBER = {4}, PAGES = {1--14}, EID = {163}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2018}, }
Endnote
%0 Journal Article %A Kim, Hyeongwoo %A Garrido, Pablo %A Tewari, Ayush %A Xu, Weipeng %A Thies, Justus %A Nießner, Matthias %A Pérez, Patrick %A Richardt, Christian %A Zollhöfer, Michael %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Deep Video Portraits : %G eng %U http://hdl.handle.net/21.11116/0000-0002-5D8D-8 %R 10.1145/3197517.3201283 %7 2018 %D 2018 %J ACM Transactions on Graphics %V 37 %N 4 %& 1 %P 1 - 14 %Z sequence number: 163 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2018 %O ACM SIGGRAPH 2018 Vancouver, Canada , 12 - 16 August
Kim, H., Garrido, P., Tewari, A., et al. 2018c. Deep Video Portraits. http://arxiv.org/abs/1805.11714.
(arXiv: 1805.11714)
Abstract
We present a novel approach that enables photo-realistic re-animation of portrait videos using only an input video. In contrast to existing approaches that are restricted to manipulations of facial expressions only, we are the first to transfer the full 3D head position, head rotation, face expression, eye gaze, and eye blinking from a source actor to a portrait video of a target actor. The core of our approach is a generative neural network with a novel space-time architecture. The network takes as input synthetic renderings of a parametric face model, based on which it predicts photo-realistic video frames for a given target actor. The realism in this rendering-to-video transfer is achieved by careful adversarial training, and as a result, we can create modified target videos that mimic the behavior of the synthetically-created input. In order to enable source-to-target video re-animation, we render a synthetic target video with the reconstructed head animation parameters from a source video, and feed it into the trained network -- thus taking full control of the target. With the ability to freely recombine source and target parameters, we are able to demonstrate a large variety of video rewrite applications without explicitly modeling hair, body or background. For instance, we can reenact the full head using interactive user-controlled editing, and realize high-fidelity visual dubbing. To demonstrate the high quality of our output, we conduct an extensive series of experiments and evaluations, where for instance a user study shows that our video edits are hard to detect.
Export
BibTeX
@online{Kim_arXiv1805.11714, TITLE = {Deep Video Portraits}, AUTHOR = {Kim, Hyeongwoo and Garrido, Pablo and Tewari, Ayush and Xu, Weipeng and Thies, Justus and Nie{\ss}ner, Matthias and P{\'e}rez, Patrick and Richardt, Christian and Zollh{\"o}fer, Michael and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1805.11714}, EPRINT = {1805.11714}, EPRINTTYPE = {arXiv}, YEAR = {2018}, ABSTRACT = {We present a novel approach that enables photo-realistic re-animation of portrait videos using only an input video. In contrast to existing approaches that are restricted to manipulations of facial expressions only, we are the first to transfer the full 3D head position, head rotation, face expression, eye gaze, and eye blinking from a source actor to a portrait video of a target actor. The core of our approach is a generative neural network with a novel space-time architecture. The network takes as input synthetic renderings of a parametric face model, based on which it predicts photo-realistic video frames for a given target actor. The realism in this rendering-to-video transfer is achieved by careful adversarial training, and as a result, we can create modified target videos that mimic the behavior of the synthetically-created input. In order to enable source-to-target video re-animation, we render a synthetic target video with the reconstructed head animation parameters from a source video, and feed it into the trained network -- thus taking full control of the target. With the ability to freely recombine source and target parameters, we are able to demonstrate a large variety of video rewrite applications without explicitly modeling hair, body or background. For instance, we can reenact the full head using interactive user-controlled editing, and realize high-fidelity visual dubbing. To demonstrate the high quality of our output, we conduct an extensive series of experiments and evaluations, where for instance a user study shows that our video edits are hard to detect.}, }
Endnote
%0 Report %A Kim, Hyeongwoo %A Garrido, Pablo %A Tewari, Ayush %A Xu, Weipeng %A Thies, Justus %A Nießner, Matthias %A Pérez, Patrick %A Richardt, Christian %A Zollhöfer, Michael %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Deep Video Portraits : %G eng %U http://hdl.handle.net/21.11116/0000-0002-5E1E-5 %U http://arxiv.org/abs/1805.11714 %D 2018 %X We present a novel approach that enables photo-realistic re-animation of portrait videos using only an input video. In contrast to existing approaches that are restricted to manipulations of facial expressions only, we are the first to transfer the full 3D head position, head rotation, face expression, eye gaze, and eye blinking from a source actor to a portrait video of a target actor. The core of our approach is a generative neural network with a novel space-time architecture. The network takes as input synthetic renderings of a parametric face model, based on which it predicts photo-realistic video frames for a given target actor. The realism in this rendering-to-video transfer is achieved by careful adversarial training, and as a result, we can create modified target videos that mimic the behavior of the synthetically-created input. In order to enable source-to-target video re-animation, we render a synthetic target video with the reconstructed head animation parameters from a source video, and feed it into the trained network -- thus taking full control of the target. With the ability to freely recombine source and target parameters, we are able to demonstrate a large variety of video rewrite applications without explicitly modeling hair, body or background. For instance, we can reenact the full head using interactive user-controlled editing, and realize high-fidelity visual dubbing. To demonstrate the high quality of our output, we conduct an extensive series of experiments and evaluations, where for instance a user study shows that our video edits are hard to detect. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Graphics, cs.GR
Lee, H. and Didyk, P. 2018. Real-time Apparent Resolution Enhancement for Head-mounted Displays. Proceedings of the ACM on Computer Graphics and Interactive Techniques1, 1.
Export
BibTeX
@article{Lee:2018:RAR:3242771.3203202, TITLE = {Real-time Apparent Resolution Enhancement for Head-mounted Displays}, AUTHOR = {Lee, Haebom and Didyk, Piotr}, LANGUAGE = {eng}, ISSN = {2577-6193}, DOI = {10.1145/3203202}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2018}, JOURNAL = {Proceedings of the ACM on Computer Graphics and Interactive Techniques}, VOLUME = {1}, NUMBER = {1}, PAGES = {1--15}, EID = {19}, }
Endnote
%0 Journal Article %A Lee, Haebom %A Didyk, Piotr %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Real-time Apparent Resolution Enhancement for Head-mounted Displays : %G eng %U http://hdl.handle.net/21.11116/0000-0002-DB97-D %R 10.1145/3203202 %7 2018 %D 2018 %J Proceedings of the ACM on Computer Graphics and Interactive Techniques %V 1 %N 1 %& 1 %P 1 - 15 %Z sequence number: 19 %I ACM %C New York, NY %@ false
Leimkühler, T., Kellnhofer, P., Ritschel, T., Myszkowski, K., and Seidel, H.-P. 2018a. Perceptual Real-Time 2D-to-3D Conversion Using Cue Fusion. IEEE Transactions on Visualization and Computer Graphics24, 6.
Export
BibTeX
@article{Leimkuehler2018, TITLE = {Perceptual real-time {2D}-to-{3D} conversion using cue fusion}, AUTHOR = {Leimk{\"u}hler, Thomas and Kellnhofer, Petr and Ritschel, Tobias and Myszkowski, Karol and Seidel, Hans-Peter}, LANGUAGE = {eng}, ISSN = {1077-2626}, DOI = {10.1109/TVCG.2017.2703612}, PUBLISHER = {IEEE Computer Society}, ADDRESS = {New York, NY}, YEAR = {2018}, DATE = {2018}, JOURNAL = {IEEE Transactions on Visualization and Computer Graphics}, VOLUME = {24}, NUMBER = {6}, PAGES = {2037--2050}, }
Endnote
%0 Journal Article %A Leimkühler, Thomas %A Kellnhofer, Petr %A Ritschel, Tobias %A Myszkowski, Karol %A Seidel, Hans-Peter %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Perceptual Real-Time 2D-to-3D Conversion Using Cue Fusion : %G eng %U http://hdl.handle.net/21.11116/0000-0001-409A-9 %R 10.1109/TVCG.2017.2703612 %7 2018 %D 2018 %J IEEE Transactions on Visualization and Computer Graphics %V 24 %N 6 %& 2037 %P 2037 - 2050 %I IEEE Computer Society %C New York, NY %@ false
Leimkühler, T., Singh, G., Myszkowski, K., Seidel, H.-P., and Ritschel, T. 2018b. End-to-end Sampling Patterns. http://arxiv.org/abs/1806.06710.
(arXiv: 1806.06710)
Abstract
Sample patterns have many uses in Computer Graphics, ranging from procedural object placement over Monte Carlo image synthesis to non-photorealistic depiction. Their properties such as discrepancy, spectra, anisotropy, or progressiveness have been analyzed extensively. However, designing methods to produce sampling patterns with certain properties can require substantial hand-crafting effort, both in coding, mathematical derivation and compute time. In particular, there is no systematic way to derive the best sampling algorithm for a specific end-task. Tackling this issue, we suggest another level of abstraction: a toolkit to end-to-end optimize over all sampling methods to find the one producing user-prescribed properties such as discrepancy or a spectrum that best fit the end-task. A user simply implements the forward losses and the sampling method is found automatically -- without coding or mathematical derivation -- by making use of back-propagation abilities of modern deep learning frameworks. While this optimization takes long, at deployment time the sampling method is quick to execute as iterated unstructured non-linear filtering using radial basis functions (RBFs) to represent high-dimensional kernels. Several important previous methods are special cases of this approach, which we compare to previous work and demonstrate its usefulness in several typical Computer Graphics applications. Finally, we propose sampling patterns with properties not shown before, such as high-dimensional blue noise with projective properties.
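As a rough illustration of the optimization loop sketched in this abstract, one can make the sample positions themselves the optimization variables and back-propagate a user-defined loss through them. The snippet below is a hedged PyTorch sketch only: the pairwise-repulsion loss is a simple stand-in for a prescribed spectrum or discrepancy target, and the paper's RBF-based filtering and toolkit are not reproduced.

import torch

n = 256
pts = torch.rand(n, 2, requires_grad=True)       # random initial 2D samples in the unit square
opt = torch.optim.Adam([pts], lr=1e-2)
i, j = torch.triu_indices(n, n, offset=1)        # all unordered point pairs

for step in range(500):
    opt.zero_grad()
    d = (pts[i] - pts[j]).norm(dim=1)            # pairwise distances
    loss = (1.0 / (d + 1e-4)).sum()              # simple repulsion as the "forward loss"
    loss.backward()                              # gradients w.r.t. the sample positions
    opt.step()
    with torch.no_grad():
        pts.clamp_(0.0, 1.0)                     # keep the samples inside the domain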
Export
BibTeX
@online{Leimkuehler_arXiv1806.06710, TITLE = {End-to-end Sampling Patterns}, AUTHOR = {Leimk{\"u}hler, Thomas and Singh, Gurprit and Myszkowski, Karol and Seidel, Hans-Peter and Ritschel, Tobias}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1806.06710}, EPRINT = {1806.06710}, EPRINTTYPE = {arXiv}, YEAR = {2018}, ABSTRACT = {Sample patterns have many uses in Computer Graphics, ranging from procedural object placement over Monte Carlo image synthesis to non-photorealistic depiction. Their properties such as discrepancy, spectra, anisotropy, or progressiveness have been analyzed extensively. However, designing methods to produce sampling patterns with certain properties can require substantial hand-crafting effort, both in coding, mathematical derivation and compute time. In particular, there is no systematic way to derive the best sampling algorithm for a specific end-task. Tackling this issue, we suggest another level of abstraction: a toolkit to end-to-end optimize over all sampling methods to find the one producing user-prescribed properties such as discrepancy or a spectrum that best fit the end-task. A user simply implements the forward losses and the sampling method is found automatically -- without coding or mathematical derivation -- by making use of back-propagation abilities of modern deep learning frameworks. While this optimization takes long, at deployment time the sampling method is quick to execute as iterated unstructured non-linear filtering using radial basis functions (RBFs) to represent high-dimensional kernels. Several important previous methods are special cases of this approach, which we compare to previous work and demonstrate its usefulness in several typical Computer Graphics applications. Finally, we propose sampling patterns with properties not shown before, such as high-dimensional blue noise with projective properties.}, }
Endnote
%0 Report %A Leimkühler, Thomas %A Singh, Gurprit %A Myszkowski, Karol %A Seidel, Hans-Peter %A Ritschel, Tobias %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T End-to-end Sampling Patterns : %G eng %U http://hdl.handle.net/21.11116/0000-0002-1376-4 %U http://arxiv.org/abs/1806.06710 %D 2018 %X Sample patterns have many uses in Computer Graphics, ranging from procedural object placement over Monte Carlo image synthesis to non-photorealistic depiction. Their properties such as discrepancy, spectra, anisotropy, or progressiveness have been analyzed extensively. However, designing methods to produce sampling patterns with certain properties can require substantial hand-crafting effort, both in coding, mathematical derivation and compute time. In particular, there is no systematic way to derive the best sampling algorithm for a specific end-task. Tackling this issue, we suggest another level of abstraction: a toolkit to end-to-end optimize over all sampling methods to find the one producing user-prescribed properties such as discrepancy or a spectrum that best fit the end-task. A user simply implements the forward losses and the sampling method is found automatically -- without coding or mathematical derivation -- by making use of back-propagation abilities of modern deep learning frameworks. While this optimization takes long, at deployment time the sampling method is quick to execute as iterated unstructured non-linear filtering using radial basis functions (RBFs) to represent high-dimensional kernels. Several important previous methods are special cases of this approach, which we compare to previous work and demonstrate its usefulness in several typical Computer Graphics applications. Finally, we propose sampling patterns with properties not shown before, such as high-dimensional blue noise with projective properties. %K Computer Science, Graphics, cs.GR
Leimkühler, T., Seidel, H.-P., and Ritschel, T. 2018c. Laplacian Kernel Splatting for Efficient Depth-of-field and Motion Blur Synthesis or Reconstruction. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2018)37, 4.
Export
BibTeX
@article{LeimkuehlerSIGGRAPH2018, TITLE = {Laplacian Kernel Splatting for Efficient Depth-of-field and Motion Blur Synthesis or Reconstruction}, AUTHOR = {Leimk{\"u}hler, Thomas and Seidel, Hans-Peter and Ritschel, Tobias}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3197517.3201379}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2018}, DATE = {2018}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {37}, NUMBER = {4}, PAGES = {1--11}, EID = {55}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2018}, }
Endnote
%0 Journal Article %A Leimkühler, Thomas %A Seidel, Hans-Peter %A Ritschel, Tobias %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Laplacian Kernel Splatting for Efficient Depth-of-field and Motion Blur Synthesis or Reconstruction : %G eng %U http://hdl.handle.net/21.11116/0000-0002-0630-1 %R 10.1145/3197517.3201379 %7 2018 %D 2018 %J ACM Transactions on Graphics %V 37 %N 4 %& 1 %P 1 - 11 %Z sequence number: 55 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2018 %O ACM SIGGRAPH 2018 Vancouver, Canada , 12 - 16 August
Lin, K.Z., Xu, W., Sun, Q., Theobalt, C., and Chua, T.-S. 2018. Learning a Disentangled Embedding for Monocular 3D Shape Retrieval and Pose Estimation. http://arxiv.org/abs/1812.09899.
(arXiv: 1812.09899)
Abstract
We propose a novel approach to jointly perform 3D object retrieval and pose estimation from monocular images. In order to make the method robust to real world scene variations in the images, e.g. texture, lighting and background, we learn an embedding space from 3D data that only includes the relevant information, namely the shape and pose. Our method can then be trained for robustness under real world scene variations without having to render a large training set simulating these variations. Our learned embedding explicitly disentangles a shape vector and a pose vector, which alleviates both pose bias for 3D shape retrieval and categorical bias for pose estimation. Having the learned disentangled embedding, we train a CNN to map the images to the embedding space, and then retrieve the closest 3D shape from the database and estimate the 6D pose of the object using the embedding vectors. Our method achieves 10.8 median error for pose estimation and 0.514 top-1-accuracy for category agnostic 3D object retrieval on the Pascal3D+ dataset. It therefore outperforms the previous state-of-the-art methods on both tasks.
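The retrieval step this abstract sketches reduces to a nearest-neighbour search over the shape part of the learned embedding. The snippet below is only a hedged NumPy illustration of that lookup; the array names and dimensions are hypothetical placeholders, and the CNN encoder and database construction are not shown.

import numpy as np

def retrieve_nearest_shape(query_shape_vec, db_shape_vecs):
    # Distance from the query's shape vector to every database shape embedding.
    dists = np.linalg.norm(db_shape_vecs - query_shape_vec[None, :], axis=1)
    return int(np.argmin(dists))      # index of the closest 3D shape

# Toy usage: random vectors stand in for learned, disentangled embeddings.
db_shape_vecs = np.random.randn(100, 64)
query_shape_vec = np.random.randn(64)
print(retrieve_nearest_shape(query_shape_vec, db_shape_vecs))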
Export
BibTeX
@online{Kyaw_arXiv1812.09899, TITLE = {{Learning a Disentangled Embedding for Monocular 3D Shape Retrieval and Pose Estimation}}, AUTHOR = {Lin, Kyaw Zaw and Xu, Weipeng and Sun, Qianru and Theobalt, Christian and Chua, Tat-Seng}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1812.09899}, EPRINT = {1812.09899}, EPRINTTYPE = {arXiv}, YEAR = {2018}, ABSTRACT = {We propose a novel approach to jointly perform 3D object retrieval and pose estimation from monocular images.In order to make the method robust to real world scene variations in the images, e.g. texture, lighting and background,we learn an embedding space from 3D data that only includes the relevant information, namely the shape and pose.Our method can then be trained for robustness under real world scene variations without having to render a large training set simulating these variations. Our learned embedding explicitly disentangles a shape vector and a pose vector, which alleviates both pose bias for 3D shape retrieval and categorical bias for pose estimation. Having the learned disentangled embedding, we train a CNN to map the images to the embedding space, and then retrieve the closest 3D shape from the database and estimate the 6D pose of the object using the embedding vectors. Our method achieves 10.8 median error for pose estimation and 0.514 top-1-accuracy for category agnostic 3D object retrieval on the Pascal3D+ dataset. It therefore outperforms the previous state-of-the-art methods on both tasks.}, }
Endnote
%0 Report %A Lin, Kyaw Zaw %A Xu, Weipeng %A Sun, Qianru %A Theobalt, Christian %A Chua, Tat-Seng %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Learning a Disentangled Embedding for Monocular 3D Shape Retrieval and Pose Estimation : %G eng %U http://hdl.handle.net/21.11116/0000-0002-D519-2 %U http://arxiv.org/abs/1812.09899 %D 2018 %X We propose a novel approach to jointly perform 3D object retrieval and pose estimation from monocular images.In order to make the method robust to real world scene variations in the images, e.g. texture, lighting and background,we learn an embedding space from 3D data that only includes the relevant information, namely the shape and pose.Our method can then be trained for robustness under real world scene variations without having to render a large training set simulating these variations. Our learned embedding explicitly disentangles a shape vector and a pose vector, which alleviates both pose bias for 3D shape retrieval and categorical bias for pose estimation. Having the learned disentangled embedding, we train a CNN to map the images to the embedding space, and then retrieve the closest 3D shape from the database and estimate the 6D pose of the object using the embedding vectors. Our method achieves 10.8 median error for pose estimation and 0.514 top-1-accuracy for category agnostic 3D object retrieval on the Pascal3D+ dataset. It therefore outperforms the previous state-of-the-art methods on both tasks. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Liu, L., Chen, N., Ceylan, D., Theobalt, C., Wang, W., and Mitra, N.J. 2018a. CurveFusion: Reconstructing Thin Structures from RGBD Sequences. ACM Transactions on Graphics37, 6.
Export
BibTeX
@article{Liu:2018:CvF, TITLE = {{CurveFusion}: reconstructing thin structures from {RGBD} sequences}, AUTHOR = {Liu, Lingjie and Chen, Nenglun and Ceylan, Duygu and Theobalt, Christian and Wang, Wenping and Mitra, Niloy J.}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3272127.3275097}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2018}, DATE = {2018}, JOURNAL = {ACM Transactions on Graphics}, VOLUME = {37}, NUMBER = {6}, PAGES = {1--12}, EID = {2018}, }
Endnote
%0 Journal Article %A Liu, Lingjie %A Chen, Nenglun %A Ceylan, Duygu %A Theobalt, Christian %A Wang, Wenping %A Mitra, Niloy J. %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations %T CurveFusion: Reconstructing thin Structures from RGBD Sequences : %G eng %U http://hdl.handle.net/21.11116/0000-0002-E6F2-9 %R 10.1145/3272127.3275097 %7 2018 %D 2018 %J ACM Transactions on Graphics %V 37 %N 6 %& 1 %P 1 - 12 %Z sequence number: 2018 %I ACM %C New York, NY %@ false
Liu, L., Xu, W., Zollhöfer, M., et al. 2018b. Neural Animation and Reenactment of Human Actor Videos. http://arxiv.org/abs/1809.03658.
(arXiv: 1809.03658)
Abstract
We propose a method for generating (near) video-realistic animations of real humans under user control. In contrast to conventional human character rendering, we do not require the availability of a production-quality photo-realistic 3D model of the human, but instead rely on a video sequence in conjunction with a (medium-quality) controllable 3D template model of the person. With that, our approach significantly reduces production cost compared to conventional rendering approaches based on production-quality 3D models, and can also be used to realistically edit existing videos. Technically, this is achieved by training a neural network that translates simple synthetic images of a human character into realistic imagery. For training our networks, we first track the 3D motion of the person in the video using the template model, and subsequently generate a synthetically rendered version of the video. These images are then used to train a conditional generative adversarial network that translates synthetic images of the 3D model into realistic imagery of the human. We evaluate our method for the reenactment of another person that is tracked in order to obtain the motion data, and show video results generated from artist-designed skeleton motion. Our results outperform the state-of-the-art in learning-based human image synthesis. Project page: http://gvv.mpi-inf.mpg.de/projects/wxu/HumanReenactment/
Export
BibTeX
@online{Liu_arXiv1809.03658, TITLE = {Neural Animation and Reenactment of Human Actor Videos}, AUTHOR = {Liu, Lingjie and Xu, Weipeng and Zollh{\"o}fer, Michael and Kim, Hyeongwoo and Bernard, Florian and Habermann, Marc and Wang, Wenping and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1809.03658}, EPRINT = {1809.03658}, EPRINTTYPE = {arXiv}, YEAR = {2018}, ABSTRACT = {We propose a method for generating (near) video-realistic animations of real humans under user control. In contrast to conventional human character rendering, we do not require the availability of a production-quality photo-realistic 3D model of the human, but instead rely on a video sequence in conjunction with a (medium-quality) controllable 3D template model of the person. With that, our approach significantly reduces production cost compared to conventional rendering approaches based on production-quality 3D models, and can also be used to realistically edit existing videos. Technically, this is achieved by training a neural network that translates simple synthetic images of a human character into realistic imagery. For training our networks, we first track the 3D motion of the person in the video using the template model, and subsequently generate a synthetically rendered version of the video. These images are then used to train a conditional generative adversarial network that translates synthetic images of the 3D model into realistic imagery of the human. We evaluate our method for the reenactment of another person that is tracked in order to obtain the motion data, and show video results generated from artist-designed skeleton motion. Our results outperform the state-of-the-art in learning-based human image synthesis. Project page: http://gvv.mpi-inf.mpg.de/projects/wxu/HumanReenactment/}, }
Endnote
%0 Report %A Liu, Lingjie %A Xu, Weipeng %A Zollhöfer, Michael %A Kim, Hyeongwoo %A Bernard, Florian %A Habermann, Marc %A Wang, Wenping %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Neural Animation and Reenactment of Human Actor Videos : %G eng %U http://hdl.handle.net/21.11116/0000-0002-5E06-F %U http://arxiv.org/abs/1809.03658 %D 2018 %X We propose a method for generating (near) video-realistic animations of real humans under user control. In contrast to conventional human character rendering, we do not require the availability of a production-quality photo-realistic 3D model of the human, but instead rely on a video sequence in conjunction with a (medium-quality) controllable 3D template model of the person. With that, our approach significantly reduces production cost compared to conventional rendering approaches based on production-quality 3D models, and can also be used to realistically edit existing videos. Technically, this is achieved by training a neural network that translates simple synthetic images of a human character into realistic imagery. For training our networks, we first track the 3D motion of the person in the video using the template model, and subsequently generate a synthetically rendered version of the video. These images are then used to train a conditional generative adversarial network that translates synthetic images of the 3D model into realistic imagery of the human. We evaluate our method for the reenactment of another person that is tracked in order to obtain the motion data, and show video results generated from artist-designed skeleton motion. Our results outperform the state-of-the-art in learning-based human image synthesis. Project page: http://gvv.mpi-inf.mpg.de/projects/wxu/HumanReenactment/ %K Computer Science, Computer Vision and Pattern Recognition, cs.CV %U http://gvv.mpi-inf.mpg.de/projects/wxu/HumanReenactment/
Mehta, D., Kim, K.I., and Theobalt, C. 2018a. On Implicit Filter Level Sparsity in Convolutional Neural Networks. http://arxiv.org/abs/1811.12495.
(arXiv: 1811.12495)
Abstract
We investigate filter level sparsity that emerges in convolutional neural networks (CNNs) which employ Batch Normalization and ReLU activation, and are trained with adaptive gradient descent techniques and L2 regularization (or weight decay). We conduct an extensive experimental study casting these initial findings into hypotheses and conclusions about the mechanisms underlying the emergent filter level sparsity. This study allows new insight into the performance gap observed between adaptive and non-adaptive gradient descent methods in practice. Further, analysis of the effect of training strategies and hyperparameters on the sparsity leads to practical suggestions in designing CNN training strategies enabling us to explore the tradeoffs between feature selectivity, network capacity, and generalization performance. Lastly, we show that the implicit sparsity can be harnessed for neural network speedup at par or better than explicit sparsification / pruning approaches, without needing any modifications to the typical training pipeline.
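One practical way to observe the phenomenon described above is to inspect the BatchNorm scale parameters of a trained network, since a collapsed scale effectively switches its filter off. The helper below is a hedged sketch only: the 1e-3 threshold and the BatchNorm2d-only scan are assumptions, not the paper's measurement protocol.

import torch.nn as nn

def filter_sparsity(model, threshold=1e-3):
    # Fraction of BatchNorm scale parameters (one per conv filter) that have collapsed.
    total, collapsed = 0, 0
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            gamma = m.weight.detach().abs()
            total += gamma.numel()
            collapsed += int((gamma < threshold).sum())
    return collapsed / max(total, 1)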
Export
BibTeX
@online{Mehta_arXIv1811.12495, TITLE = {On Implicit Filter Level Sparsity in Convolutional Neural Networks}, AUTHOR = {Mehta, Dushyant and Kim, Kwang In and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1811.12495}, EPRINT = {1811.12495}, EPRINTTYPE = {arXiv}, YEAR = {2018}, ABSTRACT = {We investigate filter level sparsity that emerges in convolutional neural networks (CNNs) which employ Batch Normalization and ReLU activation, and are trained with adaptive gradient descent techniques and L2 regularization (or weight decay). We conduct an extensive experimental study casting these initial findings into hypotheses and conclusions about the mechanisms underlying the emergent filter level sparsity. This study allows new insight into the performance gap obeserved between adapative and non-adaptive gradient descent methods in practice. Further, analysis of the effect of training strategies and hyperparameters on the sparsity leads to practical suggestions in designing CNN training strategies enabling us to explore the tradeoffs between feature selectivity, network capacity, and generalization performance. Lastly, we show that the implicit sparsity can be harnessed for neural network speedup at par or better than explicit sparsification / pruning approaches, without needing any modifications to the typical training pipeline.}, }
Endnote
%0 Report %A Mehta, Dushyant %A Kim, Kwang In %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T On Implicit Filter Level Sparsity in Convolutional Neural Networks : %G eng %U http://hdl.handle.net/21.11116/0000-0002-E920-3 %U http://arxiv.org/abs/1811.12495 %D 2018 %X We investigate filter level sparsity that emerges in convolutional neural networks (CNNs) which employ Batch Normalization and ReLU activation, and are trained with adaptive gradient descent techniques and L2 regularization (or weight decay). We conduct an extensive experimental study casting these initial findings into hypotheses and conclusions about the mechanisms underlying the emergent filter level sparsity. This study allows new insight into the performance gap obeserved between adapative and non-adaptive gradient descent methods in practice. Further, analysis of the effect of training strategies and hyperparameters on the sparsity leads to practical suggestions in designing CNN training strategies enabling us to explore the tradeoffs between feature selectivity, network capacity, and generalization performance. Lastly, we show that the implicit sparsity can be harnessed for neural network speedup at par or better than explicit sparsification / pruning approaches, without needing any modifications to the typical training pipeline. %K Computer Science, Learning, cs.LG,Computer Science, Computer Vision and Pattern Recognition, cs.CV,eess.SP,Statistics, Machine Learning, stat.ML
Mehta, D., Sotnychenko, O., Mueller, F., et al. 2018b. Single-Shot Multi-person 3D Pose Estimation from Monocular RGB. 3DV 2018, International Conference on 3D Vision, IEEE.
Export
BibTeX
@inproceedings{Mehta_3DV2018, TITLE = {Single-Shot Multi-person {3D} Pose Estimation from Monocular {RGB}}, AUTHOR = {Mehta, Dushyant and Sotnychenko, Oleksandr and Mueller, Franziska and Xu, Weipeng and Sridhar, Srinath and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-5386-8425-2 ; 978-1-5386-8426-9}, DOI = {10.1109/3DV.2018.00024}, PUBLISHER = {IEEE}, YEAR = {2018}, DATE = {2018}, BOOKTITLE = {3DV 2018 , International Conference on 3D Vision}, PAGES = {120--130}, ADDRESS = {Verona, Italy}, }
Endnote
%0 Conference Proceedings %A Mehta, Dushyant %A Sotnychenko, Oleksandr %A Mueller, Franziska %A Xu, Weipeng %A Sridhar, Srinath %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Single-Shot Multi-person 3D Pose Estimation from Monocular RGB : %G eng %U http://hdl.handle.net/21.11116/0000-0002-5C46-9 %R 10.1109/3DV.2018.00024 %D 2018 %B International Conference on 3D Vision %Z date of event: 2018-09-05 - 2018-09-08 %C Verona, Italy %B 3DV 2018 %P 120 - 130 %I IEEE %@ 978-1-5386-8425-2 978-1-5386-8426-9
Mehta, D., Sotnychenko, O., Mueller, F., et al. 2018c. Demo of XNect: Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera. ECCV 2018 Demo Sessions.
Export
BibTeX
@inproceedings{XNectDemo_ECCV2018, TITLE = {Demo of {XNect}: Real-time Multi-person {3D} Human Pose Estimation with a Single {RGB} Camera}, AUTHOR = {Mehta, Dushyant and Sotnychenko, Oleksandr and Mueller, Franziska and Rhodin, Helge and Xu, Weipeng and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://gvv.mpi-inf.mpg.de/projects/XNectDemo/}, YEAR = {2018}, BOOKTITLE = {ECCV 2018 Demo Sessions}, ADDRESS = {Munich, Germany}, }
Endnote
%0 Conference Proceedings %A Mehta, Dushyant %A Sotnychenko, Oleksandr %A Mueller, Franziska %A Rhodin, Helge %A Xu, Weipeng %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Demo of XNect: Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0002-F4DC-3 %U http://gvv.mpi-inf.mpg.de/projects/XNectDemo/ %D 2018 %B European Conference on Computer Vision %Z date of event: 2018-09-08 - 2018-09-14 %C Munich, Germany %B ECCV 2018 Demo Sessions %U http://gvv.mpi-inf.mpg.de/projects/XNectDemo/
Meka, A., Maximov, M., Zollhöfer, M., et al. 2018a. LIME: Live Intrinsic Material Estimation. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018), IEEE.
Export
BibTeX
@inproceedings{Meka:2018, TITLE = {{LIME}: {L}ive Intrinsic Material Estimation}, AUTHOR = {Meka, Abhimitra and Maximov, Maxim and Zollh{\"o}fer, Michael and Chatterjee, Avishek and Seidel, Hans-Peter and Richardt, Christian and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-5386-6420-9}, DOI = {10.1109/CVPR.2018.00661}, PUBLISHER = {IEEE}, YEAR = {2018}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018)}, PAGES = {6315--6324}, ADDRESS = {Salt Lake City, UT, USA}, }
Endnote
%0 Conference Proceedings %A Meka, Abhimitra %A Maximov, Maxim %A Zollhöfer, Michael %A Chatterjee, Avishek %A Seidel, Hans-Peter %A Richardt, Christian %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T LIME: Live Intrinsic Material Estimation : %G eng %U http://hdl.handle.net/21.11116/0000-0002-F391-7 %R 10.1109/CVPR.2018.00661 %D 2018 %B 31st IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2018-06-18 - 2018-06-22 %C Salt Lake City, UT, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 6315 - 6324 %I IEEE %@ 978-1-5386-6420-9 %U http://gvv.mpi-inf.mpg.de/projects/LIME/
Meka, A., Maximov, M., Zollhöfer, M., et al. 2018b. LIME: Live Intrinsic Material Estimation. http://arxiv.org/abs/1801.01075.
(arXiv: 1801.01075)
Abstract
We present the first end to end approach for real time material estimation for general object shapes with uniform material that only requires a single color image as input. In addition to Lambertian surface properties, our approach fully automatically computes the specular albedo, material shininess, and a foreground segmentation. We tackle this challenging and ill posed inverse rendering problem using recent advances in image to image translation techniques based on deep convolutional encoder decoder architectures. The underlying core representations of our approach are specular shading, diffuse shading and mirror images, which allow to learn the effective and accurate separation of diffuse and specular albedo. In addition, we propose a novel highly efficient perceptual rendering loss that mimics real world image formation and obtains intermediate results even during run time. The estimation of material parameters at real time frame rates enables exciting mixed reality applications, such as seamless illumination consistent integration of virtual objects into real world scenes, and virtual material cloning. We demonstrate our approach in a live setup, compare it to the state of the art, and demonstrate its effectiveness through quantitative and qualitative evaluation.
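The "perceptual rendering loss" mentioned in this abstract compares images re-shaded with the predicted and ground-truth materials rather than the raw parameters. Below is a hedged, heavily simplified PyTorch sketch of that idea, using a toy diffuse-plus-specular shading under a headlight assumption (view direction equals light direction); it is not the paper's image formation model, and all tensor names are assumptions.

import torch
import torch.nn.functional as F

def shade(n_dot_l, diffuse, specular, shininess):
    # Toy shading: diffuse term plus a specular lobe, assuming view == light direction.
    ndl = n_dot_l.clamp(min=0.0)
    return diffuse * ndl + specular * ndl.pow(shininess)

def rendering_loss(pred_params, gt_params, n_dot_l):
    # Penalize differences between the re-shaded images, not the parameters themselves.
    return F.mse_loss(shade(n_dot_l, *pred_params), shade(n_dot_l, *gt_params))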
Export
BibTeX
@online{Meka_arXiv1801.01075, TITLE = {LIME: {L}ive Intrinsic Material Estimation}, AUTHOR = {Meka, Abhimitra and Maximov, Maxim and Zollh{\"o}fer, Michael and Chatterjee, Avishek and Seidel, Hans-Peter and Richardt, Christian and Theobalt, Christian}, URL = {http://arxiv.org/abs/1801.01075}, EPRINT = {1801.01075}, EPRINTTYPE = {arXiv}, YEAR = {2018}, ABSTRACT = {We present the first end to end approach for real time material estimation for general object shapes with uniform material that only requires a single color image as input. In addition to Lambertian surface properties, our approach fully automatically computes the specular albedo, material shininess, and a foreground segmentation. We tackle this challenging and ill posed inverse rendering problem using recent advances in image to image translation techniques based on deep convolutional encoder decoder architectures. The underlying core representations of our approach are specular shading, diffuse shading and mirror images, which allow to learn the effective and accurate separation of diffuse and specular albedo. In addition, we propose a novel highly efficient perceptual rendering loss that mimics real world image formation and obtains intermediate results even during run time. The estimation of material parameters at real time frame rates enables exciting mixed reality applications, such as seamless illumination consistent integration of virtual objects into real world scenes, and virtual material cloning. We demonstrate our approach in a live setup, compare it to the state of the art, and demonstrate its effectiveness through quantitative and qualitative evaluation.}, }
Endnote
%0 Report %A Meka, Abhimitra %A Maximov, Maxim %A Zollhöfer, Michael %A Chatterjee, Avishek %A Seidel, Hans-Peter %A Richardt, Christian %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society D2 External Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T LIME: Live Intrinsic Material Estimation : %U http://hdl.handle.net/21.11116/0000-0001-40D9-2 %U http://arxiv.org/abs/1801.01075 %D 2018 %X We present the first end to end approach for real time material estimation for general object shapes with uniform material that only requires a single color image as input. In addition to Lambertian surface properties, our approach fully automatically computes the specular albedo, material shininess, and a foreground segmentation. We tackle this challenging and ill posed inverse rendering problem using recent advances in image to image translation techniques based on deep convolutional encoder decoder architectures. The underlying core representations of our approach are specular shading, diffuse shading and mirror images, which allow to learn the effective and accurate separation of diffuse and specular albedo. In addition, we propose a novel highly efficient perceptual rendering loss that mimics real world image formation and obtains intermediate results even during run time. The estimation of material parameters at real time frame rates enables exciting mixed reality applications, such as seamless illumination consistent integration of virtual objects into real world scenes, and virtual material cloning. We demonstrate our approach in a live setup, compare it to the state of the art, and demonstrate its effectiveness through quantitative and qualitative evaluation. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Mlakar, D., Winter, M., Seidel, H.-P., Steinberger, M., and Zayer, R. 2018. AlSub: Fully Parallel and Modular Subdivision. http://arxiv.org/abs/1809.06047.
(arXiv: 1809.06047)
Abstract
In recent years, mesh subdivision---the process of forging smooth free-form surfaces from coarse polygonal meshes---has become an indispensable production instrument. Although subdivision performance is crucial during simulation, animation and rendering, state-of-the-art approaches still rely on serial implementations for complex parts of the subdivision process. Therefore, they often fail to harness the power of modern parallel devices, like the graphics processing unit (GPU), for large parts of the algorithm and must resort to time-consuming serial preprocessing. In this paper, we show that a complete parallelization of the subdivision process for modern architectures is possible. Building on sparse matrix linear algebra, we show how to structure the complete subdivision process into a sequence of algebra operations. By restructuring and grouping these operations, we adapt the process for different use cases, such as regular subdivision of dynamic meshes, uniform subdivision for immutable topology, and feature-adaptive subdivision for efficient rendering of animated models. As the same machinery is used for all use cases, identical subdivision results are achieved in all parts of the production pipeline. As a second contribution, we show how these linear algebra formulations can effectively be translated into efficient GPU kernels. Applying our strategies to $\sqrt{3}$, Loop and Catmull-Clark subdivision shows significant speedups of our approach compared to state-of-the-art solutions, while we completely avoid serial preprocessing.
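To make the "subdivision as sparse linear algebra" idea concrete, here is a hedged toy example in Python/SciPy: a refinement operator assembled as a sparse matrix and applied as a single matrix product. The midpoint rule and the single-triangle mesh are illustrative assumptions, not the sqrt(3), Loop, or Catmull-Clark stencils treated in the paper.

import numpy as np
from scipy.sparse import coo_matrix

V = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])   # toy mesh: one triangle
edges = [(0, 1), (1, 2), (2, 0)]

rows, cols, vals = [], [], []
for r in range(len(V)):                       # identity rows: old vertices are copied
    rows.append(r); cols.append(r); vals.append(1.0)
for k, (a, b) in enumerate(edges):            # one new row per edge: its midpoint
    rows += [len(V) + k, len(V) + k]
    cols += [a, b]
    vals += [0.5, 0.5]

S = coo_matrix((vals, (rows, cols)), shape=(len(V) + len(edges), len(V)))
V_sub = S @ V                                 # one refinement step = one sparse product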
Export
BibTeX
@online{Mlakar_arXiv1809.06047, TITLE = {{AlSub}: {Fully Parallel and Modular Subdivision}}, AUTHOR = {Mlakar, Daniel and Winter, Martin and Seidel, Hans-Peter and Steinberger, Markus and Zayer, Rhaleb}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1809.06047}, EPRINT = {1809.06047}, EPRINTTYPE = {arXiv}, YEAR = {2018}, ABSTRACT = {In recent years, mesh subdivision---the process of forging smooth free-form surfaces from coarse polygonal meshes---has become an indispensable production instrument. Although subdivision performance is crucial during simulation, animation and rendering, state-of-the-art approaches still rely on serial implementations for complex parts of the subdivision process. Therefore, they often fail to harness the power of modern parallel devices, like the graphics processing unit (GPU), for large parts of the algorithm and must resort to time-consuming serial preprocessing. In this paper, we show that a complete parallelization of the subdivision process for modern architectures is possible. Building on sparse matrix linear algebra, we show how to structure the complete subdivision process into a sequence of algebra operations. By restructuring and grouping these operations, we adapt the process for different use cases, such as regular subdivision of dynamic meshes, uniform subdivision for immutable topology, and feature-adaptive subdivision for efficient rendering of animated models. As the same machinery is used for all use cases, identical subdivision results are achieved in all parts of the production pipeline. As a second contribution, we show how these linear algebra formulations can effectively be translated into efficient GPU kernels. Applying our strategies to $\sqrt{3}$, Loop and Catmull-Clark subdivision shows significant speedups of our approach compared to state-of-the-art solutions, while we completely avoid serial preprocessing.}, }
Endnote
%0 Report %A Mlakar, Daniel %A Winter, Martin %A Seidel, Hans-Peter %A Steinberger, Markus %A Zayer, Rhaleb %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T AlSub: Fully Parallel and Modular Subdivision : %G eng %U http://hdl.handle.net/21.11116/0000-0002-E5E2-C %U http://arxiv.org/abs/1809.06047 %D 2018 %X In recent years, mesh subdivision---the process of forging smooth free-form surfaces from coarse polygonal meshes---has become an indispensable production instrument. Although subdivision performance is crucial during simulation, animation and rendering, state-of-the-art approaches still rely on serial implementations for complex parts of the subdivision process. Therefore, they often fail to harness the power of modern parallel devices, like the graphics processing unit (GPU), for large parts of the algorithm and must resort to time-consuming serial preprocessing. In this paper, we show that a complete parallelization of the subdivision process for modern architectures is possible. Building on sparse matrix linear algebra, we show how to structure the complete subdivision process into a sequence of algebra operations. By restructuring and grouping these operations, we adapt the process for different use cases, such as regular subdivision of dynamic meshes, uniform subdivision for immutable topology, and feature-adaptive subdivision for efficient rendering of animated models. As the same machinery is used for all use cases, identical subdivision results are achieved in all parts of the production pipeline. As a second contribution, we show how these linear algebra formulations can effectively be translated into efficient GPU kernels. Applying our strategies to $\sqrt{3}$, Loop and Catmull-Clark subdivision shows significant speedups of our approach compared to state-of-the-art solutions, while we completely avoid serial preprocessing. %K Computer Science, Graphics, cs.GR
Mueller, F., Bernard, F., Sotnychenko, O., et al. 2018. GANerated Hands for Real-Time 3D Hand Tracking from Monocular RGB. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018), IEEE.
Export
BibTeX
@inproceedings{Mueller_CVPR2018, TITLE = {{GANerated} Hands for Real-Time {3D} Hand Tracking from Monocular {RGB}}, AUTHOR = {Mueller, Franziska and Bernard, Florian and Sotnychenko, Oleksandr and Mehta, Dushyant and Sridhar, Srinath and Casas, Dan and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-5386-6420-9}, DOI = {10.1109/CVPR.2018.00013}, PUBLISHER = {IEEE}, YEAR = {2018}, DATE = {2018}, BOOKTITLE = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018)}, PAGES = {49--59}, ADDRESS = {Salt Lake City, UT, USA}, }
Endnote
%0 Conference Proceedings %A Mueller, Franziska %A Bernard, Florian %A Sotnychenko, Oleksandr %A Mehta, Dushyant %A Sridhar, Srinath %A Casas, Dan %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T GANerated Hands for Real-Time 3D Hand Tracking from Monocular RGB : %G eng %U http://hdl.handle.net/21.11116/0000-0002-EFFA-8 %R 10.1109/CVPR.2018.00013 %D 2018 %B 31st IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2018-06-18 - 2018-06-22 %C Salt Lake City, UT, USA %B IEEE/CVF Conference on Computer Vision and Pattern Recognition %P 49 - 59 %I IEEE %@ 978-1-5386-6420-9
Myszkowski, K., Tursun, O.T., Kellnhofer, P., et al. 2018. Perceptual Display: Apparent Enhancement of Scene Detail and Depth. Electronic Imaging (Proc. HVEI 2018), SPIE/IS&T.
(Keynote Talk)
Abstract
Predicting human visual perception of image differences has several applications such as compression, rendering, editing and retargeting. Current approaches however, ignore the fact that the human visual system compensates for geometric transformations, e.g. we see that an image and a rotated copy are identical. Instead, they will report a large, false-positive difference. At the same time, if the transformations become too strong or too spatially incoherent, comparing two images indeed gets increasingly difficult. Between these two extremes, we propose a system to quantify the effect of transformations, not only on the perception of image differences, but also on saliency and motion parallax. To this end, we first fit local homographies to a given optical flow field and then convert this field into a field of elementary transformations such as translation, rotation, scaling, and perspective. We conduct a perceptual experiment quantifying the increase of difficulty when compensating for elementary transformations. Transformation entropy is proposed as a novel measure of complexity in a flow field. This representation is then used for applications, such as comparison of non-aligned images, where transformations cause threshold elevation, and detection of salient transformations.
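As a small, hedged illustration of splitting a locally fitted transformation into elementary parts, the NumPy sketch below performs a polar decomposition of a 2x2 linear map into a rotation and a symmetric stretch; the abstract's full pipeline additionally fits homographies and handles translation and perspective, which are not shown here.

import numpy as np

def polar_decompose_2x2(A):
    # A = R @ P with R orthogonal (rotation, possibly a reflection) and P a symmetric stretch.
    U, S, Vt = np.linalg.svd(A)
    R = U @ Vt
    P = Vt.T @ np.diag(S) @ Vt
    angle_deg = np.degrees(np.arctan2(R[1, 0], R[0, 0]))
    return angle_deg, P

# Example: a 30-degree rotation combined with anisotropic scaling.
theta = np.radians(30.0)
A = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta),  np.cos(theta)]]) @ np.diag([2.0, 0.5])
print(polar_decompose_2x2(A))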
Export
BibTeX
@inproceedings{Myszkowski2018Perceptual, TITLE = {Perceptual Display: Apparent Enhancement of Scene Detail and Depth}, AUTHOR = {Myszkowski, Karol and Tursun, Okan Tarhan and Kellnhofer, Petr and Templin, Krzysztof and Arabadzhiyska, Elena and Didyk, Piotr and Seidel, Hans-Peter}, LANGUAGE = {eng}, ISSN = {2470-1173}, DOI = {10.2352/ISSN.2470-1173.2018.14.HVEI-501}, PUBLISHER = {SPIE/IS\&T}, YEAR = {2018}, ABSTRACT = {Predicting human visual perception of image differences has several applications such as compression, rendering, editing and retargeting. Current approaches however, ignore the fact that the human visual system compensates for geometric transformations, e.g. we see that an image and a rotated copy are identical. Instead, they will report a large, false-positive difference. At the same time, if the transformations become too strong or too spatially incoherent, comparing two images indeed gets increasingly difficult. Between these two extremes, we propose a system to quantify the effect of transformations, not only on the perception of image differences, but also on saliency and motion parallax. To this end, we first fit local homographies to a given optical flow field and then convert this field into a field of elementary transformations such as translation, rotation, scaling, and perspective. We conduct a perceptual experiment quantifying the increase of difficulty when compensating for elementary transformations. Transformation entropy is proposed as a novel measure of complexity in a flow field. This representation is then used for applications, such as comparison of non-aligned images, where transformations cause threshold elevation, and detection of salient transformations.}, BOOKTITLE = {Human Vision and Electronic Imaging (HVEI 2018)}, PAGES = {1--10}, EID = {501}, JOURNAL = {Electronic Imaging (Proc. HVEI)}, VOLUME = {2018}, ADDRESS = {San Francisco, CA, USA}, }
Endnote
%0 Conference Proceedings %A Myszkowski, Karol %A Tursun, Okan Tarhan %A Kellnhofer, Petr %A Templin, Krzysztof %A Arabadzhiyska, Elena %A Didyk, Piotr %A Seidel, Hans-Peter %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Perceptual Display: Apparent Enhancement of Scene Detail and Depth : %G eng %U http://hdl.handle.net/21.11116/0000-0001-5F64-5 %R 10.2352/ISSN.2470-1173.2018.14.HVEI-501 %D 2018 %B Human Vision and Electronic Imaging %Z date of event: 2018-01-28 - 2018-02-02 %C San Francisco, CA, USA %X Predicting human visual perception of image differences has several applications such as compression, rendering, editing and retargeting. Current approaches however, ignore the fact that the human visual system compensates for geometric transformations, e.g. we see that an image and a rotated copy are identical. Instead, they will report a large, false-positive difference. At the same time, if the transformations become too strong or too spatially incoherent, comparing two images indeed gets increasingly difficult. Between these two extremes, we propose a system to quantify the effect of transformations, not only on the perception of image differences, but also on saliency and motion parallax. To this end, we first fit local homographies to a given optical flow field and then convert this field into a field of elementary transformations such as translation, rotation, scaling, and perspective. We conduct a perceptual experiment quantifying the increase of difficulty when compensating for elementary transformations. Transformation entropy is proposed as a novel measure of complexity in a flow field. This representation is then used for applications, such as comparison of non-aligned images, where transformations cause threshold elevation, and detection of salient transformations. %B Human Vision and Electronic Imaging %P 1 - 10 %Z sequence number: 501 %I SPIE/IS&T %J Electronic Imaging %V 2018 %@ false
Öztireli, A.C. and Singh, G. 2018. Sampling Analysis Using Correlations for Monte Carlo Rendering. SIGGRAPH Asia 2018 Courses (ACM SIGGRAPH Asia 2018), ACM.
Export
BibTeX
@inproceedings{Oeztireli_SIGGRAPHASIA18, TITLE = {Sampling Analysis Using Correlations for {M}onte {C}arlo Rendering}, AUTHOR = {{\"O}ztireli, A. Cengiz and Singh, Gurprit}, LANGUAGE = {eng}, DOI = {10.1145/3277644.3277783}, PUBLISHER = {ACM}, YEAR = {2018}, DATE = {2018}, BOOKTITLE = {SIGGRAPH Asia 2018 Courses (ACM SIGGRAPH Asia 2018)}, PAGES = {1--48}, EID = {16}, ADDRESS = {Tokyo, Japan}, }
Endnote
%0 Conference Proceedings %A Öztireli, A. Cengiz %A Singh, Gurprit %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Sampling Analysis Using Correlations for Monte Carlo Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0002-9F4D-6 %R 10.1145/3277644.3277783 %D 2018 %B 11th ACM SIGGRAPH Conference and Exhibition on Computer Graphics and Interactive Techniques in Asia %Z date of event: 2018-12-04 - 2018-12-07 %C Tokyo, Japan %B SIGGRAPH Asia 2018 Courses %P 1 - 48 %Z sequence number: 16 %I ACM
Piovarči, M., Levin, D.I.W., Kaufman, D.M., and Didyk, P. 2018a. Perception-Aware Modeling and Fabrication of Digital Drawing Tools. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2018)37, 4.
Export
BibTeX
@article{Piovarci_SIGGRAPH2018, TITLE = {Perception-Aware Modeling and Fabrication of Digital Drawing Tools}, AUTHOR = {Piovar{\v c}i, Michal and Levin, David I. W. and Kaufman, Danny M. and Didyk, Piotr}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3197517.3201322}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2018}, DATE = {2018}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, VOLUME = {37}, NUMBER = {4}, PAGES = {1--15}, EID = {123}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2018}, }
Endnote
%0 Journal Article %A Piovarči, Michal %A Levin, David I. W. %A Kaufman, Danny M. %A Didyk, Piotr %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Perception-Aware Modeling and Fabrication of Digital Drawing Tools : %G eng %U http://hdl.handle.net/21.11116/0000-0002-5D7D-B %R 10.1145/3197517.3201322 %7 2018 %D 2018 %J ACM Transactions on Graphics %V 37 %N 4 %& 1 %P 1 - 15 %Z sequence number: 123 %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2018 %O ACM SIGGRAPH 2018 Vancouver, Canada , 12 - 16 August
Piovarči, M., Wessely, M., Jagielski, M., Alexa, M., Matusik, W., and Didyk, P. 2018b. Design and Analysis of Directional Front Projection Screens. Computers and Graphics74.
Export
BibTeX
@article{Piovarci_2018, TITLE = {Design and Analysis of Directional Front Projection Screens}, AUTHOR = {Piovar{\v c}i, Michal and Wessely, Michael and Jagielski, Michal and Alexa, Marc and Matusik, Wojciech and Didyk, Piotr}, LANGUAGE = {eng}, ISSN = {0097-8493}, DOI = {10.1016/j.cag.2018.05.010}, PUBLISHER = {Pergamon}, ADDRESS = {New York}, YEAR = {2018}, DATE = {2018}, JOURNAL = {Computers and Graphics}, VOLUME = {74}, PAGES = {213--224}, }
Endnote
%0 Journal Article %A Piovarči, Michal %A Wessely, Michael %A Jagielski, Michal %A Alexa, Marc %A Matusik, Wojciech %A Didyk, Piotr %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Design and Analysis of Directional Front Projection Screens : %G eng %U http://hdl.handle.net/21.11116/0000-0002-16D9-1 %R 10.1016/j.cag.2018.05.010 %7 2018 %D 2018 %J Computers and Graphics %V 74 %& 213 %P 213 - 224 %I Pergamon %C New York %@ false
Robertini, N., Bernard, F., Xu, W., and Theobalt, C. 2018. Illumination-Invariant Robust Multiview 3D Human Motion Capture. 2018 IEEE Winter Conference on Applications of Computer Vision (WACV 2018), IEEE.
Export
BibTeX
@inproceedings{Robertini_WACV2018, TITLE = {Illumination-Invariant Robust Multiview {3D} Human Motion Capture}, AUTHOR = {Robertini, Nadia and Bernard, Florian and Xu, Weipeng and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-1-5386-4886-5}, DOI = {10.1109/WACV.2018.00185}, PUBLISHER = {IEEE}, YEAR = {2018}, DATE = {2018}, BOOKTITLE = {2018 IEEE Winter Conference on Applications of Computer Vision (WACV 2018)}, PAGES = {1661--1670}, ADDRESS = {Lake Tahoe, NV, USA}, }
Endnote
%0 Conference Proceedings %A Robertini, Nadia %A Bernard, Florian %A Xu, Weipeng %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Illumination-Invariant Robust Multiview 3D Human Motion Capture : %G eng %U http://hdl.handle.net/21.11116/0000-0001-A474-3 %R 10.1109/WACV.2018.00185 %D 2018 %B IEEE Winter Conference on Applications of Computer Vision %Z date of event: 2018-03-12 - 2018-03-15 %C Lake Tahoe, NV, USA %B 2018 IEEE Winter Conference on Applications of Computer Vision %P 1661 - 1670 %I IEEE %@ 978-1-5386-4886-5
Sarkar, K., Bernard, F., Varanasi, K., Theobalt, C., and Stricker, D. 2018. Structured Low-Rank Matrix Factorization for Point-Cloud Denoising. 3DV 2018, International Conference on 3D Vision, IEEE.
Export
BibTeX
@inproceedings{Sarkar_3DV2018, TITLE = {Structured Low-Rank Matrix Factorization for Point-Cloud Denoising}, AUTHOR = {Sarkar, Kripasindhu and Bernard, Florian and Varanasi, Kiran and Theobalt, Christian and Stricker, Didier}, LANGUAGE = {eng}, ISBN = {978-1-5386-8425-2 ; 978-1-5386-8426-9}, DOI = {10.1109/3DV.2018.00058}, PUBLISHER = {IEEE}, YEAR = {2018}, DATE = {2018}, BOOKTITLE = {3DV 2018, International Conference on 3D Vision}, PAGES = {444--453}, ADDRESS = {Verona, Italy}, }
Endnote
%0 Conference Proceedings %A Sarkar, Kripasindhu %A Bernard, Florian %A Varanasi, Kiran %A Theobalt, Christian %A Stricker, Didier %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Structured Low-Rank Matrix Factorization for Point-Cloud Denoising : %G eng %U http://hdl.handle.net/21.11116/0000-0002-D62C-C %R 10.1109/3DV.2018.00058 %D 2018 %B International Conference on 3D Vision %Z date of event: 2018-09-05 - 2018-09-08 %C Verona, Italy %B 3DV 2018 %P 444 - 453 %I IEEE %@ 978-1-5386-8425-2 978-1-5386-8426-9
Serrano, A., Gutierrez, D., Myszkowski, K., Seidel, H.-P., and Masia, B. 2018. An Intuitive Control Space for Material Appearance. http://arxiv.org/abs/1806.04950.
(arXiv: 1806.04950)
Abstract
Many different techniques for measuring material appearance have been proposed in the last few years. These have produced large public datasets, which have been used for accurate, data-driven appearance modeling. However, although these datasets have allowed us to reach an unprecedented level of realism in visual appearance, editing the captured data remains a challenge. In this paper, we present an intuitive control space for predictable editing of captured BRDF data, which allows for artistic creation of plausible novel material appearances, bypassing the difficulty of acquiring novel samples. We first synthesize novel ma