Current Year (2019)

Article
Bernard, F., Thunberg, J., Goncalves, J., and Theobalt, C. 2019. Synchronisation of Partial Multi-Matchings via Non-negative Factorisations. Pattern Recognition 92.
BibTeX
@article{Bernard2019, TITLE = {Synchronisation of Partial Multi-Matchings via Non-negative Factorisations}, AUTHOR = {Bernard, Florian and Thunberg, Johan and Goncalves, Jorge and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0031-3203}, DOI = {10.1016/j.patcog.2019.03.021}, PUBLISHER = {Pergamon}, ADDRESS = {Oxford}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Pattern Recognition}, VOLUME = {92}, PAGES = {146--155}, }
Endnote
%0 Journal Article %A Bernard, Florian %A Thunberg, Johan %A Goncalves, Jorge %A Theobalt, Christian %+ External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Synchronisation of Partial Multi-Matchings via Non-negative Factorisations : %G eng %U http://hdl.handle.net/21.11116/0000-0003-B2EC-A %R 10.1016/j.patcog.2019.03.021 %7 2019 %D 2019 %J Pattern Recognition %O Pattern Recognit. %V 92 %& 146 %P 146 - 155 %I Pergamon %C Oxford %@ false
Dokter, M., Hladký, J., Parger, M., Schmalstieg, D., Seidel, H.-P., and Steinberger, M. 2019. Hierarchical Rasterization of Curved Primitives for Vector Graphics Rendering on the GPU. Computer Graphics Forum (Proc. EUROGRAPHICS 2019) 38, 2.
BibTeX
@article{Dokter_EG2019, TITLE = {Hierarchical Rasterization of Curved Primitives for Vector Graphics Rendering on the {GPU}}, AUTHOR = {Dokter, Mark and Hladk{\'y}, Jozef and Parger, Mathias and Schmalstieg, Dieter and Seidel, Hans-Peter and Steinberger, Markus}, LANGUAGE = {eng}, ISSN = {0167-7055}, DOI = {10.1111/cgf.13622}, PUBLISHER = {Wiley-Blackwell}, ADDRESS = {Oxford}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Computer Graphics Forum (Proc. EUROGRAPHICS)}, VOLUME = {38}, NUMBER = {2}, PAGES = {93--103}, BOOKTITLE = {EUROGRAPHICS 2019 STAR -- State of The Art Reports}, }
Endnote
%0 Journal Article %A Dokter, Mark %A Hladký, Jozef %A Parger, Mathias %A Schmalstieg, Dieter %A Seidel, Hans-Peter %A Steinberger, Markus %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Hierarchical Rasterization of Curved Primitives for Vector Graphics Rendering on the GPU : %G eng %U http://hdl.handle.net/21.11116/0000-0002-FC80-1 %R 10.1111/cgf.13622 %7 2019 %D 2019 %J Computer Graphics Forum %V 38 %N 2 %& 93 %P 93 - 103 %I Wiley-Blackwell %C Oxford %@ false %B EUROGRAPHICS 2019 STAR – State of The Art Reports %O EUROGRAPHICS 2019 The 40th Annual Conference of the European Association for Computer Graphics ; Genova, Italy, May 6-10, 2019 EG 2019
Habermann, M., Xu, W., Zollhöfer, M., Pons-Moll, G., and Theobalt, C. 2019a. LiveCap: Real-time Human Performance Capture from Monocular Video. ACM Transactions on Graphics 38, 2.
BibTeX
@article{Habermann_TOG19, TITLE = {{LiveCap}: {R}eal-time Human Performance Capture from Monocular Video}, AUTHOR = {Habermann, Marc and Xu, Weipeng and Zollh{\"o}fer, Michael and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {0730-0301}, DOI = {10.1145/3311970}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {ACM Transactions on Graphics}, VOLUME = {38}, NUMBER = {2}, EID = {14}, }
Endnote
%0 Journal Article %A Habermann, Marc %A Xu, Weipeng %A Zollhöfer, Michael %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T LiveCap: Real-time Human Performance Capture from Monocular Video : %G eng %U http://hdl.handle.net/21.11116/0000-0002-B947-E %R 10.1145/3311970 %7 2019 %D 2019 %J ACM Transactions on Graphics %V 38 %N 2 %Z sequence number: 14 %I ACM %C New York, NY %@ false
Hladký, J., Seidel, H.-P., and Steinberger, M. 2019. Tessellated Shading Streaming. Computer Graphics Forum (Proc. Eurographics Symposium on Rendering 2019) 38, 4.
BibTeX
@article{Hladky_EGSR2019, TITLE = {Tessellated Shading Streaming}, AUTHOR = {Hladk{\'y}, Jozef and Seidel, Hans-Peter and Steinberger, Markus}, LANGUAGE = {eng}, ISSN = {0167-7055}, URL = {https://diglib.eg.org/handle/10.1111/cgf13780}, DOI = {10.1111/cgf.13780}, PUBLISHER = {Wiley-Blackwell}, ADDRESS = {Oxford}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Computer Graphics Forum (Proc. Eurographics Symposium on Rendering)}, VOLUME = {38}, NUMBER = {4}, PAGES = {171--182}, BOOKTITLE = {Eurographics Symposium on Rendering 2019}, EDITOR = {Boubekeur, Tamy and Sen, Pradeep}, }
Endnote
%0 Journal Article %A Hladký, Jozef %A Seidel, Hans-Peter %A Steinberger, Markus %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Tessellated Shading Streaming : %G eng %U http://hdl.handle.net/21.11116/0000-0004-4897-1 %R 10.1111/cgf.13780 %U https://diglib.eg.org/handle/10.1111/cgf13780 %7 2019 %D 2019 %J Computer Graphics Forum %V 38 %N 4 %& 171 %P 171 - 182 %I Wiley-Blackwell %C Oxford %@ false %B Eurographics Symposium on Rendering 2019 %O Eurographics Symposium on Rendering 2019 EGSR 2019 Strasbourg, France, July 10 - 12, 2019
Singh, G., Öztireli, C., Ahmed, A.G.M., et al. 2019. Analysis of Sample Correlations for Monte Carlo Rendering. Computer Graphics Forum (Proc. EUROGRAPHICS 2019) 38, 2.
BibTeX
@article{Singh_EG2019STAR, TITLE = {Analysis of Sample Correlations for {Monte Carlo} Rendering}, AUTHOR = {Singh, Gurprit and {\"O}ztireli, Cengiz and Ahmed, Abdalla G.M. and Coeurjolly, David and Subr, Kartic and Ostromoukhov, Victor and Deussen, Oliver and Ramamoorthi, Ravi and Jarosz, Wojciech}, LANGUAGE = {eng}, ISSN = {0167-7055}, DOI = {10.1111/cgf.13653}, PUBLISHER = {Blackwell-Wiley}, ADDRESS = {Oxford}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Computer Graphics Forum (Proc. EUROGRAPHICS)}, VOLUME = {38}, NUMBER = {2}, PAGES = {473--491}, BOOKTITLE = {EUROGRAPHICS 2019 STAR -- State of The Art Reports}, }
Endnote
%0 Journal Article %A Singh, Gurprit %A Öztireli, Cengiz %A Ahmed, Abdalla G.M. %A Coeurjolly, David %A Subr, Kartic %A Ostromoukhov, Victor %A Deussen, Oliver %A Ramamoorthi, Ravi %A Jarosz, Wojciech %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations %T Analysis of Sample Correlations for Monte Carlo Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0002-F487-2 %R 10.1111/cgf.13653 %7 2019 %D 2019 %J Computer Graphics Forum %O Computer Graphics Forum : journal of the European Association for Computer Graphics Comput. Graph. Forum %V 38 %N 2 %& 473 %P 473 - 491 %I Blackwell-Wiley %C Oxford %@ false %B EUROGRAPHICS 2019 STAR – State of The Art Reports %O EUROGRAPHICS 2019 EG 2019 The 40th Annual Conference of the European Association for Computer Graphics ; Genova, Italy, May 6-10
Sumin, D., Rittig, T., Babaei, V., et al. Geometry-Aware Scattering Compensation for 3D Printing. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2019).
(Accepted/in press)
BibTeX
@article{SuminRittig2019, TITLE = {Geometry-Aware Scattering Compensation for {3D} Printing}, AUTHOR = {Sumin, Denis and Rittig, Tobias and Babaei, Vahid and Nindel, Thomas and Wilkie, Alexander and Didyk, Piotr and Bickel, Bernd and K{\v r}iv{\'a}nek, Jaroslav and Myszkowski, Karol and Weyrich, Tim}, LANGUAGE = {eng}, ISSN = {0730-0301}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, PUBLREMARK = {Accepted}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2019}, }
Endnote
%0 Journal Article %A Sumin, Denis %A Rittig, Tobias %A Babaei, Vahid %A Nindel, Thomas %A Wilkie, Alexander %A Didyk, Piotr %A Bickel, Bernd %A Křivánek, Jaroslav %A Myszkowski, Karol %A Weyrich, Tim %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Geometry-Aware Scattering Compensation for 3D Printing : %G eng %U http://hdl.handle.net/21.11116/0000-0003-7D65-0 %D 2019 %J ACM Transactions on Graphics %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2019 %O ACM SIGGRAPH 2019 Los Angeles, CA, USA, 28 July - 1 August
Thies, J., Zollhöfer, M., Stamminger, M., Theobalt, C., and Nießner, M. 2019. Face2Face: Real-Time Face Capture and Reenactment of RGB Videos. Communications of the ACM 62, 1.
BibTeX
@article{thies2019face, TITLE = {{Face2Face}: {R}eal-Time Face Capture and Reenactment of {RGB} Videos}, AUTHOR = {Thies, Justus and Zollh{\"o}fer, Michael and Stamminger, Marc and Theobalt, Christian and Nie{\ss}ner, Matthias}, LANGUAGE = {eng}, ISSN = {0001-0782}, DOI = {10.1145/3292039}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {Communications of the ACM}, VOLUME = {62}, NUMBER = {1}, PAGES = {96--104}, }
Endnote
%0 Journal Article %A Thies, Justus %A Zollhöfer, Michael %A Stamminger, Marc %A Theobalt, Christian %A Nießner, Matthias %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Face2Face: Real-Time Face Capture and Reenactment of RGB Videos : %G eng %U http://hdl.handle.net/21.11116/0000-0002-C0A7-8 %R 10.1145/3292039 %7 2019 %D 2019 %J Communications of the ACM %V 62 %N 1 %& 96 %P 96 - 104 %I ACM %C New York, NY %@ false
Tursun, O.T., Arabadzhiyska, E., Wernikowski, M., et al. Luminance-Contrast-Aware Foveated Rendering. ACM Transactions on Graphics (Proc. ACM SIGGRAPH 2019).
(Accepted/in press)
BibTeX
@article{Tursun2019Luminance, TITLE = {Luminance-Contrast-Aware Foveated Rendering}, AUTHOR = {Tursun, Okan Tarhan and Arabadzhiyska, Elena and Wernikowski, Marek and Mantiuk, Rados{\l}aw and Seidel, Hans-Peter and Myszkowski, Karol and Didyk, Piotr}, LANGUAGE = {eng}, ISSN = {0730-0301}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2019}, PUBLREMARK = {Accepted}, MARGINALMARK = {$\bullet$}, JOURNAL = {ACM Transactions on Graphics (Proc. ACM SIGGRAPH)}, BOOKTITLE = {Proceedings of ACM SIGGRAPH 2019}, }
Endnote
%0 Journal Article %A Tursun, Okan Tarhan %A Arabadzhiyska, Elena %A Wernikowski, Marek %A Mantiuk, Radosław %A Seidel, Hans-Peter %A Myszkowski, Karol %A Didyk, Piotr %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Luminance-Contrast-Aware Foveated Rendering : %G eng %U http://hdl.handle.net/21.11116/0000-0003-75D5-9 %D 2019 %J ACM Transactions on Graphics %I ACM %C New York, NY %@ false %B Proceedings of ACM SIGGRAPH 2019 %O ACM SIGGRAPH 2019 Los Angeles, CA, USA, 28 July - 1 August
Xu, W., Chatterjee, A., Zollhöfer, M., et al. 2019. Mo2Cap2: Real-time Mobile 3D Motion Capture with a Cap-mounted Fisheye Camera. IEEE Transactions on Visualization and Computer Graphics (Proc. IEEE VR 2019) 25, 5.
BibTeX
@article{Xu2019Mo2Cap2, TITLE = {{Mo2Cap2}: Real-time Mobile {3D} Motion Capture with a Cap-mounted Fisheye Camera}, AUTHOR = {Xu, Weipeng and Chatterjee, Avishek and Zollh{\"o}fer, Michael and Rhodin, Helge and Fua, Pascal and Seidel, Hans-Peter and Theobalt, Christian}, LANGUAGE = {eng}, ISSN = {1077-2626}, DOI = {10.1109/TVCG.2019.2898650}, PUBLISHER = {IEEE}, ADDRESS = {Piscataway, NJ}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {IEEE Transactions on Visualization and Computer Graphics (Proc. IEEE VR)}, VOLUME = {25}, NUMBER = {5}, PAGES = {2093--2101}, BOOKTITLE = {Selected Proceedings IEEE Virtual Reality 2019 (IEEE VR 2019)}, }
Endnote
%0 Journal Article %A Xu, Weipeng %A Chatterjee, Avishek %A Zollhöfer, Michael %A Rhodin, Helge %A Fua, Pascal %A Seidel, Hans-Peter %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Mo2Cap2: Real-time Mobile 3D Motion Capture with a Cap-mounted Fisheye Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0002-F1DB-7 %R 10.1109/TVCG.2019.2898650 %7 2019 %D 2019 %J IEEE Transactions on Visualization and Computer Graphics %V 25 %N 5 %& 2093 %P 2093 - 2101 %I IEEE %C Piscataway, NJ %@ false %B Selected Proceedings IEEE Virtual Reality 2019 %O IEEE VR 2019 Osaka, Japan, March 23rd - 27th
Yu, H., Bemana, M., Wernikowski, M., et al. 2019. A Perception-driven Hybrid Decomposition for Multi-layer Accommodative Displays. IEEE Transactions on Visualization and Computer Graphics (Proc. IEEE VR 2019) 25, 5.
BibTeX
@article{Yu_VR2019, TITLE = {A Perception-driven Hybrid Decomposition for Multi-layer Accommodative Displays}, AUTHOR = {Yu, Hyeonseung and Bemana, Mojtaba and Wernikowski, Marek and Chwesiuk, Micha{\l} and Tursun, Okan Tarhan and Singh, Gurprit and Myszkowski, Karol and Mantiuk, Rados{\l}aw and Seidel, Hans-Peter and Didyk, Piotr}, LANGUAGE = {eng}, ISSN = {1077-2626}, DOI = {10.1109/TVCG.2019.2898821}, PUBLISHER = {IEEE Computer Society}, ADDRESS = {New York, NY}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, JOURNAL = {IEEE Transactions on Visualization and Computer Graphics (Proc. IEEE VR)}, VOLUME = {25}, NUMBER = {5}, PAGES = {1940--1950}, BOOKTITLE = {Selected Proceedings IEEE Virtual Reality 2019 (IEEE VR 2019)}, EDITOR = {Thomas, Bruce and Welch, Greg and Kuhlen, Torsten and Johnson, Kyle}, }
Endnote
%0 Journal Article %A Yu, Hyeonseung %A Bemana, Mojtaba %A Wernikowski, Marek %A Chwesiuk, Michał %A Tursun, Okan Tarhan %A Singh, Gurprit %A Myszkowski, Karol %A Mantiuk, Radosław %A Seidel, Hans-Peter %A Didyk, Piotr %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T A Perception-driven Hybrid Decomposition for Multi-layer Accommodative Displays : %G eng %U http://hdl.handle.net/21.11116/0000-0002-DCB5-A %R 10.1109/TVCG.2019.2898821 %7 2019 %D 2019 %J IEEE Transactions on Visualization and Computer Graphics %V 25 %N 5 %& 1940 %P 1940 - 1950 %I IEEE Computer Society %C New York, NY %@ false %B Selected Proceedings IEEE Virtual Reality 2019 %O IEEE VR 2019 Osaka, Japan, 23rd - 27th March
Conference Paper
Alldieck, T., Magnor, M.A., Bhatnagar, B.L., Theobalt, C., and Pons-Moll, G. Learning to Reconstruct People in Clothing from a Single RGB Camera. 32nd IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
(Accepted/in press)
BibTeX
@inproceedings{alldieck19cvpr, TITLE = {Learning to Reconstruct People in Clothing from a Single {RGB} Camera}, AUTHOR = {Alldieck, Thiemo and Magnor, Marcus A. and Bhatnagar, Bharat Lal and Theobalt, Christian and Pons-Moll, Gerard}, PUBLISHER = {IEEE}, YEAR = {2019}, PUBLREMARK = {Accepted}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {32nd IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Alldieck, Thiemo %A Magnor, Marcus A. %A Bhatnagar, Bharat Lal %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T Learning to Reconstruct People in Clothing from a Single RGB Camera : %U http://hdl.handle.net/21.11116/0000-0003-5F97-9 %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-16 - 2019-06-20 %C Long Beach, CA, USA %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %I IEEE
Castelli Aleardi, L., Salihoglu, S., Singh, G., and Ovsjanikov, M. 2019. Spectral Measures of Distortion for Change Detection in Dynamic Graphs. Complex Networks and Their Applications VII, Springer.
BibTeX
@inproceedings{Castelli_COMPLEX2018, TITLE = {Spectral Measures of Distortion for Change Detection in Dynamic Graphs}, AUTHOR = {Castelli Aleardi, Luca and Salihoglu, Semih and Singh, Gurprit and Ovsjanikov, Maks}, LANGUAGE = {eng}, ISBN = {978-3-030-05413-7; 978-3-030-05414-4}, DOI = {10.1007/978-3-030-05414-4_5}, PUBLISHER = {Springer}, YEAR = {2018}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {Complex Networks and Their Applications VII}, EDITOR = {Aiello, Luca Maria and Cherifi, Chantal and Cherifi, Hocine and Lambiotte, Renaud and Li{\'o}, Pietro and Rocha, Luis M.}, PAGES = {54--66}, SERIES = {Studies in Computational Intelligence}, VOLUME = {813}, ADDRESS = {Cambridge, UK}, }
Endnote
%0 Conference Proceedings %A Castelli Aleardi, Luca %A Salihoglu, Semih %A Singh, Gurprit %A Ovsjanikov, Maks %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Spectral Measures of Distortion for Change Detection in Dynamic Graphs : %G eng %U http://hdl.handle.net/21.11116/0000-0003-F1F9-4 %R 10.1007/978-3-030-05414-4_5 %D 2019 %B 7th International Conference on Complex Networks and Their Applications %Z date of event: 2018-12-11 - 2018-12-13 %C Cambridge, UK %B Complex Networks and Their Applications VII %E Aiello, Luca Maria; Cherifi, Chantal; Cherifi, Hocine; Lambiotte, Renaud; Lió, Pietro; Rocha, Luis M. %P 54 - 66 %I Springer %@ 978-3-030-05413-7 978-3-030-05414-4 %B Studies in Computational Intelligence %N 813
Habermann, M., Xu, W., Rhodin, H., Zollhöfer, M., Pons-Moll, G., and Theobalt, C. 2019b. NRST: Non-rigid Surface Tracking from Monocular Video. Pattern Recognition (GCPR 2018), Springer.
BibTeX
@inproceedings{Habermann_GVPR18, TITLE = {{NRST}: {N}on-rigid Surface Tracking from Monocular Video}, AUTHOR = {Habermann, Marc and Xu, Weipeng and Rhodin, Helge and Zollh{\"o}fer, Michael and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, ISBN = {978-3-030-12938-5}, DOI = {10.1007/978-3-030-12939-2_23}, PUBLISHER = {Springer}, YEAR = {2018}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {Pattern Recognition (GCPR 2018)}, EDITOR = {Brox, Thomas and Bruhn, Andr{\'e}s and Fritz, Mario}, PAGES = {335--348}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {11269}, ADDRESS = {Stuttgart, Germany}, }
Endnote
%0 Conference Proceedings %A Habermann, Marc %A Xu, Weipeng %A Rhodin, Helge %A Zollhöfer, Michael %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T NRST: Non-rigid Surface Tracking from Monocular Video : %G eng %U http://hdl.handle.net/21.11116/0000-0002-B94C-9 %R 10.1007/978-3-030-12939-2_23 %D 2019 %B 40th German Conference on Pattern Recognition %Z date of event: 2018-10-09 - 2018-10-12 %C Stuttgart, Germany %B Pattern Recognition %E Brox, Thomas; Bruhn, Andrés; Fritz, Mario %P 335 - 348 %I Springer %@ 978-3-030-12938-5 %B Lecture Notes in Computer Science %N 11269
Habibie, I., Xu, W., Mehta, D., Pons-Moll, G., and Theobalt, C. In the Wild Human Pose Estimation using Explicit 2D Features and Intermediate 3D Representations. 32nd IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
(Accepted/in press)
BibTeX
@inproceedings{habibieCVPR19, TITLE = {In the Wild Human Pose Estimation using Explicit {2D} Features and Intermediate 3D Representations}, AUTHOR = {Habibie, Ikhsanul and Xu, Weipeng and Mehta, Dushyant and Pons-Moll, Gerard and Theobalt, Christian}, PUBLISHER = {IEEE}, YEAR = {2019}, PUBLREMARK = {Accepted}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {32nd IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Habibie, Ikhsanul %A Xu, Weipeng %A Mehta, Dushyant %A Pons-Moll, Gerard %A Theobalt, Christian %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T In the Wild Human Pose Estimation using Explicit 2D Features and Intermediate 3D Representations : %U http://hdl.handle.net/21.11116/0000-0003-6520-7 %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-16 - 2019-06-20 %C Long Beach, CA, USA %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %I IEEE
Mehta, D., Sotnychenko, O., Mueller, F., et al. 2019a. XNect Demo (v2): Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera. CVPR 2019 Demonstrations.
BibTeX
@inproceedings{XNectDemoV2_CVPR2019, TITLE = {{XNect} Demo (v2): {R}eal-time Multi-person {3D} Human Pose Estimation with a Single {RGB} Camera}, AUTHOR = {Mehta, Dushyant and Sotnychenko, Oleksandr and Mueller, Franziska and Xu, Weipeng and Seidel, Hans-Peter and Fua, Pascal and Elgharib, Mohamed and Rhodin, Helge and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://gvv.mpi-inf.mpg.de/projects/XNectDemoV2/}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {CVPR 2019 Demonstrations}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Mehta, Dushyant %A Sotnychenko, Oleksandr %A Mueller, Franziska %A Xu, Weipeng %A Seidel, Hans-Peter %A Fua, Pascal %A Elgharib, Mohamed %A Rhodin, Helge %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T XNect Demo (v2): Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0004-71DB-6 %U http://gvv.mpi-inf.mpg.de/projects/XNectDemoV2/ %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-16 - 2019-06-20 %C Long Beach, CA, USA %B CVPR 2019 Demonstrations %U http://gvv.mpi-inf.mpg.de/projects/XNectDemoV2/
Winter, M., Mlakar, D., Zayer, R., Seidel, H.-P., and Steinberger, M. 2019. Adaptive Sparse Matrix-Matrix Multiplication on the GPU. PPoPP’19, 24th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, ACM.
BibTeX
@inproceedings{PPOPP:2019:ASPMM, TITLE = {Adaptive Sparse Matrix-Matrix Multiplication on the {GPU}}, AUTHOR = {Winter, Martin and Mlakar, Daniel and Zayer, Rhaleb and Seidel, Hans-Peter and Steinberger, Markus}, LANGUAGE = {eng}, ISBN = {978-1-4503-6225-2}, DOI = {10.1145/3293883.3295701}, PUBLISHER = {ACM}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, DATE = {2019}, BOOKTITLE = {PPoPP'19, 24th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming}, PAGES = {68--81}, ADDRESS = {Washington, DC, USA}, }
Endnote
%0 Conference Proceedings %A Winter, Martin %A Mlakar, Daniel %A Zayer, Rhaleb %A Seidel, Hans-Peter %A Steinberger, Markus %+ External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Adaptive Sparse Matrix-Matrix Multiplication on the GPU : %G eng %U http://hdl.handle.net/21.11116/0000-0002-EFE9-B %R 10.1145/3293883.3295701 %D 2019 %B 24th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming %Z date of event: 2019-02-16 - 2019-02-20 %C Washington, DC, USA %B PPoPP'19 %P 68 - 81 %I ACM %@ 978-1-4503-6225-2
Ye, N., Wolski, K., and Mantiuk, R.K. Predicting Visible Image Differences under Varying Display Brightness and Viewing Distance. 32nd IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
(Accepted/in press)
BibTeX
@inproceedings{Ye19, TITLE = {Predicting Visible Image Differences under Varying Display Brightness and Viewing Distance}, AUTHOR = {Ye, Nanyang and Wolski, Krzysztof and Mantiuk, Rafa{\l} K.}, LANGUAGE = {eng}, PUBLISHER = {IEEE}, YEAR = {2019}, PUBLREMARK = {Accepted}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {32nd IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Ye, Nanyang %A Wolski, Krzysztof %A Mantiuk, Rafał K. %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Predicting Visible Image Differences under Varying Display Brightness and Viewing Distance : %G eng %U http://hdl.handle.net/21.11116/0000-0003-2748-1 %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-16 - 2019-06-20 %C Long Beach, CA, USA %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %I IEEE
Yu, T., Zheng, Z., Zhong, Y., et al. SimulCap: Single-View Human Performance Capture with Cloth Simulation. 32nd IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2019), IEEE.
(Accepted/in press)
BibTeX
@inproceedings{SimulCap19, TITLE = {{SimulCap}: {S}ingle-View Human Performance Capture with Cloth Simulation}, AUTHOR = {Yu, Tao and Zheng, Zerong and Zhong, Yuan and Zhao, Jianhui and Dai, Qionghai and Pons-Moll, Gerard and Liu, Yebin}, PUBLISHER = {IEEE}, YEAR = {2019}, PUBLREMARK = {Accepted}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {32nd IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2019)}, ADDRESS = {Long Beach, CA, USA}, }
Endnote
%0 Conference Proceedings %A Yu, Tao %A Zheng, Zerong %A Zhong, Yuan %A Zhao, Jianhui %A Dai, Qionghai %A Pons-Moll, Gerard %A Liu, Yebin %+ External Organizations External Organizations External Organizations External Organizations External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T SimulCap: Single-View Human Performance Capture with Cloth Simulation : %U http://hdl.handle.net/21.11116/0000-0003-651E-B %D 2019 %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2019-06-16 - 2019-06-20 %C Long Beach, CA, USA %B 32nd IEEE Conference on Computer Vision and Pattern Recognition %I IEEE
Paper
Alldieck, T., Pons-Moll, G., Theobalt, C., and Magnor, M.A. 2019a. Tex2Shape: Detailed Full Human Body Geometry from a Single Image. http://arxiv.org/abs/1904.08645.
(arXiv: 1904.08645)
Abstract
We present a simple yet effective method to infer detailed full human body shape from only a single photograph. Our model can infer full-body shape including face, hair, and clothing including wrinkles at interactive frame-rates. Results feature details even on parts that are occluded in the input image. Our main idea is to turn shape regression into an aligned image-to-image translation problem. The input to our method is a partial texture map of the visible region obtained from off-the-shelf methods. From a partial texture, we estimate detailed normal and vector displacement maps, which can be applied to a low-resolution smooth body model to add detail and clothing. Despite being trained purely with synthetic data, our model generalizes well to real-world photographs. Numerous results demonstrate the versatility and robustness of our method.
BibTeX
@online{Alldieck_arXiv1904.08645, TITLE = {{Tex2Shape}: Detailed Full Human Body Geometry from a Single Image}, AUTHOR = {Alldieck, Thiemo and Pons-Moll, Gerard and Theobalt, Christian and Magnor, Marcus A.}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1904.08645}, EPRINT = {1904.08645}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a simple yet effective method to infer detailed full human body shape from only a single photograph. Our model can infer full-body shape including face, hair, and clothing including wrinkles at interactive frame-rates. Results feature details even on parts that are occluded in the input image. Our main idea is to turn shape regression into an aligned image-to-image translation problem. The input to our method is a partial texture map of the visible region obtained from off-the-shelf methods. From a partial texture, we estimate detailed normal and vector displacement maps, which can be applied to a low-resolution smooth body model to add detail and clothing. Despite being trained purely with synthetic data, our model generalizes well to real-world photographs. Numerous results demonstrate the versatility and robustness of our method.}, }
Endnote
%0 Report %A Alldieck, Thiemo %A Pons-Moll, Gerard %A Theobalt, Christian %A Magnor, Marcus A. %+ External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Tex2Shape: Detailed Full Human Body Geometry from a Single Image : %G eng %U http://hdl.handle.net/21.11116/0000-0003-ECBE-E %U http://arxiv.org/abs/1904.08645 %D 2019 %X We present a simple yet effective method to infer detailed full human body shape from only a single photograph. Our model can infer full-body shape including face, hair, and clothing including wrinkles at interactive frame-rates. Results feature details even on parts that are occluded in the input image. Our main idea is to turn shape regression into an aligned image-to-image translation problem. The input to our method is a partial texture map of the visible region obtained from off-the-shelf methods. From a partial texture, we estimate detailed normal and vector displacement maps, which can be applied to a low-resolution smooth body model to add detail and clothing. Despite being trained purely with synthetic data, our model generalizes well to real-world photographs. Numerous results demonstrate the versatility and robustness of our method. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Alldieck, T., Magnor, M.A., Bhatnagar, B.L., Theobalt, C., and Pons-Moll, G. 2019b. Learning to Reconstruct People in Clothing from a Single RGB Camera. http://arxiv.org/abs/1903.05885.
(arXiv: 1903.05885)
Abstract
We present a learning-based model to infer the personalized 3D shape of people from a few frames (1-8) of a monocular video in which the person is moving, in less than 10 seconds with a reconstruction accuracy of 5mm. Our model learns to predict the parameters of a statistical body model and instance displacements that add clothing and hair to the shape. The model achieves fast and accurate predictions based on two key design choices. First, by predicting shape in a canonical T-pose space, the network learns to encode the images of the person into pose-invariant latent codes, where the information is fused. Second, based on the observation that feed-forward predictions are fast but do not always align with the input images, we predict using both, bottom-up and top-down streams (one per view) allowing information to flow in both directions. Learning relies only on synthetic 3D data. Once learned, the model can take a variable number of frames as input, and is able to reconstruct shapes even from a single image with an accuracy of 6mm. Results on 3 different datasets demonstrate the efficacy and accuracy of our approach.
BibTeX
@online{Alldieck_arXiv1903.05885, TITLE = {Learning to Reconstruct People in Clothing from a Single {RGB} Camera}, AUTHOR = {Alldieck, Thiemo and Magnor, Marcus A. and Bhatnagar, Bharat Lal and Theobalt, Christian and Pons-Moll, Gerard}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1903.05885}, EPRINT = {1903.05885}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a learning-based model to infer the personalized 3D shape of people from a few frames (1-8) of a monocular video in which the person is moving, in less than 10 seconds with a reconstruction accuracy of 5mm. Our model learns to predict the parameters of a statistical body model and instance displacements that add clothing and hair to the shape. The model achieves fast and accurate predictions based on two key design choices. First, by predicting shape in a canonical T-pose space, the network learns to encode the images of the person into pose-invariant latent codes, where the information is fused. Second, based on the observation that feed-forward predictions are fast but do not always align with the input images, we predict using both, bottom-up and top-down streams (one per view) allowing information to flow in both directions. Learning relies only on synthetic 3D data. Once learned, the model can take a variable number of frames as input, and is able to reconstruct shapes even from a single image with an accuracy of 6mm. Results on 3 different datasets demonstrate the efficacy and accuracy of our approach.}, }
Endnote
%0 Report %A Alldieck, Thiemo %A Magnor, Marcus A. %A Bhatnagar, Bharat Lal %A Theobalt, Christian %A Pons-Moll, Gerard %+ Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society %T Learning to Reconstruct People in Clothing from a Single RGB Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE01-E %U http://arxiv.org/abs/1903.05885 %D 2019 %X We present a learning-based model to infer the personalized 3D shape of people from a few frames (1-8) of a monocular video in which the person is moving, in less than 10 seconds with a reconstruction accuracy of 5mm. Our model learns to predict the parameters of a statistical body model and instance displacements that add clothing and hair to the shape. The model achieves fast and accurate predictions based on two key design choices. First, by predicting shape in a canonical T-pose space, the network learns to encode the images of the person into pose-invariant latent codes, where the information is fused. Second, based on the observation that feed-forward predictions are fast but do not always align with the input images, we predict using both, bottom-up and top-down streams (one per view) allowing information to flow in both directions. Learning relies only on synthetic 3D data. Once learned, the model can take a variable number of frames as input, and is able to reconstruct shapes even from a single image with an accuracy of 6mm. Results on 3 different datasets demonstrate the efficacy and accuracy of our approach. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Elgharib, M., BR, M., Tewari, A., et al. 2019. EgoFace: Egocentric Face Performance Capture and Videorealistic Reenactment. http://arxiv.org/abs/1905.10822.
(arXiv: 1905.10822)
Abstract
Face performance capture and reenactment techniques use multiple cameras and sensors, positioned at a distance from the face or mounted on heavy wearable devices. This limits their applications in mobile and outdoor environments. We present EgoFace, a radically new lightweight setup for face performance capture and front-view videorealistic reenactment using a single egocentric RGB camera. Our lightweight setup allows operations in uncontrolled environments, and lends itself to telepresence applications such as video-conferencing from dynamic environments. The input image is projected into a low dimensional latent space of the facial expression parameters. Through careful adversarial training of the parameter-space synthetic rendering, a videorealistic animation is produced. Our problem is challenging as the human visual system is sensitive to the smallest face irregularities that could occur in the final results. This sensitivity is even stronger for video results. Our solution is trained in a pre-processing stage, through a supervised manner without manual annotations. EgoFace captures a wide variety of facial expressions, including mouth movements and asymmetrical expressions. It works under varying illuminations, background, movements, handles people from different ethnicities and can operate in real time.
BibTeX
@online{Elgharib_arXiv1905.10822, TITLE = {{EgoFace}: Egocentric Face Performance Capture and Videorealistic Reenactment}, AUTHOR = {Elgharib, Mohamed and BR, Mallikarjun and Tewari, Ayush and Kim, Hyeongwoo and Liu, Wentao and Seidel, Hans-Peter and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1905.10822}, EPRINT = {1905.10822}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Face performance capture and reenactment techniques use multiple cameras and sensors, positioned at a distance from the face or mounted on heavy wearable devices. This limits their applications in mobile and outdoor environments. We present EgoFace, a radically new lightweight setup for face performance capture and front-view videorealistic reenactment using a single egocentric RGB camera. Our lightweight setup allows operations in uncontrolled environments, and lends itself to telepresence applications such as video-conferencing from dynamic environments. The input image is projected into a low dimensional latent space of the facial expression parameters. Through careful adversarial training of the parameter-space synthetic rendering, a videorealistic animation is produced. Our problem is challenging as the human visual system is sensitive to the smallest face irregularities that could occur in the final results. This sensitivity is even stronger for video results. Our solution is trained in a pre-processing stage, through a supervised manner without manual annotations. EgoFace captures a wide variety of facial expressions, including mouth movements and asymmetrical expressions. It works under varying illuminations, background, movements, handles people from different ethnicities and can operate in real time.}, }
Endnote
%0 Report %A Elgharib, Mohamed %A BR, Mallikarjun %A Tewari, Ayush %A Kim, Hyeongwoo %A Liu, Wentao %A Seidel, Hans-Peter %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T EgoFace: Egocentric Face Performance Capture and Videorealistic Reenactment : %G eng %U http://hdl.handle.net/21.11116/0000-0003-F1E6-9 %U http://arxiv.org/abs/1905.10822 %D 2019 %X Face performance capture and reenactment techniques use multiple cameras and sensors, positioned at a distance from the face or mounted on heavy wearable devices. This limits their applications in mobile and outdoor environments. We present EgoFace, a radically new lightweight setup for face performance capture and front-view videorealistic reenactment using a single egocentric RGB camera. Our lightweight setup allows operations in uncontrolled environments, and lends itself to telepresence applications such as video-conferencing from dynamic environments. The input image is projected into a low dimensional latent space of the facial expression parameters. Through careful adversarial training of the parameter-space synthetic rendering, a videorealistic animation is produced. Our problem is challenging as the human visual system is sensitive to the smallest face irregularities that could occur in the final results. This sensitivity is even stronger for video results. Our solution is trained in a pre-processing stage, through a supervised manner without manual annotations. EgoFace captures a wide variety of facial expressions, including mouth movements and asymmetrical expressions. It works under varying illuminations, background, movements, handles people from different ethnicities and can operate in real time. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR %U http://gvv.mpi-inf.mpg.de/projects/EgoFace/
Fried, O., Tewari, A., Zollhöfer, M., et al. 2019. Text-based Editing of Talking-head Video. http://arxiv.org/abs/1906.01524.
(arXiv: 1906.01524)
Abstract
Editing talking-head video to change the speech content or to remove filler words is challenging. We propose a novel method to edit talking-head video based on its transcript to produce a realistic output video in which the dialogue of the speaker has been modified, while maintaining a seamless audio-visual flow (i.e. no jump cuts). Our method automatically annotates an input talking-head video with phonemes, visemes, 3D face pose and geometry, reflectance, expression and scene illumination per frame. To edit a video, the user has to only edit the transcript, and an optimization strategy then chooses segments of the input corpus as base material. The annotated parameters corresponding to the selected segments are seamlessly stitched together and used to produce an intermediate video representation in which the lower half of the face is rendered with a parametric face model. Finally, a recurrent video generation network transforms this representation to a photorealistic video that matches the edited transcript. We demonstrate a large variety of edits, such as the addition, removal, and alteration of words, as well as convincing language translation and full sentence synthesis.
BibTeX
@online{Fried_arXiv1906.01524, TITLE = {Text-based Editing of Talking-head Video}, AUTHOR = {Fried, Ohad and Tewari, Ayush and Zollh{\"o}fer, Michael and Finkelstein, Adam and Shechtman, Eli and Goldman, Dan B. and Genova, Kyle and Jin, Zeyu and Theobalt, Christian and Agrawala, Maneesh}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1906.01524}, EPRINT = {1906.01524}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Editing talking-head video to change the speech content or to remove filler words is challenging. We propose a novel method to edit talking-head video based on its transcript to produce a realistic output video in which the dialogue of the speaker has been modified, while maintaining a seamless audio-visual flow (i.e. no jump cuts). Our method automatically annotates an input talking-head video with phonemes, visemes, 3D face pose and geometry, reflectance, expression and scene illumination per frame. To edit a video, the user has to only edit the transcript, and an optimization strategy then chooses segments of the input corpus as base material. The annotated parameters corresponding to the selected segments are seamlessly stitched together and used to produce an intermediate video representation in which the lower half of the face is rendered with a parametric face model. Finally, a recurrent video generation network transforms this representation to a photorealistic video that matches the edited transcript. We demonstrate a large variety of edits, such as the addition, removal, and alteration of words, as well as convincing language translation and full sentence synthesis.}, }
Endnote
%0 Report %A Fried, Ohad %A Tewari, Ayush %A Zollhöfer, Michael %A Finkelstein, Adam %A Shechtman, Eli %A Goldman, Dan B. %A Genova, Kyle %A Jin, Zeyu %A Theobalt, Christian %A Agrawala, Maneesh %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Text-based Editing of Talking-head Video : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE15-8 %U http://arxiv.org/abs/1906.01524 %D 2019 %X Editing talking-head video to change the speech content or to remove filler words is challenging. We propose a novel method to edit talking-head video based on its transcript to produce a realistic output video in which the dialogue of the speaker has been modified, while maintaining a seamless audio-visual flow (i.e. no jump cuts). Our method automatically annotates an input talking-head video with phonemes, visemes, 3D face pose and geometry, reflectance, expression and scene illumination per frame. To edit a video, the user has to only edit the transcript, and an optimization strategy then chooses segments of the input corpus as base material. The annotated parameters corresponding to the selected segments are seamlessly stitched together and used to produce an intermediate video representation in which the lower half of the face is rendered with a parametric face model. Finally, a recurrent video generation network transforms this representation to a photorealistic video that matches the edited transcript. We demonstrate a large variety of edits, such as the addition, removal, and alteration of words, as well as convincing language translation and full sentence synthesis. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR,Computer Science, Learning, cs.LG
Habibie, I., Xu, W., Mehta, D., Pons-Moll, G., and Theobalt, C. 2019. In the Wild Human Pose Estimation Using Explicit 2D Features and Intermediate 3D Representations. http://arxiv.org/abs/1904.03289.
(arXiv: 1904.03289)
Abstract
Convolutional Neural Network based approaches for monocular 3D human pose estimation usually require a large amount of training images with 3D pose annotations. While it is feasible to provide 2D joint annotations for large corpora of in-the-wild images with humans, providing accurate 3D annotations to such in-the-wild corpora is hardly feasible in practice. Most existing 3D labelled data sets are either synthetically created or feature in-studio images. 3D pose estimation algorithms trained on such data often have limited ability to generalize to real world scene diversity. We therefore propose a new deep learning based method for monocular 3D human pose estimation that shows high accuracy and generalizes better to in-the-wild scenes. It has a network architecture that comprises a new disentangled hidden space encoding of explicit 2D and 3D features, and uses supervision by a new learned projection model from predicted 3D pose. Our algorithm can be jointly trained on image data with 3D labels and image data with only 2D labels. It achieves state-of-the-art accuracy on challenging in-the-wild data.
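To make the mixed supervision described in the abstract concrete, here is a minimal, hypothetical Python sketch (not the authors' architecture or code): a single loss that always supervises the 2D reprojection of the predicted 3D pose and adds a 3D term only when 3D labels exist. The weak-perspective camera, the tensor shapes, and the loss weighting are illustrative assumptions.

import torch
import torch.nn.functional as F

def pose_loss(pred_3d, cam_scale, cam_trans, gt_2d, gt_3d=None, w3d=1.0):
    """pred_3d: (B, J, 3) joints; cam_scale: (B,); cam_trans: (B, 2);
    gt_2d: (B, J, 2); gt_3d: (B, J, 3) or None for 2D-only batches."""
    # Weak-perspective projection of the predicted joints (an assumption here;
    # the paper instead uses a learned projection model).
    proj_2d = cam_scale[:, None, None] * pred_3d[..., :2] + cam_trans[:, None, :]
    loss = F.mse_loss(proj_2d, gt_2d)        # 2D supervision, always available
    if gt_3d is not None:                    # extra 3D supervision when labelled
        loss = loss + w3d * F.mse_loss(pred_3d, gt_3d)
    return loss

# Toy usage with random tensors (hypothetical shapes: batch of 4, 17 joints):
B, J = 4, 17
loss = pose_loss(torch.randn(B, J, 3), torch.ones(B), torch.zeros(B, 2),
                 torch.randn(B, J, 2))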
BibTeX
@online{Habibie_arXiv1904.03289, TITLE = {In the Wild Human Pose Estimation Using Explicit {2D} Features and Intermediate {3D} Representations}, AUTHOR = {Habibie, Ikhsanul and Xu, Weipeng and Mehta, Dushyant and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1904.03289}, EPRINT = {1904.03289}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Convolutional Neural Network based approaches for monocular 3D human pose estimation usually require a large amount of training images with 3D pose annotations. While it is feasible to provide 2D joint annotations for large corpora of in-the-wild images with humans, providing accurate 3D annotations to such in-the-wild corpora is hardly feasible in practice. Most existing 3D labelled data sets are either synthetically created or feature in-studio images. 3D pose estimation algorithms trained on such data often have limited ability to generalize to real world scene diversity. We therefore propose a new deep learning based method for monocular 3D human pose estimation that shows high accuracy and generalizes better to in-the-wild scenes. It has a network architecture that comprises a new disentangled hidden space encoding of explicit 2D and 3D features, and uses supervision by a new learned projection model from predicted 3D pose. Our algorithm can be jointly trained on image data with 3D labels and image data with only 2D labels. It achieves state-of-the-art accuracy on challenging in-the-wild data.}, }
Endnote
%0 Report %A Habibie, Ikhsanul %A Xu, Weipeng %A Mehta, Dushyant %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T In the Wild Human Pose Estimation Using Explicit 2D Features and Intermediate 3D Representations : %G eng %U http://hdl.handle.net/21.11116/0000-0003-F76E-C %U http://arxiv.org/abs/1904.03289 %D 2019 %X Convolutional Neural Network based approaches for monocular 3D human pose estimation usually require a large amount of training images with 3D pose annotations. While it is feasible to provide 2D joint annotations for large corpora of in-the-wild images with humans, providing accurate 3D annotations to such in-the-wild corpora is hardly feasible in practice. Most existing 3D labelled data sets are either synthetically created or feature in-studio images. 3D pose estimation algorithms trained on such data often have limited ability to generalize to real world scene diversity. We therefore propose a new deep learning based method for monocular 3D human pose estimation that shows high accuracy and generalizes better to in-the-wild scenes. It has a network architecture that comprises a new disentangled hidden space encoding of explicit 2D and 3D features, and uses supervision by a new learned projection model from predicted 3D pose. Our algorithm can be jointly trained on image data with 3D labels and image data with only 2D labels. It achieves state-of-the-art accuracy on challenging in-the-wild data. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
Jiang, C., Tang, C., Seidel, H.-P., Chen, R., and Wonka, P. 2019. Computational Design of Lightweight Trusses. http://arxiv.org/abs/1901.05637.
(arXiv: 1901.05637)
Abstract
Trusses are load-carrying light-weight structures consisting of bars connected at joints ubiquitously applied in a variety of engineering scenarios. Designing optimal trusses that satisfy functional specifications with a minimal amount of material has interested both theoreticians and practitioners for more than a century. In this paper, we introduce two main ideas to improve upon the state of the art. First, we formulate an alternating linear programming problem for geometry optimization. Second, we introduce two sets of complementary topological operations, including a novel subdivision scheme for global topology refinement inspired by Michell's famed theoretical study. Based on these two ideas, we build an efficient computational framework for the design of lightweight trusses. We illustrate our framework with a variety of functional specifications and extensions. We show that our method achieves trusses with smaller volumes and is over two orders of magnitude faster compared with recent state-of-the-art approaches.
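For orientation, the following is a minimal, hedged SciPy sketch of the classical minimum-volume sizing LP for a truss with fixed node positions; it only illustrates the linear-programming building block mentioned in the abstract, not the paper's alternating geometry optimization or its topology operations. The two-bar ground structure, the load, and the stress limit are made-up values for illustration.

import numpy as np
from scipy.optimize import linprog

nodes = np.array([[0.0, 0.0], [2.0, 0.0], [1.0, 1.0]])   # two supports and one free node
bars = [(0, 2), (1, 2)]                                    # bars meeting at the free node (index 2)
load = np.array([0.0, -1.0])                               # external load at the free node
sigma = 1.0                                                # allowable stress (sets the scale)

lengths = np.array([np.linalg.norm(nodes[i] - nodes[j]) for i, j in bars])

# Equilibrium matrix at the free node: column k is the unit vector from the
# free node towards the other end of bar k (a tensile force pulls that way).
B = np.column_stack([(nodes[i] - nodes[2]) / lengths[k] for k, (i, _) in enumerate(bars)])

# Split member forces into tension/compression parts, q = qp - qm with qp, qm >= 0.
# A fully stressed member has area |q_k| / sigma, so total volume is linear in (qp, qm).
n = len(bars)
c = np.concatenate([lengths, lengths]) / sigma             # objective: total material volume
A_eq = np.hstack([B, -B])                                  # equilibrium: B q + load = 0
b_eq = -load
res = linprog(c, A_eq=A_eq, b_eq=b_eq, bounds=(0, None), method="highs")

q = res.x[:n] - res.x[n:]                                  # signed member forces
areas = np.abs(q) / sigma
print("member forces:", q)                                 # negative values are compression
print("cross-section areas:", areas)
print("total volume:", float(lengths @ areas))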
BibTeX
@online{Jiang_arXIv1901.05637, TITLE = {Computational Design of Lightweight Trusses}, AUTHOR = {Jiang, Caigui and Tang, Chengcheng and Seidel, Hans-Peter and Chen, Renjie and Wonka, Peter}, URL = {http://arxiv.org/abs/1901.05637}, EPRINT = {1901.05637}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Trusses are load-carrying light-weight structures consisting of bars connected at joints ubiquitously applied in a variety of engineering scenarios. Designing optimal trusses that satisfy functional specifications with a minimal amount of material has interested both theoreticians and practitioners for more than a century. In this paper, we introduce two main ideas to improve upon the state of the art. First, we formulate an alternating linear programming problem for geometry optimization. Second, we introduce two sets of complementary topological operations, including a novel subdivision scheme for global topology refinement inspired by Michell's famed theoretical study. Based on these two ideas, we build an efficient computational framework for the design of lightweight trusses. We illustrate our framework with a variety of functional specifications and extensions. We show that our method achieves trusses with smaller volumes and is over two orders of magnitude faster compared with recent state-of-the-art approaches.}, }
Endnote
%0 Report %A Jiang, Caigui %A Tang, Chengcheng %A Seidel, Hans-Peter %A Chen, Renjie %A Wonka, Peter %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T Computational Design of Lightweight Trusses : %U http://hdl.handle.net/21.11116/0000-0003-A7E9-A %U http://arxiv.org/abs/1901.05637 %D 2019 %X Trusses are load-carrying light-weight structures consisting of bars connected at joints ubiquitously applied in a variety of engineering scenarios. Designing optimal trusses that satisfy functional specifications with a minimal amount of material has interested both theoreticians and practitioners for more than a century. In this paper, we introduce two main ideas to improve upon the state of the art. First, we formulate an alternating linear programming problem for geometry optimization. Second, we introduce two sets of complementary topological operations, including a novel subdivision scheme for global topology refinement inspired by Michell's famed theoretical study. Based on these two ideas, we build an efficient computational framework for the design of lightweight trusses. We illustrate our framework with a variety of functional specifications and extensions. We show that our method achieves trusses with smaller volumes and is over two orders of magnitude faster compared with recent state-of-the-art approaches. %K Computer Science, Graphics, cs.GR
Mehta, D., Kim, K.I., and Theobalt, C. 2019b. Implicit Filter Sparsification In Convolutional Neural Networks. http://arxiv.org/abs/1905.04967.
(arXiv: 1905.04967)
Abstract
We show implicit filter level sparsity manifests in convolutional neural networks (CNNs) which employ Batch Normalization and ReLU activation, and are trained with adaptive gradient descent techniques and L2 regularization or weight decay. Through an extensive empirical study (Mehta et al., 2019) we hypothesize the mechanism behind the sparsification process, and find surprising links to certain filter sparsification heuristics proposed in literature. Emergence of, and the subsequent pruning of selective features is observed to be one of the contributing mechanisms, leading to feature sparsity at par or better than certain explicit sparsification / pruning approaches. In this workshop article we summarize our findings, and point out corollaries of selective-feature penalization which could also be employed as heuristics for filter pruning.
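As a rough, hypothetical illustration of the quantity being discussed (not the authors' experimental code): the Python snippet below trains a tiny Conv-BatchNorm-ReLU network with Adam plus weight decay on random stand-in data and then reports the fraction of filters whose BatchNorm scale has (nearly) collapsed to zero, i.e. the filter-level sparsity the abstract refers to. The toy data, network, and threshold are assumptions, and such a short run on random labels is not expected to reproduce the effect; it only shows how one might measure it.

import torch
import torch.nn as nn

torch.manual_seed(0)
model = nn.Sequential(
    nn.Conv2d(3, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(),
    nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(64, 10),
)
opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
loss_fn = nn.CrossEntropyLoss()

x = torch.randn(256, 3, 16, 16)             # stand-in images (assumption)
y = torch.randint(0, 10, (256,))            # stand-in labels (assumption)

model.train()
for epoch in range(20):                     # toy training loop
    for i in range(0, len(x), 64):
        opt.zero_grad()
        loss_fn(model(x[i:i + 64]), y[i:i + 64]).backward()
        opt.step()

# Filter-level sparsity: fraction of channels whose learned BatchNorm scale
# |gamma| falls below a small threshold (those filters contribute almost
# nothing after the BN + ReLU pair).
with torch.no_grad():
    for name, m in model.named_modules():
        if isinstance(m, nn.BatchNorm2d):
            frac = (m.weight.abs() < 1e-3).float().mean().item()
            print(f"layer {name}: {frac:.1%} near-zero filters")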
Export
BibTeX
@online{Mehta_arXiv1905.04967, TITLE = {Implicit Filter Sparsification In Convolutional Neural Networks}, AUTHOR = {Mehta, Dushyant and Kim, Kwang In and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1905.04967}, EPRINT = {1905.04967}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We show implicit filter level sparsity manifests in convolutional neural networks (CNNs) which employ Batch Normalization and ReLU activation, and are trained with adaptive gradient descent techniques and L2 regularization or weight decay. Through an extensive empirical study (Mehta et al., 2019) we hypothesize the mechanism behind the sparsification process, and find surprising links to certain filter sparsification heuristics proposed in literature. Emergence of, and the subsequent pruning of selective features is observed to be one of the contributing mechanisms, leading to feature sparsity at par or better than certain explicit sparsification / pruning approaches. In this workshop article we summarize our findings, and point out corollaries of selective-feature penalization which could also be employed as heuristics for filter pruning.}, }
Endnote
%0 Report %A Mehta, Dushyant %A Kim, Kwang In %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society %T Implicit Filter Sparsification In Convolutional Neural Networks : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE07-8 %U http://arxiv.org/abs/1905.04967 %D 2019 %X We show implicit filter level sparsity manifests in convolutional neural networks (CNNs) which employ Batch Normalization and ReLU activation, and are trained with adaptive gradient descent techniques and L2 regularization or weight decay. Through an extensive empirical study (Mehta et al., 2019) we hypothesize the mechanism behind the sparsification process, and find surprising links to certain filter sparsification heuristics proposed in literature. Emergence of, and the subsequent pruning of selective features is observed to be one of the contributing mechanisms, leading to feature sparsity at par or better than certain explicit sparsification / pruning approaches. In this workshop article we summarize our findings, and point out corollaries of selective-feature penalization which could also be employed as heuristics for filter pruning. %K Computer Science, Learning, cs.LG,Computer Science, Computer Vision and Pattern Recognition, cs.CV,Statistics, Machine Learning, stat.ML
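One way to observe the implicit filter-level sparsity discussed in the entry above is to inspect the BatchNorm scale parameters of a trained network: filters whose scales collapse toward zero are effectively pruned. The snippet below is a small PyTorch diagnostic in that spirit, not the authors' code; the threshold and the toy two-block model are assumptions.

```python
# Measure the fraction of conv filters that are "implicitly pruned", i.e. whose
# following BatchNorm scale |gamma| has shrunk below a small threshold.
import torch
import torch.nn as nn

def filter_sparsity(model: nn.Module, threshold: float = 1e-2) -> float:
    """Fraction of filters across all BatchNorm2d layers with |gamma| < threshold."""
    dead, total = 0.0, 0
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            gamma = m.weight.detach().abs()
            dead += float((gamma < threshold).sum())
            total += gamma.numel()
    return dead / total if total else 0.0

# Toy conv blocks; in practice this would be a network trained with
# Adam + weight decay, where many gammas end up near zero.
model = nn.Sequential(
    nn.Conv2d(3, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
    nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
)
print(f"implicitly sparse filters: {filter_sparsity(model):.1%}")
```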
Mehta, D., Sotnychenko, O., Mueller, F., et al. 2019c. XNect: Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera. http://arxiv.org/abs/1907.00837.
(arXiv: 1907.00837)
Abstract
We present a real-time approach for multi-person 3D motion capture at over 30 fps using a single RGB camera. It operates in generic scenes and is robust to difficult occlusions both by other people and objects. Our method operates in subsequent stages. The first stage is a convolutional neural network (CNN) that estimates 2D and 3D pose features along with identity assignments for all visible joints of all individuals. We contribute a new architecture for this CNN, called SelecSLS Net, that uses novel selective long and short range skip connections to improve the information flow allowing for a drastically faster network without compromising accuracy. In the second stage, a fully-connected neural network turns the possibly partial (on account of occlusion) 2D pose and 3D pose features for each subject into a complete 3D pose estimate per individual. The third stage applies space-time skeletal model fitting to the predicted 2D and 3D pose per subject to further reconcile the 2D and 3D pose, and enforce temporal coherence. Our method returns the full skeletal pose in joint angles for each subject. This is a further key distinction from previous work that neither extracted global body positions nor joint angle results of a coherent skeleton in real time for multi-person scenes. The proposed system runs on consumer hardware at a previously unseen speed of more than 30 fps given 512x320 images as input while achieving state-of-the-art accuracy, which we will demonstrate on a range of challenging real-world scenes.
Export
BibTeX
@online{Mehta_arXiv1907.00837, TITLE = {{XNect}: Real-time Multi-person {3D} Human Pose Estimation with a Single {RGB} Camera}, AUTHOR = {Mehta, Dushyant and Sotnychenko, Oleksandr and Mueller, Franziska and Xu, Weipeng and Elgharib, Mohamed and Fua, Pascal and Seidel, Hans-Peter and Rhodin, Helge and Pons-Moll, Gerard and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1907.00837}, EPRINT = {1907.00837}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We present a real-time approach for multi-person 3D motion capture at over 30 fps using a single RGB camera. It operates in generic scenes and is robust to difficult occlusions both by other people and objects. Our method operates in subsequent stages. The first stage is a convolutional neural network (CNN) that estimates 2D and 3D pose features along with identity assignments for all visible joints of all individuals. We contribute a new architecture for this CNN, called SelecSLS Net, that uses novel selective long and short range skip connections to improve the information flow allowing for a drastically faster network without compromising accuracy. In the second stage, a fully-connected neural network turns the possibly partial (on account of occlusion) 2D pose and 3D pose features for each subject into a complete 3D pose estimate per individual. The third stage applies space-time skeletal model fitting to the predicted 2D and 3D pose per subject to further reconcile the 2D and 3D pose, and enforce temporal coherence. Our method returns the full skeletal pose in joint angles for each subject. This is a further key distinction from previous work that neither extracted global body positions nor joint angle results of a coherent skeleton in real time for multi-person scenes. The proposed system runs on consumer hardware at a previously unseen speed of more than 30 fps given 512x320 images as input while achieving state-of-the-art accuracy, which we will demonstrate on a range of challenging real-world scenes.}, }
Endnote
%0 Report %A Mehta, Dushyant %A Sotnychenko, Oleksandr %A Mueller, Franziska %A Xu, Weipeng %A Elgharib, Mohamed %A Fua, Pascal %A Seidel, Hans-Peter %A Rhodin, Helge %A Pons-Moll, Gerard %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Vision and Machine Learning, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T XNect: Real-time Multi-person 3D Human Pose Estimation with a Single RGB Camera : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE21-A %U http://arxiv.org/abs/1907.00837 %D 2019 %X We present a real-time approach for multi-person 3D motion capture at over 30 fps using a single RGB camera. It operates in generic scenes and is robust to difficult occlusions both by other people and objects. Our method operates in subsequent stages. The first stage is a convolutional neural network (CNN) that estimates 2D and 3D pose features along with identity assignments for all visible joints of all individuals. We contribute a new architecture for this CNN, called SelecSLS Net, that uses novel selective long and short range skip connections to improve the information flow allowing for a drastically faster network without compromising accuracy. In the second stage, a fully-connected neural network turns the possibly partial (on account of occlusion) 2D pose and 3D pose features for each subject into a complete 3D pose estimate per individual. The third stage applies space-time skeletal model fitting to the predicted 2D and 3D pose per subject to further reconcile the 2D and 3D pose, and enforce temporal coherence. Our method returns the full skeletal pose in joint angles for each subject. This is a further key distinction from previous work that neither extracted global body positions nor joint angle results of a coherent skeleton in real time for multi-person scenes. The proposed system runs on consumer hardware at a previously unseen speed of more than 30 fps given 512x320 images as input while achieving state-of-the-art accuracy, which we will demonstrate on a range of challenging real-world scenes. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
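The abstract above describes a three-stage design: a CNN producing per-joint 2D/3D pose features, a fully-connected lifting network, and space-time skeletal model fitting. The PyTorch skeleton below only mirrors that stage decomposition; the placeholder backbone, layer sizes and joint count are assumptions and do not reproduce SelecSLS Net or the actual fitting stage.

```python
# Structural sketch of a three-stage monocular multi-person pose pipeline.
import torch
import torch.nn as nn

NUM_JOINTS = 21  # assumed joint count

class Stage1PoseFeatures(nn.Module):
    """Stand-in for the image CNN: per-person 2D + 3D pose features."""
    def __init__(self):
        super().__init__()
        self.backbone = nn.Sequential(  # placeholder, not SelecSLS Net
            nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
        )
        self.head = nn.Linear(32, NUM_JOINTS * (2 + 3))

    def forward(self, image):
        return self.head(self.backbone(image))  # (B, J*5) pose features

class Stage2Lifting(nn.Module):
    """Fully-connected net: possibly partial pose features -> full 3D pose."""
    def __init__(self):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(NUM_JOINTS * 5, 256), nn.ReLU(),
            nn.Linear(256, NUM_JOINTS * 3),
        )

    def forward(self, pose_features):
        return self.mlp(pose_features).view(-1, NUM_JOINTS, 3)

def stage3_fit_skeleton(pose3d_sequence):
    """Placeholder for space-time model fitting: here, simple temporal smoothing."""
    return 0.5 * (pose3d_sequence[:-1] + pose3d_sequence[1:])

# Dummy forward pass on random data.
img = torch.randn(1, 3, 320, 512)
feats = Stage1PoseFeatures()(img)
pose3d = Stage2Lifting()(feats)           # (1, 21, 3)
seq = pose3d.expand(4, -1, -1)            # pretend 4-frame sequence
smoothed = stage3_fit_skeleton(seq)
print(pose3d.shape, smoothed.shape)
```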
Shimada, S., Golyanik, V., Theobalt, C., and Stricker, D. 2019. IsMo-GAN: Adversarial Learning for Monocular Non-Rigid 3D Reconstruction. http://arxiv.org/abs/1904.12144.
(arXiv: 1904.12144)
Abstract
The majority of the existing methods for non-rigid 3D surface regression from monocular 2D images require an object template or point tracks over multiple frames as an input, and are still far from real-time processing rates. In this work, we present the Isometry-Aware Monocular Generative Adversarial Network (IsMo-GAN) - an approach for direct 3D reconstruction from a single image, trained for the deformation model in an adversarial manner on a light-weight synthetic dataset. IsMo-GAN reconstructs surfaces from real images under varying illumination, camera poses, textures and shading at over 250 Hz. In multiple experiments, it consistently outperforms several approaches in the reconstruction accuracy, runtime, generalisation to unknown surfaces and robustness to occlusions. In comparison to the state-of-the-art, we reduce the reconstruction error by 10-30% including the textureless case and our surfaces evince fewer artefacts qualitatively.
Export
BibTeX
@online{Shimada_arXiv1904.12144, TITLE = {{IsMo}-{GAN}: Adversarial Learning for Monocular Non-Rigid {3D} Reconstruction}, AUTHOR = {Shimada, Soshi and Golyanik, Vladislav and Theobalt, Christian and Stricker, Didier}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1904.12144}, EPRINT = {1904.12144}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {The majority of the existing methods for non-rigid 3D surface regression from monocular 2D images require an object template or point tracks over multiple frames as an input, and are still far from real-time processing rates. In this work, we present the Isometry-Aware Monocular Generative Adversarial Network (IsMo-GAN) -- an approach for direct 3D reconstruction from a single image, trained for the deformation model in an adversarial manner on a light-weight synthetic dataset. IsMo-GAN reconstructs surfaces from real images under varying illumination, camera poses, textures and shading at over 250 Hz. In multiple experiments, it consistently outperforms several approaches in the reconstruction accuracy, runtime, generalisation to unknown surfaces and robustness to occlusions. In comparison to the state-of-the-art, we reduce the reconstruction error by 10-30% including the textureless case and our surfaces evince fewer artefacts qualitatively.}, }
Endnote
%0 Report %A Shimada, Soshi %A Golyanik, Vladislav %A Theobalt, Christian %A Stricker, Didier %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations %T IsMo-GAN: Adversarial Learning for Monocular Non-Rigid 3D Reconstruction : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE04-B %U http://arxiv.org/abs/1904.12144 %D 2019 %X The majority of the existing methods for non-rigid 3D surface regression from monocular 2D images require an object template or point tracks over multiple frames as an input, and are still far from real-time processing rates. In this work, we present the Isometry-Aware Monocular Generative Adversarial Network (IsMo-GAN) - an approach for direct 3D reconstruction from a single image, trained for the deformation model in an adversarial manner on a light-weight synthetic dataset. IsMo-GAN reconstructs surfaces from real images under varying illumination, camera poses, textures and shading at over 250 Hz. In multiple experiments, it consistently outperforms several approaches in the reconstruction accuracy, runtime, generalisation to unknown surfaces and robustness to occlusions. In comparison to the state-of-the-art, we reduce the reconstruction error by 10-30% including the textureless case and our surfaces evince fewer artefacts qualitatively. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV
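IsMo-GAN, as summarized above, trains a direct image-to-surface regressor adversarially. The minimal training step below only shows the generic pattern of combining a reconstruction loss with an adversarial term; the tiny networks, the surface resolution, the loss weighting and the random data are illustrative assumptions rather than the authors' architecture.

```python
# One adversarial training step for an image -> 3D surface regressor.
import torch
import torch.nn as nn

N_POINTS = 73 * 73  # assumed surface resolution (regular point grid)

gen = nn.Sequential(                       # toy generator
    nn.Conv2d(3, 16, 3, stride=2, padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1), nn.Flatten(),
    nn.Linear(16, N_POINTS * 3),
)
disc = nn.Sequential(nn.Linear(N_POINTS * 3, 256), nn.ReLU(), nn.Linear(256, 1))

opt_g = torch.optim.Adam(gen.parameters(), lr=1e-4)
opt_d = torch.optim.Adam(disc.parameters(), lr=1e-4)
bce = nn.BCEWithLogitsLoss()

image = torch.randn(4, 3, 128, 128)        # toy batch
gt_surface = torch.randn(4, N_POINTS * 3)  # toy ground-truth surfaces

# Discriminator step: real surfaces vs. detached generated surfaces.
fake = gen(image).detach()
d_loss = bce(disc(gt_surface), torch.ones(4, 1)) + bce(disc(fake), torch.zeros(4, 1))
opt_d.zero_grad(); d_loss.backward(); opt_d.step()

# Generator step: reconstruction loss plus a small adversarial term.
pred = gen(image)
g_loss = nn.functional.mse_loss(pred, gt_surface) + 0.01 * bce(disc(pred), torch.ones(4, 1))
opt_g.zero_grad(); g_loss.backward(); opt_g.step()
print(float(d_loss), float(g_loss))
```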
Tretschk, E., Tewari, A., Zollhöfer, M., Golyanik, V., and Theobalt, C. 2019. DEMEA: Deep Mesh Autoencoders for Non-Rigidly Deforming Objects. http://arxiv.org/abs/1905.10290.
(arXiv: 1905.10290)
Abstract
Mesh autoencoders are commonly used for dimensionality reduction, sampling and mesh modeling. We propose a general-purpose DEep MEsh Autoencoder (DEMEA) which adds a novel embedded deformation layer to a graph-convolutional mesh autoencoder. The embedded deformation layer (EDL) is a differentiable deformable geometric proxy which explicitly models point displacements of non-rigid deformations in a lower dimensional space and serves as a local rigidity regularizer. DEMEA decouples the parameterization of the deformation from the final mesh resolution since the deformation is defined over a lower dimensional embedded deformation graph. We perform a large-scale study on four different datasets of deformable objects. Reasoning about the local rigidity of meshes using EDL allows us to achieve higher-quality results for highly deformable objects, compared to directly regressing vertex positions. We demonstrate multiple applications of DEMEA, including non-rigid 3D reconstruction from depth and shading cues, non-rigid surface tracking, as well as the transfer of deformations over different meshes.
Export
BibTeX
@online{Tretschk_arXIv1905.10290, TITLE = {{DEMEA}: Deep Mesh Autoencoders for Non-Rigidly Deforming Objects}, AUTHOR = {Tretschk, Edgar and Tewari, Ayush and Zollh{\"o}fer, Michael and Golyanik, Vladislav and Theobalt, Christian}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1905.10290}, EPRINT = {1905.10290}, EPRINTTYPE = {arXiv}, YEAR = {2019}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Mesh autoencoders are commonly used for dimensionality reduction, sampling and mesh modeling. We propose a general-purpose DEep MEsh Autoencoder (DEMEA) which adds a novel embedded deformation layer to a graph-convolutional mesh autoencoder. The embedded deformation layer (EDL) is a differentiable deformable geometric proxy which explicitly models point displacements of non-rigid deformations in a lower dimensional space and serves as a local rigidity regularizer. DEMEA decouples the parameterization of the deformation from the final mesh resolution since the deformation is defined over a lower dimensional embedded deformation graph. We perform a large-scale study on four different datasets of deformable objects. Reasoning about the local rigidity of meshes using EDL allows us to achieve higher-quality results for highly deformable objects, compared to directly regressing vertex positions. We demonstrate multiple applications of DEMEA, including non-rigid 3D reconstruction from depth and shading cues, non-rigid surface tracking, as well as the transfer of deformations over different meshes.}, }
Endnote
%0 Report %A Tretschk, Edgar %A Tewari, Ayush %A Zollhöfer, Michael %A Golyanik, Vladislav %A Theobalt, Christian %+ Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Computer Graphics, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T DEMEA: Deep Mesh Autoencoders for Non-Rigidly Deforming Objects : %G eng %U http://hdl.handle.net/21.11116/0000-0003-FE0C-3 %U http://arxiv.org/abs/1905.10290 %D 2019 %X Mesh autoencoders are commonly used for dimensionality reduction, sampling and mesh modeling. We propose a general-purpose DEep MEsh Autoencoder (DEMEA) which adds a novel embedded deformation layer to a graph-convolutional mesh autoencoder. The embedded deformation layer (EDL) is a differentiable deformable geometric proxy which explicitly models point displacements of non-rigid deformations in a lower dimensional space and serves as a local rigidity regularizer. DEMEA decouples the parameterization of the deformation from the final mesh resolution since the deformation is defined over a lower dimensional embedded deformation graph. We perform a large-scale study on four different datasets of deformable objects. Reasoning about the local rigidity of meshes using EDL allows us to achieve higher-quality results for highly deformable objects, compared to directly regressing vertex positions. We demonstrate multiple applications of DEMEA, including non-rigid 3D reconstruction from depth and shading cues, non-rigid surface tracking, as well as the transfer of deformations over different meshes. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Graphics, cs.GR
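The embedded deformation layer described above deforms mesh vertices through a sparse graph of nodes, each carrying a rotation and a translation, blended by skinning weights. The NumPy function below computes that classical embedded-deformation blend (in the style of Sumner et al.) for given graph parameters; the random mesh, graph and weights are illustration data, and DEMEA's actual contribution of learning these parameters inside a graph-convolutional autoencoder is not reproduced.

```python
# Classical embedded deformation: v' = sum_k w_k(v) * (R_k (v - g_k) + g_k + t_k)
import numpy as np

def embedded_deformation(vertices, nodes, node_R, node_t, weights):
    """
    vertices: (V, 3) rest-pose mesh vertices
    nodes:    (K, 3) deformation-graph node positions g_k
    node_R:   (K, 3, 3) per-node rotations R_k
    node_t:   (K, 3) per-node translations t_k
    weights:  (V, K) skinning weights, rows sum to 1
    returns:  (V, 3) deformed vertices
    """
    local = vertices[:, None, :] - nodes[None, :, :]             # (V, K, 3)
    rotated = np.einsum('kij,vkj->vki', node_R, local)           # R_k (v - g_k)
    per_node = rotated + nodes[None, :, :] + node_t[None, :, :]  # (V, K, 3)
    return np.einsum('vk,vki->vi', weights, per_node)            # blended result

rng = np.random.default_rng(0)
V, K = 100, 8
vertices = rng.normal(size=(V, 3))
nodes = rng.normal(size=(K, 3))
node_R = np.stack([np.eye(3)] * K)            # identity rotations -> pure translation
node_t = rng.normal(scale=0.1, size=(K, 3))
weights = rng.random((V, K))
weights /= weights.sum(axis=1, keepdims=True)

deformed = embedded_deformation(vertices, nodes, node_R, node_t, weights)
print(deformed.shape)  # (100, 3)
```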