D5
Databases and Information Systems

The Year Before Last

2021
[1]
D. I. Adelani, J. Abbott, G. Neubig, D. D’souza, J. Kreutzer, C. Lignos, C. Palen-Michel, H. Buzaaba, S. Rijhwani, S. Ruder, S. Mayhew, I. A. Azime, S. H. Muhammad, C. C. Emezue, J. Nakatumba-Nabende, P. Ogayo, A. Anuoluwapo, C. Gitau, D. Mbaye, J. Alabi, S. M. Yimam, T. R. Gwadabe, I. Ezeani, R. A. Niyongabo, J. Mukiibi, V. Otiende, I. Orife, D. David, S. Ngom, T. Adewumi, P. Rayson, M. Adeyemi, G. Muriuki, E. Anebi, C. Chukwuneke, N. Odu, E. P. Wairagala, S. Oyerinde, C. Siro, T. S. Bateesa, T. Oloyede, Y. Wambui, V. Akinode, D. Nabagereka, M. Katusiime, A. Awokoya, M. MBOUP, D. Gebreyohannes, H. Tilaye, K. Nwaike, D. Wolde, A. Faye, B. Sibanda, O. Ahia, B. F. P. Dossou, K. Ogueji, T. I. DIOP, A. Diallo, A. Akinfaderin, T. Marengereke, and S. Osei, “MasakhaNER: Named Entity Recognition for African Languages,” Transactions of the Association for Computational Linguistics, vol. 9, 2021.
Export
BibTeX
@article{Adelani2021,
  TITLE = {{MasakhaNER}: Named Entity Recognition for {African} Languages},
  AUTHOR = {Adelani, David Ifeoluwa and Abbott, Jade and Neubig, Graham and D{\textquoteright}souza, Daniel and Kreutzer, Julia and Lignos, Constantine and Palen-Michel, Chester and Buzaaba, Happy and Rijhwani, Shruti and Ruder, Sebastian and Mayhew, Stephen and Azime, Israel Abebe and Muhammad, Shamsuddeen H. and Emezue, Chris Chinenye and Nakatumba-Nabende, Joyce and Ogayo, Perez and Anuoluwapo, Aremu and Gitau, Catherine and Mbaye, Derguene and Alabi, Jesujoba and Yimam, Seid Muhie and Gwadabe, Tajuddeen Rabiu and Ezeani, Ignatius and Niyongabo, Rubungo Andre and Mukiibi, Jonathan and Otiende, Verrah and Orife, Iroro and David, Davis and Ngom, Samba and Adewumi, Tosin and Rayson, Paul and Adeyemi, Mofetoluwa and Muriuki, Gerald and Anebi, Emmanuel and Chukwuneke, Chiamaka and Odu, Nkiruka and Wairagala, Eric Peter and Oyerinde, Samuel and Siro, Clemencia and Bateesa, Tobius Saul and Oloyede, Temilola and Wambui, Yvonne and Akinode, Victor and Nabagereka, Deborah and Katusiime, Maurice and Awokoya, Ayodele and MBOUP, Mouhamadane and Gebreyohannes, Dibora and Tilaye, Henok and Nwaike, Kelechi and Wolde, Degaga and Faye, Abdoulaye and Sibanda, Blessing and Ahia, Orevaoghene and Dossou, Bonaventure F. P. and Ogueji, Kelechi and DIOP, Thierno Ibrahima and Diallo, Abdoulaye and Akinfaderin, Adewale and Marengereke, Tendai and Osei, Salomey},
  LANGUAGE = {eng},
  ISSN = {2307-387X},
  DOI = {10.1162/tacl_a_00416},
  PUBLISHER = {ACL},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  JOURNAL = {Transactions of the Association for Computational Linguistics},
  VOLUME = {9},
  PAGES = {1116--1131},
}
Endnote
%0 Journal Article %A Adelani, David Ifeoluwa %A Abbott, Jade %A Neubig, Graham %A D’souza, Daniel %A Kreutzer, Julia %A Lignos, Constantine %A Palen-Michel, Chester %A Buzaaba, Happy %A Rijhwani, Shruti %A Ruder, Sebastian %A Mayhew, Stephen %A Azime, Israel Abebe %A Muhammad, Shamsuddeen H. %A Emezue, Chris Chinenye %A Nakatumba-Nabende, Joyce %A Ogayo, Perez %A Anuoluwapo, Aremu %A Gitau, Catherine %A Mbaye, Derguene %A Alabi, Jesujoba %A Yimam, Seid Muhie %A Gwadabe, Tajuddeen Rabiu %A Ezeani, Ignatius %A Niyongabo, Rubungo Andre %A Mukiibi, Jonathan %A Otiende, Verrah %A Orife, Iroro %A David, Davis %A Ngom, Samba %A Adewumi, Tosin %A Rayson, Paul %A Adeyemi, Mofetoluwa %A Muriuki, Gerald %A Anebi, Emmanuel %A Chukwuneke, Chiamaka %A Odu, Nkiruka %A Wairagala, Eric Peter %A Oyerinde, Samuel %A Siro, Clemencia %A Bateesa, Tobius Saul %A Oloyede, Temilola %A Wambui, Yvonne %A Akinode, Victor %A Nabagereka, Deborah %A Katusiime, Maurice %A Awokoya, Ayodele %A MBOUP, Mouhamadane %A Gebreyohannes, Dibora %A Tilaye, Henok %A Nwaike, Kelechi %A Wolde, Degaga %A Faye, Abdoulaye %A Sibanda, Blessing %A Ahia, Orevaoghene %A Dossou, Bonaventure F. P. 
%A Ogueji, Kelechi %A DIOP, Thierno Ibrahima %A Diallo, Abdoulaye %A Akinfaderin, Adewale %A Marengereke, Tendai %A Osei, Salomey %+ External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations %T MasakhaNER: Named Entity Recognition for African Languages : %G eng %U http://hdl.handle.net/21.11116/0000-000A-115A-E %R 10.1162/tacl_a_00416 %7 2021 %D 2021 %J Transactions of the Association for Computational Linguistics %V 9 %& 1116 %P 1116 - 1131 %I ACL %@ false
[2]
J. Ali, P. Lahoti, and K. P. Gummadi, “Accounting for Model Uncertainty in Algorithmic Discrimination,” in AIES ’21, Fourth AAAI/ACM Conference on Artificial Intelligence, Ethics and Society, Virtual Conference, 2021.
Export
BibTeX
@inproceedings{Ali_AIES2021,
  TITLE = {Accounting for Model Uncertainty in Algorithmic Discrimination},
  AUTHOR = {Ali, Junaid and Lahoti, Preethi and Gummadi, Krishna P.},
  LANGUAGE = {eng},
  ISBN = {978-1-4503-8473-5},
  DOI = {10.1145/3461702.3462630},
  PUBLISHER = {ACM},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {AIES '21, Fourth AAAI/ACM Conference on Artificial Intelligence, Ethics and Society},
  EDITOR = {Fourcade, Marion and Kuipers, Benjamin and Lazar, Seth and Mulligan, Deirdre},
  PAGES = {336--345},
  ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings %A Ali, Junaid %A Lahoti, Preethi %A Gummadi, Krishna P. %+ Computer Graphics, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Accounting for Model Uncertainty in Algorithmic Discrimination : %G eng %U http://hdl.handle.net/21.11116/0000-0008-72E3-7 %R 10.1145/3461702.3462630 %D 2021 %B Fourth AAAI/ACM Conference on Artificial Intelligence, Ethics and Society %Z date of event: 2021-05-19 - 2021-05-21 %C Virtual Conference %B AIES '21 %E Fourcade, Marion; Kuipers, Benjamin; Lazar, Seth; Mulligan, Deirdre %P 336 - 345 %I ACM %@ 978-1-4503-8473-5
[3]
H. Arnaout, S. Razniewski, G. Weikum, and J. Z. Pan, “Negative Knowledge for Open-world Wikidata,” in The Web Conference (WWW 2021), Ljubljana, Slovenia, 2021.
Export
BibTeX
@inproceedings{Arnaout_WWW21,
  TITLE = {Negative Knowledge for Open-world {Wikidata}},
  AUTHOR = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard and Pan, Jeff Z.},
  LANGUAGE = {eng},
  ISBN = {978-1-4503-8313-4},
  DOI = {10.1145/3442442.3452339},
  PUBLISHER = {ACM},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {The Web Conference (WWW 2021)},
  EDITOR = {Leskovec, Jure and Grobelnik, Marko and Najork, Mark and Tan, Jie and Zia, Leila},
  PAGES = {544--551},
  ADDRESS = {Ljubljana, Slovenia},
}
Endnote
%0 Conference Proceedings %A Arnaout, Hiba %A Razniewski, Simon %A Weikum, Gerhard %A Pan, Jeff Z. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Negative Knowledge for Open-world Wikidata : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6351-C %R 10.1145/3442442.3452339 %D 2021 %B The Web Conference %Z date of event: 2021-04-19 - 2021-04-23 %C Ljubljana, Slovenia %B The Web Conference %E Leskovec, Jure; Grobelnik, Marko; Najork, Mark; Tan, Jie; Zia, Leila %P 544 - 551 %I ACM %@ 978-1-4503-8313-4
[4]
H. Arnaout, S. Razniewski, G. Weikum, and J. Z. Pan, “Negative Statements Considered Useful,” Journal of Web Semantics, vol. 71, 2021.
Export
BibTeX
@article{Arnaout2021,
  TITLE = {Negative Statements Considered Useful},
  AUTHOR = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard and Pan, Jeff Z.},
  LANGUAGE = {eng},
  DOI = {10.1016/j.websem.2021.100661},
  PUBLISHER = {Elsevier},
  ADDRESS = {Amsterdam},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  DATE = {2021},
  JOURNAL = {Journal of Web Semantics},
  VOLUME = {71},
  EID = {100661},
}
Endnote
%0 Journal Article %A Arnaout, Hiba %A Razniewski, Simon %A Weikum, Gerhard %A Pan, Jeff Z. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Negative Statements Considered Useful : %G eng %U http://hdl.handle.net/21.11116/0000-0009-A586-5 %R 10.1016/j.websem.2021.100661 %7 2021 %D 2021 %J Journal of Web Semantics %V 71 %Z sequence number: 100661 %I Elsevier %C Amsterdam
[5]
H. Arnaout, S. Razniewski, G. Weikum, and J. Z. Pan, “Wikinegata: a Knowledge Base with Interesting Negative Statements,” Proceedings of the VLDB Endowment (Proc. VLDB 2021), vol. 14, no. 12, 2021.
Export
BibTeX
@article{Arnaout2021_PVLDB,
  TITLE = {Wikinegata: {A} Knowledge Base with Interesting Negative Statements},
  AUTHOR = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard and Pan, Jeff Z.},
  LANGUAGE = {eng},
  PUBLISHER = {VLDB Endowment Inc.},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  JOURNAL = {Proceedings of the VLDB Endowment (Proc. VLDB)},
  VOLUME = {14},
  NUMBER = {12},
  PAGES = {2807--2810},
  BOOKTITLE = {Proceedings of the 47th International Conference on Very Large Data Bases (VLDB 2021)},
  EDITOR = {Dong, Xin Luna and Naumann, Felix},
}
Endnote
%0 Journal Article %A Arnaout, Hiba %A Razniewski, Simon %A Weikum, Gerhard %A Pan, Jeff Z. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Wikinegata: a Knowledge Base with Interesting Negative Statements : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6319-C %7 2021 %D 2021 %J Proceedings of the VLDB Endowment %O PVLDB %V 14 %N 12 %& 2807 %P 2807 - 2810 %I VLDB Endowment Inc. %B Proceedings of the 47th International Conference on Very Large Data Bases %O VLDB 2021 Copenhagen, Denmark, 16-20 August 2021
[6]
A. B. Biswas, H. Arnaout, and S. Razniewski, “Neguess: Wikidata-entity Guessing Game with Negative Clues,” in Proceedings of the ISWC 2021 Posters, Demos and Industry Tracks (ISWC-Posters-Demos-Industry 2021), Virtual Conference, 2021.
Export
BibTeX
@inproceedings{Biswas_ISWC21,
  TITLE = {Neguess: {Wikidata}-entity Guessing Game with Negative Clues},
  AUTHOR = {Biswas, Aditya Bikram and Arnaout, Hiba and Razniewski, Simon},
  LANGUAGE = {eng},
  ISSN = {1613-0073},
  URL = {http://ceur-ws.org/Vol-2980/paper350.pdf; urn:nbn:de:0074-2980-6},
  PUBLISHER = {CEUR-WS.org},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {Proceedings of the ISWC 2021 Posters, Demos and Industry Tracks (ISWC-Posters-Demos-Industry 2021)},
  EDITOR = {Seneviratne, Oshani and Pesquita, Catia and Sequeda, Juan and Etcheverry, Lorena},
  EID = {350},
  SERIES = {CEUR Workshop Proceedings},
  VOLUME = {2980},
  ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings %A Biswas, Aditya Bikram %A Arnaout, Hiba %A Razniewski, Simon %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Neguess: Wikidata-entity Guessing Game with Negative Clues : %G eng %U http://hdl.handle.net/21.11116/0000-0009-65AD-3 %U http://ceur-ws.org/Vol-2980/paper350.pdf %D 2021 %B 20th International Semantic Web Conference %Z date of event: 2021-10-24 - 2021-10-28 %C Virtual Conference %B Proceedings of the ISWC 2021 Posters, Demos and Industry Tracks %E Seneviratne, Oshani; Pesquita, Catia; Sequeda, Juan; Etcheverry, Lorena %Z sequence number: 350 %I CEUR-WS.org %B CEUR Workshop Proceedings %N 2980 %@ false
[7]
K. Budhathoki, M. Boley, and J. Vreeken, “Discovering Reliable Causal Rules,” in Proceedings of the SIAM International Conference on Data Mining (SDM 2021), Virtual Conference, 2021.
Export
BibTeX
@inproceedings{budhathoki:21:dice,
  TITLE = {Discovering Reliable Causal Rules},
  AUTHOR = {Budhathoki, Kailash and Boley, Mario and Vreeken, Jilles},
  LANGUAGE = {eng},
  ISBN = {978-1-61197-670-0},
  DOI = {10.1137/1.9781611976700.1},
  PUBLISHER = {SIAM},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {Proceedings of the SIAM International Conference on Data Mining (SDM 2021)},
  EDITOR = {Demeniconi, Carlotta and Davidson, Ian},
  PAGES = {1--9},
  ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings %A Budhathoki, Kailash %A Boley, Mario %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Discovering Reliable Causal Rules : %G eng %U http://hdl.handle.net/21.11116/0000-0008-2571-F %R 10.1137/1.9781611976700.1 %D 2021 %B SIAM International Conference on Data Mining %Z date of event: 2021-04-29 - 2021-05-01 %C Virtual Conference %B Proceedings of the SIAM International Conference on Data Mining %E Demeniconi, Carlotta; Davidson, Ian %P 1 - 9 %I SIAM %@ 978-1-61197-670-0
[8]
E. Chang, X. Shen, D. Zhu, V. Demberg, and H. Su, “Neural Data-to-Text Generation with LM-based Text Augmentation,” in The 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021), Online, 2021.
Export
BibTeX
@inproceedings{chang2021neural,
  TITLE = {Neural Data-to-Text Generation with {LM}-based Text Augmentation},
  AUTHOR = {Chang, Ernie and Shen, Xiaoyu and Zhu, Dawei and Demberg, Vera and Su, Hui},
  LANGUAGE = {eng},
  ISBN = {978-1-954085-02-2},
  DOI = {10.18653/v1/2021.eacl-main.64},
  PUBLISHER = {ACL},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {The 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021)},
  EDITOR = {Merlo, Paola},
  PAGES = {758--768},
  ADDRESS = {Online},
}
Endnote
%0 Conference Proceedings %A Chang, Ernie %A Shen, Xiaoyu %A Zhu, Dawei %A Demberg, Vera %A Su, Hui %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T Neural Data-to-Text Generation with LM-based Text Augmentation : %G eng %U http://hdl.handle.net/21.11116/0000-0008-149E-0 %R 10.18653/v1/2021.eacl-main.64 %D 2021 %B 16th Conference of the European Chapter of the Association for Computational Linguistics %Z date of event: 2021-04-19 - 2021-04-23 %C Online %B The 16th Conference of the European Chapter of the Association for Computational Linguistics %E Merlo, Paola %P 758 - 768 %I ACL %@ 978-1-954085-02-2
[9]
P. Christmann, R. Saha Roy, and G. Weikum, “Beyond NED: Fast and Effective Search Space Reduction for Complex Question Answering over Knowledge Bases,” 2021. [Online]. Available: https://arxiv.org/abs/2108.08597. (arXiv: 2108.08597)
Abstract
Answering complex questions over knowledge bases (KB-QA) faces huge input data with billions of facts, involving millions of entities and thousands of predicates. For efficiency, QA systems first reduce the answer search space by identifying a set of facts that is likely to contain all answers and relevant cues. The most common technique for doing this is to apply named entity disambiguation (NED) systems to the question, and retrieve KB facts for the disambiguated entities. This work presents CLOCQ, an efficient method that prunes irrelevant parts of the search space using KB-aware signals. CLOCQ uses a top-k query processor over score-ordered lists of KB items that combine signals about lexical matching, relevance to the question, coherence among candidate items, and connectivity in the KB graph. Experiments with two recent QA benchmarks for complex questions demonstrate the superiority of CLOCQ over state-of-the-art baselines with respect to answer presence, size of the search space, and runtimes.
Export
BibTeX
@online{Christmann_2108.08597,
  TITLE = {Beyond {NED}: Fast and Effective Search Space Reduction for Complex Question Answering over Knowledge Bases},
  AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
  LANGUAGE = {eng},
  URL = {https://arxiv.org/abs/2108.08597},
  EPRINT = {2108.08597},
  EPRINTTYPE = {arXiv},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  ABSTRACT = {Answering complex questions over knowledge bases (KB-QA) faces huge input data with billions of facts, involving millions of entities and thousands of predicates. For efficiency, QA systems first reduce the answer search space by identifying a set of facts that is likely to contain all answers and relevant cues. The most common technique for doing this is to apply named entity disambiguation (NED) systems to the question, and retrieve KB facts for the disambiguated entities. This work presents CLOCQ, an efficient method that prunes irrelevant parts of the search space using KB-aware signals. CLOCQ uses a top-k query processor over score-ordered lists of KB items that combine signals about lexical matching, relevance to the question, coherence among candidate items, and connectivity in the KB graph. Experiments with two recent QA benchmarks for complex questions demonstrate the superiority of CLOCQ over state-of-the-art baselines with respect to answer presence, size of the search space, and runtimes.},
}
Endnote
%0 Report %A Christmann, Philipp %A Saha Roy, Rishiraj %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Beyond NED: Fast and Effective Search Space Reduction for Complex Question Answering over Knowledge Bases : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6360-B %U https://arxiv.org/abs/2108.08597 %D 2021 %X Answering complex questions over knowledge bases (KB-QA) faces huge input<br>data with billions of facts, involving millions of entities and thousands of<br>predicates. For efficiency, QA systems first reduce the answer search space by<br>identifying a set of facts that is likely to contain all answers and relevant<br>cues. The most common technique or doing this is to apply named entity<br>disambiguation (NED) systems to the question, and retrieve KB facts for the<br>disambiguated entities. This work presents CLOCQ, an efficient method that<br>prunes irrelevant parts of the search space using KB-aware signals. CLOCQ uses<br>a top-k query processor over score-ordered lists of KB items that combine<br>signals about lexical matching, relevance to the question, coherence among<br>candidate items, and connectivity in the KB graph. Experiments with two recent<br>QA benchmarks for complex questions demonstrate the superiority of CLOCQ over<br>state-of-the-art baselines with respect to answer presence, size of the search<br>space, and runtimes.<br> %K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[10]
P. Christmann, “CLOCQ: Efficient Search Space Reduction for Complex Question Answering over Knowledge Bases,” Universität des Saarlandes, Saarbrücken, 2021.
Abstract
Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
Export
BibTeX
@mastersthesis{ChristmannMSc2021,
  TITLE = {{CLOCQ}: Efficient Search Space Reduction for Complex Question Answering over Knowledge Bases},
  AUTHOR = {Christmann, Philipp},
  LANGUAGE = {eng},
  SCHOOL = {Universit{\"a}t des Saarlandes},
  ADDRESS = {Saarbr{\"u}cken},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  DATE = {2021},
  internal-note = {NOTE(review): the abstract below describes the Ascent commonsense-KB work, not CLOCQ -- presumably copied from the wrong record; verify against the repository entry},
  ABSTRACT = {Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.},
}
Endnote
%0 Thesis %A Christmann, Philipp %Y Saha Roy, Rishiraj %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T CLOCQ: Efficient Search Space Reduction for Complex Question Answering over Knowledge Bases : %G eng %U http://hdl.handle.net/21.11116/0000-000C-BEF6-9 %I Universit&#228;t des Saarlandes %C Saarbr&#252;cken %D 2021 %P 54 p. %V master %9 master %X Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
[11]
C. X. Chu, S. Razniewski, and G. Weikum, “KnowFi: Knowledge Extraction from Long Fictional Texts,” in Automated Knowledge Base Construction (AKBC 2021), Virtual Conference, 2021.
Export
BibTeX
@inproceedings{DBLP:conf/akbc/ChuRW21,
  TITLE = {{KnowFi}: Knowledge Extraction from Long Fictional Texts},
  AUTHOR = {Chu, Cuong Xuan and Razniewski, Simon and Weikum, Gerhard},
  LANGUAGE = {eng},
  URL = {https://openreview.net/forum?id=8smkJ2ekBRC},
  PUBLISHER = {OpenReview},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {Automated Knowledge Base Construction (AKBC 2021)},
  PAGES = {1--19},
  ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings %A Chu, Cuong Xuan %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T KnowFi: Knowledge Extraction from Long Fictional Texts : %G eng %U http://hdl.handle.net/21.11116/0000-000C-DC15-5 %U https://openreview.net/forum?id=8smkJ2ekBRC %D 2021 %B 3rd Conference on Automated Knowledge Base Construction %Z date of event: 2021-10-04 - 2021-10-08 %C Virtual Conference %B Automated Knowledge Base Construction %P 1 - 19 %I OpenReview
[12]
D. Dave, V. Anu, and A. S. Varde, “Automating the Classification of Requirements Data,” in IEEE International Conference on Big Data, Orlando, FL, USA (Virtual Event), 2021.
Export
BibTeX
@inproceedings{Dave_BigData21,
  TITLE = {Automating the Classification of Requirements Data},
  AUTHOR = {Dave, Dev and Anu, Vaibhav and Varde, Aparna S.},
  LANGUAGE = {eng},
  ISBN = {978-1-6654-3902-2},
  DOI = {10.1109/BigData52589.2021.9671548},
  PUBLISHER = {IEEE},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {IEEE International Conference on Big Data},
  EDITOR = {Chen, Yixin and Ludwig, Heiko and Tu, Yicheng and Fayyad, Usama and Zhu, Xingquan and Xu, Xiaohua and Byna, Suren and Liu, Xiong and Zhang, Jianping and Pan, Shirui and Papalexakis, Vagelis and Wang, Jianwu and Cuzzocrea, Alfredo and Ordonez, Carlos},
  PAGES = {5878--5880},
  ADDRESS = {Orlando, FL, USA (Virtual Event)},
}
Endnote
%0 Conference Proceedings %A Dave, Dev %A Anu, Vaibhav %A Varde, Aparna S. %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Automating the Classification of Requirements Data : %G eng %U http://hdl.handle.net/21.11116/0000-000A-C562-9 %R 10.1109/BigData52589.2021.9671548 %D 2021 %B IEEE International Conference on Big Data %Z date of event: 2021-12-15 - 2021-12-18 %C Orlando, FL, USA (Virtual Event) %B IEEE International Conference on Big Data %E Chen, Yixin; Ludwig, Heiko; Tu, Yicheng; Fayyad, Usama; Zhu, Xingquan; Xu, Xiaohua; Byna, Suren; Liu, Xiong; Zyhang, Jianping; Pan, Shirui; Papalexakis, Vagelis; Wang, Jianwu; Cuzzocrea, Alfredo; Ordonez, Carlos %P 5878 - 5880 %I IEEE %@ 978-1-6654-3902-2
[13]
L. De Stefani, E. Terolli, and E. Upfal, “Tiered Sampling: An Efficient Method for Counting Sparse Motifs in Massive Graph Streams,” ACM Transactions on Knowledge Discovery from Data, vol. 15, no. 5, 2021.
Export
BibTeX
@article{DeStefani2021,
  TITLE = {Tiered Sampling: {An} Efficient Method for Counting Sparse Motifs in Massive Graph Streams},
  AUTHOR = {De Stefani, Lorenzo and Terolli, Erisa and Upfal, Eli},
  LANGUAGE = {eng},
  ISSN = {1556-4681},
  DOI = {10.1145/3441299},
  PUBLISHER = {ACM},
  ADDRESS = {New York, NY},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  DATE = {2021},
  JOURNAL = {ACM Transactions on Knowledge Discovery from Data},
  VOLUME = {15},
  NUMBER = {5},
  PAGES = {1--52},
  EID = {79},
}
Endnote
%0 Journal Article %A De Stefani, Lorenzo %A Terolli, Erisa %A Upfal, Eli %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Tiered Sampling: An Efficient Method for Counting Sparse Motifs in Massive Graph Streams : %G eng %U http://hdl.handle.net/21.11116/0000-0008-ED51-2 %R 10.1145/3441299 %7 2021 %D 2021 %J ACM Transactions on Knowledge Discovery from Data %V 15 %N 5 %& 1 %P 1 - 52 %Z sequence number: 79 %I ACM %C New York, NY %@ false
[14]
J. Fischer, F. B. Ardakani, K. Kattler, J. Walter, and M. H. Schulz, “CpG Content-dependent Associations between Transcription Factors and Histone Modifications,” PLoS One, vol. 16, no. 4, 2021.
Export
BibTeX
@article{fischer:21:cpgtfhm,
  TITLE = {{CpG} Content-dependent Associations between Transcription Factors and Histone Modifications},
  AUTHOR = {Fischer, Jonas and Ardakani, Fatemeh Behjati and Kattler, Kathrin and Walter, J{\"o}rn and Schulz, Marcel Holger},
  LANGUAGE = {eng},
  ISSN = {1932-6203},
  DOI = {10.1371/journal.pone.0249985},
  PUBLISHER = {Public Library of Science},
  ADDRESS = {San Francisco, CA},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  JOURNAL = {PLoS One},
  VOLUME = {16},
  NUMBER = {4},
  EID = {0249985},
}
Endnote
%0 Journal Article %A Fischer, Jonas %A Ardakani, Fatemeh Behjati %A Kattler, Kathrin %A Walter, J&#246;rn %A Schulz, Marcel Holger %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Computational Biology and Applied Algorithmics, MPI for Informatics, Max Planck Society External Organizations External Organizations Computational Biology and Applied Algorithmics, MPI for Informatics, Max Planck Society %T CpG Content-dependent Associations between Transcription Factors and Histone Modifications : %G eng %U http://hdl.handle.net/21.11116/0000-0008-5602-5 %R 10.1371/journal.pone.0249985 %7 2021 %D 2021 %J PLoS One %V 16 %N 4 %Z sequence number: 0249985 %I Public Library of Science %C San Francisco, CA %@ false
[15]
J. Fischer, A. Oláh, and J. Vreeken, “What’s in the Box? Exploring the Inner Life of Neural Networks with Robust Rules,” in Proceedings of the 38th International Conference on Machine Learning (ICML 2021), Virtual Event, 2021.
Export
BibTeX
@inproceedings{Fischer_ICML2021,
  title        = {What's in the Box? {Exploring} the Inner Life of Neural Networks with Robust Rules},
  author       = {Fischer, Jonas and Ol{\'a}h, Anna and Vreeken, Jilles},
  language     = {eng},
  publisher    = {MLR Press},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {Proceedings of the 38th International Conference on Machine Learning (ICML 2021)},
  editor       = {Meila, Marina and Zhang, Tong},
  pages        = {3352--3362},
  eid          = {26},
  series       = {Proceedings of Machine Learning Research},
  volume       = {139},
  address      = {Virtual Event},
}
Endnote
%0 Conference Proceedings %A Fischer, Jonas %A Ol&#225;h, Anna %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T What&#8217;s in the Box? Exploring the Inner Life of Neural Networks with Robust Rules : %G eng %U http://hdl.handle.net/21.11116/0000-0009-49F8-E %D 2021 %B 38th International Conference on Machine Learning %Z date of event: 2021-07-18 - 2021-07-24 %C Virtual Event %B Proceedings of the 38th International Conference on Machine Learning %E Meila, Marina; Zhang, Tong %P 3352 - 3362 %Z sequence number: 26 %I MLR Press %B Proceedings of Machine Learning Research %N 139
[16]
J. Fischer and R. Burkholz, “Plant ‘n’ Seek: Can You Find the Winning Ticket?,” 2021. [Online]. Available: https://arxiv.org/abs/2111.11153. (arXiv: 2111.11153)
Abstract
The lottery ticket hypothesis has sparked the rapid development of pruning<br>algorithms that perform structure learning by identifying a sparse subnetwork<br>of a large randomly initialized neural network. The existence of such 'winning<br>tickets' has been proven theoretically but at suboptimal sparsity levels.<br>Contemporary pruning algorithms have furthermore been struggling to identify<br>sparse lottery tickets for complex learning tasks. Is this suboptimal sparsity<br>merely an artifact of existence proofs and algorithms or a general limitation<br>of the pruning approach? And, if very sparse tickets exist, are current<br>algorithms able to find them or are further improvements needed to achieve<br>effective network compression? To answer these questions systematically, we<br>derive a framework to plant and hide target architectures within large randomly<br>initialized neural networks. For three common challenges in machine learning,<br>we hand-craft extremely sparse network topologies, plant them in large neural<br>networks, and evaluate state-of-the-art lottery ticket pruning methods. We find<br>that current limitations of pruning algorithms to identify extremely sparse<br>tickets are likely of algorithmic rather than fundamental nature and anticipate<br>that our planting framework will facilitate future developments of efficient<br>pruning algorithms, as we have addressed the issue of missing baselines in the<br>field raised by Frankle et al.<br>
Export
BibTeX
@online{FischerarXiv2111.11153,
  title        = {Plant 'n' Seek: Can You Find the Winning Ticket?},
  author       = {Fischer, Jonas and Burkholz, Rebekka},
  language     = {eng},
  url          = {https://arxiv.org/abs/2111.11153},
  eprint       = {2111.11153},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {The lottery ticket hypothesis has sparked the rapid development of pruning<br>algorithms that perform structure learning by identifying a sparse subnetwork<br>of a large randomly initialized neural network. The existence of such 'winning<br>tickets' has been proven theoretically but at suboptimal sparsity levels.<br>Contemporary pruning algorithms have furthermore been struggling to identify<br>sparse lottery tickets for complex learning tasks. Is this suboptimal sparsity<br>merely an artifact of existence proofs and algorithms or a general limitation<br>of the pruning approach? And, if very sparse tickets exist, are current<br>algorithms able to find them or are further improvements needed to achieve<br>effective network compression? To answer these questions systematically, we<br>derive a framework to plant and hide target architectures within large randomly<br>initialized neural networks. For three common challenges in machine learning,<br>we hand-craft extremely sparse network topologies, plant them in large neural<br>networks, and evaluate state-of-the-art lottery ticket pruning methods. We find<br>that current limitations of pruning algorithms to identify extremely sparse<br>tickets are likely of algorithmic rather than fundamental nature and anticipate<br>that our planting framework will facilitate future developments of efficient<br>pruning algorithms, as we have addressed the issue of missing baselines in the<br>field raised by Frankle et al.<br>},
}
Endnote
%0 Report %A Fischer, Jonas %A Burkholz, Rebekka %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Plant 'n' Seek: Can You Find the Winning Ticket? : %G eng %U http://hdl.handle.net/21.11116/0000-0009-B124-6 %U https://arxiv.org/abs/2111.11153 %D 2021 %X The lottery ticket hypothesis has sparked the rapid development of pruning<br>algorithms that perform structure learning by identifying a sparse subnetwork<br>of a large randomly initialized neural network. The existence of such 'winning<br>tickets' has been proven theoretically but at suboptimal sparsity levels.<br>Contemporary pruning algorithms have furthermore been struggling to identify<br>sparse lottery tickets for complex learning tasks. Is this suboptimal sparsity<br>merely an artifact of existence proofs and algorithms or a general limitation<br>of the pruning approach? And, if very sparse tickets exist, are current<br>algorithms able to find them or are further improvements needed to achieve<br>effective network compression? To answer these questions systematically, we<br>derive a framework to plant and hide target architectures within large randomly<br>initialized neural networks. For three common challenges in machine learning,<br>we hand-craft extremely sparse network topologies, plant them in large neural<br>networks, and evaluate state-of-the-art lottery ticket pruning methods. We find<br>that current limitations of pruning algorithms to identify extremely sparse<br>tickets are likely of algorithmic rather than fundamental nature and anticipate<br>that our planting framework will facilitate future developments of efficient<br>pruning algorithms, as we have addressed the issue of missing baselines in the<br>field raised by Frankle et al.<br> %K Computer Science, Learning, cs.LG,Computer Science, Artificial Intelligence, cs.AI,Statistics, Machine Learning, stat.ML
[17]
J. Fischer and R. Burkholz, “Towards Strong Pruning for Lottery Tickets with Non-Zero Biases,” 2021. [Online]. Available: https://arxiv.org/abs/2110.11150. (arXiv: 2110.11150)
Abstract
The strong lottery ticket hypothesis holds the promise that pruning randomly<br>initialized deep neural networks could offer a computationally efficient<br>alternative to deep learning with stochastic gradient descent. Common parameter<br>initialization schemes and existence proofs, however, are focused on networks<br>with zero biases, thus foregoing the potential universal approximation property<br>of pruning. To fill this gap, we extend multiple initialization schemes and<br>existence proofs to non-zero biases, including explicit 'looks-linear'<br>approaches for ReLU activation functions. These do not only enable truly<br>orthogonal parameter initialization but also reduce potential pruning errors.<br>In experiments on standard benchmark data sets, we further highlight the<br>practical benefits of non-zero bias initialization schemes, and present<br>theoretically inspired extensions for state-of-the-art strong lottery ticket<br>pruning.<br>
Export
BibTeX
@online{Fischer_arXiv2110.11150,
  title        = {Towards Strong Pruning for Lottery Tickets with Non-Zero Biases},
  author       = {Fischer, Jonas and Burkholz, Rebekka},
  language     = {eng},
  url          = {https://arxiv.org/abs/2110.11150},
  eprint       = {2110.11150},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {The strong lottery ticket hypothesis holds the promise that pruning randomly<br>initialized deep neural networks could offer a computationally efficient<br>alternative to deep learning with stochastic gradient descent. Common parameter<br>initialization schemes and existence proofs, however, are focused on networks<br>with zero biases, thus foregoing the potential universal approximation property<br>of pruning. To fill this gap, we extend multiple initialization schemes and<br>existence proofs to non-zero biases, including explicit 'looks-linear'<br>approaches for ReLU activation functions. These do not only enable truly<br>orthogonal parameter initialization but also reduce potential pruning errors.<br>In experiments on standard benchmark data sets, we further highlight the<br>practical benefits of non-zero bias initialization schemes, and present<br>theoretically inspired extensions for state-of-the-art strong lottery ticket<br>pruning.<br>},
}
Endnote
%0 Report %A Fischer, Jonas %A Burkholz, Rebekka %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Towards Strong Pruning for Lottery Tickets with Non-Zero Biases : %G eng %U http://hdl.handle.net/21.11116/0000-0009-B12A-0 %U https://arxiv.org/abs/2110.11150 %D 2021 %X The strong lottery ticket hypothesis holds the promise that pruning randomly<br>initialized deep neural networks could offer a computationally efficient<br>alternative to deep learning with stochastic gradient descent. Common parameter<br>initialization schemes and existence proofs, however, are focused on networks<br>with zero biases, thus foregoing the potential universal approximation property<br>of pruning. To fill this gap, we extend multiple initialization schemes and<br>existence proofs to non-zero biases, including explicit 'looks-linear'<br>approaches for ReLU activation functions. These do not only enable truly<br>orthogonal parameter initialization but also reduce potential pruning errors.<br>In experiments on standard benchmark data sets, we further highlight the<br>practical benefits of non-zero bias initialization schemes, and present<br>theoretically inspired extensions for state-of-the-art strong lottery ticket<br>pruning.<br> %K Computer Science, Learning, cs.LG,Computer Science, Artificial Intelligence, cs.AI
[18]
J. Fischer and J. Vreeken, “Differentiable Pattern Set Mining,” in KDD ’21, 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Virtual Event, Singapore, 2021.
Export
BibTeX
@inproceedings{Fischer_KDD2021,
  title        = {Differentiable Pattern Set Mining},
  author       = {Fischer, Jonas and Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-1-4503-8332-5},
  doi          = {10.1145/3447548.3467348},
  publisher    = {ACM},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {KDD '21, 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  editor       = {Zhu, Feida and Ooi, Beng Chin and Miao, Chunyan and Cong, Gao and Tang, Jiliang and Derr, Tyler},
  pages        = {383--392},
  address      = {Virtual Event, Singapore},
}
Endnote
%0 Conference Proceedings %A Fischer, Jonas %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Differentiable Pattern Set Mining : %G eng %U http://hdl.handle.net/21.11116/0000-0009-652F-2 %R 10.1145/3447548.3467348 %D 2021 %B 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining %Z date of event: 2021-08-14 - 2021-08-18 %C Virtual Event, Singapore %B KDD '21 %E Zhu, Feida; Ooi, Beng Chin; Miao, Chunyan; Cong, Gao; Tang, Jiliang; Derr, Tyler %P 383 - 392 %I ACM %@ 978-1-4503-8332-5
[19]
M. H. Gad-Elrab, “Explainable Methods for Knowledge Graph Refinement and Exploration via Symbolic Reasoning,” Universität des Saarlandes, Saarbrücken, 2021.
Abstract
Knowledge Graphs (KGs) have applications in many domains such as Finance, Manufacturing, and Healthcare. While recent efforts have created large KGs, their content is far from complete and sometimes includes invalid statements. Therefore, it is crucial to refine the constructed KGs to enhance their coverage and accuracy via KG completion and KG validation. It is also vital to provide human-comprehensible explanations for such refinements, so that humans have trust in the KG quality. Enabling KG exploration, by search and browsing, is also essential for users to understand the KG value and limitations towards down-stream applications. However, the large size of KGs makes KG exploration very challenging. While the type taxonomy of KGs is a useful asset along these lines, it remains insufficient for deep exploration. In this dissertation we tackle the aforementioned challenges of KG refinement and KG exploration by combining logical reasoning over the KG with other techniques such as KG embedding models and text mining. Through such combination, we introduce methods that provide human-understandable output. Concretely, we introduce methods to tackle KG incompleteness by learning exception-aware rules over the existing KG. Learned rules are then used in inferring missing links in the KG accurately. Furthermore, we propose a framework for constructing human-comprehensible explanations for candidate facts from both KG and text. Extracted explanations are used to insure the validity of KG facts. Finally, to facilitate KG exploration, we introduce a method that combines KG embeddings with rule mining to compute informative entity clusters with explanations.
Export
BibTeX
@phdthesis{Elrabphd2021,
  title        = {Explainable Methods for Knowledge Graph Refinement and Exploration via Symbolic Reasoning},
  author       = {Gad-Elrab, Mohamed Hassan},
  language     = {eng},
  url          = {urn:nbn:de:bsz:291--ds-344237},
  doi          = {10.22028/D291-34423},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2021},
  marginalmark = {$\bullet$},
  date         = {2021},
  abstract     = {Knowledge Graphs (KGs) have applications in many domains such as Finance, Manufacturing, and Healthcare. While recent efforts have created large KGs, their content is far from complete and sometimes includes invalid statements. Therefore, it is crucial to refine the constructed KGs to enhance their coverage and accuracy via KG completion and KG validation. It is also vital to provide human-comprehensible explanations for such refinements, so that humans have trust in the KG quality. Enabling KG exploration, by search and browsing, is also essential for users to understand the KG value and limitations towards down-stream applications. However, the large size of KGs makes KG exploration very challenging. While the type taxonomy of KGs is a useful asset along these lines, it remains insufficient for deep exploration. In this dissertation we tackle the aforementioned challenges of KG refinement and KG exploration by combining logical reasoning over the KG with other techniques such as KG embedding models and text mining. Through such combination, we introduce methods that provide human-understandable output. Concretely, we introduce methods to tackle KG incompleteness by learning exception-aware rules over the existing KG. Learned rules are then used in inferring missing links in the KG accurately. Furthermore, we propose a framework for constructing human-comprehensible explanations for candidate facts from both KG and text. Extracted explanations are used to insure the validity of KG facts. Finally, to facilitate KG exploration, we introduce a method that combines KG embeddings with rule mining to compute informative entity clusters with explanations.},
}
Endnote
%0 Thesis %A Gad-Elrab, Mohamed Hassan %Y Weikum, Gerhard %A referee: Theobald, Martin %A referee: Stepanova, Daria %A referee: Razniewski, Simon %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Explainable Methods for Knowledge Graph Refinement and Exploration via Symbolic Reasoning : %G eng %U http://hdl.handle.net/21.11116/0000-0009-427E-0 %R 10.22028/D291-34423 %U urn:nbn:de:bsz:291--ds-344237 %F OTHER: hdl:20.500.11880/31629 %I Universit&#228;t des Saarlandes %C Saarbr&#252;cken %D 2021 %P 176 p. %V phd %9 phd %X Knowledge Graphs (KGs) have applications in many domains such as Finance, Manufacturing, and Healthcare. While recent efforts have created large KGs, their content is far from complete and sometimes includes invalid statements. Therefore, it is crucial to refine the constructed KGs to enhance their coverage and accuracy via KG completion and KG validation. It is also vital to provide human-comprehensible explanations for such refinements, so that humans have trust in the KG quality. Enabling KG exploration, by search and browsing, is also essential for users to understand the KG value and limitations towards down-stream applications. However, the large size of KGs makes KG exploration very challenging. While the type taxonomy of KGs is a useful asset along these lines, it remains insufficient for deep exploration. In this dissertation we tackle the aforementioned challenges of KG refinement and KG exploration by combining logical reasoning over the KG with other techniques such as KG embedding models and text mining. 
Through such combination, we introduce methods that provide human-understandable output. Concretely, we introduce methods to tackle KG incompleteness by learning exception-aware rules over the existing KG. Learned rules are then used in inferring missing links in the KG accurately. Furthermore, we propose a framework for constructing human-comprehensible explanations for candidate facts from both KG and text. Extracted explanations are used to insure the validity of KG facts. Finally, to facilitate KG exploration, we introduce a method that combines KG embeddings with rule mining to compute informative entity clusters with explanations. %K knowledge graphs symbolic learning embedding models rule learning Big Data %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/31629
[20]
A. Ghazimatin, “Enhancing Explainability and Scrutability of Recommender Systems,” Universität des Saarlandes, Saarbrücken, 2021.
Abstract
Our increasing reliance on complex algorithms for recommendations calls for models and methods for explainable, scrutable, and trustworthy AI. While explainability is required for understanding the relationships between model inputs and outputs, a scrutable system allows us to modify its behavior as desired. These properties help bridge the gap between our expectations and the algorithm’s behavior and accordingly boost our trust in AI. Aiming to cope with information overload, recommender systems play a crucial role in filtering content (such as products, news, songs, and movies) and shaping a personalized experience for their users. Consequently, there has been a growing demand from the information consumers to receive proper explanations for their personalized recommendations. These explanations aim at helping users understand why certain items are recommended to them and how their previous inputs to the system relate to the generation of such recommendations. Besides, in the event of receiving undesirable content, explanations could possibly contain valuable information as to how the system’s behavior can be modified accordingly. In this thesis, we present our contributions towards explainability and scrutability of recommender systems: • We introduce a user-centric framework, FAIRY, for discovering and ranking post-hoc explanations for the social feeds generated by black-box platforms. These explanations reveal relationships between users’ profiles and their feed items and are extracted from the local interaction graphs of users. FAIRY employs a learning-to-rank (LTR) method to score candidate explanations based on their relevance and surprisal. • We propose a method, PRINCE, to facilitate provider-side explainability in graph-based recommender systems that use personalized PageRank at their core. PRINCE explanations are comprehensible for users, because they present subsets of the user’s prior actions responsible for the received recommendations. 
PRINCE operates in a counterfactual setup and builds on a polynomial-time algorithm for finding the smallest counterfactual explanations. • We propose a human-in-the-loop framework, ELIXIR, for enhancing scrutability and subsequently the recommendation models by leveraging user feedback on explanations. ELIXIR enables recommender systems to collect user feedback on pairs of recommendations and explanations. The feedback is incorporated into the model by imposing a soft constraint for learning user-specific item representations. We evaluate all proposed models and methods with real user studies and demonstrate their benefits at achieving explainability and scrutability in recommender systems.
Export
BibTeX
@phdthesis{Ghazphd2021, TITLE = {Enhancing Explainability and Scrutability of Recommender Systems}, AUTHOR = {Ghazimatin, Azin}, LANGUAGE = {eng}, URL = {urn:nbn:de:bsz:291--ds-355166}, DOI = {10.22028/D291-35516}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2021}, MARGINALMARK = {$\bullet$}, DATE = {2021}, ABSTRACT = {Our increasing reliance on complex algorithms for recommendations calls for models and methods for explainable, scrutable, and trustworthy AI. While explainability is required for understanding the relationships between model inputs and outputs, a scrutable system allows us to modify its behavior as desired. These properties help bridge the gap between our expectations and the algorithm{\textquoteright}s behavior and accordingly boost our trust in AI. Aiming to cope with information overload, recommender systems play a crucial role in {fi}ltering content (such as products, news, songs, and movies) and shaping a personalized experience for their users. Consequently, there has been a growing demand from the information consumers to receive proper explanations for their personalized recommendations. These explanations aim at helping users understand why certain items are recommended to them and how their previous inputs to the system relate to the generation of such recommendations. Besides, in the event of receiving undesirable content, explanations could possibly contain valuable information as to how the system{\textquoteright}s behavior can be modi{fi}ed accordingly. In this thesis, we present our contributions towards explainability and scrutability of recommender systems: \mbox{$\bullet$} We introduce a user-centric framework, FAIRY, for discovering and ranking post-hoc explanations for the social feeds generated by black-box platforms. These explanations reveal relationships between users{\textquoteright} pro{fi}les and their feed items and are extracted from the local interaction graphs of users. 
FAIRY employs a learning-to-rank (LTR) method to score candidate explanations based on their relevance and surprisal. \mbox{$\bullet$} We propose a method, PRINCE, to facilitate provider-side explainability in graph-based recommender systems that use personalized PageRank at their core. PRINCE explanations are comprehensible for users, because they present subsets of the user{\textquoteright}s prior actions responsible for the received recommendations. PRINCE operates in a counterfactual setup and builds on a polynomial-time algorithm for {fi}nding the smallest counterfactual explanations. \mbox{$\bullet$} We propose a human-in-the-loop framework, ELIXIR, for enhancing scrutability and subsequently the recommendation models by leveraging user feedback on explanations. ELIXIR enables recommender systems to collect user feedback on pairs of recommendations and explanations. The feedback is incorporated into the model by imposing a soft constraint for learning user-speci{fi}c item representations. We evaluate all proposed models and methods with real user studies and demonstrate their bene{fi}ts at achieving explainability and scrutability in recommender systems.}, }
Endnote
%0 Thesis %A Ghazimatin, Azin %Y Weikum, Gerhard %A referee: Saha Roy, Rishiraj %A referee: Amer-Yahia, Sihem %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Enhancing Explainability and Scrutability of Recommender Systems : %G eng %U http://hdl.handle.net/21.11116/0000-000A-3C99-7 %R 10.22028/D291-35516 %U urn:nbn:de:bsz:291--ds-355166 %F OTHER: hdl:20.500.11880/32590 %I Universit&#228;t des Saarlandes %C Saarbr&#252;cken %D 2021 %P 136 p. %V phd %9 phd %X Our increasing reliance on complex algorithms for recommendations calls for models and methods for explainable, scrutable, and trustworthy AI. While explainability is required for understanding the relationships between model inputs and outputs, a scrutable system allows us to modify its behavior as desired. These properties help bridge the gap between our expectations and the algorithm&#8217;s behavior and accordingly boost our trust in AI. Aiming to cope with information overload, recommender systems play a crucial role in &#64257;ltering content (such as products, news, songs, and movies) and shaping a personalized experience for their users. Consequently, there has been a growing demand from the information consumers to receive proper explanations for their personalized recommendations. These explanations aim at helping users understand why certain items are recommended to them and how their previous inputs to the system relate to the generation of such recommendations. Besides, in the event of receiving undesirable content, explanations could possibly contain valuable information as to how the system&#8217;s behavior can be modi&#64257;ed accordingly. 
In this thesis, we present our contributions towards explainability and scrutability of recommender systems: &#8226; We introduce a user-centric framework, FAIRY, for discovering and ranking post-hoc explanations for the social feeds generated by black-box platforms. These explanations reveal relationships between users&#8217; pro&#64257;les and their feed items and are extracted from the local interaction graphs of users. FAIRY employs a learning-to-rank (LTR) method to score candidate explanations based on their relevance and surprisal. &#8226; We propose a method, PRINCE, to facilitate provider-side explainability in graph-based recommender systems that use personalized PageRank at their core. PRINCE explanations are comprehensible for users, because they present subsets of the user&#8217;s prior actions responsible for the received recommendations. PRINCE operates in a counterfactual setup and builds on a polynomial-time algorithm for &#64257;nding the smallest counterfactual explanations. &#8226; We propose a human-in-the-loop framework, ELIXIR, for enhancing scrutability and subsequently the recommendation models by leveraging user feedback on explanations. ELIXIR enables recommender systems to collect user feedback on pairs of recommendations and explanations. The feedback is incorporated into the model by imposing a soft constraint for learning user-speci&#64257;c item representations. We evaluate all proposed models and methods with real user studies and demonstrate their bene&#64257;ts at achieving explainability and scrutability in recommender systems. %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/32590
[21]
A. Ghazimatin, S. Pramanik, R. Saha Roy, and G. Weikum, “ELIXIR: Learning from User Feedback on Explanations to Improve Recommender Models,” 2021. [Online]. Available: https://arxiv.org/abs/2102.09388. (arXiv: 2102.09388)
Abstract
System-provided explanations for recommendations are an important component<br>towards transparent and trustworthy AI. In state-of-the-art research, this is a<br>one-way signal, though, to improve user acceptance. In this paper, we turn the<br>role of explanations around and investigate how they can contribute to<br>enhancing the quality of generated recommendations themselves. We devise a<br>human-in-the-loop framework, called ELIXIR, where user feedback on explanations<br>is leveraged for pairwise learning of user preferences. ELIXIR leverages<br>feedback on pairs of recommendations and explanations to learn user-specific<br>latent preference vectors, overcoming sparseness by label propagation with<br>item-similarity-based neighborhoods. Our framework is instantiated using<br>generalized graph recommendation via Random Walk with Restart. Insightful<br>experiments with a real user study show significant improvements in movie and<br>book recommendations over item-level feedback.<br>
Export
BibTeX
@online{Ghazimatin_2102.09388,
  TITLE = {{ELIXIR}: {L}earning from User Feedback on Explanations to Improve Recommender Models},
  AUTHOR = {Ghazimatin, Azin and Pramanik, Soumajit and Saha Roy, Rishiraj and Weikum, Gerhard},
  LANGUAGE = {eng},
  URL = {https://arxiv.org/abs/2102.09388},
  EPRINT = {2102.09388},
  EPRINTTYPE = {arXiv},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  ABSTRACT = {System-provided explanations for recommendations are an important component towards transparent and trustworthy AI. In state-of-the-art research, this is a one-way signal, though, to improve user acceptance. In this paper, we turn the role of explanations around and investigate how they can contribute to enhancing the quality of generated recommendations themselves. We devise a human-in-the-loop framework, called ELIXIR, where user feedback on explanations is leveraged for pairwise learning of user preferences. ELIXIR leverages feedback on pairs of recommendations and explanations to learn user-specific latent preference vectors, overcoming sparseness by label propagation with item-similarity-based neighborhoods. Our framework is instantiated using generalized graph recommendation via Random Walk with Restart. Insightful experiments with a real user study show significant improvements in movie and book recommendations over item-level feedback.},
}
Endnote
%0 Report %A Ghazimatin, Azin %A Pramanik, Soumajit %A Saha Roy, Rishiraj %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T ELIXIR: Learning from User Feedback on Explanations to Improve Recommender Models : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0309-B %U https://arxiv.org/abs/2102.09388 %D 2021 %X System-provided explanations for recommendations are an important component<br>towards transparent and trustworthy AI. In state-of-the-art research, this is a<br>one-way signal, though, to improve user acceptance. In this paper, we turn the<br>role of explanations around and investigate how they can contribute to<br>enhancing the quality of generated recommendations themselves. We devise a<br>human-in-the-loop framework, called ELIXIR, where user feedback on explanations<br>is leveraged for pairwise learning of user preferences. ELIXIR leverages<br>feedback on pairs of recommendations and explanations to learn user-specific<br>latent preference vectors, overcoming sparseness by label propagation with<br>item-similarity-based neighborhoods. Our framework is instantiated using<br>generalized graph recommendation via Random Walk with Restart. Insightful<br>experiments with a real user study show significant improvements in movie and<br>book recommendations over item-level feedback.<br> %K Computer Science, Information Retrieval, cs.IR,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Learning, cs.LG
[22]
A. Ghazimatin, S. Pramanik, R. Saha Roy, and G. Weikum, “ELIXIR: Learning from User Feedback on Explanations to Improve Recommender Models,” in The Web Conference 2021 (WWW 2021), Ljubljana, Slovenia, 2021.
Export
BibTeX
@inproceedings{Ghazimatin_WWW21,
  TITLE = {{ELIXIR}: {L}earning from User Feedback on Explanations to Improve Recommender Models},
  AUTHOR = {Ghazimatin, Azin and Pramanik, Soumajit and Saha Roy, Rishiraj and Weikum, Gerhard},
  LANGUAGE = {eng},
  ISBN = {978-1-4503-8312-7},
  DOI = {10.1145/3442381.3449848},
  PUBLISHER = {ACM},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {The Web Conference 2021 (WWW 2021)},
  EDITOR = {Leskovec, Jure and Grobelnik, Marko and Najork, Marc and Tang, Jie and Zia, Leila},
  PAGES = {3850--3860},
  ADDRESS = {Ljubljana, Slovenia},
}
Endnote
%0 Conference Proceedings %A Ghazimatin, Azin %A Pramanik, Soumajit %A Saha Roy, Rishiraj %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T ELIXIR: Learning from User Feedback on Explanations to Improve Recommender Models : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0303-1 %R 10.1145/3442381.3449848 %D 2021 %B 30th The Web Conference %Z date of event: 2021-04-19 - 2021-04-23 %C Ljubljana, Slovenia %B The Web Conference 2021 %E Leskovec, Jure; Grobelnik, Marko; Najork, Marc; Tang, Jie; Zia, Leila %P 3850 - 3860 %I ACM %@ 978-1-4503-8312-7
[23]
B. Gonzalez-Moodie, S. Daiek, J. Lorenzo-Trueba, and A. S. Varde, “Multispectral Drone Data Analysis on Coastal Dunes,” in IEEE International Conference on Big Data, Orlando, FL, USA (Virtual Event), 2021.
Export
BibTeX
@inproceedings{Gonzalez-Moodie_BigData21,
  TITLE = {Multispectral Drone Data Analysis on Coastal Dunes},
  AUTHOR = {Gonzalez-Moodie, Britnie and Daiek, Shane and Lorenzo-Trueba, Jorge and Varde, Aparna S.},
  LANGUAGE = {eng},
  ISBN = {978-1-6654-3902-2},
  DOI = {10.1109/BigData52589.2021.9671340},
  PUBLISHER = {IEEE},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {IEEE International Conference on Big Data},
  EDITOR = {Chen, Yixin and Ludwig, Heiko and Tu, Yicheng and Fayyad, Usama and Zhu, Xingquan and Xu, Xiaohua and Byna, Suren and Liu, Xiong and Zhang, Jianping and Pan, Shirui and Papalexakis, Vagelis and Wang, Jianwu and Cuzzocrea, Alfredo and Ordonez, Carlos},
  PAGES = {5903--5905},
  ADDRESS = {Orlando, FL, USA (Virtual Event)},
}
Endnote
%0 Conference Proceedings %A Gonzalez-Moodie, Britnie %A Daiek, Shane %A Lorenzo-Trueba, Jorge %A Varde, Aparna S. %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Multispectral Drone Data Analysis on Coastal Dunes : %G eng %U http://hdl.handle.net/21.11116/0000-000A-C6F1-6 %R 10.1109/BigData52589.2021.9671340 %D 2021 %B IEEE International Conference on Big Data %Z date of event: 2021-12-15 - 2021-12-18 %C Orlando, FL, USA (Virtual Event) %B IEEE International Conference on Big Data %E Chen, Yixin; Ludwig, Heiko; Tu, Yicheng; Fayyad, Usama; Zhu, Xingquan; Xu, Xiaohua; Byna, Suren; Liu, Xiong; Zyhang, Jianping; Pan, Shirui; Papalexakis, Vagelis; Wang, Jianwu; Cuzzocrea, Alfredo; Ordonez, Carlos %P 5903 - 5905 %I IEEE %@ 978-1-6654-3902-2
[24]
A. Guimarães and G. Weikum, “X-Posts Explained: Analyzing and Predicting Controversial Contributions in Thematically Diverse Reddit Forums,” in Proceedings of the Fifteenth International Conference on Web and Social Media (ICWSM 2021), Atlanta, GA, USA, 2021.
Export
BibTeX
@inproceedings{Guimaraes_ICWSM2021,
  TITLE = {X-Posts Explained: {A}nalyzing and Predicting Controversial Contributions in Thematically Diverse {R}eddit Forums},
  AUTHOR = {Guimar{\~a}es, Anna and Weikum, Gerhard},
  LANGUAGE = {eng},
  ISBN = {978-1-57735-869-5},
  URL = {https://ojs.aaai.org/index.php/ICWSM/article/view/18050},
  PUBLISHER = {AAAI},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {Proceedings of the Fifteenth International Conference on Web and Social Media (ICWSM 2021)},
  PAGES = {163--172},
  ADDRESS = {Atlanta, GA, USA},
}
Endnote
%0 Conference Proceedings %A Guimar&#227;es, Anna %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T X-Posts Explained: Analyzing and Predicting Controversial Contributions in Thematically Diverse Reddit Forums : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0345-7 %U https://ojs.aaai.org/index.php/ICWSM/article/view/18050 %D 2021 %B 15th International Conference on Web and Social Media %Z date of event: 2021-06-07 - 2021-06-10 %C Atlanta, GA, USA %B Proceedings of the Fifteenth International Conference on Web and Social Media %P 163 - 172 %I AAAI %@ 978-1-57735-869-5 %U https://ojs.aaai.org/index.php/ICWSM/article/view/18050/17853
[25]
A. Guimarães, E. Terolli, and G. Weikum, “Comparing Health Forums: User Engagement, Salient Entities, Medical Detail,” in CSCW ’21 Companion, Virtual Event, USA, 2021.
Export
BibTeX
@inproceedings{Guimaraes21,
  TITLE = {Comparing Health Forums: {U}ser Engagement, Salient Entities, Medical Detail},
  AUTHOR = {Guimar{\~a}es, Anna and Terolli, Erisa and Weikum, Gerhard},
  LANGUAGE = {eng},
  ISBN = {978-1-4503-8479-7},
  DOI = {10.1145/3462204.3481748},
  PUBLISHER = {ACM},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {CSCW '21 Companion},
  EDITOR = {Ding, Sharon and Fussell, Susan and Monroy-Hern{\'a}ndez, Andr{\'e}s and Munson, Sean and Shklovski, Irina and Naaman, Mor},
  PAGES = {57--61},
  ADDRESS = {Virtual Event, USA},
}
Endnote
%0 Conference Proceedings %A Guimar&#227;es, Anna %A Terolli, Erisa %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Comparing Health Forums: User Engagement, Salient Entities, Medical Detail : %G eng %U http://hdl.handle.net/21.11116/0000-000A-BDA5-7 %R 10.1145/3462204.3481748 %D 2021 %B 24th ACM Conference on Computer-Supported Cooperative Work and Social Computing %Z date of event: 2021-10-23 - 2021-10-27 %C Virtual Event, USA %B CSCW '21 Companion %E Ding, Sharon; Fussell, Susan; Monroy-Hern&#225;ndez, Andr&#233;s; Munson, Sean; Shklovski, Irina; Naaman, Mor %P 57 - 61 %I ACM %@ 978-1-4503-8479-7
[26]
M. Hedderich, J. Fischer, D. Klakow, and J. Vreeken, “Label-Descriptive Patterns and their Application to Characterizing Classification Errors,” 2021. [Online]. Available: https://arxiv.org/abs/2110.09599. (arXiv: 2110.09599)
Abstract
State-of-the-art deep learning methods achieve human-like performance on many tasks, but make errors nevertheless. Characterizing these errors in easily interpretable terms gives insight into whether a model is prone to making systematic errors, but also gives a way to act and improve the model. In this paper we propose a method that allows us to do so for arbitrary classifiers by mining a small set of patterns that together succinctly describe the input data that is partitioned according to correctness of prediction. We show this is an instance of the more general label description problem, which we formulate in terms of the Minimum Description Length principle. To discover good pattern sets we propose the efficient and hyperparameter-free Premise algorithm, which through an extensive set of experiments we show on both synthetic and real-world data performs very well in practice; unlike existing solutions it ably recovers ground truth patterns, even on highly imbalanced data over many unique items, or where patterns are only weakly associated to labels. Through two real-world case studies we confirm that Premise gives clear and actionable insight into the systematic errors made by modern NLP classifiers.
Export
BibTeX
@online{Hedderich_arXiv2110.09599,
  TITLE = {Label-Descriptive Patterns and their Application to Characterizing Classification Errors},
  AUTHOR = {Hedderich, Michael and Fischer, Jonas and Klakow, Dietrich and Vreeken, Jilles},
  LANGUAGE = {eng},
  URL = {https://arxiv.org/abs/2110.09599},
  EPRINT = {2110.09599},
  EPRINTTYPE = {arXiv},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  ABSTRACT = {State-of-the-art deep learning methods achieve human-like performance on many tasks, but make errors nevertheless. Characterizing these errors in easily interpretable terms gives insight into whether a model is prone to making systematic errors, but also gives a way to act and improve the model. In this paper we propose a method that allows us to do so for arbitrary classifiers by mining a small set of patterns that together succinctly describe the input data that is partitioned according to correctness of prediction. We show this is an instance of the more general label description problem, which we formulate in terms of the Minimum Description Length principle. To discover good pattern sets we propose the efficient and hyperparameter-free Premise algorithm, which through an extensive set of experiments we show on both synthetic and real-world data performs very well in practice; unlike existing solutions it ably recovers ground truth patterns, even on highly imbalanced data over many unique items, or where patterns are only weakly associated to labels. Through two real-world case studies we confirm that Premise gives clear and actionable insight into the systematic errors made by modern NLP classifiers.},
}
Endnote
%0 Report %A Hedderich, Michael %A Fischer, Jonas %A Klakow, Dietrich %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Label-Descriptive Patterns and their Application to Characterizing Classification Errors : %G eng %U http://hdl.handle.net/21.11116/0000-0009-B127-3 %U https://arxiv.org/abs/2110.09599 %D 2021 %X State-of-the-art deep learning methods achieve human-like performance on many<br>tasks, but make errors nevertheless. Characterizing these errors in easily<br>interpretable terms gives insight into whether a model is prone to making<br>systematic errors, but also gives a way to act and improve the model. In this<br>paper we propose a method that allows us to do so for arbitrary classifiers by<br>mining a small set of patterns that together succinctly describe the input data<br>that is partitioned according to correctness of prediction. We show this is an<br>instance of the more general label description problem, which we formulate in<br>terms of the Minimum Description Length principle. To discover good pattern<br>sets we propose the efficient and hyperparameter-free Premise algorithm, which<br>through an extensive set of experiments we show on both synthetic and<br>real-world data performs very well in practice; unlike existing solutions it<br>ably recovers ground truth patterns, even on highly imbalanced data over many<br>unique items, or where patterns are only weakly associated to labels. Through<br>two real-world case studies we confirm that Premise gives clear and actionable<br>insight into the systematic errors made by modern NLP classifiers.<br> %K Computer Science, Learning, cs.LG,Computer Science, Computation and Language, cs.CL
[27]
E. Heiter, J. Fischer, and J. Vreeken, “Factoring Out Prior Knowledge from Low-dimensional Embeddings,” 2021. [Online]. Available: https://arxiv.org/abs/2103.01828. (arXiv: 2103.01828)
Abstract
Low-dimensional embedding techniques such as tSNE and UMAP allow visualizing high-dimensional data and therewith facilitate the discovery of interesting structure. Although they are widely used, they visualize data as is, rather than in light of the background knowledge we have about the data. What we already know, however, strongly determines what is novel and hence interesting. In this paper we propose two methods for factoring out prior knowledge in the form of distance matrices from low-dimensional embeddings. To factor out prior knowledge from tSNE embeddings, we propose JEDI that adapts the tSNE objective in a principled way using Jensen-Shannon divergence. To factor out prior knowledge from any downstream embedding approach, we propose CONFETTI, in which we directly operate on the input distance matrices. Extensive experiments on both synthetic and real world data show that both methods work well, providing embeddings that exhibit meaningful structure that would otherwise remain hidden.
Export
BibTeX
@online{heiter:21:factoring,
  TITLE = {Factoring Out Prior Knowledge from Low-dimensional Embeddings},
  AUTHOR = {Heiter, Edith and Fischer, Jonas and Vreeken, Jilles},
  LANGUAGE = {eng},
  URL = {https://arxiv.org/abs/2103.01828},
  EPRINT = {2103.01828},
  EPRINTTYPE = {arXiv},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  ABSTRACT = {Low-dimensional embedding techniques such as tSNE and UMAP allow visualizing high-dimensional data and therewith facilitate the discovery of interesting structure. Although they are widely used, they visualize data as is, rather than in light of the background knowledge we have about the data. What we already know, however, strongly determines what is novel and hence interesting. In this paper we propose two methods for factoring out prior knowledge in the form of distance matrices from low-dimensional embeddings. To factor out prior knowledge from tSNE embeddings, we propose JEDI that adapts the tSNE objective in a principled way using Jensen-Shannon divergence. To factor out prior knowledge from any downstream embedding approach, we propose CONFETTI, in which we directly operate on the input distance matrices. Extensive experiments on both synthetic and real world data show that both methods work well, providing embeddings that exhibit meaningful structure that would otherwise remain hidden.},
}
Endnote
%0 Report %A Heiter, Edith %A Fischer, Jonas %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Factoring Out Prior Knowledge from Low-dimensional Embeddings : %G eng %U http://hdl.handle.net/21.11116/0000-0008-16ED-5 %U https://arxiv.org/abs/2103.01828 %D 2021 %X Low-dimensional embedding techniques such as tSNE and UMAP allow visualizing<br>high-dimensional data and therewith facilitate the discovery of interesting<br>structure. Although they are widely used, they visualize data as is, rather<br>than in light of the background knowledge we have about the data. What we<br>already know, however, strongly determines what is novel and hence interesting.<br>In this paper we propose two methods for factoring out prior knowledge in the<br>form of distance matrices from low-dimensional embeddings. To factor out prior<br>knowledge from tSNE embeddings, we propose JEDI that adapts the tSNE objective<br>in a principled way using Jensen-Shannon divergence. To factor out prior<br>knowledge from any downstream embedding approach, we propose CONFETTI, in which<br>we directly operate on the input distance matrices. Extensive experiments on<br>both synthetic and real world data show that both methods work well, providing<br>embeddings that exhibit meaningful structure that would otherwise remain<br>hidden.<br> %K Computer Science, Learning, cs.LG,Statistics, Machine Learning, stat.ML
[28]
V. T. Ho, K. Pal, and G. Weikum, “QuTE: Answering Quantity Queries from Web Tables,” in SIGMOD ’21, International Conference on Management of Data, Xi’an, Shaanxi, China, 2021.
Export
BibTeX
@inproceedings{Thinh_SIG21,
  TITLE = {Qu{TE}: {A}nswering Quantity Queries from Web Tables},
  AUTHOR = {Ho, Vinh Thinh and Pal, Koninika and Weikum, Gerhard},
  LANGUAGE = {eng},
  ISBN = {978-1-4503-8343-1},
  DOI = {10.1145/3448016.3452763},
  PUBLISHER = {ACM},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {SIGMOD '21, International Conference on Management of Data},
  EDITOR = {Li, Guoliang and Li, Zhanhuai and Idreos, Stratos and Srivastava, Divesh},
  PAGES = {2740--2744},
  ADDRESS = {Xi'an, Shaanxi, China},
}
Endnote
%0 Conference Proceedings %A Ho, Vinh Thinh %A Pal, Koninika %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T QuTE: Answering Quantity Queries from Web Tables : %G eng %U http://hdl.handle.net/21.11116/0000-0008-052E-0 %R 10.1145/3448016.3452763 %D 2021 %B International Conference on Management of Data %Z date of event: 2021-06-19 - 2021-06-25 %C Xi'an, Shaanxi, China %B SIGMOD '21 %E Li, Guoliang; Li, Zhanhuai; Idreos, Stratos; Srivastava, Divesh %P 2740 - 2744 %I ACM %@ 978-1-4503-8343-1
[29]
V. T. Ho, K. Pal, S. Razniewski, K. Berberich, and G. Weikum, “Extracting Contextualized Quantity Facts from Web Tables,” in The Web Conference 2021 (WWW 2021), Ljubljana, Slovenia, 2021.
Export
BibTeX
@inproceedings{Thinh_WWW21,
  TITLE = {Extracting Contextualized Quantity Facts from Web Tables},
  AUTHOR = {Ho, Vinh Thinh and Pal, Koninika and Razniewski, Simon and Berberich, Klaus and Weikum, Gerhard},
  LANGUAGE = {eng},
  ISBN = {978-1-4503-8312-7},
  DOI = {10.1145/3442381.3450072},
  PUBLISHER = {ACM},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {The Web Conference 2021 (WWW 2021)},
  EDITOR = {Leskovec, Jure and Grobelnik, Marko and Najork, Marc and Tang, Jie and Zia, Leila},
  PAGES = {4033--4042},
  ADDRESS = {Ljubljana, Slovenia},
}
Endnote
%0 Conference Proceedings %A Ho, Vinh Thinh %A Pal, Koninika %A Razniewski, Simon %A Berberich, Klaus %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Extracting Contextualized Quantity Facts from Web Tables : %G eng %U http://hdl.handle.net/21.11116/0000-0008-04A0-E %R 10.1145/3442381.3450072 %D 2021 %B 30th The Web Conference %Z date of event: 2021-04-19 - 2021-04-23 %C Ljubljana, Slovenia %B The Web Conference 2021 %E Leskovec, Jure; Grobelnik, Marko; Najork, Mark; Tang, Jie; Zia, Leila %P 4033 - 4042 %I ACM %@ 978-1-4503-8312-7
[30]
K. Hui and K. Berberich, “Transitivity, Time Consumption, and Quality of Preference Judgments in Crowdsourcing,” 2021. [Online]. Available: https://arxiv.org/abs/2104.08926. (arXiv: 2104.08926)
Abstract
Preference judgments have been demonstrated as a better alternative to graded judgments to assess the relevance of documents relative to queries. Existing work has verified transitivity among preference judgments when collected from trained judges, which reduced the number of judgments dramatically. Moreover, strict preference judgments and weak preference judgments, where the latter additionally allow judges to state that two documents are equally relevant for a given query, are both widely used in literature. However, whether transitivity still holds when collected from crowdsourcing, i.e., whether the two kinds of preference judgments behave similarly remains unclear. In this work, we collect judgments from multiple judges using a crowdsourcing platform and aggregate them to compare the two kinds of preference judgments in terms of transitivity, time consumption, and quality. That is, we look into whether aggregated judgments are transitive, how long it takes judges to make them, and whether judges agree with each other and with judgments from TREC. Our key findings are that only strict preference judgments are transitive. Meanwhile, weak preference judgments behave differently in terms of transitivity, time consumption, as well as of quality of judgment.
Export
BibTeX
@online{Hui2104.08926,
  TITLE = {Transitivity, Time Consumption, and Quality of Preference Judgments in Crowdsourcing},
  AUTHOR = {Hui, Kai and Berberich, Klaus},
  LANGUAGE = {eng},
  URL = {https://arxiv.org/abs/2104.08926},
  EPRINT = {2104.08926},
  EPRINTTYPE = {arXiv},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  ABSTRACT = {Preference judgments have been demonstrated as a better alternative to graded judgments to assess the relevance of documents relative to queries. Existing work has verified transitivity among preference judgments when collected from trained judges, which reduced the number of judgments dramatically. Moreover, strict preference judgments and weak preference judgments, where the latter additionally allow judges to state that two documents are equally relevant for a given query, are both widely used in literature. However, whether transitivity still holds when collected from crowdsourcing, i.e., whether the two kinds of preference judgments behave similarly remains unclear. In this work, we collect judgments from multiple judges using a crowdsourcing platform and aggregate them to compare the two kinds of preference judgments in terms of transitivity, time consumption, and quality. That is, we look into whether aggregated judgments are transitive, how long it takes judges to make them, and whether judges agree with each other and with judgments from TREC. Our key findings are that only strict preference judgments are transitive. Meanwhile, weak preference judgments behave differently in terms of transitivity, time consumption, as well as of quality of judgment.},
}
Endnote
%0 Report %A Hui, Kai %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Transitivity, Time Consumption, and Quality of Preference Judgments in Crowdsourcing : %G eng %U http://hdl.handle.net/21.11116/0000-0009-651A-9 %U https://arxiv.org/abs/2104.08926 %D 2021 %X Preference judgments have been demonstrated as a better alternative to graded<br>judgments to assess the relevance of documents relative to queries. Existing<br>work has verified transitivity among preference judgments when collected from<br>trained judges, which reduced the number of judgments dramatically. Moreover,<br>strict preference judgments and weak preference judgments, where the latter<br>additionally allow judges to state that two documents are equally relevant for<br>a given query, are both widely used in literature. However, whether<br>transitivity still holds when collected from crowdsourcing, i.e., whether the<br>two kinds of preference judgments behave similarly remains unclear. In this<br>work, we collect judgments from multiple judges using a crowdsourcing platform<br>and aggregate them to compare the two kinds of preference judgments in terms of<br>transitivity, time consumption, and quality. That is, we look into whether<br>aggregated judgments are transitive, how long it takes judges to make them, and<br>whether judges agree with each other and with judgments from TREC. Our key<br>findings are that only strict preference judgments are transitive. Meanwhile,<br>weak preference judgments behave differently in terms of transitivity, time<br>consumption, as well as of quality of judgment.<br> %K Computer Science, Information Retrieval, cs.IR
[31]
Z. Jia, S. Pramanik, R. Saha Roy, and G. Weikum, “Complex Temporal Question Answering on Knowledge Graphs,” 2021. [Online]. Available: https://arxiv.org/abs/2109.08935. (arXiv: 2109.08935)
Abstract
Question answering over knowledge graphs (KG-QA) is a vital topic in IR. Questions with temporal intent are a special class of practical importance, but have not received much attention in research. This work presents EXAQT, the first end-to-end system for answering complex temporal questions that have multiple entities and predicates, and associated temporal conditions. EXAQT answers natural language questions over KGs in two stages, one geared towards high recall, the other towards precision at top ranks. The first step computes question-relevant compact subgraphs within the KG, and judiciously enhances them with pertinent temporal facts, using Group Steiner Trees and fine-tuned BERT models. The second step constructs relational graph convolutional networks (R-GCNs) from the first step's output, and enhances the R-GCNs with time-aware entity embeddings and attention over temporal relations. We evaluate EXAQT on TimeQuestions, a large dataset of 16k temporal questions we compiled from a variety of general purpose KG-QA benchmarks. Results show that EXAQT outperforms three state-of-the-art systems for answering complex questions over KGs, thereby justifying specialized treatment of temporal QA.
Export
BibTeX
@online{Jia2109.08935,
  TITLE = {Complex Temporal Question Answering on Knowledge Graphs},
  AUTHOR = {Jia, Zhen and Pramanik, Soumajit and Saha Roy, Rishiraj and Weikum, Gerhard},
  LANGUAGE = {eng},
  URL = {https://arxiv.org/abs/2109.08935},
  EPRINT = {2109.08935},
  EPRINTTYPE = {arXiv},
  YEAR = {2021},
  MARGINALMARK = {$\bullet$},
  ABSTRACT = {Question answering over knowledge graphs (KG-QA) is a vital topic in IR. Questions with temporal intent are a special class of practical importance, but have not received much attention in research. This work presents EXAQT, the first end-to-end system for answering complex temporal questions that have multiple entities and predicates, and associated temporal conditions. EXAQT answers natural language questions over KGs in two stages, one geared towards high recall, the other towards precision at top ranks. The first step computes question-relevant compact subgraphs within the KG, and judiciously enhances them with pertinent temporal facts, using Group Steiner Trees and fine-tuned BERT models. The second step constructs relational graph convolutional networks (R-GCNs) from the first step's output, and enhances the R-GCNs with time-aware entity embeddings and attention over temporal relations. We evaluate EXAQT on TimeQuestions, a large dataset of 16k temporal questions we compiled from a variety of general purpose KG-QA benchmarks. Results show that EXAQT outperforms three state-of-the-art systems for answering complex questions over KGs, thereby justifying specialized treatment of temporal QA.},
}
Endnote
%0 Report %A Jia, Zhen %A Pramanik, Soumajit %A Saha Roy, Rishiraj %A Weikum, Gerhard %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Complex Temporal Question Answering on Knowledge Graphs : %G eng %U http://hdl.handle.net/21.11116/0000-0009-64F7-0 %U https://arxiv.org/abs/2109.08935 %D 2021 %X Question answering over knowledge graphs (KG-QA) is a vital topic in IR.<br>Questions with temporal intent are a special class of practical importance, but<br>have not received much attention in research. This work presents EXAQT, the<br>first end-to-end system for answering complex temporal questions that have<br>multiple entities and predicates, and associated temporal conditions. EXAQT<br>answers natural language questions over KGs in two stages, one geared towards<br>high recall, the other towards precision at top ranks. The first step computes<br>question-relevant compact subgraphs within the KG, and judiciously enhances<br>them with pertinent temporal facts, using Group Steiner Trees and fine-tuned<br>BERT models. The second step constructs relational graph convolutional networks<br>(R-GCNs) from the first step's output, and enhances the R-GCNs with time-aware<br>entity embeddings and attention over temporal relations. We evaluate EXAQT on<br>TimeQuestions, a large dataset of 16k temporal questions we compiled from a<br>variety of general purpose KG-QA benchmarks. Results show that EXAQT<br>outperforms three state-of-the-art systems for answering complex questions over<br>KGs, thereby justifying specialized treatment of temporal QA.<br> %K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[32]
Z. Jia, S. Pramanik, R. Saha Roy, and G. Weikum, “Complex Temporal Question Answering on Knowledge Graphs,” in CIKM ’21, 30th ACM International Conference on Information & Knowledge Management, Virtual Event, Australia, 2021.
Export
BibTeX
@inproceedings{jia2021complex,
  title        = {Complex Temporal Question Answering on Knowledge Graphs},
  author       = {Jia, Zhen and Pramanik, Soumajit and Saha Roy, Rishiraj and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-8446-9},
  doi          = {10.1145/3459637.3482416},
  publisher    = {ACM},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {CIKM '21, 30th ACM International Conference on Information \& Knowledge Management},
  editor       = {Demartini, Gianluca and Zuccon, Guido and Culpepper, J. Shane and Huang, Zi and Tong, Hanghang},
  pages        = {792--802},
  address      = {Virtual Event, Australia},
}
Endnote
%0 Conference Proceedings %A Jia, Zhen %A Pramanik, Soumajit %A Saha Roy, Rishiraj %A Weikum, Gerhard %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Complex Temporal Question Answering on Knowledge Graphs : %G eng %U http://hdl.handle.net/21.11116/0000-000C-A3A2-4 %R 10.1145/3459637.3482416 %D 2021 %B 30th ACM International Conference on Information & Knowledge Management %Z date of event: 2021-11-01 - 2021-11-05 %C Virtual Event, Australia %B CIKM '21 %E Demartini, Gianluca; Zuccon, Guido; Culpepper, J. Shane; Huang, Zi; Tong, Hanghang %P 792 - 802 %I ACM %@ 978-1-4503-8446-9
[33]
K. M. Jose, “Improving Efficiency of Dense Retrieval Methods with Query Expansion,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@mastersthesis{JoseMSc21,
  title        = {Improving Efficiency of Dense Retrieval Methods with Query Expansion},
  author       = {Jose, Kevin Martin},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2021},
  marginalmark = {$\bullet$},
  date         = {2021},
}
Endnote
%0 Thesis %A Jose, Kevin Martin %Y Yates, Andrew %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Improving Efficiency of Dense Retrieval Methods with Query Expansion : %G eng %U http://hdl.handle.net/21.11116/0000-000D-17AB-9 %I Universit&#228;t des Saarlandes %C Saarbr&#252;cken %D 2021 %P X, 51 p. %V master %9 master
[34]
K. M. Jose, T. Nguyen, S. MacAvaney, J. Dalton, and A. Yates, “DiffIR: Exploring Differences in Ranking Models’ Behavior,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{Jose_SIGIR21,
  title        = {{DiffIR}: {E}xploring Differences in Ranking Models' Behavior},
  author       = {Jose, Kevin Martin and Nguyen, Thong and MacAvaney, Sean and Dalton, Jeffrey and Yates, Andrew},
  language     = {eng},
  isbn         = {978-1-4503-8037-9},
  doi          = {10.1145/3404835.3462784},
  publisher    = {ACM},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  editor       = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
  pages        = {2595--2599},
  address      = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings %A Jose, Kevin Martin %A Nguyen, Thong %A MacAvaney, Sean %A Dalton, Jeffrey %A Yates, Andrew %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T DiffIR: Exploring Differences in Ranking Models' Behavior : %G eng %U http://hdl.handle.net/21.11116/0000-0009-666D-B %R 10.1145/3404835.3462784 %D 2021 %B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2021-07-11 - 2021-07-15 %C Virtual Event, Canada %B SIGIR '21 %E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellog&#237;n, Alejandro; Yoshioka, Masaharu %P 2595 - 2599 %I ACM %@ 978-1-4503-8037-9
[35]
M. Kaiser, R. Saha Roy, and G. Weikum, “Reinforcement Learning from Reformulations in Conversational Question Answering over Knowledge Graphs,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{kaiser2021reinforcement,
  title        = {Reinforcement Learning from Reformulations in Conversational Question Answering over Knowledge Graphs},
  author       = {Kaiser, Magdalena and Saha Roy, Rishiraj and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-8037-9},
  doi          = {10.1145/3404835.3462859},
  publisher    = {ACM},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  editor       = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
  pages        = {459--469},
  address      = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings %A Kaiser, Magdalena %A Saha Roy, Rishiraj %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Reinforcement Learning from Reformulations in&#160;Conversational Question Answering over Knowledge Graphs : %G eng %U http://hdl.handle.net/21.11116/0000-0008-513E-8 %R 10.1145/3404835.3462859 %D 2021 %B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2021-07-11 - 2021-07-15 %C Virtual Event, Canada %B SIGIR '21 %E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellog&#237;n, Alejandro; Yoshioka, Masaharu %P 459 - 469 %I ACM %@ 978-1-4503-8037-9
[36]
M. Kaiser, R. Saha Roy, and G. Weikum, “Reinforcement Learning from Reformulations in Conversational Question Answering over Knowledge Graphs,” 2021. [Online]. Available: https://arxiv.org/abs/2105.04850. (arXiv: 2105.04850)
Abstract
The rise of personal assistants has made conversational question answering<br>(ConvQA) a very popular mechanism for user-system interaction. State-of-the-art<br>methods for ConvQA over knowledge graphs (KGs) can only learn from crisp<br>question-answer pairs found in popular benchmarks. In reality, however, such<br>training data is hard to come by: users would rarely mark answers explicitly as<br>correct or wrong. In this work, we take a step towards a more natural learning<br>paradigm - from noisy and implicit feedback via question reformulations. A<br>reformulation is likely to be triggered by an incorrect system response,<br>whereas a new follow-up question could be a positive signal on the previous<br>turn's answer. We present a reinforcement learning model, termed CONQUER, that<br>can learn from a conversational stream of questions and reformulations. CONQUER<br>models the answering process as multiple agents walking in parallel on the KG,<br>where the walks are determined by actions sampled using a policy network. This<br>policy network takes the question along with the conversational context as<br>inputs and is trained via noisy rewards obtained from the reformulation<br>likelihood. To evaluate CONQUER, we create and release ConvRef, a benchmark<br>with about 11k natural conversations containing around 205k reformulations.<br>Experiments show that CONQUER successfully learns to answer conversational<br>questions from noisy reward signals, significantly improving over a<br>state-of-the-art baseline.<br>
Export
BibTeX
@online{Kaiser_2105.04850,
  title        = {Reinforcement Learning from Reformulations in Conversational Question Answering over Knowledge Graphs},
  author       = {Kaiser, Magdalena and Saha Roy, Rishiraj and Weikum, Gerhard},
  language     = {eng},
  url          = {https://arxiv.org/abs/2105.04850},
  eprint       = {2105.04850},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {The rise of personal assistants has made conversational question answering
                  (ConvQA) a very popular mechanism for user-system interaction. State-of-the-art
                  methods for ConvQA over knowledge graphs (KGs) can only learn from crisp
                  question-answer pairs found in popular benchmarks. In reality, however, such
                  training data is hard to come by: users would rarely mark answers explicitly as
                  correct or wrong. In this work, we take a step towards a more natural learning
                  paradigm -- from noisy and implicit feedback via question reformulations. A
                  reformulation is likely to be triggered by an incorrect system response,
                  whereas a new follow-up question could be a positive signal on the previous
                  turn's answer. We present a reinforcement learning model, termed CONQUER, that
                  can learn from a conversational stream of questions and reformulations. CONQUER
                  models the answering process as multiple agents walking in parallel on the KG,
                  where the walks are determined by actions sampled using a policy network. This
                  policy network takes the question along with the conversational context as
                  inputs and is trained via noisy rewards obtained from the reformulation
                  likelihood. To evaluate CONQUER, we create and release ConvRef, a benchmark
                  with about 11k natural conversations containing around 205k reformulations.
                  Experiments show that CONQUER successfully learns to answer conversational
                  questions from noisy reward signals, significantly improving over a
                  state-of-the-art baseline.},
}
Endnote
%0 Report %A Kaiser, Magdalena %A Saha Roy, Rishiraj %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Reinforcement Learning from Reformulations in Conversational Question Answering over Knowledge Graphs : %G eng %U http://hdl.handle.net/21.11116/0000-0009-67C9-1 %U https://arxiv.org/abs/2105.04850 %D 2021 %X The rise of personal assistants has made conversational question answering<br>(ConvQA) a very popular mechanism for user-system interaction. State-of-the-art<br>methods for ConvQA over knowledge graphs (KGs) can only learn from crisp<br>question-answer pairs found in popular benchmarks. In reality, however, such<br>training data is hard to come by: users would rarely mark answers explicitly as<br>correct or wrong. In this work, we take a step towards a more natural learning<br>paradigm - from noisy and implicit feedback via question reformulations. A<br>reformulation is likely to be triggered by an incorrect system response,<br>whereas a new follow-up question could be a positive signal on the previous<br>turn's answer. We present a reinforcement learning model, termed CONQUER, that<br>can learn from a conversational stream of questions and reformulations. CONQUER<br>models the answering process as multiple agents walking in parallel on the KG,<br>where the walks are determined by actions sampled using a policy network. This<br>policy network takes the question along with the conversational context as<br>inputs and is trained via noisy rewards obtained from the reformulation<br>likelihood. 
To evaluate CONQUER, we create and release ConvRef, a benchmark<br>with about 11k natural conversations containing around 205k reformulations.<br>Experiments show that CONQUER successfully learns to answer conversational<br>questions from noisy reward signals, significantly improving over a<br>state-of-the-art baseline.<br> %K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[37]
J. Kalofolias, P. Welke, and J. Vreeken, “SUSAN: The Structural Similarity Random Walk Kernel,” in Proceedings of the SIAM International Conference on Data Mining (SDM 2021), Virtual Conference, 2021.
Export
BibTeX
@inproceedings{kalofolias:21:susan,
  title        = {{SUSAN}: The Structural Similarity Random Walk Kernel},
  author       = {Kalofolias, Janis and Welke, Pascal and Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-1-61197-670-0},
  doi          = {10.1137/1.9781611976700.34},
  publisher    = {SIAM},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {Proceedings of the SIAM International Conference on Data Mining (SDM 2021)},
  editor       = {Demeniconi, Carlotta and Davidson, Ian},
  pages        = {298--306},
  address      = {Virtual Conference},
}
Endnote
%0 Conference Proceedings %A Kalofolias, Janis %A Welke, Pascal %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T SUSAN: The Structural Similarity Random Walk Kernel : %G eng %U http://hdl.handle.net/21.11116/0000-0008-26C9-B %R 10.1137/1.9781611976700.34 %D 2021 %B SIAM International Conference on Data Mining %Z date of event: 2021-04-29 - 2021-05-01 %C Virtual Conference %B Proceedings of the SIAM International Conference on Data Mining %E Demeniconi, Carlotta; Davidson, Ian %P 298 - 306 %I SIAM %@ 978-1-61197-670-0
[38]
M. Kamp, J. Fischer, and J. Vreeken, “Federated Learning from Small Datasets,” 2021. [Online]. Available: https://arxiv.org/abs/2110.03469. (arXiv: 2110.03469)
Abstract
Federated learning allows multiple parties to collaboratively train a joint<br>model without sharing local data. This enables applications of machine learning<br>in settings of inherently distributed, undisclosable data such as in the<br>medical domain. In practice, joint training is usually achieved by aggregating<br>local models, for which local training objectives have to be in expectation<br>similar to the joint (global) objective. Often, however, local datasets are so<br>small that local objectives differ greatly from the global objective, resulting<br>in federated learning to fail. We propose a novel approach that intertwines<br>model aggregations with permutations of local models. The permutations expose<br>each local model to a daisy chain of local datasets resulting in more efficient<br>training in data-sparse domains. This enables training on extremely small local<br>datasets, such as patient data across hospitals, while retaining the training<br>efficiency and privacy benefits of federated learning.<br>
Export
BibTeX
@online{Kamp2110.03469,
  title        = {Federated Learning from Small Datasets},
  author       = {Kamp, Michael and Fischer, Jonas and Vreeken, Jilles},
  language     = {eng},
  url          = {https://arxiv.org/abs/2110.03469},
  eprint       = {2110.03469},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {Federated learning allows multiple parties to collaboratively train a joint
                  model without sharing local data. This enables applications of machine learning
                  in settings of inherently distributed, undisclosable data such as in the
                  medical domain. In practice, joint training is usually achieved by aggregating
                  local models, for which local training objectives have to be in expectation
                  similar to the joint (global) objective. Often, however, local datasets are so
                  small that local objectives differ greatly from the global objective, resulting
                  in federated learning to fail. We propose a novel approach that intertwines
                  model aggregations with permutations of local models. The permutations expose
                  each local model to a daisy chain of local datasets resulting in more efficient
                  training in data-sparse domains. This enables training on extremely small local
                  datasets, such as patient data across hospitals, while retaining the training
                  efficiency and privacy benefits of federated learning.},
}
Endnote
%0 Report %A Kamp, Michael %A Fischer, Jonas %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Federated Learning from Small Datasets : %G eng %U http://hdl.handle.net/21.11116/0000-0009-653B-4 %U https://arxiv.org/abs/2110.03469 %D 2021 %X Federated learning allows multiple parties to collaboratively train a joint<br>model without sharing local data. This enables applications of machine learning<br>in settings of inherently distributed, undisclosable data such as in the<br>medical domain. In practice, joint training is usually achieved by aggregating<br>local models, for which local training objectives have to be in expectation<br>similar to the joint (global) objective. Often, however, local datasets are so<br>small that local objectives differ greatly from the global objective, resulting<br>in federated learning to fail. We propose a novel approach that intertwines<br>model aggregations with permutations of local models. The permutations expose<br>each local model to a daisy chain of local datasets resulting in more efficient<br>training in data-sparse domains. This enables training on extremely small local<br>datasets, such as patient data across hospitals, while retaining the training<br>efficiency and privacy benefits of federated learning.<br> %K Computer Science, Learning, cs.LG,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Distributed, Parallel, and Cluster Computing, cs.DC
[39]
P. Lahoti, K. Gummadi, and G. Weikum, “Detecting and Mitigating Test-time Failure Risks via Model-agnostic Uncertainty Learning,” 2021. [Online]. Available: https://arxiv.org/abs/2109.04432. (arXiv: 2109.04432)
Abstract
Reliably predicting potential failure risks of machine learning (ML) systems<br>when deployed with production data is a crucial aspect of trustworthy AI. This<br>paper introduces Risk Advisor, a novel post-hoc meta-learner for estimating<br>failure risks and predictive uncertainties of any already-trained black-box<br>classification model. In addition to providing a risk score, the Risk Advisor<br>decomposes the uncertainty estimates into aleatoric and epistemic uncertainty<br>components, thus giving informative insights into the sources of uncertainty<br>inducing the failures. Consequently, Risk Advisor can distinguish between<br>failures caused by data variability, data shifts and model limitations and<br>advise on mitigation actions (e.g., collecting more data to counter data<br>shift). Extensive experiments on various families of black-box classification<br>models and on real-world and synthetic datasets covering common ML failure<br>scenarios show that the Risk Advisor reliably predicts deployment-time failure<br>risks in all the scenarios, and outperforms strong baselines.<br>
Export
BibTeX
@online{Lahoti2109.04432,
  title        = {Detecting and Mitigating Test-time Failure Risks via Model-agnostic Uncertainty Learning},
  author       = {Lahoti, Preethi and Gummadi, Krishna and Weikum, Gerhard},
  language     = {eng},
  url          = {https://arxiv.org/abs/2109.04432},
  eprint       = {2109.04432},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {Reliably predicting potential failure risks of machine learning (ML) systems
                  when deployed with production data is a crucial aspect of trustworthy AI. This
                  paper introduces Risk Advisor, a novel post-hoc meta-learner for estimating
                  failure risks and predictive uncertainties of any already-trained black-box
                  classification model. In addition to providing a risk score, the Risk Advisor
                  decomposes the uncertainty estimates into aleatoric and epistemic uncertainty
                  components, thus giving informative insights into the sources of uncertainty
                  inducing the failures. Consequently, Risk Advisor can distinguish between
                  failures caused by data variability, data shifts and model limitations and
                  advise on mitigation actions (e.g., collecting more data to counter data
                  shift). Extensive experiments on various families of black-box classification
                  models and on real-world and synthetic datasets covering common ML failure
                  scenarios show that the Risk Advisor reliably predicts deployment-time failure
                  risks in all the scenarios, and outperforms strong baselines.},
}
Endnote
%0 Report %A Lahoti, Preethi %A Gummadi, Krishna %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Detecting and Mitigating Test-time Failure Risks via Model-agnostic Uncertainty Learning : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6491-2 %U https://arxiv.org/abs/2109.04432 %D 2021 %X Reliably predicting potential failure risks of machine learning (ML) systems<br>when deployed with production data is a crucial aspect of trustworthy AI. This<br>paper introduces Risk Advisor, a novel post-hoc meta-learner for estimating<br>failure risks and predictive uncertainties of any already-trained black-box<br>classification model. In addition to providing a risk score, the Risk Advisor<br>decomposes the uncertainty estimates into aleatoric and epistemic uncertainty<br>components, thus giving informative insights into the sources of uncertainty<br>inducing the failures. Consequently, Risk Advisor can distinguish between<br>failures caused by data variability, data shifts and model limitations and<br>advise on mitigation actions (e.g., collecting more data to counter data<br>shift). Extensive experiments on various families of black-box classification<br>models and on real-world and synthetic datasets covering common ML failure<br>scenarios show that the Risk Advisor reliably predicts deployment-time failure<br>risks in all the scenarios, and outperforms strong baselines.<br> %K Computer Science, Learning, cs.LG,Computer Science, Information Retrieval, cs.IR,Statistics, Machine Learning, stat.ML
[40]
J. Lin, R. Nogueira, and A. Yates, Pretrained Transformers for Text Ranking: BERT and Beyond. San Rafael, CA: Morgan & Claypool Publishers, 2021.
Export
BibTeX
@book{DBLP:series/synthesis/2021LinNY,
  title        = {Pretrained Transformers for Text Ranking: {BERT} and Beyond},
  author       = {Lin, Jimmy and Nogueira, Rodrigo and Yates, Andrew},
  language     = {eng},
  issn         = {1947-4040},
  isbn         = {978-1-63639-228-8; 978-1-63639-230-1},
  doi          = {10.2200/S01123ED1V01Y202108HLT053},
  publisher    = {Morgan \& Claypool Publishers},
  address      = {San Rafael, CA},
  year         = {2021},
  marginalmark = {$\bullet$},
  date         = {2021},
  pages        = {XVII, 307},
  series       = {Synthesis Lectures on Human Language Technologies},
  volume       = {53},
}
Endnote
%0 Book %A Lin, Jimmy %A Nogueira, Rodrigo %A Yates, Andrew %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Pretrained Transformers for Text Ranking : BERT and Beyond : %G eng %U http://hdl.handle.net/21.11116/0000-000C-FE79-F %@ 978-1-63639-228-8 %@ 978-1-63639-230-1 %R 10.2200/S01123ED1V01Y202108HLT053 %I Morgan & Claypool Publishers %C San Rafael, CA %D 2021 %P XVII, 307 %B Synthesis Lectures on Human Language Technologies %N 53 %@ false
[41]
S. MacAvaney, A. Yates, S. Feldman, D. Downey, A. Cohan, and N. Goharian, “Simplified Data Wrangling with ir_datasets,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{MacAvaney_SIGIR21,
  title        = {Simplified Data Wrangling with ir{\textunderscore}datasets},
  author       = {MacAvaney, Sean and Yates, Andrew and Feldman, Sergey and Downey, Doug and Cohan, Arman and Goharian, Nazli},
  language     = {eng},
  isbn         = {978-1-4503-8037-9},
  doi          = {10.1145/3404835.3463254},
  publisher    = {ACM},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  editor       = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
  pages        = {2429--2436},
  address      = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings %A MacAvaney, Sean %A Yates, Andrew %A Feldman, Sergey %A Downey, Doug %A Cohan, Arman %A Goharian, Nazli %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations %T Simplified Data Wrangling with ir_datasets : %G eng %U http://hdl.handle.net/21.11116/0000-0009-665F-B %R 10.1145/3404835.3463254 %D 2021 %B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2021-07-11 - 2021-07-15 %C Virtual Event, Canada %B SIGIR '21 %E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellog&#237;n, Alejandro; Yoshioka, Masaharu %P 2429 - 2436 %I ACM %@ 978-1-4503-8037-9
[42]
S. MacAvaney, A. Yates, S. Feldman, D. Downey, A. Cohan, and N. Goharian, “Simplified Data Wrangling with ir_datasets,” 2021. [Online]. Available: https://arxiv.org/abs/2103.02280. (arXiv: 2103.02280)
Abstract
Managing the data for Information Retrieval (IR) experiments can be<br>challenging. Dataset documentation is scattered across the Internet and once<br>one obtains a copy of the data, there are numerous different data formats to<br>work with. Even basic formats can have subtle dataset-specific nuances that<br>need to be considered for proper use. To help mitigate these challenges, we<br>introduce a new robust and lightweight tool (ir_datasets) for acquiring,<br>managing, and performing typical operations over datasets used in IR. We<br>primarily focus on textual datasets used for ad-hoc search. This tool provides<br>both a Python and command line interface to numerous IR datasets and<br>benchmarks. To our knowledge, this is the most extensive tool of its kind.<br>Integrations with popular IR indexing and experimentation toolkits demonstrate<br>the tool's utility. We also provide documentation of these datasets through the<br>ir_datasets catalog: https://ir-datasets.com/. The catalog acts as a hub for<br>information on datasets used in IR, providing core information about what data<br>each benchmark provides as well as links to more detailed information. We<br>welcome community contributions and intend to continue to maintain and grow<br>this tool.<br>
Export
BibTeX
@online{MacAvaney_2103.02280,
  title        = {Simplified Data Wrangling with ir{\textunderscore}datasets},
  author       = {MacAvaney, Sean and Yates, Andrew and Feldman, Sergey and Downey, Doug and Cohan, Arman and Goharian, Nazli},
  language     = {eng},
  url          = {https://arxiv.org/abs/2103.02280},
  eprint       = {2103.02280},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {Managing the data for Information Retrieval (IR) experiments can be
                  challenging. Dataset documentation is scattered across the Internet and once
                  one obtains a copy of the data, there are numerous different data formats to
                  work with. Even basic formats can have subtle dataset-specific nuances that
                  need to be considered for proper use. To help mitigate these challenges, we
                  introduce a new robust and lightweight tool (ir\_datasets) for acquiring,
                  managing, and performing typical operations over datasets used in IR. We
                  primarily focus on textual datasets used for ad-hoc search. This tool provides
                  both a Python and command line interface to numerous IR datasets and
                  benchmarks. To our knowledge, this is the most extensive tool of its kind.
                  Integrations with popular IR indexing and experimentation toolkits demonstrate
                  the tool's utility. We also provide documentation of these datasets through the
                  ir\_datasets catalog: https://ir-datasets.com/. The catalog acts as a hub for
                  information on datasets used in IR, providing core information about what data
                  each benchmark provides as well as links to more detailed information. We
                  welcome community contributions and intend to continue to maintain and grow
                  this tool.},
}
Endnote
%0 Report %A MacAvaney, Sean %A Yates, Andrew %A Feldman, Sergey %A Downey, Doug %A Cohan, Arman %A Goharian, Nazli %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations %T Simplified Data Wrangling with ir_datasets : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6679-D %U https://arxiv.org/abs/2103.02280 %D 2021 %X Managing the data for Information Retrieval (IR) experiments can be<br>challenging. Dataset documentation is scattered across the Internet and once<br>one obtains a copy of the data, there are numerous different data formats to<br>work with. Even basic formats can have subtle dataset-specific nuances that<br>need to be considered for proper use. To help mitigate these challenges, we<br>introduce a new robust and lightweight tool (ir_datasets) for acquiring,<br>managing, and performing typical operations over datasets used in IR. We<br>primarily focus on textual datasets used for ad-hoc search. This tool provides<br>both a Python and command line interface to numerous IR datasets and<br>benchmarks. To our knowledge, this is the most extensive tool of its kind.<br>Integrations with popular IR indexing and experimentation toolkits demonstrate<br>the tool's utility. We also provide documentation of these datasets through the<br>ir_datasets catalog: https://ir-datasets.com/. The catalog acts as a hub for<br>information on datasets used in IR, providing core information about what data<br>each benchmark provides as well as links to more detailed information. We<br>welcome community contributions and intend to continue to maintain and grow<br>this tool.<br> %K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[43]
I. Mackie, J. Dalton, and A. Yates, “How Deep is your Learning: The DL-HARD Annotated Deep Learning Dataset,” 2021. [Online]. Available: https://arxiv.org/abs/2105.07975. (arXiv: 2105.07975)
Abstract
Deep Learning Hard (DL-HARD) is a new annotated dataset designed to more<br>effectively evaluate neural ranking models on complex topics. It builds on TREC<br>Deep Learning (DL) topics by extensively annotating them with question intent<br>categories, answer types, wikified entities, topic categories, and result type<br>metadata from a commercial web search engine. Based on this data, we introduce<br>a framework for identifying challenging queries. DL-HARD contains fifty topics<br>from the official DL 2019/2020 evaluation benchmark, half of which are newly<br>and independently assessed. We perform experiments using the official submitted<br>runs to DL on DL-HARD and find substantial differences in metrics and the<br>ranking of participating systems. Overall, DL-HARD is a new resource that<br>promotes research on neural ranking methods by focusing on challenging and<br>complex topics.<br>
Export
BibTeX
@online{Mackie_2105.07975,
  title        = {How Deep is your Learning: The {DL}-{HARD} Annotated Deep Learning Dataset},
  author       = {Mackie, Iain and Dalton, Jeffrey and Yates, Andrew},
  language     = {eng},
  url          = {https://arxiv.org/abs/2105.07975},
  eprint       = {2105.07975},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {Deep Learning Hard (DL-HARD) is a new annotated dataset designed to more<br>effectively evaluate neural ranking models on complex topics. It builds on TREC<br>Deep Learning (DL) topics by extensively annotating them with question intent<br>categories, answer types, wikified entities, topic categories, and result type<br>metadata from a commercial web search engine. Based on this data, we introduce<br>a framework for identifying challenging queries. DL-HARD contains fifty topics<br>from the official DL 2019/2020 evaluation benchmark, half of which are newly<br>and independently assessed. We perform experiments using the official submitted<br>runs to DL on DL-HARD and find substantial differences in metrics and the<br>ranking of participating systems. Overall, DL-HARD is a new resource that<br>promotes research on neural ranking methods by focusing on challenging and<br>complex topics.<br>},
}
Endnote
%0 Report %A Mackie, Iain %A Dalton, Jeffery %A Yates, Andrew %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T How Deep is your Learning: The DL-HARD Annotated Deep Learning Dataset : %G eng %U http://hdl.handle.net/21.11116/0000-0009-67AB-3 %U https://arxiv.org/abs/2105.07975 %D 2021 %X Deep Learning Hard (DL-HARD) is a new annotated dataset designed to more<br>effectively evaluate neural ranking models on complex topics. It builds on TREC<br>Deep Learning (DL) topics by extensively annotating them with question intent<br>categories, answer types, wikified entities, topic categories, and result type<br>metadata from a commercial web search engine. Based on this data, we introduce<br>a framework for identifying challenging queries. DL-HARD contains fifty topics<br>from the official DL 2019/2020 evaluation benchmark, half of which are newly<br>and independently assessed. We perform experiments using the official submitted<br>runs to DL on DL-HARD and find substantial differences in metrics and the<br>ranking of participating systems. Overall, DL-HARD is a new resource that<br>promotes research on neural ranking methods by focusing on challenging and<br>complex topics.<br> %K Computer Science, Information Retrieval, cs.IR
[44]
I. Mackie, J. Dalton, and A. Yates, “How Deep is your Learning: the DL-HARD Annotated Deep Learning Dataset,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{Mackie_SIGIR21,
  title        = {How Deep is your Learning: {T}he {DL}-{HARD} Annotated Deep Learning Dataset},
  author       = {Mackie, Iain and Dalton, Jeffrey and Yates, Andrew},
  language     = {eng},
  isbn         = {978-1-4503-8037-9},
  doi          = {10.1145/3404835.3463262},
  publisher    = {ACM},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  editor       = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
  pages        = {2335--2341},
  address      = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings %A Mackie, Iain %A Dalton, Jeffrey %A Yates, Andrew %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T How Deep is your Learning: the DL-HARD Annotated Deep Learning Dataset : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6640-C %R 10.1145/3404835.3463262 %D 2021 %B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2021-07-11 - 2021-07-15 %C Virtual Event, Canada %B SIGIR '21 %E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellog&#237;n, Alejandro; Yushioka, Massaharu %P 2335 - 2341 %I ACM %@ 978-1-4503-8037-9
[45]
P. Mandros, “Discovering robust dependencies from data,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@phdthesis{Panphd2020,
  title        = {Discovering robust dependencies from data},
  author       = {Mandros, Panagiotis},
  language     = {eng},
  url          = {urn:nbn:de:bsz:291--ds-342919},
  doi          = {10.22028/D291-34291},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2021},
  marginalmark = {$\bullet$},
  date         = {2021},
}
Endnote
%0 Thesis %A Mandros, Panagiotis %Y Vreeken, Jilles %A referee: Weikum, Gerhard %A referee: Webb, Geoffrey %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Discovering robust dependencies from data : %G eng %U http://hdl.handle.net/21.11116/0000-0008-E4CF-E %R 10.22028/D291-34291 %U urn:nbn:de:bsz:291--ds-342919 %F OTHER: hdl:20.500.11880/31535 %I Universit&#228;t des Saarlandes %C Saarbr&#252;cken %D 2021 %P 194 p. %V phd %9 phd %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/31535
[46]
A. Marx, “Information-Theoretic Causal Discovery,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@phdthesis{Marxphd2020,
  title        = {Information-Theoretic Causal Discovery},
  author       = {Marx, Alexander},
  language     = {eng},
  url          = {urn:nbn:de:bsz:291--ds-342908},
  doi          = {10.22028/D291-34290},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2021},
  marginalmark = {$\bullet$},
  date         = {2021},
}
Endnote
%0 Thesis %A Marx, Alexander %Y Vreeken, Jilles %A referee: Weikum, Gerhard %A referee: Ommen, Thijs van %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Information-Theoretic Causal Discovery : %G eng %U http://hdl.handle.net/21.11116/0000-0008-EECA-9 %R 10.22028/D291-34290 %U urn:nbn:de:bsz:291--ds-342908 %F OTHER: hdl:20.500.11880/31480 %I Universit&#228;t des Saarlandes %C Saarbr&#252;cken %D 2021 %P 195 p. %V phd %9 phd %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/31480
[47]
A. Marx, A. Gretton, and J. M. Mooij, “A Weaker Faithfulness Assumption based on Triple Interactions,” 2021. [Online]. Available: https://arxiv.org/abs/2010.14265. (arXiv: 2010.14265)
Abstract
One of the core assumptions in causal discovery is the faithfulness<br>assumption---i.e. assuming that independencies found in the data are due to<br>separations in the true causal graph. This assumption can, however, be violated<br>in many ways, including xor connections, deterministic functions or cancelling<br>paths. In this work, we propose a weaker assumption that we call 2-adjacency<br>faithfulness. In contrast to adjacency faithfulness, which assumes that there<br>is no conditional independence between each pair of variables that are<br>connected in the causal graph, we only require no conditional independence<br>between a node and a subset of its Markov blanket that can contain up to two<br>nodes. Equivalently, we adapt orientation faithfulness to this setting. We<br>further propose a sound orientation rule for causal discovery that applies<br>under weaker assumptions. As a proof of concept, we derive a modified Grow and<br>Shrink algorithm that recovers the Markov blanket of a target node and prove<br>its correctness under strictly weaker assumptions than the standard<br>faithfulness assumption.<br>
Export
BibTeX
@online{Marxarxiv21,
  title        = {A Weaker Faithfulness Assumption based on Triple Interactions},
  author       = {Marx, Alexander and Gretton, Arthur and Mooij, Joris M.},
  language     = {eng},
  url          = {https://arxiv.org/abs/2010.14265},
  eprint       = {2010.14265},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {One of the core assumptions in causal discovery is the faithfulness<br>assumption---i.e. assuming that independencies found in the data are due to<br>separations in the true causal graph. This assumption can, however, be violated<br>in many ways, including xor connections, deterministic functions or cancelling<br>paths. In this work, we propose a weaker assumption that we call 2-adjacency<br>faithfulness. In contrast to adjacency faithfulness, which assumes that there<br>is no conditional independence between each pair of variables that are<br>connected in the causal graph, we only require no conditional independence<br>between a node and a subset of its Markov blanket that can contain up to two<br>nodes. Equivalently, we adapt orientation faithfulness to this setting. We<br>further propose a sound orientation rule for causal discovery that applies<br>under weaker assumptions. As a proof of concept, we derive a modified Grow and<br>Shrink algorithm that recovers the Markov blanket of a target node and prove<br>its correctness under strictly weaker assumptions than the standard<br>faithfulness assumption.<br>},
}
Endnote
%0 Report %A Marx, Alexander %A Gretton, Arthur %A Mooij, Joris M. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T A Weaker Faithfulness Assumption based on Triple Interactions : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0BCE-5 %U https://arxiv.org/abs/2010.14265 %D 2021 %X One of the core assumptions in causal discovery is the faithfulness<br>assumption---i.e. assuming that independencies found in the data are due to<br>separations in the true causal graph. This assumption can, however, be violated<br>in many ways, including xor connections, deterministic functions or cancelling<br>paths. In this work, we propose a weaker assumption that we call 2-adjacency<br>faithfulness. In contrast to adjacency faithfulness, which assumes that there<br>is no conditional independence between each pair of variables that are<br>connected in the causal graph, we only require no conditional independence<br>between a node and a subset of its Markov blanket that can contain up to two<br>nodes. Equivalently, we adapt orientation faithfulness to this setting. We<br>further propose a sound orientation rule for causal discovery that applies<br>under weaker assumptions. As a proof of concept, we derive a modified Grow and<br>Shrink algorithm that recovers the Markov blanket of a target node and prove<br>its correctness under strictly weaker assumptions than the standard<br>faithfulness assumption.<br> %K Statistics, Machine Learning, stat.ML,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Learning, cs.LG
[48]
A. Marx, L. Yang, and M. van Leeuwen, “Estimating Conditional Mutual Information for Discrete-Continuous Mixtures using Multidimensional Adaptive Histograms,” in Proceedings of the SIAM International Conference on Data Mining (SDM 2021), Virtual Conference, 2021.
Export
BibTeX
@inproceedings{marx:20:myl,
  title        = {Estimating Conditional Mutual Information for Discrete-Continuous Mixtures using Multidimensional Adaptive Histograms},
  author       = {Marx, Alexander and Yang, Lincen and van Leeuwen, Matthijs},
  language     = {eng},
  isbn         = {978-1-61197-670-0},
  doi          = {10.1137/1.9781611976700.44},
  publisher    = {SIAM},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {Proceedings of the SIAM International Conference on Data Mining (SDM 2021)},
  pages        = {387--395},
  address      = {Virtual Conference},
}
Endnote
%0 Conference Proceedings %A Marx, Alexander %A Yang, Lincen %A van Leeuwen, Matthijs %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Estimating Conditional Mutual Information for Discrete-Continuous Mixtures using Multidimensional Adaptive Histograms : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0BC7-C %R 10.1137/1.9781611976700.44 %D 2021 %B SIAM International Conference on Data Mining %Z date of event: 2021-04-29 - 2021-05-01 %C Virtual Conference %B Proceedings of the SIAM International Conference on Data Mining %P 387 - 395 %I SIAM %@ 978-1-61197-670-0
[49]
A. Marx and J. Fischer, “Estimating Mutual Information via Geodesic kNN,” 2021. [Online]. Available: https://arxiv.org/abs/2110.13883. (arXiv: 2110.13883)
Abstract
Estimating mutual information (MI) between two continuous random variables<br>$X$ and $Y$ allows to capture non-linear dependencies between them,<br>non-parametrically. As such, MI estimation lies at the core of many data<br>science applications. Yet, robustly estimating MI for high-dimensional $X$ and<br>$Y$ is still an open research question.<br> In this paper, we formulate this problem through the lens of manifold<br>learning. That is, we leverage the common assumption that the information of<br>$X$ and $Y$ is captured by a low-dimensional manifold embedded in the observed<br>high-dimensional space and transfer it to MI estimation. As an extension to<br>state-of-the-art $k$NN estimators, we propose to determine the $k$-nearest<br>neighbours via geodesic distances on this manifold rather than form the ambient<br>space, which allows us to estimate MI even in the high-dimensional setting. An<br>empirical evaluation of our method, G-KSG, against the state-of-the-art shows<br>that it yields good estimations of the MI in classical benchmark, and manifold<br>tasks, even for high dimensional datasets, which none of the existing methods<br>can provide.<br>
Export
BibTeX
@online{Marx_arXiv2110.13883,
  title        = {Estimating Mutual Information via Geodesic {$k$NN}},
  author       = {Marx, Alexander and Fischer, Jonas},
  language     = {eng},
  url          = {https://arxiv.org/abs/2110.13883},
  eprint       = {2110.13883},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {Estimating mutual information (MI) between two continuous random variables<br>$X$ and $Y$ allows to capture non-linear dependencies between them,<br>non-parametrically. As such, MI estimation lies at the core of many data<br>science applications. Yet, robustly estimating MI for high-dimensional $X$ and<br>$Y$ is still an open research question.<br> In this paper, we formulate this problem through the lens of manifold<br>learning. That is, we leverage the common assumption that the information of<br>$X$ and $Y$ is captured by a low-dimensional manifold embedded in the observed<br>high-dimensional space and transfer it to MI estimation. As an extension to<br>state-of-the-art $k$NN estimators, we propose to determine the $k$-nearest<br>neighbours via geodesic distances on this manifold rather than form the ambient<br>space, which allows us to estimate MI even in the high-dimensional setting. An<br>empirical evaluation of our method, G-KSG, against the state-of-the-art shows<br>that it yields good estimations of the MI in classical benchmark, and manifold<br>tasks, even for high dimensional datasets, which none of the existing methods<br>can provide.<br>},
}
Endnote
%0 Report %A Marx, Alexander %A Fischer, Jonas %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Estimating Mutual Information via Geodesic kNN : %G eng %U http://hdl.handle.net/21.11116/0000-0009-B130-8 %U https://arxiv.org/abs/2110.13883 %D 2021 %X Estimating mutual information (MI) between two continuous random variables<br>$X$ and $Y$ allows to capture non-linear dependencies between them,<br>non-parametrically. As such, MI estimation lies at the core of many data<br>science applications. Yet, robustly estimating MI for high-dimensional $X$ and<br>$Y$ is still an open research question.<br> In this paper, we formulate this problem through the lens of manifold<br>learning. That is, we leverage the common assumption that the information of<br>$X$ and $Y$ is captured by a low-dimensional manifold embedded in the observed<br>high-dimensional space and transfer it to MI estimation. As an extension to<br>state-of-the-art $k$NN estimators, we propose to determine the $k$-nearest<br>neighbours via geodesic distances on this manifold rather than form the ambient<br>space, which allows us to estimate MI even in the high-dimensional setting. An<br>empirical evaluation of our method, G-KSG, against the state-of-the-art shows<br>that it yields good estimations of the MI in classical benchmark, and manifold<br>tasks, even for high dimensional datasets, which none of the existing methods<br>can provide.<br> %K Computer Science, Information Theory, cs.IT,Mathematics, Information Theory, math.IT
[50]
O. A. Mian, A. Marx, and J. Vreeken, “Discovering Fully Oriented Causal Networks,” in Thirty-Fifth AAAI Conference on Artificial Intelligence, Vancouver, Canada, 2021.
Export
BibTeX
@inproceedings{mian:20:globe,
  title        = {Discovering Fully Oriented Causal Networks},
  author       = {Mian, Osman A. and Marx, Alexander and Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-1-57735-866-4},
  doi          = {10.1609/aaai.v35i10.17085},
  publisher    = {AAAI},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {Thirty-Fifth AAAI Conference on Artificial Intelligence},
  pages        = {8975--8982},
  address      = {Vancouver, Canada},
}
Endnote
%0 Conference Proceedings %A Mian, Osman A. %A Marx, Alexander %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering Fully Oriented Causal Networks : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0BCB-8 %R 10.1609/aaai.v35i10.17085 %D 2021 %B The Thirty-Fifth Conference on Artificial Intelligence %Z date of event: 2021-02-02 - 2021-02-09 %C Vancouver, Canada %B Thirty-Fifth AAAI Conference on Artificial Intelligence %P 8975 - 8982 %I AAAI %@ 978-1-57735-866-4
[51]
P. Mirza, M. Abouhamra, and G. Weikum, “AligNarr: Aligning Narratives on Movies,” in The 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (ACL-IJCNLP 2021), Virtual, 2021.
Export
BibTeX
@inproceedings{Mirza_ACL-short.54,
  title        = {{AligNarr}: {A}ligning Narratives on Movies},
  author       = {Mirza, Paramita and Abouhamra, Mostafa and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-954085-53-4},
  url          = {https://aclanthology.org/2021.acl-short.54},
  doi          = {10.18653/v1/2021.acl-short.54},
  publisher    = {ACL},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {The 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (ACL-IJCNLP 2021)},
  editor       = {Xia, Fei and Li, Wenjie and Navigli, Roberto},
  pages        = {427--433},
  address      = {Virtual},
}
Endnote
%0 Conference Proceedings %A Mirza, Paramita %A Abouhamra, Mostafa %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T AligNarr: Aligning Narratives on Movies : %G eng %U http://hdl.handle.net/21.11116/0000-0009-4A1F-3 %U https://aclanthology.org/2021.acl-short.54 %R 10.18653/v1/2021.acl-short.54 %D 2021 %B The 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing %Z date of event: 2021-08-01 - 2021-08-06 %C Virtual %B The 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing %E Xia, Fei; Li, Wenjie; Navigli, Roberto %P 427 - 433 %I ACL %@ 978-1-954085-53-4
[52]
S. Nag Chowdhury, R. Bhowmik, H. Ravi, G. de Melo, S. Razniewski, and G. Weikum, “Exploiting Image-Text Synergy for Contextual Image Captioning,” in Proceedings of the Third Workshop on Beyond Vision and LANguage: inTEgrating Real-world kNowledge (LANTERN), Kyiv, Ukraine (Online), 2021.
Export
BibTeX
@inproceedings{Chod_ECAL2021,
  title        = {Exploiting Image-Text Synergy for Contextual Image Captioning},
  author       = {Nag Chowdhury, Sreyasi and Bhowmik, Rajarshi and Ravi, Hareesh and de Melo, Gerard and Razniewski, Simon and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-954085-15-2},
  url          = {https://aclanthology.org/2021.lantern-1.3},
  publisher    = {ACL},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {Proceedings of the Third Workshop on Beyond Vision and LANguage: inTEgrating Real-world kNowledge (LANTERN)},
  editor       = {Mosbach, Marius and Hedderich, Michael A. and Pezzelle, Sandro and Mogadala, Aditya and Klakow, Dietrich and Moens, Marie-Francine and Akata, Zeynep},
  pages        = {30--37},
  address      = {Kyiv, Ukraine (Online)},
}
Endnote
%0 Conference Proceedings %A Nag Chowdhury, Sreyasi %A Bhowmik, Rajarshi %A Ravi, Hareesh %A de Melo, Gerard %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Exploiting Image-Text Synergy for Contextual Image Captioning : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0E60-D %U https://aclanthology.org/2021.lantern-1.3 %D 2021 %B The Third Workshop Beyond Vision and LANguage: inTEgrating Real-world kNowledge %Z date of event: 2021-04-20 - 2021-04-20 %C Kyiv, Ukraine (Online) %B Proceedings of the Third Workshop on Beyond Vision and LANguage: inTEgrating Real-world kNowledge (LANTERN) %E Mosbach, Marius; Hedderich, Michael A.; Pezzelle, Sandro; Mogadala, Aditya; Klakow, Dietrich; Moens, Marie-Francine; Akata, Zeynep %P 30 - 37 %I ACL %@ 978-1-954085-15-2
[53]
S. Nag Chowdhury, R. Wickramarachchi, M. H. Gad-Elrab, D. Stepanova, and C. Henson, “Towards Leveraging Commonsense Knowledge for Autonomous Driving,” in International Semantic Web Conference (ISWC) 2021: Posters, Demos, and Industry Tracks, Virtual Conference, 2021.
Export
BibTeX
@inproceedings{NagChowdhury_ISWC2021,
  title        = {Towards Leveraging Commonsense Knowledge for Autonomous Driving},
  author       = {Nag Chowdhury, Sreyasi and Wickramarachchi, Ruwan and Gad-Elrab, Mohamed Hassan and Stepanova, Daria and Henson, Cory},
  language     = {eng},
  issn         = {1613-0073},
  url          = {https://ceur-ws.org/Vol-2980/paper396.pdf; urn:nbn:de:0074-2980-6},
  publisher    = {CEUR-WS.org},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {International Semantic Web Conference (ISWC) 2021: Posters, Demos, and Industry Tracks},
  editor       = {Seneviratne, Oshani and Pesquita, Catia and Sequeda, Juan and Etcheverry, Lorena},
  pages        = {1--5},
  eid          = {396},
  series       = {CEUR Workshop Proceedings},
  volume       = {2980},
  address      = {Virtual Conference},
}
Endnote
%0 Conference Proceedings %A Nag Chowdhury, Sreyasi %A Wickramarachchi, Ruwan %A Gad-Elrab, Mohamed Hassan %A Stepanova, Daria %A Henson, Cory %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations %T Towards Leveraging Commonsense Knowledge for Autonomous Driving : %G eng %U http://hdl.handle.net/21.11116/0000-0009-42CD-6 %U https://ceur-ws.org/Vol-2980/paper396.pdf %D 2021 %B 20th International Semantic Web Conference %Z date of event: 2021-10-24 - 2021-10-28 %C Virtual Conference %B International Semantic Web Conference (ISWC) 2021: Posters, Demos, and Industry Tracks %E Seneviratne, Oshani; Pesquita, Catia; Sequeda, Juan; Etcheverry, Lorena %P 1 - 5 %Z sequence number: 396 %I CEUR-WS.org %B CEUR Workshop Proceedings %N 2980 %@ false
[54]
S. Nag Chowdhury, “Text-image synergy for multimodal retrieval and annotation,” Universität des Saarlandes, Saarbrücken, 2021.
Abstract
Text and images are the two most common data modalities found on the Internet. Understanding the synergy between text and images, that is, seamlessly analyzing information from these modalities may be trivial for humans, but is challenging for software systems. In this dissertation we study problems where deciphering text-image synergy is crucial for finding solutions. We propose methods and ideas that establish semantic connections between text and images in multimodal contents, and empirically show their effectiveness in four interconnected problems: Image Retrieval, Image Tag Refinement, Image-Text Alignment, and Image Captioning. Our promising results and observations open up interesting scopes for future research involving text-image data understanding.
Export
BibTeX
@phdthesis{Chowphd2021,
  title        = {Text-image synergy for multimodal retrieval and annotation},
  author       = {Nag Chowdhury, Sreyasi},
  language     = {eng},
  url          = {urn:nbn:de:bsz:291--ds-345092},
  doi          = {10.22028/D291-34509},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2021},
  marginalmark = {$\bullet$},
  date         = {2021},
  abstract     = {Text and images are the two most common data modalities found on the Internet. Understanding the synergy between text and images, that is, seamlessly analyzing information from these modalities may be trivial for humans, but is challenging for software systems. In this dissertation we study problems where deciphering text-image synergy is crucial for finding solutions. We propose methods and ideas that establish semantic connections between text and images in multimodal contents, and empirically show their effectiveness in four interconnected problems: Image Retrieval, Image Tag Refinement, Image-Text Alignment, and Image Captioning. Our promising results and observations open up interesting scopes for future research involving text-image data understanding.},
}
Endnote
%0 Thesis %A Nag Chowdhury, Sreyasi %A referee: Weikum, Gerhard %A referee: de Melo, Gerard %A referee: Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Text-image synergy for multimodal retrieval and annotation : %G eng %U http://hdl.handle.net/21.11116/0000-0009-428A-1 %R 10.22028/D291-34509 %U urn:nbn:de:bsz:291--ds-345092 %F OTHER: hdl:20.500.11880/31690 %I Universit&#228;t des Saarlandes %C Saarbr&#252;cken %D 2021 %P 131 p. %V phd %9 phd %X Text and images are the two most common data modalities found on the Internet. Understanding the synergy between text and images, that is, seamlessly analyzing information from these modalities may be trivial for humans, but is challenging for software systems. In this dissertation we study problems where deciphering text-image synergy is crucial for finding solutions. We propose methods and ideas that establish semantic connections between text and images in multimodal contents, and empirically show their effectiveness in four interconnected problems: Image Retrieval, Image Tag Refinement, Image-Text Alignment, and Image Captioning. Our promising results and observations open up interesting scopes for future research involving text-image data understanding.Text and images are the two most common data modalities found on the Internet. Understanding the synergy between text and images, that is, seamlessly analyzing information from these modalities may be trivial for humans, but is challenging for software systems. In this dissertation we study problems where deciphering text-image synergy is crucial for finding solutions. 
We propose methods and ideas that establish semantic connections between text and images in multimodal contents, and empirically show their effectiveness in four interconnected problems: Image Retrieval, Image Tag Refinement, Image-Text Alignment, and Image Captioning. Our promising results and observations open up interesting scopes for future research involving text-image data understanding. %K image retrieval image-text alignment image captioning commonsense knowledge %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/31690
[55]
S. Nag Chowdhury, S. Razniewski, and G. Weikum, “SANDI: Story-and-Images Alignment,” in The 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021), Online, 2021.
Export
BibTeX
@inproceedings{Thinh_EACL21,
  title        = {{SANDI}: {S}tory-and-Images Alignment},
  author       = {Nag Chowdhury, Sreyasi and Razniewski, Simon and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-954085-02-2},
  url          = {https://aclanthology.org/2021.eacl-main.85},
  publisher    = {ACL},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {The 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021)},
  editor       = {Merlo, Paola and Tiedemann, Jorg and Tsarfaty, Reut},
  pages        = {989--999},
  address      = {Online},
}
Endnote
%0 Conference Proceedings %A Nag Chowdhury, Sreyasi %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T SANDI: Story-and-Images Alignment : %G eng %U http://hdl.handle.net/21.11116/0000-0008-04A2-C %U https://aclanthology.org/2021.eacl-main.85 %D 2021 %B 16th Conference of the European Chapter of the Association for Computational Linguistics %Z date of event: 2021-04-19 - 2021-04-23 %C Online %B The 16th Conference of the European Chapter of the Association for Computational Linguistics %E Merlo, Paola; Tiedemann, Jorg; Tsarfaty, Reut %P 989 - 999 %I ACL %@ 978-1-954085-02-2
[56]
S. Naseri, J. Dalton, A. Yates, and J. Allan, “CEQE: Contextualized Embeddings for Query Expansion,” 2021. [Online]. Available: https://arxiv.org/abs/2103.05256. (arXiv: 2103.05256)
Abstract
In this work we leverage recent advances in context-sensitive language models<br>to improve the task of query expansion. Contextualized word representation<br>models, such as ELMo and BERT, are rapidly replacing static embedding models.<br>We propose a new model, Contextualized Embeddings for Query Expansion (CEQE),<br>that utilizes query-focused contextualized embedding vectors. We study the<br>behavior of contextual representations generated for query expansion in ad-hoc<br>document retrieval. We conduct our experiments on probabilistic retrieval<br>models as well as in combination with neural ranking models. We evaluate CEQE<br>on two standard TREC collections: Robust and Deep Learning. We find that CEQE<br>outperforms static embedding-based expansion methods on multiple collections<br>(by up to 18% on Robust and 31% on Deep Learning on average precision) and also<br>improves over proven probabilistic pseudo-relevance feedback (PRF) models. We<br>further find that multiple passes of expansion and reranking result in<br>continued gains in effectiveness with CEQE-based approaches outperforming other<br>approaches. The final model incorporating neural and CEQE-based expansion score<br>achieves gains of up to 5% in P@20 and 2% in AP on Robust over the<br>state-of-the-art transformer-based re-ranking model, Birch.<br>
Export
BibTeX
@online{Naseri_2103.05256,
  title        = {{CEQE}: Contextualized Embeddings for Query Expansion},
  author       = {Naseri, Shahrzad and Dalton, Jeffrey and Yates, Andrew and Allan, James},
  language     = {eng},
  url          = {https://arxiv.org/abs/2103.05256},
  eprint       = {2103.05256},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {In this work we leverage recent advances in context-sensitive language models to improve the task of query expansion. Contextualized word representation models, such as ELMo and BERT, are rapidly replacing static embedding models. We propose a new model, Contextualized Embeddings for Query Expansion (CEQE), that utilizes query-focused contextualized embedding vectors. We study the behavior of contextual representations generated for query expansion in ad-hoc document retrieval. We conduct our experiments on probabilistic retrieval models as well as in combination with neural ranking models. We evaluate CEQE on two standard TREC collections: Robust and Deep Learning. We find that CEQE outperforms static embedding-based expansion methods on multiple collections (by up to 18\% on Robust and 31\% on Deep Learning on average precision) and also improves over proven probabilistic pseudo-relevance feedback (PRF) models. We further find that multiple passes of expansion and reranking result in continued gains in effectiveness with CEQE-based approaches outperforming other approaches. The final model incorporating neural and CEQE-based expansion score achieves gains of up to 5\% in P@20 and 2\% in AP on Robust over the state-of-the-art transformer-based re-ranking model, Birch.},
}
Endnote
%0 Report %A Naseri, Shahrzad %A Dalton, Jeffrey %A Yates, Andrew %A Allan, James %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T CEQE: Contextualized Embeddings for Query Expansion : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6779-C %U https://arxiv.org/abs/2103.05256 %D 2021 %X In this work we leverage recent advances in context-sensitive language models<br>to improve the task of query expansion. Contextualized word representation<br>models, such as ELMo and BERT, are rapidly replacing static embedding models.<br>We propose a new model, Contextualized Embeddings for Query Expansion (CEQE),<br>that utilizes query-focused contextualized embedding vectors. We study the<br>behavior of contextual representations generated for query expansion in ad-hoc<br>document retrieval. We conduct our experiments on probabilistic retrieval<br>models as well as in combination with neural ranking models. We evaluate CEQE<br>on two standard TREC collections: Robust and Deep Learning. We find that CEQE<br>outperforms static embedding-based expansion methods on multiple collections<br>(by up to 18% on Robust and 31% on Deep Learning on average precision) and also<br>improves over proven probabilistic pseudo-relevance feedback (PRF) models. We<br>further find that multiple passes of expansion and reranking result in<br>continued gains in effectiveness with CEQE-based approaches outperforming other<br>approaches. The final model incorporating neural and CEQE-based expansion score<br>achieves gains of up to 5% in P@20 and 2% in AP on Robust over the<br>state-of-the-art transformer-based re-ranking model, Birch.<br> %K Computer Science, Information Retrieval, cs.IR
[57]
S. Naseri, J. Dalton, A. Yates, and J. Allan, “CEQE: Contextualized Embeddings for Query Expansion,” in Advances in Information Retrieval (ECIR 2021), Lucca, Italy (Online Event), 2021.
Export
BibTeX
@inproceedings{Naseri_ECIR2021,
  title        = {{CEQE}: {C}ontextualized Embeddings for Query Expansion},
  author       = {Naseri, Shahrzad and Dalton, Jeffrey and Yates, Andrew and Allan, James},
  language     = {eng},
  isbn         = {978-3-030-72112-1},
  doi          = {10.1007/978-3-030-72113-8_31},
  publisher    = {Springer},
  year         = {2021},
  marginalmark = {$\bullet$},
  date         = {2021},
  booktitle    = {Advances in Information Retrieval (ECIR 2021)},
  editor       = {Hiemstra, Djoerd and Moens, Marie-Francine and Mothe, Josiane and Perego, Raffaele and Potthast, Martin and Sebastiani, Fabrizio},
  pages        = {467--482},
  series       = {Lecture Notes in Computer Science},
  volume       = {12656},
  address      = {Lucca, Italy (Online Event)},
}
Endnote
%0 Conference Proceedings %A Naseri, Shahrzad %A Dalton, Jeff %A Yates, Andrew %A Allan, James %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T CEQE: Contextualized Embeddings for Query Expansion : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6628-8 %R 10.1007/978-3-030-72113-8_31 %D 2021 %B 43rd European Conference on IR Research %Z date of event: 2021-03-28 - 2021-04-01 %C Lucca, Italy (Online Event) %B Advances in Information Retrieval %E Hiemstra, Djoerd; Moens, Marie-Francine; Mothe, Josiane; Perego, Raffaele; Potthast, Martin; Sebastiani, Fabrizio %P 467 - 482 %I Springer %@ 978-3-030-72112-1 %B Lecture Notes in Computer Science %N 12656
[58]
T. Nguyen, “Grounding Depression Detection in Clinical Questionnaires by Detecting Mental Health Symptoms,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@mastersthesis{NguyenMSc21,
  title        = {Grounding Depression Detection in Clinical Questionnaires by Detecting Mental Health Symptoms},
  author       = {Nguyen, Thong},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2021},
  marginalmark = {$\bullet$},
  date         = {2021},
}
Endnote
%0 Thesis %A Nguyen, Thong %Y Yates, Andrew %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Grounding Depression Detection in Clinical Questionnaires by Detecting Mental Health Symptoms : %G eng %U http://hdl.handle.net/21.11116/0000-000D-2DA3-9 %I Universit&#228;t des Saarlandes %C Saarbr&#252;cken %D 2021 %P X, 68 p. %V master %9 master
[59]
T.-P. Nguyen, S. Razniewski, and G. Weikum, “Advanced Semantics for Commonsense Knowledge Extraction,” in The Web Conference 2021 (WWW 2021), Ljubljana, Slovenia, 2021.
Export
BibTeX
@inproceedings{Nguyen_WWW21,
  title        = {Advanced Semantics for Commonsense Knowledge Extraction},
  author       = {Nguyen, Tuan-Phong and Razniewski, Simon and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-8312-7},
  doi          = {10.1145/3442381.3449827},
  publisher    = {ACM},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {The Web Conference 2021 (WWW 2021)},
  editor       = {Leskovec, Jure and Grobelnik, Marko and Najork, Marc and Tang, Jie and Zia, Leila},
  pages        = {2636--2647},
  address      = {Ljubljana, Slovenia},
}
Endnote
%0 Conference Proceedings %A Nguyen, Tuan-Phong %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Advanced Semantics for Commonsense Knowledge Extraction : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0196-D %R 10.1145/3442381.3449827 %D 2021 %B 30th The Web Conference %Z date of event: 2021-04-30 - %C Ljubljana, Slovenia %B The Web Conference 2021 %E Leskovec, Jure; Grobelnik, Marko; Najork, Marc; Tang, Jie; Zia, Leila %P 2636 - 2647 %I ACM %@ 978-1-4503-8312-7
[60]
T.-P. Nguyen, S. Razniewski, and G. Weikum, “Inside ASCENT: Exploring a Deep Commonsense Knowledge Base and its Usage in Question Answering,” 2021. [Online]. Available: https://arxiv.org/abs/2105.13662. (arXiv: 2105.13662)
Abstract
ASCENT is a fully automated methodology for extracting and consolidating commonsense assertions from web contents (Nguyen et al., WWW 2021). It advances traditional triple-based commonsense knowledge representation by capturing semantic facets like locations and purposes, and composite concepts, i.e., subgroups and related aspects of subjects. In this demo, we present a web portal that allows users to understand its construction process, explore its content, and observe its impact in the use case of question answering. The demo website and an introductory video are both available online.
Export
BibTeX
@online{Nguyen_2105.13662,
  title        = {Inside {ASCENT}: {E}xploring a Deep Commonsense Knowledge Base and its Usage in Question Answering},
  author       = {Nguyen, Tuan-Phong and Razniewski, Simon and Weikum, Gerhard},
  language     = {eng},
  url          = {https://arxiv.org/abs/2105.13662},
  eprint       = {2105.13662},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {ASCENT is a fully automated methodology for extracting and consolidating commonsense assertions from web contents (Nguyen et al., WWW 2021). It advances traditional triple-based commonsense knowledge representation by capturing semantic facets like locations and purposes, and composite concepts, i.e., subgroups and related aspects of subjects. In this demo, we present a web portal that allows users to understand its construction process, explore its content, and observe its impact in the use case of question answering. The demo website and an introductory video are both available online.},
}
Endnote
%0 Report %A Nguyen, Tuan-Phong %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Inside ASCENT: Exploring a Deep Commonsense Knowledge Base and its Usage in Question Answering : %G eng %U http://hdl.handle.net/21.11116/0000-0009-4A2E-2 %U https://arxiv.org/abs/2105.13662 %D 2021 %X ASCENT is a fully automated methodology for extracting and consolidating<br>commonsense assertions from web contents (Nguyen et al., WWW 2021). It advances<br>traditional triple-based commonsense knowledge representation by capturing<br>semantic facets like locations and purposes, and composite concepts, i.e.,<br>subgroups and related aspects of subjects. In this demo, we present a web<br>portal that allows users to understand its construction process, explore its<br>content, and observe its impact in the use case of question answering. The demo<br>website and an introductory video are both available online.<br> %K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL %U https://youtu.be/qMkJXqu_Yd4
[61]
S. Pramanik, J. Alabi, R. Saha Roy, and G. Weikum, “UNIQORN: Unified Question Answering over RDF Knowledge Graphs and Natural Language Text,” 2021. [Online]. Available: https://arxiv.org/abs/2108.08614. (arXiv: 2108.08614)
Abstract
Question answering over knowledge graphs and other RDF data has been greatly advanced, with a number of good systems providing crisp answers for natural language questions or telegraphic queries. Some of these systems incorporate textual sources as additional evidence for the answering process, but cannot compute answers that are present in text alone. Conversely, systems from the IR and NLP communities have addressed QA over text, but barely utilize semantic data and knowledge. This paper presents the first QA system that can seamlessly operate over RDF datasets and text corpora, or both together, in a unified framework. Our method, called UNIQORN, builds a context graph on the fly, by retrieving question-relevant triples from the RDF data and/or the text corpus, where the latter case is handled by automatic information extraction. The resulting graph is typically rich but highly noisy. UNIQORN copes with this input by advanced graph algorithms for Group Steiner Trees, that identify the best answer candidates in the context graph. Experimental results on several benchmarks of complex questions with multiple entities and relations, show that UNIQORN, an unsupervised method with only five parameters, produces results comparable to the state-of-the-art on KGs, text corpora, and heterogeneous sources. The graph-based methodology provides user-interpretable evidence for the complete answering process.
Export
BibTeX
@online{Pramanik_2108.08614,
  title        = {{UNIQORN}: {U}nified Question Answering over {RDF} Knowledge Graphs and Natural Language Text},
  author       = {Pramanik, Soumajit and Alabi, Jesujoba and Saha Roy, Rishiraj and Weikum, Gerhard},
  language     = {eng},
  url          = {https://arxiv.org/abs/2108.08614},
  eprint       = {2108.08614},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {Question answering over knowledge graphs and other RDF data has been greatly advanced, with a number of good systems providing crisp answers for natural language questions or telegraphic queries. Some of these systems incorporate textual sources as additional evidence for the answering process, but cannot compute answers that are present in text alone. Conversely, systems from the IR and NLP communities have addressed QA over text, but barely utilize semantic data and knowledge. This paper presents the first QA system that can seamlessly operate over RDF datasets and text corpora, or both together, in a unified framework. Our method, called UNIQORN, builds a context graph on the fly, by retrieving question-relevant triples from the RDF data and/or the text corpus, where the latter case is handled by automatic information extraction. The resulting graph is typically rich but highly noisy. UNIQORN copes with this input by advanced graph algorithms for Group Steiner Trees, that identify the best answer candidates in the context graph. Experimental results on several benchmarks of complex questions with multiple entities and relations, show that UNIQORN, an unsupervised method with only five parameters, produces results comparable to the state-of-the-art on KGs, text corpora, and heterogeneous sources. The graph-based methodology provides user-interpretable evidence for the complete answering process.},
}
Endnote
%0 Report %A Pramanik, Soumajit %A Alabi, Jesujoba %A Saha Roy, Rishiraj %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T UNIQORN: Unified Question Answering over RDF Knowledge Graphs and Natural Language Text : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6365-6 %U https://arxiv.org/abs/2108.08614 %D 2021 %X Question answering over knowledge graphs and other RDF data has been greatly<br>advanced, with a number of good systems providing crisp answers for natural<br>language questions or telegraphic queries. Some of these systems incorporate<br>textual sources as additional evidence for the answering process, but cannot<br>compute answers that are present in text alone. Conversely, systems from the IR<br>and NLP communities have addressed QA over text, but barely utilize semantic<br>data and knowledge. This paper presents the first QA system that can seamlessly<br>operate over RDF datasets and text corpora, or both together, in a unified<br>framework. Our method, called UNIQORN, builds a context graph on the fly, by<br>retrieving question-relevant triples from the RDF data and/or the text corpus,<br>where the latter case is handled by automatic information extraction. The<br>resulting graph is typically rich but highly noisy. UNIQORN copes with this<br>input by advanced graph algorithms for Group Steiner Trees, that identify the<br>best answer candidates in the context graph. Experimental results on several<br>benchmarks of complex questions with multiple entities and relations, show that<br>UNIQORN, an unsupervised method with only five parameters, produces results<br>comparable to the state-of-the-art on KGs, text corpora, and heterogeneous<br>sources. 
The graph-based methodology provides user-interpretable evidence for<br>the complete answering process.<br> %K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[62]
S. Razniewski, H. Arnaout, S. Ghosh, and F. Suchanek, “On the Limits of Machine Knowledge: Completeness, Recall and Negation in Web-scale Knowledge Bases,” Proceedings of the VLDB Endowment (Proc. VLDB 2021), vol. 14, no. 12, 2021.
Export
BibTeX
@article{Razniewski2021_PVLDB,
  title        = {On the Limits of Machine Knowledge: {C}ompleteness, Recall and Negation in Web-scale Knowledge Bases},
  author       = {Razniewski, Simon and Arnaout, Hiba and Ghosh, Shrestha and Suchanek, Fabian},
  language     = {eng},
  publisher    = {VLDB Endowment Inc.},
  year         = {2021},
  marginalmark = {$\bullet$},
  journal      = {Proceedings of the VLDB Endowment (Proc. VLDB)},
  volume       = {14},
  number       = {12},
  pages        = {3175--3177},
  booktitle    = {Proceedings of the 47th International Conference on Very Large Data Bases (VLDB 2021)},
  editor       = {Dong, Xin Luna and Naumann, Felix},
}
Endnote
%0 Journal Article %A Razniewski, Simon %A Arnaout, Hiba %A Ghosh, Shrestha %A Suchanek, Fabian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T On the Limits of Machine Knowledge: Completeness, Recall and Negation in Web-scale Knowledge Bases : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6544-9 %7 2021 %D 2021 %J Proceedings of the VLDB Endowment %O PVLDB %V 14 %N 12 %& 3175 %P 3175 - 3177 %I VLDB Endowment Inc. %B Proceedings of the 47th International Conference on Very Large Data Bases %O VLDB 2021 Copenhagen, Denmark, 16-20 August 2021
[63]
S. Razniewski, N. Tandon, and A. S. Varde, “Information to Wisdom: Commonsense Knowledge Extraction and Compilation,” in WSDM ’21, 14th International Conference on Web Search and Data Mining, Virtual Event, Israel, 2021.
Export
BibTeX
@inproceedings{Razniewski_WSDM21,
  title        = {Information to Wisdom: {C}ommonsense Knowledge Extraction and Compilation},
  author       = {Razniewski, Simon and Tandon, Niket and Varde, Aparna S.},
  language     = {eng},
  isbn         = {978-1-4503-8297-7},
  doi          = {10.1145/3437963.3441664},
  publisher    = {ACM},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {WSDM '21, 14th International Conference on Web Search and Data Mining},
  editor       = {Lewin-Eytan, Liane and Carmel, David and Yom-Tov, Elad and Agichtein, Eugene and Gabrilovich, Evgeniy},
  pages        = {1143--1146},
  address      = {Virtual Event, Israel},
}
Endnote
%0 Conference Proceedings %A Razniewski, Simon %A Tandon, Niket %A Varde, Aparna S. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Information to Wisdom: Commonsense Knowledge Extraction and Compilation : %G eng %U http://hdl.handle.net/21.11116/0000-0009-65FE-8 %R 10.1145/3437963.3441664 %D 2021 %B 14th International Conference on Web Search and Data Mining %Z date of event: 2021-03-08 - 2021-03-12 %C Virtual Event, Israel %B WSDM '21 %E Lewin-Eytan, Liane; Carmel, David; Yom-Tov, Elad; Agichtein, Eugene; Gabrilovich, Evgeniy %P 1143 - 1146 %I ACM %@ 978-1-4503-8297-7
[64]
S. Razniewski, A. Yates, N. Kassner, and G. Weikum, “Language Models As or For Knowledge Bases,” 2021. [Online]. Available: https://arxiv.org/abs/2110.04888. (arXiv: 2110.04888)
Abstract
Pre-trained language models (LMs) have recently gained attention for their potential as an alternative to (or proxy for) explicit knowledge bases (KBs). In this position paper, we examine this hypothesis, identify strengths and limitations of both LMs and KBs, and discuss the complementary nature of the two paradigms. In particular, we offer qualitative arguments that latent LMs are not suitable as a substitute for explicit KBs, but could play a major role for augmenting and curating KBs.
Export
BibTeX
@online{Razniewski_2110.04888,
  title        = {Language Models As or For Knowledge Bases},
  author       = {Razniewski, Simon and Yates, Andrew and Kassner, Nora and Weikum, Gerhard},
  language     = {eng},
  url          = {https://arxiv.org/abs/2110.04888},
  eprint       = {2110.04888},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {Pre-trained language models (LMs) have recently gained attention for their potential as an alternative to (or proxy for) explicit knowledge bases (KBs). In this position paper, we examine this hypothesis, identify strengths and limitations of both LMs and KBs, and discuss the complementary nature of the two paradigms. In particular, we offer qualitative arguments that latent LMs are not suitable as a substitute for explicit KBs, but could play a major role for augmenting and curating KBs.},
}
Endnote
%0 Report %A Razniewski, Simon %A Yates, Andrew %A Kassner, Nora %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Language Models As or For Knowledge Bases : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6510-3 %U https://arxiv.org/abs/2110.04888 %D 2021 %X Pre-trained language models (LMs) have recently gained attention for their<br>potential as an alternative to (or proxy for) explicit knowledge bases (KBs).<br>In this position paper, we examine this hypothesis, identify strengths and<br>limitations of both LMs and KBs, and discuss the complementary nature of the<br>two paradigms. In particular, we offer qualitative arguments that latent LMs<br>are not suitable as a substitute for explicit KBs, but could play a major role<br>for augmenting and curating KBs.<br> %K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Databases, cs.DB
[65]
S. Razniewski, “Commonsense Knowledge Base Construction in the Age of Big Data,” 2021. [Online]. Available: https://arxiv.org/abs/2105.01925. (arXiv: 2105.01925)
Abstract
Compiling commonsense knowledge is traditionally an AI topic approached by manual labor. Recent advances in web data processing have enabled automated approaches. In this demonstration we will showcase three systems for automated commonsense knowledge base construction, highlighting each time one aspect of specific interest to the data management community. (i) We use Quasimodo to illustrate knowledge extraction systems engineering, (ii) Dice to illustrate the role that schema constraints play in cleaning fuzzy commonsense knowledge, and (iii) Ascent to illustrate the relevance of conceptual modelling. The demos are available online at https://quasimodo.r2.enst.fr, https://dice.mpi-inf.mpg.de and ascent.mpi-inf.mpg.de.
Export
BibTeX
@online{Razniewski_2105.01925,
  title        = {Commonsense Knowledge Base Construction in the Age of Big Data},
  author       = {Razniewski, Simon},
  language     = {eng},
  url          = {https://arxiv.org/abs/2105.01925},
  eprint       = {2105.01925},
  eprinttype   = {arXiv},
  year         = {2021},
  marginalmark = {$\bullet$},
  abstract     = {Compiling commonsense knowledge is traditionally an AI topic approached by manual labor. Recent advances in web data processing have enabled automated approaches. In this demonstration we will showcase three systems for automated commonsense knowledge base construction, highlighting each time one aspect of specific interest to the data management community. (i) We use Quasimodo to illustrate knowledge extraction systems engineering, (ii) Dice to illustrate the role that schema constraints play in cleaning fuzzy commonsense knowledge, and (iii) Ascent to illustrate the relevance of conceptual modelling. The demos are available online at https://quasimodo.r2.enst.fr, https://dice.mpi-inf.mpg.de and ascent.mpi-inf.mpg.de.},
}
Endnote
%0 Report %A Razniewski, Simon %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Commonsense Knowledge Base Construction in the Age of Big Data : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6604-0 %U https://arxiv.org/abs/2105.01925 %D 2021 %X Compiling commonsense knowledge is traditionally an AI topic approached by<br>manual labor. Recent advances in web data processing have enabled automated<br>approaches. In this demonstration we will showcase three systems for automated<br>commonsense knowledge base construction, highlighting each time one aspect of<br>specific interest to the data management community. (i) We use Quasimodo to<br>illustrate knowledge extraction systems engineering, (ii) Dice to illustrate<br>the role that schema constraints play in cleaning fuzzy commonsense knowledge,<br>and (iii) Ascent to illustrate the relevance of conceptual modelling. The demos<br>are available online at https://quasimodo.r2.enst.fr,<br>https://dice.mpi-inf.mpg.de and ascent.mpi-inf.mpg.de.<br> %K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL,Computer Science, Databases, cs.DB
[66]
J. Romero, “Pyformlang: An Educational Library for Formal Language Manipulation,” in SIGCSE ’21, The 52nd ACM Technical Symposium on Computer Science Education, Virtual Event, USA, 2021.
Export
BibTeX
@inproceedings{Romero_SIGCSE21,
  title        = {Pyformlang: {An} Educational Library for Formal Language Manipulation},
  author       = {Romero, Julien},
  language     = {eng},
  isbn         = {978-1-4503-8062-1},
  doi          = {10.1145/3408877.3432464},
  publisher    = {ACM},
  year         = {2021},
  marginalmark = {$\bullet$},
  booktitle    = {SIGCSE '21, The 52nd ACM Technical Symposium on Computer Science Education},
  editor       = {Sherriff, Mark and Merkle, Laurence D. and Cutter, Pamela and Monge, Alvaro and Sheard, Judithe},
  pages        = {576--582},
  address      = {Virtual Event, USA},
}
Endnote
%0 Conference Proceedings %A Romero, Julien %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Pyformlang: An Educational Library for Formal Language Manipulation : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F836-5 %R 10.1145/3408877.3432464 %D 2021 %B The 52nd ACM Technical Symposium on Computer Science Education %Z date of event: 2021-03-13 - 2021-03-20 %C Virtual Event, USA %B SIGCSE '21 %E Sherriff, Mark; Merkle, Laurence D.; Cutter, Pamela; Monge, Alvaro; Sheard, Judithe %P 576 - 582 %I ACM %@ 978-1-4503-8062-1
[67]
R. Saha Roy and A. Anand, Question Answering for the Curated Web: Tasks and Methods in QA over Knowledge Bases and Text Collections. San Rafael, CA: Morgan & Claypool, 2021.
Export
BibTeX
@book{SahaRoy2021,
  title        = {Question Answering for the Curated Web: {T}asks and Methods in {QA} over Knowledge Bases and Text Collections},
  author       = {Saha Roy, Rishiraj and Anand, Avishek},
  language     = {eng},
  isbn         = {978-1636392387},
  doi          = {10.2200/S0113ED1V01Y202109ICR076},
  publisher    = {Morgan \& Claypool},
  address      = {San Rafael, CA},
  year         = {2021},
  marginalmark = {$\bullet$},
  date         = {2021},
  pages        = {194 p.},
  series       = {Synthesis Lectures on Information Concepts, Retrieval, and Services},
}
Endnote
%0 Book %A Saha Roy, Rishiraj %A Anand, Avishek %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Question Answering for the Curated Web: Tasks and Methods in QA over Knowledge Bases and Text Collections : %G eng %U http://hdl.handle.net/21.11116/0000-0009-B116-6 %R 10.2200/S0113ED1V01Y202109ICR076 %@ 978-1636392387 %I Morgan & Claypool %C San Rafael, CA %D 2021 %P 194 p. %B Synthesis Lectures on Information Concepts, Retrieval, and Services
[68]
F. Schmidt, A. Marx, N. Baumgarten, M. Hebel, M. Wegner, M. Kaulich, M. S. Leisegang, R. P. Brandes, J. Göke, J. Vreeken, and M. H. Schulz, “Integrative Analysis of Epigenetics Data Identifies Gene-specific Regulatory Elements,” Nucleic Acids Research (London), vol. 49, no. 18, 2021.
Export
BibTeX
@article{Schmidt_NAR21,
  title        = {Integrative Analysis of Epigenetics Data Identifies Gene-specific Regulatory Elements},
  author       = {Schmidt, Florian and Marx, Alexander and Baumgarten, Nina and Hebel, Marie and Wegner, Martin and Kaulich, Manuel and Leisegang, Matthias S. and Brandes, Ralf P. and G{\"o}ke, Jonathan and Vreeken, Jilles and Schulz, Marcel Holger},
  language     = {eng},
  issn         = {0305-1048},
  doi          = {10.1093/nar/gkab798},
  publisher    = {Oxford University Press},
  address      = {Oxford},
  year         = {2021},
  marginalmark = {$\bullet$},
  date         = {2021},
  journal      = {Nucleic Acids Research (London)},
  volume       = {49},
  number       = {18},
  pages        = {10397--10418},
}
Endnote
%0 Journal Article %A Schmidt, Florian %A Marx, Alexander %A Baumgarten, Nina %A Hebel, Marie %A Wegner, Martin %A Kaulich, Manuel %A Leisegang, Matthias S. %A Brandes, Ralf P %A G&#246;ke, Jonathan %A Vreeken, Jilles %A Schulz, Marcel Holger %+ Computational Biology and Applied Algorithmics, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Computational Biology and Applied Algorithmics, MPI for Informatics, Max Planck Society %T Integrative Analysis of Epigenetics Data Identifies Gene-specific Regulatory Elements : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6D54-F %R 10.1093/nar/gkab798 %2 PMC8501997 %7 2021 %D 2021 %J Nucleic Acids Research (London) %O Nucleic Acids Res %V 49 %N 18 %& 10397 %P 10397 - 10418 %I Oxford University Press %C Oxford %@ false
[69]
X. Shen, “Deep Latent-Variable Models for Neural Text Generation,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@phdthesis{Shenphd2021,
  title        = {Deep Latent-Variable Models for Neural Text Generation},
  author       = {Shen, Xiaoyu},
  language     = {eng},
  url          = {urn:nbn:de:bsz:291--ds-350558},
  doi          = {10.22028/D291-35055},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2021},
  marginalmark = {$\bullet$},
  date         = {2021},
}
Endnote
%0 Thesis %A Shen, Xiaoyu %Y Klakow, Dietrich %A referee: Weikum, Gerhard %A referee: Sch&#252;tze, Hinrich %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Deep Latent-Variable Models for Neural Text Generation : %G eng %U http://hdl.handle.net/21.11116/0000-0009-B25D-6 %R 10.22028/D291-35055 %U nbn:de:bsz:291--ds-350558 %F OTHER: hdl:20.500.11880/32106 %I Universit&#228;t des Saarlandes %C Saarbr&#252;cken %D 2021 %P 201 p. %V phd %9 phd %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/32106
[70]
S. Shrinivasan, “Knowledge Base Stability,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@mastersthesis{ShrinivasanMSc21,
  TITLE        = {Knowledge Base Stability},
  AUTHOR       = {Shrinivasan, Suhas},
  LANGUAGE     = {eng},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  DATE         = {2021},
}
Endnote
%0 Thesis %A Shrinivasan, Suhas %Y Razniewski, Simon %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowledge Base Stability : %G eng %U http://hdl.handle.net/21.11116/0000-000D-15A0-6 %I Universit&#228;t des Saarlandes %C Saarbr&#252;cken %D 2021 %P 87 p. %V master %9 master
[71]
S. Singhania, S. Razniewski, and G. Weikum, “Predicting Document Coverage for Relation Extraction,” 2021. [Online]. Available: https://arxiv.org/abs/2111.13611. (arXiv: 2111.13611)
Abstract
This paper presents a new task of predicting the coverage of a text document for relation extraction (RE): does the document contain many relational tuples for a given entity? Coverage predictions are useful in selecting the best documents for knowledge base construction with large input corpora. To study this problem, we present a dataset of 31,366 diverse documents for 520 entities. We analyze the correlation of document coverage with features like length, entity mention frequency, Alexa rank, language complexity and information retrieval scores. Each of these features has only moderate predictive power. We employ methods combining features with statistical models like TF-IDF and language models like BERT. The model combining features and BERT, HERB, achieves an F1 score of up to 46%. We demonstrate the utility of coverage predictions on two use cases: KB construction and claim refutation.
Export
BibTeX
@online{Singhania2021,
  TITLE        = {Predicting Document Coverage for Relation Extraction},
  AUTHOR       = {Singhania, Sneha and Razniewski, Simon and Weikum, Gerhard},
  LANGUAGE     = {eng},
  URL          = {https://arxiv.org/abs/2111.13611},
  EPRINT       = {2111.13611},
  EPRINTTYPE   = {arXiv},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {This paper presents a new task of predicting the coverage of a text document for relation extraction (RE): does the document contain many relational tuples for a given entity? Coverage predictions are useful in selecting the best documents for knowledge base construction with large input corpora. To study this problem, we present a dataset of 31,366 diverse documents for 520 entities. We analyze the correlation of document coverage with features like length, entity mention frequency, Alexa rank, language complexity and information retrieval scores. Each of these features has only moderate predictive power. We employ methods combining features with statistical models like TF-IDF and language models like BERT. The model combining features and BERT, HERB, achieves an F1 score of up to 46\%. We demonstrate the utility of coverage predictions on two use cases: KB construction and claim refutation.},
}
Endnote
%0 Report %A Singhania, Sneha %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Predicting Document Coverage for Relation Extraction : %G eng %U http://hdl.handle.net/21.11116/0000-000A-237F-1 %U https://arxiv.org/abs/2111.13611 %D 2021 %X This paper presents a new task of predicting the coverage of a text document<br>for relation extraction (RE): does the document contain many relational tuples<br>for a given entity? Coverage predictions are useful in selecting the best<br>documents for knowledge base construction with large input corpora. To study<br>this problem, we present a dataset of 31,366 diverse documents for 520<br>entities. We analyze the correlation of document coverage with features like<br>length, entity mention frequency, Alexa rank, language complexity and<br>information retrieval scores. Each of these features has only moderate<br>predictive power. We employ methods combining features with statistical models<br>like TF-IDF and language models like BERT. The model combining features and<br>BERT, HERB, achieves an F1 score of up to 46%. We demonstrate the utility of<br>coverage predictions on two use cases: KB construction and claim refutation.<br> %K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI
[72]
A. Tigunova, P. Mirza, A. Yates, and G. Weikum, “Exploring Personal Knowledge Extraction from Conversations with CHARM,” in WSDM ’21, 14th International Conference on Web Search and Data Mining, Virtual Event, Israel, 2021.
Export
BibTeX
@inproceedings{Tigunova_WSDM21,
  TITLE        = {Exploring Personal Knowledge Extraction from Conversations with {CHARM}},
  AUTHOR       = {Tigunova, Anna and Mirza, Paramita and Yates, Andrew and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-8297-7},
  DOI          = {10.1145/3437963.3441699},
  PUBLISHER    = {ACM},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {WSDM '21, 14th International Conference on Web Search and Data Mining},
  EDITOR       = {Lewin-Eytan, Liane and Carmel, David and Yom-Tov, Elad and Agichtein, Eugene and Gabrilovich, Evgeniy},
  PAGES        = {1077--1080},
  ADDRESS      = {Virtual Event, Israel},
}
Endnote
%0 Conference Proceedings %A Tigunova, Anna %A Mirza, Paramita %A Yates, Andrew %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Exploring Personal Knowledge Extraction from Conversations with CHARM : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F850-7 %R 10.1145/3437963.3441699 %D 2021 %B 14th International Conference on Web Search and Data Mining %Z date of event: 2021-03-08 - 2021-03-12 %C Virtual Event, Israel %B WSDM '21 %E Lewin-Eytan, Liane; Carmel, David; Yom-Tov, Elad; Agichtein, Eugene; Gabrilovich, Evgeniy %P 1077 - 1080 %I ACM %@ 978-1-4503-8297-7
[73]
A. Tigunova, P. Mirza, A. Yates, and G. Weikum, “PRIDE: Predicting Relationships in Conversations,” in Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP 2021), Punta Cana, Dominican Republic, 2021.
Export
BibTeX
@inproceedings{DBLP:conf/emnlp/TigunovaMYW21,
  TITLE        = {{PRIDE}: {P}redicting Relationships in Conversations},
  AUTHOR       = {Tigunova, Anna and Mirza, Paramita and Yates, Andrew and Weikum, Gerhard},
  LANGUAGE     = {eng},
  URL          = {https://aclanthology.org/2021.emnlp-main.380/},
  DOI          = {10.18653/v1/2021.emnlp-main.380},
  PUBLISHER    = {ACL},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP 2021)},
  EDITOR       = {Moens, Marie-Francine and Huang, Xuanjing and Specia, Lucia and Yih, Scott Wen-tau},
  PAGES        = {4636--4650},
  ADDRESS      = {Punta Cana, Dominican Republic},
}
Endnote
%0 Conference Proceedings %A Tigunova, Anna %A Mirza, Paramita %A Yates, Andrew %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T PRIDE: Predicting Relationships in Conversations : %G eng %U http://hdl.handle.net/21.11116/0000-000C-DBF2-C %U https://aclanthology.org/2021.emnlp-main.380/ %R 10.18653/v1/2021.emnlp-main.380 %D 2021 %B The Conference on Empirical Methods in Natural Language Processing %Z date of event: 2021-11-07 - 2021-11-11 %C Punta Cana, Dominican Republic %B Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing %E Moens, Marie-Francine; Huang, Xuanjing; Specia, Lucia; Yih, Scott Wen-tau %P 4636 - 4650 %I ACL
[74]
G. H. Torbati, A. Yates, and G. Weikum, “You Get What You Chat: Using Conversations to Personalize Search-based Recommendations,” 2021. [Online]. Available: https://arxiv.org/abs/2109.04716. (arXiv: 2109.04716)
Abstract
Prior work on personalized recommendations has focused on exploiting explicit signals from user-specific queries, clicks, likes, and ratings. This paper investigates tapping into a different source of implicit signals of interests and tastes: online chats between users. The paper develops an expressive model and effective methods for personalizing search-based entity recommendations. User models derived from chats augment different methods for re-ranking entity answers for medium-grained queries. The paper presents specific techniques to enhance the user models by capturing domain-specific vocabularies and by entity-based expansion. Experiments are based on a collection of online chats from a controlled user study covering three domains: books, travel, food. We evaluate different configurations and compare chat-based user models against concise user profiles from questionnaires. Overall, these two variants perform on par in terms of NCDG@20, but each has advantages in certain domains.
Export
BibTeX
@online{Haratinezhad2109.04716,
  TITLE        = {You Get What You Chat: Using Conversations to Personalize Search-based Recommendations},
  AUTHOR       = {Torbati, Ghazaleh Haratinezhad and Yates, Andrew and Weikum, Gerhard},
  LANGUAGE     = {eng},
  URL          = {https://arxiv.org/abs/2109.04716},
  EPRINT       = {2109.04716},
  EPRINTTYPE   = {arXiv},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {Prior work on personalized recommendations has focused on exploiting explicit signals from user-specific queries, clicks, likes, and ratings. This paper investigates tapping into a different source of implicit signals of interests and tastes: online chats between users. The paper develops an expressive model and effective methods for personalizing search-based entity recommendations. User models derived from chats augment different methods for re-ranking entity answers for medium-grained queries. The paper presents specific techniques to enhance the user models by capturing domain-specific vocabularies and by entity-based expansion. Experiments are based on a collection of online chats from a controlled user study covering three domains: books, travel, food. We evaluate different configurations and compare chat-based user models against concise user profiles from questionnaires. Overall, these two variants perform on par in terms of NCDG@20, but each has advantages in certain domains.},
}
Endnote
%0 Report %A Torbati, Ghazaleh Haratinezhad %A Yates, Andrew %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T You Get What You Chat: Using Conversations to Personalize Search-based Recommendations : %G eng %U http://hdl.handle.net/21.11116/0000-0009-64B9-6 %U https://arxiv.org/abs/2109.04716 %D 2021 %X Prior work on personalized recommendations has focused on exploiting explicit<br>signals from user-specific queries, clicks, likes, and ratings. This paper<br>investigates tapping into a different source of implicit signals of interests<br>and tastes: online chats between users. The paper develops an expressive model<br>and effective methods for personalizing search-based entity recommendations.<br>User models derived from chats augment different methods for re-ranking entity<br>answers for medium-grained queries. The paper presents specific techniques to<br>enhance the user models by capturing domain-specific vocabularies and by<br>entity-based expansion. Experiments are based on a collection of online chats<br>from a controlled user study covering three domains: books, travel, food. We<br>evaluate different configurations and compare chat-based user models against<br>concise user profiles from questionnaires. Overall, these two variants perform<br>on par in terms of NCDG@20, but each has advantages in certain domains.<br> %K Computer Science, Information Retrieval, cs.IR
[75]
G. H. Torbati, A. Yates, and G. Weikum, “Personalized Entity Search by Sparse and Scrutable User Profiles,” 2021. [Online]. Available: https://arxiv.org/abs/2109.04713. (arXiv: 2109.04713)
Abstract
Prior work on personalizing web search results has focused on considering query-and-click logs to capture users individual interests. For product search, extensive user histories about purchases and ratings have been exploited. However, for general entity search, such as for books on specific topics or travel destinations with certain features, personalization is largely underexplored. In this paper, we address personalization of book search, as an exemplary case of entity search, by exploiting sparse user profiles obtained through online questionnaires. We devise and compare a variety of re-ranking methods based on language models or neural learning. Our experiments show that even very sparse information about individuals can enhance the effectiveness of the search results.
Export
BibTeX
@online{Haratinezhad2109.04713,
  TITLE        = {Personalized Entity Search by Sparse and Scrutable User Profiles},
  AUTHOR       = {Torbati, Ghazaleh Haratinezhad and Yates, Andrew and Weikum, Gerhard},
  LANGUAGE     = {eng},
  URL          = {https://arxiv.org/abs/2109.04713},
  EPRINT       = {2109.04713},
  EPRINTTYPE   = {arXiv},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {Prior work on personalizing web search results has focused on considering query-and-click logs to capture users individual interests. For product search, extensive user histories about purchases and ratings have been exploited. However, for general entity search, such as for books on specific topics or travel destinations with certain features, personalization is largely underexplored. In this paper, we address personalization of book search, as an exemplary case of entity search, by exploiting sparse user profiles obtained through online questionnaires. We devise and compare a variety of re-ranking methods based on language models or neural learning. Our experiments show that even very sparse information about individuals can enhance the effectiveness of the search results.},
}
Endnote
%0 Report %A Torbati, Ghazaleh Haratinezhad %A Yates, Andrew %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Personalized Entity Search by Sparse and Scrutable User Profiles : %G eng %U http://hdl.handle.net/21.11116/0000-0009-64AC-5 %U https://arxiv.org/abs/2109.04713 %D 2021 %X Prior work on personalizing web search results has focused on considering<br>query-and-click logs to capture users individual interests. For product search,<br>extensive user histories about purchases and ratings have been exploited.<br>However, for general entity search, such as for books on specific topics or<br>travel destinations with certain features, personalization is largely<br>underexplored. In this paper, we address personalization of book search, as an<br>exemplary case of entity search, by exploiting sparse user profiles obtained<br>through online questionnaires. We devise and compare a variety of re-ranking<br>methods based on language models or neural learning. Our experiments show that<br>even very sparse information about individuals can enhance the effectiveness of<br>the search results.<br> %K Computer Science, Information Retrieval, cs.IR
[76]
G. H. Torbati, A. Yates, and G. Weikum, “You Get What You Chat: Using Conversations to Personalize Search-based Recommendations,” in Advances in Information Retrieval (ECIR 2021), Lucca, Italy (Online Event), 2021.
Export
BibTeX
@inproceedings{Torbati_ECIR2021,
  TITLE        = {You Get What You Chat: {U}sing Conversations to Personalize Search-based Recommendations},
  AUTHOR       = {Torbati, Ghazaleh Haratinezhad and Yates, Andrew and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-3-030-72112-1},
  DOI          = {10.1007/978-3-030-72113-8_14},
  PUBLISHER    = {Springer},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  DATE         = {2021},
  BOOKTITLE    = {Advances in Information Retrieval (ECIR 2021)},
  EDITOR       = {Hiemstra, Djoerd and Moens, Marie-Francine and Mothe, Josiane and Perego, Raffaele and Potthast, Martin and Sebastiani, Fabrizio},
  PAGES        = {207--223},
  SERIES       = {Lecture Notes in Computer Science},
  VOLUME       = {12656},
  ADDRESS      = {Lucca, Italy (Online Event)},
}
Endnote
%0 Conference Proceedings %A Torbati, Ghazaleh Haratinezhad %A Yates, Andrew %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T You Get What You Chat: Using Conversations to Personalize Search-based Recommendations : %G eng %U http://hdl.handle.net/21.11116/0000-0007-ECA2-8 %R 10.1007/978-3-030-72113-8_14 %D 2021 %B 43rd European Conference on IR Research %Z date of event: 2021-03-28 - 2021-04-01 %C Lucca, Italy (Online Event) %B Advances in Information Retrieval %E Hiemstra, Djoerd; Moens, Marie-Francine; Mothe, Josiane; Perego, Raffaele; Potthast, Martin; Sebastiani, Fabrizio %P 207 - 223 %I Springer %@ 978-3-030-72112-1 %B Lecture Notes in Computer Science %N 12656
[77]
K. H. Tran, A. Ghazimatin, and R. Saha Roy, “Counterfactual Explanations for Neural Recommenders,” 2021. [Online]. Available: https://arxiv.org/abs/2105.05008. (arXiv: 2105.05008)
Abstract
Understanding why specific items are recommended to users can significantly increase their trust and satisfaction in the system. While neural recommenders have become the state-of-the-art in recent years, the complexity of deep models still makes the generation of tangible explanations for end users a challenging problem. Existing methods are usually based on attention distributions over a variety of features, which are still questionable regarding their suitability as explanations, and rather unwieldy to grasp for an end user. Counterfactual explanations based on a small set of the user's own actions have been shown to be an acceptable solution to the tangibility problem. However, current work on such counterfactuals cannot be readily applied to neural models. In this work, we propose ACCENT, the first general framework for finding counterfactual explanations for neural recommenders. It extends recently-proposed influence functions for identifying training points most relevant to a recommendation, from a single to a pair of items, while deducing a counterfactual set in an iterative process. We use ACCENT to generate counterfactual explanations for two popular neural models, Neural Collaborative Filtering (NCF) and Relational Collaborative Filtering (RCF), and demonstrate its feasibility on a sample of the popular MovieLens 100K dataset.
Export
BibTeX
@online{Tran_2105.05008,
  TITLE        = {Counterfactual Explanations for Neural Recommenders},
  AUTHOR       = {Tran, Khanh Hiep and Ghazimatin, Azin and Saha Roy, Rishiraj},
  LANGUAGE     = {eng},
  URL          = {https://arxiv.org/abs/2105.05008},
  EPRINT       = {2105.05008},
  EPRINTTYPE   = {arXiv},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {Understanding why specific items are recommended to users can significantly increase their trust and satisfaction in the system. While neural recommenders have become the state-of-the-art in recent years, the complexity of deep models still makes the generation of tangible explanations for end users a challenging problem. Existing methods are usually based on attention distributions over a variety of features, which are still questionable regarding their suitability as explanations, and rather unwieldy to grasp for an end user. Counterfactual explanations based on a small set of the user's own actions have been shown to be an acceptable solution to the tangibility problem. However, current work on such counterfactuals cannot be readily applied to neural models. In this work, we propose ACCENT, the first general framework for finding counterfactual explanations for neural recommenders. It extends recently-proposed influence functions for identifying training points most relevant to a recommendation, from a single to a pair of items, while deducing a counterfactual set in an iterative process. We use ACCENT to generate counterfactual explanations for two popular neural models, Neural Collaborative Filtering (NCF) and Relational Collaborative Filtering (RCF), and demonstrate its feasibility on a sample of the popular MovieLens 100K dataset.},
}
Endnote
%0 Report %A Tran, Khanh Hiep %A Ghazimatin, Azin %A Saha Roy, Rishiraj %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Counterfactual Explanations for Neural Recommenders : %G eng %U http://hdl.handle.net/21.11116/0000-0009-67C3-7 %U https://arxiv.org/abs/2105.05008 %D 2021 %X Understanding why specific items are recommended to users can significantly<br>increase their trust and satisfaction in the system. While neural recommenders<br>have become the state-of-the-art in recent years, the complexity of deep models<br>still makes the generation of tangible explanations for end users a challenging<br>problem. Existing methods are usually based on attention distributions over a<br>variety of features, which are still questionable regarding their suitability<br>as explanations, and rather unwieldy to grasp for an end user. Counterfactual<br>explanations based on a small set of the user's own actions have been shown to<br>be an acceptable solution to the tangibility problem. However, current work on<br>such counterfactuals cannot be readily applied to neural models. In this work,<br>we propose ACCENT, the first general framework for finding counterfactual<br>explanations for neural recommenders. It extends recently-proposed influence<br>functions for identifying training points most relevant to a recommendation,<br>from a single to a pair of items, while deducing a counterfactual set in an<br>iterative process. We use ACCENT to generate counterfactual explanations for<br>two popular neural models, Neural Collaborative Filtering (NCF) and Relational<br>Collaborative Filtering (RCF), and demonstrate its feasibility on a sample of<br>the popular MovieLens 100K dataset.<br> %K Computer Science, Information Retrieval, cs.IR,Computer Science, Learning, cs.LG
[78]
K. H. Tran, A. Ghazimatin, and R. Saha Roy, “Counterfactual Explanations for Neural Recommenders,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{tran2021counterfactual,
  TITLE        = {Counterfactual Explanations for Neural Recommenders},
  AUTHOR       = {Tran, Khanh Hiep and Ghazimatin, Azin and Saha Roy, Rishiraj},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-8037-9},
  DOI          = {10.1145/3404835.3463005},
  PUBLISHER    = {ACM},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  EDITOR       = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
  PAGES        = {1627--1631},
  ADDRESS      = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings %A Tran, Khanh Hiep %A Ghazimatin, Azin %A Saha Roy, Rishiraj %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Counterfactual Explanations for Neural Recommenders : %G eng %U http://hdl.handle.net/21.11116/0000-0008-5140-4 %R 10.1145/3404835.3463005 %D 2021 %B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2021-07-11 - 2021-07-15 %C Virtual Event, Canada %B SIGIR '21 %E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellogin, Alejandro; Yoshioka, Masaharu %P 1627 - 1631 %I ACM
[79]
G. Weikum, “Knowledge Graphs 2021: A Data Odyssey,” Proceedings of the VLDB Endowment (Proc. VLDB 2021), vol. 14, no. 12, 2021.
Export
BibTeX
@article{Weikum2021_PVLDB,
  TITLE        = {Knowledge Graphs 2021: {A} Data Odyssey},
  AUTHOR       = {Weikum, Gerhard},
  LANGUAGE     = {eng},
  PUBLISHER    = {VLDB Endowment Inc.},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  JOURNAL      = {Proceedings of the VLDB Endowment (Proc. VLDB)},
  VOLUME       = {14},
  NUMBER       = {12},
  PAGES        = {3233--3238},
  BOOKTITLE    = {Proceedings of the 47th International Conference on Very Large Data Bases (VLDB 2021)},
  EDITOR       = {Dong, Xin Luna and Naumann, Felix},
}
Endnote
%0 Journal Article %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowledge Graphs 2021: A Data Odyssey : %G eng %U http://hdl.handle.net/21.11116/0000-0009-631F-6 %7 2021 %D 2021 %J Proceedings of the VLDB Endowment %O PVLDB %V 14 %N 12 %& 3233 %P 3233 - 3238 %I VLDB Endowment Inc. %B Proceedings of the 47th International Conference on Very Large Data Bases %O VLDB 2021 Copenhagen, Denmark, 16-20 August 2021
[80]
G. Weikum, L. Dong, S. Razniewski, and F. Suchanek, “Machine Knowledge: Creation and Curation of Comprehensive Knowledge Bases,” Foundations and Trends in Databases, vol. 10, no. 2–4, 2021.
Export
BibTeX
@article{Weikum10.1561/1900000064,
  TITLE        = {Machine Knowledge: {C}reation and Curation of Comprehensive Knowledge Bases},
  AUTHOR       = {Weikum, Gerhard and Dong, Luna and Razniewski, Simon and Suchanek, Fabian},
  LANGUAGE     = {eng},
  ISSN         = {1931-7883},
  ISBN         = {978-1-68083-836-7},
  DOI          = {10.1561/1900000064},
  PUBLISHER    = {Now Publishers},
  ADDRESS      = {Boston},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  DATE         = {2021},
  JOURNAL      = {Foundations and Trends in Databases},
  VOLUME       = {10},
  NUMBER       = {2-4},
  PAGES        = {108--490},
}
Endnote
%0 Journal Article %A Weikum, Gerhard %A Dong, Luna %A Razniewski, Simon %A Suchanek, Fabian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Machine Knowledge: Creation and Curation of Comprehensive Knowledge Bases : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6317-E %R 10.1561/1900000064 %@ 978-1-68083-836-7 %7 2021 %D 2021 %J Foundations and Trends in Databases %V 10 %N 2-4 %& 108 %P 108 - 490 %I Now Publishers %C Boston %@ false
[81]
A. Yates, R. Nogueira, and J. Lin, “Pretrained Transformers for Text Ranking: BERT and Beyond,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{Yates_SIGIR21,
  TITLE        = {Pretrained Transformers for Text Ranking: {BERT} and Beyond},
  AUTHOR       = {Yates, Andrew and Nogueira, Rodrigo and Lin, Jimmy},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-8037-9},
  DOI          = {10.1145/3404835.3462812},
  PUBLISHER    = {ACM},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  EDITOR       = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
  PAGES        = {2666--2668},
  ADDRESS      = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings %A Yates, Andrew %A Nogueira, Rodrigo %A Lin, Jimmy %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Pretrained Transformers for Text Ranking: BERT and Beyond : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6674-2 %R 10.1145/3404835.3462812 %D 2021 %B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2021-07-11 - 2021-07-15 %C Virtual Event, Canada %B SIGIR '21 %E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellog&#237;n, Alejandro; Yoshioka, Masaharu %P 2666 - 2668 %I ACM %@ 978-1-4503-8037-9
[82]
X. Zhang, A. Yates, and J. Lin, “Comparing Score Aggregation Approaches for Document Retrieval with Pretrained Transformers,” in Advances in Information Retrieval (ECIR 2021), Lucca, Italy (Online Event), 2021.
Export
BibTeX
@inproceedings{Zhang_ECIR2021,
  TITLE        = {Comparing Score Aggregation Approaches for Document Retrieval with Pretrained Transformers},
  AUTHOR       = {Zhang, Xinyu and Yates, Andrew and Lin, Jimmy},
  LANGUAGE     = {eng},
  ISBN         = {978-3-030-72239-5},
  DOI          = {10.1007/978-3-030-72240-1_11},
  PUBLISHER    = {Springer},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  DATE         = {2021},
  BOOKTITLE    = {Advances in Information Retrieval (ECIR 2021)},
  EDITOR       = {Hiemstra, Djoerd and Moens, Marie-Francine and Mothe, Josiane and Perego, Raffaele and Potthast, Martin and Sebastiani, Fabrizio},
  PAGES        = {150--163},
  SERIES       = {Lecture Notes in Computer Science},
  VOLUME       = {12657},
  ADDRESS      = {Lucca, Italy (Online Event)},
}
Endnote
%0 Conference Proceedings %A Zhang, Xinyu %A Yates, Andrew %A Lin, Jimmy %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Comparing Score Aggregation Approaches for Document Retrieval with Pretrained Transformers : %G eng %U http://hdl.handle.net/21.11116/0000-0009-6614-E %R 10.1007/978-3-030-72240-1_11 %D 2021 %B 43rd European Conference on IR Research %Z date of event: 2021-03-28 - 2021-04-01 %C Lucca, Italy (Online Event) %B Advances in Information Retrieval %E Hiemstra, Djoerd; Moens, Marie-Francine; Mothe, Josiane; Perego, Raffaele; Potthast, Martin; Sebastiani, Fabrizio %P 150 - 163 %I Springer %@ 978-3-030-72239-5 %B Lecture Notes in Computer Science %N 12657
[83]
X. Zhang, J. Xin, A. Yates, and J. Lin, “Bag-of-Words Baselines for Semantic Code Search,” in The 1st Workshop on Natural Language Processing for Programming (NLP4Prog 2021), Bangkok, Thailand (Online), 2021.
Export
BibTeX
@inproceedings{Zhang_NLP4Prog2021,
  TITLE        = {Bag-of-Words Baselines for Semantic Code Search},
  AUTHOR       = {Zhang, Xinyu and Xin, Ji and Yates, Andrew and Lin, Jimmy},
  LANGUAGE     = {eng},
  ISBN         = {978-1-954085-64-0},
  URL          = {https://aclanthology.org/2021.nlp4prog-1.0},
  PUBLISHER    = {ACL},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {The 1st Workshop on Natural Language Processing for Programming (NLP4Prog 2021)},
  EDITOR       = {Lachmy, Royi and Yao, Ziyu and Durrett, Greg and Gligoric, Milos and Li, Junyi Jessy and Mooney, Ray and Neubig, Graham and Su, Yu and Sun, Huan and Tsarfaty, Reut},
  PAGES        = {88--94},
  ADDRESS      = {Bangkok, Thailand (Online)},
}
Endnote
%0 Conference Proceedings %A Zhang, Xinyu %A Xin, Ji %A Yates, Andrew %A Lin, Jimmy %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Bag-of-Words Baselines for Semantic Code Search : %G eng %U http://hdl.handle.net/21.11116/0000-0009-619E-8 %U https://aclanthology.org/2021.nlp4prog-1.0 %D 2021 %B 1st Workshop on Natural Language Processing for Programming %Z date of event: 2021-08-06 - 2021-08-06 %C Bangkok, Thailand (Online) %B The 1st Workshop on Natural Language Processing for Programming %E Lachmy, Royi; Yao, Ziyu; Durrett, Greg; Gligoric, Milos; Li, Junyi Jessy; Mooney, Ray; Neubig, Graham; Su, Yu; Sun, Huan; Tsarfaty, Reut %P 88 - 94 %I ACL %@ 978-1-954085-64-0
[84]
Z. Zheng, K. Hui, B. He, X. Han, L. Sun, and A. Yates, “Contextualized Query Expansion via Unsupervised Chunk Selection for Text Retrieval,” Information Processing & Management, vol. 58, no. 5, 2021.
Export
BibTeX
@article{Zheng2021,
  TITLE        = {Contextualized Query Expansion via Unsupervised Chunk Selection for Text Retrieval},
  AUTHOR       = {Zheng, Zhi and Hui, Kai and He, Ben and Han, Xianpei and Sun, Le and Yates, Andrew},
  LANGUAGE     = {eng},
  ISSN         = {0306-4573},
  DOI          = {10.1016/j.ipm.2021.102672},
  PUBLISHER    = {Elsevier},
  ADDRESS      = {Amsterdam},
  YEAR         = {2021},
  MARGINALMARK = {$\bullet$},
  DATE         = {2021},
  JOURNAL      = {Information Processing \& Management},
  VOLUME       = {58},
  NUMBER       = {5},
  EID          = {102672},
}
Endnote
%0 Journal Article %A Zheng, Zhi %A Hui, Kai %A He, Ben %A Han, Xianpei %A Sun, Le %A Yates, Andrew %+ External Organizations External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Contextualized Query Expansion via Unsupervised Chunk Selection for Text Retrieval : %G eng %U http://hdl.handle.net/21.11116/0000-0009-4747-8 %R 10.1016/j.ipm.2021.102672 %7 2021 %D 2021 %J Information Processing & Management %V 58 %N 5 %Z sequence number: 102672 %I Elsevier %C Amsterdam %@ false