Publications
2023
[1]
A. S. Anwari, “Learning Filters to Improve Social Media Search,” Universität des Saarlandes, Saarbrücken, 2023.
Export
BibTeX
@mastersthesis{AnwariMSc23,
TITLE = {Learning Filters to Improve Social Media Search},
AUTHOR = {Anwari, Ahmed Sohail},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
}
Endnote
%0 Thesis
%A Anwari, Ahmed Sohail
%Y Yates, Andrew
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Learning Filters to Improve Social Media Search :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-1C08-C
%I Universität des Saarlandes
%C Saarbrücken
%D 2023
%P XI, 69 p.
%V master
%9 master
[2]
H. Arnaout, T.-P. Nguyen, S. Razniewski, and G. Weikum, “UnCommonSense in Action! Informative Negations for Commonsense Knowledge Bases,” in WSDM ’23, 16th ACM International Conference on Web Search and Data Mining, Singapore, 2023.
Export
BibTeX
@inproceedings{Arnaout_WSDM23,
TITLE = {{UnCommonSense} in Action! {I}nformative Negations for Commonsense Knowledge Bases},
AUTHOR = {Arnaout, Hiba and Nguyen, Tuan-Phong and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9407-9},
DOI = {10.1145/3539597.3573027},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {WSDM '23, 16th ACM International Conference on Web Search and Data Mining},
EDITOR = {Chua, Tat-Seng and Lauw, Hady and Si, Luo and Terzi, Evimaria and Tsaparas, Panayiotis},
PAGES = {1120--1123},
ADDRESS = {Singapore},
}
Endnote
%0 Conference Proceedings
%A Arnaout, Hiba
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T UnCommonSense in Action! Informative Negations for Commonsense Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-18BC-6
%R 10.1145/3539597.3573027
%D 2023
%B 16th ACM International Conference on Web Search and Data Mining
%Z date of event: 2023-02-27 - 2023-03-03
%C Singapore
%B WSDM '23
%E Chua, Tat-Seng; Lauw, Hady; Si, Luo; Terzi, Evimaria; Tsaparas, Panayiotis
%P 1120 - 1123
%I ACM
%@ 978-1-4503-9407-9
[3]
H. Arnaout and S. Razniewski, “Can Large Language Models Generate Salient Negative Statements?,” 2023. [Online]. Available: https://arxiv.org/abs/2305.16755. (arXiv: 2305.16755)
Abstract
We examine the ability of large language models (LLMs) to generate salient (interesting) negative statements about real-world entities; an emerging research topic of the last few years. We probe the LLMs using zero- and k-shot unconstrained probes, and compare with traditional methods for negation generation, i.e., pattern-based textual extractions and knowledge-graph-based inferences, as well as crowdsourced gold statements. We measure the correctness and salience of the generated lists about subjects from different domains. Our evaluation shows that guided probes do in fact improve the quality of generated negatives, compared to the zero-shot variant. Nevertheless, using both prompts, LLMs still struggle with the notion of factuality of negatives, frequently generating many ambiguous statements, or statements with negative keywords but a positive meaning.
Export
BibTeX
@online{Arnaout2305.16755,
TITLE = {Can Large Language Models Generate Salient Negative Statements?},
AUTHOR = {Arnaout, Hiba and Razniewski, Simon},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2305.16755},
EPRINT = {2305.16755},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {We examine the ability of large language models (LLMs) to generate salient (interesting) negative statements about real-world entities; an emerging research topic of the last few years. We probe the LLMs using zero- and k-shot unconstrained probes, and compare with traditional methods for negation generation, i.e., pattern-based textual extractions and knowledge-graph-based inferences, as well as crowdsourced gold statements. We measure the correctness and salience of the generated lists about subjects from different domains. Our evaluation shows that guided probes do in fact improve the quality of generated negatives, compared to the zero-shot variant. Nevertheless, using both prompts, LLMs still struggle with the notion of factuality of negatives, frequently generating many ambiguous statements, or statements with negative keywords but a positive meaning.},
}
Endnote
%0 Report
%A Arnaout, Hiba
%A Razniewski, Simon
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Can Large Language Models Generate Salient Negative Statements? :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-3D73-E
%U https://arxiv.org/abs/2305.16755
%D 2023
%X We examine the ability of large language models (LLMs) to generate salient (interesting) negative statements about real-world entities; an emerging research topic of the last few years. We probe the LLMs using zero- and k-shot unconstrained probes, and compare with traditional methods for negation generation, i.e., pattern-based textual extractions and knowledge-graph-based inferences, as well as crowdsourced gold statements. We measure the correctness and salience of the generated lists about subjects from different domains. Our evaluation shows that guided probes do in fact improve the quality of generated negatives, compared to the zero-shot variant. Nevertheless, using both prompts, LLMs still struggle with the notion of factuality of negatives, frequently generating many ambiguous statements, or statements with negative keywords but a positive meaning.
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI
[4]
A. Bashir, “Leveraging Self-Supervised Learning in Domain-Specific Language Models,” Universität des Saarlandes, Saarbrücken, 2023.
Export
BibTeX
@mastersthesis{BashirMSc23,
TITLE = {Leveraging Self-Supervised Learning in Domain-Specific Language Models},
AUTHOR = {Bashir, Abdallah},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
}
Endnote
%0 Thesis
%A Bashir, Abdallah
%Y Terolli, Erisa
%Y Ernst, Patrick
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Leveraging Self-Supervised Learning in Domain-Specific Language Models :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-2D82-E
%I Universität des Saarlandes
%C Saarbrücken
%D 2023
%P XI, 54 p.
%V master
%9 master
[5]
L. Boualili and A. Yates, “A Study of Term-Topic Embeddings for Ranking,” in Advances in Information Retrieval (ECIR 2023), Dublin, Ireland, 2023.
Export
BibTeX
@inproceedings{Boualili_ECIR23,
TITLE = {A Study of Term-Topic Embeddings for Ranking},
AUTHOR = {Boualili, Lila and Yates, Andrew},
LANGUAGE = {eng},
ISBN = {978-3-031-28237-9},
DOI = {10.1007/978-3-031-28238-6_25},
PUBLISHER = {Springer},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
BOOKTITLE = {Advances in Information Retrieval (ECIR 2023)},
EDITOR = {Kamps, Jaap and Goeuriot, Lorraine and Crestani, Fabio and Maistro, Maria and Joho, Hideo and Davis, Brian and Gurrin, Cathal and Kruschwitz, Udo and Caputo, Annalina},
PAGES = {359--366},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {13981},
ADDRESS = {Dublin, Ireland},
}
Endnote
%0 Conference Proceedings
%A Boualili, Lila
%A Yates, Andrew
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T A Study of Term-Topic Embeddings for Ranking :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC34-2
%R 10.1007/978-3-031-28238-6_25
%D 2023
%B 45th European Conference on IR Research
%Z date of event: 2023-04-02 - 2023-04-06
%C Dublin, Ireland
%B Advances in Information Retrieval
%E Kamps, Jaap; Goeuriot, Lorraine; Crestani, Fabio; Maistro, Maria; Joho, Hideo; Davis, Brian; Gurrin, Cathal; Kruschwitz, Udo; Caputo, Annalina
%P 359 - 366
%I Springer
%@ 978-3-031-28237-9
%B Lecture Notes in Computer Science
%N 13981
[6]
L. Chen, S. Razniewski, and G. Weikum, “Knowledge Base Completion for Long-Tail Entities,” 2023. [Online]. Available: https://arxiv.org/abs/2306.17472. (arXiv: 2306.17472)
Abstract
Despite their impressive scale, knowledge bases (KBs), such as Wikidata, still contain significant gaps. Language models (LMs) have been proposed as a source for filling these gaps. However, prior works have focused on prominent entities with rich coverage by LMs, neglecting the crucial case of long-tail entities. In this paper, we present a novel method for LM-based-KB completion that is specifically geared for facts about long-tail entities. The method leverages two different LMs in two stages: for candidate retrieval and for candidate verification and disambiguation. To evaluate our method and various baselines, we introduce a novel dataset, called MALT, rooted in Wikidata. Our method outperforms all baselines in F1, with major gains especially in recall.
Export
BibTeX
@online{Chen2306.17472,
TITLE = {Knowledge Base Completion for Long-Tail Entities},
AUTHOR = {Chen, Lihu and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2306.17472},
EPRINT = {2306.17472},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Despite their impressive scale, knowledge bases (KBs), such as Wikidata, still contain significant gaps. Language models (LMs) have been proposed as a source for filling these gaps. However, prior works have focused on prominent entities with rich coverage by LMs, neglecting the crucial case of long-tail entities. In this paper, we present a novel method for LM-based-KB completion that is specifically geared for facts about long-tail entities. The method leverages two different LMs in two stages: for candidate retrieval and for candidate verification and disambiguation. To evaluate our method and various baselines, we introduce a novel dataset, called MALT, rooted in Wikidata. Our method outperforms all baselines in F1, with major gains especially in recall.},
}
Endnote
%0 Report
%A Chen, Lihu
%A Razniewski, Simon
%A Weikum, Gerhard
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Knowledge Base Completion for Long-Tail Entities :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-6C7F-D
%U https://arxiv.org/abs/2306.17472
%D 2023
%X Despite their impressive scale, knowledge bases (KBs), such as Wikidata, still contain significant gaps. Language models (LMs) have been proposed as a source for filling these gaps. However, prior works have focused on prominent entities with rich coverage by LMs, neglecting the crucial case of long-tail entities. In this paper, we present a novel method for LM-based-KB completion that is specifically geared for facts about long-tail entities. The method leverages two different LMs in two stages: for candidate retrieval and for candidate verification and disambiguation. To evaluate our method and various baselines, we introduce a novel dataset, called MALT, rooted in Wikidata. Our method outperforms all baselines in F1, with major gains especially in recall.
%K Computer Science, Computation and Language, cs.CL
[7]
P. Christmann, R. Saha Roy, and G. Weikum, “Explainable Conversational Question Answering over Heterogeneous Sources via Iterative Graph Neural Networks,” in SIGIR ’23, 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, Taipei, Taiwan. (Accepted/in press)
Export
BibTeX
@inproceedings{Christmann:SIGIR2023,
TITLE = {Explainable Conversational Question Answering over Heterogeneous Sources via Iterative Graph Neural Networks},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
PUBLISHER = {ACM},
YEAR = {2023},
PUBLREMARK = {Accepted},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGIR '23, 46th International ACM SIGIR Conference on Research and Development in Information Retrieval},
ADDRESS = {Taipei, Taiwan},
}
Endnote
%0 Conference Proceedings
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Explainable Conversational Question Answering over Heterogeneous Sources via Iterative Graph Neural Networks :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-FE28-A
%D 2023
%B 46th International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2023-07-23 - 2023-07-27
%C Taipei, Taiwan
%B SIGIR '23
%I ACM
[8]
P. Christmann, R. Saha Roy, and G. Weikum, “CompMix: A Benchmark for Heterogeneous Question Answering,” 2023. [Online]. Available: https://arxiv.org/abs/2306.12235. (arXiv: 2306.12235)
Abstract
Fact-centric question answering (QA) often requires access to multiple, heterogeneous, information sources. By jointly considering several sources like a knowledge base (KB), a text collection, and tables from the web, QA systems can enhance their answer coverage and confidence. However, existing QA benchmarks are mostly constructed with a single source of knowledge in mind. This limits capabilities of these benchmarks to fairly evaluate QA systems that can tap into more than one information repository. To bridge this gap, we release CompMix, a crowdsourced QA benchmark which naturally demands the integration of a mixture of input sources. CompMix has a total of 9,410 questions, and features several complex intents like joins and temporal conditions. Evaluation of a range of QA systems on CompMix highlights the need for further research on leveraging information from heterogeneous sources.
Export
BibTeX
@online{Christmann2306.12235,
TITLE = {{CompMix}: A Benchmark for Heterogeneous Question Answering},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2306.12235},
EPRINT = {2306.12235},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Fact-centric question answering (QA) often requires access to multiple, heterogeneous, information sources. By jointly considering several sources like a knowledge base (KB), a text collection, and tables from the web, QA systems can enhance their answer coverage and confidence. However, existing QA benchmarks are mostly constructed with a single source of knowledge in mind. This limits capabilities of these benchmarks to fairly evaluate QA systems that can tap into more than one information repository. To bridge this gap, we release CompMix, a crowdsourced QA benchmark which naturally demands the integration of a mixture of input sources. CompMix has a total of 9,410 questions, and features several complex intents like joins and temporal conditions. Evaluation of a range of QA systems on CompMix highlights the need for further research on leveraging information from heterogeneous sources.},
}
Endnote
%0 Report
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CompMix: A Benchmark for Heterogeneous Question Answering :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-579D-1
%U https://arxiv.org/abs/2306.12235
%D 2023
%X Fact-centric question answering (QA) often requires access to multiple, heterogeneous, information sources. By jointly considering several sources like a knowledge base (KB), a text collection, and tables from the web, QA systems can enhance their answer coverage and confidence. However, existing QA benchmarks are mostly constructed with a single source of knowledge in mind. This limits capabilities of these benchmarks to fairly evaluate QA systems that can tap into more than one information repository. To bridge this gap, we release CompMix, a crowdsourced QA benchmark which naturally demands the integration of a mixture of input sources. CompMix has a total of 9,410 questions, and features several complex intents like joins and temporal conditions. Evaluation of a range of QA systems on CompMix highlights the need for further research on leveraging information from heterogeneous sources.
%K Computer Science, Information Retrieval, cs.IR
[9]
P. Christmann, R. Saha Roy, and G. Weikum, “CLOCQ: A Toolkit for Fast and Easy Access to Knowledge Bases,” in BTW 2023, Dresden, Germany, 2023.
Export
BibTeX
@inproceedings{Christmann_BTW2023,
TITLE = {{CLOCQ}: {A} Toolkit for Fast and Easy Access to Knowledge Bases},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-3-88579-725-8},
DOI = {10.18420/BTW2023-28},
PUBLISHER = {GI},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {BTW 2023},
EDITOR = {K{\"o}nig-Ries, Birgitta and Scherzinger, Stefanie and Lehner, Wolfgang and Vossen, Gottfried},
PAGES = {579--591},
SERIES = {Lecture Notes in Informatics},
VOLUME = {P-331},
ADDRESS = {Dresden, Germany},
}
Endnote
%0 Conference Proceedings
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CLOCQ: A Toolkit for Fast and Easy Access to Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-BF14-7
%R 10.18420/BTW2023-28
%D 2023
%B 20th Conference on Database Systems for Business, Technology and Web
%Z date of event: 2023-03-06 - 2023-03-10
%C Dresden, Germany
%B BTW 2023
%E König-Ries, Birgitta; Scherzinger, Stefanie; Lehner, Wolfgang; Vossen, Gottfried
%P 579 - 591
%I GI
%@ 978-3-88579-725-8
%B Lecture Notes in Informatics
%N P-331
[10]
X. L. Dong, B. Li, J. Stoyanovich, A. K. H. Tung, G. Weikum, A. Halevy, and W.-C. Tan, “Personal Data for Personal Use: Vision or Reality?,” in SIGMOD ’23 Companion, Seattle, WA, USA, 2023.
Export
BibTeX
@inproceedings{DongPODS23,
TITLE = {Personal Data for Personal Use: Vision or Reality?},
AUTHOR = {Dong, Xin Luna and Li, Bo and Stoyanovich, Julia and Tung, Anthony Kum Hoe and Weikum, Gerhard and Halevy, Alon and Tan, Wang-Chiew},
LANGUAGE = {eng},
ISBN = {978-1-4503-9507-6},
DOI = {10.1145/3555041.3589378},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGMOD '23 Companion},
EDITOR = {Das, Sudipto and Pandis, Ippokratis and Candan, K. Sel{\c c}uk and Amer-Yahia, Sihem},
PAGES = {263--264},
ADDRESS = {Seattle WA USA},
}
Endnote
%0 Conference Proceedings
%A Dong, Xin Luna
%A Li, Bo
%A Stoyanovich, Julia
%A Tung, Anthony Kum Hoe
%A Weikum, Gerhard
%A Halevy, Alon
%A Tan, Wang-Chiew
%+ External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Personal Data for Personal Use: Vision or Reality? :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-5775-E
%R 10.1145/3555041.3589378
%D 2023
%B ACM/SIGMOD International Conference on Management of Data
%Z date of event: 2023-06-18 - 2023-06-23
%C Seattle WA USA
%B SIGMOD '23 Companion
%E Das, Sudipto; Pandis, Ippokratis; Candan, K. Selçuk; Amer-Yahia, Sihem
%P 263 - 264
%I ACM
%@ 978-1-4503-9507-6
[11]
A. Ghazimatin, “Enhancing Explainability and Scrutability of Recommender Systems,” in BTW 2023, Dresden, Germany, 2023.
Export
BibTeX
@inproceedings{DBLP:conf/btw/Ghazimatin23,
TITLE = {Enhancing Explainability and Scrutability of Recommender Systems},
AUTHOR = {Ghazimatin, Azin},
LANGUAGE = {eng},
ISBN = {978-3-88579-725-8},
DOI = {10.18420/BTW2023-32},
PUBLISHER = {GI},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {BTW 2023},
EDITOR = {K{\"o}nig-Ries, Birgitta and Scherzinger, Stefanie and Lehner, Wolfgang and Vossen, Gottfried},
PAGES = {633--640},
SERIES = {Lecture Notes in Informatics},
VOLUME = {P-331},
ADDRESS = {Dresden, Germany},
}
Endnote
%0 Conference Proceedings
%A Ghazimatin, Azin
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Enhancing Explainability and Scrutability of Recommender Systems :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC4D-7
%R 10.18420/BTW2023-32
%D 2023
%B 20th Conference on Database Systems for Business, Technology and Web
%Z date of event: 2023-03-06 - 2023-03-10
%C Dresden, Germany
%B BTW 2023
%E König-Ries, Birgitta; Scherzinger, Stefanie; Lehner, Wolfgang; Vossen, Gottfried
%P 633 - 640
%I GI
%@ 978-3-88579-725-8
%B Lecture Notes in Informatics
%N P-331
[12]
S. Ghosh, S. Razniewski, and G. Weikum, “Answering Count Questions with Structured Answers from Text,” Journal of Web Semantics, vol. 76, 2023.
Export
BibTeX
@article{Ghosh23,
TITLE = {Answering Count Questions with Structured Answers from Text},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
DOI = {10.1016/j.websem.2022.100769},
PUBLISHER = {Elsevier},
ADDRESS = {Amsterdam},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
JOURNAL = {Journal of Web Semantics},
VOLUME = {76},
EID = {100769},
}
Endnote
%0 Journal Article
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Answering Count Questions with Structured Answers from Text :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-47CB-0
%R 10.1016/j.websem.2022.100769
%7 2022
%D 2023
%J Journal of Web Semantics
%V 76
%Z sequence number: 100769
%I Elsevier
%C Amsterdam
[13]
S. Ghosh, S. Razniewski, and G. Weikum, “CoQEx: Entity Counts Explained,” in WSDM ’23, 16th ACM International Conference on Web Search and Data Mining, Singapore, 2023.
Export
BibTeX
@inproceedings{Christmann_WSDM23,
TITLE = {{CoQEx}: {E}ntity Counts Explained},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9407-9},
DOI = {10.1145/3539597.3573021},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {WSDM '23, 16th ACM International Conference on Web Search and Data Mining},
EDITOR = {Chua, Tat-Seng and Lauw, Hady and Si, Luo and Terzi, Evimaria and Tsaparas, Panayiotis},
PAGES = {1168--1171},
ADDRESS = {Singapore},
}
Endnote
%0 Conference Proceedings
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CoQEx: Entity Counts Explained :
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-F41F-0
%R 10.1145/3539597.3573021
%D 2023
%B 16th ACM International Conference on Web Search and Data Mining
%Z date of event: 2023-02-27 - 2023-03-03
%C Singapore
%B WSDM '23
%E Chua, Tat-Seng; Lauw, Hady; Si, Luo; Terzi, Evimaria; Tsaparas, Panayiotis
%P 1168 - 1171
%I ACM
%@ 978-1-4503-9407-9
[14]
S. Ghosh, S. Razniewski, and G. Weikum, “Class Cardinality Comparison as a Fermi Problem,” in The ACM Web Conference 2023 (WWW 2023), Austin, TX, USA, 2023.
Abstract
Questions on class cardinality comparisons are quite tricky to answer and come with its own challenges. They require some kind of reasoning since web documents and knowledge bases, indispensable sources of information, rarely store direct answers to questions, such as, "Are there more astronauts or Physics Nobel Laureates?" We tackle questions on class cardinality comparison by tapping into three sources for absolute cardinalities as well as the cardinalities of orthogonal subgroups of the classes. We propose novel techniques for aggregating signals with partial coverage for more reliable estimates and evaluate them on a dataset of 4005 class pairs, achieving an accuracy of 83.7%.
Export
BibTeX
@inproceedings{Ghosh2303.04532,
TITLE = {Class Cardinality Comparison as a {F}ermi Problem},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9419-2},
DOI = {10.1145/3543873.3587334},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Questions on class cardinality comparisons are quite tricky to answer and come with its own challenges. They require some kind of reasoning since web documents and knowledge bases, indispensable sources of information, rarely store direct answers to questions, such as, ``Are there more astronauts or Physics Nobel Laureates?'' We tackle questions on class cardinality comparison by tapping into three sources for absolute cardinalities as well as the cardinalities of orthogonal subgroups of the classes. We propose novel techniques for aggregating signals with partial coverage for more reliable estimates and evaluate them on a dataset of 4005 class pairs, achieving an accuracy of 83.7%.},
BOOKTITLE = {The ACM Web Conference 2023 (WWW 2023)},
EDITOR = {Ding, Ying and Tang, Jie and Sequeda, Juan and Aroyo, Lora and Castillo, Carlos and Houben, Geert-Jan},
PAGES = {148--151},
ADDRESS = {Austin, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Class Cardinality Comparison as a Fermi Problem :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-BF05-8
%R 10.1145/3543873.3587334
%D 2023
%B ACM Web Conference
%Z date of event: 2023-04-30 - 2023-05-04
%C Austin, TX, USA
%X Questions on class cardinality comparisons are quite tricky to answer and come with its own challenges. They require some kind of reasoning since web documents and knowledge bases, indispensable sources of information, rarely store direct answers to questions, such as, "Are there more astronauts or Physics Nobel Laureates?" We tackle questions on class cardinality comparison by tapping into three sources for absolute cardinalities as well as the cardinalities of orthogonal subgroups of the classes. We propose novel techniques for aggregating signals with partial coverage for more reliable estimates and evaluate them on a dataset of 4005 class pairs, achieving an accuracy of 83.7%.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Artificial Intelligence, cs.AI
%B The ACM Web Conference 2023
%E Ding, Ying; Tang, Jie; Sequeda, Juan; Aroyo, Lora; Castillo, Carlos; Houben, Geert-Jan
%P 148 - 151
%I ACM
%@ 978-1-4503-9419-2
[15]
J. Kalofolias, “Subgroup Discovery for Structured Target Concepts,” Universität des Saarlandes, Saarbrücken, 2023.
Export
BibTeX
@phdthesis{Kalofolias_PhD2023,
TITLE = {Subgroup Discovery for Structured Target Concepts},
AUTHOR = {Kalofolias, Janis},
LANGUAGE = {eng},
URL = {urn:nbn:de:bsz:291--ds-393710},
DOI = {10.22028/D291-39371},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
}
Endnote
%0 Thesis
%A Kalofolias, Janis
%Y Vreeken, Jilles
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Subgroup Discovery for Structured Target Concepts :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-FE96-D
%R 10.22028/D291-39371
%U urn:nbn:de:bsz:291--ds-393710
%F OTHER: hdl:20.500.11880/35569
%I Universität des Saarlandes
%C Saarbrücken
%D 2023
%P xi, 215 p.
%V phd
%9 phd
%U https://scidok.sulb.uni-saarland.de/handle/20.500.11880/35569
[16]
T.-P. Nguyen, S. Razniewski, A. Varde, and G. Weikum, “Extracting Cultural Commonsense Knowledge at Scale,” in The ACM Web Conference 2023 (WWW 2023), Austin, TX, USA, 2023.
Export
BibTeX
@inproceedings{Nguyen_WWW23,
TITLE = {Extracting Cultural Commonsense Knowledge at Scale},
AUTHOR = {Nguyen, Tuan-Phong and Razniewski, Simon and Varde, Aparna and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9416-1},
DOI = {10.1145/3543507.3583535},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The ACM Web Conference 2023 (WWW 2023)},
EDITOR = {Ding, Ying and Tang, Jie and Sequeda, Juan and Aroyo, Lora and Castillo, Carlos and Houben, Geert-Jan},
PAGES = {1907--1917},
ADDRESS = {Austin, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Varde, Aparna
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Extracting Cultural Commonsense Knowledge at Scale :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-9FF7-B
%R 10.1145/3543507.3583535
%D 2023
%B ACM Web Conference
%Z date of event: 2023-04-30 - 2023-05-04
%C Austin, TX, USA
%B The ACM Web Conference 2023
%E Ding, Ying; Tang, Jie; Sequeda, Juan; Aroyo, Lora; Castillo, Carlos; Houben, Geert-Jan
%P 1907 - 1917
%I ACM
%@ 978-1-4503-9416-1
[17]
T.-P. Nguyen, S. Razniewski, J. Romero, and G. Weikum, “Refined Commonsense Knowledge from Large-Scale Web Contents,” IEEE Transactions on Knowledge and Data Engineering, vol. 35, no. 8, 2023.
Export
BibTeX
@article{Nguyen_TKDE_2022,
TITLE = {Refined Commonsense Knowledge from Large-Scale Web Contents},
AUTHOR = {Nguyen, Tuan-Phong and Razniewski, Simon and Romero, Julien and Weikum, Gerhard},
LANGUAGE = {eng},
ISSN = {1558-2191},
DOI = {10.1109/TKDE.2022.3206505},
PUBLISHER = {IEEE},
ADDRESS = {Piscataway, NJ},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
JOURNAL = {IEEE Transactions on Knowledge and Data Engineering},
VOLUME = {35},
NUMBER = {8},
PAGES = {8431--8447},
}
Endnote
%0 Journal Article
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Romero, Julien
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Refined Commonsense Knowledge from Large-Scale Web Contents :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-9FEE-6
%R 10.1109/TKDE.2022.3206505
%7 2022
%D 2023
%J IEEE Transactions on Knowledge and Data Engineering
%V 35
%N 8
%& 8431
%P 8431 - 8447
%I IEEE
%C Piscataway, NJ
%@ false
[18]
J. Z. Pan, S. Razniewski, J.-C. Kalo, S. Singhania, J. Chen, S. Dietze, H. Jabeen, J. Omeliyanenko, W. Zhang, M. Lissandrini, R. Biswas, G. de Melo, A. Bonifati, E. Vakaj, M. Dragoni, and D. Graux, “Large Language Models and Knowledge Graphs: Opportunities and Challenges,” 2023. [Online]. Available: https://arxiv.org/abs/2308.06374. (arXiv: 2308.06374)
Abstract
Large Language Models (LLMs) have taken Knowledge Representation -- and the world -- by storm. This inflection point marks a shift from explicit knowledge representation to a renewed focus on the hybrid representation of both explicit knowledge and parametric knowledge. In this position paper, we will discuss some of the common debate points within the community on LLMs (parametric knowledge) and Knowledge Graphs (explicit knowledge) and speculate on opportunities and visions that the renewed focus brings, as well as related research topics and challenges.
Export
BibTeX
@online{Pan2308.06374,
TITLE = {Large Language Models and Knowledge Graphs: Opportunities and Challenges},
AUTHOR = {Pan, Jeff Z. and Razniewski, Simon and Kalo, Jan-Christoph and Singhania, Sneha and Chen, Jiaoyan and Dietze, Stefan and Jabeen, Hajira and Omeliyanenko, Janna and Zhang, Wen and Lissandrini, Matteo and Biswas, Russa and de Melo, Gerard and Bonifati, Angela and Vakaj, Edlira and Dragoni, Mauro and Graux, Damien},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2308.06374},
EPRINT = {2308.06374},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Large Language Models (LLMs) have taken Knowledge Representation -- and the world -- by storm. This inflection point marks a shift from explicit knowledge representation to a renewed focus on the hybrid representation of both explicit knowledge and parametric knowledge. In this position paper, we will discuss some of the common debate points within the community on LLMs (parametric knowledge) and Knowledge Graphs (explicit knowledge) and speculate on opportunities and visions that the renewed focus brings, as well as related research topics and challenges.},
}
Endnote
%0 Report
%A Pan, Jeff Z.
%A Razniewski, Simon
%A Kalo, Jan-Christoph
%A Singhania, Sneha
%A Chen, Jiaoyan
%A Dietze, Stefan
%A Jabeen, Hajira
%A Omeliyanenko, Janna
%A Zhang, Wen
%A Lissandrini, Matteo
%A Biswas, Russa
%A de Melo, Gerard
%A Bonifati, Angela
%A Vakaj, Edlira
%A Dragoni, Mauro
%A Graux, Damien
%+ External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
%T Large Language Models and Knowledge Graphs: Opportunities and Challenges :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-A223-4
%U https://arxiv.org/abs/2308.06374
%D 2023
%X Large Language Models (LLMs) have taken Knowledge Representation -- and the world -- by storm. This inflection point marks a shift from explicit knowledge representation to a renewed focus on the hybrid representation of both explicit knowledge and parametric knowledge. In this position paper, we will discuss some of the common debate points within the community on LLMs (parametric knowledge) and Knowledge Graphs (explicit knowledge) and speculate on opportunities and visions that the renewed focus brings, as well as related research topics and challenges.
%K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL
[19]
S. Razniewski, H. Arnaout, S. Ghosh, and F. Suchanek, “Completeness, Recall, and Negation in Open-World Knowledge Bases: A Survey,” 2023. [Online]. Available: https://arxiv.org/abs/2305.05403. (arXiv: 2305.05403)
Abstract
General-purpose knowledge bases (KBs) are a cornerstone of knowledge-centric AI. Many of them are constructed pragmatically from Web sources, and are thus far from complete. This poses challenges for the consumption as well as the curation of their content. While several surveys target the problem of completing incomplete KBs, the first problem is arguably to know whether and where the KB is incomplete in the first place, and to which degree. In this survey we discuss how knowledge about completeness, recall, and negation in KBs can be expressed, extracted, and inferred. We cover (i) the logical foundations of knowledge representation and querying under partial closed-world semantics; (ii) the estimation of this information via statistical patterns; (iii) the extraction of information about recall from KBs and text; (iv) the identification of interesting negative statements; and (v) relaxed notions of relative recall. This survey is targeted at two types of audiences: (1) practitioners who are interested in tracking KB quality, focusing extraction efforts, and building quality-aware downstream applications; and (2) data management, knowledge base and semantic web researchers who wish to understand the state of the art of knowledge bases beyond the open-world assumption. Consequently, our survey presents both fundamental methodologies and their working, and gives practice-oriented recommendations on how to choose between different approaches for a problem at hand.
Export
BibTeX
@online{Razniewski_2305.05403,
TITLE = {Completeness, Recall, and Negation in Open-World Knowledge Bases: A Survey},
AUTHOR = {Razniewski, Simon and Arnaout, Hiba and Ghosh, Shrestha and Suchanek, Fabian},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2305.05403},
EPRINT = {2305.05403},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {General-purpose knowledge bases (KBs) are a cornerstone of knowledge-centric AI. Many of them are constructed pragmatically from Web sources, and are thus far from complete. This poses challenges for the consumption as well as the curation of their content. While several surveys target the problem of completing incomplete KBs, the first problem is arguably to know whether and where the KB is incomplete in the first place, and to which degree. In this survey we discuss how knowledge about completeness, recall, and negation in KBs can be expressed, extracted, and inferred. We cover (i) the logical foundations of knowledge representation and querying under partial closed-world semantics; (ii) the estimation of this information via statistical patterns; (iii) the extraction of information about recall from KBs and text; (iv) the identification of interesting negative statements; and (v) relaxed notions of relative recall. This survey is targeted at two types of audiences: (1) practitioners who are interested in tracking KB quality, focusing extraction efforts, and building quality-aware downstream applications; and (2) data management, knowledge base and semantic web researchers who wish to understand the state of the art of knowledge bases beyond the open-world assumption. Consequently, our survey presents both fundamental methodologies and their working, and gives practice-oriented recommendations on how to choose between different approaches for a problem at hand.},
}
Endnote
%0 Report
%A Razniewski, Simon
%A Arnaout, Hiba
%A Ghosh, Shrestha
%A Suchanek, Fabian
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Completeness, Recall, and Negation in Open-World Knowledge Bases: A Survey :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-1C00-4
%U https://arxiv.org/abs/2305.05403
%D 2023
%X General-purpose knowledge bases (KBs) are a cornerstone of knowledge-centric AI. Many of them are constructed pragmatically from Web sources, and are thus far from complete. This poses challenges for the consumption as well as the curation of their content. While several surveys target the problem of completing incomplete KBs, the first problem is arguably to know whether and where the KB is incomplete in the first place, and to which degree. In this survey we discuss how knowledge about completeness, recall, and negation in KBs can be expressed, extracted, and inferred. We cover (i) the logical foundations of knowledge representation and querying under partial closed-world semantics; (ii) the estimation of this information via statistical patterns; (iii) the extraction of information about recall from KBs and text; (iv) the identification of interesting negative statements; and (v) relaxed notions of relative recall. This survey is targeted at two types of audiences: (1) practitioners who are interested in tracking KB quality, focusing extraction efforts, and building quality-aware downstream applications; and (2) data management, knowledge base and semantic web researchers who wish to understand the state of the art of knowledge bases beyond the open-world assumption. Consequently, our survey presents both fundamental methodologies and their working, and gives practice-oriented recommendations on how to choose between different approaches for a problem at hand.
%K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL,Computer Science, Databases, cs.DB,Computer Science, Digital Libraries, cs.DL
[20]
S. Singhania, S. Razniewski, and G. Weikum, “Extracting Multi-valued Relations from Language Models,” 2023. [Online]. Available: https://arxiv.org/abs/2307.03122v2. (arXiv: 2307.03122)
Abstract
The widespread usage of latent language representations via pre-trained language models (LMs) suggests that they are a promising source of structured knowledge. However, existing methods focus only on a single object per subject-relation pair, even though often multiple objects are correct. To overcome this limitation, we analyze these representations for their potential to yield materialized multi-object relational knowledge. We formulate the problem as a rank-then-select task. For ranking candidate objects, we evaluate existing prompting techniques and propose new ones incorporating domain knowledge. Among the selection methods, we find that choosing objects with a likelihood above a learned relation-specific threshold gives a 49.5% F1 score. Our results highlight the difficulty of employing LMs for the multi-valued slot-filling task and pave the way for further research on extracting relational knowledge from latent language representations.
Export
BibTeX
@online{Singhania2307.03122,
TITLE = {Extracting Multi-valued Relations from Language Models},
AUTHOR = {Singhania, Sneha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2307.03122v2},
EPRINT = {2307.03122},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {The widespread usage of latent language representations via pre-trained language models (LMs) suggests that they are a promising source of structured knowledge. However, existing methods focus only on a single object per subject-relation pair, even though often multiple objects are correct. To overcome this limitation, we analyze these representations for their potential to yield materialized multi-object relational knowledge. We formulate the problem as a rank-then-select task. For ranking candidate objects, we evaluate existing prompting techniques and propose new ones incorporating domain knowledge. Among the selection methods, we find that choosing objects with a likelihood above a learned relation-specific threshold gives a 49.5% F1 score. Our results highlight the difficulty of employing LMs for the multi-valued slot-filling task and pave the way for further research on extracting relational knowledge from latent language representations.},
}
Endnote
%0 Report
%A Singhania, Sneha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Extracting Multi-valued Relations from Language Models :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-938B-0
%U https://arxiv.org/abs/2307.03122v2
%D 2023
%X The widespread usage of latent language representations via pre-trained language models (LMs) suggests that they are a promising source of structured knowledge. However, existing methods focus only on a single object per subject-relation pair, even though often multiple objects are correct. To overcome this limitation, we analyze these representations for their potential to yield materialized multi-object relational knowledge. We formulate the problem as a rank-then-select task. For ranking candidate objects, we evaluate existing prompting techniques and propose new ones incorporating domain knowledge. Among the selection methods, we find that choosing objects with a likelihood above a learned relation-specific threshold gives a 49.5% F1 score. Our results highlight the difficulty of employing LMs for the multi-valued slot-filling task and pave the way for further research on extracting relational knowledge from latent language representations.
%K Computer Science, Computation and Language, cs.CL
[21]
G. H. Torbati, G. Weikum, and A. Yates, “Search-based Recommendation: The Case for Difficult Predictions,” in The ACM Web Conference 2023 (WWW 2023), Austin, TX, USA, 2023.
Export
BibTeX
@inproceedings{Torbati_WWW23,
TITLE = {Search-based Recommendation : {T}he Case for Difficult Predictions},
AUTHOR = {Torbati, Ghazaleh Haratinezhad and Weikum, Gerhard and Yates, Andrew},
LANGUAGE = {eng},
ISBN = {978-1-4503-9419-2},
DOI = {10.1145/3543873.3587374},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The ACM Web Conference 2023 (WWW 2023)},
EDITOR = {Ding, Ying and Tang, Jie and Sequeda, Juan and Aroyo, Lora and Castillo, Carlos and Houben, Geert-Jan},
PAGES = {318--321},
ADDRESS = {Austin, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Torbati, Ghazaleh Haratinezhad
%A Weikum, Gerhard
%A Yates, Andrew
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Search-based Recommendation : The Case for Difficult Predictions :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC45-F
%R 10.1145/3543873.3587374
%D 2023
%B ACM Web Conference
%Z date of event: 2023-04-30 - 2023-05-04
%C Austin, TX, USA
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Artificial Intelligence, cs.AI
%B The ACM Web Conference 2023
%E Ding, Ying; Tang, Jie; Sequeda, Juan; Aroyo, Lora; Castillo, Carlos; Houben, Geert-Jan
%P 318 - 321
%I ACM
%@ 978-1-4503-9419-2
[22]
G. H. Torbati, A. Tigunova, and G. Weikum, “Unveiling Challenging Cases in Text-based Recommender Systems,” in Perspectives on the Evaluation of Recommender Systems 2023, Singapore, Singapore, 2023.
Export
BibTeX
@inproceedings{Torbati_PERSPECTIVES23,
TITLE = {Unveiling Challenging Cases in Text-based Recommender Systems},
AUTHOR = {Torbati, Ghazaleh Haratinezhad and Tigunova, Anna and Weikum, Gerhard},
LANGUAGE = {eng},
ISSN = {1613-0073},
URL = {https://ceur-ws.org/Vol-3476/paper5.pdf; urn:nbn:de:0074-3476-4},
PUBLISHER = {CEUR-WS.org},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Perspectives on the Evaluation of Recommender Systems 2023},
EDITOR = {Said, Alan and Zangerle, Eva and Bauer, Christine},
SERIES = {CEUR Workshop Proceedings},
VOLUME = {3476},
ADDRESS = {Singapore, Singapore},
}
Endnote
%0 Conference Proceedings
%A Torbati, Ghazaleh Haratinezhad
%A Tigunova, Anna
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Unveiling Challenging Cases in Text-based Recommender Systems :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-AEBA-E
%U https://ceur-ws.org/Vol-3476/paper5.pdf
%D 2023
%B 3rd Workshop Perspectives on the Evaluation of Recommender Systems
%Z date of event: 2023-09-19 - 2023-09-19
%C Singapore, Singapore
%B Perspectives on the Evaluation of Recommender Systems 2023
%E Said, Alan; Zangerle, Eva; Bauer, Christine
%I CEUR-WS.org
%B CEUR Workshop Proceedings
%N 3476
%@ false
[23]
A. Varde, D. Karthikeyan, and W. Wang, “Facilitating COVID Recognition from X-Rays with Computer Vision Models and Transfer Learning,” Multimedia Tools and Applications, 2023.
Export
BibTeX
@article{Varde23,
TITLE = {Facilitating {COVID} Recognition from {X}-Rays with Computer Vision Models and Transfer Learning},
AUTHOR = {Varde, Aparna and Karthikeyan, Divydharshini and Wang, Weitian},
LANGUAGE = {eng},
ISSN = {1380-7501},
DOI = {10.1007/s11042-023-15744-9},
PUBLISHER = {Springer Nature},
ADDRESS = {New York, NY},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
JOURNAL = {Multimedia Tools and Applications},
}
Endnote
%0 Journal Article
%A Varde, Aparna
%A Karthikeyan, Divydharshini
%A Wang, Weitian
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Facilitating COVID Recognition from X-Rays with Computer Vision Models and Transfer Learning :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-578B-5
%R 10.1007/s11042-023-15744-9
%7 2023
%D 2023
%J Multimedia Tools and Applications
%I Springer Nature
%C New York, NY
%@ false
[24]
B. Veseli, S. Singhania, S. Razniewski, and G. Weikum, “Evaluating Language Models for Knowledge Base Completion,” in The Semantic Web (ESWC 2023), Hersonissos, Greece. (Accepted/in press)
Export
BibTeX
@inproceedings{Veseli_ESWC23,
TITLE = {Evaluating Language Models for Knowledge Base Completion},
AUTHOR = {Veseli, Blerta and Singhania, Sneha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
PUBLISHER = {Springer},
YEAR = {2023},
PUBLREMARK = {Accepted},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The Semantic Web (ESWC 2023)},
SERIES = {Lecture Notes in Computer Science},
ADDRESS = {Hersonissos, Greece},
}
Endnote
%0 Conference Proceedings
%A Veseli, Blerta
%A Singhania, Sneha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Evaluating Language Models for Knowledge Base Completion :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC39-D
%D 2023
%B The European Semantic Web Conference
%Z date of event: 2023-05-28 - 2023-06-01
%C Hersonissos, Greece
%B The Semantic Web
%I Springer
%B Lecture Notes in Computer Science
[25]
B. Veseli, S. Singhania, S. Razniewski, and G. Weikum, “Evaluating Language Models for Knowledge Base Completion,” 2023. [Online]. Available: https://arxiv.org/abs/2303.11082. (arXiv: 2303.11082)
Abstract
Structured knowledge bases (KBs) are a foundation of many intelligent applications, yet are notoriously incomplete. Language models (LMs) have recently been proposed for unsupervised knowledge base completion (KBC), yet, despite encouraging initial results, questions regarding their suitability remain open. Existing evaluations often fall short because they only evaluate on popular subjects, or sample already existing facts from KBs. In this work, we introduce a novel, more challenging benchmark dataset, and a methodology tailored for a realistic assessment of the KBC potential of LMs. For automated assessment, we curate a dataset called WD-KNOWN, which provides an unbiased random sample of Wikidata, containing over 3.9 million facts. In a second step, we perform a human evaluation on predictions that are not yet in the KB, as only this provides real insights into the added value over existing KBs. Our key finding is that biases in dataset conception of previous benchmarks lead to a systematic overestimate of LM performance for KBC. However, our results also reveal strong areas of LMs. We could, for example, perform a significant completion of Wikidata on the relations nativeLanguage, by a factor of ~21 (from 260k to 5.8M) at 82% precision, usedLanguage, by a factor of ~2.1 (from 2.1M to 6.6M) at 82% precision, and citizenOf by a factor of ~0.3 (from 4.2M to 5.3M) at 90% precision. Moreover, we find that LMs possess surprisingly strong generalization capabilities: even on relations where most facts were not directly observed in LM training, prediction quality can be high.
Export
BibTeX
@online{Veseli2303.11082,
TITLE = {Evaluating Language Models for Knowledge Base Completion},
AUTHOR = {Veseli, Blerta and Singhania, Sneha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2303.11082},
EPRINT = {2303.11082},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Structured knowledge bases (KBs) are a foundation of many intelligent applications, yet are notoriously incomplete. Language models (LMs) have recently been proposed for unsupervised knowledge base completion (KBC), yet, despite encouraging initial results, questions regarding their suitability remain open. Existing evaluations often fall short because they only evaluate on popular subjects, or sample already existing facts from KBs. In this work, we introduce a novel, more challenging benchmark dataset, and a methodology tailored for a realistic assessment of the KBC potential of LMs. For automated assessment, we curate a dataset called WD-KNOWN, which provides an unbiased random sample of Wikidata, containing over 3.9 million facts. In a second step, we perform a human evaluation on predictions that are not yet in the KB, as only this provides real insights into the added value over existing KBs. Our key finding is that biases in dataset conception of previous benchmarks lead to a systematic overestimate of LM performance for KBC. However, our results also reveal strong areas of LMs. We could, for example, perform a significant completion of Wikidata on the relations nativeLanguage, by a factor of ~21 (from 260k to 5.8M) at 82% precision, usedLanguage, by a factor of ~2.1 (from 2.1M to 6.6M) at 82% precision, and citizenOf by a factor of ~0.3 (from 4.2M to 5.3M) at 90% precision. Moreover, we find that LMs possess surprisingly strong generalization capabilities: even on relations where most facts were not directly observed in LM training, prediction quality can be high.},
}
Endnote
%0 Report
%A Veseli, Blerta
%A Singhania, Sneha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Evaluating Language Models for Knowledge Base Completion :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-D3CD-F
%U https://arxiv.org/abs/2303.11082
%D 2023
%X Structured knowledge bases (KBs) are a foundation of many intelligent<br>applications, yet are notoriously incomplete. Language models (LMs) have<br>recently been proposed for unsupervised knowledge base completion (KBC), yet,<br>despite encouraging initial results, questions regarding their suitability<br>remain open. Existing evaluations often fall short because they only evaluate<br>on popular subjects, or sample already existing facts from KBs. In this work,<br>we introduce a novel, more challenging benchmark dataset, and a methodology<br>tailored for a realistic assessment of the KBC potential of LMs. For automated<br>assessment, we curate a dataset called WD-KNOWN, which provides an unbiased<br>random sample of Wikidata, containing over 3.9 million facts. In a second step,<br>we perform a human evaluation on predictions that are not yet in the KB, as<br>only this provides real insights into the added value over existing KBs. Our<br>key finding is that biases in dataset conception of previous benchmarks lead to<br>a systematic overestimate of LM performance for KBC. However, our results also<br>reveal strong areas of LMs. We could, for example, perform a significant<br>completion of Wikidata on the relations nativeLanguage, by a factor of ~21<br>(from 260k to 5.8M) at 82% precision, usedLanguage, by a factor of ~2.1 (from<br>2.1M to 6.6M) at 82% precision, and citizenOf by a factor of ~0.3 (from 4.2M to<br>5.3M) at 90% precision. Moreover, we find that LMs possess surprisingly strong<br>generalization capabilities: even on relations where most facts were not<br>directly observed in LM training, prediction quality can be high.<br>
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI
%U https://github.com/bveseli/LMsForKBC
[26]
M. Zhang, P. Mundra, C. Chikweze, F. Nargesian, and G. Weikum, “Approximate Query Answering over Open Data,” in HILDA 2023, Workshop on Human-In-the-Loop Data Analytics, Seattle, WA, USA, 2023.
Export
BibTeX
@inproceedings{Zhang_HILDA23,
TITLE = {Approximate Query Answering over Open Data},
AUTHOR = {Zhang, Mengqi and Mundra, Pranay and Chikweze, Chukwubuikem and Nargesian, Fatemeh and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {979-8-4007-0216-7},
DOI = {10.1145/3597465.3605227},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {HILDA 2023, Workshop on Human-In-the-Loop Data Analytics},
PAGES = {1--3},
EID = {11},
ADDRESS = {Seattle, WA, USA},
}
Endnote
%0 Conference Proceedings
%A Zhang, Mengqi
%A Mundra, Pranay
%A Chikweze, Chukwubuikem
%A Nargesian, Fatemeh
%A Weikum, Gerhard
%+ External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Approximate Query Answering over Open Data :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-941D-C
%R 10.1145/3597465.3605227
%D 2023
%B Workshop on Human-In-the-Loop Data Analytics
%Z date of event: 2023-06-18 - 2023-06-18
%C Seattle, WA, USA
%B HILDA 2023
%P 1 - 3
%Z sequence number: 11
%I ACM
%@ 979-8-4007-0216-7
2022
[27]
H. Arnaout, T.-K. Tran, D. Stepanova, M. H. Gad-Elrab, S. Razniewski, and G. Weikum, “Utilizing Language Model Probes for Knowledge Graph Repair,” in Wiki Workshop 2022, Virtual Event, 2022.
Export
BibTeX
@inproceedings{Arnaout_Wiki2022,
TITLE = {Utilizing Language Model Probes for Knowledge Graph Repair},
AUTHOR = {Arnaout, Hiba and Tran, Trung-Kien and Stepanova, Daria and Gad-Elrab, Mohamed Hassan and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://wikiworkshop.org/2022/},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Wiki Workshop 2022},
ADDRESS = {Virtual Event},
}
Endnote
%0 Conference Proceedings
%A Arnaout, Hiba
%A Tran, Trung-Kien
%A Stepanova, Daria
%A Gad-Elrab, Mohamed Hassan
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Utilizing Language Model Probes for Knowledge Graph Repair :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-63F4-3
%U https://wikiworkshop.org/2022/
%D 2022
%B Wiki Workshop 2022
%Z date of event: 2022-04-25 - 2022-04-25
%C Virtual Event
%B Wiki Workshop 2022
[28]
H. Arnaout, S. Razniewski, G. Weikum, and J. Z. Pan, “UnCommonSense: Informative Negative Knowledge about Everyday Concepts,” in CIKM ’22, 31st ACM International Conference on Information and Knowledge Management, Atlanta GA USA, 2022.
Abstract
Commonsense knowledge about everyday concepts is an important asset for AI applications, such as question answering and chatbots. Recently, we have seen an increasing interest in the construction of structured commonsense knowledge bases (CSKBs). An important part of human commonsense is about properties that do not apply to concepts, yet existing CSKBs only store positive statements. Moreover, since CSKBs operate under the open-world assumption, absent statements are considered to have unknown truth rather than being invalid. This paper presents the UNCOMMONSENSE framework for materializing informative negative commonsense statements. Given a target concept, comparable concepts are identified in the CSKB, for which a local closed-world assumption is postulated. This way, positive statements about comparable concepts that are absent for the target concept become seeds for negative statement candidates. The large set of candidates is then scrutinized, pruned and ranked by informativeness. Intrinsic and extrinsic evaluations show that our method significantly outperforms the state-of-the-art. A large dataset of informative negations is released as a resource for future research.
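The candidate-generation step can be illustrated with a toy sketch: under a local closed-world assumption, positive statements held by comparable concepts but absent for the target become candidate negatives. The tiny CSKB and the frequency-based ranking below are assumptions for illustration, not the framework's actual data or scoring.

```python
# Toy illustration (assumed data, simplified scoring): positives of comparable
# concepts that are absent for the target become candidate negatives under a
# local closed-world assumption.
from collections import Counter

cskb = {  # hypothetical positive statements per concept
    "penguin": {"is a bird", "swims", "lives in cold regions"},
    "eagle":   {"is a bird", "flies", "hunts prey"},
    "sparrow": {"is a bird", "flies", "builds nests"},
}

def negative_candidates(target, comparables):
    """Rank statements held by comparables but absent for the target."""
    counts = Counter(s for c in comparables for s in cskb[c] if s not in cskb[target])
    # more frequent among comparable concepts -> more informative as a negation
    return counts.most_common()

print(negative_candidates("penguin", ["eagle", "sparrow"]))
# e.g. [('flies', 2), ('hunts prey', 1), ('builds nests', 1)]
```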
Export
BibTeX
@inproceedings{ArnaoutCIKM2022,
TITLE = {{UnCommonSense}: Informative Negative Knowledge about Everyday Concepts},
AUTHOR = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard and Pan, Jeff Z.},
LANGUAGE = {eng},
ISBN = {978-1-4503-9236-5},
DOI = {10.1145/3511808.3557484},
PUBLISHER = {ACM},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Commonsense knowledge about everyday concepts is an important asset for AI<br>applications, such as question answering and chatbots. Recently, we have seen<br>an increasing interest in the construction of structured commonsense knowledge<br>bases (CSKBs). An important part of human commonsense is about properties that<br>do not apply to concepts, yet existing CSKBs only store positive statements.<br>Moreover, since CSKBs operate under the open-world assumption, absent<br>statements are considered to have unknown truth rather than being invalid. This<br>paper presents the UNCOMMONSENSE framework for materializing informative<br>negative commonsense statements. Given a target concept, comparable concepts<br>are identified in the CSKB, for which a local closed-world assumption is<br>postulated. This way, positive statements about comparable concepts that are<br>absent for the target concept become seeds for negative statement candidates.<br>The large set of candidates is then scrutinized, pruned and ranked by<br>informativeness. Intrinsic and extrinsic evaluations show that our method<br>significantly outperforms the state-of-the-art. A large dataset of informative<br>negations is released as a resource for future research.<br>},
BOOKTITLE = {CIKM '22, 31st ACM International Conference on Information and Knowledge Management},
EDITOR = {Al Hasan, Mohammad and Xiong, Li},
PAGES = {37--46},
ADDRESS = {Atlanta GA USA},
}
Endnote
%0 Conference Proceedings
%A Arnaout, Hiba
%A Razniewski, Simon
%A Weikum, Gerhard
%A Pan, Jeff Z.
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T UnCommonSense: Informative Negative Knowledge about Everyday Concepts :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-F224-C
%R 10.1145/3511808.3557484
%D 2022
%B 31st ACM International Conference on Information and Knowledge Management
%Z date of event: 2022-10-17 - 2022-10-21
%C Atlanta GA USA
%X Commonsense knowledge about everyday concepts is an important asset for AI<br>applications, such as question answering and chatbots. Recently, we have seen<br>an increasing interest in the construction of structured commonsense knowledge<br>bases (CSKBs). An important part of human commonsense is about properties that<br>do not apply to concepts, yet existing CSKBs only store positive statements.<br>Moreover, since CSKBs operate under the open-world assumption, absent<br>statements are considered to have unknown truth rather than being invalid. This<br>paper presents the UNCOMMONSENSE framework for materializing informative<br>negative commonsense statements. Given a target concept, comparable concepts<br>are identified in the CSKB, for which a local closed-world assumption is<br>postulated. This way, positive statements about comparable concepts that are<br>absent for the target concept become seeds for negative statement candidates.<br>The large set of candidates is then scrutinized, pruned and ranked by<br>informativeness. Intrinsic and extrinsic evaluations show that our method<br>significantly outperforms the state-of-the-art. A large dataset of informative<br>negations is released as a resource for future research.<br>
%K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Databases, cs.DB,Computer Science, Information Retrieval, cs.IR
%B CIKM '22
%E Al Hasan, Mohammad; Xiong, Li
%P 37 - 46
%I ACM
%@ 978-1-4503-9236-5
[29]
H. Arnaout, S. Razniewski, G. Weikum, and J. Z. Pan, “UnCommonSense: Informative Negative Knowledge about Everyday Concepts,” 2022. [Online]. Available: https://arxiv.org/abs/2208.09292. (arXiv: 2208.09292)
Abstract
Commonsense knowledge about everyday concepts is an important asset for AI applications, such as question answering and chatbots. Recently, we have seen an increasing interest in the construction of structured commonsense knowledge bases (CSKBs). An important part of human commonsense is about properties that do not apply to concepts, yet existing CSKBs only store positive statements. Moreover, since CSKBs operate under the open-world assumption, absent statements are considered to have unknown truth rather than being invalid. This paper presents the UNCOMMONSENSE framework for materializing informative negative commonsense statements. Given a target concept, comparable concepts are identified in the CSKB, for which a local closed-world assumption is postulated. This way, positive statements about comparable concepts that are absent for the target concept become seeds for negative statement candidates. The large set of candidates is then scrutinized, pruned and ranked by informativeness. Intrinsic and extrinsic evaluations show that our method significantly outperforms the state-of-the-art. A large dataset of informative negations is released as a resource for future research.
Export
BibTeX
@online{Arnaout2208.09292,
TITLE = {{UnCommonSense}: Informative Negative Knowledge about Everyday Concepts},
AUTHOR = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard and Pan, Jeff Z.},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2208.09292},
EPRINT = {2208.09292},
EPRINTTYPE = {arXiv},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Commonsense knowledge about everyday concepts is an important asset for AI<br>applications, such as question answering and chatbots. Recently, we have seen<br>an increasing interest in the construction of structured commonsense knowledge<br>bases (CSKBs). An important part of human commonsense is about properties that<br>do not apply to concepts, yet existing CSKBs only store positive statements.<br>Moreover, since CSKBs operate under the open-world assumption, absent<br>statements are considered to have unknown truth rather than being invalid. This<br>paper presents the UNCOMMONSENSE framework for materializing informative<br>negative commonsense statements. Given a target concept, comparable concepts<br>are identified in the CSKB, for which a local closed-world assumption is<br>postulated. This way, positive statements about comparable concepts that are<br>absent for the target concept become seeds for negative statement candidates.<br>The large set of candidates is then scrutinized, pruned and ranked by<br>informativeness. Intrinsic and extrinsic evaluations show that our method<br>significantly outperforms the state-of-the-art. A large dataset of informative<br>negations is released as a resource for future research.<br>},
}
Endnote
%0 Report
%A Arnaout, Hiba
%A Razniewski, Simon
%A Weikum, Gerhard
%A Pan, Jeff Z.
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T UnCommonSense: Informative Negative Knowledge about Everyday Concepts :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-1651-0
%U https://arxiv.org/abs/2208.09292
%D 2022
%X Commonsense knowledge about everyday concepts is an important asset for AI<br>applications, such as question answering and chatbots. Recently, we have seen<br>an increasing interest in the construction of structured commonsense knowledge<br>bases (CSKBs). An important part of human commonsense is about properties that<br>do not apply to concepts, yet existing CSKBs only store positive statements.<br>Moreover, since CSKBs operate under the open-world assumption, absent<br>statements are considered to have unknown truth rather than being invalid. This<br>paper presents the UNCOMMONSENSE framework for materializing informative<br>negative commonsense statements. Given a target concept, comparable concepts<br>are identified in the CSKB, for which a local closed-world assumption is<br>postulated. This way, positive statements about comparable concepts that are<br>absent for the target concept become seeds for negative statement candidates.<br>The large set of candidates is then scrutinized, pruned and ranked by<br>informativeness. Intrinsic and extrinsic evaluations show that our method<br>significantly outperforms the state-of-the-art. A large dataset of informative<br>negations is released as a resource for future research.<br>
%K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Information Retrieval, cs.IR
[30]
I. Chernyavsky, A. S. Varde, and S. Razniewski, “CSK-Detector: Commonsense in Object Detection,” in IEEE International Conference on Big Data, Osaka, Japan, 2022.
Export
BibTeX
@inproceedings{ChernyavskyBIGDATA22,
TITLE = {{CSK-Detector}: {C}ommonsense in object detection},
AUTHOR = {Chernyavsky, Irina and Varde, Aparna S. and Razniewski, Simon},
LANGUAGE = {eng},
ISBN = {978-1-6654-8045-1},
DOI = {10.1109/BigData55660.2022.10020915},
PUBLISHER = {IEEE},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {IEEE International Conference on Big Data},
PAGES = {6609--6612},
ADDRESS = {Osaka, Japan},
}
Endnote
%0 Conference Proceedings
%A Chernyavsky, Irina
%A Varde, Aparna S.
%A Razniewski, Simon
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CSK-Detector: Commonsense in Object Detection :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-B77F-8
%R 10.1109/BigData55660.2022.10020915
%D 2022
%B IEEE International Conference on Big Data
%Z date of event: 2022-12-17 - 2022-12-20
%C Osaka, Japan
%B IEEE International Conference on Big Data
%P 6609 - 6612
%I IEEE
%@ 978-1-6654-8045-1
[31]
P. Christmann, R. Saha Roy, and G. Weikum, “Conversational Question Answering on Heterogeneous Sources,” in SIGIR ’22, 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, Madrid, Spain, 2022.
Export
BibTeX
@inproceedings{Christmann_SIGIR2022,
TITLE = {Conversational Question Answering on Heterogeneous Sources},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-8732-3},
DOI = {10.1145/3477495.3531815},
PUBLISHER = {ACM},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGIR '22, 45th International ACM SIGIR Conference on Research and Development in Information Retrieval},
EDITOR = {Amigo, Enrique and Castells, Pablo and Gonzalo, Julio and Carterette, Ben and Culpepper, J. Shane and Kazai, Gabriella},
PAGES = {144--154},
ADDRESS = {Madrid, Spain},
}
Endnote
%0 Conference Proceedings
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Conversational Question Answering on Heterogeneous Sources :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-6148-8
%R 10.1145/3477495.3531815
%D 2022
%B 45th International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2022-07-11 - 2022-07-15
%C Madrid, Spain
%B SIGIR '22
%E Amigo, Enrique; Castells, Pablo; Gonzalo, Julio; Carterette, Ben; Culpepper, J. Shane; Kazai, Gabriella
%P 144 - 154
%I ACM
%@ 978-1-4503-8732-3
[32]
P. Christmann, R. Saha Roy, and G. Weikum, “Beyond NED: Fast and Effective Search Space Reduction for Complex Question Answering over Knowledge Bases,” in WSDM ’22, Fifteenth ACM International Conference on Web Search and Data Mining, Tempe, AZ, USA (Virtual Event), 2022.
Export
BibTeX
@inproceedings{Christmann_WSDM22,
TITLE = {Beyond {NED}: {F}ast and Effective Search Space Reduction for Complex Question Answering over Knowledge Bases},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9132-0},
DOI = {10.1145/3488560.3498488},
PUBLISHER = {ACM},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {WSDM '22, Fifteenth ACM International Conference on Web Search and Data Mining},
PAGES = {172--180},
ADDRESS = {Tempe, AZ, USA (Virtual Event)},
}
Endnote
%0 Conference Proceedings
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Beyond NED: Fast and Effective Search Space Reduction for Complex Question Answering over Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-27C6-B
%R 10.1145/3488560.3498488
%D 2022
%B Fifteenth ACM International Conference on Web Search and Data Mining
%Z date of event: 2022-02-21 - 2022-02-25
%C Tempe, AZ, USA (Virtual Event)
%B WSDM '22
%P 172 - 180
%I ACM
%@ 978-1-4503-9132-0
[33]
P. Christmann, R. Saha Roy, and G. Weikum, “Question Entity and Relation Linking to Knowledge Bases via CLOCQ,” in Joint Proceedings of SemREC 2022 and SMART 2022 co-located with 21st International Semantic Web Conference (ISWC 2022), Hybrid Event, Hangzhou, China, 2022.
Export
BibTeX
@inproceedings{Christmann_SMART22,
TITLE = {Question Entity and Relation Linking to Knowledge Bases via {CLOCQ}},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://ceur-ws.org/Vol-3337/smart-paper1.pdf; urn:nbn:de:0074-3337-1},
PUBLISHER = {CEUR-WS.org},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Joint Proceedings of SemREC 2022 and SMART 2022 co-located with 21st International Semantic Web Conference (ISWC 2022)},
EDITOR = {Singh, Gunjan and Mutharaju, Raghava and Kapanipathi, Pavan and Mihindukulasooriya, Nandana and Dubey, Mohnish and Usbeck, Ricardo and Banerjee, Debayan},
PAGES = {33--47},
SERIES = {CEUR Workshop Proceedings},
VOLUME = {3337},
ADDRESS = {Hybrid Event, Hangzhou, China},
}
Endnote
%0 Conference Proceedings
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Question Entity and Relation Linking to Knowledge Bases via CLOCQ :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-9595-3
%U https://ceur-ws.org/Vol-3337/smart-paper1.pdf
%D 2022
%B 2nd Semantic Reasoning Evaluation Challenge and 3rd SeMantic Answer Type, Relation and Entity Prediction Tasks Challenge
%Z date of event: 2022-10-24 - 2022-10-27
%C Hybrid Event, Hangzhou, China
%B Joint Proceedings of SemREC 2022 and SMART 2022 co-located with 21st International Semantic Web Conference (ISWC 2022)
%E Singh, Gunjan; Mutharaju, Raghava; Kapanipathi, Pavan; Mihindukulasooriya, Nandana; Dubey, Mohnish; Usbeck, Ricardo; Banerjee, Debayan
%P 33 - 47
%I CEUR-WS.org
%B CEUR Workshop Proceedings
%N 3337
[34]
P. Christmann, R. Saha Roy, and G. Weikum, “Conversational Question Answering on Heterogeneous Sources,” 2022. [Online]. Available: https://arxiv.org/abs/2204.11677. (arXiv: 2204.11677)
Abstract
Conversational question answering (ConvQA) tackles sequential information needs where contexts in follow-up questions are left implicit. Current ConvQA systems operate over homogeneous sources of information: either a knowledge base (KB), or a text corpus, or a collection of tables. This paper addresses the novel issue of jointly tapping into all of these together, this way boosting answer coverage and confidence. We present CONVINSE, an end-to-end pipeline for ConvQA over heterogeneous sources, operating in three stages: i) learning an explicit structured representation of an incoming question and its conversational context, ii) harnessing this frame-like representation to uniformly capture relevant evidences from KB, text, and tables, and iii) running a fusion-in-decoder model to generate the answer. We construct and release the first benchmark, ConvMix, for ConvQA over heterogeneous sources, comprising 3000 real-user conversations with 16000 questions, along with entity annotations, completed question utterances, and question paraphrases. Experiments demonstrate the viability and advantages of our method, compared to state-of-the-art baselines.
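As a rough illustration of stage i), the sketch below models a frame-like representation of an incomplete follow-up question. The slot names and the completion heuristic are assumptions for illustration; they are not the schema or method used by CONVINSE.

```python
# Minimal sketch of a frame-like question representation as described in the
# abstract; slot names and the completion heuristic are illustrative assumptions.
from dataclasses import dataclass

@dataclass
class QuestionFrame:
    context_entity: str        # entity carried over from the conversation
    relation: str              # relation the current question asks about
    expected_answer_type: str  # coarse type of the expected answer
    question: str              # the (possibly incomplete) user utterance

def complete_followup(frame: QuestionFrame) -> str:
    """Turn an implicit follow-up into a self-contained query string."""
    return f"{frame.relation} of {frame.context_entity} ({frame.expected_answer_type})"

frame = QuestionFrame("Braveheart", "director", "person", "who directed it?")
print(complete_followup(frame))  # director of Braveheart (person)
```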
Export
BibTeX
@online{Christmann2204.11677,
TITLE = {Conversational Question Answering on Heterogeneous Sources},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2204.11677},
EPRINT = {2204.11677},
EPRINTTYPE = {arXiv},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Conversational question answering (ConvQA) tackles sequential information<br>needs where contexts in follow-up questions are left implicit. Current ConvQA<br>systems operate over homogeneous sources of information: either a knowledge<br>base (KB), or a text corpus, or a collection of tables. This paper addresses<br>the novel issue of jointly tapping into all of these together, this way<br>boosting answer coverage and confidence. We present CONVINSE, an end-to-end<br>pipeline for ConvQA over heterogeneous sources, operating in three stages: i)<br>learning an explicit structured representation of an incoming question and its<br>conversational context, ii) harnessing this frame-like representation to<br>uniformly capture relevant evidences from KB, text, and tables, and iii)<br>running a fusion-in-decoder model to generate the answer. We construct and<br>release the first benchmark, ConvMix, for ConvQA over heterogeneous sources,<br>comprising 3000 real-user conversations with 16000 questions, along with entity<br>annotations, completed question utterances, and question paraphrases.<br>Experiments demonstrate the viability and advantages of our method, compared to<br>state-of-the-art baselines.<br>},
}
Endnote
%0 Report
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Conversational Question Answering on Heterogeneous Sources :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-164E-5
%U https://arxiv.org/abs/2204.11677
%D 2022
%X Conversational question answering (ConvQA) tackles sequential information<br>needs where contexts in follow-up questions are left implicit. Current ConvQA<br>systems operate over homogeneous sources of information: either a knowledge<br>base (KB), or a text corpus, or a collection of tables. This paper addresses<br>the novel issue of jointly tapping into all of these together, this way<br>boosting answer coverage and confidence. We present CONVINSE, an end-to-end<br>pipeline for ConvQA over heterogeneous sources, operating in three stages: i)<br>learning an explicit structured representation of an incoming question and its<br>conversational context, ii) harnessing this frame-like representation to<br>uniformly capture relevant evidences from KB, text, and tables, and iii)<br>running a fusion-in-decoder model to generate the answer. We construct and<br>release the first benchmark, ConvMix, for ConvQA over heterogeneous sources,<br>comprising 3000 real-user conversations with 16000 questions, along with entity<br>annotations, completed question utterances, and question paraphrases.<br>Experiments demonstrate the viability and advantages of our method, compared to<br>state-of-the-art baselines.<br>
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[35]
C. X. Chu, “Knowledge Extraction from Fictional Texts,” Universität des Saarlandes, Saarbrücken, 2022.
Abstract
Knowledge extraction from text is a key task in natural language processing, which involves many sub-tasks, such as taxonomy induction, named entity recognition and typing, relation extraction, knowledge canonicalization and so on. By constructing structured knowledge from natural language text, knowledge extraction becomes a key asset for search engines, question answering and other downstream applications. However, current knowledge extraction methods mostly focus on prominent real-world entities with Wikipedia and mainstream news articles as sources. The constructed knowledge bases, therefore, lack information about long-tail domains, with fiction and fantasy as archetypes. Fiction and fantasy are core parts of our human culture, spanning from literature to movies, TV series, comics and video games. With thousands of fictional universes which have been created, knowledge from fictional domains are subject of search-engine queries - by fans as well as cultural analysts. Unlike the real-world domain, knowledge extraction on such specific domains like fiction and fantasy has to tackle several key challenges: - Training data: Sources for fictional domains mostly come from books and fan-built content, which is sparse and noisy, and contains difficult structures of texts, such as dialogues and quotes. Training data for key tasks such as taxonomy induction, named entity typing or relation extraction are also not available. - Domain characteristics and diversity: Fictional universes can be highly sophisticated, containing entities, social structures and sometimes languages that are completely different from the real world. State-of-the-art methods for knowledge extraction make assumptions on entity-class, subclass and entity-entity relations that are often invalid for fictional domains. With different genres of fictional domains, another requirement is to transfer models across domains. - Long fictional texts: While state-of-the-art models have limitations on the input sequence length, it is essential to develop methods that are able to deal with very long texts (e.g. entire books), to capture multiple contexts and leverage widely spread cues. This dissertation addresses the above challenges, by developing new methodologies that advance the state of the art on knowledge extraction in fictional domains. - The first contribution is a method, called TiFi, for constructing type systems (taxonomy induction) for fictional domains. By tapping noisy fan-built content from online communities such as Wikia, TiFi induces taxonomies through three main steps: category cleaning, edge cleaning and top-level construction. Exploiting a variety of features from the original input, TiFi is able to construct taxonomies for a diverse range of fictional domains with high precision. - The second contribution is a comprehensive approach, called ENTYFI, for named entity recognition and typing in long fictional texts. Built on 205 automatically induced high-quality type systems for popular fictional domains, ENTYFI exploits the overlap and reuse of these fictional domains on unseen texts. By combining different typing modules with a consolidation stage, ENTYFI is able to do fine-grained entity typing in long fictional texts with high precision and recall. - The third contribution is an end-to-end system, called KnowFi, for extracting relations between entities in very long texts such as entire books. 
KnowFi leverages background knowledge from 142 popular fictional domains to identify interesting relations and to collect distant training samples. KnowFi devises a similarity-based ranking technique to reduce false positives in training samples and to select potential text passages that contain seed pairs of entities. By training a hierarchical neural network for all relations, KnowFi is able to infer relations between entity pairs across long fictional texts, and achieves gains over the best prior methods for relation extraction.
Export
BibTeX
@phdthesis{Chuphd2022,
TITLE = {Knowledge Extraction from Fictional Texts},
AUTHOR = {Chu, Cuong Xuan},
LANGUAGE = {eng},
URL = {nbn:de:bsz:291--ds-361070},
DOI = {10.22028/D291-36107},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
DATE = {2022},
ABSTRACT = {Knowledge extraction from text is a key task in natural language processing, which involves many sub-tasks, such as taxonomy induction, named entity recognition and typing, relation extraction, knowledge canonicalization and so on. By constructing structured knowledge from natural language text, knowledge extraction becomes a key asset for search engines, question answering and other downstream applications. However, current knowledge extraction methods mostly focus on prominent real-world entities with Wikipedia and mainstream news articles as sources. The constructed knowledge bases, therefore, lack information about long-tail domains, with fiction and fantasy as archetypes. Fiction and fantasy are core parts of our human culture, spanning from literature to movies, TV series, comics and video games. With thousands of fictional universes which have been created, knowledge from fictional domains are subject of search-engine queries -- by fans as well as cultural analysts. Unlike the real-world domain, knowledge extraction on such specific domains like fiction and fantasy has to tackle several key challenges: -- Training data: Sources for fictional domains mostly come from books and fan-built content, which is sparse and noisy, and contains difficult structures of texts, such as dialogues and quotes. Training data for key tasks such as taxonomy induction, named entity typing or relation extraction are also not available. -- Domain characteristics and diversity: Fictional universes can be highly sophisticated, containing entities, social structures and sometimes languages that are completely different from the real world. State-of-the-art methods for knowledge extraction make assumptions on entity-class, subclass and entity-entity relations that are often invalid for fictional domains. With different genres of fictional domains, another requirement is to transfer models across domains. -- Long fictional texts: While state-of-the-art models have limitations on the input sequence length, it is essential to develop methods that are able to deal with very long texts (e.g. entire books), to capture multiple contexts and leverage widely spread cues. This dissertation addresses the above challenges, by developing new methodologies that advance the state of the art on knowledge extraction in fictional domains. -- The first contribution is a method, called TiFi, for constructing type systems (taxonomy induction) for fictional domains. By tapping noisy fan-built content from online communities such as Wikia, TiFi induces taxonomies through three main steps: category cleaning, edge cleaning and top-level construction. Exploiting a variety of features from the original input, TiFi is able to construct taxonomies for a diverse range of fictional domains with high precision. -- The second contribution is a comprehensive approach, called ENTYFI, for named entity recognition and typing in long fictional texts. Built on 205 automatically induced high-quality type systems for popular fictional domains, ENTYFI exploits the overlap and reuse of these fictional domains on unseen texts. By combining different typing modules with a consolidation stage, ENTYFI is able to do fine-grained entity typing in long fictional texts with high precision and recall. -- The third contribution is an end-to-end system, called KnowFi, for extracting relations between entities in very long texts such as entire books. 
KnowFi leverages background knowledge from 142 popular fictional domains to identify interesting relations and to collect distant training samples. KnowFi devises a similarity-based ranking technique to reduce false positives in training samples and to select potential text passages that contain seed pairs of entities. By training a hierarchical neural network for all relations, KnowFi is able to infer relations between entity pairs across long fictional texts, and achieves gains over the best prior methods for relation extraction.},
}
Endnote
%0 Thesis
%A Chu, Cuong Xuan
%Y Weikum, Gerhard
%A referee: Theobald, Martin
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Knowledge Extraction from Fictional Texts :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-9598-2
%R 10.22028/D291-36107
%U nbn:de:bsz:291--ds-361070
%F OTHER: hdl:20.500.11880/32914
%I Universität des Saarlandes
%C Saarbrücken
%D 2022
%P 129 p.
%V phd
%9 phd
%X Knowledge extraction from text is a key task in natural language processing, which involves many sub-tasks, such as taxonomy induction, named entity recognition and typing, relation extraction, knowledge canonicalization and so on. By constructing structured knowledge from natural language text, knowledge extraction becomes a key asset for search engines, question answering and other downstream applications. However, current knowledge extraction methods mostly focus on prominent real-world entities with Wikipedia and mainstream news articles as sources. The constructed knowledge bases, therefore, lack information about long-tail domains, with fiction and fantasy as archetypes. Fiction and fantasy are core parts of our human culture, spanning from literature to movies, TV series, comics and video games. With thousands of fictional universes which have been created, knowledge from fictional domains are subject of search-engine queries - by fans as well as cultural analysts. Unlike the real-world domain, knowledge extraction on such specific domains like fiction and fantasy has to tackle several key challenges: - Training data: Sources for fictional domains mostly come from books and fan-built content, which is sparse and noisy, and contains difficult structures of texts, such as dialogues and quotes. Training data for key tasks such as taxonomy induction, named entity typing or relation extraction are also not available. - Domain characteristics and diversity: Fictional universes can be highly sophisticated, containing entities, social structures and sometimes languages that are completely different from the real world. State-of-the-art methods for knowledge extraction make assumptions on entity-class, subclass and entity-entity relations that are often invalid for fictional domains. With different genres of fictional domains, another requirement is to transfer models across domains. - Long fictional texts: While state-of-the-art models have limitations on the input sequence length, it is essential to develop methods that are able to deal with very long texts (e.g. entire books), to capture multiple contexts and leverage widely spread cues. This dissertation addresses the above challenges, by developing new methodologies that advance the state of the art on knowledge extraction in fictional domains. - The first contribution is a method, called TiFi, for constructing type systems (taxonomy induction) for fictional domains. By tapping noisy fan-built content from online communities such as Wikia, TiFi induces taxonomies through three main steps: category cleaning, edge cleaning and top-level construction. Exploiting a variety of features from the original input, TiFi is able to construct taxonomies for a diverse range of fictional domains with high precision. - The second contribution is a comprehensive approach, called ENTYFI, for named entity recognition and typing in long fictional texts. Built on 205 automatically induced high-quality type systems for popular fictional domains, ENTYFI exploits the overlap and reuse of these fictional domains on unseen texts. By combining different typing modules with a consolidation stage, ENTYFI is able to do fine-grained entity typing in long fictional texts with high precision and recall. - The third contribution is an end-to-end system, called KnowFi, for extracting relations between entities in very long texts such as entire books. 
KnowFi leverages background knowledge from 142 popular fictional domains to identify interesting relations and to collect distant training samples. KnowFi devises a similarity-based ranking technique to reduce false positives in training samples and to select potential text passages that contain seed pairs of entities. By training a hierarchical neural network for all relations, KnowFi is able to infer relations between entity pairs across long fictional texts, and achieves gains over the best prior methods for relation extraction.
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/32914
[36]
D. Dave, A. Celestino, A. S. Varde, and V. Anu, “Management of Implicit Requirements Data in Large SRS Documents: Taxonomy and Techniques,” Sigmod Record, vol. 51, no. 2, 2022.
Export
BibTeX
@article{dave2022,
TITLE = {Management of Implicit Requirements Data in Large {SRS} Documents: {T}axonomy and Techniques},
AUTHOR = {Dave, Dev and Celestino, Angelica and Varde, Aparna S. and Anu, Vaibhav},
LANGUAGE = {eng},
ISSN = {0163-5808},
PUBLISHER = {Special Interest Group on the Management of Data},
ADDRESS = {New York, NY},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
JOURNAL = {Sigmod Record},
VOLUME = {51},
NUMBER = {2},
PAGES = {18--29},
}
Endnote
%0 Journal Article
%A Dave, Dev
%A Celestino, Angelica
%A Varde, Aparna S.
%A Anu, Vaibhav
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Management of Implicit Requirements Data in Large SRS Documents: Taxonomy and Techniques :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-F1AD-3
%7 2022
%D 2022
%J Sigmod Record
%V 51
%N 2
%& 18
%P 18 - 29
%I Special Interest Group on the Management of Data
%C New York, NY
%@ false
[37]
J. Fischer, “More than the sum of its parts,” Universität des Saarlandes, Saarbrücken, 2022.
Abstract
In this thesis we explore pattern mining and deep learning. Often seen as orthogonal, we show that these fields complement each other and propose to combine them to gain from each other’s strengths. We, first, show how to efficiently discover succinct and non-redundant sets of patterns that provide insight into data beyond conjunctive statements. We leverage the interpretability of such patterns to unveil how and which information flows through neural networks, as well as what characterizes their decisions. Conversely, we show how to combine continuous optimization with pattern discovery, proposing a neural network that directly encodes discrete patterns, which allows us to apply pattern mining at a scale orders of magnitude larger than previously possible. Large neural networks are, however, exceedingly expensive to train for which ‘lottery tickets’ – small, well-trainable sub-networks in randomly initialized neural networks – offer a remedy. We identify theoretical limitations of strong tickets and overcome them by equipping these tickets with the property of universal approximation. To analyze whether limitations in ticket sparsity are algorithmic or fundamental, we propose a framework to plant and hide lottery tickets. With novel ticket benchmarks we then conclude that the limitation is likely algorithmic, encouraging further developments for which our framework offers means to measure progress.
Export
BibTeX
@phdthesis{Fischerphd2022,
TITLE = {More than the sum of its parts},
AUTHOR = {Fischer, Jonas},
LANGUAGE = {eng},
URL = {nbn:de:bsz:291--ds-370240},
DOI = {10.22028/D291-37024},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
DATE = {2022},
ABSTRACT = {In this thesis we explore pattern mining and deep learning. Often seen as orthogonal, we show that these fields complement each other and propose to combine them to gain from each other{\textquoteright}s strengths. We, first, show how to efficiently discover succinct and non-redundant sets of patterns that provide insight into data beyond conjunctive statements. We leverage the interpretability of such patterns to unveil how and which information flows through neural networks, as well as what characterizes their decisions. Conversely, we show how to combine continuous optimization with pattern discovery, proposing a neural network that directly encodes discrete patterns, which allows us to apply pattern mining at a scale orders of magnitude larger than previously possible. Large neural networks are, however, exceedingly expensive to train for which {\textquoteleft}lottery tickets{\textquoteright} -- small, well-trainable sub-networks in randomly initialized neural networks -- offer a remedy. We identify theoretical limitations of strong tickets and overcome them by equipping these tickets with the property of universal approximation. To analyze whether limitations in ticket sparsity are algorithmic or fundamental, we propose a framework to plant and hide lottery tickets. With novel ticket benchmarks we then conclude that the limitation is likely algorithmic, encouraging further developments for which our framework offers means to measure progress.},
}
Endnote
%0 Thesis
%A Fischer, Jonas
%Y Vreeken, Jilles
%A referee: Weikum, Gerhard
%A referee: Parthasarathy, Srinivasan
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T More than the sum of its parts : pattern mining, neural networks, and how they complement each other
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-38BF-0
%R 10.22028/D291-37024
%U nbn:de:bsz:291--ds-370240
%F OTHER: hdl:20.500.11880/33893
%I Universität des Saarlandes
%C Saarbrücken
%D 2022
%P 250 p.
%V phd
%9 phd
%X In this thesis we explore pattern mining and deep learning. Often seen as orthogonal, we show that these fields complement each other and propose to combine them to gain from each other’s strengths. We, first, show how to efficiently discover succinct and non-redundant sets of patterns that provide insight into data beyond conjunctive statements. We leverage the interpretability of such patterns to unveil how and which information flows through neural networks, as well as what characterizes their decisions. Conversely, we show how to combine continuous optimization with pattern discovery, proposing a neural network that directly encodes discrete patterns, which allows us to apply pattern mining at a scale orders of magnitude larger than previously possible. Large neural networks are, however, exceedingly expensive to train for which ‘lottery tickets’ – small, well-trainable sub-networks in randomly initialized neural networks – offer a remedy. We identify theoretical limitations of strong tickets and overcome them by equipping these tickets with the property of universal approximation. To analyze whether limitations in ticket sparsity are algorithmic or fundamental, we propose a framework to plant and hide lottery tickets. With novel ticket benchmarks we then conclude that the limitation is likely algorithmic, encouraging further developments for which our framework offers means to measure progress.
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/33893
[38]
S. Ghosh, S. Razniewski, and G. Weikum, “Answering Count Queries with Explanatory Evidence,” in SIGIR ’22, 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, Madrid, Spain, 2022.
Export
BibTeX
@inproceedings{Ghosh_SIGIR22,
TITLE = {Answering Count Queries with Explanatory Evidence},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-8732-3},
DOI = {10.1145/3477495.3531870},
PUBLISHER = {ACM},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGIR '22, 45th International ACM SIGIR Conference on Research and Development in Information Retrieval},
EDITOR = {Amigo, Enrique and Castells, Pablo and Gonzalo, Julio and Carterette, Ben and Culpepper, J. Shane and Kazai, Gabriella},
PAGES = {2415--2419},
ADDRESS = {Madrid, Spain},
}
Endnote
%0 Conference Proceedings
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Answering Count Queries with Explanatory Evidence :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-9E36-8
%R 10.1145/3477495.3531870
%D 2022
%B 45th International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2022-07-11 - 2022-07-15
%C Madrid, Spain
%B SIGIR '22
%E Amigo, Enrique; Castells, Pablo; Gonzalo, Julio; Carterette, Ben; Culpepper, J. Shane; Kazai, Gabriella
%P 2415 - 2419
%I ACM
%@ 978-1-4503-8732-3
[39]
S. Ghosh, S. Razniewski, and G. Weikum, “Answering Count Questions with Structured Answers from Text,” 2022. . (arXiv: 2209.07250)
Abstract
In this work we address the challenging case of answering count queries in web search, such as "number of songs by John Lennon". Prior methods merely answer these with a single, and sometimes puzzling, number or return a ranked list of text snippets with different numbers. This paper proposes a methodology for answering count queries with inference, contextualization and explanatory evidence. Unlike previous systems, our method infers final answers from multiple observations, supports semantic qualifiers for the counts, and provides evidence by enumerating representative instances. Experiments with a wide variety of queries, including an existing benchmark, show the benefits of our method and the influence of specific parameter settings. Our code, data and an interactive system demonstration are publicly available at https://github.com/ghoshs/CoQEx and https://nlcounqer.mpi-inf.mpg.de/.
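The inference step can be illustrated with a toy sketch that aggregates several observed counts instead of trusting a single snippet. The median-based aggregation and the example values are assumptions for illustration only, not the system's actual method.

```python
# Toy sketch of inferring a count answer from multiple observations rather
# than a single number. Aggregation by median and all values are assumed
# for illustration only.
from statistics import median

observations = [180, 200, 186, 213]              # hypothetical counts from snippets
instances = ["Imagine", "Jealous Guy", "Woman"]  # representative example instances

answer = {
    "count": int(median(observations)),
    "range": (min(observations), max(observations)),
    "examples": instances,
}
print(answer)
```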
Export
BibTeX
@online{Ghosh_2209.07250,
TITLE = {Answering Count Questions with Structured Answers from Text},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
DOI = {10.48550/arXiv.2209.07250},
EPRINT = {2209.07250},
EPRINTTYPE = {arXiv},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
ABSTRACT = {In this work we address the challenging case of answering count queries in<br>web search, such as ``number of songs by John Lennon''. Prior methods merely<br>answer these with a single, and sometimes puzzling number or return a ranked<br>list of text snippets with different numbers. This paper proposes a methodology<br>for answering count queries with inference, contextualization and explanatory<br>evidence. Unlike previous systems, our method infers final answers from<br>multiple observations, supports semantic qualifiers for the counts, and<br>provides evidence by enumerating representative instances. Experiments with a<br>wide variety of queries, including existing benchmark show the benefits of our<br>method, and the influence of specific parameter settings. Our code, data and an<br>interactive system demonstration are publicly available at<br>https://github.com/ghoshs/CoQEx and https://nlcounqer.mpi-inf.mpg.de/.<br>},
}
Endnote
%0 Report
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Answering Count Questions with Structured Answers from Text :
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-1D84-0
%R 10.48550/arXiv.2209.07250
%D 2022
%X In this work we address the challenging case of answering count queries in<br>web search, such as ``number of songs by John Lennon''. Prior methods merely<br>answer these with a single, and sometimes puzzling number or return a ranked<br>list of text snippets with different numbers. This paper proposes a methodology<br>for answering count queries with inference, contextualization and explanatory<br>evidence. Unlike previous systems, our method infers final answers from<br>multiple observations, supports semantic qualifiers for the counts, and<br>provides evidence by enumerating representative instances. Experiments with a<br>wide variety of queries, including existing benchmark show the benefits of our<br>method, and the influence of specific parameter settings. Our code, data and an<br>interactive system demonstration are publicly available at<br>https://github.com/ghoshs/CoQEx and https://nlcounqer.mpi-inf.mpg.de/.<br>
%K Computer Science, Information Retrieval, cs.IR
[40]
A. Guimarães, “Data Science Methods for the Analysis of Controversial Social Media Discussions,” Universität des Saarlandes, Saarbrücken, 2022.
Abstract
Social media communities like Reddit and Twitter allow users to express their views on topics of their interest, and to engage with other users who may share or oppose these views. This can lead to productive discussions towards a consensus, or to contended debates, where disagreements frequently arise. Prior work on such settings has primarily focused on identifying notable instances of antisocial behavior such as hate-speech and “trolling”, which represent possible threats to the health of a community. These, however, are exceptionally severe phenomena, and do not encompass controversies stemming from user debates, differences of opinions, and off-topic content, all of which can naturally come up in a discussion without going so far as to compromise its development. This dissertation proposes a framework for the systematic analysis of social media discussions that take place in the presence of controversial themes, disagreements, and mixed opinions from participating users. For this, we develop a feature-based model to describe key elements of a discussion, such as its salient topics, the level of activity from users, the sentiments it expresses, and the user feedback it receives. Initially, we build our feature model to characterize adversarial discussions surrounding political campaigns on Twitter, with a focus on the factual and sentimental nature of their topics and the role played by different users involved. We then extend our approach to Reddit discussions, leveraging community feedback signals to define a new notion of controversy and to highlight conversational archetypes that arise from frequent and interesting interaction patterns. We use our feature model to build logistic regression classifiers that can predict future instances of controversy in Reddit communities centered on politics, world news, sports, and personal relationships. Finally, our model also provides the basis for a comparison of different communities in the health domain, where topics and activity vary considerably despite their shared overall focus. In each of these cases, our framework provides insight into how user behavior can shape a community’s individual definition of controversy and its overall identity.
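A minimal sketch of the classification setup described above, assuming three hypothetical discussion-level features and a tiny synthetic dataset; it is not the dissertation's actual feature model or data.

```python
# Minimal sketch: logistic regression over discussion-level features for
# controversy prediction. Feature semantics and the synthetic data below
# are assumptions for illustration only.
import numpy as np
from sklearn.linear_model import LogisticRegression

# hypothetical features per discussion: [activity level, mean sentiment, feedback ratio]
X = np.array([[0.9, -0.4, 0.3],
              [0.2,  0.5, 0.8],
              [0.8, -0.2, 0.4],
              [0.1,  0.6, 0.9]])
y = np.array([1, 0, 1, 0])  # 1 = controversial, 0 = not controversial

clf = LogisticRegression().fit(X, y)
print(clf.predict([[0.7, -0.3, 0.5]]))  # e.g. array([1])
```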
Export
BibTeX
@phdthesis{Decarvalhophd2021,
TITLE = {Data Science Methods for the Analysis of Controversial Social Media Discussions},
AUTHOR = {Guimar{\~a}es, Anna},
LANGUAGE = {eng},
URL = {nbn:de:bsz:291--ds-365021},
DOI = {10.22028/D291-36502},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
DATE = {2022},
ABSTRACT = {Social media communities like Reddit and Twitter allow users to express their views on<br>topics of their interest, and to engage with other users who may share or oppose these views.<br>This can lead to productive discussions towards a consensus, or to contended debates, where<br>disagreements frequently arise.<br>Prior work on such settings has primarily focused on identifying notable instances of antisocial<br>behavior such as hate-speech and {\textquotedblleft}trolling{\textquotedblright}, which represent possible threats to the health of<br>a community. These, however, are exceptionally severe phenomena, and do not encompass<br>controversies stemming from user debates, differences of opinions, and off-topic content, all<br>of which can naturally come up in a discussion without going so far as to compromise its<br>development.<br>This dissertation proposes a framework for the systematic analysis of social media discussions<br>that take place in the presence of controversial themes, disagreements, and mixed opinions from<br>participating users. For this, we develop a feature-based model to describe key elements of a<br>discussion, such as its salient topics, the level of activity from users, the sentiments it expresses,<br>and the user feedback it receives.<br>Initially, we build our feature model to characterize adversarial discussions surrounding<br>political campaigns on Twitter, with a focus on the factual and sentimental nature of their<br>topics and the role played by different users involved. We then extend our approach to Reddit<br>discussions, leveraging community feedback signals to define a new notion of controversy<br>and to highlight conversational archetypes that arise from frequent and interesting interaction<br>patterns. We use our feature model to build logistic regression classifiers that can predict future<br>instances of controversy in Reddit communities centered on politics, world news, sports, and<br>personal relationships. Finally, our model also provides the basis for a comparison of different<br>communities in the health domain, where topics and activity vary considerably despite their<br>shared overall focus. In each of these cases, our framework provides insight into how user<br>behavior can shape a community{\textquoteright}s individual definition of controversy and its overall identity.},
}
Endnote
%0 Thesis
%A Guimarães, Anna
%Y Weikum, Gerhard
%A referee: de Melo, Gerard
%A referee: Yates, Andrew
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Data Science Methods for the Analysis of Controversial Social Media Discussions :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-CDF7-9
%R 10.22028/D291-36502
%U urn:nbn:de:bsz:291--ds-365021
%F OTHER: hdl:20.500.11880/33161
%I Universität des Saarlandes
%C Saarbrücken
%D 2022
%P 94 p.
%V phd
%9 phd
%X Social media communities like Reddit and Twitter allow users to express their views on<br>topics of their interest, and to engage with other users who may share or oppose these views.<br>This can lead to productive discussions towards a consensus, or to contended debates, where<br>disagreements frequently arise.<br>Prior work on such settings has primarily focused on identifying notable instances of antisocial<br>behavior such as hate-speech and “trolling”, which represent possible threats to the health of<br>a community. These, however, are exceptionally severe phenomena, and do not encompass<br>controversies stemming from user debates, differences of opinions, and off-topic content, all<br>of which can naturally come up in a discussion without going so far as to compromise its<br>development.<br>This dissertation proposes a framework for the systematic analysis of social media discussions<br>that take place in the presence of controversial themes, disagreements, and mixed opinions from<br>participating users. For this, we develop a feature-based model to describe key elements of a<br>discussion, such as its salient topics, the level of activity from users, the sentiments it expresses,<br>and the user feedback it receives.<br>Initially, we build our feature model to characterize adversarial discussions surrounding<br>political campaigns on Twitter, with a focus on the factual and sentimental nature of their<br>topics and the role played by different users involved. We then extend our approach to Reddit<br>discussions, leveraging community feedback signals to define a new notion of controversy<br>and to highlight conversational archetypes that arise from frequent and interesting interaction<br>patterns. We use our feature model to build logistic regression classifiers that can predict future<br>instances of controversy in Reddit communities centered on politics, world news, sports, and<br>personal relationships. Finally, our model also provides the basis for a comparison of different<br>communities in the health domain, where topics and activity vary considerably despite their<br>shared overall focus. In each of these cases, our framework provides insight into how user<br>behavior can shape a community’s individual definition of controversy and its overall identity.
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/33161
[41]
M. A. Hedderich, J. Fischer, D. Klakow, and J. Vreeken, “Label-Descriptive Patterns and Their Application to Characterizing Classification Errors,” in Proceedings of the 39th International Conference on Machine Learning (ICML 2022), Baltimore, MD, USA, 2022.
Export
BibTeX
@inproceedings{Hedderich_ICML22,
TITLE = {Label-Descriptive Patterns and Their Application to Characterizing Classification Errors},
AUTHOR = {Hedderich, Michael A. and Fischer, Jonas and Klakow, Dietrich and Vreeken, Jilles},
LANGUAGE = {eng},
ISSN = {1938-7228},
URL = {https://proceedings.mlr.press/v162/hedderich22a.html},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the 39th International Conference on Machine Learning (ICML 2022)},
EDITOR = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
PAGES = {8691--8707},
SERIES = {Proceedings of Machine Learning Research},
VOLUME = {162},
ADDRESS = {Baltimore, MD, USA},
}
Endnote
%0 Conference Proceedings
%A Hedderich, Michael A.
%A Fischer, Jonas
%A Klakow, Dietrich
%A Vreeken, Jilles
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Label-Descriptive Patterns and Their Application to Characterizing Classification Errors :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-165A-7
%U https://proceedings.mlr.press/v162/hedderich22a.html
%D 2022
%B 39th International Conference on Machine Learning
%Z date of event: 2022-07-17 - 2022-07-23
%C Baltimore, MD, USA
%B Proceedings of the 39th International Conference on Machine Learning
%E Chaudhuri, Kamalika; Jegelka, Stefanie; Song, Le; Szepesvari, Csaba; Niu, Gang; Sabato, Sivan
%P 8691 - 8707
%B Proceedings of Machine Learning Research
%N 162
%@ false
[42]
V. T. Ho, D. Stepanova, D. Milchevski, J. Strötgen, and G. Weikum, “Enhancing Knowledge Bases with Quantity Facts,” in WWW ’22, ACM Web Conference, Virtual Event, Lyon, France, 2022.
Export
BibTeX
@inproceedings{Ho_WWW22,
TITLE = {Enhancing Knowledge Bases with Quantity Facts},
AUTHOR = {Ho, Vinh Thinh and Stepanova, Daria and Milchevski, Dragan and Str{\"o}tgen, Jannik and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9096-5},
DOI = {10.1145/3485447.3511932},
PUBLISHER = {ACM},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {WWW '22, ACM Web Conference},
EDITOR = {Laforest, Fr{\'e}d{\'e}rique and Troncy, Rapha{\"e}l and Simperl, Elena and Agarwal, Deepak and Gionis, Aristides and Herman, Ivan and M{\'e}dini, Lionel},
PAGES = {893--901},
ADDRESS = {Virtual Event, Lyon, France},
}
Endnote
%0 Conference Proceedings
%A Ho, Vinh Thinh
%A Stepanova, Daria
%A Milchevski, Dragan
%A Strötgen, Jannik
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Enhancing Knowledge Bases with Quantity Facts :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-614E-2
%R 10.1145/3485447.3511932
%D 2022
%B ACM Web Conference
%Z date of event: 2022-04-25 - 2022-04-29
%C Virtual Event, Lyon, France
%B WWW '22
%E Laforest, Frédérique; Troncy, Raphaël; Simperl, Elena; Agarwal, Deepak; Gionis, Aristides; Herman, Ivan; Médini, Lionel
%P 893 - 901
%I ACM
%@ 978-1-4503-9096-5
[43]
V. T. Ho, “Entities with Quantities: Extraction, Search and Ranking,” Universität des Saarlandes, Saarbrücken, 2022.
Export
BibTeX
@phdthesis{Ho_PhD2022,
TITLE = {Entities with Quantities: Extraction, Search and Ranking},
AUTHOR = {Ho, Vinh Thinh},
LANGUAGE = {eng},
URL = {urn:nbn:de:bsz:291--ds-380308},
DOI = {10.22028/D291-38030},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
DATE = {2022},
}
Endnote
%0 Thesis
%A Ho, Vinh Thinh
%Y Weikum, Gerhard
%A referee: Stepanova, Daria
%A referee: Theobald, Martin
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Entities with Quantities: Extraction, Search and Ranking :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-B756-5
%R 10.22028/D291-38030
%U urn:nbn:de:bsz:291--ds-380308
%F OTHER: hdl:20.500.11880/34538
%I Universität des Saarlandes
%C Saarbrücken
%D 2022
%P xii, 131 p.
%V phd
%9 phd
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/34538
[44]
L.-A. Kaffee, S. Razniewski, G. Amaral, and K. S. Alghamdi, Eds., Wikidata Workshop 2022. CEUR-WS, 2022.
Export
BibTeX
@proceedings{Kaffee_Wikidata22,
TITLE = {Wikidata Workshop 2022},
EDITOR = {Kaffee, Lucie-Aim{\'e}e and Razniewski, Simon and Amaral, Gabriel and Alghamdi, Kholoud Saad},
LANGUAGE = {eng},
URL = {https://ceur-ws.org/Vol-3262/; urn:nbn:de:0074-3262-0},
PUBLISHER = {CEUR-WS},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
SERIES = {CEUR Workshop Proceedings},
VOLUME = {3262},
ADDRESS = {Virtual Event, Hangzhou, China},
}
Endnote
%0 Conference Proceedings
%E Kaffee, Lucie-Aimée
%E Razniewski, Simon
%E Amaral, Gabriel
%E Alghamdi, Kholoud Saad
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Wikidata Workshop 2022 : Proceedings of the 3rd Wikidata Workshop 2022,
co-located with the 21st International Semantic Web Conference (ISWC2022)
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-1663-C
%U https://ceur-ws.org/Vol-3262/
%U urn:nbn:de:0074-3262-0
%I CEUR-WS
%D 2022
%B 3rd Wikidata Workshop
%Z date of event: -
%C Virtual Event, Hangzhou, China
%S CEUR Workshop Proceedings
%V 3262
[45]
P. Lahoti, K. Gummadi, and G. Weikum, “Responsible Model Deployment via Model-agnostic Uncertainty Learning,” Machine Learning, vol. 112, 2022.
Export
BibTeX
@article{Lahoti2022,
TITLE = {Responsible Model Deployment via Model-agnostic Uncertainty Learning},
AUTHOR = {Lahoti, Preethi and Gummadi, Krishna and Weikum, Gerhard},
LANGUAGE = {eng},
ISSN = {0885-6125},
DOI = {10.1007/s10994-022-06248-y},
PUBLISHER = {Springer},
ADDRESS = {Dordrecht},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
JOURNAL = {Machine Learning},
VOLUME = {112},
PAGES = {939--970},
}
Endnote
%0 Journal Article
%A Lahoti, Preethi
%A Gummadi, Krishna
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Responsible Model Deployment via Model-agnostic Uncertainty Learning :
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-58F0-3
%R 10.1007/s10994-022-06248-y
%7 2022
%D 2022
%J Machine Learning
%V 112
%& 939
%P 939 - 970
%I Springer
%C Dordrecht
%@ false
[46]
P. Lahoti, K. Gummadi, and G. Weikum, “Detecting and Mitigating Test-time Failure Risks via Model-agnostic Uncertainty Learning,” in 21st IEEE International Conference on Data Mining (ICDM 2021), Auckland, New Zealand (Virtual Conference), 2022.
Export
BibTeX
@inproceedings{Gummadi_ICDM21,
TITLE = {Detecting and Mitigating Test-time Failure Risks via Model-agnostic Uncertainty Learning},
AUTHOR = {Lahoti, Preethi and Gummadi, Krishna and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-6654-2398-4},
DOI = {10.1109/ICDM51629.2021.00141},
PUBLISHER = {IEEE},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2022},
BOOKTITLE = {21st IEEE International Conference on Data Mining (ICDM 2021)},
EDITOR = {Bailey, James and Miettinen, Pauli and Koh, Yun Sing and Tao, Dacheng and Wu, Xindong},
PAGES = {1174--1179},
ADDRESS = {Auckland, New Zealand (Virtual Conference)},
}
Endnote
%0 Conference Proceedings
%A Lahoti, Preethi
%A Gummadi, Krishna
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Detecting and Mitigating Test-time Failure Risks via Model-agnostic Uncertainty Learning :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-5E15-6
%R 10.1109/ICDM51629.2021.00141
%D 2022
%B 21st IEEE International Conference on Data Mining
%Z date of event: 2021-12-07 - 2021-12-10
%C Auckland, New Zealand (Virtual Conference)
%B 21st IEEE International Conference on Data Mining
%E Bailey, James; Miettinen, Pauli; Koh, Yun Sing; Tao, Dacheng; Wu, Xindong
%P 1174 - 1179
%I IEEE
%@ 978-1-6654-2398-4
[47]
P. Lahoti, “Operationalizing Fairness for Responsible Machine Learning,” Universität des Saarlandes, Saarbrücken, 2022.
Abstract
As machine learning (ML) is increasingly used for decision making in scenarios that impact humans, there is a growing awareness of its potential for unfairness. A large body of recent work has focused on proposing formal notions of fairness in ML, as well as approaches to mitigate unfairness. However, there is a growing disconnect between the ML fairness literature and the needs to operationalize fairness in practice. This thesis addresses the need for responsible ML by developing new models and methods to address challenges in operationalizing fairness in practice. Specifically, it makes the following contributions. First, we tackle a key assumption in the group fairness literature that sensitive demographic attributes such as race and gender are known upfront, and can be readily used in model training to mitigate unfairness. In practice, factors like privacy and regulation often prohibit ML models from collecting or using protected attributes in decision making. To address this challenge we introduce the novel notion of computationally-identifiable errors and propose Adversarially Reweighted Learning (ARL), an optimization method that seeks to improve the worst-case performance over unobserved groups, without requiring access to the protected attributes in the dataset. Second, we argue that while group fairness notions are a desirable fairness criterion, they are fundamentally limited as they reduce fairness to an average statistic over pre-identified protected groups. In practice, automated decisions are made at an individual level, and can adversely impact individual people irrespective of the group statistic. We advance the paradigm of individual fairness by proposing iFair (individually fair representations), an optimization approach for learning a low dimensional latent representation of the data with two goals: to encode the data as well as possible, while removing any information about protected attributes in the transformed representation. Third, we advance the individual fairness paradigm, which requires that similar individuals receive similar outcomes. However, similarity metrics computed over observed feature space can be brittle, and inherently limited in their ability to accurately capture similarity between individuals. To address this, we introduce a novel notion of fairness graphs, wherein pairs of individuals can be identified as deemed similar with respect to the ML objective. We cast the problem of individual fairness into graph embedding, and propose PFR (pairwise fair representations), a method to learn a unified pairwise fair representation of the data. Fourth, we tackle the challenge that production data after model deployment is constantly evolving. As a consequence, in spite of the best efforts in training a fair model, ML systems can be prone to failure risks due to a variety of unforeseen reasons. To ensure responsible model deployment, potential failure risks need to be predicted, and mitigation actions need to be devised, for example, deferring to a human expert when uncertain or collecting additional data to address model’s blind-spots. We propose Risk Advisor, a model-agnostic meta-learner to predict potential failure risks and to give guidance on the sources of uncertainty inducing the risks, by leveraging information theoretic notions of aleatoric and epistemic uncertainty. This dissertation brings ML fairness closer to real-world applications by developing methods that address key practical challenges. 
Extensive experiments on a variety of real-world and synthetic datasets show that our proposed methods are viable in practice.
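As a rough illustration of the aleatoric/epistemic distinction that the Risk Advisor part of the abstract builds on, the following shows a generic ensemble-based decomposition of predictive uncertainty (entropy of the mean prediction vs. mean of per-member entropies). The numbers are toy values; this is not the dissertation's implementation.

import numpy as np

def entropy(p, eps=1e-12):
    return -np.sum(p * np.log(p + eps), axis=-1)

# probs: predictions of 3 hypothetical ensemble members for 2 samples, 2 classes.
probs = np.array([
    [[0.90, 0.10], [0.60, 0.40]],
    [[0.80, 0.20], [0.30, 0.70]],
    [[0.85, 0.15], [0.50, 0.50]],
])

total = entropy(probs.mean(axis=0))      # uncertainty of the averaged prediction
aleatoric = entropy(probs).mean(axis=0)  # average per-member uncertainty
epistemic = total - aleatoric            # disagreement between members
print(epistemic)  # larger for the second sample, where the members disagree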
Export
BibTeX
@phdthesis{Lahotophd2022,
TITLE = {Operationalizing Fairness for Responsible Machine Learning},
AUTHOR = {Lahoti, Preethi},
LANGUAGE = {eng},
URL = {urn:nbn:de:bsz:291--ds-365860},
DOI = {10.22028/D291-36586},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
DATE = {2022},
ABSTRACT = {As machine learning (ML) is increasingly used for decision making in scenarios that impact humans, there is a growing awareness of its potential for unfairness. A large body of recent work has focused on proposing formal notions of fairness in ML, as well as approaches to mitigate unfairness. However, there is a growing disconnect between the ML fairness literature and the needs to operationalize fairness in practice. This thesis addresses the need for responsible ML by developing new models and methods to address challenges in operationalizing fairness in practice. Specifically, it makes the following contributions. First, we tackle a key assumption in the group fairness literature that sensitive demographic attributes such as race and gender are known upfront, and can be readily used in model training to mitigate unfairness. In practice, factors like privacy and regulation often prohibit ML models from collecting or using protected attributes in decision making. To address this challenge we introduce the novel notion of computationally-identifiable errors and propose Adversarially Reweighted Learning (ARL), an optimization method that seeks to improve the worst-case performance over unobserved groups, without requiring access to the protected attributes in the dataset. Second, we argue that while group fairness notions are a desirable fairness criterion, they are fundamentally limited as they reduce fairness to an average statistic over pre-identified protected groups. In practice, automated decisions are made at an individual level, and can adversely impact individual people irrespective of the group statistic. We advance the paradigm of individual fairness by proposing iFair (individually fair representations), an optimization approach for learning a low dimensional latent representation of the data with two goals: to encode the data as well as possible, while removing any information about protected attributes in the transformed representation. Third, we advance the individual fairness paradigm, which requires that similar individuals receive similar outcomes. However, similarity metrics computed over observed feature space can be brittle, and inherently limited in their ability to accurately capture similarity between individuals. To address this, we introduce a novel notion of fairness graphs, wherein pairs of individuals can be identified as deemed similar with respect to the ML objective. We cast the problem of individual fairness into graph embedding, and propose PFR (pairwise fair representations), a method to learn a unified pairwise fair representation of the data. Fourth, we tackle the challenge that production data after model deployment is constantly evolving. As a consequence, in spite of the best efforts in training a fair model, ML systems can be prone to failure risks due to a variety of unforeseen reasons. To ensure responsible model deployment, potential failure risks need to be predicted, and mitigation actions need to be devised, for example, deferring to a human expert when uncertain or collecting additional data to address model{\textquoteright}s blind-spots. We propose Risk Advisor, a model-agnostic meta-learner to predict potential failure risks and to give guidance on the sources of uncertainty inducing the risks, by leveraging information theoretic notions of aleatoric and epistemic uncertainty. This dissertation brings ML fairness closer to real-world applications by developing methods that address key practical challenges. 
Extensive experiments on a variety of real-world and synthetic datasets show that our proposed methods are viable in practice.},
}
Endnote
%0 Thesis
%A Lahoti, Preethi
%Y Weikum, Gerhard
%A referee: Gummadi, Krishna
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Group K. Gummadi, Max Planck Institute for Software Systems, Max Planck Society
%T Operationalizing Fairness for Responsible Machine Learning :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-CEC6-F
%R 10.22028/D291-36586
%U urn:nbn:de:bsz:291--ds-365860
%F OTHER: hdl:20.500.11880/33465
%I Universität des Saarlandes
%C Saarbrücken
%D 2022
%P 129 p.
%V phd
%9 phd
%X As machine learning (ML) is increasingly used for decision making in scenarios that impact humans, there is a growing awareness of its potential for unfairness. A large body of recent work has focused on proposing formal notions of fairness in ML, as well as approaches to mitigate unfairness. However, there is a growing disconnect between the ML fairness literature and the needs to operationalize fairness in practice. This thesis addresses the need for responsible ML by developing new models and methods to address challenges in operationalizing fairness in practice. Specifically, it makes the following contributions. First, we tackle a key assumption in the group fairness literature that sensitive demographic attributes such as race and gender are known upfront, and can be readily used in model training to mitigate unfairness. In practice, factors like privacy and regulation often prohibit ML models from collecting or using protected attributes in decision making. To address this challenge we introduce the novel notion of computationally-identifiable errors and propose Adversarially Reweighted Learning (ARL), an optimization method that seeks to improve the worst-case performance over unobserved groups, without requiring access to the protected attributes in the dataset. Second, we argue that while group fairness notions are a desirable fairness criterion, they are fundamentally limited as they reduce fairness to an average statistic over pre-identified protected groups. In practice, automated decisions are made at an individual level, and can adversely impact individual people irrespective of the group statistic. We advance the paradigm of individual fairness by proposing iFair (individually fair representations), an optimization approach for learning a low dimensional latent representation of the data with two goals: to encode the data as well as possible, while removing any information about protected attributes in the transformed representation. Third, we advance the individual fairness paradigm, which requires that similar individuals receive similar outcomes. However, similarity metrics computed over observed feature space can be brittle, and inherently limited in their ability to accurately capture similarity between individuals. To address this, we introduce a novel notion of fairness graphs, wherein pairs of individuals can be identified as deemed similar with respect to the ML objective. We cast the problem of individual fairness into graph embedding, and propose PFR (pairwise fair representations), a method to learn a unified pairwise fair representation of the data. Fourth, we tackle the challenge that production data after model deployment is constantly evolving. As a consequence, in spite of the best efforts in training a fair model, ML systems can be prone to failure risks due to a variety of unforeseen reasons. To ensure responsible model deployment, potential failure risks need to be predicted, and mitigation actions need to be devised, for example, deferring to a human expert when uncertain or collecting additional data to address model’s blind-spots. We propose Risk Advisor, a model-agnostic meta-learner to predict potential failure risks and to give guidance on the sources of uncertainty inducing the risks, by leveraging information theoretic notions of aleatoric and epistemic uncertainty. This dissertation brings ML fairness closer to real-world applications by developing methods that address key practical challenges. 
Extensive experiments on a variety of real-world and synthetic datasets show that our proposed methods are viable in practice.
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/33465
[48]
J. Lin, R. Nogueira, and A. Yates, Pretrained Transformers for Text Ranking : BERT and Beyond. Cham: Springer International Publishing, 2022.
Export
BibTeX
@book{LinSLHT53,
TITLE = {Pretrained Transformers for Text Ranking : {BERT} and Beyond},
AUTHOR = {Lin, Jimmy and Nogueira, Rodrigo and Yates, Andrew},
LANGUAGE = {eng},
ISBN = {978-3-031-02181-7},
DOI = {10.1007/978-3-031-02181-7},
PUBLISHER = {Springer International Publishing},
ADDRESS = {Cham},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
PAGES = {XVII, 307},
SERIES = {Synthesis Lectures on Human Language Technologies},
}
Endnote
%0 Book
%A Lin, Jimmy
%A Nogueira, Rodrigo
%A Yates, Andrew
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Pretrained Transformers for Text Ranking : BERT and Beyond :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC2B-D
%@ 978-3-031-02181-7
%R 10.1007/978-3-031-02181-7
%I Springer International Publishing
%C Cham
%D 2022
%P XVII, 307
%B Synthesis Lectures on Human Language Technologies
[49]
A. Marx and J. Fischer, “Estimating Mutual Information via Geodesic kNN,” in Proceedings of the SIAM International Conference on Data Mining (SDM 2022), Alexandria, VA, USA, 2022.
Export
BibTeX
@inproceedings{Marx_SDM2022,
TITLE = {{Estimating Mutual Information via Geodesic $k$NN}},
AUTHOR = {Marx, Alexander and Fischer, Jonas},
LANGUAGE = {eng},
ISBN = {978-1-61197-717-2},
DOI = {10.1137/1.9781611977172.47},
PUBLISHER = {SIAM},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the SIAM International Conference on Data Mining (SDM 2022)},
PAGES = {415--423},
ADDRESS = {Alexandria, VA, USA},
}
Endnote
%0 Conference Proceedings
%A Marx, Alexander
%A Fischer, Jonas
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Estimating Mutual Information via Geodesic kNN :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-B19D-E
%R 10.1137/1.9781611977172.47
%D 2022
%B SIAM International Conference on Data Mining
%Z date of event: 2022-04-28 - 2022-04-30
%C Alexandria, VA, USA
%B Proceedings of the SIAM International Conference on Data Mining
%P 415 - 423
%I SIAM
%@ 978-1-61197-717-2
[50]
T. Nguyen, A. Yates, A. Zirikly, B. Desmet, and A. Cohan, “Improving the Generalizability of Depression Detection by Leveraging Clinical Questionnaires,” 2022. [Online]. Available: https://arxiv.org/abs/2204.10432. (arXiv: 2204.10432)
Abstract
Automated methods have been widely used to identify and analyze mental health conditions (e.g., depression) from various sources of information, including social media. Yet, deployment of such models in real-world healthcare applications faces challenges including poor out-of-domain generalization and lack of trust in black box models. In this work, we propose approaches for depression detection that are constrained to different degrees by the presence of symptoms described in PHQ9, a questionnaire used by clinicians in the depression screening process. In dataset-transfer experiments on three social media datasets, we find that grounding the model in PHQ9's symptoms substantially improves its ability to generalize to out-of-distribution data compared to a standard BERT-based approach. Furthermore, this approach can still perform competitively on in-domain data. These results and our qualitative analyses suggest that grounding model predictions in clinically-relevant symptoms can improve generalizability while producing a model that is easier to inspect.
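To make the idea of symptom-grounded prediction concrete, here is a deliberately naive sketch that scores a post against a few PHQ-9 symptoms by keyword matching and then aggregates the evidence. The keyword lists, symptom names, and threshold are placeholders; the paper's models use learned neural relevance scores rather than keyword lookup.

# Toy symptom lexicon; only a subset of the nine PHQ-9 symptoms is shown.
PHQ9_KEYWORDS = {
    "anhedonia": ["no interest", "no pleasure", "don't enjoy"],
    "depressed_mood": ["hopeless", "feeling down", "depressed"],
    "sleep": ["insomnia", "can't sleep", "sleeping too much"],
    "fatigue": ["tired", "no energy", "exhausted"],
}

def symptom_scores(post: str) -> dict[str, int]:
    text = post.lower()
    return {s: sum(kw in text for kw in kws) for s, kws in PHQ9_KEYWORDS.items()}

def predict(post: str, threshold: int = 2) -> bool:
    # Aggregate symptom evidence; a real model would learn this aggregation.
    return sum(symptom_scores(post).values()) >= threshold

print(predict("I'm exhausted, can't sleep, and feel hopeless lately."))  # True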
Export
BibTeX
@online{Nguyen2204.10432,
TITLE = {Improving the Generalizability of Depression Detection by Leveraging Clinical Questionnaires},
AUTHOR = {Nguyen, Thong and Yates, Andrew and Zirikly, Ayah and Desmet, Bart and Cohan, Arman},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2204.10432},
EPRINT = {2204.10432},
EPRINTTYPE = {arXiv},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Automated methods have been widely used to identify and analyze mental health<br>conditions (e.g., depression) from various sources of information, including<br>social media. Yet, deployment of such models in real-world healthcare<br>applications faces challenges including poor out-of-domain generalization and<br>lack of trust in black box models. In this work, we propose approaches for<br>depression detection that are constrained to different degrees by the presence<br>of symptoms described in PHQ9, a questionnaire used by clinicians in the<br>depression screening process. In dataset-transfer experiments on three social<br>media datasets, we find that grounding the model in PHQ9's symptoms<br>substantially improves its ability to generalize to out-of-distribution data<br>compared to a standard BERT-based approach. Furthermore, this approach can<br>still perform competitively on in-domain data. These results and our<br>qualitative analyses suggest that grounding model predictions in<br>clinically-relevant symptoms can improve generalizability while producing a<br>model that is easier to inspect.<br>},
}
Endnote
%0 Report
%A Nguyen, Thong
%A Yates, Andrew
%A Zirikly, Ayah
%A Desmet, Bart
%A Cohan, Arman
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
%T Improving the Generalizability of Depression Detection by Leveraging Clinical Questionnaires :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-166D-2
%U https://arxiv.org/abs/2204.10432
%D 2022
%X Automated methods have been widely used to identify and analyze mental health<br>conditions (e.g., depression) from various sources of information, including<br>social media. Yet, deployment of such models in real-world healthcare<br>applications faces challenges including poor out-of-domain generalization and<br>lack of trust in black box models. In this work, we propose approaches for<br>depression detection that are constrained to different degrees by the presence<br>of symptoms described in PHQ9, a questionnaire used by clinicians in the<br>depression screening process. In dataset-transfer experiments on three social<br>media datasets, we find that grounding the model in PHQ9's symptoms<br>substantially improves its ability to generalize to out-of-distribution data<br>compared to a standard BERT-based approach. Furthermore, this approach can<br>still perform competitively on in-domain data. These results and our<br>qualitative analyses suggest that grounding model predictions in<br>clinically-relevant symptoms can improve generalizability while producing a<br>model that is easier to inspect.<br>
%K Computer Science, Computation and Language, cs.CL
[51]
T. Nguyen, A. Yates, A. Zirikly, B. Desmet, and A. Cohan, “Improving the Generalizability of Depression Detection by Leveraging Clinical Questionnaires,” in The 60th Annual Meeting of the Association for Computational Linguistics (ACL 2022), Dublin, Ireland, 2022.
Export
BibTeX
@inproceedings{Nguyen_ACL22,
TITLE = {Improving the Generalizability of Depression Detection by Leveraging Clinical Questionnaires},
AUTHOR = {Nguyen, Thong and Yates, Andrew and Zirikly, Ayah and Desmet, Bart and Cohan, Arman},
LANGUAGE = {eng},
ISBN = {978-1-955917-21-6},
DOI = {10.18653/v1/2022.acl-long.578},
PUBLISHER = {ACL},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The 60th Annual Meeting of the Association for Computational Linguistics (ACL 2022)},
EDITOR = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
PAGES = {8446--8459},
ADDRESS = {Dublin, Ireland},
}
Endnote
%0 Conference Proceedings
%A Nguyen, Thong
%A Yates, Andrew
%A Zirikly, Ayah
%A Desmet, Bart
%A Cohan, Arman
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
%T Improving the Generalizability of Depression Detection by Leveraging Clinical Questionnaires :
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-1DAA-6
%R 10.18653/v1/2022.acl-long.578
%D 2022
%B 60th Annual Meeting of the Association for Computational Linguistics
%Z date of event: 2022-05-22 - 2022-05-27
%C Dublin, Ireland
%B The 60th Annual Meeting of the Association for Computational Linguistics
%E Muresan, Smaranda; Nakov, Preslav; Villavicencio, Aline
%P 8446 - 8459
%I ACL
%@ 978-1-955917-21-6
[52]
T.-P. Nguyen, S. Razniewski, A. Varde, and G. Weikum, “Extracting Cultural Commonsense Knowledge at Scale,” 2022. [Online]. Available: https://arxiv.org/abs/2210.07763. (arXiv: 2210.07763)
Abstract
Structured knowledge is important for many AI applications. Commonsense knowledge, which is crucial for robust human-centric AI, is covered by a small number of structured knowledge projects. However, they lack knowledge about human traits and behaviors conditioned on socio-cultural contexts, which is crucial for situative AI. This paper presents CANDLE, an end-to-end methodology for extracting high-quality cultural commonsense knowledge (CCSK) at scale. CANDLE extracts CCSK assertions from a huge web corpus and organizes them into coherent clusters, for 3 domains of subjects (geography, religion, occupation) and several cultural facets (food, drinks, clothing, traditions, rituals, behaviors). CANDLE includes judicious techniques for classification-based filtering and scoring of interestingness. Experimental evaluations show the superiority of the CANDLE CCSK collection over prior works, and an extrinsic use case demonstrates the benefits of CCSK for the GPT-3 language model. Code and data can be accessed at https://cultural-csk.herokuapp.com/.
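For a flavor of the clustering step mentioned in the abstract (grouping extracted assertions into coherent clusters), here is a stand-in sketch using TF-IDF and k-means; the example assertions and the choice of clustering method are illustrative assumptions, not the CANDLE pipeline itself.

from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

assertions = [
    "In Japan, people remove their shoes before entering a home.",
    "Japanese meals often include miso soup.",
    "In Mexico, Day of the Dead honors deceased relatives.",
    "Mexican cuisine makes heavy use of corn tortillas.",
]

# Represent each assertion as a TF-IDF vector and group them into two clusters.
X = TfidfVectorizer(stop_words="english").fit_transform(assertions)
labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(X)
print(list(zip(labels.tolist(), assertions)))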
Export
BibTeX
@online{Nguyen2210.07763,
TITLE = {Extracting Cultural Commonsense Knowledge at Scale},
AUTHOR = {Nguyen, Tuan-Phong and Razniewski, Simon and Varde, Aparna and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2210.07763},
DOI = {10.48550/arXiv.2210.07763},
EPRINT = {2210.07763},
EPRINTTYPE = {arXiv},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Structured knowledge is important for many AI applications. Commonsense<br>knowledge, which is crucial for robust human-centric AI, is covered by a small<br>number of structured knowledge projects. However, they lack knowledge about<br>human traits and behaviors conditioned on socio-cultural contexts, which is<br>crucial for situative AI. This paper presents CANDLE, an end-to-end methodology<br>for extracting high-quality cultural commonsense knowledge (CCSK) at scale.<br>CANDLE extracts CCSK assertions from a huge web corpus and organizes them into<br>coherent clusters, for 3 domains of subjects (geography, religion, occupation)<br>and several cultural facets (food, drinks, clothing, traditions, rituals,<br>behaviors). CANDLE includes judicious techniques for classification-based<br>filtering and scoring of interestingness. Experimental evaluations show the<br>superiority of the CANDLE CCSK collection over prior works, and an extrinsic<br>use case demonstrates the benefits of CCSK for the GPT-3 language model. Code<br>and data can be accessed at https://cultural-csk.herokuapp.com/.<br>},
}
Endnote
%0 Report
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Varde, Aparna
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Extracting Cultural Commonsense Knowledge at Scale :
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-58B3-8
%U https://arxiv.org/abs/2210.07763
%R 10.48550/arXiv.2210.07763
%D 2022
%X Structured knowledge is important for many AI applications. Commonsense<br>knowledge, which is crucial for robust human-centric AI, is covered by a small<br>number of structured knowledge projects. However, they lack knowledge about<br>human traits and behaviors conditioned on socio-cultural contexts, which is<br>crucial for situative AI. This paper presents CANDLE, an end-to-end methodology<br>for extracting high-quality cultural commonsense knowledge (CCSK) at scale.<br>CANDLE extracts CCSK assertions from a huge web corpus and organizes them into<br>coherent clusters, for 3 domains of subjects (geography, religion, occupation)<br>and several cultural facets (food, drinks, clothing, traditions, rituals,<br>behaviors). CANDLE includes judicious techniques for classification-based<br>filtering and scoring of interestingness. Experimental evaluations show the<br>superiority of the CANDLE CCSK collection over prior works, and an extrinsic<br>use case demonstrates the benefits of CCSK for the GPT-3 language model. Code<br>and data can be accessed at https://cultural-csk.herokuapp.com/.<br>
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI
[53]
T.-P. Nguyen and S. Razniewski, “Materialized Knowledge Bases from Commonsense Transformers,” in Proceedings of the First Workshop on Commonsense Representation and Reasoning (CSRR 2022), Dublin, Ireland, 2022.
Export
BibTeX
@inproceedings{Nguyen_CSRR22,
TITLE = {Materialized Knowledge Bases from Commonsense Transformers},
AUTHOR = {Nguyen, Tuan-Phong and Razniewski, Simon},
LANGUAGE = {eng},
ISBN = {978-1-955917-28-5},
URL = {https://openreview.net/forum?id=HI5M4MYedZ5},
PUBLISHER = {ACL},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the First Workshop on Commonsense Representation and Reasoning (CSRR 2022)},
EDITOR = {Bosselut, Antoine and Li, Xiang and Lin, Bill Yuchen and Shwartz, Vered and Majumder, Bodhisattwa Prasad and Kumar Lal, Yash and Rudinger, Rachel and Ren, Xiang and Tandon, Niket and Zouhar, Vil{\'e}m},
PAGES = {36--42},
ADDRESS = {Dublin, Ireland},
}
Endnote
%0 Conference Proceedings
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Materialized Knowledge Bases from Commonsense Transformers :
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-1D87-D
%U https://openreview.net/forum?id=HI5M4MYedZ5
%D 2022
%B 1st Workshop on Commonsense Representation and Reasoning
%Z date of event: 2022-05-27 - 2022-05-27
%C Dublin, Ireland
%B Proceedings of the First Workshop on Commonsense Representation and Reasoning
%E Bosselut, Antoine; Li, Xiang; Lin, Bill Yuchen; Shwartz, Vered; Majumder, Bodhisattwa Prasad; Kumar Lal, Yash; Rudinger, Rachel; Ren, Xiang; Tandon, Niket; Zouhar, Vilém
%P 36 - 42
%I ACL
%@ 978-1-955917-28-5
[54]
R. Pradeep, Y. Liu, X. Zhang, Y. Li, A. Yates, and J. Lin, “Squeezing Water from a Stone: A Bag of Tricks for Further Improving Cross-Encoder Effectiveness for Reranking,” in Advances in Information Retrieval (ECIR 2022), Stavanger, Norway, 2022.
Export
BibTeX
@inproceedings{Pradeep_ECIR2022,
TITLE = {Squeezing Water from a Stone: {A} Bag of Tricks for Further Improving Cross-Encoder Effectiveness for Reranking},
AUTHOR = {Pradeep, Ronak and Liu, Yuqi and Zhang, Xinyu and Li, Yilin and Yates, Andrew and Lin, Jimmy},
LANGUAGE = {eng},
ISBN = {978-3-030-99736-6},
DOI = {10.1007/978-3-030-99736-6_44},
PUBLISHER = {Springer},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
DATE = {2022},
BOOKTITLE = {Advances in Information Retrieval (ECIR 2022)},
EDITOR = {Hagen, Matthias and Verberne, Suzan and Macdonald, Craig and Seifert, Christin and Balog, Krisztian and N{\o}rv{\aa}g, Kjetil and Setty, Vinay},
PAGES = {655--670},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {13185},
ADDRESS = {Stavanger, Norway},
}
Endnote
%0 Conference Proceedings
%A Pradeep, Ronak
%A Liu, Yuqi
%A Zhang, Xinyu
%A Li, Yilin
%A Yates, Andrew
%A Lin, Jimmy
%+ External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Squeezing Water from a Stone: A Bag of Tricks for Further Improving Cross-Encoder Effectiveness for Reranking :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-9E28-8
%R 10.1007/978-3-030-99736-6_44
%D 2022
%B 44th European Conference on IR Research
%Z date of event: 2022-04-10 - 2022-04-14
%C Stavanger, Norway
%B Advances in Information Retrieval
%E Hagen, Matthias; Verberne, Suzan; Macdonald, Craig; Seifert, Christin; Balog, Krisztian; Nørvåg, Kjetil; Setty, Vinay
%P 655 - 670
%I Springer
%@ 978-3-030-99736-6
%B Lecture Notes in Computer Science
%N 13185
[55]
M. Puri, A. S. Varde, and G. de Melo, “Commonsense Based Text Mining on Urban Policy,” Language Resources and Evaluation, 2022.
Export
BibTeX
@article{Puri2022,
TITLE = {Commonsense Based Text Mining on Urban Policy},
AUTHOR = {Puri, Manish and Varde, Aparna S. and de Melo, Gerard},
LANGUAGE = {eng},
ISSN = {1574-020X; 1572-0218; 1572-8412; 1574-0218; 0010-4817},
DOI = {10.1007/s10579-022-09584-6},
PUBLISHER = {Springer},
ADDRESS = {New York, NY},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
JOURNAL = {Language Resources and Evaluation},
}
Endnote
%0 Journal Article
%A Puri, Manish
%A Varde, Aparna S.
%A de Melo, Gerard
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Commonsense Based Text Mining on Urban Policy :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-20AC-0
%R 10.1007/s10579-022-09584-6
%7 2022
%D 2022
%J Language Resources and Evaluation
%O Computers and the Humanities Lang Resources & Evaluation
%I Springer
%C New York, NY
%@ false
%U https://rdcu.be/cJwGl
[56]
J. Romero and S. Razniewski, “Do Children Texts Hold The Key To Commonsense Knowledge?,” 2022. [Online]. Available: https://arxiv.org/abs/2210.04530. (arXiv: 2210.04530)
Abstract
Compiling comprehensive repositories of commonsense knowledge is a long-standing problem in AI. Many concerns revolve around the issue of reporting bias, i.e., that frequency in text sources is not a good proxy for relevance or truth. This paper explores whether children's texts hold the key to commonsense knowledge compilation, based on the hypothesis that such content makes fewer assumptions on the reader's knowledge, and therefore spells out commonsense more explicitly. An analysis with several corpora shows that children's texts indeed contain much more, and more typical commonsense assertions. Moreover, experiments show that this advantage can be leveraged in popular language-model-based commonsense knowledge extraction settings, where task-unspecific fine-tuning on small amounts of children texts (childBERT) already yields significant improvements. This provides a refreshing perspective different from the common trend of deriving progress from ever larger models and corpora.
Export
BibTeX
@online{Romero2210.04530,
TITLE = {Do Children Texts Hold The Key To Commonsense Knowledge?},
AUTHOR = {Romero, Julien and Razniewski, Simon},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2210.04530},
DOI = {10.48550/arXiv.2210.04530},
EPRINT = {2210.04530},
EPRINTTYPE = {arXiv},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Compiling comprehensive repositories of commonsense knowledge is a<br>long-standing problem in AI. Many concerns revolve around the issue of<br>reporting bias, i.e., that frequency in text sources is not a good proxy for<br>relevance or truth. This paper explores whether children's texts hold the key<br>to commonsense knowledge compilation, based on the hypothesis that such content<br>makes fewer assumptions on the reader's knowledge, and therefore spells out<br>commonsense more explicitly. An analysis with several corpora shows that<br>children's texts indeed contain much more, and more typical commonsense<br>assertions. Moreover, experiments show that this advantage can be leveraged in<br>popular language-model-based commonsense knowledge extraction settings, where<br>task-unspecific fine-tuning on small amounts of children texts (childBERT)<br>already yields significant improvements. This provides a refreshing perspective<br>different from the common trend of deriving progress from ever larger models<br>and corpora.<br>},
}
Endnote
%0 Report
%A Romero, Julien
%A Razniewski, Simon
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Do Children Texts Hold The Key To Commonsense Knowledge? :
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-58AA-3
%U https://arxiv.org/abs/2210.04530
%R 10.48550/arXiv.2210.04530
%D 2022
%X Compiling comprehensive repositories of commonsense knowledge is a<br>long-standing problem in AI. Many concerns revolve around the issue of<br>reporting bias, i.e., that frequency in text sources is not a good proxy for<br>relevance or truth. This paper explores whether children's texts hold the key<br>to commonsense knowledge compilation, based on the hypothesis that such content<br>makes fewer assumptions on the reader's knowledge, and therefore spells out<br>commonsense more explicitly. An analysis with several corpora shows that<br>children's texts indeed contain much more, and more typical commonsense<br>assertions. Moreover, experiments show that this advantage can be leveraged in<br>popular language-model-based commonsense knowledge extraction settings, where<br>task-unspecific fine-tuning on small amounts of children texts (childBERT)<br>already yields significant improvements. This provides a refreshing perspective<br>different from the common trend of deriving progress from ever larger models<br>and corpora.<br>
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI
[57]
J. Romero and S. Razniewski, “Do Children Texts Hold The Key To Commonsense Knowledge?,” in Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing (EMNLP 2022), Abu Dhabi, United Arab Emirates, 2022.
Export
BibTeX
@inproceedings{DBLP:conf/emnlp/RomeroR22,
TITLE = {Do Children Texts Hold The Key To Commonsense Knowledge?},
AUTHOR = {Romero, Julien and Razniewski, Simon},
LANGUAGE = {eng},
URL = {https://aclanthology.org/2022.emnlp-main.752/},
PUBLISHER = {ACL},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing (EMNLP 2022)},
EDITOR = {Goldberg, Yoav and Kozareva, Zornitsa and Zhang, Yue},
PAGES = {10954--10959},
ADDRESS = {Abu Dhabi, United Arab Emirates},
}
Endnote
%0 Conference Proceedings
%A Romero, Julien
%A Razniewski, Simon
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Do Children Texts Hold The Key To Commonsense Knowledge? :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DBE5-B
%U https://aclanthology.org/2022.emnlp-main.752/
%D 2022
%B Conference on Empirical Methods in Natural Language Processing
%Z date of event: 2022-12-07 - 2022-12-11
%C Abu Dhabi, United Arab Emirates
%B Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%E Goldberg, Yoav; Kozareva, Zornitsa; Zhang, Yue
%P 10954 - 10959
%I ACL
[58]
S. Singhania, T.-P. Nguyen, and S. Razniewski, Eds., Knowledge Base Construction from Pre-trained Language Models 2022. CEUR-WS, 2022.
Export
BibTeX
@proceedings{SinghaniaLMKBC22,
TITLE = {Knowledge Base Construction from Pre-trained Language Models 2022 (LM-KBC 2022)},
EDITOR = {Singhania, Sneha and Nguyen, Tuan-Phong and Razniewski, Simon},
LANGUAGE = {eng},
URL = {urn:nbn:de:0074-3274-1; http://ceur-ws.org/Vol-3274/},
PUBLISHER = {CEUR-WS},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
SERIES = {CEUR Workshop Proceedings},
VOLUME = {3274},
ADDRESS = {Virtual Event, Hangzhou, China},
}
Endnote
%0 Conference Proceedings
%E Singhania, Sneha
%E Nguyen, Tuan-Phong
%E Razniewski, Simon
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Knowledge Base Construction from Pre-trained Language Models 2022 : Proceedings of the Semantic Web Challenge on Knowledge Base Construction from Pre-trained Language Models 2022
co-located with the 21st International Semantic Web Conference (ISWC2022)
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-C723-D
%U urn:nbn:de:0074-3274-1
%U http://ceur-ws.org/Vol-3274/
%I CEUR-WS
%D 2022
%B Semantic Web Challenge on Knowledge Base Construction from Pre-trained Language Models
%Z date of event: 2022-10 - 2022-10
%C Virtual Event, Hangzhou, China
%S CEUR Workshop Proceedings
%V 3274
[59]
S. Singhania, S. Razniewski, and G. Weikum, “Predicting Document Coverage for Relation Extraction,” Transactions of the Association of Computational Linguistics, vol. 10, 2022.
Export
BibTeX
@article{Singhania2022,
TITLE = {Predicting Document Coverage for Relation Extraction},
AUTHOR = {Singhania, Sneha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISSN = {2307-387X},
DOI = {10.1162/tacl_a_00456},
PUBLISHER = {ACL},
ADDRESS = {Cambridge, MA},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
JOURNAL = {Transactions of the Association of Computational Linguistics},
VOLUME = {10},
PAGES = {207--223},
}
Endnote
%0 Journal Article
%A Singhania, Sneha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Predicting Document Coverage for Relation Extraction :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-27B8-B
%R 10.1162/tacl_a_00456
%7 2022
%D 2022
%J Transactions of the Association of Computational Linguistics
%V 10
%& 207
%P 207 - 223
%I ACL
%C Cambridge, MA
%@ false
[60]
A. Tigunova, “Extracting Personal Information from Conversations,” Universität des Saarlandes, Saarbrücken, 2022.
Abstract
Personal knowledge is a versatile resource that is valuable for a wide range of downstream applications. Background facts about users can allow chatbot assistants to produce more topical and empathic replies. In the context of recommendation and retrieval models, personal facts can be used to customize the ranking results for individual users. A Personal Knowledge Base, populated with personal facts, such as demographic information, interests and interpersonal relationships, is a unique endpoint for storing and querying personal knowledge. Such knowledge bases are easily interpretable and can provide users with full control over their own personal knowledge, including revising stored facts and managing access by downstream services for personalization purposes. To alleviate users from extensive manual effort to build such personal knowledge base, we can leverage automated extraction methods applied to the textual content of the users, such as dialogue transcripts or social media posts. Mainstream extraction methods specialize on well-structured data, such as biographical texts or encyclopedic articles, which are rare for most people. In turn, conversational data is abundant but challenging to process and requires specialized methods for extraction of personal facts. In this dissertation we address the acquisition of personal knowledge from conversational data. We propose several novel deep learning models for inferring speakers’ personal attributes: • Demographic attributes, age, gender, profession and family status, are inferred by HAMs - hierarchical neural classifiers with attention mechanism. Trained HAMs can be transferred between different types of conversational data and provide interpretable predictions. • Long-tailed personal attributes, hobby and profession, are predicted with CHARM - a zero-shot learning model, overcoming the lack of labeled training samples for rare attribute values. By linking conversational utterances to external sources, CHARM is able to predict attribute values which it never saw during training. • Interpersonal relationships are inferred with PRIDE - a hierarchical transformer-based model. To accurately predict fine-grained relationships, PRIDE leverages personal traits of the speakers and the style of conversational utterances. Experiments with various conversational texts, including Reddit discussions and movie scripts, demonstrate the viability of our methods and their superior performance compared to state-of-the-art baselines.
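As a small illustration of the attention-based pooling that the HAM classifiers in the abstract rely on, the sketch below pools a speaker's utterance embeddings into a single representation via softmax attention. Dimensions, weights, and inputs are random placeholders, not the dissertation's trained models.

import numpy as np

rng = np.random.default_rng(0)
utterances = rng.normal(size=(5, 16))  # 5 utterance embeddings of dimension 16
w = rng.normal(size=16)                # attention vector (learned in practice)

scores = utterances @ w
alpha = np.exp(scores - scores.max())
alpha /= alpha.sum()                   # softmax attention weights over utterances
speaker_repr = alpha @ utterances      # weighted pooling into one speaker vector
print(alpha.round(3), speaker_repr.shape)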
Export
BibTeX
@phdthesis{Tiguphd2022,
TITLE = {Extracting Personal Information from Conversations},
AUTHOR = {Tigunova, Anna},
LANGUAGE = {eng},
URL = {urn:nbn:de:bsz:291--ds-356280},
DOI = {10.22028/D291-35628},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
DATE = {2022},
ABSTRACT = {Personal knowledge is a versatile resource that is valuable for a wide range of downstream applications. Background facts about users can allow chatbot assistants to produce more topical and empathic replies. In the context of recommendation and retrieval models, personal facts can be used to customize the ranking results for individual users. A Personal Knowledge Base, populated with personal facts, such as demographic information, interests and interpersonal relationships, is a unique endpoint for storing and querying personal knowledge. Such knowledge bases are easily interpretable and can provide users with full control over their own personal knowledge, including revising stored facts and managing access by downstream services for personalization purposes. To alleviate users from extensive manual effort to build such personal knowledge base, we can leverage automated extraction methods applied to the textual content of the users, such as dialogue transcripts or social media posts. Mainstream extraction methods specialize on well-structured data, such as biographical texts or encyclopedic articles, which are rare for most people. In turn, conversational data is abundant but challenging to process and requires specialized methods for extraction of personal facts. In this dissertation we address the acquisition of personal knowledge from conversational data. We propose several novel deep learning models for inferring speakers{\textquoteright} personal attributes: \mbox{$\bullet$} Demographic attributes, age, gender, profession and family status, are inferred by HAMs -- hierarchical neural classifiers with attention mechanism. Trained HAMs can be transferred between different types of conversational data and provide interpretable predictions. \mbox{$\bullet$} Long-tailed personal attributes, hobby and profession, are predicted with CHARM -- a zero-shot learning model, overcoming the lack of labeled training samples for rare attribute values. By linking conversational utterances to external sources, CHARM is able to predict attribute values which it never saw during training. \mbox{$\bullet$} Interpersonal relationships are inferred with PRIDE -- a hierarchical transformer-based model. To accurately predict fine-grained relationships, PRIDE leverages personal traits of the speakers and the style of conversational utterances. Experiments with various conversational texts, including Reddit discussions and movie scripts, demonstrate the viability of our methods and their superior performance compared to state-of-the-art baselines.},
}
Endnote
%0 Thesis
%A Tigunova, Anna
%Y Weikum, Gerhard
%A referee: Yates, Andrew
%A referee: Demberg, Vera
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Extracting Personal Information from Conversations :
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-3FE1-1
%R 10.22028/D291-35628
%U urn:nbn:de:bsz:291--ds-356280
%F OTHER: hdl:20.500.11880/32546
%I Universität des Saarlandes
%C Saarbrücken
%D 2022
%P 139 p.
%V phd
%9 phd
%X Personal knowledge is a versatile resource that is valuable for a wide range of downstream applications. Background facts about users can allow chatbot assistants to produce more topical and empathic replies. In the context of recommendation and retrieval models, personal facts can be used to customize the ranking results for individual users. A Personal Knowledge Base, populated with personal facts, such as demographic information, interests and interpersonal relationships, is a unique endpoint for storing and querying personal knowledge. Such knowledge bases are easily interpretable and can provide users with full control over their own personal knowledge, including revising stored facts and managing access by downstream services for personalization purposes. To alleviate users from extensive manual effort to build such personal knowledge base, we can leverage automated extraction methods applied to the textual content of the users, such as dialogue transcripts or social media posts. Mainstream extraction methods specialize on well-structured data, such as biographical texts or encyclopedic articles, which are rare for most people. In turn, conversational data is abundant but challenging to process and requires specialized methods for extraction of personal facts. In this dissertation we address the acquisition of personal knowledge from conversational data. We propose several novel deep learning models for inferring speakers’ personal attributes: • Demographic attributes, age, gender, profession and family status, are inferred by HAMs - hierarchical neural classifiers with attention mechanism. Trained HAMs can be transferred between different types of conversational data and provide interpretable predictions. • Long-tailed personal attributes, hobby and profession, are predicted with CHARM - a zero-shot learning model, overcoming the lack of labeled training samples for rare attribute values. By linking conversational utterances to external sources, CHARM is able to predict attribute values which it never saw during training. • Interpersonal relationships are inferred with PRIDE - a hierarchical transformer-based model. To accurately predict fine-grained relationships, PRIDE leverages personal traits of the speakers and the style of conversational utterances. Experiments with various conversational texts, including Reddit discussions and movie scripts, demonstrate the viability of our methods and their superior performance compared to state-of-the-art baselines.
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/32546
[61]
H. D. Tran and A. Yates, “Dense Retrieval with Entity Views,” in CIKM ’22, 31st ACM International Conference on Information and Knowledge Management, Atlanta GA USA, 2022.
Export
BibTeX
@inproceedings{TranCIKM2022,
TITLE = {Dense Retrieval with Entity Views},
AUTHOR = {Tran, Hai Dang and Yates, Andrew},
LANGUAGE = {eng},
ISBN = {978-1-4503-9236-5},
DOI = {10.1145/3511808.3557285},
PUBLISHER = {ACM},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {CIKM '22, 31st ACM International Conference on Information and Knowledge Management},
EDITOR = {Al Hasan, Mohammad and Xiong, Li},
PAGES = {1955--1964},
ADDRESS = {Atlanta GA USA},
}
Endnote
%0 Conference Proceedings
%A Tran, Hai Dang
%A Yates, Andrew
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Dense Retrieval with Entity Views :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-1669-6
%R 10.1145/3511808.3557285
%D 2022
%B 31st ACM International Conference on Information and Knowledge Management
%Z date of event: 2022-10-17 - 2022-10-21
%C Atlanta GA USA
%B CIKM '22
%E Al Hasan, Mohammad; Xiong, Li
%P 1955 - 1964
%I ACM
%@ 978-1-4503-9236-5
[62]
A. S. Varde, “Computational Estimation by Scientific Data Mining with Classical Methods to Automate Learning Strategies of Scientists,” ACM Transactions on Knowledge Discovery from Data, vol. 16, no. 5, 2022.
Export
BibTeX
@article{Varde2022b,
TITLE = {Computational Estimation by Scientific Data Mining with Classical Methods to Automate Learning Strategies of Scientists},
AUTHOR = {Varde, Aparna S.},
LANGUAGE = {eng},
DOI = {10.1145/3502736},
PUBLISHER = {ACM},
ADDRESS = {New York, NY},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
JOURNAL = {ACM Transactions on Knowledge Discovery from Data},
VOLUME = {16},
NUMBER = {5},
PAGES = {1--52},
EID = {86},
}
Endnote
%0 Journal Article
%A Varde, Aparna S.
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Computational Estimation by Scientific Data Mining with Classical Methods to Automate Learning Strategies of Scientists :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-9D92-0
%R 10.1145/3502736
%7 2022
%D 2022
%J ACM Transactions on Knowledge Discovery from Data
%V 16
%N 5
%& 1
%P 1 - 52
%Z sequence number: 86
%I ACM
%C New York, NY
[63]
A. S. Varde, A. Pandey, and X. Du, “Prediction Tool on Fine Particle Pollutants and Air Quality for Environmental Engineering,” SN Computer Science, vol. 3, no. 3, 2022.
Export
BibTeX
@article{Varde2022,
TITLE = {Prediction Tool on Fine Particle Pollutants and Air Quality for Environmental Engineering},
AUTHOR = {Varde, Aparna S. and Pandey, Abidha and Du, Xu},
LANGUAGE = {eng},
ISSN = {2661-8907},
DOI = {10.1007/s42979-022-01068-2},
PUBLISHER = {Springer Nature},
ADDRESS = {Singapore},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
JOURNAL = {SN Computer Science},
VOLUME = {3},
NUMBER = {3},
EID = {184},
}
Endnote
%0 Journal Article
%A Varde, Aparna S.
%A Pandey, Abidha
%A Du, Xu
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Prediction Tool on Fine Particle Pollutants and Air Quality for Environmental Engineering :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-2F55-3
%R 10.1007/s42979-022-01068-2
%7 2022
%D 2022
%J SN Computer Science
%V 3
%N 3
%Z sequence number: 184
%I Springer Nature
%C Singapore
%@ false
[64]
Y. Wang, “Coreference Resolution for Extracting Quantity-Facts from Multiple Sentences,” Universität des Saarlandes, Saarbrücken, 2022.
Abstract
Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
Export
BibTeX
@mastersthesis{NguyenMSc2020,
TITLE = {Coreference Resolution for Extracting Quantity-Facts from Multiple Sentences},
AUTHOR = {Wang, Yongqing},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2022},
MARGINALMARK = {$\bullet$},
DATE = {2022},
ABSTRACT = {Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.},
}
Endnote
%0 Thesis
%A Wang, Yongqing
%Y Pal, Koninika
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Coreference Resolution for Extracting Quantity-Facts from Multiple Sentences :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-F5F1-F
%I Universität des Saarlandes
%C Saarbrücken
%D 2022
%P XI, 58 p.
%V master
%9 master
%X Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
2021
[65]
D. I. Adelani, J. Abbott, G. Neubig, D. D’souza, J. Kreutzer, C. Lignos, C. Palen-Michel, H. Buzaaba, S. Rijhwani, S. Ruder, S. Mayhew, I. A. Azime, S. H. Muhammad, C. C. Emezue, J. Nakatumba-Nabende, P. Ogayo, A. Anuoluwapo, C. Gitau, D. Mbaye, J. Alabi, S. M. Yimam, T. R. Gwadabe, I. Ezeani, R. A. Niyongabo, J. Mukiibi, V. Otiende, I. Orife, D. David, S. Ngom, T. Adewumi, P. Rayson, M. Adeyemi, G. Muriuki, E. Anebi, C. Chukwuneke, N. Odu, E. P. Wairagala, S. Oyerinde, C. Siro, T. S. Bateesa, T. Oloyede, Y. Wambui, V. Akinode, D. Nabagereka, M. Katusiime, A. Awokoya, M. MBOUP, D. Gebreyohannes, H. Tilaye, K. Nwaike, D. Wolde, A. Faye, B. Sibanda, O. Ahia, B. F. P. Dossou, K. Ogueji, T. I. DIOP, A. Diallo, A. Akinfaderin, T. Marengereke, and S. Osei, “MasakhaNER: Named Entity Recognition for African Languages,” Transactions of the Association for Computational Linguistics, vol. 9, 2021.
Export
BibTeX
@article{Adelani2021,
TITLE = {{MasakhaNER}: {N}amed Entity Recognition for {A}frican Languages},
AUTHOR = {Adelani, David Ifeoluwa and Abbott, Jade and Neubig, Graham and D{\textquoteright}souza, Daniel and Kreutzer, Julia and Lignos, Constantine and Palen-Michel, Chester and Buzaaba, Happy and Rijhwani, Shruti and Ruder, Sebastian and Mayhew, Stephen and Azime, Israel Abebe and Muhammad, Shamsuddeen H. and Emezue, Chris Chinenye and Nakatumba-Nabende, Joyce and Ogayo, Perez and Anuoluwapo, Aremu and Gitau, Catherine and Mbaye, Derguene and Alabi, Jesujoba and Yimam, Seid Muhie and Gwadabe, Tajuddeen Rabiu and Ezeani, Ignatius and Niyongabo, Rubungo Andre and Mukiibi, Jonathan and Otiende, Verrah and Orife, Iroro and David, Davis and Ngom, Samba and Adewumi, Tosin and Rayson, Paul and Adeyemi, Mofetoluwa and Muriuki, Gerald and Anebi, Emmanuel and Chukwuneke, Chiamaka and Odu, Nkiruka and Wairagala, Eric Peter and Oyerinde, Samuel and Siro, Clemencia and Bateesa, Tobius Saul and Oloyede, Temilola and Wambui, Yvonne and Akinode, Victor and Nabagereka, Deborah and Katusiime, Maurice and Awokoya, Ayodele and MBOUP, Mouhamadane and Gebreyohannes, Dibora and Tilaye, Henok and Nwaike, Kelechi and Wolde, Degaga and Faye, Abdoulaye and Sibanda, Blessing and Ahia, Orevaoghene and Dossou, Bonaventure F. P. and Ogueji, Kelechi and DIOP, Thierno Ibrahima and Diallo, Abdoulaye and Akinfaderin, Adewale and Marengereke, Tendai and Osei, Salomey},
LANGUAGE = {eng},
ISSN = {2307-387X},
DOI = {10.1162/tacl_a_00416},
PUBLISHER = {ACL},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
JOURNAL = {Transactions of the Association for Computational Linguistics},
VOLUME = {9},
PAGES = {1116--1131},
}
Endnote
%0 Journal Article
%A Adelani, David Ifeoluwa
%A Abbott, Jade
%A Neubig, Graham
%A D’souza, Daniel
%A Kreutzer, Julia
%A Lignos, Constantine
%A Palen-Michel, Chester
%A Buzaaba, Happy
%A Rijhwani, Shruti
%A Ruder, Sebastian
%A Mayhew, Stephen
%A Azime, Israel Abebe
%A Muhammad, Shamsuddeen H.
%A Emezue, Chris Chinenye
%A Nakatumba-Nabende, Joyce
%A Ogayo, Perez
%A Anuoluwapo, Aremu
%A Gitau, Catherine
%A Mbaye, Derguene
%A Alabi, Jesujoba
%A Yimam, Seid Muhie
%A Gwadabe, Tajuddeen Rabiu
%A Ezeani, Ignatius
%A Niyongabo, Rubungo Andre
%A Mukiibi, Jonathan
%A Otiende, Verrah
%A Orife, Iroro
%A David, Davis
%A Ngom, Samba
%A Adewumi, Tosin
%A Rayson, Paul
%A Adeyemi, Mofetoluwa
%A Muriuki, Gerald
%A Anebi, Emmanuel
%A Chukwuneke, Chiamaka
%A Odu, Nkiruka
%A Wairagala, Eric Peter
%A Oyerinde, Samuel
%A Siro, Clemencia
%A Bateesa, Tobius Saul
%A Oloyede, Temilola
%A Wambui, Yvonne
%A Akinode, Victor
%A Nabagereka, Deborah
%A Katusiime, Maurice
%A Awokoya, Ayodele
%A MBOUP, Mouhamadane
%A Gebreyohannes, Dibora
%A Tilaye, Henok
%A Nwaike, Kelechi
%A Wolde, Degaga
%A Faye, Abdoulaye
%A Sibanda, Blessing
%A Ahia, Orevaoghene
%A Dossou, Bonaventure F. P.
%A Ogueji, Kelechi
%A DIOP, Thierno Ibrahima
%A Diallo, Abdoulaye
%A Akinfaderin, Adewale
%A Marengereke, Tendai
%A Osei, Salomey
%+ External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
%T MasakhaNER: Named Entity Recognition for African Languages :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-115A-E
%R 10.1162/tacl_a_00416
%7 2021
%D 2021
%J Transactions of the Association for Computational Linguistics
%V 9
%& 1116
%P 1116 - 1131
%I ACL
%@ false
[66]
J. Ali, P. Lahoti, and K. P. Gummadi, “Accounting for Model Uncertainty in Algorithmic Discrimination,” in AIES ’21, Fourth AAAI/ACM Conference on Artificial Intelligence, Ethics and Society, Virtual Conference, 2021.
Export
BibTeX
@inproceedings{Ali_AIES2021,
TITLE = {Accounting for Model Uncertainty in Algorithmic Discrimination},
AUTHOR = {Ali, Junaid and Lahoti, Preethi and Gummadi, Krishna P.},
LANGUAGE = {eng},
ISBN = {978-1-4503-8473-5},
DOI = {10.1145/3461702.3462630},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {AIES '21, Fourth AAAI/ACM Conference on Artificial Intelligence, Ethics and Society},
EDITOR = {Fourcade, Marion and Kuipers, Benjamin and Lazar, Seth and Mulligan, Deirdre},
PAGES = {336--345},
ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings
%A Ali, Junaid
%A Lahoti, Preethi
%A Gummadi, Krishna P.
%+ Computer Graphics, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Accounting for Model Uncertainty in Algorithmic Discrimination :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-72E3-7
%R 10.1145/3461702.3462630
%D 2021
%B Fourth AAAI/ACM Conference on Artificial Intelligence, Ethics and Society
%Z date of event: 2021-05-19 - 2021-05-21
%C Virtual Conference
%B AIES '21
%E Fourcade, Marion; Kuipers, Benjamin; Lazar, Seth; Mulligan, Deirdre
%P 336 - 345
%I ACM
%@ 978-1-4503-8473-5
[67]
H. Arnaout, S. Razniewski, G. Weikum, and J. Z. Pan, “Negative Knowledge for Open-world Wikidata,” in The Web Conference (WWW 2021), Ljubljana, Slovenia, 2021.
Export
BibTeX
@inproceedings{Arnaout_WWW21,
TITLE = {Negative Knowledge for Open-world {W}ikidata},
AUTHOR = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard and Pan, Jeff Z.},
LANGUAGE = {eng},
ISBN = {978-1-4503-8313-4},
DOI = {10.1145/3442442.3452339},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The Web Conference (WWW 2021)},
EDITOR = {Leskovec, Jure and Grobelnik, Marko and Najork, Mark and Tan, Jie and Zia, Leila},
PAGES = {544--551},
ADDRESS = {Ljubljana, Slovenia},
}
Endnote
%0 Conference Proceedings
%A Arnaout, Hiba
%A Razniewski, Simon
%A Weikum, Gerhard
%A Pan, Jeff Z.
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Negative Knowledge for Open-world Wikidata :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6351-C
%R 10.1145/3442442.3452339
%D 2021
%B The Web Conference
%Z date of event: 2021-04-19 - 2021-04-23
%C Ljubljana, Slovenia
%B The Web Conference
%E Leskovec, Jure; Grobelnik, Marko; Najork, Mark; Tan, Jie; Zia, Leila
%P 544 - 551
%I ACM
%@ 978-1-4503-8313-4
[68]
H. Arnaout, S. Razniewski, G. Weikum, and J. Z. Pan, “Negative Statements Considered Useful,” Journal of Web Semantics, vol. 71, 2021.
Export
BibTeX
@article{Arnaout2021,
TITLE = {Negative Statements Considered Useful},
AUTHOR = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard and Pan, Jeff Z.},
LANGUAGE = {eng},
DOI = {10.1016/j.websem.2021.100661},
PUBLISHER = {Elsevier},
ADDRESS = {Amsterdam},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
JOURNAL = {Journal of Web Semantics},
VOLUME = {71},
EID = {100661},
}
Endnote
%0 Journal Article
%A Arnaout, Hiba
%A Razniewski, Simon
%A Weikum, Gerhard
%A Pan, Jeff Z.
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Negative Statements Considered Useful :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-A586-5
%R 10.1016/j.websem.2021.100661
%7 2021
%D 2021
%J Journal of Web Semantics
%V 71
%Z sequence number: 100661
%I Elsevier
%C Amsterdam
[69]
H. Arnaout, S. Razniewski, G. Weikum, and J. Z. Pan, “Wikinegata: a Knowledge Base with Interesting Negative Statements,” Proceedings of the VLDB Endowment (Proc. VLDB 2021), vol. 14, no. 12, 2021.
Export
BibTeX
@article{Arnaout2021_PVLDB,
TITLE = {Wikinegata: {A} Knowledge Base with Interesting Negative Statements},
AUTHOR = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard and Pan, Jeff Z.},
LANGUAGE = {eng},
PUBLISHER = {VLDB Endowment Inc.},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
JOURNAL = {Proceedings of the VLDB Endowment (Proc. VLDB)},
VOLUME = {14},
NUMBER = {12},
PAGES = {2807--2810},
BOOKTITLE = {Proceedings of the 47th International Conference on Very Large Data Bases (VLDB 2021)},
EDITOR = {Dong, Xin Luna and Naumann, Felix},
}
Endnote
%0 Journal Article
%A Arnaout, Hiba
%A Razniewski, Simon
%A Weikum, Gerhard
%A Pan, Jeff Z.
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Wikinegata: a Knowledge Base with Interesting Negative Statements :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6319-C
%7 2021
%D 2021
%J Proceedings of the VLDB Endowment
%O PVLDB
%V 14
%N 12
%& 2807
%P 2807 - 2810
%I VLDB Endowment Inc.
%B Proceedings of the 47th International Conference on Very Large Data Bases
%O VLDB 2021 Copenhagen, Denmark, 16-20 August 2021
[70]
A. B. Biswas, H. Arnaout, and S. Razniewski, “Neguess: Wikidata-entity Guessing Game with Negative Clues,” in Proceedings of the ISWC 2021 Posters, Demos and Industry Tracks (ISWC-Posters-Demos-Industry 2021), Virtual Conference, 2021.
Export
BibTeX
@inproceedings{Biswas_ISWC21,
TITLE = {Neguess: {W}ikidata-entity Guessing Game with Negative Clues},
AUTHOR = {Biswas, Aditya Bikram and Arnaout, Hiba and Razniewski, Simon},
LANGUAGE = {eng},
ISSN = {1613-0073},
URL = {http://ceur-ws.org/Vol-2980/paper350.pdf; urn:nbn:de:0074-2980-6},
PUBLISHER = {CEUR-WS.org},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the ISWC 2021 Posters, Demos and Industry Tracks (ISWC-Posters-Demos-Industry 2021)},
EDITOR = {Seneviratne, Oshani and Pesquita, Catia and Sequeda, Juan and Etcheverry, Lorena},
EID = {350},
SERIES = {CEUR Workshop Proceedings},
VOLUME = {2980},
ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings
%A Biswas, Aditya Bikram
%A Arnaout, Hiba
%A Razniewski, Simon
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Neguess: Wikidata-entity Guessing Game with Negative Clues :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-65AD-3
%U http://ceur-ws.org/Vol-2980/paper350.pdf
%D 2021
%B 20th International Semantic Web Conference
%Z date of event: 2021-10-24 - 2021-10-28
%C Virtual Conference
%B Proceedings of the ISWC 2021 Posters, Demos and Industry Tracks
%E Seneviratne, Oshani; Pesquita, Catia; Sequeda, Juan; Etcheverry, Lorena
%Z sequence number: 350
%I CEUR-WS.org
%B CEUR Workshop Proceedings
%N 2980
%@ false
[71]
K. Budhathoki, M. Boley, and J. Vreeken, “Discovering Reliable Causal Rules,” in Proceedings of the SIAM International Conference on Data Mining (SDM 2021), Virtual Conference, 2021.
Export
BibTeX
@inproceedings{budhathoki:21:dice,
TITLE = {Discovering Reliable Causal Rules},
AUTHOR = {Budhathoki, Kailash and Boley, Mario and Vreeken, Jilles},
LANGUAGE = {eng},
ISBN = {978-1-61197-670-0},
DOI = {10.1137/1.9781611976700.1},
PUBLISHER = {SIAM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the SIAM International Conference on Data Mining (SDM 2021)},
EDITOR = {Demeniconi, Carlotta and Davidson, Ian},
PAGES = {1--9},
ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings
%A Budhathoki, Kailash
%A Boley, Mario
%A Vreeken, Jilles
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Discovering Reliable Causal Rules :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-2571-F
%R 10.1137/1.9781611976700.1
%D 2021
%B SIAM International Conference on Data Mining
%Z date of event: 2021-04-29 - 2021-05-01
%C Virtual Conference
%B Proceedings of the SIAM International Conference on Data Mining
%E Demeniconi, Carlotta; Davidson, Ian
%P 1 - 9
%I SIAM
%@ 978-1-61197-670-0
[72]
E. Chang, X. Shen, D. Zhu, V. Demberg, and H. Su, “Neural Data-to-Text Generation with LM-based Text Augmentation,” in The 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021), Online, 2021.
Export
BibTeX
@inproceedings{chang2021neural,
TITLE = {Neural Data-to-Text Generation with {LM}-based Text Augmentation},
AUTHOR = {Chang, Ernie and Shen, Xiaoyu and Zhu, Dawei and Demberg, Vera and Su, Hui},
LANGUAGE = {eng},
ISBN = {978-1-954085-02-2},
DOI = {10.18653/v1/2021.eacl-main.64},
PUBLISHER = {ACL},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021)},
EDITOR = {Merlo, Paola},
PAGES = {758--768},
ADDRESS = {Online},
}
Endnote
%0 Conference Proceedings
%A Chang, Ernie
%A Shen, Xiaoyu
%A Zhu, Dawei
%A Demberg, Vera
%A Su, Hui
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
%T Neural Data-to-Text Generation with LM-based Text Augmentation :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-149E-0
%R 10.18653/v1/2021.eacl-main.64
%D 2021
%B 16th Conference of the European Chapter of the Association for Computational Linguistics
%Z date of event: 2021-04-19 - 2021-04-23
%C Online
%B The 16th Conference of the European Chapter of the Association for Computational Linguistics
%E Merlo, Paola
%P 758 - 768
%I ACL
%@ 978-1-954085-02-2
[73]
P. Christmann, R. Saha Roy, and G. Weikum, “Beyond NED: Fast and Effective Search Space Reduction for Complex Question Answering over Knowledge Bases,” 2021. [Online]. Available: https://arxiv.org/abs/2108.08597. (arXiv: 2108.08597)
Abstract
Answering complex questions over knowledge bases (KB-QA) faces huge input data with billions of facts, involving millions of entities and thousands of predicates. For efficiency, QA systems first reduce the answer search space by identifying a set of facts that is likely to contain all answers and relevant cues. The most common technique for doing this is to apply named entity disambiguation (NED) systems to the question, and retrieve KB facts for the disambiguated entities. This work presents CLOCQ, an efficient method that prunes irrelevant parts of the search space using KB-aware signals. CLOCQ uses a top-k query processor over score-ordered lists of KB items that combine signals about lexical matching, relevance to the question, coherence among candidate items, and connectivity in the KB graph. Experiments with two recent QA benchmarks for complex questions demonstrate the superiority of CLOCQ over state-of-the-art baselines with respect to answer presence, size of the search space, and runtimes.
Export
BibTeX
@online{Christmann_2108.08597,
TITLE = {Beyond {NED}: {F}ast and Effective Search Space Reduction for Complex Question Answering over Knowledge Bases},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2108.08597},
EPRINT = {2108.08597},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Answering complex questions over knowledge bases (KB-QA) faces huge input data with billions of facts, involving millions of entities and thousands of predicates. For efficiency, QA systems first reduce the answer search space by identifying a set of facts that is likely to contain all answers and relevant cues. The most common technique for doing this is to apply named entity disambiguation (NED) systems to the question, and retrieve KB facts for the disambiguated entities. This work presents CLOCQ, an efficient method that prunes irrelevant parts of the search space using KB-aware signals. CLOCQ uses a top-k query processor over score-ordered lists of KB items that combine signals about lexical matching, relevance to the question, coherence among candidate items, and connectivity in the KB graph. Experiments with two recent QA benchmarks for complex questions demonstrate the superiority of CLOCQ over state-of-the-art baselines with respect to answer presence, size of the search space, and runtimes.},
}
Endnote
%0 Report
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Beyond NED: Fast and Effective Search Space Reduction for Complex Question Answering over Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6360-B
%U https://arxiv.org/abs/2108.08597
%D 2021
%X Answering complex questions over knowledge bases (KB-QA) faces huge input data with billions of facts, involving millions of entities and thousands of predicates. For efficiency, QA systems first reduce the answer search space by identifying a set of facts that is likely to contain all answers and relevant cues. The most common technique for doing this is to apply named entity disambiguation (NED) systems to the question, and retrieve KB facts for the disambiguated entities. This work presents CLOCQ, an efficient method that prunes irrelevant parts of the search space using KB-aware signals. CLOCQ uses a top-k query processor over score-ordered lists of KB items that combine signals about lexical matching, relevance to the question, coherence among candidate items, and connectivity in the KB graph. Experiments with two recent QA benchmarks for complex questions demonstrate the superiority of CLOCQ over state-of-the-art baselines with respect to answer presence, size of the search space, and runtimes.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[74]
P. Christmann, “CLOCQ: Efficient Search Space Reduction for Complex Question Answering over Knowledge Bases,” Universität des Saarlandes, Saarbrücken, 2021.
Abstract
Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
Export
BibTeX
@mastersthesis{ChristmannMSc2021,
TITLE = {{CLOCQ}: Efficient Search Space Reduction for Complex Question Answering over Knowledge Bases},
AUTHOR = {Christmann, Philipp},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
ABSTRACT = {Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.},
}
Endnote
%0 Thesis
%A Christmann, Philipp
%Y Saha Roy, Rishiraj
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CLOCQ: Efficient Search Space Reduction for Complex Question Answering over Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-BEF6-9
%I Universität des Saarlandes
%C Saarbrücken
%D 2021
%P 54 p.
%V master
%9 master
%X Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
[75]
C. X. Chu, S. Razniewski, and G. Weikum, “KnowFi: Knowledge Extraction from Long Fictional Texts,” in Automated Knowledge Base Construction (AKBC 2021), Virtual Conference, 2021.
Export
BibTeX
@inproceedings{DBLP:conf/akbc/ChuRW21,
TITLE = {{KnowFi}: {K}nowledge Extraction from Long Fictional Texts},
AUTHOR = {Chu, Cuong Xuan and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://openreview.net/forum?id=8smkJ2ekBRC},
PUBLISHER = {OpenReview},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Automated Knowledge Base Construction (AKBC 2021)},
PAGES = {1--19},
ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings
%A Chu, Cuong Xuan
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T KnowFi: Knowledge Extraction from Long Fictional Texts :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC15-5
%U https://openreview.net/forum?id=8smkJ2ekBRC
%D 2021
%B 3rd Conference on Automated Knowledge Base Construction
%Z date of event: 2021-10-04 - 2021-10-08
%C Virtual Conference
%B Automated Knowledge Base Construction
%P 1 - 19
%I OpenReview
[76]
D. Dave, V. Anu, and A. S. Varde, “Automating the Classification of Requirements Data,” in IEEE International Conference on Big Data, Orlando, FL, USA (Virtual Event), 2021.
Export
BibTeX
@inproceedings{Dave_BigData21,
TITLE = {Automating the Classification of Requirements Data},
AUTHOR = {Dave, Dev and Anu, Vaibhav and Varde, Aparna S.},
LANGUAGE = {eng},
ISBN = {978-1-6654-3902-2},
DOI = {10.1109/BigData52589.2021.9671548},
PUBLISHER = {IEEE},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {IEEE International Conference on Big Data},
EDITOR = {Chen, Yixin and Ludwig, Heiko and Tu, Yicheng and Fayyad, Usama and Zhu, Xingquan and Xu, Xiaohua and Byna, Suren and Liu, Xiong and Zhang, Jianping and Pan, Shirui and Papalexakis, Vagelis and Wang, Jianwu and Cuzzocrea, Alfredo and Ordonez, Carlos},
PAGES = {5878--5880},
ADDRESS = {Orlando, FL, USA (Virtual Event)},
}
Endnote
%0 Conference Proceedings
%A Dave, Dev
%A Anu, Vaibhav
%A Varde, Aparna S.
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Automating the Classification of Requirements Data :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-C562-9
%R 10.1109/BigData52589.2021.9671548
%D 2021
%B IEEE International Conference on Big Data
%Z date of event: 2021-12-15 - 2021-12-18
%C Orlando, FL, USA (Virtual Event)
%B IEEE International Conference on Big Data
%E Chen, Yixin; Ludwig, Heiko; Tu, Yicheng; Fayyad, Usama; Zhu, Xingquan; Xu, Xiaohua; Byna, Suren; Liu, Xiong; Zhang, Jianping; Pan, Shirui; Papalexakis, Vagelis; Wang, Jianwu; Cuzzocrea, Alfredo; Ordonez, Carlos
%P 5878 - 5880
%I IEEE
%@ 978-1-6654-3902-2
[77]
L. De Stefani, E. Terolli, and E. Upfal, “Tiered Sampling: An Efficient Method for Counting Sparse Motifs in Massive Graph Streams,” ACM Transactions on Knowledge Discovery from Data, vol. 15, no. 5, 2021.
Export
BibTeX
@article{DeStefani2021,
TITLE = {Tiered Sampling: {A}n Efficient Method for Counting Sparse Motifs in Massive Graph Streams},
AUTHOR = {De Stefani, Lorenzo and Terolli, Erisa and Upfal, Eli},
LANGUAGE = {eng},
ISSN = {1556-4681},
DOI = {10.1145/3441299},
PUBLISHER = {ACM},
ADDRESS = {New York, NY},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
JOURNAL = {ACM Transactions on Knowledge Discovery from Data},
VOLUME = {15},
NUMBER = {5},
PAGES = {1--52},
EID = {79},
}
Endnote
%0 Journal Article
%A De Stefani, Lorenzo
%A Terolli, Erisa
%A Upfal, Eli
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Tiered Sampling: An Efficient Method for Counting Sparse Motifs in Massive Graph Streams :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-ED51-2
%R 10.1145/3441299
%7 2021
%D 2021
%J ACM Transactions on Knowledge Discovery from Data
%V 15
%N 5
%& 1
%P 1 - 52
%Z sequence number: 79
%I ACM
%C New York, NY
%@ false
[78]
J. Fischer, F. B. Ardakani, K. Kattler, J. Walter, and M. H. Schulz, “CpG Content-dependent Associations between Transcription Factors and Histone Modifications,” PLoS One, vol. 16, no. 4, 2021.
Export
BibTeX
@article{fischer:21:cpgtfhm,
TITLE = {{CpG} content-dependent associations between transcription factors and histone modifications},
AUTHOR = {Fischer, Jonas and Ardakani, Fatemeh Behjati and Kattler, Kathrin and Walter, J{\"o}rn and Schulz, Marcel Holger},
LANGUAGE = {eng},
ISSN = {1932-6203},
DOI = {10.1371/journal.pone.0249985},
PUBLISHER = {Public Library of Science},
ADDRESS = {San Francisco, CA},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
JOURNAL = {PLoS One},
VOLUME = {16},
NUMBER = {4},
EID = {0249985},
}
Endnote
%0 Journal Article
%A Fischer, Jonas
%A Ardakani, Fatemeh Behjati
%A Kattler, Kathrin
%A Walter, Jörn
%A Schulz, Marcel Holger
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Computational Biology and Applied Algorithmics, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
Computational Biology and Applied Algorithmics, MPI for Informatics, Max Planck Society
%T CpG Content-dependent Associations between Transcription Factors and Histone Modifications :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-5602-5
%R 10.1371/journal.pone.0249985
%7 2021
%D 2021
%J PLoS One
%V 16
%N 4
%Z sequence number: 0249985
%I Public Library of Science
%C San Francisco, CA
%@ false
[79]
J. Fischer, A. Oláh, and J. Vreeken, “What’s in the Box? Exploring the Inner Life of Neural Networks with Robust Rules,” in Proceedings of the 38th International Conference on Machine Learning (ICML 2021), Virtual Event, 2021.
Export
BibTeX
@inproceedings{Fischer_ICML2021,
TITLE = {What's in the Box? {Exploring} the Inner Life of Neural Networks with Robust Rules},
AUTHOR = {Fischer, Jonas and Ol{\'a}h, Anna and Vreeken, Jilles},
LANGUAGE = {eng},
PUBLISHER = {MLR Press},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the 38th International Conference on Machine Learning (ICML 2021)},
EDITOR = {Meila, Marina and Zhang, Tong},
PAGES = {3352--3362},
EID = {26},
SERIES = {Proceedings of Machine Learning Research},
VOLUME = {139},
ADDRESS = {Virtual Event},
}
Endnote
%0 Conference Proceedings
%A Fischer, Jonas
%A Oláh, Anna
%A Vreeken, Jilles
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T What’s in the Box? Exploring the Inner Life of Neural Networks with Robust Rules :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-49F8-E
%D 2021
%B 38th International Conference on Machine Learning
%Z date of event: 2021-07-18 - 2021-07-24
%C Virtual Event
%B Proceedings of the 38th International Conference on Machine Learning
%E Meila, Marina; Zhang, Tong
%P 3352 - 3362
%Z sequence number: 26
%I MLR Press
%B Proceedings of Machine Learning Research
%N 139
[80]
J. Fischer and R. Burkholz, “Plant ‘n’ Seek: Can You Find the Winning Ticket?,” 2021. [Online]. Available: https://arxiv.org/abs/2111.11153. (arXiv: 2111.11153)
Abstract
The lottery ticket hypothesis has sparked the rapid development of pruning algorithms that perform structure learning by identifying a sparse subnetwork of a large randomly initialized neural network. The existence of such 'winning tickets' has been proven theoretically but at suboptimal sparsity levels. Contemporary pruning algorithms have furthermore been struggling to identify sparse lottery tickets for complex learning tasks. Is this suboptimal sparsity merely an artifact of existence proofs and algorithms or a general limitation of the pruning approach? And, if very sparse tickets exist, are current algorithms able to find them or are further improvements needed to achieve effective network compression? To answer these questions systematically, we derive a framework to plant and hide target architectures within large randomly initialized neural networks. For three common challenges in machine learning, we hand-craft extremely sparse network topologies, plant them in large neural networks, and evaluate state-of-the-art lottery ticket pruning methods. We find that current limitations of pruning algorithms to identify extremely sparse tickets are likely of algorithmic rather than fundamental nature and anticipate that our planting framework will facilitate future developments of efficient pruning algorithms, as we have addressed the issue of missing baselines in the field raised by Frankle et al.
Export
BibTeX
@online{FischerarXiv2111.11153,
TITLE = {Plant 'n' Seek: Can You Find the Winning Ticket?},
AUTHOR = {Fischer, Jonas and Burkholz, Rebekka},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2111.11153},
EPRINT = {2111.11153},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {The lottery ticket hypothesis has sparked the rapid development of pruning algorithms that perform structure learning by identifying a sparse subnetwork of a large randomly initialized neural network. The existence of such 'winning tickets' has been proven theoretically but at suboptimal sparsity levels. Contemporary pruning algorithms have furthermore been struggling to identify sparse lottery tickets for complex learning tasks. Is this suboptimal sparsity merely an artifact of existence proofs and algorithms or a general limitation of the pruning approach? And, if very sparse tickets exist, are current algorithms able to find them or are further improvements needed to achieve effective network compression? To answer these questions systematically, we derive a framework to plant and hide target architectures within large randomly initialized neural networks. For three common challenges in machine learning, we hand-craft extremely sparse network topologies, plant them in large neural networks, and evaluate state-of-the-art lottery ticket pruning methods. We find that current limitations of pruning algorithms to identify extremely sparse tickets are likely of algorithmic rather than fundamental nature and anticipate that our planting framework will facilitate future developments of efficient pruning algorithms, as we have addressed the issue of missing baselines in the field raised by Frankle et al.},
}
Endnote
%0 Report
%A Fischer, Jonas
%A Burkholz, Rebekka
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Plant 'n' Seek: Can You Find the Winning Ticket? :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-B124-6
%U https://arxiv.org/abs/2111.11153
%D 2021
%X The lottery ticket hypothesis has sparked the rapid development of pruning algorithms that perform structure learning by identifying a sparse subnetwork of a large randomly initialized neural network. The existence of such 'winning tickets' has been proven theoretically but at suboptimal sparsity levels. Contemporary pruning algorithms have furthermore been struggling to identify sparse lottery tickets for complex learning tasks. Is this suboptimal sparsity merely an artifact of existence proofs and algorithms or a general limitation of the pruning approach? And, if very sparse tickets exist, are current algorithms able to find them or are further improvements needed to achieve effective network compression? To answer these questions systematically, we derive a framework to plant and hide target architectures within large randomly initialized neural networks. For three common challenges in machine learning, we hand-craft extremely sparse network topologies, plant them in large neural networks, and evaluate state-of-the-art lottery ticket pruning methods. We find that current limitations of pruning algorithms to identify extremely sparse tickets are likely of algorithmic rather than fundamental nature and anticipate that our planting framework will facilitate future developments of efficient pruning algorithms, as we have addressed the issue of missing baselines in the field raised by Frankle et al.
%K Computer Science, Learning, cs.LG,Computer Science, Artificial Intelligence, cs.AI,Statistics, Machine Learning, stat.ML
[81]
J. Fischer and R. Burkholz, “Towards Strong Pruning for Lottery Tickets with Non-Zero Biases,” 2021. [Online]. Available: https://arxiv.org/abs/2110.11150. (arXiv: 2110.11150)
Abstract
The strong lottery ticket hypothesis holds the promise that pruning randomly initialized deep neural networks could offer a computationally efficient alternative to deep learning with stochastic gradient descent. Common parameter initialization schemes and existence proofs, however, are focused on networks with zero biases, thus foregoing the potential universal approximation property of pruning. To fill this gap, we extend multiple initialization schemes and existence proofs to non-zero biases, including explicit 'looks-linear' approaches for ReLU activation functions. These do not only enable truly orthogonal parameter initialization but also reduce potential pruning errors. In experiments on standard benchmark data sets, we further highlight the practical benefits of non-zero bias initialization schemes, and present theoretically inspired extensions for state-of-the-art strong lottery ticket pruning.
Export
BibTeX
@online{Fischer_arXiv2110.11150,
TITLE = {Towards Strong Pruning for Lottery Tickets with Non-Zero Biases},
AUTHOR = {Fischer, Jonas and Burkholz, Rebekka},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2110.11150},
EPRINT = {2110.11150},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {The strong lottery ticket hypothesis holds the promise that pruning randomly initialized deep neural networks could offer a computationally efficient alternative to deep learning with stochastic gradient descent. Common parameter initialization schemes and existence proofs, however, are focused on networks with zero biases, thus foregoing the potential universal approximation property of pruning. To fill this gap, we extend multiple initialization schemes and existence proofs to non-zero biases, including explicit 'looks-linear' approaches for ReLU activation functions. These do not only enable truly orthogonal parameter initialization but also reduce potential pruning errors. In experiments on standard benchmark data sets, we further highlight the practical benefits of non-zero bias initialization schemes, and present theoretically inspired extensions for state-of-the-art strong lottery ticket pruning.},
}
Endnote
%0 Report
%A Fischer, Jonas
%A Burkholz, Rebekka
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Towards Strong Pruning for Lottery Tickets with Non-Zero Biases :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-B12A-0
%U https://arxiv.org/abs/2110.11150
%D 2021
%X The strong lottery ticket hypothesis holds the promise that pruning randomly initialized deep neural networks could offer a computationally efficient alternative to deep learning with stochastic gradient descent. Common parameter initialization schemes and existence proofs, however, are focused on networks with zero biases, thus foregoing the potential universal approximation property of pruning. To fill this gap, we extend multiple initialization schemes and existence proofs to non-zero biases, including explicit 'looks-linear' approaches for ReLU activation functions. These do not only enable truly orthogonal parameter initialization but also reduce potential pruning errors. In experiments on standard benchmark data sets, we further highlight the practical benefits of non-zero bias initialization schemes, and present theoretically inspired extensions for state-of-the-art strong lottery ticket pruning.
%K Computer Science, Learning, cs.LG,Computer Science, Artificial Intelligence, cs.AI
[82]
J. Fischer and J. Vreeken, “Differentiable Pattern Set Mining,” in KDD ’21, 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Virtual Event, Singapore, 2021.
Export
BibTeX
@inproceedings{Fischer_KDD2021,
TITLE = {Differentiable Pattern Set Mining},
AUTHOR = {Fischer, Jonas and Vreeken, Jilles},
LANGUAGE = {eng},
ISBN = {978-1-4503-8332-5},
DOI = {10.1145/3447548.3467348},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {KDD '21, 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
EDITOR = {Zhu, Feida and Ooi, Beng Chin and Miao, Chunyan and Cong, Gao and Tang, Jiliang and Derr, Tyler},
PAGES = {383--392},
ADDRESS = {Virtual Event, Singapore},
}
Endnote
%0 Conference Proceedings
%A Fischer, Jonas
%A Vreeken, Jilles
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Differentiable Pattern Set Mining :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-652F-2
%R 10.1145/3447548.3467348
%D 2021
%B 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining
%Z date of event: 2021-08-14 - 2021-08-18
%C Virtual Event, Singapore
%B KDD '21
%E Zhu, Feida; Ooi, Beng Chin; Miao, Chunyan; Cong, Gao; Tang, Jiliang; Derr, Tyler
%P 383 - 392
%I ACM
%@ 978-1-4503-8332-5
[83]
M. H. Gad-Elrab, “Explainable Methods for Knowledge Graph Refinement and Exploration via Symbolic Reasoning,” Universität des Saarlandes, Saarbrücken, 2021.
Abstract
Knowledge Graphs (KGs) have applications in many domains such as Finance, Manufacturing, and Healthcare. While recent efforts have created large KGs, their content is far from complete and sometimes includes invalid statements. Therefore, it is crucial to refine the constructed KGs to enhance their coverage and accuracy via KG completion and KG validation. It is also vital to provide human-comprehensible explanations for such refinements, so that humans have trust in the KG quality. Enabling KG exploration, by search and browsing, is also essential for users to understand the KG value and limitations towards down-stream applications. However, the large size of KGs makes KG exploration very challenging. While the type taxonomy of KGs is a useful asset along these lines, it remains insufficient for deep exploration. In this dissertation we tackle the aforementioned challenges of KG refinement and KG exploration by combining logical reasoning over the KG with other techniques such as KG embedding models and text mining. Through such combination, we introduce methods that provide human-understandable output. Concretely, we introduce methods to tackle KG incompleteness by learning exception-aware rules over the existing KG. Learned rules are then used in inferring missing links in the KG accurately. Furthermore, we propose a framework for constructing human-comprehensible explanations for candidate facts from both KG and text. Extracted explanations are used to insure the validity of KG facts. Finally, to facilitate KG exploration, we introduce a method that combines KG embeddings with rule mining to compute informative entity clusters with explanations.
Export
BibTeX
@phdthesis{Elrabphd2021,
TITLE = {Explainable Methods for Knowledge Graph Refinement and Exploration via Symbolic Reasoning},
AUTHOR = {Gad-Elrab, Mohamed Hassan},
LANGUAGE = {eng},
URL = {urn:nbn:de:bsz:291--ds-344237},
DOI = {10.22028/D291-34423},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
ABSTRACT = {Knowledge Graphs (KGs) have applications in many domains such as Finance, Manufacturing, and Healthcare. While recent efforts have created large KGs, their content is far from complete and sometimes includes invalid statements. Therefore, it is crucial to refine the constructed KGs to enhance their coverage and accuracy via KG completion and KG validation. It is also vital to provide human-comprehensible explanations for such refinements, so that humans have trust in the KG quality. Enabling KG exploration, by search and browsing, is also essential for users to understand the KG value and limitations towards down-stream applications. However, the large size of KGs makes KG exploration very challenging. While the type taxonomy of KGs is a useful asset along these lines, it remains insufficient for deep exploration. In this dissertation we tackle the aforementioned challenges of KG refinement and KG exploration by combining logical reasoning over the KG with other techniques such as KG embedding models and text mining. Through such combination, we introduce methods that provide human-understandable output. Concretely, we introduce methods to tackle KG incompleteness by learning exception-aware rules over the existing KG. Learned rules are then used in inferring missing links in the KG accurately. Furthermore, we propose a framework for constructing human-comprehensible explanations for candidate facts from both KG and text. Extracted explanations are used to insure the validity of KG facts. Finally, to facilitate KG exploration, we introduce a method that combines KG embeddings with rule mining to compute informative entity clusters with explanations.},
}
Endnote
%0 Thesis
%A Gad-Elrab, Mohamed Hassan
%Y Weikum, Gerhard
%A referee: Theobald, Martin
%A referee: Stepanova, Daria
%A referee: Razniewski, Simon
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Explainable Methods for Knowledge Graph Refinement and Exploration via Symbolic Reasoning :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-427E-0
%R 10.22028/D291-34423
%U urn:nbn:de:bsz:291--ds-344237
%F OTHER: hdl:20.500.11880/31629
%I Universität des Saarlandes
%C Saarbrücken
%D 2021
%P 176 p.
%V phd
%9 phd
%X Knowledge Graphs (KGs) have applications in many domains such as Finance, Manufacturing, and Healthcare. While recent efforts have created large KGs, their content is far from complete and sometimes includes invalid statements. Therefore, it is crucial to refine the constructed KGs to enhance their coverage and accuracy via KG completion and KG validation. It is also vital to provide human-comprehensible explanations for such refinements, so that humans have trust in the KG quality. Enabling KG exploration, by search and browsing, is also essential for users to understand the KG value and limitations towards down-stream applications. However, the large size of KGs makes KG exploration very challenging. While the type taxonomy of KGs is a useful asset along these lines, it remains insufficient for deep exploration. In this dissertation we tackle the aforementioned challenges of KG refinement and KG exploration by combining logical reasoning over the KG with other techniques such as KG embedding models and text mining. Through such combination, we introduce methods that provide human-understandable output. Concretely, we introduce methods to tackle KG incompleteness by learning exception-aware rules over the existing KG. Learned rules are then used in inferring missing links in the KG accurately. Furthermore, we propose a framework for constructing human-comprehensible explanations for candidate facts from both KG and text. Extracted explanations are used to insure the validity of KG facts. Finally, to facilitate KG exploration, we introduce a method that combines KG embeddings with rule mining to compute informative entity clusters with explanations.
%K knowledge graphs
symbolic learning
embedding models
rule learning
Big Data
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/31629
[84]
A. Ghazimatin, “Enhancing Explainability and Scrutability of Recommender Systems,” Universität des Saarlandes, Saarbrücken, 2021.
Abstract
Our increasing reliance on complex algorithms for recommendations calls for models and methods for explainable, scrutable, and trustworthy AI. While explainability is required for understanding the relationships between model inputs and outputs, a scrutable system allows us to modify its behavior as desired. These properties help bridge the gap between our expectations and the algorithm’s behavior and accordingly boost our trust in AI. Aiming to cope with information overload, recommender systems play a crucial role in filtering content (such as products, news, songs, and movies) and shaping a personalized experience for their users. Consequently, there has been a growing demand from the information consumers to receive proper explanations for their personalized recommendations. These explanations aim at helping users understand why certain items are recommended to them and how their previous inputs to the system relate to the generation of such recommendations. Besides, in the event of receiving undesirable content, explanations could possibly contain valuable information as to how the system’s behavior can be modified accordingly. In this thesis, we present our contributions towards explainability and scrutability of recommender systems: • We introduce a user-centric framework, FAIRY, for discovering and ranking post-hoc explanations for the social feeds generated by black-box platforms. These explanations reveal relationships between users’ profiles and their feed items and are extracted from the local interaction graphs of users. FAIRY employs a learning-to-rank (LTR) method to score candidate explanations based on their relevance and surprisal. • We propose a method, PRINCE, to facilitate provider-side explainability in graph-based recommender systems that use personalized PageRank at their core. PRINCE explanations are comprehensible for users, because they present subsets of the user’s prior actions responsible for the received recommendations. PRINCE operates in a counterfactual setup and builds on a polynomial-time algorithm for finding the smallest counterfactual explanations. • We propose a human-in-the-loop framework, ELIXIR, for enhancing scrutability and subsequently the recommendation models by leveraging user feedback on explanations. ELIXIR enables recommender systems to collect user feedback on pairs of recommendations and explanations. The feedback is incorporated into the model by imposing a soft constraint for learning user-specific item representations. We evaluate all proposed models and methods with real user studies and demonstrate their benefits at achieving explainability and scrutability in recommender systems.
Export
BibTeX
@phdthesis{Ghazphd2021,
TITLE = {Enhancing Explainability and Scrutability of Recommender Systems},
AUTHOR = {Ghazimatin, Azin},
LANGUAGE = {eng},
URL = {nbn:de:bsz:291--ds-355166},
DOI = {10.22028/D291-35516},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
ABSTRACT = {Our increasing reliance on complex algorithms for recommendations calls for models and methods for explainable, scrutable, and trustworthy AI. While explainability is required for understanding the relationships between model inputs and outputs, a scrutable system allows us to modify its behavior as desired. These properties help bridge the gap between our expectations and the algorithm{\textquoteright}s behavior and accordingly boost our trust in AI. Aiming to cope with information overload, recommender systems play a crucial role in filtering content (such as products, news, songs, and movies) and shaping a personalized experience for their users. Consequently, there has been a growing demand from the information consumers to receive proper explanations for their personalized recommendations. These explanations aim at helping users understand why certain items are recommended to them and how their previous inputs to the system relate to the generation of such recommendations. Besides, in the event of receiving undesirable content, explanations could possibly contain valuable information as to how the system{\textquoteright}s behavior can be modified accordingly. In this thesis, we present our contributions towards explainability and scrutability of recommender systems: \mbox{$\bullet$} We introduce a user-centric framework, FAIRY, for discovering and ranking post-hoc explanations for the social feeds generated by black-box platforms. These explanations reveal relationships between users{\textquoteright} profiles and their feed items and are extracted from the local interaction graphs of users. FAIRY employs a learning-to-rank (LTR) method to score candidate explanations based on their relevance and surprisal. \mbox{$\bullet$} We propose a method, PRINCE, to facilitate provider-side explainability in graph-based recommender systems that use personalized PageRank at their core. PRINCE explanations are comprehensible for users, because they present subsets of the user{\textquoteright}s prior actions responsible for the received recommendations. PRINCE operates in a counterfactual setup and builds on a polynomial-time algorithm for finding the smallest counterfactual explanations. \mbox{$\bullet$} We propose a human-in-the-loop framework, ELIXIR, for enhancing scrutability and subsequently the recommendation models by leveraging user feedback on explanations. ELIXIR enables recommender systems to collect user feedback on pairs of recommendations and explanations. The feedback is incorporated into the model by imposing a soft constraint for learning user-specific item representations. We evaluate all proposed models and methods with real user studies and demonstrate their benefits at achieving explainability and scrutability in recommender systems.},
}
Endnote
%0 Thesis
%A Ghazimatin, Azin
%Y Weikum, Gerhard
%A referee: Saha Roy, Rishiraj
%A referee: Amer-Yahia, Sihem
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Enhancing Explainability and Scrutability of Recommender Systems :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-3C99-7
%R 10.22028/D291-35516
%U nbn:de:bsz:291--ds-355166
%F OTHER: hdl:20.500.11880/32590
%I Universität des Saarlandes
%C Saarbrücken
%D 2021
%P 136 p.
%V phd
%9 phd
%X Our increasing reliance on complex algorithms for recommendations calls for models and methods for explainable, scrutable, and trustworthy AI. While explainability is required for understanding the relationships between model inputs and outputs, a scrutable system allows us to modify its behavior as desired. These properties help bridge the gap between our expectations and the algorithm’s behavior and accordingly boost our trust in AI. Aiming to cope with information overload, recommender systems play a crucial role in filtering content (such as products, news, songs, and movies) and shaping a personalized experience for their users. Consequently, there has been a growing demand from the information consumers to receive proper explanations for their personalized recommendations. These explanations aim at helping users understand why certain items are recommended to them and how their previous inputs to the system relate to the generation of such recommendations. Besides, in the event of receiving undesirable content, explanations could possibly contain valuable information as to how the system’s behavior can be modified accordingly. In this thesis, we present our contributions towards explainability and scrutability of recommender systems: • We introduce a user-centric framework, FAIRY, for discovering and ranking post-hoc explanations for the social feeds generated by black-box platforms. These explanations reveal relationships between users’ profiles and their feed items and are extracted from the local interaction graphs of users. FAIRY employs a learning-to-rank (LTR) method to score candidate explanations based on their relevance and surprisal. • We propose a method, PRINCE, to facilitate provider-side explainability in graph-based recommender systems that use personalized PageRank at their core. PRINCE explanations are comprehensible for users, because they present subsets of the user’s prior actions responsible for the received recommendations. PRINCE operates in a counterfactual setup and builds on a polynomial-time algorithm for finding the smallest counterfactual explanations. • We propose a human-in-the-loop framework, ELIXIR, for enhancing scrutability and subsequently the recommendation models by leveraging user feedback on explanations. ELIXIR enables recommender systems to collect user feedback on pairs of recommendations and explanations. The feedback is incorporated into the model by imposing a soft constraint for learning user-specific item representations. We evaluate all proposed models and methods with real user studies and demonstrate their benefits at achieving explainability and scrutability in recommender systems.
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/32590
[85]
A. Ghazimatin, S. Pramanik, R. Saha Roy, and G. Weikum, “ELIXIR: Learning from User Feedback on Explanations to Improve Recommender Models,” 2021. [Online]. Available: https://arxiv.org/abs/2102.09388. (arXiv: 2102.09388)
Abstract
System-provided explanations for recommendations are an important component towards transparent and trustworthy AI. In state-of-the-art research, this is a one-way signal, though, to improve user acceptance. In this paper, we turn the role of explanations around and investigate how they can contribute to enhancing the quality of generated recommendations themselves. We devise a human-in-the-loop framework, called ELIXIR, where user feedback on explanations is leveraged for pairwise learning of user preferences. ELIXIR leverages feedback on pairs of recommendations and explanations to learn user-specific latent preference vectors, overcoming sparseness by label propagation with item-similarity-based neighborhoods. Our framework is instantiated using generalized graph recommendation via Random Walk with Restart. Insightful experiments with a real user study show significant improvements in movie and book recommendations over item-level feedback.
Export
BibTeX
@online{Ghazimatin_2102.09388,
TITLE = {{ELIXIR}: {L}earning from User Feedback on Explanations to Improve Recommender Models},
AUTHOR = {Ghazimatin, Azin and Pramanik, Soumajit and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2102.09388},
EPRINT = {2102.09388},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {System-provided explanations for recommendations are an important component towards transparent and trustworthy AI. In state-of-the-art research, this is a one-way signal, though, to improve user acceptance. In this paper, we turn the role of explanations around and investigate how they can contribute to enhancing the quality of generated recommendations themselves. We devise a human-in-the-loop framework, called ELIXIR, where user feedback on explanations is leveraged for pairwise learning of user preferences. ELIXIR leverages feedback on pairs of recommendations and explanations to learn user-specific latent preference vectors, overcoming sparseness by label propagation with item-similarity-based neighborhoods. Our framework is instantiated using generalized graph recommendation via Random Walk with Restart. Insightful experiments with a real user study show significant improvements in movie and book recommendations over item-level feedback.},
}
Endnote
%0 Report
%A Ghazimatin, Azin
%A Pramanik, Soumajit
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T ELIXIR: Learning from User Feedback on Explanations to Improve Recommender Models :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0309-B
%U https://arxiv.org/abs/2102.09388
%D 2021
%X System-provided explanations for recommendations are an important component towards transparent and trustworthy AI. In state-of-the-art research, this is a one-way signal, though, to improve user acceptance. In this paper, we turn the role of explanations around and investigate how they can contribute to enhancing the quality of generated recommendations themselves. We devise a human-in-the-loop framework, called ELIXIR, where user feedback on explanations is leveraged for pairwise learning of user preferences. ELIXIR leverages feedback on pairs of recommendations and explanations to learn user-specific latent preference vectors, overcoming sparseness by label propagation with item-similarity-based neighborhoods. Our framework is instantiated using generalized graph recommendation via Random Walk with Restart. Insightful experiments with a real user study show significant improvements in movie and book recommendations over item-level feedback.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Learning, cs.LG
[86]
A. Ghazimatin, S. Pramanik, R. Saha Roy, and G. Weikum, “ELIXIR: Learning from User Feedback on Explanations to Improve Recommender Models,” in The Web Conference 2021 (WWW 2021), Ljubljana, Slovenia, 2021.
Export
BibTeX
@inproceedings{Ghazimatin_WWW21,
TITLE = {{ELIXIR}: {L}earning from User Feedback on Explanations to Improve Recommender Models},
AUTHOR = {Ghazimatin, Azin and Pramanik, Soumajit and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-8312-7},
DOI = {10.1145/3442381.3449848},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The Web Conference 2021 (WWW 2021)},
EDITOR = {Leskovec, Jure and Grobelnik, Marko and Najork, Marc and Tang, Jie and Zia, Leila},
PAGES = {3850--3860},
ADDRESS = {Ljubljana, Slovenia},
}
Endnote
%0 Conference Proceedings
%A Ghazimatin, Azin
%A Pramanik, Soumajit
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T ELIXIR: Learning from User Feedback on Explanations to Improve Recommender Models :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0303-1
%R 10.1145/3442381.3449848
%D 2021
%B 30th The Web Conference
%Z date of event: 2021-04-19 - 2021-04-23
%C Ljubljana, Slovenia
%B The Web Conference 2021
%E Leskovec, Jure; Grobelnik, Marko; Najork, Marc; Tang, Jie; Zia, Leila
%P 3850 - 3860
%I ACM
%@ 978-1-4503-8312-7
[87]
B. Gonzalez-Moodie, S. Daiek, J. Lorenzo-Trueba, and A. S. Varde, “Multispectral Drone Data Analysis on Coastal Dunes,” in IEEE International Conference on Big Data, Orlando, FL, USA (Virtual Event), 2021.
Export
BibTeX
@inproceedings{Gonzalez-Moodie_BigData21,
TITLE = {Multispectral Drone Data Analysis on Coastal Dunes},
AUTHOR = {Gonzalez-Moodie, Britnie and Daiek, Shane and Lorenzo-Trueba, Jorge and Varde, Aparna S.},
LANGUAGE = {eng},
ISBN = {978-1-6654-3902-2},
DOI = {10.1109/BigData52589.2021.9671340},
PUBLISHER = {IEEE},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {IEEE International Conference on Big Data},
EDITOR = {Chen, Yixin and Ludwig, Heiko and Tu, Yicheng and Fayyad, Usama and Zhu, Xingquan and Xu, Xiaohua and Byna, Suren and Liu, Xiong and Zhang, Jianping and Pan, Shirui and Papalexakis, Vagelis and Wang, Jianwu and Cuzzocrea, Alfredo and Ordonez, Carlos},
PAGES = {5903--5905},
ADDRESS = {Orlando, FL, USA (Virtual Event)},
}
Endnote
%0 Conference Proceedings
%A Gonzalez-Moodie, Britnie
%A Daiek, Shane
%A Lorenzo-Trueba, Jorge
%A Varde, Aparna S.
%+ External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Multispectral Drone Data Analysis on Coastal Dunes :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-C6F1-6
%R 10.1109/BigData52589.2021.9671340
%D 2021
%B IEEE International Conference on Big Data
%Z date of event: 2021-12-15 - 2021-12-18
%C Orlando, FL, USA (Virtual Event)
%B IEEE International Conference on Big Data
%E Chen, Yixin; Ludwig, Heiko; Tu, Yicheng; Fayyad, Usama; Zhu, Xingquan; Xu, Xiaohua; Byna, Suren; Liu, Xiong; Zhang, Jianping; Pan, Shirui; Papalexakis, Vagelis; Wang, Jianwu; Cuzzocrea, Alfredo; Ordonez, Carlos
%P 5903 - 5905
%I IEEE
%@ 978-1-6654-3902-2
[88]
A. Guimarães and G. Weikum, “X-Posts Explained: Analyzing and Predicting Controversial Contributions in Thematically Diverse Reddit Forums,” in Proceedings of the Fifteenth International Conference on Web and Social Media (ICWSM 2021), Atlanta, GA, USA, 2021.
Export
BibTeX
@inproceedings{Guimaraes_ICWSM2021,
TITLE = {X-Posts Explained: {A}nalyzing and Predicting Controversial Contributions in Thematically Diverse {R}eddit Forums},
AUTHOR = {Guimar{\~a}es, Anna and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-57735-869-5},
URL = {https://ojs.aaai.org/index.php/ICWSM/article/view/18050},
PUBLISHER = {AAAI},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the Fifteenth International Conference on Web and Social Media (ICWSM 2021)},
PAGES = {163--172},
ADDRESS = {Atlanta, GA, USA},
}
Endnote
%0 Conference Proceedings
%A Guimarães, Anna
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T X-Posts Explained: Analyzing and Predicting Controversial Contributions in Thematically Diverse Reddit Forums :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0345-7
%U https://ojs.aaai.org/index.php/ICWSM/article/view/18050
%D 2021
%B 15th International Conference on Web and Social Media
%Z date of event: 2021-06-07 - 2021-06-10
%C Atlanta, GA, USA
%B Proceedings of the Fifteenth International Conference on Web and Social Media
%P 163 - 172
%I AAAI
%@ 978-1-57735-869-5
%U https://ojs.aaai.org/index.php/ICWSM/article/view/18050/17853
[89]
A. Guimarães, E. Terolli, and G. Weikum, “Comparing Health Forums: User Engagement, Salient Entities, Medical Detail,” in CSCW ’21 Companion, Virtual Event, USA, 2021.
Export
BibTeX
@inproceedings{Guimaraes21,
TITLE = {Comparing Health Forums: {U}ser Engagement, Salient Entities, Medical Detail},
AUTHOR = {Guimar{\~a}es, Anna and Terolli, Erisa and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-8479-7},
DOI = {10.1145/3462204.3481748},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {CSCW '21 Companion},
EDITOR = {Ding, Sharon and Fussell, Susan and Monroy-Hern{\'a}ndez, Andr{\'e}s and Munson, Sean and Shklovski, Irina and Naaman, Mor},
PAGES = {57--61},
ADDRESS = {Virtual Event, USA},
}
Endnote
%0 Conference Proceedings
%A Guimarães, Anna
%A Terolli, Erisa
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Comparing Health Forums: User Engagement, Salient Entities, Medical Detail :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-BDA5-7
%R 10.1145/3462204.3481748
%D 2021
%B 24th ACM Conference on Computer-Supported Cooperative Work and Social Computing
%Z date of event: 2021-10-23 - 2021-10-27
%C Virtual Event, USA
%B CSCW '21 Companion
%E Ding, Sharon; Fussell, Susan; Monroy-Hernández, Andrés; Munson, Sean; Shklovski, Irina; Naaman, Mor
%P 57 - 61
%I ACM
%@ 978-1-4503-8479-7
[90]
M. Hedderich, J. Fischer, D. Klakow, and J. Vreeken, “Label-Descriptive Patterns and their Application to Characterizing Classification Errors,” 2021. [Online]. Available: https://arxiv.org/abs/2110.09599. (arXiv: 2110.09599)
Abstract
State-of-the-art deep learning methods achieve human-like performance on many tasks, but make errors nevertheless. Characterizing these errors in easily interpretable terms gives insight into whether a model is prone to making systematic errors, but also gives a way to act and improve the model. In this paper we propose a method that allows us to do so for arbitrary classifiers by mining a small set of patterns that together succinctly describe the input data that is partitioned according to correctness of prediction. We show this is an instance of the more general label description problem, which we formulate in terms of the Minimum Description Length principle. To discover good pattern sets we propose the efficient and hyperparameter-free Premise algorithm, which through an extensive set of experiments we show on both synthetic and real-world data performs very well in practice; unlike existing solutions it ably recovers ground truth patterns, even on highly imbalanced data over many unique items, or where patterns are only weakly associated to labels. Through two real-world case studies we confirm that Premise gives clear and actionable insight into the systematic errors made by modern NLP classifiers.
Export
BibTeX
@online{Hedderich_arXiv2110.09599,
TITLE = {Label-Descriptive Patterns and their Application to Characterizing Classification Errors},
AUTHOR = {Hedderich, Michael and Fischer, Jonas and Klakow, Dietrich and Vreeken, Jilles},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2110.09599},
EPRINT = {2110.09599},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {State-of-the-art deep learning methods achieve human-like performance on many tasks, but make errors nevertheless. Characterizing these errors in easily interpretable terms gives insight into whether a model is prone to making systematic errors, but also gives a way to act and improve the model. In this paper we propose a method that allows us to do so for arbitrary classifiers by mining a small set of patterns that together succinctly describe the input data that is partitioned according to correctness of prediction. We show this is an instance of the more general label description problem, which we formulate in terms of the Minimum Description Length principle. To discover good pattern sets we propose the efficient and hyperparameter-free Premise algorithm, which through an extensive set of experiments we show on both synthetic and real-world data performs very well in practice; unlike existing solutions it ably recovers ground truth patterns, even on highly imbalanced data over many unique items, or where patterns are only weakly associated to labels. Through two real-world case studies we confirm that Premise gives clear and actionable insight into the systematic errors made by modern NLP classifiers.},
}
Endnote
%0 Report
%A Hedderich, Michael
%A Fischer, Jonas
%A Klakow, Dietrich
%A Vreeken, Jilles
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Label-Descriptive Patterns and their Application to Characterizing Classification Errors :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-B127-3
%U https://arxiv.org/abs/2110.09599
%D 2021
%X State-of-the-art deep learning methods achieve human-like performance on many tasks, but make errors nevertheless. Characterizing these errors in easily interpretable terms gives insight into whether a model is prone to making systematic errors, but also gives a way to act and improve the model. In this paper we propose a method that allows us to do so for arbitrary classifiers by mining a small set of patterns that together succinctly describe the input data that is partitioned according to correctness of prediction. We show this is an instance of the more general label description problem, which we formulate in terms of the Minimum Description Length principle. To discover good pattern sets we propose the efficient and hyperparameter-free Premise algorithm, which through an extensive set of experiments we show on both synthetic and real-world data performs very well in practice; unlike existing solutions it ably recovers ground truth patterns, even on highly imbalanced data over many unique items, or where patterns are only weakly associated to labels. Through two real-world case studies we confirm that Premise gives clear and actionable insight into the systematic errors made by modern NLP classifiers.
%K Computer Science, Learning, cs.LG,Computer Science, Computation and Language, cs.CL
[91]
E. Heiter, J. Fischer, and J. Vreeken, “Factoring Out Prior Knowledge from Low-dimensional Embeddings,” 2021. [Online]. Available: https://arxiv.org/abs/2103.01828. (arXiv: 2103.01828)
Abstract
Low-dimensional embedding techniques such as tSNE and UMAP allow visualizing high-dimensional data and therewith facilitate the discovery of interesting structure. Although they are widely used, they visualize data as is, rather than in light of the background knowledge we have about the data. What we already know, however, strongly determines what is novel and hence interesting. In this paper we propose two methods for factoring out prior knowledge in the form of distance matrices from low-dimensional embeddings. To factor out prior knowledge from tSNE embeddings, we propose JEDI that adapts the tSNE objective in a principled way using Jensen-Shannon divergence. To factor out prior knowledge from any downstream embedding approach, we propose CONFETTI, in which we directly operate on the input distance matrices. Extensive experiments on both synthetic and real world data show that both methods work well, providing embeddings that exhibit meaningful structure that would otherwise remain hidden.
Export
BibTeX
@online{heiter:21:factoring,
TITLE = {Factoring Out Prior Knowledge from Low-dimensional Embeddings},
AUTHOR = {Heiter, Edith and Fischer, Jonas and Vreeken, Jilles},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2103.01828},
EPRINT = {2103.01828},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Low-dimensional embedding techniques such as tSNE and UMAP allow visualizing high-dimensional data and therewith facilitate the discovery of interesting structure. Although they are widely used, they visualize data as is, rather than in light of the background knowledge we have about the data. What we already know, however, strongly determines what is novel and hence interesting. In this paper we propose two methods for factoring out prior knowledge in the form of distance matrices from low-dimensional embeddings. To factor out prior knowledge from tSNE embeddings, we propose JEDI that adapts the tSNE objective in a principled way using Jensen-Shannon divergence. To factor out prior knowledge from any downstream embedding approach, we propose CONFETTI, in which we directly operate on the input distance matrices. Extensive experiments on both synthetic and real world data show that both methods work well, providing embeddings that exhibit meaningful structure that would otherwise remain hidden.},
}
Endnote
%0 Report
%A Heiter, Edith
%A Fischer, Jonas
%A Vreeken, Jilles
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Factoring Out Prior Knowledge from Low-dimensional Embeddings :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-16ED-5
%U https://arxiv.org/abs/2103.01828
%D 2021
%X Low-dimensional embedding techniques such as tSNE and UMAP allow visualizing high-dimensional data and therewith facilitate the discovery of interesting structure. Although they are widely used, they visualize data as is, rather than in light of the background knowledge we have about the data. What we already know, however, strongly determines what is novel and hence interesting. In this paper we propose two methods for factoring out prior knowledge in the form of distance matrices from low-dimensional embeddings. To factor out prior knowledge from tSNE embeddings, we propose JEDI that adapts the tSNE objective in a principled way using Jensen-Shannon divergence. To factor out prior knowledge from any downstream embedding approach, we propose CONFETTI, in which we directly operate on the input distance matrices. Extensive experiments on both synthetic and real world data show that both methods work well, providing embeddings that exhibit meaningful structure that would otherwise remain hidden.
%K Computer Science, Learning, cs.LG,Statistics, Machine Learning, stat.ML
[92]
V. T. Ho, K. Pal, and G. Weikum, “QuTE: Answering Quantity Queries from Web Tables,” in SIGMOD ’21, International Conference on Management of Data, Xi’an, Shaanxi, China, 2021.
Export
BibTeX
@inproceedings{Thinh_SIG21,
TITLE = {Qu{TE}: {A}nswering Quantity Queries from Web Tables},
AUTHOR = {Ho, Vinh Thinh and Pal, Koninika and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-8343-1},
DOI = {10.1145/3448016.3452763},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGMOD '21, International Conference on Management of Data},
EDITOR = {Li, Guoliang and Li, Zhanhuai and Idreos, Stratos and Srivastava, Divesh},
PAGES = {2740--2744},
ADDRESS = {Xi'an, Shaanxi, China},
}
Endnote
%0 Conference Proceedings
%A Ho, Vinh Thinh
%A Pal, Koninika
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T QuTE: Answering Quantity Queries from Web Tables :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-052E-0
%R 10.1145/3448016.3452763
%D 2021
%B International Conference on Management of Data
%Z date of event: 2021-06-19 - 2021-06-25
%C Xi'an, Shaanxi, China
%B SIGMOD '21
%E Li, Guoliang; Li, Zhanhuai; Idreos, Stratos; Srivastava, Divesh
%P 2740 - 2744
%I ACM
%@ 978-1-4503-8343-1
[93]
V. T. Ho, K. Pal, S. Razniewski, K. Berberich, and G. Weikum, “Extracting Contextualized Quantity Facts from Web Tables,” in The Web Conference 2021 (WWW 2021), Ljubljana, Slovenia, 2021.
Export
BibTeX
@inproceedings{Thinh_WWW21,
TITLE = {Extracting Contextualized Quantity Facts from Web Tables},
AUTHOR = {Ho, Vinh Thinh and Pal, Koninika and Razniewski, Simon and Berberich, Klaus and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-8312-7},
DOI = {10.1145/3442381.3450072},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The Web Conference 2021 (WWW 2021)},
EDITOR = {Leskovec, Jure and Grobelnik, Marko and Najork, Marc and Tang, Jie and Zia, Leila},
PAGES = {4033--4042},
ADDRESS = {Ljubljana, Slovenia},
}
Endnote
%0 Conference Proceedings
%A Ho, Vinh Thinh
%A Pal, Koninika
%A Razniewski, Simon
%A Berberich, Klaus
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Extracting Contextualized Quantity Facts from Web Tables :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-04A0-E
%R 10.1145/3442381.3450072
%D 2021
%B 30th The Web Conference
%Z date of event: 2021-04-19 - 2021-04-23
%C Ljubljana, Slovenia
%B The Web Conference 2021
%E Leskovec, Jure; Grobelnik, Marko; Najork, Marc; Tang, Jie; Zia, Leila
%P 4033 - 4042
%I ACM
%@ 978-1-4503-8312-7
[94]
K. Hui and K. Berberich, “Transitivity, Time Consumption, and Quality of Preference Judgments in Crowdsourcing,” 2021. [Online]. Available: https://arxiv.org/abs/2104.08926. (arXiv: 2104.08926)
Abstract
Preference judgments have been demonstrated as a better alternative to graded judgments to assess the relevance of documents relative to queries. Existing work has verified transitivity among preference judgments when collected from trained judges, which reduced the number of judgments dramatically. Moreover, strict preference judgments and weak preference judgments, where the latter additionally allow judges to state that two documents are equally relevant for a given query, are both widely used in literature. However, whether transitivity still holds when collected from crowdsourcing, i.e., whether the two kinds of preference judgments behave similarly remains unclear. In this work, we collect judgments from multiple judges using a crowdsourcing platform and aggregate them to compare the two kinds of preference judgments in terms of transitivity, time consumption, and quality. That is, we look into whether aggregated judgments are transitive, how long it takes judges to make them, and whether judges agree with each other and with judgments from TREC. Our key findings are that only strict preference judgments are transitive. Meanwhile, weak preference judgments behave differently in terms of transitivity, time consumption, as well as of quality of judgment.
Export
BibTeX
@online{Hui2104.08926,
TITLE = {Transitivity, Time Consumption, and Quality of Preference Judgments in Crowdsourcing},
AUTHOR = {Hui, Kai and Berberich, Klaus},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2104.08926},
EPRINT = {2104.08926},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Preference judgments have been demonstrated as a better alternative to graded judgments to assess the relevance of documents relative to queries. Existing work has verified transitivity among preference judgments when collected from trained judges, which reduced the number of judgments dramatically. Moreover, strict preference judgments and weak preference judgments, where the latter additionally allow judges to state that two documents are equally relevant for a given query, are both widely used in literature. However, whether transitivity still holds when collected from crowdsourcing, i.e., whether the two kinds of preference judgments behave similarly remains unclear. In this work, we collect judgments from multiple judges using a crowdsourcing platform and aggregate them to compare the two kinds of preference judgments in terms of transitivity, time consumption, and quality. That is, we look into whether aggregated judgments are transitive, how long it takes judges to make them, and whether judges agree with each other and with judgments from TREC. Our key findings are that only strict preference judgments are transitive. Meanwhile, weak preference judgments behave differently in terms of transitivity, time consumption, as well as of quality of judgment.},
}
Endnote
%0 Report
%A Hui, Kai
%A Berberich, Klaus
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Transitivity, Time Consumption, and Quality of Preference Judgments in Crowdsourcing :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-651A-9
%U https://arxiv.org/abs/2104.08926
%D 2021
%X Preference judgments have been demonstrated as a better alternative to graded judgments to assess the relevance of documents relative to queries. Existing work has verified transitivity among preference judgments when collected from trained judges, which reduced the number of judgments dramatically. Moreover, strict preference judgments and weak preference judgments, where the latter additionally allow judges to state that two documents are equally relevant for a given query, are both widely used in literature. However, whether transitivity still holds when collected from crowdsourcing, i.e., whether the two kinds of preference judgments behave similarly remains unclear. In this work, we collect judgments from multiple judges using a crowdsourcing platform and aggregate them to compare the two kinds of preference judgments in terms of transitivity, time consumption, and quality. That is, we look into whether aggregated judgments are transitive, how long it takes judges to make them, and whether judges agree with each other and with judgments from TREC. Our key findings are that only strict preference judgments are transitive. Meanwhile, weak preference judgments behave differently in terms of transitivity, time consumption, as well as of quality of judgment.
%K Computer Science, Information Retrieval, cs.IR
[95]
Z. Jia, S. Pramanik, R. Saha Roy, and G. Weikum, “Complex Temporal Question Answering on Knowledge Graphs,” 2021. [Online]. Available: https://arxiv.org/abs/2109.08935. (arXiv: 2109.08935)
Abstract
Question answering over knowledge graphs (KG-QA) is a vital topic in IR. Questions with temporal intent are a special class of practical importance, but have not received much attention in research. This work presents EXAQT, the first end-to-end system for answering complex temporal questions that have multiple entities and predicates, and associated temporal conditions. EXAQT answers natural language questions over KGs in two stages, one geared towards high recall, the other towards precision at top ranks. The first step computes question-relevant compact subgraphs within the KG, and judiciously enhances them with pertinent temporal facts, using Group Steiner Trees and fine-tuned BERT models. The second step constructs relational graph convolutional networks (R-GCNs) from the first step's output, and enhances the R-GCNs with time-aware entity embeddings and attention over temporal relations. We evaluate EXAQT on TimeQuestions, a large dataset of 16k temporal questions we compiled from a variety of general purpose KG-QA benchmarks. Results show that EXAQT outperforms three state-of-the-art systems for answering complex questions over KGs, thereby justifying specialized treatment of temporal QA.
Export
BibTeX
@online{Jia2109.08935,
TITLE = {Complex Temporal Question Answering on Knowledge Graphs},
AUTHOR = {Jia, Zhen and Pramanik, Soumajit and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2109.08935},
EPRINT = {2109.08935},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Question answering over knowledge graphs (KG-QA) is a vital topic in IR. Questions with temporal intent are a special class of practical importance, but have not received much attention in research. This work presents EXAQT, the first end-to-end system for answering complex temporal questions that have multiple entities and predicates, and associated temporal conditions. EXAQT answers natural language questions over KGs in two stages, one geared towards high recall, the other towards precision at top ranks. The first step computes question-relevant compact subgraphs within the KG, and judiciously enhances them with pertinent temporal facts, using Group Steiner Trees and fine-tuned BERT models. The second step constructs relational graph convolutional networks (R-GCNs) from the first step's output, and enhances the R-GCNs with time-aware entity embeddings and attention over temporal relations. We evaluate EXAQT on TimeQuestions, a large dataset of 16k temporal questions we compiled from a variety of general purpose KG-QA benchmarks. Results show that EXAQT outperforms three state-of-the-art systems for answering complex questions over KGs, thereby justifying specialized treatment of temporal QA.},
}
Endnote
%0 Report
%A Jia, Zhen
%A Pramanik, Soumajit
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Complex Temporal Question Answering on Knowledge Graphs :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-64F7-0
%U https://arxiv.org/abs/2109.08935
%D 2021
%X Question answering over knowledge graphs (KG-QA) is a vital topic in IR. Questions with temporal intent are a special class of practical importance, but have not received much attention in research. This work presents EXAQT, the first end-to-end system for answering complex temporal questions that have multiple entities and predicates, and associated temporal conditions. EXAQT answers natural language questions over KGs in two stages, one geared towards high recall, the other towards precision at top ranks. The first step computes question-relevant compact subgraphs within the KG, and judiciously enhances them with pertinent temporal facts, using Group Steiner Trees and fine-tuned BERT models. The second step constructs relational graph convolutional networks (R-GCNs) from the first step's output, and enhances the R-GCNs with time-aware entity embeddings and attention over temporal relations. We evaluate EXAQT on TimeQuestions, a large dataset of 16k temporal questions we compiled from a variety of general purpose KG-QA benchmarks. Results show that EXAQT outperforms three state-of-the-art systems for answering complex questions over KGs, thereby justifying specialized treatment of temporal QA.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[96]
Z. Jia, S. Pramanik, R. Saha Roy, and G. Weikum, “Complex Temporal Question Answering on Knowledge Graphs,” in CIKM ’21, 30th ACM International Conference on Information & Knowledge Management, Virtual Event, Australia, 2021.
Export
BibTeX
@inproceedings{jia2021complex,
TITLE = {Complex Temporal Question Answering on Knowledge Graphs},
AUTHOR = {Jia, Zhen and Pramanik, Soumajit and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-8446-9},
DOI = {10.1145/3459637.3482416},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {CIKM '21, 30th ACM International Conference on Information \& Knowledge Management},
EDITOR = {Demartini, Gianluca and Zuccon, Guido and Culpepper, J. Shane and Huang, Zi and Tong, Hanghang},
PAGES = {792--802},
ADDRESS = {Virtual Event, Australia},
}
Endnote
%0 Conference Proceedings
%A Jia, Zhen
%A Pramanik, Soumajit
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Complex Temporal Question Answering on Knowledge Graphs :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-A3A2-4
%R 10.1145/3459637.3482416
%D 2021
%B 30th ACM International Conference on Information & Knowledge Management
%Z date of event: 2021-11-01 - 2021-11-05
%C Virtual Event, Australia
%B CIKM '21
%E Demartini, Gianluca; Zuccon, Guido; Culpepper, J. Shane; Huang, Zi; Tong, Hanghang
%P 792 - 802
%I ACM
%@ 978-1-4503-8446-9
[97]
K. M. Jose, “Improving Efficiency of Dense Retrieval Methods with Query Expansion,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@mastersthesis{JoseMSc21,
TITLE = {Improving Efficiency of Dense Retrieval Methods with Query Expansion},
AUTHOR = {Jose, Kevin Martin},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
}
Endnote
%0 Thesis
%A Jose, Kevin Martin
%Y Yates, Andrew
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Improving Efficiency of Dense Retrieval Methods with Query Expansion :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-17AB-9
%I Universität des Saarlandes
%C Saarbrücken
%D 2021
%P X, 51 p.
%V master
%9 master
[98]
K. M. Jose, T. Nguyen, S. MacAvaney, J. Dalton, and A. Yates, “DiffIR: Exploring Differences in Ranking Models’ Behavior,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{Jose_SIGIR21,
TITLE = {{DiffIR}: {E}xploring Differences in Ranking Models' Behavior},
AUTHOR = {Jose, Kevin Martin and Nguyen, Thong and MacAvaney, Sean and Dalton, Jeffrey and Yates, Andrew},
LANGUAGE = {eng},
ISBN = {978-1-4503-8037-9},
DOI = {10.1145/3404835.3462784},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
EDITOR = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
PAGES = {2595--2599},
ADDRESS = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings
%A Jose, Kevin Martin
%A Nguyen, Thong
%A MacAvaney, Sean
%A Dalton, Jeffrey
%A Yates, Andrew
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T DiffIR: Exploring Differences in Ranking Models' Behavior :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-666D-B
%R 10.1145/3404835.3462784
%D 2021
%B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2021-07-11 - 2021-07-15
%C Virtual Event, Canada
%B SIGIR '21
%E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellogín, Alejandro; Yoshioka, Masaharu
%P 2595 - 2599
%I ACM
%@ 978-1-4503-8037-9
[99]
M. Kaiser, R. Saha Roy, and G. Weikum, “Reinforcement Learning from Reformulations in Conversational Question Answering over Knowledge Graphs,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{kaiser2021reinforcement,
TITLE = {Reinforcement Learning from Reformulations in~Conversational Question Answering over Knowledge Graphs},
AUTHOR = {Kaiser, Magdalena and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-8037-9},
DOI = {10.1145/3404835.3462859},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
EDITOR = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
PAGES = {459--469},
ADDRESS = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings
%A Kaiser, Magdalena
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Reinforcement Learning from Reformulations in Conversational Question Answering over Knowledge Graphs :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-513E-8
%R 10.1145/3404835.3462859
%D 2021
%B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2021-07-11 - 2021-07-15
%C Virtual Event, Canada
%B SIGIR '21
%E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellogín, Alejandro; Yoshioka, Masaharu
%P 459 - 469
%I ACM
%@ 978-1-4503-8037-9
[100]
M. Kaiser, R. Saha Roy, and G. Weikum, “Reinforcement Learning from Reformulations in Conversational Question Answering over Knowledge Graphs,” 2021. [Online]. Available: https://arxiv.org/abs/2105.04850. (arXiv: 2105.04850)
Abstract
The rise of personal assistants has made conversational question answering (ConvQA) a very popular mechanism for user-system interaction. State-of-the-art methods for ConvQA over knowledge graphs (KGs) can only learn from crisp question-answer pairs found in popular benchmarks. In reality, however, such training data is hard to come by: users would rarely mark answers explicitly as correct or wrong. In this work, we take a step towards a more natural learning paradigm - from noisy and implicit feedback via question reformulations. A reformulation is likely to be triggered by an incorrect system response, whereas a new follow-up question could be a positive signal on the previous turn's answer. We present a reinforcement learning model, termed CONQUER, that can learn from a conversational stream of questions and reformulations. CONQUER models the answering process as multiple agents walking in parallel on the KG, where the walks are determined by actions sampled using a policy network. This policy network takes the question along with the conversational context as inputs and is trained via noisy rewards obtained from the reformulation likelihood. To evaluate CONQUER, we create and release ConvRef, a benchmark with about 11k natural conversations containing around 205k reformulations. Experiments show that CONQUER successfully learns to answer conversational questions from noisy reward signals, significantly improving over a state-of-the-art baseline.
Export
BibTeX
@online{Kaiser_2105.04850,
TITLE = {Reinforcement Learning from Reformulations in Conversational Question Answering over Knowledge Graphs},
AUTHOR = {Kaiser, Magdalena and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2105.04850},
EPRINT = {2105.04850},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {The rise of personal assistants has made conversational question answering (ConvQA) a very popular mechanism for user-system interaction. State-of-the-art methods for ConvQA over knowledge graphs (KGs) can only learn from crisp question-answer pairs found in popular benchmarks. In reality, however, such training data is hard to come by: users would rarely mark answers explicitly as correct or wrong. In this work, we take a step towards a more natural learning paradigm -- from noisy and implicit feedback via question reformulations. A reformulation is likely to be triggered by an incorrect system response, whereas a new follow-up question could be a positive signal on the previous turn's answer. We present a reinforcement learning model, termed CONQUER, that can learn from a conversational stream of questions and reformulations. CONQUER models the answering process as multiple agents walking in parallel on the KG, where the walks are determined by actions sampled using a policy network. This policy network takes the question along with the conversational context as inputs and is trained via noisy rewards obtained from the reformulation likelihood. To evaluate CONQUER, we create and release ConvRef, a benchmark with about 11k natural conversations containing around 205k reformulations. Experiments show that CONQUER successfully learns to answer conversational questions from noisy reward signals, significantly improving over a state-of-the-art baseline.},
}
Endnote
%0 Report
%A Kaiser, Magdalena
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Reinforcement Learning from Reformulations in Conversational Question Answering over Knowledge Graphs :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-67C9-1
%U https://arxiv.org/abs/2105.04850
%D 2021
%X The rise of personal assistants has made conversational question answering (ConvQA) a very popular mechanism for user-system interaction. State-of-the-art methods for ConvQA over knowledge graphs (KGs) can only learn from crisp question-answer pairs found in popular benchmarks. In reality, however, such training data is hard to come by: users would rarely mark answers explicitly as correct or wrong. In this work, we take a step towards a more natural learning paradigm - from noisy and implicit feedback via question reformulations. A reformulation is likely to be triggered by an incorrect system response, whereas a new follow-up question could be a positive signal on the previous turn's answer. We present a reinforcement learning model, termed CONQUER, that can learn from a conversational stream of questions and reformulations. CONQUER models the answering process as multiple agents walking in parallel on the KG, where the walks are determined by actions sampled using a policy network. This policy network takes the question along with the conversational context as inputs and is trained via noisy rewards obtained from the reformulation likelihood. To evaluate CONQUER, we create and release ConvRef, a benchmark with about 11k natural conversations containing around 205k reformulations. Experiments show that CONQUER successfully learns to answer conversational questions from noisy reward signals, significantly improving over a state-of-the-art baseline.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[101]
J. Kalofolias, P. Welke, and J. Vreeken, “SUSAN: The Structural Similarity Random Walk Kernel,” in Proceedings of the SIAM International Conference on Data Mining (SDM 2021), Virtual Conference, 2021.
Export
BibTeX
@inproceedings{kalofolias:21:susan,
TITLE = {{SUSAN}: The Structural Similarity Random Walk Kernel},
AUTHOR = {Kalofolias, Janis and Welke, Pascal and Vreeken, Jilles},
LANGUAGE = {eng},
ISBN = {978-1-61197-670-0},
DOI = {10.1137/1.9781611976700.34},
PUBLISHER = {SIAM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the SIAM International Conference on Data Mining (SDM 2021)},
EDITOR = {Demeniconi, Carlotta and Davidson, Ian},
PAGES = {298--306},
ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings
%A Kalofolias, Janis
%A Welke, Pascal
%A Vreeken, Jilles
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T SUSAN: The Structural Similarity Random Walk Kernel :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-26C9-B
%R 10.1137/1.9781611976700.34
%D 2021
%B SIAM International Conference on Data Mining
%Z date of event: 2021-04-29 - 2021-05-01
%C Virtual Conference
%B Proceedings of the SIAM International Conference on Data Mining
%E Demeniconi, Carlotta; Davidson, Ian
%P 298 - 306
%I SIAM
%@ 978-1-61197-670-0
[102]
M. Kamp, J. Fischer, and J. Vreeken, “Federated Learning from Small Datasets,” 2021. [Online]. Available: https://arxiv.org/abs/2110.03469. (arXiv: 2110.03469)
Abstract
Federated learning allows multiple parties to collaboratively train a joint model without sharing local data. This enables applications of machine learning in settings of inherently distributed, undisclosable data such as in the medical domain. In practice, joint training is usually achieved by aggregating local models, for which local training objectives have to be in expectation similar to the joint (global) objective. Often, however, local datasets are so small that local objectives differ greatly from the global objective, resulting in federated learning to fail. We propose a novel approach that intertwines model aggregations with permutations of local models. The permutations expose each local model to a daisy chain of local datasets resulting in more efficient training in data-sparse domains. This enables training on extremely small local datasets, such as patient data across hospitals, while retaining the training efficiency and privacy benefits of federated learning.
Export
BibTeX
@online{Kamp2110.03469,
TITLE = {Federated Learning from Small Datasets},
AUTHOR = {Kamp, Michael and Fischer, Jonas and Vreeken, Jilles},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2110.03469},
EPRINT = {2110.03469},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Federated learning allows multiple parties to collaboratively train a joint<br>model without sharing local data. This enables applications of machine learning<br>in settings of inherently distributed, undisclosable data such as in the<br>medical domain. In practice, joint training is usually achieved by aggregating<br>local models, for which local training objectives have to be in expectation<br>similar to the joint (global) objective. Often, however, local datasets are so<br>small that local objectives differ greatly from the global objective, resulting<br>in federated learning to fail. We propose a novel approach that intertwines<br>model aggregations with permutations of local models. The permutations expose<br>each local model to a daisy chain of local datasets resulting in more efficient<br>training in data-sparse domains. This enables training on extremely small local<br>datasets, such as patient data across hospitals, while retaining the training<br>efficiency and privacy benefits of federated learning.<br>},
}
Endnote
%0 Report
%A Kamp, Michael
%A Fischer, Jonas
%A Vreeken, Jilles
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Federated Learning from Small Datasets :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-653B-4
%U https://arxiv.org/abs/2110.03469
%D 2021
%X Federated learning allows multiple parties to collaboratively train a joint<br>model without sharing local data. This enables applications of machine learning<br>in settings of inherently distributed, undisclosable data such as in the<br>medical domain. In practice, joint training is usually achieved by aggregating<br>local models, for which local training objectives have to be in expectation<br>similar to the joint (global) objective. Often, however, local datasets are so<br>small that local objectives differ greatly from the global objective, resulting<br>in federated learning to fail. We propose a novel approach that intertwines<br>model aggregations with permutations of local models. The permutations expose<br>each local model to a daisy chain of local datasets resulting in more efficient<br>training in data-sparse domains. This enables training on extremely small local<br>datasets, such as patient data across hospitals, while retaining the training<br>efficiency and privacy benefits of federated learning.<br>
%K Computer Science, Learning, cs.LG,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Distributed, Parallel, and Cluster Computing, cs.DC
[103]
P. Lahoti, K. Gummadi, and G. Weikum, “Detecting and Mitigating Test-time Failure Risks via Model-agnostic Uncertainty Learning,” 2021. [Online]. Available: https://arxiv.org/abs/2109.04432. (arXiv: 2109.04432)
Abstract
Reliably predicting potential failure risks of machine learning (ML) systems<br>when deployed with production data is a crucial aspect of trustworthy AI. This<br>paper introduces Risk Advisor, a novel post-hoc meta-learner for estimating<br>failure risks and predictive uncertainties of any already-trained black-box<br>classification model. In addition to providing a risk score, the Risk Advisor<br>decomposes the uncertainty estimates into aleatoric and epistemic uncertainty<br>components, thus giving informative insights into the sources of uncertainty<br>inducing the failures. Consequently, Risk Advisor can distinguish between<br>failures caused by data variability, data shifts and model limitations and<br>advise on mitigation actions (e.g., collecting more data to counter data<br>shift). Extensive experiments on various families of black-box classification<br>models and on real-world and synthetic datasets covering common ML failure<br>scenarios show that the Risk Advisor reliably predicts deployment-time failure<br>risks in all the scenarios, and outperforms strong baselines.<br>
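As a hedged illustration of the aleatoric/epistemic split mentioned in the abstract (a generic ensemble-based decomposition, not the Risk Advisor meta-learner itself; the dataset and model choices are assumptions), the total predictive entropy can be separated into the mean member entropy (aleatoric) and the remaining disagreement term (epistemic):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=2000, n_features=10, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

# Bootstrap ensemble standing in for a post-hoc meta-learner over a black-box model.
members = []
rng = np.random.default_rng(0)
for seed in range(10):
    idx = rng.integers(0, len(X_tr), len(X_tr))
    members.append(RandomForestClassifier(n_estimators=50, random_state=seed)
                   .fit(X_tr[idx], y_tr[idx]))

probs = np.stack([m.predict_proba(X_te) for m in members])   # shape (members, samples, classes)

def entropy(p, axis=-1, eps=1e-12):
    return -np.sum(p * np.log(p + eps), axis=axis)

total = entropy(probs.mean(axis=0))        # entropy of the averaged prediction
aleatoric = entropy(probs).mean(axis=0)    # mean entropy of individual members
epistemic = total - aleatoric              # disagreement between members

print("mean aleatoric:", aleatoric.mean(), "mean epistemic:", epistemic.mean())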
Export
BibTeX
@online{Lahoti2109.04432,
TITLE = {Detecting and Mitigating Test-time Failure Risks via Model-agnostic Uncertainty Learning},
AUTHOR = {Lahoti, Preethi and Gummadi, Krishna and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2109.04432},
EPRINT = {2109.04432},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Reliably predicting potential failure risks of machine learning (ML) systems<br>when deployed with production data is a crucial aspect of trustworthy AI. This<br>paper introduces Risk Advisor, a novel post-hoc meta-learner for estimating<br>failure risks and predictive uncertainties of any already-trained black-box<br>classification model. In addition to providing a risk score, the Risk Advisor<br>decomposes the uncertainty estimates into aleatoric and epistemic uncertainty<br>components, thus giving informative insights into the sources of uncertainty<br>inducing the failures. Consequently, Risk Advisor can distinguish between<br>failures caused by data variability, data shifts and model limitations and<br>advise on mitigation actions (e.g., collecting more data to counter data<br>shift). Extensive experiments on various families of black-box classification<br>models and on real-world and synthetic datasets covering common ML failure<br>scenarios show that the Risk Advisor reliably predicts deployment-time failure<br>risks in all the scenarios, and outperforms strong baselines.<br>},
}
Endnote
%0 Report
%A Lahoti, Preethi
%A Gummadi, Krishna
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Detecting and Mitigating Test-time Failure Risks via Model-agnostic Uncertainty Learning :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6491-2
%U https://arxiv.org/abs/2109.04432
%D 2021
%X Reliably predicting potential failure risks of machine learning (ML) systems<br>when deployed with production data is a crucial aspect of trustworthy AI. This<br>paper introduces Risk Advisor, a novel post-hoc meta-learner for estimating<br>failure risks and predictive uncertainties of any already-trained black-box<br>classification model. In addition to providing a risk score, the Risk Advisor<br>decomposes the uncertainty estimates into aleatoric and epistemic uncertainty<br>components, thus giving informative insights into the sources of uncertainty<br>inducing the failures. Consequently, Risk Advisor can distinguish between<br>failures caused by data variability, data shifts and model limitations and<br>advise on mitigation actions (e.g., collecting more data to counter data<br>shift). Extensive experiments on various families of black-box classification<br>models and on real-world and synthetic datasets covering common ML failure<br>scenarios show that the Risk Advisor reliably predicts deployment-time failure<br>risks in all the scenarios, and outperforms strong baselines.<br>
%K Computer Science, Learning, cs.LG,Computer Science, Information Retrieval, cs.IR,Statistics, Machine Learning, stat.ML
[104]
J. Lin, R. Nogueira, and A. Yates, Pretrained Transformers for Text Ranking : BERT and Beyond. San Rafael, CA: Morgan & Claypool Publishers, 2021.
Export
BibTeX
@book{DBLP:series/synthesis/2021LinNY,
TITLE = {Pretrained Transformers for Text Ranking : {BERT} and Beyond},
AUTHOR = {Lin, Jimmy and Nogueira, Rodrigo and Yates, Andrew},
LANGUAGE = {eng},
ISSN = {1947-4040},
ISBN = {978-1-63639-228-8; 978-1-63639-230-1},
DOI = {10.2200/S01123ED1V01Y202108HLT053},
PUBLISHER = {Morgan \& Claypool Publishers},
ADDRESS = {San Rafael, CA},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
PAGES = {XVII, 307},
SERIES = {Synthesis Lectures on Human Language Technologies},
VOLUME = {53},
}
Endnote
%0 Book
%A Lin, Jimmy
%A Nogueira, Rodrigo
%A Yates, Andrew
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Pretrained Transformers for Text Ranking : BERT and Beyond :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-FE79-F
%@ 978-1-63639-228-8
%@ 978-1-63639-230-1
%R 10.2200/S01123ED1V01Y202108HLT053
%I Morgan & Claypool Publishers
%C San Rafael, CA
%D 2021
%P XVII, 307
%B Synthesis Lectures on Human Language Technologies
%N 53
%@ false
[105]
S. MacAvaney, A. Yates, S. Feldman, D. Downey, A. Cohan, and N. Goharian, “Simplified Data Wrangling with ir_datasets,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{MacAvaney_SIGIR21,
TITLE = {Simplified Data Wrangling with ir{\textunderscore}datasets},
AUTHOR = {MacAvaney, Sean and Yates, Andrew and Feldman, Sergey and Downey, Doug and Cohan, Arman and Goharian, Nazli},
LANGUAGE = {eng},
ISBN = {978-1-4503-8037-9},
DOI = {10.1145/3404835.3463254},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
EDITOR = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
PAGES = {2429--2436},
ADDRESS = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings
%A MacAvaney, Sean
%A Yates, Andrew
%A Feldman, Sergey
%A Downey, Doug
%A Cohan, Arman
%A Goharian, Nazli
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
%T Simplified Data Wrangling with ir_datasets :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-665F-B
%R 10.1145/3404835.3463254
%D 2021
%B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2021-07-11 - 2021-07-15
%C Virtual Event, Canada
%B SIGIR '21
%E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellogín, Alejandro; Yoshioka, Masaharu
%P 2429 - 2436
%I ACM
%@ 978-1-4503-8037-9
[106]
S. MacAvaney, A. Yates, S. Feldman, D. Downey, A. Cohan, and N. Goharian, “Simplified Data Wrangling with ir_datasets,” 2021. [Online]. Available: https://arxiv.org/abs/2103.02280. (arXiv: 2103.02280)
Abstract
Managing the data for Information Retrieval (IR) experiments can be<br>challenging. Dataset documentation is scattered across the Internet and once<br>one obtains a copy of the data, there are numerous different data formats to<br>work with. Even basic formats can have subtle dataset-specific nuances that<br>need to be considered for proper use. To help mitigate these challenges, we<br>introduce a new robust and lightweight tool (ir_datasets) for acquiring,<br>managing, and performing typical operations over datasets used in IR. We<br>primarily focus on textual datasets used for ad-hoc search. This tool provides<br>both a Python and command line interface to numerous IR datasets and<br>benchmarks. To our knowledge, this is the most extensive tool of its kind.<br>Integrations with popular IR indexing and experimentation toolkits demonstrate<br>the tool's utility. We also provide documentation of these datasets through the<br>ir_datasets catalog: https://ir-datasets.com/. The catalog acts as a hub for<br>information on datasets used in IR, providing core information about what data<br>each benchmark provides as well as links to more detailed information. We<br>welcome community contributions and intend to continue to maintain and grow<br>this tool.<br>
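A short sketch of the Python interface described in the abstract; the dataset identifier below is only an example, and any id from the catalog at https://ir-datasets.com/ can be substituted:

import ir_datasets

dataset = ir_datasets.load("vaswani")        # downloads and caches on first use

for doc in dataset.docs_iter()[:3]:          # documents as named tuples
    print(doc.doc_id, doc.text[:60])

for query in dataset.queries_iter():         # topics / queries
    print(query.query_id, query.text)
    break

for qrel in dataset.qrels_iter():            # relevance judgments
    print(qrel.query_id, qrel.doc_id, qrel.relevance)
    break

The package also exposes a command-line interface over the same datasets, as noted in the abstract.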
Export
BibTeX
@online{MacAvaney_2103.02280,
TITLE = {Simplified Data Wrangling with ir{\textunderscore}datasets},
AUTHOR = {MacAvaney, Sean and Yates, Andrew and Feldman, Sergey and Downey, Doug and Cohan, Arman and Goharian, Nazli},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2103.02280},
EPRINT = {2103.02280},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Managing the data for Information Retrieval (IR) experiments can be<br>challenging. Dataset documentation is scattered across the Internet and once<br>one obtains a copy of the data, there are numerous different data formats to<br>work with. Even basic formats can have subtle dataset-specific nuances that<br>need to be considered for proper use. To help mitigate these challenges, we<br>introduce a new robust and lightweight tool (ir_datasets) for acquiring,<br>managing, and performing typical operations over datasets used in IR. We<br>primarily focus on textual datasets used for ad-hoc search. This tool provides<br>both a Python and command line interface to numerous IR datasets and<br>benchmarks. To our knowledge, this is the most extensive tool of its kind.<br>Integrations with popular IR indexing and experimentation toolkits demonstrate<br>the tool's utility. We also provide documentation of these datasets through the<br>ir_datasets catalog: https://ir-datasets.com/. The catalog acts as a hub for<br>information on datasets used in IR, providing core information about what data<br>each benchmark provides as well as links to more detailed information. We<br>welcome community contributions and intend to continue to maintain and grow<br>this tool.<br>},
}
Endnote
%0 Report
%A MacAvaney, Sean
%A Yates, Andrew
%A Feldman, Sergey
%A Downey, Doug
%A Cohan, Arman
%A Goharian, Nazli
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
%T Simplified Data Wrangling with ir_datasets :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6679-D
%U https://arxiv.org/abs/2103.02280
%D 2021
%X Managing the data for Information Retrieval (IR) experiments can be<br>challenging. Dataset documentation is scattered across the Internet and once<br>one obtains a copy of the data, there are numerous different data formats to<br>work with. Even basic formats can have subtle dataset-specific nuances that<br>need to be considered for proper use. To help mitigate these challenges, we<br>introduce a new robust and lightweight tool (ir_datasets) for acquiring,<br>managing, and performing typical operations over datasets used in IR. We<br>primarily focus on textual datasets used for ad-hoc search. This tool provides<br>both a Python and command line interface to numerous IR datasets and<br>benchmarks. To our knowledge, this is the most extensive tool of its kind.<br>Integrations with popular IR indexing and experimentation toolkits demonstrate<br>the tool's utility. We also provide documentation of these datasets through the<br>ir_datasets catalog: https://ir-datasets.com/. The catalog acts as a hub for<br>information on datasets used in IR, providing core information about what data<br>each benchmark provides as well as links to more detailed information. We<br>welcome community contributions and intend to continue to maintain and grow<br>this tool.<br>
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[107]
I. Mackie, J. Dalton, and A. Yates, “How Deep is your Learning: The DL-HARD Annotated Deep Learning Dataset,” 2021. [Online]. Available: https://arxiv.org/abs/2105.07975. (arXiv: 2105.07975)
Abstract
Deep Learning Hard (DL-HARD) is a new annotated dataset designed to more<br>effectively evaluate neural ranking models on complex topics. It builds on TREC<br>Deep Learning (DL) topics by extensively annotating them with question intent<br>categories, answer types, wikified entities, topic categories, and result type<br>metadata from a commercial web search engine. Based on this data, we introduce<br>a framework for identifying challenging queries. DL-HARD contains fifty topics<br>from the official DL 2019/2020 evaluation benchmark, half of which are newly<br>and independently assessed. We perform experiments using the official submitted<br>runs to DL on DL-HARD and find substantial differences in metrics and the<br>ranking of participating systems. Overall, DL-HARD is a new resource that<br>promotes research on neural ranking methods by focusing on challenging and<br>complex topics.<br>
Export
BibTeX
@online{Mackie_2105.07975,
TITLE = {How Deep is your Learning: The {DL}-{HARD} Annotated Deep Learning Dataset},
AUTHOR = {Mackie, Iain and Dalton, Jeffrey and Yates, Andrew},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2105.07975},
EPRINT = {2105.07975},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Deep Learning Hard (DL-HARD) is a new annotated dataset designed to more<br>effectively evaluate neural ranking models on complex topics. It builds on TREC<br>Deep Learning (DL) topics by extensively annotating them with question intent<br>categories, answer types, wikified entities, topic categories, and result type<br>metadata from a commercial web search engine. Based on this data, we introduce<br>a framework for identifying challenging queries. DL-HARD contains fifty topics<br>from the official DL 2019/2020 evaluation benchmark, half of which are newly<br>and independently assessed. We perform experiments using the official submitted<br>runs to DL on DL-HARD and find substantial differences in metrics and the<br>ranking of participating systems. Overall, DL-HARD is a new resource that<br>promotes research on neural ranking methods by focusing on challenging and<br>complex topics.<br>},
}
Endnote
%0 Report
%A Mackie, Iain
%A Dalton, Jeffrey
%A Yates, Andrew
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T How Deep is your Learning: The DL-HARD Annotated Deep Learning Dataset :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-67AB-3
%U https://arxiv.org/abs/2105.07975
%D 2021
%X Deep Learning Hard (DL-HARD) is a new annotated dataset designed to more<br>effectively evaluate neural ranking models on complex topics. It builds on TREC<br>Deep Learning (DL) topics by extensively annotating them with question intent<br>categories, answer types, wikified entities, topic categories, and result type<br>metadata from a commercial web search engine. Based on this data, we introduce<br>a framework for identifying challenging queries. DL-HARD contains fifty topics<br>from the official DL 2019/2020 evaluation benchmark, half of which are newly<br>and independently assessed. We perform experiments using the official submitted<br>runs to DL on DL-HARD and find substantial differences in metrics and the<br>ranking of participating systems. Overall, DL-HARD is a new resource that<br>promotes research on neural ranking methods by focusing on challenging and<br>complex topics.<br>
%K Computer Science, Information Retrieval, cs.IR
[108]
I. Mackie, J. Dalton, and A. Yates, “How Deep is your Learning: the DL-HARD Annotated Deep Learning Dataset,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{Mackie_SIGIR21,
TITLE = {How Deep is your Learning: {T}he {DL}-{HARD} Annotated Deep Learning Dataset},
AUTHOR = {Mackie, Iain and Dalton, Jeffrey and Yates, Andrew},
LANGUAGE = {eng},
ISBN = {978-1-4503-8037-9},
DOI = {10.1145/3404835.3463262},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
EDITOR = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
PAGES = {2335--2341},
ADDRESS = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings
%A Mackie, Iain
%A Dalton, Jeffrey
%A Yates, Andrew
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T How Deep is your Learning: the DL-HARD Annotated Deep Learning Dataset :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6640-C
%R 10.1145/3404835.3463262
%D 2021
%B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2021-07-11 - 2021-07-15
%C Virtual Event, Canada
%B SIGIR '21
%E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellogín, Alejandro; Yoshioka, Masaharu
%P 2335 - 2341
%I ACM
%@ 978-1-4503-8037-9
[109]
P. Mandros, “Discovering robust dependencies from data,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@phdthesis{Panphd2020,
TITLE = {Discovering robust dependencies from data},
AUTHOR = {Mandros, Panagiotis},
LANGUAGE = {eng},
URL = {urn:nbn:de:bsz:291--ds-342919},
DOI = {10.22028/D291-34291},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
}
Endnote
%0 Thesis
%A Mandros, Panagiotis
%Y Vreeken, Jilles
%A referee: Weikum, Gerhard
%A referee: Webb, Geoffrey
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Discovering robust dependencies from data :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-E4CF-E
%R 10.22028/D291-34291
%U urn:nbn:de:bsz:291--ds-342919
%F OTHER: hdl:20.500.11880/31535
%I Universität des Saarlandes
%C Saarbrücken
%D 2021
%P 194 p.
%V phd
%9 phd
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/31535
[110]
A. Marx, “Information-Theoretic Causal Discovery,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@phdthesis{Marxphd2020,
TITLE = {Information-Theoretic Causal Discovery},
AUTHOR = {Marx, Alexander},
LANGUAGE = {eng},
URL = {urn:nbn:de:bsz:291--ds-342908},
DOI = {10.22028/D291-34290},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
}
Endnote
%0 Thesis
%A Marx, Alexander
%Y Vreeken, Jilles
%A referee: Weikum, Gerhard
%A referee: Ommen, Thijs van
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Information-Theoretic Causal Discovery :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-EECA-9
%R 10.22028/D291-34290
%U urn:nbn:de:bsz:291--ds-342908
%F OTHER: hdl:20.500.11880/31480
%I Universität des Saarlandes
%C Saarbrücken
%D 2021
%P 195 p.
%V phd
%9 phd
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/31480
[111]
A. Marx, A. Gretton, and J. M. Mooij, “A Weaker Faithfulness Assumption based on Triple Interactions,” 2021. [Online]. Available: https://arxiv.org/abs/2010.14265. (arXiv: 2010.14265)
Abstract
One of the core assumptions in causal discovery is the faithfulness<br>assumption---i.e. assuming that independencies found in the data are due to<br>separations in the true causal graph. This assumption can, however, be violated<br>in many ways, including xor connections, deterministic functions or cancelling<br>paths. In this work, we propose a weaker assumption that we call 2-adjacency<br>faithfulness. In contrast to adjacency faithfulness, which assumes that there<br>is no conditional independence between each pair of variables that are<br>connected in the causal graph, we only require no conditional independence<br>between a node and a subset of its Markov blanket that can contain up to two<br>nodes. Equivalently, we adapt orientation faithfulness to this setting. We<br>further propose a sound orientation rule for causal discovery that applies<br>under weaker assumptions. As a proof of concept, we derive a modified Grow and<br>Shrink algorithm that recovers the Markov blanket of a target node and prove<br>its correctness under strictly weaker assumptions than the standard<br>faithfulness assumption.<br>
Export
BibTeX
@online{Marxarxiv21,
TITLE = {A Weaker Faithfulness Assumption based on Triple Interactions},
AUTHOR = {Marx, Alexander and Gretton, Arthur and Mooij, Joris M.},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2010.14265},
EPRINT = {2010.14265},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {One of the core assumptions in causal discovery is the faithfulness<br>assumption---i.e. assuming that independencies found in the data are due to<br>separations in the true causal graph. This assumption can, however, be violated<br>in many ways, including xor connections, deterministic functions or cancelling<br>paths. In this work, we propose a weaker assumption that we call 2-adjacency<br>faithfulness. In contrast to adjacency faithfulness, which assumes that there<br>is no conditional independence between each pair of variables that are<br>connected in the causal graph, we only require no conditional independence<br>between a node and a subset of its Markov blanket that can contain up to two<br>nodes. Equivalently, we adapt orientation faithfulness to this setting. We<br>further propose a sound orientation rule for causal discovery that applies<br>under weaker assumptions. As a proof of concept, we derive a modified Grow and<br>Shrink algorithm that recovers the Markov blanket of a target node and prove<br>its correctness under strictly weaker assumptions than the standard<br>faithfulness assumption.<br>},
}
Endnote
%0 Report
%A Marx, Alexander
%A Gretton, Arthur
%A Mooij, Joris M.
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T A Weaker Faithfulness Assumption based on Triple Interactions :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0BCE-5
%U https://arxiv.org/abs/2010.14265
%D 2021
%X One of the core assumptions in causal discovery is the faithfulness<br>assumption---i.e. assuming that independencies found in the data are due to<br>separations in the true causal graph. This assumption can, however, be violated<br>in many ways, including xor connections, deterministic functions or cancelling<br>paths. In this work, we propose a weaker assumption that we call 2-adjacency<br>faithfulness. In contrast to adjacency faithfulness, which assumes that there<br>is no conditional independence between each pair of variables that are<br>connected in the causal graph, we only require no conditional independence<br>between a node and a subset of its Markov blanket that can contain up to two<br>nodes. Equivalently, we adapt orientation faithfulness to this setting. We<br>further propose a sound orientation rule for causal discovery that applies<br>under weaker assumptions. As a proof of concept, we derive a modified Grow and<br>Shrink algorithm that recovers the Markov blanket of a target node and prove<br>its correctness under strictly weaker assumptions than the standard<br>faithfulness assumption.<br>
%K Statistics, Machine Learning, stat.ML,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Learning, cs.LG
[112]
A. Marx, L. Yang, and M. van Leeuwen, “Estimating Conditional Mutual Information for Discrete-Continuous Mixtures using Multidimensional Adaptive Histograms,” in Proceedings of the SIAM International Conference on Data Mining (SDM 2021), Virtual Conference, 2021.
Export
BibTeX
@inproceedings{marx:20:myl,
TITLE = {Estimating Conditional Mutual Information for Discrete-Continuous Mixtures using Multidimensional Adaptive Histograms},
AUTHOR = {Marx, Alexander and Yang, Lincen and van Leeuwen, Matthijs},
LANGUAGE = {eng},
ISBN = {978-1-61197-670-0},
DOI = {10.1137/1.9781611976700.44},
PUBLISHER = {SIAM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the SIAM International Conference on Data Mining (SDM 2021)},
PAGES = {387--395},
ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings
%A Marx, Alexander
%A Yang, Lincen
%A van Leeuwen, Matthijs
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Estimating Conditional Mutual Information for Discrete-Continuous Mixtures using Multidimensional Adaptive Histograms :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0BC7-C
%R 10.1137/1.9781611976700.44
%D 2021
%B SIAM International Conference on Data Mining
%Z date of event: 2021-04-29 - 2021-05-01
%C Virtual Conference
%B Proceedings of the SIAM International Conference on Data Mining
%P 387 - 395
%I SIAM
%@ 978-1-61197-670-0
[113]
A. Marx and J. Fischer, “Estimating Mutual Information via Geodesic kNN,” 2021. [Online]. Available: https://arxiv.org/abs/2110.13883. (arXiv: 2110.13883)
Abstract
Estimating mutual information (MI) between two continuous random variables<br>$X$ and $Y$ allows to capture non-linear dependencies between them,<br>non-parametrically. As such, MI estimation lies at the core of many data<br>science applications. Yet, robustly estimating MI for high-dimensional $X$ and<br>$Y$ is still an open research question.<br> In this paper, we formulate this problem through the lens of manifold<br>learning. That is, we leverage the common assumption that the information of<br>$X$ and $Y$ is captured by a low-dimensional manifold embedded in the observed<br>high-dimensional space and transfer it to MI estimation. As an extension to<br>state-of-the-art $k$NN estimators, we propose to determine the $k$-nearest<br>neighbours via geodesic distances on this manifold rather than form the ambient<br>space, which allows us to estimate MI even in the high-dimensional setting. An<br>empirical evaluation of our method, G-KSG, against the state-of-the-art shows<br>that it yields good estimations of the MI in classical benchmark, and manifold<br>tasks, even for high dimensional datasets, which none of the existing methods<br>can provide.<br>
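The core idea, replacing ambient-space distances in a KSG-style kNN estimator with geodesic distances on a neighbourhood graph, can be sketched as follows (an illustrative reconstruction, not the authors' G-KSG code; the graph size and k are assumptions):

import numpy as np
from scipy.sparse.csgraph import shortest_path
from scipy.special import digamma
from sklearn.neighbors import kneighbors_graph

def geodesic_distances(Z, n_neighbors=10):
    # All-pairs shortest-path distances on a kNN graph (Isomap-style).
    graph = kneighbors_graph(Z, n_neighbors=n_neighbors, mode="distance")
    return shortest_path(graph, method="D", directed=False)

def ksg_mi_geodesic(X, Y, k=5, n_neighbors=10):
    n = len(X)
    dx = geodesic_distances(X, n_neighbors)
    dy = geodesic_distances(Y, n_neighbors)
    dj = np.maximum(dx, dy)                      # max-norm joint distance
    np.fill_diagonal(dj, np.inf)
    eps = np.sort(dj, axis=1)[:, k - 1]          # distance to the k-th joint neighbour
    nx = (dx < eps[:, None]).sum(axis=1) - 1     # marginal neighbour counts (exclude self)
    ny = (dy < eps[:, None]).sum(axis=1) - 1
    return digamma(k) + digamma(n) - np.mean(digamma(nx + 1) + digamma(ny + 1))

# Toy check: Y is a noisy function of X, so the estimate should be clearly positive.
rng = np.random.default_rng(0)
X = rng.normal(size=(500, 1))
Y = np.sin(3 * X) + 0.1 * rng.normal(size=(500, 1))
print("estimated MI:", ksg_mi_geodesic(X, Y))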
Export
BibTeX
@online{Marx_arXiv2110.13883,
TITLE = {{Estimating Mutual Information via Geodesic $k$NN}},
AUTHOR = {Marx, Alexander and Fischer, Jonas},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2110.13883},
EPRINT = {2110.13883},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Estimating mutual information (MI) between two continuous random variables<br>$X$ and $Y$ allows to capture non-linear dependencies between them,<br>non-parametrically. As such, MI estimation lies at the core of many data<br>science applications. Yet, robustly estimating MI for high-dimensional $X$ and<br>$Y$ is still an open research question.<br> In this paper, we formulate this problem through the lens of manifold<br>learning. That is, we leverage the common assumption that the information of<br>$X$ and $Y$ is captured by a low-dimensional manifold embedded in the observed<br>high-dimensional space and transfer it to MI estimation. As an extension to<br>state-of-the-art $k$NN estimators, we propose to determine the $k$-nearest<br>neighbours via geodesic distances on this manifold rather than form the ambient<br>space, which allows us to estimate MI even in the high-dimensional setting. An<br>empirical evaluation of our method, G-KSG, against the state-of-the-art shows<br>that it yields good estimations of the MI in classical benchmark, and manifold<br>tasks, even for high dimensional datasets, which none of the existing methods<br>can provide.<br>},
}
Endnote
%0 Report
%A Marx, Alexander
%A Fischer, Jonas
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Estimating Mutual Information via Geodesic kNN :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-B130-8
%U https://arxiv.org/abs/2110.13883
%D 2021
%X Estimating mutual information (MI) between two continuous random variables<br>$X$ and $Y$ allows to capture non-linear dependencies between them,<br>non-parametrically. As such, MI estimation lies at the core of many data<br>science applications. Yet, robustly estimating MI for high-dimensional $X$ and<br>$Y$ is still an open research question.<br> In this paper, we formulate this problem through the lens of manifold<br>learning. That is, we leverage the common assumption that the information of<br>$X$ and $Y$ is captured by a low-dimensional manifold embedded in the observed<br>high-dimensional space and transfer it to MI estimation. As an extension to<br>state-of-the-art $k$NN estimators, we propose to determine the $k$-nearest<br>neighbours via geodesic distances on this manifold rather than form the ambient<br>space, which allows us to estimate MI even in the high-dimensional setting. An<br>empirical evaluation of our method, G-KSG, against the state-of-the-art shows<br>that it yields good estimations of the MI in classical benchmark, and manifold<br>tasks, even for high dimensional datasets, which none of the existing methods<br>can provide.<br>
%K Computer Science, Information Theory, cs.IT,Mathematics, Information Theory, math.IT
[114]
O. A. Mian, A. Marx, and J. Vreeken, “Discovering Fully Oriented Causal Networks,” in Thirty-Fifth AAAI Conference on Artificial Intelligence, Vancouver, Canada, 2021.
Export
BibTeX
@inproceedings{mian:20:globe,
TITLE = {Discovering Fully Oriented Causal Networks},
AUTHOR = {Mian, Osman A. and Marx, Alexander and Vreeken, Jilles},
LANGUAGE = {eng},
ISBN = {978-1-57735-866-4},
DOI = {10.1609/aaai.v35i10.17085},
PUBLISHER = {AAAI},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Thirty-Fifth AAAI Conference on Artificial Intelligence},
PAGES = {8975--8982},
ADDRESS = {Vancouver, Canada},
}
Endnote
%0 Conference Proceedings
%A Mian, Osman A.
%A Marx, Alexander
%A Vreeken, Jilles
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Discovering Fully Oriented Causal Networks :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0BCB-8
%R 10.1609/aaai.v35i10.17085
%D 2021
%B The Thirty-Fifth Conference on Artificial Intelligence
%Z date of event: 2021-02-02 - 2021-02-09
%C Vancouver, Canada
%B Thirty-Fifth AAAI Conference on Artificial Intelligence
%P 8975 - 8982
%I AAAI
%@ 978-1-57735-866-4
[115]
P. Mirza, M. Abouhamra, and G. Weikum, “AligNarr: Aligning Narratives on Movies,” in The 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (ACL-IJCNLP 2021), Virtual, 2021.
Export
BibTeX
@inproceedings{Mirza_ACL-short.54,
TITLE = {{AligNarr}: {A}ligning Narratives on Movies},
AUTHOR = {Mirza, Paramita and Abouhamra, Mostafa and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-954085-53-4},
URL = {https://aclanthology.org/2021.acl-short.54},
DOI = {10.18653/v1/2021.acl-short.54},
PUBLISHER = {ACL},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (ACL-IJCNLP 2021)},
EDITOR = {Xia, Fei and Li, Wenjie and Navigli, Roberto},
PAGES = {427--433},
ADDRESS = {Virtual},
}
Endnote
%0 Conference Proceedings
%A Mirza, Paramita
%A Abouhamra, Mostafa
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T AligNarr: Aligning Narratives on Movies :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-4A1F-3
%U https://aclanthology.org/2021.acl-short.54
%R 10.18653/v1/2021.acl-short.54
%D 2021
%B The 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing
%Z date of event: 2021-08-01 - 2021-08-06
%C Virtual
%B The 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing
%E Xia, Fei; Li, Wenjie; Navigli, Roberto
%P 427 - 433
%I ACL
%@ 978-1-954085-53-4
[116]
S. Nag Chowdhury, R. Bhowmik, H. Ravi, G. de Melo, S. Razniewski, and G. Weikum, “Exploiting Image-Text Synergy for Contextual Image Captioning,” in Proceedings of the Third Workshop on Beyond Vision and LANguage: inTEgrating Real-world kNowledge (LANTERN), Kyiv, Ukraine (Online), 2021.
Export
BibTeX
@inproceedings{Chod_ECAL2021,
TITLE = {Exploiting Image-Text Synergy for Contextual Image Captioning},
AUTHOR = {Nag Chowdhury, Sreyasi and Bhowmik, Rajarshi and Ravi, Hareesh and de Melo, Gerard and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-954085-15-2},
URL = {https://aclanthology.org/2021.lantern-1.3},
PUBLISHER = {ACL},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the Third Workshop on Beyond Vision and LANguage: inTEgrating Real-world kNowledge (LANTERN)},
EDITOR = {Mosbach, Marius and Hedderich, Michael A. and Pezzelle, Sandro and Mogadala, Aditya and Klakow, Dietrich and Moens, Marie-Francine and Akata, Zeynep},
PAGES = {30--37},
ADDRESS = {Kyiv, Ukraine (Online)},
}
Endnote
%0 Conference Proceedings
%A Nag Chowdhury, Sreyasi
%A Bhowmik, Rajarshi
%A Ravi, Hareesh
%A de Melo, Gerard
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Exploiting Image-Text Synergy for Contextual Image Captioning :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0E60-D
%U https://aclanthology.org/2021.lantern-1.3
%D 2021
%B The Third Workshop Beyond Vision and LANguage: inTEgrating Real-world kNowledge
%Z date of event: 2021-04-20 - 2021-04-20
%C Kyiv, Ukraine (Online)
%B Proceedings of the Third Workshop on Beyond Vision and LANguage: inTEgrating Real-world kNowledge (LANTERN)
%E Mosbach, Marius; Hedderich, Michael A.; Pezzelle, Sandro; Mogadala, Aditya; Klakow, Dietrich; Moens, Marie-Francine; Akata, Zeynep
%P 30 - 37
%I ACL
%@ 978-1-954085-15-2
[117]
S. Nag Chowdhury, R. Wickramarachchi, M. H. Gad-Elrab, D. Stepanova, and C. Henson, “Towards Leveraging Commonsense Knowledge for Autonomous Driving,” in International Semantic Web Conference (ISWC) 2021: Posters, Demos, and Industry Tracks, Virtual Conference, 2021.
Export
BibTeX
@inproceedings{NagChowdhury_ISWC2021,
TITLE = {Towards Leveraging Commonsense Knowledge for Autonomous Driving},
AUTHOR = {Nag Chowdhury, Sreyasi and Wickramarachchi, Ruwan and Gad-Elrab, Mohamed Hassan and Stepanova, Daria and Henson, Cory},
LANGUAGE = {eng},
ISSN = {1613-0073},
URL = {https://ceur-ws.org/Vol-2980/paper396.pdf; urn:nbn:de:0074-2980-6},
PUBLISHER = {CEUR-WS.org},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {International Semantic Web Conference (ISWC) 2021: Posters, Demos, and Industry Tracks},
EDITOR = {Seneviratne, Oshani and Pesquita, Catia and Sequeda, Juan and Etcheverry, Lorena},
PAGES = {1--5},
EID = {396},
SERIES = {CEUR Workshop Proceedings},
VOLUME = {2980},
ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings
%A Nag Chowdhury, Sreyasi
%A Wickramarachchi, Ruwan
%A Gad-Elrab, Mohamed Hassan
%A Stepanova, Daria
%A Henson, Cory
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
%T Towards Leveraging Commonsense Knowledge for Autonomous Driving :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-42CD-6
%U https://ceur-ws.org/Vol-2980/paper396.pdf
%D 2021
%B 20th International Semantic Web Conference
%Z date of event: 2021-10-24 - 2021-10-28
%C Virtual Conference
%B International Semantic Web Conference (ISWC) 2021: Posters, Demos, and Industry Tracks
%E Seneviratne, Oshani; Pesquita, Catia; Sequeda, Juan; Etcheverry, Lorena
%P 1 - 5
%Z sequence number: 396
%I CEUR-WS.org
%B CEUR Workshop Proceedings
%N 2980
%@ false
[118]
S. Nag Chowdhury, “Text-image synergy for multimodal retrieval and annotation,” Universität des Saarlandes, Saarbrücken, 2021.
Abstract
Text and images are the two most common data modalities found on the Internet. Understanding the synergy between text and images, that is, seamlessly analyzing information from these modalities may be trivial for humans, but is challenging for software systems. In this dissertation we study problems where deciphering text-image synergy is crucial for finding solutions. We propose methods and ideas that establish semantic connections between text and images in multimodal contents, and empirically show their effectiveness in four interconnected problems: Image Retrieval, Image Tag Refinement, Image-Text Alignment, and Image Captioning. Our promising results and observations open up interesting scopes for future research involving text-image data understanding.
Export
BibTeX
@phdthesis{Chowphd2021,
TITLE = {Text-image synergy for multimodal retrieval and annotation},
AUTHOR = {Nag Chowdhury, Sreyasi},
LANGUAGE = {eng},
URL = {urn:nbn:de:bsz:291--ds-345092},
DOI = {10.22028/D291-34509},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
ABSTRACT = {Text and images are the two most common data modalities found on the Internet. Understanding the synergy between text and images, that is, seamlessly analyzing information from these modalities may be trivial for humans, but is challenging for software systems. In this dissertation we study problems where deciphering text-image synergy is crucial for finding solutions. We propose methods and ideas that establish semantic connections between text and images in multimodal contents, and empirically show their effectiveness in four interconnected problems: Image Retrieval, Image Tag Refinement, Image-Text Alignment, and Image Captioning. Our promising results and observations open up interesting scopes for future research involving text-image data understanding.},
}
Endnote
%0 Thesis
%A Nag Chowdhury, Sreyasi
%A referee: Weikum, Gerhard
%A referee: de Melo, Gerard
%A referee: Berberich, Klaus
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Text-image synergy for multimodal retrieval and annotation :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-428A-1
%R 10.22028/D291-34509
%U urn:nbn:de:bsz:291--ds-345092
%F OTHER: hdl:20.500.11880/31690
%I Universität des Saarlandes
%C Saarbrücken
%D 2021
%P 131 p.
%V phd
%9 phd
%X Text and images are the two most common data modalities found on the Internet. Understanding the synergy between text and images, that is, seamlessly analyzing information from these modalities may be trivial for humans, but is challenging for software systems. In this dissertation we study problems where deciphering text-image synergy is crucial for finding solutions. We propose methods and ideas that establish semantic connections between text and images in multimodal contents, and empirically show their effectiveness in four interconnected problems: Image Retrieval, Image Tag Refinement, Image-Text Alignment, and Image Captioning. Our promising results and observations open up interesting scopes for future research involving text-image data understanding.
%K image retrieval
image-text alignment
image captioning
commonsense knowledge
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/31690
[119]
S. Nag Chowdhury, S. Razniewski, and G. Weikum, “SANDI: Story-and-Images Alignment,” in The 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021), Online, 2021.
Export
BibTeX
@inproceedings{Thinh_EACL21,
TITLE = {{SANDI}: {S}tory-and-Images Alignment},
AUTHOR = {Nag Chowdhury, Sreyasi and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-954085-02-2},
URL = {https://aclanthology.org/2021.eacl-main.85},
PUBLISHER = {ACL},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021)},
EDITOR = {Merlo, Paola and Tiedemann, J{\"o}rg and Tsarfaty, Reut},
PAGES = {989--999},
ADDRESS = {Online},
}
Endnote
%0 Conference Proceedings
%A Nag Chowdhury, Sreyasi
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T SANDI: Story-and-Images Alignment :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-04A2-C
%U https://aclanthology.org/2021.eacl-main.85
%D 2021
%B 16th Conference of the European Chapter of the Association for Computational Linguistics
%Z date of event: 2021-04-19 - 2021-04-23
%C Online
%B The 16th Conference of the European Chapter of the Association for Computational Linguistics
%E Merlo, Paola; Tiedemann, Jörg; Tsarfaty, Reut
%P 989 - 999
%I ACL
%@ 978-1-954085-02-2
[120]
S. Naseri, J. Dalton, A. Yates, and J. Allan, “CEQE: Contextualized Embeddings for Query Expansion,” 2021. [Online]. Available: https://arxiv.org/abs/2103.05256. (arXiv: 2103.05256)
Abstract
In this work we leverage recent advances in context-sensitive language models<br>to improve the task of query expansion. Contextualized word representation<br>models, such as ELMo and BERT, are rapidly replacing static embedding models.<br>We propose a new model, Contextualized Embeddings for Query Expansion (CEQE),<br>that utilizes query-focused contextualized embedding vectors. We study the<br>behavior of contextual representations generated for query expansion in ad-hoc<br>document retrieval. We conduct our experiments on probabilistic retrieval<br>models as well as in combination with neural ranking models. We evaluate CEQE<br>on two standard TREC collections: Robust and Deep Learning. We find that CEQE<br>outperforms static embedding-based expansion methods on multiple collections<br>(by up to 18% on Robust and 31% on Deep Learning on average precision) and also<br>improves over proven probabilistic pseudo-relevance feedback (PRF) models. We<br>further find that multiple passes of expansion and reranking result in<br>continued gains in effectiveness with CEQE-based approaches outperforming other<br>approaches. The final model incorporating neural and CEQE-based expansion score<br>achieves gains of up to 5% in P@20 and 2% in AP on Robust over the<br>state-of-the-art transformer-based re-ranking model, Birch.<br>
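A toy sketch of the general idea of expansion with contextualized embeddings (not the CEQE model itself: here candidate terms are embedded in isolation and simply ranked by cosine similarity to a mean-pooled query embedding, whereas CEQE uses query-focused, in-context term representations; the model choice is an assumption):

import torch
from transformers import AutoModel, AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")
model.eval()

def embed(text):
    # Mean-pooled contextualized embedding of a piece of text.
    with torch.no_grad():
        inputs = tok(text, return_tensors="pt", truncation=True)
        hidden = model(**inputs).last_hidden_state[0]   # (tokens, dim)
    return hidden.mean(dim=0)

query = "effects of caffeine on sleep"
feedback_docs = [
    "Caffeine consumption in the evening can delay sleep onset and reduce deep sleep.",
    "Adenosine receptors are blocked by caffeine, which increases alertness.",
]

q_vec = embed(query)
candidates = {w.lower().strip(".,") for d in feedback_docs for w in d.split()}
scores = {w: torch.cosine_similarity(q_vec, embed(w), dim=0).item()
          for w in candidates}

expansion_terms = sorted(scores, key=scores.get, reverse=True)[:5]
print("expansion terms:", expansion_terms)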
Export
BibTeX
@online{Naseri_2103.05256,
TITLE = {{CEQE}: Contextualized Embeddings for Query Expansion},
AUTHOR = {Naseri, Shahrzad and Dalton, Jeffrey and Yates, Andrew and Allan, James},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2103.05256},
EPRINT = {2103.05256},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {In this work we leverage recent advances in context-sensitive language models<br>to improve the task of query expansion. Contextualized word representation<br>models, such as ELMo and BERT, are rapidly replacing static embedding models.<br>We propose a new model, Contextualized Embeddings for Query Expansion (CEQE),<br>that utilizes query-focused contextualized embedding vectors. We study the<br>behavior of contextual representations generated for query expansion in ad-hoc<br>document retrieval. We conduct our experiments on probabilistic retrieval<br>models as well as in combination with neural ranking models. We evaluate CEQE<br>on two standard TREC collections: Robust and Deep Learning. We find that CEQE<br>outperforms static embedding-based expansion methods on multiple collections<br>(by up to 18% on Robust and 31% on Deep Learning on average precision) and also<br>improves over proven probabilistic pseudo-relevance feedback (PRF) models. We<br>further find that multiple passes of expansion and reranking result in<br>continued gains in effectiveness with CEQE-based approaches outperforming other<br>approaches. The final model incorporating neural and CEQE-based expansion score<br>achieves gains of up to 5% in P@20 and 2% in AP on Robust over the<br>state-of-the-art transformer-based re-ranking model, Birch.<br>},
}
Endnote
%0 Report
%A Naseri, Shahrzad
%A Dalton, Jeffrey
%A Yates, Andrew
%A Allan, James
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T CEQE: Contextualized Embeddings for Query Expansion :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6779-C
%U https://arxiv.org/abs/2103.05256
%D 2021
%X In this work we leverage recent advances in context-sensitive language models<br>to improve the task of query expansion. Contextualized word representation<br>models, such as ELMo and BERT, are rapidly replacing static embedding models.<br>We propose a new model, Contextualized Embeddings for Query Expansion (CEQE),<br>that utilizes query-focused contextualized embedding vectors. We study the<br>behavior of contextual representations generated for query expansion in ad-hoc<br>document retrieval. We conduct our experiments on probabilistic retrieval<br>models as well as in combination with neural ranking models. We evaluate CEQE<br>on two standard TREC collections: Robust and Deep Learning. We find that CEQE<br>outperforms static embedding-based expansion methods on multiple collections<br>(by up to 18% on Robust and 31% on Deep Learning on average precision) and also<br>improves over proven probabilistic pseudo-relevance feedback (PRF) models. We<br>further find that multiple passes of expansion and reranking result in<br>continued gains in effectiveness with CEQE-based approaches outperforming other<br>approaches. The final model incorporating neural and CEQE-based expansion score<br>achieves gains of up to 5% in P@20 and 2% in AP on Robust over the<br>state-of-the-art transformer-based re-ranking model, Birch.<br>
%K Computer Science, Information Retrieval, cs.IR
[121]
S. Naseri, J. Dalton, A. Yates, and J. Allan, “CEQE: Contextualized Embeddings for Query Expansion,” in Advances in Information Retrieval (ECIR 2021), Lucca, Italy (Online Event), 2021.
Export
BibTeX
@inproceedings{Naseri_ECIR2021,
TITLE = {{CEQE}: {C}ontextualized Embeddings for Query Expansion},
AUTHOR = {Naseri, Shahrzad and Dalton, Jeff and Yates, Andrew and Allan, James},
LANGUAGE = {eng},
ISBN = {978-3-030-72112-1},
DOI = {10.1007/978-3-030-72113-8_31},
PUBLISHER = {Springer},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
BOOKTITLE = {Advances in Information Retrieval (ECIR 2021)},
EDITOR = {Hiemstra, Djoerd and Moens, Marie-Francine and Mothe, Josiane and Perego, Raffaele and Potthast, Martin and Sebastiani, Fabrizio},
PAGES = {467--482},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {12656},
ADDRESS = {Lucca, Italy (Online Event)},
}
Endnote
%0 Conference Proceedings
%A Naseri, Shahrzad
%A Dalton, Jeff
%A Yates, Andrew
%A Allan, James
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T CEQE: Contextualized Embeddings for Query Expansion :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6628-8
%R 10.1007/978-3-030-72113-8_31
%D 2021
%B 43rd European Conference on IR Research
%Z date of event: 2021-03-28 - 2021-04-01
%C Lucca, Italy (Online Event)
%B Advances in Information Retrieval
%E Hiemstra, Djoerd; Moens, Marie-Francine; Mothe, Josiane; Perego, Raffaele; Potthast, Martin; Sebastiani, Fabrizio
%P 467 - 482
%I Springer
%@ 978-3-030-72112-1
%B Lecture Notes in Computer Science
%N 12656
[122]
T. Nguyen, “Grounding Depression Detection in Clinical Questionnaires by Detecting Mental Health Symptoms,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@mastersthesis{NguyenMSc21,
TITLE = {Grounding Depression Detection in Clinical Questionnaires by Detecting Mental Health Symptoms},
AUTHOR = {Nguyen, Thong},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
}
Endnote
%0 Thesis
%A Nguyen, Thong
%Y Yates, Andrew
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Grounding Depression Detection in Clinical Questionnaires by Detecting Mental Health Symptoms :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-2DA3-9
%I Universität des Saarlandes
%C Saarbrücken
%D 2021
%P X, 68 p.
%V master
%9 master
[123]
T.-P. Nguyen, S. Razniewski, and G. Weikum, “Advanced Semantics for Commonsense Knowledge Extraction,” in The Web Conference 2021 (WWW 2021), Ljubljana, Slovenia, 2021.
Export
BibTeX
@inproceedings{Nguyen_WWW21,
TITLE = {Advanced Semantics for Commonsense Knowledge Extraction},
AUTHOR = {Nguyen, Tuan-Phong and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-8312-7},
DOI = {10.1145/3442381.3449827},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The Web Conference 2021 (WWW 2021)},
EDITOR = {Leskovec, Jure and Grobelnik, Marko and Najork, Marc and Tang, Jie and Zia, Leila},
PAGES = {2636--2647},
ADDRESS = {Ljubljana, Slovenia},
}
Endnote
%0 Conference Proceedings
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Advanced Semantics for Commonsense Knowledge Extraction :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0196-D
%R 10.1145/3442381.3449827
%D 2021
%B 30th The Web Conference
%Z date of event: 2021-04-30 -
%C Ljubljana, Slovenia
%B The Web Conference 2021
%E Leskovec, Jure; Grobelnik, Marko; Najork, Marc; Tang, Jie; Zia, Leila
%P 2636 - 2647
%I ACM
%@ 978-1-4503-8312-7
[124]
T.-P. Nguyen, S. Razniewski, and G. Weikum, “Inside ASCENT: Exploring a Deep Commonsense Knowledge Base and its Usage in Question Answering,” 2021. [Online]. Available: https://arxiv.org/abs/2105.13662. (arXiv: 2105.13662)
Abstract
ASCENT is a fully automated methodology for extracting and consolidating commonsense assertions from web contents (Nguyen et al., WWW 2021). It advances traditional triple-based commonsense knowledge representation by capturing semantic facets like locations and purposes, and composite concepts, i.e., subgroups and related aspects of subjects. In this demo, we present a web portal that allows users to understand its construction process, explore its content, and observe its impact in the use case of question answering. The demo website and an introductory video are both available online.
Export
BibTeX
@online{Nguyen_2105.13662,
TITLE = {Inside {ASCENT}: {E}xploring a Deep Commonsense Knowledge Base and its Usage in Question Answering},
AUTHOR = {Nguyen, Tuan-Phong and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2105.13662},
EPRINT = {2105.13662},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {ASCENT is a fully automated methodology for extracting and consolidating commonsense assertions from web contents (Nguyen et al., WWW 2021). It advances traditional triple-based commonsense knowledge representation by capturing semantic facets like locations and purposes, and composite concepts, i.e., subgroups and related aspects of subjects. In this demo, we present a web portal that allows users to understand its construction process, explore its content, and observe its impact in the use case of question answering. The demo website and an introductory video are both available online.},
}
Endnote
%0 Report
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Inside ASCENT: Exploring a Deep Commonsense Knowledge Base and its Usage in Question Answering :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-4A2E-2
%U https://arxiv.org/abs/2105.13662
%D 2021
%X ASCENT is a fully automated methodology for extracting and consolidating commonsense assertions from web contents (Nguyen et al., WWW 2021). It advances traditional triple-based commonsense knowledge representation by capturing semantic facets like locations and purposes, and composite concepts, i.e., subgroups and related aspects of subjects. In this demo, we present a web portal that allows users to understand its construction process, explore its content, and observe its impact in the use case of question answering. The demo website and an introductory video are both available online.
%K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL
%U https://youtu.be/qMkJXqu_Yd4
[125]
S. Pramanik, J. Alabi, R. Saha Roy, and G. Weikum, “UNIQORN: Unified Question Answering over RDF Knowledge Graphs and Natural Language Text,” 2021. [Online]. Available: https://arxiv.org/abs/2108.08614. (arXiv: 2108.08614)
Abstract
Question answering over knowledge graphs and other RDF data has been greatly advanced, with a number of good systems providing crisp answers for natural language questions or telegraphic queries. Some of these systems incorporate textual sources as additional evidence for the answering process, but cannot compute answers that are present in text alone. Conversely, systems from the IR and NLP communities have addressed QA over text, but barely utilize semantic data and knowledge. This paper presents the first QA system that can seamlessly operate over RDF datasets and text corpora, or both together, in a unified framework. Our method, called UNIQORN, builds a context graph on the fly, by retrieving question-relevant triples from the RDF data and/or the text corpus, where the latter case is handled by automatic information extraction. The resulting graph is typically rich but highly noisy. UNIQORN copes with this input by advanced graph algorithms for Group Steiner Trees, that identify the best answer candidates in the context graph. Experimental results on several benchmarks of complex questions with multiple entities and relations, show that UNIQORN, an unsupervised method with only five parameters, produces results comparable to the state-of-the-art on KGs, text corpora, and heterogeneous sources. The graph-based methodology provides user-interpretable evidence for the complete answering process.
Export
BibTeX
@online{Pramanik_2108.08614,
TITLE = {{UNIQORN}: {U}nified Question Answering over {RDF} Knowledge Graphs and Natural Language Text},
AUTHOR = {Pramanik, Soumajit and Alabi, Jesujoba and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2108.08614},
EPRINT = {2108.08614},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Question answering over knowledge graphs and other RDF data has been greatly advanced, with a number of good systems providing crisp answers for natural language questions or telegraphic queries. Some of these systems incorporate textual sources as additional evidence for the answering process, but cannot compute answers that are present in text alone. Conversely, systems from the IR and NLP communities have addressed QA over text, but barely utilize semantic data and knowledge. This paper presents the first QA system that can seamlessly operate over RDF datasets and text corpora, or both together, in a unified framework. Our method, called UNIQORN, builds a context graph on the fly, by retrieving question-relevant triples from the RDF data and/or the text corpus, where the latter case is handled by automatic information extraction. The resulting graph is typically rich but highly noisy. UNIQORN copes with this input by advanced graph algorithms for Group Steiner Trees, that identify the best answer candidates in the context graph. Experimental results on several benchmarks of complex questions with multiple entities and relations, show that UNIQORN, an unsupervised method with only five parameters, produces results comparable to the state-of-the-art on KGs, text corpora, and heterogeneous sources. The graph-based methodology provides user-interpretable evidence for the complete answering process.},
}
Endnote
%0 Report
%A Pramanik, Soumajit
%A Alabi, Jesujoba
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T UNIQORN: Unified Question Answering over RDF Knowledge Graphs and Natural Language Text :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6365-6
%U https://arxiv.org/abs/2108.08614
%D 2021
%X Question answering over knowledge graphs and other RDF data has been greatly advanced, with a number of good systems providing crisp answers for natural language questions or telegraphic queries. Some of these systems incorporate textual sources as additional evidence for the answering process, but cannot compute answers that are present in text alone. Conversely, systems from the IR and NLP communities have addressed QA over text, but barely utilize semantic data and knowledge. This paper presents the first QA system that can seamlessly operate over RDF datasets and text corpora, or both together, in a unified framework. Our method, called UNIQORN, builds a context graph on the fly, by retrieving question-relevant triples from the RDF data and/or the text corpus, where the latter case is handled by automatic information extraction. The resulting graph is typically rich but highly noisy. UNIQORN copes with this input by advanced graph algorithms for Group Steiner Trees, that identify the best answer candidates in the context graph. Experimental results on several benchmarks of complex questions with multiple entities and relations, show that UNIQORN, an unsupervised method with only five parameters, produces results comparable to the state-of-the-art on KGs, text corpora, and heterogeneous sources. The graph-based methodology provides user-interpretable evidence for the complete answering process.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[126]
S. Razniewski, H. Arnaout, S. Ghosh, and F. Suchanek, “On the Limits of Machine Knowledge: Completeness, Recall and Negation in Web-scale Knowledge Bases,” Proceedings of the VLDB Endowment (Proc. VLDB 2021), vol. 14, no. 12, 2021.
Export
BibTeX
@article{Razniewski2021_PVLDB,
TITLE = {On the Limits of Machine Knowledge: {C}ompleteness, Recall and Negation in Web-scale Knowledge Bases},
AUTHOR = {Razniewski, Simon and Arnaout, Hiba and Ghosh, Shrestha and Suchanek, Fabian},
LANGUAGE = {eng},
PUBLISHER = {VLDB Endowment Inc.},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
JOURNAL = {Proceedings of the VLDB Endowment (Proc. VLDB)},
VOLUME = {14},
NUMBER = {12},
PAGES = {3175--3177},
BOOKTITLE = {Proceedings of the 47th International Conference on Very Large Data Bases (VLDB 2021)},
EDITOR = {Dong, Xin Luna and Naumann, Felix},
}
Endnote
%0 Journal Article
%A Razniewski, Simon
%A Arnaout, Hiba
%A Ghosh, Shrestha
%A Suchanek, Fabian
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T On the Limits of Machine Knowledge: Completeness, Recall and Negation in Web-scale Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6544-9
%7 2021
%D 2021
%J Proceedings of the VLDB Endowment
%O PVLDB
%V 14
%N 12
%& 3175
%P 3175 - 3177
%I VLDB Endowment Inc.
%B Proceedings of the 47th International Conference on Very Large Data Bases
%O VLDB 2021 Copenhagen, Denmark, 16-20 August 2021
[127]
S. Razniewski, N. Tandon, and A. S. Varde, “Information to Wisdom: Commonsense Knowledge Extraction and Compilation,” in WSDM ’21, 14th International Conference on Web Search and Data Mining, Virtual Event, Israel, 2021.
Export
BibTeX
@inproceedings{Razniewski_WSDM21,
TITLE = {Information to Wisdom: {C}ommonsense Knowledge Extraction and Compilation},
AUTHOR = {Razniewski, Simon and Tandon, Niket and Varde, Aparna S.},
LANGUAGE = {eng},
ISBN = {978-1-4503-8297-7},
DOI = {10.1145/3437963.3441664},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {WSDM '21, 14th International Conference on Web Search and Data Mining},
EDITOR = {Lewin-Eytan, Liane and Carmel, David and Yom-Tov, Elad and Agichtein, Eugene and Gabrilovich, Evgeniy},
PAGES = {1143--1146},
ADDRESS = {Virtual Event, Israel},
}
Endnote
%0 Conference Proceedings
%A Razniewski, Simon
%A Tandon, Niket
%A Varde, Aparna S.
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Information to Wisdom: Commonsense Knowledge Extraction and Compilation :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-65FE-8
%R 10.1145/3437963.3441664
%D 2021
%B 14th International Conference on Web Search and Data Mining
%Z date of event: 2021-03-08 - 2021-03-12
%C Virtual Event, Israel
%B WSDM '21
%E Lewin-Eytan, Liane; Carmel, David; Yom-Tov, Elad; Agichtein, Eugene; Gabrilovich, Evgeniy
%P 1143 - 1146
%I ACM
%@ 978-1-4503-8297-7
[128]
S. Razniewski, A. Yates, N. Kassner, and G. Weikum, “Language Models As or For Knowledge Bases,” 2021. [Online]. Available: https://arxiv.org/abs/2110.04888. (arXiv: 2110.04888)
Abstract
Pre-trained language models (LMs) have recently gained attention for their potential as an alternative to (or proxy for) explicit knowledge bases (KBs). In this position paper, we examine this hypothesis, identify strengths and limitations of both LMs and KBs, and discuss the complementary nature of the two paradigms. In particular, we offer qualitative arguments that latent LMs are not suitable as a substitute for explicit KBs, but could play a major role for augmenting and curating KBs.
Export
BibTeX
@online{Razniewski_2110.04888,
TITLE = {Language Models As or For Knowledge Bases},
AUTHOR = {Razniewski, Simon and Yates, Andrew and Kassner, Nora and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2110.04888},
EPRINT = {2110.04888},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Pre-trained language models (LMs) have recently gained attention for their potential as an alternative to (or proxy for) explicit knowledge bases (KBs). In this position paper, we examine this hypothesis, identify strengths and limitations of both LMs and KBs, and discuss the complementary nature of the two paradigms. In particular, we offer qualitative arguments that latent LMs are not suitable as a substitute for explicit KBs, but could play a major role for augmenting and curating KBs.},
}
Endnote
%0 Report
%A Razniewski, Simon
%A Yates, Andrew
%A Kassner, Nora
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Language Models As or For Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6510-3
%U https://arxiv.org/abs/2110.04888
%D 2021
%X Pre-trained language models (LMs) have recently gained attention for their potential as an alternative to (or proxy for) explicit knowledge bases (KBs). In this position paper, we examine this hypothesis, identify strengths and limitations of both LMs and KBs, and discuss the complementary nature of the two paradigms. In particular, we offer qualitative arguments that latent LMs are not suitable as a substitute for explicit KBs, but could play a major role for augmenting and curating KBs.
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Databases, cs.DB
[129]
S. Razniewski, “Commonsense Knowledge Base Construction in the Age of Big Data,” 2021. [Online]. Available: https://arxiv.org/abs/2105.01925. (arXiv: 2105.01925)
Abstract
Compiling commonsense knowledge is traditionally an AI topic approached by manual labor. Recent advances in web data processing have enabled automated approaches. In this demonstration we will showcase three systems for automated commonsense knowledge base construction, highlighting each time one aspect of specific interest to the data management community. (i) We use Quasimodo to illustrate knowledge extraction systems engineering, (ii) Dice to illustrate the role that schema constraints play in cleaning fuzzy commonsense knowledge, and (iii) Ascent to illustrate the relevance of conceptual modelling. The demos are available online at https://quasimodo.r2.enst.fr, https://dice.mpi-inf.mpg.de and ascent.mpi-inf.mpg.de.
Export
BibTeX
@online{Razniewski_2105.01925,
TITLE = {Commonsense Knowledge Base Construction in the Age of Big Data},
AUTHOR = {Razniewski, Simon},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2105.01925},
EPRINT = {2105.01925},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Compiling commonsense knowledge is traditionally an AI topic approached by manual labor. Recent advances in web data processing have enabled automated approaches. In this demonstration we will showcase three systems for automated commonsense knowledge base construction, highlighting each time one aspect of specific interest to the data management community. (i) We use Quasimodo to illustrate knowledge extraction systems engineering, (ii) Dice to illustrate the role that schema constraints play in cleaning fuzzy commonsense knowledge, and (iii) Ascent to illustrate the relevance of conceptual modelling. The demos are available online at https://quasimodo.r2.enst.fr, https://dice.mpi-inf.mpg.de and ascent.mpi-inf.mpg.de.},
}
Endnote
%0 Report
%A Razniewski, Simon
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Commonsense Knowledge Base Construction in the Age of Big Data :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6604-0
%U https://arxiv.org/abs/2105.01925
%D 2021
%X Compiling commonsense knowledge is traditionally an AI topic approached by manual labor. Recent advances in web data processing have enabled automated approaches. In this demonstration we will showcase three systems for automated commonsense knowledge base construction, highlighting each time one aspect of specific interest to the data management community. (i) We use Quasimodo to illustrate knowledge extraction systems engineering, (ii) Dice to illustrate the role that schema constraints play in cleaning fuzzy commonsense knowledge, and (iii) Ascent to illustrate the relevance of conceptual modelling. The demos are available online at https://quasimodo.r2.enst.fr, https://dice.mpi-inf.mpg.de and ascent.mpi-inf.mpg.de.
%K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL,Computer Science, Databases, cs.DB
[130]
J. Romero, “Pyformlang: An Educational Library for Formal Language Manipulation,” in SIGCSE ’21, The 52nd ACM Technical Symposium on Computer Science Education, Virtual Event, USA, 2021.
Export
BibTeX
@inproceedings{Romero_SIGCSE21,
TITLE = {Pyformlang: {An} Educational Library for Formal Language Manipulation},
AUTHOR = {Romero, Julien},
LANGUAGE = {eng},
ISBN = {978-1-4503-8062-1},
DOI = {10.1145/3408877.3432464},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGCSE '21, The 52nd ACM Technical Symposium on Computer Science Education},
EDITOR = {Sherriff, Mark and Merkle, Laurence D. and Cutter, Pamela and Monge, Alvaro and Sheard, Judithe},
PAGES = {576--582},
ADDRESS = {Virtual Event, USA},
}
Endnote
%0 Conference Proceedings
%A Romero, Julien
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Pyformlang: An Educational Library for Formal Language Manipulation :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F836-5
%R 10.1145/3408877.3432464
%D 2021
%B The 52nd ACM Technical Symposium on Computer Science Education
%Z date of event: 2021-03-13 - 2021-03-20
%C Virtual Event, USA
%B SIGCSE '21
%E Sherriff, Mark; Merkle, Laurence D.; Cutter, Pamela; Monge, Alvaro; Sheard, Judithe
%P 576 - 582
%I ACM
%@ 978-1-4503-8062-1
[131]
R. Saha Roy and A. Anand, Question Answering for the Curated Web: Tasks and Methods in QA over Knowledge Bases and Text Collections. San Rafael, CA: Morgan & Claypool, 2021.
Export
BibTeX
@book{SahaRoy2021,
TITLE = {Question Answering for the Curated Web: Tasks and Methods in {QA} over Knowledge Bases and Text Collections},
AUTHOR = {Saha Roy, Rishiraj and Anand, Avishek},
LANGUAGE = {eng},
ISBN = {978-1636392387},
DOI = {10.2200/S0113ED1V01Y202109ICR076},
PUBLISHER = {Morgan \& Claypool},
ADDRESS = {San Rafael, CA},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
PAGES = {194 p.},
SERIES = {Synthesis Lectures on Information Concepts, Retrieval, and Services},
}
Endnote
%0 Book
%A Saha Roy, Rishiraj
%A Anand, Avishek
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Question Answering for the Curated Web: Tasks and Methods in QA over Knowledge Bases and Text Collections :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-B116-6
%R 10.2200/S0113ED1V01Y202109ICR076
%@ 978-1636392387
%I Morgan & Claypool
%C San Rafael, CA
%D 2021
%P 194 p.
%B Synthesis Lectures on Information Concepts, Retrieval, and Services
[132]
F. Schmidt, A. Marx, N. Baumgarten, M. Hebel, M. Wegner, M. Kaulich, M. S. Leisegang, R. P. Brandes, J. Göke, J. Vreeken, and M. H. Schulz, “Integrative Analysis of Epigenetics Data Identifies Gene-specific Regulatory Elements,” Nucleic Acids Research (London), vol. 49, no. 18, 2021.
Export
BibTeX
@article{Schmidt_NAR21,
TITLE = {Integrative Analysis of Epigenetics Data Identifies Gene-specific Regulatory Elements},
AUTHOR = {Schmidt, Florian and Marx, Alexander and Baumgarten, Nina and Hebel, Marie and Wegner, Martin and Kaulich, Manuel and Leisegang, Matthias S. and Brandes, Ralf P and G{\"o}ke, Jonathan and Vreeken, Jilles and Schulz, Marcel Holger},
LANGUAGE = {eng},
ISSN = {0305-1048},
DOI = {10.1093/nar/gkab798},
PUBLISHER = {Oxford University Press},
ADDRESS = {Oxford},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
JOURNAL = {Nucleic Acids Research (London)},
VOLUME = {49},
NUMBER = {18},
PAGES = {10397--10418},
}
Endnote
%0 Journal Article
%A Schmidt, Florian
%A Marx, Alexander
%A Baumgarten, Nina
%A Hebel, Marie
%A Wegner, Martin
%A Kaulich, Manuel
%A Leisegang, Matthias S.
%A Brandes, Ralf P
%A Göke, Jonathan
%A Vreeken, Jilles
%A Schulz, Marcel Holger
%+ Computational Biology and Applied Algorithmics, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Computational Biology and Applied Algorithmics, MPI for Informatics, Max Planck Society
%T Integrative Analysis of Epigenetics Data Identifies Gene-specific Regulatory Elements :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6D54-F
%R 10.1093/nar/gkab798
%2 PMC8501997
%7 2021
%D 2021
%J Nucleic Acids Research (London)
%O Nucleic Acids Res
%V 49
%N 18
%& 10397
%P 10397 - 10418
%I Oxford University Press
%C Oxford
%@ false
[133]
X. Shen, “Deep Latent-Variable Models for Neural Text Generation,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@phdthesis{Shenphd2021,
TITLE = {Deep Latent-Variable Models for Neural Text Generation},
AUTHOR = {Shen, Xiaoyu},
LANGUAGE = {eng},
URL = {nbn:de:bsz:291--ds-350558},
DOI = {10.22028/D291-35055},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
}
Endnote
%0 Thesis
%A Shen, Xiaoyu
%Y Klakow, Dietrich
%A referee: Weikum, Gerhard
%A referee: Schütze, Hinrich
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Deep Latent-Variable Models for Neural Text Generation :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-B25D-6
%R 10.22028/D291-35055
%U nbn:de:bsz:291--ds-350558
%F OTHER: hdl:20.500.11880/32106
%I Universität des Saarlandes
%C Saarbrücken
%D 2021
%P 201 p.
%V phd
%9 phd
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/32106
[134]
S. Shrinivasan, “Knowledge Base Stability,” Universität des Saarlandes, Saarbrücken, 2021.
Export
BibTeX
@mastersthesis{ShrinivasanMSc21,
TITLE = {Knowledge Base Stability},
AUTHOR = {Shrinivasan, Suhas},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
}
Endnote
%0 Thesis
%A Shrinivasan, Suhas
%Y Razniewski, Simon
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Knowledge Base Stability :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-15A0-6
%I Universität des Saarlandes
%C Saarbrücken
%D 2021
%P 87 p.
%V master
%9 master
[135]
S. Singhania, S. Razniewski, and G. Weikum, “Predicting Document Coverage for Relation Extraction,” 2021. [Online]. Available: https://arxiv.org/abs/2111.13611. (arXiv: 2111.13611)
Abstract
This paper presents a new task of predicting the coverage of a text document for relation extraction (RE): does the document contain many relational tuples for a given entity? Coverage predictions are useful in selecting the best documents for knowledge base construction with large input corpora. To study this problem, we present a dataset of 31,366 diverse documents for 520 entities. We analyze the correlation of document coverage with features like length, entity mention frequency, Alexa rank, language complexity and information retrieval scores. Each of these features has only moderate predictive power. We employ methods combining features with statistical models like TF-IDF and language models like BERT. The model combining features and BERT, HERB, achieves an F1 score of up to 46%. We demonstrate the utility of coverage predictions on two use cases: KB construction and claim refutation.
Export
BibTeX
@online{Singhania2021,
TITLE = {Predicting Document Coverage for Relation Extraction},
AUTHOR = {Singhania, Sneha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2111.13611},
EPRINT = {2111.13611},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {This paper presents a new task of predicting the coverage of a text document for relation extraction (RE): does the document contain many relational tuples for a given entity? Coverage predictions are useful in selecting the best documents for knowledge base construction with large input corpora. To study this problem, we present a dataset of 31,366 diverse documents for 520 entities. We analyze the correlation of document coverage with features like length, entity mention frequency, Alexa rank, language complexity and information retrieval scores. Each of these features has only moderate predictive power. We employ methods combining features with statistical models like TF-IDF and language models like BERT. The model combining features and BERT, HERB, achieves an F1 score of up to 46%. We demonstrate the utility of coverage predictions on two use cases: KB construction and claim refutation.},
}
Endnote
%0 Report
%A Singhania, Sneha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Predicting Document Coverage for Relation Extraction :
%G eng
%U http://hdl.handle.net/21.11116/0000-000A-237F-1
%U https://arxiv.org/abs/2111.13611
%D 2021
%X This paper presents a new task of predicting the coverage of a text document for relation extraction (RE): does the document contain many relational tuples for a given entity? Coverage predictions are useful in selecting the best documents for knowledge base construction with large input corpora. To study this problem, we present a dataset of 31,366 diverse documents for 520 entities. We analyze the correlation of document coverage with features like length, entity mention frequency, Alexa rank, language complexity and information retrieval scores. Each of these features has only moderate predictive power. We employ methods combining features with statistical models like TF-IDF and language models like BERT. The model combining features and BERT, HERB, achieves an F1 score of up to 46%. We demonstrate the utility of coverage predictions on two use cases: KB construction and claim refutation.
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI
[136]
A. Tigunova, P. Mirza, A. Yates, and G. Weikum, “Exploring Personal Knowledge Extraction from Conversations with CHARM,” in WSDM ’21, 14th International Conference on Web Search and Data Mining, Virtual Event, Israel, 2021.
Export
BibTeX
@inproceedings{Tigunova_WSDM21,
TITLE = {Exploring Personal Knowledge Extraction from Conversations with {CHARM}},
AUTHOR = {Tigunova, Anna and Mirza, Paramita and Yates, Andrew and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-8297-7},
DOI = {10.1145/3437963.3441699},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {WSDM '21, 14th International Conference on Web Search and Data Mining},
EDITOR = {Lewin-Eytan, Liane and Carmel, David and Yom-Tov, Elad and Agichtein, Eugene and Gabrilovich, Evgeniy},
PAGES = {1077--1080},
ADDRESS = {Virtual Event, Israel},
}
Endnote
%0 Conference Proceedings
%A Tigunova, Anna
%A Mirza, Paramita
%A Yates, Andrew
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Exploring Personal Knowledge Extraction from Conversations with CHARM :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F850-7
%R 10.1145/3437963.3441699
%D 2021
%B 14th International Conference on Web Search and Data Mining
%Z date of event: 2021-03-08 - 2021-03-12
%C Virtual Event, Israel
%B WSDM '21
%E Lewin-Eytan, Liane; Carmel, David; Yom-Tov, Elad; Agichtein, Eugene; Gabrilovich, Evgeniy
%P 1077 - 1080
%I ACM
%@ 978-1-4503-8297-7
[137]
A. Tigunova, P. Mirza, A. Yates, and G. Weikum, “PRIDE: Predicting Relationships in Conversations,” in Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP 2021), Punta Cana, Dominican Republic, 2021.
Export
BibTeX
@inproceedings{DBLP:conf/emnlp/TigunovaMYW21,
TITLE = {{PRIDE}: {P}redicting Relationships in Conversations},
AUTHOR = {Tigunova, Anna and Mirza, Paramita and Yates, Andrew and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://aclanthology.org/2021.emnlp-main.380/},
DOI = {10.18653/v1/2021.emnlp-main.380},
PUBLISHER = {ACL},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP 2021)},
EDITOR = {Moens, Marie-Francine and Huang, Xuanjing and Specia, Lucia and Yih, Scott Wen-tau},
PAGES = {4636--4650},
ADDRESS = {Punta Cana, Dominican Republic},
}
Endnote
%0 Conference Proceedings
%A Tigunova, Anna
%A Mirza, Paramita
%A Yates, Andrew
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T PRIDE: Predicting Relationships in Conversations :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DBF2-C
%U https://aclanthology.org/2021.emnlp-main.380/
%R 10.18653/v1/2021.emnlp-main.380
%D 2021
%B The Conference on Empirical Methods in Natural Language Processing
%Z date of event: 2021-11-07 - 2021-11-11
%C Punta Cana, Dominican Republic
%B Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%E Moens, Marie-Francine; Huang, Xuanjing; Specia, Lucia; Yih, Scott Wen-tau
%P 4636 - 4650
%I ACL
[138]
G. H. Torbati, A. Yates, and G. Weikum, “You Get What You Chat: Using Conversations to Personalize Search-based Recommendations,” 2021. [Online]. Available: https://arxiv.org/abs/2109.04716. (arXiv: 2109.04716)
Abstract
Prior work on personalized recommendations has focused on exploiting explicit signals from user-specific queries, clicks, likes, and ratings. This paper investigates tapping into a different source of implicit signals of interests and tastes: online chats between users. The paper develops an expressive model and effective methods for personalizing search-based entity recommendations. User models derived from chats augment different methods for re-ranking entity answers for medium-grained queries. The paper presents specific techniques to enhance the user models by capturing domain-specific vocabularies and by entity-based expansion. Experiments are based on a collection of online chats from a controlled user study covering three domains: books, travel, food. We evaluate different configurations and compare chat-based user models against concise user profiles from questionnaires. Overall, these two variants perform on par in terms of NDCG@20, but each has advantages in certain domains.
Export
BibTeX
@online{Haratinezhad2109.04716,
TITLE = {You Get What You Chat: Using Conversations to Personalize Search-based Recommendations},
AUTHOR = {Torbati, Ghazaleh Haratinezhad and Yates, Andrew and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2109.04716},
EPRINT = {2109.04716},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Prior work on personalized recommendations has focused on exploiting explicit signals from user-specific queries, clicks, likes, and ratings. This paper investigates tapping into a different source of implicit signals of interests and tastes: online chats between users. The paper develops an expressive model and effective methods for personalizing search-based entity recommendations. User models derived from chats augment different methods for re-ranking entity answers for medium-grained queries. The paper presents specific techniques to enhance the user models by capturing domain-specific vocabularies and by entity-based expansion. Experiments are based on a collection of online chats from a controlled user study covering three domains: books, travel, food. We evaluate different configurations and compare chat-based user models against concise user profiles from questionnaires. Overall, these two variants perform on par in terms of NDCG@20, but each has advantages in certain domains.},
}
Endnote
%0 Report
%A Torbati, Ghazaleh Haratinezhad
%A Yates, Andrew
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T You Get What You Chat: Using Conversations to Personalize Search-based Recommendations :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-64B9-6
%U https://arxiv.org/abs/2109.04716
%D 2021
%X Prior work on personalized recommendations has focused on exploiting explicit signals from user-specific queries, clicks, likes, and ratings. This paper investigates tapping into a different source of implicit signals of interests and tastes: online chats between users. The paper develops an expressive model and effective methods for personalizing search-based entity recommendations. User models derived from chats augment different methods for re-ranking entity answers for medium-grained queries. The paper presents specific techniques to enhance the user models by capturing domain-specific vocabularies and by entity-based expansion. Experiments are based on a collection of online chats from a controlled user study covering three domains: books, travel, food. We evaluate different configurations and compare chat-based user models against concise user profiles from questionnaires. Overall, these two variants perform on par in terms of NDCG@20, but each has advantages in certain domains.
%K Computer Science, Information Retrieval, cs.IR
[139]
G. H. Torbati, A. Yates, and G. Weikum, “Personalized Entity Search by Sparse and Scrutable User Profiles,” 2021. [Online]. Available: https://arxiv.org/abs/2109.04713. (arXiv: 2109.04713)
Abstract
Prior work on personalizing web search results has focused on considering query-and-click logs to capture users' individual interests. For product search, extensive user histories about purchases and ratings have been exploited. However, for general entity search, such as for books on specific topics or travel destinations with certain features, personalization is largely underexplored. In this paper, we address personalization of book search, as an exemplary case of entity search, by exploiting sparse user profiles obtained through online questionnaires. We devise and compare a variety of re-ranking methods based on language models or neural learning. Our experiments show that even very sparse information about individuals can enhance the effectiveness of the search results.
Export
BibTeX
@online{Haratinezhad2109.04713,
TITLE = {Personalized Entity Search by Sparse and Scrutable User Profiles},
AUTHOR = {Torbati, Ghazaleh Haratinezhad and Yates, Andrew and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2109.04713},
EPRINT = {2109.04713},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Prior work on personalizing web search results has focused on considering query-and-click logs to capture users' individual interests. For product search, extensive user histories about purchases and ratings have been exploited. However, for general entity search, such as for books on specific topics or travel destinations with certain features, personalization is largely underexplored. In this paper, we address personalization of book search, as an exemplary case of entity search, by exploiting sparse user profiles obtained through online questionnaires. We devise and compare a variety of re-ranking methods based on language models or neural learning. Our experiments show that even very sparse information about individuals can enhance the effectiveness of the search results.},
}
Endnote
%0 Report
%A Torbati, Ghazaleh Haratinezhad
%A Yates, Andrew
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Personalized Entity Search by Sparse and Scrutable User Profiles :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-64AC-5
%U https://arxiv.org/abs/2109.04713
%D 2021
%X Prior work on personalizing web search results has focused on considering query-and-click logs to capture users' individual interests. For product search, extensive user histories about purchases and ratings have been exploited. However, for general entity search, such as for books on specific topics or travel destinations with certain features, personalization is largely underexplored. In this paper, we address personalization of book search, as an exemplary case of entity search, by exploiting sparse user profiles obtained through online questionnaires. We devise and compare a variety of re-ranking methods based on language models or neural learning. Our experiments show that even very sparse information about individuals can enhance the effectiveness of the search results.
%K Computer Science, Information Retrieval, cs.IR
[140]
G. H. Torbati, A. Yates, and G. Weikum, “You Get What You Chat: Using Conversations to Personalize Search-based Recommendations,” in Advances in Information Retrieval (ECIR 2021), Lucca, Italy (Online Event), 2021.
Export
BibTeX
@inproceedings{Torbati_ECIR2021,
TITLE = {You Get What You Chat: {U}sing Conversations to Personalize Search-based Recommendations},
AUTHOR = {Torbati, Ghazaleh Haratinezhad and Yates, Andrew and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-3-030-72112-1},
DOI = {10.1007/978-3-030-72113-8_14},
PUBLISHER = {Springer},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
BOOKTITLE = {Advances in Information Retrieval (ECIR 2021)},
EDITOR = {Hiemstra, Djoerd and Moens, Marie-Francine and Mothe, Josiane and Perego, Raffaele and Potthast, Martin and Sebastiani, Fabrizio},
PAGES = {207--223},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {12656},
ADDRESS = {Lucca, Italy (Online Event)},
}
Endnote
%0 Conference Proceedings
%A Torbati, Ghazaleh Haratinezhad
%A Yates, Andrew
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T You Get What You Chat: Using Conversations to Personalize Search-based Recommendations :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-ECA2-8
%R 10.1007/978-3-030-72113-8_14
%D 2021
%B 43rd European Conference on IR Research
%Z date of event: 2021-03-28 - 2021-04-01
%C Lucca, Italy (Online Event)
%B Advances in Information Retrieval
%E Hiemstra, Djoerd; Moens, Marie-Francine; Mothe, Josiane; Perego, Raffaele; Potthast, Martin; Sebastiani, Fabrizio
%P 207 - 223
%I Springer
%@ 978-3-030-72112-1
%B Lecture Notes in Computer Science
%N 12656
[141]
K. H. Tran, A. Ghazimatin, and R. Saha Roy, “Counterfactual Explanations for Neural Recommenders,” 2021. [Online]. Available: https://arxiv.org/abs/2105.05008. (arXiv: 2105.05008)
Abstract
Understanding why specific items are recommended to users can significantly increase their trust and satisfaction in the system. While neural recommenders have become the state-of-the-art in recent years, the complexity of deep models still makes the generation of tangible explanations for end users a challenging problem. Existing methods are usually based on attention distributions over a variety of features, which are still questionable regarding their suitability as explanations, and rather unwieldy to grasp for an end user. Counterfactual explanations based on a small set of the user's own actions have been shown to be an acceptable solution to the tangibility problem. However, current work on such counterfactuals cannot be readily applied to neural models. In this work, we propose ACCENT, the first general framework for finding counterfactual explanations for neural recommenders. It extends recently-proposed influence functions for identifying training points most relevant to a recommendation, from a single to a pair of items, while deducing a counterfactual set in an iterative process. We use ACCENT to generate counterfactual explanations for two popular neural models, Neural Collaborative Filtering (NCF) and Relational Collaborative Filtering (RCF), and demonstrate its feasibility on a sample of the popular MovieLens 100K dataset.
Export
BibTeX
@online{Tran_2105.05008,
TITLE = {Counterfactual Explanations for Neural Recommenders},
AUTHOR = {Tran, Khanh Hiep and Ghazimatin, Azin and Saha Roy, Rishiraj},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2105.05008},
EPRINT = {2105.05008},
EPRINTTYPE = {arXiv},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Understanding why specific items are recommended to users can significantly increase their trust and satisfaction in the system. While neural recommenders have become the state-of-the-art in recent years, the complexity of deep models still makes the generation of tangible explanations for end users a challenging problem. Existing methods are usually based on attention distributions over a variety of features, which are still questionable regarding their suitability as explanations, and rather unwieldy to grasp for an end user. Counterfactual explanations based on a small set of the user's own actions have been shown to be an acceptable solution to the tangibility problem. However, current work on such counterfactuals cannot be readily applied to neural models. In this work, we propose ACCENT, the first general framework for finding counterfactual explanations for neural recommenders. It extends recently-proposed influence functions for identifying training points most relevant to a recommendation, from a single to a pair of items, while deducing a counterfactual set in an iterative process. We use ACCENT to generate counterfactual explanations for two popular neural models, Neural Collaborative Filtering (NCF) and Relational Collaborative Filtering (RCF), and demonstrate its feasibility on a sample of the popular MovieLens 100K dataset.},
}
Endnote
%0 Report
%A Tran, Khanh Hiep
%A Ghazimatin, Azin
%A Saha Roy, Rishiraj
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Counterfactual Explanations for Neural Recommenders :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-67C3-7
%U https://arxiv.org/abs/2105.05008
%D 2021
%X Understanding why specific items are recommended to users can significantly increase their trust and satisfaction in the system. While neural recommenders have become the state-of-the-art in recent years, the complexity of deep models still makes the generation of tangible explanations for end users a challenging problem. Existing methods are usually based on attention distributions over a variety of features, which are still questionable regarding their suitability as explanations, and rather unwieldy to grasp for an end user. Counterfactual explanations based on a small set of the user's own actions have been shown to be an acceptable solution to the tangibility problem. However, current work on such counterfactuals cannot be readily applied to neural models. In this work, we propose ACCENT, the first general framework for finding counterfactual explanations for neural recommenders. It extends recently-proposed influence functions for identifying training points most relevant to a recommendation, from a single to a pair of items, while deducing a counterfactual set in an iterative process. We use ACCENT to generate counterfactual explanations for two popular neural models, Neural Collaborative Filtering (NCF) and Relational Collaborative Filtering (RCF), and demonstrate its feasibility on a sample of the popular MovieLens 100K dataset.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Learning, cs.LG
[142]
K. H. Tran, A. Ghazimatin, and R. Saha Roy, “Counterfactual Explanations for Neural Recommenders,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{tran2021counterfactual,
TITLE = {Counterfactual Explanations for Neural Recommenders},
AUTHOR = {Tran, Khanh Hiep and Ghazimatin, Azin and Saha Roy, Rishiraj},
LANGUAGE = {eng},
DOI = {10.1145/3404835.3463005},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
EDITOR = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
PAGES = {1627--1631},
ADDRESS = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings
%A Tran, Khanh Hiep
%A Ghazimatin, Azin
%A Saha Roy, Rishiraj
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Counterfactual Explanations for Neural Recommenders :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-5140-4
%R 10.1145/3404835.3463005
%D 2021
%B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2021-07-11 - 2021-07-15
%C Virtual Event, Canada
%B SIGIR '21
%E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellogín, Alejandro; Yoshioka, Masaharu
%P 1627 - 1631
%I ACM
[143]
G. Weikum, “Knowledge Graphs 2021: A Data Odyssey,” Proceedings of the VLDB Endowment (Proc. VLDB 2021), vol. 14, no. 12, 2021.
Export
BibTeX
@article{Weikum2021_PVLDB,
TITLE = {Knowledge Graphs 2021: {A} Data Odyssey},
AUTHOR = {Weikum, Gerhard},
LANGUAGE = {eng},
PUBLISHER = {VLDB Endowment Inc.},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
JOURNAL = {Proceedings of the VLDB Endowment (Proc. VLDB)},
VOLUME = {14},
NUMBER = {12},
PAGES = {3233--3238},
BOOKTITLE = {Proceedings of the 47th International Conference on Very Large Data Bases (VLDB 2021)},
EDITOR = {Dong, Xin Luna and Naumann, Felix},
}
Endnote
%0 Journal Article
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Knowledge Graphs 2021: A Data Odyssey :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-631F-6
%7 2021
%D 2021
%J Proceedings of the VLDB Endowment
%O PVLDB
%V 14
%N 12
%& 3233
%P 3233 - 3238
%I VLDB Endowment Inc.
%B Proceedings of the 47th International Conference on Very Large Data Bases
%O VLDB 2021 Copenhagen, Denmark, 16-20 August 2021
[144]
G. Weikum, L. Dong, S. Razniewski, and F. Suchanek, “Machine Knowledge: Creation and Curation of Comprehensive Knowledge Bases,” Foundations and Trends in Databases, vol. 10, no. 2–4, 2021.
Export
BibTeX
@article{Weikum10.1561/1900000064,
TITLE = {Machine Knowledge: {C}reation and Curation of Comprehensive Knowledge Bases},
AUTHOR = {Weikum, Gerhard and Dong, Luna and Razniewski, Simon and Suchanek, Fabian},
LANGUAGE = {eng},
ISSN = {1931-7883},
ISBN = {978-1-68083-836-7},
DOI = {10.1561/1900000064},
PUBLISHER = {Now Publishers},
ADDRESS = {Boston},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
JOURNAL = {Foundations and Trends in Databases},
VOLUME = {10},
NUMBER = {2-4},
PAGES = {108--490},
}
Endnote
%0 Journal Article
%A Weikum, Gerhard
%A Dong, Luna
%A Razniewski, Simon
%A Suchanek, Fabian
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Machine Knowledge: Creation and Curation of Comprehensive Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6317-E
%R 10.1561/1900000064
%@ 978-1-68083-836-7
%7 2021
%D 2021
%J Foundations and Trends in Databases
%V 10
%N 2-4
%& 108
%P 108 - 490
%I Now Publishers
%C Boston
%@ false
[145]
A. Yates, R. Nogueira, and J. Lin, “Pretrained Transformers for Text Ranking: BERT and Beyond,” in SIGIR ’21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, 2021.
Export
BibTeX
@inproceedings{Yates_SIGIR21,
TITLE = {Pretrained Transformers for Text Ranking: {BERT} and Beyond},
AUTHOR = {Yates, Andrew and Nogueira, Rodrigo and Lin, Jimmy},
LANGUAGE = {eng},
ISBN = {978-1-4503-8037-9},
DOI = {10.1145/3404835.3462812},
PUBLISHER = {ACM},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGIR '21, 44th International ACM SIGIR Conference on Research and Development in Information Retrieval},
EDITOR = {Diaz, Fernando and Shah, Chirag and Suel, Torsten and Castells, Pablo and Jones, Rosie and Sakai, Tetsuya and Bellog{\'i}n, Alejandro and Yoshioka, Masaharu},
PAGES = {2666--2668},
ADDRESS = {Virtual Event, Canada},
}
Endnote
%0 Conference Proceedings
%A Yates, Andrew
%A Nogueira, Rodrigo
%A Lin, Jimmy
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Pretrained Transformers for Text Ranking: BERT and Beyond :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6674-2
%R 10.1145/3404835.3462812
%D 2021
%B 44th International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2021-07-11 - 2021-07-15
%C Virtual Event, Canada
%B SIGIR '21
%E Diaz, Fernando; Shah, Chirag; Suel, Torsten; Castells, Pablo; Jones, Rosie; Sakai, Tetsuya; Bellogín, Alejandro; Yoshioka, Masaharu
%P 2666 - 2668
%I ACM
%@ 978-1-4503-8037-9
[146]
X. Zhang, A. Yates, and J. Lin, “Comparing Score Aggregation Approaches for Document Retrieval with Pretrained Transformers,” in Advances in Information Retrieval (ECIR 2021), Lucca, Italy (Online Event), 2021.
Export
BibTeX
@inproceedings{Zhang_ECIR2021,
TITLE = {Comparing Score Aggregation Approaches for Document Retrieval with Pretrained Transformers},
AUTHOR = {Zhang, Xinyu and Yates, Andrew and Lin, Jimmy},
LANGUAGE = {eng},
ISBN = {978-3-030-72239-5},
DOI = {10.1007/978-3-030-72240-1_11},
PUBLISHER = {Springer},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
BOOKTITLE = {Advances in Information Retrieval (ECIR 2021)},
EDITOR = {Hiemstra, Djoerd and Moens, Marie-Francine and Mothe, Josiane and Perego, Raffaele and Potthast, Martin and Sebastiani, Fabrizio},
PAGES = {150--163},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {12657},
ADDRESS = {Lucca, Italy (Online Event)},
}
Endnote
%0 Conference Proceedings
%A Zhang, Xinyu
%A Yates, Andrew
%A Lin, Jimmy
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Comparing Score Aggregation Approaches for Document Retrieval with Pretrained Transformers :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-6614-E
%R 10.1007/978-3-030-72240-1_11
%D 2021
%B 43rd European Conference on IR Research
%Z date of event: 2021-03-28 - 2021-04-01
%C Lucca, Italy (Online Event)
%B Advances in Information Retrieval
%E Hiemstra, Djoerd; Moens, Marie-Francine; Mothe, Josiane; Perego, Raffaele; Potthast, Martin; Sebastiani, Fabrizio
%P 150 - 163
%I Springer
%@ 978-3-030-72239-5
%B Lecture Notes in Computer Science
%N 12657
[147]
X. Zhang, J. Xin, A. Yates, and J. Lin, “Bag-of-Words Baselines for Semantic Code Search,” in The 1st Workshop on Natural Language Processing for Programming (NLP4Prog 2021), Bangkok, Thailand (Online), 2021.
Export
BibTeX
@inproceedings{Zhang_NLP4Prog2021,
TITLE = {Bag-of-Words Baselines for Semantic Code Search},
AUTHOR = {Zhang, Xinyu and Xin, Ji and Yates, Andrew and Lin, Jimmy},
LANGUAGE = {eng},
ISBN = {978-1-954085-64-0},
URL = {https://aclanthology.org/2021.nlp4prog-1.0},
PUBLISHER = {ACL},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The 1st Workshop on Natural Language Processing for Programming (NLP4Prog 2021)},
EDITOR = {Lachmy, Royi and Yao, Ziyu and Durrett, Greg and Gligoric, Milos and Li, Junyi Jessy and Mooney, Ray and Neubig, Graham and Su, Yu and Sun, Huan and Tsarfaty, Reut},
PAGES = {88--94},
ADDRESS = {Bangkok, Thailand (Online)},
}
Endnote
%0 Conference Proceedings
%A Zhang, Xinyu
%A Xin, Ji
%A Yates, Andrew
%A Lin, Jimmy
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Bag-of-Words Baselines for Semantic Code Search :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-619E-8
%U https://aclanthology.org/2021.nlp4prog-1.0
%D 2021
%B 1st Workshop on Natural Language Processing for Programming
%Z date of event: 2021-08-06 - 2021-08-06
%C Bangkok, Thailand (Online)
%B The 1st Workshop on Natural Language Processing for Programming
%E Lachmy, Royi; Yao, Ziyu; Durrett, Greg; Gligoric, Milos; Li, Junyi Jessy; Mooney, Ray; Neubig, Graham; Su, Yu; Sun, Huan; Tsarfaty, Reut
%P 88 - 94
%I ACL
%@ 978-1-954085-64-0
[148]
Z. Zheng, K. Hui, B. He, X. Han, L. Sun, and A. Yates, “Contextualized Query Expansion via Unsupervised Chunk Selection for Text Retrieval,” Information Processing & Management, vol. 58, no. 5, 2021.
Export
BibTeX
@article{Zheng2021,
TITLE = {Contextualized Query Expansion via Unsupervised Chunk Selection for Text Retrieval},
AUTHOR = {Zheng, Zhi and Hui, Kai and He, Ben and Han, Xianpei and Sun, Le and Yates, Andrew},
LANGUAGE = {eng},
ISSN = {0306-4573},
DOI = {10.1016/j.ipm.2021.102672},
PUBLISHER = {Elsevier},
ADDRESS = {Amsterdam},
YEAR = {2021},
MARGINALMARK = {$\bullet$},
DATE = {2021},
JOURNAL = {Information Processing \& Management},
VOLUME = {58},
NUMBER = {5},
EID = {102672},
}
Endnote
%0 Journal Article
%A Zheng, Zhi
%A Hui, Kai
%A He, Ben
%A Han, Xianpei
%A Sun, Le
%A Yates, Andrew
%+ External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Contextualized Query Expansion via Unsupervised Chunk Selection for Text Retrieval :
%G eng
%U http://hdl.handle.net/21.11116/0000-0009-4747-8
%R 10.1016/j.ipm.2021.102672
%7 2021
%D 2021
%J Information Processing & Management
%V 58
%N 5
%Z sequence number: 102672
%I Elsevier
%C Amsterdam
%@ false
2020
[149]
H. Arnaout, S. Razniewski, and G. Weikum, “Negative Statements Considered Useful,” 2020. [Online]. Available: http://arxiv.org/abs/2001.04425. (arXiv: 2001.04425)
Abstract
Knowledge bases (KBs), pragmatic collections of knowledge about notable<br>entities, are an important asset in applications such as search, question<br>answering and dialogue. Rooted in a long tradition in knowledge representation,<br>all popular KBs only store positive information, while they abstain from taking<br>any stance towards statements not contained in them.<br> In this paper, we make the case for explicitly stating interesting statements<br>which are not true. Negative statements would be important to overcome current<br>limitations of question answering, yet due to their potential abundance, any<br>effort towards compiling them needs a tight coupling with ranking. We introduce<br>two approaches towards compiling negative statements. (i) In peer-based<br>statistical inferences, we compare entities with highly related entities in<br>order to derive potential negative statements, which we then rank using<br>supervised and unsupervised features. (ii) In query-log-based text extraction,<br>we use a pattern-based approach for harvesting search engine query logs.<br>Experimental results show that both approaches hold promising and complementary<br>potential. Along with this paper, we publish the first datasets on interesting<br>negative information, containing over 1.1M statements for 100K popular Wikidata<br>entities.<br>
Export
BibTeX
@online{Arnaout_arXiv2001.04425,
TITLE = {Negative Statements Considered Useful},
AUTHOR = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {http://arxiv.org/abs/2001.04425},
EPRINT = {2001.04425},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {Knowledge bases (KBs), pragmatic collections of knowledge about notable<br>entities, are an important asset in applications such as search, question<br>answering and dialogue. Rooted in a long tradition in knowledge representation,<br>all popular KBs only store positive information, while they abstain from taking<br>any stance towards statements not contained in them.<br> In this paper, we make the case for explicitly stating interesting statements<br>which are not true. Negative statements would be important to overcome current<br>limitations of question answering, yet due to their potential abundance, any<br>effort towards compiling them needs a tight coupling with ranking. We introduce<br>two approaches towards compiling negative statements. (i) In peer-based<br>statistical inferences, we compare entities with highly related entities in<br>order to derive potential negative statements, which we then rank using<br>supervised and unsupervised features. (ii) In query-log-based text extraction,<br>we use a pattern-based approach for harvesting search engine query logs.<br>Experimental results show that both approaches hold promising and complementary<br>potential. Along with this paper, we publish the first datasets on interesting<br>negative information, containing over 1.1M statements for 100K popular Wikidata<br>entities.<br>},
}
Endnote
%0 Report
%A Arnaout, Hiba
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Negative Statements Considered Useful :
%G eng
%U http://hdl.handle.net/21.11116/0000-0005-821F-6
%U http://arxiv.org/abs/2001.04425
%D 2020
%X Knowledge bases (KBs), pragmatic collections of knowledge about notable<br>entities, are an important asset in applications such as search, question<br>answering and dialogue. Rooted in a long tradition in knowledge representation,<br>all popular KBs only store positive information, while they abstain from taking<br>any stance towards statements not contained in them.<br> In this paper, we make the case for explicitly stating interesting statements<br>which are not true. Negative statements would be important to overcome current<br>limitations of question answering, yet due to their potential abundance, any<br>effort towards compiling them needs a tight coupling with ranking. We introduce<br>two approaches towards compiling negative statements. (i) In peer-based<br>statistical inferences, we compare entities with highly related entities in<br>order to derive potential negative statements, which we then rank using<br>supervised and unsupervised features. (ii) In query-log-based text extraction,<br>we use a pattern-based approach for harvesting search engine query logs.<br>Experimental results show that both approaches hold promising and complementary<br>potential. Along with this paper, we publish the first datasets on interesting<br>negative information, containing over 1.1M statements for 100K popular Wikidata<br>entities.<br>
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL,Computer Science, Databases, cs.DB
[150]
H. Arnaout, S. Razniewski, and G. Weikum, “Enriching Knowledge Bases with Interesting Negative Statements,” in Automated Knowledge Base Construction (AKBC 2020), Virtual Conference, 2020.
Export
BibTeX
@inproceedings{Arnaout_AKBC2020,
TITLE = {Enriching Knowledge Bases with Interesting Negative Statements},
AUTHOR = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
DOI = {10.24432/C5101K},
PUBLISHER = {OpenReview},
YEAR = {2020},
BOOKTITLE = {Automated Knowledge Base Construction (AKBC 2020)},
ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings
%A Arnaout, Hiba
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Enriching Knowledge Bases with Interesting Negative Statements :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-EBC9-E
%R 10.24432/C5101K
%D 2020
%B 2nd Conference on Automated Knowledge Base Construction
%Z date of event: 2020-06-22 - 2020-06-24
%C Virtual Conference
%B Automated Knowledge Base Construction
%I OpenReview
%U https://openreview.net/forum?id=pSLmyZKaS
[151]
K. Balog, V. Setty, C. Lioma, Y. Liu, M. Zhang, and K. Berberich, Eds., ICTIR ’20. ACM, 2020.
Export
BibTeX
@proceedings{Balog_ICTIR20,
TITLE = {ICTIR '20, ACM SIGIR International Conference on Theory of Information Retrieval},
EDITOR = {Balog, Krisztian and Setty, Vinay and Lioma, Christina and Liu, Yiqun and Zhang, Min and Berberich, Klaus},
LANGUAGE = {eng},
ISBN = {978-1-4503-8067-6},
DOI = {10.1145/3409256},
PUBLISHER = {ACM},
YEAR = {2020},
ADDRESS = {Virtual Event, Norway},
}
Endnote
%0 Conference Proceedings
%E Balog, Krisztian
%E Setty, Vinay
%E Lioma, Christina
%E Liu, Yiqun
%E Zhang, Min
%E Berberich, Klaus
%+ External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T ICTIR '20 : Proceedings of the 2020 ACM SIGIR International Conference on Theory of Information Retrieval
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-041D-4
%R 10.1145/3409256
%@ 978-1-4503-8067-6
%I ACM
%D 2020
%B ACM SIGIR International Conference on Theory of Information Retrieval
%Z date of event: 2020-09-14 - 2020-09-17
%D 2020
%C Virtual Event, Norway
[152]
C. Belth, X. Zheng, J. Vreeken, and D. Koutra, “What is Normal, What is Strange, and What is Missing in a Knowledge Graph: Unified Characterization via Inductive Summarization,” in Proceedings of The World Wide Web Conference (WWW 2020), Taipei, Taiwan, 2020.
Export
BibTeX
@inproceedings{belth:20:kgist,
TITLE = {What is Normal, What is Strange, and What is Missing in a Knowledge Graph: {U}nified Characterization via Inductive Summarization},
AUTHOR = {Belth, Caleb and Zheng, Xinyi and Vreeken, Jilles and Koutra, Danai},
LANGUAGE = {eng},
ISBN = {978-1-4503-7023-3},
DOI = {10.1145/3366423.3380189},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {Proceedings of The World Wide Web Conference (WWW 2020)},
EDITOR = {Huang, Yennun and King, Irwin and Liu, Tie-Yan and van Steen, Maarten},
PAGES = {1115--1126},
ADDRESS = {Taipei, Taiwan},
}
Endnote
%0 Conference Proceedings
%A Belth, Caleb
%A Zheng, Xinyi
%A Vreeken, Jilles
%A Koutra, Danai
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T What is Normal, What is Strange, and What is Missing in a Knowledge Graph: Unified Characterization via Inductive Summarization :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-253F-9
%R 10.1145/3366423.3380189
%D 2020
%B The World Wide Web Conference
%Z date of event: 2020-04-20 - 2020-04-24
%C Taipei, Taiwan
%B Proceedings of The World Wide Web Conference
%E Huang, Yennun; King, Irwin; Liu, Tie-Yan; van Steen, Maarten
%P 1115 - 1126
%I ACM
%@ 978-1-4503-7023-3
[153]
J. J. Benjamin, C. Müller-Birn, and S. Razniewski, “Examining the Impact of Algorithm Awareness on Wikidata’s Recommender System Recoin,” 2020. [Online]. Available: https://arxiv.org/abs/2009.09049. (arXiv: 2009.09049)
Abstract
The global infrastructure of the Web, designed as an open and transparent<br>system, has a significant impact on our society. However, algorithmic systems<br>of corporate entities that neglect those principles increasingly populated the<br>Web. Typical representatives of these algorithmic systems are recommender<br>systems that influence our society both on a scale of global politics and<br>during mundane shopping decisions. Recently, such recommender systems have come<br>under critique for how they may strengthen existing or even generate new kinds<br>of biases. To this end, designers and engineers are increasingly urged to make<br>the functioning and purpose of recommender systems more transparent. Our<br>research relates to the discourse of algorithm awareness, that reconsiders the<br>role of algorithm visibility in interface design. We conducted online<br>experiments with 105 participants using MTurk for the recommender system<br>Recoin, a gadget for Wikidata. In these experiments, we presented users with<br>one of a set of three different designs of Recoin's user interface, each of<br>them exhibiting a varying degree of explainability and interactivity. Our<br>findings include a positive correlation between comprehension of and trust in<br>an algorithmic system in our interactive redesign. However, our results are not<br>conclusive yet, and suggest that the measures of comprehension, fairness,<br>accuracy and trust are not yet exhaustive for the empirical study of algorithm<br>awareness. Our qualitative insights provide a first indication for further<br>measures. Our study participants, for example, were less concerned with the<br>details of understanding an algorithmic calculation than with who or what is<br>judging the result of the algorithm.<br>
Export
BibTeX
@online{Benjamin2009.09049,
TITLE = {Examining the Impact of Algorithm Awareness on {W}ikidata's Recommender System Recoin},
AUTHOR = {Benjamin, Jesse Josua and M{\"u}ller-Birn, Claudia and Razniewski, Simon},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2009.09049},
EPRINT = {2009.09049},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {The global infrastructure of the Web, designed as an open and transparent<br>system, has a significant impact on our society. However, algorithmic systems<br>of corporate entities that neglect those principles increasingly populated the<br>Web. Typical representatives of these algorithmic systems are recommender<br>systems that influence our society both on a scale of global politics and<br>during mundane shopping decisions. Recently, such recommender systems have come<br>under critique for how they may strengthen existing or even generate new kinds<br>of biases. To this end, designers and engineers are increasingly urged to make<br>the functioning and purpose of recommender systems more transparent. Our<br>research relates to the discourse of algorithm awareness, that reconsiders the<br>role of algorithm visibility in interface design. We conducted online<br>experiments with 105 participants using MTurk for the recommender system<br>Recoin, a gadget for Wikidata. In these experiments, we presented users with<br>one of a set of three different designs of Recoin's user interface, each of<br>them exhibiting a varying degree of explainability and interactivity. Our<br>findings include a positive correlation between comprehension of and trust in<br>an algorithmic system in our interactive redesign. However, our results are not<br>conclusive yet, and suggest that the measures of comprehension, fairness,<br>accuracy and trust are not yet exhaustive for the empirical study of algorithm<br>awareness. Our qualitative insights provide a first indication for further<br>measures. Our study participants, for example, were less concerned with the<br>details of understanding an algorithmic calculation than with who or what is<br>judging the result of the algorithm.<br>},
}
Endnote
%0 Report
%A Benjamin, Jesse Josua
%A Müller-Birn, Claudia
%A Razniewski, Simon
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Examining the Impact of Algorithm Awareness on Wikidata's Recommender System Recoin :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0661-4
%U https://arxiv.org/abs/2009.09049
%D 2020
%X The global infrastructure of the Web, designed as an open and transparent<br>system, has a significant impact on our society. However, algorithmic systems<br>of corporate entities that neglect those principles increasingly populated the<br>Web. Typical representatives of these algorithmic systems are recommender<br>systems that influence our society both on a scale of global politics and<br>during mundane shopping decisions. Recently, such recommender systems have come<br>under critique for how they may strengthen existing or even generate new kinds<br>of biases. To this end, designers and engineers are increasingly urged to make<br>the functioning and purpose of recommender systems more transparent. Our<br>research relates to the discourse of algorithm awareness, that reconsiders the<br>role of algorithm visibility in interface design. We conducted online<br>experiments with 105 participants using MTurk for the recommender system<br>Recoin, a gadget for Wikidata. In these experiments, we presented users with<br>one of a set of three different designs of Recoin's user interface, each of<br>them exhibiting a varying degree of explainability and interactivity. Our<br>findings include a positive correlation between comprehension of and trust in<br>an algorithmic system in our interactive redesign. However, our results are not<br>conclusive yet, and suggest that the measures of comprehension, fairness,<br>accuracy and trust are not yet exhaustive for the empirical study of algorithm<br>awareness. Our qualitative insights provide a first indication for further<br>measures. Our study participants, for example, were less concerned with the<br>details of understanding an algorithmic calculation than with who or what is<br>judging the result of the algorithm.<br>
%K Computer Science, Human-Computer Interaction, cs.HC,Computer Science, Computers and Society, cs.CY,Computer Science, Digital Libraries, cs.DL
[154]
A. Bhattacharya, S. Natarajan, and R. Saha Roy, Eds., Proceedings of the 7th ACM IKDD CoDS and 25th COMAD. ACM, 2020.
Export
BibTeX
@proceedings{SahaRoy_CoDSCOMAD20,
TITLE = {Proceedings of the 7th ACM IKDD CoDS and 25th COMAD (CoDS-COMAD 2020)},
EDITOR = {Bhattacharya, Arnab and Natarajan, Sriraam and Saha Roy, Rishiraj},
LANGUAGE = {eng},
ISBN = {978-1-4503-7738-6},
DOI = {10.1145/3371158},
PUBLISHER = {ACM},
YEAR = {2020},
ADDRESS = {Hyderabad, India},
}
Endnote
%0 Conference Proceedings
%E Bhattacharya, Arnab
%E Natarajan, Sriraam
%E Saha Roy, Rishiraj
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Proceedings of the 7th ACM IKDD CoDS and 25th COMAD :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-09CF-6
%R 10.1145/3371158
%@ 978-1-4503-7738-6
%I ACM
%D 2020
%B ACM India Joint International Conference on Data Science and Management of Data
%Z date of event: 2020-01-05 - 2020-01-07
%D 2020
%C Hyderabad, India
[155]
A. J. Biega, J. Schmidt, and R. Saha Roy, “Towards Query Logs for Privacy Studies: On Deriving Search Queries from Questions,” in Advances in Information Retrieval (ECIR 2020), Lisbon, Portugal, 2020.
Export
BibTeX
@inproceedings{Biega_ECIR2020,
TITLE = {Towards Query Logs for Privacy Studies: {O}n Deriving Search Queries from Questions},
AUTHOR = {Biega, Asia J. and Schmidt, Jana and Saha Roy, Rishiraj},
LANGUAGE = {eng},
ISBN = {978-3-030-45441-8},
DOI = {10.1007/978-3-030-45442-5_14},
PUBLISHER = {Springer},
YEAR = {2020},
DATE = {2020},
BOOKTITLE = {Advances in Information Retrieval (ECIR 2020)},
EDITOR = {Jose, Joemon M. and Yilmaz, Emine and Magalh{\~a}es, Jo{\~a}o and Castells, Pablo and Ferro, Nicola and Silva, M{\'a}rio J. and Martins, Fl{\'a}vio},
PAGES = {110--117},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {12036},
ADDRESS = {Lisbon, Portugal},
}
Endnote
%0 Conference Proceedings
%A Biega, Asia J.
%A Schmidt, Jana
%A Saha Roy, Rishiraj
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Towards Query Logs for Privacy Studies: On Deriving Search Queries from Questions :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-02FD-9
%R 10.1007/978-3-030-45442-5_14
%D 2020
%B 42nd European Conference on IR Research
%Z date of event: 2020-04-14 - 2020-04-17
%C Lisbon, Portugal
%B Advances in Information Retrieval
%E Jose, Joemon M.; Yilmaz, Emine; Magalhães, João; Castells, Pablo; Ferro, Nicola; Silva, Mário J.; Martins, Flávio
%P 110 - 117
%I Springer
%@ 978-3-030-45441-8
%B Lecture Notes in Computer Science
%N 12036
[156]
A. J. Biega, J. Schmidt, and R. Saha Roy, “Towards Query Logs for Privacy Studies: On Deriving Search Queries from Questions,” 2020. [Online]. Available: https://arxiv.org/abs/2004.02023. (arXiv: 2004.02023)
Abstract
Translating verbose information needs into crisp search queries is a<br>phenomenon that is ubiquitous but hardly understood. Insights into this process<br>could be valuable in several applications, including synthesizing large<br>privacy-friendly query logs from public Web sources which are readily available<br>to the academic research community. In this work, we take a step towards<br>understanding query formulation by tapping into the rich potential of community<br>question answering (CQA) forums. Specifically, we sample natural language (NL)<br>questions spanning diverse themes from the Stack Exchange platform, and conduct<br>a large-scale conversion experiment where crowdworkers submit search queries<br>they would use when looking for equivalent information. We provide a careful<br>analysis of this data, accounting for possible sources of bias during<br>conversion, along with insights into user-specific linguistic patterns and<br>search behaviors. We release a dataset of 7,000 question-query pairs from this<br>study to facilitate further research on query understanding.<br>
Export
BibTeX
@online{Biega2004.02023,
TITLE = {Towards Query Logs for Privacy Studies: On Deriving Search Queries from Questions},
AUTHOR = {Biega, Asia J. and Schmidt, Jana and Saha Roy, Rishiraj},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2004.02023},
EPRINT = {2004.02023},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {Translating verbose information needs into crisp search queries is a<br>phenomenon that is ubiquitous but hardly understood. Insights into this process<br>could be valuable in several applications, including synthesizing large<br>privacy-friendly query logs from public Web sources which are readily available<br>to the academic research community. In this work, we take a step towards<br>understanding query formulation by tapping into the rich potential of community<br>question answering (CQA) forums. Specifically, we sample natural language (NL)<br>questions spanning diverse themes from the Stack Exchange platform, and conduct<br>a large-scale conversion experiment where crowdworkers submit search queries<br>they would use when looking for equivalent information. We provide a careful<br>analysis of this data, accounting for possible sources of bias during<br>conversion, along with insights into user-specific linguistic patterns and<br>search behaviors. We release a dataset of 7,000 question-query pairs from this<br>study to facilitate further research on query understanding.<br>},
}
Endnote
%0 Report
%A Biega, Asia J.
%A Schmidt, Jana
%A Saha Roy, Rishiraj
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Towards Query Logs for Privacy Studies: On Deriving Search Queries from Questions :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-09C7-E
%U https://arxiv.org/abs/2004.02023
%D 2020
%X Translating verbose information needs into crisp search queries is a<br>phenomenon that is ubiquitous but hardly understood. Insights into this process<br>could be valuable in several applications, including synthesizing large<br>privacy-friendly query logs from public Web sources which are readily available<br>to the academic research community. In this work, we take a step towards<br>understanding query formulation by tapping into the rich potential of community<br>question answering (CQA) forums. Specifically, we sample natural language (NL)<br>questions spanning diverse themes from the Stack Exchange platform, and conduct<br>a large-scale conversion experiment where crowdworkers submit search queries<br>they would use when looking for equivalent information. We provide a careful<br>analysis of this data, accounting for possible sources of bias during<br>conversion, along with insights into user-specific linguistic patterns and<br>search behaviors. We release a dataset of 7,000 question-query pairs from this<br>study to facilitate further research on query understanding.<br>
%K Computer Science, Information Retrieval, cs.IR
[157]
K. Budhathoki, “Causal Inference on Discrete Data,” Universität des Saarlandes, Saarbrücken, 2020.
Export
BibTeX
@phdthesis{BudDiss_2020,
TITLE = {Causal Inference on Discrete Data},
AUTHOR = {Budhathoki, Kailash},
LANGUAGE = {eng},
URL = {urn:nbn:de:bsz:291--ds-329528},
DOI = {10.22028/D291-32952},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2020},
DATE = {2020},
}
Endnote
%0 Thesis
%A Budhathoki, Kailash
%Y Vreeken, Jilles
%A referee: Weikum, Gerhard
%A referee: Heskes, Tom
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Causal Inference on Discrete Data :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FE73-A
%R 10.22028/D291-32952
%U urn:nbn:de:bsz:291--ds-329528
%F OTHER: hdl:20.500.11880/30501
%I Universität des Saarlandes
%C Saarbrücken
%D 2020
%P 171 p.
%V phd
%9 phd
%U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/30501
[158]
D. Calvanese, J. Corman, D. Lanti, and S. Razniewski, “Counting Query Answers over a DL-Lite Knowledge Base,” in Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence (IJCAI 2020), Yokohama, Japan (Virtual), 2020.
Abstract
Counting answers to a query is an operation supported by virtually all<br>database management systems. In this paper we focus on counting answers over a<br>Knowledge Base (KB), which may be viewed as a database enriched with background<br>knowledge about the domain under consideration. In particular, we place our<br>work in the context of Ontology-Mediated Query Answering/Ontology-based Data<br>Access (OMQA/OBDA), where the language used for the ontology is a member of the<br>DL-Lite family and the data is a (usually virtual) set of assertions. We study<br>the data complexity of query answering, for different members of the DL-Lite<br>family that include number restrictions, and for variants of conjunctive<br>queries with counting that differ with respect to their shape (connected,<br>branching, rooted). We improve upon existing results by providing a PTIME and<br>coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case,<br>we define a novel query rewriting technique into first-order logic with<br>counting.<br>
Export
BibTeX
@inproceedings{RazniewskiIJCAI2020,
TITLE = {Counting Query Answers over a {DL}-Lite Knowledge Base},
AUTHOR = {Calvanese, Diego and Corman, Julien and Lanti, Davide and Razniewski, Simon},
LANGUAGE = {eng},
ISBN = {978-0-9992411-6-5},
DOI = {10.24963/ijcai.2020/230},
PUBLISHER = {IJCAI},
YEAR = {2021},
ABSTRACT = {Counting answers to a query is an operation supported by virtually all<br>database management systems. In this paper we focus on counting answers over a<br>Knowledge Base (KB), which may be viewed as a database enriched with background<br>knowledge about the domain under consideration. In particular, we place our<br>work in the context of Ontology-Mediated Query Answering/Ontology-based Data<br>Access (OMQA/OBDA), where the language used for the ontology is a member of the<br>DL-Lite family and the data is a (usually virtual) set of assertions. We study<br>the data complexity of query answering, for different members of the DL-Lite<br>family that include number restrictions, and for variants of conjunctive<br>queries with counting that differ with respect to their shape (connected,<br>branching, rooted). We improve upon existing results by providing a PTIME and<br>coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case,<br>we define a novel query rewriting technique into first-order logic with<br>counting.<br>},
BOOKTITLE = {Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence (IJCAI 2020)},
EDITOR = {Bessiere, Christian},
PAGES = {1658--1666},
ADDRESS = {Yokohama, Japan (Virtual)},
}
Endnote
%0 Conference Proceedings
%A Calvanese, Diego
%A Corman, Julien
%A Lanti, Davide
%A Razniewski, Simon
%+ External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Counting Query Answers over a DL-Lite Knowledge Base :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-009E-6
%R 10.24963/ijcai.2020/230
%D 2020
%B Twenty-Ninth International Joint Conference on Artificial Intelligence
%Z date of event: 2021-01-07 - 2021-01-15
%C Yokohama, Japan (Virtual)
%X Counting answers to a query is an operation supported by virtually all<br>database management systems. In this paper we focus on counting answers over a<br>Knowledge Base (KB), which may be viewed as a database enriched with background<br>knowledge about the domain under consideration. In particular, we place our<br>work in the context of Ontology-Mediated Query Answering/Ontology-based Data<br>Access (OMQA/OBDA), where the language used for the ontology is a member of the<br>DL-Lite family and the data is a (usually virtual) set of assertions. We study<br>the data complexity of query answering, for different members of the DL-Lite<br>family that include number restrictions, and for variants of conjunctive<br>queries with counting that differ with respect to their shape (connected,<br>branching, rooted). We improve upon existing results by providing a PTIME and<br>coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case,<br>we define a novel query rewriting technique into first-order logic with<br>counting.<br>
%K Computer Science, Databases, cs.DB,Computer Science, Artificial Intelligence, cs.AI
%B Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence
%E Bessiere, Christian
%P 1658 - 1666
%I IJCAI
%@ 978-0-9992411-6-5
[159]
D. Calvanese, J. Corman, D. Lanti, and S. Razniewski, “Counting Query Answers over a DL-Lite Knowledge Base (extended version),” 2020. [Online]. Available: https://arxiv.org/abs/2005.05886. (arXiv: 2005.05886)
Abstract
Counting answers to a query is an operation supported by virtually all<br>database management systems. In this paper we focus on counting answers over a<br>Knowledge Base (KB), which may be viewed as a database enriched with background<br>knowledge about the domain under consideration. In particular, we place our<br>work in the context of Ontology-Mediated Query Answering/Ontology-based Data<br>Access (OMQA/OBDA), where the language used for the ontology is a member of the<br>DL-Lite family and the data is a (usually virtual) set of assertions. We study<br>the data complexity of query answering, for different members of the DL-Lite<br>family that include number restrictions, and for variants of conjunctive<br>queries with counting that differ with respect to their shape (connected,<br>branching, rooted). We improve upon existing results by providing a PTIME and<br>coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case,<br>we define a novel query rewriting technique into first-order logic with<br>counting.<br>
Export
BibTeX
@online{Razniewskiarxiv2020,
TITLE = {Counting Query Answers over a {DL}-Lite Knowledge Base (extended version)},
AUTHOR = {Calvanese, Diego and Corman, Julien and Lanti, Davide and Razniewski, Simon},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2005.05886},
EPRINT = {2005.05886},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {Counting answers to a query is an operation supported by virtually all<br>database management systems. In this paper we focus on counting answers over a<br>Knowledge Base (KB), which may be viewed as a database enriched with background<br>knowledge about the domain under consideration. In particular, we place our<br>work in the context of Ontology-Mediated Query Answering/Ontology-based Data<br>Access (OMQA/OBDA), where the language used for the ontology is a member of the<br>DL-Lite family and the data is a (usually virtual) set of assertions. We study<br>the data complexity of query answering, for different members of the DL-Lite<br>family that include number restrictions, and for variants of conjunctive<br>queries with counting that differ with respect to their shape (connected,<br>branching, rooted). We improve upon existing results by providing a PTIME and<br>coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case,<br>we define a novel query rewriting technique into first-order logic with<br>counting.<br>},
}
Endnote
%0 Report
%A Calvanese, Diego
%A Corman, Julien
%A Lanti, Davide
%A Razniewski, Simon
%+ External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Counting Query Answers over a DL-Lite Knowledge Base (extended version) :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FF5A-6
%U https://arxiv.org/abs/2005.05886
%D 2020
%X Counting answers to a query is an operation supported by virtually all<br>database management systems. In this paper we focus on counting answers over a<br>Knowledge Base (KB), which may be viewed as a database enriched with background<br>knowledge about the domain under consideration. In particular, we place our<br>work in the context of Ontology-Mediated Query Answering/Ontology-based Data<br>Access (OMQA/OBDA), where the language used for the ontology is a member of the<br>DL-Lite family and the data is a (usually virtual) set of assertions. We study<br>the data complexity of query answering, for different members of the DL-Lite<br>family that include number restrictions, and for variants of conjunctive<br>queries with counting that differ with respect to their shape (connected,<br>branching, rooted). We improve upon existing results by providing a PTIME and<br>coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case,<br>we define a novel query rewriting technique into first-order logic with<br>counting.<br>
%K Computer Science, Databases, cs.DB,Computer Science, Artificial Intelligence, cs.AI
[160]
D. Calvanese, J. Corman, D. Lanti, and S. Razniewski, “Rewriting Count Queries over DL-Lite TBoxes with Number Restrictions,” in Proceedings of the 33rd International Workshop on Description Logics (DL 2020), Rhodes, Greece (Virtual Event), 2020.
Export
BibTeX
@inproceedings{Calvanese_DL2020,
TITLE = {Rewriting Count Queries over {DL}-Lite {TBoxes} with Number Restrictions},
AUTHOR = {Calvanese, Diego and Corman, Julien and Lanti, Davide and Razniewski, Simon},
LANGUAGE = {eng},
ISSN = {1613-0073},
URL = {http://ceur-ws.org/Vol-2663/paper-7.pdf; urn:nbn:de:0074-2663-4},
PUBLISHER = {ceur-ws.org},
YEAR = {2020},
BOOKTITLE = {Proceedings of the 33rd International Workshop on Description Logics (DL 2020)},
EDITOR = {Borgwardt, Stefan and Meyer, Thomas},
EID = {7},
SERIES = {CEUR Workshop Proceedings},
VOLUME = {2663},
ADDRESS = {Rhodes, Greece (Virtual Event)},
}
Endnote
%0 Conference Proceedings
%A Calvanese, Diego
%A Corman, Julien
%A Lanti, Davide
%A Razniewski, Simon
%+ External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Rewriting Count Queries over DL-Lite TBoxes with Number Restrictions :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0606-B
%U http://ceur-ws.org/Vol-2663/paper-7.pdf
%D 2020
%B 33rd International Workshop on Description Logics
%Z date of event: 2020-09-12 - 2020-09-14
%C Rhodes, Greece (Virtual Event)
%B Proceedings of the 33rd International Workshop on Description Logics
%E Borgwardt, Stefan; Meyer, Thomas
%Z sequence number: 7
%I ceur-ws.org
%B CEUR Workshop Proceedings
%N 2663
%@ false
[161]
Y. Chalier, S. Razniewski, and G. Weikum, “Joint Reasoning for Multi-Faceted Commonsense Knowledge,” 2020. [Online]. Available: http://arxiv.org/abs/2001.04170. (arXiv: 2001.04170)
Abstract
Commonsense knowledge (CSK) supports a variety of AI applications, from<br>visual understanding to chatbots. Prior works on acquiring CSK, such as<br>ConceptNet, have compiled statements that associate concepts, like everyday<br>objects or activities, with properties that hold for most or some instances of<br>the concept. Each concept is treated in isolation from other concepts, and the<br>only quantitative measure (or ranking) of properties is a confidence score that<br>the statement is valid. This paper aims to overcome these limitations by<br>introducing a multi-faceted model of CSK statements and methods for joint<br>reasoning over sets of inter-related statements. Our model captures four<br>different dimensions of CSK statements: plausibility, typicality, remarkability<br>and salience, with scoring and ranking along each dimension. For example,<br>hyenas drinking water is typical but not salient, whereas hyenas eating<br>carcasses is salient. For reasoning and ranking, we develop a method with soft<br>constraints, to couple the inference over concepts that are related in in a<br>taxonomic hierarchy. The reasoning is cast into an integer linear programming<br>(ILP), and we leverage the theory of reduction costs of a relaxed LP to compute<br>informative rankings. This methodology is applied to several large CSK<br>collections. Our evaluation shows that we can consolidate these inputs into<br>much cleaner and more expressive knowledge. Results are available at<br>https://dice.mpi-inf.mpg.de.<br>
Export
BibTeX
@online{Chalier_arXiv2001.04170,
TITLE = {Joint Reasoning for Multi-Faceted Commonsense Knowledge},
AUTHOR = {Chalier, Yohan and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {http://arxiv.org/abs/2001.04170},
EPRINT = {2001.04170},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {Commonsense knowledge (CSK) supports a variety of AI applications, from<br>visual understanding to chatbots. Prior works on acquiring CSK, such as<br>ConceptNet, have compiled statements that associate concepts, like everyday<br>objects or activities, with properties that hold for most or some instances of<br>the concept. Each concept is treated in isolation from other concepts, and the<br>only quantitative measure (or ranking) of properties is a confidence score that<br>the statement is valid. This paper aims to overcome these limitations by<br>introducing a multi-faceted model of CSK statements and methods for joint<br>reasoning over sets of inter-related statements. Our model captures four<br>different dimensions of CSK statements: plausibility, typicality, remarkability<br>and salience, with scoring and ranking along each dimension. For example,<br>hyenas drinking water is typical but not salient, whereas hyenas eating<br>carcasses is salient. For reasoning and ranking, we develop a method with soft<br>constraints, to couple the inference over concepts that are related in in a<br>taxonomic hierarchy. The reasoning is cast into an integer linear programming<br>(ILP), and we leverage the theory of reduction costs of a relaxed LP to compute<br>informative rankings. This methodology is applied to several large CSK<br>collections. Our evaluation shows that we can consolidate these inputs into<br>much cleaner and more expressive knowledge. Results are available at<br>https://dice.mpi-inf.mpg.de.<br>},
}
Endnote
%0 Report
%A Chalier, Yohan
%A Razniewski, Simon
%A Weikum, Gerhard
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Joint Reasoning for Multi-Faceted Commonsense Knowledge :
%G eng
%U http://hdl.handle.net/21.11116/0000-0005-8226-D
%U http://arxiv.org/abs/2001.04170
%D 2020
%X Commonsense knowledge (CSK) supports a variety of AI applications, from<br>visual understanding to chatbots. Prior works on acquiring CSK, such as<br>ConceptNet, have compiled statements that associate concepts, like everyday<br>objects or activities, with properties that hold for most or some instances of<br>the concept. Each concept is treated in isolation from other concepts, and the<br>only quantitative measure (or ranking) of properties is a confidence score that<br>the statement is valid. This paper aims to overcome these limitations by<br>introducing a multi-faceted model of CSK statements and methods for joint<br>reasoning over sets of inter-related statements. Our model captures four<br>different dimensions of CSK statements: plausibility, typicality, remarkability<br>and salience, with scoring and ranking along each dimension. For example,<br>hyenas drinking water is typical but not salient, whereas hyenas eating<br>carcasses is salient. For reasoning and ranking, we develop a method with soft<br>constraints, to couple the inference over concepts that are related in in a<br>taxonomic hierarchy. The reasoning is cast into an integer linear programming<br>(ILP), and we leverage the theory of reduction costs of a relaxed LP to compute<br>informative rankings. This methodology is applied to several large CSK<br>collections. Our evaluation shows that we can consolidate these inputs into<br>much cleaner and more expressive knowledge. Results are available at<br>https://dice.mpi-inf.mpg.de.<br>
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Information Retrieval, cs.IR
[162]
Y. Chalier, S. Razniewski, and G. Weikum, “Joint Reasoning for Multi-Faceted Commonsense Knowledge,” in Automated Knowledge Base Construction (AKBC 2020), Virtual Conference, 2020.
Export
BibTeX
@inproceedings{Chalier_AKBC2020,
TITLE = {Joint Reasoning for Multi-Faceted Commonsense Knowledge},
AUTHOR = {Chalier, Yohan and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
DOI = {10.24432/C58G6G},
PUBLISHER = {OpenReview},
YEAR = {2020},
BOOKTITLE = {Automated Knowledge Base Construction (AKBC 2020)},
ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings
%A Chalier, Yohan
%A Razniewski, Simon
%A Weikum, Gerhard
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Joint Reasoning for Multi-Faceted Commonsense Knowledge :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-EBCF-8
%R 10.24432/C58G6G
%D 2020
%B 2nd Conference on Automated Knowledge Base Construction
%Z date of event: 2020-06-22 - 2020-06-24
%C Virtual Conference
%B Automated Knowledge Base Construction
%I OpenReview
%U https://openreview.net/forum?id=QnPV72SZVt
[163]
Y. Chalier, S. Razniewski, and G. Weikum, “Dice: A Joint Reasoning Framework for Multi-Faceted Commonsense Knowledge,” in ISWC 2020 Posters, Demos, and Industry Tracks, Globally Online, 2020.
Export
BibTeX
@inproceedings{Chalier_ISCW20,
TITLE = {Dice: {A} Joint Reasoning Framework for Multi-Faceted Commonsense Knowledge},
AUTHOR = {Chalier, Yohan and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISSN = {1613-0073},
URL = {http://ceur-ws.org/Vol-2721/paper482.pdf; urn:nbn:de:0074-2721-6},
PUBLISHER = {ceur-ws.org},
YEAR = {2020},
BOOKTITLE = {ISWC 2020 Posters, Demos, and Industry Tracks},
EDITOR = {Taylor, Kerry and Goncalves, Rafael and Lecue, Freddy and Yan, Jun},
PAGES = {16--20},
EID = {482},
SERIES = {CEUR Workshop Proceedings},
VOLUME = {2721},
ADDRESS = {Globally Online},
}
Endnote
%0 Conference Proceedings
%A Chalier, Yohan
%A Razniewski, Simon
%A Weikum, Gerhard
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Dice: A Joint Reasoning Framework for Multi-Faceted Commonsense Knowledge :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F132-0
%U http://ceur-ws.org/Vol-2721/paper482.pdf
%D 2020
%B 19th International Semantic Web Conference
%Z date of event: 2020-11-01 - 2020-11-06
%C Globally Online
%B ISWC 2020 Posters, Demos, and Industry Tracks
%E Taylor, Kerry; Goncalves, Rafael; Lecue, Freddy; Yan, Jun
%P 16 - 20
%Z sequence number: 482
%I ceur-ws.org
%B CEUR Workshop Proceedings
%N 2721
%@ false
%U http://ceur-ws.org/Vol-2721/paper482.pdf
[164]
E. Chang, J. Caplinger, A. Marin, X. Shen, and V. Demberg, “DART: A Lightweight Quality-Suggestive Data-to-Text Annotation Tool,” in The 28th International Conference on Computational Linguistics (COLING 2020), Barcelona, Spain (Online), 2020.
Export
BibTeX
@inproceedings{chang2020dart,
TITLE = {{DART}: {A} Lightweight Quality-Suggestive Data-to-Text Annotation Tool},
AUTHOR = {Chang, Ernie and Caplinger, Jeriah and Marin, Alex and Shen, Xiaoyu and Demberg, Vera},
LANGUAGE = {eng},
ISBN = {978-1-952148-28-6},
URL = {https://www.aclweb.org/anthology/2020.coling-demos.3},
DOI = {10.18653/v1/2020.coling-demos.3},
PUBLISHER = {ACL},
YEAR = {2020},
BOOKTITLE = {The 28th International Conference on Computational Linguistics (COLING 2020)},
EDITOR = {Ptaszynski, Michal and Ziolko, Bartosz},
PAGES = {12--17},
ADDRESS = {Barcelona, Spain (Online)},
}
Endnote
%0 Conference Proceedings
%A Chang, Ernie
%A Caplinger, Jeriah
%A Marin, Alex
%A Shen, Xiaoyu
%A Demberg, Vera
%+ External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T DART: A Lightweight Quality-Suggestive Data-to-Text Annotation Tool :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-149C-2
%U https://www.aclweb.org/anthology/2020.coling-demos.3
%R 10.18653/v1/2020.coling-demos.3
%D 2020
%B The 28th International Conference on Computational Linguistics
%Z date of event: 2020-12-08 - 2020-12-13
%C Barcelona, Spain (Online)
%B The 28th International Conference on Computational Linguistics
%E Ptaszynski, Michal; Ziolko, Bartosz
%P 12 - 17
%I ACL
%@ 978-1-952148-28-6
[165]
C. X. Chu, S. Razniewski, and G. Weikum, “ENTYFI: A System for Fine-grained Entity Typing in Fictional Texts,” in The 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020), Online, 2020.
Export
BibTeX
@inproceedings{Chu_EMNLP20,
TITLE = {{ENTYFI}: {A} System for Fine-grained Entity Typing in Fictional Texts},
AUTHOR = {Chu, Cuong Xuan and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-952148-62-0},
URL = {https://www.aclweb.org/anthology/2020.emnlp-demos.14/},
DOI = {10.18653/v1/2020.emnlp-demos.14},
PUBLISHER = {ACL},
YEAR = {2020},
BOOKTITLE = {The 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020)},
EDITOR = {Liu, Qun and Schlangen, David},
PAGES = {100--106},
ADDRESS = {Online},
}
Endnote
%0 Conference Proceedings
%A Chu, Cuong Xuan
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T ENTYFI: A System for Fine-grained Entity Typing in Fictional Texts :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-EED5-D
%U https://www.aclweb.org/anthology/2020.emnlp-demos.14/
%R 10.18653/v1/2020.emnlp-demos.14
%D 2020
%B Conference on Empirical Methods in Natural Language Processing
%Z date of event: 2020-11-16 - 2020-11-20
%C Online
%B The 2020 Conference on Empirical Methods in Natural Language Processing
%E Liu, Qun; Schlangen, David
%P 100 - 106
%I ACL
%@ 978-1-952148-62-0
%U https://www.aclweb.org/anthology/2020.emnlp-demos.14.pdf
[166]
C. X. Chu, S. Razniewski, and G. Weikum, “ENTYFI: Entity Typing in Fictional Texts,” in WSDM ’20, 13th International Conference on Web Search and Data Mining, Houston, TX, USA, 2020.
Export
BibTeX
@inproceedings{ChuWSDM2020,
TITLE = {{ENTYFI}: {E}ntity Typing in Fictional Texts},
AUTHOR = {Chu, Cuong Xuan and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {9781450368223},
DOI = {10.1145/3336191.3371808},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {WSDM '20, 13th International Conference on Web Search and Data Mining},
EDITOR = {Caverlee, James and Hu, Xia Ben},
PAGES = {124--132},
ADDRESS = {Houston, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Chu, Cuong Xuan
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T ENTYFI: Entity Typing in Fictional Texts :
%G eng
%U http://hdl.handle.net/21.11116/0000-0006-A27E-6
%R 10.1145/3336191.3371808
%D 2020
%B 13th International Conference on Web Search and Data Mining
%Z date of event: 2020-02-03 - 2020-02-07
%C Houston, TX, USA
%B WSDM '20
%E Caverlee, James; Hu, Xia Ben
%P 124 - 132
%I ACM
%@ 9781450368223
[167]
S. Dalleiger and J. Vreeken, “Explainable Data Decompositions,” in AAAI Technical Track: Machine Learning, New York, NY, USA, 2020.
Export
BibTeX
@inproceedings{dalleiger:20:disc,
TITLE = {Explainable Data Decompositions},
AUTHOR = {Dalleiger, Sebastian and Vreeken, Jilles},
LANGUAGE = {eng},
ISBN = {978-1-57735-835-0},
DOI = {10.1609/aaai.v34i04.5780},
PUBLISHER = {AAAI},
YEAR = {2020},
DATE = {2020},
BOOKTITLE = {AAAI Technical Track: Machine Learning},
PAGES = {3709--3716},
ADDRESS = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings
%A Dalleiger, Sebastian
%A Vreeken, Jilles
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Explainable Data Decompositions :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-2559-B
%R 10.1609/aaai.v34i04.5780
%D 2020
%B Thirty-Fourth AAAI Conference on Artificial Intelligence
%Z date of event: 2020-02-07 - 2020-02-12
%C New York, NY, USA
%B AAAI Technical Track: Machine Learning
%P 3709 - 3716
%I AAAI
%@ 978-1-57735-835-0
[168]
S. Dalleiger and J. Vreeken, “The Relaxed Maximum Entropy Distribution and its Application to Pattern Discovery,” in 20th IEEE International Conference on Data Mining (ICDM 2020), Virtual Conference, 2020.
Export
BibTeX
@inproceedings{dalleiger:20:reaper,
TITLE = {The Relaxed Maximum Entropy Distribution and its Application to Pattern Discovery},
AUTHOR = {Dalleiger, Sebastian and Vreeken, Jilles},
LANGUAGE = {eng},
ISBN = {978-1-7281-8316-9},
DOI = {10.1109/ICDM50108.2020.00112},
PUBLISHER = {IEEE},
YEAR = {2020},
BOOKTITLE = {20th IEEE International Conference on Data Mining (ICDM 2020)},
EDITOR = {Plant, Claudia and Wang, Haixun and Cuzzocrea, Alfredo and Zaniolo, Carlo and Wu, Xindong},
PAGES = {978--983},
ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings
%A Dalleiger, Sebastian
%A Vreeken, Jilles
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T The Relaxed Maximum Entropy Distribution and its Application to Pattern Discovery :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-254E-8
%R 10.1109/ICDM50108.2020.00112
%D 2020
%B 20th IEEE International Conference on Data Mining
%Z date of event: 2020-11-17 - 2020-11-20
%C Virtual Conference
%B 20th IEEE International Conference on Data Mining
%E Plant, Claudia; Wang, Haixun; Cuzzocrea, Alfredo; Zaniolo, Carlo; Wu, Xindong
%P 978 - 983
%I IEEE
%@ 978-1-7281-8316-9
[169]
F. Darari, W. Nutt, S. Razniewski, and S. Rudolph, “Completeness and soundness guarantees for conjunctive SPARQL queries over RDF data sources with completeness statements,” Semantic Web, vol. 11, no. 1, 2020.
Export
BibTeX
@article{Darari2020,
TITLE = {Completeness and soundness guarantees for conjunctive {SPARQL} queries over {RDF} data sources with completeness statements},
AUTHOR = {Darari, Fariza and Nutt, Werner and Razniewski, Simon and Rudolph, Sebastian},
LANGUAGE = {eng},
ISSN = {1570-0844},
DOI = {10.3233/SW-190344},
PUBLISHER = {IOS Press},
ADDRESS = {Amsterdam},
YEAR = {2020},
DATE = {2020},
JOURNAL = {Semantic Web},
VOLUME = {11},
NUMBER = {1},
PAGES = {441--482},
}
Endnote
%0 Journal Article
%A Darari, Fariza
%A Nutt, Werner
%A Razniewski, Simon
%A Rudolph, Sebastian
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Completeness and soundness guarantees for conjunctive SPARQL queries over RDF data sources with completeness statements :
%G eng
%U http://hdl.handle.net/21.11116/0000-0006-9A06-6
%R 10.3233/SW-190344
%7 2020
%D 2020
%J Semantic Web
%V 11
%N 1
%& 441
%P 441 - 482
%I IOS Press
%C Amsterdam
%@ false
[170]
J. Fischer and J. Vreeken, “Sets of Robust Rules, and How to Find Them,” in Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2019), Würzburg, Germany, 2020.
Export
BibTeX
@inproceedings{fischer:19:grab,
TITLE = {Sets of Robust Rules, and How to Find Them},
AUTHOR = {Fischer, Jonas and Vreeken, Jilles},
LANGUAGE = {eng},
ISBN = {978-3-030-46150-8},
DOI = {10.1007/978-3-030-46150-8_3},
PUBLISHER = {Springer},
YEAR = {2019},
DATE = {2020},
BOOKTITLE = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2019)},
PAGES = {38--54},
SERIES = {Lecture Notes in Artificial Intelligence},
VOLUME = {11906},
ADDRESS = {W{\"u}rzburg, Germany},
}
Endnote
%0 Conference Proceedings
%A Fischer, Jonas
%A Vreeken, Jilles
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Sets of Robust Rules, and How to Find Them :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FEAE-8
%R 10.1007/978-3-030-46150-8_3
%D 2020
%B European Conference on Machine Learning and Knowledge Discovery in Databases
%Z date of event: 2019-09-19 - 2019-09-20
%C Würzburg, Germany
%B Machine Learning and Knowledge Discovery in Databases
%P 38 - 54
%I Springer
%@ 978-3-030-46150-8
%B Lecture Notes in Artificial Intelligence
%N 11906
[171]
J. Fischer and J. Vreeken, “Discovering Succinct Pattern Sets Expressing Co-Occurrence and Mutual Exclusivity,” in KDD ’20, 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Virtual Event, USA, 2020.
Export
BibTeX
@inproceedings{fischer:20:mexican,
TITLE = {Discovering Succinct Pattern Sets Expressing Co-Occurrence and Mutual Exclusivity},
AUTHOR = {Fischer, Jonas and Vreeken, Jilles},
LANGUAGE = {eng},
ISBN = {978-1-4503-7998-4},
DOI = {10.1145/3394486.3403124},
PUBLISHER = {ACM},
YEAR = {2020},
DATE = {2020},
BOOKTITLE = {KDD '20, 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
EDITOR = {Gupta, Rajesh and Liu, Yan and Tang, Jiliang and Prakash, B. Aditya},
PAGES = {813--823},
ADDRESS = {Virtual Event, USA},
}
Endnote
%0 Conference Proceedings
%A Fischer, Jonas
%A Vreeken, Jilles
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Discovering Succinct Pattern Sets Expressing Co-Occurrence and Mutual Exclusivity :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FEA5-1
%R 10.1145/3394486.3403124
%D 2020
%B 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining
%Z date of event: 2020-08-23 - 2020-08-27
%C Virtual Event, USA
%B KDD '20
%E Gupta, Rajesh; Liu, Yan; Tang, Jiliang; Prakash, B. Aditya
%P 813 - 823
%I ACM
%@ 978-1-4503-7998-4
[172]
M. H. Gad-Elrab, D. Stepanova, T.-K. Tran, H. Adel, and G. Weikum, “ExCut: Explainable Embedding-Based Clustering over Knowledge Graphs,” in The Semantic Web -- ISWC 2020, Athens, Greece (Virtual Conference), 2020.
Export
BibTeX
@inproceedings{Gad_Elrab_ISWC2020,
TITLE = {{ExCut}: {E}xplainable Embedding-Based Clustering over Knowledge Graphs},
AUTHOR = {Gad-Elrab, Mohamed Hassan and Stepanova, Daria and Tran, Trung-Kien and Adel, Heike and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-3-030-62418-7},
DOI = {10.1007/978-3-030-62419-4_13},
PUBLISHER = {Springer},
YEAR = {2020},
DATE = {2020},
BOOKTITLE = {The Semantic Web -- ISWC 2020},
EDITOR = {Pan, Jeff Z. and Tamma, Valentina and D'Amato, Claudia and Janowicz, Krzysztof and Fu, Bo and Polleres, Axel and Seneviratne, Oshani and Kagal, Lalana},
PAGES = {218--237},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {12506},
ADDRESS = {Athens, Greece (Virtual Conference)},
}
Endnote
%0 Conference Proceedings
%A Gad-Elrab, Mohamed Hassan
%A Stepanova, Daria
%A Tran, Trung-Kien
%A Adel, Heike
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T ExCut: Explainable Embedding-Based Clustering over Knowledge Graphs :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-830F-5
%R 10.1007/978-3-030-62419-4_13
%D 2020
%B 19th International Semantic Web Conference
%Z date of event: 2020-11-02 - 2020-11-06
%C Athens, Greece (Virtual Conference)
%B The Semantic Web -- ISWC 2020
%E Pan, Jeff Z.; Tamma, Valentina; D'Amato, Claudia; Janowicz, Krzysztof; Fu, Bo; Polleres, Axel; Seneviratne, Oshani; Kagal, Lalana
%P 218 - 237
%I Springer
%@ 978-3-030-62418-7
%B Lecture Notes in Computer Science
%N 12506
[173]
M. H. Gad-Elrab, V. T. Ho, E. Levinkov, T.-K. Tran, and D. Stepanova, “Towards Utilizing Knowledge Graph Embedding Models for Conceptual Clustering,” in ISWC 2020 Posters, Demos, and Industry Tracks, Globally Online, 2020.
Export
BibTeX
@inproceedings{Gad-Elrab_ISCW20,
TITLE = {Towards Utilizing Knowledge Graph Embedding Models for Conceptual Clustering},
AUTHOR = {Gad-Elrab, Mohamed Hassan and Ho, Vinh Thinh and Levinkov, Evgeny and Tran, Trung-Kien and Stepanova, Daria},
LANGUAGE = {eng},
ISSN = {1613-0073},
URL = {http://ceur-ws.org/Vol-2721/paper572.pdf; urn:nbn:de:0074-2721-6},
PUBLISHER = {ceur-ws.org},
YEAR = {2020},
BOOKTITLE = {ISWC 2020 Posters, Demos, and Industry Tracks},
EDITOR = {Taylor, Kerry and Goncalves, Rafael and Lecue, Freddy and Yan, Jun},
PAGES = {281--286},
EID = {572},
SERIES = {CEUR Workshop Proceedings},
VOLUME = {2721},
ADDRESS = {Globally Online},
}
Endnote
%0 Conference Proceedings
%A Gad-Elrab, Mohamed Hassan
%A Ho, Vinh Thinh
%A Levinkov, Evgeny
%A Tran, Trung-Kien
%A Stepanova, Daria
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
%T Towards Utilizing Knowledge Graph Embedding Models for Conceptual Clustering :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F86B-A
%U http://ceur-ws.org/Vol-2721/paper572.pdf
%D 2020
%B 19th International Semantic Web Conference
%Z date of event: 2020-11-01 - 2020-11-06
%C Globally Online
%B ISWC 2020 Posters, Demos, and Industry Tracks
%E Taylor, Kerry; Goncalves, Rafael; Lecue, Freddy; Yan, Jun
%P 281 - 286
%Z sequence number: 572
%I ceur-ws.org
%B CEUR Workshop Proceedings
%N 2721
%@ false
[174]
A. Ghazimatin, O. Balalau, R. Saha Roy, and G. Weikum, “PRINCE: Provider-side Interpretability with Counterfactual Explanations in Recommender Systems,” in WSDM ’20, 13th International Conference on Web Search and Data Mining, Houston, TX, USA, 2020.
Export
BibTeX
@inproceedings{GhazimatinWSDM2020,
TITLE = {{PRINCE}: {P}rovider-side Interpretability with Counterfactual Explanations in Recommender Systems},
AUTHOR = {Ghazimatin, Azin and Balalau, Oana and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-6822-3},
DOI = {10.1145/3336191.3371824},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {WSDM '20, 13th International Conference on Web Search and Data Mining},
EDITOR = {Caverlee, James and Hu, Xia Ben},
PAGES = {196--204},
ADDRESS = {Houston, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Ghazimatin, Azin
%A Balalau, Oana
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T PRINCE: Provider-side Interpretability with Counterfactual Explanations in Recommender Systems :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F173-7
%R 10.1145/3336191.3371824
%D 2020
%B 13th International Conference on Web Search and Data Mining
%Z date of event: 2020-02-03 - 2020-02-07
%C Houston, TX, USA
%B WSDM '20
%E Caverlee, James; Hu, Xia Ben
%P 196 - 204
%I ACM
%@ 978-1-4503-6822-3
[175]
S. Ghosh, S. Razniewski, and G. Weikum, “Uncovering Hidden Semantics of Set Information in Knowledge Bases,” Journal of Web Semantics, vol. 64, 2020.
Export
BibTeX
@article{Ghosh_2020,
TITLE = {Uncovering Hidden Semantics of Set Information in Knowledge Bases},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISSN = {1570-8268},
DOI = {10.1016/j.websem.2020.100588},
PUBLISHER = {Elsevier},
ADDRESS = {Amsterdam},
YEAR = {2020},
DATE = {2020},
JOURNAL = {Journal of Web Semantics},
VOLUME = {64},
EID = {100588},
}
Endnote
%0 Journal Article
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Uncovering Hidden Semantics of Set Information in Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-066D-9
%R 10.1016/j.websem.2020.100588
%7 2020
%D 2020
%J Journal of Web Semantics
%V 64
%Z sequence number: 100588
%I Elsevier
%C Amsterdam
%@ false
[176]
S. Ghosh, S. Razniewski, and G. Weikum, “CounQER: A System for Discovering and Linking Count Information in Knowledge Bases,” in The Semantic Web: ESWC 2020 Satellite Events, Heraklion, Greece, 2020.
Export
BibTeX
@inproceedings{Ghosh_ESWC20,
TITLE = {{CounQER}: {A} System for Discovering and Linking Count Information in Knowledge Bases},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-3-030-62326-5},
DOI = {10.1007/978-3-030-62327-2_15},
PUBLISHER = {Springer},
YEAR = {2020},
DATE = {2020},
BOOKTITLE = {The Semantic Web: ESWC 2020 Satellite Events},
EDITOR = {Harth, Andreas and Presutti, Valentina and Troncy, Rapha{\"e}l and Acosta, Maribel and Polleres, Axel and Fern{\'a}ndez, Javier D. and Xavier Parreira, Josiane and Hartig, Olaf and Hose, Katja and Cochez, Michael},
PAGES = {84--90},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {12124},
ADDRESS = {Heraklion, Greece},
}
Endnote
%0 Conference Proceedings
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CounQER: A System for Discovering and Linking Count Information in Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-EFB9-C
%R 10.1007/978-3-030-62327-2_15
%D 2020
%B 17th Extended Semantic Web Conference
%Z date of event: 2020-05-31 - 2020-06-04
%C Heraklion, Greece
%B The Semantic Web: ESWC 2020 Satellite Events
%E Harth, Andreas; Presutti, Valentina; Troncy, Raphaël; Acosta, Maribel; Polleres, Axel; Fernández, Javier D.; Xavier Parreira, Josiane; Hartig, Olaf; Hose, Katja; Cochez, Michael
%P 84 - 90
%I Springer
%@ 978-3-030-62326-5
%B Lecture Notes in Computer Science
%N 12124
[177]
S. Ghosh, S. Razniewski, and G. Weikum, “CounQER: A System for Discovering and Linking Count Information in Knowledge Bases,” 2020. [Online]. Available: https://arxiv.org/abs/2005.03529. (arXiv: 2005.03529)
Abstract
Predicate constraints of general-purpose knowledge bases (KBs) like Wikidata, DBpedia and Freebase are often limited to subproperty, domain and range constraints. In this demo we showcase CounQER, a system that illustrates the alignment of counting predicates, like staffSize, and enumerating predicates, like workInstitution^{-1}. In the demonstration session, attendees can inspect these alignments, and will learn about the importance of these alignments for KB question answering and curation. CounQER is available at https://counqer.mpi-inf.mpg.de/spo.
Export
BibTeX
@online{Ghosh_2005.03529,
TITLE = {{CounQER}: {A} System for Discovering and Linking Count Information in Knowledge Bases},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2005.03529},
EPRINT = {2005.03529},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {Predicate constraints of general-purpose knowledge bases (KBs) like Wikidata, DBpedia and Freebase are often limited to subproperty, domain and range constraints. In this demo we showcase CounQER, a system that illustrates the alignment of counting predicates, like staffSize, and enumerating predicates, like workInstitution^{-1}. In the demonstration session, attendees can inspect these alignments, and will learn about the importance of these alignments for KB question answering and curation. CounQER is available at https://counqer.mpi-inf.mpg.de/spo.},
}
Endnote
%0 Report
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CounQER: A System for Discovering and Linking Count Information in Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F187-0
%U https://arxiv.org/abs/2005.03529
%D 2020
%X Predicate constraints of general-purpose knowledge bases (KBs) like Wikidata, DBpedia and Freebase are often limited to subproperty, domain and range constraints. In this demo we showcase CounQER, a system that illustrates the alignment of counting predicates, like staffSize, and enumerating predicates, like workInstitution^{-1}. In the demonstration session, attendees can inspect these alignments, and will learn about the importance of these alignments for KB question answering and curation. CounQER is available at https://counqer.mpi-inf.mpg.de/spo.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Databases, cs.DB
[178]
S. Ghosh, S. Razniewski, and G. Weikum, “Uncovering Hidden Semantics of Set Information in Knowledge Bases,” 2020. [Online]. Available: http://arxiv.org/abs/2003.03155. (arXiv: 2003.03155)
Abstract
Knowledge Bases (KBs) contain a wealth of structured information about entities and predicates. This paper focuses on set-valued predicates, i.e., the relationship between an entity and a set of entities. In KBs, this information is often represented in two formats: (i) via counting predicates such as numberOfChildren and staffSize, that store aggregated integers, and (ii) via enumerating predicates such as parentOf and worksFor, that store individual set memberships. Both formats are typically complementary: unlike enumerating predicates, counting predicates do not give away individuals, but are more likely informative towards the true set size, thus this coexistence could enable interesting applications in question answering and KB curation. In this paper we aim at uncovering this hidden knowledge. We proceed in two steps. (i) We identify set-valued predicates from a given KB predicates via statistical and embedding-based features. (ii) We link counting predicates and enumerating predicates by a combination of co-occurrence, correlation and textual relatedness metrics. We analyze the prevalence of count information in four prominent knowledge bases, and show that our linking method achieves up to 0.55 F1 score in set predicate identification versus 0.40 F1 score of a random selection, and normalized discounted gains of up to 0.84 at position 1 and 0.75 at position 3 in relevant predicate alignments. Our predicate alignments are showcased in a demonstration system available at https://counqer.mpi-inf.mpg.de/spo.
Export
BibTeX
@online{Ghosh_arXiv2003.03155,
TITLE = {Uncovering Hidden Semantics of Set Information in Knowledge Bases},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {http://arxiv.org/abs/2003.03155},
EPRINT = {2003.03155},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {Knowledge Bases (KBs) contain a wealth of structured information about entities and predicates. This paper focuses on set-valued predicates, i.e., the relationship between an entity and a set of entities. In KBs, this information is often represented in two formats: (i) via counting predicates such as numberOfChildren and staffSize, that store aggregated integers, and (ii) via enumerating predicates such as parentOf and worksFor, that store individual set memberships. Both formats are typically complementary: unlike enumerating predicates, counting predicates do not give away individuals, but are more likely informative towards the true set size, thus this coexistence could enable interesting applications in question answering and KB curation. In this paper we aim at uncovering this hidden knowledge. We proceed in two steps. (i) We identify set-valued predicates from a given KB predicates via statistical and embedding-based features. (ii) We link counting predicates and enumerating predicates by a combination of co-occurrence, correlation and textual relatedness metrics. We analyze the prevalence of count information in four prominent knowledge bases, and show that our linking method achieves up to 0.55 F1 score in set predicate identification versus 0.40 F1 score of a random selection, and normalized discounted gains of up to 0.84 at position 1 and 0.75 at position 3 in relevant predicate alignments. Our predicate alignments are showcased in a demonstration system available at https://counqer.mpi-inf.mpg.de/spo.},
}
Endnote
%0 Report
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Uncovering Hidden Semantics of Set Information in Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-0662-4
%U http://arxiv.org/abs/2003.03155
%D 2020
%X Knowledge Bases (KBs) contain a wealth of structured information about entities and predicates. This paper focuses on set-valued predicates, i.e., the relationship between an entity and a set of entities. In KBs, this information is often represented in two formats: (i) via counting predicates such as numberOfChildren and staffSize, that store aggregated integers, and (ii) via enumerating predicates such as parentOf and worksFor, that store individual set memberships. Both formats are typically complementary: unlike enumerating predicates, counting predicates do not give away individuals, but are more likely informative towards the true set size, thus this coexistence could enable interesting applications in question answering and KB curation. In this paper we aim at uncovering this hidden knowledge. We proceed in two steps. (i) We identify set-valued predicates from a given KB predicates via statistical and embedding-based features. (ii) We link counting predicates and enumerating predicates by a combination of co-occurrence, correlation and textual relatedness metrics. We analyze the prevalence of count information in four prominent knowledge bases, and show that our linking method achieves up to 0.55 F1 score in set predicate identification versus 0.40 F1 score of a random selection, and normalized discounted gains of up to 0.84 at position 1 and 0.75 at position 3 in relevant predicate alignments. Our predicate alignments are showcased in a demonstration system available at https://counqer.mpi-inf.mpg.de/spo.
%K Computer Science, Databases, cs.DB,Computer Science, Information Retrieval, cs.IR
[179]
D. Gupta and K. Berberich, “Weaving Text into Tables,” in CIKM ’20, 29th ACM International Conference on Information & Knowledge Management, Virtual Event, Ireland, 2020.
Export
BibTeX
@inproceedings{DBLP:conf/cikm/0001B20,
TITLE = {Weaving Text into Tables},
AUTHOR = {Gupta, Dhruv and Berberich, Klaus},
LANGUAGE = {eng},
ISBN = {978-1-4503-6859-9},
DOI = {10.1145/3340531.3417442},
PUBLISHER = {ACM},
YEAR = {2020},
DATE = {2020},
BOOKTITLE = {CIKM '20, 29th ACM International Conference on Information \& Knowledge Management},
EDITOR = {d{\textquoteright}Aquin, Mathieu and Dietze, Stefan},
PAGES = {3401--3404},
ADDRESS = {Virtual Event, Ireland},
}
Endnote
%0 Conference Proceedings
%A Gupta, Dhruv
%A Berberich, Klaus
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Weaving Text into Tables :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0313-F
%R 10.1145/3340531.3417442
%D 2020
%B 29th ACM International Conference on Information & Knowledge Management
%Z date of event: 2020-10-19 - 2020-10-23
%C Virtual Event, Ireland
%B CIKM '20
%E d’Aquin, Mathieu; Dietze, Stefan
%P 3401 - 3404
%I ACM
%@ 978-1-4503-6859-9
[180]
D. Gupta and K. Berberich, “Optimizing Hyper-Phrase Queries,” in ICTIR ’20, ACM SIGIR International Conference on Theory of Information Retrieval, Virtual Event, Norway, 2020.
Export
BibTeX
@inproceedings{DBLP:conf/ictir/0002B20,
TITLE = {Optimizing Hyper-Phrase Queries},
AUTHOR = {Gupta, Dhruv and Berberich, Klaus},
LANGUAGE = {eng},
ISBN = {978-1-4503-8067-6},
DOI = {10.1145/3409256.3409827},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {ICTIR '20, ACM SIGIR International Conference on Theory of Information Retrieval},
EDITOR = {Balog, Krisztian and Setty, Vinay and Lioma, Christina and Liu, Yiqun and Zhang, Min and Berberich, Klaus},
PAGES = {41--48},
ADDRESS = {Virtual Event, Norway},
}
Endnote
%0 Conference Proceedings
%A Gupta, Dhruv
%A Berberich, Klaus
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Optimizing Hyper-Phrase Queries :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-0335-9
%R 10.1145/3409256.3409827
%D 2020
%B ACM SIGIR International Conference on Theory of Information Retrieval
%Z date of event: 2020-09-14 - 2020-09-17
%C Virtual Event, Norway
%B ICTIR '20
%E Balog, Krisztian; Setty, Vinay; Lioma, Christina; Liu, Yiqun; Zhang, Min; Berberich, Klaus
%P 41 - 48
%I ACM
%@ 978-1-4503-8067-6
[181]
E. Heiter, “Factoring Out Prior Knowledge from Low-dimensional Embeddings,” Universität des Saarlandes, Saarbrücken, 2020.
Export
BibTeX
@mastersthesis{heiter:20:confetti,
TITLE = {Factoring Out Prior Knowledge from Low-dimensional Embeddings},
AUTHOR = {Heiter, Edith},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2020},
DATE = {2020},
}
Endnote
%0 Thesis
%A Heiter, Edith
%Y Vreeken, Jilles
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Factoring Out Prior Knowledge from Low-dimensional Embeddings :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FEF8-4
%I Universität des Saarlandes
%C Saarbrücken
%D 2020
%V master
%9 master
[182]
V. T. Ho, K. Pal, N. Kleer, K. Berberich, and G. Weikum, “Entities with Quantities: Extraction, Search, and Ranking,” in WSDM ’20, 13th International Conference on Web Search and Data Mining, Houston, TX, USA, 2020.
Export
BibTeX
@inproceedings{HoWSDM2020,
TITLE = {Entities with Quantities: {E}xtraction, Search, and Ranking},
AUTHOR = {Ho, Vinh Thinh and Pal, Koninika and Kleer, Niko and Berberich, Klaus and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {9781450368223},
DOI = {10.1145/3336191.3371860},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {WSDM '20, 13th International Conference on Web Search and Data Mining},
EDITOR = {Caverlee, James and Hu, Xia Ben},
PAGES = {833--836},
ADDRESS = {Houston, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Ho, Vinh Thinh
%A Pal, Koninika
%A Kleer, Niko
%A Berberich, Klaus
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Entities with Quantities: Extraction, Search, and Ranking :
%G eng
%U http://hdl.handle.net/21.11116/0000-0006-A284-D
%R 10.1145/3336191.3371860
%D 2020
%B 13th International Conference on Web Search and Data Mining
%Z date of event: 2020-02-03 - 2020-02-07
%C Houston, TX, USA
%B WSDM '20
%E Caverlee, James; Hu, Xia Ben
%P 833 - 836
%I ACM
%@ 9781450368223
[183]
M. Jain, P. Mirza, and R. Mutharaju, “Cardinality Extraction from Text for Ontology Learning,” in Proceedings of the 7th ACM IKDD CoDS and 25th COMAD (CoDS-COMAD 2020), Hyderabad, India, 2020.
Export
BibTeX
@inproceedings{Jain_CoDS2020,
TITLE = {Cardinality Extraction from Text for Ontology Learning},
AUTHOR = {Jain, Monika and Mirza, Paramita and Mutharaju, Raghava},
LANGUAGE = {eng},
ISBN = {9781450377386},
DOI = {10.1145/3371158.3371223},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {Proceedings of the 7th ACM IKDD CoDS and 25th COMAD (CoDS-COMAD 2020)},
EDITOR = {Bhattacharya, Arnab and Natarajan, Sriraam and Saha Roy, Rishiraj},
PAGES = {354--354},
ADDRESS = {Hyderabad, India},
}
Endnote
%0 Conference Proceedings
%A Jain, Monika
%A Mirza, Paramita
%A Mutharaju, Raghava
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Cardinality Extraction from Text for Ontology Learning :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-AB73-6
%R 10.1145/3371158.3371223
%D 2020
%B ACM India Joint International Conference on Data Science and Management of Data
%Z date of event: 2020-01-05 - 2020-01-07
%C Hyderabad, India
%B Proceedings of the 7th ACM IKDD CoDS and 25th COMAD
%E Bhattacharya, Arnab; Natarajan, Sriraam; Saha Roy, Rishiraj
%P 354 - 354
%I ACM
%@ 9781450377386
[184]
M. Kaiser, “Incorporating User Feedback in Conversational Question Answering over Heterogeneous Web Sources,” in SIGIR ’20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, China, 2020.
Export
BibTeX
@inproceedings{Kaiser_SIGIR20b,
TITLE = {Incorporating User Feedback in Conversational Question Answering over Heterogeneous {Web} Sources},
AUTHOR = {Kaiser, Magdalena},
LANGUAGE = {eng},
ISBN = {9781450380164},
DOI = {10.1145/3397271.3401454},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {SIGIR '20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
PAGES = {2482--2482},
ADDRESS = {Virtual Event, China},
}
Endnote
%0 Conference Proceedings
%A Kaiser, Magdalena
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Incorporating User Feedback in Conversational Question Answering over Heterogeneous Web Sources :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FCDA-8
%R 10.1145/3397271.3401454
%D 2020
%B 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2020-07-25 - 2020-07-30
%C Virtual Event, China
%B SIGIR '20
%P 2482 - 2482
%I ACM
%@ 9781450380164
[185]
M. Kaiser, R. Saha Roy, and G. Weikum, “Conversational Question Answering over Passages by Leveraging Word Proximity Networks,” 2020. [Online]. Available: https://arxiv.org/abs/2004.13117. (arXiv: 2004.13117)
Abstract
Question answering (QA) over text passages is a problem of long-standing interest in information retrieval. Recently, the conversational setting has attracted attention, where a user asks a sequence of questions to satisfy her information needs around a topic. While this setup is a natural one and similar to humans conversing with each other, it introduces two key research challenges: understanding the context left implicit by the user in follow-up questions, and dealing with ad hoc question formulations. In this work, we demonstrate CROWN (Conversational passage ranking by Reasoning Over Word Networks): an unsupervised yet effective system for conversational QA with passage responses, that supports several modes of context propagation over multiple turns. To this end, CROWN first builds a word proximity network (WPN) from large corpora to store statistically significant term co-occurrences. At answering time, passages are ranked by a combination of their similarity to the question, and coherence of query terms within: these factors are measured by reading off node and edge weights from the WPN. CROWN provides an interface that is both intuitive for end-users, and insightful for experts for reconfiguration to individual setups. CROWN was evaluated on TREC CAsT data, where it achieved above-median performance in a pool of neural methods.
Export
BibTeX
@online{Kaiser_2004.13117,
TITLE = {Conversational Question Answering over Passages by Leveraging Word Proximity Networks},
AUTHOR = {Kaiser, Magdalena and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2004.13117},
EPRINT = {2004.13117},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {Question answering (QA) over text passages is a problem of long-standing interest in information retrieval. Recently, the conversational setting has attracted attention, where a user asks a sequence of questions to satisfy her information needs around a topic. While this setup is a natural one and similar to humans conversing with each other, it introduces two key research challenges: understanding the context left implicit by the user in follow-up questions, and dealing with ad hoc question formulations. In this work, we demonstrate CROWN (Conversational passage ranking by Reasoning Over Word Networks): an unsupervised yet effective system for conversational QA with passage responses, that supports several modes of context propagation over multiple turns. To this end, CROWN first builds a word proximity network (WPN) from large corpora to store statistically significant term co-occurrences. At answering time, passages are ranked by a combination of their similarity to the question, and coherence of query terms within: these factors are measured by reading off node and edge weights from the WPN. CROWN provides an interface that is both intuitive for end-users, and insightful for experts for reconfiguration to individual setups. CROWN was evaluated on TREC CAsT data, where it achieved above-median performance in a pool of neural methods.},
}
Endnote
%0 Report
%A Kaiser, Magdalena
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Conversational Question Answering over Passages by Leveraging Word Proximity Networks :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F17D-D
%U https://arxiv.org/abs/2004.13117
%D 2020
%X Question answering (QA) over text passages is a problem of long-standing interest in information retrieval. Recently, the conversational setting has attracted attention, where a user asks a sequence of questions to satisfy her information needs around a topic. While this setup is a natural one and similar to humans conversing with each other, it introduces two key research challenges: understanding the context left implicit by the user in follow-up questions, and dealing with ad hoc question formulations. In this work, we demonstrate CROWN (Conversational passage ranking by Reasoning Over Word Networks): an unsupervised yet effective system for conversational QA with passage responses, that supports several modes of context propagation over multiple turns. To this end, CROWN first builds a word proximity network (WPN) from large corpora to store statistically significant term co-occurrences. At answering time, passages are ranked by a combination of their similarity to the question, and coherence of query terms within: these factors are measured by reading off node and edge weights from the WPN. CROWN provides an interface that is both intuitive for end-users, and insightful for experts for reconfiguration to individual setups. CROWN was evaluated on TREC CAsT data, where it achieved above-median performance in a pool of neural methods.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[186]
M. Kaiser, R. Saha Roy, and G. Weikum, “Conversational Question Answering over Passages by Leveraging Word Proximity Networks,” in SIGIR ’20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, China, 2020.
Export
BibTeX
@inproceedings{Kaiser_SIGIR20,
TITLE = {Conversational Question Answering over Passages by Leveraging Word Proximity Networks},
AUTHOR = {Kaiser, Magdalena and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {9781450380164},
DOI = {10.1145/3397271.3401399},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {SIGIR '20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
PAGES = {2129--2132},
ADDRESS = {Virtual Event, China},
}
Endnote
%0 Conference Proceedings
%A Kaiser, Magdalena
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Conversational Question Answering over Passages by Leveraging Word Proximity Networks :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F152-C
%R 10.1145/3397271.3401399
%D 2020
%B 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2020-07-25 - 2020-07-30
%C Virtual Event, China
%B SIGIR '20
%P 2129 - 2132
%I ACM
%@ 9781450380164
[187]
P. Lahoti, A. Beutel, J. Chen, K. Lee, F. Prost, N. Thain, X. Wang, and E. Chi, “Fairness without Demographics through Adversarially Reweighted Learning,” in Advances in Neural Information Processing Systems 33 (NeurIPS 2020), Virtual Event, 2020.
Export
BibTeX
@inproceedings{DBLP:conf/nips/LahotiBCLPT0C20,
TITLE = {Fairness without Demographics through Adversarially Reweighted Learning},
AUTHOR = {Lahoti, Preethi and Beutel, Alex and Chen, Jilin and Lee, Kang and Prost, Flavien and Thain, Nithum and Wang, Xuezhi and Chi, Ed},
LANGUAGE = {eng},
PUBLISHER = {Curran Associates, Inc.},
YEAR = {2020},
BOOKTITLE = {Advances in Neural Information Processing Systems 33 (NeurIPS 2020)},
EDITOR = {Larochelle, Hugo and Ranzato, Marc Aurelio and Hadsell, Raia and Balcan, Maria-Florina and Lin, Hsuan-Tien},
ADDRESS = {Virtual Event},
}
Endnote
%0 Conference Proceedings
%A Lahoti, Preethi
%A Beutel, Alex
%A Chen, Jilin
%A Lee, Kang
%A Prost, Flavien
%A Thain, Nithum
%A Wang, Xuezhi
%A Chi, Ed
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
%T Fairness without Demographics through Adversarially Reweighted Learning :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FCC2-2
%D 2020
%B 34th Conference on Neural Information Processing Systems
%Z date of event: 2020-12-06 - 2020-12-12
%C Virtual Event
%B Advances in Neural Information Processing Systems 33
%E Larochelle, Hugo; Ranzato, Marc Aurelio; Hadsell, Raia; Balcan, Maria-Florina; Lin, Hsuan-Tien
%I Curran Associates, Inc.
%U https://proceedings.neurips.cc/paper/2020/hash/07fc15c9d169ee48573edd749d25945d-Abstract.html
[188]
C. Li, A. Yates, S. MacAvaney, B. He, and Y. Sun, “PARADE: Passage Representation Aggregation for Document Reranking,” 2020. [Online]. Available: https://arxiv.org/abs/2008.09093. (arXiv: 2008.09093)
Abstract
We present PARADE, an end-to-end Transformer-based model that considers document-level context for document reranking. PARADE leverages passage-level relevance representations to predict a document relevance score, overcoming the limitations of previous approaches that perform inference on passages independently. Experiments on two ad-hoc retrieval benchmarks demonstrate PARADE's effectiveness over such methods. We conduct extensive analyses on PARADE's efficiency, highlighting several strategies for improving it. When combined with knowledge distillation, a PARADE model with 72% fewer parameters achieves effectiveness competitive with previous approaches using BERT-Base. Our code is available at https://github.com/canjiali/PARADE.
Export
BibTeX
@online{Li2008.09093,
TITLE = {{PARADE}: Passage Representation Aggregation for Document Reranking},
AUTHOR = {Li, Canjia and Yates, Andrew and MacAvaney, Sean and He, Ben and Sun, Yingfei},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2008.09093},
EPRINT = {2008.09093},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {We present PARADE, an end-to-end Transformer-based model that considers document-level context for document reranking. PARADE leverages passage-level relevance representations to predict a document relevance score, overcoming the limitations of previous approaches that perform inference on passages independently. Experiments on two ad-hoc retrieval benchmarks demonstrate PARADE's effectiveness over such methods. We conduct extensive analyses on PARADE's efficiency, highlighting several strategies for improving it. When combined with knowledge distillation, a PARADE model with 72\% fewer parameters achieves effectiveness competitive with previous approaches using BERT-Base. Our code is available at \url{https://github.com/canjiali/PARADE}.},
}
Endnote
%0 Report
%A Li, Canjia
%A Yates, Andrew
%A MacAvaney, Sean
%A He, Ben
%A Sun, Yingfei
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
%T PARADE: Passage Representation Aggregation for Document Reranking :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-06CF-9
%U https://arxiv.org/abs/2008.09093
%D 2020
%X We present PARADE, an end-to-end Transformer-based model that considers document-level context for document reranking. PARADE leverages passage-level relevance representations to predict a document relevance score, overcoming the limitations of previous approaches that perform inference on passages independently. Experiments on two ad-hoc retrieval benchmarks demonstrate PARADE's effectiveness over such methods. We conduct extensive analyses on PARADE's efficiency, highlighting several strategies for improving it. When combined with knowledge distillation, a PARADE model with 72% fewer parameters achieves effectiveness competitive with previous approaches using BERT-Base. Our code is available at https://github.com/canjiali/PARADE.
%K Computer Science, Information Retrieval, cs.IR
[189]
J. Lin, R. Nogueira, and A. Yates, “Pretrained Transformers for Text Ranking: BERT and Beyond,” 2020. [Online]. Available: https://arxiv.org/abs/2010.06467. (arXiv: 2010.06467)
Abstract
The goal of text ranking is to generate an ordered list of texts retrieved from a corpus in response to a query. Although the most common formulation of text ranking is search, instances of the task can also be found in many natural language processing applications. This survey provides an overview of text ranking with neural network architectures known as transformers, of which BERT is the best-known example. The combination of transformers and self-supervised pretraining has, without exaggeration, revolutionized the fields of natural language processing (NLP), information retrieval (IR), and beyond. In this survey, we provide a synthesis of existing work as a single point of entry for practitioners who wish to gain a better understanding of how to apply transformers to text ranking problems and researchers who wish to pursue work in this area. We cover a wide range of modern techniques, grouped into two high-level categories: transformer models that perform reranking in multi-stage ranking architectures and learned dense representations that attempt to perform ranking directly. There are two themes that pervade our survey: techniques for handling long documents, beyond the typical sentence-by-sentence processing approaches used in NLP, and techniques for addressing the tradeoff between effectiveness (result quality) and efficiency (query latency). Although transformer architectures and pretraining techniques are recent innovations, many aspects of how they are applied to text ranking are relatively well understood and represent mature techniques. However, there remain many open research questions, and thus in addition to laying out the foundations of pretrained transformers for text ranking, this survey also attempts to prognosticate where the field is heading.
Export
BibTeX
@online{Lin2010.06467,
TITLE = {Pretrained Transformers for Text Ranking: {BERT} and Beyond},
AUTHOR = {Lin, Jimmy and Nogueira, Rodrigo and Yates, Andrew},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2010.06467},
EPRINT = {2010.06467},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {The goal of text ranking is to generate an ordered list of texts retrieved from a corpus in response to a query. Although the most common formulation of text ranking is search, instances of the task can also be found in many natural language processing applications. This survey provides an overview of text ranking with neural network architectures known as transformers, of which BERT is the best-known example. The combination of transformers and self-supervised pretraining has, without exaggeration, revolutionized the fields of natural language processing (NLP), information retrieval (IR), and beyond. In this survey, we provide a synthesis of existing work as a single point of entry for practitioners who wish to gain a better understanding of how to apply transformers to text ranking problems and researchers who wish to pursue work in this area. We cover a wide range of modern techniques, grouped into two high-level categories: transformer models that perform reranking in multi-stage ranking architectures and learned dense representations that attempt to perform ranking directly. There are two themes that pervade our survey: techniques for handling long documents, beyond the typical sentence-by-sentence processing approaches used in NLP, and techniques for addressing the tradeoff between effectiveness (result quality) and efficiency (query latency). Although transformer architectures and pretraining techniques are recent innovations, many aspects of how they are applied to text ranking are relatively well understood and represent mature techniques. However, there remain many open research questions, and thus in addition to laying out the foundations of pretrained transformers for text ranking, this survey also attempts to prognosticate where the field is heading.},
}
Endnote
%0 Report
%A Lin, Jimmy
%A Nogueira, Rodrigo
%A Yates, Andrew
%+ External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Pretrained Transformers for Text Ranking: BERT and Beyond :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-06DA-C
%U https://arxiv.org/abs/2010.06467
%D 2020
%X The goal of text ranking is to generate an ordered list of texts retrieved from a corpus in response to a query. Although the most common formulation of text ranking is search, instances of the task can also be found in many natural language processing applications. This survey provides an overview of text ranking with neural network architectures known as transformers, of which BERT is the best-known example. The combination of transformers and self-supervised pretraining has, without exaggeration, revolutionized the fields of natural language processing (NLP), information retrieval (IR), and beyond. In this survey, we provide a synthesis of existing work as a single point of entry for practitioners who wish to gain a better understanding of how to apply transformers to text ranking problems and researchers who wish to pursue work in this area. We cover a wide range of modern techniques, grouped into two high-level categories: transformer models that perform reranking in multi-stage ranking architectures and learned dense representations that attempt to perform ranking directly. There are two themes that pervade our survey: techniques for handling long documents, beyond the typical sentence-by-sentence processing approaches used in NLP, and techniques for addressing the tradeoff between effectiveness (result quality) and efficiency (query latency). Although transformer architectures and pretraining techniques are recent innovations, many aspects of how they are applied to text ranking are relatively well understood and represent mature techniques. However, there remain many open research questions, and thus in addition to laying out the foundations of pretrained transformers for text ranking, this survey also attempts to prognosticate where the field is heading.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[190]
P. Mandros, M. Boley, and J. Vreeken, “Discovering Dependencies with Reliable Mutual Information,” Knowledge and Information Systems, vol. 62, 2020.
Export
BibTeX
@article{Mandros2020,
TITLE = {Discovering Dependencies with Reliable Mutual Information},
AUTHOR = {Mandros, Panagiotis and Boley, Mario and Vreeken, Jilles},
LANGUAGE = {eng},
ISSN = {0219-3116},
DOI = {10.1007/s10115-020-01494-9},
PUBLISHER = {Springer},
ADDRESS = {New York, NY},
YEAR = {2020},
JOURNAL = {Knowledge and Information Systems},
VOLUME = {62},
PAGES = {4223--4253},
}
Endnote
%0 Journal Article
%A Mandros, Panagiotis
%A Boley, Mario
%A Vreeken, Jilles
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Discovering Dependencies with Reliable Mutual Information :
%G eng
%U http://hdl.handle.net/21.11116/0000-0006-DC90-F
%R 10.1007/s10115-020-01494-9
%7 2020
%D 2020
%J Knowledge and Information Systems
%V 62
%& 4223
%P 4223 - 4253
%I Springer
%C New York, NY
%@ false
[191]
S. Nag Chowdhury, W. Cheng, G. de Melo, S. Razniewski, and G. Weikum, “Illustrate Your Story: Enriching Text with Images,” in WSDM ’20, 13th International Conference on Web Search and Data Mining, Houston, TX, USA, 2020.
Export
BibTeX
@inproceedings{NagWSDM2020,
TITLE = {Illustrate Your Story: {Enriching} Text with Images},
AUTHOR = {Nag Chowdhury, Sreyasi and Cheng, William and de Melo, Gerard and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {9781450368223},
DOI = {10.1145/3336191.3371866},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {WSDM '20, 13th International Conference on Web Search and Data Mining},
EDITOR = {Caverlee, James and Hu, Xia Ben},
PAGES = {849--852},
ADDRESS = {Houston, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Nag Chowdhury, Sreyasi
%A Cheng, William
%A de Melo, Gerard
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Illustrate Your Story: Enriching Text with Images :
%G eng
%U http://hdl.handle.net/21.11116/0000-0006-A27C-8
%R 10.1145/3336191.3371866
%D 2020
%B 13th International Conference on Web Search and Data Mining
%Z date of event: 2020-02-03 - 2020-02-07
%C Houston, TX, USA
%B WSDM '20
%E Caverlee, James; Hu, Xia Ben
%P 849 - 852
%I ACM
%@ 9781450368223
[192]
T.-P. Nguyen, “Advanced Semantics for Commonsense Knowledge Extraction,” Universität des Saarlandes, Saarbrücken, 2020.
Abstract
Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
Export
BibTeX
@mastersthesis{NguyenMSc2020,
TITLE = {Advanced Semantics for Commonsense Knowledge Extraction},
AUTHOR = {Nguyen, Tuan-Phong},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2020},
DATE = {2020},
ABSTRACT = {Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.},
}
Endnote
%0 Thesis
%A Nguyen, Tuan-Phong
%Y Razniewski, Simon
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
International Max Planck Research School, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Advanced Semantics for Commonsense Knowledge Extraction :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FED0-0
%I Universität des Saarlandes
%C Saarbrücken
%D 2020
%P 67 p.
%V master
%9 master
%X Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
[193]
T.-P. Nguyen, S. Razniewski, and G. Weikum, “Advanced Semantics for Commonsense Knowledge Extraction,” WWW 2021, 2020. [Online]. Available: https://arxiv.org/abs/2011.00905. (arXiv: 2011.00905)
Abstract
Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This paper presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
Export
BibTeX
@online{Nguyen_2011.00905,
TITLE = {Advanced Semantics for Commonsense Knowledge Extraction},
AUTHOR = {Nguyen, Tuan-Phong and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2011.00905},
EPRINT = {2011.00905},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This paper presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.},
JOURNAL = {WWW 2021},
}
Endnote
%0 Report
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Advanced Semantics for Commonsense Knowledge Extraction :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FEDA-6
%U https://arxiv.org/abs/2011.00905
%D 2020
%X Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This paper presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
%K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL
%J WWW 2021
[194]
A. Oláh, “What’s in the Box? Explaining Neural Networks with Robust Rules,” Universität des Saarlandes, Saarbrücken, 2020.
Export
BibTeX
@mastersthesis{olah:20:explainn,
TITLE = {What's in the Box? Explaining Neural Networks with Robust Rules},
AUTHOR = {Ol{\'a}h, Anna},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2020},
DATE = {2020},
}
Endnote
%0 Thesis
%A Oláh, Anna
%Y Vreeken, Jilles
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T What's in the Box? Explaining Neural Networks with Robust Rules :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FEFA-2
%I Universität des Saarlandes
%C Saarbrücken
%D 2020
%V master
%9 master
[195]
K. Pal, V. T. Ho, and G. Weikum, “Co-Clustering Triples from Open Information Extraction,” in Proceedings of the 7th ACM IKDD CoDS and 25th COMAD (CoDS-COMAD 2020), Hyderabad, India, 2020.
Export
BibTeX
@inproceedings{Pal_CoDS2020,
TITLE = {Co-Clustering Triples from Open Information Extraction},
AUTHOR = {Pal, Koninika and Ho, Vinh Thinh and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {9781450377386},
DOI = {10.1145/3371158.3371183},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {Proceedings of the 7th ACM IKDD CoDS and 25th COMAD (CoDS-COMAD 2020)},
EDITOR = {Bhattacharya, Arnab and Natarajan, Sriraam and Saha Roy, Rishiraj},
PAGES = {190--194},
ADDRESS = {Hyderabad, India},
}
Endnote
%0 Conference Proceedings
%A Pal, Koninika
%A Ho, Vinh Thinh
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Co-Clustering Triples from Open Information Extraction :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-EBFC-5
%R 10.1145/3371158.3371183
%D 2020
%B ACM India Joint International Conference on Data Science and Management of Data
%Z date of event: 2020-01-05 - 2020-01-07
%C Hyderabad, India
%B Proceedings of the 7th ACM IKDD CoDS and 25th COMAD
%E Bhattacharya, Arnab; Natarajan, Sriraam; Saha Roy, Rishiraj
%P 190 - 194
%I ACM
%@ 9781450377386
[196]
T. Pellissier Tanon, G. Weikum, and F. Suchanek, “YAGO 4: A Reason-able Knowledge Base,” in The Semantic Web (ESWC 2020), Heraklion, Greece, 2020.
Export
BibTeX
@inproceedings{Pellissier_ESCW2020,
TITLE = {{YAGO 4}: {A} Reason-able Knowledge Base},
AUTHOR = {Pellissier Tanon, Thomas and Weikum, Gerhard and Suchanek, Fabian},
LANGUAGE = {eng},
ISBN = {978-3-030-49460-5},
DOI = {10.1007/978-3-030-49461-2_34},
PUBLISHER = {Springer},
YEAR = {2020},
DATE = {2020},
BOOKTITLE = {The Semantic Web (ESWC 2020)},
EDITOR = {Harth, Andreas and Kirrane, Sabrina and Ngonga Ngomo, Axel-Cyrille and Paulheim, Heiko and Rula, Anisa and Gentile, Anna Lisa and Haase, Peter and Cochez, Michael},
PAGES = {583--596},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {12123},
ADDRESS = {Heraklion, Greece},
}
Endnote
%0 Conference Proceedings
%A Pellissier Tanon, Thomas
%A Weikum, Gerhard
%A Suchanek, Fabian
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T YAGO 4: A Reason-able Knowledge Base :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-EFC8-B
%R 10.1007/978-3-030-49461-2_34
%D 2020
%B 17th Extended Semantic Web Conference
%Z date of event: 2020-05-31 - 2020-06-04
%C Heraklion, Greece
%B The Semantic Web
%E Harth, Andreas; Kirrane, Sabrina; Ngonga Ngomo, Axel-Cyrille; Paulheim, Heiko; Rula, Anisa; Gentile, Anna Lisa; Haase, Peter; Cochez, Michael
%P 583 - 596
%I Springer
%@ 978-3-030-49460-5
%B Lecture Notes in Computer Science
%N 12123
[197]
F. Pennerath, P. Mandros, and J. Vreeken, “Discovering Approximate Functional Dependencies using Smoothed Mutual Information,” in KDD ’20, 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Virtual Event, USA, 2020.
Export
BibTeX
@inproceedings{penerath:20:smooth,
TITLE = {Discovering Approximate Functional Dependencies using Smoothed Mutual Information},
AUTHOR = {Pennerath, Fr{\'e}d{\'e}ric and Mandros, Panagiotis and Vreeken, Jilles},
LANGUAGE = {eng},
ISBN = {978-1-4503-7998-4},
DOI = {10.1145/3394486.3403178},
PUBLISHER = {ACM},
YEAR = {2020},
DATE = {2020},
BOOKTITLE = {KDD '20, 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
EDITOR = {Gupta, Rajesh and Liu, Yan and Tang, Jiliang and Prakash, B. Aditya},
PAGES = {1254--1264},
ADDRESS = {Virtual Event, USA},
}
Endnote
%0 Conference Proceedings
%A Pennerath, Frédéric
%A Mandros, Panagiotis
%A Vreeken, Jilles
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Discovering Approximate Functional Dependencies using Smoothed Mutual Information :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-2560-2
%R 10.1145/3394486.3403178
%D 2020
%B 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining
%Z date of event: 2020-08-23 - 2020-08-27
%C Virtual Event, USA
%B KDD '20
%E Gupta, Rajesh; Liu, Yan; Tang, Jiliang; Prakash, B. Aditya
%P 1254 - 1264
%I ACM
%@ 978-1-4503-7998-4
[198]
S. Qiu, B. Xu, J. Zhang, Y. Wang, X. Shen, G. de Melo, C. Long, and X. Li, “EasyAug: An Automatic Textual Data Augmentation Platform for Classification Tasks,” in Companion of The World Wide Web Conference (WWW 2020), Taipei, Taiwan, 2020.
Export
BibTeX
@inproceedings{qiu2020easyaug,
TITLE = {{EasyAug}: {An} Automatic Textual Data Augmentation Platform for Classification Tasks},
AUTHOR = {Qiu, Siyuan and Xu, Binxia and Zhang, Jie and Wang, Yafang and Shen, Xiaoyu and de Melo, Gerard and Long, Chong and Li, Xiaolong},
LANGUAGE = {eng},
ISBN = {978-1-4503-7024-0},
DOI = {10.1145/3366424.3383552},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {Companion of The World Wide Web Conference (WWW 2020)},
EDITOR = {El Fallah, Amal and Sukthankar, Gita and Liu, Tie-Yan and van Steen, Maarten},
PAGES = {249--252},
ADDRESS = {Taipei, Taiwan},
}
Endnote
%0 Conference Proceedings
%A Qiu, Siyuan
%A Xu, Binxia
%A Zhang, Jie
%A Wang, Yafang
%A Shen, Xiaoyu
%A de Melo, Gerard
%A Long, Chong
%A Li, Xiaolong
%+ External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
%T EasyAug: An Automatic Textual Data Augmentation Platform for Classification Tasks :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-143B-0
%R 10.1145/3366424.3383552
%D 2020
%B The World Wide Web Conference
%Z date of event: 2020-04-20 - 2020-04-24
%C Taipei, Taiwan
%B Companion of The World Wide Web Conference
%E El Fallah, Amal; Sukthankar, Gita; Liu, Tie-Yan; van Steen, Maarten
%P 249 - 252
%I ACM
%@ 978-1-4503-7024-0
[199]
N. H. Ramadhana, F. Darari, P. O. H. Putra, W. Nutt, S. Razniewski, and R. I. Akbar, “User-Centered Design for Knowledge Imbalance Analysis: A Case Study of ProWD,” in VOILA!2020, Fifth International Workshop on Visualization and Interaction for Ontologies and Linked Data, Virtual Conference, 2020.
Export
BibTeX
@inproceedings{Ramadhana_VOILA2020,
TITLE = {User-Centered Design for Knowledge Imbalance Analysis: {A} Case Study of {ProWD}},
AUTHOR = {Ramadhana, Nadyah Hani and Darari, Fariz and Putra, Panca O. Hadi and Nutt, Werner and Razniewski, Simon and Akbar, Refo Ilmiya},
LANGUAGE = {eng},
ISSN = {1613-0073},
URL = {http://ceur-ws.org/Vol-2778/paper2.pdf; urn:nbn:de:0074-2778-8},
PUBLISHER = {ceur-ws.org},
YEAR = {2020},
BOOKTITLE = {VOILA!2020, Fifth International Workshop on Visualization and Interaction for Ontologies and Linked Data},
EDITOR = {Ivanova, Valentina and Lambrix, Patrick and Pesquita, Catia and Wiens, Vitalis},
PAGES = {14--27},
EID = {2},
SERIES = {CEUR Workshop Proceedings},
VOLUME = {2778},
ADDRESS = {Virtual Conference},
}
Endnote
%0 Conference Proceedings
%A Ramadhana, Nadyah Hani
%A Darari, Fariz
%A Putra, Panca O. Hadi
%A Nutt, Werner
%A Razniewski, Simon
%A Akbar, Refo Ilmiya
%+ External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T User-Centered Design for Knowledge Imbalance Analysis: A Case Study of ProWD :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-063B-0
%U http://ceur-ws.org/Vol-2778/paper2.pdf
%D 2020
%B Fifth International Workshop on Visualization and Interaction for Ontologies and Linked Data
%Z date of event: 2020-11-02 - 2020-11-02
%C Virtual Conference
%B VOILA!2020
%E Ivanova, Valentina; Lambrix, Patrick; Pesquita, Catia; Wiens, Vitalis
%P 14 - 27
%Z sequence number: 2
%I ceur-ws.org
%B CEUR Workshop Proceedings
%N 2778
%@ false
%U http://ceur-ws.org/Vol-2778/paper2.pdf
[200]
S. Razniewski and P. Das, “Structured Knowledge: Have We Made Progress? An Extrinsic Study of KB Coverage over 19 Years,” in CIKM ’20, 29th ACM International Conference on Information & Knowledge Management, Virtual Event, Ireland, 2020.
Abstract
Structured world knowledge is at the foundation of knowledge-centric AI applications. Despite considerable research on knowledge base construction, beyond mere statement counts, little is known about the progress of KBs, in particular concerning their coverage, and one may wonder whether there is constant progress, or diminishing returns. In this paper we employ question answering and entity summarization as extrinsic use cases for a longitudinal study of the progress of KB coverage. Our analysis shows a near-continuous improvement of two popular KBs, DBpedia and Wikidata, over the last 19 years, with little signs of flattening out or leveling off.
Export
BibTeX
@inproceedings{razniewski2020structured,
TITLE = {Structured Knowledge: {H}ave We Made Progress? {A}n Extrinsic Study of {KB} Coverage over 19 Years},
AUTHOR = {Razniewski, Simon and Das, Priyanka},
LANGUAGE = {eng},
ISBN = {978-1-4503-6859-9},
DOI = {10.1145/3340531.3417447},
PUBLISHER = {ACM},
YEAR = {2020},
DATE = {2020},
ABSTRACT = {Structured world knowledge is at the foundation of knowledge-centric AI applications. Despite considerable research on knowledge base construction, beyond mere statement counts, little is known about the progress of KBs, in particular concerning their coverage, and one may wonder whether there is constant progress, or diminishing returns. In this paper we employ question answering and entity summarization as extrinsic use cases for a longitudinal study of the progress of KB coverage. Our analysis shows a near-continuous improvement of two popular KBs, DBpedia and Wikidata, over the last 19 years, with little signs of flattening out or leveling off.},
BOOKTITLE = {CIKM '20, 29th ACM International Conference on Information \& Knowledge Management},
EDITOR = {d{\textquoteright}Aquin, Mathieu and Dietze, Stefan},
PAGES = {3317--3320},
ADDRESS = {Virtual Event, Ireland},
}
Endnote
%0 Conference Proceedings
%A Razniewski, Simon
%A Das, Priyanka
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Structured Knowledge: Have We Made Progress? An Extrinsic Study of KB Coverage over 19 Years :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FF42-0
%R 10.1145/3340531.3417447
%D 2020
%B 29th ACM International Conference on Information & Knowledge Management
%Z date of event: 2020-10-19 - 2020-10-23
%C Virtual Event, Ireland
%X Structured world knowledge is at the foundation of knowledge-centric AI applications. Despite considerable research on knowledge base construction, beyond mere statement counts, little is known about the progress of KBs, in particular concerning their coverage, and one may wonder whether there is constant progress, or diminishing returns. In this paper we employ question answering and entity summarization as extrinsic use cases for a longitudinal study of the progress of KB coverage. Our analysis shows a near-continuous improvement of two popular KBs, DBpedia and Wikidata, over the last 19 years, with little signs of flattening out or leveling off.
%B CIKM '20
%E d’Aquin, Mathieu; Dietze, Stefan
%P 3317 - 3320
%I ACM
%@ 978-1-4503-6859-9
[201]
J. Romero and S. Razniewski, “Inside Quasimodo: Exploring Construction and Usage of Commonsense Knowledge,” in CIKM ’20, 29th ACM International Conference on Information & Knowledge Management, Virtual Event, Ireland, 2020.
Export
BibTeX
@inproceedings{Romero_CIKM2020,
TITLE = {Inside {Quasimodo}: {E}xploring Construction and Usage of Commonsense Knowledge},
AUTHOR = {Romero, Julien and Razniewski, Simon},
LANGUAGE = {eng},
ISBN = {978-1-4503-6859-9},
DOI = {10.1145/3340531.3417416},
PUBLISHER = {ACM},
YEAR = {2020},
DATE = {2020},
BOOKTITLE = {CIKM '20, 29th ACM International Conference on Information \& Knowledge Management},
EDITOR = {d{\textquoteright}Aquin, Mathieu and Dietze, Stefan},
PAGES = {3445--3448},
ADDRESS = {Virtual Event, Ireland},
}
Endnote
%0 Conference Proceedings
%A Romero, Julien
%A Razniewski, Simon
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Inside Quasimodo: Exploring Construction and Usage of Commonsense Knowledge :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-04C6-4
%R 10.1145/3340531.3417416
%D 2020
%B 29th ACM International Conference on Information & Knowledge Management
%Z date of event: 2020-10-19 - 2020-10-23
%C Virtual Event, Ireland
%B CIKM '20
%E d’Aquin, Mathieu; Dietze, Stefan
%P 3445 - 3448
%I ACM
%@ 978-1-4503-6859-9
[202]
R. Saha Roy and A. Anand, “Question Answering over Curated and Open Web Sources,” 2020. [Online]. Available: https://arxiv.org/abs/2004.11980. (arXiv: 2004.11980)
Abstract
The last few years have seen an explosion of research on the topic of automated question answering (QA), spanning the communities of information retrieval, natural language processing, and artificial intelligence. This tutorial would cover the highlights of this really active period of growth for QA to give the audience a grasp over the families of algorithms that are currently being used. We partition research contributions by the underlying source from where answers are retrieved: curated knowledge graphs, unstructured text, or hybrid corpora. We choose this dimension of partitioning as it is the most discriminative when it comes to algorithm design. Other key dimensions are covered within each sub-topic: like the complexity of questions addressed, and degrees of explainability and interactivity introduced in the systems. We would conclude the tutorial with the most promising emerging trends in the expanse of QA, that would help new entrants into this field make the best decisions to take the community forward. Much has changed in the community since the last tutorial on QA in SIGIR 2016, and we believe that this timely overview will indeed benefit a large number of conference participants.
Export
BibTeX
@online{SahaRoy2004.11980,
TITLE = {Question Answering over Curated and Open Web Sources},
AUTHOR = {Saha Roy, Rishiraj and Anand, Avishek},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2004.11980},
EPRINT = {2004.11980},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {The last few years have seen an explosion of research on the topic of automated question answering (QA), spanning the communities of information retrieval, natural language processing, and artificial intelligence. This tutorial would cover the highlights of this really active period of growth for QA to give the audience a grasp over the families of algorithms that are currently being used. We partition research contributions by the underlying source from where answers are retrieved: curated knowledge graphs, unstructured text, or hybrid corpora. We choose this dimension of partitioning as it is the most discriminative when it comes to algorithm design. Other key dimensions are covered within each sub-topic: like the complexity of questions addressed, and degrees of explainability and interactivity introduced in the systems. We would conclude the tutorial with the most promising emerging trends in the expanse of QA, that would help new entrants into this field make the best decisions to take the community forward. Much has changed in the community since the last tutorial on QA in SIGIR 2016, and we believe that this timely overview will indeed benefit a large number of conference participants.},
}
Endnote
%0 Report
%A Saha Roy, Rishiraj
%A Anand, Avishek
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Question Answering over Curated and Open Web Sources :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-09CA-B
%U https://arxiv.org/abs/2004.11980
%D 2020
%X The last few years have seen an explosion of research on the topic of automated question answering (QA), spanning the communities of information retrieval, natural language processing, and artificial intelligence. This tutorial would cover the highlights of this really active period of growth for QA to give the audience a grasp over the families of algorithms that are currently being used. We partition research contributions by the underlying source from where answers are retrieved: curated knowledge graphs, unstructured text, or hybrid corpora. We choose this dimension of partitioning as it is the most discriminative when it comes to algorithm design. Other key dimensions are covered within each sub-topic: like the complexity of questions addressed, and degrees of explainability and interactivity introduced in the systems. We would conclude the tutorial with the most promising emerging trends in the expanse of QA, that would help new entrants into this field make the best decisions to take the community forward. Much has changed in the community since the last tutorial on QA in SIGIR 2016, and we believe that this timely overview will indeed benefit a large number of conference participants.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[203]
R. Saha Roy and A. Anand, “Question Answering over Curated and Open Web Sources,” in SIGIR ’20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, China, 2020.
Export
BibTeX
@inproceedings{SahaRoy_SIGIR20,
TITLE = {Question Answering over Curated and Open Web Sources},
AUTHOR = {Saha Roy, Rishiraj and Anand, Avishek},
LANGUAGE = {eng},
ISBN = {9781450380164},
DOI = {10.1145/3397271.3401421},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {SIGIR '20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
PAGES = {2432--2435},
ADDRESS = {Virtual Event, China},
}
Endnote
%0 Conference Proceedings
%A Saha Roy, Rishiraj
%A Anand, Avishek
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Question Answering over Curated and Open Web Sources :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-02F6-0
%R 10.1145/3397271.3401421
%D 2020
%B 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2020-07-25 - 2020-07-30
%C Virtual Event, China
%B SIGIR '20
%P 2432 - 2435
%I ACM
%@ 9781450380164
[204]
V. Sathya, S. Ghosh, A. Ramamurthy, and B. R. Tamma, “Small Cell Planning: Resource Management and Interference Mitigation Mechanisms in LTE HetNets,” Wireless Personal Communications, vol. 115, 2020.
Export
BibTeX
@article{Sathya2020,
TITLE = {Small Cell Planning: {R}esource Management and Interference Mitigation Mechanisms in {LTE HetNets}},
AUTHOR = {Sathya, Vanlin and Ghosh, Shrestha and Ramamurthy, Arun and Tamma, Bheemarjuna Reddy},
LANGUAGE = {eng},
ISSN = {0929-6212},
DOI = {10.1007/s11277-020-07574-x},
PUBLISHER = {Springer},
ADDRESS = {New York, NY},
YEAR = {2020},
JOURNAL = {Wireless Personal Communications},
VOLUME = {115},
PAGES = {335--361},
}
Endnote
%0 Journal Article
%A Sathya, Vanlin
%A Ghosh, Shrestha
%A Ramamurthy, Arun
%A Tamma, Bheemarjuna Reddy
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Small Cell Planning: Resource Management and Interference Mitigation Mechanisms in LTE HetNets :
%G eng
%U http://hdl.handle.net/21.11116/0000-0006-B963-A
%R 10.1007/s11277-020-07574-x
%7 2020
%D 2020
%J Wireless Personal Communications
%V 115
%& 335
%P 335 - 361
%I Springer
%C New York, NY
%@ false
[205]
X. Shen, E. Chang, H. Su, C. Niu, and D. Klakow, “Neural Data-to-Text Generation via Jointly Learning the Segmentation and Correspondence,” in The 58th Annual Meeting of the Association for Computational Linguistics (ACL 2020), 2020.
Export
BibTeX
@inproceedings{shen2020neural,
TITLE = {Neural Data-to-Text Generation via Jointly Learning the Segmentation and Correspondence},
AUTHOR = {Shen, Xiaoyu and Chang, Ernie and Su, Hui and Niu, Cheng and Klakow, Dietrich},
LANGUAGE = {eng},
ISBN = {978-1-952148-25-5},
URL = {https://www.aclweb.org/anthology/2020.acl-main.641},
DOI = {10.18653/v1/2020.acl-main.641},
PUBLISHER = {ACL},
YEAR = {2020},
BOOKTITLE = {The 58th Annual Meeting of the Association for Computational Linguistics (ACL 2020)},
EDITOR = {Jurafsky, Dan and Chai, Joyce and Schluter, Natalie and Tetreault, Joel},
PAGES = {7155--7165},
}
Endnote
%0 Conference Proceedings
%A Shen, Xiaoyu
%A Chang, Ernie
%A Su, Hui
%A Niu, Cheng
%A Klakow, Dietrich
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
%T Neural Data-to-Text Generation via Jointly Learning the Segmentation and Correspondence :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-141B-4
%U https://www.aclweb.org/anthology/2020.acl-main.641
%R 10.18653/v1/2020.acl-main.641
%D 2020
%B 58th Annual Meeting of the Association for Computational Linguistics
%Z date of event: 2020-07-05 - 2020-07-10
%B The 58th Annual Meeting of the Association for Computational Linguistics
%E Jurafsky, Dan; Chai, Joyce; Schluter, Natalie; Tetreault, Joel
%P 7155 - 7165
%I ACL
%@ 978-1-952148-25-5
[206]
H. Su, X. Shen, S. Zhao, Z. Xiao, P. Hu, C. Niu, and J. Zhou, “Diversifying Dialogue Generation with Non-Conversational Text,” in The 58th Annual Meeting of the Association for Computational Linguistics (ACL 2020), 2020.
Export
BibTeX
@inproceedings{su2020diversifying,
TITLE = {Diversifying Dialogue Generation with Non-Conversational Text},
AUTHOR = {Su, Hui and Shen, Xiaoyu and Zhao, Sanqiang and Xiao, Zhou and Hu, Pengwei and Niu, Cheng and Zhou, Jie},
LANGUAGE = {eng},
ISBN = {978-1-952148-25-5},
URL = {https://www.aclweb.org/anthology/2020.acl-main.634},
DOI = {10.18653/v1/2020.acl-main.634},
PUBLISHER = {ACL},
YEAR = {2020},
BOOKTITLE = {The 58th Annual Meeting of the Association for Computational Linguistics (ACL 2020)},
EDITOR = {Jurafsky, Dan and Chai, Joyce and Schluter, Natalie and Tetreault, Joel},
PAGES = {7087--7097},
}
Endnote
%0 Conference Proceedings
%A Su, Hui
%A Shen, Xiaoyu
%A Zhao, Sanqiang
%A Xiao, Zhou
%A Hu, Pengwei
%A Niu, Cheng
%A Zhou, Jie
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
%T Diversifying Dialogue Generation with Non-Conversational Text :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-14AF-D
%U https://www.aclweb.org/anthology/2020.acl-main.634
%R 10.18653/v1/2020.acl-main.634
%D 2020
%B 58th Annual Meeting of the Association for Computational Linguistics
%Z date of event: 2020-07-05 - 2020-07-10
%B The 58th Annual Meeting of the Association for Computational Linguistics
%E Jurafsky, Dan; Chai, Joyce; Schluter, Natalie; Tetreault, Joel
%P 7087 - 7097
%I ACL
%@ 978-1-952148-25-5
[207]
S. Sukarieh, “SPRAP: Detecting Opinion Spam Campaigns in Online Rating Services,” Universität des Saarlandes, Saarbrücken, 2020.
Export
BibTeX
@mastersthesis{sukarieh:20:sprap,
TITLE = {{SPRAP}: Detecting Opinion Spam Campaigns in Online Rating Services},
AUTHOR = {Sukarieh, Sandra},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2020},
DATE = {2020},
}
Endnote
%0 Thesis
%A Sukarieh, Sandra
%Y Vreeken, Jilles
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T SPRAP: Detecting Opinion Spam Campaigns in Online Rating Services :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FF00-A
%I Universität des Saarlandes
%C Saarbrücken
%D 2020
%V master
%9 master
[208]
C. Sutton, M. Boley, L. Ghiringhelli, M. Rupp, J. Vreeken, and M. Scheffler, “Identifying Domains of Applicability of Machine Learning Models for Materials Science,” Nature Communications, vol. 11, 2020.
Export
BibTeX
@article{sutton:20:natcomm,
TITLE = {Identifying Domains of Applicability of Machine Learning Models for Materials Science},
AUTHOR = {Sutton, Chris and Boley, Mario and Ghiringhelli, Luca and Rupp, Matthias and Vreeken, Jilles and Scheffler, Matthias},
LANGUAGE = {eng},
ISSN = {2041-1723},
DOI = {10.1038/s41467-020-17112-9},
PUBLISHER = {Nature Publishing Group},
ADDRESS = {London},
YEAR = {2020},
JOURNAL = {Nature Communications},
VOLUME = {11},
EID = {4428},
}
Endnote
%0 Journal Article
%A Sutton, Chris
%A Boley, Mario
%A Ghiringhelli, Luca
%A Rupp, Matthias
%A Vreeken, Jilles
%A Scheffler, Matthias
%+ External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Identifying Domains of Applicability of Machine Learning Models for Materials Science :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-26CF-5
%R 10.1038/s41467-020-17112-9
%7 2020
%D 2020
%J Nature Communications
%O Nat. Commun.
%V 11
%Z sequence number: 4428
%I Nature Publishing Group
%C London
%@ false
[209]
E. Terolli, P. Ernst, and G. Weikum, “Focused Query Expansion with Entity Cores for Patient-Centric Health Search,” in The Semantic Web -- ISWC 2020, Athens, Greece (Virtual Conference), 2020.
Export
BibTeX
@inproceedings{Terolli_ISWC2020,
TITLE = {Focused Query Expansion with Entity Cores for Patient-Centric Health Search},
AUTHOR = {Terolli, Erisa and Ernst, Patrick and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-3-030-62418-7},
DOI = {10.1007/978-3-030-62419-4_31},
PUBLISHER = {Springer},
YEAR = {2020},
DATE = {2020},
BOOKTITLE = {The Semantic Web -- ISWC 2020},
EDITOR = {Pan, Jeff Z. and Tamma, Valentina and D'Amato, Claudia and Janowicz, Krzysztof and Fu, Bo and Polleres, Axel and Seneviratne, Oshani and Kagal, Lalana},
PAGES = {547--564},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {12506},
ADDRESS = {Athens, Greece (Virtual Conference)},
}
Endnote
%0 Conference Proceedings
%A Terolli, Erisa
%A Ernst, Patrick
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Focused Query Expansion with Entity Cores for Patient-Centric Health Search :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-78D7-0
%R 10.1007/978-3-030-62419-4_31
%D 2020
%B 19th International Semantic Web Conference
%Z date of event: 2020-11-02 - 2020-11-06
%C Athens, Greece (Virtual Conference)
%B The Semantic Web -- ISWC 2020
%E Pan, Jeff Z.; Tamma, Valentina; D'Amato, Claudia; Janowicz, Krzysztof; Fu, Bo; Polleres, Axel; Seneviratne, Oshani; Kagal, Lalana
%P 547 - 564
%I Springer
%@ 978-3-030-62418-7
%B Lecture Notes in Computer Science
%N 12506
[210]
A. Tigunova, “Extracting Personal Information from Conversations,” in Companion of The World Wide Web Conference (WWW 2020), Taipei, Taiwan, 2020.
Export
BibTeX
@inproceedings{tigunova2020extracting,
TITLE = {Extracting Personal Information from Conversations},
AUTHOR = {Tigunova, Anna},
LANGUAGE = {eng},
ISBN = {978-1-4503-7024-0},
DOI = {10.1145/3366424.3382089},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {Companion of The World Wide Web Conference (WWW 2020)},
EDITOR = {El Fallah, Amal and Sukthankar, Gita and Liu, Tie-Yan and van Steen, Maarten},
PAGES = {284--288},
ADDRESS = {Taipei, Taiwan},
}
Endnote
%0 Conference Proceedings
%A Tigunova, Anna
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Extracting Personal Information from Conversations :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F845-4
%R 10.1145/3366424.3382089
%D 2020
%B The World Wide Web Conference
%Z date of event: 2020-04-20 - 2020-04-24
%C Taipei, Taiwan
%B Companion of The World Wide Web Conference
%E El Fallah, Amal; Sukthankar, Gita; Liu, Tie-Yan; van Steen, Maarten
%P 284 - 288
%I ACM
%@ 978-1-4503-7024-0
[211]
A. Tigunova, A. Yates, P. Mirza, and G. Weikum, “CHARM: Inferring Personal Attributes from Conversations,” in The 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020), Online, 2020.
Export
BibTeX
@inproceedings{Tigunova_EMNLP20,
TITLE = {{CHARM}: {I}nferring Personal Attributes from Conversations},
AUTHOR = {Tigunova, Anna and Yates, Andrew and Mirza, Paramita and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-952148-60-6},
URL = {https://www.aclweb.org/anthology/2020.emnlp-main.434},
DOI = {10.18653/v1/2020.emnlp-main.434},
PUBLISHER = {ACL},
YEAR = {2020},
BOOKTITLE = {The 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020)},
EDITOR = {Webber, Bonnie and Cohn, Trevor and He, Yulan and Liu, Yang},
PAGES = {5391--5404},
ADDRESS = {Online},
}
Endnote
%0 Conference Proceedings
%A Tigunova, Anna
%A Yates, Andrew
%A Mirza, Paramita
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CHARM: Inferring Personal Attributes from Conversations :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-EEDB-7
%U https://www.aclweb.org/anthology/2020.emnlp-main.434
%R 10.18653/v1/2020.emnlp-main.434
%D 2020
%B Conference on Empirical Methods in Natural Language Processing
%Z date of event: 2020-11-16 - 2020-11-20
%C Online
%B The 2020 Conference on Empirical Methods in Natural Language Processing
%E Webber, Bonnie; Cohn, Trevor; He, Yulan; Liu, Yang
%P 5391 - 5404
%I ACL
%@ 978-1-952148-60-6
%U https://www.aclweb.org/anthology/2020.emnlp-main.434.pdf
[212]
A. Tigunova, P. Mirza, A. Yates, and G. Weikum, “RedDust: a Large Reusable Dataset of Reddit User Traits,” in Twelfth Language Resources and Evaluation Conference (LREC 2020), Marseille, France, 2020.
Export
BibTeX
@inproceedings{Tigunova_ELREC20,
TITLE = {{RedDust}: a Large Reusable Dataset of {Reddit} User Traits},
AUTHOR = {Tigunova, Anna and Mirza, Paramita and Yates, Andrew and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {979-10-95546-34-4},
URL = {https://www.aclweb.org/anthology/2020.lrec-1.751},
PUBLISHER = {ELRA},
YEAR = {2020},
BOOKTITLE = {Twelfth Language Resources and Evaluation Conference (LREC 2020)},
EDITOR = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
PAGES = {6118--6126},
ADDRESS = {Marseille, France},
}
Endnote
%0 Conference Proceedings
%A Tigunova, Anna
%A Mirza, Paramita
%A Yates, Andrew
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T RedDust: a Large Reusable Dataset of Reddit User Traits :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F0A9-B
%U https://www.aclweb.org/anthology/2020.lrec-1.751
%D 2020
%B 12th Language Resources and Evaluation Conference
%Z date of event: 2020-05-11 - 2020-05-16
%C Marseille, France
%B Twelfth Language Resources and Evaluation Conference
%E Calzolari, Nicoletta; Béchet, Frédéric; Blache, Philippe; Choukri, Khalid; Cieri, Christopher; Declerck, Thierry; Goggi, Sara; Mariani, Joseph; Mazo, Hélène; Moreno, Asuncion; Odijk, Jan; Piperidis, Stelios
%P 6118 - 6126
%I ELRA
%@ 979-10-95546-34-4
%U https://www.aclweb.org/anthology/2020.lrec-1.751.pdf
[213]
G. H. Torbati, A. Yates, and G. Weikum, “Personalized Entity Search by Sparse and Scrutable User Profiles,” in CHIIR ’20, Fifth ACM SIGIR Conference on Human Information Interaction and Retrieval, Vancouver, BC, Canada, 2020.
Export
BibTeX
@inproceedings{CHIIR2020Torbati,
TITLE = {Personalized Entity Search by Sparse and Scrutable User Profiles},
AUTHOR = {Torbati, Ghazaleh Haratinezhad and Yates, Andrew and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {9781450368926},
DOI = {10.1145/3343413.3378011},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {CHIIR '20, Fifth ACM SIGIR Conference on Human Information Interaction and Retrieval},
EDITOR = {O'Brien, Heather and Freund, Luanne},
PAGES = {427--431},
ADDRESS = {Vancouver, BC, Canada},
}
Endnote
%0 Conference Proceedings
%A Torbati, Ghazaleh Haratinezhad
%A Yates, Andrew
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Personalized Entity Search by Sparse and Scrutable User Profiles :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-EAD7-F
%R 10.1145/3343413.3378011
%D 2020
%B Fifth ACM SIGIR Conference on Human Information Interaction and Retrieval
%Z date of event: 2020-03-14 - 2020-03-18
%C Vancouver, BC, Canada
%B CHIIR '20
%E O'Brien, Heather; Freund, Luanne
%P 427 - 431
%I ACM
%@ 9781450368926
[214]
T.-K. Tran, M. H. Gad-Elrab, D. Stepanova, E. Kharlamov, and J. Strötgen, “Fast Computation of Explanations for Inconsistency in Large-Scale Knowledge Graphs,” in Companion of The World Wide Web Conference (WWW 2020), Taipei, Taiwan, 2020.
Export
BibTeX
@inproceedings{DBLP:conf/www/TranG0KS20,
TITLE = {Fast Computation of Explanations for Inconsistency in Large-Scale Knowledge Graphs},
AUTHOR = {Tran, Trung-Kien and Gad-Elrab, Mohamed Hassan and Stepanova, Daria and Kharlamov, Evgeny and Str{\"o}tgen, Jannik},
LANGUAGE = {eng},
ISBN = {978-1-4503-7024-0},
DOI = {10.1145/3366423.3380014},
PUBLISHER = {ACM},
YEAR = {2020},
BOOKTITLE = {Companion of The World Wide Web Conference (WWW 2020)},
EDITOR = {El Fallah, Amal and Sukthankar, Gita and Liu, Tie-Yan and van Steen, Maarten},
PAGES = {2613--2619},
ADDRESS = {Taipei, Taiwan},
}
Endnote
%0 Conference Proceedings
%A Tran, Trung-Kien
%A Gad-Elrab, Mohamed Hassan
%A Stepanova, Daria
%A Kharlamov, Evgeny
%A Strötgen, Jannik
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
%T Fast Computation of Explanations for Inconsistency in Large-Scale Knowledge Graphs :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F861-4
%R 10.1145/3366423.3380014
%D 2020
%B The World Wide Web Conference
%Z date of event: 2020-04-20 - 2020-04-24
%C Taipei, Taiwan
%B Companion of The World Wide Web Conference
%E El Fallah, Amal; Sukthankar, Gita; Liu, Tie-Yan; van Steen, Maarten
%P 2613 - 2619
%I ACM
%@ 978-1-4503-7024-0
[215]
L. Wang, X. Shen, G. de Melo, and G. Weikum, “Cross-Domain Learning for Classifying Propaganda in Online Contents,” in Proceedings of the 2020 Truth and Trust Online Conference (TTO 2020), Virtual, 2020.
Export
BibTeX
@inproceedings{Wang_TTO2020,
TITLE = {Cross-Domain Learning for Classifying Propaganda in Online Contents},
AUTHOR = {Wang, Liqiang and Shen, Xiaoyu and de Melo, Gerard and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-7359904-0-8},
URL = {https://truthandtrustonline.com/wp-content/uploads/2020/10/TTO03.pdf},
PUBLISHER = {Hacks Hackers},
YEAR = {2020},
BOOKTITLE = {Proceedings of the 2020 Truth and Trust Online Conference (TTO 2020)},
EDITOR = {De Cristofaro, Emiliano and Nakov, Preslav},
PAGES = {21--31},
ADDRESS = {Virtual},
}
Endnote
%0 Conference Proceedings
%A Wang, Liqiang
%A Shen, Xiaoyu
%A de Melo, Gerard
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Cross-Domain Learning for Classifying Propaganda in Online Contents :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F169-3
%U https://truthandtrustonline.com/wp-content/uploads/2020/10/TTO03.pdf
%D 2020
%B Truth and Trust Online Conference
%Z date of event: 2020-10-16 - 2020-10-17
%C Virtual
%B Proceedings of the 2020 Truth and Trust Online Conference
%E De Cristofaro, Emiliano; Nakov, Preslav
%P 21 - 31
%I Hacks Hackers
%@ 978-1-7359904-0-8
%U https://truthandtrustonline.com/wp-content/uploads/2020/10/TTO03.pdf
[216]
L. Wang, X. Shen, G. de Melo, and G. Weikum, “Cross-Domain Learning for Classifying Propaganda in Online Contents,” 2020. [Online]. Available: https://arxiv.org/abs/2011.06844. (arXiv: 2011.06844)
Abstract
As news and social media exhibit an increasing amount of manipulative polarized content, detecting such propaganda has received attention as a new task for content analysis. Prior work has focused on supervised learning with training data from the same domain. However, as propaganda can be subtle and keeps evolving, manual identification and proper labeling are very demanding. As a consequence, training data is a major bottleneck. In this paper, we tackle this bottleneck and present an approach to leverage cross-domain learning, based on labeled documents and sentences from news and tweets, as well as political speeches with a clear difference in their degrees of being propagandistic. We devise informative features and build various classifiers for propaganda labeling, using cross-domain learning. Our experiments demonstrate the usefulness of this approach, and identify difficulties and limitations in various configurations of sources and targets for the transfer step. We further analyze the influence of various features, and characterize salient indicators of propaganda.
Export
BibTeX
@online{Wang_2011.06844,
TITLE = {Cross-Domain Learning for Classifying Propaganda in Online Contents},
AUTHOR = {Wang, Liqiang and Shen, Xiaoyu and de Melo, Gerard and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2011.06844},
EPRINT = {2011.06844},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {As news and social media exhibit an increasing amount of manipulative polarized content, detecting such propaganda has received attention as a new task for content analysis. Prior work has focused on supervised learning with training data from the same domain. However, as propaganda can be subtle and keeps evolving, manual identification and proper labeling are very demanding. As a consequence, training data is a major bottleneck. In this paper, we tackle this bottleneck and present an approach to leverage cross-domain learning, based on labeled documents and sentences from news and tweets, as well as political speeches with a clear difference in their degrees of being propagandistic. We devise informative features and build various classifiers for propaganda labeling, using cross-domain learning. Our experiments demonstrate the usefulness of this approach, and identify difficulties and limitations in various configurations of sources and targets for the transfer step. We further analyze the influence of various features, and characterize salient indicators of propaganda.},
}
Endnote
%0 Report
%A Wang, Liqiang
%A Shen, Xiaoyu
%A de Melo, Gerard
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Cross-Domain Learning for Classifying Propaganda in Online Contents :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-FEBF-5
%U https://arxiv.org/abs/2011.06844
%D 2020
%X As news and social media exhibit an increasing amount of manipulative polarized content, detecting such propaganda has received attention as a new task for content analysis. Prior work has focused on supervised learning with training data from the same domain. However, as propaganda can be subtle and keeps evolving, manual identification and proper labeling are very demanding. As a consequence, training data is a major bottleneck. In this paper, we tackle this bottleneck and present an approach to leverage cross-domain learning, based on labeled documents and sentences from news and tweets, as well as political speeches with a clear difference in their degrees of being propagandistic. We devise informative features and build various classifiers for propaganda labeling, using cross-domain learning. Our experiments demonstrate the usefulness of this approach, and identify difficulties and limitations in various configurations of sources and targets for the transfer step. We further analyze the influence of various features, and characterize salient indicators of propaganda.
%K Computer Science, Computation and Language, cs.CL
[217]
G. Weikum, L. Dong, S. Razniewski, and F. Suchanek, “Machine Knowledge: Creation and Curation of Comprehensive Knowledge Bases,” 2020. [Online]. Available: https://arxiv.org/abs/2009.11564. (arXiv: 2009.11564)
Abstract
Equipping machines with comprehensive knowledge of the world's entities and their relationships has been a long-standing goal of AI. Over the last decade, large-scale knowledge bases, also known as knowledge graphs, have been automatically constructed from web contents and text sources, and have become a key asset for search engines. This machine knowledge can be harnessed to semantically interpret textual phrases in news, social media and web tables, and contributes to question answering, natural language processing and data analytics. This article surveys fundamental concepts and practical methods for creating and curating large knowledge bases. It covers models and methods for discovering and canonicalizing entities and their semantic types and organizing them into clean taxonomies. On top of this, the article discusses the automatic extraction of entity-centric properties. To support the long-term life-cycle and the quality assurance of machine knowledge, the article presents methods for constructing open schemas and for knowledge curation. Case studies on academic projects and industrial knowledge graphs complement the survey of concepts and methods.
Export
BibTeX
@online{Weikum_2009.11564,
TITLE = {Machine Knowledge: {C}reation and Curation of Comprehensive Knowledge Bases},
AUTHOR = {Weikum, Gerhard and Dong, Luna and Razniewski, Simon and Suchanek, Fabian},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2009.11564},
EPRINT = {2009.11564},
EPRINTTYPE = {arXiv},
YEAR = {2020},
ABSTRACT = {Equipping machines with comprehensive knowledge of the world's entities and their relationships has been a long-standing goal of AI. Over the last decade, large-scale knowledge bases, also known as knowledge graphs, have been automatically constructed from web contents and text sources, and have become a key asset for search engines. This machine knowledge can be harnessed to semantically interpret textual phrases in news, social media and web tables, and contributes to question answering, natural language processing and data analytics. This article surveys fundamental concepts and practical methods for creating and curating large knowledge bases. It covers models and methods for discovering and canonicalizing entities and their semantic types and organizing them into clean taxonomies. On top of this, the article discusses the automatic extraction of entity-centric properties. To support the long-term life-cycle and the quality assurance of machine knowledge, the article presents methods for constructing open schemas and for knowledge curation. Case studies on academic projects and industrial knowledge graphs complement the survey of concepts and methods.},
}
Endnote
%0 Report
%A Weikum, Gerhard
%A Dong, Luna
%A Razniewski, Simon
%A Suchanek, Fabian
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Machine Knowledge: Creation and Curation of Comprehensive Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-F1A6-D
%U https://arxiv.org/abs/2009.11564
%D 2020
%X Equipping machines with comprehensive knowledge of the world's entities and their relationships has been a long-standing goal of AI. Over the last decade, large-scale knowledge bases, also known as knowledge graphs, have been automatically constructed from web contents and text sources, and have become a key asset for search engines. This machine knowledge can be harnessed to semantically interpret textual phrases in news, social media and web tables, and contributes to question answering, natural language processing and data analytics. This article surveys fundamental concepts and practical methods for creating and curating large knowledge bases. It covers models and methods for discovering and canonicalizing entities and their semantic types and organizing them into clean taxonomies. On top of this, the article discusses the automatic extraction of entity-centric properties. To support the long-term life-cycle and the quality assurance of machine knowledge, the article presents methods for constructing open schemas and for knowledge curation. Case studies on academic projects and industrial knowledge graphs complement the survey of concepts and methods.
%K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Databases, cs.DB,Computer Science, General Literature, cs.GL
[218]
G. Weikum, “Entities with Quantities,” Bulletin of the Technical Committee on Data Engineering, vol. 43, no. 1, 2020.
Export
BibTeX
@article{Weikum_Entities2020,
TITLE = {Entities with Quantities},
AUTHOR = {Weikum, Gerhard},
LANGUAGE = {eng},
URL = {http://sites.computer.org/debull/A20mar/p4.pdf},
PUBLISHER = {IEEE Computer Society},
ADDRESS = {Los Alamitos, CA},
YEAR = {2020},
JOURNAL = {Bulletin of the Technical Committee on Data Engineering},
VOLUME = {43},
NUMBER = {1},
PAGES = {4--8},
}
Endnote
%0 Journal Article
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Entities with Quantities :
%G eng
%U http://hdl.handle.net/21.11116/0000-0007-EBBB-E
%U http://sites.computer.org/debull/A20mar/p4.pdf
%7 2020
%D 2020
%J Bulletin of the Technical Committee on Data Engineering
%V 43
%N 1
%& 4
%P 4 - 8
%I IEEE Computer Society
%C Los Alamitos, CA
[219]
B. Xu, S. Qiu, J. Zhang, Y. Wang, X. Shen, and G. de Melo, “Data Augmentation for Multiclass Utterance Classification - A Systematic Study,” in The 28th International Conference on Computational Linguistics (COLING 2020), Barcelona, Spain (Online), 2020.
Export
BibTeX
@inproceedings{xu2020data,
TITLE = {Data Augmentation for Multiclass Utterance Classification -- A Systematic Study},
AUTHOR = {Xu, Binxia and Qiu, Siyuan and Zhang, Jie and Wang, Yafang and Shen, Xiaoyu and de Melo, Gerard},
LANGUAGE = {eng},
ISBN = {978-1-952148-27-9},
URL = {https://www.aclweb.org/anthology/2020.coling-main.479},
DOI = {10.18653/v1/2020.coling-main.479},
PUBLISHER = {ACL},
YEAR = {2020},
BOOKTITLE = {The 28th International Conference on Computational Linguistics (COLING 2020)},
EDITOR = {Scott, Donia and Bel, Nuria and Zong, Chengqing},
PAGES = {5494--5506},
ADDRESS = {Barcelona, Spain (Online)},
}
Endnote
%0 Conference Proceedings
%A Xu, Binxia
%A Qiu, Siyuan
%A Zhang, Jie
%A Wang, Yafang
%A Shen, Xiaoyu
%A de Melo, Gerard
%+ External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Data Augmentation for Multiclass Utterance Classification - A Systematic Study :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-1498-6
%U https://www.aclweb.org/anthology/2020.coling-main.479
%R 10.18653/v1/2020.coling-main.479
%D 2020
%B The 28th International Conference on Computational Linguistics
%Z date of event: 2020-12-08 - 2020-12-13
%C Barcelona, Spain (Online)
%B The 28th International Conference on Computational Linguistics
%E Scott, Donia; Bel, Nuria; Zong, Chengqing
%P 5494 - 5506
%I ACL
%@ 978-1-952148-27-9
[220]
A. Yates, K. M. Jose, X. Zhang, and J. Lin, “Flexible IR Pipelines with Capreolus,” in CIKM ’20, 29th ACM International Conference on Information & Knowledge Management, Virtual Event, Ireland, 2020.
Export
BibTeX
@inproceedings{Yates_CIKM2020,
TITLE = {Flexible {IR} Pipelines with {Capreolus}},
AUTHOR = {Yates, Andrew and Jose, Kevin Martin and Zhang, Xinyu and Lin, Jimmy},
LANGUAGE = {eng},
ISBN = {978-1-4503-6859-9},
DOI = {10.1145/3340531.3412780},
PUBLISHER = {ACM},
YEAR = {2020},
DATE = {2020},
BOOKTITLE = {CIKM '20, 29th ACM International Conference on Information \& Knowledge Management},
EDITOR = {d{\textquoteright}Aquin, Mathieu and Dietze, Stefan},
PAGES = {3181--3188},
ADDRESS = {Virtual Event, Ireland},
}
Endnote
%0 Conference Proceedings
%A Yates, Andrew
%A Jose, Kevin Martin
%A Zhang, Xinyu
%A Lin, Jimmy
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
%T Flexible IR Pipelines with Capreolus :
%G eng
%U http://hdl.handle.net/21.11116/0000-0008-066A-B
%R 10.1145/3340531.3412780
%D 2020
%B 29th ACM International Conference on Information & Knowledge Management
%Z date of event: 2020-10-19 - 2020-10-23
%C Virtual Event, Ireland
%B CIKM '20
%E d’Aquin, Mathieu; Dietze, Stefan
%P 3181 - 3188
%I ACM
%@ 978-1-4503-6859-9
[221]
A. Yates, S. Arora, X. Zhang, W. Yang, K. M. Jose, and J. Lin, “Capreolus: A Toolkit for End-to-End Neural Ad Hoc Retrieval,” in WSDM ’20, 13th International Conference on Web Search and Data Min