2023
[1]
A. S. Anwari, “Learning Filters to Improve Social Media Search,” Universität des Saarlandes, Saarbrücken, 2023.
BibTeX
@mastersthesis{AnwariMSc23,
TITLE = {Learning Filters to Improve Social Media Search},
AUTHOR = {Anwari, Ahmed Sohail},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
}
Endnote
%0 Thesis
%A Anwari, Ahmed Sohail
%Y Yates, Andrew
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Learning Filters to Improve Social Media Search :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-1C08-C
%I Universität des Saarlandes
%C Saarbrücken
%D 2023
%P XI, 69 p.
%V master
%9 master
[2]
H. Arnaout, T.-P. Nguyen, S. Razniewski, and G. Weikum, “UnCommonSense in Action! Informative Negations for Commonsense Knowledge Bases,” in WSDM ’23, 16th ACM International Conference on Web Search and Data Mining, Singapore, 2023.
BibTeX
@inproceedings{Arnaout_WSDM23,
TITLE = {{UnCommonSense} in Action! {I}nformative Negations for Commonsense Knowledge Bases},
AUTHOR = {Arnaout, Hiba and Nguyen, Tuan-Phong and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9407-9},
DOI = {10.1145/3539597.3573027},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {WSDM '23, 16th ACM International Conference on Web Search and Data Mining},
EDITOR = {Chua, Tat-Seng and Lauw, Hady and Si, Luo and Terzi, Evimaria and Tsaparas, Panayiotis},
PAGES = {1120--1123},
ADDRESS = {Singapore},
}
Endnote
%0 Conference Proceedings
%A Arnaout, Hiba
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T UnCommonSense in Action! Informative Negations for Commonsense Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-18BC-6
%R 10.1145/3539597.3573027
%D 2023
%B 16th ACM International Conference on Web Search and Data Mining
%Z date of event: 2023-02-27 - 2023-03-03
%C Singapore
%B WSDM '23
%E Chua, Tat-Seng; Lauw, Hady; Si, Luo; Terzi, Evimaria; Tsaparas, Panayiotis
%P 1120 - 1123
%I ACM
%@ 978-1-4503-9407-9
[3]
H. Arnaout and S. Razniewski, “Can Large Language Models Generate Salient Negative Statements?,” 2023. [Online]. Available: https://arxiv.org/abs/2305.16755. (arXiv: 2305.16755)
Abstract
We examine the ability of large language models (LLMs) to generate salient (interesting) negative statements about real-world entities; an emerging research topic of the last few years. We probe the LLMs using zero- and k-shot unconstrained probes, and compare with traditional methods for negation generation, i.e., pattern-based textual extractions and knowledge-graph-based inferences, as well as crowdsourced gold statements. We measure the correctness and salience of the generated lists about subjects from different domains. Our evaluation shows that guided probes do in fact improve the quality of generated negatives, compared to the zero-shot variant. Nevertheless, using both prompts, LLMs still struggle with the notion of factuality of negatives, frequently generating many ambiguous statements, or statements with negative keywords but a positive meaning.
BibTeX
@online{Arnaout2305.16755,
TITLE = {Can Large Language Models Generate Salient Negative Statements?},
AUTHOR = {Arnaout, Hiba and Razniewski, Simon},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2305.16755},
EPRINT = {2305.16755},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {We examine the ability of large language models (LLMs) to generate salient (interesting) negative statements about real-world entities; an emerging research topic of the last few years. We probe the LLMs using zero- and k-shot unconstrained probes, and compare with traditional methods for negation generation, i.e., pattern-based textual extractions and knowledge-graph-based inferences, as well as crowdsourced gold statements. We measure the correctness and salience of the generated lists about subjects from different domains. Our evaluation shows that guided probes do in fact improve the quality of generated negatives, compared to the zero-shot variant. Nevertheless, using both prompts, LLMs still struggle with the notion of factuality of negatives, frequently generating many ambiguous statements, or statements with negative keywords but a positive meaning.},
}
Endnote
%0 Report
%A Arnaout, Hiba
%A Razniewski, Simon
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Can Large Language Models Generate Salient Negative Statements? :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-3D73-E
%U https://arxiv.org/abs/2305.16755
%D 2023
%X We examine the ability of large language models (LLMs) to generate salient (interesting) negative statements about real-world entities; an emerging research topic of the last few years. We probe the LLMs using zero- and k-shot unconstrained probes, and compare with traditional methods for negation generation, i.e., pattern-based textual extractions and knowledge-graph-based inferences, as well as crowdsourced gold statements. We measure the correctness and salience of the generated lists about subjects from different domains. Our evaluation shows that guided probes do in fact improve the quality of generated negatives, compared to the zero-shot variant. Nevertheless, using both prompts, LLMs still struggle with the notion of factuality of negatives, frequently generating many ambiguous statements, or statements with negative keywords but a positive meaning.
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI
[4]
A. Bashir, “Leveraging Self-Supervised Learning in Domain-Specific Language Models,” Universität des Saarlandes, Saarbrücken, 2023.
BibTeX
@mastersthesis{BashirMSc23,
TITLE = {Leveraging Self-Supervised Learning in Domain-Specific Language Models},
AUTHOR = {Bashir, Abdallah},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
}
Endnote
%0 Thesis
%A Bashir, Abdallah
%Y Terolli, Erisa
%Y Ernst, Patrick
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Leveraging Self-Supervised Learning in Domain-Specific Language Models :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-2D82-E
%I Universität des Saarlandes
%C Saarbrücken
%D 2023
%P XI, 54 p.
%V master
%9 master
[5]
L. Boualili and A. Yates, “A Study of Term-Topic Embeddings for Ranking,” in Advances in Information Retrieval (ECIR 2023), Dublin, Ireland, 2023.
BibTeX
@inproceedings{Boualili_ECIR23,
TITLE = {A Study of Term-Topic Embeddings for Ranking},
AUTHOR = {Boualili, Lila and Yates, Andrew},
LANGUAGE = {eng},
ISBN = {978-3-031-28237-9},
DOI = {10.1007/978-3-031-28238-6_25},
PUBLISHER = {Springer},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
BOOKTITLE = {Advances in Information Retrieval (ECIR 2023)},
EDITOR = {Kamps, Jaap and Goeuriot, Lorraine and Crestani, Fabio and Maistro, Maria and Joho, Hideo and Davis, Brian and Gurrin, Cathal and Kruschwitz, Udo and Caputo, Annalina},
PAGES = {359--366},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {13981},
ADDRESS = {Dublin, Ireland},
}
Endnote
%0 Conference Proceedings
%A Boualili, Lila
%A Yates, Andrew
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T A Study of Term-Topic Embeddings for Ranking :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC34-2
%R 10.1007/978-3-031-28238-6_25
%D 2023
%B 45th European Conference on IR Research
%Z date of event: 2023-04-02 - 2023-04-06
%C Dublin, Ireland
%B Advances in Information Retrieval
%E Kamps, Jaap; Goeuriot, Lorraine; Crestani, Fabio; Maistro, Maria; Joho, Hideo; Davis, Brian; Gurrin, Cathal; Kruschwitz, Udo; Caputo, Annalina
%P 359 - 366
%I Springer
%@ 978-3-031-28237-9
%B Lecture Notes in Computer Science
%N 13981
[6]
P. Christmann, R. Saha Roy, and G. Weikum, “Explainable Conversational Question Answering over Heterogeneous Sources via Iterative Graph Neural Networks,” in SIGIR ’23, 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, Taipei, Taiwan. (Accepted/in press)
BibTeX
@inproceedings{Christmann:SIGIR2023,
TITLE = {Explainable Conversational Question Answering over Heterogeneous Sources via Iterative Graph Neural Networks},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
PUBLISHER = {ACM},
YEAR = {2023},
PUBLREMARK = {Accepted},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGIR '23, 46th International ACM SIGIR Conference on Research and Development in Information Retrieval},
ADDRESS = {Taipei, Taiwan},
}
Endnote
%0 Conference Proceedings
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Explainable Conversational Question Answering over Heterogeneous Sources via Iterative Graph Neural Networks :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-FE28-A
%D 2023
%B 46th International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2023-07-23 - 2023-07-27
%C Taipei, Taiwan
%B SIGIR '23
%I ACM
[7]
P. Christmann, R. Saha Roy, and G. Weikum, “CLOCQ: A Toolkit for Fast and Easy Access to Knowledge Bases,” in BTW 2023, Dresden, Germany, 2023.
BibTeX
@inproceedings{Christmann_BTW2023,
TITLE = {{CLOCQ}: {A} Toolkit for Fast and Easy Access to Knowledge Bases},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-3-88579-725-8},
DOI = {10.18420/BTW2023-28},
PUBLISHER = {GI},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {BTW 2023},
EDITOR = {K{\"o}nig-Ries, Birgitta and Scherzinger, Stefanie and Lehner, Wolfgang and Vossen, Gottfried},
PAGES = {579--591},
SERIES = {Lecture Notes in Informatics},
VOLUME = {P-331},
ADDRESS = {Dresden, Germany},
}
Endnote
%0 Conference Proceedings
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CLOCQ: A Toolkit for Fast and Easy Access to Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-BF14-7
%R 10.18420/BTW2023-28
%D 2023
%B 20th Conference on Database Systems for Business, Technology and Web
%Z date of event: 2023-03-06 - 2023-03-10
%C Dresden, Germany
%B BTW 2023
%E König-Ries, Birgitta; Scherzinger, Stefanie; Lehner, Wolfgang; Vossen, Gottfried
%P 579 - 591
%I GI
%@ 978-3-88579-725-8
%B Lecture Notes in Informatics
%N P-331
[8]
A. Ghazimatin, “Enhancing Explainability and Scrutability of Recommender Systems,” in BTW 2023, Dresden, Germany, 2023.
BibTeX
@inproceedings{DBLP:conf/btw/Ghazimatin23,
TITLE = {Enhancing Explainability and Scrutability of Recommender Systems},
AUTHOR = {Ghazimatin, Azin},
LANGUAGE = {eng},
ISBN = {978-3-88579-725-8},
DOI = {10.18420/BTW2023-32},
PUBLISHER = {GI},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {BTW 2023},
EDITOR = {K{\"o}nig-Ries, Birgitta and Scherzinger, Stefanie and Lehner, Wolfgang and Vossen, Gottfried},
PAGES = {633--640},
SERIES = {Lecture Notes in Informatics},
VOLUME = {P-331},
ADDRESS = {Dresden, Germany},
}
Endnote
%0 Conference Proceedings
%A Ghazimatin, Azin
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Enhancing Explainability and Scrutability of Recommender Systems :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC4D-7
%R 10.18420/BTW2023-32
%D 2023
%B 20th Conference on Database Systems for Business, Technology and Web
%Z date of event: 2023-03-06 - 2023-03-10
%C Dresden, Germany
%B BTW 2023
%E König-Ries, Birgitta; Scherzinger, Stefanie; Lehner, Wolfgang; Vossen, Gottfried
%P 633 - 640
%I GI
%@ 978-3-88579-725-8
%B Lecture Notes in Informatics
%N P-331
[9]
S. Ghosh, S. Razniewski, and G. Weikum, “Answering Count Questions with Structured Answers from Text,” Journal of Web Semantics, vol. 76, 2023.
BibTeX
@article{Ghosh23,
TITLE = {Answering Count Questions with Structured Answers from Text},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
DOI = {10.1016/j.websem.2022.100769},
PUBLISHER = {Elsevier},
ADDRESS = {Amsterdam},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
JOURNAL = {Journal of Web Semantics},
VOLUME = {76},
EID = {100769},
}
Endnote
%0 Journal Article
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Answering Count Questions with Structured Answers from Text :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-47CB-0
%R 10.1016/j.websem.2022.100769
%7 2022
%D 2023
%J Journal of Web Semantics
%V 76
%Z sequence number: 100769
%I Elsevier
%C Amsterdam
[10]
S. Ghosh, S. Razniewski, and G. Weikum, “CoQEx: Entity Counts Explained,” in WSDM ’23, 16th ACM International Conference on Web Search and Data Mining, Singapore, 2023.
BibTeX
@inproceedings{Christmann_WSDM23,
TITLE = {{CoQEx}: {E}ntity Counts Explained},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9407-9},
DOI = {10.1145/3539597.3573021},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {WSDM '23, 16th ACM International Conference on Web Search and Data Mining},
EDITOR = {Chua, Tat-Seng and Lauw, Hady and Si, Luo and Terzi, Evimaria and Tsaparas, Panayiotis},
PAGES = {1168--1171},
ADDRESS = {Singapore},
}
Endnote
%0 Conference Proceedings
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CoQEx: Entity Counts Explained :
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-F41F-0
%R 10.1145/3539597.3573021
%D 2023
%B 16th ACM International Conference on Web Search and Data Mining
%Z date of event: 2023-02-27 - 2023-03-03
%C Singapore
%B WSDM '23
%E Chua, Tat-Seng; Lauw, Hady; Si, Luo; Terzi, Evimaria; Tsaparas, Panayiotis
%P 1168 - 1171
%I ACM
%@ 978-1-4503-9407-9
[11]
S. Ghosh, S. Razniewski, and G. Weikum, “Class Cardinality Comparison as a Fermi Problem,” in WWW ’23, ACM Web Conference, Austin, TX, USA. (arXiv: 2303.04532, Accepted/in press)
Abstract
Questions on class cardinality comparisons are quite tricky to answer and come with its own challenges. They require some kind of reasoning since web documents and knowledge bases, indispensable sources of information, rarely store direct answers to questions, such as, “Are there more astronauts or Physics Nobel Laureates?” We tackle questions on class cardinality comparison by tapping into three sources for absolute cardinalities as well as the cardinalities of orthogonal subgroups of the classes. We propose novel techniques for aggregating signals with partial coverage for more reliable estimates and evaluate them on a dataset of 4005 class pairs, achieving an accuracy of 83.7%.
BibTeX
@inproceedings{Ghosh2303.04532,
TITLE = {Class Cardinality Comparison as a {F}ermi Problem},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
URL = {https://arxiv.org/abs/2303.04532},
EPRINT = {2303.04532},
EPRINTTYPE = {arXiv},
YEAR = {2023},
PUBLREMARK = {Accepted},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Questions on class cardinality comparisons are quite tricky to answer and come with its own challenges. They require some kind of reasoning since web documents and knowledge bases, indispensable sources of information, rarely store direct answers to questions, such as, ``Are there more astronauts or Physics Nobel Laureates?'' We tackle questions on class cardinality comparison by tapping into three sources for absolute cardinalities as well as the cardinalities of orthogonal subgroups of the classes. We propose novel techniques for aggregating signals with partial coverage for more reliable estimates and evaluate them on a dataset of 4005 class pairs, achieving an accuracy of 83.7%.},
BOOKTITLE = {WWW '23, ACM Web Conference},
ADDRESS = {Austin, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Class Cardinality Comparison as a Fermi Problem :
%U http://hdl.handle.net/21.11116/0000-000C-BF05-8
%U https://arxiv.org/abs/2303.04532
%D 2023
%B ACM Web Conference
%Z date of event: 2023-04-30 - 2023-05-04
%C Austin, TX, USA
%X Questions on class cardinality comparisons are quite tricky to answer and come with its own challenges. They require some kind of reasoning since web documents and knowledge bases, indispensable sources of information, rarely store direct answers to questions, such as, “Are there more astronauts or Physics Nobel Laureates?” We tackle questions on class cardinality comparison by tapping into three sources for absolute cardinalities as well as the cardinalities of orthogonal subgroups of the classes. We propose novel techniques for aggregating signals with partial coverage for more reliable estimates and evaluate them on a dataset of 4005 class pairs, achieving an accuracy of 83.7%.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Artificial Intelligence, cs.AI
%B WWW '23
[12]
J. Kalofolias, “Subgroup Discovery for Structured Target Concepts,” Universität des Saarlandes, Saarbrücken, 2023.
BibTeX
@phdthesis{Kalofolias_PhD2023,
TITLE = {Subgroup Discovery for Structured Target Concepts},
AUTHOR = {Kalofolias, Janis},
LANGUAGE = {eng},
URL = {urn:nbn:de:bsz:291--ds-393710},
DOI = {10.22028/D291-39371},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
}
Endnote
%0 Thesis
%A Kalofolias, Janis
%Y Vreeken, Jilles
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Subgroup Discovery for Structured Target Concepts :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-FE96-D
%R 10.22028/D291-39371
%U urn:nbn:de:bsz:291--ds-393710
%F OTHER: hdl:20.500.11880/35569
%I Universität des Saarlandes
%C Saarbrücken
%D 2023
%P xi, 215 p.
%V phd
%9 phd
%U https://scidok.sulb.uni-saarland.de/handle/20.500.11880/35569
[13]
T.-P. Nguyen, S. Razniewski, A. Varde, and G. Weikum, “Extracting Cultural Commonsense Knowledge at Scale,” in WWW ’23, ACM Web Conference, Austin, TX, USA. (Accepted/in press)
BibTeX
@inproceedings{Nguyen_WWW23,
TITLE = {Extracting Cultural Commonsense Knowledge at Scale},
AUTHOR = {Nguyen, Tuan-Phong and Razniewski, Simon and Varde, Aparna and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9416-1},
DOI = {10.1145/3543507.3583535},
PUBLISHER = {ACM},
YEAR = {2023},
PUBLREMARK = {Accepted},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {WWW '23, ACM Web Conference},
ADDRESS = {Austin, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Varde, Aparna
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Extracting Cultural Commonsense Knowledge at Scale :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-9FF7-B
%R 10.1145/3543507.3583535
%D 2023
%B ACM Web Conference
%Z date of event: 2023-04-30 - 2023-05-04
%C Austin, TX, USA
%B WWW '23
%I ACM
%@ 978-1-4503-9416-1
[14]
S. Razniewski, H. Arnaout, S. Ghosh, and F. Suchanek, “Completeness, Recall, and Negation in Open-World Knowledge Bases: A Survey,” 2023. [Online]. Available: https://arxiv.org/abs/2305.05403. (arXiv: 2305.05403)
Abstract
General-purpose knowledge bases (KBs) are a cornerstone of knowledge-centric AI. Many of them are constructed pragmatically from Web sources, and are thus far from complete. This poses challenges for the consumption as well as the curation of their content. While several surveys target the problem of completing incomplete KBs, the first problem is arguably to know whether and where the KB is incomplete in the first place, and to which degree.
In this survey we discuss how knowledge about completeness, recall, and negation in KBs can be expressed, extracted, and inferred. We cover (i) the logical foundations of knowledge representation and querying under partial closed-world semantics; (ii) the estimation of this information via statistical patterns; (iii) the extraction of information about recall from KBs and text; (iv) the identification of interesting negative statements; and (v) relaxed notions of relative recall.
This survey is targeted at two types of audiences: (1) practitioners who are interested in tracking KB quality, focusing extraction efforts, and building quality-aware downstream applications; and (2) data management, knowledge base and semantic web researchers who wish to understand the state of the art of knowledge bases beyond the open-world assumption. Consequently, our survey presents both fundamental methodologies and their working, and gives practice-oriented recommendations on how to choose between different approaches for a problem at hand.
BibTeX
@online{Razniewski_2305.05403,
TITLE = {Completeness, Recall, and Negation in Open-World Knowledge Bases: A Survey},
AUTHOR = {Razniewski, Simon and Arnaout, Hiba and Ghosh, Shrestha and Suchanek, Fabian},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2305.05403},
EPRINT = {2305.05403},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {General-purpose knowledge bases (KBs) are a cornerstone of knowledge-centric AI. Many of them are constructed pragmatically from Web sources, and are thus far from complete. This poses challenges for the consumption as well as the curation of their content. While several surveys target the problem of completing incomplete KBs, the first problem is arguably to know whether and where the KB is incomplete in the first place, and to which degree. In this survey we discuss how knowledge about completeness, recall, and negation in KBs can be expressed, extracted, and inferred. We cover (i) the logical foundations of knowledge representation and querying under partial closed-world semantics; (ii) the estimation of this information via statistical patterns; (iii) the extraction of information about recall from KBs and text; (iv) the identification of interesting negative statements; and (v) relaxed notions of relative recall. This survey is targeted at two types of audiences: (1) practitioners who are interested in tracking KB quality, focusing extraction efforts, and building quality-aware downstream applications; and (2) data management, knowledge base and semantic web researchers who wish to understand the state of the art of knowledge bases beyond the open-world assumption. Consequently, our survey presents both fundamental methodologies and their working, and gives practice-oriented recommendations on how to choose between different approaches for a problem at hand.},
}
Endnote
%0 Report
%A Razniewski, Simon
%A Arnaout, Hiba
%A Ghosh, Shrestha
%A Suchanek, Fabian
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Completeness, Recall, and Negation in Open-World Knowledge Bases: A Survey :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-1C00-4
%U https://arxiv.org/abs/2305.05403
%D 2023
%X General-purpose knowledge bases (KBs) are a cornerstone of knowledge-centric AI. Many of them are constructed pragmatically from Web sources, and are thus far from complete. This poses challenges for the consumption as well as the curation of their content. While several surveys target the problem of completing incomplete KBs, the first problem is arguably to know whether and where the KB is incomplete in the first place, and to which degree. In this survey we discuss how knowledge about completeness, recall, and negation in KBs can be expressed, extracted, and inferred. We cover (i) the logical foundations of knowledge representation and querying under partial closed-world semantics; (ii) the estimation of this information via statistical patterns; (iii) the extraction of information about recall from KBs and text; (iv) the identification of interesting negative statements; and (v) relaxed notions of relative recall. This survey is targeted at two types of audiences: (1) practitioners who are interested in tracking KB quality, focusing extraction efforts, and building quality-aware downstream applications; and (2) data management, knowledge base and semantic web researchers who wish to understand the state of the art of knowledge bases beyond the open-world assumption. Consequently, our survey presents both fundamental methodologies and their working, and gives practice-oriented recommendations on how to choose between different approaches for a problem at hand.
%K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL,Computer Science, Databases, cs.DB,Computer Science, Digital Libraries, cs.DL
[15]
G. H. Torbati, G. Weikum, and A. Yates, “Search-based Recommendation: The Case for Difficult Predictions,” in The ACM Web Conference 2023 (WWW 2023), Austin, TX, USA, 2023.
BibTeX
@inproceedings{Torbati_WWW23,
TITLE = {Search-based Recommendation: {T}he Case for Difficult Predictions},
AUTHOR = {Torbati, Ghazaleh Haratinezhad and Weikum, Gerhard and Yates, Andrew},
LANGUAGE = {eng},
ISBN = {978-1-4503-9419-2},
DOI = {10.1145/3543873.3587374},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The ACM Web Conference 2023 (WWW 2023)},
EDITOR = {Ding, Ying and Tang, Jie and Sequeda, Juan and Aroyo, Lora and Castillo, Carlos and Houben, Geert-Jan},
PAGES = {318--321},
ADDRESS = {Austin, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Torbati, Ghazaleh Haratinezhad
%A Weikum, Gerhard
%A Yates, Andrew
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Search-based Recommendation: The Case for Difficult Predictions :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC45-F
%R 10.1145/3543873.3587374
%D 2023
%B ACM Web Conference
%Z date of event: 2023-04-30 - 2023-05-04
%C Austin, TX, USA
%B The ACM Web Conference 2023
%E Ding, Ying; Tang, Jie; Sequeda, Juan; Aroyo, Lora; Castillo, Carlos; Houben, Geert-Jan
%P 318 - 321
%I ACM
%@ 978-1-4503-9419-2
[16]
B. Veseli, S. Singhania, S. Razniewski, and G. Weikum, “Evaluating Language Models for Knowledge Base Completion,” in The Semantic Web (ESWC 2023), Hersonissos, Greece. (Accepted/in press)
BibTeX
@inproceedings{Veseli_ESWC23,
TITLE = {Evaluating Language Models for Knowledge Base Completion},
AUTHOR = {Veseli, Blerta and Singhania, Sneha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
PUBLISHER = {Springer},
YEAR = {2023},
PUBLREMARK = {Accepted},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The Semantic Web (ESWC 2023)},
SERIES = {Lecture Notes in Computer Science},
ADDRESS = {Hersonissos, Greece},
}
Endnote
%0 Conference Proceedings
%A Veseli, Blerta
%A Singhania, Sneha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Evaluating Language Models for Knowledge Base Completion :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC39-D
%D 2023
%B The European Semantic Web Conference
%Z date of event: 2023-05-28 - 2023-06-01
%C Hersonissos, Greece
%B The Semantic Web
%I Springer
%B Lecture Notes in Computer Science
[17]
B. Veseli, S. Singhania, S. Razniewski, and G. Weikum, “Evaluating Language Models for Knowledge Base Completion,” 2023. [Online]. Available: https://arxiv.org/abs/2303.11082. (arXiv: 2303.11082)
Abstract
Structured knowledge bases (KBs) are a foundation of many intelligent applications, yet are notoriously incomplete. Language models (LMs) have recently been proposed for unsupervised knowledge base completion (KBC), yet, despite encouraging initial results, questions regarding their suitability remain open. Existing evaluations often fall short because they only evaluate on popular subjects, or sample already existing facts from KBs. In this work, we introduce a novel, more challenging benchmark dataset, and a methodology tailored for a realistic assessment of the KBC potential of LMs. For automated assessment, we curate a dataset called WD-KNOWN, which provides an unbiased random sample of Wikidata, containing over 3.9 million facts. In a second step, we perform a human evaluation on predictions that are not yet in the KB, as only this provides real insights into the added value over existing KBs. Our key finding is that biases in dataset conception of previous benchmarks lead to a systematic overestimate of LM performance for KBC. However, our results also reveal strong areas of LMs. We could, for example, perform a significant completion of Wikidata on the relations nativeLanguage, by a factor of ~21 (from 260k to 5.8M) at 82% precision, usedLanguage, by a factor of ~2.1 (from 2.1M to 6.6M) at 82% precision, and citizenOf by a factor of ~0.3 (from 4.2M to 5.3M) at 90% precision. Moreover, we find that LMs possess surprisingly strong generalization capabilities: even on relations where most facts were not directly observed in LM training, prediction quality can be high.
BibTeX
@online{Veseli2303.11082,
TITLE = {Evaluating Language Models for Knowledge Base Completion},
AUTHOR = {Veseli, Blerta and Singhania, Sneha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2303.11082},
EPRINT = {2303.11082},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Structured knowledge bases (KBs) are a foundation of many intelligent applications, yet are notoriously incomplete. Language models (LMs) have recently been proposed for unsupervised knowledge base completion (KBC), yet, despite encouraging initial results, questions regarding their suitability remain open. Existing evaluations often fall short because they only evaluate on popular subjects, or sample already existing facts from KBs. In this work, we introduce a novel, more challenging benchmark dataset, and a methodology tailored for a realistic assessment of the KBC potential of LMs. For automated assessment, we curate a dataset called WD-KNOWN, which provides an unbiased random sample of Wikidata, containing over 3.9 million facts. In a second step, we perform a human evaluation on predictions that are not yet in the KB, as only this provides real insights into the added value over existing KBs. Our key finding is that biases in dataset conception of previous benchmarks lead to a systematic overestimate of LM performance for KBC. However, our results also reveal strong areas of LMs. We could, for example, perform a significant completion of Wikidata on the relations nativeLanguage, by a factor of ~21 (from 260k to 5.8M) at 82% precision, usedLanguage, by a factor of ~2.1 (from 2.1M to 6.6M) at 82% precision, and citizenOf by a factor of ~0.3 (from 4.2M to 5.3M) at 90% precision. Moreover, we find that LMs possess surprisingly strong generalization capabilities: even on relations where most facts were not directly observed in LM training, prediction quality can be high.},
}
Endnote
%0 Report
%A Veseli, Blerta
%A Singhania, Sneha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Evaluating Language Models for Knowledge Base Completion :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-D3CD-F
%U https://arxiv.org/abs/2303.11082
%D 2023
%X Structured knowledge bases (KBs) are a foundation of many intelligent applications, yet are notoriously incomplete. Language models (LMs) have recently been proposed for unsupervised knowledge base completion (KBC), yet, despite encouraging initial results, questions regarding their suitability remain open. Existing evaluations often fall short because they only evaluate on popular subjects, or sample already existing facts from KBs. In this work, we introduce a novel, more challenging benchmark dataset, and a methodology tailored for a realistic assessment of the KBC potential of LMs. For automated assessment, we curate a dataset called WD-KNOWN, which provides an unbiased random sample of Wikidata, containing over 3.9 million facts. In a second step, we perform a human evaluation on predictions that are not yet in the KB, as only this provides real insights into the added value over existing KBs. Our key finding is that biases in dataset conception of previous benchmarks lead to a systematic overestimate of LM performance for KBC. However, our results also reveal strong areas of LMs. We could, for example, perform a significant completion of Wikidata on the relations nativeLanguage, by a factor of ~21 (from 260k to 5.8M) at 82% precision, usedLanguage, by a factor of ~2.1 (from 2.1M to 6.6M) at 82% precision, and citizenOf by a factor of ~0.3 (from 4.2M to 5.3M) at 90% precision. Moreover, we find that LMs possess surprisingly strong generalization capabilities: even on relations where most facts were not directly observed in LM training, prediction quality can be high.
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI
%U https://github.com/bveseli/LMsForKBC