2023
[1]
A. S. Anwari, “Learning Filters to Improve Social Media Search,” Universität des Saarlandes, Saarbrücken, 2023.
BibTeX
@mastersthesis{AnwariMSc23,
TITLE = {Learning Filters to Improve Social Media Search},
AUTHOR = {Anwari, Ahmed Sohail},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
}
Endnote
%0 Thesis
%A Anwari, Ahmed Sohail
%Y Yates, Andrew
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Learning Filters to Improve Social Media Search :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-1C08-C
%I Universität des Saarlandes
%C Saarbrücken
%D 2023
%P XI, 69 p.
%V master
%9 master
[2]
H. Arnaout, T.-P. Nguyen, S. Razniewski, and G. Weikum, “UnCommonSense in Action! Informative Negations for Commonsense Knowledge Bases,” in WSDM ’23, 16th ACM International Conference on Web Search and Data Mining, Singapore, 2023.
BibTeX
@inproceedings{Arnaout_WSDM23,
TITLE = {{UnCommonSense} in Action! {I}nformative Negations for Commonsense Knowledge Bases},
AUTHOR = {Arnaout, Hiba and Nguyen, Tuan-Phong and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9407-9},
DOI = {10.1145/3539597.3573027},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {WSDM '23, 16th ACM International Conference on Web Search and Data Mining},
EDITOR = {Chua, Tat-Seng and Lauw, Hady and Si, Luo and Terzi, Evimaria and Tsaparas, Panayiotis},
PAGES = {1120--1123},
ADDRESS = {Singapore},
}
Endnote
%0 Conference Proceedings
%A Arnaout, Hiba
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T UnCommonSense in Action! Informative Negations for Commonsense Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-18BC-6
%R 10.1145/3539597.3573027
%D 2023
%B 16th ACM International Conference on Web Search and Data Mining
%Z date of event: 2023-02-27 - 2023-03-03
%C Singapore
%B WSDM '23
%E Chua, Tat-Seng; Lauw, Hady; Si, Luo; Terzi, Evimaria; Tsaparas, Panayiotis
%P 1120 - 1123
%I ACM
%@ 978-1-4503-9407-9
[3]
H. Arnaout and S. Razniewski, “Can Large Language Models Generate Salient Negative Statements?,” 2023. [Online]. Available: https://arxiv.org/abs/2305.16755. (arXiv: 2305.16755)
Abstract
We examine the ability of large language models (LLMs) to generate salient (interesting) negative statements about real-world entities; an emerging research topic of the last few years. We probe the LLMs using zero- and k-shot unconstrained probes, and compare with traditional methods for negation generation, i.e., pattern-based textual extractions and knowledge-graph-based inferences, as well as crowdsourced gold statements. We measure the correctness and salience of the generated lists about subjects from different domains. Our evaluation shows that guided probes do in fact improve the quality of generated negatives, compared to the zero-shot variant. Nevertheless, using both prompts, LLMs still struggle with the notion of factuality of negatives, frequently generating many ambiguous statements, or statements with negative keywords but a positive meaning.
BibTeX
@online{Arnaout2305.16755,
TITLE = {Can Large Language Models Generate Salient Negative Statements?},
AUTHOR = {Arnaout, Hiba and Razniewski, Simon},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2305.16755},
EPRINT = {2305.16755},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {We examine the ability of large language models (LLMs) to generate salient (interesting) negative statements about real-world entities; an emerging research topic of the last few years. We probe the LLMs using zero- and k-shot unconstrained probes, and compare with traditional methods for negation generation, i.e., pattern-based textual extractions and knowledge-graph-based inferences, as well as crowdsourced gold statements. We measure the correctness and salience of the generated lists about subjects from different domains. Our evaluation shows that guided probes do in fact improve the quality of generated negatives, compared to the zero-shot variant. Nevertheless, using both prompts, LLMs still struggle with the notion of factuality of negatives, frequently generating many ambiguous statements, or statements with negative keywords but a positive meaning.},
}
Endnote
%0 Report
%A Arnaout, Hiba
%A Razniewski, Simon
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Can Large Language Models Generate Salient Negative Statements? :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-3D73-E
%U https://arxiv.org/abs/2305.16755
%D 2023
%X We examine the ability of large language models (LLMs) to generate salient (interesting) negative statements about real-world entities; an emerging research topic of the last few years. We probe the LLMs using zero- and k-shot unconstrained probes, and compare with traditional methods for negation generation, i.e., pattern-based textual extractions and knowledge-graph-based inferences, as well as crowdsourced gold statements. We measure the correctness and salience of the generated lists about subjects from different domains. Our evaluation shows that guided probes do in fact improve the quality of generated negatives, compared to the zero-shot variant. Nevertheless, using both prompts, LLMs still struggle with the notion of factuality of negatives, frequently generating many ambiguous statements, or statements with negative keywords but a positive meaning.
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI
[4]
A. Bashir, “Leveraging Self-Supervised Learning in Domain-Specific Language Models,” Universität des Saarlandes, Saarbrücken, 2023.
BibTeX
@mastersthesis{BashirMSc23,
TITLE = {Leveraging Self-Supervised Learning in Domain-Specific Language Models},
AUTHOR = {Bashir, Abdallah},
LANGUAGE = {eng},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
}
Endnote
%0 Thesis
%A Bashir, Abdallah
%Y Terolli, Erisa
%Y Ernst, Patrick
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Leveraging Self-Supervised Learning in Domain-Specific Language Models :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-2D82-E
%I Universität des Saarlandes
%C Saarbrücken
%D 2023
%P XI, 54 p.
%V master
%9 master
[5]
L. Boualili and A. Yates, “A Study of Term-Topic Embeddings for Ranking,” in Advances in Information Retrieval (ECIR 2023), Dublin, Ireland, 2023.
BibTeX
@inproceedings{Boualili_ECIR23,
TITLE = {A Study of Term-Topic Embeddings for Ranking},
AUTHOR = {Boualili, Lila and Yates, Andrew},
LANGUAGE = {eng},
ISBN = {978-3-031-28237-9},
DOI = {10.1007/978-3-031-28238-6_25},
PUBLISHER = {Springer},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
BOOKTITLE = {Advances in Information Retrieval (ECIR 2023)},
EDITOR = {Kamps, Jaap and Goeuriot, Lorraine and Crestani, Fabio and Maistro, Maria and Joho, Hideo and Davis, Brian and Gurrin, Cathal and Kruschwitz, Udo and Caputo, Annalina},
PAGES = {359--366},
SERIES = {Lecture Notes in Computer Science},
VOLUME = {13981},
ADDRESS = {Dublin, Ireland},
}
Endnote
%0 Conference Proceedings
%A Boualili, Lila
%A Yates, Andrew
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T A Study of Term-Topic Embeddings for Ranking :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC34-2
%R 10.1007/978-3-031-28238-6_25
%D 2023
%B 45th European Conference on IR Research
%Z date of event: 2023-04-02 - 2023-04-06
%C Dublin, Ireland
%B Advances in Information Retrieval
%E Kamps, Jaap; Goeuriot, Lorraine; Crestani, Fabio; Maistro, Maria; Joho, Hideo; Davis, Brian; Gurrin, Cathal; Kruschwitz, Udo; Caputo, Annalina
%P 359 - 366
%I Springer
%@ 978-3-031-28237-9
%B Lecture Notes in Computer Science
%N 13981
[6]
L. Chen, S. Razniewski, and G. Weikum, “Knowledge Base Completion for Long-Tail Entities,” 2023. [Online]. Available: https://arxiv.org/abs/2306.17472. (arXiv: 2306.17472)
Abstract
Despite their impressive scale, knowledge bases (KBs), such as Wikidata, still contain significant gaps. Language models (LMs) have been proposed as a source for filling these gaps. However, prior works have focused on prominent entities with rich coverage by LMs, neglecting the crucial case of long-tail entities. In this paper, we present a novel method for LM-based-KB completion that is specifically geared for facts about long-tail entities. The method leverages two different LMs in two stages: for candidate retrieval and for candidate verification and disambiguation. To evaluate our method and various baselines, we introduce a novel dataset, called MALT, rooted in Wikidata. Our method outperforms all baselines in F1, with major gains especially in recall.
BibTeX
@online{Chen2306.17472,
TITLE = {Knowledge Base Completion for Long-Tail Entities},
AUTHOR = {Chen, Lihu and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2306.17472},
EPRINT = {2306.17472},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Despite their impressive scale, knowledge bases (KBs), such as Wikidata, still contain significant gaps. Language models (LMs) have been proposed as a source for filling these gaps. However, prior works have focused on prominent entities with rich coverage by LMs, neglecting the crucial case of long-tail entities. In this paper, we present a novel method for LM-based-KB completion that is specifically geared for facts about long-tail entities. The method leverages two different LMs in two stages: for candidate retrieval and for candidate verification and disambiguation. To evaluate our method and various baselines, we introduce a novel dataset, called MALT, rooted in Wikidata. Our method outperforms all baselines in F1, with major gains especially in recall.},
}
Endnote
%0 Report
%A Chen, Lihu
%A Razniewski, Simon
%A Weikum, Gerhard
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Knowledge Base Completion for Long-Tail Entities :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-6C7F-D
%U https://arxiv.org/abs/2306.17472
%D 2023
%X Despite their impressive scale, knowledge bases (KBs), such as Wikidata, still contain significant gaps. Language models (LMs) have been proposed as a source for filling these gaps. However, prior works have focused on prominent entities with rich coverage by LMs, neglecting the crucial case of long-tail entities. In this paper, we present a novel method for LM-based-KB completion that is specifically geared for facts about long-tail entities. The method leverages two different LMs in two stages: for candidate retrieval and for candidate verification and disambiguation. To evaluate our method and various baselines, we introduce a novel dataset, called MALT, rooted in Wikidata. Our method outperforms all baselines in F1, with major gains especially in recall.
%K Computer Science, Computation and Language, cs.CL
[7]
P. Christmann, R. Saha Roy, and G. Weikum, “Explainable Conversational Question Answering over Heterogeneous Sources via Iterative Graph Neural Networks,” in SIGIR ’23, 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, Taipei, Taiwan. (Accepted/in press)
BibTeX
@inproceedings{Christmann:SIGIR2023,
TITLE = {Explainable Conversational Question Answering over Heterogeneous Sources via Iterative Graph Neural Networks},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
PUBLISHER = {ACM},
YEAR = {2023},
PUBLREMARK = {Accepted},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGIR '23, 46th International ACM SIGIR Conference on Research and Development in Information Retrieval},
ADDRESS = {Taipei, Taiwan},
}
Endnote
%0 Conference Proceedings
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Explainable Conversational Question Answering over Heterogeneous Sources via Iterative Graph Neural Networks :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-FE28-A
%D 2023
%B 46th International ACM SIGIR Conference on Research and Development in Information Retrieval
%Z date of event: 2023-07-23 - 2023-07-27
%C Taipei, Taiwan
%B SIGIR '23
%I ACM
[8]
P. Christmann, R. Saha Roy, and G. Weikum, “CompMix: A Benchmark for Heterogeneous Question Answering,” 2023. [Online]. Available: https://arxiv.org/abs/2306.12235. (arXiv: 2306.12235)
Abstract
Fact-centric question answering (QA) often requires access to multiple, heterogeneous, information sources. By jointly considering several sources like a knowledge base (KB), a text collection, and tables from the web, QA systems can enhance their answer coverage and confidence. However, existing QA benchmarks are mostly constructed with a single source of knowledge in mind. This limits capabilities of these benchmarks to fairly evaluate QA systems that can tap into more than one information repository. To bridge this gap, we release CompMix, a crowdsourced QA benchmark which naturally demands the integration of a mixture of input sources. CompMix has a total of 9,410 questions, and features several complex intents like joins and temporal conditions. Evaluation of a range of QA systems on CompMix highlights the need for further research on leveraging information from heterogeneous sources.
BibTeX
@online{Christmann2306.12235,
TITLE = {{CompMix}: A Benchmark for Heterogeneous Question Answering},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2306.12235},
EPRINT = {2306.12235},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Fact-centric question answering (QA) often requires access to multiple, heterogeneous, information sources. By jointly considering several sources like a knowledge base (KB), a text collection, and tables from the web, QA systems can enhance their answer coverage and confidence. However, existing QA benchmarks are mostly constructed with a single source of knowledge in mind. This limits capabilities of these benchmarks to fairly evaluate QA systems that can tap into more than one information repository. To bridge this gap, we release CompMix, a crowdsourced QA benchmark which naturally demands the integration of a mixture of input sources. CompMix has a total of 9,410 questions, and features several complex intents like joins and temporal conditions. Evaluation of a range of QA systems on CompMix highlights the need for further research on leveraging information from heterogeneous sources.},
}
Endnote
%0 Report
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CompMix: A Benchmark for Heterogeneous Question Answering :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-579D-1
%U https://arxiv.org/abs/2306.12235
%D 2023
%X Fact-centric question answering (QA) often requires access to multiple, heterogeneous, information sources. By jointly considering several sources like a knowledge base (KB), a text collection, and tables from the web, QA systems can enhance their answer coverage and confidence. However, existing QA benchmarks are mostly constructed with a single source of knowledge in mind. This limits capabilities of these benchmarks to fairly evaluate QA systems that can tap into more than one information repository. To bridge this gap, we release CompMix, a crowdsourced QA benchmark which naturally demands the integration of a mixture of input sources. CompMix has a total of 9,410 questions, and features several complex intents like joins and temporal conditions. Evaluation of a range of QA systems on CompMix highlights the need for further research on leveraging information from heterogeneous sources.
%K Computer Science, Information Retrieval, cs.IR
[9]
P. Christmann, R. Saha Roy, and G. Weikum, “CLOCQ: A Toolkit for Fast and Easy Access to Knowledge Bases,” in BTW 2023, Dresden, Germany, 2023.
BibTeX
@inproceedings{Christmann_BTW2023,
TITLE = {{CLOCQ}: {A} Toolkit for Fast and Easy Access to Knowledge Bases},
AUTHOR = {Christmann, Philipp and Saha Roy, Rishiraj and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-3-88579-725-8},
DOI = {10.18420/BTW2023-28},
PUBLISHER = {GI},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {BTW 2023},
EDITOR = {K{\"o}nig-Ries, Birgitta and Scherzinger, Stefanie and Lehner, Wolfgang and Vossen, Gottfried},
PAGES = {579--591},
SERIES = {Lecture Notes in Informatics},
VOLUME = {P-331},
ADDRESS = {Dresden, Germany},
}
Endnote
%0 Conference Proceedings
%A Christmann, Philipp
%A Saha Roy, Rishiraj
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CLOCQ: A Toolkit for Fast and Easy Access to Knowledge Bases :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-BF14-7
%R 10.18420/BTW2023-28
%D 2023
%B 20th Conference on Database Systems for Business, Technology and Web
%Z date of event: 2023-03-06 - 2023-03-10
%C Dresden, Germany
%B BTW 2023
%E König-Ries, Birgitta; Scherzinger, Stefanie; Lehner, Wolfgang; Vossen, Gottfried
%P 579 - 591
%I GI
%@ 978-3-88579-725-8
%B Lecture Notes in Informatics
%N P-331
[10]
X. L. Dong, B. Li, J. Stoyanovich, A. K. H. Tung, G. Weikum, A. Halevy, and W.-C. Tan, “Personal Data for Personal Use: Vision or Reality?,” in SIGMOD ’23 Companion, Seattle, WA, USA, 2023.
BibTeX
@inproceedings{DongPODS23,
TITLE = {Personal Data for Personal Use: Vision or Reality?},
AUTHOR = {Dong, Xin Luna and Li, Bo and Stoyanovich, Julia and Tung, Anthony Kum Hoe and Weikum, Gerhard and Halevy, Alon and Tan, Wang-Chiew},
LANGUAGE = {eng},
ISBN = {978-1-4503-9507-6},
DOI = {10.1145/3555041.3589378},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {SIGMOD '23 Companion},
EDITOR = {Das, Sudipto and Pandis, Ippokratis and Candan, K. Sel{\c c}uk and Amer-Yahia, Sihem},
PAGES = {263--264},
ADDRESS = {Seattle, WA, USA},
}
Endnote
%0 Conference Proceedings
%A Dong, Xin Luna
%A Li, Bo
%A Stoyanovich, Julia
%A Tung, Anthony Kum Hoe
%A Weikum, Gerhard
%A Halevy, Alon
%A Tan, Wang-Chiew
%+ External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Personal Data for Personal Use: Vision or Reality? :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-5775-E
%R 10.1145/3555041.3589378
%D 2023
%B ACM/SIGMOD International Conference on Management of Data
%Z date of event: 2023-06-18 - 2023-06-23
%C Seattle, WA, USA
%B SIGMOD '23 Companion
%E Das, Sudipto; Pandis, Ippokratis; Candan, K. Selçuk; Amer-Yahia, Sihem
%P 263 - 264
%I ACM
%@ 978-1-4503-9507-6
[11]
A. Ghazimatin, “Enhancing Explainability and Scrutability of Recommender Systems,” in BTW 2023, Dresden, Germany, 2023.
BibTeX
@inproceedings{DBLP:conf/btw/Ghazimatin23,
TITLE = {Enhancing Explainability and Scrutability of Recommender Systems},
AUTHOR = {Ghazimatin, Azin},
LANGUAGE = {eng},
ISBN = {978-3-88579-725-8},
DOI = {10.18420/BTW2023-32},
PUBLISHER = {GI},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {BTW 2023},
EDITOR = {K{\"o}nig-Ries, Birgitta and Scherzinger, Stefanie and Lehner, Wolfgang and Vossen, Gottfried},
PAGES = {633--640},
SERIES = {Lecture Notes in Informatics},
VOLUME = {P-331},
ADDRESS = {Dresden, Germany},
}
Endnote
%0 Conference Proceedings
%A Ghazimatin, Azin
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Enhancing Explainability and Scrutability of Recommender Systems :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC4D-7
%R 10.18420/BTW2023-32
%D 2023
%B 20th Conference on Database Systems for Business, Technology and Web
%Z date of event: 2023-03-06 - 2023-03-10
%C Dresden, Germany
%B BTW 2023
%E König-Ries, Birgitta; Scherzinger, Stefanie; Lehner, Wolfgang; Vossen, Gottfried
%P 633 - 640
%I GI
%@ 978-3-88579-725-8
%B Lecture Notes in Informatics
%N P-331
[12]
S. Ghosh, S. Razniewski, and G. Weikum, “Answering Count Questions with Structured Answers from Text,” Journal of Web Semantics, vol. 76, 2023.
BibTeX
@article{Ghosh23,
TITLE = {Answering Count Questions with Structured Answers from Text},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
DOI = {10.1016/j.websem.2022.100769},
PUBLISHER = {Elsevier},
ADDRESS = {Amsterdam},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
JOURNAL = {Journal of Web Semantics},
VOLUME = {76},
EID = {100769},
}
Endnote
%0 Journal Article
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Answering Count Questions with Structured Answers from Text :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-47CB-0
%R 10.1016/j.websem.2022.100769
%7 2022
%D 2023
%J Journal of Web Semantics
%V 76
%Z sequence number: 100769
%I Elsevier
%C Amsterdam
[13]
S. Ghosh, S. Razniewski, and G. Weikum, “CoQEx: Entity Counts Explained,” in WSDM ’23, 16th ACM International Conference on Web Search and Data Mining, Singapore, 2023.
BibTeX
@inproceedings{Ghosh_WSDM23,
TITLE = {{CoQEx}: {E}ntity Counts Explained},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9407-9},
DOI = {10.1145/3539597.3573021},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {WSDM '23, 16th ACM International Conference on Web Search and Data Mining},
EDITOR = {Chua, Tat-Seng and Lauw, Hady and Si, Luo and Terzi, Evimaria and Tsaparas, Panayiotis},
PAGES = {1168--1171},
ADDRESS = {Singapore},
}
Endnote
%0 Conference Proceedings
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T CoQEx: Entity Counts Explained :
%G eng
%U http://hdl.handle.net/21.11116/0000-000B-F41F-0
%R 10.1145/3539597.3573021
%D 2023
%B 16th ACM International Conference on Web Search and Data Mining
%Z date of event: 2023-02-27 - 2023-03-03
%C Singapore
%B WSDM '23
%E Chua, Tat-Seng; Lauw, Hady; Si, Luo; Terzi, Evimaria; Tsaparas, Panayiotis
%P 1168 - 1171
%I ACM
%@ 978-1-4503-9407-9
[14]
S. Ghosh, S. Razniewski, and G. Weikum, “Class Cardinality Comparison as a Fermi Problem,” in The ACM Web Conference 2023 (WWW 2023), Austin, TX, USA, 2023.
Abstract
Questions on class cardinality comparisons are quite tricky to answer and come with its own challenges. They require some kind of reasoning since web documents and knowledge bases, indispensable sources of information, rarely store direct answers to questions, such as, "Are there more astronauts or Physics Nobel Laureates?" We tackle questions on class cardinality comparison by tapping into three sources for absolute cardinalities as well as the cardinalities of orthogonal subgroups of the classes. We propose novel techniques for aggregating signals with partial coverage for more reliable estimates and evaluate them on a dataset of 4005 class pairs, achieving an accuracy of 83.7%.
BibTeX
@inproceedings{Ghosh2303.04532,
TITLE = {Class Cardinality Comparison as a {F}ermi Problem},
AUTHOR = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9419-2},
DOI = {10.1145/3543873.3587334},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Questions on class cardinality comparisons are quite tricky to answer and come with its own challenges. They require some kind of reasoning since web documents and knowledge bases, indispensable sources of information, rarely store direct answers to questions, such as, ``Are there more astronauts or Physics Nobel Laureates?'' We tackle questions on class cardinality comparison by tapping into three sources for absolute cardinalities as well as the cardinalities of orthogonal subgroups of the classes. We propose novel techniques for aggregating signals with partial coverage for more reliable estimates and evaluate them on a dataset of 4005 class pairs, achieving an accuracy of 83.7%.},
BOOKTITLE = {The ACM Web Conference 2023 (WWW 2023)},
EDITOR = {Ding, Ying and Tang, Jie and Sequeda, Juan and Aroyo, Lora and Castillo, Carlos and Houben, Geert-Jan},
PAGES = {148--151},
ADDRESS = {Austin, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Ghosh, Shrestha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Class Cardinality Comparison as a Fermi Problem :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-BF05-8
%R 10.1145/3543873.3587334
%D 2023
%B ACM Web Conference
%Z date of event: 2023-04-30 - 2023-05-04
%C Austin, TX, USA
%X Questions on class cardinality comparisons are quite tricky to answer and come with its own challenges. They require some kind of reasoning since web documents and knowledge bases, indispensable sources of information, rarely store direct answers to questions, such as, "Are there more astronauts or Physics Nobel Laureates?" We tackle questions on class cardinality comparison by tapping into three sources for absolute cardinalities as well as the cardinalities of orthogonal subgroups of the classes. We propose novel techniques for aggregating signals with partial coverage for more reliable estimates and evaluate them on a dataset of 4005 class pairs, achieving an accuracy of 83.7%.
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Artificial Intelligence, cs.AI
%B The ACM Web Conference 2023
%E Ding, Ying; Tang, Jie; Sequeda, Juan; Aroyo, Lora; Castillo, Carlos; Houben, Geert-Jan
%P 148 - 151
%I ACM
%@ 978-1-4503-9419-2
[15]
J. Kalofolias, “Subgroup Discovery for Structured Target Concepts,” Universität des Saarlandes, Saarbrücken, 2023.
BibTeX
@phdthesis{Kalofolias_PhD2023,
TITLE = {Subgroup Discovery for Structured Target Concepts},
AUTHOR = {Kalofolias, Janis},
LANGUAGE = {eng},
URL = {urn:nbn:de:bsz:291--ds-393710},
DOI = {10.22028/D291-39371},
SCHOOL = {Universit{\"a}t des Saarlandes},
ADDRESS = {Saarbr{\"u}cken},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
}
Endnote
%0 Thesis
%A Kalofolias, Janis
%Y Vreeken, Jilles
%A referee: Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Subgroup Discovery for Structured Target Concepts :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-FE96-D
%R 10.22028/D291-39371
%U urn:nbn:de:bsz:291--ds-393710
%F OTHER: hdl:20.500.11880/35569
%I Universität des Saarlandes
%C Saarbrücken
%D 2023
%P xi, 215 p.
%V phd
%9 phd
%U https://scidok.sulb.uni-saarland.de/handle/20.500.11880/35569
[16]
T.-P. Nguyen, S. Razniewski, A. Varde, and G. Weikum, “Extracting Cultural Commonsense Knowledge at Scale,” in The ACM Web Conference 2023 (WWW 2023), Austin, TX, USA, 2023.
BibTeX
@inproceedings{Nguyen_WWW23,
TITLE = {Extracting Cultural Commonsense Knowledge at Scale},
AUTHOR = {Nguyen, Tuan-Phong and Razniewski, Simon and Varde, Aparna and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {978-1-4503-9416-1},
DOI = {10.1145/3543507.3583535},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The ACM Web Conference 2023 (WWW 2023)},
EDITOR = {Ding, Ying and Tang, Jie and Sequeda, Juan and Aroyo, Lora and Castillo, Carlos and Houben, Geert-Jan},
PAGES = {1907--1917},
ADDRESS = {Austin, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Varde, Aparna
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Extracting Cultural Commonsense Knowledge at Scale :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-9FF7-B
%R 10.1145/3543507.3583535
%D 2023
%B ACM Web Conference
%Z date of event: 2023-04-30 - 2023-05-04
%C Austin, TX, USA
%B The ACM Web Conference 2023
%E Ding, Ying; Tang, Jie; Sequeda, Juan; Aroyo, Lora; Castillo, Carlos; Houben, Geert-Jan
%P 1907 - 1917
%I ACM
%@ 978-1-4503-9416-1
[17]
T.-P. Nguyen, S. Razniewski, J. Romero, and G. Weikum, “Refined Commonsense Knowledge from Large-Scale Web Contents,” IEEE Transactions on Knowledge and Data Engineering, vol. 35, no. 8, 2023.
BibTeX
@article{Nguyen_TKDE_2022,
TITLE = {Refined Commonsense Knowledge from Large-Scale Web Contents},
AUTHOR = {Nguyen, Tuan-Phong and Razniewski, Simon and Romero, Julien and Weikum, Gerhard},
LANGUAGE = {eng},
ISSN = {1558-2191},
DOI = {10.1109/TKDE.2022.3206505},
PUBLISHER = {IEEE},
ADDRESS = {Piscataway, NJ},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
DATE = {2023},
JOURNAL = {IEEE Transactions on Knowledge and Data Engineering},
VOLUME = {35},
NUMBER = {8},
PAGES = {8431--8447},
}
Endnote
%0 Journal Article
%A Nguyen, Tuan-Phong
%A Razniewski, Simon
%A Romero, Julien
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Refined Commonsense Knowledge from Large-Scale Web Contents :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-9FEE-6
%R 10.1109/TKDE.2022.3206505
%7 2022
%D 2023
%J IEEE Transactions on Knowledge and Data Engineering
%V 35
%N 8
%& 8431
%P 8431 - 8447
%I IEEE
%C Piscataway, NJ
%@ false
[18]
J. Z. Pan, S. Razniewski, J.-C. Kalo, S. Singhania, J. Chen, S. Dietze, H. Jabeen, J. Omeliyanenko, W. Zhang, M. Lissandrini, R. Biswas, G. de Melo, A. Bonifati, E. Vakaj, M. Dragoni, and D. Graux, “Large Language Models and Knowledge Graphs: Opportunities and Challenges,” 2023. [Online]. Available: https://arxiv.org/abs/2308.06374. (arXiv: 2308.06374)
Abstract
Large Language Models (LLMs) have taken Knowledge Representation -- and the world -- by storm. This inflection point marks a shift from explicit knowledge representation to a renewed focus on the hybrid representation of both explicit knowledge and parametric knowledge. In this position paper, we will discuss some of the common debate points within the community on LLMs (parametric knowledge) and Knowledge Graphs (explicit knowledge) and speculate on opportunities and visions that the renewed focus brings, as well as related research topics and challenges.
BibTeX
@online{Pan2308.06374,
TITLE = {Large Language Models and Knowledge Graphs: Opportunities and Challenges},
AUTHOR = {Pan, Jeff Z. and Razniewski, Simon and Kalo, Jan-Christoph and Singhania, Sneha and Chen, Jiaoyan and Dietze, Stefan and Jabeen, Hajira and Omeliyanenko, Janna and Zhang, Wen and Lissandrini, Matteo and Biswas, Russa and de Melo, Gerard and Bonifati, Angela and Vakaj, Edlira and Dragoni, Mauro and Graux, Damien},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2308.06374},
EPRINT = {2308.06374},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Large Language Models (LLMs) have taken Knowledge Representation -- and the world -- by storm. This inflection point marks a shift from explicit knowledge representation to a renewed focus on the hybrid representation of both explicit knowledge and parametric knowledge. In this position paper, we will discuss some of the common debate points within the community on LLMs (parametric knowledge) and Knowledge Graphs (explicit knowledge) and speculate on opportunities and visions that the renewed focus brings, as well as related research topics and challenges.},
}
Endnote
%0 Report
%A Pan, Jeff Z.
%A Razniewski, Simon
%A Kalo, Jan-Christoph
%A Singhania, Sneha
%A Chen, Jiaoyan
%A Dietze, Stefan
%A Jabeen, Hajira
%A Omeliyanenko, Janna
%A Zhang, Wen
%A Lissandrini, Matteo
%A Biswas, Russa
%A de Melo, Gerard
%A Bonifati, Angela
%A Vakaj, Edlira
%A Dragoni, Mauro
%A Graux, Damien
%+ External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
External Organizations
%T Large Language Models and Knowledge Graphs: Opportunities and Challenges :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-A223-4
%U https://arxiv.org/abs/2308.06374
%D 2023
%X Large Language Models (LLMs) have taken Knowledge Representation -- and the world -- by storm. This inflection point marks a shift from explicit knowledge representation to a renewed focus on the hybrid representation of both explicit knowledge and parametric knowledge. In this position paper, we will discuss some of the common debate points within the community on LLMs (parametric knowledge) and Knowledge Graphs (explicit knowledge) and speculate on opportunities and visions that the renewed focus brings, as well as related research topics and challenges.
%K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL
[19]
S. Razniewski, H. Arnaout, S. Ghosh, and F. Suchanek, “Completeness, Recall, and Negation in Open-World Knowledge Bases: A Survey,” 2023. [Online]. Available: https://arxiv.org/abs/2305.05403. (arXiv: 2305.05403)
Abstract
General-purpose knowledge bases (KBs) are a cornerstone of knowledge-centric AI. Many of them are constructed pragmatically from Web sources, and are thus far from complete. This poses challenges for the consumption as well as the curation of their content. While several surveys target the problem of completing incomplete KBs, the first problem is arguably to know whether and where the KB is incomplete in the first place, and to which degree. In this survey we discuss how knowledge about completeness, recall, and negation in KBs can be expressed, extracted, and inferred. We cover (i) the logical foundations of knowledge representation and querying under partial closed-world semantics; (ii) the estimation of this information via statistical patterns; (iii) the extraction of information about recall from KBs and text; (iv) the identification of interesting negative statements; and (v) relaxed notions of relative recall. This survey is targeted at two types of audiences: (1) practitioners who are interested in tracking KB quality, focusing extraction efforts, and building quality-aware downstream applications; and (2) data management, knowledge base and semantic web researchers who wish to understand the state of the art of knowledge bases beyond the open-world assumption. Consequently, our survey presents both fundamental methodologies and their working, and gives practice-oriented recommendations on how to choose between different approaches for a problem at hand.
BibTeX
@online{Razniewski_2305.05403,
TITLE = {Completeness, Recall, and Negation in Open-World Knowledge Bases: A Survey},
AUTHOR = {Razniewski, Simon and Arnaout, Hiba and Ghosh, Shrestha and Suchanek, Fabian},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2305.05403},
EPRINT = {2305.05403},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {General-purpose knowledge bases (KBs) are a cornerstone of knowledge-centric AI. Many of them are constructed pragmatically from Web sources, and are thus far from complete. This poses challenges for the consumption as well as the curation of their content. While several surveys target the problem of completing incomplete KBs, the first problem is arguably to know whether and where the KB is incomplete in the first place, and to which degree. In this survey we discuss how knowledge about completeness, recall, and negation in KBs can be expressed, extracted, and inferred. We cover (i) the logical foundations of knowledge representation and querying under partial closed-world semantics; (ii) the estimation of this information via statistical patterns; (iii) the extraction of information about recall from KBs and text; (iv) the identification of interesting negative statements; and (v) relaxed notions of relative recall. This survey is targeted at two types of audiences: (1) practitioners who are interested in tracking KB quality, focusing extraction efforts, and building quality-aware downstream applications; and (2) data management, knowledge base and semantic web researchers who wish to understand the state of the art of knowledge bases beyond the open-world assumption. Consequently, our survey presents both fundamental methodologies and their working, and gives practice-oriented recommendations on how to choose between different approaches for a problem at hand.},
}
Endnote
%0 Report
%A Razniewski, Simon
%A Arnaout, Hiba
%A Ghosh, Shrestha
%A Suchanek, Fabian
%+ External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
%T Completeness, Recall, and Negation in Open-World Knowledge Bases: A Survey :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-1C00-4
%U https://arxiv.org/abs/2305.05403
%D 2023
%X General-purpose knowledge bases (KBs) are a cornerstone of knowledge-centric AI. Many of them are constructed pragmatically from Web sources, and are thus far from complete. This poses challenges for the consumption as well as the curation of their content. While several surveys target the problem of completing incomplete KBs, the first problem is arguably to know whether and where the KB is incomplete in the first place, and to which degree. In this survey we discuss how knowledge about completeness, recall, and negation in KBs can be expressed, extracted, and inferred. We cover (i) the logical foundations of knowledge representation and querying under partial closed-world semantics; (ii) the estimation of this information via statistical patterns; (iii) the extraction of information about recall from KBs and text; (iv) the identification of interesting negative statements; and (v) relaxed notions of relative recall. This survey is targeted at two types of audiences: (1) practitioners who are interested in tracking KB quality, focusing extraction efforts, and building quality-aware downstream applications; and (2) data management, knowledge base and semantic web researchers who wish to understand the state of the art of knowledge bases beyond the open-world assumption. Consequently, our survey presents both fundamental methodologies and their working, and gives practice-oriented recommendations on how to choose between different approaches for a problem at hand.
%K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL,Computer Science, Databases, cs.DB,Computer Science, Digital Libraries, cs.DL
[20]
S. Singhania, S. Razniewski, and G. Weikum, “Extracting Multi-valued Relations from Language Models,” 2023. [Online]. Available: https://arxiv.org/abs/2307.03122v2. (arXiv: 2307.03122)
Abstract
The widespread usage of latent language representations via pre-trained language models (LMs) suggests that they are a promising source of structured knowledge. However, existing methods focus only on a single object per subject-relation pair, even though often multiple objects are correct. To overcome this limitation, we analyze these representations for their potential to yield materialized multi-object relational knowledge. We formulate the problem as a rank-then-select task. For ranking candidate objects, we evaluate existing prompting techniques and propose new ones incorporating domain knowledge. Among the selection methods, we find that choosing objects with a likelihood above a learned relation-specific threshold gives a 49.5% F1 score. Our results highlight the difficulty of employing LMs for the multi-valued slot-filling task and pave the way for further research on extracting relational knowledge from latent language representations.
BibTeX
@online{Singhania2307.03122,
TITLE = {Extracting Multi-valued Relations from Language Models},
AUTHOR = {Singhania, Sneha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2307.03122v2},
EPRINT = {2307.03122},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {The widespread usage of latent language representations via pre-trained language models (LMs) suggests that they are a promising source of structured knowledge. However, existing methods focus only on a single object per subject-relation pair, even though often multiple objects are correct. To overcome this limitation, we analyze these representations for their potential to yield materialized multi-object relational knowledge. We formulate the problem as a rank-then-select task. For ranking candidate objects, we evaluate existing prompting techniques and propose new ones incorporating domain knowledge. Among the selection methods, we find that choosing objects with a likelihood above a learned relation-specific threshold gives a 49.5% F1 score. Our results highlight the difficulty of employing LMs for the multi-valued slot-filling task and pave the way for further research on extracting relational knowledge from latent language representations.},
}
Endnote
%0 Report
%A Singhania, Sneha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Extracting Multi-valued Relations from Language Models :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-938B-0
%U https://arxiv.org/abs/2307.03122v2
%D 2023
%X The widespread usage of latent language representations via pre-trained language models (LMs) suggests that they are a promising source of structured knowledge. However, existing methods focus only on a single object per subject-relation pair, even though often multiple objects are correct. To overcome this limitation, we analyze these representations for their potential to yield materialized multi-object relational knowledge. We formulate the problem as a rank-then-select task. For ranking candidate objects, we evaluate existing prompting techniques and propose new ones incorporating domain knowledge. Among the selection methods, we find that choosing objects with a likelihood above a learned relation-specific threshold gives a 49.5% F1 score. Our results highlight the difficulty of employing LMs for the multi-valued slot-filling task and pave the way for further research on extracting relational knowledge from latent language representations.
%K Computer Science, Computation and Language, cs.CL
[21]
G. H. Torbati, G. Weikum, and A. Yates, “Search-based Recommendation: The Case for Difficult Predictions,” in The ACM Web Conference 2023 (WWW 2023), Austin, TX, USA, 2023.
BibTeX
@inproceedings{Torbati_WWW23,
TITLE = {Search-based Recommendation : {T}he Case for Difficult Predictions},
AUTHOR = {Torbati, Ghazaleh Haratinezhad and Weikum, Gerhard and Yates, Andrew},
LANGUAGE = {eng},
ISBN = {978-1-4503-9419-2},
DOI = {10.1145/3543873.3587374},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The ACM Web Conference 2023 (WWW 2023)},
EDITOR = {Ding, Ying and Tang, Jie and Sequeda, Juan and Aroyo, Lora and Castillo, Carlos and Houben, Geert-Jan},
PAGES = {318--321},
ADDRESS = {Austin, TX, USA},
}
Endnote
%0 Conference Proceedings
%A Torbati, Ghazaleh Haratinezhad
%A Weikum, Gerhard
%A Yates, Andrew
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Search-based Recommendation : The Case for Difficult Predictions :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC45-F
%R 10.1145/3543873.3587374
%D 2023
%B ACM Web Conference
%Z date of event: 2023-04-30 - 2023-05-04
%C Austin, TX, USA
%K Computer Science, Information Retrieval, cs.IR,Computer Science, Artificial Intelligence, cs.AI
%B The ACM Web Conference 2023
%E Ding, Ying; Tang, Jie; Sequeda, Juan; Aroyo, Lora; Castillo, Carlos; Houben, Geert-Jan
%P 318 - 321
%I ACM
%@ 978-1-4503-9419-2
[22]
G. H. Torbati, A. Tigunova, and G. Weikum, “Unveiling Challenging Cases in Text-based Recommender Systems,” in Perspectives on the Evaluation of Recommender Systems 2023, Singapore, Singapore, 2023.
BibTeX
@inproceedings{Torbati_PERSPECTIVES23,
TITLE = {Unveiling Challenging Cases in Text-based Recommender Systems},
AUTHOR = {Torbati, Ghazaleh Haratinezhad and Tigunova, Anna and Weikum, Gerhard},
LANGUAGE = {eng},
ISSN = {1613-0073},
URL = {https://ceur-ws.org/Vol-3476/paper5.pdf; urn:nbn:de:0074-3476-4},
PUBLISHER = {CEUR-WS.org},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {Perspectives on the Evaluation of Recommender Systems 2023},
EDITOR = {Said, Alan and Zangerle, Eva and Bauer, Christine},
SERIES = {CEUR Workshop Proceedings},
VOLUME = {3476},
ADDRESS = {Singapore, Singapore},
}
Endnote
%0 Conference Proceedings
%A Torbati, Ghazaleh Haratinezhad
%A Tigunova, Anna
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Unveiling Challenging Cases in Text-based Recommender Systems :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-AEBA-E
%U https://ceur-ws.org/Vol-3476/paper5.pdf
%D 2023
%B 3rd Workshop Perspectives on the Evaluation of Recommender Systems
%Z date of event: 2023-09-19 - 2023-09-19
%C Singapore, Singapore
%B Perspectives on the Evaluation of Recommender Systems 2023
%E Said, Alan; Zangerle, Eva; Bauer, Christine
%I CEUR-WS.org
%B CEUR Workshop Proceedings
%N 3476
%@ false
[23]
A. Varde, D. Karthikeyan, and W. Wang, “Facilitating COVID Recognition from X-Rays with Computer Vision Models and Transfer Learning,” Multimedia Tools and Applications, 2023.
BibTeX
@article{Varde23,
TITLE = {Facilitating {COVID} Recognition from {X}-Rays with Computer Vision Models and Transfer Learning},
AUTHOR = {Varde, Aparna and Karthikeyan, Divydharshini and Wang, Weitian},
LANGUAGE = {eng},
ISSN = {1380-7501},
DOI = {10.1007/s11042-023-15744-9},
PUBLISHER = {Springer Nature},
ADDRESS = {New York, NY},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
JOURNAL = {Multimedia Tools and Applications},
}
Endnote
%0 Journal Article
%A Varde, Aparna
%A Karthikeyan, Divydharshini
%A Wang, Weitian
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
External Organizations
%T Facilitating COVID Recognition from X-Rays with Computer Vision Models and Transfer Learning :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-578B-5
%R 10.1007/s11042-023-15744-9
%7 2023
%D 2023
%J Multimedia Tools and Applications
%I Springer Nature
%C New York, NY
%@ false
[24]
B. Veseli, S. Singhania, S. Razniewski, and G. Weikum, “Evaluating Language Models for Knowledge Base Completion,” in The Semantic Web (ESWC 2023), Hersonissos, Greece. (Accepted/in press)
BibTeX
@inproceedings{Veseli_ESWC23,
TITLE = {Evaluating Language Models for Knowledge Base Completion},
AUTHOR = {Veseli, Blerta and Singhania, Sneha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
PUBLISHER = {Springer},
YEAR = {2023},
PUBLREMARK = {Accepted},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {The Semantic Web (ESWC 2023)},
SERIES = {Lecture Notes in Computer Science},
ADDRESS = {Hersonissos, Greece},
}
Endnote
%0 Conference Proceedings
%A Veseli, Blerta
%A Singhania, Sneha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Evaluating Language Models for Knowledge Base Completion :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-DC39-D
%D 2023
%B The European Semantic Web Conference
%Z date of event: 2023-05-28 - 2023-06-01
%C Hersonissos, Greece
%B The Semantic Web
%I Springer
%B Lecture Notes in Computer Science
[25]
B. Veseli, S. Singhania, S. Razniewski, and G. Weikum, “Evaluating Language Models for Knowledge Base Completion,” 2023. [Online]. Available: https://arxiv.org/abs/2303.11082. (arXiv: 2303.11082)
Abstract
Structured knowledge bases (KBs) are a foundation of many intelligent applications, yet are notoriously incomplete. Language models (LMs) have recently been proposed for unsupervised knowledge base completion (KBC), yet, despite encouraging initial results, questions regarding their suitability remain open. Existing evaluations often fall short because they only evaluate on popular subjects, or sample already existing facts from KBs. In this work, we introduce a novel, more challenging benchmark dataset, and a methodology tailored for a realistic assessment of the KBC potential of LMs. For automated assessment, we curate a dataset called WD-KNOWN, which provides an unbiased random sample of Wikidata, containing over 3.9 million facts. In a second step, we perform a human evaluation on predictions that are not yet in the KB, as only this provides real insights into the added value over existing KBs. Our key finding is that biases in dataset conception of previous benchmarks lead to a systematic overestimate of LM performance for KBC. However, our results also reveal strong areas of LMs. We could, for example, perform a significant completion of Wikidata on the relations nativeLanguage, by a factor of ~21 (from 260k to 5.8M) at 82% precision, usedLanguage, by a factor of ~2.1 (from 2.1M to 6.6M) at 82% precision, and citizenOf by a factor of ~0.3 (from 4.2M to 5.3M) at 90% precision. Moreover, we find that LMs possess surprisingly strong generalization capabilities: even on relations where most facts were not directly observed in LM training, prediction quality can be high.
BibTeX
@online{Veseli2303.11082,
TITLE = {Evaluating Language Models for Knowledge Base Completion},
AUTHOR = {Veseli, Blerta and Singhania, Sneha and Razniewski, Simon and Weikum, Gerhard},
LANGUAGE = {eng},
URL = {https://arxiv.org/abs/2303.11082},
EPRINT = {2303.11082},
EPRINTTYPE = {arXiv},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
ABSTRACT = {Structured knowledge bases (KBs) are a foundation of many intelligent applications, yet are notoriously incomplete. Language models (LMs) have recently been proposed for unsupervised knowledge base completion (KBC), yet, despite encouraging initial results, questions regarding their suitability remain open. Existing evaluations often fall short because they only evaluate on popular subjects, or sample already existing facts from KBs. In this work, we introduce a novel, more challenging benchmark dataset, and a methodology tailored for a realistic assessment of the KBC potential of LMs. For automated assessment, we curate a dataset called WD-KNOWN, which provides an unbiased random sample of Wikidata, containing over 3.9 million facts. In a second step, we perform a human evaluation on predictions that are not yet in the KB, as only this provides real insights into the added value over existing KBs. Our key finding is that biases in dataset conception of previous benchmarks lead to a systematic overestimate of LM performance for KBC. However, our results also reveal strong areas of LMs. We could, for example, perform a significant completion of Wikidata on the relations nativeLanguage, by a factor of ~21 (from 260k to 5.8M) at 82% precision, usedLanguage, by a factor of ~2.1 (from 2.1M to 6.6M) at 82% precision, and citizenOf by a factor of ~0.3 (from 4.2M to 5.3M) at 90% precision. Moreover, we find that LMs possess surprisingly strong generalization capabilities: even on relations where most facts were not directly observed in LM training, prediction quality can be high.},
}
Endnote
%0 Report
%A Veseli, Blerta
%A Singhania, Sneha
%A Razniewski, Simon
%A Weikum, Gerhard
%+ Databases and Information Systems, MPI for Informatics, Max Planck Society
Databases and Information Systems, MPI for Informatics, Max Planck Society
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Evaluating Language Models for Knowledge Base Completion :
%G eng
%U http://hdl.handle.net/21.11116/0000-000C-D3CD-F
%U https://arxiv.org/abs/2303.11082
%D 2023
%X Structured knowledge bases (KBs) are a foundation of many intelligent applications, yet are notoriously incomplete. Language models (LMs) have recently been proposed for unsupervised knowledge base completion (KBC), yet, despite encouraging initial results, questions regarding their suitability remain open. Existing evaluations often fall short because they only evaluate on popular subjects, or sample already existing facts from KBs. In this work, we introduce a novel, more challenging benchmark dataset, and a methodology tailored for a realistic assessment of the KBC potential of LMs. For automated assessment, we curate a dataset called WD-KNOWN, which provides an unbiased random sample of Wikidata, containing over 3.9 million facts. In a second step, we perform a human evaluation on predictions that are not yet in the KB, as only this provides real insights into the added value over existing KBs. Our key finding is that biases in dataset conception of previous benchmarks lead to a systematic overestimate of LM performance for KBC. However, our results also reveal strong areas of LMs. We could, for example, perform a significant completion of Wikidata on the relations nativeLanguage, by a factor of ~21 (from 260k to 5.8M) at 82% precision, usedLanguage, by a factor of ~2.1 (from 2.1M to 6.6M) at 82% precision, and citizenOf by a factor of ~0.3 (from 4.2M to 5.3M) at 90% precision. Moreover, we find that LMs possess surprisingly strong generalization capabilities: even on relations where most facts were not directly observed in LM training, prediction quality can be high.
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI
%U https://github.com/bveseli/LMsForKBC
[26]
M. Zhang, P. Mundra, C. Chikweze, F. Nargesian, and G. Weikum, “Approximate Query Answering over Open Data,” in HILDA 2023, Workshop on Human-In-the-Loop Data Analytics, Seattle, WA, USA, 2023.
BibTeX
@inproceedings{Zhang_HILDA23,
TITLE = {Approximate Query Answering over Open Data},
AUTHOR = {Zhang, Mengqi and Mundra, Pranay and Chikweze, Chukwubuikem and Nargesian, Fatemeh and Weikum, Gerhard},
LANGUAGE = {eng},
ISBN = {979-8-4007-0216-7},
DOI = {10.1145/3597465.3605227},
PUBLISHER = {ACM},
YEAR = {2023},
MARGINALMARK = {$\bullet$},
BOOKTITLE = {HILDA 2023, Workshop on Human-In-the-Loop Data Analytics},
PAGES = {1--3},
EID = {11},
ADDRESS = {Seattle, WA, USA},
}
Endnote
%0 Conference Proceedings
%A Zhang, Mengqi
%A Mundra, Pranay
%A Chikweze, Chukwubuikem
%A Nargesian, Fatemeh
%A Weikum, Gerhard
%+ External Organizations
External Organizations
External Organizations
External Organizations
Databases and Information Systems, MPI for Informatics, Max Planck Society
%T Approximate Query Answering over Open Data :
%G eng
%U http://hdl.handle.net/21.11116/0000-000D-941D-C
%R 10.1145/3597465.3605227
%D 2023
%B Workshop on Human-In-the-Loop Data Analytics
%Z date of event: 2023-06-18 - 2023-06-18
%C Seattle, WA, USA
%B HILDA 2023
%P 1 - 3
%Z sequence number: 11
%I ACM
%@ 979-8-4007-0216-7