Last Year

2020
[1]
H. Arnaout, S. Razniewski, and G. Weikum, “Negative Statements Considered Useful,” 2020. [Online]. Available: http://arxiv.org/abs/2001.04425. (arXiv: 2001.04425)
Abstract
Knowledge bases (KBs), pragmatic collections of knowledge about notable entities, are an important asset in applications such as search, question answering and dialogue. Rooted in a long tradition in knowledge representation, all popular KBs only store positive information, while they abstain from taking any stance towards statements not contained in them. In this paper, we make the case for explicitly stating interesting statements which are not true. Negative statements would be important to overcome current limitations of question answering, yet due to their potential abundance, any effort towards compiling them needs a tight coupling with ranking. We introduce two approaches towards compiling negative statements. (i) In peer-based statistical inferences, we compare entities with highly related entities in order to derive potential negative statements, which we then rank using supervised and unsupervised features. (ii) In query-log-based text extraction, we use a pattern-based approach for harvesting search engine query logs. Experimental results show that both approaches hold promising and complementary potential. Along with this paper, we publish the first datasets on interesting negative information, containing over 1.1M statements for 100K popular Wikidata entities.
Export
BibTeX
% arXiv preprint 2001.04425 by Arnaout, Razniewski and Weikum.
@online{Arnaout_arXiv2001.04425,
  author       = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard},
  title        = {Negative Statements Considered Useful},
  year         = {2020},
  language     = {eng},
  url          = {http://arxiv.org/abs/2001.04425},
  eprint       = {2001.04425},
  eprinttype   = {arXiv},
  marginalmark = {$\bullet$},
  abstract     = {Knowledge bases (KBs), pragmatic collections of knowledge about notable entities, are an important asset in applications such as search, question answering and dialogue. Rooted in a long tradition in knowledge representation, all popular KBs only store positive information, while they abstain from taking any stance towards statements not contained in them. In this paper, we make the case for explicitly stating interesting statements which are not true. Negative statements would be important to overcome current limitations of question answering, yet due to their potential abundance, any effort towards compiling them needs a tight coupling with ranking. We introduce two approaches towards compiling negative statements. (i) In peer-based statistical inferences, we compare entities with highly related entities in order to derive potential negative statements, which we then rank using supervised and unsupervised features. (ii) In query-log-based text extraction, we use a pattern-based approach for harvesting search engine query logs. Experimental results show that both approaches hold promising and complementary potential. Along with this paper, we publish the first datasets on interesting negative information, containing over 1.1M statements for 100K popular Wikidata entities.},
}
Endnote
%0 Report %A Arnaout, Hiba %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Negative Statements Considered Useful : %G eng %U http://hdl.handle.net/21.11116/0000-0005-821F-6 %U http://arxiv.org/abs/2001.04425 %D 2020 %X Knowledge bases (KBs), pragmatic collections of knowledge about notable entities, are an important asset in applications such as search, question answering and dialogue. Rooted in a long tradition in knowledge representation, all popular KBs only store positive information, while they abstain from taking any stance towards statements not contained in them. In this paper, we make the case for explicitly stating interesting statements which are not true. Negative statements would be important to overcome current limitations of question answering, yet due to their potential abundance, any effort towards compiling them needs a tight coupling with ranking. We introduce two approaches towards compiling negative statements. (i) In peer-based statistical inferences, we compare entities with highly related entities in order to derive potential negative statements, which we then rank using supervised and unsupervised features. (ii) In query-log-based text extraction, we use a pattern-based approach for harvesting search engine query logs. Experimental results show that both approaches hold promising and complementary potential. Along with this paper, we publish the first datasets on interesting negative information, containing over 1.1M statements for 100K popular Wikidata entities. %K Computer Science, Information Retrieval, cs.IR,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL,Computer Science, Databases, cs.DB
[2]
H. Arnaout, S. Razniewski, and G. Weikum, “Enriching Knowledge Bases with Interesting Negative Statements,” in Automated Knowledge Base Construction (AKBC 2020), Virtual Conference, 2020.
Export
BibTeX
% AKBC 2020 conference paper by Arnaout, Razniewski and Weikum.
@inproceedings{Arnaout_AKBC2020,
  author       = {Arnaout, Hiba and Razniewski, Simon and Weikum, Gerhard},
  title        = {Enriching Knowledge Bases with Interesting Negative Statements},
  booktitle    = {Automated Knowledge Base Construction (AKBC 2020)},
  publisher    = {OpenReview},
  address      = {Virtual Conference},
  year         = {2020},
  language     = {eng},
  doi          = {10.24432/C5101K},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Arnaout, Hiba %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Enriching Knowledge Bases with Interesting Negative Statements : %G eng %U http://hdl.handle.net/21.11116/0000-0007-EBC9-E %R 10.24432/C5101K %D 2020 %B 2nd Conference on Automated Knowledge Base Construction %Z date of event: 2020-06-22 - 2020-06-24 %C Virtual Conference %B Automated Knowledge Base Construction %I OpenReview %U https://openreview.net/forum?id=pSLmyZKaS
[3]
K. Balog, V. Setty, C. Lioma, Y. Liu, M. Zhang, and K. Berberich, Eds., ICTIR ’20. ACM, 2020.
Export
BibTeX
% Edited proceedings volume of ICTIR '20 (ACM).
@proceedings{Balog_ICTIR20,
  editor       = {Balog, Krisztian and Setty, Vinay and Lioma, Christina and Liu, Yiqun and Zhang, Min and Berberich, Klaus},
  title        = {ICTIR '20, ACM SIGIR International Conference on Theory of Information Retrieval},
  publisher    = {ACM},
  address      = {Virtual Event, Norway},
  year         = {2020},
  language     = {eng},
  isbn         = {978-1-4503-8067-6},
  doi          = {10.1145/3409256},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %E Balog, Krisztian %E Setty, Vinay %E Lioma, Christina %E Liu, Yiqun %E Zhang, Min %E Berberich, Klaus %+ External Organizations External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T ICTIR '20 : Proceedings of the 2020 ACM SIGIR International Conference on Theory of Information Retrieval %G eng %U http://hdl.handle.net/21.11116/0000-0008-041D-4 %R 10.1145/3409256 %@ 978-1-4503-8067-6 %I ACM %D 2020 %B ACM SIGIR International Conference on Theory of Information Retrieval %Z date of event: 2020-09-14 - 2020-09-17 %D 2020 %C Virtual Event, Norway
[4]
C. Belth, X. Zheng, J. Vreeken, and D. Koutra, “What is Normal, What is Strange, and What is Missing in a Knowledge Graph: Unified Characterization via Inductive Summarization,” in Proceedings of The World Wide Web Conference (WWW 2020), Taipei, Taiwan, 2020.
Export
BibTeX
% WWW 2020 conference paper by Belth, Zheng, Vreeken and Koutra.
@inproceedings{belth:20:kgist,
  author       = {Belth, Caleb and Zheng, Xinyi and Vreeken, Jilles and Koutra, Danai},
  title        = {What is Normal, What is Strange, and What is Missing in a Knowledge Graph: {U}nified Characterization via Inductive Summarization},
  booktitle    = {Proceedings of The World Wide Web Conference (WWW 2020)},
  editor       = {Huang, Yennun and King, Irwin and Liu, Tie-Yan and van Steen, Maarten},
  pages        = {1115--1126},
  publisher    = {ACM},
  address      = {Taipei, Taiwan},
  year         = {2020},
  language     = {eng},
  isbn         = {978-1-4503-7023-3},
  doi          = {10.1145/3366423.3380189},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Belth, Caleb %A Zheng, Xinyi %A Vreeken, Jilles %A Koutra, Danai %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T What is Normal, What is Strange, and What is Missing in a Knowledge Graph: Unified Characterization via Inductive Summarization : %G eng %U http://hdl.handle.net/21.11116/0000-0008-253F-9 %R 10.1145/3366423.3380189 %D 2020 %B The World Wide Web Conference %Z date of event: 2020-04-20 - 2020-04-24 %C Taipei, Taiwan %B Proceedings of The World Wide Web Conference %E Huang, Yennun; King, Irwin; Liu, Tie-Yan; van Steen, Maarten %P 1115 - 1126 %I ACM %@ 978-1-4503-7023-3
[5]
J. J. Benjamin, C. Müller-Birn, and S. Razniewski, “Examining the Impact of Algorithm Awareness on Wikidata’s Recommender System Recoin,” 2020. [Online]. Available: https://arxiv.org/abs/2009.09049. (arXiv: 2009.09049)
Abstract
The global infrastructure of the Web, designed as an open and transparent system, has a significant impact on our society. However, algorithmic systems of corporate entities that neglect those principles increasingly populated the Web. Typical representatives of these algorithmic systems are recommender systems that influence our society both on a scale of global politics and during mundane shopping decisions. Recently, such recommender systems have come under critique for how they may strengthen existing or even generate new kinds of biases. To this end, designers and engineers are increasingly urged to make the functioning and purpose of recommender systems more transparent. Our research relates to the discourse of algorithm awareness, that reconsiders the role of algorithm visibility in interface design. We conducted online experiments with 105 participants using MTurk for the recommender system Recoin, a gadget for Wikidata. In these experiments, we presented users with one of a set of three different designs of Recoin's user interface, each of them exhibiting a varying degree of explainability and interactivity. Our findings include a positive correlation between comprehension of and trust in an algorithmic system in our interactive redesign. However, our results are not conclusive yet, and suggest that the measures of comprehension, fairness, accuracy and trust are not yet exhaustive for the empirical study of algorithm awareness. Our qualitative insights provide a first indication for further measures. Our study participants, for example, were less concerned with the details of understanding an algorithmic calculation than with who or what is judging the result of the algorithm.
Export
BibTeX
% arXiv preprint 2009.09049 by Benjamin, Mueller-Birn and Razniewski.
% The proper nouns in the title are braced as whole words so that
% sentence-casing styles cannot lowercase them.
@online{Benjamin2009.09049,
  author       = {Benjamin, Jesse Josua and M{\"u}ller-Birn, Claudia and Razniewski, Simon},
  title        = {Examining the Impact of Algorithm Awareness on {Wikidata}'s Recommender System {Recoin}},
  year         = {2020},
  language     = {eng},
  url          = {https://arxiv.org/abs/2009.09049},
  eprint       = {2009.09049},
  eprinttype   = {arXiv},
  marginalmark = {$\bullet$},
  abstract     = {The global infrastructure of the Web, designed as an open and transparent system, has a significant impact on our society. However, algorithmic systems of corporate entities that neglect those principles increasingly populated the Web. Typical representatives of these algorithmic systems are recommender systems that influence our society both on a scale of global politics and during mundane shopping decisions. Recently, such recommender systems have come under critique for how they may strengthen existing or even generate new kinds of biases. To this end, designers and engineers are increasingly urged to make the functioning and purpose of recommender systems more transparent. Our research relates to the discourse of algorithm awareness, that reconsiders the role of algorithm visibility in interface design. We conducted online experiments with 105 participants using MTurk for the recommender system Recoin, a gadget for Wikidata. In these experiments, we presented users with one of a set of three different designs of Recoin's user interface, each of them exhibiting a varying degree of explainability and interactivity. Our findings include a positive correlation between comprehension of and trust in an algorithmic system in our interactive redesign. However, our results are not conclusive yet, and suggest that the measures of comprehension, fairness, accuracy and trust are not yet exhaustive for the empirical study of algorithm awareness. Our qualitative insights provide a first indication for further measures. 
Our study participants, for example, were less concerned with the details of understanding an algorithmic calculation than with who or what is judging the result of the algorithm.},
}
Endnote
%0 Report %A Benjamin, Jesse Josua %A Müller-Birn, Claudia %A Razniewski, Simon %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Examining the Impact of Algorithm Awareness on Wikidata's Recommender System Recoin : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0661-4 %U https://arxiv.org/abs/2009.09049 %D 2020 %X The global infrastructure of the Web, designed as an open and transparent system, has a significant impact on our society. However, algorithmic systems of corporate entities that neglect those principles increasingly populated the Web. Typical representatives of these algorithmic systems are recommender systems that influence our society both on a scale of global politics and during mundane shopping decisions. Recently, such recommender systems have come under critique for how they may strengthen existing or even generate new kinds of biases. To this end, designers and engineers are increasingly urged to make the functioning and purpose of recommender systems more transparent. Our research relates to the discourse of algorithm awareness, that reconsiders the role of algorithm visibility in interface design. We conducted online experiments with 105 participants using MTurk for the recommender system Recoin, a gadget for Wikidata. In these experiments, we presented users with one of a set of three different designs of Recoin's user interface, each of them exhibiting a varying degree of explainability and interactivity. Our findings include a positive correlation between comprehension of and trust in an algorithmic system in our interactive redesign. However, our results are not conclusive yet, and suggest that the measures of comprehension, fairness, accuracy and trust are not yet exhaustive for the empirical study of algorithm awareness. Our qualitative insights provide a first indication for further measures. 
Our study participants, for example, were less concerned with the details of understanding an algorithmic calculation than with who or what is judging the result of the algorithm. %K Computer Science, Human-Computer Interaction, cs.HC,Computer Science, Computers and Society, cs.CY,Computer Science, Digital Libraries, cs.DL
[6]
A. Bhattacharya, S. Natarajan, and R. Saha Roy, Eds., Proceedings of the 7th ACM IKDD CoDS and 25th COMAD. ACM, 2020.
Export
BibTeX
% Edited proceedings volume of CoDS-COMAD 2020 (ACM).
% NOTE(review): second editor's given name spelled "Sriraam"; confirm against the ACM DL record.
@proceedings{SahaRoy_CoDSCOMAD20,
  editor       = {Bhattacharya, Arnab and Natarajan, Sriraam and Saha Roy, Rishiraj},
  title        = {Proceedings of the 7th ACM IKDD CoDS and 25th COMAD (CoDS-COMAD 2020)},
  publisher    = {ACM},
  address      = {Hyderabad, India},
  year         = {2020},
  language     = {eng},
  isbn         = {978-1-4503-7738-6},
  doi          = {10.1145/3371158},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %E Bhattacharya, Arnab %E Natarajan, Sriraam %E Saha Roy, Rishiraj %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Proceedings of the 7th ACM IKDD CoDS and 25th COMAD : %G eng %U http://hdl.handle.net/21.11116/0000-0008-09CF-6 %R 10.1145/3371158 %@ 978-1-4503-7738-6 %I ACM %D 2020 %B ACM India Joint International Conference on Data Science and Management of Data %Z date of event: 2020-01-05 - 2020-01-07 %D 2020 %C Hyderabad, India
[7]
A. J. Biega, J. Schmidt, and R. Saha Roy, “Towards Query Logs for Privacy Studies: On Deriving Search Queries from Questions,” in Advances in Information Retrieval (ECIR 2020), Lisbon, Portugal, 2020.
Export
BibTeX
% ECIR 2020 conference paper, published in LNCS volume 12036.
% The publication year is given once, in the year field, as in the other entries here.
@inproceedings{Biega_ECIR2020,
  author       = {Biega, Asia J. and Schmidt, Jana and Saha Roy, Rishiraj},
  title        = {Towards Query Logs for Privacy Studies: {O}n Deriving Search Queries from Questions},
  booktitle    = {Advances in Information Retrieval (ECIR 2020)},
  editor       = {Jose, Joemon M. and Yilmaz, Emine and Magalh{\~a}es, Jo{\~a}o and Castells, Pablo and Ferro, Nicola and Silva, M{\'a}rio J. and Martins, Fl{\'a}vio},
  series       = {Lecture Notes in Computer Science},
  volume       = {12036},
  pages        = {110--117},
  publisher    = {Springer},
  address      = {Lisbon, Portugal},
  year         = {2020},
  language     = {eng},
  isbn         = {978-3-030-45441-8},
  doi          = {10.1007/978-3-030-45442-5_14},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Biega, Asia J. %A Schmidt, Jana %A Saha Roy, Rishiraj %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Towards Query Logs for Privacy Studies: On Deriving Search Queries from Questions : %G eng %U http://hdl.handle.net/21.11116/0000-0008-02FD-9 %R 10.1007/978-3-030-45442-5_14 %D 2020 %B 42nd European Conference on IR Research %Z date of event: 2020-04-14 - 2020-04-17 %C Lisbon, Portugal %B Advances in Information Retrieval %E Jose, Joemon M.; Yilmaz, Emine; Magalhães, João; Castells, Pablo; Ferro, Nicola; Silva, Mário J.; Martins, Flávio %P 110 - 117 %I Springer %@ 978-3-030-45441-8 %B Lecture Notes in Computer Science %N 12036
[8]
A. J. Biega, J. Schmidt, and R. Saha Roy, “Towards Query Logs for Privacy Studies: On Deriving Search Queries from Questions,” 2020. [Online]. Available: https://arxiv.org/abs/2004.02023. (arXiv: 2004.02023)
Abstract
Translating verbose information needs into crisp search queries is a phenomenon that is ubiquitous but hardly understood. Insights into this process could be valuable in several applications, including synthesizing large privacy-friendly query logs from public Web sources which are readily available to the academic research community. In this work, we take a step towards understanding query formulation by tapping into the rich potential of community question answering (CQA) forums. Specifically, we sample natural language (NL) questions spanning diverse themes from the Stack Exchange platform, and conduct a large-scale conversion experiment where crowdworkers submit search queries they would use when looking for equivalent information. We provide a careful analysis of this data, accounting for possible sources of bias during conversion, along with insights into user-specific linguistic patterns and search behaviors. We release a dataset of 7,000 question-query pairs from this study to facilitate further research on query understanding.
Export
BibTeX
% arXiv preprint 2004.02023 by Biega, Schmidt and Saha Roy.
@online{Biega2004.02023,
  author       = {Biega, Asia J. and Schmidt, Jana and Saha Roy, Rishiraj},
  title        = {Towards Query Logs for Privacy Studies: On Deriving Search Queries from Questions},
  year         = {2020},
  language     = {eng},
  url          = {https://arxiv.org/abs/2004.02023},
  eprint       = {2004.02023},
  eprinttype   = {arXiv},
  marginalmark = {$\bullet$},
  abstract     = {Translating verbose information needs into crisp search queries is a phenomenon that is ubiquitous but hardly understood. Insights into this process could be valuable in several applications, including synthesizing large privacy-friendly query logs from public Web sources which are readily available to the academic research community. In this work, we take a step towards understanding query formulation by tapping into the rich potential of community question answering (CQA) forums. Specifically, we sample natural language (NL) questions spanning diverse themes from the Stack Exchange platform, and conduct a large-scale conversion experiment where crowdworkers submit search queries they would use when looking for equivalent information. We provide a careful analysis of this data, accounting for possible sources of bias during conversion, along with insights into user-specific linguistic patterns and search behaviors. We release a dataset of 7,000 question-query pairs from this study to facilitate further research on query understanding.},
}
Endnote
%0 Report %A Biega, Asia J. %A Schmidt, Jana %A Saha Roy, Rishiraj %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Towards Query Logs for Privacy Studies: On Deriving Search Queries from Questions : %G eng %U http://hdl.handle.net/21.11116/0000-0008-09C7-E %U https://arxiv.org/abs/2004.02023 %D 2020 %X Translating verbose information needs into crisp search queries is a phenomenon that is ubiquitous but hardly understood. Insights into this process could be valuable in several applications, including synthesizing large privacy-friendly query logs from public Web sources which are readily available to the academic research community. In this work, we take a step towards understanding query formulation by tapping into the rich potential of community question answering (CQA) forums. Specifically, we sample natural language (NL) questions spanning diverse themes from the Stack Exchange platform, and conduct a large-scale conversion experiment where crowdworkers submit search queries they would use when looking for equivalent information. We provide a careful analysis of this data, accounting for possible sources of bias during conversion, along with insights into user-specific linguistic patterns and search behaviors. We release a dataset of 7,000 question-query pairs from this study to facilitate further research on query understanding. %K Computer Science, Information Retrieval, cs.IR
[9]
K. Budhathoki, “Causal Inference on Discrete Data,” Universität des Saarlandes, Saarbrücken, 2020.
Export
BibTeX
% PhD thesis, Universitaet des Saarlandes.
% The publication year is given once, in the year field, as in the other entries here.
@phdthesis{BudDiss_2020,
  author       = {Budhathoki, Kailash},
  title        = {Causal Inference on Discrete Data},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2020},
  language     = {eng},
  url          = {urn:nbn:de:bsz:291--ds-329528},
  doi          = {10.22028/D291-32952},
  marginalmark = {$\bullet$},
}
Endnote
%0 Thesis %A Budhathoki, Kailash %Y Vreeken, Jilles %A referee: Weikum, Gerhard %A referee: Heskes, Tom %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Causal Inference on Discrete Data : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FE73-A %R 10.22028/D291-32952 %U urn:nbn:de:bsz:291--ds-329528 %I Universität des Saarlandes %C Saarbrücken %D 2020 %P 171 p. %V phd %9 phd %U https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/30501
[10]
D. Calvanese, J. Corman, D. Lanti, and S. Razniewski, “Counting Query Answers over a DL-Lite Knowledge Base (extended version),” 2020. [Online]. Available: https://arxiv.org/abs/2005.05886. (arXiv: 2005.05886)
Abstract
Counting answers to a query is an operation supported by virtually all database management systems. In this paper we focus on counting answers over a Knowledge Base (KB), which may be viewed as a database enriched with background knowledge about the domain under consideration. In particular, we place our work in the context of Ontology-Mediated Query Answering/Ontology-based Data Access (OMQA/OBDA), where the language used for the ontology is a member of the DL-Lite family and the data is a (usually virtual) set of assertions. We study the data complexity of query answering, for different members of the DL-Lite family that include number restrictions, and for variants of conjunctive queries with counting that differ with respect to their shape (connected, branching, rooted). We improve upon existing results by providing a PTIME and coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case, we define a novel query rewriting technique into first-order logic with counting.
Export
BibTeX
% arXiv preprint 2005.05886 (extended version); the conference version is keyed RazniewskiIJCAI2020.
@online{Razniewskiarxiv2020,
  author       = {Calvanese, Diego and Corman, Julien and Lanti, Davide and Razniewski, Simon},
  title        = {Counting Query Answers over a {DL}-Lite Knowledge Base (extended version)},
  year         = {2020},
  language     = {eng},
  url          = {https://arxiv.org/abs/2005.05886},
  eprint       = {2005.05886},
  eprinttype   = {arXiv},
  marginalmark = {$\bullet$},
  abstract     = {Counting answers to a query is an operation supported by virtually all database management systems. In this paper we focus on counting answers over a Knowledge Base (KB), which may be viewed as a database enriched with background knowledge about the domain under consideration. In particular, we place our work in the context of Ontology-Mediated Query Answering/Ontology-based Data Access (OMQA/OBDA), where the language used for the ontology is a member of the DL-Lite family and the data is a (usually virtual) set of assertions. We study the data complexity of query answering, for different members of the DL-Lite family that include number restrictions, and for variants of conjunctive queries with counting that differ with respect to their shape (connected, branching, rooted). We improve upon existing results by providing a PTIME and coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case, we define a novel query rewriting technique into first-order logic with counting.},
}
Endnote
%0 Report %A Calvanese, Diego %A Corman, Julien %A Lanti, Davide %A Razniewski, Simon %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Counting Query Answers over a DL-Lite Knowledge Base (extended version) : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FF5A-6 %U https://arxiv.org/abs/2005.05886 %D 2020 %X Counting answers to a query is an operation supported by virtually all database management systems. In this paper we focus on counting answers over a Knowledge Base (KB), which may be viewed as a database enriched with background knowledge about the domain under consideration. In particular, we place our work in the context of Ontology-Mediated Query Answering/Ontology-based Data Access (OMQA/OBDA), where the language used for the ontology is a member of the DL-Lite family and the data is a (usually virtual) set of assertions. We study the data complexity of query answering, for different members of the DL-Lite family that include number restrictions, and for variants of conjunctive queries with counting that differ with respect to their shape (connected, branching, rooted). We improve upon existing results by providing a PTIME and coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case, we define a novel query rewriting technique into first-order logic with counting. %K Computer Science, Databases, cs.DB,Computer Science, Artificial Intelligence, cs.AI
[11]
D. Calvanese, J. Corman, D. Lanti, and S. Razniewski, “Counting Query Answers over a DL-Lite Knowledge Base,” in Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence (IJCAI 2020), Yokohama, Japan (Virtual), 2020.
Abstract
Counting answers to a query is an operation supported by virtually all database management systems. In this paper we focus on counting answers over a Knowledge Base (KB), which may be viewed as a database enriched with background knowledge about the domain under consideration. In particular, we place our work in the context of Ontology-Mediated Query Answering/Ontology-based Data Access (OMQA/OBDA), where the language used for the ontology is a member of the DL-Lite family and the data is a (usually virtual) set of assertions. We study the data complexity of query answering, for different members of the DL-Lite family that include number restrictions, and for variants of conjunctive queries with counting that differ with respect to their shape (connected, branching, rooted). We improve upon existing results by providing a PTIME and coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case, we define a novel query rewriting technique into first-order logic with counting.
Export
BibTeX
@inproceedings{RazniewskiIJCAI2020,
  author       = {Calvanese, Diego and Corman, Julien and Lanti, Davide and Razniewski, Simon},
  title        = {Counting Query Answers over a {DL-Lite} Knowledge Base},
  booktitle    = {Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence (IJCAI 2020)},
  editor       = {Bessiere, Christian},
  pages        = {1658--1666},
  publisher    = {IJCAI},
  address      = {Yokohama, Japan (Virtual)},
  year         = {2020},
  isbn         = {978-0-9992411-6-5},
  doi          = {10.24963/ijcai.2020/230},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {Counting answers to a query is an operation supported by virtually all database management systems. In this paper we focus on counting answers over a Knowledge Base (KB), which may be viewed as a database enriched with background knowledge about the domain under consideration. In particular, we place our work in the context of Ontology-Mediated Query Answering/Ontology-based Data Access (OMQA/OBDA), where the language used for the ontology is a member of the DL-Lite family and the data is a (usually virtual) set of assertions. We study the data complexity of query answering, for different members of the DL-Lite family that include number restrictions, and for variants of conjunctive queries with counting that differ with respect to their shape (connected, branching, rooted). We improve upon existing results by providing PTIME and coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case, we define a novel query rewriting technique into first-order logic with counting.},
}
Endnote
%0 Conference Proceedings %A Calvanese, Diego %A Corman, Julien %A Lanti, Davide %A Razniewski, Simon %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Counting Query Answers over a DL-Lite Knowledge Base : %G eng %U http://hdl.handle.net/21.11116/0000-0008-009E-6 %R 10.24963/ijcai.2020/230 %D 2020 %B Twenty-Ninth International Joint Conference on Artificial Intelligence %Z date of event: 2021-01-07 - 2021-01-15 %C Yokohama, Japan (Virtual) %X Counting answers to a query is an operation supported by virtually all database management systems. In this paper we focus on counting answers over a Knowledge Base (KB), which may be viewed as a database enriched with background knowledge about the domain under consideration. In particular, we place our work in the context of Ontology-Mediated Query Answering/Ontology-based Data Access (OMQA/OBDA), where the language used for the ontology is a member of the DL-Lite family and the data is a (usually virtual) set of assertions. We study the data complexity of query answering, for different members of the DL-Lite family that include number restrictions, and for variants of conjunctive queries with counting that differ with respect to their shape (connected, branching, rooted). We improve upon existing results by providing PTIME and coNP lower bounds, and upper bounds in PTIME and LOGSPACE. For the latter case, we define a novel query rewriting technique into first-order logic with counting. %K Computer Science, Databases, cs.DB,Computer Science, Artificial Intelligence, cs.AI %B Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence %E Bessiere, Christian %P 1658 - 1666 %I IJCAI %@ 978-0-9992411-6-5
[12]
D. Calvanese, J. Corman, D. Lanti, and S. Razniewski, “Rewriting Count Queries over DL-Lite TBoxes with Number Restrictions,” in Proceedings of the 33rd International Workshop on Description Logics (DL 2020), Rhodes, Greece (Virtual Event), 2020.
Export
BibTeX
@inproceedings{Calvanese_DL2020,
  author       = {Calvanese, Diego and Corman, Julien and Lanti, Davide and Razniewski, Simon},
  title        = {Rewriting Count Queries over {DL}-Lite {TBoxes} with Number Restrictions},
  booktitle    = {Proceedings of the 33rd International Workshop on Description Logics (DL 2020)},
  editor       = {Borgwardt, Stefan and Meyer, Thomas},
  series       = {CEUR Workshop Proceedings},
  volume       = {2663},
  eid          = {7},
  publisher    = {ceur-ws.org},
  address      = {Rhodes, Greece (Virtual Event)},
  year         = {2020},
  issn         = {1613-0073},
  url          = {http://ceur-ws.org/Vol-2663/paper-7.pdf},
  urn          = {urn:nbn:de:0074-2663-4},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Calvanese, Diego %A Corman, Julien %A Lanti, Davide %A Razniewski, Simon %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Rewriting Count Queries over DL-Lite TBoxes with Number Restrictions : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0606-B %U http://ceur-ws.org/Vol-2663/paper-7.pdf %D 2020 %B 33rd International Workshop on Description Logics %Z date of event: 2020-09-12 - 2020-09-14 %C Rhodes, Greece (Virtual Event) %B Proceedings of the 33rd International Workshop on Description Logics %E Borgwardt, Stefan; Meyer, Thomas %Z sequence number: 7 %I ceur-ws.org %B CEUR Workshop Proceedings %N 2663 %@ 1613-0073
[13]
Y. Chalier, S. Razniewski, and G. Weikum, “Joint Reasoning for Multi-Faceted Commonsense Knowledge,” 2020. [Online]. Available: http://arxiv.org/abs/2001.04170. (arXiv: 2001.04170)
Abstract
Commonsense knowledge (CSK) supports a variety of AI applications, from visual understanding to chatbots. Prior works on acquiring CSK, such as ConceptNet, have compiled statements that associate concepts, like everyday objects or activities, with properties that hold for most or some instances of the concept. Each concept is treated in isolation from other concepts, and the only quantitative measure (or ranking) of properties is a confidence score that the statement is valid. This paper aims to overcome these limitations by introducing a multi-faceted model of CSK statements and methods for joint reasoning over sets of inter-related statements. Our model captures four different dimensions of CSK statements: plausibility, typicality, remarkability and salience, with scoring and ranking along each dimension. For example, hyenas drinking water is typical but not salient, whereas hyenas eating carcasses is salient. For reasoning and ranking, we develop a method with soft constraints, to couple the inference over concepts that are related in a taxonomic hierarchy. The reasoning is cast into an integer linear programming (ILP), and we leverage the theory of reduction costs of a relaxed LP to compute informative rankings. This methodology is applied to several large CSK collections. Our evaluation shows that we can consolidate these inputs into much cleaner and more expressive knowledge. Results are available at https://dice.mpi-inf.mpg.de.
Export
BibTeX
@online{Chalier_arXiv2001.04170,
  author       = {Chalier, Yohan and Razniewski, Simon and Weikum, Gerhard},
  title        = {Joint Reasoning for Multi-Faceted Commonsense Knowledge},
  year         = {2020},
  eprint       = {2001.04170},
  eprinttype   = {arXiv},
  url          = {http://arxiv.org/abs/2001.04170},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {Commonsense knowledge (CSK) supports a variety of AI applications, from visual understanding to chatbots. Prior works on acquiring CSK, such as ConceptNet, have compiled statements that associate concepts, like everyday objects or activities, with properties that hold for most or some instances of the concept. Each concept is treated in isolation from other concepts, and the only quantitative measure (or ranking) of properties is a confidence score that the statement is valid. This paper aims to overcome these limitations by introducing a multi-faceted model of CSK statements and methods for joint reasoning over sets of inter-related statements. Our model captures four different dimensions of CSK statements: plausibility, typicality, remarkability and salience, with scoring and ranking along each dimension. For example, hyenas drinking water is typical but not salient, whereas hyenas eating carcasses is salient. For reasoning and ranking, we develop a method with soft constraints, to couple the inference over concepts that are related in a taxonomic hierarchy. The reasoning is cast into an integer linear programming (ILP), and we leverage the theory of reduction costs of a relaxed LP to compute informative rankings. This methodology is applied to several large CSK collections. Our evaluation shows that we can consolidate these inputs into much cleaner and more expressive knowledge. Results are available at https://dice.mpi-inf.mpg.de.},
}
Endnote
%0 Report %A Chalier, Yohan %A Razniewski, Simon %A Weikum, Gerhard %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Joint Reasoning for Multi-Faceted Commonsense Knowledge : %G eng %U http://hdl.handle.net/21.11116/0000-0005-8226-D %U http://arxiv.org/abs/2001.04170 %D 2020 %X Commonsense knowledge (CSK) supports a variety of AI applications, from visual understanding to chatbots. Prior works on acquiring CSK, such as ConceptNet, have compiled statements that associate concepts, like everyday objects or activities, with properties that hold for most or some instances of the concept. Each concept is treated in isolation from other concepts, and the only quantitative measure (or ranking) of properties is a confidence score that the statement is valid. This paper aims to overcome these limitations by introducing a multi-faceted model of CSK statements and methods for joint reasoning over sets of inter-related statements. Our model captures four different dimensions of CSK statements: plausibility, typicality, remarkability and salience, with scoring and ranking along each dimension. For example, hyenas drinking water is typical but not salient, whereas hyenas eating carcasses is salient. For reasoning and ranking, we develop a method with soft constraints, to couple the inference over concepts that are related in a taxonomic hierarchy. The reasoning is cast into an integer linear programming (ILP), and we leverage the theory of reduction costs of a relaxed LP to compute informative rankings. This methodology is applied to several large CSK collections. Our evaluation shows that we can consolidate these inputs into much cleaner and more expressive knowledge. Results are available at https://dice.mpi-inf.mpg.de.
%K Computer Science, Computation and Language, cs.CL,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Information Retrieval, cs.IR
[14]
Y. Chalier, S. Razniewski, and G. Weikum, “Joint Reasoning for Multi-Faceted Commonsense Knowledge,” in Automated Knowledge Base Construction (AKBC 2020), Virtual Conference, 2020.
Export
BibTeX
@inproceedings{Chalier_AKBC2020,
  author       = {Chalier, Yohan and Razniewski, Simon and Weikum, Gerhard},
  title        = {Joint Reasoning for Multi-Faceted Commonsense Knowledge},
  booktitle    = {Automated Knowledge Base Construction (AKBC 2020)},
  publisher    = {OpenReview},
  address      = {Virtual Conference},
  year         = {2020},
  doi          = {10.24432/C58G6G},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Chalier, Yohan %A Razniewski, Simon %A Weikum, Gerhard %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Joint Reasoning for Multi-Faceted Commonsense Knowledge : %G eng %U http://hdl.handle.net/21.11116/0000-0007-EBCF-8 %R 10.24432/C58G6G %D 2020 %B 2nd Conference on Automated Knowledge Base Construction %Z date of event: 2020-06-22 - 2020-06-24 %C Virtual Conference %B Automated Knowledge Base Construction %I OpenReview %U https://openreview.net/forum?id=QnPV72SZVt
[15]
Y. Chalier, S. Razniewski, and G. Weikum, “Dice: A Joint Reasoning Framework for Multi-Faceted Commonsense Knowledge,” in ISWC 2020 Posters, Demos, and Industry Tracks, Globally Online, 2020.
Export
BibTeX
@inproceedings{Chalier_ISCW20,
  author       = {Chalier, Yohan and Razniewski, Simon and Weikum, Gerhard},
  title        = {Dice: {A} Joint Reasoning Framework for Multi-Faceted Commonsense Knowledge},
  booktitle    = {ISWC 2020 Posters, Demos, and Industry Tracks},
  editor       = {Taylor, Kerry and Goncalves, Rafael and Lecue, Freddy and Yan, Jun},
  series       = {CEUR Workshop Proceedings},
  volume       = {2721},
  pages        = {16--20},
  eid          = {482},
  publisher    = {ceur-ws.org},
  address      = {Globally Online},
  year         = {2020},
  issn         = {1613-0073},
  url          = {http://ceur-ws.org/Vol-2721/paper482.pdf},
  urn          = {urn:nbn:de:0074-2721-6},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Chalier, Yohan %A Razniewski, Simon %A Weikum, Gerhard %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Dice: A Joint Reasoning Framework for Multi-Faceted Commonsense Knowledge : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F132-0 %U http://ceur-ws.org/Vol-2721/paper482.pdf %D 2020 %B 19th International Semantic Web Conference %Z date of event: 2020-11-01 - 2020-11-06 %C Globally Online %B ISWC 2020 Posters, Demos, and Industry Tracks %E Taylor, Kerry; Goncalves, Rafael; Lecue, Freddy; Yan, Jun %P 16 - 20 %Z sequence number: 482 %I ceur-ws.org %B CEUR Workshop Proceedings %N 2721 %@ 1613-0073 %U http://ceur-ws.org/Vol-2721/paper482.pdf
[16]
E. Chang, J. Caplinger, A. Marin, X. Shen, and V. Demberg, “DART: A Lightweight Quality-Suggestive Data-to-Text Annotation Tool,” in The 28th International Conference on Computational Linguistics (COLING 2020), Barcelona, Spain (Online), 2020.
Export
BibTeX
@inproceedings{chang2020dart,
  author       = {Chang, Ernie and Caplinger, Jeriah and Marin, Alex and Shen, Xiaoyu and Demberg, Vera},
  title        = {{DART}: {A} Lightweight Quality-Suggestive Data-to-Text Annotation Tool},
  booktitle    = {The 28th International Conference on Computational Linguistics (COLING 2020)},
  editor       = {Ptaszynski, Michal and Ziolko, Bartosz},
  pages        = {12--17},
  publisher    = {ACL},
  address      = {Barcelona, Spain (Online)},
  year         = {2020},
  isbn         = {978-1-952148-28-6},
  doi          = {10.18653/v1/2020.coling-demos.3},
  url          = {https://www.aclweb.org/anthology/2020.coling-demos.3},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Chang, Ernie %A Caplinger, Jeriah %A Marin, Alex %A Shen, Xiaoyu %A Demberg, Vera %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T DART: A Lightweight Quality-Suggestive Data-to-Text Annotation Tool : %G eng %U http://hdl.handle.net/21.11116/0000-0008-149C-2 %U https://www.aclweb.org/anthology/2020.coling-demos.3 %R 10.18653/v1/2020.coling-demos.3 %D 2020 %B The 28th International Conference on Computational Linguistics %Z date of event: 2020-12-08 - 2020-12-13 %C Barcelona, Spain (Online) %B The 28th International Conference on Computational Linguistics %E Ptaszynski, Michal; Ziolko, Bartosz %P 12 - 17 %I ACL %@ 978-1-952148-28-6
[17]
C. X. Chu, S. Razniewski, and G. Weikum, “ENTYFI: Entity Typing in Fictional Texts,” in WSDM ’20, 13th International Conference on Web Search and Data Mining, Houston, TX, USA, 2020.
Export
BibTeX
@inproceedings{ChuWSDM2020,
  author       = {Chu, Cuong Xuan and Razniewski, Simon and Weikum, Gerhard},
  title        = {{ENTYFI}: {E}ntity Typing in Fictional Texts},
  booktitle    = {WSDM '20, 13th International Conference on Web Search and Data Mining},
  editor       = {Caverlee, James and Hu, Xia Ben},
  pages        = {124--132},
  publisher    = {ACM},
  address      = {Houston, TX, USA},
  year         = {2020},
  isbn         = {978-1-4503-6822-3},
  doi          = {10.1145/3336191.3371808},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Chu, Cuong Xuan %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T ENTYFI: Entity Typing in Fictional Texts : %G eng %U http://hdl.handle.net/21.11116/0000-0006-A27E-6 %R 10.1145/3336191.3371808 %D 2020 %B 13th International Conference on Web Search and Data Mining %Z date of event: 2020-02-03 - 2020-02-07 %C Houston, TX, USA %B WSDM '20 %E Caverlee, James; Hu, Xia Ben %P 124 - 132 %I ACM %@ 978-1-4503-6822-3
[18]
C. X. Chu, S. Razniewski, and G. Weikum, “ENTYFI: A System for Fine-grained Entity Typing in Fictional Texts,” in The 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020), Online, 2020.
Export
BibTeX
@inproceedings{Chu_EMNLP20,
  author       = {Chu, Cuong Xuan and Razniewski, Simon and Weikum, Gerhard},
  title        = {{ENTYFI}: {A} System for Fine-grained Entity Typing in Fictional Texts},
  booktitle    = {The 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020)},
  editor       = {Liu, Qun and Schlangen, David},
  pages        = {100--106},
  publisher    = {ACL},
  address      = {Online},
  year         = {2020},
  isbn         = {978-1-952148-62-0},
  doi          = {10.18653/v1/2020.emnlp-demos.14},
  url          = {https://www.aclweb.org/anthology/2020.emnlp-demos.14/},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Chu, Cuong Xuan %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T ENTYFI: A System for Fine-grained Entity Typing in Fictional Texts : %G eng %U http://hdl.handle.net/21.11116/0000-0007-EED5-D %U https://www.aclweb.org/anthology/2020.emnlp-demos.14/ %R 10.18653/v1/2020.emnlp-demos.14 %D 2020 %B Conference on Empirical Methods in Natural Language Processing %Z date of event: 2020-11-16 - 2020-11-20 %C Online %B The 2020 Conference on Empirical Methods in Natural Language Processing %E Liu, Qun; Schlangen, David %P 100 - 106 %I ACL %@ 978-1-952148-62-0 %U https://www.aclweb.org/anthology/2020.emnlp-demos.14.pdf
[19]
S. Dalleiger and J. Vreeken, “The Relaxed Maximum Entropy Distribution and its Application to Pattern Discovery,” in 20th IEEE International Conference on Data Mining (ICDM 2020), Virtual Conference, 2020.
Export
BibTeX
@inproceedings{dalleiger:20:reaper,
  author       = {Dalleiger, Sebastian and Vreeken, Jilles},
  title        = {The Relaxed Maximum Entropy Distribution and its Application to Pattern Discovery},
  booktitle    = {20th IEEE International Conference on Data Mining (ICDM 2020)},
  editor       = {Plant, Claudia and Wang, Haixun and Cuzzocrea, Alfredo and Zaniolo, Carlo and Wu, Xindong},
  pages        = {978--983},
  publisher    = {IEEE},
  address      = {Virtual Conference},
  year         = {2020},
  isbn         = {978-1-7281-8316-9},
  doi          = {10.1109/ICDM50108.2020.00112},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Dalleiger, Sebastian %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T The Relaxed Maximum Entropy Distribution and its Application to Pattern Discovery : %G eng %U http://hdl.handle.net/21.11116/0000-0008-254E-8 %R 10.1109/ICDM50108.2020.00112 %D 2020 %B 20th IEEE International Conference on Data Mining %Z date of event: 2020-11-17 - 2020-11-20 %C Virtual Conference %B 20th IEEE International Conference on Data Mining %E Plant, Claudia; Wang, Haixun; Cuzzocrea, Alfredo; Zaniolo, Carlo; Wu, Xindong %P 978 - 983 %I IEEE %@ 978-1-7281-8316-9
[20]
S. Dalleiger and J. Vreeken, “Explainable Data Decompositions,” in AAAI Technical Track: Machine Learning, New York, NY, USA, 2020.
Export
BibTeX
@inproceedings{dalleiger:20:disc,
  author       = {Dalleiger, Sebastian and Vreeken, Jilles},
  title        = {Explainable Data Decompositions},
  booktitle    = {AAAI Technical Track: Machine Learning},
  pages        = {3709--3716},
  publisher    = {AAAI},
  address      = {New York, NY, USA},
  year         = {2020},
  isbn         = {978-1-57735-835-0},
  doi          = {10.1609/aaai.v34i04.5780},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Dalleiger, Sebastian %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Explainable Data Decompositions : %G eng %U http://hdl.handle.net/21.11116/0000-0008-2559-B %R 10.1609/aaai.v34i04.5780 %D 2020 %B Thirty-Fourth AAAI Conference on Artificial Intelligence %Z date of event: 2020-02-07 - 2020-02-12 %C New York, NY, USA %B AAAI Technical Track: Machine Learning %P 3709 - 3716 %I AAAI %@ 978-1-57735-835-0
[21]
F. Darari, W. Nutt, S. Razniewski, and S. Rudolph, “Completeness and soundness guarantees for conjunctive SPARQL queries over RDF data sources with completeness statements,” Semantic Web, vol. 11, no. 1, 2020.
Export
BibTeX
@article{Darari2020,
  author       = {Darari, Fariza and Nutt, Werner and Razniewski, Simon and Rudolph, Sebastian},
  title        = {Completeness and soundness guarantees for conjunctive {SPARQL} queries over {RDF} data sources with completeness statements},
  journal      = {Semantic Web},
  volume       = {11},
  number       = {1},
  pages        = {441--482},
  publisher    = {IOS Press},
  address      = {Amsterdam},
  year         = {2020},
  issn         = {1570-0844},
  doi          = {10.3233/SW-190344},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Journal Article %A Darari, Fariza %A Nutt, Werner %A Razniewski, Simon %A Rudolph, Sebastian %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Completeness and soundness guarantees for conjunctive SPARQL queries over RDF data sources with completeness statements : %G eng %U http://hdl.handle.net/21.11116/0000-0006-9A06-6 %R 10.3233/SW-190344 %7 2020 %D 2020 %J Semantic Web %V 11 %N 1 %& 441 %P 441 - 482 %I IOS Press %C Amsterdam %@ 1570-0844
[22]
J. Fischer and J. Vreeken, “Discovering Succinct Pattern Sets Expressing Co-Occurrence and Mutual Exclusivity,” in KDD ’20, 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Virtual Event, USA, 2020.
Export
BibTeX
@inproceedings{fischer:20:mexican,
  author       = {Fischer, Jonas and Vreeken, Jilles},
  title        = {Discovering Succinct Pattern Sets Expressing Co-Occurrence and Mutual Exclusivity},
  booktitle    = {KDD '20, 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  editor       = {Gupta, Rajesh and Liu, Yan and Tang, Jiliang and Prakash, B. Aditya},
  pages        = {813--823},
  publisher    = {ACM},
  address      = {Virtual Event, USA},
  year         = {2020},
  isbn         = {978-1-4503-7998-4},
  doi          = {10.1145/3394486.3403124},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Fischer, Jonas %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering Succinct Pattern Sets Expressing Co-Occurrence and Mutual Exclusivity : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FEA5-1 %R 10.1145/3394486.3403124 %D 2020 %B 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining %Z date of event: 2020-08-23 - 2020-08-27 %C Virtual Event, USA %B KDD '20 %E Gupta, Rajesh; Liu, Yan; Tang, Jiliang; Prakash, B. Aditya %P 813 - 823 %I ACM %@ 978-1-4503-7998-4
[23]
J. Fischer and J. Vreeken, “Sets of Robust Rules, and How to Find Them,” in Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2019), Würzburg, Germany, 2020.
Export
BibTeX
@inproceedings{fischer:19:grab,
  author       = {Fischer, Jonas and Vreeken, Jilles},
  title        = {Sets of Robust Rules, and How to Find Them},
  booktitle    = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2019)},
  series       = {Lecture Notes in Artificial Intelligence},
  volume       = {11906},
  pages        = {38--54},
  publisher    = {Springer},
  address      = {W{\"u}rzburg, Germany},
  year         = {2020},
  isbn         = {978-3-030-46150-8},
  doi          = {10.1007/978-3-030-46150-8_3},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Fischer, Jonas %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Sets of Robust Rules, and How to Find Them : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FEAE-8 %R 10.1007/978-3-030-46150-8_3 %D 2020 %B European Conference on Machine Learning and Knowledge Discovery in Databases %Z date of event: 2019-09-19 - 2019-09-20 %C Würzburg, Germany %B Machine Learning and Knowledge Discovery in Databases %P 38 - 54 %I Springer %@ 978-3-030-46150-8 %B Lecture Notes in Artificial Intelligence %N 11906
[24]
M. H. Gad-Elrab, D. Stepanova, T.-K. Tran, H. Adel, and G. Weikum, “ExCut: Explainable Embedding-Based Clustering over Knowledge Graphs,” in The Semantic Web -- ISWC 2020, Athens, Greece (Virtual Conference), 2020.
Export
BibTeX
@inproceedings{Gad_Elrab_ISWC2020,
  author       = {Gad-Elrab, Mohamed Hassan and Stepanova, Daria and Tran, Trung-Kien and Adel, Heike and Weikum, Gerhard},
  title        = {{ExCut}: {E}xplainable Embedding-Based Clustering over Knowledge Graphs},
  booktitle    = {The Semantic Web -- ISWC 2020},
  editor       = {Pan, Jeff Z. and Tamma, Valentina and D'Amato, Claudia and Janowicz, Krzysztof and Fu, Bo and Polleres, Axel and Seneviratne, Oshani and Kagal, Lalana},
  series       = {Lecture Notes in Computer Science},
  volume       = {12506},
  pages        = {218--237},
  publisher    = {Springer},
  address      = {Athens, Greece (Virtual Conference)},
  year         = {2020},
  isbn         = {978-3-030-62418-7},
  doi          = {10.1007/978-3-030-62419-4_13},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Gad-Elrab, Mohamed Hassan %A Stepanova, Daria %A Tran, Trung-Kien %A Adel, Heike %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T ExCut: Explainable Embedding-Based Clustering over Knowledge Graphs : %G eng %U http://hdl.handle.net/21.11116/0000-0007-830F-5 %R 10.1007/978-3-030-62419-4_13 %D 2020 %B 19th International Semantic Web Conference %Z date of event: 2020-11-02 - 2020-11-06 %C Athens, Greece (Virtual Conference) %B The Semantic Web -- ISWC 2020 %E Pan, Jeff Z.; Tamma, Valentina; D'Amato, Claudia; Janowicz, Krzysztof; Fu, Bo; Polleres, Axel; Seneviratne, Oshani; Kagal, Lalana %P 218 - 237 %I Springer %@ 978-3-030-62418-7 %B Lecture Notes in Computer Science %N 12506
[25]
M. H. Gad-Elrab, V. T. Ho, E. Levinkov, T.-K. Tran, and D. Stepanova, “Towards Utilizing Knowledge Graph Embedding Models for Conceptual Clustering,” in ISWC 2020 Posters, Demos, and Industry Tracks, Globally Online, 2020.
Export
BibTeX
@inproceedings{Gad-Elrab_ISCW20,
  author       = {Gad-Elrab, Mohamed Hassan and Ho, Vinh Thinh and Levinkov, Evgeny and Tran, Trung-Kien and Stepanova, Daria},
  title        = {Towards Utilizing Knowledge Graph Embedding Models for Conceptual Clustering},
  booktitle    = {ISWC 2020 Posters, Demos, and Industry Tracks},
  editor       = {Taylor, Kerry and Goncalves, Rafael and Lecue, Freddy and Yan, Jun},
  series       = {CEUR Workshop Proceedings},
  volume       = {2721},
  pages        = {281--286},
  eid          = {572},
  publisher    = {ceur-ws.org},
  address      = {Globally Online},
  year         = {2020},
  issn         = {1613-0073},
  url          = {http://ceur-ws.org/Vol-2721/paper572.pdf},
  urn          = {urn:nbn:de:0074-2721-6},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Gad-Elrab, Mohamed Hassan %A Ho, Vinh Thinh %A Levinkov, Evgeny %A Tran, Trung-Kien %A Stepanova, Daria %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T Towards Utilizing Knowledge Graph Embedding Models for Conceptual Clustering : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F86B-A %U http://ceur-ws.org/Vol-2721/paper572.pdf %D 2020 %B 19th International Semantic Web Conference %Z date of event: 2020-11-01 - 2020-11-06 %C Globally Online %B ISWC 2020 Posters, Demos, and Industry Tracks %E Taylor, Kerry; Goncalves, Rafael; Lecue, Freddy; Yan, Jun %P 281 - 286 %Z sequence number: 572 %I ceur-ws.org %B CEUR Workshop Proceedings %N 2721 %@ 1613-0073 %U http://ceur-ws.org/Vol-2721/paper572.pdf
[26]
A. Ghazimatin, O. Balalau, R. Saha Roy, and G. Weikum, “PRINCE: Provider-side Interpretability with Counterfactual Explanations in Recommender Systems,” in WSDM ’20, 13th International Conference on Web Search and Data Mining, Houston, TX, USA, 2020.
Export
BibTeX
@inproceedings{GhazimatinWSDM2020,
  author       = {Ghazimatin, Azin and Balalau, Oana and Saha Roy, Rishiraj and Weikum, Gerhard},
  title        = {{PRINCE}: {P}rovider-side Interpretability with Counterfactual Explanations in Recommender Systems},
  booktitle    = {WSDM '20, 13th International Conference on Web Search and Data Mining},
  editor       = {Caverlee, James and Hu, Xia Ben},
  pages        = {196--204},
  publisher    = {ACM},
  address      = {Houston, TX, USA},
  year         = {2020},
  isbn         = {978-1-4503-6822-3},
  doi          = {10.1145/3336191.3371824},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Ghazimatin, Azin %A Balalau, Oana %A Saha Roy, Rishiraj %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T PRINCE: Provider-side Interpretability with Counterfactual Explanations in Recommender Systems : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F173-7 %R 10.1145/3336191.3371824 %D 2020 %B 13th International Conference on Web Search and Data Mining %Z date of event: 2020-02-03 - 2020-02-07 %C Houston, TX, USA %B WSDM '20 %E Caverlee, James; Hu, Xia Ben %P 196 - 204 %I ACM %@ 978-1-4503-6822-3
[27]
S. Ghosh, S. Razniewski, and G. Weikum, “Uncovering Hidden Semantics of Set Information in Knowledge Bases,” 2020. [Online]. Available: http://arxiv.org/abs/2003.03155. (arXiv: 2003.03155)
Abstract
Knowledge Bases (KBs) contain a wealth of structured information about entities and predicates. This paper focuses on set-valued predicates, i.e., the relationship between an entity and a set of entities. In KBs, this information is often represented in two formats: (i) via counting predicates such as numberOfChildren and staffSize, that store aggregated integers, and (ii) via enumerating predicates such as parentOf and worksFor, that store individual set memberships. Both formats are typically complementary: unlike enumerating predicates, counting predicates do not give away individuals, but are more likely informative towards the true set size, thus this coexistence could enable interesting applications in question answering and KB curation. In this paper we aim at uncovering this hidden knowledge. We proceed in two steps. (i) We identify set-valued predicates from a given KB predicates via statistical and embedding-based features. (ii) We link counting predicates and enumerating predicates by a combination of co-occurrence, correlation and textual relatedness metrics. We analyze the prevalence of count information in four prominent knowledge bases, and show that our linking method achieves up to 0.55 F1 score in set predicate identification versus 0.40 F1 score of a random selection, and normalized discounted gains of up to 0.84 at position 1 and 0.75 at position 3 in relevant predicate alignments. Our predicate alignments are showcased in a demonstration system available at https://counqer.mpi-inf.mpg.de/spo.
Export
BibTeX
% arXiv e-print 2003.03155 (online resource).
@online{Ghosh_arXiv2003.03155,
  author       = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
  title        = {Uncovering Hidden Semantics of Set Information in Knowledge Bases},
  year         = {2020},
  eprinttype   = {arXiv},
  eprint       = {2003.03155},
  url          = {http://arxiv.org/abs/2003.03155},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {Knowledge Bases (KBs) contain a wealth of structured information about entities and predicates. This paper focuses on set-valued predicates, i.e., the relationship between an entity and a set of entities. In KBs, this information is often represented in two formats: (i) via counting predicates such as numberOfChildren and staffSize, that store aggregated integers, and (ii) via enumerating predicates such as parentOf and worksFor, that store individual set memberships. Both formats are typically complementary: unlike enumerating predicates, counting predicates do not give away individuals, but are more likely informative towards the true set size, thus this coexistence could enable interesting applications in question answering and KB curation. In this paper we aim at uncovering this hidden knowledge. We proceed in two steps. (i) We identify set-valued predicates from a given KB predicates via statistical and embedding-based features. (ii) We link counting predicates and enumerating predicates by a combination of co-occurrence, correlation and textual relatedness metrics. We analyze the prevalence of count information in four prominent knowledge bases, and show that our linking method achieves up to 0.55 F1 score in set predicate identification versus 0.40 F1 score of a random selection, and normalized discounted gains of up to 0.84 at position 1 and 0.75 at position 3 in relevant predicate alignments. Our predicate alignments are showcased in a demonstration system available at https://counqer.mpi-inf.mpg.de/spo.},
}
Endnote
%0 Report %A Ghosh, Shrestha %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Uncovering Hidden Semantics of Set Information in Knowledge Bases : %G eng %U http://hdl.handle.net/21.11116/0000-0007-0662-4 %U http://arxiv.org/abs/2003.03155 %D 2020 %X Knowledge Bases (KBs) contain a wealth of structured information about entities and predicates. This paper focuses on set-valued predicates, i.e., the relationship between an entity and a set of entities. In KBs, this information is often represented in two formats: (i) via counting predicates such as numberOfChildren and staffSize, that store aggregated integers, and (ii) via enumerating predicates such as parentOf and worksFor, that store individual set memberships. Both formats are typically complementary: unlike enumerating predicates, counting predicates do not give away individuals, but are more likely informative towards the true set size, thus this coexistence could enable interesting applications in question answering and KB curation. In this paper we aim at uncovering this hidden knowledge. We proceed in two steps. (i) We identify set-valued predicates from a given KB predicates via statistical and embedding-based features. (ii) We link counting predicates and enumerating predicates by a combination of co-occurrence, correlation and textual relatedness metrics. We analyze the prevalence of count information in four prominent knowledge bases, and show that our linking method achieves up to 0.55 F1 score in set predicate identification versus 0.40 F1 score of a random selection, and normalized discounted gains of up to 0.84 at position 1 and 0.75 at position 3 in relevant predicate alignments. 
Our predicate alignments are showcased in a demonstration system available at https://counqer.mpi-inf.mpg.de/spo. %K Computer Science, Databases, cs.DB,Computer Science, Information Retrieval, cs.IR
[28]
S. Ghosh, S. Razniewski, and G. Weikum, “Uncovering Hidden Semantics of Set Information in Knowledge Bases,” Journal of Web Semantics, vol. 64, 2020.
Export
BibTeX
% Journal of Web Semantics article, volume 64, article number 100588.
@article{Ghosh_2020,
  author       = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
  title        = {Uncovering Hidden Semantics of Set Information in Knowledge Bases},
  journal      = {Journal of Web Semantics},
  volume       = {64},
  eid          = {100588},
  year         = {2020},
  date         = {2020},
  publisher    = {Elsevier},
  address      = {Amsterdam},
  issn         = {1570-8268},
  doi          = {10.1016/j.websem.2020.100588},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Journal Article %A Ghosh, Shrestha %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Uncovering Hidden Semantics of Set Information in Knowledge Bases : %G eng %U http://hdl.handle.net/21.11116/0000-0007-066D-9 %R 10.1016/j.websem.2020.100588 %7 2020 %D 2020 %J Journal of Web Semantics %V 64 %Z sequence number: 100588 %I Elsevier %C Amsterdam %@ 1570-8268
[29]
S. Ghosh, S. Razniewski, and G. Weikum, “CounQER: A System for Discovering and Linking Count Information in Knowledge Bases,” in The Semantic Web: ESWC 2020 Satellite Events, Heraklion, Greece, 2020.
Export
BibTeX
% Paper in "The Semantic Web: ESWC 2020 Satellite Events" (LNCS 12124), pages 84--90.
@inproceedings{Ghosh_ESWC20,
  author       = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
  title        = {{CounQER}: {A} System for Discovering and Linking Count Information in Knowledge Bases},
  booktitle    = {The Semantic Web: ESWC 2020 Satellite Events},
  editor       = {Harth, Andreas and Presutti, Valentina and Troncy, Rapha{\"e}l and Acosta, Maribel and Polleres, Axel and Fern{\'a}ndez, Javier D. and Xavier Parreira, Josiane and Hartig, Olaf and Hose, Katja and Cochez, Michael},
  series       = {Lecture Notes in Computer Science},
  volume       = {12124},
  pages        = {84--90},
  year         = {2020},
  date         = {2020},
  publisher    = {Springer},
  address      = {Heraklion, Greece},
  isbn         = {978-3-030-62326-5},
  doi          = {10.1007/978-3-030-62327-2_15},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Ghosh, Shrestha %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T CounQER: A System for Discovering and Linking Count Information in Knowledge Bases : %G eng %U http://hdl.handle.net/21.11116/0000-0007-EFB9-C %R 10.1007/978-3-030-62327-2_15 %D 2020 %B 17th Extended Semantic Web Conference %Z date of event: 2020-05-31 - 2020-06-04 %C Heraklion, Greece %B The Semantic Web: ESWC 2020 Satellite Events %E Harth, Andreas; Presutti, Valentina; Troncy, Raphaël; Acosta, Maribel; Polleres, Axel; Fernández, Javier D.; Xavier Parreira, Josiane; Hartig, Olaf; Hose, Katja; Cochez, Michael %P 84 - 90 %I Springer %@ 978-3-030-62326-5 %B Lecture Notes in Computer Science %N 12124
[30]
S. Ghosh, S. Razniewski, and G. Weikum, “CounQER: A System for Discovering and Linking Count Information in Knowledge Bases,” 2020. [Online]. Available: https://arxiv.org/abs/2005.03529. (arXiv: 2005.03529)
Abstract
Predicate constraints of general-purpose knowledge bases (KBs) like Wikidata, DBpedia and Freebase are often limited to subproperty, domain and range constraints. In this demo we showcase CounQER, a system that illustrates the alignment of counting predicates, like staffSize, and enumerating predicates, like workInstitution⁻¹. In the demonstration session, attendees can inspect these alignments, and will learn about the importance of these alignments for KB question answering and curation. CounQER is available at https://counqer.mpi-inf.mpg.de/spo.
Export
BibTeX
% arXiv e-print 2005.03529 (online resource).
% The inverse-predicate superscript in the abstract is set in math mode so the
% field can be typeset without a "Missing $ inserted" error.
@online{Ghosh_2005.03529,
  author       = {Ghosh, Shrestha and Razniewski, Simon and Weikum, Gerhard},
  title        = {{CounQER}: {A} System for Discovering and Linking Count Information in Knowledge Bases},
  year         = {2020},
  eprinttype   = {arXiv},
  eprint       = {2005.03529},
  url          = {https://arxiv.org/abs/2005.03529},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {Predicate constraints of general-purpose knowledge bases (KBs) like Wikidata, DBpedia and Freebase are often limited to subproperty, domain and range constraints. In this demo we showcase CounQER, a system that illustrates the alignment of counting predicates, like staffSize, and enumerating predicates, like workInstitution$^{-1}$. In the demonstration session, attendees can inspect these alignments, and will learn about the importance of these alignments for KB question answering and curation. CounQER is available at https://counqer.mpi-inf.mpg.de/spo.},
}
Endnote
%0 Report %A Ghosh, Shrestha %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T CounQER: A System for Discovering and Linking Count Information in Knowledge Bases : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F187-0 %U https://arxiv.org/abs/2005.03529 %D 2020 %X Predicate constraints of general-purpose knowledge bases (KBs) like Wikidata, DBpedia and Freebase are often limited to subproperty, domain and range constraints. In this demo we showcase CounQER, a system that illustrates the alignment of counting predicates, like staffSize, and enumerating predicates, like workInstitution^{-1} . In the demonstration session, attendees can inspect these alignments, and will learn about the importance of these alignments for KB question answering and curation. CounQER is available at https://counqer.mpi-inf.mpg.de/spo. %K Computer Science, Information Retrieval, cs.IR,Computer Science, Artificial Intelligence, cs.AI,Computer Science, Databases, cs.DB
[31]
D. Gupta and K. Berberich, “Weaving Text into Tables,” in CIKM ’20, 29th ACM International Conference on Information & Knowledge Management, Virtual Event, Ireland, 2020.
Export
BibTeX
% Paper in the CIKM '20 proceedings (ACM).
% Page range corrected: the exported end page "34049" carried a stray digit.
@inproceedings{DBLP:conf/cikm/0001B20,
  author       = {Gupta, Dhruv and Berberich, Klaus},
  title        = {Weaving Text into Tables},
  booktitle    = {CIKM '20, 29th ACM International Conference on Information \& Knowledge Management},
  editor       = {d{\textquoteright}Aquin, Mathieu and Dietze, Stefan},
  pages        = {3401--3404},
  year         = {2020},
  date         = {2020},
  publisher    = {ACM},
  address      = {Virtual Event, Ireland},
  isbn         = {978-1-4503-6859-9},
  doi          = {10.1145/3340531.3417442},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Gupta, Dhruv %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Weaving Text into Tables : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0313-F %R 10.1145/3340531.3417442 %D 2020 %B 29th ACM International Conference on Information & Knowledge Management %Z date of event: 2020-10-19 - 2020-10-23 %C Virtual Event, Ireland %B CIKM '20 %E d’Aquin, Mathieu; Dietze, Stefan %P 3401 - 3404 %I ACM %@ 978-1-4503-6859-9
[32]
D. Gupta and K. Berberich, “Optimizing Hyper-Phrase Queries,” in ICTIR ’20, ACM SIGIR International Conference on Theory of Information Retrieval, Virtual Event, Norway, 2020.
Export
BibTeX
% Paper in the ICTIR '20 proceedings (ACM), pages 41--48.
@inproceedings{DBLP:conf/ictir/0002B20,
  author       = {Gupta, Dhruv and Berberich, Klaus},
  title        = {Optimizing Hyper-Phrase Queries},
  booktitle    = {ICTIR '20, ACM SIGIR International Conference on Theory of Information Retrieval},
  editor       = {Balog, Krisztian and Setty, Vinay and Lioma, Christina and Liu, Yiqun and Zhang, Min and Berberich, Klaus},
  pages        = {41--48},
  year         = {2020},
  publisher    = {ACM},
  address      = {Virtual Event, Norway},
  isbn         = {978-1-4503-8067-6},
  doi          = {10.1145/3409256.3409827},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Gupta, Dhruv %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Optimizing Hyper-Phrase Queries : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0335-9 %R 10.1145/3409256.3409827 %D 2020 %B ACM SIGIR International Conference on Theory of Information Retrieval %Z date of event: 2020-09-14 - 2020-09-17 %C Virtual Event, Norway %B ICTIR '20 %E Balog, Krisztian; Setty, Vinay; Lioma, Christina; Liu, Yiqun; Zhang, Min; Berberich, Klaus %P 41 - 48 %I ACM %@ 978-1-4503-8067-6
[33]
E. Heiter, “Factoring Out Prior Knowledge from Low-dimensional Embeddings,” Universität des Saarlandes, Saarbrücken, 2020.
Export
BibTeX
% Master's thesis, Universitaet des Saarlandes, 2020.
@mastersthesis{heiter:20:confetti,
  author       = {Heiter, Edith},
  title        = {Factoring Out Prior Knowledge from Low-dimensional Embeddings},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2020},
  date         = {2020},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Thesis %A Heiter, Edith %Y Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Factoring Out Prior Knowledge from Low-dimensional Embeddings : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FEF8-4 %I Universität des Saarlandes %C Saarbrücken %D 2020 %V master %9 master
[34]
V. T. Ho, K. Pal, N. Kleer, K. Berberich, and G. Weikum, “Entities with Quantities: Extraction, Search, and Ranking,” in WSDM ’20, 13th International Conference on Web Search and Data Mining, Houston, TX, USA, 2020.
Export
BibTeX
% Paper in the WSDM '20 proceedings (ACM), pages 833--836.
% ISBN is written in hyphenated form, matching the other ACM entries in this
% file; the case-protected word in the title is braced as a whole word.
@inproceedings{HoWSDM2020,
  author       = {Ho, Vinh Thinh and Pal, Koninika and Kleer, Niko and Berberich, Klaus and Weikum, Gerhard},
  title        = {Entities with Quantities: {Extraction}, Search, and Ranking},
  booktitle    = {WSDM '20, 13th International Conference on Web Search and Data Mining},
  editor       = {Caverlee, James and Hu, Xia Ben},
  pages        = {833--836},
  year         = {2020},
  publisher    = {ACM},
  address      = {Houston, TX, USA},
  isbn         = {978-1-4503-6822-3},
  doi          = {10.1145/3336191.3371860},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Ho, Vinh Thinh %A Pal, Koninika %A Kleer, Niko %A Berberich, Klaus %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Entities with Quantities: Extraction, Search, and Ranking : %G eng %U http://hdl.handle.net/21.11116/0000-0006-A284-D %R 10.1145/3336191.3371860 %D 2020 %B 13th International Conference on Web Search and Data Mining %Z date of event: 2020-02-03 - 2020-02-07 %C Houston, TX, USA %B WSDM '20 %E Caverlee, James; Hu, Xia Ben %P 833 - 836 %I ACM %@ 9781450368223
[35]
M. Kaiser, R. Saha Roy, and G. Weikum, “Conversational Question Answering over Passages by Leveraging Word Proximity Networks,” in SIGIR ’20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, China, 2020.
Export
BibTeX
% Paper in the SIGIR '20 proceedings (ACM), pages 2129--2132.
% ISBN is written in hyphenated form, matching the other ACM entries in this file.
@inproceedings{Kaiser_SIGIR20,
  author       = {Kaiser, Magdalena and Saha Roy, Rishiraj and Weikum, Gerhard},
  title        = {Conversational Question Answering over Passages by Leveraging Word Proximity Networks},
  booktitle    = {SIGIR '20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages        = {2129--2132},
  year         = {2020},
  publisher    = {ACM},
  address      = {Virtual Event, China},
  isbn         = {978-1-4503-8016-4},
  doi          = {10.1145/3397271.3401399},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Kaiser, Magdalena %A Saha Roy, Rishiraj %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Conversational Question Answering over Passages by Leveraging Word Proximity Networks : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F152-C %R 10.1145/3397271.3401399 %D 2020 %B 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2020-07-25 - 2020-07-30 %C Virtual Event, China %B SIGIR '20 %P 2129 - 2132 %I ACM %@ 9781450380164
[36]
M. Kaiser, R. Saha Roy, and G. Weikum, “Conversational Question Answering over Passages by Leveraging Word Proximity Networks,” 2020. [Online]. Available: https://arxiv.org/abs/2004.13117. (arXiv: 2004.13117)
Abstract
Question answering (QA) over text passages is a problem of long-standing interest in information retrieval. Recently, the conversational setting has attracted attention, where a user asks a sequence of questions to satisfy her information needs around a topic. While this setup is a natural one and similar to humans conversing with each other, it introduces two key research challenges: understanding the context left implicit by the user in follow-up questions, and dealing with ad hoc question formulations. In this work, we demonstrate CROWN (Conversational passage ranking by Reasoning Over Word Networks): an unsupervised yet effective system for conversational QA with passage responses, that supports several modes of context propagation over multiple turns. To this end, CROWN first builds a word proximity network (WPN) from large corpora to store statistically significant term co-occurrences. At answering time, passages are ranked by a combination of their similarity to the question, and coherence of query terms within: these factors are measured by reading off node and edge weights from the WPN. CROWN provides an interface that is both intuitive for end-users, and insightful for experts for reconfiguration to individual setups. CROWN was evaluated on TREC CAsT data, where it achieved above-median performance in a pool of neural methods.
Export
BibTeX
% arXiv e-print 2004.13117 (online resource).
@online{Kaiser_2004.13117,
  author       = {Kaiser, Magdalena and Saha Roy, Rishiraj and Weikum, Gerhard},
  title        = {Conversational Question Answering over Passages by Leveraging Word Proximity Networks},
  year         = {2020},
  eprinttype   = {arXiv},
  eprint       = {2004.13117},
  url          = {https://arxiv.org/abs/2004.13117},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {Question answering (QA) over text passages is a problem of long-standing interest in information retrieval. Recently, the conversational setting has attracted attention, where a user asks a sequence of questions to satisfy her information needs around a topic. While this setup is a natural one and similar to humans conversing with each other, it introduces two key research challenges: understanding the context left implicit by the user in follow-up questions, and dealing with ad hoc question formulations. In this work, we demonstrate CROWN (Conversational passage ranking by Reasoning Over Word Networks): an unsupervised yet effective system for conversational QA with passage responses, that supports several modes of context propagation over multiple turns. To this end, CROWN first builds a word proximity network (WPN) from large corpora to store statistically significant term co-occurrences. At answering time, passages are ranked by a combination of their similarity to the question, and coherence of query terms within: these factors are measured by reading off node and edge weights from the WPN. CROWN provides an interface that is both intuitive for end-users, and insightful for experts for reconfiguration to individual setups. CROWN was evaluated on TREC CAsT data, where it achieved above-median performance in a pool of neural methods.},
}
Endnote
%0 Report %A Kaiser, Magdalena %A Saha Roy, Rishiraj %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Conversational Question Answering over Passages by Leveraging Word Proximity Networks : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F17D-D %U https://arxiv.org/abs/2004.13117 %D 2020 %X Question answering (QA) over text passages is a problem of long-standing interest in information retrieval. Recently, the conversational setting has attracted attention, where a user asks a sequence of questions to satisfy her information needs around a topic. While this setup is a natural one and similar to humans conversing with each other, it introduces two key research challenges: understanding the context left implicit by the user in follow-up questions, and dealing with ad hoc question formulations. In this work, we demonstrate CROWN (Conversational passage ranking by Reasoning Over Word Networks): an unsupervised yet effective system for conversational QA with passage responses, that supports several modes of context propagation over multiple turns. To this end, CROWN first builds a word proximity network (WPN) from large corpora to store statistically significant term co-occurrences. At answering time, passages are ranked by a combination of their similarity to the question, and coherence of query terms within: these factors are measured by reading off node and edge weights from the WPN. CROWN provides an interface that is both intuitive for end-users, and insightful for experts for reconfiguration to individual setups. CROWN was evaluated on TREC CAsT data, where it achieved above-median performance in a pool of neural methods. %K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[37]
M. Kaiser, “Incorporating User Feedback in Conversational Question Answering over Heterogeneous Web Sources,” in SIGIR ’20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, China, 2020.
Export
BibTeX
% Single-page contribution in the SIGIR '20 proceedings (ACM).
% The degenerate range "2482--2482" is written as the single page it denotes;
% ISBN is hyphenated to match the other ACM entries in this file.
@inproceedings{Kaiser_SIGIR20b,
  author       = {Kaiser, Magdalena},
  title        = {Incorporating User Feedback in Conversational Question Answering over Heterogeneous {Web} Sources},
  booktitle    = {SIGIR '20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages        = {2482},
  year         = {2020},
  publisher    = {ACM},
  address      = {Virtual Event, China},
  isbn         = {978-1-4503-8016-4},
  doi          = {10.1145/3397271.3401454},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Kaiser, Magdalena %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Incorporating User Feedback in Conversational Question Answering over Heterogeneous Web Sources : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FCDA-8 %R 10.1145/3397271.3401454 %D 2020 %B 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2020-07-25 - 2020-07-30 %C Virtual Event, China %B SIGIR '20 %P 2482 - 2482 %I ACM %@ 9781450380164
[38]
P. Lahoti, A. Beutel, J. Chen, K. Lee, F. Prost, N. Thain, X. Wang, and E. Chi, “Fairness without Demographics through Adversarially Reweighted Learning,” in Advances in Neural Information Processing Systems 33 (NeurIPS 2020), Virtual Event, 2020.
Export
BibTeX
% Paper in "Advances in Neural Information Processing Systems 33 (NeurIPS 2020)".
% NOTE(review): no page range or URL is recorded in this export; confirm against the proceedings.
@inproceedings{DBLP:conf/nips/LahotiBCLPT0C20,
  author       = {Lahoti, Preethi and Beutel, Alex and Chen, Jilin and Lee, Kang and Prost, Flavien and Thain, Nithum and Wang, Xuezhi and Chi, Ed},
  title        = {Fairness without Demographics through Adversarially Reweighted Learning},
  booktitle    = {Advances in Neural Information Processing Systems 33 (NeurIPS 2020)},
  editor       = {Larochelle, Hugo and Ranzato, Marc Aurelio and Hadsell, Raia and Balcan, Maria-Florina and Lin, Hsuan-Tien},
  year         = {2020},
  publisher    = {Curran Associates, Inc.},
  address      = {Virtual Event},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Lahoti, Preethi %A Beutel, Alex %A Chen, Jilin %A Lee, Kang %A Prost, Flavien %A Thain, Nithum %A Wang, Xuezhi %A Chi, Ed %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations %T Fairness without Demographics through Adversarially Reweighted Learning : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FCC2-2 %D 2020 %B 34th Conference on Neural Information Processing Systems %Z date of event: 2020-12-06 - 2020-12-12 %C Virtual Event %B Advances in Neural Information Processing Systems 33 %E Larochelle, Hugo; Ranzato, Marc Aurelio; Hadsell, Raia; Balcan, Maria-Florina; Lin, Hsuan-Tien %I Curran Associates, Inc. %U https://proceedings.neurips.cc/paper/2020/hash/07fc15c9d169ee48573edd749d25945d-Abstract.html
[39]
C. Li, A. Yates, S. MacAvaney, B. He, and Y. Sun, “PARADE: Passage Representation Aggregation for Document Reranking,” 2020. [Online]. Available: https://arxiv.org/abs/2008.09093. (arXiv: 2008.09093)
Abstract
We present PARADE, an end-to-end Transformer-based model that considers document-level context for document reranking. PARADE leverages passage-level relevance representations to predict a document relevance score, overcoming the limitations of previous approaches that perform inference on passages independently. Experiments on two ad-hoc retrieval benchmarks demonstrate PARADE's effectiveness over such methods. We conduct extensive analyses on PARADE's efficiency, highlighting several strategies for improving it. When combined with knowledge distillation, a PARADE model with 72% fewer parameters achieves effectiveness competitive with previous approaches using BERT-Base. Our code is available at https://github.com/canjiali/PARADE.
Export
BibTeX
% arXiv e-print 2008.09093 (online resource).
@online{Li2008.09093,
  author       = {Li, Canjia and Yates, Andrew and MacAvaney, Sean and He, Ben and Sun, Yingfei},
  title        = {{PARADE}: Passage Representation Aggregation for Document Reranking},
  year         = {2020},
  eprinttype   = {arXiv},
  eprint       = {2008.09093},
  url          = {https://arxiv.org/abs/2008.09093},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {We present PARADE, an end-to-end Transformer-based model that considers document-level context for document reranking. PARADE leverages passage-level relevance representations to predict a document relevance score, overcoming the limitations of previous approaches that perform inference on passages independently. Experiments on two ad-hoc retrieval benchmarks demonstrate PARADE's effectiveness over such methods. We conduct extensive analyses on PARADE's efficiency, highlighting several strategies for improving it. When combined with knowledge distillation, a PARADE model with 72\% fewer parameters achieves effectiveness competitive with previous approaches using BERT-Base. Our code is available at \url{https://github.com/canjiali/PARADE}.},
}
Endnote
%0 Report %A Li, Canjia %A Yates, Andrew %A MacAvaney, Sean %A He, Ben %A Sun, Yingfei %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T PARADE: Passage Representation Aggregation for Document Reranking : %G eng %U http://hdl.handle.net/21.11116/0000-0008-06CF-9 %U https://arxiv.org/abs/2008.09093 %D 2020 %X We present PARADE, an end-to-end Transformer-based model that considers document-level context for document reranking. PARADE leverages passage-level relevance representations to predict a document relevance score, overcoming the limitations of previous approaches that perform inference on passages independently. Experiments on two ad-hoc retrieval benchmarks demonstrate PARADE's effectiveness over such methods. We conduct extensive analyses on PARADE's efficiency, highlighting several strategies for improving it. When combined with knowledge distillation, a PARADE model with 72\% fewer parameters achieves effectiveness competitive with previous approaches using BERT-Base. Our code is available at \url{https://github.com/canjiali/PARADE}. %K Computer Science, Information Retrieval, cs.IR
[40]
J. Lin, R. Nogueira, and A. Yates, “Pretrained Transformers for Text Ranking: BERT and Beyond,” 2020. [Online]. Available: https://arxiv.org/abs/2010.06467. (arXiv: 2010.06467)
Abstract
The goal of text ranking is to generate an ordered list of texts retrieved from a corpus in response to a query. Although the most common formulation of text ranking is search, instances of the task can also be found in many natural language processing applications. This survey provides an overview of text ranking with neural network architectures known as transformers, of which BERT is the best-known example. The combination of transformers and self-supervised pretraining has, without exaggeration, revolutionized the fields of natural language processing (NLP), information retrieval (IR), and beyond. In this survey, we provide a synthesis of existing work as a single point of entry for practitioners who wish to gain a better understanding of how to apply transformers to text ranking problems and researchers who wish to pursue work in this area. We cover a wide range of modern techniques, grouped into two high-level categories: transformer models that perform reranking in multi-stage ranking architectures and learned dense representations that attempt to perform ranking directly. There are two themes that pervade our survey: techniques for handling long documents, beyond the typical sentence-by-sentence processing approaches used in NLP, and techniques for addressing the tradeoff between effectiveness (result quality) and efficiency (query latency). Although transformer architectures and pretraining techniques are recent innovations, many aspects of how they are applied to text ranking are relatively well understood and represent mature techniques. However, there remain many open research questions, and thus in addition to laying out the foundations of pretrained transformers for text ranking, this survey also attempts to prognosticate where the field is heading.
Export
BibTeX
@online{Lin2010.06467,
  author       = {Lin, Jimmy and Nogueira, Rodrigo and Yates, Andrew},
  title        = {Pretrained Transformers for Text Ranking: {BERT} and Beyond},
  year         = {2020},
  eprint       = {2010.06467},
  eprinttype   = {arXiv},
  url          = {https://arxiv.org/abs/2010.06467},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {The goal of text ranking is to generate an ordered list of texts retrieved from a corpus in response to a query. Although the most common formulation of text ranking is search, instances of the task can also be found in many natural language processing applications. This survey provides an overview of text ranking with neural network architectures known as transformers, of which BERT is the best-known example. The combination of transformers and self-supervised pretraining has, without exaggeration, revolutionized the fields of natural language processing (NLP), information retrieval (IR), and beyond. In this survey, we provide a synthesis of existing work as a single point of entry for practitioners who wish to gain a better understanding of how to apply transformers to text ranking problems and researchers who wish to pursue work in this area. We cover a wide range of modern techniques, grouped into two high-level categories: transformer models that perform reranking in multi-stage ranking architectures and learned dense representations that attempt to perform ranking directly. There are two themes that pervade our survey: techniques for handling long documents, beyond the typical sentence-by-sentence processing approaches used in NLP, and techniques for addressing the tradeoff between effectiveness (result quality) and efficiency (query latency). Although transformer architectures and pretraining techniques are recent innovations, many aspects of how they are applied to text ranking are relatively well understood and represent mature techniques.
However, there remain many open research questions, and thus in addition to laying out the foundations of pretrained transformers for text ranking, this survey also attempts to prognosticate where the field is heading.},
}
Endnote
%0 Report %A Lin, Jimmy %A Nogueira, Rodrigo %A Yates, Andrew %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Pretrained Transformers for Text Ranking: BERT and Beyond : %G eng %U http://hdl.handle.net/21.11116/0000-0008-06DA-C %U https://arxiv.org/abs/2010.06467 %D 2020 %X The goal of text ranking is to generate an ordered list of texts retrieved from a corpus in response to a query. Although the most common formulation of text ranking is search, instances of the task can also be found in many natural language processing applications. This survey provides an overview of text ranking with neural network architectures known as transformers, of which BERT is the best-known example. The combination of transformers and self-supervised pretraining has, without exaggeration, revolutionized the fields of natural language processing (NLP), information retrieval (IR), and beyond. In this survey, we provide a synthesis of existing work as a single point of entry for practitioners who wish to gain a better understanding of how to apply transformers to text ranking problems and researchers who wish to pursue work in this area. We cover a wide range of modern techniques, grouped into two high-level categories: transformer models that perform reranking in multi-stage ranking architectures and learned dense representations that attempt to perform ranking directly. There are two themes that pervade our survey: techniques for handling long documents, beyond the typical sentence-by-sentence processing approaches used in NLP, and techniques for addressing the tradeoff between effectiveness (result quality) and efficiency (query latency). Although transformer architectures and pretraining techniques are recent innovations, many aspects of how they are applied to text ranking are relatively well understood and represent mature techniques. 
However, there remain many open research questions, and thus in addition to laying out the foundations of pretrained transformers for text ranking, this survey also attempts to prognosticate where the field is heading. %K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[41]
P. Mandros, M. Boley, and J. Vreeken, “Discovering Dependencies with Reliable Mutual Information,” Knowledge and Information Systems, vol. 62, 2020.
Export
BibTeX
@article{Mandros2020,
  author       = {Mandros, Panagiotis and Boley, Mario and Vreeken, Jilles},
  title        = {Discovering Dependencies with Reliable Mutual Information},
  journal      = {Knowledge and Information Systems},
  volume       = {62},
  pages        = {4223--4253},
  year         = {2020},
  publisher    = {Springer},
  address      = {New York, NY},
  issn         = {0219-3116},
  doi          = {10.1007/s10115-020-01494-9},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Journal Article %A Mandros, Panagiotis %A Boley, Mario %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering Dependencies with Reliable Mutual Information : %G eng %U http://hdl.handle.net/21.11116/0000-0006-DC90-F %R 10.1007/s10115-020-01494-9 %7 2020 %D 2020 %J Knowledge and Information Systems %V 62 %& 4223 %P 4223 - 4253 %I Springer %C New York, NY %@ false
[42]
S. Nag Chowdhury, W. Cheng, G. de Melo, S. Razniewski, and G. Weikum, “Illustrate Your Story: Enriching Text with Images,” in WSDM ’20, 13th International Conference on Web Search and Data Mining, Houston, TX, USA, 2020.
Export
BibTeX
@inproceedings{NagWSDM2020,
  author       = {Nag Chowdhury, Sreyasi and Cheng, William and de Melo, Gerard and Razniewski, Simon and Weikum, Gerhard},
  title        = {Illustrate Your Story: {Enriching} Text with Images},
  booktitle    = {WSDM '20, 13th International Conference on Web Search and Data Mining},
  editor       = {Caverlee, James and Hu, Xia Ben},
  pages        = {849--852},
  year         = {2020},
  publisher    = {ACM},
  address      = {Houston, TX, USA},
  isbn         = {9781450368223},
  doi          = {10.1145/3336191.3371866},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Nag Chowdhury, Sreyasi %A Cheng, William %A de Melo, Gerard %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Illustrate Your Story: Enriching Text with Images : %G eng %U http://hdl.handle.net/21.11116/0000-0006-A27C-8 %R 10.1145/3336191.3371866 %D 2020 %B 13th International Conference on Web Search and Data Mining %Z date of event: 2020-02-03 - 2020-02-07 %C Houston, TX, USA %B WSDM '20 %E Caverlee, James; Hu, Xia Ben %P 849 - 852 %I ACM %@ 9781450368223
[43]
T.-P. Nguyen, “Advanced Semantics for Commonsense Knowledge Extraction,” Universität des Saarlandes, Saarbrücken, 2020.
Abstract
Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
Export
BibTeX
@mastersthesis{NguyenMSc2020,
  author       = {Nguyen, Tuan-Phong},
  title        = {Advanced Semantics for Commonsense Knowledge Extraction},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2020},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.},
}
Endnote
%0 Thesis %A Nguyen, Tuan-Phong %Y Razniewski, Simon %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Advanced Semantics for Commonsense Knowledge Extraction : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FED0-0 %I Universität des Saarlandes %C Saarbrücken %D 2020 %P 67 p. %V master %9 master %X Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This thesis presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
[44]
T.-P. Nguyen, S. Razniewski, and G. Weikum, “Advanced Semantics for Commonsense Knowledge Extraction,” WWW 2021, 2020. [Online]. Available: https://arxiv.org/abs/2011.00905. (arXiv: 2011.00905)
Abstract
Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This paper presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.
Export
BibTeX
@online{Nguyen_2011.00905,
  author       = {Nguyen, Tuan-Phong and Razniewski, Simon and Weikum, Gerhard},
  title        = {Advanced Semantics for Commonsense Knowledge Extraction},
  year         = {2020},
  eprint       = {2011.00905},
  eprinttype   = {arXiv},
  url          = {https://arxiv.org/abs/2011.00905},
  note         = {WWW 2021},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This paper presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent.},
}
Endnote
%0 Report %A Nguyen, Tuan-Phong %A Razniewski, Simon %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Advanced Semantics for Commonsense Knowledge Extraction : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FEDA-6 %U https://arxiv.org/abs/2011.00905 %D 2020 %X Commonsense knowledge (CSK) about concepts and their properties is useful for AI applications such as robust chatbots. Prior works like ConceptNet, TupleKB and others compiled large CSK collections, but are restricted in their expressiveness to subject-predicate-object (SPO) triples with simple concepts for S and monolithic strings for P and O. Also, these projects have either prioritized precision or recall, but hardly reconcile these complementary goals. This paper presents a methodology, called Ascent, to automatically build a large-scale knowledge base (KB) of CSK assertions, with advanced expressiveness and both better precision and recall than prior works. Ascent goes beyond triples by capturing composite concepts with subgroups and aspects, and by refining assertions with semantic facets. The latter are important to express temporal and spatial validity of assertions and further qualifiers. Ascent combines open information extraction with judicious cleaning using language models. Intrinsic evaluation shows the superior size and quality of the Ascent KB, and an extrinsic evaluation for QA-support tasks underlines the benefits of Ascent. %K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Computation and Language, cs.CL %J WWW 2021
[45]
A. Oláh, “What’s in the Box? Explaining Neural Networks with Robust Rules,” Universität des Saarlandes, Saarbrücken, 2020.
Export
BibTeX
@mastersthesis{olah:20:explainn,
  author       = {Ol{\'a}h, Anna},
  title        = {What's in the Box? Explaining Neural Networks with Robust Rules},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2020},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Thesis %A Oláh, Anna %Y Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T What's in the Box? Explaining Neural Networks with Robust Rules : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FEFA-2 %I Universität des Saarlandes %C Saarbrücken %D 2020 %V master %9 master
[46]
K. Pal, V. T. Ho, and G. Weikum, “Co-Clustering Triples from Open Information Extraction,” in Proceedings of the 7th ACM IKDD CoDS and 25th COMAD (CoDS-COMAD 2020), Hyderabad, India, 2020.
Export
BibTeX
@inproceedings{Pal_CoDS2020,
  author       = {Pal, Koninika and Ho, Vinh Thinh and Weikum, Gerhard},
  title        = {Co-Clustering Triples from Open Information Extraction},
  booktitle    = {Proceedings of the 7th ACM IKDD CoDS and 25th COMAD (CoDS-COMAD 2020)},
  editor       = {Bhattacharya, Arnab and Natarajan, Sriraam and Saha Roy, Rishiraj},
  pages        = {190--194},
  year         = {2020},
  publisher    = {ACM},
  address      = {Hyderabad, India},
  isbn         = {9781450377386},
  doi          = {10.1145/3371158.3371183},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Pal, Koninika %A Ho, Vinh Thinh %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Co-Clustering Triples from Open Information Extraction : %G eng %U http://hdl.handle.net/21.11116/0000-0007-EBFC-5 %R 10.1145/3371158.3371183 %D 2020 %B ACM India Joint International Conference on Data Science and Management of Data %Z date of event: 2020-01-05 - 2020-01-07 %C Hyderabad, India %B Proceedings of the 7th ACM IKDD CoDS and 25th COMAD %E Bhattacharya, Arnab; Natarajan, Sriraam; Saha Roy, Rishiraj %P 190 - 194 %I ACM %@ 9781450377386
[47]
T. Pellissier Tanon, G. Weikum, and F. Suchanek, “YAGO 4: A Reason-able Knowledge Base,” in The Semantic Web (ESWC 2020), Heraklion, Greece, 2020.
Export
BibTeX
@inproceedings{Pellissier_ESCW2020,
  author       = {Pellissier Tanon, Thomas and Weikum, Gerhard and Suchanek, Fabian},
  title        = {{YAGO 4}: {A} Reason-able Knowledge Base},
  booktitle    = {The Semantic Web (ESWC 2020)},
  editor       = {Harth, Andreas and Kirrane, Sabrina and Ngonga Ngomo, Axel-Cyrille and Paulheim, Heiko and Rula, Anisa and Gentile, Anna Lisa and Haase, Peter and Cochez, Michael},
  series       = {Lecture Notes in Computer Science},
  volume       = {12123},
  pages        = {583--596},
  year         = {2020},
  publisher    = {Springer},
  address      = {Heraklion, Greece},
  isbn         = {978-3-030-49460-5},
  doi          = {10.1007/978-3-030-49461-2_34},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Pellissier Tanon, Thomas %A Weikum, Gerhard %A Suchanek, Fabian %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T YAGO 4: A Reason-able Knowledge Base : %G eng %U http://hdl.handle.net/21.11116/0000-0007-EFC8-B %R 10.1007/978-3-030-49461-2_34 %D 2020 %B 17th Extended Semantic Web Conference %Z date of event: 2020-05-31 - 2020-06-04 %C Heraklion, Greece %B The Semantic Web %E Harth, Andreas; Kirrane, Sabrina; Ngonga Ngomo, Axel-Cyrille; Paulheim, Heiko; Rula, Anisa; Gentile, Anna Lisa; Haase, Peter; Cochez, Michael %P 583 - 596 %I Springer %@ 978-3-030-49460-5 %B Lecture Notes in Computer Science %N 12123
[48]
F. Pennerath, P. Mandros, and J. Vreeken, “Discovering Approximate Functional Dependencies using Smoothed Mutual Information,” in KDD ’20, 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, Virtual Event, USA, 2020.
Export
BibTeX
@inproceedings{penerath:20:smooth,
  author       = {Pennerath, Fr{\'e}d{\'e}ric and Mandros, Panagiotis and Vreeken, Jilles},
  title        = {Discovering Approximate Functional Dependencies using Smoothed Mutual Information},
  booktitle    = {KDD '20, 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  editor       = {Gupta, Rajesh and Liu, Yan and Tang, Jiliang and Prakash, B. Aditya},
  pages        = {1254--1264},
  year         = {2020},
  publisher    = {ACM},
  address      = {Virtual Event, USA},
  isbn         = {978-1-4503-7998-4},
  doi          = {10.1145/3394486.3403178},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Pennerath, Frédéric %A Mandros, Panagiotis %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Discovering Approximate Functional Dependencies using Smoothed Mutual Information : %G eng %U http://hdl.handle.net/21.11116/0000-0008-2560-2 %R 10.1145/3394486.3403178 %D 2020 %B 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining %Z date of event: 2020-08-23 - 2020-08-27 %C Virtual Event, USA %B KDD '20 %E Gupta, Rajesh; Liu, Yan; Tang, Jiliang; Prakash, B. Aditya %P 1254 - 1264 %I ACM %@ 978-1-4503-7998-4
[49]
S. Qiu, B. Xu, J. Zhang, Y. Wang, X. Shen, G. de Melo, C. Long, and X. Li, “EasyAug: An Automatic Textual Data Augmentation Platform for Classification Tasks,” in Companion of The World Wide Web Conference (WWW 2020), Taipei, Taiwan, 2020.
Export
BibTeX
@inproceedings{qiu2020easyaug,
  author       = {Qiu, Siyuan and Xu, Binxia and Zhang, Jie and Wang, Yafang and Shen, Xiaoyu and de Melo, Gerard and Long, Chong and Li, Xiaolong},
  title        = {{EasyAug}: {An} Automatic Textual Data Augmentation Platform for Classification Tasks},
  booktitle    = {Companion of The World Wide Web Conference (WWW 2020)},
  editor       = {El Fallah, Amal and Sukthankar, Gita and Liu, Tie-Yan and van Steen, Maarten},
  pages        = {249--252},
  year         = {2020},
  publisher    = {ACM},
  address      = {Taipei, Taiwan},
  isbn         = {978-1-4503-7024-0},
  doi          = {10.1145/3366424.3383552},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Qiu, Siyuan %A Xu, Binxia %A Zhang, Jie %A Wang, Yafang %A Shen, Xiaoyu %A de Melo, Gerard %A Long, Chong %A Li, Xiaolong %+ External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T EasyAug: An Automatic Textual Data Augmentation Platform for Classification Tasks : %G eng %U http://hdl.handle.net/21.11116/0000-0008-143B-0 %R 10.1145/3366424.3383552 %D 2020 %B The World Wide Web Conference %Z date of event: 2020-04-20 - 2020-04-24 %C Taipei, Taiwan %B Companion of The World Wide Web Conference %E El Fallah, Amal; Sukthankar, Gita; Liu, Tie-Yan; van Steen, Maarten %P 249 - 252 %I ACM %@ 978-1-4503-7024-0
[50]
N. H. Ramadhana, F. Darari, P. O. H. Putra, W. Nutt, S. Razniewski, and R. I. Akbar, “User-Centered Design for Knowledge Imbalance Analysis: A Case Study of ProWD,” in VOILA!2020, Fifth International Workshop on Visualization and Interaction for Ontologies and Linked Data, Virtual Conference, 2020.
Export
BibTeX
@inproceedings{Ramadhana_VOILA2020,
  author       = {Ramadhana, Nadyah Hani and Darari, Fariz and Putra, Panca O. Hadi and Nutt, Werner and Razniewski, Simon and Akbar, Refo Ilmiya},
  title        = {User-Centered Design for Knowledge Imbalance Analysis: {A} Case Study of {ProWD}},
  booktitle    = {VOILA!2020, Fifth International Workshop on Visualization and Interaction for Ontologies and Linked Data},
  editor       = {Ivanova, Valentina and Lambrix, Patrick and Pesquita, Catia and Wiens, Vitalis},
  series       = {CEUR Workshop Proceedings},
  volume       = {2778},
  pages        = {14--27},
  eid          = {2},
  year         = {2020},
  publisher    = {ceur-ws.org},
  address      = {Virtual Conference},
  issn         = {1613-0073},
  url          = {http://ceur-ws.org/Vol-2778/paper2.pdf},
  urn          = {urn:nbn:de:0074-2778-8},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Ramadhana, Nadyah Hani %A Darari, Fariz %A Putra, Panca O. Hadi %A Nutt, Werner %A Razniewski, Simon %A Akbar, Refo Ilmiya %+ External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T User-Centered Design for Knowledge Imbalance Analysis: A Case Study of ProWD : %G eng %U http://hdl.handle.net/21.11116/0000-0008-063B-0 %U http://ceur-ws.org/Vol-2778/paper2.pdf %D 2020 %B Fifth International Workshop on Visualization and Interaction for Ontologies and Linked Data %Z date of event: 2020-11-02 - 2020-11-02 %C Virtual Conference %B VOILA!2020 %E Ivanova, Valentina; Lambrix, Patrick; Pesquita, Catia; Wiens, Vitalis %P 14 - 27 %Z sequence number: 2 %I ceur-ws.org %B CEUR Workshop Proceedings %N 2778 %@ false %U http://ceur-ws.org/Vol-2778/paper2.pdf
[51]
S. Razniewski and P. Das, “Structured Knowledge: Have We Made Progress? An Extrinsic Study of KB Coverage over 19 Years,” in CIKM ’20, 29th ACM International Conference on Information & Knowledge Management, Virtual Event, Ireland, 2020.
Abstract
Structured world knowledge is at the foundation of knowledge-centric AI applications. Despite considerable research on knowledge base construction, beyond mere statement counts, little is known about the progress of KBs, in particular concerning their coverage, and one may wonder whether there is constant progress, or diminishing returns. In this paper we employ question answering and entity summarization as extrinsic use cases for a longitudinal study of the progress of KB coverage. Our analysis shows a near-continuous improvement of two popular KBs, DBpedia and Wikidata, over the last 19 years, with little signs of flattening out or leveling off.
Export
BibTeX
@inproceedings{razniewski2020structured,
  author       = {Razniewski, Simon and Das, Priyanka},
  title        = {Structured Knowledge: {Have} We Made Progress? {An} Extrinsic Study of {KB} Coverage over 19 Years},
  booktitle    = {CIKM '20, 29th ACM International Conference on Information \& Knowledge Management},
  editor       = {d'Aquin, Mathieu and Dietze, Stefan},
  pages        = {3317--3320},
  year         = {2020},
  publisher    = {ACM},
  address      = {Virtual Event, Ireland},
  isbn         = {978-1-4503-6859-9},
  doi          = {10.1145/3340531.3417447},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {Structured world knowledge is at the foundation of knowledge-centric AI applications. Despite considerable research on knowledge base construction, beyond mere statement counts, little is known about the progress of KBs, in particular concerning their coverage, and one may wonder whether there is constant progress, or diminishing returns. In this paper we employ question answering and entity summarization as extrinsic use cases for a longitudinal study of the progress of KB coverage. Our analysis shows a near-continuous improvement of two popular KBs, DBpedia and Wikidata, over the last 19 years, with little signs of flattening out or leveling off.},
}
Endnote
%0 Conference Proceedings %A Razniewski, Simon %A Das, Priyanka %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Structured Knowledge: Have We Made Progress? An Extrinsic Study of KB Coverage over 19 Years : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FF42-0 %R 10.1145/3340531.3417447 %D 2020 %B 29th ACM International Conference on Information & Knowledge Management %Z date of event: 2020-10-19 - 2020-10-23 %C Virtual Event, Ireland %X Structured world knowledge is at the foundation of knowledge-centric AI applications. Despite considerable research on knowledge base construction, beyond mere statement counts, little is known about the progress of KBs, in particular concerning their coverage, and one may wonder whether there is constant progress, or diminishing returns. In this paper we employ question answering and entity summarization as extrinsic use cases for a longitudinal study of the progress of KB coverage. Our analysis shows a near-continuous improvement of two popular KBs, DBpedia and Wikidata, over the last 19 years, with little signs of flattening out or leveling off. %B CIKM '20 %E d’Aquin, Mathieu; Dietze, Stefan %P 3317 - 3320 %I ACM %@ 978-1-4503-6859-9
[52]
J. Romero and S. Razniewski, “Inside Quasimodo: Exploring Construction and Usage of Commonsense Knowledge,” in CIKM ’20, 29th ACM International Conference on Information & Knowledge Management, Virtual Event, Ireland, 2020.
Abstract
Structured world knowledge is at the foundation of knowledge-centric AI applications. Despite considerable research on knowledge base construction, beyond mere statement counts, little is known about the progress of KBs, in particular concerning their coverage, and one may wonder whether there is constant progress, or diminishing returns. In this paper we employ question answering and entity summarization as extrinsic use cases for a longitudinal study of the progress of KB coverage. Our analysis shows a near-continuous improvement of two popular KBs, DBpedia and Wikidata, over the last 19 years, with little signs of flattening out or leveling off.
Export
BibTeX
% Conference paper in the CIKM '20 proceedings (ACM).
% No abstract field: the text previously stored here described a 19-year
% study of KB coverage (DBpedia, Wikidata), which belongs to a different
% paper and does not match this title. Re-add once the correct abstract is known.
% Only "year" is given; a duplicate "date" makes Biber warn about overwriting year.
@inproceedings{Romero_CIKM2020,
  author       = {Romero, Julien and Razniewski, Simon},
  title        = {Inside {Quasimodo}: {Exploring} Construction and Usage of Commonsense Knowledge},
  booktitle    = {CIKM '20, 29th ACM International Conference on Information \& Knowledge Management},
  editor       = {d{\textquoteright}Aquin, Mathieu and Dietze, Stefan},
  pages        = {3445--3448},
  publisher    = {ACM},
  address      = {Virtual Event, Ireland},
  year         = {2020},
  isbn         = {978-1-4503-6859-9},
  doi          = {10.1145/3340531.3417416},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Romero, Julien %A Razniewski, Simon %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Inside Quasimodo: Exploring Construction and Usage of Commonsense Knowledge : %G eng %U http://hdl.handle.net/21.11116/0000-0008-04C6-4 %R 10.1145/3340531.3417416 %D 2020 %B 29th ACM International Conference on Information & Knowledge Management %Z date of event: 2020-10-19 - 2020-10-23 %C Virtual Event, Ireland %X Structured world knowledge is at the foundation of knowledge-centric AI applications. Despite considerable research on knowledge base construction, beyond mere statement counts, little is known about the progress of KBs, in particular concerning their coverage, and one may wonder whether there is constant progress, or diminishing returns. In this paper we employ question answering and entity summarization as extrinsic use cases for a longitudinal study of the progress of KB coverage. Our analysis shows a near-continuous improvement of two popular KBs, DBpedia and Wikidata, over the last 19 years, with little signs of flattening out or leveling off. %B CIKM '20 %E d’Aquin, Mathieu; Dietze, Stefan %P 3445 - 3448 %I ACM %@ 978-1-4503-6859-9
[53]
R. Saha Roy and A. Anand, “Question Answering over Curated and Open Web Sources,” in SIGIR ’20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, China, 2020.
Export
BibTeX
% Conference paper in the SIGIR '20 proceedings (ACM), pages 2432--2435.
@inproceedings{SahaRoy_SIGIR20,
  author       = {Saha Roy, Rishiraj and Anand, Avishek},
  title        = {Question Answering over Curated and Open Web Sources},
  booktitle    = {SIGIR '20, 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages        = {2432--2435},
  publisher    = {ACM},
  address      = {Virtual Event, China},
  year         = {2020},
  isbn         = {9781450380164},
  doi          = {10.1145/3397271.3401421},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Saha Roy, Rishiraj %A Anand, Avishek %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Question Answering over Curated and Open Web Sources : %G eng %U http://hdl.handle.net/21.11116/0000-0008-02F6-0 %R 10.1145/3397271.3401421 %D 2020 %B 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2020-07-25 - 2020-07-30 %C Virtual Event, China %B SIGIR '20 %P 2432 - 2435 %I ACM %@ 9781450380164
[54]
R. Saha Roy and A. Anand, “Question Answering over Curated and Open Web Sources,” 2020. [Online]. Available: https://arxiv.org/abs/2004.11980. (arXiv: 2004.11980)
Abstract
The last few years have seen an explosion of research on the topic of automated question answering (QA), spanning the communities of information retrieval, natural language processing, and artificial intelligence. This tutorial would cover the highlights of this really active period of growth for QA to give the audience a grasp over the families of algorithms that are currently being used. We partition research contributions by the underlying source from where answers are retrieved: curated knowledge graphs, unstructured text, or hybrid corpora. We choose this dimension of partitioning as it is the most discriminative when it comes to algorithm design. Other key dimensions are covered within each sub-topic: like the complexity of questions addressed, and degrees of explainability and interactivity introduced in the systems. We would conclude the tutorial with the most promising emerging trends in the expanse of QA, that would help new entrants into this field make the best decisions to take the community forward. Much has changed in the community since the last tutorial on QA in SIGIR 2016, and we believe that this timely overview will indeed benefit a large number of conference participants.
Export
BibTeX
% arXiv preprint (eprint 2004.11980) with the same title as the SIGIR '20 entry.
@online{SahaRoy2004.11980,
  author       = {Saha Roy, Rishiraj and Anand, Avishek},
  title        = {Question Answering over Curated and Open Web Sources},
  year         = {2020},
  eprint       = {2004.11980},
  eprinttype   = {arXiv},
  url          = {https://arxiv.org/abs/2004.11980},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {The last few years have seen an explosion of research on the topic of automated question answering (QA), spanning the communities of information retrieval, natural language processing, and artificial intelligence. This tutorial would cover the highlights of this really active period of growth for QA to give the audience a grasp over the families of algorithms that are currently being used. We partition research contributions by the underlying source from where answers are retrieved: curated knowledge graphs, unstructured text, or hybrid corpora. We choose this dimension of partitioning as it is the most discriminative when it comes to algorithm design. Other key dimensions are covered within each sub-topic: like the complexity of questions addressed, and degrees of explainability and interactivity introduced in the systems. We would conclude the tutorial with the most promising emerging trends in the expanse of QA, that would help new entrants into this field make the best decisions to take the community forward. Much has changed in the community since the last tutorial on QA in SIGIR 2016, and we believe that this timely overview will indeed benefit a large number of conference participants.},
}
Endnote
%0 Report %A Saha Roy, Rishiraj %A Anand, Avishek %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Question Answering over Curated and Open Web Sources : %G eng %U http://hdl.handle.net/21.11116/0000-0008-09CA-B %U https://arxiv.org/abs/2004.11980 %D 2020 %X The last few years have seen an explosion of research on the topic of automated question answering (QA), spanning the communities of information retrieval, natural language processing, and artificial intelligence. This tutorial would cover the highlights of this really active period of growth for QA to give the audience a grasp over the families of algorithms that are currently being used. We partition research contributions by the underlying source from where answers are retrieved: curated knowledge graphs, unstructured text, or hybrid corpora. We choose this dimension of partitioning as it is the most discriminative when it comes to algorithm design. Other key dimensions are covered within each sub-topic: like the complexity of questions addressed, and degrees of explainability and interactivity introduced in the systems. We would conclude the tutorial with the most promising emerging trends in the expanse of QA, that would help new entrants into this field make the best decisions to take the community forward. Much has changed in the community since the last tutorial on QA in SIGIR 2016, and we believe that this timely overview will indeed benefit a large number of conference participants. %K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL
[55]
V. Sathya, S. Ghosh, A. Ramamurthy, and B. R. Tamma, “Small Cell Planning: Resource Management and Interference Mitigation Mechanisms in LTE HetNets,” Wireless Personal Communications, vol. 115, 2020.
Export
BibTeX
% Journal article, Wireless Personal Communications vol. 115, pages 335--361.
@article{Sathya2020,
  author       = {Sathya, Vanlin and Ghosh, Shrestha and Ramamurthy, Arun and Tamma, Bheemarjuna Reddy},
  title        = {Small Cell Planning: {R}esource Management and Interference Mitigation Mechanisms in {LTE HetNets}},
  journal      = {Wireless Personal Communications},
  volume       = {115},
  pages        = {335--361},
  publisher    = {Springer},
  address      = {New York, NY},
  year         = {2020},
  issn         = {0929-6212},
  doi          = {10.1007/s11277-020-07574-x},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Journal Article %A Sathya, Vanlin %A Ghosh, Shrestha %A Ramamurthy, Arun %A Tamma, Bheemarjuna Reddy %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Small Cell Planning: Resource Management and Interference Mitigation Mechanisms in LTE HetNets : %G eng %U http://hdl.handle.net/21.11116/0000-0006-B963-A %R 10.1007/s11277-020-07574-x %7 2020 %D 2020 %J Wireless Personal Communications %V 115 %& 335 %P 335 - 361 %I Springer %C New York, NY %@ 0929-6212
[56]
X. Shen, E. Chang, H. Su, C. Niu, and D. Klakow, “Neural Data-to-Text Generation via Jointly Learning the Segmentation and Correspondence,” in The 58th Annual Meeting of the Association for Computational Linguistics (ACL 2020), 2020.
Export
BibTeX
% Conference paper in the ACL 2020 proceedings, pages 7155--7165.
@inproceedings{shen2020neural,
  author       = {Shen, Xiaoyu and Chang, Ernie and Su, Hui and Niu, Cheng and Klakow, Dietrich},
  title        = {Neural Data-to-Text Generation via Jointly Learning the Segmentation and Correspondence},
  booktitle    = {The 58th Annual Meeting of the Association for Computational Linguistics (ACL 2020)},
  editor       = {Jurafsky, Dan and Chai, Joyce and Schluter, Natalie and Tetreault, Joel},
  pages        = {7155--7165},
  publisher    = {ACL},
  year         = {2020},
  isbn         = {978-1-952148-25-5},
  doi          = {10.18653/v1/2020.acl-main.641},
  url          = {https://www.aclweb.org/anthology/2020.acl-main.641},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Shen, Xiaoyu %A Chang, Ernie %A Su, Hui %A Niu, Cheng %A Klakow, Dietrich %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations %T Neural Data-to-Text Generation via Jointly Learning the Segmentation and Correspondence : %G eng %U http://hdl.handle.net/21.11116/0000-0008-141B-4 %U https://www.aclweb.org/anthology/2020.acl-main.641 %R 10.18653/v1/2020.acl-main.641 %D 2020 %B 58th Annual Meeting of the Association for Computational Linguistics %Z date of event: 2020-07-05 - 2020-07-10 %B The 58th Annual Meeting of the Association for Computational Linguistics %E Jurafsky, Dan; Chai, Joyce; Schluter, Natalie; Tetreault, Joel %P 7155 - 7165 %I ACL %@ 978-1-952148-25-5
[57]
H. Su, X. Shen, S. Zhao, Z. Xiao, P. Hu, C. Niu, and J. Zhou, “Diversifying Dialogue Generation with Non-Conversational Text,” in The 58th Annual Meeting of the Association for Computational Linguistics (ACL 2020), 2020.
Export
BibTeX
% Conference paper in the ACL 2020 proceedings, pages 7087--7097.
@inproceedings{su2020diversifying,
  author       = {Su, Hui and Shen, Xiaoyu and Zhao, Sanqiang and Xiao, Zhou and Hu, Pengwei and Niu, Cheng and Zhou, Jie},
  title        = {Diversifying Dialogue Generation with Non-Conversational Text},
  booktitle    = {The 58th Annual Meeting of the Association for Computational Linguistics (ACL 2020)},
  editor       = {Jurafsky, Dan and Chai, Joyce and Schluter, Natalie and Tetreault, Joel},
  pages        = {7087--7097},
  publisher    = {ACL},
  year         = {2020},
  isbn         = {978-1-952148-25-5},
  doi          = {10.18653/v1/2020.acl-main.634},
  url          = {https://www.aclweb.org/anthology/2020.acl-main.634},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Su, Hui %A Shen, Xiaoyu %A Zhao, Sanqiang %A Xiao, Zhou %A Hu, Pengwei %A Niu, Cheng %A Zhou, Jie %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations %T Diversifying Dialogue Generation with Non-Conversational Text : %G eng %U http://hdl.handle.net/21.11116/0000-0008-14AF-D %U https://www.aclweb.org/anthology/2020.acl-main.634 %R 10.18653/v1/2020.acl-main.634 %D 2020 %B 58th Annual Meeting of the Association for Computational Linguistics %Z date of event: 2020-07-05 - 2020-07-10 %B The 58th Annual Meeting of the Association for Computational Linguistics %E Jurafsky, Dan; Chai, Joyce; Schluter, Natalie; Tetreault, Joel %P 7087 - 7097 %I ACL %@ 978-1-952148-25-5
[58]
S. Sukarieh, “SPRAP: Detecting Opinion Spam Campaigns in Online Rating Services,” Universität des Saarlandes, Saarbrücken, 2020.
Export
BibTeX
% Master's thesis, Universitaet des Saarlandes, Saarbruecken.
% Only "year" is given; a duplicate "date" with the same value makes Biber
% warn about overwriting year and is ignored by classic BibTeX.
@mastersthesis{sukarieh:20:sprap,
  author       = {Sukarieh, Sandra},
  title        = {{SPRAP}: Detecting Opinion Spam Campaigns in Online Rating Services},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2020},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Thesis %A Sukarieh, Sandra %Y Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T SPRAP: Detecting Opinion Spam Campaigns in Online Rating Services : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FF00-A %I Universität des Saarlandes %C Saarbrücken %D 2020 %V master %9 master
[59]
C. Sutton, M. Boley, L. Ghiringhelli, M. Rupp, J. Vreeken, and M. Scheffler, “Identifying Domains of Applicability of Machine Learning Models for Materials Science,” Nature Communications, vol. 11, 2020.
Export
BibTeX
% Journal article, Nature Communications vol. 11, article number (eid) 4428.
% The last author is written "Scheffler, Matthias" with a single comma: a
% doubled comma is read by BibTeX as the "Last, Jr, First" form with an empty
% Jr part and leaves a stray comma in the rendered name.
@article{sutton:20:natcomm,
  author       = {Sutton, Chris and Boley, Mario and Ghiringhelli, Luca and Rupp, Matthias and Vreeken, Jilles and Scheffler, Matthias},
  title        = {Identifying Domains of Applicability of Machine Learning Models for Materials Science},
  journal      = {Nature Communications},
  volume       = {11},
  eid          = {4428},
  publisher    = {Nature Publishing Group},
  address      = {London},
  year         = {2020},
  issn         = {2041-1723},
  doi          = {10.1038/s41467-020-17112-9},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Journal Article %A Sutton, Chris %A Boley, Mario %A Ghiringhelli, Luca %A Rupp, Matthias %A Vreeken, Jilles %A Scheffler, Matthias %+ External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Identifying Domains of Applicability of Machine Learning Models for Materials Science : %G eng %U http://hdl.handle.net/21.11116/0000-0008-26CF-5 %R 10.1038/s41467-020-17112-9 %7 2020 %D 2020 %J Nature Communications %O Nat. Commun. %V 11 %Z sequence number: 4428 %I Nature Publishing Group %C London %@ 2041-1723
[60]
E. Terolli, P. Ernst, and G. Weikum, “Focused Query Expansion with Entity Cores for Patient-Centric Health Search,” in The Semantic Web -- ISWC 2020, Athens, Greece (Virtual Conference), 2020.
Export
BibTeX
% Conference paper in "The Semantic Web -- ISWC 2020" (Springer, LNCS 12506).
% Only "year" is given; a duplicate "date" with the same value makes Biber
% warn about overwriting year and is ignored by classic BibTeX.
@inproceedings{Terolli_ISWC2020,
  author       = {Terolli, Erisa and Ernst, Patrick and Weikum, Gerhard},
  title        = {Focused Query Expansion with Entity Cores for Patient-Centric Health Search},
  booktitle    = {The Semantic Web -- ISWC 2020},
  editor       = {Pan, Jeff Z. and Tamma, Valentina and D'Amato, Claudia and Janowicz, Krzysztof and Fu, Bo and Polleres, Axel and Seneviratne, Oshani and Kagal, Lalana},
  series       = {Lecture Notes in Computer Science},
  volume       = {12506},
  pages        = {547--564},
  publisher    = {Springer},
  address      = {Athens, Greece (Virtual Conference)},
  year         = {2020},
  isbn         = {978-3-030-62418-7},
  doi          = {10.1007/978-3-030-62419-4_31},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Terolli, Erisa %A Ernst, Patrick %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Focused Query Expansion with Entity Cores for Patient-Centric Health Search : %G eng %U http://hdl.handle.net/21.11116/0000-0007-78D7-0 %R 10.1007/978-3-030-62419-4_31 %D 2020 %B 19th International Semantic Web Conference %Z date of event: 2020-11-02 - 2020-11-06 %C Athens, Greece (Virtual Conference) %B The Semantic Web -- ISWC 2020 %E Pan, Jeff Z.; Tamma, Valentina; D'Amato, Claudia; Janowicz, Krzysztof; Fu, Bo; Polleres, Axel; Seneviratne, Oshani; Kagal, Lalana %P 547 - 564 %I Springer %@ 978-3-030-62418-7 %B Lecture Notes in Computer Science %N 12506
[61]
A. Tigunova, A. Yates, P. Mirza, and G. Weikum, “CHARM: Inferring Personal Attributes from Conversations,” in The 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020), Online, 2020.
Export
BibTeX
% Conference paper in the EMNLP 2020 proceedings, pages 5391--5404.
% Publisher is ACL, not ACM: the DOI prefix 10.18653 and the aclweb.org
% anthology URL both point to the Association for Computational Linguistics.
@inproceedings{Tigunova_EMNLP20,
  author       = {Tigunova, Anna and Yates, Andrew and Mirza, Paramita and Weikum, Gerhard},
  title        = {{CHARM}: {Inferring} Personal Attributes from Conversations},
  booktitle    = {The 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020)},
  editor       = {Webber, Bonnie and Cohn, Trevor and He, Yulan and Liu, Yang},
  pages        = {5391--5404},
  publisher    = {ACL},
  address      = {Online},
  year         = {2020},
  isbn         = {978-1-952148-60-6},
  doi          = {10.18653/v1/2020.emnlp-main.434},
  url          = {https://www.aclweb.org/anthology/2020.emnlp-main.434},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Tigunova, Anna %A Yates, Andrew %A Mirza, Paramita %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T CHARM: Inferring Personal Attributes from Conversations : %G eng %U http://hdl.handle.net/21.11116/0000-0007-EEDB-7 %U https://www.aclweb.org/anthology/2020.emnlp-main.434 %R 10.18653/v1/2020.emnlp-main.434 %D 2020 %B Conference on Empirical Methods in Natural Language Processing %Z date of event: 2020-11-16 - 2020-11-20 %C Online %B The 2020 Conference on Empirical Methods in Natural Language Processing %E Webber, Bonnie; Cohn, Trevor; He, Yulan; Liu, Yang %P 5391 - 5404 %I ACL %@ 978-1-952148-60-6 %U https://www.aclweb.org/anthology/2020.emnlp-main.434.pdf
[62]
A. Tigunova, P. Mirza, A. Yates, and G. Weikum, “RedDust: a Large Reusable Dataset of Reddit User Traits,” in Twelfth Language Resources and Evaluation Conference (LREC 2020), Marseille, France, 2020.
Export
BibTeX
% Conference paper in the LREC 2020 proceedings (ELRA), pages 6118--6126.
% Editor surname corrected from "Odiik" to "Odijk" (Jan Odijk).
@inproceedings{Tigunova_ELREC20,
  author       = {Tigunova, Anna and Mirza, Paramita and Yates, Andrew and Weikum, Gerhard},
  title        = {{RedDust}: a Large Reusable Dataset of {Reddit} User Traits},
  booktitle    = {Twelfth Language Resources and Evaluation Conference (LREC 2020)},
  editor       = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  pages        = {6118--6126},
  publisher    = {ELRA},
  address      = {Marseille, France},
  year         = {2020},
  isbn         = {979-10-95546-34-4},
  url          = {https://www.aclweb.org/anthology/2020.lrec-1.751},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Tigunova, Anna %A Mirza, Paramita %A Yates, Andrew %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T RedDust: a Large Reusable Dataset of Reddit User Traits : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F0A9-B %U https://www.aclweb.org/anthology/2020.lrec-1.751 %D 2020 %B 12th Language Resources and Evaluation Conference %Z date of event: 2020-05-11 - 2020-05-16 %C Marseille, France %B Twelfth Language Resources and Evaluation Conference %E Calzolari, Nicoletta; Béchet, Frédéric; Blache, Philippe; Choukri, Khalid; Cieri, Christopher; Declerck, Thierry; Goggi, Sara; Mariani, Joseph; Mazo, Hélène; Moreno, Asuncion; Odijk, Jan; Piperidis, Stelios %P 6118 - 6126 %I ELRA %@ 979-10-95546-34-4 %U https://www.aclweb.org/anthology/2020.lrec-1.751.pdf
[63]
A. Tigunova, “Extracting Personal Information from Conversations,” in Companion of The World Wide Web Conference (WWW 2020), Taipei, Taiwan, 2020.
Export
BibTeX
% Conference paper in the WWW 2020 Companion proceedings (ACM), pages 284--288.
@inproceedings{tigunova2020extracting,
  author       = {Tigunova, Anna},
  title        = {Extracting Personal Information from Conversations},
  booktitle    = {Companion of The World Wide Web Conference (WWW 2020)},
  editor       = {El Fallah, Amal and Sukthankar, Gita and Liu, Tie-Yan and van Steen, Maarten},
  pages        = {284--288},
  publisher    = {ACM},
  address      = {Taipei, Taiwan},
  year         = {2020},
  isbn         = {978-1-4503-7024-0},
  doi          = {10.1145/3366424.3382089},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Tigunova, Anna %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Extracting Personal Information from Conversations : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F845-4 %R 10.1145/3366424.3382089 %D 2020 %B The World Wide Web Conference %Z date of event: 2020-04-20 - 2020-04-24 %C Taipei, Taiwan %B Companion of The World Wide Web Conference %E El Fallah, Amal; Sukthankar, Gita; Liu, Tie-Yan; van Steen, Maarten %P 284 - 288 %I ACM %@ 978-1-4503-7024-0
[64]
G. H. Torbati, A. Yates, and G. Weikum, “Personalized Entity Search by Sparse and Scrutable User Profiles,” in CHIIR ’20, Fifth ACM SIGIR Conference on Human Information Interaction and Retrieval, Vancouver, BC, Canada, 2020.
Export
BibTeX
% Conference paper in the CHIIR '20 proceedings (ACM), pages 427--431.
% Editor surname corrected from "O'Brain" to "O'Brien" (Heather O'Brien).
@inproceedings{CHIIR2020Torbati,
  author       = {Torbati, Ghazaleh Haratinezhad and Yates, Andrew and Weikum, Gerhard},
  title        = {Personalized Entity Search by Sparse and Scrutable User Profiles},
  booktitle    = {CHIIR '20, Fifth ACM SIGIR Conference on Human Information Interaction and Retrieval},
  editor       = {O'Brien, Heather and Freund, Luanne},
  pages        = {427--431},
  publisher    = {ACM},
  address      = {Vancouver, BC, Canada},
  year         = {2020},
  isbn         = {9781450368926},
  doi          = {10.1145/3343413.3378011},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Torbati, Ghazaleh Haratinezhad %A Yates, Andrew %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Personalized Entity Search by Sparse and Scrutable User Profiles : %G eng %U http://hdl.handle.net/21.11116/0000-0007-EAD7-F %R 10.1145/3343413.3378011 %D 2020 %B Fifth ACM SIGIR Conference on Human Information Interaction and Retrieval %Z date of event: 2020-03-14 - 2020-03-18 %C Vancouver, BC, Canada %B CHIIR '20 %E O'Brien, Heather; Freund, Luanne %P 427 - 431 %I ACM %@ 9781450368926
[65]
T.-K. Tran, M. H. Gad-Elrab, D. Stepanova, E. Kharlamov, and J. Strötgen, “Fast Computation of Explanations for Inconsistency in Large-Scale Knowledge Graphs,” in Companion of The World Wide Web Conference (WWW 2020), Taipei, Taiwan, 2020.
Export
BibTeX
% Conference paper recorded under the WWW 2020 Companion proceedings (ACM), pages 2613--2619.
@inproceedings{DBLP:conf/www/TranG0KS20,
  author       = {Tran, Trung-Kien and Gad-Elrab, Mohamed Hassan and Stepanova, Daria and Kharlamov, Evgeny and Str{\"o}tgen, Jannik},
  title        = {Fast Computation of Explanations for Inconsistency in Large-Scale Knowledge Graphs},
  booktitle    = {Companion of The World Wide Web Conference (WWW 2020)},
  editor       = {El Fallah, Amal and Sukthankar, Gita and Liu, Tie-Yan and van Steen, Maarten},
  pages        = {2613--2619},
  publisher    = {ACM},
  address      = {Taipei, Taiwan},
  year         = {2020},
  isbn         = {978-1-4503-7024-0},
  doi          = {10.1145/3366423.3380014},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Tran, Trung-Kien %A Gad-Elrab, Mohamed Hassan %A Stepanova, Daria %A Kharlamov, Evgeny %A Strötgen, Jannik %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T Fast Computation of Explanations for Inconsistency in Large-Scale Knowledge Graphs : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F861-4 %R 10.1145/3366423.3380014 %D 2020 %B The World Wide Web Conference %Z date of event: 2020-04-20 - 2020-04-24 %C Taipei, Taiwan %B Companion of The World Wide Web Conference %E El Fallah, Amal; Sukthankar, Gita; Liu, Tie-Yan; van Steen, Maarten %P 2613 - 2619 %I ACM %@ 978-1-4503-7024-0
[66]
L. Wang, X. Shen, G. de Melo, and G. Weikum, “Cross-Domain Learning for Classifying Propaganda in Online Contents,” in Proceedings of the 2020 Truth and Trust Online Conference (TTO 2020), Virtual, 2020.
Export
BibTeX
% Conference paper in the TTO 2020 proceedings (Hacks Hackers), pages 21--31.
@inproceedings{Wang_TTO2020,
  author       = {Wang, Liqiang and Shen, Xiaoyu and de Melo, Gerard and Weikum, Gerhard},
  title        = {Cross-Domain Learning for Classifying Propaganda in Online Contents},
  booktitle    = {Proceedings of the 2020 Truth and Trust Online Conference (TTO 2020)},
  editor       = {De Cristofaro, Emiliano and Nakov, Preslav},
  pages        = {21--31},
  publisher    = {Hacks Hackers},
  address      = {Virtual},
  year         = {2020},
  isbn         = {978-1-7359904-0-8},
  url          = {https://truthandtrustonline.com/wp-content/uploads/2020/10/TTO03.pdf},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Wang, Liqiang %A Shen, Xiaoyu %A de Melo, Gerard %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Cross-Domain Learning for Classifying Propaganda in Online Contents : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F169-3 %U https://truthandtrustonline.com/wp-content/uploads/2020/10/TTO03.pdf %D 2020 %B Truth and Trust Online Conference %Z date of event: 2020-10-16 - 2020-10-17 %C Virtual %B Proceedings of the 2020 Truth and Trust Online Conference %E De Cristofaro, Emiliano; Nakov, Preslav %P 21 - 31 %I Hacks Hackers %@ 978-1-7359904-0-8 %U https://truthandtrustonline.com/wp-content/uploads/2020/10/TTO03.pdf
[67]
L. Wang, X. Shen, G. de Melo, and G. Weikum, “Cross-Domain Learning for Classifying Propaganda in Online Contents,” 2020. [Online]. Available: https://arxiv.org/abs/2011.06844. (arXiv: 2011.06844)
Abstract
As news and social media exhibit an increasing amount of manipulative polarized content, detecting such propaganda has received attention as a new task for content analysis. Prior work has focused on supervised learning with training data from the same domain. However, as propaganda can be subtle and keeps evolving, manual identification and proper labeling are very demanding. As a consequence, training data is a major bottleneck. In this paper, we tackle this bottleneck and present an approach to leverage cross-domain learning, based on labeled documents and sentences from news and tweets, as well as political speeches with a clear difference in their degrees of being propagandistic. We devise informative features and build various classifiers for propaganda labeling, using cross-domain learning. Our experiments demonstrate the usefulness of this approach, and identify difficulties and limitations in various configurations of sources and targets for the transfer step. We further analyze the influence of various features, and characterize salient indicators of propaganda.
Export
BibTeX
% arXiv preprint (eprint 2011.06844) with the same title as the TTO 2020 entry.
@online{Wang_2011.06844,
  author       = {Wang, Liqiang and Shen, Xiaoyu and de Melo, Gerard and Weikum, Gerhard},
  title        = {Cross-Domain Learning for Classifying Propaganda in Online Contents},
  year         = {2020},
  eprint       = {2011.06844},
  eprinttype   = {arXiv},
  url          = {https://arxiv.org/abs/2011.06844},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {As news and social media exhibit an increasing amount of manipulative polarized content, detecting such propaganda has received attention as a new task for content analysis. Prior work has focused on supervised learning with training data from the same domain. However, as propaganda can be subtle and keeps evolving, manual identification and proper labeling are very demanding. As a consequence, training data is a major bottleneck. In this paper, we tackle this bottleneck and present an approach to leverage cross-domain learning, based on labeled documents and sentences from news and tweets, as well as political speeches with a clear difference in their degrees of being propagandistic. We devise informative features and build various classifiers for propaganda labeling, using cross-domain learning. Our experiments demonstrate the usefulness of this approach, and identify difficulties and limitations in various configurations of sources and targets for the transfer step. We further analyze the influence of various features, and characterize salient indicators of propaganda.},
}
Endnote
%0 Report %A Wang, Liqiang %A Shen, Xiaoyu %A de Melo, Gerard %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Cross-Domain Learning for Classifying Propaganda in Online Contents : %G eng %U http://hdl.handle.net/21.11116/0000-0007-FEBF-5 %U https://arxiv.org/abs/2011.06844 %D 2020 %X As news and social media exhibit an increasing amount of manipulative polarized content, detecting such propaganda has received attention as a new task for content analysis. Prior work has focused on supervised learning with training data from the same domain. However, as propaganda can be subtle and keeps evolving, manual identification and proper labeling are very demanding. As a consequence, training data is a major bottleneck. In this paper, we tackle this bottleneck and present an approach to leverage cross-domain learning, based on labeled documents and sentences from news and tweets, as well as political speeches with a clear difference in their degrees of being propagandistic. We devise informative features and build various classifiers for propaganda labeling, using cross-domain learning. Our experiments demonstrate the usefulness of this approach, and identify difficulties and limitations in various configurations of sources and targets for the transfer step. We further analyze the influence of various features, and characterize salient indicators of propaganda. %K Computer Science, Computation and Language, cs.CL
[68]
G. Weikum, “Entities with Quantities,” Bulletin of the Technical Committee on Data Engineering, vol. 43, no. 1, 2020.
Export
BibTeX
% Journal article: Bulletin of the Technical Committee on Data Engineering, vol. 43, no. 1, pp. 4--8.
@article{Weikum_Entities2020,
  AUTHOR       = {Weikum, Gerhard},
  TITLE        = {Entities with Quantities},
  JOURNAL      = {Bulletin of the Technical Committee on Data Engineering},
  VOLUME       = {43},
  NUMBER       = {1},
  PAGES        = {4--8},
  YEAR         = {2020},
  PUBLISHER    = {IEEE Computer Society},
  ADDRESS      = {Los Alamitos, CA},
  LANGUAGE     = {eng},
  URL          = {http://sites.computer.org/debull/A20mar/p4.pdf},
  MARGINALMARK = {$\bullet$},
}
Endnote
%0 Journal Article %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Entities with Quantities : %G eng %U http://hdl.handle.net/21.11116/0000-0007-EBBB-E %U http://sites.computer.org/debull/A20mar/p4.pdf %7 2020 %D 2020 %J Bulletin of the Technical Committee on Data Engineering %V 43 %N 1 %& 4 %P 4 - 8 %I IEEE Computer Society %C Los Alamitos, CA
[69]
G. Weikum, L. Dong, S. Razniewski, and F. Suchanek, “Machine Knowledge: Creation and Curation of Comprehensive Knowledge Bases,” 2020. [Online]. Available: https://arxiv.org/abs/2009.11564. (arXiv: 2009.11564)
Abstract
Equipping machines with comprehensive knowledge of the world's entities and their relationships has been a long-standing goal of AI. Over the last decade, large-scale knowledge bases, also known as knowledge graphs, have been automatically constructed from web contents and text sources, and have become a key asset for search engines. This machine knowledge can be harnessed to semantically interpret textual phrases in news, social media and web tables, and contributes to question answering, natural language processing and data analytics. This article surveys fundamental concepts and practical methods for creating and curating large knowledge bases. It covers models and methods for discovering and canonicalizing entities and their semantic types and organizing them into clean taxonomies. On top of this, the article discusses the automatic extraction of entity-centric properties. To support the long-term life-cycle and the quality assurance of machine knowledge, the article presents methods for constructing open schemas and for knowledge curation. Case studies on academic projects and industrial knowledge graphs complement the survey of concepts and methods.
Export
BibTeX
% arXiv preprint 2009.11564 (Weikum, Dong, Razniewski, Suchanek; 2020).
% The subtitle's first word is brace-protected as a whole word so that
% sentence-casing styles keep its capital without splitting the word.
@online{Weikum_2009.11564,
  AUTHOR       = {Weikum, Gerhard and Dong, Luna and Razniewski, Simon and Suchanek, Fabian},
  TITLE        = {Machine Knowledge: {Creation} and Curation of Comprehensive Knowledge Bases},
  YEAR         = {2020},
  LANGUAGE     = {eng},
  URL          = {https://arxiv.org/abs/2009.11564},
  EPRINT       = {2009.11564},
  EPRINTTYPE   = {arXiv},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {Equipping machines with comprehensive knowledge of the world's entities and their relationships has been a long-standing goal of AI. Over the last decade, large-scale knowledge bases, also known as knowledge graphs, have been automatically constructed from web contents and text sources, and have become a key asset for search engines. This machine knowledge can be harnessed to semantically interpret textual phrases in news, social media and web tables, and contributes to question answering, natural language processing and data analytics. This article surveys fundamental concepts and practical methods for creating and curating large knowledge bases. It covers models and methods for discovering and canonicalizing entities and their semantic types and organizing them into clean taxonomies. On top of this, the article discusses the automatic extraction of entity-centric properties. To support the long-term life-cycle and the quality assurance of machine knowledge, the article presents methods for constructing open schemas and for knowledge curation. Case studies on academic projects and industrial knowledge graphs complement the survey of concepts and methods.},
}
Endnote
%0 Report %A Weikum, Gerhard %A Dong, Luna %A Razniewski, Simon %A Suchanek, Fabian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Machine Knowledge: Creation and Curation of Comprehensive Knowledge Bases : %G eng %U http://hdl.handle.net/21.11116/0000-0007-F1A6-D %U https://arxiv.org/abs/2009.11564 %D 2020 %X Equipping machines with comprehensive knowledge of the world's entities and their relationships has been a long-standing goal of AI. Over the last decade, large-scale knowledge bases, also known as knowledge graphs, have been automatically constructed from web contents and text sources, and have become a key asset for search engines. This machine knowledge can be harnessed to semantically interpret textual phrases in news, social media and web tables, and contributes to question answering, natural language processing and data analytics. This article surveys fundamental concepts and practical methods for creating and curating large knowledge bases. It covers models and methods for discovering and canonicalizing entities and their semantic types and organizing them into clean taxonomies. On top of this, the article discusses the automatic extraction of entity-centric properties. To support the long-term life-cycle and the quality assurance of machine knowledge, the article presents methods for constructing open schemas and for knowledge curation. Case studies on academic projects and industrial knowledge graphs complement the survey of concepts and methods. %K Computer Science, Artificial Intelligence, cs.AI,Computer Science, Databases, cs.DB,Computer Science, General Literature, cs.GL
[70]
B. Xu, S. Qiu, J. Zhang, Y. Wang, X. Shen, and G. de Melo, “Data Augmentation for Multiclass Utterance Classification - A Systematic Study,” in The 28th International Conference on Computational Linguistics (COLING 2020), Barcelona, Spain (Online), 2020.
Export
BibTeX
% COLING 2020 conference paper (Xu, Qiu, Zhang, Wang, Shen, de Melo), pp. 5494--5506.
@inproceedings{xu2020data,
  AUTHOR       = {Xu, Binxia and Qiu, Siyuan and Zhang, Jie and Wang, Yafang and Shen, Xiaoyu and de Melo, Gerard},
  TITLE        = {Data Augmentation for Multiclass Utterance Classification -- A Systematic Study},
  BOOKTITLE    = {The 28th International Conference on Computational Linguistics (COLING 2020)},
  EDITOR       = {Scott, Donia and Bel, Nuria and Zong, Chengqing},
  PAGES        = {5494--5506},
  ADDRESS      = {Barcelona, Spain (Online)},
  PUBLISHER    = {ACL},
  YEAR         = {2020},
  LANGUAGE     = {eng},
  ISBN         = {978-1-952148-27-9},
  URL          = {https://www.aclweb.org/anthology/2020.coling-main.479},
  DOI          = {10.18653/v1/2020.coling-main.479},
  MARGINALMARK = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Xu, Binxia %A Qiu, Siyuan %A Zhang, Jie %A Wang, Yafang %A Shen, Xiaoyu %A de Melo, Gerard %+ External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Data Augmentation for Multiclass Utterance Classification - A Systematic Study : %G eng %U http://hdl.handle.net/21.11116/0000-0008-1498-6 %U https://www.aclweb.org/anthology/2020.coling-main.479 %R 10.18653/v1/2020.coling-main.479 %D 2020 %B The 28th International Conference on Computational Linguistics %Z date of event: 2020-12-08 - 2020-12-13 %C Barcelona, Spain (Online) %B The 28th International Conference on Computational Linguistics %E Scott, Donia; Bel, Nuria; Zong, Chengqing %P 5494 - 5506 %I ACL %@ 978-1-952148-27-9
[71]
A. Yates, S. Arora, X. Zhang, W. Yang, K. M. Jose, and J. Lin, “Capreolus: A Toolkit for End-to-End Neural Ad Hoc Retrieval,” in WSDM ’20, 13th International Conference on Web Search and Data Mining, Houston, TX, USA, 2020.
Export
BibTeX
% WSDM '20 conference paper on the Capreolus toolkit, pp. 861--864.
@inproceedings{YatesWSDM2020,
  AUTHOR       = {Yates, Andrew and Arora, Siddhant and Zhang, Xinyu and Yang, Wei and Jose, Kevin Martin and Lin, Jimmy},
  TITLE        = {Capreolus: {A} Toolkit for End-to-End Neural Ad Hoc Retrieval},
  BOOKTITLE    = {WSDM '20, 13th International Conference on Web Search and Data Mining},
  EDITOR       = {Caverlee, James and Hu, Xia Ben},
  PAGES        = {861--864},
  ADDRESS      = {Houston, TX, USA},
  PUBLISHER    = {ACM},
  YEAR         = {2020},
  LANGUAGE     = {eng},
  ISBN         = {9781450368223},
  DOI          = {10.1145/3336191.3371868},
  MARGINALMARK = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Yates, Andrew %A Arora, Siddhant %A Zhang, Xinyu %A Yang, Wei %A Jose, Kevin Martin %A Lin, Jimmy %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Capreolus: A Toolkit for End-to-End Neural Ad Hoc Retrieval : %G eng %U http://hdl.handle.net/21.11116/0000-0006-A28E-3 %R 10.1145/3336191.3371868 %D 2020 %B 13th International Conference on Web Search and Data Mining %Z date of event: 2020-02-03 - 2020-02-07 %C Houston, TX, USA %B WSDM '20 %E Caverlee, James; Hu, Xia Ben %P 861 - 864 %I ACM %@ 9781450368223
[72]
A. Yates, K. M. Jose, X. Zhang, and J. Lin, “Flexible IR Pipelines with Capreolus,” in CIKM ’20, 29th ACM International Conference on Information & Knowledge Management, Virtual Event, Ireland, 2020.
Abstract
Structured world knowledge is at the foundation of knowledge-centric AI applications. Despite considerable research on knowledge base construction, beyond mere statement counts, little is known about the progress of KBs, in particular concerning their coverage, and one may wonder whether there is constant progress, or diminishing returns. In this paper we employ question answering and entity summarization as extrinsic use cases for a longitudinal study of the progress of KB coverage. Our analysis shows a near-continuous improvement of two popular KBs, DBpedia and Wikidata, over the last 19 years, with little signs of flattening out or leveling off.
Export
BibTeX
% CIKM '20 conference paper on Capreolus, pp. 3181--3188.
% Only YEAR is given for the publication date (no parallel DATE field).
% NOTE(review): the ABSTRACT text discusses knowledge-base coverage (DBpedia,
% Wikidata) and never mentions Capreolus or IR pipelines -- it looks like it
% belongs to a different record. Confirm against the DOI landing page.
@inproceedings{Yates_CIKM2020,
  AUTHOR       = {Yates, Andrew and Jose, Kevin Martin and Zhang, Xinyu and Lin, Jimmy},
  TITLE        = {Flexible {IR} Pipelines with {Capreolus}},
  BOOKTITLE    = {CIKM '20, 29th ACM International Conference on Information \& Knowledge Management},
  EDITOR       = {d{\textquoteright}Aquin, Mathieu and Dietze, Stefan},
  PAGES        = {3181--3188},
  ADDRESS      = {Virtual Event, Ireland},
  PUBLISHER    = {ACM},
  YEAR         = {2020},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-6859-9},
  DOI          = {10.1145/3340531.3412780},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {Structured world knowledge is at the foundation of knowledge-centric AI applications. Despite considerable research on knowledge base construction, beyond mere statement counts, little is known about the progress of KBs, in particular concerning their coverage, and one may wonder whether there is constant progress, or diminishing returns. In this paper we employ question answering and entity summarization as extrinsic use cases for a longitudinal study of the progress of KB coverage. Our analysis shows a near-continuous improvement of two popular KBs, DBpedia and Wikidata, over the last 19 years, with little signs of flattening out or leveling off.},
}
Endnote
%0 Conference Proceedings %A Yates, Andrew %A Jose, Kevin Martin %A Zhang, Xinyu %A Lin, Jimmy %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T Flexible IR Pipelines with Capreolus : %G eng %U http://hdl.handle.net/21.11116/0000-0008-066A-B %R 10.1145/3340531.3412780 %D 2020 %B 29th ACM International Conference on Information & Knowledge Management %Z date of event: 2020-10-19 - 2020-10-23 %C Virtual Event, Ireland %X Structured world knowledge is at the foundation of knowledge-centric AI applications. Despite considerable research on knowledge base construction, beyond mere statement counts, little is known about the progress of KBs, in particular concerning their coverage, and one may wonder whether there is constant progress, or diminishing returns. In this paper we employ question answering and entity summarization as extrinsic use cases for a longitudinal study of the progress of KB coverage. Our analysis shows a near-continuous improvement of two popular KBs, DBpedia and Wikidata, over the last 19 years, with little signs of flattening out or leveling off. %B CIKM '20 %E d’Aquin, Mathieu; Dietze, Stefan %P 3181 - 3188 %I ACM %@ 978-1-4503-6859-9
[73]
Z. Zheng, K. Hui, B. He, X. Han, L. Sun, and A. Yates, “BERT-QE: Contextualized Query Expansion for Document Re-ranking,” in Findings of the ACL: EMNLP 2020, Online, 2020.
Export
BibTeX
% Findings of the ACL: EMNLP 2020 paper, pp. 4718--4728.
% Publisher is ACL: the DOI prefix 10.18653 and the aclweb.org anthology URL
% both identify the Association for Computational Linguistics.
@inproceedings{Zheng_EMNLP20,
  AUTHOR       = {Zheng, Zhi and Hui, Kai and He, Ben and Han, Xianpei and Sun, Le and Yates, Andrew},
  TITLE        = {{BERT-QE}: {Contextualized} Query Expansion for Document Re-ranking},
  BOOKTITLE    = {Findings of the ACL: EMNLP 2020},
  EDITOR       = {Cohn, Trevor and He, Yulan and Liu, Yang},
  SERIES       = {Findings of the Association for Computational Linguistics},
  VOLUME       = {1},
  PAGES        = {4718--4728},
  ADDRESS      = {Online},
  PUBLISHER    = {ACL},
  YEAR         = {2020},
  LANGUAGE     = {eng},
  ISBN         = {978-1-952148-90-3},
  URL          = {https://www.aclweb.org/anthology/2020.findings-emnlp.424},
  DOI          = {10.18653/v1/2020.findings-emnlp.424},
  MARGINALMARK = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Zheng, Zhi %A Hui, Kai %A He, Ben %A Han, Xianpei %A Sun, Le %A Yates, Andrew %+ External Organizations External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T BERT-QE: Contextualized Query Expansion for Document Re-ranking : %G eng %U http://hdl.handle.net/21.11116/0000-0008-0687-9 %U https://www.aclweb.org/anthology/2020.findings-emnlp.424 %R 10.18653/v1/2020.findings-emnlp.424 %D 2020 %B Conference on Empirical Methods in Natural Language Processing %Z date of event: 2020-11-16 - 2020-11-20 %C Online %B Findings of the ACL: EMNLP 2020 %E Cohn, Trevor; He, Yulan; Liu, Yang %P 4718 - 4728 %I ACL %@ 978-1-952148-90-3 %B Findings of the Association for Computational Linguistics %N 1 %U https://www.aclweb.org/anthology/2020.findings-emnlp.424.pdf
[74]
Z. Zheng, K. Hui, B. He, X. Han, L. Sun, and A. Yates, “BERT-QE: Contextualized Query Expansion for Document Re-ranking,” 2020. [Online]. Available: https://arxiv.org/abs/2009.07258. (arXiv: 2009.07258)
Abstract
Query expansion aims to mitigate the mismatch between the language used in a query and in a document. However, query expansion methods can suffer from introducing non-relevant information when expanding the query. To bridge this gap, inspired by recent advances in applying contextualized models like BERT to the document retrieval task, this paper proposes a novel query expansion model that leverages the strength of the BERT model to select relevant document chunks for expansion. In evaluation on the standard TREC Robust04 and GOV2 test collections, the proposed BERT-QE model significantly outperforms BERT-Large models.
Export
BibTeX
% arXiv preprint 2009.07258 (Zheng, Hui, He, Han, Sun, Yates; 2020).
% The model name and the first word of the subtitle are brace-protected so
% that sentence-casing styles keep their capitals.
@online{Zheng2009.07258,
  AUTHOR       = {Zheng, Zhi and Hui, Kai and He, Ben and Han, Xianpei and Sun, Le and Yates, Andrew},
  TITLE        = {{BERT-QE}: {Contextualized} Query Expansion for Document Re-ranking},
  YEAR         = {2020},
  LANGUAGE     = {eng},
  URL          = {https://arxiv.org/abs/2009.07258},
  EPRINT       = {2009.07258},
  EPRINTTYPE   = {arXiv},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {Query expansion aims to mitigate the mismatch between the language used in a query and in a document. However, query expansion methods can suffer from introducing non-relevant information when expanding the query. To bridge this gap, inspired by recent advances in applying contextualized models like BERT to the document retrieval task, this paper proposes a novel query expansion model that leverages the strength of the BERT model to select relevant document chunks for expansion. In evaluation on the standard TREC Robust04 and GOV2 test collections, the proposed BERT-QE model significantly outperforms BERT-Large models.},
}
Endnote
%0 Report %A Zheng, Zhi %A Hui, Kai %A He, Ben %A Han, Xianpei %A Sun, Le %A Yates, Andrew %+ External Organizations External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T BERT-QE: Contextualized Query Expansion for Document Re-ranking : %G eng %U http://hdl.handle.net/21.11116/0000-0008-06D5-1 %U https://arxiv.org/abs/2009.07258 %D 2020 %X Query expansion aims to mitigate the mismatch between the language used in a query and in a document. However, query expansion methods can suffer from introducing non-relevant information when expanding the query. To bridge this gap, inspired by recent advances in applying contextualized models like BERT to the document retrieval task, this paper proposes a novel query expansion model that leverages the strength of the BERT model to select relevant document chunks for expansion. In evaluation on the standard TREC Robust04 and GOV2 test collections, the proposed BERT-QE model significantly outperforms BERT-Large models. %K Computer Science, Information Retrieval, cs.IR,Computer Science, Computation and Language, cs.CL