Publications

2016
[1]
L. Derczynski, J. Strötgen, D. Maynard, M. A. Greenwood, and M. Jung, “GATE-Time: Extraction of Temporal Expressions and Event,” in 10th edition of the Language Resources and Evaluation Conference (LREC 2016), Portorož, Slovenia. (Accepted/in press)
Export
BibTeX
@inproceedings{DerczynskiEtAl2016_LREC,
  title      = {{GATE}-Time: {Extraction} of Temporal Expressions and Event},
  author     = {Derczynski, Leon and Str{\"o}tgen, Jannik and Maynard, Diana and Greenwood, Mark A. and Jung, Manuel},
  language   = {eng},
  year       = {2016},
  publremark = {Accepted},
  booktitle  = {10th edition of the Language Resources and Evaluation Conference (LREC 2016)},
  eid        = {915},
  address    = {Portoro{\v z}, Slovenia},
}
Endnote
%0 Conference Proceedings %A Derczynski, Leon %A Strötgen, Jannik %A Maynard, Diana %A Greenwood, Mark A. %A Jung, Manuel %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T GATE-Time: Extraction of Temporal Expressions and Event : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-4139-8 %D 2016 %B 10th Language Resources and Evaluation Conference %Z date of event: 2016-05-23 - 2016-05-28 %C Portorož, Slovenia %B 10th edition of the Language Resources and Evaluation Conference %Z sequence number: 915
[2]
D. Gupta, “Event Search and Analytics: Detecting Events in Semantically Annotated Corpora for Search & Analytics,” in WSDM’16, 9th ACM International Conference on Web Search and Data Mining, San Francisco, CA, USA, 2016.
Export
BibTeX
@inproceedings{GuptaWSDM2016,
  title     = {Event Search and Analytics: Detecting Events in Semantically Annotated Corpora for Search \& Analytics},
  author    = {Gupta, Dhruv},
  language  = {eng},
  isbn      = {978-1-4503-3716-8},
  doi       = {10.1145/2835776.2855083},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WSDM'16, 9th ACM International Conference on Web Search and Data Mining},
  pages     = {705--705},
  address   = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Gupta, Dhruv %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Event Search and Analytics: Detecting Events in Semantically Annotated Corpora for Search & Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-7526-7 %R 10.1145/2835776.2855083 %D 2016 %B 9th ACM International Conference on Web Search and Data Mining %Z date of event: 2016-02-22 - 2016-02-25 %C San Francisco, CA, USA %B WSDM'16 %P 705 - 705 %I ACM %@ 978-1-4503-3716-8
[3]
D. Gupta and K. Berberich, “Diversifying Search Results Using Time,” in Advances in Information Retrieval (ECIR 2016), Padova, Italy, 2016.
Export
BibTeX
@inproceedings{GuptaECIR2016,
  title     = {Diversifying Search Results Using Time},
  author    = {Gupta, Dhruv and Berberich, Klaus},
  language  = {eng},
  isbn      = {978-3-319-30670-4},
  doi       = {10.1007/978-3-319-30671-1_69},
  publisher = {Springer},
  year      = {2016},
  date      = {2016},
  booktitle = {Advances in Information Retrieval (ECIR 2016)},
  editor    = {Ferro, Nicola and Crestani, Fabio and Moens, Marie-Francine and Mothe, Josiane and Silvestre, Fabrizio and Di Nunzio, Giorgio Maria and Hauff, Claudia and Silvello, Gianmaria},
  pages     = {789--795},
  series    = {Lecture Notes in Computer Science},
  volume    = {9626},
  address   = {Padova, Italy},
}
Endnote
%0 Conference Proceedings %A Gupta, Dhruv %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Diversifying Search Results Using Time : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-7514-F %R 10.1007/978-3-319-30671-1_69 %D 2016 %B 38th European Conference on Information Retrieval %Z date of event: 2016-03-20 - 2016-03-23 %C Padova, Italy %B Advances in Information Retrieval %E Ferro, Nicola; Crestani, Fabio; Moens, Marie-Francine; Mothe, Josiane; Silvestre, Fabrizio; Di Nunzio, Giorgio Maria; Hauff, Claudia; Silvello, Gianmaria %P 789 - 795 %I Springer %@ 978-3-319-30670-4 %B Lecture Notes in Computer Science %N 9626
[4]
D. Gupta and K. Berberich, “Diversifying Search Results Using Time,” Max-Planck-Institut für Informatik, Saarbrücken, MPI-I-2015-5-001, 2016.
Abstract
Getting an overview of a historic entity or event can be difficult in search results, especially if important dates concerning the entity or event are not known beforehand. For such information needs, users would benefit if returned results covered diverse dates, thus giving an overview of what has happened throughout history. Diversifying search results based on important dates can be a building block for applications, for instance, in digital humanities. Historians would thus be able to quickly explore longitudinal document collections by querying for entities or events without knowing associated important dates apriori. In this work, we describe an approach to diversify search results using temporal expressions (e.g., in the 1990s) from their contents. Our approach first identifies time intervals of interest to the given keyword query based on pseudo-relevant documents. It then re-ranks query results so as to maximize the coverage of identified time intervals. We present a novel and objective evaluation for our proposed approach. We test the effectiveness of our methods on the New York Times Annotated corpus and the Living Knowledge corpus, collectively consisting of around 6 million documents. Using history-oriented queries and encyclopedic resources we show that our method indeed is able to present search results diversified along time.
Export
BibTeX
@techreport{GuptaReport2015-5-001,
  title       = {Diversifying Search Results Using Time},
  author      = {Gupta, Dhruv and Berberich, Klaus},
  language    = {eng},
  issn        = {0946-011X},
  number      = {MPI-I-2015-5-001},
  institution = {Max-Planck-Institut f{\"u}r Informatik},
  address     = {Saarbr{\"u}cken},
  year        = {2016},
  abstract    = {Getting an overview of a historic entity or event can be difficult in search results, especially if important dates concerning the entity or event are not known beforehand. For such information needs, users would benefit if returned results covered diverse dates, thus giving an overview of what has happened throughout history. Diversifying search results based on important dates can be a building block for applications, for instance, in digital humanities. Historians would thus be able to quickly explore longitudinal document collections by querying for entities or events without knowing associated important dates apriori. In this work, we describe an approach to diversify search results using temporal expressions (e.g., in the 1990s) from their contents. Our approach first identifies time intervals of interest to the given keyword query based on pseudo-relevant documents. It then re-ranks query results so as to maximize the coverage of identified time intervals. We present a novel and objective evaluation for our proposed approach. We test the effectiveness of our methods on the New York Times Annotated corpus and the Living Knowledge corpus, collectively consisting of around 6 million documents. Using history-oriented queries and encyclopedic resources we show that our method indeed is able to present search results diversified along time.},
  type        = {Research Report},
}
Endnote
%0 Report %A Gupta, Dhruv %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Diversifying Search Results Using Time : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-0AA4-C %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2016 %P 51 p. %X Getting an overview of a historic entity or event can be difficult in search results, especially if important dates concerning the entity or event are not known beforehand. For such information needs, users would benefit if returned results covered diverse dates, thus giving an overview of what has happened throughout history. Diversifying search results based on important dates can be a building block for applications, for instance, in digital humanities. Historians would thus be able to quickly explore longitudinal document collections by querying for entities or events without knowing associated important dates apriori. In this work, we describe an approach to diversify search results using temporal expressions (e.g., in the 1990s) from their contents. Our approach first identifies time intervals of interest to the given keyword query based on pseudo-relevant documents. It then re-ranks query results so as to maximize the coverage of identified time intervals. We present a novel and objective evaluation for our proposed approach. We test the effectiveness of our methods on the New York Times Annotated corpus and the Living Knowledge corpus, collectively consisting of around 6 million documents. Using history-oriented queries and encyclopedic resources we show that our method indeed is able to present search results diversified along time. %B Research Report %@ false
[5]
Y. He, K. Chakrabarti, T. Cheng, and T. Tylenda, “Automatic Discovery of Attribute Synonyms Using Query Logs and Table Corpora,” in WWW’16, 25th International Conference on World Wide Web, Montréal, Canada, 2016.
Export
BibTeX
@inproceedings{He_WWW2016,
  title     = {Automatic Discovery of Attribute Synonyms Using Query Logs and Table Corpora},
  author    = {He, Yeye and Chakrabarti, Kaushik and Cheng, Tao and Tylenda, Tomasz},
  language  = {eng},
  isbn      = {978-1-4503-4143-1},
  doi       = {10.1145/2872427.2874816},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WWW'16, 25th International Conference on World Wide Web},
  pages     = {1429--1439},
  address   = {Montr{\'e}al, Canada},
}
Endnote
%0 Conference Proceedings %A He, Yeye %A Chakrabarti, Kaushik %A Cheng, Tao %A Tylenda, Tomasz %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Automatic Discovery of Attribute Synonyms Using Query Logs and Table Corpora : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-312D-5 %R 10.1145/2872427.2874816 %D 2016 %B 25th International Conference on World Wide Web %Z date of event: 2016-05-11 - 2016-05-15 %C Montréal, Canada %B WWW'16 %P 1429 - 1439 %I ACM %@ 978-1-4503-4143-1
[6]
S. Karaev and P. Miettinen, “Capricorn: An Algorithm for Subtropical Matrix Factorization,” in Proceedings of the 2016 SIAM International Conference on Data Mining (SDM 2016), Miami, FL, USA. (Accepted/in press)
Abstract
Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data.
Export
BibTeX
@inproceedings{karaev16capricorn,
  title      = {Capricorn: {An} Algorithm for Subtropical Matrix Factorization},
  author     = {Karaev, Sanjar and Miettinen, Pauli},
  language   = {eng},
  publisher  = {SIAM},
  year       = {2016},
  publremark = {Accepted},
  abstract   = {Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data.},
  booktitle  = {Proceedings of the 2016 SIAM International Conference on Data Mining (SDM 2016)},
  address    = {Miami, FL, USA},
}
Endnote
%0 Conference Proceedings %A Karaev, Sanjar %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Capricorn: An Algorithm for Subtropical Matrix Factorization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-542F-3 %D 2016 %B 16th SIAM International Conference on Data Mining %Z date of event: 2016-05-05 - 2016-05-07 %C Miami, FL, USA %X Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data. %B Proceedings of the 2016 SIAM International Conference on Data Mining %I SIAM
[7]
M. Krötzsch and G. Weikum, “Editorial,” Journal of Web Semantics, vol. 37/38, 2016.
Export
BibTeX
@article{Kroetzsch2016,
  title     = {Editorial},
  author    = {Kr{\"o}tzsch, Markus and Weikum, Gerhard},
  language  = {eng},
  issn      = {1570-8268},
  doi       = {10.1016/j.websem.2016.04.002},
  publisher = {Elsevier},
  address   = {Amsterdam},
  year      = {2016},
  date      = {2016},
  journal   = {Journal of Web Semantics},
  volume    = {37/38},
  pages     = {53--54},
}
Endnote
%0 Journal Article %A Krötzsch, Markus %A Weikum, Gerhard %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Editorial : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-EB8D-B %R 10.1016/j.websem.2016.04.002 %7 2016 %D 2016 %J Journal of Web Semantics %O Web Semantics: Science, Services and Agents on the World Wide Web %V 37/38 %& 53 %P 53 - 54 %I Elsevier %C Amsterdam %@ false
[8]
E. Kuzey, V. Setty, J. Strötgen, and G. Weikum, “As Time Goes By: Comprehensive Tagging of Textual Phrases with Temporal Scopes,” in WWW’16, 25th International Conference on World Wide Web, Montréal, Canada, 2016.
Export
BibTeX
@inproceedings{Kuzey_WWW2016,
  title     = {As Time Goes By: {Comprehensive} Tagging of Textual Phrases with Temporal Scopes},
  author    = {Kuzey, Erdal and Setty, Vinay and Str{\"o}tgen, Jannik and Weikum, Gerhard},
  language  = {eng},
  isbn      = {978-1-4503-4143-1},
  doi       = {10.1145/2872427.2883055},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WWW'16, 25th International Conference on World Wide Web},
  pages     = {915--925},
  address   = {Montr{\'e}al, Canada},
}
Endnote
%0 Conference Proceedings %A Kuzey, Erdal %A Setty, Vinay %A Strötgen, Jannik %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T As Time Goes By: Comprehensive Tagging of Textual Phrases with Temporal Scopes : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-310D-D %R 10.1145/2872427.2883055 %D 2016 %B 25th International Conference on World Wide Web %Z date of event: 2016-05-11 - 2016-05-15 %C Montréal, Canada %B WWW'16 %P 915 - 925 %I ACM %@ 978-1-4503-4143-1
[9]
E. Kuzey, J. Strötgen, V. Setty, and G. Weikum, “Temponym Tagging: Temporal Scopes for Textual Phrases,” in WWW’16 Companion, Montréal, Canada, 2016.
Export
BibTeX
@inproceedings{Kuzey:2016:TTT:2872518.2889289,
  title     = {Temponym Tagging: {Temporal} Scopes for Textual Phrases},
  author    = {Kuzey, Erdal and Str{\"o}tgen, Jannik and Setty, Vinay and Weikum, Gerhard},
  language  = {eng},
  isbn      = {978-1-4503-4144-8},
  doi       = {10.1145/2872518.2889289},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WWW'16 Companion},
  pages     = {841--842},
  address   = {Montr{\'e}al, Canada},
}
Endnote
%0 Conference Proceedings %A Kuzey, Erdal %A Strötgen, Jannik %A Setty, Vinay %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Temponym Tagging: Temporal Scopes for Textual Phrases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-4134-1 %R 10.1145/2872518.2889289 %D 2016 %B 25th International Conference on World Wide Web %Z date of event: 2016-05-11 - 2016-05-15 %C Montréal, Canada %B WWW'16 Companion %P 841 - 842 %I ACM %@ 978-1-4503-4144-8
[10]
A. Mishra and K. Berberich, “Leveraging Semantic Annotations to Link Wikipedia and News Archives,” Max-Planck-Institut für Informatik, Saarbrücken, MPI-I-2016-5-002, 2016.
Abstract
The incomprehensible amount of information available online has made it difficult to retrospect on past events. We propose a novel linking problem to connect excerpts from Wikipedia summarizing events to online news articles elaborating on them. To address the linking problem, we cast it into an information retrieval task by treating a given excerpt as a user query with the goal to retrieve a ranked list of relevant news articles. We find that Wikipedia excerpts often come with additional semantics, in their textual descriptions, representing the time, geolocations, and named entities involved in the event. Our retrieval model leverages text and semantic annotations as different dimensions of an event by estimating independent query models to rank documents. In our experiments on two datasets, we compare methods that consider different combinations of dimensions and find that the approach that leverages all dimensions suits our problem best.
Export
BibTeX
@techreport{MishraBerberich16,
  title       = {Leveraging Semantic Annotations to Link Wikipedia and News Archives},
  author      = {Mishra, Arunav and Berberich, Klaus},
  language    = {eng},
  issn        = {0946-011X},
  number      = {MPI-I-2016-5-002},
  institution = {Max-Planck-Institut f{\"u}r Informatik},
  address     = {Saarbr{\"u}cken},
  year        = {2016},
  abstract    = {The incomprehensible amount of information available online has made it difficult to retrospect on past events. We propose a novel linking problem to connect excerpts from Wikipedia summarizing events to online news articles elaborating on them. To address the linking problem, we cast it into an information retrieval task by treating a given excerpt as a user query with the goal to retrieve a ranked list of relevant news articles. We find that Wikipedia excerpts often come with additional semantics, in their textual descriptions, representing the time, geolocations, and named entities involved in the event. Our retrieval model leverages text and semantic annotations as different dimensions of an event by estimating independent query models to rank documents. In our experiments on two datasets, we compare methods that consider different combinations of dimensions and find that the approach that leverages all dimensions suits our problem best.},
  type        = {Research Report},
}
Endnote
%0 Report %A Mishra, Arunav %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Leveraging Semantic Annotations to Link Wikipedia and News Archives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-5FF0-A %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2016 %P 21 p. %X The incomprehensible amount of information available online has made it difficult to retrospect on past events. We propose a novel linking problem to connect excerpts from Wikipedia summarizing events to online news articles elaborating on them. To address the linking problem, we cast it into an information retrieval task by treating a given excerpt as a user query with the goal to retrieve a ranked list of relevant news articles. We find that Wikipedia excerpts often come with additional semantics, in their textual descriptions, representing the time, geolocations, and named entities involved in the event. Our retrieval model leverages text and semantic annotations as different dimensions of an event by estimating independent query models to rank documents. In our experiments on two datasets, we compare methods that consider different combinations of dimensions and find that the approach that leverages all dimensions suits our problem best. %B Research Reports %@ false
[11]
A. Mishra and K. Berberich, “Leveraging Semantic Annotations to Link Wikipedia and News Archives,” in Advances in Information Retrieval (ECIR 2016), Padova, Italy, 2016.
Export
BibTeX
@inproceedings{MishraECIR2016,
  title     = {Leveraging Semantic Annotations to Link Wikipedia and News Archives},
  author    = {Mishra, Arunav and Berberich, Klaus},
  language  = {eng},
  isbn      = {978-3-319-30670-4},
  doi       = {10.1007/978-3-319-30671-1_3},
  publisher = {Springer},
  year      = {2016},
  date      = {2016},
  booktitle = {Advances in Information Retrieval (ECIR 2016)},
  editor    = {Ferro, Nicola and Crestani, Fabio and Moens, Marie-Francine and Mothe, Josiane and Silvestre, Fabrizio and Di Nunzio, Giorgio Maria and Hauff, Claudia and Silvello, Gianmaria},
  pages     = {30--42},
  series    = {Lecture Notes in Computer Science},
  volume    = {9626},
  address   = {Padova, Italy},
}
Endnote
%0 Conference Proceedings %A Mishra, Arunav %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Leveraging Semantic Annotations to Link Wikipedia and News Archives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-48DC-F %R 10.1007/978-3-319-30671-1_3 %D 2016 %B 38th European Conference on Information Retrieval %Z date of event: 2016-03-20 - 2016-03-23 %C Padova, Italy %B Advances in Information Retrieval %E Ferro, Nicola; Crestani, Fabio; Moens, Marie-Francine; Mothe, Josiane; Silvestre, Fabrizio; Di Nunzio, Giorgio Maria; Hauff, Claudia; Silvello, Gianmaria %P 30 - 42 %I Springer %@ 978-3-319-30670-4 %B Lecture Notes in Computer Science %N 9626
[12]
R. S. Roy, A. Suresh, N. Ganguly, and M. Choudhury, “Improving Document Ranking for Long Queries with Nested Query Segmentation,” in Advances in Information Retrieval (ECIR 2016), Padova, Italy, 2016.
Export
BibTeX
@inproceedings{RoyECIR2016,
  title     = {Improving Document Ranking for Long Queries with Nested Query Segmentation},
  author    = {Roy, Rishiraj Saha and Suresh, Anusha and Ganguly, Niloy and Choudhury, Monojit},
  language  = {eng},
  isbn      = {978-3-319-30670-4},
  doi       = {10.1007/978-3-319-30671-1_67},
  publisher = {Springer},
  year      = {2016},
  date      = {2016},
  booktitle = {Advances in Information Retrieval (ECIR 2016)},
  editor    = {Ferro, Nicola and Crestani, Fabio and Moens, Marie-Francine and Mothe, Josiane and Silvestre, Fabrizio and Di Nunzio, Giorgio Maria and Hauff, Claudia and Silvello, Gianmaria},
  pages     = {775--781},
  series    = {Lecture Notes in Computer Science},
  volume    = {9626},
  address   = {Padova, Italy},
}
Endnote
%0 Conference Proceedings %A Roy, Rishiraj Saha %A Suresh, Anusha %A Ganguly, Niloy %A Choudhury, Monojit %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T Improving Document Ranking for Long Queries with Nested Query Segmentation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-48DF-9 %R 10.1007/978-3-319-30671-1_67 %D 2016 %B 38th European Conference on Information Retrieval %Z date of event: 2016-03-20 - 2016-03-23 %C Padova, Italy %B Advances in Information Retrieval %E Ferro, Nicola; Crestani, Fabio; Moens, Marie-Francine; Mothe, Josiane; Silvestre, Fabrizio; Di Nunzio, Giorgio Maria; Hauff, Claudia; Silvello, Gianmaria %P 775 - 781 %I Springer %@ 978-3-319-30670-4 %B Lecture Notes in Computer Science %N 9626
[13]
N. Tandon, C. D. Hariman, J. Urbani, A. Rohrbach, M. Rohrbach, and G. Weikum, “Commonsense in Parts: Mining Part-Whole Relations from the Web and Image Tags,” in Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence, Phoenix, AZ, USA. (Accepted/in press)
Export
BibTeX
@inproceedings{TandonAAAI2016,
  title        = {Commonsense in Parts: Mining Part-Whole Relations from the Web and Image Tags},
  author       = {Tandon, Niket and Hariman, Charles Darwis and Urbani, Jacopo and Rohrbach, Anna and Rohrbach, Marcus and Weikum, Gerhard},
  language     = {eng},
  publisher    = {AAAI},
  year         = {2016},
  publremark   = {Accepted},
  marginalmark = {$\bullet$},
  booktitle    = {Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence},
  address      = {Phoenix, AZ, USA},
}
Endnote
%0 Conference Proceedings %A Tandon, Niket %A Hariman, Charles Darwis %A Urbani, Jacopo %A Rohrbach, Anna %A Rohrbach, Marcus %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Commonsense in Parts: Mining Part-Whole Relations from the Web and Image Tags : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-ABFE-1 %D 2015 %B Thirtieth AAAI Conference on Artificial Intelligence %Z date of event: 2016-02-12 - 2016-02-17 %C Phoenix, AZ, USA %B Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence %I AAAI
[14]
C. Teflioudi, “Algorithms for Shared-Memory Matrix Completion and Maximum Inner Product Search,” Universität des Saarlandes, Saarbrücken, 2016.
Export
BibTeX
@phdthesis{Teflioudiphd2016,
  title    = {Algorithms for Shared-Memory Matrix Completion and Maximum Inner Product Search},
  author   = {Teflioudi, Christina},
  language = {eng},
  school   = {Universit{\"a}t des Saarlandes},
  address  = {Saarbr{\"u}cken},
  year     = {2016},
  date     = {2016},
}
Endnote
%0 Thesis %A Teflioudi, Christina %Y Gemulla, Rainer %A referee: Weikum, Gerhard %+ International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Algorithms for Shared-Memory Matrix Completion and Maximum Inner Product Search : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-43FA-2 %I Universität des Saarlandes %C Saarbrücken %D 2016 %P xi, 110 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de %U http://scidok.sulb.uni-saarland.de/volltexte/2016/6469/
[15]
M. Yahya, D. Barbosa, K. Berberich, Q. Wang, and G. Weikum, “Relationship Queries on Extended Knowledge Graphs,” in WSDM’16, 9th ACM International Conference on Web Search and Data Mining, San Francisco, CA, USA, 2016.
Export
BibTeX
@inproceedings{YahyaWSDM2016,
  title     = {Relationship Queries on Extended Knowledge Graphs},
  author    = {Yahya, Mohamed and Barbosa, Denilson and Berberich, Klaus and Wang, Quiyue and Weikum, Gerhard},
  language  = {eng},
  isbn      = {978-1-4503-3716-8},
  doi       = {10.1145/2835776.2835795},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WSDM'16, 9th ACM International Conference on Web Search and Data Mining},
  pages     = {605--614},
  address   = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Yahya, Mohamed %A Barbosa, Denilson %A Berberich, Klaus %A Wang, Quiyue %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Relationship Queries on Extended Knowledge Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-ABAA-0 %R 10.1145/2835776.2835795 %D 2016 %B 9th ACM International Conference on Web Search and Data Mining %Z date of event: 2016-02-22 - 2016-02-25 %C San Francisco, CA, USA %B WSDM'16 %P 605 - 614 %I ACM %@ 978-1-4503-3716-8
[16]
M. Yahya and H. Schütze, “Question Answering and Query Processing for Extended Knowledge Graphs,” Universität des Saarlandes, Saarbrücken, 2016.
Export
BibTeX
@phdthesis{yahyaphd2016, TITLE = {Question Answering and Query Processing for Extended Knowledge Graphs}, AUTHOR = {Yahya, Mohamed and Sch{\"u}tze, Hinrich}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2016}, DATE = {2016}, }
Endnote
%0 Thesis %A Yahya, Mohamed %Y Weikum, Gerhard %A referee: Schütze, Hinrich %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Question Answering and Query Processing for Extended Knowledge Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-48C2-7 %I Universität des Saarlandes %C Saarbrücken %D 2016 %P x, 160 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de %U http://scidok.sulb.uni-saarland.de/volltexte/2016/6476/
2015
[17]
S. Abiteboul, L. Dong, O. Etzioni, D. Srivastava, G. Weikum, J. Stoyanovich, and F. M. Suchanek, “The Elephant in the Room: Getting Value from Big Data,” in Proceedings of the 18th International Workshop on Web and Databases (WebDB 2015), Melbourne, Australia, 2015.
Export
BibTeX
@inproceedings{AbiteboulWebDB2015, TITLE = {The Elephant in the Room: {G}etting Value from {Big Data}}, AUTHOR = {Abiteboul, Serge and Dong, Luna and Etzioni, Oren and Srivastava, Divesh and Weikum, Gerhard and Stoyanovich, Julia and Suchanek, Fabian M.}, LANGUAGE = {eng}, ISBN = {978-1-4503-3627-7}, DOI = {10.1145/2767109.2770014}, PUBLISHER = {ACM}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, BOOKTITLE = {Proceedings of the 18th International Workshop on Web and Databases (WebDB 2015)}, EDITOR = {Stoyanovich, Julia and Suchanek, Fabian M.}, PAGES = {1--5}, ADDRESS = {Melbourne, Australia}, }
Endnote
%0 Conference Proceedings %A Abiteboul, Serge %A Dong, Luna %A Etzioni, Oren %A Srivastava, Divesh %A Weikum, Gerhard %A Stoyanovich, Julia %A Suchanek, Fabian M. %+ External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Télécom ParisTech %T The Elephant in the Room: Getting Value from Big Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0027-D3F2-F %R 10.1145/2767109.2770014 %D 2015 %B 18th International Workshop on the Web and Databases %Z date of event: 2015-05-31 - 2015-05-31 %C Melbourne, Australia %B Proceedings of the 18th International Workshop on Web and Databases %E Stoyanovich, Julia; Suchanek, Fabian M. %P 1 - 5 %I ACM %@ 978-1-4503-3627-7
[18]
A. Abujabal and K. Berberich, “Important Events in the Past, Present, and Future,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{AbjuabalWWW2015, TITLE = {Important Events in the Past, Present, and Future}, AUTHOR = {Abujabal, Abdalghani and Berberich, Klaus}, LANGUAGE = {eng}, ISBN = {978-1-4503-3473-0}, DOI = {10.1145/2740908.2741692}, PUBLISHER = {ACM}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, BOOKTITLE = {WWW'15 Companion}, PAGES = {1315--1320}, ADDRESS = {Florence, Italy}, }
Endnote
%0 Conference Proceedings %A Abujabal, Abdalghani %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Important Events in the Past, Present, and Future : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-E33A-8 %R 10.1145/2740908.2741692 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-04-18 - 2015-04-22 %C Florence, Italy %B WWW'15 Companion %P 1315 - 1320 %I ACM %@ 978-1-4503-3473-0
[19]
A. Abujabal, “Mining Past, Present, and Future,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{AbujabalMaster2015, TITLE = {Mining Past, Present, and Future}, AUTHOR = {Abujabal, Abdalghani}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, }
Endnote
%0 Thesis %A Abujabal, Abdalghani %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Mining Past, Present, and Future : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-A974-2 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P XII, 86 p. %V master %9 master
[20]
A. Anagnostopoulos, L. Becchetti, I. Bordino, S. Leonardi, I. Mele, and P. Sankowski, “Stochastic Query Covering for Fast Approximate Document Retrieval,” ACM Transactions on Information Systems, vol. 33, no. 3, 2015.
Export
BibTeX
@article{Anagnostopoulos:TOIS, TITLE = {Stochastic Query Covering for Fast Approximate Document Retrieval}, AUTHOR = {Anagnostopoulos, Aris and Becchetti, Luca and Bordino, Ilaria and Leonardi, Stefano and Mele, Ida and Sankowski, Piotr}, LANGUAGE = {eng}, ISSN = {1046-8188}, DOI = {10.1145/2699671}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, JOURNAL = {ACM Transactions on Information Systems}, VOLUME = {33}, NUMBER = {3}, PAGES = {1--35}, EID = {11}, }
Endnote
%0 Journal Article %A Anagnostopoulos, Aris %A Becchetti, Luca %A Bordino, Ilaria %A Leonardi, Stefano %A Mele, Ida %A Sankowski, Piotr %+ External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Stochastic Query Covering for Fast Approximate Document Retrieval : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-B6C7-2 %R 10.1145/2699671 %7 2015 %D 2015 %J ACM Transactions on Information Systems %O TOIS %V 33 %N 3 %& 1 %P 1 - 35 %Z sequence number: 11 %I ACM %C New York, NY %@ false
[21]
A. Anagnostopoulos, L. Becchetti, A. Fazzone, I. Mele, and M. Riondato, “The Importance of Being Expert: Efficient Max-Finding in Crowdsourcing,” in SIGMOD’15, ACM SIGMOD International Conference on Management of Data, Melbourne, Victoria, Australia, 2015.
Export
BibTeX
@inproceedings{Anagnostopoulos:SIGMOD2015, TITLE = {The Importance of Being Expert: Efficient Max-Finding in Crowdsourcing}, AUTHOR = {Anagnostopoulos, Aris and Becchetti, Luca and Fazzone, Adriano and Mele, Ida and Riondato, Matteo}, LANGUAGE = {eng}, ISBN = {978-1-4503-2758-9}, DOI = {10.1145/2723372.2723722}, PUBLISHER = {ACM}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, BOOKTITLE = {SIGMOD'15, ACM SIGMOD International Conference on Management of Data}, PAGES = {983--998}, ADDRESS = {Melbourne, Victoria, Australia}, }
Endnote
%0 Conference Proceedings %A Anagnostopoulos, Aris %A Becchetti, Luca %A Fazzone, Adriano %A Mele, Ida %A Riondato, Matteo %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T The Importance of Being Expert: Efficient Max-Finding in Crowdsourcing : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-B6BE-7 %R 10.1145/2723372.2723722 %D 2015 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2015-05-31 - 2015-06-04 %C Melbourne, Victoria, Australia %B SIGMOD'15 %P 983 - 998 %I ACM %@ 978-1-4503-2758-9
[22]
K. Athukorala, D. Głowacka, G. Jacucci, A. Oulasvirta, and J. Vreeken, “Is Exploratory Search Different? A Comparison of Information Search Behavior for Exploratory and Lookup Tasks,” Journal of the Association for Information Science and Technology, 2015.
Export
BibTeX
@article{VreekenSearch2015, TITLE = {Is Exploratory Search Different? A Comparison of Information Search Behavior for Exploratory and Lookup Tasks}, AUTHOR = {Athukorala, Kumaripaba and G{\l}owacka, Dorota and Jacucci, Giulio and Oulasvirta, Antti and Vreeken, Jilles}, LANGUAGE = {eng}, ISSN = {2330-1643}, DOI = {10.1002/asi.23617}, PUBLISHER = {Wiley}, ADDRESS = {Chichester}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, JOURNAL = {Journal of the Association for Information Science and Technology}, PAGES = {1--17}, }
Endnote
%0 Journal Article %A Athukorala, Kumaripaba %A Głowacka, Dorota %A Jacucci, Giulio %A Oulasvirta, Antti %A Vreeken, Jilles %+ External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Is Exploratory Search Different? A Comparison of Information Search Behavior for Exploratory and Lookup Tasks : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-E6A7-D %R 10.1002/asi.23617 %7 2015-10-22 %D 2015 %8 22.10.2015 %J Journal of the Association for Information Science and Technology %& 1 %P 1 - 17 %I Wiley %C Chichester %@ false
[23]
H. R. Bazoobandi, S. de Rooij, J. Urbani, A. ten Teije, F. van Harmelen, and H. Bal, “A Compact In-Memory Dictionary for RDF Data,” in The Semantic Web. Latest Advances and New Domains, Portoroz, Slovenia, 2015.
Export
BibTeX
@inproceedings{Urbanilncs15, TITLE = {A Compact In-Memory Dictionary for {RDF} Data}, AUTHOR = {Bazoobandi, Hamid R. and de Rooij, Steve and Urbani, Jacopo and ten Teije, Annette and van Harmelen, Frank and Bal, Henri}, LANGUAGE = {eng}, ISBN = {978-3-319-18817-1}, DOI = {10.1007/978-3-319-18818-8_13}, PUBLISHER = {Springer}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, BOOKTITLE = {The Semantic Web. Latest Advances and New Domains}, PAGES = {205--220}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {9088}, ADDRESS = {Portoroz, Slovenia}, }
Endnote
%0 Conference Proceedings %A Bazoobandi, Hamid R. %A de Rooij, Steve %A Urbani, Jacopo %A ten Teije, Annette %A van Harmelen, Frank %A Bal, Henri %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T A Compact In-Memory Dictionary for RDF Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-F1A6-9 %R 10.1007/978-3-319-18818-8_13 %D 2015 %B 12th European Semantic Web Conference %Z date of event: 2015-05-31 - 2015-06-04 %C Portoroz, Slovenia %B The Semantic Web. Latest Advances and New Domains %P 205 - 220 %I Springer %@ 978-3-319-18817-1 %B Lecture Notes in Computer Science %N 9088
[24]
K. Budhathoki and J. Vreeken, “The Difference and the Norm - Characterising Similarities and Differences Between Databases,” in Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015), Porto, Portugal, 2015.
Export
BibTeX
@inproceedings{BudhathokiECML2015, TITLE = {The Difference and the Norm -- Characterising Similarities and Differences Between Databases}, AUTHOR = {Budhathoki, Kailash and Vreeken, Jilles}, LANGUAGE = {eng}, ISBN = {978-3-319-23524-0}, DOI = {10.1007/978-3-319-23525-7_13}, PUBLISHER = {Springer}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, BOOKTITLE = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015)}, EDITOR = {Appice, Annalisa and Pereira Rodrigues, Pedro and Gama, Jo{\~a}o and Jorge, Al{\'i}pio and Soares, Carlos}, PAGES = {206--223}, SERIES = {Lecture Notes in Artificial Intelligence}, VOLUME = {9285}, ADDRESS = {Porto, Portugal}, }
Endnote
%0 Conference Proceedings %A Budhathoki, Kailash %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T The Difference and the Norm - Characterising Similarities and Differences Between Databases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-2271-F %R 10.1007/978-3-319-23525-7_13 %D 2015 %B European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2015-09-07 - 2015-09-11 %C Porto, Portugal %B Machine Learning and Knowledge Discovery in Databases %E Appice, Annalisa; Pereira Rodrigues, Pedro; Gama, João; Jorge, Alípio; Soares, Carlos %P 206 - 223 %I Springer %@ 978-3-319-23524-0 %B Lecture Notes in Artificial Intelligence %N 9285
[25]
K. Budhathoki, “Correlation by Compression,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{BudhathokiMaster2015, TITLE = {Correlation by Compression}, AUTHOR = {Budhathoki, Kailash}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, }
Endnote
%0 Thesis %A Budhathoki, Kailash %Y Vreeken, Jilles %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Correlation by Compression : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-0753-D %I Universität des Saarlandes %C Saarbrücken %D 2015 %P X, 56 p. %V master %9 master
[26]
P. Chau, J. Vreeken, M. van Leeuwen, and C. Faloutsos, Eds., Proceedings of the ACM SIGKDD 2015 Full-day Workshop on Interactive Data Exploration and Analytics. ACM, 2015.
Export
BibTeX
@proceedings{chau:15:idea, TITLE = {Proceedings of the ACM SIGKDD 2015 Full-day Workshop on Interactive Data Exploration and Analytics (IDEA 2015)}, EDITOR = {Chau, Polo and Vreeken, Jilles and van Leeuwen, Matthijs and Faloutsos, Christos}, LANGUAGE = {eng}, PUBLISHER = {ACM}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, PAGES = {72 p.}, ADDRESS = {Sydney, Australia}, }
Endnote
%0 Conference Proceedings %E Chau, Polo %E Vreeken, Jilles %E van Leeuwen, Matthijs %E Faloutsos, Christos %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Proceedings of the ACM SIGKDD 2015 Full-day Workshop on Interactive Data Exploration and Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-578A-0 %I ACM %D 2015 %B ACM SIGKDD 2015 Full-day Workshop on Interactive Data Exploration and Analytics %Z date of event: 2015-08-10 - 2015-08-10 %D 2015 %C Sydney, Australia %P 72 p. %U http://poloclub.gatech.edu/idea2015/papers/idea15-proceedings.pdf
[27]
D. Dedik, “Robust Type Classification of Out of Knowledge Base Entities,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{DedikMaster2015, TITLE = {Robust Type Classification of Out of Knowledge Base Entities}, AUTHOR = {Dedik, Darya}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, }
Endnote
%0 Thesis %A Dedik, Darya %Y Weikum, Gerhard %A referee: Spaniol, Marc %+ International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Robust Type Classification of Out of Knowledge Base Entities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0026-C0EC-F %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 65 p. %V master %9 master
[28]
L. Del Corro, “Methods for Open Information Extraction and Sense Disambiguation on Natural Language Text,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@phdthesis{delcorrophd15, TITLE = {Methods for Open Information Extraction and Sense Disambiguation on Natural Language Text}, AUTHOR = {Del Corro, Luciano}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, }
Endnote
%0 Thesis %A Del Corro, Luciano %Y Gemulla, Rainer %A referee: Ponzetto, Simone Paolo %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Methods for Open Information Extraction and Sense Disambiguation on Natural Language Text : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-B3DB-3 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P xiv, 101 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2016/6346/ %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[29]
L. Del Corro, A. Abujabal, R. Gemulla, and G. Weikum, “FINET: Context-Aware Fine-Grained Named Entity Typing,” in Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015), Lisbon, Portugal, 2015.
Export
BibTeX
@inproceedings{delcorro-EtAl:2015:EMNLP, TITLE = {{FINET}: {C}ontext-Aware Fine-Grained Named Entity Typing}, AUTHOR = {Del Corro, Luciano and Abujabal, Abdalghani and Gemulla, Rainer and Weikum, Gerhard}, LANGUAGE = {eng}, ISBN = {978-1-941643-32-7}, URL = {https://aclweb.org/anthology/D/D15/D15-1103}, PUBLISHER = {ACL}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, BOOKTITLE = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015)}, PAGES = {868--878}, ADDRESS = {Lisbon, Portugal}, }
Endnote
%0 Conference Proceedings %A Del Corro, Luciano %A Abujabal, Abdalghani %A Gemulla, Rainer %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T FINET: Context-Aware Fine-Grained Named Entity Typing : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49C3-C %U https://aclweb.org/anthology/D/D15/D15-1103 %D 2015 %B Conference on Empirical Methods in Natural Language Processing %Z date of event: 2015-09-17 - 2015-09-21 %C Lisbon, Portugal %B Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing %P 868 - 878 %I ACL %@ 978-1-941643-32-7 %U https://www.cs.cmu.edu/~ark/EMNLP-2015/proceedings/EMNLP/pdf/EMNLP103.pdf
[30]
S. Dutta, S. Bhattacherjee, and A. Narang, “Mining Wireless Intelligence using Unsupervised Edge and Core Analytics,” in 2nd Workshop on Smarter Planet and Big Data Analytics, Goa, India. (Accepted/in press)
Export
BibTeX
@inproceedings{SouSPBDA2015, TITLE = {Mining Wireless Intelligence using Unsupervised Edge and Core Analytics}, AUTHOR = {Dutta, Sourav and Bhattacherjee, Souvik and Narang, Ankur}, LANGUAGE = {eng}, YEAR = {2015}, PUBLREMARK = {Accepted}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {2nd Workshop on Smarter Planet and Big Data Analytics}, ADDRESS = {Goa, India}, }
Endnote
%0 Conference Proceedings %A Dutta, Sourav %A Bhattacherjee, Souvik %A Narang, Ankur %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Mining Wireless Intelligence using Unsupervised Edge and Core Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54B5-0 %D 2015 %B 2nd Workshop on Smarter Planet and Big Data Analytics %Z date of event: 2015-01-04 - 2015-01-07 %C Goa, India %B 2nd Workshop on Smarter Planet and Big Data Analytics
[31]
S. Dutta, “MIST: Top-k Approximate Sub-String Mining using Triplet Statistical Significance,” in Advances in Information Retrieval (ECIR 2015), Vienna, Austria, 2015.
Export
BibTeX
@inproceedings{SouECIR2015, TITLE = {{MIST}: Top-k Approximate Sub-String Mining using Triplet Statistical Significance}, AUTHOR = {Dutta, Sourav}, LANGUAGE = {eng}, ISBN = {978-3-319-16353-6}, DOI = {10.1007/978-3-319-16354-3_31}, PUBLISHER = {Springer}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, BOOKTITLE = {Advances in Information Retrieval (ECIR 2015)}, EDITOR = {Hanbury, Allan and Kazai, Gabriella and Rauber, Andreas and Fuhr, Norbert}, PAGES = {284--290}, SERIES = {Lecture Notes in Computer Science}, VOLUME = {9022}, ADDRESS = {Vienna, Austria}, }
Endnote
%0 Conference Proceedings %A Dutta, Sourav %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T MIST: Top-k Approximate Sub-String Mining using Triplet Statistical Significance : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54B2-5 %R 10.1007/978-3-319-16354-3_31 %D 2015 %B 37th European Conference on Information Retrieval %Z date of event: 2015-03-29 - 2015-04-02 %C Vienna, Austria %B Advances in Information Retrieval %E Hanbury, Allan; Kazai, Gabriella; Rauber, Andreas; Fuhr, Norbert %P 284 - 290 %I Springer %@ 978-3-319-16353-6 %B Lecture Notes in Computer Science %N 9022
[32]
S. Dutta and G. Weikum, “Cross-document Co-reference Resolution using Sample-based Clustering with Knowledge Enrichment,” Transactions of the Association for Computational Linguistics, vol. 3, 2015.
Export
BibTeX
@article{SouTACL2015, TITLE = {Cross-document Co-reference Resolution using Sample-based Clustering with Knowledge Enrichment}, AUTHOR = {Dutta, Sourav and Weikum, Gerhard}, LANGUAGE = {eng}, ISSN = {2307-387X}, PUBLISHER = {ACL}, ADDRESS = {Stroudsburg, PA}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, JOURNAL = {Transactions of the Association for Computational Linguistics}, VOLUME = {3}, PAGES = {15--28}, }
Endnote
%0 Journal Article %A Dutta, Sourav %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Cross-document Co-reference Resolution using Sample-based Clustering with Knowledge Enrichment : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54B7-C %7 2015 %D 2015 %J Transactions of the Association for Computational Linguistics %O TACL %V 3 %& 15 %P 15 - 28 %I ACL %C Stroudsburg, PA %@ false
[33]
S. Dutta and G. Weikum, “C3EL: A Joint Model for Cross-Document Co-Reference Resolution and Entity Linking,” in Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015), Lisbon, Portugal, 2015.
Export
BibTeX
@inproceedings{dutta-weikum:2015:EMNLP, TITLE = {{C3EL}: {A} Joint Model for Cross-Document Co-Reference Resolution and Entity Linking}, AUTHOR = {Dutta, Sourav and Weikum, Gerhard}, LANGUAGE = {eng}, ISBN = {978-1-941643-32-7}, URL = {https://aclweb.org/anthology/D/D15/D15-1101}, PUBLISHER = {ACL}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, BOOKTITLE = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015)}, PAGES = {846--856}, ADDRESS = {Lisbon, Portugal}, }
Endnote
%0 Conference Proceedings %A Dutta, Sourav %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T C3EL: A Joint Model for Cross-Document Co-Reference Resolution and Entity Linking : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49C1-0 %U https://aclweb.org/anthology/D/D15/D15-1101 %D 2015 %B Conference on Empirical Methods in Natural Language Processing %Z date of event: 2015-09-17 - 2015-09-21 %C Lisbon, Portugal %B Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing %P 846 - 856 %I ACL %@ 978-1-941643-32-7 %U https://www.cs.cmu.edu/~ark/EMNLP-2015/proceedings/EMNLP/pdf/EMNLP101.pdf
[34]
P. Ernst, A. Siu, and G. Weikum, “KnowLife: A Versatile Approach for Constructing a Large Knowledge Graph for Biomedical Sciences,” BMC Bioinformatics, vol. 16, no. 1, 2015.
Export
BibTeX
@article{ErnstSiuWeikum2015, TITLE = {{KnowLife}: A Versatile Approach for Constructing a Large Knowledge Graph for Biomedical Sciences}, AUTHOR = {Ernst, Patrick and Siu, Amy and Weikum, Gerhard}, LANGUAGE = {eng}, ISSN = {1471-2105}, URL = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4448285&tool=pmcentrez&rendertype=abstract}, DOI = {10.1186/s12859-015-0549-5}, PUBLISHER = {BioMed Central}, ADDRESS = {London}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, JOURNAL = {BMC Bioinformatics}, VOLUME = {16}, NUMBER = {1}, EID = {157}, }
Endnote
%0 Journal Article %A Ernst, Patrick %A Siu, Amy %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T KnowLife: A Versatile Approach for Constructing a Large Knowledge Graph for Biomedical Sciences : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0027-7AB7-0 %F OTHER: pmcidPMC4448285 %F OTHER: pmc-uid4448285 %F OTHER: publisher-id549 %R 10.1186/s12859-015-0549-5 %U http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4448285&tool=pmcentrez&rendertype=abstract %7 2015-05-14 %D 2015 %8 14.05.2015 %K Relation extraction %J BMC Bioinformatics %V 16 %N 1 %Z sequence number: 157 %I BioMed Central %C London %@ false
[35]
M. Gad-Elrab, “AIDArabic+ Named Entity Disambiguation for Arabic Text,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{Gad-ElrabMaster2015, TITLE = {{AIDArabic}+ Named Entity Disambiguation for Arabic Text}, AUTHOR = {Gad-Elrab, Mohamed}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, }
Endnote
%0 Thesis %A Gad-Elrab, Mohamed %Y Weikum, Gerhard %A referee: Berberich, Klaus %+ International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T AIDArabic+ Named Entity Disambiguation for Arabic Text : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-0F70-5 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 56 p. %V master %9 master
[36]
M. H. Gad-Elrab, M. A. Yosef, and G. Weikum, “EDRAK: Entity-Centric Data Resource for Arabic Knowledge,” in The Second Workshop on Arabic Natural Language Processing (ANLP 2015), Beijing, China, 2015.
Export
BibTeX
@inproceedings{Gad-ElrabAnLP2015, TITLE = {{EDRAK}: {E}ntity-Centric Data Resource for {Arabic} Knowledge}, AUTHOR = {Gad-Elrab, Mohamed H. and Yosef, Mohamed Amir and Weikum, Gerhard}, LANGUAGE = {eng}, ISBN = {978-1-941643-58-7}, PUBLISHER = {ACL}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {The Second Workshop on Arabic Natural Language Processing (ANLP 2015)}, PAGES = {191--200}, ADDRESS = {Beijing, China}, }
Endnote
%0 Conference Proceedings %A Gad-Elrab, Mohamed H. %A Yosef, Mohamed Amir %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T EDRAK: Entity-Centric Data Resource for Arabic Knowledge : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-0773-3 %D 2015 %B The Second Workshop on Arabic Natural Language Processing %Z date of event: 2015-07-26 - 2015-07-31 %C Beijing, China %B The Second Workshop on Arabic Natural Language Processing %P 191 - 200 %I ACL %@ 978-1-941643-58-7
[37]
M. H. Gad-Elrab, M. A. Yosef, and G. Weikum, “Named Entity Disambiguation for Resource-poor Languages,” in ESAIR’15, Eighth Workshop on Exploiting Semantic Annotations in Information Retrieval, Melbourne, Australia, 2015.
Export
BibTeX
@inproceedings{Gad-ElrabESAIR2015, TITLE = {Named Entity Disambiguation for Resource-poor Languages}, AUTHOR = {Gad-Elrab, Mohamed H. and Yosef, Mohamed Amir and Weikum, Gerhard}, LANGUAGE = {eng}, ISBN = {978-1-4503-3790-8}, DOI = {10.1145/2810133.2810138}, PUBLISHER = {ACM}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, BOOKTITLE = {ESAIR'15, Eighth Workshop on Exploiting Semantic Annotations in Information Retrieval}, EDITOR = {Alonso, Omar and Kamps, Jaap and Karlgren, Jussi}, PAGES = {29--34}, ADDRESS = {Melbourne, Australia}, }
Endnote
%0 Conference Proceedings %A Gad-Elrab, Mohamed H. %A Yosef, Mohamed Amir %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Named Entity Disambiguation for Resource-poor Languages : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-077F-B %R 10.1145/2810133.2810138 %D 2015 %B Eighth Workshop on Exploiting Semantic Annotations in Information Retrieval %Z date of event: 2015-10-23 - 2015-10-23 %C Melbourne, Australia %B ESAIR'15 %E Alonso, Omar; Kamps, Jaap; Karlgren, Jussi %P 29 - 34 %I ACM %@ 978-1-4503-3790-8
[38]
L. Galárraga, C. Teflioudi, K. Hose, and F. M. Suchanek, “Fast Rule Mining in Ontological Knowledge Bases with AMIE+,” The VLDB Journal, vol. 24, no. 6, 2015.
Export
BibTeX
@article{Galarrag2015, TITLE = {Fast Rule Mining in Ontological Knowledge Bases with {AMIE}+}, AUTHOR = {Gal{\'a}rraga, Luis and Teflioudi, Christina and Hose, Katja and Suchanek, Fabian M.}, LANGUAGE = {eng}, ISSN = {1066-8888}, DOI = {10.1007/s00778-015-0394-1}, PUBLISHER = {Springer}, ADDRESS = {Berlin}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, JOURNAL = {The VLDB Journal}, VOLUME = {24}, NUMBER = {6}, PAGES = {707--730}, }
Endnote
%0 Journal Article %A Galárraga, Luis %A Teflioudi, Christina %A Hose, Katja %A Suchanek, Fabian M. %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Télécom ParisTech %T Fast Rule Mining in Ontological Knowledge Bases with AMIE+ : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-3510-3 %R 10.1007/s00778-015-0394-1 %7 2015 %D 2015 %J The VLDB Journal %V 24 %N 6 %& 707 %P 707 - 730 %I Springer %C Berlin %@ false
[39]
J. Geiß, A. Spitz, J. Strötgen, and M. Gertz, “The Wikipedia Location Network - Overcoming Borders and Oceans,” in Proceedings of the 9th Workshop on Geographic Information Retrieval (GIR 2015), Paris, France, 2015.
Export
BibTeX
@inproceedings{GIR2015, TITLE = {The {Wikipedia} Location Network -- Overcoming Borders and Oceans}, AUTHOR = {Gei{\ss}, Johanna and Spitz, Andreas and Str{\"o}tgen, Jannik and Gertz, Michael}, LANGUAGE = {eng}, ISBN = {978-1-4503-3937-7}, DOI = {10.1145/2837689.2837694}, PUBLISHER = {ACM}, YEAR = {2015}, MARGINALMARK = {$\bullet$}, DATE = {2015}, BOOKTITLE = {Proceedings of the 9th Workshop on Geographic Information Retrieval (GIR 2015)}, EDITOR = {Purves, Ross S. and Jones, Christopher B.}, PAGES = {1--3}, EID = {2}, ADDRESS = {Paris, France}, }
Endnote
%0 Conference Proceedings %A Geiß, Johanna %A Spitz, Andreas %A Strötgen, Jannik %A Gertz, Michael %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T The Wikipedia Location Network - Overcoming Borders and Oceans : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-216D-0 %R 10.1145/2837689.2837694 %D 2015 %B 9th Workshop on Geographic Information Retrieval %Z date of event: 2015-11-26 - 2015-11-27 %C Paris, France %B Proceedings of the 9th Workshop on Geographic Information Retrieval %E Purves, Ross S.; Jones, Christopher B. %P 1 - 3 %Z sequence number: 2 %I ACM %@ 978-1-4503-3937-7
[40]
A. Grycner, G. Weikum, J. Pujara, J. Foulds, and L. Getoor, “RELLY: Inferring Hypernym Relationships Between Relational Phrases,” in Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015), Lisbon, Portugal, 2015.
Export
BibTeX
@inproceedings{grycner-EtAl:2015:EMNLP,
  author       = {Grycner, Adam and Weikum, Gerhard and Pujara, Jay and Foulds, James and Getoor, Lise},
  title        = {{RELLY}: {I}nferring Hypernym Relationships Between Relational Phrases},
  booktitle    = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015)},
  pages        = {971--981},
  publisher    = {ACL},
  address      = {Lisbon, Portugal},
  year         = {2015},
  date         = {2015},
  isbn         = {978-1-941643-32-7},
  url          = {http://aclweb.org/anthology/D15-1113},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Grycner, Adam %A Weikum, Gerhard %A Pujara, Jay %A Foulds, James %A Getoor, Lise %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T RELLY: Inferring Hypernym Relationships Between Relational Phrases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49B0-5 %U http://aclweb.org/anthology/D15-1113 %D 2015 %B Conference on Empirical Methods in Natural Language Processing %Z date of event: 2015-09-17 - 2015-09-21 %C Lisbon, Portugal %B Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing %P 971 - 981 %I ACL %@ 978-1-941643-32-7 %U https://www.cs.cmu.edu/~ark/EMNLP-2015/proceedings/EMNLP/pdf/EMNLP113.pdf
[41]
D. Gupta and K. Berberich, “Temporal Query Classification at Different Granularities,” in String Processing and Information Retrieval (SPIRE 2015), London, UK, 2015.
Export
BibTeX
@inproceedings{spire15-gupta,
  author        = {Gupta, Dhruv and Berberich, Klaus},
  title         = {Temporal Query Classification at Different Granularities},
  booktitle     = {String Processing and Information Retrieval (SPIRE 2015)},
  editor        = {Iliopoulos, Costas S. and Puglisi, Simon J. and Yilmaz, Emine},
  pages         = {137--148},
  series        = {Lecture Notes in Computer Science},
  volume        = {9309},
  publisher     = {Springer},
  address       = {London, UK},
  year          = {2015},
  date          = {2015},
  isbn          = {978-3-319-23825-8},
  doi           = {10.1007/978-3-319-23826-5_16},
  language      = {eng},
  marginalmark  = {$\bullet$},
  internal-note = {NOTE(review): pages 137--148 are identical to spire15-kaihui, which is chapter 14 of the same volume; this entry is chapter 16 (DOI suffix _16), so the page range looks copy-pasted -- verify against the LNCS 9309 table of contents},
}
Endnote
%0 Conference Proceedings %A Gupta, Dhruv %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Temporal Query Classification at Different Granularities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-4249-D %R 10.1007/978-3-319-23826-5_16 %D 2015 %B 22nd International Symposium on String Processing and Information Retrieval %Z date of event: 2015-08-31 - 2015-09-02 %C London, UK %B String Processing and Information Retrieval %E Iliopoulos, Costas S.; Puglisi, Simon J.; Yilmaz, Emine %P 137 - 148 %I Springer %@ 978-3-319-23825-8 %B Lecture Notes in Computer Science %N 9309
[42]
C. D. Hariman, “Part-Whole Commonsense Knowledge Harvesting from the Web,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{HarimanMaster2015,
  author       = {Hariman, Charles Darwis},
  title        = {Part-Whole Commonsense Knowledge Harvesting from the Web},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  date         = {2015},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Thesis %A Hariman, Charles Darwis %Y Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Part-Whole Commonsense Knowledge Harvesting from the Web : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0026-C0E6-C %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 53 p. %V master %9 master
[43]
J. Hoffart, “Discovering and Disambiguating Named Entities in Text,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@phdthesis{Hoffartthesis,
  author       = {Hoffart, Johannes},
  title        = {Discovering and Disambiguating Named Entities in Text},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  date         = {2015},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Thesis %A Hoffart, Johannes %Y Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering and Disambiguating Named Entities in Text : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-6C44-0 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P X, 103 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de %U http://scidok.sulb.uni-saarland.de/volltexte/2015/6022/
[44]
J. Hoffart, N. Preda, F. M. Suchanek, and G. Weikum, “Knowledge Bases for Web Content Analytics,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{hoffart2015knowledgebases,
  author       = {Hoffart, Johannes and Preda, Nicoleta and Suchanek, Fabian M. and Weikum, Gerhard},
  title        = {Knowledge Bases for Web Content Analytics},
  booktitle    = {WWW'15 Companion},
  pages        = {1535},
  publisher    = {ACM},
  address      = {Florence, Italy},
  year         = {2015},
  date         = {2015},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2741984},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %A Preda, Nicoleta %A Suchanek, Fabian M. %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowledge Bases for Web Content Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-8E68-7 %R 10.1145/2740908.2741984 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-05-18 - 2015-05-22 %C Florence, Italy %B WWW'15 Companion %P 1535 - 1535 %I ACM %@ 978-1-4503-3473-0
[45]
K. Hui and K. Berberich, “Selective Labeling and Incomplete Label Mitigation for Low-Cost Evaluation,” in String Processing and Information Retrieval (SPIRE 2015), London, UK, 2015.
Export
BibTeX
@inproceedings{spire15-kaihui,
  author       = {Hui, Kai and Berberich, Klaus},
  title        = {Selective Labeling and Incomplete Label Mitigation for Low-Cost Evaluation},
  booktitle    = {String Processing and Information Retrieval (SPIRE 2015)},
  editor       = {Iliopoulos, Costas S. and Puglisi, Simon J. and Yilmaz, Emine},
  pages        = {137--148},
  series       = {Lecture Notes in Computer Science},
  volume       = {9309},
  publisher    = {Springer},
  address      = {London, UK},
  year         = {2015},
  date         = {2015},
  isbn         = {978-3-319-23825-8},
  doi          = {10.1007/978-3-319-23826-5_14},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Hui, Kai %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Selective Labeling and Incomplete Label Mitigation for Low-Cost Evaluation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-5DAA-5 %R 10.1007/978-3-319-23826-5_14 %D 2015 %B 22nd International Symposium on String Processing and Information Retrieval %Z date of event: 2015-08-31 - 2015-09-02 %C London, UK %B String Processing and Information Retrieval %E Iliopoulos, Costas S.; Puglisi, Simon J.; Yilmaz, Emine %P 137 - 148 %I Springer %@ 978-3-319-23825-8 %B Lecture Notes in Computer Science %N 9309
[46]
S. Karaev, P. Miettinen, and J. Vreeken, “Getting to Know the Unknown Unknowns: Destructive-noise Resistant Boolean Matrix Factorization,” in Proceedings of the 2015 SIAM International Conference on Data Mining (SDM 2015), Vancouver, Canada, 2015.
Abstract
Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data.
Export
BibTeX
@inproceedings{karaev15getting,
  author       = {Karaev, Sanjar and Miettinen, Pauli and Vreeken, Jilles},
  title        = {Getting to Know the Unknown Unknowns: {D}estructive-noise Resistant {Boolean} Matrix Factorization},
  booktitle    = {Proceedings of the 2015 SIAM International Conference on Data Mining (SDM 2015)},
  editor       = {Venkatasubramanian, Suresh and Ye, Jieping},
  pages        = {325--333},
  publisher    = {SIAM},
  address      = {Vancouver, Canada},
  year         = {2015},
  date         = {2015},
  isbn         = {978-1-61197-401-0},
  doi          = {10.1137/1.9781611974010.37},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data.},
}
Endnote
%0 Conference Proceedings %A Karaev, Sanjar %A Miettinen, Pauli %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Getting to Know the Unknown Unknowns: Destructive-noise Resistant Boolean Matrix Factorization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6C59-C %R 10.1137/1.9781611974010.37 %D 2015 %B 15th SIAM International Conference on Data Mining %Z date of event: 2015-04-30 - 2015-05-02 %C Vancouver, Canada %X Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data. 
%B Proceedings of the 2015 SIAM International Conference on Data Mining %E Venkatasubramanian, Suresh; Ye, Jieping %P 325 - 333 %I SIAM %@ 978-1-61197-401-0
[47]
D. Koutra, U. Kang, J. Vreeken, and C. Faloutsos, “Summarizing and Understanding Large Graphs,” Statistical Analysis and Data Mining, vol. 8, no. 3, 2015.
Export
BibTeX
@article{koutra:15:vog,
  author       = {Koutra, Danai and Kang, U and Vreeken, Jilles and Faloutsos, Christos},
  title        = {Summarizing and Understanding Large Graphs},
  journal      = {Statistical Analysis and Data Mining},
  volume       = {8},
  number       = {3},
  pages        = {183--202},
  publisher    = {Wiley-Blackwell},
  address      = {Chichester},
  year         = {2015},
  date         = {2015},
  issn         = {1932-1872},
  doi          = {10.1002/sam.11267},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Journal Article %A Koutra, Danai %A Kang, U %A Vreeken, Jilles %A Faloutsos, Christos %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Summarizing and Understanding Large Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0026-D185-2 %R 10.1002/sam.11267 %7 2015-05-18 %D 2015 %J Statistical Analysis and Data Mining %O The ASA Data Science Journal %V 8 %N 3 %& 183 %P 183 - 202 %I Wiley-Blackwell %C Chichester %@ false
[48]
P. Mandros, “Information Theoretic Supervised Feature Selection for Continuous Data,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{MandrosMaster2015,
  author       = {Mandros, Panagiotis},
  title        = {Information Theoretic Supervised Feature Selection for Continuous Data},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  date         = {2015},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Thesis %A Mandros, Panagiotis %Y Weikum, Gerhard %A referee: Vreeken, Jilles %+ International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Information Theoretic Supervised Feature Selection for Continuous Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-BAF3-F %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 67 p. %V master %9 master
[49]
S. Metzger, R. Schenkel, and M. Sydow, “Aspect-based Similar Entity Search in Semantic Knowledge Graphs with Diversity-awareness and Relaxation,” in The 2014 IEEE/WIC/ACM International Conference on Intelligent Agent Technology (IAT 2014), Warsaw, Poland, 2015.
Export
BibTeX
@inproceedings{MetzgerIAT2014,
  author        = {Metzger, Steffen and Schenkel, Ralf and Sydow, Marcin},
  title         = {Aspect-based Similar Entity Search in Semantic Knowledge Graphs with Diversity-awareness and Relaxation},
  booktitle     = {The 2014 IEEE/WIC/ACM International Conference on Intelligent Agent Technology (IAT 2014)},
  pages         = {60--69},
  publisher     = {IEEE},
  address       = {Warsaw, Poland},
  year          = {2014},
  date          = {2015},
  isbn          = {978-1-4799-4143-8},
  doi           = {10.1109/WI-IAT.2014.17},
  language      = {eng},
  marginalmark  = {$\bullet$},
  internal-note = {NOTE(review): year (2014) and date (2015) disagree; the event was August 2014 -- confirm the actual publication year against IEEE Xplore},
}
Endnote
%0 Conference Proceedings %A Metzger, Steffen %A Schenkel, Ralf %A Sydow, Marcin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Aspect-based Similar Entity Search in Semantic Knowledge Graphs with Diversity-awareness and Relaxation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-424D-5 %R 10.1109/WI-IAT.2014.17 %D 2015 %B IEEE/WIC/ACM International Conference on Intelligent Agent Technology %Z date of event: 2014-08-11 - 2014-08-14 %C Warsaw, Poland %B The 2014 IEEE/WIC/ACM International Conference on Intelligent Agent Technology %P 60 - 69 %I IEEE %@ 978-1-4799-4143-8
[50]
S. Metzler and P. Miettinen, “On Defining SPARQL with Boolean Tensor Algebra,” 2015. [Online]. Available: http://arxiv.org/abs/1503.00301.
Abstract
The Resource Description Framework (RDF) represents information as subject-predicate-object triples. These triples are commonly interpreted as a directed labelled graph. We propose an alternative approach, interpreting the data as a 3-way Boolean tensor. We show how SPARQL queries - the standard queries for RDF - can be expressed as elementary operations in Boolean algebra, giving us a complete re-interpretation of RDF and SPARQL. We show how the Boolean tensor interpretation allows for new optimizations and analyses of the complexity of SPARQL queries. For example, estimating the size of the results for different join queries becomes much simpler.
Export
BibTeX
@online{metzler15defining:arxiv,
  author       = {Metzler, Saskia and Miettinen, Pauli},
  title        = {On Defining {SPARQL} with {B}oolean Tensor Algebra},
  url          = {http://arxiv.org/abs/1503.00301},
  eprint       = {1503.00301},
  eprinttype   = {arXiv},
  year         = {2015},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {The Resource Description Framework (RDF) represents information as subject-predicate-object triples. These triples are commonly interpreted as a directed labelled graph. We propose an alternative approach, interpreting the data as a 3-way Boolean tensor. We show how SPARQL queries -- the standard queries for RDF -- can be expressed as elementary operations in Boolean algebra, giving us a complete re-interpretation of RDF and SPARQL. We show how the Boolean tensor interpretation allows for new optimizations and analyses of the complexity of SPARQL queries. For example, estimating the size of the results for different join queries becomes much simpler.},
}
Endnote
%0 Report %A Metzler, Saskia %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T On Defining SPARQL with Boolean Tensor Algebra : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-054A-9 %U http://arxiv.org/abs/1503.00301 %D 2015 %8 03.03.2015 %X The Resource Description Framework (RDF) represents information as subject-predicate-object triples. These triples are commonly interpreted as a directed labelled graph. We propose an alternative approach, interpreting the data as a 3-way Boolean tensor. We show how SPARQL queries - the standard queries for RDF - can be expressed as elementary operations in Boolean algebra, giving us a complete re-interpretation of RDF and SPARQL. We show how the Boolean tensor interpretation allows for new optimizations and analyses of the complexity of SPARQL queries. For example, estimating the size of the results for different join queries becomes much simpler. %K Computer Science, Databases, cs.DB
[51]
S. Metzler and P. Miettinen, “Join Size Estimation on Boolean Tensors of RDF Data,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{metzler15join,
  author       = {Metzler, Saskia and Miettinen, Pauli},
  title        = {Join Size Estimation on Boolean Tensors of {RDF} Data},
  booktitle    = {WWW'15 Companion},
  pages        = {77--78},
  publisher    = {ACM},
  address      = {Florence, Italy},
  year         = {2015},
  date         = {2015},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2742738},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Metzler, Saskia %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Join Size Estimation on Boolean Tensors of RDF Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-CCED-A %R 10.1145/2740908.2742738 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-05-18 - 2015-05-22 %C Florence, Italy %B WWW'15 Companion %P 77 - 78 %I ACM %@ 978-1-4503-3473-0
[52]
S. Metzler and P. Miettinen, “Clustering Boolean Tensors,” Data Mining and Knowledge Discovery, vol. 29, no. 5, 2015.
Export
BibTeX
@article{MetzlerMiettinen2015,
  author       = {Metzler, Saskia and Miettinen, Pauli},
  title        = {Clustering {Boolean} tensors},
  journal      = {Data Mining and Knowledge Discovery},
  volume       = {29},
  number       = {5},
  pages        = {1343--1373},
  publisher    = {Springer},
  address      = {New York, NY},
  year         = {2015},
  date         = {2015},
  doi          = {10.1007/s10618-015-0420-3},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Journal Article %A Metzler, Saskia %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Clustering Boolean Tensors : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-536A-B %R 10.1007/s10618-015-0420-3 %7 2015 %D 2015 %J Data Mining and Knowledge Discovery %V 29 %N 5 %& 1343 %P 1343 - 1373 %I Springer %C New York, NY
[53]
S. Metzler and P. Miettinen, “Clustering Boolean Tensors,” 2015. [Online]. Available: http://arxiv.org/abs/1501.00696.
Abstract
Tensor factorizations are computationally hard problems, and in particular, are often significantly harder than their matrix counterparts. In case of Boolean tensor factorizations -- where the input tensor and all the factors are required to be binary and we use Boolean algebra -- much of that hardness comes from the possibility of overlapping components. Yet, in many applications we are perfectly happy to partition at least one of the modes. In this paper we investigate what consequences does this partitioning have on the computational complexity of the Boolean tensor factorizations and present a new algorithm for the resulting clustering problem. This algorithm can alternatively be seen as a particularly regularized clustering algorithm that can handle extremely high-dimensional observations. We analyse our algorithms with the goal of maximizing the similarity and argue that this is more meaningful than minimizing the dissimilarity. As a by-product we obtain a PTAS and an efficient 0.828-approximation algorithm for rank-1 binary factorizations. Our algorithm for Boolean tensor clustering achieves high scalability, high similarity, and good generalization to unseen data with both synthetic and real-world data sets.
Export
BibTeX
@online{metzler15clustering:arxiv,
  author       = {Metzler, Saskia and Miettinen, Pauli},
  title        = {Clustering {Boolean} Tensors},
  url          = {http://arxiv.org/abs/1501.00696},
  eprint       = {1501.00696},
  eprinttype   = {arXiv},
  year         = {2015},
  language     = {eng},
  marginalmark = {$\bullet$},
  abstract     = {Tensor factorizations are computationally hard problems, and in particular, are often significantly harder than their matrix counterparts. In case of Boolean tensor factorizations -- where the input tensor and all the factors are required to be binary and we use Boolean algebra -- much of that hardness comes from the possibility of overlapping components. Yet, in many applications we are perfectly happy to partition at least one of the modes. In this paper we investigate what consequences does this partitioning have on the computational complexity of the Boolean tensor factorizations and present a new algorithm for the resulting clustering problem. This algorithm can alternatively be seen as a particularly regularized clustering algorithm that can handle extremely high-dimensional observations. We analyse our algorithms with the goal of maximizing the similarity and argue that this is more meaningful than minimizing the dissimilarity. As a by-product we obtain a PTAS and an efficient 0.828-approximation algorithm for rank-1 binary factorizations. Our algorithm for Boolean tensor clustering achieves high scalability, high similarity, and good generalization to unseen data with both synthetic and real-world data sets.},
}
Endnote
%0 Report %A Metzler, Saskia %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Clustering Boolean Tensors : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6C5B-8 %U http://arxiv.org/abs/1501.00696 %D 2015 %X Tensor factorizations are computationally hard problems, and in particular, are often significantly harder than their matrix counterparts. In case of Boolean tensor factorizations -- where the input tensor and all the factors are required to be binary and we use Boolean algebra -- much of that hardness comes from the possibility of overlapping components. Yet, in many applications we are perfectly happy to partition at least one of the modes. In this paper we investigate what consequences does this partitioning have on the computational complexity of the Boolean tensor factorizations and present a new algorithm for the resulting clustering problem. This algorithm can alternatively be seen as a particularly regularized clustering algorithm that can handle extremely high-dimensional observations. We analyse our algorithms with the goal of maximizing the similarity and argue that this is more meaningful than minimizing the dissimilarity. As a by-product we obtain a PTAS and an efficient 0.828-approximation algorithm for rank-1 binary factorizations. Our algorithm for Boolean tensor clustering achieves high scalability, high similarity, and good generalization to unseen data with both synthetic and real-world data sets. %K Computer Science, Numerical Analysis, cs.NA,Computer Science, Data Structures and Algorithms, cs.DS
[54]
P. Miettinen, “Generalized Matrix Factorizations as a Unifying Framework for Pattern Set Mining: Complexity Beyond Blocks,” in Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015), Porto, Portugal, 2015.
Export
BibTeX
@inproceedings{MiettinenECML2015,
  author       = {Miettinen, Pauli},
  title        = {Generalized Matrix Factorizations as a Unifying Framework for Pattern Set Mining: {C}omplexity Beyond Blocks},
  booktitle    = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015)},
  editor       = {Appice, Annalisa and Pereira Rodrigues, Pedro and Gama, Jo{\~a}o and Jorge, Al{\'i}pio and Soares, Carlos},
  pages        = {36--52},
  series       = {Lecture Notes in Artificial Intelligence},
  volume       = {9285},
  publisher    = {Springer},
  address      = {Porto, Portugal},
  year         = {2015},
  date         = {2015},
  isbn         = {978-3-319-23524-0},
  doi          = {10.1007/978-3-319-23525-7_3},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Generalized Matrix Factorizations as a Unifying Framework for Pattern Set Mining: Complexity Beyond Blocks : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-2278-1 %R 10.1007/978-3-319-23525-7_3 %D 2015 %B European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2015-09-07 - 2015-09-11 %C Porto, Portugal %B Machine Learning and Knowledge Discovery in Databases %E Appice, Annalisa; Pereira Rodrigues, Pedro; Gama, João; Jorge, Alípio; Soares, Carlos %P 36 - 52 %I Springer %@ 978-3-319-23524-0 %B Lecture Notes in Artificial Intelligence %N 9285
[55]
A. Mishra and K. Berberich, “EXPOSÉ: EXploring Past news fOr Seminal Events,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{MishraWWW2015,
  author       = {Mishra, Arunav and Berberich, Klaus},
  title        = {EXPOS{\'E}: {EXploring Past news fOr Seminal Events}},
  booktitle    = {WWW'15 Companion},
  pages        = {223--226},
  publisher    = {ACM},
  address      = {Florence, Italy},
  year         = {2015},
  date         = {2015},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2742844},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Mishra, Arunav %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T EXPOSÉ: EXploring Past news fOr Seminal Events : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-E33E-F %R 10.1145/2740908.2742844 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-05-18 - 2015-05-22 %C Florence, Italy %B WWW'15 Companion %P 223 - 226 %I ACM %@ 978-1-4503-3473-0
[56]
S. Mukherjee, H. Lamba, and G. Weikum, “Experience-aware Item Recommendation in Evolving Review Communities,” in 15th IEEE International Conference on Data Mining (ICDM 2015), Atlantic City, NJ, USA, 2015.
Export
BibTeX
@inproceedings{mukherjee-experience-model,
  author       = {Mukherjee, Subhabrata and Lamba, Hemank and Weikum, Gerhard},
  title        = {Experience-aware Item Recommendation in Evolving Review Communities},
  booktitle    = {15th IEEE International Conference on Data Mining (ICDM 2015)},
  editor       = {Aggarwal, Charu and Zhou, Zhi-Hua and Tuzhilin, Alexander and Xiong, Hui and Wu, Xindong},
  pages        = {925--930},
  publisher    = {IEEE},
  address      = {Atlantic City, NJ, USA},
  year         = {2015},
  date         = {2015},
  isbn         = {978-1-4673-9503-8},
  doi          = {10.1109/ICDM.2015.111},
  language     = {eng},
  marginalmark = {$\bullet$},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Lamba, Hemank %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Experience-aware Item Recommendation in Evolving Review Communities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49F3-F %R 10.1109/ICDM.2015.111 %D 2015 %B 15th International Conference on Data Mining %Z date of event: 2015-11-14 - 2015-11-17 %C Atlantic City, NJ, USA %B 15th IEEE International Conference on Data Mining %E Aggarwal, Charu; Zhou, Zhi-Hua; Tuzhilin, Alexander; Xiong, Hui; Wu, Xindong %P 925 - 930 %I IEEE %@ 978-1-4673-9503-8
[57]
S. Mukherjee and G. Weikum, “Leveraging Joint Interactions for Credibility Analysis in News Communities,” in CIKM’15, 24th ACM International Conference on Information and Knowledge Management, Melbourne, Australia, 2015.
Export
BibTeX
@inproceedings{mukherjee-credibility-analysis,
  title        = {Leveraging Joint Interactions for Credibility Analysis in News Communities},
  author       = {Mukherjee, Subhabrata and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-3794-6},
  doi          = {10.1145/2806416.2806537},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {CIKM'15, 24th ACM International Conference on Information and Knowledge Management},
  pages        = {353--362},
  address      = {Melbourne, Australia},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Leveraging Joint Interactions for Credibility Analysis in News Communities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49DE-1 %R 10.1145/2806416.2806537 %D 2015 %B 24th ACM International Conference on Information and Knowledge Management %Z date of event: 2015-10-19 - 2015-10-23 %C Melbourne, Australia %B CIKM'15 %P 353 - 362 %I ACM %@ 978-1-4503-3794-6
[58]
S. Neumann, “On Some Problems of Rounding Rank,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{NeumannMaster2015,
  title        = {On Some Problems of Rounding Rank},
  author       = {Neumann, Stefan},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Neumann, Stefan %Y Miettinen, Pauli %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T On Some Problems of Rounding Rank : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-57D6-2 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P X, 77 p. %V master %9 master
[59]
H.-V. Nguyen and J. Vreeken, “Non-parametric Jensen-Shannon Divergence,” in Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015), Porto, Portugal, 2015.
Export
BibTeX
@inproceedings{NguyenECML2015,
  title        = {Non-parametric {Jensen}-{Shannon} Divergence},
  author       = {Nguyen, Hoang-Vu and Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-3-319-23524-0},
  doi          = {10.1007/978-3-319-23525-7_11},
  publisher    = {Springer},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015)},
  editor       = {Appice, Annalisa and Pereira Rodrigues, Pedro and Gama, Jo{\~a}o and Al{\'i}pio, Jorge and Soares, Carlos},
  pages        = {173--189},
  series       = {Lecture Notes in Artificial Intelligence},
  volume       = {9285},
  address      = {Porto, Portugal},
}
Endnote
%0 Conference Proceedings %A Nguyen, Hoang-Vu %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Non-parametric Jensen-Shannon Divergence : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-2286-3 %R 10.1007/978-3-319-23525-7_11 %D 2015 %B European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2015-09-07 - 2015-09-11 %C Porto, Portugal %B Machine Learning and Knowledge Discovery in Databases %E Appice, Annalisa; Pereira Rodrigues, Pedro; Gama, João; Alípio, Jorge; Soares, Carlos %P 173 - 189 %I Springer %@ 978-3-319-23524-0 %B Lecture Notes in Artificial Intelligence %N 9285
[60]
R. Pienta, Z. Lin, M. Kahng, J. Vreeken, P. P. Talukdar, J. Abello, G. Parameswaran, and D. H. Chau, “AdaptiveNav: Adaptive Discovery of Interesting and Surprising Nodes in Large Graphs.” 2015.
Export
BibTeX
@inproceedings{pienta:15:adaptivenav,
  title         = {{AdaptiveNav}: {A}daptive Discovery of Interesting and Surprising Nodes in Large Graphs},
  author        = {Pienta, Robert and Lin, Zhiyuan and Kahng, Minsuk and Vreeken, Jilles and Talukdar, Partha P. and Abello, James and Parameswaran, Ganesh and Chau, Duen Horng},
  language      = {eng},
  year          = {2015},
  marginalmark  = {$\bullet$},
  booktitle     = {IEEE VIS 2015},
  address       = {Chicago, IL, USA},
  internal-note = {booktitle reconstructed from the event name in the repository metadata (IEEE VIS 2015); pages/publisher unknown -- verify before reuse},
}
Endnote
%0 Generic %A Pienta, Robert %A Lin, Zhiyuan %A Kahng, Minsuk %A Vreeken, Jilles %A Talukdar, Partha P. %A Abello, James %A Parameswaran, Ganesh %A Chau, Duen Horng %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations %T AdaptiveNav: Adaptive Discovery of Interesting and Surprising Nodes in Large Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-57B4-E %D 2015 %Z name of event: IEEE VIS 2015 %Z date of event: 2015-10-25 - 2015-10-30 %Z place of event: Chicago, IL, USA
[61]
N. Prytkova, M. Spaniol, and G. Weikum, “Aligning Multi-cultural Knowledge Taxonomies by Combinatorial Optimization,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{PSWe15,
  title        = {Aligning Multi-cultural Knowledge Taxonomies by Combinatorial Optimization},
  author       = {Prytkova, Natalia and Spaniol, Marc and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2742721},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {WWW'15 Companion},
  pages        = {93--94},
  address      = {Florence, Italy},
}
Endnote
%0 Conference Proceedings %A Prytkova, Natalia %A Spaniol, Marc %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Aligning Multi-cultural Knowledge Taxonomies by Combinatorial Optimization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-06E5-3 %R 10.1145/2740908.2742721 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-05-18 - 2015-05-22 %C Florence, Italy %B WWW'15 Companion %P 93 - 94 %I ACM %@ 978-1-4503-3473-0
[62]
A. Rohrbach, M. Rohrbach, N. Tandon, and B. Schiele, “A Dataset for Movie Description,” in IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015), Boston, MA, USA, 2015.
Export
BibTeX
@inproceedings{Rohrbach15cvpr,
  title        = {A Dataset for Movie Description},
  author       = {Rohrbach, Anna and Rohrbach, Marcus and Tandon, Niket and Schiele, Bernt},
  language     = {eng},
  doi          = {10.1109/CVPR.2015.7298940},
  publisher    = {IEEE Computer Society},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015)},
  pages        = {3202--3212},
  address      = {Boston, MA, USA},
}
Endnote
%0 Conference Proceedings %A Rohrbach, Anna %A Rohrbach, Marcus %A Tandon, Niket %A Schiele, Bernt %+ Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society %T A Dataset for Movie Description : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-01B9-B %R 10.1109/CVPR.2015.7298940 %D 2015 %B IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2015-06-08 - 2015-06-10 %C Boston, MA, USA %B IEEE Conference on Computer Vision and Pattern Recognition %P 3202 - 3212 %I IEEE Computer Society
[63]
C. Schulte, B. Taneva, and G. Weikum, “On-topic Cover Stories from News Archives,” in Advances in Information Retrieval (ECIR 2015), Vienna, Austria, 2015.
Export
BibTeX
@inproceedings{Schulte:ECIR2015,
  title        = {On-topic Cover Stories from News Archives},
  author       = {Schulte, Christian and Taneva, Bilyana and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-3-319-16353-6},
  doi          = {10.1007/978-3-319-16354-3_4},
  publisher    = {Springer},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Advances in Information Retrieval (ECIR 2015)},
  editor       = {Hanbury, Allan and Kazai, Gabriella and Rauber, Andreas and Fuhr, Norbert},
  pages        = {37--42},
  series       = {Lecture Notes in Computer Science},
  volume       = {9022},
  address      = {Vienna, Austria},
}
Endnote
%0 Conference Proceedings %A Schulte, Christian %A Taneva, Bilyana %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T On-topic Cover Stories from News Archives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-A6DE-B %R 10.1007/978-3-319-16354-3_4 %D 2015 %B 37th European Conference on Information Retrieval %Z date of event: 2015-03-29 - 2015-04-02 %C Vienna, Austria %B Advances in Information Retrieval %E Hanbury, Allan; Kazai, Gabriella; Rauber, Andreas; Fuhr, Norbert %P 37 - 42 %I Springer %@ 978-3-319-16353-6 %B Lecture Notes in Computer Science %N 9022
[64]
S. Seufert, “Algorithmic Building Blocks for Relationship Analysis over Large Graphs,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@phdthesis{Seufertphd15,
  title        = {Algorithmic Building Blocks for Relationship Analysis over Large Graphs},
  author       = {Seufert, Stephan},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Seufert, Stephan %Y Bedathur, Srikanta %A referee: Barbosa, Denilson %A referee: Weidenbach, Christoph %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Automation of Logic, MPI for Informatics, Max Planck Society %T Algorithmic Building Blocks for Relationship Analysis over Large Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-6E65-D %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 198 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2015/6183/ %U http://scidok.sulb.uni-saarland.de/doku/urheberrecht.php?la=de
[65]
D. Seyler, M. Yahya, and K. Berberich, “Generating Quiz Questions from Knowledge Graphs,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{SeylerWWW2015,
  title        = {Generating Quiz Questions from Knowledge Graphs},
  author       = {Seyler, Dominic and Yahya, Mohamed and Berberich, Klaus},
  language     = {eng},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2742722},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {WWW'15 Companion},
  pages        = {113--114},
  address      = {Florence, Italy},
}
Endnote
%0 Conference Proceedings %A Seyler, Dominic %A Yahya, Mohamed %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Generating Quiz Questions from Knowledge Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-E33C-4 %R 10.1145/2740908.2742722 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-04-18 - 2015-04-22 %C Florence, Italy %B WWW'15 Companion %P 113 - 114 %I ACM %@ 978-1-4503-3473-0
[66]
D. Seyler, “Question Generation from Knowledge Graphs,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{SeylerMaster2015,
  title        = {Question Generation from Knowledge Graphs},
  author       = {Seyler, Dominic},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Seyler, Dominic %Y Berberich, Klaus %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Question Generation from Knowledge Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-08B0-4 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P XII, 104 p. %V master %9 master
[67]
A. Sierra, “Ad-hoc Information Retrieval using Annotated Queries and Documents,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{SierraMaster2015,
  title        = {Ad-hoc Information Retrieval using Annotated Queries and Documents},
  author       = {Sierra, Alejandro},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Sierra, Alejandro %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Ad-hoc Information Retrieval using Annotated Queries and Documents : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-A968-D %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 68 p. %V master %9 master
[68]
S. Sundareisan, J. Vreeken, and B. A. Prakash, “Hidden Hazards: Finding Missing Nodes in Large Graph Epidemics,” in Proceedings of the SIAM International Conference on Data Mining (SDM 2015), Vancouver, Canada, 2015.
Export
BibTeX
@inproceedings{sundareisan:15:netfill,
  title        = {Hidden Hazards: {Finding} Missing Nodes in Large Graph Epidemics},
  author       = {Sundareisan, Shashi and Vreeken, Jilles and Prakash, B. Aditya},
  language     = {eng},
  isbn         = {978-1-61197-401-0},
  doi          = {10.1137/1.9781611974010.47},
  publisher    = {SIAM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Proceedings of the SIAM International Conference on Data Mining (SDM 2015)},
  editor       = {Venkatasubramanian, Suresh and Ye, Jieping},
  pages        = {415--423},
  address      = {Vancouver, Canada},
}
Endnote
%0 Conference Proceedings %A Sundareisan, Shashi %A Vreeken, Jilles %A Prakash, B. Aditya %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Hidden Hazards: Finding Missing Nodes in Large Graph Epidemics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-A82A-2 %R 10.1137/1.9781611974010.47 %D 2015 %B 15th SIAM International Conference on Data Mining %Z date of event: 2015-04-30 - 2015-05-02 %C Vancouver, Canada %B Proceedings of the SIAM International Conference on Data Mining %E Venkatasubramanian, Suresh; Ye, Jieping %P 415 - 423 %I SIAM %@ 978-1-61197-401-0
[69]
N. Tandon, G. de Melo, A. De, and G. Weikum, “Lights, Camera, Action: Knowledge Extraction from Movie Scripts,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{tandon2015moviescripts,
  title        = {Lights, Camera, Action: Knowledge Extraction from Movie Scripts},
  author       = {Tandon, Niket and de Melo, Gerard and De, Abir and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2742756},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {WWW'15 Companion},
  pages        = {127--128},
  address      = {Florence, Italy},
}
Endnote
%0 Conference Proceedings %A Tandon, Niket %A de Melo, Gerard %A De, Abir %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Lights, Camera, Action: Knowledge Extraction from Movie Scripts : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-E32D-6 %R 10.1145/2740908.2742756 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-05-18 - 2015-05-22 %C Florence, Italy %B WWW'15 Companion %P 127 - 128 %I ACM %@ 978-1-4503-3473-0
[70]
N. Tandon, G. de Melo, A. De, and G. Weikum, “Knowlywood: Mining Activity Knowledge From Hollywood Narratives,” in CIKM’15, 24th ACM International Conference on Information and Knowledge Management, Melbourne, Australia, 2015.
Export
BibTeX
@inproceedings{Tandon:2015:KMA:2806416.2806583,
  title        = {Knowlywood: {M}ining Activity Knowledge From Hollywood Narratives},
  author       = {Tandon, Niket and de Melo, Gerard and De, Abir and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-3794-6},
  doi          = {10.1145/2806416.2806583},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {CIKM'15, 24th ACM International Conference on Information and Knowledge Management},
  pages        = {223--232},
  address      = {Melbourne, Australia},
}
Endnote
%0 Conference Proceedings %A Tandon, Niket %A de Melo, Gerard %A De, Abir %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowlywood: Mining Activity Knowledge From Hollywood Narratives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49E0-A %R 10.1145/2806416.2806583 %D 2015 %B 24th ACM International Conference on Information and Knowledge Management %Z date of event: 2015-10-19 - 2015-10-23 %C Melbourne, Australia %B CIKM'15 %P 223 - 232 %I ACM %@ 978-1-4503-3794-6
[71]
C. Teflioudi, R. Gemulla, and O. Mykytiuk, “LEMP: Fast Retrieval of Large Entries in a Matrix Product,” in SIGMOD’15, ACM SIGMOD International Conference on Management of Data, Melbourne, Victoria, Australia, 2015.
Export
BibTeX
@inproceedings{Teflioudi15,
  title        = {{LEMP}: {F}ast Retrieval of Large Entries in a Matrix Product},
  author       = {Teflioudi, Christina and Gemulla, Rainer and Mykytiuk, Olga},
  language     = {eng},
  isbn         = {978-1-4503-2758-9},
  doi          = {10.1145/2723372.2747647},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {SIGMOD'15, ACM SIGMOD International Conference on Management of Data},
  pages        = {107--122},
  address      = {Melbourne, Victoria, Australia},
}
Endnote
%0 Conference Proceedings %A Teflioudi, Christina %A Gemulla, Rainer %A Mykytiuk, Olga %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T LEMP: Fast Retrieval of Large Entries in a Matrix Product : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-4A1C-F %R 10.1145/2723372.2747647 %D 2015 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2015-05-31 - 2015-06-04 %C Melbourne, Victoria, Australia %B SIGMOD'15 %P 107 - 122 %I ACM %@ 978-1-4503-2758-9
[72]
T. Tylenda, “Methods and Tools for Summarization of Entities and Facts in Knowledge Bases,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@phdthesis{TylendaPhd15,
  title        = {Methods and Tools for Summarization of Entities and Facts in Knowledge Bases},
  author       = {Tylenda, Tomasz},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Tylenda, Tomasz %Y Weikum, Gerhard %A referee: Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Methods and Tools for Summarization of Entities and Facts in Knowledge Bases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-FC65-5 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 113 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2015/6263/ %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[73]
J. Vreeken, “Causal Inference by Direction of Information,” in Proceedings of the SIAM International Conference on Data Mining (SDM 2015), Vancouver, Canada, 2015.
Export
BibTeX
@inproceedings{vreeken:15:ergo,
  title        = {Causal Inference by Direction of Information},
  author       = {Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-1-61197-401-0},
  doi          = {10.1137/1.9781611974010.102},
  publisher    = {SIAM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Proceedings of the SIAM International Conference on Data Mining (SDM 2015)},
  editor       = {Venkatasubramanian, Suresh and Ye, Jieping},
  pages        = {909--917},
  address      = {Vancouver, Canada},
}
Endnote
%0 Conference Proceedings %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Causal Inference by Direction of Information : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-A82C-D %R 10.1137/1.9781611974010.102 %D 2015 %B 15th SIAM International Conference on Data Mining %Z date of event: 2015-04-30 - 2015-05-02 %C Vancouver, Canada %B Proceedings of the SIAM International Conference on Data Mining %E Venkatasubramanian, Suresh; Ye, Jieping %P 909 - 917 %I SIAM %@ 978-1-61197-401-0
[74]
H. Wang, “Retrospective Summarization : What Did I Miss?,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{WangMaster2015,
  title        = {Retrospective Summarization : What Did I Miss?},
  author       = {Wang, He},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Wang, He %Y Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Retrospective Summarization : What Did I Miss? : %U http://hdl.handle.net/11858/00-001M-0000-0026-A0B4-B %I Universität des Saarlandes %C Saarbrücken %D 2015 %P XVI, 73 p. %V master %9 master
[75]
M. A. Yosef, “U-AIDA : A Customizable System for Named Entity Recognition, Classification, and Disambiguation,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@phdthesis{Yosefphd15,
  title        = {U-{AIDA} : A Customizable System for Named Entity Recognition, Classification, and Disambiguation},
  author       = {Yosef, Mohamed Amir},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Yosef, Mohamed Amir %Y Weikum, Gerhard %A referee: Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T U-AIDA : A Customizable System for Named Entity Recognition, Classification, and Disambiguation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-B9B9-C %I Universität des Saarlandes %C Saarbrücken %D 2015 %P XV, 101 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2016/6370/ %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[76]
A. Zimek and J. Vreeken, “The Blind Men and the Elephant: On Meeting the Problem of Multiple Truths in Data from Clustering and Pattern Mining Perspectives,” Machine Learning, vol. 98, no. 1, 2015.
Export
BibTeX
@article{zimek:15:blind,
  title        = {The Blind Men and the Elephant: On Meeting the Problem of Multiple Truths in Data from Clustering and Pattern Mining Perspectives},
  author       = {Zimek, Arthur and Vreeken, Jilles},
  language     = {eng},
  issn         = {0885-6125},
  doi          = {10.1007/s10994-013-5334-y},
  publisher    = {Springer},
  address      = {New York, NY},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  journal      = {Machine Learning},
  volume       = {98},
  number       = {1},
  pages        = {121--155},
}
Endnote
%0 Journal Article %A Zimek, Arthur %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T The Blind Men and the Elephant: On Meeting the Problem of Multiple Truths in Data from Clustering and Pattern Mining Perspectives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-57AE-D %R 10.1007/s10994-013-5334-y %7 2013-03-07 %D 2015 %J Machine Learning %V 98 %N 1 %& 121 %P 121 - 155 %I Springer %C New York, NY %@ false
[77]
T. Zinchenko, E. Galbrun, and P. Miettinen, “Mining Predictive Redescriptions with Trees,” in 15th IEEE International Conference on Data Mining Workshop (ICDMW 2015), Atlantic City, NJ, USA, 2015.
Export
BibTeX
@inproceedings{zinchenko15mining,
  title        = {Mining Predictive Redescriptions with Trees},
  author       = {Zinchenko, Tetiana and Galbrun, Esther and Miettinen, Pauli},
  language     = {eng},
  isbn         = {978-1-4673-8492-6},
  doi          = {10.1109/ICDMW.2015.123},
  publisher    = {IEEE Computer Society},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {15th IEEE International Conference on Data Mining Workshop (ICDMW 2015)},
  editor       = {Cui, Peng and Dy, Jennifer and Aggarwal, Charu and Zhou, Zhi-Hua and Tuzhilin, Alexander and Xiong, Hui and Wu, Xindong},
  pages        = {1672--1675},
  address      = {Atlantic City, NJ, USA},
}
Endnote
%0 Conference Proceedings %A Zinchenko, Tetiana %A Galbrun, Esther %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Mining Predictive Redescriptions with Trees : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-5424-A %R 10.1109/ICDMW.2015.123 %D 2015 %B 15th International Conference on Data Mining %Z date of event: 2015-11-14 - 2015-11-17 %C Atlantic City, NJ, USA %B 15th IEEE International Conference on Data Mining Workshop %E Cui, Peng; Dy, Jennifer; Aggarwal, Charu; Zhou, Zhi-Hua; Tuzhilin, Alexander; Xiong, Hui; Wu, Xindong %P 1672 - 1675 %I IEEE Computer Society %@ 978-1-4673-8492-6
2014
[78]
F. Alvanaki, “Mining Interesting Events on Large and Dynamic Data,” Universität des Saarlandes, Saarbrücken, 2014.
Export
BibTeX
@phdthesis{Alvanakithesis,
  title        = {Mining Interesting Events on Large and Dynamic Data},
  author       = {Alvanaki, Foteini},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
}
Endnote
%0 Thesis %A Alvanaki, Foteini %Y Michel, Sebastian %A referee: Weikum, Gerhard %A referee: Delis, Alexis %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Mining Interesting Events on Large and Dynamic Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-6C4E-B %I Universität des Saarlandes %C Saarbrücken %D 2014 %P 128 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2015/5985/ %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[79]
F. Alvanaki and S. Michel, “Tracking Set Correlations at Large Scale,” in SIGMOD’14, ACM SIGMOD International Conference on Management of Data, Snowbird, UT, USA, 2014.
Export
BibTeX
@inproceedings{Alvanaki2014,
  title        = {Tracking Set Correlations at Large Scale},
  author       = {Alvanaki, Foteini and Michel, Sebastian},
  language     = {eng},
  isbn         = {978-1-4503-2376-5},
  doi          = {10.1145/2588555.2610510},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {SIGMOD'14, ACM SIGMOD International Conference on Management of Data},
  editor       = {Dyresson, Curtis and Li, Feifei and {\"O}zsu, M. Tamer},
  pages        = {1507--1518},
  address      = {Snowbird, UT, USA},
}
Endnote
%0 Conference Proceedings %A Alvanaki, Foteini %A Michel, Sebastian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Tracking Set Correlations at Large Scale : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0019-8423-2 %R 10.1145/2588555.2610510 %D 2014 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2014-06-22 - 2014-06-27 %C Snowbird, UT, USA %B SIGMOD'14 %E Dyresson, Curtis; Li, Feifei; Özsu, M. Tamer %P 1507 - 1518 %I ACM %@ 978-1-4503-2376-5
[80]
A. Anand, I. Mele, S. Bedathur, and K. Berberich, “Phrase Query Optimization on Inverted Indexes,” Max-Planck-Institut für Informatik, Saarbrücken, MPI-I-2014-5-002, 2014.
Abstract
Phrase queries are a key functionality of modern search engines. Beyond that, they increasingly serve as an important building block for applications such as entity-oriented search, text analytics, and plagiarism detection. Processing phrase queries is costly, though, since positional information has to be kept in the index and all words, including stopwords, need to be considered. We consider an augmented inverted index that indexes selected variable-length multi-word sequences in addition to single words. We study how arbitrary phrase queries can be processed efficiently on such an augmented inverted index. We show that the underlying optimization problem is NP-hard in the general case and describe an exact exponential algorithm and an approximation algorithm to its solution. Experiments on ClueWeb09 and The New York Times with different real-world query workloads examine the practical performance of our methods.
Export
BibTeX
@techreport{AnandMeleBedathurBerberich2014,
  title        = {Phrase Query Optimization on Inverted Indexes},
  author       = {Anand, Avishek and Mele, Ida and Bedathur, Srikanta and Berberich, Klaus},
  language     = {eng},
  type         = {Research Report},
  number       = {MPI-I-2014-5-002},
  institution  = {Max-Planck-Institut f{\"u}r Informatik},
  address      = {Saarbr{\"u}cken},
  year         = {2014},
  marginalmark = {$\bullet$},
  issn         = {0946-011X},
  abstract     = {Phrase queries are a key functionality of modern search engines. Beyond that, they increasingly serve as an important building block for applications such as entity-oriented search, text analytics, and plagiarism detection. Processing phrase queries is costly, though, since positional information has to be kept in the index and all words, including stopwords, need to be considered. We consider an augmented inverted index that indexes selected variable-length multi-word sequences in addition to single words. We study how arbitrary phrase queries can be processed efficiently on such an augmented inverted index. We show that the underlying optimization problem is NP-hard in the general case and describe an exact exponential algorithm and an approximation algorithm to its solution. Experiments on ClueWeb09 and The New York Times with different real-world query workloads examine the practical performance of our methods.},
}
Endnote
%0 Report %A Anand, Avishek %A Mele, Ida %A Bedathur, Srikanta %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Phrase Query Optimization on Inverted Indexes : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-022A-3 %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2014 %P 20 p. %X Phrase queries are a key functionality of modern search engines. Beyond that, they increasingly serve as an important building block for applications such as entity-oriented search, text analytics, and plagiarism detection. Processing phrase queries is costly, though, since positional information has to be kept in the index and all words, including stopwords, need to be considered. We consider an augmented inverted index that indexes selected variable-length multi-word sequences in addition to single words. We study how arbitrary phrase queries can be processed efficiently on such an augmented inverted index. We show that the underlying optimization problem is NP-hard in the general case and describe an exact exponential algorithm and an approximation algorithm to its solution. Experiments on ClueWeb09 and The New York Times with different real-world query workloads examine the practical performance of our methods. %B Research Report %@ false
[81]
A. Anand, I. Mele, S. Bedathur, and K. Berberich, “Phrase Query Optimization on Inverted Indexes,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Anand:CIKM2014,
  title        = {Phrase Query Optimization on Inverted Indexes},
  author       = {Anand, Avishek and Mele, Ida and Bedathur, Srikanta and Berberich, Klaus},
  language     = {eng},
  booktitle    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  editor       = {Li, Jianzhong and Wang, X. Sean and Garofalakis, Minos and Soboroff, Ian and Suel, Torsten and Wang, Min},
  pages        = {1807--1810},
  address      = {Shanghai, China},
  publisher    = {ACM},
  year         = {2014},
  date         = {2014},
  marginalmark = {$\bullet$},
  isbn         = {978-1-4503-2598-1},
  doi          = {10.1145/2661829.2661928},
}
Endnote
%0 Conference Proceedings %A Anand, Avishek %A Mele, Ida %A Bedathur, Srikanta %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Phrase Query Optimization on Inverted Indexes : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-549A-0 %R 10.1145/2661829.2661928 %D 2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %K multi-word indexing, phrase queries, query optimization %B CIKM'14 %E Li, Jianzhong; Wang, X. Sean; Garofalakis, Minos; Soboroff, Ian; Suel, Torsten; Wang, Min %P 1807 - 1810 %I ACM %@ 978-1-4503-2598-1
[82]
N. An, L. Jiang, J. Wang, P. Luo, M. Wang, and B. N. Li, “Toward Detection of Aliases without String Similarity,” Information Sciences, vol. 261, 2014.
Export
BibTeX
@article{AnJiangWang2014,
  title        = {Toward Detection of Aliases without String Similarity},
  author       = {An, Ning and Jiang, Lili and Wang, Jianyong and Luo, Ping and Wang, Min and Li, Bing Nan},
  language     = {eng},
  journal      = {Information Sciences},
  volume       = {261},
  pages        = {89--100},
  publisher    = {Elsevier},
  address      = {Amsterdam},
  year         = {2014},
  date         = {2014},
  marginalmark = {$\bullet$},
  issn         = {0020-0255},
  doi          = {10.1016/j.ins.2013.11.010},
}
Endnote
%0 Journal Article %A An, Ning %A Jiang, Lili %A Wang, Jianyong %A Luo, Ping %A Wang, Min %A Li, Bing Nan %+ external Databases and Information Systems, MPI for Informatics, Max Planck Society external external external external %T Toward Detection of Aliases without String Similarity : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-3DFB-8 %F ISI: 000331689700005 %R 10.1016/j.ins.2013.11.010 %7 2013-11-18 %D 2014 %J Information Sciences %O Inf. Sci. %V 261 %& 89 %P 89 - 100 %I Elsevier %C Amsterdam %@ false
[83]
K. Athukorala, A. Oulasvirta, D. Glowacka, J. Vreeken, and G. Jaccuci, “Interaction Model to Predict Subjective-specificity of Search Results,” in UMAP 2014 Extended Proceedings, Aalborg, Denmark, 2014.
Export
BibTeX
@inproceedings{atukorala:14:interaction,
  title        = {Interaction Model to Predict Subjective-specificity of Search Results},
  author       = {Athukorala, Kumaripaba and Oulasvirta, Antti and Glowacka, Dorota and Vreeken, Jilles and Jacucci, Giulio},
  language     = {eng},
  url          = {http://ceur-ws.org/Vol-1181/umap2014_lateresults_01.pdf; urn:nbn:de:0074-1181-4},
  publisher    = {CEUR-WS.org},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {UMAP 2014 Extended Proceedings},
  editor       = {Cantador, Iv{\'a}n and Chi, Min and Farzan, Rosta and J{\"a}schke, Robert},
  pages        = {69--74},
  series       = {CEUR Workshop Proceedings},
  volume       = {1181},
  address      = {Aalborg, Denmark},
}
Endnote
%0 Conference Proceedings %A Athukorala, Kumaripaba %A Oulasvirta, Antti %A Glowacka, Dorota %A Vreeken, Jilles %A Jacucci, Giulio %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Interaction Model to Predict Subjective-specificity of Search Results : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5397-D %U http://ceur-ws.org/Vol-1181/umap2014_lateresults_01.pdf %D 2014 %B 22nd Conference on User Modeling, Adaptation, and Personalization %Z date of event: 2014-07-07 - 2014-07-11 %C Aalborg, Denmark %B UMAP 2014 Extended Proceedings %E Cantador, Iván; Chi, Min; Farzan, Rosta; Jäschke, Robert %P 69 - 74 %I CEUR-WS.org %B CEUR Workshop Proceedings %N 1181 %U http://ceur-ws.org/Vol-1181/umap2014_lateresults_01.pdf
[84]
K. Athukorala, A. Oulasvirta, D. Glowacka, J. Vreeken, and G. Jaccuci, “Supporting Exploratory Search Through User Modelling,” in UMAP 2014 Extended Proceedings (PIA 2014 in conjunction with UMAP 2014), Aalborg, Denmark, 2014.
Export
BibTeX
@inproceedings{atukorala:14:supporting,
  title        = {Supporting Exploratory Search Through User Modelling},
  author       = {Athukorala, Kumaripaba and Oulasvirta, Antti and Glowacka, Dorota and Vreeken, Jilles and Jacucci, Giulio},
  language     = {eng},
  issn         = {1613-0073},
  url          = {http://ceur-ws.org/Vol-1181/pia2014_paper_04.pdf; urn:nbn:de:0074-1181-4; http://ceur-ws.org/Vol-1181/pia2014_proceedings.pdf},
  publisher    = {CEUR-WS.org},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {UMAP 2014 Extended Proceedings (PIA 2014 in conjunction with UMAP 2014)},
  editor       = {Cantador, Iv{\'a}n and Chi, Min and Farzan, Rosta and J{\"a}schke, Robert},
  pages        = {1--47},
  series       = {CEUR Workshop Proceedings},
  volume       = {1181},
  address      = {Aalborg, Denmark},
}
Endnote
%0 Conference Proceedings %A Athukorala, Kumaripaba %A Oulasvirta, Antti %A Glowacka, Dorota %A Vreeken, Jilles %A Jacucci, Giulio %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Supporting Exploratory Search Through User Modelling : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-538C-7 %U http://ceur-ws.org/Vol-1181/pia2014_paper_04.pdf %D 2014 %B Joint Workshop on Personalised Information Access %Z date of event: 2014-07-07 - 2014-07-07 %C Aalborg, Denmark %B UMAP 2014 Extended Proceedings %E Cantador, Iván; Chi, Min; Farzan, Rosta; Jäschke, Robert %P 1 - 47 %I CEUR-WS.org %B CEUR Workshop Proceedings %N 1181 %@ false %U http://ceur-ws.org/Vol-1181/pia2014_paper_04.pdf
[85]
K. Athukorala, A. Oulasvirta, D. Glowacka, J. Vreeken, and G. Jaccuci, “Narrow or Broad? Estimating Subjective Specificity in Exploratory Search,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{atukorala:14:foraging,
  title        = {Narrow or Broad? {Estimating} Subjective Specificity in Exploratory Search},
  author       = {Athukorala, Kumaripaba and Oulasvirta, Antti and Glowacka, Dorota and Vreeken, Jilles and Jacucci, Giulio},
  language     = {eng},
  isbn         = {978-1-4503-2598-1},
  doi          = {10.1145/2661829.2661904},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  editor       = {Li, Jianzhong and Wang, X. Sean and Garofalakis, Minos and Soboroff, Ian and Suel, Torsten and Wang, Min},
  pages        = {819--828},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Athukorala, Kumaripaba %A Oulasvirta, Antti %A Glowacka, Dorota %A Vreeken, Jilles %A Jacucci, Giulio %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Narrow or Broad? Estimating Subjective Specificity in Exploratory Search : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53A1-6 %R 10.1145/2661829.2661904 %D 2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %B CIKM'14 %E Li, Jianzhong; Wang, X. Sean; Garofalakis, Minos; Soboroff, Ian; Suel, Torsten; Wang, Min %P 819 - 828 %I ACM %@ 978-1-4503-2598-1
[86]
K. Berberich, “Web Archives,” in Encyclopedia of Social Network Analysis and Mining, Berlin: Springer, 2014.
Export
BibTeX
@incollection{DBLP:reference/snam/Berberich14,
  title        = {Web Archives},
  author       = {Berberich, Klaus},
  language     = {eng},
  booktitle    = {Encyclopedia of Social Network Analysis and Mining},
  pages        = {2337--2343},
  publisher    = {Springer},
  address      = {Berlin},
  year         = {2014},
  date         = {2014},
  marginalmark = {$\bullet$},
  isbn         = {978-1-4614-6169-2},
  doi          = {10.1007/978-1-4614-6170-8_128},
}
Endnote
%0 Book Section %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Web Archives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53C1-B %R 10.1007/978-1-4614-6170-8_128 %D 2014 %B Encyclopedia of Social Network Analysis and Mining %P 2337 - 2343 %I Springer %C Berlin %@ 978-1-4614-6169-2
[87]
J. Biega, I. Mele, and G. Weikum, “Probabilistic Prediction of Privacy Risks in User Search Histories,” in PSBD’14, First International Workshop on Privacy and Security of Big Data, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Biega:PSBD2014,
  title        = {Probabilistic Prediction of Privacy Risks in User Search Histories},
  author       = {Biega, Joanna and Mele, Ida and Weikum, Gerhard},
  language     = {eng},
  booktitle    = {PSBD'14, First International Workshop on Privacy and Security of Big Data},
  pages        = {29--36},
  address      = {Shanghai, China},
  publisher    = {ACM},
  year         = {2014},
  date         = {2014},
  marginalmark = {$\bullet$},
  isbn         = {978-1-4503-1583-8},
  doi          = {10.1145/2663715.2669609},
}
Endnote
%0 Conference Proceedings %A Biega, Joanna %A Mele, Ida %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Probabilistic Prediction of Privacy Risks in User Search Histories : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5486-B %R 10.1145/2663715.2669609 %D 2014 %B First International Workshop on Privacy and Security of Big Data %Z date of event: 2014-11-07 - 2014-11-07 %C Shanghai, China %K privacy risk prediction, probabilistic privacy, query logs, user-centric privacy %B PSBD'14 %P 29 - 36 %I ACM %@ 978-1-4503-1583-8
[88]
R. Burghartz and K. Berberich, “MPI-INF at the NTCIR-11 Temporal Query Classification Task,” in Proceedings of the 11th NTCIR Conference on Evaluation of Information Access Technologies, Tokyo, Japan, 2014.
Export
BibTeX
@inproceedings{burghartz2014,
  title        = {{MPI}-{INF} at the {NTCIR}-11 Temporal Query Classification Task},
  author       = {Burghartz, Robin and Berberich, Klaus},
  language     = {eng},
  booktitle    = {Proceedings of the 11th NTCIR Conference on Evaluation of Information Access Technologies},
  editor       = {Kando, Noriko and Joho, Hideo and Kishida, Kazuaki},
  pages        = {443--450},
  address      = {Tokyo, Japan},
  publisher    = {National Institute of Informatics},
  year         = {2014},
  marginalmark = {$\bullet$},
  isbn         = {978-4-86049-065-2},
}
Endnote
%0 Conference Proceedings %A Burghartz, Robin %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T MPI-INF at the NTCIR-11 Temporal Query Classification Task : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5418-1 %D 2014 %8 09.12.2014 %B 11th NTCIR Conference on Evaluation of Information Access Technologies %Z date of event: 2014-12-09 - 2014-12-12 %C Tokyo, Japan %B Proceedings of the 11th NTCIR Conference on Evaluation of Information Access Technologies %E Kando, Noriko; Joho, Hideo; Kishida, Kazuaki %P 443 - 450 %I National Institute of Informatics %@ 978-4-86049-065-2 %U http://research.nii.ac.jp/ntcir/workshop/OnlineProceedings11/pdf/NTCIR/Temporalia/03-NTCIR11-TEMPORALIA-BurghartzR.pdf
[89]
P. Chau, J. Vreeken, M. van Leeuwen, and C. Faloutsos, Eds., Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics. Georgia Institute of Technology, 2014.
Export
BibTeX
@proceedings{escidoc:2078519,
  title        = {Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics (IDEA 2014)},
  editor       = {Chau, Polo and Vreeken, Jilles and van Leeuwen, Matthijs and Faloutsos, Christos},
  language     = {eng},
  publisher    = {Georgia Institute of Technology},
  year         = {2014},
  marginalmark = {$\bullet$},
  pages        = {130 p.},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %E Chau, Polo %E Vreeken, Jilles %E van Leeuwen, Matthijs %E Faloutsos, Christos %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5564-F %I Georgia Institute of Technology %D 2014 %B ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics %Z date of event: 2014-08-24 - 2014-08-24 %D 2014 %C New York, NY, USA %P 130 p. %U http://poloclub.gatech.edu/idea2014/papers/idea14-proceedings.pdf
[90]
L. Del Corro, R. Gemulla, and G. Weikum, “Werdy: Recognition and Disambiguation of Verbs and Verb Phrases with Syntactic and Semantic Pruning,” in The 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP 2014), Doha, Qatar, 2014.
Export
BibTeX
@inproceedings{DelCorro2014,
  title        = {Werdy: Recognition and Disambiguation of Verbs and Verb Phrases with Syntactic and Semantic Pruning},
  author       = {Del Corro, Luciano and Gemulla, Rainer and Weikum, Gerhard},
  language     = {eng},
  booktitle    = {The 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP 2014)},
  pages        = {374--385},
  address      = {Doha, Qatar},
  publisher    = {ACL},
  year         = {2014},
  marginalmark = {$\bullet$},
  isbn         = {978-1-937284-96-1},
  url          = {http://aclweb.org/anthology/D14-1042},
}
Endnote
%0 Conference Proceedings %A Del Corro, Luciano %A Gemulla, Rainer %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Werdy: Recognition and Disambiguation of Verbs and Verb Phrases with Syntactic and Semantic Pruning : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-51DF-E %U http://aclweb.org/anthology/D14-1042 %D 2014 %B 2014 Conference on Empirical Methods in Natural Language Processing %Z date of event: 2014-10-25 - 2014-10-29 %C Doha, Qatar %B The 2014 Conference on Empirical Methods in Natural Language Processing %P 374 - 385 %I ACL %@ 978-1-937284-96-1
[91]
G. de Melo and G. Weikum, “Taxonomic Data Integration from Multilingual Wikipedia Editions,” Knowledge and Information Systems, vol. 39, no. 1, 2014.
Export
BibTeX
@article{deMeloWeikum2013KAIS,
  title        = {Taxonomic Data Integration from Multilingual {Wikipedia} Editions},
  author       = {de Melo, Gerard and Weikum, Gerhard},
  language     = {eng},
  issn         = {0219-1377},
  doi          = {10.1007/s10115-012-0597-3},
  localid      = {Local-ID: E21183D8146A7A86C1257B1100306F46-deMeloWeikum2013KAIS},
  publisher    = {Springer},
  address      = {Berlin},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  journal      = {Knowledge and Information Systems},
  volume       = {39},
  number       = {1},
  pages        = {1--39},
}
Endnote
%0 Journal Article %A de Melo, Gerard %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Taxonomic Data Integration from Multilingual Wikipedia Editions : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A38-F %F OTHER: Local-ID: E21183D8146A7A86C1257B1100306F46-deMeloWeikum2013KAIS %R 10.1007/s10115-012-0597-3 %7 2013-01-08 %D 2014 %J Knowledge and Information Systems %V 39 %N 1 %& 1 %P 1 - 39 %I Springer %C Berlin %@ false
[92]
M. Dylla, M. Theobald, and I. Miliaraki, “Querying and Learning in Probabilistic Databases,” in Reasoning Web (RW 2014), Athens, Greece, 2014.
Abstract
Probabilistic Databases (PDBs) lie at the expressive intersection of databases, first-order logic, and probability theory. PDBs employ logical deduction rules to process Select-Project-Join (SPJ) queries, which form the basis for a variety of declarative query languages such as Datalog, Relational Algebra, and SQL. They employ logical consistency constraints to resolve data inconsistencies, and they represent query answers via logical lineage formulas (aka. "data provenance") to trace the dependencies between these answers and the input tuples that led to their derivation. While the literature on PDBs dates back to more than 25 years of research, only fairly recently the key role of lineage for establishing a closed and complete representation model of relational operations over this kind of probabilistic data was discovered. Although PDBs benefit from their efficient and scalable database infrastructures for data storage and indexing, they couple the data computation with probabilistic inference, the latter of which remains a #P-hard problem also in the context of PDBs. In this chapter, we provide a review on the key concepts of PDBs with a particular focus on our own recent research results related to this field. We highlight a number of ongoing research challenges related to PDBs, and we keep referring to an information extraction (IE) scenario as a running application to manage uncertain and temporal facts obtained from IE techniques directly inside a PDB setting.
Export
BibTeX
@inproceedings{DyllaRW2014,
  title        = {Querying and Learning in Probabilistic Databases},
  author       = {Dylla, Maximilian and Theobald, Martin and Miliaraki, Iris},
  language     = {eng},
  isbn         = {978-3-319-10587-1; 978-3-319-10586-4},
  doi          = {10.1007/978-3-319-10587-1_8},
  publisher    = {Springer},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  abstract     = {Probabilistic Databases (PDBs) lie at the expressive intersection of databases, first-order logic, and probability theory. PDBs employ logical deduction rules to process Select-Project-Join (SPJ) queries, which form the basis for a variety of declarative query languages such as Datalog, Relational Algebra, and SQL. They employ logical consistency constraints to resolve data inconsistencies, and they represent query answers via logical lineage formulas (aka. "data provenance") to trace the dependencies between these answers and the input tuples that led to their derivation. While the literature on PDBs dates back to more than 25 years of research, only fairly recently the key role of lineage for establishing a closed and complete representation model of relational operations over this kind of probabilistic data was discovered. Although PDBs benefit from their efficient and scalable database infrastructures for data storage and indexing, they couple the data computation with probabilistic inference, the latter of which remains a #P-hard problem also in the context of PDBs. In this chapter, we provide a review on the key concepts of PDBs with a particular focus on our own recent research results related to this field. We highlight a number of ongoing research challenges related to PDBs, and we keep referring to an information extraction (IE) scenario as a running application to manage uncertain and temporal facts obtained from IE techniques directly inside a PDB setting.},
  booktitle    = {Reasoning Web (RW 2014)},
  editor       = {Koubarakis, Manolis and Stamou, Giorgos and Stoilos, Giorgos and Horrocks, Ian and Kolaitis, Phokion and Lausen, Georg and Weikum, Gerhard},
  pages        = {313--368},
  series       = {Lecture Notes in Computer Science},
  volume       = {8714},
  address      = {Athens, Greece},
}
Endnote
%0 Conference Proceedings %A Dylla, Maximilian %A Theobald, Martin %A Miliaraki, Iris %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Querying and Learning in Probabilistic Databases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-E51D-9 %F OTHER: WOS:000348929200008 %R 10.1007/978-3-319-10587-1_8 %D 2014 %B 10th Reasoning Web Summer School %Z date of event: 2014-09-08 - 2014-09-13 %C Athens, Greece %X Probabilistic Databases (PDBs) lie at the expressive intersection of databases, first-order logic, and probability theory. PDBs employ logical deduction rules to process Select-Project-Join (SPJ) queries, which form the basis for a variety of declarative query languages such as Datalog, Relational Algebra, and SQL. They employ logical consistency constraints to resolve data inconsistencies, and they represent query answers via logical lineage formulas (aka. "data provenance") to trace the dependencies between these answers and the input tuples that led to their derivation. While the literature on PDBs dates back to more than 25 years of research, only fairly recently the key role of lineage for establishing a closed and complete representation model of relational operations over this kind of probabilistic data was discovered. Although PDBs benefit from their efficient and scalable database infrastructures for data storage and indexing, they couple the data computation with probabilistic inference, the latter of which remains a #P-hard problem also in the context of PDBs. In this chapter, we provide a review on the key concepts of PDBs with a particular focus on our own recent research results related to this field. We highlight a number of ongoing research challenges related to PDBs, and we keep referring to an information extraction (IE) scenario as a running application to manage uncertain and temporal facts obtained from IE techniques directly inside a PDB setting. 
%K Probabilistic and Temporal Databases Deduction Rules Consistency Constraints Information Extraction LINEAGE SYSTEMS WEB Computer Science, Information Systems Computer Science, Theory & Methods %B Reasoning Web %E Koubarakis, Manolis; Stamou, Giorgos; Stoilos, Giorgos; Horrocks, Ian; Kolaitis, Phokion; Lausen, Georg; Weikum, Gerhard %P 313 - 368 %I Springer %@ 978-3-319-10587-1 978-3-319-10586-4 %B Lecture Notes in Computer Science %N 8714
[93]
M. Dylla, “Efficient Querying and Learning in Probabilistic and Temporal Databases,” Universität des Saarlandes, Saarbrücken, 2014.
Abstract
Probabilistic databases store, query, and manage large amounts of uncertain information. This thesis advances the state-of-the-art in probabilistic databases in three different ways: 1. We present a closed and complete data model for temporal probabilistic databases and analyze its complexity. Queries are posed via temporal deduction rules which induce lineage formulas capturing both time and uncertainty. 2. We devise a methodology for computing the top-k most probable query answers. It is based on first-order lineage formulas representing sets of answer candidates. Theoretically derived probability bounds on these formulas enable pruning low-probability answers. 3. We introduce the problem of learning tuple probabilities which allows updating and cleaning of probabilistic databases. We study its complexity, characterize its solutions, cast it into an optimization problem, and devise an approximation algorithm based on stochastic gradient descent. All of the above contributions support consistency constraints and are evaluated experimentally.
Export
BibTeX
@phdthesis{DyllaPhDThesis2014,
  title        = {Efficient Querying and Learning in Probabilistic and Temporal Databases},
  author       = {Dylla, Maximilian},
  language     = {eng},
  url          = {urn:nbn:de:bsz:291-scidok-58146},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  abstract     = {Probabilistic databases store, query, and manage large amounts of uncertain information. This thesis advances the state-of-the-art in probabilistic databases in three different ways: 1. We present a closed and complete data model for temporal probabilistic databases and analyze its complexity. Queries are posed via temporal deduction rules which induce lineage formulas capturing both time and uncertainty. 2. We devise a methodology for computing the top-k most probable query answers. It is based on first-order lineage formulas representing sets of answer candidates. Theoretically derived probability bounds on these formulas enable pruning low-probability answers. 3. We introduce the problem of learning tuple probabilities which allows updating and cleaning of probabilistic databases. We study its complexity, characterize its solutions, cast it into an optimization problem, and devise an approximation algorithm based on stochastic gradient descent. All of the above contributions support consistency constraints and are evaluated experimentally.},
}
Endnote
%0 Thesis %A Dylla, Maximilian %Y Weikum, Gerhard %A referee: Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Efficient Querying and Learning in Probabilistic and Temporal Databases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-3C44-E %U urn:nbn:de:bsz:291-scidok-58146 %I Universität des Saarlandes %C Saarbrücken %D 2014 %P VIII, 169 p. %V phd %9 phd %X Probabilistic databases store, query, and manage large amounts of uncertain information. This thesis advances the state-of-the-art in probabilistic databases in three different ways: 1. We present a closed and complete data model for temporal probabilistic databases and analyze its complexity. Queries are posed via temporal deduction rules which induce lineage formulas capturing both time and uncertainty. 2. We devise a methodology for computing the top-k most probable query answers. It is based on first-order lineage formulas representing sets of answer candidates. Theoretically derived probability bounds on these formulas enable pruning low-probability answers. 3. We introduce the problem of learning tuple probabilities which allows updating and cleaning of probabilistic databases. We study its complexity, characterize its solutions, cast it into an optimization problem, and devise an approximation algorithm based on stochastic gradient descent. All of the above contributions support consistency constraints and are evaluated experimentally. %K Deduction Rules, Probabilistic Database, Temporal Database, Learning, Constraints, Top-k %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=dehttp://scidok.sulb.uni-saarland.de/volltexte/2014/5814/
[94]
M. Dylla and M. Theobald, “Learning Tuple Probabilities in Probabilistic Databases,” Max-Planck-Institut für Informatik, Saarbrücken, MPI-I-2014-5-001, 2014.
Abstract
Learning the parameters of complex probabilistic-relational models from labeled training data is a standard technique in machine learning, which has been intensively studied in the subfield of Statistical Relational Learning (SRL), but---so far---this is still an under-investigated topic in the context of Probabilistic Databases (PDBs). In this paper, we focus on learning the probability values of base tuples in a PDB from query answers, the latter of which are represented as labeled lineage formulas. Specifically, we consider labels in the form of pairs, each consisting of a Boolean lineage formula and a marginal probability that comes attached to the corresponding query answer. The resulting learning problem can be viewed as the inverse problem to confidence computations in PDBs: given a set of labeled query answers, learn the probability values of the base tuples, such that the marginal probabilities of the query answers again yield in the assigned probability labels. We analyze the learning problem from a theoretical perspective, devise two optimization-based objectives, and provide an efficient algorithm (based on Stochastic Gradient Descent) for solving these objectives. Finally, we conclude this work by an experimental evaluation on three real-world and one synthetic dataset, while competing with various techniques from SRL, reasoning in information extraction, and optimization.
Export
BibTeX
@techreport{Dylla-Learning2014,
  title        = {Learning Tuple Probabilities in Probabilistic Databases},
  author       = {Dylla, Maximilian and Theobald, Martin},
  language     = {eng},
  issn         = {0946-011X},
  number       = {MPI-I-2014-5-001},
  institution  = {Max-Planck-Institut f{\"u}r Informatik},
  address      = {Saarbr{\"u}cken},
  year         = {2014},
  marginalmark = {$\bullet$},
  abstract     = {Learning the parameters of complex probabilistic-relational models from labeled training data is a standard technique in machine learning, which has been intensively studied in the subfield of Statistical Relational Learning (SRL), but---so far---this is still an under-investigated topic in the context of Probabilistic Databases (PDBs). In this paper, we focus on learning the probability values of base tuples in a PDB from query answers, the latter of which are represented as labeled lineage formulas. Specifically, we consider labels in the form of pairs, each consisting of a Boolean lineage formula and a marginal probability that comes attached to the corresponding query answer. The resulting learning problem can be viewed as the inverse problem to confidence computations in PDBs: given a set of labeled query answers, learn the probability values of the base tuples, such that the marginal probabilities of the query answers again yield in the assigned probability labels. We analyze the learning problem from a theoretical perspective, devise two optimization-based objectives, and provide an efficient algorithm (based on Stochastic Gradient Descent) for solving these objectives. Finally, we conclude this work by an experimental evaluation on three real-world and one synthetic dataset, while competing with various techniques from SRL, reasoning in information extraction, and optimization.},
  type         = {Research Report},
}
Endnote
%0 Report %A Dylla, Maximilian %A Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Learning Tuple Probabilities in Probabilistic Databases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0019-8492-6 %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2014 %P 51 p. %X Learning the parameters of complex probabilistic-relational models from labeled training data is a standard technique in machine learning, which has been intensively studied in the subfield of Statistical Relational Learning (SRL), but---so far---this is still an under-investigated topic in the context of Probabilistic Databases (PDBs). In this paper, we focus on learning the probability values of base tuples in a PDB from query answers, the latter of which are represented as labeled lineage formulas. Specifically, we consider labels in the form of pairs, each consisting of a Boolean lineage formula and a marginal probability that comes attached to the corresponding query answer. The resulting learning problem can be viewed as the inverse problem to confidence computations in PDBs: given a set of labeled query answers, learn the probability values of the base tuples, such that the marginal probabilities of the query answers again yield in the assigned probability labels. We analyze the learning problem from a theoretical perspective, devise two optimization-based objectives, and provide an efficient algorithm (based on Stochastic Gradient Descent) for solving these objectives. Finally, we conclude this work by an experimental evaluation on three real-world and one synthetic dataset, while competing with various techniques from SRL, reasoning in information extraction, and optimization. %B Research Report %@ false
[95]
D. Erdős, R. Gemulla, and E. Terzi, “Reconstructing Graphs from Neighborhood Data,” ACM Transactions on Knowledge Discovery from Data, vol. 8, no. 4, 2014.
Export
BibTeX
@article{Erdos:2014:RGN:2663597.2641761,
  title        = {Reconstructing Graphs from Neighborhood Data},
  author       = {Erd{\H o}s, D{\'o}ra and Gemulla, Rainer and Terzi, Evimaria},
  language     = {eng},
  issn         = {1556-4681},
  doi          = {10.1145/2641761},
  publisher    = {ACM},
  address      = {New York, NY},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  journal      = {ACM Transactions on Knowledge Discovery from Data},
  volume       = {8},
  number       = {4},
  pages        = {1--22},
  eid          = {23},
}
Endnote
%0 Journal Article %A Erdős, Dóra %A Gemulla, Rainer %A Terzi, Evimaria %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Reconstructing Graphs from Neighborhood Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-692A-E %R 10.1145/2641761 %7 2014 %D 2014 %K Bipartite graph reconstruction, adjacency matrix, singular value decomposition %J ACM Transactions on Knowledge Discovery from Data %O TKDD %V 8 %N 4 %& 1 %P 1 - 22 %Z sequence number: 23 %I ACM %C New York, NY %@ false
[96]
P. Ernst, C. Meng, A. Siu, and G. Weikum, “KnowLife: A Knowledge Graph for Health and Life Sciences,” in 30th International Conference on Data Engineering (ICDE 2014), Chicago, IL, USA, 2014.
Export
BibTeX
@inproceedings{DBLP:conf/icde/ErnstMSW14,
  title        = {{KnowLife}: A Knowledge Graph for Health and Life Sciences},
  author       = {Ernst, Patrick and Meng, Cynthia and Siu, Amy and Weikum, Gerhard},
  language     = {eng},
  doi          = {10.1109/ICDE.2014.6816754},
  publisher    = {IEEE},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {30th International Conference on Data Engineering (ICDE 2014)},
  pages        = {1254--1257},
  address      = {Chicago, IL, USA},
}
Endnote
%0 Conference Proceedings %A Ernst, Patrick %A Meng, Cynthia %A Siu, Amy %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T KnowLife: A Knowledge Graph for Health and Life Sciences : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6BA0-1 %R 10.1109/ICDE.2014.6816754 %D 2014 %B 30th International Conference on Data Engineering %Z date of event: 2014-03-31 - 2014-04-04 %C Chicago, IL, USA %B 30th International Conference on Data Engineering %P 1254 - 1257 %I IEEE %U http://dx.doi.org/10.1109/ICDE.2014.6816754
[97]
E. Galbrun and P. Miettinen, “Interactive Redescription Mining,” in SIGMOD’14, ACM SIGMOD International Conference on Management of Data, Snowbird, UT, USA, 2014, pp. 1079–1082.
Abstract
Exploratory data analysis consists of multiple iterated steps: a data mining method is run on the data, the results are interpreted, new insights are formed, and the resulting knowledge is utilized when executing the method in a next round, and so on until satisfactory results are obtained. We focus on redescription mining, a powerful data analysis method that aims at finding alternative descriptions of the same entities, for example, ways to characterize geographical regions in terms of both the fauna that inhabits them and their bioclimatic conditions, so-called bioclimatic niches. We present Siren, a tool for interactive redescription mining. It is designed to facilitate the exploratory analysis of data by providing a seamless environment for mining, visualizing and editing redescriptions in an interactive fashion, supporting the analysis process in all its stages. We demonstrate its use for exploratory data mining. Simultaneously, Siren exemplifies the power of the various visualizations and means of interaction integrated into it; techniques that reach beyond the task of redescription mining considered here, to other analysis methods.
Export
BibTeX
@inproceedings{galbrun14interactive,
  title        = {Interactive Redescription Mining},
  author       = {Galbrun, Esther and Miettinen, Pauli},
  language     = {eng},
  isbn         = {978-1-4503-2376-5},
  doi          = {10.1145/2588555.2594520},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014-03},
  abstract     = {Exploratory data analysis consists of multiple iterated steps: a data mining method is run on the data, the results are interpreted, new insights are formed, and the resulting knowledge is utilized when executing the method in a next round, and so on until satisfactory results are obtained. We focus on redescription mining, a powerful data analysis method that aims at finding alternative descriptions of the same entities, for example, ways to characterize geographical regions in terms of both the fauna that inhabits them and their bioclimatic conditions, so-called bioclimatic niches. We present Siren, a tool for interactive redescription mining. It is designed to facilitate the exploratory analysis of data by providing a seamless environment for mining, visualizing and editing redescriptions in an interactive fashion, supporting the analysis process in all its stages. We demonstrate its use for exploratory data mining. Simultaneously, Siren exemplifies the power of the various visualizations and means of interaction integrated into it; techniques that reach beyond the task of redescription mining considered here, to other analysis methods.},
  booktitle    = {SIGMOD'14, ACM SIGMOD International Conference on Management of Data},
  debug        = {author: {\"O}zsu, M. Tamer},
  editor       = {Dyresson, Curtis and Li, Feifei},
  pages        = {1079--1082},
  address      = {Snowbird, UT, USA},
}
Endnote
%0 Conference Proceedings %A Galbrun, Esther %A Miettinen, Pauli %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Interactive Redescription Mining : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4987-F %R 10.1145/2588555.2594520 %D 2014 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2014-06-22 - 2014-06-27 %C Snowbird, UT, USA %X Exploratory data analysis consists of multiple iterated steps: a data mining method is run on the data, the results are interpreted, new insights are formed, and the resulting knowl- edge is utilized when executing the method in a next round, and so on until satisfactory results are obtained. We focus on redescription mining, a powerful data analysis method that aims at finding alternative descriptions of the same entities, for example, ways to characterize geographical regions in terms of both the fauna that inhabits them and their bioclimatic conditions, so-called bioclimatic niches. We present Siren, a tool for interactive redescription min- ing. It is designed to facilitate the exploratory analysis of data by providing a seamless environment for mining, visu- alizing and editing redescriptions in an interactive fashion, supporting the analysis process in all its stages. We demon- strate its use for exploratory data mining. Simultaneously, Siren exemplifies the power of the various visualizations and means of interaction integrated into it; Techniques that reach beyond the task of redescription mining considered here, to other analysis methods. %B SIGMOD'14 %E Dyresson, Curtis; Li, Feifei; Özsu, M. Tamer %P 1079 - 1082 %I ACM %@ 978-1-4503-2376-5
[98]
A. Grycner, G. Weikum, J. Pujara, J. Foulds, and L. Getoor, “A Unified Probabilistic Approach for Semantic Clustering of Relational Phrases,” in AKBC 2014, 4th Workshop on Automated Knowledge Base Construction, Montreal, Canada, 2014.
Export
BibTeX
@inproceedings{grycner2014:AKBC,
  title        = {A Unified Probabilistic Approach for Semantic Clustering of Relational Phrases},
  author       = {Grycner, Adam and Weikum, Gerhard and Pujara, Jay and Foulds, James and Getoor, Lise},
  language     = {eng},
  url          = {http://www.akbc.ws/2014/submissions/akbc2014_submission_13.pdf},
  publisher    = {AKBC Board},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {AKBC 2014, 4th Workshop on Automated Knowledge Base Construction},
  address      = {Montreal, Canada},
}
Endnote
%0 Conference Proceedings %A Grycner, Adam %A Weikum, Gerhard %A Pujara, Jay %A Foulds, James %A Getoor, Lise %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T A Unified Probabilistic Approach for Semantic Clustering of Relational Phrases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5B22-D %U http://www.akbc.ws/2014/submissions/akbc2014_submission_13.pdf %D 2014 %B 4th Workshop on Automated Knowledge Base Construction %Z date of event: 2014-12-13 - 2014-12-13 %C Montreal, Canada %B AKBC 2014 %I AKBC Board %U http://www.akbc.ws/2014/submissions/akbc2014_submission_13.pdf
[99]
A. Grycner and G. Weikum, “HARPY: Hypernyms and Alignment of Relational Paraphrases,” in Proceedings of COLING 2014: Technical Papers, Dublin, Ireland, 2014.
Abstract
Collections of relational paraphrases have been automatically constructed from large text corpora, as a WordNet counterpart for the realm of binary predicates and their surface forms. However, these resources fall short in their coverage of hypernymy links (subsumptions) among the synsets of phrases. This paper closes this gap by computing a high-quality alignment between the relational phrases of the Patty taxonomy, one of the largest collections of this kind, and the verb senses of WordNet. To this end, we devise judicious features and develop a graph-based alignment algorithm by adapting and extending the SimRank random-walk method. The resulting taxonomy of relational phrases and verb senses, coined HARPY, contains 20,812 synsets organized into a Directed Acyclic Graph (DAG) with 616,792 hypernymy links. Our empirical assessment indicates that the alignment links between Patty and WordNet have high accuracy, with Mean Reciprocal Rank (MRR) score 0.7 and Normalized Discounted Cumulative Gain (NDCG) score 0.73. As an additional extrinsic value, HARPY provides fine-grained lexical types for the arguments of verb senses in WordNet.
Export
BibTeX
@inproceedings{grycner-weikum:2014:Coling,
  title        = {{HARPY}: {Hypernyms} and Alignment of Relational Paraphrases},
  author       = {Grycner, Adam and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-941643-26-6},
  url          = {http://www.aclweb.org/anthology/C14-1207},
  publisher    = {ACL},
  year         = {2014},
  marginalmark = {$\bullet$},
  abstract     = {Collections of relational paraphrases have been automatically constructed from large text corpora, as a WordNet counterpart for the realm of binary predicates and their surface forms. However, these resources fall short in their coverage of hypernymy links (subsumptions) among the synsets of phrases. This paper closes this gap by computing a high-quality alignment between the relational phrases of the Patty taxonomy, one of the largest collections of this kind, and the verb senses of WordNet. To this end, we devise judicious features and develop a graph-based alignment algorithm by adapting and extending the SimRank random-walk method. The resulting taxonomy of relational phrases and verb senses, coined HARPY, contains 20,812 synsets organized into a {\em Directed Acyclic Graph (DAG)} with 616,792 hypernymy links. Our empirical assessment indicates that the alignment links between Patty and WordNet have high accuracy, with {\em Mean Reciprocal Rank (MRR)} score 0.7 and {\em Normalized Discounted Cumulative Gain (NDCG)} score 0.73. As an additional extrinsic value, HARPY provides fine-grained lexical types for the arguments of verb senses in WordNet.},
  booktitle    = {Proceedings of COLING 2014: Technical Papers},
  editor       = {Hajic, Jan and Tsujii, Junichi},
  pages        = {2195--2204},
  eid          = {C14},
  address      = {Dublin, Ireland},
}
Endnote
%0 Conference Proceedings %A Grycner, Adam %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T HARPY: Hypernyms and Alignment of Relational Paraphrases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-3329-1 %U http://www.aclweb.org/anthology/C14-1207 %D 2014 %B 25th International Conference on Computational Linguistics %Z date of event: 2014-08-23 - 2014-08-29 %C Dublin, Ireland %X Collections of relational paraphrases have been automatically constructed from \u000Alarge text corpora, as a WordNet counterpart for the realm of binary predicates \u000Aand their surface forms.\u000AHowever, these resources fall short in their coverage of hypernymy links \u000A(subsumptions) among the synsets of phrases. \u000AThis paper closes this gap by computing a high‐quality alignment between the \u000Arelational phrases of the Patty taxonomy, one of the largest collections of \u000Athis kind, and the verb senses of WordNet. To this end, we devise judicious \u000Afeatures and develop a graph‐based alignment algorithm by adapting and \u000Aextending the SimRank random‐walk method.\u000AThe resulting taxonomy of relational phrases and verb senses, coined HARPY, \u000Acontains 20,812 synsets organized into a \em Directed Acyclic Graph (DAG)} \u000Awith 616,792 hypernymy links. \u000AOur empirical assessment, indicates that the alignment links between Patty and \u000AWordNet have high accuracy, with {\em Mean Reciprocal Rank (MRR)} score 0.7 and \u000A{\em Normalized Discounted Cumulative Gain (NDCG) score 0.73. \u000AAs an additional extrinsic value, HARPY provides fine‐grained lexical types for \u000Athe arguments of verb senses in WordNet. %B Proceedings of COLING 2014: Technical Papers %E Hajic, Jan; Tsujii, Junichi %P 2195 - 2204 %Z sequence number: C14 %I ACL %@ 978‐1‐941643‐26‐6
[100]
D. Gupta and K. Berberich, “Identifying Time Intervals of Interest to Queries,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{DBLP:conf/cikm/GuptaB14,
  title        = {Identifying Time Intervals of Interest to Queries},
  author       = {Gupta, Dhruv and Berberich, Klaus},
  language     = {eng},
  doi          = {10.1145/2661829.2661927},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  editor       = {Li, Jianzhong and Wang, Xiaoyang Sean and Garofalakis, Minos N. and Soboroff, Ian and Suel, Torsten and Wang, Min},
  pages        = {1835--1838},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Gupta, Dhruv %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Identifying Time Intervals of Interest to Queries : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5435-1 %R 10.1145/2661829.2661927 %D 2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %B CIKM'14 %E Li, Jianzhong; Wang, Xiaoyang Sean; Garofalakis, Minos N.; Soboroff, Ian; Suel, Torsten; Wang, Min %P 1835 - 1838 %I ACM
[101]
S. Gurajada, S. Seufert, I. Miliaraki, and M. Theobald, “TriAD: A Distributed Shared-nothing RDF Engine Based on Asynchronous Message Passing,” in SIGMOD’14, ACM SIGMOD International Conference on Management of Data, Snowbird, UT, USA, 2014.
Export
BibTeX
@inproceedings{Gurajada:2014:TDS:2588555.2610511,
  title        = {{TriAD}: A Distributed Shared-nothing {RDF} Engine Based on Asynchronous Message Passing},
  author       = {Gurajada, Sairam and Seufert, Stephan and Miliaraki, Iris and Theobald, Martin},
  language     = {eng},
  isbn         = {978-1-4503-2376-5},
  doi          = {10.1145/2588555.2610511},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {SIGMOD'14, ACM SIGMOD International Conference on Management of Data},
  editor       = {Dyresson, Curtis and Li, Feifei and {\"O}zsu, M. Tamer},
  pages        = {289--300},
  address      = {Snowbird, UT, USA},
}
Endnote
%0 Conference Proceedings %A Gurajada, Sairam %A Seufert, Stephan %A Miliaraki, Iris %A Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T TriAD: A Distributed Shared-nothing RDF Engine Based on Asynchronous Message Passing : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C81-2 %R 10.1145/2588555.2610511 %D 2014 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2014-06-22 - 2014-06-27 %C Snowbird, UT, USA %K asynchronous message passing, distributed RDF indexing 38; SparQL processing, join-ahead pruning, parallel join evaluation %B SIGMOD'14 %E Dyresson, Curtis; Li, Feifei; Özsu, M. Tamer %P 289 - 300 %I ACM %@ 978-1-4503-2376-5
[102]
S. Gurajada, S. Seufert, I. Miliaraki, and M. Theobald, “Using Graph Summarization for Join-ahead Pruning in a Distributed RDF Engine,” in SWIM’14, 6th International Workshop on Semantic Web Information Management, Snowbird, UT, USA, 2014.
Export
BibTeX
@inproceedings{Gurajada:2014:UGS:2630602.2630610,
  title        = {Using Graph Summarization for Join-ahead Pruning in a Distributed {RDF} Engine},
  author       = {Gurajada, Sairam and Seufert, Stephan and Miliaraki, Iris and Theobald, Martin},
  language     = {eng},
  isbn         = {978-1-4503-2994-1},
  doi          = {10.1145/2630602.2630610},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {SWIM'14, 6th International Workshop on Semantic Web Information Management},
  pages        = {1--4},
  eid          = {41},
  address      = {Snowbird, UT, USA},
}
Endnote
%0 Conference Proceedings %A Gurajada, Sairam %A Seufert, Stephan %A Miliaraki, Iris %A Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Using Graph Summarization for Join-ahead Pruning in a Distributed RDF Engine : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C65-2 %R 10.1145/2630602.2630610 %D 2014 %B 6th International Workshop on Semantic Web Information Management %Z date of event: 2014-06-22 - 2014-06-27 %C Snowbird, UT, USA %B SWIM'14 %P 1 - 4 %Z sequence number: 41 %I ACM %@ 978-1-4503-2994-1
[103]
A. Harth, K. Hose, and R. Schenkel, Eds., Linked Data Management. Boca Raton, FL: CRC Press, 2014.
Export
BibTeX
@book{LinkedDataBook2014,
  title        = {Linked Data Management},
  editor       = {Harth, Andreas and Hose, Katja and Schenkel, Ralf},
  language     = {eng},
  isbn         = {978-1466582408; 1466582405},
  publisher    = {CRC Press},
  address      = {Boca Raton, FL},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  pages        = {576 p.},
  series       = {Emerging Directions in Database Systems and Applications},
}
Endnote
%0 Edited Book %A Harth, Andreas %A Hose, Katja %A Schenkel, Ralf %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Linked Data Management : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0019-8478-2 %@ 978-1466582408 %@ 1466582405 %I CRC Press %C Boca Raton, FL %D 2014 %P 576 p. %B Emerging Directions in Database Systems and Applications
[104]
J. Hoffart, Y. Altun, and G. Weikum, “Discovering Emerging Entities with Ambiguous Names,” in WWW’14, 23rd International World Wide Web Conference, Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{Hoffart:2014hp,
  title        = {Discovering Emerging Entities with Ambiguous Names},
  author       = {Hoffart, Johannes and Altun, Yasemin and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-2744-2},
  doi          = {10.1145/2566486.2568003},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {WWW'14, 23rd International World Wide Web Conference},
  editor       = {Chung, Chin-Wan and Broder, Andrei and Shin, Kyuseok and Suel, Torsten},
  pages        = {385--395},
  address      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %A Altun, Yasemin %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering Emerging Entities with Ambiguous Names : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5364-0 %R 10.1145/2566486.2568003 %D 2014 %B 23rd International World Wide Web Conference %Z date of event: 2014-04-07 - 2014-04-11 %C Seoul, Korea %B WWW'14 %E Chung, Chin-Wan; Broder, Andrei; Shin, Kyuseok; Suel, Torsten %P 385 - 395 %I ACM %@ 978-1-4503-2744-2
[105]
J. Hoffart, D. Milchevski, and G. Weikum, “AESTHETICS: Analytics with Strings, Things, and Cats,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Hoffart:2014cy,
  title        = {{AESTHETICS}: Analytics with Strings, Things, and Cats},
  author       = {Hoffart, Johannes and Milchevski, Dragan and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-2598-1},
  doi          = {10.1145/2661829.2661835},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  debug        = {author: Wang, Min},
  editor       = {Li, Jianzhong and Wang, X. Sean and Garofalakis, Minos and Soboroff, Ian and Suel, Torsten},
  pages        = {2018--2020},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %A Milchevski, Dragan %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T AESTHETICS: Analytics with Strings, Things, and Cats : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-536B-2 %R 10.1145/2661829.2661835 %D 2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %B CIKM'14 %E Li, Jianzhong; Wang, X. Sean; Garofalakis, Minos; Soboroff, Ian; Suel, Torsten; Wang, Min %P 2018 - 2020 %I ACM %@ 978-1-4503-2598-1
[106]
J. Hoffart, D. Milchevski, and G. Weikum, “STICS: Searching with Strings, Things, and Cats,” in SIGIR’14, 37th International ACM SIGIR Conference on Research and Development in Information Retrieval, Gold Coast, Australia, 2014.
Export
BibTeX
@inproceedings{Hoffart:2014dt,
  title        = {{STICS}: Searching with Strings, Things, and Cats},
  author       = {Hoffart, Johannes and Milchevski, Dragan and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-2257-7},
  doi          = {10.1145/2600428.2611177},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {SIGIR'14, 37th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages        = {1247--1248},
  address      = {Gold Coast, Australia},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %A Milchevski, Dragan %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T STICS: Searching with Strings, Things, and Cats : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5344-7 %R 10.1145/2600428.2611177 %D 2014 %B 37th International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2014-07-06 - 2014-07-11 %C Gold Coast, Australia %B SIGIR'14 %P 1247 - 1248 %I ACM %@ 978-1-4503-2257-7
[107]
K. Hui, “Towards Robust & Reusable Evaluation for Novelty & Diversity,” in PIKM’14, 7th PhD Workshop in Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Hui-pikm2014,
  title        = {Towards Robust \& Reusable Evaluation for Novelty \& Diversity},
  author       = {Hui, Kai},
  language     = {eng},
  isbn         = {978-1-4503-1481-7},
  doi          = {10.1145/2663714.2668045},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {PIKM'14, 7th PhD Workshop in Information and Knowledge Management},
  editor       = {de Melo, Gerard and Kacimi, Mouna and Varde, Aparna S.},
  pages        = {9--17},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Hui, Kai %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Towards Robust & Reusable Evaluation for Novelty & Diversity : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4F55-D %R 10.1145/2663714.2668045 %D 2014 %B 7th PhD Workshop in Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-03 %C Shanghai, China %B PIKM'14 %E de Melo, Gerard; Kacimi, Mouna; Varde, Aparna S. %P 9 - 17 %I ACM %@ 978-1-4503-1481-7
[108]
Y. Ibrahim, M. A. Yosef, and G. Weikum, “AIDA-Social: Entity Linking on the Social Stream,” in ESAIR’14, 7th International Workshop on Exploiting Semantic Annotations in Information Retrieval, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{mamir:2014:aida-social,
  title        = {{AIDA}-{Social}: {Entity} Linking on the Social Stream},
  author       = {Ibrahim, Yusra and Yosef, Mohamed Amir and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-1365-0},
  doi          = {10.1145/2663712.2666185},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {ESAIR'14, 7th International Workshop on Exploiting Semantic Annotations in Information Retrieval},
  editor       = {Alonso, Omar and Kamps, Jaap and Karlgren, Jussi},
  pages        = {17--19},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Ibrahim, Yusra %A Yosef, Mohamed Amir %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T AIDA-Social: Entity Linking on the Social Stream : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54A3-7 %R 10.1145/2663712.2666185 %D 2014 %B 7th International Workshop on Exploiting Semantic Annotations in Information Retrieval %Z date of event: 2014-11-07 - 2014-11-07 %C Shanghai, China %K information extraction, named entity linking, semantic annotation, social media %B ESAIR'14 %E Alonso, Omar; Kamps, Jaap; Karlgren, Jussi %P 17 - 19 %I ACM %@ 978-1-4503-1365-0 %U http://doi.acm.org/10.1145/2663712.2666185
[109]
S. Karaev, “NASSAU: Description Length Minimization for Boolean Matrix Factorization,” in ECML/PKDD 2014 PhD Session Proceedings, Nancy, France, 2014.
Export
BibTeX
@inproceedings{karaev2014nassau,
  TITLE        = {{NASSAU}: {D}escription Length Minimization for {Boolean} Matrix Factorization},
  AUTHOR       = {Karaev, Sanjar},
  LANGUAGE     = {eng},
  URL          = {https://phdsession-ecmlpkdd2014.greyc.fr/sites/phdsession-ecmlpkdd2014.greyc.fr/files/papers/Paper_20702.pdf},
  PUBLISHER    = {University of Caen},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {ECML/PKDD 2014 PhD Session Proceedings},
  EDITOR       = {Belohlavek, Radim and Cr{\'e}milleux, Bruno},
  PAGES        = {177--186},
  ADDRESS      = {Nancy, France},
}
Endnote
%0 Conference Proceedings %A Karaev, Sanjar %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T NASSAU: Description Length Minimization for Boolean Matrix Factorization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-51A9-6 %U https://phdsession-ecmlpkdd2014.greyc.fr/sites/phdsession-ecmlpkdd2014.greyc.fr/files/papers/Paper_20702.pdf %D 2014 %B The European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2014-09-15 - 2014-09-19 %C Nancy, France %B ECML/PKDD 2014 PhD Session Proceedings %E Belohlavek, Radim; Crémilleux, Bruno %P 177 - 186 %I University of Caen
[110]
S. K. Kondreddi, “Human Computing and Crowdsourcing Methods for Knowledge Acquisition,” Universität des Saarlandes, Saarbrücken, 2014.
Abstract
Ambiguity, complexity, and diversity in natural language textual expressions are major hindrances to automated knowledge extraction. As a result state-of-the-art methods for extracting entities and relationships from unstructured data make incorrect extractions or produce noise. With the advent of human computing, computationally hard tasks have been addressed through human inputs. While text-based knowledge acquisition can benefit from this approach, humans alone cannot bear the burden of extracting knowledge from the vast textual resources that exist today. Even making payments for crowdsourced acquisition can quickly become prohibitively expensive. In this thesis we present principled methods that effectively garner human computing inputs for improving the extraction of knowledge-base facts from natural language texts. Our methods complement automatic extraction techniques with human computing to reap the benefits of both while overcoming each other's limitations. We present the architecture and implementation of HIGGINS, a system that combines an information extraction (IE) engine with a human computing (HC) engine to produce high quality facts. The IE engine combines statistics derived from large Web corpora with semantic resources like WordNet and ConceptNet to construct a large dictionary of entity and relational phrases. It employs specifically designed statistical language models for phrase relatedness to come up with questions and relevant candidate answers that are presented to human workers. Through extensive experiments we establish the superiority of this approach in extracting relation-centric facts from text. In our experiments we extract facts about fictitious characters in narrative text, where the issues of diversity and complexity in expressing relations are far more pronounced. Finally, we also demonstrate how interesting human computing games can be designed for knowledge acquisition tasks.
Export
BibTeX
@phdthesis{Kondreddi2014b,
  TITLE        = {Human Computing and Crowdsourcing Methods for Knowledge Acquisition},
  AUTHOR       = {Kondreddi, Sarath Kumar},
  LANGUAGE     = {eng},
  URL          = {urn:nbn:de:bsz:291-scidok-57948},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  ABSTRACT     = {Ambiguity, complexity, and diversity in natural language textual expressions are major hindrances to automated knowledge extraction. As a result state-of-the-art methods for extracting entities and relationships from unstructured data make incorrect extractions or produce noise. With the advent of human computing, computationally hard tasks have been addressed through human inputs. While text-based knowledge acquisition can benefit from this approach, humans alone cannot bear the burden of extracting knowledge from the vast textual resources that exist today. Even making payments for crowdsourced acquisition can quickly become prohibitively expensive. In this thesis we present principled methods that effectively garner human computing inputs for improving the extraction of knowledge-base facts from natural language texts. Our methods complement automatic extraction techniques with human computing to reap the benefits of both while overcoming each other's limitations. We present the architecture and implementation of HIGGINS, a system that combines an information extraction (IE) engine with a human computing (HC) engine to produce high quality facts. The IE engine combines statistics derived from large Web corpora with semantic resources like WordNet and ConceptNet to construct a large dictionary of entity and relational phrases. It employs specifically designed statistical language models for phrase relatedness to come up with questions and relevant candidate answers that are presented to human workers. Through extensive experiments we establish the superiority of this approach in extracting relation-centric facts from text. In our experiments we extract facts about fictitious characters in narrative text, where the issues of diversity and complexity in expressing relations are far more pronounced. Finally, we also demonstrate how interesting human computing games can be designed for knowledge acquisition tasks.},
}
Endnote
%0 Thesis %A Kondreddi, Sarath Kumar %Y Triantafillou, Peter %A referee: Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Human Computing and Crowdsourcing Methods for Knowledge Acquisition : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-3C3D-F %U urn:nbn:de:bsz:291-scidok-57948 %I Universität des Saarlandes %C Saarbrücken %D 2014 %P 116 p. %V phd %9 phd %X Ambiguity, complexity, and diversity in natural language textual expressions are major hindrances to automated knowledge extraction. As a result state-of-the-art methods for extracting entities and relationships from unstructured data make incorrect extractions or produce noise. With the advent of human computing, computationally hard tasks have been addressed through human inputs. While text-based knowledge acquisition can benefit from this approach, humans alone cannot bear the burden of extracting knowledge from the vast textual resources that exist today. Even making payments for crowdsourced acquisition can quickly become prohibitively expensive. In this thesis we present principled methods that effectively garner human computing inputs for improving the extraction of knowledge-base facts from natural language texts. Our methods complement automatic extraction techniques with human computing to reap the benefits of both while overcoming each other's limitations. We present the architecture and implementation of HIGGINS, a system that combines an information extraction (IE) engine with a human computing (HC) engine to produce high quality facts. 
The IE engine combines statistics derived from large Web corpora with semantic resources like WordNet and ConceptNet to construct a large dictionary of entity and relational phrases. It employs specifically designed statistical language models for phrase relatedness to come up with questions and relevant candidate answers that are presented to human workers. Through extensive experiments we establish the superiority of this approach in extracting relation-centric facts from text. In our experiments we extract facts about fictitious characters in narrative text, where the issues of diversity and complexity in expressing relations are far more pronounced. Finally, we also demonstrate how interesting human computing games can be designed for knowledge acquisition tasks. %U http://scidok.sulb.uni-saarland.de/volltexte/2014/5794/http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[111]
S. K. Kondreddi, P. Triantafillou, and G. Weikum, “Combining Information Extraction and Human Computing for Crowdsourced Knowledge Acquisition,” in 30th IEEE International Conference on Data Engineering (ICDE 2014), Chicago, IL, USA, 2014.
Abstract
Automatic information extraction (IE) enables the construction of very large knowledge bases (KBs), with relational facts on millions of entities from text corpora and Web sources. However, such KBs contain errors and they are far from being complete. This motivates the need for exploiting human intelligence and knowledge using crowd-based human computing (HC) for assessing the validity of facts and for gathering additional knowledge. This paper presents a novel system architecture, called Higgins, which shows how to effectively integrate an IE engine and a HC engine. Higgins generates game questions where players choose or fill in missing relations for subject-relation-object triples. For generating multiple-choice answer candidates, we have constructed a large dictionary of entity names and relational phrases, and have developed specifically designed statistical language models for phrase relatedness. To this end, we combine semantic resources like WordNet, ConceptNet, and others with statistics derived from a large Web corpus. We demonstrate the effectiveness of Higgins for knowledge acquisition by crowdsourced gathering of relationships between characters in narrative descriptions of movies and books.
Export
BibTeX
@inproceedings{Kondreddi2014a,
  TITLE        = {Combining Information Extraction and Human Computing for Crowdsourced Knowledge Acquisition},
  AUTHOR       = {Kondreddi, Sarath Kumar and Triantafillou, Peter and Weikum, Gerhard},
  LANGUAGE     = {eng},
  DOI          = {10.1109/ICDE.2014.6816717},
  PUBLISHER    = {IEEE},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  ABSTRACT     = {Automatic information extraction (IE) enables the construction of very large knowledge bases (KBs), with relational facts on millions of entities from text corpora and Web sources. However, such KBs contain errors and they are far from being complete. This motivates the need for exploiting human intelligence and knowledge using crowd-based human computing (HC) for assessing the validity of facts and for gathering additional knowledge. This paper presents a novel system architecture, called Higgins, which shows how to effectively integrate an IE engine and a HC engine. Higgins generates game questions where players choose or fill in missing relations for subject-relation-object triples. For generating multiple-choice answer candidates, we have constructed a large dictionary of entity names and relational phrases, and have developed specifically designed statistical language models for phrase relatedness. To this end, we combine semantic resources like WordNet, ConceptNet, and others with statistics derived from a large Web corpus. We demonstrate the effectiveness of Higgins for knowledge acquisition by crowdsourced gathering of relationships between characters in narrative descriptions of movies and books.},
  BOOKTITLE    = {30th IEEE International Conference on Data Engineering (ICDE 2014)},
  PAGES        = {988--999},
  ADDRESS      = {Chicago, IL, USA},
}
Endnote
%0 Conference Proceedings %A Kondreddi, Sarath Kumar %A Triantafillou, Peter %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Combining Information Extraction and Human Computing for Crowdsourced Knowledge Acquisition : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0023-C15D-6 %R 10.1109/ICDE.2014.6816717 %D 2014 %B 30th IEEE International Conference on Data Engineering %Z date of event: 2014-03-31 - 2014-04-04 %C Chicago, IL, USA %X Automatic information extraction (IE) enables the construction of very large knowledge bases (KBs), with relational facts on millions of entities from text corpora and Web sources. However, such KBs contain errors and they are far from being complete. This motivates the need for exploiting human intelligence and knowledge using crowd-based human computing (HC) for assessing the validity of facts and for gathering additional knowledge. This paper presents a novel system architecture, called Higgins, which shows how to effectively integrate an IE engine and a HC engine. Higgins generates game questions where players choose or fill in missing relations for subject-relation-object triples. For generating multiple-choice answer candidates, we have constructed a large dictionary of entity names and relational phrases, and have developed specifically designed statistical language models for phrase relatedness. To this end, we combine semantic resources like WordNet, ConceptNet, and others with statistics derived from a large Web corpus. We demonstrate the effectiveness of Higgins for knowledge acquisition by crowdsourced gathering of relationships between characters in narrative descriptions of movies and books. %B 30th IEEE International Conference on Data Engineering %P 988 - 999 %I IEEE
[112]
M. Koubarakis, G. B. Stamou, G. Stoilos, I. Horrocks, P. G. Kolaitis, G. Lausen, and G. Weikum, Eds., Reasoning Web. Springer, 2014.
Export
BibTeX
@proceedings{DBLP:conf/rweb/2014,
  TITLE        = {Reasoning Web},
  EDITOR       = {Koubarakis, Manolis and Stamou, Giorgos B. and Stoilos, Giorgos and Horrocks, Ian and Kolaitis, Phokion G. and Lausen, Georg and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-3-319-10586-4},
  DOI          = {10.1007/978-3-319-10587-1},
  PUBLISHER    = {Springer},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  PAGES        = {X, 390 p.},
  SERIES       = {Lecture Notes in Computer Science},
  VOLUME       = {8714},
  ADDRESS      = {Athens, Greece},
}
Endnote
%0 Conference Proceedings %E Koubarakis, Manolis %E Stamou, Giorgos B. %E Stoilos, Giorgos %E Horrocks, Ian %E Kolaitis, Phokion G. %E Lausen, Georg %E Weikum, Gerhard %+ External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Reasoning Web : Reasoning on the Web in the Big Data Era ; 10th International Summer School 2014, Athens, Greece, September 8-13, 2014. Proceedings %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6BD5-B %@ 978-3-319-10586-4 %R 10.1007/978-3-319-10587-1 %I Springer %D 2014 %B 10th Reasoning Web Summer School %Z date of event: 2014-09-08 - 2014-09-13 %D 2014 %C Athens, Greece %P X, 390 p. %S Lecture Notes in Computer Science %V 8714
[113]
D. Koutra, U. Kang, J. Vreeken, and C. Faloutsos, “VoG: Summarizing and Understanding Large Graphs,” in 2014 SIAM International Conference on Data Mining (SDM 2014), Philadelphia, PA, USA, 2014.
Export
BibTeX
@inproceedings{koutra:14:vog,
  TITLE        = {{VoG}: {Summarizing} and Understanding Large Graphs},
  AUTHOR       = {Koutra, Danai and Kang, U and Vreeken, Jilles and Faloutsos, Christos},
  LANGUAGE     = {eng},
  ISBN         = {978-1-61197-344-0},
  DOI          = {10.1137/1.9781611973440.11},
  PUBLISHER    = {SIAM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {2014 SIAM International Conference on Data Mining (SDM 2014)},
  PAGES        = {91--99},
  ADDRESS      = {Philadelphia, PA, USA},
}
Endnote
%0 Conference Proceedings %A Koutra, Danai %A Kang, U %A Vreeken, Jilles %A Faloutsos, Christos %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T VoG: Summarizing and Understanding Large Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53AF-A %R 10.1137/1.9781611973440.11 %D 2014 %B SIAM International Conference on Data Mining %Z date of event: 2014-04-24 - 2014-04-26 %C Philadelphia, PA, USA %B 2014 SIAM International Conference on Data Mining %P 91 - 99 %I SIAM %@ 978-1-61197-344-0
[114]
D. Koutra, U. Kang, J. Vreeken, and C. Faloutsos, “VoG: Summarizing and Understanding Large Graphs,” 2014. [Online]. Available: http://arxiv.org/abs/1406.3411.
Abstract
How can we succinctly describe a million-node graph with a few simple sentences? How can we measure the "importance" of a set of discovered subgraphs in a large graph? These are exactly the problems we focus on. Our main ideas are to construct a "vocabulary" of subgraph-types that often occur in real graphs (e.g., stars, cliques, chains), and from a set of subgraphs, find the most succinct description of a graph in terms of this vocabulary. We measure success in a well-founded way by means of the Minimum Description Length (MDL) principle: a subgraph is included in the summary if it decreases the total description length of the graph. Our contributions are three-fold: (a) formulation: we provide a principled encoding scheme to choose vocabulary subgraphs; (b) algorithm: we develop \method, an efficient method to minimize the description cost, and (c) applicability: we report experimental results on multi-million-edge real graphs, including Flickr and the Notre Dame web graph.
Export
BibTeX
@online{KoutraKangVreekenFaloutsosarXiv2014,
  TITLE        = {{VoG}: {Summarizing} and Understanding Large Graphs},
  AUTHOR       = {Koutra, Danai and Kang, U and Vreeken, Jilles and Faloutsos, Christos},
  LANGUAGE     = {eng},
  URL          = {http://arxiv.org/abs/1406.3411},
  EPRINT       = {1406.3411},
  EPRINTTYPE   = {arXiv},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {How can we succinctly describe a million-node graph with a few simple sentences? How can we measure the "importance" of a set of discovered subgraphs in a large graph? These are exactly the problems we focus on. Our main ideas are to construct a "vocabulary" of subgraph-types that often occur in real graphs (e.g., stars, cliques, chains), and from a set of subgraphs, find the most succinct description of a graph in terms of this vocabulary. We measure success in a well-founded way by means of the Minimum Description Length (MDL) principle: a subgraph is included in the summary if it decreases the total description length of the graph. Our contributions are three-fold: (a) formulation: we provide a principled encoding scheme to choose vocabulary subgraphs; (b) algorithm: we develop \method, an efficient method to minimize the description cost, and (c) applicability: we report experimental results on multi-million-edge real graphs, including Flickr and the Notre Dame web graph.},
}
Endnote
%0 Report %A Koutra, Danai %A Kang, U %A Vreeken, Jilles %A Faloutsos, Christos %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T VoG: Summarizing and Understanding Large Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-49A3-F %U http://arxiv.org/abs/1406.3411 %D 2014 %X How can we succinctly describe a million-node graph with a few simple sentences? How can we measure the "importance" of a set of discovered subgraphs in a large graph? These are exactly the problems we focus on. Our main ideas are to construct a "vocabulary" of subgraph-types that often occur in real graphs (e.g., stars, cliques, chains), and from a set of subgraphs, find the most succinct description of a graph in terms of this vocabulary. We measure success in a well-founded way by means of the Minimum Description Length (MDL) principle: a subgraph is included in the summary if it decreases the total description length of the graph. Our contributions are three-fold: (a) formulation: we provide a principled encoding scheme to choose vocabulary subgraphs; (b) algorithm: we develop \method, an efficient method to minimize the description cost, and (c) applicability: we report experimental results on multi-million-edge real graphs, including Flickr and the Notre Dame web graph. %K cs.SI, Physics, Physics and Society, physics.soc-ph
[115]
E. Kuzey and G. Weikum, “EVIN: Building a Knowledge Base of Events,” in WWW’14 Companion, Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{ekuzeyWWW14,
  TITLE        = {{EVIN}: Building a Knowledge Base of Events},
  AUTHOR       = {Kuzey, Erdal and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2745-9},
  URL          = {http://dl.acm.org/citation.cfm?id=2577009},
  DOI          = {10.1145/2567948.2577009},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {WWW'14 Companion},
  PAGES        = {103--106},
  ADDRESS      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Kuzey, Erdal %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T EVIN: Building a Knowledge Base of Events : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-525B-C %R 10.1145/2567948.2577009 %U http://dl.acm.org/citation.cfm?id=2577009 %D 2014 %B 23rd International Conference on World Wide Web %Z date of event: 2014-04-07 - 2014-04-11 %C Seoul, Korea %B WWW'14 Companion %P 103 - 106 %I ACM %@ 978-1-4503-2745-9
[116]
E. Kuzey, J. Vreeken, and G. Weikum, “A Fresh Look on Knowledge Bases: Distilling Named Events from News,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{ekuzeyCIKM14,
  TITLE        = {A Fresh Look on Knowledge Bases: Distilling Named Events from News},
  AUTHOR       = {Kuzey, Erdal and Vreeken, Jilles and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2598-1},
  DOI          = {10.1145/2661829.2661984},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  EDITOR       = {Li, Jianzhong and Garofalakis, Minos and Soboroff, Ian and Suel, Torsten and Wang, Min},
  PAGES        = {1689--1698},
  ADDRESS      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Kuzey, Erdal %A Vreeken, Jilles %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Fresh Look on Knowledge Bases: Distilling Named Events from News : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5263-9 %R 10.1145/2661829.2661984 %D 2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %B CIKM'14 %E Li, Jianzhong; Wang, X. Sean; Garofalakis, Minos; Soboroff, Ian; Suel, Torsten; Wang, Min %P 1689 - 1698 %I ACM %@ 978-1-4503-2598-1
[117]
F. Mahdisoltani, J. Biega, and F. Suchanek, “YAGO3: A Knowledge Base from Multilingual Wikipedias,” in 7th Biennial Conference on Innovative Data Systems Research (CIDR 2015), Asilomar, CA, USA, 2014.
Export
BibTeX
@inproceedings{Mahdisoltani:2015,
  TITLE        = {{YAGO}3: A Knowledge Base from Multilingual Wikipedias},
  AUTHOR       = {Mahdisoltani, Farzaneh and Biega, Joanna and Suchanek, Fabian},
  LANGUAGE     = {eng},
  URL          = {http://www.cidrdb.org/cidr2015/Papers/CIDR15_Paper1.pdf},
  PUBLISHER    = {CIDR Conference},
  YEAR         = {2015},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {7th Biennial Conference on Innovative Data Systems Research (CIDR 2015)},
  ADDRESS      = {Asilomar, CA, USA},
}
Endnote
%0 Conference Proceedings %A Mahdisoltani, Farzaneh %A Biega, Joanna %A Suchanek, Fabian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T YAGO3: A Knowledge Base from Multilingual Wikipedias : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-501C-6 %U http://www.cidrdb.org/cidr2015/Papers/CIDR15_Paper1.pdf %D 2014 %B 7th Biennial Conference on Innovative Data Systems Research %Z date of event: 2015-01-04 - 2015-01-07 %C Asilomar, CA, USA %B 7th Biennial Conference on Innovative Data Systems Research %I CIDR Conference
[118]
F. Makari, C. Teflioudi, R. Gemulla, P. Haas, and Y. Sismanis, “Shared-memory and Shared-nothing Stochastic Gradient Descent Algorithms for Matrix Completion,” Knowledge and Information Systems, vol. 42, no. 3, 2014.
Export
BibTeX
@article{MakariTeflioudiGemulla2014,
  TITLE        = {Shared-memory and Shared-nothing Stochastic Gradient Descent Algorithms for Matrix Completion},
  AUTHOR       = {Makari, Faraz and Teflioudi, Christina and Gemulla, Rainer and Haas, Peter and Sismanis, Yannis},
  LANGUAGE     = {eng},
  ISSN         = {0219-1377},
  DOI          = {10.1007/s10115-013-0718-7},
  PUBLISHER    = {Springer},
  ADDRESS      = {London},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  JOURNAL      = {Knowledge and Information Systems},
  VOLUME       = {42},
  NUMBER       = {3},
  PAGES        = {493--523},
}
Endnote
%0 Journal Article %A Makari, Faraz %A Teflioudi, Christina %A Gemulla, Rainer %A Haas, Peter %A Sismanis, Yannis %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Shared-memory and Shared-nothing Stochastic Gradient Descent Algorithms for Matrix Completion : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4F57-9 %R 10.1007/s10115-013-0718-7 %7 2014-02-15 %D 2014 %J Knowledge and Information Systems %V 42 %N 3 %& 493 %P 493 - 523 %I Springer %C London %@ false
[119]
F. Makari Manshadi, “Scalable Optimization Algorithms for Recommender Systems,” Universität des Saarlandes, Saarbrücken, 2014.
Export
BibTeX
@phdthesis{MakariManshadi2014,
  TITLE        = {Scalable Optimization Algorithms for Recommender Systems},
  AUTHOR       = {Makari Manshadi, Faraz},
  LANGUAGE     = {eng},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
}
Endnote
%0 Thesis %A Makari Manshadi, Faraz %Y Gemulla, Rainer %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Scalable Optimization Algorithms for Recommender Systems : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-96AA-5 %I Universität des Saarlandes %C Saarbrücken %D 2014 %P 121 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2014/5922/http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[120]
S. Metzger, “User-centric Knowledge Extraction and Maintenance,” Universität des Saarlandes, Saarbrücken, 2014.
Export
BibTeX
@phdthesis{Metzger2014,
  TITLE        = {User-centric Knowledge Extraction and Maintenance},
  AUTHOR       = {Metzger, Steffen},
  LANGUAGE     = {eng},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
}
Endnote
%0 Thesis %A Metzger, Steffen %Y Schenkel, Ralf %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T User-centric Knowledge Extraction and Maintenance : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-96AE-E %I Universität des Saarlandes %C Saarbrücken %D 2014 %P 230 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2014/5763/http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[121]
S. Metzler and P. Miettinen, “Clustering Boolean Tensors,” in ECML/PKDD 2014 PhD Session Proceedings, Nancy, France, 2014.
Export
BibTeX
@inproceedings{Metzler2014Clustering,
  TITLE        = {Clustering {Boolean} Tensors},
  AUTHOR       = {Metzler, Saskia and Miettinen, Pauli},
  LANGUAGE     = {eng},
  URL          = {https://phdsession-ecmlpkdd2014.greyc.fr/sites/phdsession-ecmlpkdd2014.greyc.fr/files/papers/Paper_20692.pdf},
  PUBLISHER    = {University of Caen},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {ECML/PKDD 2014 PhD Session Proceedings},
  EDITOR       = {Belohlavek, Radim and Cr{\'e}milleux, Bruno},
  PAGES        = {31--40},
  ADDRESS      = {Nancy, France},
}
Endnote
%0 Conference Proceedings %A Metzler, Saskia %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Clustering Boolean Tensors : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C44-C %U https://phdsession-ecmlpkdd2014.greyc.fr/sites/phdsession-ecmlpkdd2014.greyc.fr/files/papers/Paper_20692.pdf %D 2014 %B The European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2014-09-15 - 2014-09-19 %C Nancy, France %B ECML/PKDD 2014 PhD Session Proceedings %E Belohlavek, Radim; Crémilleux, Bruno %P 31 - 40 %I University of Caen
[122]
P. Miettinen and J. Vreeken, “MDL4BMF: Minimum Description Length for Boolean Matrix Factorization,” ACM Transactions on Knowledge Discovery from Data, vol. 8, no. 4, Oct. 2014.
Export
BibTeX
@article{miettinen14mdl4bmf,
  TITLE        = {{MDL4BMF}: {Minimum} {D}escription {L}ength for {Boolean} {M}atrix {F}actorization},
  AUTHOR       = {Miettinen, Pauli and Vreeken, Jilles},
  LANGUAGE     = {eng},
  DOI          = {10.1145/2601437},
  PUBLISHER    = {ACM},
  ADDRESS      = {New York, NY},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014-10},
  JOURNAL      = {ACM Transactions on Knowledge Discovery from Data},
  VOLUME       = {8},
  NUMBER       = {4},
  PAGES        = {1--31},
  EID          = {18},
}
Endnote
%0 Journal Article %A Miettinen, Pauli %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T MDL4BMF: Minimum Description Length for Boolean Matrix Factorization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4980-E %R 10.1145/2601437 %7 2014 %D 2014 %J ACM Transactions on Knowledge Discovery from Data %V 8 %N 4 %& 1 %P 1 - 31 %Z sequence number: 18 %I ACM %C New York, NY %U http://dl.acm.org/citation.cfm?id=2663597.2601437
[123]
P. Miettinen, “Interactive Data Mining Considered Harmful (If Done Wrong),” in Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics (IDEA 2014), New York, NY, USA, 2014.
Abstract
Interactive data mining can be a powerful tool for data analysis. But in this short opinion piece I argue that this power comes with new pitfalls that can undermine the value of interactive mining, if not properly addressed. Most notably, there is a serious risk that the user of powerful interactive data mining tools will only find the results she was expecting. The purpose of this piece is to raise awareness of this potential issue, stimulate discussion on it, and hopefully give rise to new research directions in addressing it.
Export
BibTeX
@inproceedings{miettinen14interactive,
  TITLE        = {Interactive Data Mining Considered Harmful (If Done Wrong)},
  AUTHOR       = {Miettinen, Pauli},
  LANGUAGE     = {eng},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014-07},
  ABSTRACT     = {Interactive data mining can be a powerful tool for data analysis. But in this short opinion piece I argue that this power comes with new pitfalls that can undermine the value of interactive mining, if not properly addressed. Most notably, there is a serious risk that the user of powerful interactive data mining tools will only find the results she was expecting. The purpose of this piece is to raise awareness of this potential issue, stimulate discussion on it, and hopefully give rise to new research directions in addressing it.},
  BOOKTITLE    = {Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics (IDEA 2014)},
  EDITOR       = {Chau, Polo and Vreeken, Jilles and van Leeuwen, Matthijs and Faloutsos, Christos},
  PAGES        = {85--87},
  ADDRESS      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Interactive Data Mining Considered Harmful (If Done Wrong) : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5567-9 %D 2014 %B ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics %Z date of event: 2014-08-24 - 2014-08-24 %C New York, NY, USA %X Interactive data mining can be a powerful tool for data analysis. But in this short opinion piece I argue that this power comes with new pitfalls that can undermine the value of interactive mining, if not properly addressed. Most notably, there is a serious risk that the user of powerful interactive data mining tools will only find the results she was expecting. The purpose of this piece is to raise awareness of this potential issue, stimulate discussion on it, and hopefully give rise to new research directions in addressing it. %B Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics %E Chau, Polo; Vreeken, Jilles; van Leeuwen, Matthijs; Faloutsos, Christos %P 85 - 87 %U http://poloclub.gatech.edu/idea2014/papers/p85-miettinen.pdf
[124]
D. Milchevski and K. Berberich, “X-REC: Cross-category Entity Recommendation,” in Proceedings of the 5th Information Interaction in Context Conference (IIiX 2014), Regensburg, Germany, 2014.
Export
BibTeX
@inproceedings{DBLP:conf/iiix/MilchevskiB14,
  TITLE        = {{X-REC}: Cross-category Entity Recommendation},
  AUTHOR       = {Milchevski, Dragan and Berberich, Klaus},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2976-7},
  DOI          = {10.1145/2637002.2637049},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {Proceedings of the 5th Information Interaction in Context Conference (IIiX 2014)},
  EDITOR       = {Elsweiler, David and Ludwig, Bernd and Azzopardi, Leif and Wilson, Max L.},
  PAGES        = {308--311},
  ADDRESS      = {Regensburg, Germany},
}
Endnote
%0 Conference Proceedings %A Milchevski, Dragan %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T X-REC: Cross-category Entity Recommendation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5430-B %R 10.1145/2637002.2637049 %D 2014 %B 5th Information Interaction in Context Conference %Z date of event: 2014-08-26 - 2014-08-29 %C Regensburg, Germany %B Proceedings of the 5th Information Interaction in Context Conference %E Elsweiler, David; Ludwig, Bernd; Azzopardi, Leif; Wilson, Max L. %P 308 - 311 %I ACM %@ 978-1-4503-2976-7
[125]
A. Mishra, “Linking Today’s Wikipedia and News from the Past,” in PIKM’14, 7th PhD Workshop in Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Mishra:2014:LTW:2663714.2668048,
  TITLE        = {Linking Today's {Wikipedia} and News from the Past},
  AUTHOR       = {Mishra, Arunav},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-1481-7},
  DOI          = {10.1145/2663714.2668048},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {PIKM'14, 7th PhD Workshop in Information and Knowledge Management},
  EDITOR       = {de Melo, Gerard and Kacimi, Mouna and Varde, Aparna S.},
  PAGES        = {1--8},
  ADDRESS      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Mishra, Arunav %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Linking Today's Wikipedia and News from the Past : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6C6E-D %R 10.1145/2663714.2668048 %D 2014 %B 7th PhD Workshop in Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %K events, linking, time-aware language model, wikipedia %B PIKM'14 %E de Melo, Gerard; Kacimi, Mouna; Varde, Aparna S. %P 1 - 8 %I ACM %@ 978-1-4503-1481-7
[126]
A. Mishra, D. Milchevski, and K. Berberich, “Linking Wikipedia Events to Past News,” in SIGIR 2014 Workshop on Temporal, Social and Spatially-aware Information Access (TAIA 2014), Gold Coast, Australia, 2014.
Export
BibTeX
@inproceedings{Mishra2014a,
  TITLE        = {Linking {Wikipedia} Events to Past News},
  AUTHOR       = {Mishra, Arunav and Milchevski, Dragan and Berberich, Klaus},
  LANGUAGE     = {eng},
  URL          = {http://research.microsoft.com/en-US/people/milads/taia2014-mishra.pdf},
  PUBLISHER    = {Microsoft Research},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {SIGIR 2014 Workshop on Temporal, Social and Spatially-aware Information Access (TAIA 2014)},
  PAGES        = {1--4},
  ADDRESS      = {Gold Coast, Australia},
}
Endnote
%0 Conference Proceedings %A Mishra, Arunav %A Milchevski, Dragan %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Linking Wikipedia Events to Past News : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-3C35-0 %U http://research.microsoft.com/en-US/people/milads/taia2014-mishra.pdf %D 2014 %B SIGIR 2014 Workshop on Temporal, Social and Spatially-aware Information Access %Z date of event: 2014-07-11 - 2014-07-11 %C Gold Coast, Australia %B SIGIR 2014 Workshop on Temporal, Social and Spatially-aware Information Access %P 1 - 4 %I Microsoft Research
[127]
S. Mukherjee, J. Ajmera, and S. Joshi, “Unsupervised Approach for Shallow Domain Ontology Construction from Corpus,” in WWW’14 Companion, Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:DCU,
  TITLE        = {Unsupervised Approach for Shallow Domain Ontology Construction from Corpus},
  AUTHOR       = {Mukherjee, Subhabrata and Ajmera, Jitendra and Joshi, Sachindra},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2745-9},
  URL          = {http://dl.acm.org/citation.cfm?id=2577350},
  DOI          = {10.1145/2567948.2577350},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {WWW'14 Companion},
  PAGES        = {349--350},
  ADDRESS      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Ajmera, Jitendra %A Joshi, Sachindra %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Unsupervised Approach for Shallow Domain Ontology Construction from Corpus : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4FFD-6 %R 10.1145/2567948.2577350 %U http://dl.acm.org/citation.cfm?id=2577350 %D 2014 %B 23rd International Conference on World Wide Web %Z date of event: 2014-04-07 - 2014-04-11 %C Seoul, Korea %B WWW'14 Companion %P 349 - 350 %I ACM %@ 978-1-4503-2745-9
[128]
S. Mukherjee, G. Weikum, and C. Danescu-Niculescu-Mizil, “People on Drugs: Credibility of User Statements in Health Communities,” in KDD’14, 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, New York, NY, USA, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:PeopleOnDrugs,
  TITLE        = {People on Drugs: Credibility of User Statements in Health Communities},
  AUTHOR       = {Mukherjee, Subhabrata and Weikum, Gerhard and Danescu-Niculescu-Mizil, Cristian},
  LANGUAGE     = {eng},
  DOI          = {10.1145/2623330.2623714},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {KDD'14, 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  EDITOR       = {Macskassy, Sofus A. and Perlich, Claudia and Leskovec, Jure and Wang, Wei and Ghani, Rayid},
  PAGES        = {65--74},
  ADDRESS      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Weikum, Gerhard %A Danescu-Niculescu-Mizil, Cristian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Group C. Danescu-Niculescu-Mizil, Max Planck Institute for Software Systems, Max Planck Society %T People on Drugs: Credibility of User Statements in Health Communities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4FF9-E %R 10.1145/2623330.2623714 %D 2014 %B 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining %Z date of event: 2014-08-24 - 2014-08-27 %C New York, NY, USA %B KDD'14 %E Macskassy, Sofus A.; Perlich, Claudia; Leskovec, Jure; Wang, Wei; Ghani, Rayid %P 65 - 74 %I ACM
[129]
S. Mukherjee and S. Joshi, “Help Yourself: A Virtual Self-assist System,” in WWW’14 Companion, Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:SelfAssist,
  TITLE        = {Help Yourself: A Virtual Self-assist System},
  AUTHOR       = {Mukherjee, Subhabrata and Joshi, Sachindra},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2745-9},
  URL          = {http://dl.acm.org/citation.cfm?id=2577021},
  DOI          = {10.1145/2567948.2577021},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {WWW'14 Companion},
  PAGES        = {171--174},
  ADDRESS      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Joshi, Sachindra %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Help Yourself: A Virtual Self-assist System : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5007-5 %R 10.1145/2567948.2577021 %U http://dl.acm.org/citation.cfm?id=2577021 %D 2014 %B 23rd International Conference on World Wide Web %Z date of event: 2014-04-07 - 2014-04-11 %C Seoul, Korea %B WWW'14 Companion %P 171 - 174 %I ACM %@ 978-1-4503-2745-9
[130]
S. Mukherjee, G. Basu, and S. Joshi, “42 - Joint Author Sentiment Topic Model,” in 2014 SIAM International Conference on Data Mining (SDM 2014), Philadelphia, PA, USA, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:JAST,
  TITLE        = {42 -- Joint Author Sentiment Topic Model},
  AUTHOR       = {Mukherjee, Subhabrata and Basu, Gaurab and Joshi, Sachindra},
  LANGUAGE     = {eng},
  ISBN         = {978-1-61197-344-0},
  DOI          = {10.1137/1.9781611973440.43},
  PUBLISHER    = {SIAM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {2014 SIAM International Conference on Data Mining (SDM 2014)},
  PAGES        = {370--378},
  ADDRESS      = {Philadelphia, PA, USA},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Basu, Gaurab %A Joshi, Sachindra %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T 42 - Joint Author Sentiment Topic Model : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4F9D-B %R 10.1137/1.9781611973440.43 %D 2014 %B SIAM International Conference on Data Mining %Z date of event: 2014-04-24 - 2014-04-26 %C Philadelphia, PA, USA %B 2014 SIAM International Conference on Data Mining %P 370 - 378 %I SIAM %@ 978-1-61197-344-0
[131]
S. Mukherjee, J. Ajmera, and S. Joshi, “Domain Cartridge: Unsupervised Framework for Shallow Domain Ontology Construction from Corpus,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:DomainCartridge,
  TITLE        = {Domain Cartridge: Unsupervised Framework for Shallow Domain Ontology Construction from Corpus},
  AUTHOR       = {Mukherjee, Subhabrata and Ajmera, Jitendra and Joshi, Sachindra},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2598-1},
  DOI          = {10.1145/2661829.2662087},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  EDITOR       = {Li, Jianzhong and Wang, X. Sean and Garofalakis, Minos and Soboroff, Ian and Suel, Torsten and Wang, Min},
  PAGES        = {929--938},
  ADDRESS      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Ajmera, Jitendra %A Joshi, Sachindra %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Domain Cartridge: Unsupervised Framework for Shallow Domain Ontology Construction from Corpus : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4FB1-C %R 10.1145/2661829.2662087 %D 2014 %8 03.11.2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %B CIKM'14 %E Li, Jianzhong; Wang, X. Sean; Garofalakis, Minos; Soboroff, Ian; Suel, Torsten; Wang, Min %P 929 - 938 %I ACM %@ 978-1-4503-2598-1
[132]
S. Mukherjee and S. Joshi, “Author-Specific Sentiment Aggregation for Polarity Prediction of Reviews,” in Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC 2014), Reykjavik, Iceland, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:PASOT,
  TITLE        = {Author-Specific Sentiment Aggregation for Polarity Prediction of Reviews},
  AUTHOR       = {Mukherjee, Subhabrata and Joshi, Sachindra},
  LANGUAGE     = {eng},
  ISBN         = {978-2-9517408-8-4},
  PUBLISHER    = {ELRA},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC 2014)},
  PAGES        = {3092--3099},
  ADDRESS      = {Reykjavik, Iceland},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Joshi, Sachindra %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Author-Specific Sentiment Aggregation for Polarity Prediction of Reviews : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4FF7-1 %D 2014 %B Ninth International Conference on Language Resources and Evaluation %Z date of event: 2014-05-26 - 2014-05-31 %C Reykjavik, Iceland %B Proceedings of the Ninth International Conference on Language Resources and Evaluation %P 3092 - 3099 %I ELRA %@ 978-2-9517408-8-4 %U http://www.lrec-conf.org/proceedings/lrec2014/pdf/467_Paper.pdf
[133]
D. B. Nguyen, J. Hoffart, M. Theobald, and G. Weikum, “AIDA-light: High-throughput Named-entity Disambiguation,” in Linked Data on the Web (LDOW 2014), Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{Nguyen:2014wl,
  TITLE        = {{AIDA}-light: High-Throughput Named-entity Disambiguation},
  AUTHOR       = {Nguyen, Dat Ba and Hoffart, Johannes and Theobald, Martin and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISSN         = {1613-0073},
  URL          = {http://ceur-ws.org/Vol-1184/ldow2014_paper_03.pdf},
  PUBLISHER    = {CEUR-WS.org},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {Linked Data on the Web (LDOW 2014)},
  DEBUG        = {author: Berners-Lee, Tim},
  EDITOR       = {Bizer, Christian and Heath, Tom and Auer, S{\"o}ren},
  PAGES        = {1--10},
  SERIES       = {CEUR Workshop Proceedings},
  VOLUME       = {1184},
  ADDRESS      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Nguyen, Dat Ba %A Hoffart, Johannes %A Theobald, Martin %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T AIDA-light: High-throughput Named-entity Disambiguation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5162-2 %U http://ceur-ws.org/Vol-1184/ldow2014_paper_03.pdf %D 2014 %B Workshop on Linked Data on the Web 2014 %Z date of event: 2014-04-08 - 2014-04-08 %C Seoul, Korea %B Linked Data on the Web %E Bizer, Christian; Heath, Tom; Auer, Sören; Berners-Lee, Tim %P 1 - 10 %I CEUR-WS.org %B CEUR Workshop Proceedings %N 1184 %@ false %U http://ceur-ws.org/Vol-1184/ldow2014_paper_03.pdf
[134]
H.-V. Nguyen, E. Müller, J. Vreeken, and K. Böhm, “Multivariate Maximal Correlation Analysis,” in Proceedings of The 31st International Conference on Machine Learning (ICML 2014), Beijing, China, 2014.
Export
BibTeX
@inproceedings{nguyen:14:mac,
  TITLE        = {Multivariate Maximal Correlation Analysis},
  AUTHOR       = {Nguyen, Hoang-Vu and M{\"u}ller, Emmanuel and Vreeken, Jilles and B{\"o}hm, Klemens},
  LANGUAGE     = {eng},
  ISSN         = {1938-7228},
  URL          = {http://jmlr.csail.mit.edu/proceedings/papers/v32/nguyenc14.pdf},
  PUBLISHER    = {JMLR},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {Proceedings of The 31st International Conference on Machine Learning (ICML 2014)},
  EDITOR       = {Xing, Eric P. and Jebara, Tony},
  PAGES        = {775--783},
  SERIES       = {JMLR Workshop and Conference Proceedings},
  VOLUME       = {32},
  ADDRESS      = {Beijing, China},
}
Endnote
%0 Conference Proceedings %A Nguyen, Hoang-Vu %A Müller, Emmanuel %A Vreeken, Jilles %A Böhm, Klemens %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Multivariate Maximal Correlation Analysis : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53A7-9 %U http://jmlr.csail.mit.edu/proceedings/papers/v32/nguyenc14.pdf %D 2014 %B 31st International Conference on Machine Learning %Z date of event: 2014-06-21 - 2014-06-26 %C Beijing, China %B Proceedings of The 31st International Conference on Machine Learning %E Xing, Eric P.; Jebara, Tony %P 775 - 783 %I JMLR %B JMLR Workshop and Conference Proceedings %N 32 %@ false %U http://jmlr.csail.mit.edu/proceedings/papers/v32/nguyenc14.pdf
[135]
H.-V. Nguyen, E. Müller, J. Vreeken, and K. Böhm, “Unsupervised Interaction-preserving Discretization of Multivariate Data,” Data Mining and Knowledge Discovery, vol. 28, no. 5–6, 2014.
Export
BibTeX
@article{nguyen:14:unsupervised,
  TITLE        = {Unsupervised Interaction-preserving Discretization of Multivariate Data},
  AUTHOR       = {Nguyen, Hoang-Vu and M{\"u}ller, Emmanuel and Vreeken, Jilles and B{\"o}hm, Klemens},
  LANGUAGE     = {eng},
  DOI          = {10.1007/s10618-014-0350-5},
  PUBLISHER    = {Springer},
  ADDRESS      = {New York, NY},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  JOURNAL      = {Data Mining and Knowledge Discovery},
  VOLUME       = {28},
  NUMBER       = {5--6},
  PAGES        = {1366--1397},
}
Endnote
%0 Journal Article %A Nguyen, Hoang-Vu %A Müller, Emmanuel %A Vreeken, Jilles %A Böhm, Klemens %+ Karlsruhe Institute of Technology Karlsruhe Institute of Technology Databases and Information Systems, MPI for Informatics, Max Planck Society Karlsruhe Institute of Technology %T Unsupervised Interaction-preserving Discretization of Multivariate Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-49A7-7 %R 10.1007/s10618-014-0350-5 %7 2014-04-04 %D 2014 %J Data Mining and Knowledge Discovery %V 28 %N 5-6 %& 1366 %P 1366 - 1397 %I Springer %C New York, NY
[136]
K. Panev and K. Berberich, “Phrase Queries with Inverted + Direct Indexes,” in Web Information Systems Engineering - WISE 2014, Thessaloniki, Greece, 2014, vol. 8786.
Export
BibTeX
@inproceedings{DBLP:conf/wise/PanevB14,
  TITLE        = {Phrase Queries with Inverted + Direct Indexes},
  AUTHOR       = {Panev, Kiril and Berberich, Klaus},
  LANGUAGE     = {eng},
  ISBN         = {978-3-319-11748-5},
  DOI          = {10.1007/978-3-319-11749-2_13},
  PUBLISHER    = {Springer},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {Web Information Systems Engineering -- WISE 2014},
  EDITOR       = {Benatallah, Boualem and Bestavros, Azer and Manolopoulos, Yannis and Vakali, Athena and Zhang, Yanchun},
  PAGES        = {156--169},
  SERIES       = {Lecture Notes in Computer Science},
  VOLUME       = {8786},
  ADDRESS      = {Thessaloniki, Greece},
}
Endnote
%0 Conference Proceedings %A Panev, Kiril %A Berberich, Klaus %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Phrase Queries with Inverted + Direct Indexes : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53C6-1 %R 10.1007/978-3-319-11749-2_13 %D 2014 %B 15th International Conference on Web Information Systems Engineering %Z date of event: 2014-10-12 - 2014-10-14 %C Thessaloniki, Greece %B Web Information Systems Engineering - WISE 2014 %E Benatallah, Boualem; Bestavros, Azer; Manolopoulos, Yannis; Vakali, Athena; Zhang, Yanchun %V 8786 %P 156 - 169 %I Springer %@ 978-3-319-11748-5 %B Lecture Notes in Computer Science %N 8786 %U http://dx.doi.org/10.1007/978-3-319-11749-2_13
[137]
B. A. Prakash, J. Vreeken, and C. Faloutsos, “Efficiently Spotting the Starting Points of an Epidemic in a Large Graph,” Knowledge and Information Systems, vol. 38, no. 1, 2014.
Export
BibTeX
@article{prakash:14:culprits,
  TITLE        = {Efficiently Spotting the Starting Points of an Epidemic in a Large Graph},
  AUTHOR       = {Prakash, B. Aditya and Vreeken, Jilles and Faloutsos, Christos},
  LANGUAGE     = {eng},
  ISSN         = {0219-1377},
  DOI          = {10.1007/s10115-013-0671-5},
  PUBLISHER    = {Springer},
  ADDRESS      = {London},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  JOURNAL      = {Knowledge and Information Systems},
  VOLUME       = {38},
  NUMBER       = {1},
  PAGES        = {35--59},
}
Endnote
%0 Journal Article %A Prakash, B. Aditya %A Vreeken, Jilles %A Faloutsos, Christos %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Efficiently Spotting the Starting Points of an Epidemic in a Large Graph : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53B3-D %R 10.1007/s10115-013-0671-5 %7 2013-07-17 %D 2014 %J Knowledge and Information Systems %V 38 %N 1 %& 35 %P 35 - 59 %I Springer %C London %@ false
[138]
L. Qu, Y. Zhang, R. Wang, L. Jiang, R. Gemulla, and G. Weikum, “Senti-LSSVM: Sentiment-oriented Multi-relation Extraction with Latent structural SVM,” Transactions of the Association for Computational Linguistics (Proc. ACL 2014), vol. 2, 2014.
Export
BibTeX
@article{Gemullaacl2014,
  TITLE        = {Senti-{LSSVM}: {Sentiment}-oriented Multi-relation Extraction with Latent structural {SVM}},
  AUTHOR       = {Qu, Lizhen and Zhang, Yi and Wang, Rui and Jiang, Lili and Gemulla, Rainer and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISSN         = {2307-387X},
  PUBLISHER    = {ACL},
  ADDRESS      = {Stroudsburg, PA},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  JOURNAL      = {Transactions of the Association for Computational Linguistics (Proc. ACL)},
  VOLUME       = {2},
  PAGES        = {155--164},
  BOOKTITLE    = {The 52nd Annual Meeting of the Association for Computational Linguistics (ACL 2014)},
}
Endnote
%0 Journal Article %A Qu, Lizhen %A Zhang, Yi %A Wang, Rui %A Jiang, Lili %A Gemulla, Rainer %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Senti-LSSVM: Sentiment-oriented Multi-relation Extraction with Latent structural SVM : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6AF0-6 %7 2014 %D 2014 %J Transactions of the Association for Computational Linguistics %O TACL %V 2 %& 155 %P 155 - 164 %I ACL %C Stroudsburg, PA %@ false %B The 52nd Annual Meeting of the Association for Computational Linguistics %O ACL 2014
[139]
L. Qu and B. Andres, “Estimating Maximally Probable Constrained Relations by Mathematical Programming,” 2014. [Online]. Available: http://arxiv.org/abs/1408.0838.
Abstract
Estimating a constrained relation is a fundamental problem in machine learning. Special cases are classification (the problem of estimating a map from a set of to-be-classified elements to a set of labels), clustering (the problem of estimating an equivalence relation on a set) and ranking (the problem of estimating a linear order on a set). We contribute a family of probability measures on the set of all relations between two finite, non-empty sets, which offers a joint abstraction of multi-label classification, correlation clustering and ranking by linear ordering. Estimating (learning) a maximally probable measure, given (a training set of) related and unrelated pairs, is a convex optimization problem. Estimating (inferring) a maximally probable relation, given a measure, is a 01-linear program. It is solved in linear time for maps. It is NP-hard for equivalence relations and linear orders. Practical solutions for all three cases are shown in experiments with real data. Finally, estimating a maximally probable measure and relation jointly is posed as a mixed-integer nonlinear program. This formulation suggests a mathematical programming approach to semi-supervised learning.
Export
BibTeX
@online{qu-2014,
  TITLE        = {Estimating Maximally Probable Constrained Relations by Mathematical Programming},
  AUTHOR       = {Qu, Lizhen and Andres, Bj{\"o}rn},
  LANGUAGE     = {eng},
  URL          = {http://arxiv.org/abs/1408.0838},
  EPRINT       = {1408.0838},
  EPRINTTYPE   = {arXiv},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {Estimating a constrained relation is a fundamental problem in machine learning. Special cases are classification (the problem of estimating a map from a set of to-be-classified elements to a set of labels), clustering (the problem of estimating an equivalence relation on a set) and ranking (the problem of estimating a linear order on a set). We contribute a family of probability measures on the set of all relations between two finite, non-empty sets, which offers a joint abstraction of multi-label classification, correlation clustering and ranking by linear ordering. Estimating (learning) a maximally probable measure, given (a training set of) related and unrelated pairs, is a convex optimization problem. Estimating (inferring) a maximally probable relation, given a measure, is a 01-linear program. It is solved in linear time for maps. It is NP-hard for equivalence relations and linear orders. Practical solutions for all three cases are shown in experiments with real data. Finally, estimating a maximally probable measure and relation jointly is posed as a mixed-integer nonlinear program. This formulation suggests a mathematical programming approach to semi-supervised learning.},
}
Endnote
%0 Report %A Qu, Lizhen %A Andres, Björn %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society %T Estimating Maximally Probable Constrained Relations by Mathematical Programming : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-D324-6 %U http://arxiv.org/abs/1408.0838 %D 2014 %8 04.08.2014 %X Estimating a constrained relation is a fundamental problem in machine learning. Special cases are classification (the problem of estimating a map from a set of to-be-classified elements to a set of labels), clustering (the problem of estimating an equivalence relation on a set) and ranking (the problem of estimating a linear order on a set). We contribute a family of probability measures on the set of all relations between two finite, non-empty sets, which offers a joint abstraction of multi-label classification, correlation clustering and ranking by linear ordering. Estimating (learning) a maximally probable measure, given (a training set of) related and unrelated pairs, is a convex optimization problem. Estimating (inferring) a maximally probable relation, given a measure, is a 01-linear program. It is solved in linear time for maps. It is NP-hard for equivalence relations and linear orders. Practical solutions for all three cases are shown in experiments with real data. Finally, estimating a maximally probable measure and relation jointly is posed as a mixed-integer nonlinear program. This formulation suggests a mathematical programming approach to semi-supervised learning. %K Computer Science, Learning, cs.LG,Computer Science, Numerical Analysis, cs.NA,Mathematics, Optimization and Control, math.OC,Statistics, Machine Learning, stat.ML
[140]
P. Roy, J. Teubner, and R. Gemulla, “Low-latency Handshake Join,” Proceedings of the VLDB Endowment (Proc. VLDB 2014), vol. 7, no. 9, 2014.
Export
BibTeX
@article{GemullaVLDB2014,
  TITLE        = {Low-latency Handshake Join},
  AUTHOR       = {Roy, Pratanu and Teubner, Jens and Gemulla, Rainer},
  LANGUAGE     = {eng},
  PUBLISHER    = {ACM},
  ADDRESS      = {New York, NY},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  JOURNAL      = {Proceedings of the VLDB Endowment (Proc. VLDB)},
  VOLUME       = {7},
  NUMBER       = {9},
  PAGES        = {709--720},
  BOOKTITLE    = {Proceedings of the 40th International Conference on Very Large Data Bases (VLDB 2014)},
  EDITOR       = {Jagadish, H. V. and Zhou, Aoying},
}
Endnote
%0 Journal Article %A Roy, Pratanu %A Teubner, Jens %A Gemulla, Rainer %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Low-latency Handshake Join : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6AFF-8 %7 2014 %D 2014 %J Proceedings of the VLDB Endowment %O PVLDB %V 7 %N 9 %& 709 %P 709 - 720 %I ACM %C New York, NY %B Proceedings of the 40th International Conference on Very Large Data Bases %O VLDB 2014 Hangzhou, China, September 1st - 5th
[141]
F. M. Suchanek and G. Weikum, “Knowledge Bases in the Age of Big Data Analytics,” Proceedings of the VLDB Endowment (Proc. VLDB 2014), vol. 7, no. 13, 2014.
Export
BibTeX
@article{DBLP:journals/pvldb/SuchanekW14,
  TITLE        = {Knowledge Bases in the Age of Big Data Analytics},
  AUTHOR       = {Suchanek, Fabian M. and Weikum, Gerhard},
  LANGUAGE     = {eng},
  PUBLISHER    = {ACM},
  ADDRESS      = {New York, NY},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  JOURNAL      = {Proceedings of the VLDB Endowment (Proc. VLDB)},
  VOLUME       = {7},
  NUMBER       = {13},
  PAGES        = {1713--1714},
  BOOKTITLE    = {Proceedings of the 40th International Conference on Very Large Data Bases (VLDB 2014)},
  EDITOR       = {Jagadish, H. V. and Zhou, Aoying},
}
Endnote
%0 Journal Article %A Suchanek, Fabian M. %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowledge Bases in the Age of Big Data Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6B2A-F %7 2014 %D 2014 %J Proceedings of the VLDB Endowment %O PVLDB %V 7 %N 13 %& 1713 %P 1713 - 1714 %I ACM %C New York, NY %B Proceedings of the 40th International Conference on Very Large Data Bases %O VLDB 2014 Hangzhou, China, September 1st - 5th %U http://www.vldb.org/pvldb/vol7/p1713-suchanek.pdf
[142]
N. Tandon, G. de Melo, F. M. Suchanek, and G. Weikum, “WebChild: Harvesting and Organizing Commonsense Knowledge from the Web,” in WSDM’14, 7th ACM International Conference on Web Search and Data Mining, New York, NY, USA, 2014.
Export
BibTeX
@inproceedings{Tandon2013,
  title        = {{WebChild}: Harvesting and Organizing Commonsense Knowledge from the Web},
  author       = {Tandon, Niket and de Melo, Gerard and Suchanek, Fabian M. and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-2351-2},
  doi          = {10.1145/2556195.2556245},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {WSDM'14, 7th ACM International Conference on Web Search and Data Mining},
  pages        = {523--532},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Tandon, Niket %A de Melo, Gerard %A Suchanek, Fabian M. %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T WebChild: Harvesting and Organizing Commonsense Knowledge from the Web : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0019-84C4-7 %R 10.1145/2556195.2556245 %D 2014 %B 7th ACM International Conference on Web Search and Data Mining %Z date of event: 2014-04-24 - 2014-04-28 %C New York, NY, USA %B WSDM'14 %P 523 - 532 %I ACM %@ 978-1-4503-2351-2
[143]
N. Tandon, G. de Melo, and G. Weikum, “Acquiring Comparative Commonsense Knowledge from the Web,” in Proceedings of the Twenty-Eighth AAAI Conference on Artificial Intelligence and the Twenty-Sixth Innovative Applications of Artificial Intelligence Conference, Québec City, Québec, Canada, 2014.
Export
BibTeX
@inproceedings{DBLP:conf/aaai/TandonMW14,
  title        = {Acquiring Comparative Commonsense Knowledge from the Web},
  author       = {Tandon, Niket and de Melo, Gerard and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-57735-661-5},
  publisher    = {AAAI Press},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Proceedings of the Twenty-Eighth AAAI Conference on Artificial Intelligence and the Twenty-Sixth Innovative Applications of Artificial Intelligence Conference},
  editor       = {Brodley, Carla E. and Stone, Peter},
  pages        = {166--172},
  address      = {Qu{\'e}bec City, Qu{\'e}bec, Canada},
}
Endnote
%0 Conference Proceedings %A Tandon, Niket %A de Melo, Gerard %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Acquiring Comparative Commonsense Knowledge from the Web : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-49A0-6 %D 2014 %B Twenty-Eighth AAAI Conference on Artificial Intelligence %Z date of event: 2014-07-27 - 2014-07-31 %C Québec City, Québec, Canada %B Proceedings of the Twenty-Eighth AAAI Conference on Artificial Intelligence and the Twenty-Sixth Innovative Applications of Artificial Intelligence Conference %E Brodley, Carla E.; Stone, Peter %P 166 - 172 %I AAAI Press %@ 978-1-57735-661-5 %U http://www.aaai.org/ocs/index.php/AAAI/AAAI14/paper/view/8649
[144]
T. Tylenda, S. K. Kondreddi, and G. Weikum, “Spotting Knowledge Base Facts in Web Texts,” in AKBC 2014, 4th Workshop on Automated Knowledge Base Construction, Montreal, Canada, 2014.
Export
BibTeX
@inproceedings{TylendaKW2014,
  title        = {Spotting Knowledge Base Facts in Web Texts},
  author       = {Tylenda, Tomasz and Kondreddi, Sarath Kumar and Weikum, Gerhard},
  language     = {eng},
  publisher    = {AKBC Board},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {AKBC 2014, 4th Workshop on Automated Knowledge Base Construction},
  address      = {Montreal, Canada},
}
Endnote
%0 Conference Proceedings %A Tylenda, Tomasz %A Kondreddi, Sarath Kumar %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Spotting Knowledge Base Facts in Web Texts : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-689C-7 %D 2014 %B 4th Workshop on Automated Knowledge Base Construction %Z date of event: 2014-12-13 - 2014-12-13 %C Montreal, Canada %B AKBC 2014 %I AKBC Board %U http://www.akbc.ws/2014/submissions/akbc2014_submission_8.pdf
[145]
T. Tylenda, Y. Wang, and G. Weikum, “Spotting Facts in the Wild,” in Workshop on Automatic Creation and Curation of Knowledge Bases at SIGMOD (WACCK 2014), Snowbird, UT, USA. (Accepted/in press)
Export
BibTeX
@inproceedings{TylendaWW2014,
  title        = {Spotting Facts in the Wild},
  author       = {Tylenda, Tomasz and Wang, Yafang and Weikum, Gerhard},
  language     = {eng},
  year         = {2014},
  publremark   = {Accepted},
  marginalmark = {$\bullet$},
  booktitle    = {Workshop on Automatic Creation and Curation of Knowledge Bases at SIGMOD (WACCK 2014)},
  address      = {Snowbird, UT, USA},
}
Endnote
%0 Conference Proceedings %A Tylenda, Tomasz %A Wang, Yafang %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Spotting Facts in the Wild : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-68A8-B %D 2014 %B Workshop on Automatic Creation and Curation of Knowledge Bases %Z date of event: 2014-06-27 - 2014-06-27 %C Snowbird, UT, USA %B Workshop on Automatic Creation and Curation of Knowledge Bases at SIGMOD
[146]
M. van Leeuwen and J. Vreeken, “Mining and Using Sets of Patterns through Compression,” in Frequent Pattern Mining, New York, NY: Springer, 2014.
Export
BibTeX
@incollection{leeuwen:14:compression,
  title        = {Mining and Using Sets of Patterns through Compression},
  author       = {van Leeuwen, Matthijs and Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-3-319-07820-5},
  doi          = {10.1007/978-3-319-07821-2_8},
  publisher    = {Springer},
  address      = {New York, NY},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Frequent Pattern Mining},
  editor       = {Aggarwal, Charu C. and Han, Jiawei},
  pages        = {165--198},
}
Endnote
%0 Book Section %A van Leeuwen, Matthijs %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Mining and Using Sets of Patterns through Compression : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53BB-E %R 10.1007/978-3-319-07821-2_8 %D 2014 %B Frequent Pattern Mining %E Aggarwal, Charu C.; Han, Jiawei %P 165 - 198 %I Springer %C New York, NY %@ 978-3-319-07820-5
[147]
J. Vreeken and N. Tatti, “Interesting Patterns,” in Frequent Pattern Mining, New York, NY: Springer, 2014.
Export
BibTeX
@incollection{vreeken:14:interesting,
  title        = {Interesting Patterns},
  author       = {Vreeken, Jilles and Tatti, Nikolaj},
  language     = {eng},
  isbn         = {978-3-319-07820-5},
  doi          = {10.1007/978-3-319-07821-2_5},
  publisher    = {Springer},
  address      = {New York, NY},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Frequent Pattern Mining},
  editor       = {Aggarwal, Charu C. and Han, Jiawei},
  pages        = {105--134},
}
Endnote
%0 Book Section %A Vreeken, Jilles %A Tatti, Nikolaj %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Interesting Patterns : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53B9-1 %R 10.1007/978-3-319-07821-2_5 %D 2014 %K Pattern mining; Interestingness measures; Statistics; Ranking; Pattern set mining %B Frequent Pattern Mining %E Aggarwal, Charu C.; Han, Jiawei %P 105 - 134 %I Springer %C New York, NY %@ 978-3-319-07820-5
[148]
G. I. Webb and J. Vreeken, “Efficient Discovery of the Most Interesting Associations,” ACM Transactions on Knowledge Discovery from Data, vol. 8, no. 3, 2014.
Export
BibTeX
@article{webb:14:selfsufs,
  title        = {Efficient Discovery of the Most Interesting Associations},
  author       = {Webb, Geoffrey I. and Vreeken, Jilles},
  language     = {eng},
  doi          = {10.1145/2601433},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  journal      = {ACM Transactions on Knowledge Discovery from Data},
  volume       = {8},
  number       = {3},
  pages        = {1--31},
  eid          = {15},
}
Endnote
%0 Journal Article %A Webb, Geoffrey I. %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Efficient Discovery of the Most Interesting Associations : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53B1-2 %R 10.1145/2601433 %7 2014 %D 2014 %J ACM Transactions on Knowledge Discovery from Data %O TKDD %V 8 %N 3 %& 1 %P 1 - 31 %Z sequence number: 15 %I ACM
[149]
G. Weikum, “Big Text: von Sprache zu Wissen,” in Informatik 2014: Big Data - Komplexität meistern, Stuttgart, Deutschland, 2014.
Export
BibTeX
@inproceedings{DBLP:conf/gi/Weikum14,
  title        = {Big {Text}: von {Sprache} zu {Wissen}},
  author       = {Weikum, Gerhard},
  language     = {deu},
  isbn         = {978-3-88579-626-8},
  publisher    = {GI},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Informatik 2014: Big Data -- Komplexit{\"a}t meistern},
  editor       = {Pl{\"o}dereder, Erhard and Grunske, Lars and Schneider, Eric and Ull, Dominik},
  pages        = {55},
  series       = {Lecture Notes in Informatics},
  volume       = {P-232},
  address      = {Stuttgart, Deutschland},
}
Endnote
%0 Conference Proceedings %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Big Text: von Sprache zu Wissen : %G deu %U http://hdl.handle.net/11858/00-001M-0000-0024-54D4-A %D 2014 %B 44. Jahrestagung der Gesellschaft für Informatik %Z date of event: 2014-09-22 - 2014-09-26 %C Stuttgart, Deutschland %B Informatik 2014: Big Data - Komplexität meistern %E Plödereder, Erhard; Grunske, Lars; Schneider, Eric; Ull, Dominik %P 55 %I GI %@ 978-388579626-8 %B Lecture Notes in Informatics %N P-232
[150]
H. Wu, J. Vreeken, N. Tatti, and N. Ramakrishnan, “Uncovering the Plot: Detecting Surprising Coalitions of Entities in Multi-relational Schemas,” Data Mining and Knowledge Discovery, vol. 28, no. 5–6, 2014.
Export
BibTeX
@article{wu:14:plots,
  title        = {Uncovering the Plot: {Detecting} Surprising Coalitions of Entities in Multi-relational Schemas},
  author       = {Wu, Hao and Vreeken, Jilles and Tatti, Nikolaj and Ramakrishnan, Naren},
  language     = {eng},
  doi          = {10.1007/s10618-014-0370-1},
  publisher    = {Springer},
  address      = {London},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  journal      = {Data Mining and Knowledge Discovery},
  volume       = {28},
  number       = {5--6},
  pages        = {1398--1428},
}
Endnote
%0 Journal Article %A Wu, Hao %A Vreeken, Jilles %A Tatti, Nikolaj %A Ramakrishnan, Naren %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Uncovering the Plot: Detecting Surprising Coalitions of Entities in Multi-relational Schemas : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53B7-5 %R 10.1007/s10618-014-0370-1 %7 2014-07-22 %D 2014 %J Data Mining and Knowledge Discovery %V 28 %N 5-6 %& 1398 %P 1398 - 1428 %I Springer %C London
[151]
M. Yahya, S. E. Whang, R. Gupta, and A. Halevy, “ReNoun: Fact Extraction for Nominal Attributes,” in The 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP 2014), Doha, Qatar, 2014.
Abstract
Search engines are increasingly relying on large knowledge bases of facts to provide direct answers to users' queries. However, the construction of these knowledge bases is largely manual and does not scale to the long and heavy tail of facts. Open information extraction tries to address this challenge, but typically assumes that facts are expressed with verb phrases, and therefore has had difficulty extracting facts for noun‐based relations. We describe ReNoun, an open information extraction system that complements previous efforts by focusing on nominal attributes and on the long tail. ReNoun's approach is based on leveraging a large ontology of noun attributes mined from a text corpus and from user queries. ReNoun creates a seed set of training data by using specialized patterns and requiring that the facts mention an attribute in the ontology. ReNoun then generalizes from this seed set to produce a much larger set of extractions that are then scored. We describe experiments that show that we extract facts with high precision and for attributes that cannot be extracted with verb‐based techniques.
Export
BibTeX
@inproceedings{YahyaSRA14,
  title        = {{ReNoun}: Fact Extraction for Nominal Attributes},
  author       = {Yahya, Mohamed and Whang, Steven Euijong and Gupta, Rahul and Halevy, Alon},
  language     = {eng},
  isbn         = {978-1-937284-96-1},
  publisher    = {ACL},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014-10},
  abstract     = {Search engines are increasingly relying on large knowledge bases of facts to provide direct answers to users' queries. However, the construction of these knowledge bases is largely manual and does not scale to the long and heavy tail of facts. Open information extraction tries to address this challenge, but typically assumes that facts are expressed with verb phrases, and therefore has had difficulty extracting facts for noun-based relations. We describe ReNoun, an open information extraction system that complements previous efforts by focusing on nominal attributes and on the long tail. ReNoun's approach is based on leveraging a large ontology of noun attributes mined from a text corpus and from user queries. ReNoun creates a seed set of training data by using specialized patterns and requiring that the facts mention an attribute in the ontology. ReNoun then generalizes from this seed set to produce a much larger set of extractions that are then scored. We describe experiments that show that we extract facts with high precision and for attributes that cannot be extracted with verb-based techniques.},
  booktitle    = {The 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP 2014)},
  pages        = {325--335},
  address      = {Doha, Qatar},
}
Endnote
%0 Conference Proceedings %A Yahya, Mohamed %A Whang, Steven Euijong %A Gupta, Rahul %A Halevy, Alon %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T ReNoun: Fact Extraction for Nominal Attributes : %U http://hdl.handle.net/11858/00-001M-0000-0024-2589-7 %D 2014 %B 2014 Conference on Empirical Methods in Natural Language Processing %Z date of event: 2014-10-25 - 2014-10-29 %C Doha, Qatar %X Search engines are increasingly relying on large knowledge bases of facts to provide direct answers to users' queries. However, the construction of these knowledge bases is largely manual and does not scale to the long and heavy tail of facts. Open information extraction tries to address this challenge, but typically assumes that facts are expressed with verb phrases, and therefore has had difficulty extracting facts for noun‐based relations. We describe ReNoun, an open information extraction system that complements previous efforts by focusing on nominal attributes and on the long tail. ReNoun's approach is based on leveraging a large ontology of noun attributes mined from a text corpus and from user queries. ReNoun creates a seed set of training data by using specialized patterns and requiring that the facts mention an attribute in the ontology. ReNoun then generalizes from this seed set to produce a much larger set of extractions that are then scored. We describe experiments that show that we extract facts with high precision and for attributes that cannot be extracted with verb‐based techniques. %B The 2014 Conference on Empirical Methods in Natural Language Processing %P 325 - 335 %I ACL %@ 978-1-937284-96-1 %U http://emnlp2014.org/papers/pdf/EMNLP2014038.pdf
[152]
M. A. Yosef, M. Spaniol, and G. Weikum, “AIDArabic: A Named-entity Disambiguation Framework for Arabic Text,” in The EMNLP 2014 Workshop on Arabic Natural Language Processing (ANLP 2014), Doha, Qatar, 2014.
Export
BibTeX
@inproceedings{mamir:2014:aidarabic,
  title        = {{AIDArabic}: A Named-entity Disambiguation Framework for {Arabic} Text},
  author       = {Yosef, Mohamed Amir and Spaniol, Marc and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-937284-96-1},
  publisher    = {ACL},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {The EMNLP 2014 Workshop on Arabic Natural Language Processing (ANLP 2014)},
  pages        = {187--195},
  eid          = {W14-3626},
  address      = {Doha, Qatar},
}
Endnote
%0 Conference Proceedings %A Yosef, Mohamed Amir %A Spaniol, Marc %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T AIDArabic: A Named-entity Disambiguation Framework for Arabic Text : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-548F-A %D 2014 %B The EMNLP 2014 Workshop on Arabic Natural Language Processing %Z date of event: 2014-10-25 - 2014-10-25 %C Doha, Qatar %B The EMNLP 2014 Workshop on Arabic Natural Language Processing %P 187 - 195 %Z sequence number: W14-3626 %I ACL %@ 978-1-937284-96-1 %U http://www.aclweb.org/anthology/W14-3626
[153]
M. A. Yosef, J. Hoffart, Y. Ibrahim, A. Boldyrev, and G. Weikum, “Adapting AIDA for Tweets,” in Proceedings of the 4th Workshop on Making Sense of Microposts, Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{mamir:2014:aida-for-tweets,
  title        = {Adapting {AIDA} for Tweets},
  author       = {Yosef, Mohamed Amir and Hoffart, Johannes and Ibrahim, Yusra and Boldyrev, Artem and Weikum, Gerhard},
  language     = {eng},
  issn         = {1613-0073},
  url          = {urn:nbn:de:0074-1141-0},
  publisher    = {CEUR-WS.org},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {Proceedings of the 4th Workshop on Making Sense of Microposts},
  editor       = {Rowe, Matthew and Stankovic, Milan and Dadzie, Aba-Sah},
  pages        = {68--69},
  series       = {CEUR Workshop Proceedings},
  volume       = {1141},
  address      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Yosef, Mohamed Amir %A Hoffart, Johannes %A Ibrahim, Yusra %A Boldyrev, Artem %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Adapting AIDA for Tweets : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54AB-8 %D 2014 %B 4th Workshop on Making Sense of Microposts %Z date of event: 2014-04-07 - 2014-04-07 %C Seoul, Korea %B Proceedings of the 4th Workshop on Making Sense of Microposts %E Rowe, Matthew; Stankovic, Milan; Dadzie, Aba-Sah %P 68 - 69 %I CEUR-WS.org %B CEUR Workshop Proceedings %N 1141 %@ false %U http://ceur-ws.org/Vol-1141/paper_15.pdf
[154]
A. Zimek, I. Assent, and J. Vreeken, “Frequent Pattern Mining Algorithms for Data Clustering,” in Frequent Pattern Mining, New York, NY: Springer, 2014.
Export
BibTeX
@incollection{zimek:14:clustering,
  title        = {Frequent Pattern Mining Algorithms for Data Clustering},
  author       = {Zimek, Arthur and Assent, Ira and Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-3-319-07820-5},
  doi          = {10.1007/978-3-319-07821-2_16},
  publisher    = {Springer},
  address      = {New York, NY},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Frequent Pattern Mining},
  editor       = {Aggarwal, Charu C. and Han, Jiawei},
  pages        = {403--423},
}
Endnote
%0 Book Section %A Zimek, Arthur %A Assent, Ira %A Vreeken, Jilles %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Frequent Pattern Mining Algorithms for Data Clustering : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53BD-A %R 10.1007/978-3-319-07821-2_16 %D 2014 %B Frequent Pattern Mining %E Aggarwal, Charu C.; Han, Jiawei %P 403 - 423 %I Springer %C New York, NY %@ 978-3-319-07820-5
[155]
T. Zinchenko, “Redescription Mining Over non-Binary Data Sets Using Decision Trees,” Universität des Saarlandes, Saarbrücken, 2014.
Export
BibTeX
@mastersthesis{ZinchenkoMaster2014,
  title        = {Redescription Mining Over non-Binary Data Sets Using Decision Trees},
  author       = {Zinchenko, Tetiana},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
}
Endnote
%0 Thesis %A Zinchenko, Tetiana %Y Miettinen, Pauli %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Redescription Mining Over non-Binary Data Sets Using Decision Trees : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-B73A-5 %I Universität des Saarlandes %C Saarbrücken %D 2014 %P X, 118 p. %V master %9 master
2013
[156]
E. Aksehirli, B. Goethals, E. Müller, and J. Vreeken, “Cartification: A Neighborhood Preserving Transformation for Mining High Dimensional Data,” in IEEE 13th International Conference on Data Mining (ICDM 2013), Dallas, TX, USA, 2013.
Export
BibTeX
@inproceedings{Aksehirli2013a,
  title        = {Cartification: A Neighborhood Preserving Transformation for Mining High Dimensional Data},
  author       = {Aksehirli, Emin and Goethals, Bart and M{\"u}ller, Emmanuel and Vreeken, Jilles},
  language     = {eng},
  doi          = {10.1109/ICDM.2013.146},
  localid      = {Local-ID: 9972B38173345D64C1257C600054DB8E-Aksehirli2013a},
  publisher    = {IEEE},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  booktitle    = {IEEE 13th International Conference on Data Mining (ICDM 2013)},
  editor       = {Karypis, George and Xiong, Hui},
  pages        = {937--942},
  address      = {Dallas, TX, USA},
}
Endnote
%0 Conference Proceedings %A Aksehirli, Emin %A Goethals, Bart %A Müller, Emmanuel %A Vreeken, Jilles %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Cartification: A Neighborhood Preserving Transformation for Mining High Dimensional Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-19EA-5 %R 10.1109/ICDM.2013.146 %F OTHER: Local-ID: 9972B38173345D64C1257C600054DB8E-Aksehirli2013a %D 2013 %B 13th International Conference on Data Mining %Z date of event: 2013-12-07 - 2013-12-10 %C Dallas, TX, USA %B IEEE 13th International Conference on Data Mining %E Karypis, George; Xiong, Hui %P 937 - 942 %I IEEE
[157]
F. Alvanaki, E. Ilieva, S. Michel, and A. Stupar, “Interesting Event Detection through Hall of Fame Rankings,” in Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks (DBSocial 2013), New York, NY, USA, 2013.
Abstract
Everything is relative. Cars are compared by gas per mile, websites by page rank, students based on GPA, scientists by number of publications, and celebrities by beauty or wealth. In this paper, we study the characteristics of such entity rankings based on a set of rankings obtained from a popular Web portal. The obtained insights are integrated in our approach, coined Pantheon. Pantheon maintains sets of top-k rankings and reports identified changes in a way that appeals to users, using a novel combination of different characteristics like competitiveness, information entropy, and scale of change. Entity rankings are assembled by combining entity type attributes with data-driven categorical constraints and sorting criteria on numeric attributes. We report on the results of an experimental evaluation using real-world data obtained from a basketball statistics website.
Export
BibTeX
@inproceedings{Avlanaki2013b,
  title        = {Interesting Event Detection through Hall of Fame Rankings},
  author       = {Alvanaki, Foteini and Ilieva, Evica and Michel, Sebastian and Stupar, Aleksandar},
  language     = {eng},
  isbn         = {978-1-4503-2191-4},
  doi          = {10.1145/2484702.2484704},
  localid      = {Local-ID: BCF76B7E62BA3435C1257B9700501576-Avlanaki2013b},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {Everything is relative. Cars are compared by gas per mile, websites by page rank, students based on GPA, scientists by number of publications, and celebrities by beauty or wealth. In this paper, we study the characteristics of such entity rankings based on a set of rankings obtained from a popular Web portal. The obtained insights are integrated in our approach, coined Pantheon. Pantheon maintains sets of top-k rankings and reports identified changes in a way that appeals to users, using a novel combination of different characteristics like competitiveness, information entropy, and scale of change. Entity rankings are assembled by combining entity type attributes with data-driven categorical constraints and sorting criteria on numeric attributes. We report on the results of an experimental evaluation using real-world data obtained from a basketball statistics website.},
  booktitle    = {Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks (DBSocial 2013)},
  editor       = {LeFevre, Kristen and Machanavajjhala, Ashwin and Silberstein, Adam},
  pages        = {7--12},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Alvanaki, Foteini %A Ilieva, Evica %A Michel, Sebastian %A Stupar, Aleksandar %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Interesting Event Detection through Hall of Fame Rankings : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A8A-C %R 10.1145/2484702.2484704 %F OTHER: Local-ID: BCF76B7E62BA3435C1257B9700501576-Avlanaki2013b %D 2013 %B ACM SIGMOD Workshop on Databases and Social Networks %Z date of event: 2013-06-22 - 2013-06-27 %C New York, NY, USA %X Everything is relative. Cars are compared by gas per mile, websites by page rank, students based on GPA, scientists by number of publications, and celebrities by beauty or wealth. In this paper, we study the characteristics of such entity rankings based on a set of rankings obtained from a popular Web portal. The obtained insights are integrated in our approach, coined Pantheon. Pantheon maintains sets of top-k rankings and reports identified changes in a way that appeals to users, using a novel combination of different characteristics like competitiveness, information entropy, and scale of change. Entity rankings are assembled by combining entity type attributes with data-driven categorical constraints and sorting criteria on numeric attributes. We report on the results of an experimental evaluation using real-world data obtained from a basketball statistics website. %B Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks %E LeFevre, Kristen; Machanavajjhala, Ashwin; Silberstein, Adam %P 7 - 12 %I ACM %@ 978-1-4503-2191-4
[158]
F. Alvanaki and S. Michel, “Scalable, Continuous Tracking of Tag Co-occurrences Between Short Sets Using (Almost) Disjoint Tag Partitions,” in Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks (DBSocial 2013), New York, NY, USA, 2013.
Abstract
In this work we consider the continuous computation of set correlations over a stream of set-valued attributes, such as Tweets and their hashtags, social annotations of blog posts obtained through RSS, or updates to set-valued attributes of databases. In order to compute tag correlations in a distributed fashion, all necessary information has to be present at the computing node(s). Our approach makes use of a partitioning scheme based on set covers for efficient and replication-lean information flow. We report on the results of a preliminary performance evaluation using Tweets obtained through Twitter's streaming API.
Export
BibTeX
@inproceedings{Avlanaki2013a,
  title        = {Scalable, Continuous Tracking of Tag Co-occurrences Between Short Sets Using (Almost) Disjoint Tag Partitions},
  author       = {Alvanaki, Foteini and Michel, Sebastian},
  language     = {eng},
  isbn         = {978-1-4503-2191-4},
  doi          = {10.1145/2484702.2484705},
  localid      = {Local-ID: 305767E5408759CFC1257B97004FACE2-Avlanaki2013a},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {In this work we consider the continuous computation of set correlations over a stream of set-valued attributes, such as Tweets and their hashtags, social annotations of blog posts obtained through RSS, or updates to set-valued attributes of databases. In order to compute tag correlations in a distributed fashion, all necessary information has to be present at the computing node(s). Our approach makes use of a partitioning scheme based on set covers for efficient and replication-lean information flow. We report on the results of a preliminary performance evaluation using Tweets obtained through Twitter's streaming API.},
  booktitle    = {Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks (DBSocial 2013)},
  editor       = {LeFevre, Kristen and Machanavajjhala, Ashwin and Silberstein, Adam},
  pages        = {49--54},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Alvanaki, Foteini %A Michel, Sebastian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Scalable, Continuous Tracking of Tag Co-occurrences Between Short Sets Using (Almost) Disjoint Tag Partitions : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A81-D %R 10.1145/2484702.2484705 %F OTHER: Local-ID: 305767E5408759CFC1257B97004FACE2-Avlanaki2013a %D 2013 %B ACM SIGMOD Workshop on Databases and Social Networks %Z date of event: 2013-06-13 - 2013-06-13 %C New York, NY, USA %X In this work we consider the continuous computation of set correlations over a stream of set-valued attributes, such as Tweets and their hashtags, social annotations of blog posts obtained through RSS, or updates to set-valued attributes of databases. In order to compute tag correlations in a distributed fashion, all necessary information has to be present at the computing node(s). Our approach makes use of a partitioning scheme based on set covers for efficient and replication-lean information flow. We report on the results of a preliminary performance evaluation using Tweets obtained through Twitter's streaming API. %K Distributed Stream Processing, Tags, Twitter, Correlation, Continuous %B Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks %E LeFevre, Kristen; Machanavajjhala, Ashwin; Silberstein, Adam %P 49 - 54 %I ACM %@ 978-1-4503-2191-4
[159]
F. Alvanaki and S. Michel, “A Thin Monitoring Layer for Top-k Aggregation Queries over a Database,” in 7th International Workshop on Ranking in Databases (DBRank 2013), Riva del Garda, Italy, 2013.
Export
BibTeX
@inproceedings{AlvanakiMichel2013c,
  title        = {A Thin Monitoring Layer for Top-k Aggregation Queries over a Database},
  author       = {Alvanaki, Foteini and Michel, Sebastian},
  language     = {eng},
  isbn         = {978-1-4503-2497-7},
  doi          = {10.1145/2524828.2524831},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  booktitle    = {7th International Workshop on Ranking in Databases (DBRank 2013)},
  pages        = {1--6},
  eid          = {3},
  address      = {Riva del Garda, Italy},
}
Endnote
%0 Conference Proceedings %A Alvanaki, Foteini %A Michel, Sebastian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Thin Monitoring Layer for Top-k Aggregation Queries over a Database : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-CCDD-E %R 10.1145/2524828.2524831 %D 2013 %B 7th International Workshop on Ranking in Databases %Z date of event: 2013-08-30 - 2013-08-30 %C Riva del Garda, Italy %B 7th International Workshop on Ranking in Databases %P 1 - 6 %Z sequence number: 3 %I ACM %@ 978-1-4503-2497-7
[160]
A. Anand, “Indexing Methods for Web Archives,” Universität des Saarlandes, Saarbrücken, 2013.
Abstract
There have been numerous efforts recently to digitize previously published content and preserving born-digital content leading to the widespread growth of large text repositories. Web archives are such continuously growing text collections which contain versions of documents spanning over long time periods. Web archives present many opportunities for historical, cultural and political analyses. Consequently there is a growing need for tools which can efficiently access and search them. In this work, we are interested in indexing methods for supporting text-search workloads over web archives like time-travel queries and phrase queries. To this end we make the following contributions: Time-travel queries are keyword queries with a temporal predicate, e.g., mpii saarland @ [06/2009], which return versions of documents in the past. We introduce a novel index organization strategy, called index sharding, for efficiently supporting time-travel queries without incurring additional index-size blowup. We also propose index-maintenance approaches which scale to such continuously growing collections. We develop query-optimization techniques for time-travel queries called partition selection which maximizes recall at any given query-execution stage. We propose indexing methods to support phrase queries, e.g., to be or not to be that is the question. We index multi-word sequences and devise novel query-optimization methods over the indexed sequences to efficiently answer phrase queries. We demonstrate the superior performance of our approaches over existing methods by extensive experimentation on real-world web archives.
Export
BibTeX
@phdthesis{Anand2013,
  title        = {Indexing Methods for Web Archives},
  author       = {Anand, Avishek},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {There have been numerous efforts recently to digitize previously published content and preserving born-digital content leading to the widespread growth of large text repositories. Web archives are such continuously growing text collections which contain versions of documents spanning over long time periods. Web archives present many opportunities for historical, cultural and political analyses. Consequently there is a growing need for tools which can efficiently access and search them. In this work, we are interested in indexing methods for supporting text-search workloads over web archives like time-travel queries and phrase queries. To this end we make the following contributions: Time-travel queries are keyword queries with a temporal predicate, e.g., mpii saarland @ [06/2009], which return versions of documents in the past. We introduce a novel index organization strategy, called index sharding, for efficiently supporting time-travel queries without incurring additional index-size blowup. We also propose index-maintenance approaches which scale to such continuously growing collections. We develop query-optimization techniques for time-travel queries called partition selection which maximizes recall at any given query-execution stage. We propose indexing methods to support phrase queries, e.g., to be or not to be that is the question. We index multi-word sequences and devise novel query-optimization methods over the indexed sequences to efficiently answer phrase queries. We demonstrate the superior performance of our approaches over existing methods by extensive experimentation on real-world web archives.},
}
Endnote
%0 Thesis %A Anand, Avishek %Y Berberich, Klaus %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Indexing Methods for Web Archives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0026-CB4B-0 %I Universität des Saarlandes %C Saarbrücken %D 2013 %V phd %9 phd %X There have been numerous efforts recently to digitize previously published content and preserving born-digital content leading to the widespread growth of large text repositories. Web archives are such continuously growing text collections which contain versions of documents spanning over long time periods. Web archives present many opportunities for historical, cultural and political analyses. Consequently there is a growing need for tools which can efficiently access and search them. In this work, we are interested in indexing methods for supporting text-search workloads over web archives like time-travel queries and phrase queries. To this end we make the following contributions: Time-travel queries are keyword queries with a temporal predicate, e.g., mpii saarland @ [06/2009], which return versions of documents in the past. We introduce a novel index organization strategy, called index sharding, for efficiently supporting time-travel queries without incurring additional index-size blowup. We also propose index-maintenance approaches which scale to such continuously growing collections. We develop query-optimization techniques for time-travel queries called partition selection which maximizes recall at any given query-execution stage. We propose indexing methods to support phrase queries, e.g., to be or not to be that is the question. 
We index multi-word sequences and devise novel query-optimization methods over the indexed sequences to efficiently answer phrase queries. We demonstrate the superior performance of our approaches over existing methods by extensive experimentation on real-world web archives. %U http://scidok.sulb.uni-saarland.de/volltexte/2013/5531/http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[161]
F. Ansari, “A Comparative Study of MAX-SAT Solving Techniques with Soft and Hard Rules,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{AnsariMastersThesis2013,
  title        = {A Comparative Study of {MAX-SAT} Solving Techniques with Soft and Hard Rules},
  author       = {Ansari, Farzaneh},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
}
Endnote
%0 Thesis %A Ansari, Farzaneh %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Comparative Study of MAX-SAT Solving Techniques with Soft and Hard Rules : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C6B-5 %I Universität des Saarlandes %C Saarbrücken %D 2013 %V master %9 master
[162]
R. Awadallah, M. Ramanath, and G. Weikum, “OpinioNetIt: A Structured and Faceted Knowledge-base of Opinions,” in Proceedings of the 12th IEEE International Conference on Data Mining Workshops (ICDMW 2012), Brussels, Belgium, 2013.
Export
BibTeX
@inproceedings{Awadallah2012i,
  title        = {{OpinioNetIt}: A Structured and Faceted Knowledge-base of Opinions},
  author       = {Awadallah, Rawia and Ramanath, Maya and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4673-5164-5},
  doi          = {10.1109/ICDMW.2012.49},
  localid      = {Local-ID: 04756AF15FFC805BC1257B12002D6750-Awadallah2012i},
  publisher    = {IEEE},
  year         = {2012},
  marginalmark = {$\bullet$},
  date         = {2013},
  booktitle    = {Proceedings of the 12th IEEE International Conference on Data Mining Workshops (ICDMW 2012)},
  editor       = {Vreeken, Jilles and Ling, Charles and Javeed Zaki, Mohammed and Siebes, Arno and Yu, Jeffrey Xu and Goethals, Bart and Webb, Geoffrey I. and Wu, Xindong},
  pages        = {878--881},
  address      = {Brussels, Belgium},
}
Endnote
%0 Conference Proceedings %A Awadallah, Rawia %A Ramanath, Maya %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T OpinioNetIt: A Structured and Faceted Knowledge-base of Opinions : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-198C-C %F OTHER: Local-ID: 04756AF15FFC805BC1257B12002D6750-Awadallah2012i %R 10.1109/ICDMW.2012.49 %D 2013 %B 12th IEEE International Conference on Data Mining Workshops %Z date of event: 2012-12-10 - 2012-12-10 %C Brussels, Belgium %B Proceedings of the 12th IEEE International Conference on Data Mining Workshops %E Vreeken, Jilles; Ling, Charles; Javeed Zaki, Mohammed; Siebes, Arno; Yu, Jeffrey Xu; Goethals, Bart; Webb, Geoffrey I.; Wu, Xindong %P 878 - 881 %I IEEE %@ 978-1-4673-5164-5
[163]
S. Bedathur, K. Berberich, I. Patlakas, P. Triantafillou, and G. Weikum, “D-Hive: Data Bees Pollinating RDF, Text, and Time,” in Online Proceedings of Sixth Biennial Conference on Innovative Data Systems Research (CIDR 2013), Asilomar, CA, USA, 2013.
Export
BibTeX
@inproceedings{Bedathur2013,
  title        = {{D-Hive}: Data Bees Pollinating {RDF}, Text, and Time},
  author       = {Bedathur, Srikanta and Berberich, Klaus and Patlakas, Ioannis and Triantafillou, Peter and Weikum, Gerhard},
  language     = {eng},
  localid      = {Local-ID: D3BAD8992F713EB5C1257B10002BB930-Bedathur2013},
  publisher    = {cidrdb.org},
  year         = {2013},
  marginalmark = {$\bullet$},
  booktitle    = {Online Proceedings of Sixth Biennial Conference on Innovative Data Systems Research (CIDR 2013)},
  eid          = {73},
  address      = {Asilomar, CA, USA},
}
Endnote
%0 Conference Proceedings %A Bedathur, Srikanta %A Berberich, Klaus %A Patlakas, Ioannis %A Triantafillou, Peter %A Weikum, Gerhard %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T D-Hive: Data Bees Pollinating RDF, Text, and Time : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A7C-A %F OTHER: Local-ID: D3BAD8992F713EB5C1257B10002BB930-Bedathur2013 %D 2013 %B Sixth Biennial Conference on Innovative Data Systems Research %Z date of event: 2013-01-06 - 2013-01-09 %C Asilomar, CA, USA %B Online Proceedings of Sixth Biennial Conference on Innovative Data Systems Research %Z sequence number: 73 %I cidrdb.org %U http://www.cidrdb.org/cidr2013/Papers/CIDR13_Paper73.pdf
[164]
K. Beedkar, L. Del Corro, and R. Gemulla, “Fully Parallel Inference in Markov Logic Networks,” in 15th GI-Symposium Database Systems for Business, Technology and Web (BTW 2013), Magdeburg, Germany, 2013.
Export
BibTeX
@inproceedings{bcg-btw13,
  title        = {Fully Parallel Inference in {M}arkov Logic Networks},
  author       = {Beedkar, Kaustubh and Del Corro, Luciano and Gemulla, Rainer},
  language     = {eng},
  isbn         = {978-3-88579-608-4},
  localid      = {Local-ID: BB228B55B464BC71C1257B08003A6698-bcg-btw13},
  publisher    = {GI},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  booktitle    = {15th GI-Symposium Database Systems for Business, Technology and Web (BTW 2013)},
  editor       = {Saake, Gunther},
  address      = {Magdeburg, Germany},
}
Endnote
%0 Conference Proceedings %A Beedkar, Kaustubh %A Del Corro, Luciano %A Gemulla, Rainer %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Fully Parallel Inference in Markov Logic Networks : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1989-1 %F OTHER: Local-ID: BB228B55B464BC71C1257B08003A6698-bcg-btw13 %D 2013 %B 15th GI-Symposium Database Systems for Business, Technology and Web %Z date of event: 2013-03-11 - 2013-03-15 %C Magdeburg, Germany %B 15th GI-Symposium Database Systems for Business, Technology and Web %E Saake, Gunther %I GI %@ 978-3-88579-608-4 %U http://www.btw-2013.de/proceedings/Fully%20Parallel%20Inference%20in%20Markov%20Logic%20Networks.pdf
[165]
R. Belet, “Leveraging Independence and Locality for Random Forests in a Distributed Environment,” Universität des Saarlandes, Saarbrücken, 2013.
Abstract
With the emergence of big data, inducting regression trees on very large data sets became a common data mining task. Even though centralized algorithms for computing ensembles of Classification/Regression trees are a well studied machine learning/data mining problem, their distributed versions still raise scalability, efficiency and accuracy issues. Most state of the art tree learning algorithms require data to reside in memory on a single machine. Adopting this approach for trees on big data is not feasible as the limited resources provided by only one machine lead to scalability problems. While more scalable implementations of tree learning algorithms have been proposed, they typically require specialized parallel computing architectures rendering those algorithms complex and error-prone. In this thesis we will introduce two approaches to computing ensembles of regression trees on very large training data sets using the MapReduce framework as an underlying tool. The first approach employs the entire MapReduce cluster to parallely and fully distributedly learn tree ensembles. The second approach exploits locality and independence in the tree learning process.
Export
BibTeX
@mastersthesis{Belet2013,
  title        = {Leveraging Independence and Locality for Random Forests in a Distributed Environment},
  author       = {Belet, Razvan},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {With the emergence of big data, inducting regression trees on very large data sets became a common data mining task. Even though centralized algorithms for computing ensembles of Classification/Regression trees are a well studied machine learning/data mining problem, their distributed versions still raise scalability, efficiency and accuracy issues. Most state of the art tree learning algorithms require data to reside in memory on a single machine. Adopting this approach for trees on big data is not feasible as the limited resources provided by only one machine lead to scalability problems. While more scalable implementations of tree learning algorithms have been proposed, they typically require specialized parallel computing architectures rendering those algorithms complex and error-prone. In this thesis we will introduce two approaches to computing ensembles of regression trees on very large training data sets using the MapReduce framework as an underlying tool. The first approach employs the entire MapReduce cluster to parallely and fully distributedly learn tree ensembles. The second approach exploits locality and independence in the tree learning process.},
}
Endnote
%0 Thesis %A Belet, Razvan %Y Weikum, Gerhard %A referee: Schenkel, Ralf %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Leveraging Independence and Locality for Random Forests in a Distributed Environment : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-97B8-0 %I Universität des Saarlandes %C Saarbrücken %D 2013 %P 132 p. %V master %9 master %X With the emergence of big data, inducting regression trees on very large data sets became a common data mining task. Even though centralized algorithms for computing ensembles of Classification/Regression trees are a well studied machine learning/data mining problem, their distributed versions still raise scalability, efficiency and accuracy issues. Most state of the art tree learning algorithms require data to reside in memory on a single machine. Adopting this approach for trees on big data is not feasible as the limited resources provided by only one machine lead to scalability problems. While more scalable implementations of tree learning algorithms have been proposed, they typically require specialized parallel computing architectures rendering those algorithms complex and error-prone. In this thesis we will introduce two approaches to computing ensembles of regression trees on very large training data sets using the MapReduce framework as an underlying tool. The first approach employs the entire MapReduce cluster to parallely and fully distributedly learn tree ensembles. The second approach exploits locality and independence in the tree learning process.
[166]
P. Bellot, A. Doucet, S. Geva, S. Gurajada, J. Kamps, G. Kazai, M. Koolen, A. Mishra, V. Moriceau, J. Mothe, M. Preminger, E. SanJuan, R. Schenkel, X. Tannier, M. Theobald, M. Trappett, and Q. Wang, “Overview of INEX 2013,” in Information Access Evaluation : Multilinguality, Multimodality, and Visualization (CLEF 2013), Valencia, Spain, 2013.
Abstract
INEX investigates focused retrieval from structured documents by providing large test collections of structured documents, uniform evaluation measures, and a forum for organizations to compare their results. This paper reports on the INEX 2013 evaluation campaign, which consisted of a four activities addressing three themes: searching professional and user generated data (Social Book Search track); searching structured or semantic data (Linked Data track); and focused retrieval (Snippet Retrieval and Tweet Contextualization tracks). INEX 2013 was an exciting year for INEX in which we consolidated the collaboration with (other activities in) CLEF and for the second time ran our workshop as part of the CLEF labs in order to facilitate knowledge transfer between the evaluation forums. This paper gives an overview of all the INEX 2013 tracks, their aims and task, the built test-collections, and gives an initial analysis of the results.
Export
BibTeX
@inproceedings{INEX-Kamps2012,
  title        = {Overview of {INEX} 2013},
  author       = {Bellot, Patrice and Doucet, Antoine and Geva, Shlomo and Gurajada, Sairam and Kamps, Jaap and Kazai, Gabriella and Koolen, Marijn and Mishra, Arunav and Moriceau, Veronique and Mothe, Josiane and Preminger, Michael and SanJuan, Eric and Schenkel, Ralf and Tannier, Xavier and Theobald, Martin and Trappett, Matthew and Wang, Qiuyue},
  language     = {eng},
  issn         = {0302-9743},
  isbn         = {978-3-642-40801-4},
  doi          = {10.1007/978-3-642-40802-1_27},
  localid      = {Local-ID: E0D7037ADFDDA1C6C1257BBB003D49E2-INEX-Kamps2012},
  publisher    = {Springer},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {INEX investigates focused retrieval from structured documents by providing large test collections of structured documents, uniform evaluation measures, and a forum for organizations to compare their results. This paper reports on the INEX 2013 evaluation campaign, which consisted of a four activities addressing three themes: searching professional and user generated data (Social Book Search track); searching structured or semantic data (Linked Data track); and focused retrieval (Snippet Retrieval and Tweet Contextualization tracks). INEX 2013 was an exciting year for INEX in which we consolidated the collaboration with (other activities in) CLEF and for the second time ran our workshop as part of the CLEF labs in order to facilitate knowledge transfer between the evaluation forums. This paper gives an overview of all the INEX 2013 tracks, their aims and task, the built test-collections, and gives an initial analysis of the results.},
  booktitle    = {Information Access Evaluation : Multilinguality, Multimodality, and Visualization (CLEF 2013)},
  editor       = {Forner, Pamela and M{\"u}ller, Henning and Paredes, Roberto and Rosso, Paolo and Stein, Benno},
  pages        = {269--281},
  series       = {Lecture Notes in Computer Science},
  volume       = {8138},
  address      = {Valencia, Spain},
}
Endnote
%0 Conference Proceedings %A Bellot, Patrice %A Doucet, Antoine %A Geva, Shlomo %A Gurajada, Sairam %A Kamps, Jaap %A Kazai, Gabriella %A Koolen, Marijn %A Mishra, Arunav %A Moriceau, Veronique %A Mothe, Josiane %A Preminger, Michael %A SanJuan, Eric %A Schenkel, Ralf %A Tannier, Xavier %A Theobald, Martin %A Trappett, Matthew %A Wang, Qiuyue %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations %T Overview of INEX 2013 : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A78-1 %R 10.1007/978-3-642-40802-1_27 %F OTHER: Local-ID: E0D7037ADFDDA1C6C1257BBB003D49E2-INEX-Kamps2012 %D 2013 %B 4th International Conference of the CLEF Initiative %Z date of event: 2013-09-23 - 2013-09-26 %C Valencia, Spain %X INEX investigates focused retrieval from structured docu- ments by providing large test collections of structured documents, uni- form evaluation measures, and a forum for organizations to compare their results. This paper reports on the INEX 2013 evaluation campaign, which consisted of a four activities addressing three themes: searching profes- sional and user generated data (Social Book Search track); searching structured or semantic data (Linked Data track); and focused retrieval (Snippet Retrieval and Tweet Contextualization tracks). INEX 2013 was an exciting year for INEX in which we consolidated the collaboration with (other activities in) CLEF and for the second time ran our work- shop as part of the CLEF labs in order to facilitate knowledge transfer between the evaluation forums. 
This paper gives an overview of all the INEX 2013 tracks, their aims and task, the built test-collections, and gives an initial analysis of the results. %B Information Access Evaluation : Multilinguality, Multimodality, and Visualization %E Forner, Pamela; Müller, Henning; Paredes, Roberto; Rosso, Paolo; Stein, Benno %P 269 - 281 %I Springer %@ 978-3-642-40801-4 %B Lecture Notes in Computer Science %N 8138 %@ false
[167]
P. Bellot, A. Doucet, S. Geva, S. Gurajada, J. Kamps, G. Kazai, M. Koolen, A. Mishra, V. Moriceau, J. Mothe, M. Preminger, E. SanJuan, R. Schenkel, X. Tannier, M. Theobald, M. Trappett, A. Trotman, M. Sanderson, F. Scholer, and Q. Wang, “Report on INEX 2013,” SIGIR Forum, vol. 47, no. 2, 2013.
Export
BibTeX
@article{INEX_SIGIRF2013,
  title        = {Report on {INEX 2013}},
  author       = {Bellot, Patrice and Doucet, Antoine and Geva, Shlomo and Gurajada, Sairam and Kamps, Jaap and Kazai, Gabriella and Koolen, Marijn and Mishra, Arunav and Moriceau, V{\'e}ronique and Mothe, Josiane and Preminger, Michael and SanJuan, Eric and Schenkel, Ralf and Tannier, Xavier and Theobald, Martin and Trappett, Matthew and Trotman, Andrew and Sanderson, Mark and Scholer, Falk and Wang, Qiuyue},
  language     = {eng},
  issn         = {0163-5840},
  doi          = {10.1145/2568388.2568393},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  journal      = {SIGIR Forum},
  volume       = {47},
  number       = {2},
  pages        = {21--32},
}
Endnote
%0 Journal Article %A Bellot, Patrice %A Doucet, Antoine %A Geva, Shlomo %A Gurajada, Sairam %A Kamps, Jaap %A Kazai, Gabriella %A Koolen, Marijn %A Mishra, Arunav %A Moriceau, Véronique %A Mothe, Josiane %A Preminger, Michael %A SanJuan, Eric %A Schenkel, Ralf %A Tannier, Xavier %A Theobald, Martin %A Trappett, Matthew %A Trotman, Andrew %A Sanderson, Mark %A Scholer, Falk %A Wang, Qiuyue %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations %T Report on INEX 2013 : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0019-82A3-0 %R 10.1145/2568388.2568393 %7 2013-12 %D 2013 %J SIGIR Forum %V 47 %N 2 %& 21 %P 21 - 32 %@ false
[168]
K. Berberich and S. Bedathur, “Computing n-gram Statistics in MapReduce,” in Advances in Database Technology (EDBT 2013), Genova, Italy, 2013.
Export
BibTeX
@inproceedings{Berberich2013b,
  title        = {Computing n-gram Statistics in {MapReduce}},
  author       = {Berberich, Klaus and Bedathur, Srikanta},
  language     = {eng},
  isbn         = {978-1-4503-1597-5},
  doi          = {10.1145/2452376.2452389},
  localid      = {Local-ID: 31F260D05B735433C1257B09003B1404-Berberich2013b},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  booktitle    = {Advances in Database Technology (EDBT 2013)},
  pages        = {101--112},
  address      = {Genova, Italy},
}
Endnote
%0 Conference Proceedings %A Berberich, Klaus %A Bedathur, Srikanta %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Computing n-gram Statistics in MapReduce : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-19CE-5 %F OTHER: Local-ID: 31F260D05B735433C1257B09003B1404-Berberich2013b %R 10.1145/2452376.2452389 %D 2013 %B 16th International Conference on Extending Database Technology %Z date of event: 2013-03-18 - 2013-03-22 %C Genova, Italy %B Advances in Database Technology %P 101 - 112 %I ACM %@ 978-1-4503-1597-5
[169]
K. Berberich and S. Bedathur, “Temporal Diversification of Search Results,” in SIGIR 2013 Workshop on Time-aware Information Access (TAIA 2013), Dublin, Ireland, 2013.
Export
BibTeX
@inproceedings{Berberich2013g,
  title        = {Temporal Diversification of Search Results},
  author       = {Berberich, Klaus and Bedathur, Srikanta},
  language     = {eng},
  url          = {http://research.microsoft.com/en-us/people/milads/taia2013.proceedings.final.pdf},
  localid      = {Local-ID: F06E854555530CFBC1257C6E0023BA55-Berberich2013g},
  publisher    = {Microsoft Research},
  year         = {2013},
  marginalmark = {$\bullet$},
  booktitle    = {SIGIR 2013 Workshop on Time-aware Information Access (TAIA 2013)},
  editor       = {Diaz, Fernando and Dumais, Susan and Radinsky, Kira and de Rijke, Maarten and Shokouhi, Milad},
  address      = {Dublin, Ireland},
}
Endnote
%0 Conference Proceedings %A Berberich, Klaus %A Bedathur, Srikanta %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Temporal Diversification of Search Results : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A73-B %F OTHER: Local-ID: F06E854555530CFBC1257C6E0023BA55-Berberich2013g %U http://research.microsoft.com/en-us/people/milads/taia2013.proceedings.final.pdf %D 2013 %B SIGIR 2013 Workshop on Time-aware Information Access %Z date of event: 2013-08-01 - 2013-08-01 %C Dublin, Ireland %B SIGIR 2013 Workshop on Time-aware Information Access %E Diaz, Fernando; Dumais, Susan; Radinsky, Kira; de Rijke, Maarten; Shokouhi, Milad %I Microsoft Research %U http://research.microsoft.com/en-us/people/milads/taia2013.proceedings.final.pdf
[170]
J. Biega, E. Kuzey, and F. M. Suchanek, “Inside YAGO2s: A Transparent Information Extraction Architecture,” in WWW’13, 22nd International Conference on World Wide Web, Rio de Janeiro, Brasil, 2013.
Export
BibTeX
@inproceedings{Biega:2013:IYT:2487788.2487935,
  title        = {Inside {YAGO2s}: A Transparent Information Extraction Architecture},
  author       = {Biega, Joanna and Kuzey, Erdal and Suchanek, Fabian M.},
  language     = {eng},
  isbn         = {978-1-4503-2038-2},
  url          = {http://dl.acm.org/citation.cfm?id=2487788.2487935},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  booktitle    = {WWW'13, 22nd International Conference on World Wide Web},
  editor       = {Schwabe, Daniel and Almeida, Virgilio and Glaser, Hartmut and Baeza-Yates, Ricardo and Moon, Sue},
  pages        = {325--328},
  address      = {Rio de Janeiro, Brasil},
}
Endnote
%0 Conference Proceedings %A Biega, Joanna %A Kuzey, Erdal %A Suchanek, Fabian M. %+ Ontologies, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Ontologies, MPI for Informatics, Max Planck Society %T Inside YAGO2s: A Transparent Information Extraction Architecture : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-54E3-C %U http://dl.acm.org/citation.cfm?id=2487788.2487935 %D 2013 %B 22nd International Conference on World Wide Web %Z date of event: 2013-05-13 - 2013-05-17 %C Rio de Janeiro, Brasil %K information extraction, ontologies, yago %B WWW'13 %E Schwabe, Daniel; Almeida, Virgilio; Glaser, Hartmut; Baeza-Yates, Ricardo; Moon, Sue %P 325 - 328 %I ACM %@ 978-1-4503-2038-2
[171]
A. Boldyrev, “Dictionary-based Named Entity Recognition,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{BoldyrevMastersThesis2013,
  title        = {Dictionary-based Named Entity Recognition},
  author       = {Boldyrev, Artem},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
}
Endnote
%0 Thesis %A Boldyrev, Artem %Y Weikum, Gerhard %A referee: Theobalt, Christian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Dictionary-based Named Entity Recognition : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C74-F %I Universität des Saarlandes %C Saarbrücken %D 2013 %V master %9 master
[172]
E. Cergani and P. Miettinen, “Discovering Relations Using Matrix Factorization Methods,” in CIKM’13, 22nd ACM International Conference on Information & Knowledge Management, San Francisco, CA, USA, 2013.
Abstract
Traditional relation extraction methods work on manually defined relations and typically expect manually labelled extraction patterns for each relation. This strongly limits the scalability of these systems. In Open Relation Extraction (ORE), the relations are identified automatically based on co-occurrences of ``surface relations'' (contexts) and entity pairs. The recently-proposed methods for ORE use partition clustering to find the relations. In this work we propose the use of matrix factorization methods instead of clustering. Specifically, we study Non-Negative Matrix Factorization (NMF) and Boolean Matrix Factorization (BMF). These methods overcome many problems inherent in clustering and perform better than the k-means clustering in our evaluation.
Export
BibTeX
@inproceedings{cergani13discovering, TITLE = {Discovering Relations Using Matrix Factorization Methods}, AUTHOR = {Cergani, Ervina and Miettinen, Pauli}, LANGUAGE = {eng}, ISBN = {978-1-4503-2263-8}, DOI = {10.1145/2505515.2507841}, LOCALID = {Local-ID: B85EF949714E8A6EC1257C6A00608792-cergani13discovering}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, ABSTRACT = {Traditional relation extraction methods work on manually defined relations and typically expect manually labelled extraction patterns for each relation. This strongly limits the scalability of these systems. In Open Relation Extraction (ORE), the relations are identified automatically based on co-occurrences of ``surface relations'' (contexts) and entity pairs. The recently-proposed methods for ORE use partition clustering to find the relations. In this work we propose the use of matrix factorization methods instead of clustering. Specifically, we study Non-Negative Matrix Factorization (NMF) and Boolean Matrix Factorization (BMF). These methods overcome many problems inherent in clustering and perform better than the k-means clustering in our evaluation.}, BOOKTITLE = {CIKM{\textquoteright}13, 22nd ACM International Conference on Information \& Knowledge Management}, EDITOR = {Nejdl, Wolfgang and Pei, Jian and Rastogi, Rajeev}, PAGES = {1549--1552}, ADDRESS = {San Francisco, CA, USA}, }
Endnote
%0 Conference Proceedings %A Cergani, Ervina %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering Relations Using Matrix Factorization Methods : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-19DB-7 %F OTHER: Local-ID: B85EF949714E8A6EC1257C6A00608792-cergani13discovering %R 10.1145/2505515.2507841 %D 2013 %B 22nd ACM International Conference on Information & Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %X Traditional relation extraction methods work on manually defined relations and typically expect manually labelled extraction patterns for each relation. This strongly limits the scalability of these systems. In Open Relation Extraction (ORE), the relations are identified automatically based on co-occurrences of ``surface relations'' (contexts) and entity pairs. The recently-proposed methods for ORE use partition clustering to find the relations. In this work we propose the use of matrix factorization methods instead of clustering. Specifically, we study Non-Negative Matrix Factorization (NMF) and Boolean Matrix Factorization (BMF). These methods overcome many problems inherent in clustering and perform better than the k-means clustering in our evaluation. %B CIKM’13 %E Nejdl, Wolfgang; Pei, Jian; Rastogi, Rajeev %P 1549 - 1552 %I ACM %@ 978-1-4503-2263-8
[173]
D. H. Chau, J. Vreeken, M. van Leeuwen, and C. Faloutsos, Eds., Proceedings of the ACM SIGKDD Full-day Workshop on Interactive Data Exploration and Analytics. ACM, 2013.
Export
BibTeX
@proceedings{Chau2013a, TITLE = {Proceedings of the ACM SIGKDD Full-day Workshop on Interactive Data Exploration and Analytics (IDEA 2013)}, EDITOR = {Chau, Duen Horng and Vreeken, Jilles and van Leeuwen, Matthijs and Faloutsos, Christos}, LANGUAGE = {eng}, ISBN = {978-1-4503-2329-1}, LOCALID = {Local-ID: 1F669F9DC4CC9410C1257C60005593D1-Chau2013a}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, PAGES = {103}, ADDRESS = {Chicago, IL, USA}, }
Endnote
%0 Conference Proceedings %E Chau, Duen Horng %E Vreeken, Jilles %E van Leeuwen, Matthijs %E Faloutsos, Christos %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Proceedings of the ACM SIGKDD Full-day Workshop on Interactive Data Exploration and Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-19E0-A %F OTHER: Local-ID: 1F669F9DC4CC9410C1257C60005593D1-Chau2013a %@ 978-1-4503-2329-1 %I ACM %D 2013 %B ACM SIGKDD Full-day Workshop on Interactive Data Exploration and Analytics %Z date of event: 2013-08-11 - 2013-08-11 %D 2013 %C Chicago, IL, USA %P 103
[174]
O. Čulo and G. de Melo, “Source-Path-Goal: Investigating the Cross-Linguistic Potential of Frame-Semantic Text Analysis,” Information Technology, vol. 54, no. 3, 2013.
Export
BibTeX
@article{CuloDeMelo2012, TITLE = {Source-Path-Goal: Investigating the Cross-Linguistic Potential of Frame-Semantic Text Analysis}, AUTHOR = {{\v C}ulo, Oliver and de Melo, Gerard}, LANGUAGE = {eng}, ISSN = {1611-2776}, LOCALID = {Local-ID: 4B73EA65B090D965C1257B11002D73A2-CuloDeMelo2012}, PUBLISHER = {Oldenbourg Wissenschaftsverlag}, ADDRESS = {M{\"u}nchen}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, JOURNAL = {Information Technology}, VOLUME = {54}, NUMBER = {3}, PAGES = {147--152}, }
Endnote
%0 Journal Article %A Čulo, Oliver %A de Melo, Gerard %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Source-Path-Goal: Investigating the Cross-Linguistic Potential of Frame-Semantic Text Analysis : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A44-6 %F OTHER: Local-ID: 4B73EA65B090D965C1257B11002D73A2-CuloDeMelo2012 %D 2013 %J Information Technology %O it %V 54 %N 3 %& 147 %P 147 - 152 %I Oldenbourg Wissenschaftsverlag %C München %@ false
[175]
M. Daivandy, D. Hünich, R. Jäkel, S. Metzger, R. Müller-Pfefferkorn, and B. Schuller, “Heterogeneous Resource Federation with a Centralized Security Model for Information Extraction,” Journal of Internet Services and Applications, vol. 4, 2013.
Export
BibTeX
@article{MetzgerJISA2012, TITLE = {Heterogeneous Resource Federation with a Centralized Security Model for Information Extraction}, AUTHOR = {Daivandy, Milad and H{\"u}nich, Denis and J{\"a}kel, Rene and Metzger, Steffen and M{\"u}ller-Pfefferkorn, Ralph and Schuller, Bernd}, LANGUAGE = {eng}, ISSN = {1869-0238}, DOI = {10.1186/1869-0238-4-10}, LOCALID = {Local-ID: 9D149AAF29E33BCCC1257B83000D0937-MetzgerJISA2012}, PUBLISHER = {Springer}, ADDRESS = {New York, NY}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, JOURNAL = {Journal of Internet Services and Applications}, VOLUME = {4}, PAGES = {1--14}, EID = {10}, }
Endnote
%0 Journal Article %A Daivandy, Milad %A Hünich, Denis %A Jäkel, Rene %A Metzger, Steffen %A Müller-Pfefferkorn, Ralph %A Schuller, Bernd %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Heterogeneous Resource Federation with a Centralized Security Model for Information Extraction : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-6395-C %R 10.1186/1869-0238-4-10 %F OTHER: Local-ID: 9D149AAF29E33BCCC1257B83000D0937-MetzgerJISA2012 %7 2013-03-20 %D 2013 %8 20.03.2013 %J Journal of Internet Services and Applications %V 4 %& 1 %P 1 - 14 %Z sequence number: 10 %I Springer %C New York, NY %@ false %U http://www.jisajournal.com/content/4/1/10
[176]
L. Del Corro and R. Gemulla, “ClausIE: Clause-Based Open Information Extraction,” in WWW’13, 22nd International Conference on World Wide Web, Rio de Janeiro, Brazil, 2013.
Export
BibTeX
@inproceedings{ClausIE, TITLE = {{ClausIE}: Clause-Based Open Information Extraction}, AUTHOR = {Del Corro, Luciano and Gemulla, Rainer}, LANGUAGE = {eng}, ISBN = {978-1-4503-2035-1}, URL = {http://dl.acm.org/citation.cfm?id=2488388.2488420}, LOCALID = {Local-ID: 937BBDB401D54B01C1257B10003FDEFF-ClausIE}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, BOOKTITLE = {WWW'13, 22nd International Conference on World Wide Web}, EDITOR = {Schwabe, Daniel and Almeida, Virgilio and Glaser, Hartmut and Baeza-Yates, Ricardo and Moon, Sue}, PAGES = {355--366}, ADDRESS = {Rio de Janeiro, Brazil}, }
Endnote
%0 Conference Proceedings %A Del Corro, Luciano %A Gemulla, Rainer %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T ClausIE: Clause-Based Open Information Extraction : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A3A-B %F OTHER: Local-ID: 937BBDB401D54B01C1257B10003FDEFF-ClausIE %U http://dl.acm.org/citation.cfm?id=2488388.2488420 %D 2013 %B 22nd International Conference on World Wide Web %Z date of event: 2013-05-13 - 2013-05-17 %C Rio de Janeiro, Brazil %B WWW'13 %E Schwabe, Daniel; Almeida, Virgilio; Glaser, Hartmut; Baeza-Yates, Ricardo; Moon, Sue %P 355 - 366 %I ACM %@ 978-1-4503-2035-1
[177]
A. de Oliveira Melo, “Learning Rules With Categorical Attributes from Linked Data Sources,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{MeloMastersThesis2013, TITLE = {Learning Rules With Categorical Attributes from Linked Data Sources}, AUTHOR = {de Oliveira Melo, Andr{\'e}}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, }
Endnote
%0 Thesis %A de Oliveira Melo, André %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Learning Rules With Categorical Attributes from Linked Data Sources : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C54-8 %I Universität des Saarlandes %C Saarbrücken %D 2013 %V master %9 master
[178]
S. Dutta, A. Narang, and S. K. Bera, “Streaming Quotient Filter: A Near Optimal Approximate Duplicate Detection Approach for Data Streams,” Proceedings of the VLDB Endowment (Proc. VLDB 2013), vol. 6, no. 8, 2013.
Export
BibTeX
@article{SouVLDB2013, TITLE = {Streaming Quotient Filter: A Near Optimal Approximate Duplicate Detection Approach for Data Streams}, AUTHOR = {Dutta, Sourav and Narang, Ankur and Bera, Suman K.}, LANGUAGE = {eng}, ISSN = {2150-8097}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, JOURNAL = {Proceedings of the VLDB Endowment (Proc. VLDB)}, VOLUME = {6}, NUMBER = {8}, PAGES = {589--600}, BOOKTITLE = {Proceedings of the 39th International Conference on Very Large Data Bases (VLDB 2013)}, EDITOR = {B{\"o}hlen, Michael and Koch, Christoph}, }
Endnote
%0 Journal Article %A Dutta, Sourav %A Narang, Ankur %A Bera, Suman K. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Streaming Quotient Filter: A Near Optimal Approximate Duplicate Detection Approach for Data Streams : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54B0-9 %D 2013 %J Proceedings of the VLDB Endowment %O PVLDB %V 6 %N 8 %& 589 %P 589 - 600 %I ACM %C New York, NY %@ false %B Proceedings of the 39th International Conference on Very Large Data Bases %O Riva del Garda, Trento, Italy VLDB 2013 %U http://www.vldb.org/pvldb/vol6/p589-dutta.pdf
[179]
M. Dylla, I. Miliaraki, and M. Theobald, “A Temporal-probabilistic Database Model for Information Extraction,” Proceedings of the VLDB Endowment (Proc. VLDB 2013), vol. 6, no. 14, 2013.
Export
BibTeX
@article{DBLP:journals/pvldb/DyllaMT13, TITLE = {A Temporal-probabilistic Database Model for Information Extraction}, AUTHOR = {Dylla, Maximilian and Miliaraki, Iris and Theobald, Martin}, LANGUAGE = {eng}, ISSN = {2150-8097}, URL = {http://www.vldb.org/pvldb/vol6/p1810-miliaraki.pdf}, LOCALID = {Local-ID: F77B765948DFB562C1257BEF002A1315-Dylla-VLDB2013}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, JOURNAL = {Proceedings of the VLDB Endowment (Proc. VLDB)}, VOLUME = {6}, NUMBER = {14}, PAGES = {1810--1821}, BOOKTITLE = {Proceedings of the 39th International Conference on Very Large Data Bases (VLDB 2013)}, EDITOR = {B{\"o}hlen, Michael and Koch, Christoph}, }
Endnote
%0 Journal Article %A Dylla, Maximilian %A Miliaraki, Iris %A Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T A Temporal-probabilistic Database Model for Information Extraction : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1716-2 %F OTHER: Local-ID: F77B765948DFB562C1257BEF002A1315-Dylla-VLDB2013 %U http://www.vldb.org/pvldb/vol6/p1810-miliaraki.pdf %7 2013 %D 2013 %J Proceedings of the VLDB Endowment %O PVLDB %V 6 %N 14 %& 1810 %P 1810 - 1821 %I ACM %C New York, NY %@ false %B Proceedings of the 39th International Conference on Very Large Data Bases %O Riva del Garda, Trento, Italy VLDB 2013
[180]
M. Dylla, I. Miliaraki, and M. Theobald, “Top-k Query Processing in Probabilistic Databases with Non-materialized Views,” in 29th International IEEE Conference on Data Engineering (ICDE 2013), Brisbane, Australia, 2013.
Export
BibTeX
@inproceedings{DyllaICDE2013, TITLE = {Top-k Query Processing in Probabilistic Databases with Non-materialized Views}, AUTHOR = {Dylla, Maximilian and Miliaraki, Iris and Theobald, Martin}, LANGUAGE = {eng}, ISBN = {978-1-4673-4909-3 ; 978-1-4673-4908-6}, DOI = {10.1109/ICDE.2013.6544819}, LOCALID = {Local-ID: 41ABA8E9D9176C38C1257B0C00538601-DyllaICDE2013}, PUBLISHER = {IEEE}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, BOOKTITLE = {29th International IEEE Conference on Data Engineering (ICDE 2013)}, PAGES = {122--133}, ADDRESS = {Brisbane, Australia}, }
Endnote
%0 Conference Proceedings %A Dylla, Maximilian %A Miliaraki, Iris %A Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Top-k Query Processing in Probabilistic Databases with Non-materialized Views : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-639B-F %R 10.1109/ICDE.2013.6544819 %F OTHER: Local-ID: 41ABA8E9D9176C38C1257B0C00538601-DyllaICDE2013 %D 2013 %B 29th International IEEE Conference on Data Engineering %Z date of event: 2013-04-08 - 2013-04-12 %C Brisbane, Australia %B 29th International IEEE Conference on Data Engineering %P 122 - 133 %I IEEE %@ 978-1-4673-4909-3 978-1-4673-4908-6
[181]
D. Erdős and P. Miettinen, “Walk’n’Merge: A Scalable Algorithm for Boolean Tensor Factorization,” in IEEE 13th International Conference on Data Mining (ICDM 2013), Dallas, TX, USA, 2013.
Abstract
Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure.
Export
BibTeX
@inproceedings{erdos13walknmerge, TITLE = {{Walk'n'Merge}: A Scalable Algorithm for {Boolean} Tensor Factorization}, AUTHOR = {Erd{\H o}s, D{\'o}ra and Miettinen, Pauli}, LANGUAGE = {eng}, DOI = {10.1109/ICDM.2013.141}, LOCALID = {Local-ID: 4CE63F9DEBEF8E5EC1257C6A00610B3D-erdos13discovering}, PUBLISHER = {IEEE}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure.}, BOOKTITLE = {IEEE 13th International Conference on Data Mining (ICDM 2013)}, PAGES = {1037--1042}, ADDRESS = {Dallas, TX, USA}, }
Endnote
%0 Conference Proceedings %A Erdős, Dóra %A Miettinen, Pauli %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Walk'n'Merge: A Scalable Algorithm for Boolean Tensor Factorization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A48-B %F OTHER: Local-ID: 4CE63F9DEBEF8E5EC1257C6A00610B3D-erdos13discovering %R 10.1109/ICDM.2013.141 %D 2013 %8 31.10.2013 %B 13th International Conference on Data Mining %Z date of event: 2013-10-07 - 2013-10-10 %C Dallas, TX, USA %X Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure. %B IEEE 13th International Conference on Data Mining %P 1037 - 1042 %I IEEE
[182]
D. Erdős and P. Miettinen, “Discovering Facts with Boolean Tensor Tucker Decomposition,” in CIKM’13, 22nd ACM International Conference on Information & Knowledge Management, San Francisco, CA, USA, 2013.
Abstract
Open Information Extraction (Open IE) has gained increasing research interest in recent years. The first step in Open IE is to extract raw subject--predicate--object triples from the data. These raw triples are rarely usable per se, and need additional post-processing. To that end, we proposed the use of Boolean Tucker tensor decomposition to simultaneously find the entity and relation synonyms and the facts connecting them from the raw triples. Our method represents the synonym sets and facts using (sparse) binary matrices and tensor that can be efficiently stored and manipulated. We consider the presentation of the problem as a Boolean tensor decomposition as one of this paper's main contributions. To study the validity of this approach, we use a recent algorithm for scalable Boolean Tucker decomposition. We validate the results with empirical evaluation on a new semi-synthetic data set, generated to faithfully reproduce real-world data features, as well as with real-world data from existing Open IE extractor. We show that our method obtains high precision while the low recall can easily be remedied by considering the original data together with the decomposition.
Export
BibTeX
@inproceedings{erdos13discovering, TITLE = {Discovering Facts with {B}oolean Tensor Tucker Decomposition}, AUTHOR = {Erd{\H o}s, D{\'o}ra and Miettinen, Pauli}, LANGUAGE = {eng}, ISBN = {978-1-4503-2263-8}, DOI = {10.1145/2505515.2507846}, LOCALID = {Local-ID: 65F19E1E95609D3CC1257C6A0061B38E-erdos13walknmerge}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, ABSTRACT = {Open Information Extraction (Open IE) has gained increasing research interest in recent years. The first step in Open IE is to extract raw subject--predicate--object triples from the data. These raw triples are rarely usable per se, and need additional post-processing. To that end, we proposed the use of Boolean Tucker tensor decomposition to simultaneously find the entity and relation synonyms and the facts connecting them from the raw triples. Our method represents the synonym sets and facts using (sparse) binary matrices and tensor that can be efficiently stored and manipulated. We consider the presentation of the problem as a Boolean tensor decomposition as one of this paper's main contributions. To study the validity of this approach, we use a recent algorithm for scalable Boolean Tucker decomposition. We validate the results with empirical evaluation on a new semi-synthetic data set, generated to faithfully reproduce real-world data features, as well as with real-world data from existing Open IE extractor. We show that our method obtains high precision while the low recall can easily be remedied by considering the original data together with the decomposition.}, BOOKTITLE = {CIKM'13, 22nd ACM International Conference on Information \& Knowledge Management}, EDITOR = {Nejdl, Wolfgang and Pei, Jian and Rastogi, Rajeev}, PAGES = {1569--1572}, ADDRESS = {San Francisco, CA, USA}, }
Endnote
%0 Conference Proceedings %A Erdős, Dóra %A Miettinen, Pauli %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering Facts with Boolean Tensor Tucker Decomposition : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A43-6 %F OTHER: Local-ID: 65F19E1E95609D3CC1257C6A0061B38E-erdos13walknmerge %R 10.1145/2505515.2507846 %D 2013 %B 22nd ACM International Conference on Information & Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %X Open Information Extraction (Open IE) has gained increasing research interest in recent years. The first step in Open IE is to extract raw subject--predicate--object triples from the data. These raw triples are rarely usable per se, and need additional post-processing. To that end, we proposed the use of Boolean Tucker tensor decomposition to simultaneously find the entity and relation synonyms and the facts connecting them from the raw triples. Our method represents the synonym sets and facts using (sparse) binary matrices and tensor that can be efficiently stored and manipulated. We consider the presentation of the problem as a Boolean tensor decomposition as one of this paper's main contributions. To study the validity of this approach, we use a recent algorithm for scalable Boolean Tucker decomposition. We validate the results with empirical evaluation on a new semi-synthetic data set, generated to faithfully reproduce real-world data features, as well as with real-world data from existing Open IE extractor. We show that our method obtains high precision while the low recall can easily be remedied by considering the original data together with the decomposition. %B CIKM'13 %E Nejdl, Wolfgang; Pei, Jian; Rastogi, Rajeev %P 1569 - 1572 %I ACM %@ 978-1-4503-2263-8
[183]
D. Erdős and P. Miettinen, “Scalable Boolean Tensor Factorizations using Random Walks,” 2013. [Online]. Available: http://arxiv.org/abs/1310.4843.
Abstract
Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more and more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure.
Export
BibTeX
@online{ErdosMiettinenarXiv2013, TITLE = {Scalable Boolean Tensor Factorizations using Random Walks}, AUTHOR = {Erd{\H o}s, D{\'o}ra and Miettinen, Pauli}, LANGUAGE = {eng}, URL = {http://arxiv.org/abs/1310.4843}, EPRINT = {1310.4843}, EPRINTTYPE = {arXiv}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more and more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure.}, }
Endnote
%0 Report %A Erdős, Dóra %A Miettinen, Pauli %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Scalable Boolean Tensor Factorizations using Random Walks : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4971-0 %U http://arxiv.org/abs/1310.4843 %D 2013 %X Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more and more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure. %K Computer Science, Data Structures and Algorithms, cs.DS
[184]
L. Galárraga, N. Preda, and F. M. Suchanek, “Mining Rules to Align Knowledge Bases,” in AKBC’13, 22nd ACM International Conference on Information and Knowledge Management, San Francisco, CA, USA, 2013.
Export
BibTeX
@inproceedings{rosaakbc2013, TITLE = {Mining Rules to Align Knowledge Bases}, AUTHOR = {Gal{\'a}rraga, Luis and Preda, Nicoleta and Suchanek, Fabian M.}, LANGUAGE = {eng}, ISBN = {978-1-4503-2411-3}, DOI = {10.1145/2509558.2509566}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, BOOKTITLE = {AKBC'13, 22nd ACM International Conference on Information and Knowledge Management}, EDITOR = {Suchanek, Fabian and Riedel, Sebastian and Singh, Sameer and Talukdar, Partha P.}, PAGES = {43--48}, ADDRESS = {San Francisco, CA, USA}, }
Endnote
%0 Conference Proceedings %A Galárraga, Luis %A Preda, Nicoleta %A Suchanek, Fabian M. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Ontologies, MPI for Informatics, Max Planck Society %T Mining Rules to Align Knowledge Bases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-B902-2 %R 10.1145/2509558.2509566 %D 2013 %B 22nd ACM International Conference on Information and Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %B AKBC'13 %E Suchanek, Fabian; Riedel, Sebastian; Singh, Sameer; Talukdar, Partha P. %P 43 - 48 %I ACM %@ 978-1-4503-2411-3
[185]
L. Galárraga, C. Teflioudi, K. Hose, and F. M. Suchanek, “AMIE: Association Rule Mining under Incomplete Evidence in Ontological Knowledge Bases,” in WWW’13, 22nd International Conference on World Wide Web, Rio de Janeiro, Brazil, 2013.
Export
BibTeX
@inproceedings{amie2013, TITLE = {{AMIE}: Association Rule Mining under Incomplete Evidence in Ontological Knowledge Bases}, AUTHOR = {Gal{\'a}rraga, Luis and Teflioudi, Christina and Hose, Katja and Suchanek, Fabian M.}, LANGUAGE = {eng}, ISBN = {978-1-4503-2035-1}, URL = {http://dl.acm.org/citation.cfm?id=2488388.2488425}, LOCALID = {Local-ID:C1257ACD0050F94E-F2B50FB8A380EA8EC1257B16005F42C3-amie2013}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, BOOKTITLE = {WWW{\textquoteright}13, 22nd International Conference on World Wide Web}, EDITOR = {Schwabe, Daniel and Almeida, Virgilio and Glaser, Hartmut and Baeza-Yates, Ricardo and Moon, Sue}, PAGES = {413--422}, ADDRESS = {Rio de Janeiro, Brazil}, }
Endnote
%0 Conference Proceedings %A Galárraga, Luis %A Teflioudi, Christina %A Hose, Katja %A Suchanek, Fabian M. %+ Ontologies, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Ontologies, MPI for Informatics, Max Planck Society %T AMIE: Association Rule Mining under Incomplete Evidence in Ontological Knowledge Bases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-544F-D %U http://dl.acm.org/citation.cfm?id=2488388.2488425 %F OTHER: Local-ID:C1257ACD0050F94E-F2B50FB8A380EA8EC1257B16005F42C3-amie2013 %D 2013 %B 22nd International Conference on World Wide Web %Z date of event: 2013-05-13 - 2013-05-17 %C Rio de Janeiro, Brazil %B WWW’13 %E Schwabe, Daniel; Almeida, Virgilio; Glaser, Hartmut; Baeza-Yates, Ricardo; Moon, Sue %P 413 - 422 %I ACM %@ 978-1-4503-2035-1
[186]
R. Gemulla, P. J. Haas, and W. Lehner, “Non-uniformity Issues and Workarounds in Bounded-size Sampling,” The VLDB Journal, vol. 22, no. 6, 2013.
Abstract
A variety of schemes have been proposed in the literature to speed up query processing and analytics by incrementally maintaining a bounded-size uniform sample from a dataset in the presence of a sequence of insertion, deletion, and update transactions. These algorithms vary according to whether the dataset is an ordinary set or a multiset and whether the transaction sequence consists only of insertions or can include deletions and updates. We report on subtle non-uniformity issues that we found in a number of these prior bounded-size sampling schemes, including some of our own. We provide workarounds that can avoid the non-uniformity problem; these workarounds are easy to implement and incur negligible additional cost. We also consider the impact of non-uniformity in practice and describe simple statistical tests that can help detect non-uniformity in new algorithms.
Export
BibTeX
@article{Gemulla2012,
  title        = {Non-uniformity Issues and Workarounds in Bounded-size Sampling},
  author       = {Gemulla, Rainer and Haas, Peter J. and Lehner, Wolfgang},
  language     = {eng},
  issn         = {1066-8888},
  doi          = {10.1007/s00778-013-0307-0},
  localid      = {Local-ID: AE61AAD9E8EE81FCC1257B0B00394134-Gemulla2012},
  publisher    = {Springer},
  address      = {Berlin},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {A variety of schemes have been proposed in the literature to speed up query processing and analytics by incrementally maintaining a bounded-size uniform sample from a dataset in the presence of a sequence of insertion, deletion, and update transactions. These algorithms vary according to whether the dataset is an ordinary set or a multiset and whether the transaction sequence consists only of insertions or can include deletions and updates. We report on subtle non-uniformity issues that we found in a number of these prior bounded-size sampling schemes, including some of our own. We provide workarounds that can avoid the non-uniformity problem; these workarounds are easy to implement and incur negligible additional cost. We also consider the impact of non-uniformity in practice and describe simple statistical tests that can help detect non-uniformity in new algorithms.},
  journal      = {The VLDB Journal},
  volume       = {22},
  number       = {6},
  pages        = {753--772},
}
Endnote
%0 Journal Article %A Gemulla, Rainer %A Haas, P. J. %A Lehner, W. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Non-uniformity Issues and Workarounds in Bounded-size Sampling : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A4D-1 %F OTHER: Local-ID: AE61AAD9E8EE81FCC1257B0B00394134-Gemulla2012 %R 10.1007/s00778-013-0307-0 %7 2013-02-14 %D 2013 %X A variety of schemes have been proposed in the literature to speed up query processing and analytics by incrementally maintaining a bounded-size uniform sample from a dataset in the presence of a sequence of insertion, deletion, and update transactions. These algorithms vary according to whether the dataset is an ordinary set or a multiset and whether the transaction sequence consists only of insertions or can include deletions and updates. We report on subtle non-uniformity issues that we found in a number of these prior bounded-size sampling schemes, including some of our own. We provide workarounds that can avoid the non-uniformity problem; these workarounds are easy to implement and incur negligible additional cost. We also consider the impact of non-uniformity in practice and describe simple statistical tests that can help detect non-uniformity in new algorithms. %K Database sampling, Reservoir sampling, Bernoulli sampling, Sample maintenance %J The VLDB Journal %V 22 %N 6 %& 753 %P 753 - 772 %I Springer %C Berlin %@ false
[187]
F. Grandoni, A. Gupta, S. Leonardi, P. Miettinen, P. Sankowski, and M. Singh, “Set Covering with Our Eyes Closed,” SIAM Journal on Computing, vol. 42, no. 3, 2013.
Abstract
Given a universe $U$ of $n$ elements and a weighted collection $\mathscr{S}$ of $m$ subsets of $U$, the universal set cover problem is to a priori map each element $u \in U$ to a set $S(u) \in \mathscr{S}$ containing $u$ such that any set $X \subseteq U$ is covered by $S(X)=\cup_{u\in X}S(u)$. The aim is to find a mapping such that the cost of $S(X)$ is as close as possible to the optimal set cover cost for $X$. (Such problems are also called oblivious or a priori optimization problems.) Unfortunately, for every universal mapping, the cost of $S(X)$ can be $\Omega(\sqrt{n})$ times larger than optimal if the set $X$ is adversarially chosen. In this paper we study the performance on average, when $X$ is a set of randomly chosen elements from the universe: we show how to efficiently find a universal map whose expected cost is $O(\log mn)$ times the expected optimal cost. In fact, we give a slightly improved analysis and show that this is the best possible. We generalize these ideas to weighted set cover and show similar guarantees to (nonmetric) facility location, where we have to balance the facility opening cost with the cost of connecting clients to the facilities. We show applications of our results to universal multicut and disc-covering problems and show how all these universal mappings give us algorithms for the stochastic online variants of the problems with the same competitive factors.
Export
BibTeX
@article{grandoni13set,
  title        = {Set Covering with Our Eyes Closed},
  author       = {Grandoni, Fabrizio and Gupta, Anupam and Leonardi, Stefano and Miettinen, Pauli and Sankowski, Piotr and Singh, Mohit},
  language     = {eng},
  issn         = {0097-5397},
  doi          = {10.1137/100802888},
  localid      = {Local-ID: 53C36AED23EF085AC1257C6A005E9F4B-grandoni13set},
  publisher    = {SIAM},
  address      = {Philadelphia, PA},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {Given a universe $U$ of $n$ elements and a weighted collection $\mathscr{S}$ of $m$ subsets of $U$, the universal set cover problem is to a priori map each element $u \in U$ to a set $S(u) \in \mathscr{S}$ containing $u$ such that any set $X \subseteq U$ is covered by $S(X)=\cup_{u\in X}S(u)$. The aim is to find a mapping such that the cost of $S(X)$ is as close as possible to the optimal set cover cost for $X$. (Such problems are also called oblivious or a priori optimization problems.) Unfortunately, for every universal mapping, the cost of $S(X)$ can be $\Omega(\sqrt{n})$ times larger than optimal if the set $X$ is adversarially chosen. In this paper we study the performance on average, when $X$ is a set of randomly chosen elements from the universe: we show how to efficiently find a universal map whose expected cost is $O(\log mn)$ times the expected optimal cost. In fact, we give a slightly improved analysis and show that this is the best possible. We generalize these ideas to weighted set cover and show similar guarantees to (nonmetric) facility location, where we have to balance the facility opening cost with the cost of connecting clients to the facilities. We show applications of our results to universal multicut and disc-covering problems and show how all these universal mappings give us algorithms for the stochastic online variants of the problems with the same competitive factors.},
  journal      = {SIAM Journal on Computing},
  volume       = {42},
  number       = {3},
  pages        = {808--830},
}
Endnote
%0 Journal Article %A Grandoni, Fabrizio %A Gupta, Anupam %A Leonardi, Stefano %A Miettinen, Pauli %A Sankowski, Piotr %A Singh, Mohit %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Set Covering with Our Eyes Closed : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1C37-0 %R 10.1137/100802888 %F OTHER: Local-ID: 53C36AED23EF085AC1257C6A005E9F4B-grandoni13set %7 2013-05-09 %D 2013 %X Given a universe $U$ of $n$ elements and a weighted collection $\mathscr{S}$ of $m$ subsets of $U$, the universal set cover problem is to a priori map each element $u \in U$ to a set $S(u) \in \mathscr{S}$ containing $u$ such that any set $X{\subseteq U}$ is covered by $S(X)=\cup_{u\in XS(u)$. The aim is to find a mapping such that the cost of $S(X)$ is as close as possible to the optimal set cover cost for $X$. (Such problems are also called oblivious or a priori optimization problems.) Unfortunately, for every universal mapping, the cost of $S(X)$ can be $\Omega(\sqrt{n})$ times larger than optimal if the set $X$ is adversarially chosen. In this paper we study the performance on average, when $X$ is a set of randomly chosen elements from the universe: we show how to efficiently find a universal map whose expected cost is $O(\log mn)$ times the expected optimal cost. In fact, we give a slightly improved analysis and show that this is the best possible. We generalize these ideas to weighted set cover and show similar guarantees to (nonmetric) facility location, where we have to balance the facility opening cost with the cost of connecting clients to the facilities. We show applications of our results to universal multicut and disc-covering problems and show how all these universal mappings give us algorithms for the stochastic online variants of the problems with the same competitive factors. 
%J SIAM Journal on Computing %V 42 %N 3 %& 808 %P 808 - 830 %I SIAM %C Philadelphia, PA %@ false
[188]
A. Grycner, P. Ernst, A. Siu, and G. Weikum, “Knowledge Discovery on Incompatibility of Medical Concepts,” in Computational Linguistics and Intelligent Text Processing (CICLing 2013), Samos, Greece, 2013.
Abstract
This work proposes a method for automatically discovering incompatible medical concepts in text corpora. The approach is distantly supervised based on a seed set of incompatible concept pairs like symptoms or conditions that rule each other out. Two concepts are considered incompatible if their definitions match a template, and contain an antonym pair derived from WordNet, VerbOcean, or a hand-crafted lexicon. Our method creates templates from dependency parse trees of definitional texts, using seed pairs. The templates are applied to a text corpus, and the resulting candidate pairs are categorized and ranked by statistical measures. Since experiments show that the results face semantic ambiguity problems, we further cluster the results into different categories. We applied this approach to the concepts in Unified Medical Language System, Human Phenotype Ontology, and Mammalian Phenotype Ontology. Out of 77,496 definitions, 1,958 concept pairs were detected as incompatible with an average precision of 0.80.
Export
BibTeX
@inproceedings{Grycner2013,
  title        = {Knowledge Discovery on Incompatibility of Medical Concepts},
  author       = {Grycner, Adam and Ernst, Patrick and Siu, Amy and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-3-642-37246-9},
  doi          = {10.1007/978-3-642-37247-6_10},
  localid      = {Local-ID: 2C3D152169C55F01C1257B160035B6E6-Grycner2013},
  publisher    = {Springer},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {This work proposes a method for automatically discovering incompatible medical concepts in text corpora. The approach is distantly supervised based on a seed set of incompatible concept pairs like symptoms or conditions that rule each other out. Two concepts are considered incompatible if their definitions match a template, and contain an antonym pair derived from WordNet, VerbOcean, or a hand-crafted lexicon. Our method creates templates from dependency parse trees of definitional texts, using seed pairs. The templates are applied to a text corpus, and the resulting candidate pairs are categorized and ranked by statistical measures. Since experiments show that the results face semantic ambiguity problems, we further cluster the results into different categories. We applied this approach to the concepts in Unified Medical Language System, Human Phenotype Ontology, and Mammalian Phenotype Ontology. Out of 77,496 definitions, 1,958 concept pairs were detected as incompatible with an average precision of 0.80.},
  booktitle    = {Computational Linguistics and Intelligent Text Processing (CICLing 2013)},
  editor       = {Gelbukh, Alexander},
  pages        = {114--125},
  series       = {Lecture Notes in Computer Science},
  volume       = {7816},
  address      = {Samos, Greece},
}
Endnote
%0 Conference Proceedings %A Grycner, Adam %A Ernst, Patrick %A Siu, Amy %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowledge Discovery on Incompatibility of Medical Concepts : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A54-F %F OTHER: Local-ID: 2C3D152169C55F01C1257B160035B6E6-Grycner2013 %R 10.1007/978-3-642-37247-6_10 %D 2013 %B 14th International Conference on Computational Linguistics and Intelligent Text Processing %Z date of event: 2013-03-24 - 2013-03-30 %C Samos, Greece %X This work proposes a method for automatically discovering incompatible medical concepts in text corpora. The approach is distantly supervised based on a seed set of incompatible concept pairs like symptoms or conditions that rule each other out. Two concepts are considered incompatible if their definitions match a template, and contain an antonym pair derived from WordNet, VerbOcean, or a hand-crafted lexicon. Our method creates templates from dependency parse trees of definitional texts, using seed pairs. The templates are applied to a text corpus, and the resulting candidate pairs are categorized and ranked by statistical measures. Since experiments show that the results face semantic ambiguity problems, we further cluster the results into different categories. We applied this approach to the concepts in Unified Medical Language System, Human Phenotype Ontology, and Mammalian Phenotype Ontology. Out of 77,496 definitions, 1,958 concept pairs were detected as incompatible with an average precision of 0.80. %B Computational Linguistics and Intelligent Text Processing %E Gelbukh, Alexander %P 114 - 125 %I Springer %@ 978-3-642-37246-9 %B Lecture Notes in Computer Science %N 7816
[189]
A. Gubichev, S. Bedathur, and S. Seufert, “Sparqling Kleene - Fast Property Paths in RDF-3X,” in First International Workshop on Graph Data Management Experiences and Systems (GRADES 2013), New York, NY, USA, 2013.
Export
BibTeX
@inproceedings{Gubichev2013,
  title        = {Sparqling {Kleene} -- Fast Property Paths in {RDF-3X}},
  author       = {Gubichev, Andrey and Bedathur, Srikanta and Seufert, Stephan},
  language     = {eng},
  isbn         = {978-1-4503-2188-4},
  doi          = {10.1145/2484425.2484443},
  localid      = {Local-ID: 2307D92E4A8D0ABFC1257C680057DFE6-Gubichev2013},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  booktitle    = {First International Workshop on Graph Data Management Experiences and Systems (GRADES 2013)},
  editor       = {Boncz, Peter A. and Neumann, Thomas},
  pages        = {1--7},
  eid          = {14},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Gubichev, Andrey %A Bedathur, Srikanta %A Seufert, Stephan %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Sparqling Kleene - Fast Property Paths in RDF-3X : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A87-C %R 10.1145/2484425.2484443 %F OTHER: Local-ID: 2307D92E4A8D0ABFC1257C680057DFE6-Gubichev2013 %D 2013 %B First International Workshop on Graph Data Management Experiences and Systems %Z date of event: 2013-06-22 - 2013-06-27 %C New York, NY, USA %B First International Workshop on Graph Data Management Experiences and Systems %E Boncz, Peter A.; Neumann, Thomas %P 1 - 7 %Z sequence number: 14 %I ACM %@ 978-1-4503-2188-4
[190]
S. Gurajada, J. Kamps, A. Mishra, R. Schenkel, M. Theobald, and Q. Wang, “Overview of the INEX 2013 Linked Data Track,” in Working Notes for the CLEF 2013 Conference, Valencia, Spain, 2013.
Abstract
This paper provides an overview of the INEX Linked Data Track, which went into its second iteration in 2013.
Export
BibTeX
@inproceedings{INEX-LD-2012,
  title        = {Overview of the {INEX} 2013 Linked Data Track},
  author       = {Gurajada, Sairam and Kamps, Jaap and Mishra, Arunav and Schenkel, Ralf and Theobald, Martin and Wang, Qiuyue},
  language     = {eng},
  localid      = {Local-ID: 60E4C9459DE8213AC1257BBB003DE4C9-INEX-LD-2012},
  publisher    = {CLEF Initiative},
  year         = {2013},
  marginalmark = {$\bullet$},
  abstract     = {This paper provides an overview of the INEX Linked Data Track, which went into its second iteration in 2013.},
  booktitle    = {Working Notes for the CLEF 2013 Conference},
  editor       = {Forner, Pamela and Navigli, Roberto and Tufis, Dan},
  address      = {Valencia, Spain},
}
Endnote
%0 Conference Proceedings %A Gurajada, Sairam %A Kamps, Jaap %A Mishra, Arunav %A Schenkel, Ralf %A Theobald, Martin %A Wang, Qiuyue %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T Overview of the INEX 2013 Linked Data Track : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A84-7 %F OTHER: Local-ID: 60E4C9459DE8213AC1257BBB003DE4C9-INEX-LD-2012 %D 2013 %B CLEF 2013 Evaluation Labs and Workshop %Z date of event: 2013-09-23 - 2013-09-26 %C Valencia, Spain %X This paper provides an overview of the INEX Linked Data Track, which went into its second iteration in 2013. %B Working Notes for the CLEF 2013 Conference %E Forner, Pamela; Navigli, Roberto; Tufis, Dan %I CLEF Initiative %U http://www.clef-initiative.eu/documents/71612/2b349f08-de37-41a9-bb62-40c91f1daa0b
[191]
J. Hoffart, F. M. Suchanek, K. Berberich, and G. Weikum, “YAGO2: A Spatially and Temporally Enhanced Knowledge Base from Wikipedia: Extended Abstract,” in 23rd International Joint Conference on Artificial Intelligence (IJCAI 2013), Beijing, China, 2013.
Abstract
We present YAGO2, an extension of the YAGO knowledge base, in which entities, facts, and events are anchored in both time and space. YAGO2 is built automatically from Wikipedia, GeoNames, and WordNet. It contains 447 million facts about 9.8 million entities. Human evaluation confirmed an accuracy of 95% of the facts in YAGO2. In this paper, we present the extraction methodology and the integration of the spatio-temporal dimension.
Export
BibTeX
@inproceedings{Hoffart2013ww,
  title        = {{YAGO2:} {A} Spatially and Temporally Enhanced Knowledge Base from {Wikipedia}: Extended Abstract},
  author       = {Hoffart, Johannes and Suchanek, Fabian M. and Berberich, Klaus and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-57735-633-2},
  localid      = {Local-ID: 0F08380C815DF7A8C1257C6100731377-Hoffart2013ww},
  publisher    = {AAAI},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {We present YAGO2, an extension of the YAGO knowledge base, in which entities, facts, and events are anchored in both time and space. YAGO2 is built automatically from Wikipedia, GeoNames, and WordNet. It contains 447 million facts about 9.8 million entities. Human evaluation confirmed an accuracy of 95\% of the facts in YAGO2. In this paper, we present the extraction methodology and the integration of the spatio-temporal dimension.},
  booktitle    = {23rd International Joint Conference on Artificial Intelligence (IJCAI 2013)},
  pages        = {3161--3165},
  address      = {Beijing, China},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %A Suchanek, Fabian M. %A Berberich, Klaus %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T YAGO2: A Spatially and Temporally Enhanced Knowledge Base from Wikipedia: Extended Abstract : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1AC8-9 %F OTHER: Local-ID: 0F08380C815DF7A8C1257C6100731377-Hoffart2013ww %D 2013 %B 23rd International Joint Conference on Artificial Intelligence %Z date of event: 2013-08-03 - 2013-08-09 %C Beijing, China %X We present YAGO2, an extension of the YAGO knowledge base, in which entities, facts, and events are anchored in both time and space. YAGO2 is built automatically from Wikipedia, GeoNames, and WordNet. It contains 447 million facts about 9.8 million entities. Human evaluation confirmed an accuracy of 95 of the facts in YAGO2. In this paper, we present the extraction methodology and the integration of the spatio-temporal dimension. %B 23rd International Joint Conference on Artificial Intelligence %P 3161 - 3165 %I AAAI %@ 978-1-57735-633-2 %U http://ijcai.org/papers13/Papers/IJCAI13-478.pdf
[192]
J. Hoffart, “Discovering and Disambiguating Named Entities in Text,” in SIGMOD’13 PhD Symposium, New York, NY, USA, 2013.
Abstract
Disambiguating named entities in natural language texts maps ambiguous names to canonical entities registered in a knowledge base such as DBpedia, Freebase, or YAGO. Knowing the specific entity is an important asset for several other tasks, e.g. entity-based information retrieval or higher-level information extraction. Our approach to named entity disambiguation makes use of several ingredients: the prior probability of an entity being mentioned, the similarity between the context of the mention in the text and an entity, as well as the coherence among the entities. Extending this method, we present a novel and highly efficient measure to compute the semantic coherence between entities. This measure is especially powerful for long-tail entities or such entities that are not yet present in the knowledge base. Reliably identifying names in the input text that are not part of the knowledge base is the current focus of our work.
Export
BibTeX
@inproceedings{Hoffart2013wk,
  title        = {Discovering and Disambiguating Named Entities in Text},
  author       = {Hoffart, Johannes},
  language     = {eng},
  isbn         = {978-1-4503-2155-6},
  doi          = {10.1145/2483574.2483582},
  localid      = {Local-ID: CA2056C02ACB8EDDC1257C6100744944-Hoffart2013wk},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {Disambiguating named entities in natural language texts maps ambiguous names to canonical entities registered in a knowledge base such as DBpedia, Freebase, or YAGO. Knowing the specific entity is an important asset for several other tasks, e.g. entity-based information retrieval or higher-level information extraction. Our approach to named entity disambiguation makes use of several ingredients: the prior probability of an entity being mentioned, the similarity between the context of the mention in the text and an entity, as well as the coherence among the entities. Extending this method, we present a novel and highly efficient measure to compute the semantic coherence between entities. This measure is especially powerful for long-tail entities or such entities that are not yet present in the knowledge base. Reliably identifying names in the input text that are not part of the knowledge base is the current focus of our work.},
  booktitle    = {SIGMOD{\textquoteright}13 PhD Symposium},
  editor       = {Lei, Chen and Dong, Xin Luna},
  pages        = {43--48},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering and Disambiguating Named Entities in Text : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A8E-D %F OTHER: Local-iD: CA2056C02ACB8EDDC1257C6100744944-Hoffart2013wk %R 10.1145/2483574.2483582 %D 2013 %B SIGMOD/PODS PhD Symposium %Z date of event: 2013-06-23 - 2013-06-23 %C New York, NY, USA %X Disambiguating named entities in natural language texts maps ambiguous names to canonical entities registered in a knowledge base such as DBpedia, Freebase, or YAGO. Knowing the specific entity is an important asset for several other tasks, e.g. entity-based information retrieval or higher-level information extraction. Our approach to named entity disambiguation makes use of several ingredients: the prior probability of an entity being mentioned, the similarity between the context of the mention in the text and an entity, as well as the coherence among the entities. Extending this method, we present a novel and highly efficient measure to compute the semantic coherence between entities. This measure is especially powerful for long-tail entities or such entities that are not yet present in the knowledge base. Reliably identifying names in the input text that are not part of the knowledge base is the current focus of our work. %B SIGMOD’13 PhD Symposium %E Lei, Chen; Dong, Xin Luna %P 43 - 48 %I ACM %@ 978-1-4503-2155-6
[193]
J. Hoffart, F. M. Suchanek, K. Berberich, and G. Weikum, “YAGO2: A Spatially and Temporally Enhanced Knowledge Base from Wikipedia,” Artificial Intelligence, vol. 194, 2013.
Export
BibTeX
@article{yago2aij2013,
  title        = {{YAGO2}: A Spatially and Temporally Enhanced Knowledge Base from {Wikipedia}},
  author       = {Hoffart, Johannes and Suchanek, Fabian M. and Berberich, Klaus and Weikum, Gerhard},
  language     = {eng},
  issn         = {0004-3702},
  url          = {http://www.sciencedirect.com/science/article/pii/S0004370212000719},
  doi          = {10.1016/j.artint.2012.06.001},
  localid      = {Local-ID:C1257ACD0050F94E-8D0B6EF25CD7906FC1257B1600621DB6-yago2@aij2013},
  publisher    = {Elsevier},
  address      = {Amsterdam},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  journal      = {Artificial Intelligence},
  volume       = {194},
  pages        = {28--61},
}
Endnote
%0 Journal Article %A Hoffart, Johannes %A Suchanek, Fabian M. %A Berberich, Klaus %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Ontologies, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T YAGO2: A Spatially and Temporally Enhanced Knowledge Base from Wikipedia : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-543A-C %R 10.1016/j.artint.2012.06.001 %U http://www.sciencedirect.com/science/article/pii/S0004370212000719 %F OTHER: Local-ID:C1257ACD0050F94E-8D0B6EF25CD7906FC1257B1600621DB6-yago2@aij2013 %7 2012-06-18 %D 2013 %J Artificial Intelligence %O AI %V 194 %& 28 %P 28 - 61 %I Elsevier %C Amsterdam %@ false
[194]
K. Hose and R. Schenkel, “WARP: Workload-Aware Replication and Partitioning for RDF,” in 4th International Workshop on Data Engineering meets Semantic Web (DESWeb 2013), Brisbane, Australia, 2013.
Export
BibTeX
@inproceedings{HoseSchenkel_DESWeb2013,
  title        = {{WARP}: Workload-Aware Replication and Partitioning for {RDF}},
  author       = {Hose, Katja and Schenkel, Ralf},
  language     = {eng},
  isbn         = {978-1-4673-5303-8},
  doi          = {10.1109/ICDEW.2013.6547414},
  localid      = {Local-ID: 17425053968C448EC1257AD100350E0C-HoseSchenkel_DESWeb2013},
  publisher    = {IEEE},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  booktitle    = {4th International Workshop on Data Engineering meets Semantic Web (DESWeb 2013)},
  pages        = {1--6},
  address      = {Brisbane, Australia},
}
Endnote
%0 Conference Proceedings %A Hose, Katja %A Schenkel, Ralf %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T WARP: Workload-Aware Replication and Partitioning for RDF : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A99-4 %F OTHER: Local-ID: 17425053968C448EC1257AD100350E0C-HoseSchenkel_DESWeb2013 %R 10.1109/ICDEW.2013.6547414 %D 2013 %B 4th International Workshop on Data Engineering meets Semantic Web %Z date of event: 2013-04-08 - 2013-04-12 %C Brisbane, Australia %B 4th International Workshop on Data Engineering meets Semantic Web %P 1 - 6 %I IEEE %@ 978-1-4673-5303-8
[195]
T. Huet, J. Biega, and F. Suchanek, “Mining History with Le Monde,” in AKBC’13, 22nd ACM International Conference on Information and Knowledge Management, San Francisco, CA, USA, 2013.
Export
BibTeX
@inproceedings{Huet:2013,
  title        = {Mining History with Le Monde},
  author       = {Huet, Thomas and Biega, Joanna and Suchanek, Fabian},
  language     = {eng},
  isbn         = {978-1-4503-2411-3},
  doi          = {10.1145/2509558.2509567},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  booktitle    = {AKBC'13, 22nd ACM International Conference on Information and Knowledge Management},
  editor       = {Suchanek, Fabian and Riedel, Sebastian and Singh, Sameer and Talukdar, Partha P.},
  pages        = {49--54},
  address      = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Huet, Thomas %A Biega, Joanna %A Suchanek, Fabian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Mining History with Le Monde : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5149-B %R 10.1145/2509558.2509567 %D 2013 %8 27.10.2013 %B 22nd ACM International Conference on Information and Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %K culturomics, knowledge base, le monde, yago %B AKBC'13 %E Suchanek, Fabian; Riedel, Sebastian; Singh, Sameer; Talukdar, Partha P. %P 49 - 54 %I ACM %@ 978-1-4503-2411-3
[196]
E. Ilieva, “Analyzing and Creating Top-k Entity Rankings,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{Ilieva2013,
  title        = {Analyzing and Creating Top-k Entity Rankings},
  author       = {Ilieva, Evica},
  language     = {eng},
  localid      = {Local-ID: DDA2710C9D0C5B92C1257BF00027BC81-Ilieva2013z},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
}
Endnote
%0 Thesis %A Ilieva, Evica %Y Michel, Sebastian %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Analyzing and Creating Top-k Entity Rankings : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1AC1-8 %F OTHER: Local-ID: DDA2710C9D0C5B92C1257BF00027BC81-Ilieva2013z %I Universität des Saarlandes %C Saarbrücken %D 2013 %P 67 p. %V master %9 master
[197]
E. Ilieva, S. Michel, and A. Stupar, “The Essence of Knowledge (bases) Through Entity Rankings,” in CIKM’13, 22nd ACM International Conference of Information & Knowledge Management, San Francisco, CA, USA, 2013.
Abstract
We consider the task of automatically phrasing and computing top-k rankings over the information contained in common knowledge bases (KBs), such as YAGO or DBPedia. We assemble the thematic focus and ranking criteria of rankings by inspecting the present Subject, Predicate, Object (SPO) triples. Making use of numerical attributes contained in the KB we are also able to compute the actual ranking content, i.e., entities and their performances. We further discuss the integration of existing rankings into the ranking generation process for increased coverage and ranking quality. We report on first results obtained using the YAGO knowledge base.
Export
BibTeX
@inproceedings{Ilieva2013z, TITLE = {The Essence of Knowledge (bases) Through Entity Rankings}, AUTHOR = {Ilieva, Evica and Michel, Sebastian and Stupar, Aleksandar}, LANGUAGE = {eng}, ISBN = {978-1-4503-2263-8}, DOI = {10.1145/2505515.2507838}, LOCALID = {Local-ID: 62BCC454FD2DBDEEC1257C690042AF3D-Ilieva2013z}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, ABSTRACT = {We consider the task of automatically phrasing and computing top-k rankings over the information contained in common knowledge bases (KBs), such as YAGO or DBPedia. We assemble the thematic focus and ranking criteria of rankings by inspecting the present Subject, Predicate, Object (SPO) triples. Making use of numerical attributes contained in the KB we are also able to compute the actual ranking content, i.e., entities and their performances. We further discuss the integration of existing rankings into the ranking generation process for increased coverage and ranking quality. We report on first results obtained using the YAGO knowledge base.}, BOOKTITLE = {CIKM'13, 22nd ACM International Conference on Information \& Knowledge Management}, EDITOR = {Nejdl, Wolfgang and Pei, Jian and Rastogi, Rajeev}, PAGES = {1537--1540}, ADDRESS = {San Francisco, CA, USA}, }
Endnote
%0 Conference Proceedings %A Ilieva, Evica %A Michel, Sebastian %A Stupar, Aleksandar %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T The Essence of Knowledge (bases) Through Entity Rankings : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A33-A %R 10.1145/2505515.2507838 %F OTHER: Local-ID: 62BCC454FD2DBDEEC1257C690042AF3D-Ilieva2013z %D 2013 %B 22nd ACM International Conference on Information & Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %X We consider the task of automatically phrasing and computing top-k rankings over the information contained in common knowledge bases (KBs), such as YAGO or DBPedia. We assemble the thematic focus and ranking criteria of rankings by inspecting the present Subject, Predicate, Object (SPO) triples. Making use of numerical attributes contained in the KB we are also able to compute the actual ranking content, i.e., entities and their performances. We further discuss the integration of existing rankings into the ranking generation process for increased coverage and ranking quality. We report on first results obtained using the YAGO knowledge base. %B CIKM'13 %E Nejdl, Wolfgang; Pei, Jian; Rastogi, Rajeev %P 1537 - 1540 %I ACM %@ 978-1-4503-2263-8
[198]
L. Jiang, P. Luo, J. Wang, Y. Xiong, B. Lin, M. Wang, and N. An, “GRIAS: An Entity-Relation Graph Based Framework For Discovering Entity Aliases,” in IEEE 13th International Conference on Data Mining (ICDM 2013), Dallas, TX, USA, 2013.
Export
BibTeX
@inproceedings{Jiang2013y, TITLE = {{GRIAS}: An Entity-Relation Graph Based Framework For Discovering Entity Aliases}, AUTHOR = {Jiang, Lili and Luo, Ping and Wang, Jianyong and Xiong, Yuhong and Lin, Binduan and Wang, Min and An, Ning}, LANGUAGE = {eng}, ISSN = {1550-4786}, DOI = {10.1109/ICDM.2013.50}, LOCALID = {Local-ID: C5190A26C9118030C1257C68005B209A-Jiang2013y}, PUBLISHER = {IEEE}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, BOOKTITLE = {IEEE 13th International Conference on Data Mining (ICDM 2013)}, PAGES = {310--319}, ADDRESS = {Dallas, TX, USA}, }
Endnote
%0 Conference Proceedings %A Jiang, Lili %A Luo, Ping %A Wang, Jianyong %A Xiong, Yuhong %A Lin, Binduan %A Wang, Min %A An, Ning %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations %T GRIAS: An Entity-Relation Graph Based Framework For Discovering Entity Aliases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1ADF-8 %R 10.1109/ICDM.2013.50 %F OTHER: Local-ID: C5190A26C9118030C1257C68005B209A-Jiang2013y %D 2013 %B 13th International Conference on Data Mining %Z date of event: 2013-12-07 - 2013-12-10 %C Dallas, TX, USA %B IEEE 13th International Conference on Data Mining %P 310 - 319 %I IEEE %@ false
[199]
L. Jiang, Y. Wang, J. Hoffart, and G. Weikum, “Crowdsourced Entity Markup,” in Proceedings of the 1st International Workshop on Crowdsourcing the Semantic Web co-located with 12th International Semantic Web Conference (ISWC 2013), Sydney, Australia, 2013.
Export
BibTeX
@inproceedings{Jiang2013, TITLE = {Crowdsourced Entity Markup}, AUTHOR = {Jiang, Lili and Wang, Yafang and Hoffart, Johannes and Weikum, Gerhard}, LANGUAGE = {eng}, ISSN = {1613-0073}, URL = {urn:nbn:de:0074-1030-0}, LOCALID = {Local-ID: 4A6F03891D73CF9DC1257C68005859B2-Jiang2013}, PUBLISHER = {CEUR-WS.org}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {Proceedings of the 1st International Workshop on Crowdsourcing the Semantic Web co-located with 12th International Semantic Web Conference (ISWC 2013)}, EDITOR = {Acosta, Maribel and Aroyo, Lora and Bernstein, Abraham and Lehmann, Jens and Noy, Natasha and Simperl, Elena}, PAGES = {65--68}, SERIES = {CEUR Workshop Proceedings}, VOLUME = {1030}, ADDRESS = {Sydney, Australia}, }
Endnote
%0 Conference Proceedings %A Jiang, Lili %A Wang, Yafang %A Hoffart, Johannes %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Crowdsourced Entity Markup : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1AD0-6 %F OTHER: Local-ID: 4A6F03891D73CF9DC1257C68005859B2-Jiang2013 %U urn:nbn:de:0074-1030-0 %D 2013 %8 06.09.2013 %B 1st International Workshop on Crowdsourcing the Semantic Web co-located with 12th International Semantic Web Conference %Z date of event: 2013-10-21 - 2013-10-25 %C Sydney, Australia %B Proceedings of the 1st International Workshop on Crowdsourcing the Semantic Web co-located with 12th International Semantic Web Conference %E Acosta, Maribel; Aroyo, Lora; Bernstein, Abraham; Lehmann, Jens; Noy, Natasha; Simperl, Elena %P 65 - 68 %I CEUR-WS.org %B CEUR Workshop Proceedings %Y Acosta, Maribel %N 1030 %@ false
[200]
S. Karaev, “Matrix Factorization over Max-times Algebra for Data Mining,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{KaraevMaster2013, TITLE = {Matrix Factorization over Max-times Algebra for Data Mining}, AUTHOR = {Karaev, Sanjar}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, }
Endnote
%0 Thesis %A Karaev, Sanjar %Y Miettinen, Pauli %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Matrix Factorization over Max-times Algebra for Data Mining : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-9DD1-8 %I Universität des Saarlandes %C Saarbrücken %D 2013 %P X, 57 p. %V master %9 master
[201]
S. K. Kondreddi, P. Triantafillou, and G. Weikum, “Human Computing Games for Knowledge Acquisition,” in CIKM’13, 22nd ACM International Conference on Information & Knowledge Management, San Francisco, CA, USA, 2013.
Abstract
Automatic information extraction techniques for knowledge acquisition are known to produce noise, incomplete or incorrect facts from textual sources. Human computing offers a natural alternative to expand and complement the output of automated information extraction methods, thereby enabling us to build high-quality knowledge bases. However, relying solely on human inputs for extraction can be prohibitively expensive in practice. We demonstrate human computing games for knowledge acquisition that employ human computing to overcome the limitations in automated fact acquisition methods. We provide a combined approach that tightly integrates automated extraction techniques with human computing for effective gathering of facts. The methods we provide gather facts in the form of relationships between entities. The games we demonstrate are specifically designed to capture hard-to-extract relations between entities in narrative text -- a task that automated systems find challenging.
Export
BibTeX
@inproceedings{Kondreddi2013b, TITLE = {Human Computing Games for Knowledge Acquisition}, AUTHOR = {Kondreddi, Sarath Kumar and Triantafillou, Peter and Weikum, Gerhard}, LANGUAGE = {eng}, ISBN = {978-1-4503-2263-8}, DOI = {10.1145/2505515.2508213}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, ABSTRACT = {Automatic information extraction techniques for knowledge acquisition are known to produce noise, incomplete or incorrect facts from textual sources. Human computing offers a natural alternative to expand and complement the output of automated information extraction methods, thereby enabling us to build high-quality knowledge bases. However, relying solely on human inputs for extraction can be prohibitively expensive in practice. We demonstrate human computing games for knowledge acquisition that employ human computing to overcome the limitations in automated fact acquisition methods. We provide a combined approach that tightly integrates automated extraction techniques with human computing for effective gathering of facts. The methods we provide gather facts in the form of relationships between entities. The games we demonstrate are specifically designed to capture hard-to-extract relations between entities in narrative text -- a task that automated systems find challenging.}, BOOKTITLE = {CIKM'13, 22nd ACM International Conference on Information \& Knowledge Management}, EDITOR = {Nejdl, Wolfgang and Pei, Jian and Rastogi, Rajeev}, PAGES = {2513--2516}, ADDRESS = {San Francisco, CA, USA}, }
Endnote
%0 Conference Proceedings %A Kondreddi, Sarath Kumar %A Triantafillou, Peter %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Human Computing Games for Knowledge Acquisition : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1C65-8 %@ 978-1-4503-2263-8 %R 10.1145/2505515.2508213 %D 2013 %B 22nd ACM International Conference on Information & Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %X Automatic information extraction techniques for knowledge acquisition are known to produce noise, incomplete or incorrect facts from textual sources. Human computing offers a natural alternative to expand and complement the output of automated information extraction methods, thereby enabling us to build high-quality knowledge bases. However, relying solely on human inputs for extraction can be prohibitively expensive in practice. We demonstrate human computing games for knowledge acquisition that employ human computing to overcome the limitations in automated fact acquisition methods. We provide a combined approach that tightly integrates automated extraction techniques with human computing for effective gathering of facts. The methods we provide gather facts in the form of relationships between entities. The games we demonstrate are specifically designed to capture hard-to-extract relations between entities in narrative text -- a task that automated systems find challenging. %B CIKM'13 %E Nejdl, Wolfgang; Pei, Jian; Rastogi, Rajeev %P 2513 - 2516 %I ACM
[202]
S. K. Kondreddi, P. Triantafillou, and G. Weikum, “HIGGINS: Knowledge Acquisition Meets the Crowds,” in WWW’13, 22nd International Conference on World Wide Web, Rio de Janeiro, Brazil, 2013.
Abstract
We present HIGGINS, a system for {\em Knowledge Acquisition (KA)}, placing emphasis on its architecture. The distinguishing characteristic and novelty of HIGGINS lies in its blending of two engines: an automated {\em Information Extraction (IE)} engine, aided by {\em semantic resources} and {\em statistics}, and a game-based {\em Human Computing (HC)} engine. We focus on KA from web pages and text sources and, in particular, on deriving relationships between entities. As a running application we utilize movie narratives, from which we wish to derive relationships among movie characters.
Export
BibTeX
@inproceedings{Kondreddi2013a, TITLE = {{HIGGINS}: Knowledge Acquisition Meets the Crowds}, AUTHOR = {Kondreddi, Sarath Kumar and Triantafillou, Peter and Weikum, Gerhard}, LANGUAGE = {eng}, ISBN = {978-1-4503-2038-2}, URL = {http://dl.acm.org/citation.cfm?id=2487788.2487825}, LOCALID = {Local-ID: 6A913522403405EBC1257B3A003B5625-Kondreddi2013a}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, ABSTRACT = {We present HIGGINS, a system for {\em Knowledge Acquisition (KA)}, placing emphasis on its architecture. The distinguishing characteristic and novelty of HIGGINS lies in its blending of two engines: an automated {\em Information Extraction (IE)} engine, aided by {\em semantic resources} and {\em statistics}, and a game-based {\em Human Computing (HC)} engine. We focus on KA from web pages and text sources and, in particular, on deriving relationships between entities. As a running application we utilize movie narratives, from which we wish to derive relationships among movie characters.}, BOOKTITLE = {WWW'13, 22nd International Conference on World Wide Web}, EDITOR = {Schwabe, Daniel and Almeida, Virgilio and Glaser, Hartmut and Baeza-Yates, Ricardo and Moon, Sue}, PAGES = {85--86}, ADDRESS = {Rio de Janeiro, Brazil}, }
Endnote
%0 Conference Proceedings %A Kondreddi, Sarath Kumar %A Triantafillou, Peter %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T HIGGINS: Knowledge Acquisition Meets the Crowds : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1B79-4 %F OTHER: Local-ID: 6A913522403405EBC1257B3A003B5625-Kondreddi2013a %U http://dl.acm.org/citation.cfm?id=2487788.2487825 %D 2013 %B 22nd International Conference on World Wide Web %Z date of event: 2013-05-13 - 2013-05-17 %C Rio de Janeiro, Brazil %X We present HIGGINS, a system for {\em Knowledge Acquisition (KA)}, placing emphasis on its architecture. The distinguishing characteristic and novelty of HIGGINS lies in its blending of two engines: an automated {\em Information Extraction (IE)} engine, aided by {\em semantic resources} and {\em statistics}, and a game-based {\em Human Computing (HC)} engine. We focus on KA from web pages and text sources and, in particular, on deriving relationships between entities. As a running application we utilize movie narratives, from which we wish to derive relationships among movie characters. %B WWW'13 %E Schwabe, Daniel; Almeida, Virgilio; Glaser, Hartmut; Baeza-Yates, Ricardo; Moon, Sue %P 85 - 86 %I ACM %@ 978-1-4503-2038-2
[203]
K.-N. Kontonasios, J. Vreeken, and T. De Bie, “Maximum Entropy Models for Iteratively Identifying Subjectively Interesting Structure in Real-valued Data,” in Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2013), Prague, Czech Republic, 2013.
Export
BibTeX
@inproceedings{Konto2013a, TITLE = {Maximum Entropy Models for Iteratively Identifying Subjectively Interesting Structure in Real-valued Data}, AUTHOR = {Kontonasios, Kleanthis-Nikolaos and Vreeken, Jilles and De Bie, Tijl}, LANGUAGE = {eng}, ISBN = {978-3-642-33485-6}, DOI = {10.1007/978-3-642-40991-2_17}, LOCALID = {Local-ID: ED5813E38D4C4066C1257C6000547D94-Konto2013a}, PUBLISHER = {Springer}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, BOOKTITLE = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2013)}, EDITOR = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and {\v Z}elezn{\'y}, Filip}, PAGES = {256--271}, SERIES = {Lecture Notes in Artificial Intelligence}, VOLUME = {8189}, ADDRESS = {Prague, Czech Republic}, }
Endnote
%0 Conference Proceedings %A Kontonasios, Kleanthis-Nikolaos %A Vreeken, Jilles %A De Bie, Tijl %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Maximum Entropy Models for Iteratively Identifying Subjectively Interesting Structure in Real-valued Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1CCC-4 %F OTHER: Local-ID: ED5813E38D4C4066C1257C6000547D94-Konto2013a %R 10.1007/978-3-642-40991-2_17 %D 2013 %B European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2013-09-23 - 2013-09-27 %C Prague, Czech Republic %B Machine Learning and Knowledge Discovery in Databases %E Blockeel, Hendrik; Kersting, Kristian; Nijssen, Siegfried; Železný, Filip %P 256 - 271 %I Springer %@ 978-3-642-33485-6 %B Lecture Notes in Artificial Intelligence %N 8189
[204]
F. Makari, B. Awerbuch, R. Gemulla, R. Khandekar, J. Mestre, and M. Sozio, “A Distributed Algorithm for Large-scale Generalized Matching,” Proceedings of the VLDB Endowment (Proc. VLDB 2013), vol. 6, no. 9, 2013.
Export
BibTeX
@article{MakariAGKMS13, TITLE = {A Distributed Algorithm for Large-scale Generalized Matching}, AUTHOR = {Makari, Faraz and Awerbuch, Baruch and Gemulla, Rainer and Khandekar, Rohit and Mestre, Juli{\'a}n and Sozio, Mauro}, LANGUAGE = {eng}, PUBLISHER = {ACM}, ADDRESS = {New York, NY}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, JOURNAL = {Proceedings of the VLDB Endowment (Proc. VLDB)}, VOLUME = {6}, NUMBER = {9}, PAGES = {613--624}, BOOKTITLE = {Proceedings of the 39th International Conference on Very Large Data Bases (VLDB 2013)}, EDITOR = {B{\"o}hlen, Michael and Koch, Christoph}, }
Endnote
%0 Journal Article %A Makari, Faraz %A Awerbuch, Baruch %A Gemulla, Rainer %A Khandekar, Rohit %A Mestre, Julián %A Sozio, Mauro %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Algorithms and Complexity, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Distributed Algorithm for Large-scale Generalized Matching : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-9CB4-1 %7 2013 %D 2013 %J Proceedings of the VLDB Endowment %O PVLDB %V 6 %N 9 %& 613 %P 613 - 624 %I ACM %C New York, NY %B Proceedings of the 39th International Conference on Very Large Data Bases %O August 26th - 30th 2013, Riva del Garda, Trento, Italy VLDB 2013 %U http://www.vldb.org/pvldb/vol6/p613-makarimanshadi.pdf
[205]
F. Makari and R. Gemulla, “A Distributed Approximation Algorithm for Mixed Packing-covering Linear Programs,” in Proceedings of the NIPS Workshop on Big Learning, Lake Tahoe, NV, USA, 2013.
Export
BibTeX
@inproceedings{MakariG13, TITLE = {A Distributed Approximation Algorithm for Mixed Packing-covering Linear Programs}, AUTHOR = {Makari, Faraz and Gemulla, Rainer}, LANGUAGE = {eng}, URL = {http://biglearn.org/2013/files/papers/biglearning2013_submission_14.pdf}, PUBLISHER = {NIPS}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {Proceedings of the NIPS Workshop on Big Learning}, ADDRESS = {Lake Tahoe, NV, USA}, }
Endnote
%0 Conference Proceedings %A Makari, Faraz %A Gemulla, Rainer %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Distributed Approximation Algorithm for Mixed Packing-covering Linear Programs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-9CC6-A %U http://biglearn.org/2013/files/papers/biglearning2013_submission_14.pdf %D 2013 %B NIPS 2013 Workshop on Big Learning %Z date of event: 2013-12-09 - 2013-12-09 %C Lake Tahoe, NV, USA %B Proceedings of the NIPS Workshop on Big Learning %I NIPS
[206]
F. Makari, B. Awerbuch, R. Gemulla, R. Khandekar, J. Mestre, and M. Sozio, “A Distributed Algorithm for Large-scale Generalized Matching,” Max-Planck-Institut für Informatik, Saarbrücken, MPI-I-2013-5-002, 2013.
Abstract
Generalized matching problems arise in a number of applications, including computational advertising, recommender systems, and trade markets. Consider, for example, the problem of recommending multimedia items (e.g., DVDs) to users such that (1) users are recommended items that they are likely to be interested in, (2) every user gets neither too few nor too many recommendations, and (3) only items available in stock are recommended to users. State-of-the-art matching algorithms fail at coping with large real-world instances, which may involve millions of users and items. We propose the first distributed algorithm for computing near-optimal solutions to large-scale generalized matching problems like the one above. Our algorithm is designed to run on a small cluster of commodity nodes (or in a MapReduce environment), has strong approximation guarantees, and requires only a poly-logarithmic number of passes over the input. In particular, we propose a novel distributed algorithm to approximately solve mixed packing-covering linear programs, which include but are not limited to generalized matching problems. Experiments on real-world and synthetic data suggest that our algorithm scales to very large problem sizes and can be orders of magnitude faster than alternative approaches.
Export
BibTeX
@techreport{MakariAwerbuchGemullaKhandekarMestreSozio2013, TITLE = {A Distributed Algorithm for Large-scale Generalized Matching}, AUTHOR = {Makari, Faraz and Awerbuch, Baruch and Gemulla, Rainer and Khandekar, Rohit and Mestre, Julian and Sozio, Mauro}, LANGUAGE = {eng}, ISSN = {0946-011X}, NUMBER = {MPI-I-2013-5-002}, INSTITUTION = {Max-Planck-Institut f{\"u}r Informatik}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, ABSTRACT = {Generalized matching problems arise in a number of applications, including computational advertising, recommender systems, and trade markets. Consider, for example, the problem of recommending multimedia items (e.g., DVDs) to users such that (1) users are recommended items that they are likely to be interested in, (2) every user gets neither too few nor too many recommendations, and (3) only items available in stock are recommended to users. State-of-the-art matching algorithms fail at coping with large real-world instances, which may involve millions of users and items. We propose the first distributed algorithm for computing near-optimal solutions to large-scale generalized matching problems like the one above. Our algorithm is designed to run on a small cluster of commodity nodes (or in a MapReduce environment), has strong approximation guarantees, and requires only a poly-logarithmic number of passes over the input. In particular, we propose a novel distributed algorithm to approximately solve mixed packing-covering linear programs, which include but are not limited to generalized matching problems. Experiments on real-world and synthetic data suggest that our algorithm scales to very large problem sizes and can be orders of magnitude faster than alternative approaches.}, TYPE = {Research Reports}, }
Endnote
%0 Report %A Makari, Faraz %A Awerbuch, Baruch %A Gemulla, Rainer %A Khandekar, Rohit %A Mestre, Julian %A Sozio, Mauro %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Algorithms and Complexity, MPI for Informatics, Max Planck Society Algorithms and Complexity, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Distributed Algorithm for Large-scale Generalized Matching : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-03B4-3 %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2013 %P 39 p. %X Generalized matching problems arise in a number of applications, including computational advertising, recommender systems, and trade markets. Consider, for example, the problem of recommending multimedia items (e.g., DVDs) to users such that (1) users are recommended items that they are likely to be interested in, (2) every user gets neither too few nor too many recommendations, and (3) only items available in stock are recommended to users. State-of-the-art matching algorithms fail at coping with large real-world instances, which may involve millions of users and items. We propose the first distributed algorithm for computing near-optimal solutions to large-scale generalized matching problems like the one above. Our algorithm is designed to run on a small cluster of commodity nodes (or in a MapReduce environment), has strong approximation guarantees, and requires only a poly-logarithmic number of passes over the input. In particular, we propose a novel distributed algorithm to approximately solve mixed packing-covering linear programs, which include but are not limited to generalized matching problems. 
Experiments on real-world and synthetic data suggest that our algorithm scales to very large problem sizes and can be orders of magnitude faster than alternative approaches. %B Research Reports %@ false
[207]
M. Mampaey and J. Vreeken, “Summarizing Categorical Data by Clustering Attributes,” Data Mining and Knowledge Discovery, vol. 26, no. 1, 2013.
Export
BibTeX
@article{Mampaey2013a, TITLE = {Summarizing Categorical Data by Clustering Attributes}, AUTHOR = {Mampaey, Michael and Vreeken, Jilles}, LANGUAGE = {eng}, ISSN = {1384-5810}, DOI = {10.1007/s10618-011-0246-6}, LOCALID = {Local-ID: 4366BFBB9FB411E9C1257C6000528295-Mampaey2013a}, PUBLISHER = {Springer}, ADDRESS = {Berlin}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, JOURNAL = {Data Mining and Knowledge Discovery}, VOLUME = {26}, NUMBER = {1}, PAGES = {130--173}, }
Endnote
%0 Journal Article %A Mampaey, Michael %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Summarizing Categorical Data by Clustering Attributes : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1CD3-1 %R 10.1007/s10618-011-0246-6 %F OTHER: Local-ID: 4366BFBB9FB411E9C1257C6000528295-Mampaey2013a %7 2013-01 %D 2013 %J Data Mining and Knowledge Discovery %V 26 %N 1 %& 130 %P 130 - 173 %I Springer %C Berlin %@ false
[208]
S. Metzger, R. Schenkel, and M. Sydow, “QBEES: Query by Entity Examples,” in CIKM’13, 22nd ACM International Conference on Information & Knowledge Management, San Francisco, CA, USA, 2013.
Abstract
Structured knowledge bases are an increasingly important way for storing and retrieving information. Within such knowledge bases, an important search task is finding similar entities based on one or more example entities. We present QBEES, a novel framework for defining entity similarity based only on structural features, so-called aspects, of the entities, that includes query-dependent and query-independent entity ranking components. We present evaluation results with a number of existing entity list completion benchmarks, comparing to several state-of-the-art baselines.
Export
BibTeX
@inproceedings{MetzgerSS_CIKM2013, TITLE = {{QBEES}: Query by Entity Examples}, AUTHOR = {Metzger, Steffen and Schenkel, Ralf and Sydow, Marcin}, LANGUAGE = {eng}, ISBN = {978-1-4503-2263-8}, DOI = {10.1145/2505515.2507873}, LOCALID = {Local-ID: D07B27BEBFE9E7D8C1257BB10024BD7C-MetzgerSS_CIKM2013}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, ABSTRACT = {Structured knowledge bases are an increasingly important way for storing and retrieving information. Within such knowledge bases, an important search task is finding similar entities based on one or more example entities. We present QBEES, a novel framework for defining entity similarity based only on structural features, so-called aspects, of the entities, that includes query-dependent and query-independent entity ranking components. We present evaluation results with a number of existing entity list completion benchmarks, comparing to several state-of-the-art baselines.}, BOOKTITLE = {CIKM'13, 22nd ACM International Conference on Information \& Knowledge Management}, EDITOR = {Nejdl, Wolfgang and Pei, Jian and Rastogi, Rajeev}, PAGES = {1829--1832}, ADDRESS = {San Francisco, CA, USA}, }
Endnote
%0 Conference Proceedings %A Metzger, Steffen %A Schenkel, Ralf %A Sydow, Marcin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T QBEES: Query by Entity Examples : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1D0B-E %F OTHER: Local-ID: D07B27BEBFE9E7D8C1257BB10024BD7C-MetzgerSS_CIKM2013 %R 10.1145/2505515.2507873 %D 2013 %B 22nd ACM International Conference on Information & Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %X Structured knowledge bases are an increasingly important way for storing and retrieving information. Within such knowledge bases, an important search task is finding similar entities based on one or more example entities. We present QBEES, a novel framework for defining entity similarity based only on structural features, so-called aspects, of the entities, that includes query-dependent and query-independent entity ranking components. We present evaluation results with a number of existing entity list completion benchmarks, comparing to several state-of-the-art baselines. %B CIKM'13 %E Nejdl, Wolfgang; Pei, Jian; Rastogi, Rajeev %P 1829 - 1832 %I ACM %@ 978-1-4503-2263-8
[209]
P. Miettinen, “Fully Dynamic Quasi-Biclique Edge Covers via Boolean Matrix Factorizations,” in 1st ACM SIGMOD Workshop on Dynamic Networks Management and Mining (DyNetMM 2013), New York, NY, USA, 2013.
Abstract
An important way of summarizing a bipartite graph is to give a set of (quasi-) bicliques that contain (almost) all of its edges. These quasi-bicliques are somewhat similar to clustering of the nodes, giving sets of similar nodes. Unlike clustering, however, the quasi-bicliques are not required to partition the nodes, allowing greater flexibility when creating them. When we identify the bipartite graph with its bi-adjacency matrix, the problem of finding these quasi-bicliques turns into the problem of finding the Boolean matrix factorization of the bi-adjacency matrix -- a problem that has received increasing research interest in data mining in recent years. But many real-world graphs are dynamic and evolve over time. How can we update our bicliques without having to re-compute them from the scratch? An algorithm was recently proposed for this task (Miettinen, ICMD 2012). The algorithm, however, is only able to handle the case where the new 1s are added to the matrix~--~it cannot handle the removal of existing 1s. Furthermore, the algorithm cannot adjust the rank of the factorization. This paper extends said algorithm with the capability of working in fully dynamic setting (with both additions and deletions) and with capability of adjusting its rank dynamically, as well. The behaviour and performance of the algorithm is studied in experiments conducted with both real-world and synthetic data.
Export
BibTeX
@inproceedings{miettinen13fully, TITLE = {Fully Dynamic Quasi-Biclique Edge Covers via {Boolean} Matrix Factorizations}, AUTHOR = {Miettinen, Pauli}, LANGUAGE = {eng}, ISBN = {978-1-4503-2209-6}, DOI = {10.1145/2489247.2489250}, LOCALID = {Local-ID: CA06050F2A3AFCF0C1257C6A005F39ED-miettinen13fully}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, ABSTRACT = {An important way of summarizing a bipartite graph is to give a set of (quasi-) bicliques that contain (almost) all of its edges. These quasi-bicliques are somewhat similar to clustering of the nodes, giving sets of similar nodes. Unlike clustering, however, the quasi-bicliques are not required to partition the nodes, allowing greater flexibility when creating them. When we identify the bipartite graph with its bi-adjacency matrix, the problem of finding these quasi-bicliques turns into the problem of finding the Boolean matrix factorization of the bi-adjacency matrix -- a problem that has received increasing research interest in data mining in recent years. But many real-world graphs are dynamic and evolve over time. How can we update our bicliques without having to re-compute them from the scratch? An algorithm was recently proposed for this task (Miettinen, ICMD 2012). The algorithm, however, is only able to handle the case where the new 1s are added to the matrix~--~it cannot handle the removal of existing 1s. Furthermore, the algorithm cannot adjust the rank of the factorization. This paper extends said algorithm with the capability of working in fully dynamic setting (with both additions and deletions) and with capability of adjusting its rank dynamically, as well. The behaviour and performance of the algorithm is studied in experiments conducted with both real-world and synthetic data.}, BOOKTITLE = {1st ACM SIGMOD Workshop on Dynamic Networks Management and Mining (DyNetMM 2013)}, PAGES = {17--24}, ADDRESS = {New York, NY, USA}, }
Endnote
%0 Conference Proceedings %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Fully Dynamic Quasi-Biclique Edge Covers via Boolean Matrix Factorizations : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0018-EF87-D %F OTHER: Local-ID: CA06050F2A3AFCF0C1257C6A005F39ED-miettinen13fully %R 10.1145/2489247.2489250 %D 2013 %B 1st ACM SIGMOD Workshop on Dynamic Networks Management and Mining %Z date of event: 2013-06-23 - 2013-06-23 %C New York, NY, USA %X An important way of summarizing a bipartite graph is to give a set of (quasi-) bicliques that contain (almost) all of its edges. These quasi-bicliques are somewhat similar to clustering of the nodes, giving sets of similar nodes. Unlike clustering, however, the quasi-bicliques are not required to partition the nodes, allowing greater flexibility when creating them. When we identify the bipartite graph with its bi-adjacency matrix, the problem of finding these quasi-bicliques turns into the problem of finding the Boolean matrix factorization of the bi-adjacency matrix -- a problem that has received increasing research interest in data mining in recent years. But many real-world graphs are dynamic and evolve over time. How can we update our bicliques without having to re-compute them from the scratch? An algorithm was recently proposed for this task (Miettinen, ICMD 2012). The algorithm, however, is only able to handle the case where the new 1s are added to the matrix~--~it cannot handle the removal of existing 1s. Furthermore, the algorithm cannot adjust the rank of the factorization. This paper extends said algorithm with the capability of working in fully dynamic setting (with both additions and deletions) and with capability of adjusting its rank dynamically, as well. The behaviour and performance of the algorithm is studied in experiments conducted with both real-world and synthetic data. 
%B 1st ACM SIGMOD Workshop on Dynamic Networks Management and Mining %P 17 - 24 %I ACM %@ 978-1-4503-2209-6
[210]
D. Milchevski, “Entity Recommendation Based on Wikipedia,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{MilchevskiMaster2013, TITLE = {Entity Recommendation Based on {Wikipedia}}, AUTHOR = {Milchevski, Dragan}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, }
Endnote
%0 Thesis %A Milchevski, Dragan %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Entity Recommendation Based on Wikipedia : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-CE39-E %I Universität des Saarlandes %C Saarbrücken %D 2013 %P XII, 121 p. %V master %9 master
[211]
I. Miliaraki, K. Berberich, R. Gemulla, and S. Zoupanos, “Mind the Gap: Large-scale Frequent Sequence Mining,” in SIGMOD’13, ACM SIGMOD International Conference on Management of Data, New York, NY, USA, 2013.
Abstract
Frequent sequence mining is one of the fundamental building blocks in data mining. While the problem has been extensively studied, few of the available techniques are sufficiently scalable to handle datasets with billions of sequences; such large-scale datasets arise, for instance, in text mining and session analysis. In this paper, we propose PFSM, a scalable algorithm for frequent sequence mining on MapReduce. PFSM can handle so-called ``gap constraints'', which can be used to limit the output to a controlled set of frequent sequences. At its heart, PFSM partitions the input database in a way that allows us to mine each partition independently using any existing frequent sequence mining algorithm. We introduce the notion of w-equivalency, which is a generalization of the notion of a ``projected database'' used by many frequent pattern mining algorithms. We also present a number of optimization techniques that minimize partition size, and therefore computational and communication costs, while still maintaining correctness. Our extensive experimental study in the context of text mining suggests that PFSM is significantly more efficient and scalable than alternative approaches.
Export
BibTeX
@inproceedings{Miliaraki2013, TITLE = {Mind the Gap: Large-scale Frequent Sequence Mining}, AUTHOR = {Miliaraki, Iris and Berberich, Klaus and Gemulla, Rainer and Zoupanos, Spyros}, LANGUAGE = {eng}, ISBN = {978-1-4503-2037-5}, DOI = {10.1145/2463676.2465285}, LOCALID = {Local-ID: 086027E8ABA46DC6C1257B0F003D8C96-Miliaraki2013}, PUBLISHER = {ACM}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, ABSTRACT = {Frequent sequence mining is one of the fundamental building blocks in data mining. While the problem has been extensively studied, few of the available techniques are suffciently scalable to handle datasets with billions of sequences; such large-scale datasets arise, for instance, in text mining and session analysis. In this paper, we propose PFSM, a scalable algorithm for frequent sequence mining on MapReduce. PFSM can handle so-called ``gap constraints'', which can be used to limit the output to a controlled set of frequent sequences. At its heart, PFSM partitions the input database in a way that allows us to mine each partition independently using any existing frequent sequence mining algorithm. We introduce the notion of w-equivalency, which is a generalization of the notion of a ``projected database'' used by many frequent pattern mining algorithms. We also present a number of optimization techniques that minimize partition size, and therefore computational and communication costs, while still maintaining correctness. Our extensive experimental study in the context of text mining suggests that PFSM is significantly more efficient and scalable than alternative approaches.}, BOOKTITLE = {SIGMOD'13, ACM SIGMOD International Conference on Management of Data}, EDITOR = {Ross, Kenneth and Srivastava, Divesh and Papadias, Dimitris and Papadopoulos, Stavros}, PAGES = {797--808}, ADDRESS = {New York, NY, USA}, }
Endnote
%0 Conference Proceedings %A Miliaraki, Iris %A Berberich, Klaus %A Gemulla, Rainer %A Zoupanos, Spyros %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Mind the Gap: Large-scale Frequent Sequence Mining : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1D76-9 %F OTHER: Local-ID: 086027E8ABA46DC6C1257B0F003D8C96-Miliaraki2013 %R 10.1145/2463676.2465285 %D 2013 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2013-06-22 - 2013-06-27 %C New York, NY, USA %X Frequent sequence mining is one of the fundamental building blocks in data mining. While the problem has been extensively studied, few of the available techniques are suffciently scalable to handle datasets with billions of sequences; such large-scale datasets arise, for instance, in text mining and session analysis. In this paper, we propose PFSM, a scalable algorithm for frequent sequence mining on MapReduce. PFSM can handle so-called ``gap constraints'', which can be used to limit the output to a controlled set of frequent sequences. At its heart, PFSM partitions the input database in a way that allows us to mine each partition independently using any existing frequent sequence mining algorithm. We introduce the notion of w-equivalency, which is a generalization of the notion of a ``projected database'' used by many frequent pattern mining algorithms. We also present a number of optimization techniques that minimize partition size, and therefore computational and communication costs, while still maintaining correctness. Our extensive experimental study in the context of text mining suggests that PFSM is significantly more efficient and scalable than alternative approaches. 
%B SIGMOD'13 %E Ross, Kenneth; Srivastava, Divesh; Papadias, Dimitris; Papadopoulos, Stavros %P 797 - 808 %I ACM %@ 978-1-4503-2037-5
[212]
A. Mishra, S. Gurajada, and M. Theobald, “SPAR-Key: Processing SPARQL-Fulltext Queries to Solve Jeopardy! Clues,” in Working Notes for the CLEF 2013 Conference, Valencia, Spain, 2013.
Abstract
We describe our SPAR-Key query engine that implements indexing, ranking, and query processing techniques to run a new kind of SPARQL-fulltext queries that were provided in the context of the INEX 2013 Jeopardy task.
Export
BibTeX
@inproceedings{Theobald2012, TITLE = {{SPAR}-Key: Processing {SPARQL}-Fulltext Queries to Solve {J}eopardy! Clues}, AUTHOR = {Mishra, Arunav and Gurajada, Sairam and Theobald, Martin}, LANGUAGE = {eng}, LOCALID = {Local-ID: 598B0731B76FB570C1257BBB003DA461-Theobald2012}, PUBLISHER = {CLEF Initiative}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, ABSTRACT = {We describe our SPAR-Key query engine that implements indexing, ranking, and query processing techniques to run a new kind of SPARQL-fulltext queries that were provided in the context of the INEX 2013 Jeopardy task.}, BOOKTITLE = {Working Notes for the CLEF 2013 Conference}, EDITOR = {Forner, Pamela and Navigli, Roberto and Tufis, Dan}, ADDRESS = {Valencia, Spain}, }
Endnote
%0 Conference Proceedings %A Mishra, Arunav %A Gurajada, Sairam %A Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T SPAR-Key: Processing SPARQL-Fulltext Queries to Solve Jeopardy! Clues : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A87-1 %F OTHER: Local-ID: 598B0731B76FB570C1257BBB003DA461-Theobald2012 %D 2013 %B CLEF 2013 Evaluation Labs and Workshop %Z date of event: 2013-09-23 - 2013-09-26 %C Valencia, Spain %X We describe our SPAR-Key query engine that implements indexing, ranking, and query processing techniques to run a new kind of SPARQL-fulltext queries that were provided in the context of the INEX 2013 Jeopardy task. %B Working Notes for the CLEF 2013 Conference %E Forner, Pamela; Navigli, Roberto; Tufis, Dan %I CLEF Initiative %U http://www.clef-initiative.eu/documents/71612/69505b5f-455b-4ce6-a699-28e2268d4d84
[213]
A. Mishra, “Design and Evaluation of an IR-Benchmark for SPARQL Fulltext Queries,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{MishraMastersThesis2013, TITLE = {Design and Evaluation of an {IR}-Benchmark for {SPARQL} Fulltext Queries}, AUTHOR = {Mishra, Arunav}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, }
Endnote
%0 Thesis %A Mishra, Arunav %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Design and Evaluation of an IR-Benchmark for SPARQL Fulltext Queries : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C5C-7 %I Universität des Saarlandes %C Saarbrücken %D 2013 %V master %9 master
[214]
N. Nakashole, G. Weikum, and F. M. Suchanek, “Discovering Semantic Relations From the Web and Organizing Them with PATTY,” ACM SIGMOD Record, vol. 42, no. 2, 2013.
Abstract
PATTY is a system for automatically distilling relational patterns from the Web, for example, the pattern "X covered Y" between a singer and someone else's song. We have extracted a large collection of such patterns and organized them in a taxonomic manner, similar in style to the WordNet thesaurus but capturing relations (binary predicates) instead of concepts and classes (unary predicates). The patterns are organized by semantic types and synonyms, and they form a hierarchy based on subsumptions. For example, "X covered Y" is subsumed by "X sang Y", which in turn is subsumed by "X performed Y" (where X can be any musician, not just a singer). In this paper we give an overview of the PATTY system and the resulting collections of relational patterns. We discuss the four main components of PATTY's architecture and a variety of use cases, including the paraphrasing of relations, and semantic search over subject-predicate-object triples. This kind of search can handle entities, relations, semantic types, noun phrases, and relational phrases.
Export
BibTeX
@article{Nakashole2013, TITLE = {Discovering Semantic Relations From the Web and Organizing Them with {PATTY}}, AUTHOR = {Nakashole, Ndapandula and Weikum, Gerhard and Suchanek, Fabian M.}, LANGUAGE = {eng}, ISSN = {0163-5808}, DOI = {10.1145/2503792.2503799}, LOCALID = {Local-ID: 44317A4E27B1A909C1257C69004C8654-Nakashole2013}, PUBLISHER = {ACM}, ADDRESS = {New York, USA}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, ABSTRACT = {PATTY is a system for automatically distilling relational patterns from the Web, for example, the pattern "X covered Y" between a singer and someone else's song. We have extracted a large collection of such patterns and organized them in a taxonomic manner, similar in style to the WordNet thesaurus but capturing relations (binary predicates) instead of concepts and classes (unary predicates). The patterns are organized by semantic types and synonyms, and they form a hierarchy based on subsumptions. For example, "X covered Y" is subsumed by "X sang Y", which in turn is subsumed by "X performed Y" (where X can be any musician, not just a singer). In this paper we give an overview of the PATTY system and the resulting collections of relational patterns. We discuss the four main components of PATTY's architecture and a variety of use cases, including the paraphrasing of relations, and semantic search over subjectpredicate- object triples. This kind of search can handle entities, relations, semantic types, noun phrases, and relational phrases.}, JOURNAL = {ACM SIGMOD Record}, VOLUME = {42}, NUMBER = {2}, PAGES = {29--34}, }
Endnote
%0 Journal Article %A Nakashole, Ndapandula %A Weikum, Gerhard %A Suchanek, Fabian M. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering Semantic Relations From the Web and Organizing Them with PATTY : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1D7F-8 %F OTHER: Local-ID: 44317A4E27B1A909C1257C69004C8654-Nakashole2013 %R 10.1145/2503792.2503799 %7 2013-05 %D 2013 %X PATTY is a system for automatically distilling relational patterns from the Web, for example, the pattern "X covered Y" between a singer and someone else's song. We have extracted a large collection of such patterns and organized them in a taxonomic manner, similar in style to the WordNet thesaurus but capturing relations (binary predicates) instead of concepts and classes (unary predicates). The patterns are organized by semantic types and synonyms, and they form a hierarchy based on subsumptions. For example, "X covered Y" is subsumed by "X sang Y", which in turn is subsumed by "X performed Y" (where X can be any musician, not just a singer). In this paper we give an overview of the PATTY system and the resulting collections of relational patterns. We discuss the four main components of PATTY's architecture and a variety of use cases, including the paraphrasing of relations, and semantic search over subjectpredicate- object triples. This kind of search can handle entities, relations, semantic types, noun phrases, and relational phrases. %J ACM SIGMOD Record %V 42 %N 2 %& 29 %P 29 - 34 %I ACM %C New York, USA %@ false
[215]
N. Nakashole, T. Tylenda, and G. Weikum, “Fine-grained Semantic Typing of Emerging Entities,” in Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (ACL 2013), Sofia, Bulgaria, 2013.
Abstract
Methods for information extraction (IE) and knowledge base (KB) construction have been intensively studied. However, a largely under-explored case is tapping into highly dynamic sources like news streams and social media, where new entities are continuously emerging. In this paper, we present a method for discovering and semantically typing newly emerging out-of-KB entities, thus improving the freshness and recall of ontology-based IE and improving the precision and semantic rigor of open IE. Our method is based on a probabilistic model that feeds weights into integer linear programs that leverage type signatures of relational phrases and type correlation or disjointness constraints. Our experimental evaluation, based on crowdsourced user studies, show our method performing significantly better than prior work.
Export
BibTeX
@inproceedings{NakasholeTW2013, TITLE = {Fine-grained Semantic Typing of Emerging Entities}, AUTHOR = {Nakashole, Ndapandula and Tylenda, Tomasz and Weikum, Gerhard}, LANGUAGE = {eng}, ISBN = {978-1-937284-50-3}, URL = {http://www.aclweb.org/anthology/P13-1146}, LOCALID = {Local-ID: FC162FD7AA65180CC1257C690054174F-NakasholeTW2013}, PUBLISHER = {ACL}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, ABSTRACT = {Methods for information extraction (IE) and knowledge base (KB) construction have been intensively studied. However, a largely under-explored case is tapping into highly dynamic sources like news streams and social media, where new entities are continuously emerging. In this paper, we present a method for discovering and semantically typing newly emerging out-of- KB entities, thus improving the freshness and recall of ontology-based IE and improving the precision and semantic rigor of open IE. Our method is based on a probabilistic model that feeds weights into integer linear programs that leverage type signatures of relational phrases and type correlation or disjointness constraints. Our experimental evaluation, based on crowdsourced user studies, show our method performing significantly better than prior work.}, BOOKTITLE = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (ACL 2013)}, PAGES = {1488--1497}, ADDRESS = {Sofia, Bulgaria}, }
Endnote
%0 Conference Proceedings %A Nakashole, Ndapandula %A Tylenda, Tomasz %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Fine-grained Semantic Typing of Emerging Entities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1D86-5 %F OTHER: Local-ID: FC162FD7AA65180CC1257C690054174F-NakasholeTW2013 %U http://www.aclweb.org/anthology/P13-1146 %D 2013 %B 51st Annual Meeting of the Association for Computational Linguistics %Z date of event: 2013-08-04 - 2013-08-04 %C Sofia, Bulgaria %X Methods for information extraction (IE) and knowledge base (KB) construction have been intensively studied. However, a largely under-explored case is tapping into highly dynamic sources like news streams and social media, where new entities are continuously emerging. In this paper, we present a method for discovering and semantically typing newly emerging out-of- KB entities, thus improving the freshness and recall of ontology-based IE and improving the precision and semantic rigor of open IE. Our method is based on a probabilistic model that feeds weights into integer linear programs that leverage type signatures of relational phrases and type correlation or disjointness constraints. Our experimental evaluation, based on crowdsourced user studies, show our method performing significantly better than prior work. %B Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics %P 1488 - 1497 %I ACL %@ 978-1-937284-50-3
[216]
B. Paudel, A. Anand, and K. Berberich, “User-defined Redundancy in Web Archives,” in Proceedings of the 10th International Workshop on Large-Scale and Distributed Systems for Information Retrieval (LSDS-IR 2013), Rome, Italy, 2013.
Export
BibTeX
@inproceedings{Berberich2013a, TITLE = {User-defined Redundancy in Web Archives}, AUTHOR = {Paudel, Bibek and Anand, Avishek and Berberich, Klaus}, LANGUAGE = {eng}, LOCALID = {Local-ID: B1F68A61E42E6170C1257B09003A6105-Berberich2013a}, PUBLISHER = {lsdsir.org}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, BOOKTITLE = {Proceedings of the 10th International Workshop on Large-Scale and Distributed Systems for Information Retrieval (LSDS-IR 2013)}, ADDRESS = {Rome, Italy}, }
Endnote
%0 Conference Proceedings %A Paudel, Bibek %A Anand, Avishek %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T User-defined Redundancy in Web Archives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A91-9 %F OTHER: Local-ID: B1F68A61E42E6170C1257B09003A6105-Berberich2013a %D 2013 %B 10th International Workshop on Large-Scale and Distributed Systems for Information Retrieval %Z date of event: 2013-02-05 - 2013-02-05 %C Rome, Italy %B Proceedings of the 10th International Workshop on Large-Scale and Distributed Systems for Information Retrieval %I lsdsir.org %U http://www.lsdsir.org/wp-content/uploads/2013/02/LSDS-IR-2013-Proceedings.pdf
[217]
A. Podosinnikova, “Robust Principal Component Analysis as a Nonlinear Eigenproblem,” Universität des Saarlandes, Saarbrücken, 2013.
Abstract
Principal Component Analysis (PCA) is a widely used tool for, e.g., exploratory data analysis, dimensionality reduction and clustering. However, it is well known that PCA is strongly affected by the presence of outliers and, thus, is vulnerable to both gross measurement error and adversarial manipulation of the data. This phenomenon motivates the development of robust PCA as the problem of recovering the principal components of the uncontaminated data. In this thesis, we propose two new algorithms, QRPCA and MDRPCA, for robust PCA components based on the projection-pursuit approach of Huber. While the resulting optimization problems are non-convex and non-smooth, we show that they can be efficiently minimized via the RatioDCA using bundle methods/accelerated proximal methods for the interior problem. The key ingredient for the most promising algorithm (QRPCA) is a robust, location invariant scale measure with breakdown point 0.5. Extensive experiments show that our QRPCA is competitive with current state-of-the-art methods and outperforms other methods in particular for a large number of outliers.
Export
BibTeX
@mastersthesis{Podosinnikova2013, TITLE = {Robust Principal Component Analysis as a Nonlinear Eigenproblem}, AUTHOR = {Podosinnikova, Anastasia}, LANGUAGE = {eng}, SCHOOL = {Universit{\"a}t des Saarlandes}, ADDRESS = {Saarbr{\"u}cken}, YEAR = {2013}, MARGINALMARK = {$\bullet$}, DATE = {2013}, ABSTRACT = {Principal Component Analysis (PCA) is a widely used tool for, e.g., exploratory data analysis, dimensionality reduction and clustering. However, it is well known that PCA is strongly aected by the presence of outliers and, thus, is vulnerable to both gross measurement error and adversarial manipulation of the data. This phenomenon motivates the development of robust PCA as the problem of recovering the principal components of the uncontaminated data. In this thesis, we propose two new algorithms, QRPCA and MDRPCA, for robust PCA components based on the projection-pursuit approach of Huber. While the resulting optimization problems are non-convex and non-smooth, we show that they can be eciently minimized via the RatioDCA using bundle methods/accelerated proximal methods for the interior problem. The key ingredient for the most promising algorithm (QRPCA) is a robust, location invariant scale measure with breakdown point 0.5. Extensive experiments show that our QRPCA is competitive with current state-of-the-art methods and outperforms other methods in particular for a large number of outliers.}, }
Endnote
%0 Thesis %A Podosinnikova, Anastasia %Y Hein, Matthias %A referee: Gemulla, Rainer %+ International Max Planck Research School, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Robust Principal Component Analysis as a Nonlinear Eigenproblem : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0026-CC75-A %I Universität des Saarlandes %C Saarbrücken %D 2013 %V master %9 master %X Principal Component Analysis (PCA) is a widely used tool for, e.g., exploratory data analysis, dimensionality reduction and clustering. However, it is well known that PCA is strongly affected by the presence of outliers and, thus, is vulnerable to both gross measurement error and adversarial manipulation of the data. This phenomenon motivates the development of robust PCA as the problem of recovering the principal components of the uncontaminated data. In this thesis, we propose two new algorithms, QRPCA and MDRPCA, for robust PCA components based on the projection-pursuit approach of Huber. While the resulting optimization problems are non-convex and non-smooth, we show that they can be efficiently minimized via the RatioDCA using bundle methods/accelerated proximal methods for the interior problem. The key ingredient for the most promising algorithm (QRPCA) is a robust, location invariant scale measure with breakdown point 0.5. Extensive experiments show that our QRPCA is competitive with current state-of-the-art methods and outperforms other methods in particular for a large number of outliers.
[218]
N. Preda, F. M. Suchanek, W. Yuan, and G. Weikum, “SUSIE: Search Using Services and Information Extraction,” in 29th IEEE International Conference on Data Engineering (ICDE 2013), Brisbane, Australia, 2013.
Abstract
The API of a Web service restricts the types of queries that the service can answer. For example, a Web service might provide a method that returns the songs of a given singer, but it might not provide a method that returns the singers of a given song. If the user asks for the singer of some specific song, then the Web service cannot be called — even though the underlying database might have the desired piece of information. This asymmetry is particularly problematic if the service is used in a Web service orchestration system. In this paper, we propose to use on-the-fly information extraction to collect values that can be used as parameter bindings for the Web service. We show how this idea can be integrated into a Web service orchestration system. Our approach is fully implemented in a prototype called SUSIE. We present experiments with real-life data and services to demonstrate the practical viability and good performance of our approach.
Export
BibTeX
@inproceedings{susie,
  TITLE     = {{SUSIE}: Search Using Services and Information Extraction},
  AUTHOR    = {Preda, Nicoleta and Suchanek, Fabian M. and Yuan, Wenjun and Weikum, Gerhard},
  LANGUAGE  = {eng},
  ISBN      = {978-1-4673-4909-3},
  DOI       = {10.1109/ICDE.2013.6544827},
  LOCALID   = {Local-ID:C1257ACD0050F94E-0C3E76F7AF652AEEC1257AD70062003A-susie},
  PUBLISHER = {IEEE},
  YEAR      = {2013},
  MARGINALMARK = {$\bullet$},
  DATE      = {2013},
  ABSTRACT  = {The API of a Web service restricts the types of queries that the service can answer. For example, a Web service might provide a method that returns the songs of a given singer, but it might not provide a method that returns the singers of a given song. If the user asks for the singer of some specific song, then the Web service cannot be called --- even though the underlying database might have the desired piece of information. This asymmetry is particularly problematic if the service is used in a Web service orchestration system. In this paper, we propose to use on-the-fly information extraction to collect values that can be used as parameter bindings for the Web service. We show how this idea can be integrated into a Web service orchestration system. Our approach is fully implemented in a prototype called SUSIE. We present experiments with real-life data and services to demonstrate the practical viability and good performance of our approach.},
  BOOKTITLE = {29th IEEE International Conference on Data Engineering (ICDE 2013)},
  PAGES     = {218--229},
  ADDRESS   = {Brisbane, Australia},
}
Endnote
%0 Conference Proceedings %A Preda, Nicoleta %A Suchanek, Fabian M. %A Yuan, Wenjun %A Weikum, Gerhard %+ External Organizations Ontologies, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T SUSIE: Search Using Services and Information Extraction : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-546D-9 %R 10.1109/ICDE.2013.6544827 %F OTHER: Local-ID:C1257ACD0050F94E-0C3E76F7AF652AEEC1257AD70062003A-susie %D 2013 %B 29th IEEE International Conference on Data Engineering %Z date of event: 2013-04-08 - 2013-04-11 %C Brisbane, Australia %X The API of a Web service restricts the types of queries that the service can answer. For example, a Web service might provide a method that returns the songs of a given singer, but it might not provide a method that returns the singers of a given song. If the user asks for the singer of some specific song, then the Web service cannot be called — even though the underlying database might have the desired piece of information. This asymmetry is particularly problematic if the service is used in a Web service orchestration system. In this paper, we propose to use on-the-fly information extraction to collect values that can be used as parameter bindings for the Web service. We show how this idea can be integrated into a Web service orchestration system. Our approach is fully implemented in a prototype called SUSIE. We present experiments with real-life data and services to demonstrate the practical viability and good performance of our approach. %B 29th IEEE International Conference on Data Engineering %P 218 - 229 %I IEEE %@ 978-1-4673-4909-3
[219]
L. Qu, “Sentiment Analysis with Limited Training Data,” Universität des Saarlandes, Saarbrücken, 2013.
Abstract
Sentiments are positive and negative emotions, evaluations and stances. This dissertation focuses on learning based systems for automatic analysis of sentiments and comparisons in natural language text. The proposed approach consists of three contributions: 1. Bag-of-opinions model: For predicting document-level polarity and intensity, we proposed the bag-of-opinions model by modeling each document as a bag of sentiments, which can explore the syntactic structures of sentiment-bearing phrases for improved rating prediction of online reviews. 2. Multi-experts model: Due to the sparsity of manually-labeled training data, we designed the multi-experts model for sentence-level analysis of sentiment polarity and intensity by fully exploiting any available sentiment indicators, such as phrase-level predictors and sentence similarity measures. 3. LSSVMrae model: To understand the sentiments regarding entities, we proposed LSSVMrae model for extracting sentiments and comparisons of entities at both sentence and subsentential level. Different granularity of analysis leads to different model complexity, the finer the more complex. All proposed models aim to minimize the use of hand-labeled data by maximizing the use of the freely available resources. These models explore also different feature representations to capture the compositional semantics inherent in sentiment-bearing expressions. Our experimental results on real-world data showed that all models significantly outperform the state-of-the-art methods on the respective tasks.
Export
BibTeX
@phdthesis{Qu2013,
  TITLE    = {Sentiment Analysis with Limited Training Data},
  AUTHOR   = {Qu, Lizhen},
  LANGUAGE = {eng},
  SCHOOL   = {Universit{\"a}t des Saarlandes},
  ADDRESS  = {Saarbr{\"u}cken},
  YEAR     = {2013},
  MARGINALMARK = {$\bullet$},
  DATE     = {2013},
  ABSTRACT = {Sentiments are positive and negative emotions, evaluations and stances. This dissertation focuses on learning based systems for automatic analysis of sentiments and comparisons in natural language text. The proposed approach consists of three contributions: 1. Bag-of-opinions model: For predicting document-level polarity and intensity, we proposed the bag-of-opinions model by modeling each document as a bag of sentiments, which can explore the syntactic structures of sentiment-bearing phrases for improved rating prediction of online reviews. 2. Multi-experts model: Due to the sparsity of manually-labeled training data, we designed the multi-experts model for sentence-level analysis of sentiment polarity and intensity by fully exploiting any available sentiment indicators, such as phrase-level predictors and sentence similarity measures. 3. LSSVMrae model: To understand the sentiments regarding entities, we proposed LSSVMrae model for extracting sentiments and comparisons of entities at both sentence and subsentential level. Different granularity of analysis leads to different model complexity, the finer the more complex. All proposed models aim to minimize the use of hand-labeled data by maximizing the use of the freely available resources. These models explore also different feature representations to capture the compositional semantics inherent in sentiment-bearing expressions. Our experimental results on real-world data showed that all models significantly outperform the state-of-the-art methods on the respective tasks.},
}
Endnote
%0 Thesis %A Qu, Lizhen %Y Weikum, Gerhard %A referee: Gemulla, Rainer %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Sentiment Analysis with Limited Training Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-9796-9 %I Universität des Saarlandes %C Saarbrücken %D 2013 %P 133 p. %V phd %9 phd %X Sentiments are positive and negative emotions, evaluations and stances. This dissertation focuses on learning based systems for automatic analysis of sentiments and comparisons in natural language text. The proposed approach consists of three contributions: 1. Bag-of-opinions model: For predicting document-level polarity and intensity, we proposed the bag-of-opinions model by modeling each document as a bag of sentiments, which can explore the syntactic structures of sentiment-bearing phrases for improved rating prediction of online reviews. 2. Multi-experts model: Due to the sparsity of manually-labeled training data, we designed the multi-experts model for sentence-level analysis of sentiment polarity and intensity by fully exploiting any available sentiment indicators, such as phrase-level predictors and sentence similarity measures. 3. LSSVMrae model: To understand the sentiments regarding entities, we proposed LSSVMrae model for extracting sentiments and comparisons of entities at both sentence and subsentential level. Different granularity of analysis leads to different model complexity, the finer the more complex. All proposed models aim to minimize the use of hand-labeled data by maximizing the use of the freely available resources. These models explore also different feature representations to capture the compositional semantics inherent in sentiment-bearing expressions. 
Our experimental results on real-world data showed that all models significantly outperform the state-of-the-art methods on the respective tasks. %U http://scidok.sulb.uni-saarland.de/volltexte/2013/5615/ http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[220]
J. Ramon, P. Miettinen, and J. Vreeken, “Detecting Bicliques in GF[q],” in Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2013), Prague, Czech Republic, 2013.
Export
BibTeX
@inproceedings{ramon13detecting,
  TITLE     = {Detecting Bicliques in {GF[q]}},
  AUTHOR    = {Ramon, Jan and Miettinen, Pauli and Vreeken, Jilles},
  LANGUAGE  = {eng},
  ISBN      = {978-3-642-40987-5},
  DOI       = {10.1007/978-3-642-40988-2_33},
  LOCALID   = {Local-ID: CEC81211FFBADD4DC1257C6000544E39-ramon13detecting},
  PUBLISHER = {Springer},
  YEAR      = {2013},
  MARGINALMARK = {$\bullet$},
  DATE      = {2013},
  BOOKTITLE = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2013)},
  EDITOR    = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and {\v Z}elezn{\'y}, Filip},
  PAGES     = {509--524},
  SERIES    = {Lecture Notes in Artificial Intelligence},
  VOLUME    = {8188},
  ADDRESS   = {Prague, Czech Republic},
}
Endnote
%0 Conference Proceedings %A Ramon, Jan %A Miettinen, Pauli %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Detecting Bicliques in GF[q] : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1D9A-C %F OTHER: Local-ID: CEC81211FFBADD4DC1257C6000544E39-ramon13detecting %R 10.1007/978-3-642-40988-2_33 %D 2013 %B European Conference on Machine Learning and Knowledge Discovery in Databases %Z date of event: 2013-09-23 - 2013-09-27 %C Prague, Czech Republic %B Machine Learning and Knowledge Discovery in Databases %E Blockeel, Hendrik; Kersting, Kristian; Nijssen, Siegfried; Železný, Filip %P 509 - 524 %I Springer %@ 978-3-642-40987-5 %B Lecture Notes in Artificial Intelligence %N 8188 %P 509 - 524
[221]
S. Seufert, “RDF-4G: Algorithmic Building Blocks for Large-scale Graph Analytics,” in SIGMOD’13 PhD Symposium, New York, NY, USA, 2013.
Export
BibTeX
@inproceedings{Seufert2013a,
  TITLE     = {{RDF-4G}: Algorithmic Building Blocks for Large-scale Graph Analytics},
  AUTHOR    = {Seufert, Stephan},
  LANGUAGE  = {eng},
  ISBN      = {978-1-4503-2155-6},
  DOI       = {10.1145/2483574.2483581},
  LOCALID   = {Local-ID: 24060B7FE63CE27FC1257B71003994B8-Seufert2013az},
  PUBLISHER = {ACM},
  YEAR      = {2013},
  MARGINALMARK = {$\bullet$},
  DATE      = {2013},
  BOOKTITLE = {SIGMOD{\textquoteright}13 PhD Symposium},
  EDITOR    = {Chen, Lei and Dong, Xin Luna},
  PAGES     = {67--72},
  ADDRESS   = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Seufert, Stephan %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T RDF-4G: Algorithmic Building Blocks for Large-scale Graph Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A68-5 %R 10.1145/2483574.2483581 %F OTHER: Local-ID: 24060B7FE63CE27FC1257B71003994B8-Seufert2013az %D 2013 %B SIGMOD/PODS PhD Symposium %Z date of event: 2013-06-23 - 2013-06-23 %C New York, NY, USA %B SIGMOD’13 PhD Symposium %E Chen, Lei; Dong, Xin Luna %P 67 - 72 %I ACM %@ 978-1-4503-2155-6
[222]
S. Seufert, S. Bedathur, J. Hoffart, A. Gubichev, and K. Berberich, “Efficient Computation of Relationship-centrality in Large Entity-relationship Graphs,” International Semantic Web Conference (Posters & Demos) 2013. CEUR-WS.org, Aachen, 2013.
Export
BibTeX
@inproceedings{Seufert2013az,
  TITLE     = {Efficient Computation of Relationship-centrality in Large Entity-relationship Graphs},
  AUTHOR    = {Seufert, Stephan and Bedathur, Srikanta and Hoffart, Johannes and Gubichev, Andrey and Berberich, Klaus},
  LANGUAGE  = {eng},
  ISSN      = {1613-0073},
  URL       = {http://ceur-ws.org/Vol-1035/iswc2013_poster_22.pdf},
  LOCALID   = {Local-ID: 1DEEABDE0FBFA169C1257C6800586017-Seufert2013a},
  PUBLISHER = {CEUR-WS.org},
  YEAR      = {2013},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {International Semantic Web Conference (Posters \& Demos) 2013},
  EDITOR    = {Blomqvist, Eva and Groza, Tudor},
  PAGES     = {265--268},
  SERIES    = {CEUR Workshop Proceedings},
  VOLUME    = {1035},
  ADDRESS   = {Sydney, Australia},
}
Endnote
%0 Generic %A Seufert, Stephan %A Bedathur, Srikanta %A Hoffart, Johannes %A Gubichev, Andrey %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Efficient Computation of Relationship-centrality in Large Entity-relationship Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-36D7-7 %F OTHER: Local-ID: 1DEEABDE0FBFA169C1257C6800586017-Seufert2013a %U http://ceur-ws.org/Vol-1035/iswc2013_poster_22.pdf %D 2013 %Z name of event: ISWC-PD 2013 %Z date of event: 2013-10-23 - 2013-10-23 %Z place of event: Sydney, Australia %B International Semantic Web Conference (Posters & Demos) 2013 %E Blomqvist, Eva; Groza, Tudor %P 265 - 268 %B CEUR Workshop Proceedings %N 1035 %@ false
[223]
S. Seufert, A. Anand, S. Bedathur, and G. Weikum, “FERRARI: Flexible and Efficient Reachability Range Assignment for Graph Indexing,” in 29th IEEE International Conference on Data Engineering (ICDE 2013), Brisbane, Australia, 2013.
Abstract
In this paper, we propose a scalable and highly efficient index structure for the reachability problem over graphs. We build on the well-known node interval labeling scheme where the set of vertices reachable from a particular node is compactly encoded as a collection of node identifier ranges. We impose an explicit bound on the size of the index and flexibly assign approximate reachability ranges to nodes of the graph such that the number of index probes to answer a query is minimized. The resulting tunable index structure generates a better range labeling if the space budget is increased, thus providing a direct control over the trade off between index size and the query processing performance. By using a fast recursive querying method in conjunction with our index structure, we show that in practice, reachability queries can be answered in the order of microseconds on an off-the-shelf computer -- even for the case of massive-scale real world graphs. Our claims are supported by an extensive set of experimental results using a multitude of benchmark and real-world web-scale graph datasets.
Export
BibTeX
@inproceedings{Seufert2013,
  TITLE     = {{FERRARI}: Flexible and Efficient Reachability Range Assignment for Graph Indexing},
  AUTHOR    = {Seufert, Stephan and Anand, Avishek and Bedathur, Srikanta and Weikum, Gerhard},
  LANGUAGE  = {eng},
  ISBN      = {978-1-4673-4909-3},
  DOI       = {10.1109/ICDE.2013.6544893},
  LOCALID   = {Local-ID: 0E395B1E701B8498C1257B0900346A0B-Seufert2013},
  PUBLISHER = {IEEE},
  YEAR      = {2013},
  MARGINALMARK = {$\bullet$},
  DATE      = {2013},
  ABSTRACT  = {In this paper, we propose a scalable and highly efficient index structure for the reachability problem over graphs. We build on the well-known node interval labeling scheme where the set of vertices reachable from a particular node is compactly encoded as a collection of node identifier ranges. We impose an explicit bound on the size of the index and flexibly assign approximate reachability ranges to nodes of the graph such that the number of index probes to answer a query is minimized. The resulting tunable index structure generates a better range labeling if the space budget is increased, thus providing a direct control over the trade off between index size and the query processing performance. By using a fast recursive querying method in conjunction with our index structure, we show that in practice, reachability queries can be answered in the order of microseconds on an off-the-shelf computer -- even for the case of massive-scale real world graphs. Our claims are supported by an extensive set of experimental results using a multitude of benchmark and real-world web-scale graph datasets.},
  BOOKTITLE = {29th IEEE International Conference on Data Engineering (ICDE 2013)},
  PAGES     = {1009--1020},
  ADDRESS   = {Brisbane, Australia},
}
Endnote
%0 Conference Proceedings %A Seufert, Stephan %A Anand, Avishek %A Bedathur, Srikanta %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T FERRARI: Flexible and Efficient Reachability Range Assignment for Graph Indexing : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-36CC-1 %F OTHER: Local-ID: 0E395B1E701B8498C1257B0900346A0B-Seufert2013 %R 10.1109/ICDE.2013.6544893 %D 2013 %B 29th IEEE International Conference on Data Engineering %Z date of event: 2013-04-08 - 2013-04-12 %C Brisbane, Australia %X In this paper, we propose a scalable and highly efficient index structure for the reachability problem over graphs. We build on the well-known node interval labeling scheme where the set of vertices reachable from a particular node is compactly encoded as a collection of node identifier ranges. We impose an explicit bound on the size of the index and flexibly assign approximate reachability ranges to nodes of the graph such that the number of index probes to answer a query is minimized. The resulting tunable index structure generates a better range labeling if the space budget is increased, thus providing a direct control over the trade off between index size and the query processing performance. By using a fast recursive querying method in conjunction with our index structure, we show that in practice, reachability queries can be answered in the order of microseconds on an off-the-shelf computer -- even for the case of massive-scale real world graphs. Our claims are supported by an extensive set of experimental results using a multitude of benchmark and real-world web-scale graph datasets. %B 29th IEEE International Conference on Data Engineering %P 1009 - 1020 %I IEEE %@ 978-1-4673-4909-3
[224]
A. Siu, D. B. Nguyen, and G. Weikum, “Fast Entity Recognition in Biomedical Text,” in Workshop on Data Mining for Healthcare at the 19th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD-DMH 2013), Chicago, IL, USA, 2013.
Export
BibTeX
@inproceedings{Siu13,
  TITLE     = {Fast Entity Recognition in Biomedical Text},
  AUTHOR    = {Siu, Amy and Nguyen, Dat Ba and Weikum, Gerhard},
  LANGUAGE  = {eng},
  PUBLISHER = {ACM},
  YEAR      = {2013},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {Workshop on Data Mining for Healthcare at the 19th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD-DMH 2013)},
  ADDRESS   = {Chicago, IL, USA},
}
Endnote
%0 Conference Proceedings %A Siu, Amy %A Nguyen, Dat Ba %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Fast Entity Recognition in Biomedical Text : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A4E-1 %F OTHER: 6466922B5A48D9CBC1257BCF0033414F-Siu13 %D 2013 %B 19th ACM SIGKDD Conference on Knowledge Discovery and Data Mining %Z date of event: 2013-08-11 - 2013-08-11 %C Chicago, IL, USA %B Workshop on Data Mining for Healthcare at the 19th ACM SIGKDD Conference on Knowledge Discovery and Data Mining %I ACM %U https://sites.google.com/site/kdd2013dmh/doc/dmh3192_Siu.pdf?attredirects=0&d=1
[225]
M. Spaniol, N. Prytkova, and G. Weikum, “Knowledge Linking for Online Statistics,” in Proceedings of the 59th ISI World Statistics Congress (WSC 2013), Hong Kong, China, 2013.
Export
BibTeX
@inproceedings{SPWe13,
  TITLE     = {Knowledge Linking for Online Statistics},
  AUTHOR    = {Spaniol, Marc and Prytkova, Natalia and Weikum, Gerhard},
  LANGUAGE  = {eng},
  YEAR      = {2013},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE = {Proceedings of the 59th ISI World Statistics Congress (WSC 2013)},
  ADDRESS   = {Hong Kong, China},
}
Endnote
%0 Conference Proceedings %A Spaniol, Marc %A Prytkova, Natalia %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowledge Linking for Online Statistics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5E1D-5 %D 2013 %B 59th ISI World Statistics Congress %Z date of event: 2014-08-22 - 2014-08-30 %C Hong Kong, China %B Proceedings of the 59th ISI World Statistics Congress %U http://www.statistics.gov.hk/wsc/STS018-P2-S.pdf
[226]
A. Stupar, “Soundtrack Recommendation for Images,” Universität des Saarlandes, Saarbrücken, 2013.
Abstract
The drastic increase in production of multimedia content has emphasized the research concerning its organization and retrieval. In this thesis, we address the problem of music retrieval when a set of images is given as input query, i.e., the problem of soundtrack recommendation for images. The task at hand is to recommend appropriate music to be played during the presentation of a given set of query images. To tackle this problem, we formulate a hypothesis that the knowledge appropriate for the task is contained in publicly available contemporary movies. Our approach, Picasso, employs similarity search techniques inside the image and music domains, harvesting movies to form a link between the domains. To achieve a fair and unbiased comparison between different soundtrack recommendation approaches, we proposed an evaluation benchmark. The evaluation results are reported for Picasso and the baseline approach, using the proposed benchmark. We further address two efficiency aspects that arise from the Picasso approach. First, we investigate the problem of processing top-K queries with set-defined selections and propose an index structure that aims at minimizing the query answering latency. Second, we address the problem of similarity search in high-dimensional spaces and propose two enhancements to the Locality Sensitive Hashing (LSH) scheme. We also investigate the prospects of a distributed similarity search algorithm based on LSH using the MapReduce framework. Finally, we give an overview of the PicasSound — a smartphone application based on the Picasso approach.
Export
BibTeX
@phdthesis{Stupar2012,
  TITLE    = {Soundtrack Recommendation for Images},
  AUTHOR   = {Stupar, Aleksandar},
  LANGUAGE = {eng},
  SCHOOL   = {Universit{\"a}t des Saarlandes},
  ADDRESS  = {Saarbr{\"u}cken},
  YEAR     = {2013},
  MARGINALMARK = {$\bullet$},
  DATE     = {2013},
  ABSTRACT = {The drastic increase in production of multimedia content has emphasized the research concerning its organization and retrieval. In this thesis, we address the problem of music retrieval when a set of images is given as input query, i.e., the problem of soundtrack recommendation for images. The task at hand is to recommend appropriate music to be played during the presentation of a given set of query images. To tackle this problem, we formulate a hypothesis that the knowledge appropriate for the task is contained in publicly available contemporary movies. Our approach, Picasso, employs similarity search techniques inside the image and music domains, harvesting movies to form a link between the domains. To achieve a fair and unbiased comparison between different soundtrack recommendation approaches, we proposed an evaluation benchmark. The evaluation results are reported for Picasso and the baseline approach, using the proposed benchmark. We further address two efficiency aspects that arise from the Picasso approach. First, we investigate the problem of processing top-K queries with set-defined selections and propose an index structure that aims at minimizing the query answering latency. Second, we address the problem of similarity search in high-dimensional spaces and propose two enhancements to the Locality Sensitive Hashing (LSH) scheme. We also investigate the prospects of a distributed similarity search algorithm based on LSH using the MapReduce framework. Finally, we give an overview of the PicasSound --- a smartphone application based on the Picasso approach.},
}
Endnote
%0 Thesis %A Stupar, Aleksandar %Y Michel, Sebastian %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Soundtrack Recommendation for Images : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-9794-D %I Universität des Saarlandes %C Saarbrücken %D 2013 %P 149 p. %V phd %9 phd %X The drastic increase in production of multimedia content has emphasized the research concerning its organization and retrieval. In this thesis, we address the problem of music retrieval when a set of images is given as input query, i.e., the problem of soundtrack recommendation for images. The task at hand is to recommend appropriate music to be played during the presentation of a given set of query images. To tackle this problem, we formulate a hypothesis that the knowledge appropriate for the task is contained in publicly available contemporary movies. Our approach, Picasso, employs similarity search techniques inside the image and music domains, harvesting movies to form a link between the domains. To achieve a fair and unbiased comparison between different soundtrack recommendation approaches, we proposed an evaluation benchmark. The evaluation results are reported for Picasso and the baseline approach, using the proposed benchmark. We further address two efficiency aspects that arise from the Picasso approach. First, we investigate the problem of processing top-K queries with set-defined selections and propose an index structure that aims at minimizing the query answering latency. Second, we address the problem of similarity search in high-dimensional spaces and propose two enhancements to the Locality Sensitive Hashing (LSH) scheme. 
We also investigate the prospects of a distributed similarity search algorithm based on LSH using the MapReduce framework. Finally, we give an overview of the PicasSound — a smartphone application based on the Picasso approach. %U http://scidok.sulb.uni-saarland.de/volltexte/2013/5526/ http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[227]
F. M. Suchanek, J. Hoffart, E. Kuzey, and E. Lewis-Kelham, “YAGO2s: Modular High-quality Information Extraction with an Application to Flight Planning,” in Datenbanksysteme für Business, Technologie und Web (BTW 2013), Magdeburg, Germany, 2013.
Abstract
In this paper, we present YAGO2s, the new edition of the YAGO ontology. The software architecture has been refactored from scratch, yielding a design that modularizes both code and data. This modularization enables us to add in new data sources more easily, while still maintaining the high accuracy and coherence of the ontology. Thus, we believe that YAGO2s occupies a sweetspot between a centralized design and a completely distributed design. In this demo, we present an application of this design to the task of planning a flight. Our proposed system finds flights between all airports close to the departure city to all airports close to the destination city.
Export
BibTeX
@inproceedings{yago2sdemo,
  TITLE     = {{YAGO2s}: Modular High-quality Information Extraction with an Application to Flight Planning},
  AUTHOR    = {Suchanek, Fabian M. and Hoffart, Johannes and Kuzey, Erdal and Lewis-Kelham, Edwin},
  LANGUAGE  = {eng},
  ISBN      = {978-3-88579-608-4},
  LOCALID   = {Local-ID:C1257ACD0050F94E-5BBCDB0912AAEA29C1257AD70061912D-yago2sdemo},
  PUBLISHER = {GI},
  YEAR      = {2013},
  MARGINALMARK = {$\bullet$},
  DATE      = {2013},
  ABSTRACT  = {In this paper, we present YAGO2s, the new edition of the YAGO ontology. The software architecture has been refactored from scratch, yielding a design that modularizes both code and data. This modularization enables us to add in new data sources more easily, while still maintaining the high accuracy and coherence of the ontology. Thus, we believe that YAGO2s occupies a sweetspot between a centralized design and a completely distributed design. In this demo, we present an application of this design to the task of planning a flight. Our proposed system finds flights between all airports close to the departure city to all airports close to the destination city.},
  BOOKTITLE = {Datenbanksysteme f{\"u}r Business, Technologie und Web (BTW 2013)},
  EDITOR    = {Markl, Volker},
  PAGES     = {515--518},
  SERIES    = {Lecture Notes in Informatics},
  VOLUME    = {P-214},
  ADDRESS   = {Magdeburg, Germany},
}
Endnote
%0 Conference Proceedings %A Suchanek, Fabian M. %A Hoffart, Johannes %A Kuzey, Erdal %A Lewis-Kelham, Edwin %+ Ontologies, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T YAGO2s: Modular High-quality Information Extraction with an Application to Flight Planning : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-547D-5 %F OTHER: Local-ID:C1257ACD0050F94E-5BBCDB0912AAEA29C1257AD70061912D-yago2sdemo %D 2013 %B 15. GI-Fachtagung Datenbanksysteme für Business, Technologie und Web %Z date of event: 2013-03-11 - 2013-03-15 %C Magdeburg, Germany %X In this paper, we present YAGO2s, the new edition of the YAGO ontology. The software architecture has been refactored from scratch, yielding a design that modularizes both code and data. This modularization enables us to add in new data sources more easily, while still maintaining the high accuracy and coherence of the ontology. Thus, we believe that YAGO2s occupies a sweetspot between a centralized design and a completely distributed design. In this demo, we present an application of this design to the task of planning a flight. Our proposed system finds flights between all airports close to the departure city to all airports close to the destination city. %B Datenbanksysteme für Business, Technologie und Web %E Markl, Volker %P 515 - 518 %I GI %@ 978-3-88579-608-4 %B Lecture Notes in Informatics %N P-214 %U http://www.btw-2013.de/proceedings/YAGO2s%20Modular%20HighQuality%20Information%20Extraction%20with%20an%20Application%20to%20Flight%20Planning.pdf
[228]
F. M. Suchanek and G. Weikum, “Knowledge Harvesting from Text and Web Sources,” in 29th IEEE International Conference on Data Engineering (ICDE 2013), Brisbane, Australia, 2013.
Export
BibTeX
@inproceedings{SuchanekICDE2013,
  TITLE        = {Knowledge Harvesting from Text and Web Sources},
  AUTHOR       = {Suchanek, Fabian M. and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISSN         = {1063-6382},
  ISBN         = {978-1-4673-4909-3; 978-1-4673-4908-6},
  DOI          = {10.1109/ICDE.2013.6544916},
  LOCALID      = {Local-ID: 29838011C99BB159C1257C6900498A91-Suchanek2013},
  PUBLISHER    = {IEEE},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {29th IEEE International Conference on Data Engineering (ICDE 2013)},
  PAGES        = {1250--1253},
  ADDRESS      = {Brisbane, Australia},
}
Endnote
%0 Conference Proceedings %A Suchanek, Fabian M. %A Weikum, Gerhard %+ Ontologies, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowledge Harvesting from Text and Web Sources : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-5565-2 %R 10.1109/ICDE.2013.6544916 %F OTHER: Local-ID: 29838011C99BB159C1257C6900498A91-Suchanek2013 %D 2013 %B 29th IEEE International Conference on Data Engineering %Z date of event: 2013-04-08 - 2013-04-11 %C Brisbane, Australia %B 29th IEEE International Conference on Data Engineering %P 1250 - 1253 %I IEEE %@ false
[229]
F. M. Suchanek and G. Weikum, “Knowledge Harvesting in the Big-Data Era,” in SIGMOD’13, ACM SIGMOD International Conference on Management of Data, New York, NY, USA, 2013.
Export
BibTeX
@inproceedings{Suchanek:2013:KHB:2463676.2463724,
  TITLE        = {Knowledge Harvesting in the Big-Data Era},
  AUTHOR       = {Suchanek, Fabian M. and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2037-5},
  URL          = {http://doi.acm.org/10.1145/2463676.2463724},
  DOI          = {10.1145/2463676.2463724},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {SIGMOD{\textquoteright}13, ACM SIGMOD International Conference on Management of Data},
  EDITOR       = {Ross, Kenneth and Srivastava, Divesh and Papadias, Dimitris and Papadopoulos, Stavros},
  PAGES        = {933--938},
  ADDRESS      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Suchanek, Fabian M. %A Weikum, Gerhard %+ Ontologies, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowledge Harvesting in the Big-Data Era : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-555B-9 %U http://doi.acm.org/10.1145/2463676.2463724 %R 10.1145/2463676.2463724 %D 2013 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2013-06-22 - 2013-06-27 %C New York, NY, USA %B SIGMOD’13 %E Ross, Kenneth; Srivastava, Divesh; Papadias, Dimitris; Papadopoulos, Stavros %P 933 - 938 %I ACM %@ 978-1-4503-2037-5
[230]
M. Sydow, M. Pikula, and R. Schenkel, “The Notion of Diversity in Graphical Entity Summarisation on Semantic Knowledge Graphs,” Intelligent Information Systems, vol. 41, no. 2, 2013.
Abstract
Given an entity represented by a single node q in semantic knowledge graph D, the Graphical Entity Summarisation problem (GES) consists in selecting out of D a very small surrounding graph S that constitutes a generic summary of the information concerning the entity q with given limit on size of S. This article concerns the role of diversity in this quite novel problem. It gives an overview of the diversity concept in information retrieval, and proposes how to adapt it to GES. A measure of diversity for GES, called ALC, is defined and two algorithms presented, baseline, diversity-oblivious PRECIS and diversity-aware DIVERSUM. A reported experiment shows that DIVERSUM actually achieves higher values of the ALC diversity measure than PRECIS. Next, an objective evaluation experiment demonstrates that diversity-aware algorithm is superior to the diversity-oblivious one in terms of fact selection. More precisely, DIVERSUM clearly achieves higher recall than PRECIS on ground truth reference entity summaries extracted from Wikipedia. We also report another intrinsic experiment, in which the output of diversity-aware algorithm is significantly preferred by human expert evaluators. Importantly, the user feedback clearly indicates that the notion of diversity is the key reason for the preference. In addition, the experiment is repeated twice on an anonymous sample of broad population of Internet users by means of a crowd-sourcing platform, that further confirms the results mentioned above.
Export
BibTeX
@article{SydowPS_IIS2013,
  TITLE        = {The Notion of Diversity in Graphical Entity Summarisation on Semantic Knowledge Graphs},
  AUTHOR       = {Sydow, Marcin and Pikula, Mariusz and Schenkel, Ralf},
  LANGUAGE     = {eng},
  ISSN         = {0925-9902},
  DOI          = {10.1007/s10844-013-0239-6},
  LOCALID      = {Local-ID: D5ACDA4FC2994BF7C1257B390032A07E-SydowPS_IIS2013},
  PUBLISHER    = {Springer},
  ADDRESS      = {Berlin},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  ABSTRACT     = {Given an entity represented by a single node q in semantic knowledge graph D, the Graphical Entity Summarisation problem (GES) consists in selecting out of D a very small surrounding graph S that constitutes a generic summary of the information concerning the entity q with given limit on size of S. This article concerns the role of diversity in this quite novel problem. It gives an overview of the diversity concept in information retrieval, and proposes how to adapt it to GES. A measure of diversity for GES, called ALC, is defined and two algorithms presented, baseline, diversity-oblivious PRECIS and diversity-aware DIVERSUM. A reported experiment shows that DIVERSUM actually achieves higher values of the ALC diversity measure than PRECIS. Next, an objective evaluation experiment demonstrates that diversity-aware algorithm is superior to the diversity-oblivious one in terms of fact selection. More precisely, DIVERSUM clearly achieves higher recall than PRECIS on ground truth reference entity summaries extracted from Wikipedia. We also report another intrinsic experiment, in which the output of diversity-aware algorithm is significantly preferred by human expert evaluators. Importantly, the user feedback clearly indicates that the notion of diversity is the key reason for the preference. In addition, the experiment is repeated twice on an anonymous sample of broad population of Internet users by means of a crowd-sourcing platform, that further confirms the results mentioned above.},
  JOURNAL      = {Journal of Intelligent Information Systems},
  VOLUME       = {41},
  NUMBER       = {2},
  PAGES        = {109--149},
}
Endnote
%0 Journal Article %A Sydow, Marcin %A Pikula, Mariusz %A Schenkel, Ralf %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T The Notion of Diversity in Graphical Entity Summarisation on Semantic Knowledge Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3728-B %F OTHER: Local-ID: D5ACDA4FC2994BF7C1257B390032A07E-SydowPS_IIS2013 %R 10.1007/s10844-013-0239-6 %7 2013-03-12 %D 2013 %X Given an entity represented by a single node q in semantic knowledge graph D, the Graphical Entity Summarisation problem (GES) consists in selecting out of D a very small surrounding graph S that constitutes a generic summary of the information concerning the entity q with given limit on size of S. This article concerns the role of diversity in this quite novel problem. It gives an overview of the diversity concept in information retrieval, and proposes how to adapt it to GES. A measure of diversity for GES, called ALC, is defined and two algorithms presented, baseline, diversity-oblivious PRECIS and diversity-aware DIVERSUM. A reported experiment shows that DIVERSUM actually achieves higher values of the ALC diversity measure than PRECIS. Next, an objective evaluation experiment demonstrates that diversity-aware algorithm is superior to the diversity-oblivious one in terms of fact selection. More precisely, DIVERSUM clearly achieves higher recall than PRECIS on ground truth reference entity summaries extracted from Wikipedia. We also report another intrinsic experiment, in which the output of diversity-aware algorithm is significantly preferred by human expert evaluators. Importantly, the user feedback clearly indicates that the notion of diversity is the key reason for the preference. In addition, the experiment is repeated twice on an anonymous sample of broad population of Internet users by means of a crowd-sourcing platform, that further confirms the results mentioned above. 
%J Intelligent Information Systems %V 41 %N 2 %& 109 %P 109 - 149 %I Springer %C Berlin %@ false
[231]
B. Taneva, T. Cheng, K. Chakrabarti, and Y. He, “Mining Acronym Expansions and Their Meanings Using Query Click Log,” in WWW’13, 22nd International Conference on World Wide Web, Rio de Janeiro, Brazil, 2013.
Export
BibTeX
@inproceedings{TanevaWWW2013,
  TITLE        = {Mining Acronym Expansions and Their Meanings Using Query Click Log},
  AUTHOR       = {Taneva, Bilyana and Cheng, Tao and Chakrabarti, Kaushik and He, Yeye},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2035-1},
  URL          = {http://dl.acm.org/ft_gateway.cfm?id=2488498&ftid=1374081&dwn=1&CFID=408707560&CFTOKEN=75186124},
  LOCALID      = {Local-ID: 277D3F616907C539C1257B7000743251-TanevaWWW2013},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {WWW{\textquoteright}13, 22nd International Conference on World Wide Web},
  EDITOR       = {Almeida, Virg{\'i}lio and Schwabe, Daniel and Glaser, Hartmut and Baeza-Yates, Ricardo and Moon, Sue},
  PAGES        = {1261--1272},
  ADDRESS      = {Rio de Janeiro, Brazil},
}
Endnote
%0 Conference Proceedings %A Taneva, Bilyana %A Cheng, Tao %A Chakrabarti, Kaushik %A He, Yeye %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T Mining Acronym Expansions and Their Meanings Using Query Click Log : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-372F-E %F OTHER: Local-ID: 277D3F616907C539C1257B7000743251-TanevaWWW2013 %U http://dl.acm.org/ft_gateway.cfm?id=2488498&ftid=1374081&dwn=1&CFID=408707560&CFTOKEN=75186124 %D 2013 %B 22nd International Conference on World Wide Web %Z date of event: 2013-05-13 - 2013-05-17 %C Rio de Janeiro, Brazil %B WWW’13 %E Almeida, Virgílio; Schwabe, Daniel; Glaser, Hartmut; Baeza-Yates, Ricardo; Moon, Sue %P 1261 - 1272 %I ACM %@ 978-1-4503-2035-1