Publications

2016
[1]
T. Bögel, E. Gius, J. Jacke, and J. Strötgen, “From Order to Order Switch: Mediating between Complexity and Reproducibility in the Context of Automated Literary Annotation,” in Digital Humanities 2016 (DH 2016), Krakow, Poland, 2016.
Export
BibTeX
@inproceedings{BoegelDH2016,
  title     = {From Order to Order Switch: {M}ediating between Complexity and Reproducibility in the Context of Automated Literary Annotation},
  author    = {B{\"o}gel, Thomas and Gius, Evelyn and Jacke, Janina and Str{\"o}tgen, Jannik},
  language  = {eng},
  url       = {http://dh2016.adho.org/abstracts/275},
  publisher = {Jagiellonian University \& Pedagogical University},
  year      = {2016},
  booktitle = {Digital Humanities 2016 (DH 2016)},
  pages     = {379--382},
  address   = {Krakow, Poland},
}
Endnote
%0 Conference Proceedings %A Bögel, Thomas %A Gius, Evelyn %A Jacke, Janina %A Strötgen, Jannik %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T From Order to Order Switch: Mediating between Complexity and Reproducibility in the Context of Automated Literary Annotation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-0E96-0 %D 2016 %B Digital Humanities %Z date of event: 2016-07-11 - 2016-07-16 %C Krakow, Poland %B Digital Humanities 2016 %P 379 - 382 %I Jagiellonian University & Pedagogical University %U http://dh2016.adho.org/abstracts/275
[2]
N. Boldyrev, M. Spaniol, and G. Weikum, “ACROSS: A Framework for Multi-Cultural Interlinking of Web Taxonomies,” in WebSci’16, ACM Web Science Conference, Hannover, Germany, 2016.
Export
BibTeX
@inproceedings{BoldryevWebSci2016,
  title     = {{ACROSS}: {A} Framework for Multi-Cultural Interlinking of Web Taxonomies},
  author    = {Boldyrev, Natalia and Spaniol, Marc and Weikum, Gerhard},
  language  = {eng},
  isbn      = {978-1-4503-4208-7},
  doi       = {10.1145/2908131.2908164},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WebSci'16, ACM Web Science Conference},
  pages     = {127--136},
  address   = {Hannover, Germany},
}
Endnote
%0 Conference Proceedings %A Boldyrev, Natalia %A Spaniol, Marc %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T ACROSS: A Framework for Multi-Cultural Interlinking of Web Taxonomies : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-01B6-E %R 10.1145/2908131.2908164 %D 2016 %B ACM Web Science Conference %Z date of event: 2016-05-22 - 2016-05-25 %C Hannover, Germany %B WebSci'16 %P 127 - 136 %I ACM %@ 978-1-4503-4208-7
[3]
L. Derczynski, J. Strötgen, D. Maynard, M. A. Greenwood, and M. Jung, “GATE-Time: Extraction of Temporal Expressions and Event,” in 10th edition of the Language Resources and Evaluation Conference (LREC 2016), Portorož, Slovenia. (Accepted/in press)
Export
BibTeX
@inproceedings{DerczynskiEtAl2016_LREC,
  title      = {{GATE}-Time: {E}xtraction of Temporal Expressions and Event},
  author     = {Derczynski, Leon and Str{\"o}tgen, Jannik and Maynard, Diana and Greenwood, Mark A. and Jung, Manuel},
  language   = {eng},
  year       = {2016},
  publremark = {Accepted},
  booktitle  = {10th edition of the Language Resources and Evaluation Conference (LREC 2016)},
  eid        = {915},
  address    = {Portoro{\v z}, Slovenia},
}
Endnote
%0 Conference Proceedings %A Derczynski, Leon %A Strötgen, Jannik %A Maynard, Diana %A Greenwood, Mark A. %A Jung, Manuel %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T GATE-Time: Extraction of Temporal Expressions and Event : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-4139-8 %D 2016 %B 10th Language Resources and Evaluation Conference %Z date of event: 2016-05-23 - 2016-05-28 %C Portorož, Slovenia %B 10th edition of the Language Resources and Evaluation Conference %Z sequence number: 915
[4]
X. Du, O. Emebo, A. Varde, N. Tandon, S. N. Chowdhury, and G. Weikum, “Air Quality Assessment from Social Media and Structured Data : Pollutants and Health Impacts in Urban Planning,” in Proceedings of the 2016 IEEE 32nd International Conference on Data Engineering Workshops (ICDEW 2016), Helsinki, Finland, 2016.
Export
BibTeX
@inproceedings{DuICDEW2016,
  title     = {Air Quality Assessment from Social Media and Structured Data : {Pollutants} and Health Impacts in Urban Planning},
  author    = {Du, Xu and Emebo, Onyeka and Varde, Aparna and Tandon, Niket and Chowdhury, Sreyasi Nag and Weikum, Gerhard},
  language  = {eng},
  doi       = {10.1109/ICDEW.2016.7495616},
  publisher = {IEEE},
  year      = {2016},
  date      = {2016},
  booktitle = {Proceedings of the 2016 IEEE 32nd International Conference on Data Engineering Workshops (ICDEW 2016)},
  pages     = {54--59},
  address   = {Helsinki, Finland},
}
Endnote
%0 Conference Proceedings %A Du, Xu %A Emebo, Onyeka %A Varde, Aparna %A Tandon, Niket %A Chowdhury, Sreyasi Nag %A Weikum, Gerhard %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Air Quality Assessment from Social Media and Structured Data : Pollutants and Health Impacts in Urban Planning : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-01AE-2 %R 10.1109/ICDEW.2016.7495616 %D 2016 %B IEEE 32nd International Conference on Data Engineering Workshops %Z date of event: 2016-05-16 - 2016-05-20 %C Helsinki, Finland %B Proceedings of the 2016 IEEE 32nd International Conference on Data Engineering Workshops %P 54 - 59 %I IEEE
[5]
D. Gupta, “Event Search and Analytics: Detecting Events in Semantically Annotated Corpora for Search & Analytics,” in WSDM’16, 9th ACM International Conference on Web Search and Data Mining, San Francisco, CA, USA, 2016.
Export
BibTeX
@inproceedings{GuptaWSDM2016,
  title     = {Event Search and Analytics: Detecting Events in Semantically Annotated Corpora for Search \& Analytics},
  author    = {Gupta, Dhruv},
  language  = {eng},
  isbn      = {978-1-4503-3716-8},
  doi       = {10.1145/2835776.2855083},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WSDM'16, 9th ACM International Conference on Web Search and Data Mining},
  pages     = {705--705},
  address   = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Gupta, Dhruv %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Event Search and Analytics: Detecting Events in Semantically Annotated Corpora for Search & Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-7526-7 %R 10.1145/2835776.2855083 %D 2016 %B 9th ACM International Conference on Web Search and Data Mining %Z date of event: 2016-02-22 - 2016-02-25 %C San Francisco, CA, USA %B WSDM'16 %P 705 - 705 %I ACM %@ 978-1-4503-3716-8
[6]
D. Gupta and K. Berberich, “Diversifying Search Results Using Time : An Information Retrieval Method for Historians,” in Advances in Information Retrieval (ECIR 2016), Padova, Italy, 2016.
Export
BibTeX
@inproceedings{GuptaECIR2016,
  title     = {Diversifying Search Results Using Time : An Information Retrieval Method for Historians},
  author    = {Gupta, Dhruv and Berberich, Klaus},
  language  = {eng},
  isbn      = {978-3-319-30670-4},
  doi       = {10.1007/978-3-319-30671-1_69},
  publisher = {Springer},
  year      = {2016},
  date      = {2016},
  booktitle = {Advances in Information Retrieval (ECIR 2016)},
  editor    = {Ferro, Nicola and Crestani, Fabio and Moens, Marie-Francine and Mothe, Josiane and Silvestri, Fabrizio and Di Nunzio, Giorgio Maria and Hauff, Claudia and Silvello, Gianmaria},
  pages     = {789--795},
  series    = {Lecture Notes in Computer Science},
  volume    = {9626},
  address   = {Padova, Italy},
}
Endnote
%0 Conference Proceedings %A Gupta, Dhruv %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Diversifying Search Results Using Time : An Information Retrieval Method for Historians : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-7514-F %R 10.1007/978-3-319-30671-1_69 %D 2016 %B 38th European Conference on Information Retrieval %Z date of event: 2016-03-20 - 2016-03-23 %C Padova, Italy %B Advances in Information Retrieval %E Ferro, Nicola; Crestani, Fabio; Moens, Marie-Francine; Mothe, Josiane; Silvestri, Fabrizio; Di Nunzio, Giorgio Maria; Hauff, Claudia; Silvello, Gianmaria %P 789 - 795 %I Springer %@ 978-3-319-30670-4 %B Lecture Notes in Computer Science %N 9626
[7]
D. Gupta and K. Berberich, “Diversifying Search Results Using Time,” Max-Planck-Institut für Informatik, Saarbrücken, MPI-I-2015-5-001, 2016.
Abstract
Getting an overview of a historic entity or event can be difficult in search results, especially if important dates concerning the entity or event are not known beforehand. For such information needs, users would benefit if returned results covered diverse dates, thus giving an overview of what has happened throughout history. Diversifying search results based on important dates can be a building block for applications, for instance, in digital humanities. Historians would thus be able to quickly explore longitudinal document collections by querying for entities or events without knowing associated important dates apriori. In this work, we describe an approach to diversify search results using temporal expressions (e.g., in the 1990s) from their contents. Our approach first identifies time intervals of interest to the given keyword query based on pseudo-relevant documents. It then re-ranks query results so as to maximize the coverage of identified time intervals. We present a novel and objective evaluation for our proposed approach. We test the effectiveness of our methods on the New York Times Annotated corpus and the Living Knowledge corpus, collectively consisting of around 6 million documents. Using history-oriented queries and encyclopedic resources we show that our method indeed is able to present search results diversified along time.
Export
BibTeX
@techreport{GuptaReport2015-5-001,
  title       = {Diversifying Search Results Using Time},
  author      = {Gupta, Dhruv and Berberich, Klaus},
  language    = {eng},
  issn        = {0946-011X},
  number      = {MPI-I-2015-5-001},
  institution = {Max-Planck-Institut f{\"u}r Informatik},
  address     = {Saarbr{\"u}cken},
  year        = {2016},
  abstract    = {Getting an overview of a historic entity or event can be difficult in search results, especially if important dates concerning the entity or event are not known beforehand. For such information needs, users would benefit if returned results covered diverse dates, thus giving an overview of what has happened throughout history. Diversifying search results based on important dates can be a building block for applications, for instance, in digital humanities. Historians would thus be able to quickly explore longitudinal document collections by querying for entities or events without knowing associated important dates apriori. In this work, we describe an approach to diversify search results using temporal expressions (e.g., in the 1990s) from their contents. Our approach first identifies time intervals of interest to the given keyword query based on pseudo-relevant documents. It then re-ranks query results so as to maximize the coverage of identified time intervals. We present a novel and objective evaluation for our proposed approach. We test the effectiveness of our methods on the New York Times Annotated corpus and the Living Knowledge corpus, collectively consisting of around 6 million documents. Using history-oriented queries and encyclopedic resources we show that our method indeed is able to present search results diversified along time.},
  type        = {Research Report},
}
Endnote
%0 Report %A Gupta, Dhruv %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Diversifying Search Results Using Time : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-0AA4-C %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2016 %P 51 p. %X Getting an overview of a historic entity or event can be difficult in search results, especially if important dates concerning the entity or event are not known beforehand. For such information needs, users would benefit if returned results covered diverse dates, thus giving an overview of what has happened throughout history. Diversifying search results based on important dates can be a building block for applications, for instance, in digital humanities. Historians would thus be able to quickly explore longitudinal document collections by querying for entities or events without knowing associated important dates apriori. In this work, we describe an approach to diversify search results using temporal expressions (e.g., in the 1990s) from their contents. Our approach first identifies time intervals of interest to the given keyword query based on pseudo-relevant documents. It then re-ranks query results so as to maximize the coverage of identified time intervals. We present a novel and objective evaluation for our proposed approach. We test the effectiveness of our methods on the New York Times Annotated corpus and the Living Knowledge corpus, collectively consisting of around 6 million documents. Using history-oriented queries and encyclopedic resources we show that our method indeed is able to present search results diversified along time. %B Research Report %@ false
[8]
D. Gupta, J. Strötgen, and K. Berberich, “DIGITALHISTORIAN: Search & Analytics Using Annotations,” in HistoInformatics 2016, The 3rd HistoInformatics Workshop on Computational History, Krakow, Poland, 2016.
Export
BibTeX
@inproceedings{Gupta,
  title     = {{DIGITALHISTORIAN}: {Search} \& Analytics Using Annotations},
  author    = {Gupta, Dhruv and Str{\"o}tgen, Jannik and Berberich, Klaus},
  language  = {eng},
  issn      = {1613-0073},
  url       = {urn:nbn:de:0074-1632-7},
  publisher = {CEUR-WS.org},
  year      = {2016},
  booktitle = {HistoInformatics 2016, The 3rd HistoInformatics Workshop on Computational History},
  editor    = {D{\"u}ring, Marten and Jatowt, Adam and Preiser-Kappeller, Johannes and van den Bosch, Antal},
  pages     = {5--10},
  series    = {CEUR Workshop Proceedings},
  volume    = {1632},
  address   = {Krakow, Poland},
}
Endnote
%0 Conference Proceedings %A Gupta, Dhruv %A Strötgen, Jannik %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T DIGITALHISTORIAN: Search & Analytics Using Annotations : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-0885-2 %D 2016 %B The 3rd HistoInformatics Workshop on Computational History %Z date of event: 2016-07-11 - 2016-07-11 %C Krakow, Poland %B HistoInformatics 2016 %E Düring, Marten; Jatowt, Adam; Preiser-Kappeller, Johannes; van Den Bosch, Antal %P 5 - 10 %I CEUR-WS.org %B CEUR Workshop Proceedings %N 1632 %@ false %U http://ceur-ws.org/Vol-1632/paper_1.pdf
[9]
Y. He, K. Chakrabarti, T. Cheng, and T. Tylenda, “Automatic Discovery of Attribute Synonyms Using Query Logs and Table Corpora,” in WWW’16, 25th International Conference on World Wide Web, Montréal, Canada, 2016.
Export
BibTeX
@inproceedings{He_WWW2016,
  title     = {Automatic Discovery of Attribute Synonyms Using Query Logs and Table Corpora},
  author    = {He, Yeye and Chakrabarti, Kaushik and Cheng, Tao and Tylenda, Tomasz},
  language  = {eng},
  isbn      = {978-1-4503-4143-1},
  doi       = {10.1145/2872427.2874816},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WWW'16, 25th International Conference on World Wide Web},
  pages     = {1429--1439},
  address   = {Montr{\'e}al, Canada},
}
Endnote
%0 Conference Proceedings %A He, Yeye %A Chakrabarti, Kaushik %A Cheng, Tao %A Tylenda, Tomasz %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Automatic Discovery of Attribute Synonyms Using Query Logs and Table Corpora : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-312D-5 %R 10.1145/2872427.2874816 %D 2016 %B 25th International Conference on World Wide Web %Z date of event: 2016-05-11 - 2016-05-15 %C Montréal, Canada %B WWW'16 %P 1429 - 1439 %I ACM %@ 978-1-4503-4143-1
[10]
J. Hoffart, D. Milchevski, G. Weikum, A. Anand, and J. Singh, “The Knowledge Awakens: Keeping Knowledge Bases Fresh with Emerging Entities,” in WWW’16 Companion, Montréal, Canada, 2016.
Export
BibTeX
@inproceedings{HoffartWWW2016,
  title     = {The Knowledge Awakens: {K}eeping Knowledge Bases Fresh with Emerging Entities},
  author    = {Hoffart, Johannes and Milchevski, Dragan and Weikum, Gerhard and Anand, Avishek and Singh, Jaspreet},
  language  = {eng},
  isbn      = {978-1-4503-4144-8},
  doi       = {10.1145/2872518.2890537},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WWW'16 Companion},
  pages     = {203--206},
  address   = {Montr{\'e}al, Canada},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %A Milchevski, Dragan %A Weikum, Gerhard %A Anand, Avishek %A Singh, Jaspreet %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T The Knowledge Awakens: Keeping Knowledge Bases Fresh with Emerging Entities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-01BB-4 %R 10.1145/2872518.2890537 %D 2016 %B 25th International Conference on World Wide Web %Z date of event: 2016-05-11 - 2016-05-15 %C Montréal, Canada %B WWW'16 Companion %P 203 - 206 %I ACM %@ 978-1-4503-4144-8
[11]
K. Hui and K. Berberich, “Cluster Hypothesis in Low-Cost IR Evaluation with Different Document Representations,” in WWW’16 Companion, Montréal, Canada, 2016.
Export
BibTeX
@inproceedings{HuiWWW2016,
  title     = {Cluster Hypothesis in Low-Cost {IR} Evaluation with Different Document Representations},
  author    = {Hui, Kai and Berberich, Klaus},
  language  = {eng},
  isbn      = {978-1-4503-4144-8},
  doi       = {10.1145/2872518.2889370},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WWW'16 Companion},
  pages     = {47--48},
  address   = {Montr{\'e}al, Canada},
}
Endnote
%0 Conference Proceedings %A Hui, Kai %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Cluster Hypothesis in Low-Cost IR Evaluation with Different Document Representations : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-08E3-C %R 10.1145/2872518.2889370 %D 2016 %B 25th International Conference on World Wide Web %Z date of event: 2016-05-11 - 2016-05-15 %C Montréal, Canada %B WWW'16 Companion %P 47 - 48 %I ACM %@ 978-1-4503-4144-8
[12]
S. Karaev and P. Miettinen, “Capricorn: An Algorithm for Subtropical Matrix Factorization,” in Proceedings of the 2016 SIAM International Conference on Data Mining (SDM 2016), Miami, FL, USA. (Accepted/in press)
Abstract
Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data.
Export
BibTeX
@inproceedings{karaev16capricorn,
  title      = {Capricorn: {An} Algorithm for Subtropical Matrix Factorization},
  author     = {Karaev, Sanjar and Miettinen, Pauli},
  language   = {eng},
  publisher  = {SIAM},
  year       = {2016},
  publremark = {Accepted},
  abstract   = {Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data.},
  booktitle  = {Proceedings of the 2016 SIAM International Conference on Data Mining (SDM 2016)},
  address    = {Miami, FL, USA},
}
Endnote
%0 Conference Proceedings %A Karaev, Sanjar %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Capricorn: An Algorithm for Subtropical Matrix Factorization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-542F-3 %D 2016 %B 16th SIAM International Conference on Data Mining %Z date of event: 2016-05-05 - 2016-05-07 %C Miami, FL, USA %X Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data. %B Proceedings of the 2016 SIAM International Conference on Data Mining %I SIAM
[13]
M. Krötzsch and G. Weikum, “Editorial,” Journal of Web Semantics, vol. 37/38, 2016.
Export
BibTeX
@article{Kroetzsch2016,
  title     = {Editorial},
  author    = {Kr{\"o}tzsch, Markus and Weikum, Gerhard},
  language  = {eng},
  issn      = {1570-8268},
  doi       = {10.1016/j.websem.2016.04.002},
  publisher = {Elsevier},
  address   = {Amsterdam},
  year      = {2016},
  date      = {2016},
  journal   = {Journal of Web Semantics},
  volume    = {37/38},
  pages     = {53--54},
}
Endnote
%0 Journal Article %A Krötzsch, Markus %A Weikum, Gerhard %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Editorial : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-EB8D-B %R 10.1016/j.websem.2016.04.002 %7 2016 %D 2016 %J Journal of Web Semantics %O Science, Services and Agents on the World Wide Web Web Semantics: Science, Services and Agents on the World Wide Web %V 37/38 %& 53 %P 53 - 54 %I Elsevier %C Amsterdam %@ false
[14]
E. Kuzey, J. Strötgen, V. Setty, and G. Weikum, “Temponym Tagging: Temporal Scopes for Textual Phrases,” in WWW’16 Companion, Montréal, Canada, 2016.
Export
BibTeX
@inproceedings{Kuzey:2016:TTT:2872518.2889289,
  title     = {Temponym Tagging: {T}emporal Scopes for Textual Phrases},
  author    = {Kuzey, Erdal and Str{\"o}tgen, Jannik and Setty, Vinay and Weikum, Gerhard},
  language  = {eng},
  isbn      = {978-1-4503-4144-8},
  doi       = {10.1145/2872518.2889289},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WWW'16 Companion},
  pages     = {841--842},
  address   = {Montr{\'e}al, Canada},
}
Endnote
%0 Conference Proceedings %A Kuzey, Erdal %A Strötgen, Jannik %A Setty, Vinay %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Temponym Tagging: Temporal Scopes for Textual Phrases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-4134-1 %R 10.1145/2872518.2889289 %D 2016 %B 25th International Conference on World Wide Web %Z date of event: 2016-05-11 - 2016-05-15 %C Montréal, Canada %B WWW'16 Companion %P 841 - 842 %I ACM %@ 978-1-4503-4144-8
[15]
E. Kuzey, V. Setty, J. Strötgen, and G. Weikum, “As Time Goes By: Comprehensive Tagging of Textual Phrases with Temporal Scopes,” in WWW’16, 25th International Conference on World Wide Web, Montréal, Canada, 2016.
Export
BibTeX
@inproceedings{Kuzey_WWW2016,
  title     = {As Time Goes By: {C}omprehensive Tagging of Textual Phrases with Temporal Scopes},
  author    = {Kuzey, Erdal and Setty, Vinay and Str{\"o}tgen, Jannik and Weikum, Gerhard},
  language  = {eng},
  isbn      = {978-1-4503-4143-1},
  doi       = {10.1145/2872427.2883055},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WWW'16, 25th International Conference on World Wide Web},
  pages     = {915--925},
  address   = {Montr{\'e}al, Canada},
}
Endnote
%0 Conference Proceedings %A Kuzey, Erdal %A Setty, Vinay %A Strötgen, Jannik %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T As Time Goes By: Comprehensive Tagging of Textual Phrases with Temporal Scopes : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-310D-D %R 10.1145/2872427.2883055 %D 2016 %B 25th International Conference on World Wide Web %Z date of event: 2016-05-11 - 2016-05-15 %C Montréal, Canada %B WWW'16 %P 915 - 925 %I ACM %@ 978-1-4503-4143-1
[16]
S. Metzler, S. Günnemann, and P. Miettinen, “Hyperbolae Are No Hyperbole: Modelling Communities That Are Not Cliques,” 2016. [Online]. Available: http://arxiv.org/abs/1602.04650. (arXiv: 1602.04650)
Abstract
Cliques (or quasi-cliques) are frequently used to model communities: a set of nodes where each pair is (equally) likely to be connected. However, when observing real-world communities, we see that most communities have more structure than that. In particular, the nodes can be ordered in such a way that (almost) all edges in the community lie below a hyperbola. In this paper we present three new models for communities that capture this phenomenon. Our models explain the structure of the communities differently, but we also prove that they are identical in their expressive power. Our models fit to real-world data much better than traditional block models, and allow for more in-depth understanding of the structure of the data.
Export
BibTeX
@online{Metzler_arXiv2016,
  title      = {Hyperbolae Are No Hyperbole: Modelling Communities That Are Not Cliques},
  author     = {Metzler, Saskia and G{\"u}nnemann, Stephan and Miettinen, Pauli},
  language   = {eng},
  url        = {http://arxiv.org/abs/1602.04650},
  eprint     = {1602.04650},
  eprinttype = {arXiv},
  year       = {2016},
  abstract   = {Cliques (or quasi-cliques) are frequently used to model communities: a set of nodes where each pair is (equally) likely to be connected. However, when observing real-world communities, we see that most communities have more structure than that. In particular, the nodes can be ordered in such a way that (almost) all edges in the community lie below a hyperbola. In this paper we present three new models for communities that capture this phenomenon. Our models explain the structure of the communities differently, but we also prove that they are identical in their expressive power. Our models fit to real-world data much better than traditional block models, and allow for more in-depth understanding of the structure of the data.},
}
Endnote
%0 Report %A Metzler, Saskia %A Günnemann, Stephan %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Hyperbolae Are No Hyperbole: Modelling Communities That Are Not Cliques : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-08E5-8 %U http://arxiv.org/abs/1602.04650 %D 2016 %X Cliques (or quasi-cliques) are frequently used to model communities: a set of nodes where each pair is (equally) likely to be connected. However, when observing real-world communities, we see that most communities have more structure than that. In particular, the nodes can be ordered in such a way that (almost) all edges in the community lie below a hyperbola. In this paper we present three new models for communities that capture this phenomenon. Our models explain the structure of the communities differently, but we also prove that they are identical in their expressive power. Our models fit to real-world data much better than traditional block models, and allow for more in-depth understanding of the structure of the data. %K cs.SI, Physics, Physics and Society, physics.soc-ph
[17]
A. Mishra and K. Berberich, “Leveraging Semantic Annotations to Link Wikipedia and News Archives,” Max-Planck-Institut für Informatik, Saarbrücken, MPI-I-2016-5-002, 2016.
Abstract
The incomprehensible amount of information available online has made it difficult to retrospect on past events. We propose a novel linking problem to connect excerpts from Wikipedia summarizing events to online news articles elaborating on them. To address the linking problem, we cast it into an information retrieval task by treating a given excerpt as a user query with the goal to retrieve a ranked list of relevant news articles. We find that Wikipedia excerpts often come with additional semantics, in their textual descriptions, representing the time, geolocations, and named entities involved in the event. Our retrieval model leverages text and semantic annotations as different dimensions of an event by estimating independent query models to rank documents. In our experiments on two datasets, we compare methods that consider different combinations of dimensions and find that the approach that leverages all dimensions suits our problem best.
Export
BibTeX
@techreport{MishraBerberich16,
  title       = {Leveraging Semantic Annotations to Link Wikipedia and News Archives},
  author      = {Mishra, Arunav and Berberich, Klaus},
  language    = {eng},
  issn        = {0946-011X},
  number      = {MPI-I-2016-5-002},
  institution = {Max-Planck-Institut f{\"u}r Informatik},
  address     = {Saarbr{\"u}cken},
  year        = {2016},
  abstract    = {The incomprehensible amount of information available online has made it difficult to retrospect on past events. We propose a novel linking problem to connect excerpts from Wikipedia summarizing events to online news articles elaborating on them. To address the linking problem, we cast it into an information retrieval task by treating a given excerpt as a user query with the goal to retrieve a ranked list of relevant news articles. We find that Wikipedia excerpts often come with additional semantics, in their textual descriptions, representing the time, geolocations, and named entities involved in the event. Our retrieval model leverages text and semantic annotations as different dimensions of an event by estimating independent query models to rank documents. In our experiments on two datasets, we compare methods that consider different combinations of dimensions and find that the approach that leverages all dimensions suits our problem best.},
  type        = {Research Reports},
}
Endnote
%0 Report %A Mishra, Arunav %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Leveraging Semantic Annotations to Link Wikipedia and News Archives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-5FF0-A %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2016 %P 21 p. %X The incomprehensible amount of information available online has made it difficult to retrospect on past events. We propose a novel linking problem to connect excerpts from Wikipedia summarizing events to online news articles elaborating on them. To address the linking problem, we cast it into an information retrieval task by treating a given excerpt as a user query with the goal to retrieve a ranked list of relevant news articles. We find that Wikipedia excerpts often come with additional semantics, in their textual descriptions, representing the time, geolocations, and named entities involved in the event. Our retrieval model leverages text and semantic annotations as different dimensions of an event by estimating independent query models to rank documents. In our experiments on two datasets, we compare methods that consider different combinations of dimensions and find that the approach that leverages all dimensions suits our problem best. %B Research Reports %@ false
[18]
A. Mishra and K. Berberich, “Leveraging Semantic Annotations to Link Wikipedia and News Archives,” in Advances in Information Retrieval (ECIR 2016), Padova, Italy, 2016.
Export
BibTeX
@inproceedings{MishraECIR2016,
  title     = {Leveraging Semantic Annotations to Link Wikipedia and News Archives},
  author    = {Mishra, Arunav and Berberich, Klaus},
  language  = {eng},
  isbn      = {978-3-319-30670-4},
  doi       = {10.1007/978-3-319-30671-1_3},
  publisher = {Springer},
  year      = {2016},
  date      = {2016},
  booktitle = {Advances in Information Retrieval (ECIR 2016)},
  editor    = {Ferro, Nicola and Crestani, Fabio and Moens, Marie-Francine and Mothe, Josiane and Silvestri, Fabrizio and Di Nunzio, Giorgio Maria and Hauff, Claudia and Silvello, Gianmaria},
  pages     = {30--42},
  series    = {Lecture Notes in Computer Science},
  volume    = {9626},
  address   = {Padova, Italy},
}
Endnote
%0 Conference Proceedings %A Mishra, Arunav %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Leveraging Semantic Annotations to Link Wikipedia and News Archives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-48DC-F %R 10.1007/978-3-319-30671-1_3 %D 2016 %B 38th European Conference on Information Retrieval %Z date of event: 2016-03-20 - 2016-03-23 %C Padova, Italy %B Advances in Information Retrieval %E Ferro, Nicola; Crestani, Fabio; Moens, Marie-Francine; Mothe, Josiane; Silvestri, Fabrizio; Di Nunzio, Giorgio Maria; Hauff, Claudia; Silvello, Gianmaria %P 30 - 42 %I Springer %@ 978-3-319-30670-4 %B Lecture Notes in Computer Science %N 9626
[19]
A. Mishra and K. Berberich, “Event Digest: A Holistic View on Past Events,” in SIGIR’16, 39th International ACM SIGIR Conference on Research and Development in Information Retrieval, Pisa, Italy, 2016.
Export
BibTeX
@inproceedings{MishraSIGIR2016,
  title     = {Event Digest: {A} Holistic View on Past Events},
  author    = {Mishra, Arunav and Berberich, Klaus},
  language  = {eng},
  isbn      = {978-1-4503-4069-4},
  doi       = {10.1145/2911451.2911526},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {SIGIR'16, 39th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {493--502},
  address   = {Pisa, Italy},
}
Endnote
%0 Conference Proceedings %A Mishra, Arunav %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Event Digest: A Holistic View on Past Events : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-0895-D %R 10.1145/2911451.2911526 %D 2016 %B 39th International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2016-07-17 - 2016-07-21 %C Pisa, Italy %B SIGIR'16 %P 493 - 502 %I ACM %@ 978-1-4503-4069-4
[20]
D. B. Nguyen, M. Theobald, and G. Weikum, “J-NERD: Joint Named Entity Recognition and Disambiguation with Rich Linguistic Features,” Transactions of the Association for Computational Linguistics, vol. 4, 2016.
Export
BibTeX
@article{Nguyen2016,
  title    = {{J-NERD}: {Joint Named Entity Recognition} and {Disambiguation} with Rich Linguistic Features},
  author   = {Nguyen, Dat Ba and Theobald, Martin and Weikum, Gerhard},
  language = {eng},
  issn     = {2307-387X},
  url      = {https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/view/698},
  year     = {2016},
  journal  = {Transactions of the Association for Computational Linguistics},
  volume   = {4},
  pages    = {215--229},
}
Endnote
%0 Journal Article %A Nguyen, Dat Ba %A Theobald, Martin %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T J-NERD: Joint Named Entity Recognition and Disambiguation with Rich Linguistic Features : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-0199-1 %7 2016 %D 2016 %J Transactions of the Association for Computational Linguistics %O TACL %V 4 %& 215 %P 215 - 229 %@ false %U https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/view/698
[21]
A. Rohrbach, A. Torabi, M. Rohrbach, N. Tandon, C. Pal, H. Larochelle, A. Courville, and B. Schiele, “Movie Description,” 2016. [Online]. Available: http://arxiv.org/abs/1605.03705. (arXiv: 1605.03705)
Abstract
Audio Description (AD) provides linguistic descriptions of movies and allows visually impaired people to follow a movie along with their peers. Such descriptions are by design mainly visual and thus naturally form an interesting data source for computer vision and computational linguistics. In this work we propose a novel dataset which contains transcribed ADs, which are temporally aligned to full length movies. In addition we also collected and aligned movie scripts used in prior work and compare the two sources of descriptions. In total the Large Scale Movie Description Challenge (LSMDC) contains a parallel corpus of 118,114 sentences and video clips from 202 movies. First we characterize the dataset by benchmarking different approaches for generating video descriptions. Comparing ADs to scripts, we find that ADs are indeed more visual and describe precisely what is shown rather than what should happen according to the scripts created prior to movie production. Furthermore, we present and compare the results of several teams who participated in a challenge organized in the context of the workshop "Describing and Understanding Video & The Large Scale Movie Description Challenge (LSMDC)", at ICCV 2015.
Export
BibTeX
@online{RohrbachMovie,
  title      = {Movie Description},
  author     = {Rohrbach, Anna and Torabi, Atousa and Rohrbach, Marcus and Tandon, Niket and Pal, Christopher and Larochelle, Hugo and Courville, Aaron and Schiele, Bernt},
  language   = {eng},
  url        = {http://arxiv.org/abs/1605.03705},
  eprint     = {1605.03705},
  eprinttype = {arXiv},
  year       = {2016},
  abstract   = {Audio Description (AD) provides linguistic descriptions of movies and allows visually impaired people to follow a movie along with their peers. Such descriptions are by design mainly visual and thus naturally form an interesting data source for computer vision and computational linguistics. In this work we propose a novel dataset which contains transcribed ADs, which are temporally aligned to full length movies. In addition we also collected and aligned movie scripts used in prior work and compare the two sources of descriptions. In total the Large Scale Movie Description Challenge (LSMDC) contains a parallel corpus of 118,114 sentences and video clips from 202 movies. First we characterize the dataset by benchmarking different approaches for generating video descriptions. Comparing ADs to scripts, we find that ADs are indeed more visual and describe precisely what is shown rather than what should happen according to the scripts created prior to movie production. Furthermore, we present and compare the results of several teams who participated in a challenge organized in the context of the workshop "Describing and Understanding Video \& The Large Scale Movie Description Challenge (LSMDC)", at ICCV 2015.},
}
Endnote
%0 Report %A Rohrbach, Anna %A Torabi, Atousa %A Rohrbach, Marcus %A Tandon, Niket %A Pal, Christopher %A Larochelle, Hugo %A Courville, Aaron %A Schiele, Bernt %+ Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society %T Movie Description : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-FD03-C %U http://arxiv.org/abs/1605.03705 %D 2016 %X Audio Description (AD) provides linguistic descriptions of movies and allows visually impaired people to follow a movie along with their peers. Such descriptions are by design mainly visual and thus naturally form an interesting data source for computer vision and computational linguistics. In this work we propose a novel dataset which contains transcribed ADs, which are temporally aligned to full length movies. In addition we also collected and aligned movie scripts used in prior work and compare the two sources of descriptions. In total the Large Scale Movie Description Challenge (LSMDC) contains a parallel corpus of 118,114 sentences and video clips from 202 movies. First we characterize the dataset by benchmarking different approaches for generating video descriptions. Comparing ADs to scripts, we find that ADs are indeed more visual and describe precisely what is shown rather than what should happen according to the scripts created prior to movie production. Furthermore, we present and compare the results of several teams who participated in a challenge organized in the context of the workshop "Describing and Understanding Video & The Large Scale Movie Description Challenge (LSMDC)", at ICCV 2015. %K Computer Science, Computer Vision and Pattern Recognition, cs.CV,Computer Science, Computation and Language, cs.CL
[22]
R. S. Roy, A. Suresh, N. Ganguly, and M. Choudhury, “Improving Document Ranking for Long Queries with Nested Query Segmentation,” in Advances in Information Retrieval (ECIR 2016), Padova, Italy, 2016.
Export
BibTeX
@inproceedings{RoyECIR2016,
  title     = {Improving Document Ranking for Long Queries with Nested Query Segmentation},
  author    = {Roy, Rishiraj Saha and Suresh, Anusha and Ganguly, Niloy and Choudhury, Monojit},
  language  = {eng},
  isbn      = {978-3-319-30670-4},
  doi       = {10.1007/978-3-319-30671-1_67},
  publisher = {Springer},
  year      = {2016},
  date      = {2016},
  booktitle = {Advances in Information Retrieval (ECIR 2016)},
  editor    = {Ferro, Nicola and Crestani, Fabio and Moens, Marie-Francine and Mothe, Josiane and Silvestri, Fabrizio and Di Nunzio, Giorgio Maria and Hauff, Claudia and Silvello, Gianmaria},
  pages     = {775--781},
  series    = {Lecture Notes in Computer Science},
  volume    = {9626},
  address   = {Padova, Italy},
}
Endnote
%0 Conference Proceedings %A Roy, Rishiraj Saha %A Suresh, Anusha %A Ganguly, Niloy %A Choudhury, Monojit %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T Improving Document Ranking for Long Queries with Nested Query Segmentation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-48DF-9 %R 10.1007/978-3-319-30671-1_67 %D 2016 %B 38th European Conference on Information Retrieval %Z date of event: 2016-03-20 - 2016-03-23 %C Padova, Italy %B Advances in Information Retrieval %E Ferro, Nicola; Crestani, Fabio; Moens, Marie-Francine; Mothe, Josiane; Silvestri, Fabrizio; Di Nunzio, Giorgio Maria; Hauff, Claudia; Silvello, Gianmaria %P 775 - 781 %I Springer %@ 978-3-319-30670-4 %B Lecture Notes in Computer Science %N 9626
[23]
S. Seufert, P. Ernst, S. J. Bedathur, S. K. Kondreddi, K. Berberich, and G. Weikum, “Instant Espresso: Interactive Analysis of Relationships in Knowledge Graphs,” in WWW’16 Companion, Montréal, Canada, 2016.
Export
BibTeX
@inproceedings{SeufertWWW2016,
  title     = {Instant Espresso: {I}nteractive Analysis of Relationships in Knowledge Graphs},
  author    = {Seufert, Stephan and Ernst, Patrick and Bedathur, Srikanta J. and Kondreddi, Sarath Kumar and Berberich, Klaus and Weikum, Gerhard},
  language  = {eng},
  isbn      = {978-1-4503-4144-8},
  doi       = {10.1145/2872518.2890528},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WWW'16 Companion},
  pages     = {251--254},
  address   = {Montr{\'e}al, Canada},
}
Endnote
%0 Conference Proceedings %A Seufert, Stephan %A Ernst, Patrick %A Bedathur, Srikanta J. %A Kondreddi, Sarath Kumar %A Berberich, Klaus %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Instant Espresso: Interactive Analysis of Relationships in Knowledge Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-01BD-F %R 10.1145/2872518.2890528 %D 2016 %B 25th International Conference on World Wide Web %Z date of event: 2016-05-11 - 2016-05-15 %C Montréal, Canada %B WWW'16 Companion %P 251 - 254 %I ACM %@ 978-1-4503-4144-8
[24]
D. Seyler, M. Yahya, K. Berberich, and O. Alonso, “Automated Question Generation for Quality Control in Human Computation Tasks,” in WebSci’16, ACM Web Science Conference, Hannover, Germany, 2016.
Export
BibTeX
@inproceedings{SeylerWebSci2016,
  title     = {Automated Question Generation for Quality Control in Human Computation Tasks},
  author    = {Seyler, Dominic and Yahya, Mohamed and Berberich, Klaus and Alonso, Omar},
  language  = {eng},
  isbn      = {978-1-4503-4208-7},
  doi       = {10.1145/2908131.2908210},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WebSci'16, ACM Web Science Conference},
  pages     = {360--362},
  address   = {Hannover, Germany},
}
Endnote
%0 Conference Proceedings %A Seyler, Dominic %A Yahya, Mohamed %A Berberich, Klaus %A Alonso, Omar %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Automated Question Generation for Quality Control in Human Computation Tasks : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-08DF-7 %R 10.1145/2908131.2908210 %D 2016 %B ACM Web Science Conference %Z date of event: 2016-05-22 - 2016-05-25 %C Hannover, Germany %B WebSci'16 %P 360 - 362 %I ACM %@ 978-1-4503-4208-7
[25]
J. Strötgen and M. Gertz, Domain-Sensitive Temporal Tagging. San Rafael, CA: Morgan & Claypool Publishers, 2016.
Export
BibTeX
@book{StroetgenBook2016,
  title     = {Domain-Sensitive Temporal Tagging},
  author    = {Str{\"o}tgen, Jannik and Gertz, Michael},
  language  = {eng},
  issn      = {1947-4040},
  isbn      = {9781627054591; 9781627054997},
  doi       = {10.2200/S00721ED1V01Y201606HLT036},
  publisher = {Morgan \& Claypool Publishers},
  address   = {San Rafael, CA},
  year      = {2016},
  date      = {2016},
  series    = {Synthesis Lectures on Human Language Technologies},
}
Endnote
%0 Book %A Strötgen, Jannik %A Gertz, Michael %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Domain-Sensitive Temporal Tagging : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-1777-9 %@ 9781627054591 %@ 9781627054997 %R 10.2200/S00721ED1V01Y201606HLT036 %I Morgan & Claypool Publishers %C San Rafael, CA %D 2016 %B Synthesis Lectures on Human Language Technologies %@ false
[26]
N. Tandon, C. D. Hariman, J. Urbani, A. Rohrbach, M. Rohrbach, and G. Weikum, “Commonsense in Parts: Mining Part-Whole Relations from the Web and Image Tags,” in Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence, Phoenix, AZ, USA, 2016.
Export
BibTeX
@inproceedings{TandonAAAI2016,
  title     = {Commonsense in Parts: Mining Part-Whole Relations from the Web and Image Tags},
  author    = {Tandon, Niket and Hariman, Charles Darwis and Urbani, Jacopo and Rohrbach, Anna and Rohrbach, Marcus and Weikum, Gerhard},
  language  = {eng},
  publisher = {AAAI Press},
  year      = {2016},
  booktitle = {Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence},
  pages     = {243--250},
  address   = {Phoenix, AZ, USA},
}
Endnote
%0 Conference Proceedings %A Tandon, Niket %A Hariman, Charles Darwis %A Urbani, Jacopo %A Rohrbach, Anna %A Rohrbach, Marcus %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Commonsense in Parts: Mining Part-Whole Relations from the Web and Image Tags : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-ABFE-1 %D 2016 %B Thirtieth AAAI Conference on Artificial Intelligence %Z date of event: 2016-02-12 - 2016-02-17 %C Phoenix, AZ, USA %B Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence %P 243 - 250 %I AAAI Press %U http://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/view/11962/11591
[27]
C. Teflioudi, “Algorithms for Shared-Memory Matrix Completion and Maximum Inner Product Search,” Universität des Saarlandes, Saarbrücken, 2016.
Export
BibTeX
@phdthesis{Teflioudiphd2016,
  title    = {Algorithms for Shared-Memory Matrix Completion and Maximum Inner Product Search},
  author   = {Teflioudi, Christina},
  language = {eng},
  school   = {Universit{\"a}t des Saarlandes},
  address  = {Saarbr{\"u}cken},
  year     = {2016},
  date     = {2016},
}
Endnote
%0 Thesis %A Teflioudi, Christina %Y Gemulla, Rainer %A referee: Weikum, Gerhard %+ International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Algorithms for Shared-Memory Matrix Completion and Maximum Inner Product Search : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-43FA-2 %I Universität des Saarlandes %C Saarbrücken %D 2016 %P xi, 110 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de %U http://scidok.sulb.uni-saarland.de/volltexte/2016/6469/
[28]
J. Urbani, S. Dutta, S. Gurajada, and G. Weikum, “KOGNAC: Efficient Encoding of Large Knowledge Graphs,” 2016. [Online]. Available: http://arxiv.org/abs/1604.04795. (arXiv: 1604.04795)
Abstract
Many Web applications require efficient querying of large Knowledge Graphs (KGs). We propose KOGNAC, a dictionary-encoding algorithm designed to improve SPARQL querying with a judicious combination of statistical and semantic techniques. In KOGNAC, frequent terms are detected with a frequency approximation algorithm and encoded to maximise compression. Infrequent terms are semantically grouped into ontological classes and encoded to increase data locality. We evaluated KOGNAC in combination with state-of-the-art RDF engines, and observed that it significantly improves SPARQL querying on KGs with up to 1B edges.
Export
BibTeX
@online{Urbani2016,
  title      = {{KOGNAC}: Efficient Encoding of Large Knowledge Graphs},
  author     = {Urbani, Jacopo and Dutta, Sourav and Gurajada, Sairam and Weikum, Gerhard},
  language   = {eng},
  url        = {http://arxiv.org/abs/1604.04795},
  eprint     = {1604.04795},
  eprinttype = {arXiv},
  year       = {2016},
  abstract   = {Many Web applications require efficient querying of large Knowledge Graphs (KGs). We propose KOGNAC, a dictionary-encoding algorithm designed to improve SPARQL querying with a judicious combination of statistical and semantic techniques. In KOGNAC, frequent terms are detected with a frequency approximation algorithm and encoded to maximise compression. Infrequent terms are semantically grouped into ontological classes and encoded to increase data locality. We evaluated KOGNAC in combination with state-of-the-art RDF engines, and observed that it significantly improves SPARQL querying on KGs with up to 1B edges.},
}
Endnote
%0 Report %A Urbani, Jacopo %A Dutta, Sourav %A Gurajada, Sairam %A Weikum, Gerhard %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T KOGNAC: Efficient Encoding of Large Knowledge Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-01C1-3 %U http://arxiv.org/abs/1604.04795 %D 2016 %X Many Web applications require efficient querying of large Knowledge Graphs (KGs). We propose KOGNAC, a dictionary-encoding algorithm designed to improve SPARQL querying with a judicious combination of statistical and semantic techniques. In KOGNAC, frequent terms are detected with a frequency approximation algorithm and encoded to maximise compression. Infrequent terms are semantically grouped into ontological classes and encoded to increase data locality. We evaluated KOGNAC in combination with state-of-the-art RDF engines, and observed that it significantly improves SPARQL querying on KGs with up to 1B edges. %K Computer Science, Artificial Intelligence, cs.AI
[29]
G. Weikum, “Die Abteilung Datenbanken und Informationssysteme am Max-Planck-Institut für Informatik,” Datenbank Spektrum, vol. 16, no. 1, 2016.
Export
BibTeX
@article{WeikumDBSpektrum2016,
  title     = {{Die Abteilung Datenbanken und Informationssysteme am Max-Planck-Institut f{\"u}r Informatik}},
  author    = {Weikum, Gerhard},
  language  = {deu},
  doi       = {10.1007/s13222-016-0211-z},
  publisher = {Springer},
  address   = {Berlin},
  year      = {2016},
  date      = {2016},
  journal   = {Datenbank Spektrum},
  volume    = {16},
  number    = {1},
  pages     = {77--82},
}
Endnote
%0 Journal Article %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Die Abteilung Datenbanken und Informationssysteme am Max-Planck-Institut für Informatik : %G deu %U http://hdl.handle.net/11858/00-001M-0000-002B-0194-B %R 10.1007/s13222-016-0211-z %7 2016 %D 2016 %J Datenbank Spektrum %V 16 %N 1 %& 77 %P 77 - 82 %I Springer %C Berlin
[30]
H. Wu, Y. Ning, P. Chakraborty, J. Vreeken, N. Tatti, and N. Ramakrishnan, “Generating Realistic Synthetic Population Datasets,” 2016. [Online]. Available: http://arxiv.org/abs/1602.06844. (arXiv: 1602.06844)
Abstract
Modern studies of societal phenomena rely on the availability of large datasets capturing attributes and activities of synthetic, city-level, populations. For instance, in epidemiology, synthetic population datasets are necessary to study disease propagation and intervention measures before implementation. In social science, synthetic population datasets are needed to understand how policy decisions might affect preferences and behaviors of individuals. In public health, synthetic population datasets are necessary to capture diagnostic and procedural characteristics of patient records without violating confidentialities of individuals. To generate such datasets over a large set of categorical variables, we propose the use of the maximum entropy principle to formalize a generative model such that in a statistically well-founded way we can optimally utilize given prior information about the data, and are unbiased otherwise. An efficient inference algorithm is designed to estimate the maximum entropy model, and we demonstrate how our approach is adept at estimating underlying data distributions. We evaluate this approach against both simulated data and on US census datasets, and demonstrate its feasibility using an epidemic simulation application.
Export
BibTeX
@online{Wu_arXiv2016,
  title      = {Generating Realistic Synthetic Population Datasets},
  author     = {Wu, Hao and Ning, Yue and Chakraborty, Prithwish and Vreeken, Jilles and Tatti, Nikolaj and Ramakrishnan, Naren},
  language   = {eng},
  url        = {http://arxiv.org/abs/1602.06844},
  eprint     = {1602.06844},
  eprinttype = {arXiv},
  year       = {2016},
  abstract   = {Modern studies of societal phenomena rely on the availability of large datasets capturing attributes and activities of synthetic, city-level, populations. For instance, in epidemiology, synthetic population datasets are necessary to study disease propagation and intervention measures before implementation. In social science, synthetic population datasets are needed to understand how policy decisions might affect preferences and behaviors of individuals. In public health, synthetic population datasets are necessary to capture diagnostic and procedural characteristics of patient records without violating confidentialities of individuals. To generate such datasets over a large set of categorical variables, we propose the use of the maximum entropy principle to formalize a generative model such that in a statistically well-founded way we can optimally utilize given prior information about the data, and are unbiased otherwise. An efficient inference algorithm is designed to estimate the maximum entropy model, and we demonstrate how our approach is adept at estimating underlying data distributions. We evaluate this approach against both simulated data and on US census datasets, and demonstrate its feasibility using an epidemic simulation application.},
}
Endnote
%0 Report %A Wu, Hao %A Ning, Yue %A Chakraborty, Prithwish %A Vreeken, Jilles %A Tatti, Nikolaj %A Ramakrishnan, Naren %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Generating Realistic Synthetic Population Datasets : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-08F9-B %U http://arxiv.org/abs/1602.06844 %D 2016 %X Modern studies of societal phenomena rely on the availability of large datasets capturing attributes and activities of synthetic, city-level, populations. For instance, in epidemiology, synthetic population datasets are necessary to study disease propagation and intervention measures before implementation. In social science, synthetic population datasets are needed to understand how policy decisions might affect preferences and behaviors of individuals. In public health, synthetic population datasets are necessary to capture diagnostic and procedural characteristics of patient records without violating confidentialities of individuals. To generate such datasets over a large set of categorical variables, we propose the use of the maximum entropy principle to formalize a generative model such that in a statistically well-founded way we can optimally utilize given prior information about the data, and are unbiased otherwise. An efficient inference algorithm is designed to estimate the maximum entropy model, and we demonstrate how our approach is adept at estimating underlying data distributions. We evaluate this approach against both simulated data and on US census datasets, and demonstrate its feasibility using an epidemic simulation application. %K Computer Science, Databases, cs.DB
[31]
M. Yahya, D. Barbosa, K. Berberich, Q. Wang, and G. Weikum, “Relationship Queries on Extended Knowledge Graphs,” in WSDM’16, 9th ACM International Conference on Web Search and Data Mining, San Francisco, CA, USA, 2016.
Export
BibTeX
@inproceedings{YahyaWSDM2016,
  title     = {Relationship Queries on Extended Knowledge Graphs},
  author    = {Yahya, Mohamed and Barbosa, Denilson and Berberich, Klaus and Wang, Qiuyue and Weikum, Gerhard},
  language  = {eng},
  isbn      = {978-1-4503-3716-8},
  doi       = {10.1145/2835776.2835795},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WSDM'16, 9th ACM International Conference on Web Search and Data Mining},
  pages     = {605--614},
  address   = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Yahya, Mohamed %A Barbosa, Denilson %A Berberich, Klaus %A Wang, Qiuyue %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Relationship Queries on Extended Knowledge Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-ABAA-0 %R 10.1145/2835776.2835795 %D 2016 %B 9th ACM International Conference on Web Search and Data Mining %Z date of event: 2016-02-22 - 2016-02-25 %C San Francisco, CA, USA %B WSDM'16 %P 605 - 614 %I ACM %@ 978-1-4503-3716-8
[32]
M. Yahya and H. Schütze, “Question Answering and Query Processing for Extended Knowledge Graphs,” Universität des Saarlandes, Saarbrücken, 2016.
Export
BibTeX
@phdthesis{yahyaphd2016,
  title    = {Question Answering and Query Processing for Extended Knowledge Graphs},
  author   = {Yahya, Mohamed and Sch{\"u}tze, Hinrich},
  language = {eng},
  school   = {Universit{\"a}t des Saarlandes},
  address  = {Saarbr{\"u}cken},
  year     = {2016},
  date     = {2016},
}
Endnote
%0 Thesis %A Yahya, Mohamed %Y Weikum, Gerhard %A Schütze, Hinrich %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Question Answering and Query Processing for Extended Knowledge Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-48C2-7 %I Universität des Saarlandes %C Saarbrücken %D 2016 %P x, 160 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de %U http://scidok.sulb.uni-saarland.de/volltexte/2016/6476/
[33]
H. Zhang and V. Setty, “Finding Diverse Needles in a Haystack of Comments -- Social Media Exploration for News,” in WebSci’16, ACM Web Science Conference, Hannover, Germany, 2016.
Export
BibTeX
@inproceedings{ZhangWebSci2016,
  title     = {Finding Diverse Needles in a Haystack of Comments -- Social Media Exploration for News},
  author    = {Zhang, Hang and Setty, Vinay},
  language  = {eng},
  isbn      = {978-1-4503-4208-7},
  doi       = {10.1145/2908131.2908168},
  publisher = {ACM},
  year      = {2016},
  date      = {2016},
  booktitle = {WebSci'16, ACM Web Science Conference},
  pages     = {286--290},
  address   = {Hannover, Germany},
}
Endnote
%0 Conference Proceedings %A Zhang, Hang %A Setty, Vinay %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Finding Diverse Needles in a Haystack of Comments -- Social Media Exploration for News : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002B-020A-C %R 10.1145/2908131.2908168 %D 2016 %B ACM Web Science Conference %Z date of event: 2016-05-22 - 2016-05-25 %C Hannover, Germany %B WebSci'16 %P 286 - 290 %I ACM %@ 978-1-4503-4208-7
2015
[34]
S. Abiteboul, L. Dong, O. Etzioni, D. Srivastava, G. Weikum, J. Stoyanovich, and F. M. Suchanek, “The Elephant in the Room: Getting Value from Big Data,” in Proceedings of the 18th International Workshop on Web and Databases (WebDB 2015), Melbourne, Australia, 2015.
Export
BibTeX
@inproceedings{AbiteboulWebDB2015,
  title        = {The Elephant in the Room: {G}etting Value from {Big Data}},
  author       = {Abiteboul, Serge and Dong, Luna and Etzioni, Oren and Srivastava, Divesh and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-3627-7},
  doi          = {10.1145/2767109.2770014},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Proceedings of the 18th International Workshop on Web and Databases (WebDB 2015)},
  editor       = {Stoyanovich, Julia and Suchanek, Fabian M.},
  pages        = {1--5},
  address      = {Melbourne, Australia},
}
Endnote
%0 Conference Proceedings %A Abiteboul , Serge %A Dong, Luna %A Etzioni, Oren %A Srivastava, Divesh %A Weikum, Gerhard %A Stoyanovich, Julia %A Suchanek, Fabian M. %+ External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Télécom ParisTech %T The Elephant in the Room: Getting Value from Big Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0027-D3F2-F %R 10.1145/2767109.2770014 %D 2015 %B 18th International Workshop on the Web and Databases %Z date of event: 2015-05-31 - 2015-05-31 %C Melbourne, Australia %B Proceedings of the 18th International Workshop on Web and Databases %E Stoyanovich, Julia; Suchanek, Fabian M. %P 1 - 5 %I ACM %@ 978-1-4503-3627-7
[35]
A. Abujabal and K. Berberich, “Important Events in the Past, Present, and Future,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{AbjuabalWWW2015,
  title        = {Important Events in the Past, Present, and Future},
  author       = {Abujabal, Abdalghani and Berberich, Klaus},
  language     = {eng},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2741692},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {WWW'15 Companion},
  pages        = {1315--1320},
  address      = {Florence, Italy},
}
Endnote
%0 Conference Proceedings %A Abujabal, Abdalghani %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Important Events in the Past, Present, and Future : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-E33A-8 %R 10.1145/2740908.2741692 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-04-18 - 2015-04-22 %C Florence, Italy %B WWW'15 Companion %P 1315 - 1320 %I ACM %@ 978-1-4503-3473-0
[36]
A. Abujabal, “Mining Past, Present, and Future,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{AbujabalMaster2015,
  title        = {Mining Past, Present, and Future},
  author       = {Abujabal, Abdalghani},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Abujabal, Abdalghani %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Mining Past, Present, and Future : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-A974-2 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P XII, 86 p. %V master %9 master
[37]
A. Anagnostopoulos, L. Becchetti, I. Bordino, S. Leonardi, I. Mele, and P. Sankowski, “Stochastic Query Covering for Fast Approximate Document Retrieval,” ACM Transactions on Information Systems, vol. 33, no. 3, 2015.
Export
BibTeX
@article{Anagnostopoulos:TOIS,
  title        = {Stochastic Query Covering for Fast Approximate Document Retrieval},
  author       = {Anagnostopoulos, Aris and Becchetti, Luca and Bordino, Ilaria and Leonardi, Stefano and Mele, Ida and Sankowski, Piotr},
  language     = {eng},
  issn         = {1046-8188},
  doi          = {10.1145/2699671},
  publisher    = {ACM},
  address      = {New York, NY},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  journal      = {ACM Transactions on Information Systems},
  volume       = {33},
  number       = {3},
  pages        = {1--35},
  eid          = {11},
}
Endnote
%0 Journal Article %A Anagnostopoulos, Aris %A Becchetti, Luca %A Bordino, Ilaria %A Leonardi, Stefano %A Mele, Ida %A Sankowski, Piotr %+ External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Stochastic Query Covering for Fast Approximate Document Retrieval : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-B6C7-2 %R 10.1145/2699671 %7 2015 %D 2015 %J ACM Transactions on Information Systems %O TOIS %V 33 %N 3 %& 1 %P 1 - 35 %Z sequence number: 11 %I ACM %C New York, NY %@ false
[38]
A. Anagnostopoulos, L. Becchetti, A. Fazzone, I. Mele, and M. Riondato, “The Importance of Being Expert: Efficient Max-Finding in Crowdsourcing,” in SIGMOD’15, ACM SIGMOD International Conference on Management of Data, Melbourne, Victoria, Australia, 2015.
Export
BibTeX
@inproceedings{Anagnostopoulos:SIGMOD2015,
  title        = {The Importance of Being Expert: Efficient Max-Finding in Crowdsourcing},
  author       = {Anagnostopoulos, Aris and Becchetti, Luca and Fazzone, Adriano and Mele, Ida and Riondato, Matteo},
  language     = {eng},
  isbn         = {978-1-4503-2758-9},
  doi          = {10.1145/2723372.2723722},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {SIGMOD'15, ACM SIGMOD International Conference on Management of Data},
  pages        = {983--998},
  address      = {Melbourne, Victoria, Australia},
}
Endnote
%0 Conference Proceedings %A Anagnostopoulos, Aris %A Becchetti, Luca %A Fazzone, Adriano %A Mele, Ida %A Riondato, Matteo %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T The Importance of Being Expert: Efficient Max-Finding in Crowdsourcing : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-B6BE-7 %R 10.1145/2723372.2723722 %D 2015 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2015-05-31 - 2015-06-04 %C Melbourne, Victoria, Australia %B SIGMOD'15 %P 983 - 998 %I ACM %@ 978-1-4503-2758-9
[39]
K. Athukorala, D. Głowacka, G. Jacucci, A. Oulasvirta, and J. Vreeken, “Is Exploratory Search Different? A Comparison of Information Search Behavior for Exploratory and Lookup Tasks,” Journal of the Association for Information Science and Technology, 2015.
Export
BibTeX
@article{VreekenSearch2015,
  title        = {Is Exploratory Search Different? A Comparison of Information Search Behavior for Exploratory and Lookup Tasks},
  author       = {Athukorala, Kumaripaba and G{\l}owacka, Dorota and Jacucci, Giulio and Oulasvirta, Antti and Vreeken, Jilles},
  language     = {eng},
  issn         = {2330-1643},
  doi          = {10.1002/asi.23617},
  publisher    = {Wiley},
  address      = {Chichester},
  year         = {2015},
  marginalmark = {$\bullet$},
  journal      = {Journal of the Association for Information Science and Technology},
  pages        = {1--17},
}
Endnote
%0 Journal Article %A Athukorala, Kumaripaba %A Głowacka, Dorota %A Jacucci, Giulio %A Oulasvirta, Antti %A Vreeken, Jilles %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Is Exploratory Search Different? A Comparison of Information Search Behavior for Exploratory and Lookup Tasks : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-E6A7-D %R 10.1002/asi.23617 %7 2015-10-22 %D 2015 %8 22.10.2015 %J Journal of the Association for Information Science and Technology %& 1 %P 1 - 17 %I Wiley %C Chichester %@ false
[40]
H. R. Bazoobandi, S. de Rooij, J. Urbani, A. ten Teije, F. van Harmelen, and H. Bal, “A Compact In-Memory Dictionary for RDF Data,” in The Semantic Web. Latest Advances and New Domains, Portoroz, Slovenia, 2015.
Export
BibTeX
@inproceedings{Urbanilncs15,
  title        = {A Compact In-Memory Dictionary for {RDF} Data},
  author       = {Bazoobandi, Hamid R. and de Rooij, Steve and Urbani, Jacopo and ten Teije, Annette and van Harmelen, Frank and Bal, Henri},
  language     = {eng},
  isbn         = {978-3-319-18817-1},
  doi          = {10.1007/978-3-319-18818-8_13},
  publisher    = {Springer},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {The Semantic Web. Latest Advances and New Domains},
  pages        = {205--220},
  series       = {Lecture Notes in Computer Science},
  volume       = {9088},
  address      = {Portoroz, Slovenia},
}
Endnote
%0 Conference Proceedings %A Bazoobandi, Hamid R. %A de Rooij, Steve %A Urbani, Jacopo %A ten Teije, Annette %A van Harmelen, Frank %A Bal, Henri %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T A Compact In-Memory Dictionary for RDF Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-F1A6-9 %R 10.1007/978-3-319-18818-8_13 %D 2015 %B 12th European Semantic Web Conference %Z date of event: 2015-05-31 - 2015-06-04 %C Portoroz, Slovenia %B The Semantic Web. Latest Advances and New Domains %P 205 - 220 %I Springer %@ 978-3-319-18817-1 %B Lecture Notes in Computer Science %N 9088
[41]
K. Budhathoki and J. Vreeken, “The Difference and the Norm - Characterising Similarities and Differences Between Databases,” in Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015), Porto, Portugal, 2015.
Export
BibTeX
@inproceedings{BudhathokiECML2015,
  title        = {The Difference and the Norm -- Characterising Similarities and Differences Between Databases},
  author       = {Budhathoki, Kailash and Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-3-319-23524-0},
  doi          = {10.1007/978-3-319-23525-7_13},
  publisher    = {Springer},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015)},
  editor       = {Appice, Annalisa and Pereira Rodrigues, Pedro and Gama, Jo{\~a}o and Jorge, Al{\'i}pio and Soares, Carlos},
  pages        = {206--223},
  series       = {Lecture Notes in Artificial Intelligence},
  volume       = {9285},
  address      = {Porto, Portugal},
}
Endnote
%0 Conference Proceedings %A Budhathoki, Kailash %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T The Difference and the Norm - Characterising Similarities and Differences Between Databases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-2271-F %R 10.1007/978-3-319-23525-7_13 %D 2015 %B European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2015-09-07 - 2015-09-11 %C Porto, Portugal %B Machine Learning and Knowledge Discovery in Databases %E Appice, Annalisa; Pereira Rodrigues, Pedro; Gama, João; Jorge, Alípio; Soares, Carlos %P 206 - 223 %I Springer %@ 978-3-319-23524-0 %B Lecture Notes in Artificial Intelligence %N 9285
[42]
K. Budhathoki, “Correlation by Compression,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{BudhathokiMaster2015,
  title        = {Correlation by Compression},
  author       = {Budhathoki, Kailash},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Budhathoki, Kailash %Y Vreeken, Jilles %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Correlation by Compression : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-0753-D %I Universität des Saarlandes %C Saarbrücken %D 2015 %P X, 56 p. %V master %9 master
[43]
P. Chau, J. Vreeken, M. van Leeuwen, and C. Faloutsos, Eds., Proceedings of the ACM SIGKDD 2015 Full-day Workshop on Interactive Data Exploration and Analytics. ACM, 2015.
Export
BibTeX
@proceedings{chau:15:idea,
  title        = {Proceedings of the ACM SIGKDD 2015 Full-day Workshop on Interactive Data Exploration and Analytics (IDEA 2015)},
  editor       = {Chau, Polo and Vreeken, Jilles and van Leeuwen, Matthijs and Faloutsos, Christos},
  language     = {eng},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  pages        = {72 p.},
  address      = {Sydney, Australia},
}
Endnote
%0 Conference Proceedings %E Chau, Polo %E Vreeken, Jilles %E van Leeuwen, Matthijs %E Faloutsos, Christos %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Proceedings of the ACM SIGKDD 2015 Full-day Workshop on Interactive Data Exploration and Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-578A-0 %I ACM %D 2015 %B ACM SIGKDD 2015 Full-day Workshop on Interactive Data Exploration and Analytics %Z date of event: 2015-08-10 - 2015-08-10 %D 2015 %C Sydney, Australia %P 72 p. %U http://poloclub.gatech.edu/idea2015/papers/idea15-proceedings.pdf
[44]
D. Dedik, “Robust Type Classification of Out of Knowledge Base Entities,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{DedikMaster2015,
  title        = {Robust Type Classification of Out of Knowledge Base Entities},
  author       = {Dedik, Darya},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Dedik, Darya %Y Weikum, Gerhard %A referee: Spaniol, Marc %+ International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Robust Type Classification of Out of Knowledge Base Entities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0026-C0EC-F %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 65 p. %V master %9 master
[45]
L. Del Corro, “Methods for Open Information Extraction and Sense Disambiguation on Natural Language Text,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@phdthesis{delcorrophd15,
  title        = {Methods for Open Information Extraction and Sense Disambiguation on Natural Language Text},
  author       = {Del Corro, Luciano},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Del Corro, Luciano %Y Gemulla, Rainer %A referee: Ponzetto, Simone Paolo %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Methods for Open Information Extraction and Sense Disambiguation on Natural Language Text : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-B3DB-3 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P xiv, 101 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2016/6346/ %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[46]
L. Del Corro, A. Abujabal, R. Gemulla, and G. Weikum, “FINET: Context-Aware Fine-Grained Named Entity Typing,” in Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015), Lisbon, Portugal, 2015.
Export
BibTeX
@inproceedings{delcorro-EtAl:2015:EMNLP,
  title        = {{FINET}: {C}ontext-Aware Fine-Grained Named Entity Typing},
  author       = {Del Corro, Luciano and Abujabal, Abdalghani and Gemulla, Rainer and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-941643-32-7},
  url          = {https://aclweb.org/anthology/D/D15/D15-1103},
  publisher    = {ACL},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015)},
  pages        = {868--878},
  address      = {Lisbon, Portugal},
}
Endnote
%0 Conference Proceedings %A Del Corro, Luciano %A Abujabal, Abdalghani %A Gemulla, Rainer %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T FINET: Context-Aware Fine-Grained Named Entity Typing : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49C3-C %U https://aclweb.org/anthology/D/D15/D15-1103 %D 2015 %B Conference on Empirical Methods in Natural Language Processing %Z date of event: 2015-09-17 - 2015-09-21 %C Lisbon, Portugal %B Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing %P 868 - 878 %I ACL %@ 978-1-941643-32-7 %U https://www.cs.cmu.edu/~ark/EMNLP-2015/proceedings/EMNLP/pdf/EMNLP103.pdf
[47]
S. Dutta, S. Bhattacherjee, and A. Narang, “Mining Wireless Intelligence using Unsupervised Edge and Core Analytics,” in 2nd Workshop on Smarter Planet and Big Data Analytics, Goa, India. (Accepted/in press)
Export
BibTeX
@inproceedings{SouSPBDA2015,
  title        = {Mining Wireless Intelligence using Unsupervised Edge and Core Analytics},
  author       = {Dutta, Sourav and Bhattacherjee, Souvik and Narang, Ankur},
  language     = {eng},
  year         = {2015},
  publremark   = {Accepted},
  marginalmark = {$\bullet$},
  booktitle    = {2nd Workshop on Smarter Planet and Big Data Analytics},
  address      = {Goa, India},
}
Endnote
%0 Conference Proceedings %A Dutta, Sourav %A Bhattacherjee, Souvik %A Narang, Ankur %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Mining Wireless Intelligence using Unsupervised Edge and Core Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54B5-0 %D 2015 %B 2nd Workshop on Smarter Planet and Big Data Analytics %Z date of event: 2015-01-04 - 2015-01-07 %C Goa, India %B 2nd Workshop on Smarter Planet and Big Data Analytics
[48]
S. Dutta, “MIST: Top-k Approximate Sub-String Mining using Triplet Statistical Significance,” in Advances in Information Retrieval (ECIR 2015), Vienna, Austria, 2015.
Export
BibTeX
@inproceedings{SouECIR2015,
  title        = {{MIST}: Top-k Approximate Sub-String Mining using Triplet Statistical Significance},
  author       = {Dutta, Sourav},
  language     = {eng},
  isbn         = {978-3-319-16353-6},
  doi          = {10.1007/978-3-319-16354-3_31},
  publisher    = {Springer},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Advances in Information Retrieval (ECIR 2015)},
  editor       = {Hanbury, Allan and Kazai, Gabriella and Rauber, Andreas and Fuhr, Norbert},
  pages        = {284--290},
  series       = {Lecture Notes in Computer Science},
  volume       = {9022},
  address      = {Vienna, Austria},
}
Endnote
%0 Conference Proceedings %A Dutta, Sourav %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T MIST: Top-k Approximate Sub-String Mining using Triplet Statistical Significance : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54B2-5 %R 10.1007/978-3-319-16354-3_31 %D 2015 %B 37th European Conference on Information Retrieval %Z date of event: 2015-03-29 - 2015-04-02 %C Vienna, Austria %B Advances in Information Retrieval %E Hanbury, Allan; Kazai, Gabriella; Rauber, Andreas; Fuhr, Norbert %P 284 - 290 %I Springer %@ 978-3-319-16353-6 %B Lecture Notes in Computer Science %N 9022
[49]
S. Dutta and G. Weikum, “Cross-document Co-reference Resolution using Sample-based Clustering with Knowledge Enrichment,” Transactions of the Association for Computational Linguistics, vol. 3, 2015.
Export
BibTeX
@article{SouTACL2015,
  title        = {Cross-document Co-reference Resolution using Sample-based Clustering with Knowledge Enrichment},
  author       = {Dutta, Sourav and Weikum, Gerhard},
  language     = {eng},
  issn         = {2307-387X},
  publisher    = {ACL},
  address      = {Stroudsburg, PA},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  journal      = {Transactions of the Association for Computational Linguistics},
  volume       = {3},
  pages        = {15--28},
}
Endnote
%0 Journal Article %A Dutta, Sourav %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Cross-document Co-reference Resolution using Sample-based Clustering with Knowledge Enrichment : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54B7-C %7 2015 %D 2015 %J Transactions of the Association for Computational Linguistics %O TACL %V 3 %& 15 %P 15 - 28 %I ACL %C Stroudsburg, PA %@ false
[50]
S. Dutta and G. Weikum, “C3EL: A Joint Model for Cross-Document Co-Reference Resolution and Entity Linking,” in Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015), Lisbon, Portugal, 2015.
Export
BibTeX
@inproceedings{dutta-weikum:2015:EMNLP,
  title        = {{C3EL}: {A} Joint Model for Cross-Document Co-Reference Resolution and Entity Linking},
  author       = {Dutta, Sourav and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-941643-32-7},
  url          = {https://aclweb.org/anthology/D/D15/D15-1101},
  publisher    = {ACL},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015)},
  pages        = {846--856},
  address      = {Lisbon, Portugal},
}
Endnote
%0 Conference Proceedings %A Dutta, Sourav %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T C3EL: A Joint Model for Cross-Document Co-Reference Resolution and Entity Linking : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49C1-0 %U https://aclweb.org/anthology/D/D15/D15-1101 %D 2015 %B Conference on Empirical Methods in Natural Language Processing %Z date of event: 2015-09-17 - 2015-09-21 %C Lisbon, Portugal %B Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing %P 846 - 856 %I ACL %@ 978-1-941643-32-7 %U https://www.cs.cmu.edu/~ark/EMNLP-2015/proceedings/EMNLP/pdf/EMNLP101.pdf
[51]
P. Ernst, A. Siu, and G. Weikum, “KnowLife: A Versatile Approach for Constructing a Large Knowledge Graph for Biomedical Sciences,” BMC Bioinformatics, vol. 16, no. 1, 2015.
Export
BibTeX
@article{ErnstSiuWeikum2015,
  title        = {{KnowLife}: A Versatile Approach for Constructing a Large Knowledge Graph for Biomedical Sciences},
  author       = {Ernst, Patrick and Siu, Amy and Weikum, Gerhard},
  language     = {eng},
  issn         = {1471-2105},
  url          = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4448285&tool=pmcentrez&rendertype=abstract},
  doi          = {10.1186/s12859-015-0549-5},
  publisher    = {BioMed Central},
  address      = {London},
  year         = {2015},
  marginalmark = {$\bullet$},
  journal      = {BMC Bioinformatics},
  volume       = {16},
  number       = {1},
  eid          = {157},
}
Endnote
%0 Journal Article %A Ernst, Patrick %A Siu, Amy %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T KnowLife: A Versatile Approach for Constructing a Large Knowledge Graph for Biomedical Sciences : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0027-7AB7-0 %F OTHER: pmcidPMC4448285 %F OTHER: pmc-uid4448285 %F OTHER: publisher-id549 %R 10.1186/s12859-015-0549-5 %U http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4448285&tool=pmcentrez&rendertype=abstract %7 2015-05-14 %D 2015 %8 14.05.2015 %K Relation extraction %J BMC Bioinformatics %V 16 %N 1 %Z sequence number: 157 %I BioMed Central %C London %@ false
[52]
M. Gad-Elrab, “AIDArabic+ Named Entity Disambiguation for Arabic Text,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{Gad-ElrabMaster2015,
  title        = {{AIDArabic}+ Named Entity Disambiguation for Arabic Text},
  author       = {Gad-Elrab, Mohamed},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Gad-Elrab, Mohamed %Y Weikum, Gerhard %A referee: Berberich, Klaus %+ International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T AIDArabic+ Named Entity Disambiguation for Arabic Text : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-0F70-5 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 56 p. %V master %9 master
[53]
M. H. Gad-Elrab, M. A. Yosef, and G. Weikum, “Named Entity Disambiguation for Resource-poor Languages,” in ESAIR’15, Eighth Workshop on Exploiting Semantic Annotations in Information Retrieval, Melbourne, Australia, 2015.
Export
BibTeX
@inproceedings{Gad-ElrabESAIR2015,
  title        = {Named Entity Disambiguation for Resource-poor Languages},
  author       = {Gad-Elrab, Mohamed H. and Yosef, Mohamed Amir and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-3790-8},
  doi          = {10.1145/2810133.2810138},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {ESAIR'15, Eighth Workshop on Exploiting Semantic Annotations in Information Retrieval},
  editor       = {Alonso, Omar and Kamps, Jaap and Karlgren, Jussi},
  pages        = {29--34},
  address      = {Melbourne, Australia},
}
Endnote
%0 Conference Proceedings %A Gad-Elrab, Mohamed H. %A Yosef, Mohamed Amir %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Named Entity Disambiguation for Resource-poor Languages : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-077F-B %R 10.1145/2810133.2810138 %D 2015 %B Eighth Workshop on Exploiting Semantic Annotations in Information Retrieval %Z date of event: 2015-10-23 - 2015-10-23 %C Melbourne, Australia %B ESAIR'15 %E Alonso, Omar; Kamps, Jaap; Karlgren, Jussi %P 29 - 34 %I ACM %@ 978-1-4503-3790-8
[54]
M. H. Gad-Elrab, M. A. Yosef, and G. Weikum, “EDRAK: Entity-Centric Data Resource for Arabic Knowledge,” in The Second Workshop on Arabic Natural Language Processing (ANLP 2015), Beijing, China, 2015.
Export
BibTeX
@inproceedings{Gad-ElrabAnLP2015,
  title        = {{EDRAK}: {E}ntity-Centric Data Resource for {Arabic} Knowledge},
  author       = {Gad-Elrab, Mohamed H. and Yosef, Mohamed Amir and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-941643-58-7},
  publisher    = {ACL},
  year         = {2015},
  marginalmark = {$\bullet$},
  booktitle    = {The Second Workshop on Arabic Natural Language Processing (ANLP 2015)},
  pages        = {191--200},
  address      = {Beijing, China},
}
Endnote
%0 Conference Proceedings %A Gad-Elrab, Mohamed H. %A Yosef, Mohamed Amir %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T EDRAK: Entity-Centric Data Resource for Arabic Knowledge : %G eng %U http://hdl.handle.net/11858/00-001M-0000-002A-0773-3 %D 2015 %B The Second Workshop on Arabic Natural Language Processing %Z date of event: 2015-07-26 - 2015-07-31 %C Beijing, China %B The Second Workshop on Arabic Natural Language Processing %P 191 - 200 %I ACL %@ 978-1-941643-58-7
[55]
L. Galárraga, C. Teflioudi, K. Hose, and F. M. Suchanek, “Fast Rule Mining in Ontological Knowledge Bases with AMIE+,” The VLDB Journal, vol. 24, no. 6, 2015.
Export
BibTeX
@article{Galarrag2015,
  title        = {Fast Rule Mining in Ontological Knowledge Bases with {AMIE}+},
  author       = {Gal{\'a}rraga, Luis and Teflioudi, Christina and Hose, Katja and Suchanek, Fabian M.},
  language     = {eng},
  issn         = {1066-8888},
  doi          = {10.1007/s00778-015-0394-1},
  publisher    = {Springer},
  address      = {Berlin},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  journal      = {The VLDB Journal},
  volume       = {24},
  number       = {6},
  pages        = {707--730},
}
Endnote
%0 Journal Article %A Galárraga, Luis %A Teflioudi, Christina %A Hose, Katja %A Suchanek, Fabian M. %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Télécom ParisTech %T Fast Rule Mining in Ontological Knowledge Bases with AMIE+ : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-3510-3 %R 10.1007/s00778-015-0394-1 %7 2015 %D 2015 %J The VLDB Journal %V 24 %N 6 %& 707 %P 707 - 730 %I Springer %C Berlin %@ false
[56]
J. Geiß, A. Spitz, J. Strötgen, and M. Gertz, “The Wikipedia Location Network - Overcoming Borders and Oceans,” in Proceedings of the 9th Workshop on Geographic Information Retrieval (GIR 2015), Paris, France, 2015.
Export
BibTeX
@inproceedings{GIR2015,
  title        = {The {Wikipedia} Location Network -- Overcoming Borders and Oceans},
  author       = {Gei{\ss}, Johanna and Spitz, Andreas and Str{\"o}tgen, Jannik and Gertz, Michael},
  language     = {eng},
  isbn         = {978-1-4503-3937-7},
  doi          = {10.1145/2837689.2837694},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Proceedings of the 9th Workshop on Geographic Information Retrieval (GIR 2015)},
  editor       = {Purves, Ross S. and Jones, Christopher B.},
  pages        = {1--3},
  eid          = {2},
  address      = {Paris, France},
}
Endnote
%0 Conference Proceedings %A Geiß, Johanna %A Spitz, Andreas %A Strötgen, Jannik %A Gertz, Michael %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T The Wikipedia Location Network - Overcoming Borders and Oceans : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-216D-0 %R 10.1145/2837689.2837694 %D 2015 %B 9th Workshop on Geographic Information Retrieval %Z date of event: 2015-11-26 - 2015-11-27 %C Paris, France %B Proceedings of the 9th Workshop on Geographic Information Retrieval %E Purves, Ross S.; Jones, Christopher B. %P 1 - 3 %Z sequence number: 2 %I ACM %@ 978-1-4503-3937-7
[57]
A. Grycner, G. Weikum, J. Pujara, J. Foulds, and L. Getoor, “RELLY: Inferring Hypernym Relationships Between Relational Phrases,” in Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015), Lisbon, Portugal, 2015.
Export
BibTeX
@inproceedings{grycner-EtAl:2015:EMNLP,
  title        = {{RELLY}: {I}nferring Hypernym Relationships Between Relational Phrases},
  author       = {Grycner, Adam and Weikum, Gerhard and Pujara, Jay and Foulds, James and Getoor, Lise},
  language     = {eng},
  isbn         = {978-1-941643-32-7},
  url          = {http://aclweb.org/anthology/D15-1113},
  publisher    = {ACL},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP 2015)},
  pages        = {971--981},
  address      = {Lisbon, Portugal},
}
Endnote
%0 Conference Proceedings %A Grycner, Adam %A Weikum, Gerhard %A Pujara, Jay %A Foulds, James %A Getoor, Lise %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T RELLY: Inferring Hypernym Relationships Between Relational Phrases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49B0-5 %U http://aclweb.org/anthology/D15-1113 %D 2015 %B Conference on Empirical Methods in Natural Language Processing %Z date of event: 2015-09-17 - 2015-09-21 %C Lisbon, Portugal %B Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing %P 971 - 981 %I ACL %@ 978-1-941643-32-7 %U https://www.cs.cmu.edu/~ark/EMNLP-2015/proceedings/EMNLP/pdf/EMNLP113.pdf
[58]
D. Gupta and K. Berberich, “Temporal Query Classification at Different Granularities,” in String Processing and Information Retrieval (SPIRE 2015), London, UK, 2015.
Export
BibTeX
@inproceedings{spire15-gupta,
  title        = {Temporal Query Classification at Different Granularities},
  author       = {Gupta, Dhruv and Berberich, Klaus},
  language     = {eng},
  isbn         = {978-3-319-23825-8},
  doi          = {10.1007/978-3-319-23826-5_16},
  publisher    = {Springer},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {String Processing and Information Retrieval (SPIRE 2015)},
  editor       = {Iliopoulos, Costas S. and Puglisi, Simon J. and Yilmaz, Emine},
  pages        = {156--164},
  series       = {Lecture Notes in Computer Science},
  volume       = {9309},
  address      = {London, UK},
}
Endnote
%0 Conference Proceedings %A Gupta, Dhruv %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Temporal Query Classification at Different Granularities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-4249-D %R 10.1007/978-3-319-23826-5_16 %D 2015 %B 22nd International Symposium on String Processing and Information Retrieval %Z date of event: 2015-08-31 - 2015-09-02 %C London, UK %B String Processing and Information Retrieval %E Iliopoulos, Costas S.; Puglisi, Simon J.; Yilmaz, Emine %P 156 - 164 %I Springer %@ 978-3-319-23825-8 %B Lecture Notes in Computer Science %N 9309
[59]
C. D. Hariman, “Part-Whole Commonsense Knowledge Harvesting from the Web,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{HarimanMaster2015,
  title        = {Part-Whole Commonsense Knowledge Harvesting from the Web},
  author       = {Hariman, Charles Darwis},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Hariman, Charles Darwis %Y Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Part-Whole Commonsense Knowledge Harvesting from the Web : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0026-C0E6-C %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 53 p. %V master %9 master
[60]
J. Hoffart, “Discovering and Disambiguating Named Entities in Text,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@phdthesis{Hoffartthesis,
  title        = {Discovering and Disambiguating Named Entities in Text},
  author       = {Hoffart, Johannes},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Hoffart, Johannes %Y Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering and Disambiguating Named Entities in Text : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-6C44-0 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P X, 103 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de %U http://scidok.sulb.uni-saarland.de/volltexte/2015/6022/
[61]
J. Hoffart, N. Preda, F. M. Suchanek, and G. Weikum, “Knowledge Bases for Web Content Analytics,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{hoffart2015knowledgebases,
  title        = {Knowledge Bases for Web Content Analytics},
  author       = {Hoffart, Johannes and Preda, Nicoleta and Suchanek, Fabian M. and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2741984},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {WWW'15 Companion},
  pages        = {1535},
  address      = {Florence, Italy},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %A Preda, Nicoleta %A Suchanek, Fabian M. %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowledge Bases for Web Content Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-8E68-7 %R 10.1145/2740908.2741984 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-05-18 - 2015-05-22 %C Florence, Italy %B WWW'15 Companion %P 1535 %I ACM %@ 978-1-4503-3473-0
[62]
K. Hui and K. Berberich, “Selective Labeling and Incomplete Label Mitigation for Low-Cost Evaluation,” in String Processing and Information Retrieval (SPIRE 2015), London, UK, 2015.
Export
BibTeX
@inproceedings{spire15-kaihui,
  title        = {Selective Labeling and Incomplete Label Mitigation for Low-Cost Evaluation},
  author       = {Hui, Kai and Berberich, Klaus},
  language     = {eng},
  isbn         = {978-3-319-23825-8},
  doi          = {10.1007/978-3-319-23826-5_14},
  publisher    = {Springer},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {String Processing and Information Retrieval (SPIRE 2015)},
  editor       = {Iliopoulos, Costas S. and Puglisi, Simon J. and Yilmaz, Emine},
  pages        = {137--148},
  series       = {Lecture Notes in Computer Science},
  volume       = {9309},
  address      = {London, UK},
}
Endnote
%0 Conference Proceedings %A Hui, Kai %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Selective Labeling and Incomplete Label Mitigation for Low-Cost Evaluation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-5DAA-5 %R 10.1007/978-3-319-23826-5_14 %D 2015 %B 22nd International Symposium on String Processing and Information Retrieval %Z date of event: 2015-08-31 - 2015-09-02 %C London, UK %B String Processing and Information Retrieval %E Iliopoulos, Costas S.; Puglisi, Simon J.; Yilmaz, Emine %P 137 - 148 %I Springer %@ 978-3-319-23825-8 %B Lecture Notes in Computer Science %N 9309
[63]
S. Karaev, P. Miettinen, and J. Vreeken, “Getting to Know the Unknown Unknowns: Destructive-noise Resistant Boolean Matrix Factorization,” in Proceedings of the 2015 SIAM International Conference on Data Mining (SDM 2015), Vancouver, Canada, 2015.
Abstract
Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data.
Export
BibTeX
@inproceedings{karaev15getting,
  title        = {Getting to Know the Unknown Unknowns: {D}estructive-noise Resistant {Boolean} Matrix Factorization},
  author       = {Karaev, Sanjar and Miettinen, Pauli and Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-1-61197-401-0},
  doi          = {10.1137/1.9781611974010.37},
  publisher    = {SIAM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  abstract     = {Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data.},
  booktitle    = {Proceedings of the 2015 SIAM International Conference on Data Mining (SDM 2015)},
  editor       = {Venkatasubramanian, Suresh and Ye, Jieping},
  pages        = {325--333},
  address      = {Vancouver, Canada},
}
Endnote
%0 Conference Proceedings %A Karaev, Sanjar %A Miettinen, Pauli %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Getting to Know the Unknown Unknowns: Destructive-noise Resistant Boolean Matrix Factorization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6C59-C %R 10.1137/1.9781611974010.37 %D 2015 %B 15th SIAM International Conference on Data Mining %Z date of event: 2015-04-30 - 2015-05-02 %C Vancouver, Canada %X Finding patterns from binary data is a classical problem in data mining, dating back to at least frequent itemset mining. More recently, approaches such as tiling and Boolean matrix factorization (BMF), have been proposed to find sets of patterns that aim to explain the full data well. These methods, however, are not robust against non-trivial destructive noise, i.e. when relatively many 1s are removed from the data: tiling can only model additive noise while BMF assumes approximately equal amounts of additive and destructive noise. Most real-world binary datasets, however, exhibit mostly destructive noise. In presence/absence data, for instance, it is much more common to fail to observe something than it is to observe a spurious presence. To address this problem, we take the recent approach of employing the Minimum Description Length (MDL) principle for BMF and introduce a new algorithm, Nassau, that directly optimizes the description length of the factorization instead of the reconstruction error. In addition, unlike the previous algorithms, it can adjust the factors it has discovered during its search. Empirical evaluation on synthetic data shows that Nassau excels at datasets with high destructive noise levels and its performance on real-world datasets confirms our hypothesis of the high numbers of missing observations in the real-world data. 
%B Proceedings of the 2015 SIAM International Conference on Data Mining %E Venkatasubramanian, Suresh; Ye, Jieping %P 325 - 333 %I SIAM %@ 978-1-61197-401-0
[64]
D. Koutra, U. Kang, J. Vreeken, and C. Faloutsos, “Summarizing and Understanding Large Graphs,” Statistical Analysis and Data Mining, vol. 8, no. 3, 2015.
Export
BibTeX
@article{koutra:15:vog,
  title        = {Summarizing and Understanding Large Graphs},
  author       = {Koutra, Danai and Kang, U and Vreeken, Jilles and Faloutsos, Christos},
  language     = {eng},
  issn         = {1932-1872},
  doi          = {10.1002/sam.11267},
  publisher    = {Wiley-Blackwell},
  address      = {Chichester},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  journal      = {Statistical Analysis and Data Mining},
  volume       = {8},
  number       = {3},
  pages        = {183--202},
}
Endnote
%0 Journal Article %A Koutra, Danai %A Kang, U %A Vreeken, Jilles %A Faloutsos, Christos %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Summarizing and Understanding Large Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0026-D185-2 %R 10.1002/sam.11267 %7 2015-05-18 %D 2015 %J Statistical Analysis and Data Mining %O The ASA Data Science Journal %V 8 %N 3 %& 183 %P 183 - 202 %I Wiley-Blackwell %C Chichester %@ false
[65]
P. Mandros, “Information Theoretic Supervised Feature Selection for Continuous Data,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{MandrosMaster2015,
  title        = {Information Theoretic Supervised Feature Selection for Continuous Data},
  author       = {Mandros, Panagiotis},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Mandros, Panagiotis %Y Weikum, Gerhard %A referee: Vreeken, Jilles %+ International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Information Theoretic Supervised Feature Selection for Continuous Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-BAF3-F %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 67 p. %V master %9 master
[66]
S. Metzger, R. Schenkel, and M. Sydow, “Aspect-based Similar Entity Search in Semantic Knowledge Graphs with Diversity-awareness and Relaxation,” in The 2014 IEEE/WIC/ACM International Conference on Intelligent Agent Technology (IAT 2014), Warsaw, Poland, 2015.
Export
BibTeX
@inproceedings{MetzgerIAT2014,
  title        = {Aspect-based Similar Entity Search in Semantic Knowledge Graphs with Diversity-awareness and Relaxation},
  author       = {Metzger, Steffen and Schenkel, Ralf and Sydow, Marcin},
  language     = {eng},
  isbn         = {978-1-4799-4143-8},
  doi          = {10.1109/WI-IAT.2014.17},
  publisher    = {IEEE},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {The 2014 IEEE/WIC/ACM International Conference on Intelligent Agent Technology (IAT 2014)},
  pages        = {60--69},
  address      = {Warsaw, Poland},
}
Endnote
%0 Conference Proceedings %A Metzger, Steffen %A Schenkel, Ralf %A Sydow, Marcin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Aspect-based Similar Entity Search in Semantic Knowledge Graphs with Diversity-awareness and Relaxation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-424D-5 %R 10.1109/WI-IAT.2014.17 %D 2015 %B IEEE/WIC/ACM International Conference on Intelligent Agent Technology %Z date of event: 2014-08-11 - 2014-08-14 %C Warsaw, Poland %B The 2014 IEEE/WIC/ACM International Conference on Intelligent Agent Technology %P 60 - 69 %I IEEE %@ 978-1-4799-4143-8
[67]
S. Metzler and P. Miettinen, “On Defining SPARQL with Boolean Tensor Algebra,” 2015. [Online]. Available: http://arxiv.org/abs/1503.00301. (arXiv: 1503.00301)
Abstract
The Resource Description Framework (RDF) represents information as subject-predicate-object triples. These triples are commonly interpreted as a directed labelled graph. We propose an alternative approach, interpreting the data as a 3-way Boolean tensor. We show how SPARQL queries - the standard queries for RDF - can be expressed as elementary operations in Boolean algebra, giving us a complete re-interpretation of RDF and SPARQL. We show how the Boolean tensor interpretation allows for new optimizations and analyses of the complexity of SPARQL queries. For example, estimating the size of the results for different join queries becomes much simpler.
Export
BibTeX
@online{metzler15defining:arxiv,
  title        = {On Defining {SPARQL} with {B}oolean Tensor Algebra},
  author       = {Metzler, Saskia and Miettinen, Pauli},
  language     = {eng},
  url          = {http://arxiv.org/abs/1503.00301},
  eprint       = {1503.00301},
  eprinttype   = {arXiv},
  year         = {2015},
  marginalmark = {$\bullet$},
  abstract     = {The Resource Description Framework (RDF) represents information as subject-predicate-object triples. These triples are commonly interpreted as a directed labelled graph. We propose an alternative approach, interpreting the data as a 3-way Boolean tensor. We show how SPARQL queries -- the standard queries for RDF -- can be expressed as elementary operations in Boolean algebra, giving us a complete re-interpretation of RDF and SPARQL. We show how the Boolean tensor interpretation allows for new optimizations and analyses of the complexity of SPARQL queries. For example, estimating the size of the results for different join queries becomes much simpler.},
}
Endnote
%0 Report %A Metzler, Saskia %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T On Defining SPARQL with Boolean Tensor Algebra : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-054A-9 %U http://arxiv.org/abs/1503.00301 %D 2015 %8 03.03.2015 %X The Resource Description Framework (RDF) represents information as subject-predicate-object triples. These triples are commonly interpreted as a directed labelled graph. We propose an alternative approach, interpreting the data as a 3-way Boolean tensor. We show how SPARQL queries - the standard queries for RDF - can be expressed as elementary operations in Boolean algebra, giving us a complete re-interpretation of RDF and SPARQL. We show how the Boolean tensor interpretation allows for new optimizations and analyses of the complexity of SPARQL queries. For example, estimating the size of the results for different join queries becomes much simpler. %K Computer Science, Databases, cs.DB
[68]
S. Metzler and P. Miettinen, “Join Size Estimation on Boolean Tensors of RDF Data,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{metzler15join,
  title        = {Join Size Estimation on Boolean Tensors of {RDF} Data},
  author       = {Metzler, Saskia and Miettinen, Pauli},
  language     = {eng},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2742738},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {WWW'15 Companion},
  pages        = {77--78},
  address      = {Florence, Italy},
}
Endnote
%0 Conference Proceedings %A Metzler, Saskia %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Join Size Estimation on Boolean Tensors of RDF Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-CCED-A %R 10.1145/2740908.2742738 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-05-18 - 2015-05-22 %C Florence, Italy %B WWW'15 Companion %P 77 - 78 %I ACM %@ 978-1-4503-3473-0
[69]
S. Metzler and P. Miettinen, “Clustering Boolean Tensors,” Data Mining and Knowledge Discovery, vol. 29, no. 5, 2015.
Export
BibTeX
@article{MetzlerMiettinen2015,
  title        = {Clustering {Boolean} Tensors},
  author       = {Metzler, Saskia and Miettinen, Pauli},
  language     = {eng},
  doi          = {10.1007/s10618-015-0420-3},
  publisher    = {Springer},
  address      = {New York, NY},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  journal      = {Data Mining and Knowledge Discovery},
  volume       = {29},
  number       = {5},
  pages        = {1343--1373},
}
Endnote
%0 Journal Article %A Metzler, Saskia %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Clustering Boolean Tensors : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-536A-B %R 10.1007/s10618-015-0420-3 %7 2015 %D 2015 %J Data Mining and Knowledge Discovery %V 29 %N 5 %& 1343 %P 1343 - 1373 %I Springer %C New York, NY
[70]
S. Metzler and P. Miettinen, “Clustering Boolean Tensors,” 2015. [Online]. Available: http://arxiv.org/abs/1501.00696. (arXiv: 1501.00696)
Abstract
Tensor factorizations are computationally hard problems, and in particular, are often significantly harder than their matrix counterparts. In case of Boolean tensor factorizations -- where the input tensor and all the factors are required to be binary and we use Boolean algebra -- much of that hardness comes from the possibility of overlapping components. Yet, in many applications we are perfectly happy to partition at least one of the modes. In this paper we investigate what consequences does this partitioning have on the computational complexity of the Boolean tensor factorizations and present a new algorithm for the resulting clustering problem. This algorithm can alternatively be seen as a particularly regularized clustering algorithm that can handle extremely high-dimensional observations. We analyse our algorithms with the goal of maximizing the similarity and argue that this is more meaningful than minimizing the dissimilarity. As a by-product we obtain a PTAS and an efficient 0.828-approximation algorithm for rank-1 binary factorizations. Our algorithm for Boolean tensor clustering achieves high scalability, high similarity, and good generalization to unseen data with both synthetic and real-world data sets.
Export
BibTeX
@online{metzler15clustering:arxiv,
  title        = {Clustering {Boolean} Tensors},
  author       = {Metzler, Saskia and Miettinen, Pauli},
  language     = {eng},
  url          = {http://arxiv.org/abs/1501.00696},
  eprint       = {1501.00696},
  eprinttype   = {arXiv},
  year         = {2015},
  marginalmark = {$\bullet$},
  abstract     = {Tensor factorizations are computationally hard problems, and in particular, are often significantly harder than their matrix counterparts. In case of Boolean tensor factorizations -- where the input tensor and all the factors are required to be binary and we use Boolean algebra -- much of that hardness comes from the possibility of overlapping components. Yet, in many applications we are perfectly happy to partition at least one of the modes. In this paper we investigate what consequences does this partitioning have on the computational complexity of the Boolean tensor factorizations and present a new algorithm for the resulting clustering problem. This algorithm can alternatively be seen as a particularly regularized clustering algorithm that can handle extremely high-dimensional observations. We analyse our algorithms with the goal of maximizing the similarity and argue that this is more meaningful than minimizing the dissimilarity. As a by-product we obtain a PTAS and an efficient 0.828-approximation algorithm for rank-1 binary factorizations. Our algorithm for Boolean tensor clustering achieves high scalability, high similarity, and good generalization to unseen data with both synthetic and real-world data sets.},
}
Endnote
%0 Report %A Metzler, Saskia %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Clustering Boolean Tensors : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6C5B-8 %U http://arxiv.org/abs/1501.00696 %D 2015 %X Tensor factorizations are computationally hard problems, and in particular, are often significantly harder than their matrix counterparts. In case of Boolean tensor factorizations -- where the input tensor and all the factors are required to be binary and we use Boolean algebra -- much of that hardness comes from the possibility of overlapping components. Yet, in many applications we are perfectly happy to partition at least one of the modes. In this paper we investigate what consequences does this partitioning have on the computational complexity of the Boolean tensor factorizations and present a new algorithm for the resulting clustering problem. This algorithm can alternatively be seen as a particularly regularized clustering algorithm that can handle extremely high-dimensional observations. We analyse our algorithms with the goal of maximizing the similarity and argue that this is more meaningful than minimizing the dissimilarity. As a by-product we obtain a PTAS and an efficient 0.828-approximation algorithm for rank-1 binary factorizations. Our algorithm for Boolean tensor clustering achieves high scalability, high similarity, and good generalization to unseen data with both synthetic and real-world data sets. %K Computer Science, Numerical Analysis, cs.NA,Computer Science, Data Structures and Algorithms, cs.DS
[71]
P. Miettinen, “Generalized Matrix Factorizations as a Unifying Framework for Pattern Set Mining: Complexity Beyond Blocks,” in Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015), Porto, Portugal, 2015.
Export
BibTeX
@inproceedings{MiettinenECML2015,
  title        = {Generalized Matrix Factorizations as a Unifying Framework for Pattern Set Mining: {C}omplexity Beyond Blocks},
  author       = {Miettinen, Pauli},
  language     = {eng},
  isbn         = {978-3-319-23524-0},
  doi          = {10.1007/978-3-319-23525-7_3},
  publisher    = {Springer},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015)},
  editor       = {Appice, Annalisa and Pereira Rodrigues, Pedro and Gama, Jo{\~a}o and Jorge, Al{\'i}pio and Soares, Carlos},
  pages        = {36--52},
  series       = {Lecture Notes in Artificial Intelligence},
  volume       = {9285},
  address      = {Porto, Portugal},
}
Endnote
%0 Conference Proceedings %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Generalized Matrix Factorizations as a Unifying Framework for Pattern Set Mining: Complexity Beyond Blocks : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-2278-1 %R 10.1007/978-3-319-23525-7_3 %D 2015 %B European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2015-09-07 - 2015-09-11 %C Porto, Portugal %B Machine Learning and Knowledge Discovery in Databases %E Appice, Annalisa; Pereira Rodrigues, Pedro; Gama, João; Jorge, Alípio; Soares, Carlos %P 36 - 52 %I Springer %@ 978-3-319-23524-0 %B Lecture Notes in Artificial Intelligence %N 9285
[72]
A. Mishra and K. Berberich, “EXPOSÉ: EXploring Past news fOr Seminal Events,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{MishraWWW2015,
  title        = {EXPOS{\'E}: {EXploring Past news fOr Seminal Events}},
  author       = {Mishra, Arunav and Berberich, Klaus},
  language     = {eng},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2742844},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {WWW'15 Companion},
  pages        = {223--226},
  address      = {Florence, Italy},
}
Endnote
%0 Conference Proceedings %A Mishra, Arunav %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T EXPOSÉ: EXploring Past news fOr Seminal Events : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-E33E-F %R 10.1145/2740908.2742844 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-05-18 - 2015-05-22 %C Florence, Italy %B WWW'15 Companion %P 223 - 226 %I ACM %@ 978-1-4503-3473-0
[73]
S. Mukherjee, H. Lamba, and G. Weikum, “Experience-aware Item Recommendation in Evolving Review Communities,” in 15th IEEE International Conference on Data Mining (ICDM 2015), Atlantic City, NJ, USA, 2015.
Export
BibTeX
@inproceedings{mukherjee-experience-model,
  title        = {Experience-aware Item Recommendation in Evolving Review Communities},
  author       = {Mukherjee, Subhabrata and Lamba, Hemank and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4673-9503-8},
  doi          = {10.1109/ICDM.2015.111},
  publisher    = {IEEE},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {15th IEEE International Conference on Data Mining (ICDM 2015)},
  editor       = {Aggarwal, Charu and Zhou, Zhi-Hua and Tuzhilin, Alexander and Xiong, Hui and Wu, Xindong},
  pages        = {925--930},
  address      = {Atlantic City, NJ, USA},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Lamba, Hemank %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Experience-aware Item Recommendation in Evolving Review Communities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49F3-F %R 10.1109/ICDM.2015.111 %D 2015 %B 15th International Conference on Data Mining %Z date of event: 2015-11-14 - 2015-11-17 %C Atlantic City, NJ, USA %B 15th IEEE International Conference on Data Mining %E Aggarwal, Charu; Zhou, Zhi-Hua; Tuzhilin, Alexander; Xiong, Hui; Wu, Xindong %P 925 - 930 %I IEEE %@ 978-1-4673-9503-8
[74]
S. Mukherjee and G. Weikum, “Leveraging Joint Interactions for Credibility Analysis in News Communities,” in CIKM’15, 24th ACM International Conference on Information and Knowledge Management, Melbourne, Australia, 2015.
Export
BibTeX
@inproceedings{mukherjee-credibility-analysis,
  title        = {Leveraging Joint Interactions for Credibility Analysis in News Communities},
  author       = {Mukherjee, Subhabrata and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-3794-6},
  doi          = {10.1145/2806416.2806537},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {CIKM'15, 24th ACM International Conference on Information and Knowledge Management},
  pages        = {353--362},
  address      = {Melbourne, Australia},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Leveraging Joint Interactions for Credibility Analysis in News Communities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49DE-1 %R 10.1145/2806416.2806537 %D 2015 %B 24th ACM International Conference on Information and Knowledge Management %Z date of event: 2015-10-19 - 2015-10-23 %C Melbourne, Australia %B CIKM'15 %P 353 - 362 %I ACM %@ 978-1-4503-3794-6
[75]
S. Neumann, “On Some Problems of Rounding Rank,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{NeumannMaster2015,
  title        = {On Some Problems of Rounding Rank},
  author       = {Neumann, Stefan},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Neumann, Stefan %Y Miettinen, Pauli %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T On Some Problems of Rounding Rank : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-57D6-2 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P X, 77 p. %V master %9 master
[76]
H.-V. Nguyen and J. Vreeken, “Non-parametric Jensen-Shannon Divergence,” in Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015), Porto, Portugal, 2015.
Export
BibTeX
@inproceedings{NguyenECML2015,
  title        = {Non-parametric {Jensen}-{Shannon} Divergence},
  author       = {Nguyen, Hoang-Vu and Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-3-319-23524-0},
  doi          = {10.1007/978-3-319-23525-7_11},
  publisher    = {Springer},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2015)},
  editor       = {Appice, Annalisa and Pereira Rodrigues, Pedro and Gama, Jo{\~a}o and Al{\'i}pio, Jorge and Soares, Carlos},
  pages        = {173--189},
  series       = {Lecture Notes in Artificial Intelligence},
  volume       = {9285},
  address      = {Porto, Portugal},
}
Endnote
%0 Conference Proceedings %A Nguyen, Hoang-Vu %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Non-parametric Jensen-Shannon Divergence : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-2286-3 %R 10.1007/978-3-319-23525-7_11 %D 2015 %B European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2015-09-07 - 2015-09-11 %C Porto, Portugal %B Machine Learning and Knowledge Discovery in Databases %E Appice, Annalisa; Pereira Rodrigues, Pedro; Gama, João; Alípio, Jorge; Soares, Carlos %P 173 - 189 %I Springer %@ 978-3-319-23524-0 %B Lecture Notes in Artificial Intelligence %N 9285
[77]
R. Pienta, Z. Lin, M. Kahng, J. Vreeken, P. P. Talukdar, J. Abello, G. Parameswaran, and D. H. Chau, “AdaptiveNav: Adaptive Discovery of Interesting and Surprising Nodes in Large Graphs.” 2015.
Export
BibTeX
@misc{pienta:15:adaptivenav,
  title        = {{AdaptiveNav}: {A}daptive Discovery of Interesting and Surprising Nodes in Large Graphs},
  author       = {Pienta, Robert and Lin, Zhiyuan and Kahng, Minsuk and Vreeken, Jilles and Talukdar, Partha P. and Abello, James and Parameswaran, Ganesh and Chau, Duen Horng},
  language     = {eng},
  year         = {2015},
  marginalmark = {$\bullet$},
  note         = {Presented at IEEE VIS 2015},
  address      = {Chicago, IL, USA},
}
Endnote
%0 Generic %A Pienta, Robert %A Lin, Zhiyuan %A Kahng, Minsuk %A Vreeken, Jilles %A Talukdar, Partha P. %A Abello, James %A Parameswaran, Ganesh %A Chau, Duen Horng %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations %T AdaptiveNav: Adaptive Discovery of Interesting and Surprising Nodes in Large Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-57B4-E %D 2015 %Z name of event: IEEE VIS 2015 %Z date of event: 2015-10-25 - 2015-10-30 %Z place of event: Chicago, IL, USA
[78]
N. Prytkova, M. Spaniol, and G. Weikum, “Aligning Multi-cultural Knowledge Taxonomies by Combinatorial Optimization,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{PSWe15,
  title        = {Aligning Multi-cultural Knowledge Taxonomies by Combinatorial Optimization},
  author       = {Prytkova, Natalia and Spaniol, Marc and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2742721},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {WWW'15 Companion},
  pages        = {93--94},
  address      = {Florence, Italy},
}
Endnote
%0 Conference Proceedings %A Prytkova, Natalia %A Spaniol, Marc %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Aligning Multi-cultural Knowledge Taxonomies by Combinatorial Optimization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-06E5-3 %R 10.1145/2740908.2742721 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-05-18 - 2015-05-22 %C Florence, Italy %B WWW'15 Companion %P 93 - 94 %I ACM %@ 978-1-4503-3473-0
[79]
A. Rohrbach, M. Rohrbach, N. Tandon, and B. Schiele, “A Dataset for Movie Description,” in IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015), Boston, MA, USA, 2015.
Export
BibTeX
@inproceedings{Rohrbach15cvpr,
  title        = {A Dataset for Movie Description},
  author       = {Rohrbach, Anna and Rohrbach, Marcus and Tandon, Niket and Schiele, Bernt},
  language     = {eng},
  doi          = {10.1109/CVPR.2015.7298940},
  publisher    = {IEEE Computer Society},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015)},
  pages        = {3202--3212},
  address      = {Boston, MA, USA},
}
Endnote
%0 Conference Proceedings %A Rohrbach, Anna %A Rohrbach, Marcus %A Tandon, Niket %A Schiele, Bernt %+ Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society %T A Dataset for Movie Description : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-01B9-B %R 10.1109/CVPR.2015.7298940 %D 2015 %B IEEE Conference on Computer Vision and Pattern Recognition %Z date of event: 2015-06-08 - 2015-06-10 %C Boston, MA, USA %B IEEE Conference on Computer Vision and Pattern Recognition %P 3202 - 3212 %I IEEE Computer Society
[80]
C. Schulte, B. Taneva, and G. Weikum, “On-topic Cover Stories from News Archives,” in Advances in Information Retrieval (ECIR 2015), Vienna, Austria, 2015.
Export
BibTeX
@inproceedings{Schulte:ECIR2015,
  title        = {On-topic Cover Stories from News Archives},
  author       = {Schulte, Christian and Taneva, Bilyana and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-3-319-16353-6},
  doi          = {10.1007/978-3-319-16354-3_4},
  publisher    = {Springer},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Advances in Information Retrieval (ECIR 2015)},
  editor       = {Hanbury, Allan and Kazai, Gabriella and Rauber, Andreas and Fuhr, Norbert},
  pages        = {37--42},
  series       = {Lecture Notes in Computer Science},
  volume       = {9022},
  address      = {Vienna, Austria},
}
Endnote
%0 Conference Proceedings %A Schulte, Christian %A Taneva, Bilyana %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T On-topic Cover Stories from News Archives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-A6DE-B %R 10.1007/978-3-319-16354-3_4 %D 2015 %B 37th European Conference on Information Retrieval %Z date of event: 2015-03-29 - 2015-04-02 %C Vienna, Austria %B Advances in Information Retrieval %E Hanbury, Allan; Kazai, Gabriella; Rauber, Andreas; Fuhr, Norbert %P 37 - 42 %I Springer %@ 978-3-319-16353-6 %B Lecture Notes in Computer Science %N 9022
[81]
S. Seufert, “Algorithmic Building Blocks for Relationship Analysis over Large Graphs,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@phdthesis{Seufertphd15,
  title        = {Algorithmic Building Blocks for Relationship Analysis over Large Graphs},
  author       = {Seufert, Stephan},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Seufert, Stephan %Y Bedathur, Srikanta %A referee: Barbosa, Denilson %A referee: Weidenbach, Christoph %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Automation of Logic, MPI for Informatics, Max Planck Society %T Algorithmic Building Blocks for Relationship Analysis over Large Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-6E65-D %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 198 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2015/6183/http://scidok.sulb.uni-saarland.de/doku/urheberrecht.php?la=de
[82]
D. Seyler, M. Yahya, and K. Berberich, “Generating Quiz Questions from Knowledge Graphs,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{SeylerWWW2015,
  title        = {Generating Quiz Questions from Knowledge Graphs},
  author       = {Seyler, Dominic and Yahya, Mohamed and Berberich, Klaus},
  language     = {eng},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2742722},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {WWW'15 Companion},
  pages        = {113--114},
  address      = {Florence, Italy},
}
Endnote
%0 Conference Proceedings %A Seyler, Dominic %A Yahya, Mohamed %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Generating Quiz Questions from Knowledge Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-E33C-4 %R 10.1145/2740908.2742722 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-05-18 - 2015-05-22 %C Florence, Italy %B WWW'15 Companion %P 113 - 114 %I ACM %@ 978-1-4503-3473-0
[83]
D. Seyler, “Question Generation from Knowledge Graphs,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{SeylerMaster2015,
  title        = {Question Generation from Knowledge Graphs},
  author       = {Seyler, Dominic},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Seyler, Dominic %Y Berberich, Klaus %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Question Generation from Knowledge Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-08B0-4 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P XII, 104 p. %V master %9 master
[84]
A. Sierra, “Ad-hoc Information Retrieval using Annotated Queries and Documents,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{SierraMaster2015,
  title        = {Ad-hoc Information Retrieval using Annotated Queries and Documents},
  author       = {Sierra, Alejandro},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Sierra, Alejandro %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Ad-hoc Information Retrieval using Annotated Queries and Documents : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-A968-D %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 68 p. %V master %9 master
[85]
S. Sundareisan, J. Vreeken, and B. A. Prakash, “Hidden Hazards: Finding Missing Nodes in Large Graph Epidemics,” in Proceedings of the SIAM International Conference on Data Mining (SDM 2015), Vancouver, Canada, 2015.
Export
BibTeX
@inproceedings{sundareisan:15:netfill,
  title        = {Hidden Hazards: {Finding} Missing Nodes in Large Graph Epidemics},
  author       = {Sundareisan, Shashi and Vreeken, Jilles and Prakash, B. Aditya},
  language     = {eng},
  isbn         = {978-1-61197-401-0},
  doi          = {10.1137/1.9781611974010.47},
  publisher    = {SIAM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Proceedings of the SIAM International Conference on Data Mining (SDM 2015)},
  editor       = {Venkatasubramanian, Suresh and Ye, Jieping},
  pages        = {415--423},
  address      = {Vancouver, Canada},
}
Endnote
%0 Conference Proceedings %A Sundareisan, Shashi %A Vreeken, Jilles %A Prakash, B. Aditya %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Hidden Hazards: Finding Missing Nodes in Large Graph Epidemics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-A82A-2 %R 10.1137/1.9781611974010.47 %D 2015 %B 15th SIAM International Conference on Data Mining %Z date of event: 2015-04-30 - 2015-05-02 %C Vancouver, Canada %B Proceedings of the SIAM International Conference on Data Mining %E Venkatasubramanian, Suresh; Ye, Jieping %P 415 - 423 %I SIAM %@ 978-1-61197-401-0
[86]
N. Tandon, G. de Melo, A. De, and G. Weikum, “Lights, Camera, Action: Knowledge Extraction from Movie Scripts,” in WWW’15 Companion, Florence, Italy, 2015.
Export
BibTeX
@inproceedings{tandon2015moviescripts,
  title        = {Lights, Camera, Action: Knowledge Extraction from Movie Scripts},
  author       = {Tandon, Niket and de Melo, Gerard and De, Abir and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-3473-0},
  doi          = {10.1145/2740908.2742756},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {WWW'15 Companion},
  pages        = {127--128},
  address      = {Florence, Italy},
}
Endnote
%0 Conference Proceedings %A Tandon, Niket %A de Melo, Gerard %A De, Abir %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Lights, Camera, Action: Knowledge Extraction from Movie Scripts : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-E32D-6 %R 10.1145/2740908.2742756 %D 2015 %B 24th International Conference on World Wide Web %Z date of event: 2015-05-18 - 2015-05-22 %C Florence, Italy %B WWW'15 Companion %P 127 - 128 %I ACM %@ 978-1-4503-3473-0
[87]
N. Tandon, G. de Melo, A. De, and G. Weikum, “Knowlywood: Mining Activity Knowledge From Hollywood Narratives,” in CIKM’15, 24th ACM International Conference on Information and Knowledge Management, Melbourne, Australia, 2015.
Export
BibTeX
@inproceedings{Tandon:2015:KMA:2806416.2806583,
  title        = {Knowlywood: {M}ining Activity Knowledge From Hollywood Narratives},
  author       = {Tandon, Niket and de Melo, Gerard and De, Abir and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-3794-6},
  doi          = {10.1145/2806416.2806583},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {CIKM'15, 24th ACM International Conference on Information and Knowledge Management},
  pages        = {223--232},
  address      = {Melbourne, Australia},
}
Endnote
%0 Conference Proceedings %A Tandon, Niket %A de Melo, Gerard %A De, Abir %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowlywood: Mining Activity Knowledge From Hollywood Narratives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-49E0-A %R 10.1145/2806416.2806583 %D 2015 %B 24th ACM International Conference on Information and Knowledge Management %Z date of event: 2015-10-19 - 2015-10-23 %C Melbourne, Australia %B CIKM'15 %P 223 - 232 %I ACM %@ 978-1-4503-3794-6
[88]
C. Teflioudi, R. Gemulla, and O. Mykytiuk, “LEMP: Fast Retrieval of Large Entries in a Matrix Product,” in SIGMOD’15, ACM SIGMOD International Conference on Management of Data, Melbourne, Victoria, Australia, 2015.
Export
BibTeX
@inproceedings{Teflioudi15,
  title        = {{LEMP}: {F}ast Retrieval of Large Entries in a Matrix Product},
  author       = {Teflioudi, Christina and Gemulla, Rainer and Mykytiuk, Olga},
  language     = {eng},
  isbn         = {978-1-4503-2758-9},
  doi          = {10.1145/2723372.2747647},
  publisher    = {ACM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {SIGMOD'15, ACM SIGMOD International Conference on Management of Data},
  pages        = {107--122},
  address      = {Melbourne, Victoria, Australia},
}
Endnote
%0 Conference Proceedings %A Teflioudi, Christina %A Gemulla, Rainer %A Mykytiuk, Olga %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T LEMP: Fast Retrieval of Large Entries in a Matrix Product : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-4A1C-F %R 10.1145/2723372.2747647 %D 2015 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2015-05-31 - 2015-06-04 %C Melbourne, Victoria, Australia %B SIGMOD'15 %P 107 - 122 %I ACM %@ 978-1-4503-2758-9
[89]
T. Tylenda, “Methods and Tools for Summarization of Entities and Facts in Knowledge Bases,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@phdthesis{TylendaPhd15,
  title        = {Methods and Tools for Summarization of Entities and Facts in Knowledge Bases},
  author       = {Tylenda, Tomasz},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Tylenda, Tomasz %Y Weikum, Gerhard %A referee: Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Methods and Tools for Summarization of Entities and Facts in Knowledge Bases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0028-FC65-5 %I Universität des Saarlandes %C Saarbrücken %D 2015 %P 113 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2015/6263/http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[90]
J. Vreeken, “Causal Inference by Direction of Information,” in Proceedings of the SIAM International Conference on Data Mining (SDM 2015), Vancouver, Canada, 2015.
Export
BibTeX
@inproceedings{vreeken:15:ergo,
  title        = {Causal Inference by Direction of Information},
  author       = {Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-1-61197-401-0},
  doi          = {10.1137/1.9781611974010.102},
  publisher    = {SIAM},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {Proceedings of the SIAM International Conference on Data Mining (SDM 2015)},
  editor       = {Venkatasubramanian, Suresh and Ye, Jieping},
  pages        = {909--917},
  address      = {Vancouver, Canada},
}
Endnote
%0 Conference Proceedings %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Causal Inference by Direction of Information : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-A82C-D %R 10.1137/1.9781611974010.102 %D 2015 %B 15th SIAM International Conference on Data Mining %Z date of event: 2015-04-30 - 2015-05-02 %C Vancouver, Canada %B Proceedings of the SIAM International Conference on Data Mining %E Venkatasubramanian, Suresh; Ye, Jieping %P 909 - 917 %I SIAM %@ 978-1-61197-401-0
[91]
H. Wang, “Retrospective Summarization : What Did I Miss?,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@mastersthesis{WangMaster2015,
  title        = {Retrospective Summarization : What Did I Miss?},
  author       = {Wang, He},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Wang, He %Y Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Retrospective Summarization : What Did I Miss? : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0026-A0B4-B %I Universität des Saarlandes %C Saarbrücken %D 2015 %P XVI, 73 p. %V master %9 master
[92]
M. A. Yosef, “U-AIDA : A Customizable System for Named Entity Recognition, Classification, and Disambiguation,” Universität des Saarlandes, Saarbrücken, 2015.
Export
BibTeX
@phdthesis{Yosefphd15,
  title        = {U-{AIDA} : A Customizable System for Named Entity Recognition, Classification, and Disambiguation},
  author       = {Yosef, Mohamed Amir},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
}
Endnote
%0 Thesis %A Yosef, Mohamed Amir %Y Weikum, Gerhard %A referee: Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T U-AIDA : A Customizable System for Named Entity Recognition, Classification, and Disambiguation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-B9B9-C %I Universität des Saarlandes %C Saarbrücken %D 2015 %P XV, 101 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2016/6370/http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[93]
A. Zimek and J. Vreeken, “The Blind Men and the Elephant: On Meeting the Problem of Multiple Truths in Data from Clustering and Pattern Mining Perspectives,” Machine Learning, vol. 98, no. 1, 2015.
Export
BibTeX
@article{zimek:15:blind,
  title        = {The Blind Men and the Elephant: On Meeting the Problem of Multiple Truths in Data from Clustering and Pattern Mining Perspectives},
  author       = {Zimek, Arthur and Vreeken, Jilles},
  language     = {eng},
  issn         = {0885-6125},
  doi          = {10.1007/s10994-013-5334-y},
  publisher    = {Springer},
  address      = {New York, NY},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  journal      = {Machine Learning},
  volume       = {98},
  number       = {1},
  pages        = {121--155},
}
Endnote
%0 Journal Article %A Zimek, Arthur %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T The Blind Men and the Elephant: On Meeting the Problem of Multiple Truths in Data from Clustering and Pattern Mining Perspectives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-57AE-D %R 10.1007/s10994-013-5334-y %7 2013-03-07 %D 2015 %J Machine Learning %V 98 %N 1 %& 121 %P 121 - 155 %I Springer %C New York, NY %@ false
[94]
T. Zinchenko, E. Galbrun, and P. Miettinen, “Mining Predictive Redescriptions with Trees,” in 15th IEEE International Conference on Data Mining Workshop (ICDMW 2015), Atlantic City, NJ, USA, 2015.
Export
BibTeX
@inproceedings{zinchenko15mining,
  title        = {Mining Predictive Redescriptions with Trees},
  author       = {Zinchenko, Tetiana and Galbrun, Esther and Miettinen, Pauli},
  language     = {eng},
  isbn         = {978-1-4673-8492-6},
  doi          = {10.1109/ICDMW.2015.123},
  publisher    = {IEEE Computer Society},
  year         = {2015},
  marginalmark = {$\bullet$},
  date         = {2015},
  booktitle    = {15th IEEE International Conference on Data Mining Workshop (ICDMW 2015)},
  editor       = {Cui, Peng and Dy, Jennifer and Aggarwal, Charu and Zhou, Zhi-Hua and Tuzhilin, Alexander and Xiong, Hui and Wu, Xindong},
  pages        = {1672--1675},
  address      = {Atlantic City, NJ, USA},
}
Endnote
%0 Conference Proceedings %A Zinchenko, Tetiana %A Galbrun, Esther %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Mining Predictive Redescriptions with Trees : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0029-5424-A %R 10.1109/ICDMW.2015.123 %D 2015 %B 15th International Conference on Data Mining %Z date of event: 2015-11-14 - 2015-11-17 %C Atlantic City, NJ, USA %B 15th IEEE International Conference on Data Mining Workshop %E Cui, Peng; Dy, Jennifer; Aggarwal, Charu; Zhou, Zhi-Hua; Tuzhilin, Alexander; Xiong, Hui; Wu, Xindong %P 1672 - 1675 %I IEEE Computer Society %@ 978-1-4673-8492-6
2014
[95]
F. Alvanaki, “Mining Interesting Events on Large and Dynamic Data,” Universität des Saarlandes, Saarbrücken, 2014.
Export
BibTeX
@phdthesis{Alvanakithesis,
  title        = {Mining Interesting Events on Large and Dynamic Data},
  author       = {Alvanaki, Foteini},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
}
Endnote
%0 Thesis %A Alvanaki, Foteini %Y Michel, Sebastian %A referee: Weikum, Gerhard %A referee: Delis, Alexis %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Mining Interesting Events on Large and Dynamic Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0025-6C4E-B %I Universität des Saarlandes %C Saarbrücken %D 2014 %P 128 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2015/5985/http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[96]
F. Alvanaki and S. Michel, “Tracking Set Correlations at Large Scale,” in SIGMOD’14, ACM SIGMOD International Conference on Management of Data, Snowbird, UT, USA, 2014.
Export
BibTeX
@inproceedings{Alvanaki2014,
  title        = {Tracking Set Correlations at Large Scale},
  author       = {Alvanaki, Foteini and Michel, Sebastian},
  language     = {eng},
  isbn         = {978-1-4503-2376-5},
  doi          = {10.1145/2588555.2610510},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {SIGMOD'14, ACM SIGMOD International Conference on Management of Data},
  editor       = {Dyresson, Curtis and Li, Feifei and {\"O}zsu, M. Tamer},
  pages        = {1507--1518},
  address      = {Snowbird, UT, USA},
}
Endnote
%0 Conference Proceedings %A Alvanaki, Foteini %A Michel, Sebastian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Tracking Set Correlations at Large Scale : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0019-8423-2 %R 10.1145/2588555.2610510 %D 2014 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2014-06-22 - 2014-06-27 %C Snowbird, UT, USA %B SIGMOD'14 %E Dyresson, Curtis; Li, Feifei; Özsu, M. Tamer %P 1507 - 1518 %I ACM %@ 978-1-4503-2376-5
[97]
A. Anand, I. Mele, S. Bedathur, and K. Berberich, “Phrase Query Optimization on Inverted Indexes,” Max-Planck-Institut für Informatik, Saarbrücken, MPI-I-2014-5-002, 2014.
Abstract
Phrase queries are a key functionality of modern search engines. Beyond that, they increasingly serve as an important building block for applications such as entity-oriented search, text analytics, and plagiarism detection. Processing phrase queries is costly, though, since positional information has to be kept in the index and all words, including stopwords, need to be considered. We consider an augmented inverted index that indexes selected variable-length multi-word sequences in addition to single words. We study how arbitrary phrase queries can be processed efficiently on such an augmented inverted index. We show that the underlying optimization problem is NP-hard in the general case and describe an exact exponential algorithm and an approximation algorithm to its solution. Experiments on ClueWeb09 and The New York Times with different real-world query workloads examine the practical performance of our methods.
Export
BibTeX
@techreport{AnandMeleBedathurBerberich2014,
  title        = {Phrase Query Optimization on Inverted Indexes},
  author       = {Anand, Avishek and Mele, Ida and Bedathur, Srikanta and Berberich, Klaus},
  language     = {eng},
  issn         = {0946-011X},
  number       = {MPI-I-2014-5-002},
  institution  = {Max-Planck-Institut f{\"u}r Informatik},
  address      = {Saarbr{\"u}cken},
  year         = {2014},
  marginalmark = {$\bullet$},
  abstract     = {Phrase queries are a key functionality of modern search engines. Beyond that, they increasingly serve as an important building block for applications such as entity-oriented search, text analytics, and plagiarism detection. Processing phrase queries is costly, though, since positional information has to be kept in the index and all words, including stopwords, need to be considered. We consider an augmented inverted index that indexes selected variable-length multi-word sequences in addition to single words. We study how arbitrary phrase queries can be processed efficiently on such an augmented inverted index. We show that the underlying optimization problem is NP-hard in the general case and describe an exact exponential algorithm and an approximation algorithm to its solution. Experiments on ClueWeb09 and The New York Times with different real-world query workloads examine the practical performance of our methods.},
  type         = {Research Report},
}
Endnote
%0 Report %A Anand, Avishek %A Mele, Ida %A Bedathur, Srikanta %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Phrase Query Optimization on Inverted Indexes : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-022A-3 %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2014 %P 20 p. %X Phrase queries are a key functionality of modern search engines. Beyond that, they increasingly serve as an important building block for applications such as entity-oriented search, text analytics, and plagiarism detection. Processing phrase queries is costly, though, since positional information has to be kept in the index and all words, including stopwords, need to be considered. We consider an augmented inverted index that indexes selected variable-length multi-word sequences in addition to single words. We study how arbitrary phrase queries can be processed efficiently on such an augmented inverted index. We show that the underlying optimization problem is NP-hard in the general case and describe an exact exponential algorithm and an approximation algorithm to its solution. Experiments on ClueWeb09 and The New York Times with different real-world query workloads examine the practical performance of our methods. %B Research Report %@ false
[98]
A. Anand, I. Mele, S. Bedathur, and K. Berberich, “Phrase Query Optimization on Inverted Indexes,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Anand:CIKM2014,
  title        = {Phrase Query Optimization on Inverted Indexes},
  author       = {Anand, Avishek and Mele, Ida and Bedathur, Srikanta and Berberich, Klaus},
  language     = {eng},
  isbn         = {978-1-4503-2598-1},
  doi          = {10.1145/2661829.2661928},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  editor       = {Li, Jianzhong and Wang, X. Sean and Garofalakis, Minos and Soboroff, Ian and Suel, Torsten and Wang, Min},
  pages        = {1807--1810},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Anand, Avishek %A Mele, Ida %A Bedathur, Srikanta %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Phrase Query Optimization on Inverted Indexes : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-549A-0 %R 10.1145/2661829.2661928 %D 2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %K multi-word indexing, phrase queries, query optimization %B CIKM'14 %E Li, Jianzhong; Wang, X. Sean; Garofalakis, Minos; Soboroff, Ian; Suel, Torsten; Wang, Min %P 1807 - 1810 %I ACM %@ 978-1-4503-2598-1
[99]
N. An, L. Jiang, J. Wang, P. Luo, M. Wang, and B. N. Li, “Toward Detection of Aliases without String Similarity,” Information Sciences, vol. 261, 2014.
Export
BibTeX
@article{AnJiangWang2014,
  title        = {Toward Detection of Aliases without String Similarity},
  author       = {An, Ning and Jiang, Lili and Wang, Jianyong and Luo, Ping and Wang, Min and Li, Bing Nan},
  language     = {eng},
  issn         = {0020-0255},
  doi          = {10.1016/j.ins.2013.11.010},
  publisher    = {Elsevier},
  address      = {Amsterdam},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  journal      = {Information Sciences},
  volume       = {261},
  pages        = {89--100},
}
Endnote
%0 Journal Article %A An, Ning %A Jiang, Lili %A Wang, Jianyong %A Luo, Ping %A Wang, Min %A Li, Bing Nan %+ external Databases and Information Systems, MPI for Informatics, Max Planck Society external external external external %T Toward Detection of Aliases without String Similarity : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-3DFB-8 %F ISI: 000331689700005 %R 10.1016/j.ins.2013.11.010 %7 2013-11-18 %D 2014 %J Information Sciences %O Inf. Sci. %V 261 %& 89 %P 89 - 100 %I Elsevier %C Amsterdam %@ false
[100]
K. Athukorala, A. Oulasvirta, D. Glowacka, J. Vreeken, and G. Jaccuci, “Interaction Model to Predict Subjective-specificity of Search Results,” in UMAP 2014 Extended Proceedings, Aalborg, Denmark, 2014.
Export
BibTeX
@inproceedings{atukorala:14:interaction,
  title        = {Interaction Model to Predict Subjective-specificity of Search Results},
  author       = {Athukorala, Kumaripaba and Oulasvirta, Antti and Glowacka, Dorata and Vreeken, Jilles and Jaccuci, Giulio},
  language     = {eng},
  url          = {http://ceur-ws.org/Vol-1181/umap2014_lateresults_01.pdf; urn:nbn:de:0074-1181-4},
  publisher    = {CEUR-WS.org},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {UMAP 2014 Extended Proceedings},
  editor       = {Cantador, Iv{\'a}n and Chi, Min and Farzan, Rosta and J{\"a}schke, Robert},
  pages        = {69--74},
  series       = {CEUR Workshop Proceedings},
  volume       = {1181},
  address      = {Aalborg, Denmark},
}
Endnote
%0 Conference Proceedings %A Athukorala, Kumaripaba %A Oulasvirta, Antti %A Glowacka, Dorata %A Vreeken, Jilles %A Jaccuci, Giulio %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Interaction Model to Predict Subjective-specificity of Search Results : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5397-D %U http://ceur-ws.org/Vol-1181/umap2014_lateresults_01.pdf %D 2014 %B 22nd Conference on User Modeling, Adaptation, and Personalization %Z date of event: 2014-07-07 - 2014-07-11 %C Aalborg, Denmark %B UMAP 2014 Extended Proceedings %E Cantador, Iván; Chi, Min; Farzan, Rosta; Jäschke, Robert %P 69 - 74 %I CEUR-WS.org %B CEUR Workshop Proceedings %N 1181 %U http://ceur-ws.org/Vol-1181/umap2014_lateresults_01.pdf
[101]
K. Athukorala, A. Oulasvirta, D. Glowacka, J. Vreeken, and G. Jaccuci, “Supporting Exploratory Search Through User Modelling,” in UMAP 2014 Extended Proceedings (PIA 2014 in conjunction with UMAP 2014), Aalborg, Denmark, 2014.
Export
BibTeX
@inproceedings{atukorala:14:supporting,
  title        = {Supporting Exploratory Search Through User Modelling},
  author       = {Athukorala, Kumaripaba and Oulasvirta, Antti and Glowacka, Dorata and Vreeken, Jilles and Jaccuci, Giulio},
  language     = {eng},
  issn         = {1613-0073},
  url          = {http://ceur-ws.org/Vol-1181/pia2014_paper_04.pdf; urn:nbn:de:0074-1181-4; http://ceur-ws.org/Vol-1181/pia2014_proceedings.pdf},
  publisher    = {CEUR-WS.org},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {UMAP 2014 Extended Proceedings (PIA 2014 in conjunction with UMAP 2014)},
  editor       = {Cantador, Iv{\'a}n and Chi, Min and Farzan, Rosta and J{\"a}schke, Robert},
  pages        = {1--47},
  series       = {CEUR Workshop Proceedings},
  volume       = {1181},
  address      = {Aalborg, Denmark},
}
Endnote
%0 Conference Proceedings %A Athukorala, Kumaripaba %A Oulasvirta, Antti %A Glowacka, Dorata %A Vreeken, Jilles %A Jaccuci, Giulio %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Supporting Exploratory Search Through User Modelling : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-538C-7 %U http://ceur-ws.org/Vol-1181/pia2014_paper_04.pdf %D 2014 %B Joint Workshop on Personalised Information Access %Z date of event: 2014-07-07 - 2014-07-07 %C Aalborg, Denmark %B UMAP 2014 Extended Proceedings %E Cantador, Iván; Chi, Min; Farzan, Rosta; Jäschke, Robert %P 1 - 47 %I CEUR-WS.org %B CEUR Workshop Proceedings %N 1181 %@ false %U http://ceur-ws.org/Vol-1181/pia2014_paper_04.pdf
[102]
K. Athukorala, A. Oulasvirta, D. Glowacka, J. Vreeken, and G. Jaccuci, “Narrow or Broad? Estimating Subjective Specificity in Exploratory Search,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{atukorala:14:foraging,
  title        = {Narrow or Broad? {Estimating} Subjective Specificity in Exploratory Search},
  author       = {Athukorala, Kumaripaba and Oulasvirta, Antti and Glowacka, Dorata and Vreeken, Jilles and Jaccuci, Giulio},
  language     = {eng},
  isbn         = {978-1-4503-2598-1},
  doi          = {10.1145/2661829.2661904},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  editor       = {Li, Jianzhong and Wang, X. Sean and Garofalakis, Minos and Soboroff, Ian and Suel, Torsten and Wang, Min},
  pages        = {819--828},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Athukorala, Kumaripaba %A Oulasvirta, Antti %A Glowacka, Dorata %A Vreeken, Jilles %A Jaccuci, Giulio %+ External Organizations Computer Graphics, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Narrow or Broad? Estimating Subjective Specificity in Exploratory Search : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53A1-6 %R 10.1145/2661829.2661904 %D 2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %B CIKM'14 %E Li, Jianzhong; Wang, X. Sean; Garofalakis, Minos; Soboroff, Ian; Suel, Torsten; Wang, Min %P 819 - 828 %I ACM %@ 978-1-4503-2598-1
[103]
K. Berberich, “Web Archives,” in Encyclopedia of Social Network Analysis and Mining, Berlin: Springer, 2014.
Export
BibTeX
@incollection{DBLP:reference/snam/Berberich14,
  title        = {Web Archives},
  author       = {Berberich, Klaus},
  language     = {eng},
  isbn         = {978-1-4614-6169-2},
  doi          = {10.1007/978-1-4614-6170-8_128},
  publisher    = {Springer},
  address      = {Berlin},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Encyclopedia of Social Network Analysis and Mining},
  pages        = {2337--2343},
}
Endnote
%0 Book Section %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Web Archives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53C1-B %R 10.1007/978-1-4614-6170-8_128 %D 2014 %B Encyclopedia of Social Network Analysis and Mining %P 2337 - 2343 %I Springer %C Berlin %@ 978-1-4614-6169-2
[104]
J. Biega, I. Mele, and G. Weikum, “Probabilistic Prediction of Privacy Risks in User Search Histories,” in PSBD’14, First International Workshop on Privacy and Security of Big Data, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Biega:PSBD2014,
  title        = {Probabilistic Prediction of Privacy Risks in User Search Histories},
  author       = {Biega, Joanna and Mele, Ida and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-1583-8},
  doi          = {10.1145/2663715.2669609},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {PSBD'14, First International Workshop on Privacy and Security of Big Data},
  pages        = {29--36},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Biega, Joanna %A Mele, Ida %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Probabilistic Prediction of Privacy Risks in User Search Histories : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5486-B %R 10.1145/2663715.2669609 %D 2014 %B First International Workshop on Privacy and Security of Big Data %Z date of event: 2014-11-07 - 2014-11-07 %C Shanghai, China %K privacy risk prediction, probabilistic privacy, query logs, user-centric privacy %B PSBD'14 %P 29 - 36 %I ACM %@ 978-1-4503-1583-8
[105]
R. Burghartz and K. Berberich, “MPI-INF at the NTCIR-11 Temporal Query Classification Task,” in Proceedings of the 11th NTCIR Conference on Evaluation of Information Access Technologies, Tokyo, Japan, 2014.
Export
BibTeX
@inproceedings{burghartz2014,
  title        = {{MPI}-{INF} at the {NTCIR}-11 Temporal Query Classification Task},
  author       = {Burghartz, Robin and Berberich, Klaus},
  language     = {eng},
  isbn         = {978-4-86049-065-2},
  publisher    = {National Institute of Informatics},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {Proceedings of the 11th NTCIR Conference on Evaluation of Information Access Technologies},
  editor       = {Kando, Noriko and Joho, Hideo and Kishida, Kazuaki},
  pages        = {443--450},
  address      = {Tokyo, Japan},
}
Endnote
%0 Conference Proceedings %A Burghartz, Robin %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T MPI-INF at the NTCIR-11 Temporal Query Classification Task : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5418-1 %D 2014 %8 09.12.2014 %B 11th NTCIR Conference on Evaluation of Information Access Technologies %Z date of event: 2014-12-09 - 2014-12-12 %C Tokyo, Japan %B Proceedings of the 11th NTCIR Conference on Evaluation of Information Access Technologies %E Kando, Noriko; Joho, Hideo; Kishida, Kazuaki %P 443 - 450 %I National Institute of Informatics %@ 978-4-86049-065-2 %U http://research.nii.ac.jp/ntcir/workshop/OnlineProceedings11/pdf/NTCIR/Temporalia/03-NTCIR11-TEMPORALIA-BurghartzR.pdf
[106]
P. Chau, J. Vreeken, M. van Leeuwen, and C. Faloutsos, Eds., Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics. Georgia Institute of Technology, 2014.
Export
BibTeX
@proceedings{escidoc:2078519,
  title        = {Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics (IDEA 2014)},
  editor       = {Chau, Polo and Vreeken, Jilles and van Leeuwen, Matthijs and Faloutsos, Christos},
  language     = {eng},
  publisher    = {Georgia Institute of Technology},
  year         = {2014},
  marginalmark = {$\bullet$},
  pages        = {130 p.},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %E Chau, Polo %E Vreeken, Jilles %E van Leeuwen, Matthijs %E Faloutsos, Christos %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5564-F %I Georgia Institute of Technology %D 2014 %B ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics %Z date of event: 2014-08-24 - 2014-08-24 %D 2014 %C New York, NY, USA %P 130 p. %U http://poloclub.gatech.edu/idea2014/papers/idea14-proceedings.pdf
[107]
L. Del Corro, R. Gemulla, and G. Weikum, “Werdy: Recognition and Disambiguation of Verbs and Verb Phrases with Syntactic and Semantic Pruning,” in The 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP 2014), Doha, Qatar, 2014.
Export
BibTeX
@inproceedings{DelCorro2014,
  title        = {Werdy: {Recognition} and Disambiguation of Verbs and Verb Phrases with Syntactic and Semantic Pruning},
  author       = {Del Corro, Luciano and Gemulla, Rainer and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-937284-96-1},
  url          = {http://aclweb.org/anthology/D14-1042},
  publisher    = {ACL},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {The 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP 2014)},
  pages        = {374--385},
  address      = {Doha, Qatar},
}
Endnote
%0 Conference Proceedings %A Del Corro, Luciano %A Gemulla, Rainer %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Werdy: Recognition and Disambiguation of Verbs and Verb Phrases with Syntactic and Semantic Pruning : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-51DF-E %U http://aclweb.org/anthology/D14-1042 %D 2014 %B 2014 Conference on Empirical Methods in Natural Language Processing %Z date of event: 2014-10-25 - 2014-10-29 %C Doha, Qatar %B The 2014 Conference on Empirical Methods in Natural Language Processing %P 374 - 385 %I ACL %@ 978-1-937284-96-1
[108]
G. de Melo and G. Weikum, “Taxonomic Data Integration from Multilingual Wikipedia Editions,” Knowledge and Information Systems, vol. 39, no. 1, 2014.
Export
BibTeX
@article{deMeloWeikum2013KAIS,
  title        = {Taxonomic Data Integration from Multilingual {Wikipedia} Editions},
  author       = {de Melo, Gerard and Weikum, Gerhard},
  language     = {eng},
  issn         = {0219-1377},
  doi          = {10.1007/s10115-012-0597-3},
  localid      = {Local-ID: E21183D8146A7A86C1257B1100306F46-deMeloWeikum2013KAIS},
  publisher    = {Springer},
  address      = {Berlin},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  journal      = {Knowledge and Information Systems},
  volume       = {39},
  number       = {1},
  pages        = {1--39},
}
Endnote
%0 Journal Article %A de Melo, Gerard %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Taxonomic Data Integration from Multilingual Wikipedia Editions : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A38-F %F OTHER: Local-ID: E21183D8146A7A86C1257B1100306F46-deMeloWeikum2013KAIS %R 10.1007/s10115-012-0597-3 %7 2013-01-08 %D 2014 %J Knowledge and Information Systems %V 39 %N 1 %& 1 %P 1 - 39 %I Springer %C Berlin %@ false
[109]
M. Dylla, M. Theobald, and I. Miliaraki, “Querying and Learning in Probabilistic Databases,” in Reasoning Web (RW 2014), Athens, Greece, 2014.
Abstract
Probabilistic Databases (PDBs) lie at the expressive intersection of databases, first-order logic, and probability theory. PDBs employ logical deduction rules to process Select-Project-Join (SPJ) queries, which form the basis for a variety of declarative query languages such as Datalog, Relational Algebra, and SQL. They employ logical consistency constraints to resolve data inconsistencies, and they represent query answers via logical lineage formulas (aka. "data provenance") to trace the dependencies between these answers and the input tuples that led to their derivation. While the literature on PDBs dates back to more than 25 years of research, only fairly recently the key role of lineage for establishing a closed and complete representation model of relational operations over this kind of probabilistic data was discovered. Although PDBs benefit from their efficient and scalable database infrastructures for data storage and indexing, they couple the data computation with probabilistic inference, the latter of which remains a #P-hard problem also in the context of PDBs. In this chapter, we provide a review on the key concepts of PDBs with a particular focus on our own recent research results related to this field. We highlight a number of ongoing research challenges related to PDBs, and we keep referring to an information extraction (IE) scenario as a running application to manage uncertain and temporal facts obtained from IE techniques directly inside a PDB setting.
Export
BibTeX
@inproceedings{DyllaRW2014,
  title        = {Querying and Learning in Probabilistic Databases},
  author       = {Dylla, Maximilian and Theobald, Martin and Miliaraki, Iris},
  language     = {eng},
  isbn         = {978-3-319-10587-1; 978-3-319-10586-4},
  doi          = {10.1007/978-3-319-10587-1_8},
  publisher    = {Springer},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  abstract     = {Probabilistic Databases (PDBs) lie at the expressive intersection of databases, first-order logic, and probability theory. PDBs employ logical deduction rules to process Select-Project-Join (SPJ) queries, which form the basis for a variety of declarative query languages such as Datalog, Relational Algebra, and SQL. They employ logical consistency constraints to resolve data inconsistencies, and they represent query answers via logical lineage formulas (aka. ``data provenance'') to trace the dependencies between these answers and the input tuples that led to their derivation. While the literature on PDBs dates back to more than 25 years of research, only fairly recently the key role of lineage for establishing a closed and complete representation model of relational operations over this kind of probabilistic data was discovered. Although PDBs benefit from their efficient and scalable database infrastructures for data storage and indexing, they couple the data computation with probabilistic inference, the latter of which remains a \#P-hard problem also in the context of PDBs. In this chapter, we provide a review on the key concepts of PDBs with a particular focus on our own recent research results related to this field. We highlight a number of ongoing research challenges related to PDBs, and we keep referring to an information extraction (IE) scenario as a running application to manage uncertain and temporal facts obtained from IE techniques directly inside a PDB setting.},
  booktitle    = {Reasoning Web (RW 2014)},
  editor       = {Koubarakis, Manolis and Stamou, Giorgos and Stoilos, Giorgos and Horrocks, Ian and Kolaitis, Phokion and Lausen, Georg and Weikum, Gerhard},
  pages        = {313--368},
  series       = {Lecture Notes in Computer Science},
  volume       = {8714},
  address      = {Athens, Greece},
}
Endnote
%0 Conference Proceedings %A Dylla, Maximilian %A Theobald, Martin %A Miliaraki, Iris %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Querying and Learning in Probabilistic Databases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-E51D-9 %F OTHER: WOS:000348929200008 %R 10.1007/978-3-319-10587-1_8 %D 2014 %B 10th Reasoning Web Summer School %Z date of event: 2014-09-08 - 2014-09-13 %C Athens, Greece %X Probabilistic Databases (PDBs) lie at the expressive intersection of databases, first-order logic, and probability theory. PDBs employ logical deduction rules to process Select-Project-Join (SPJ) queries, which form the basis for a variety of declarative query languages such as Datalog, Relational Algebra, and SQL. They employ logical consistency constraints to resolve data inconsistencies, and they represent query answers via logical lineage formulas (aka. "data provenance") to trace the dependencies between these answers and the input tuples that led to their derivation. While the literature on PDBs dates back to more than 25 years of research, only fairly recently the key role of lineage for establishing a closed and complete representation model of relational operations over this kind of probabilistic data was discovered. Although PDBs benefit from their efficient and scalable database infrastructures for data storage and indexing, they couple the data computation with probabilistic inference, the latter of which remains a #P-hard problem also in the context of PDBs. In this chapter, we provide a review on the key concepts of PDBs with a particular focus on our own recent research results related to this field. We highlight a number of ongoing research challenges related to PDBs, and we keep referring to an information extraction (IE) scenario as a running application to manage uncertain and temporal facts obtained from IE techniques directly inside a PDB setting. 
%K Probabilistic and Temporal Databases Deduction Rules Consistency Constraints Information Extraction LINEAGE SYSTEMS WEB Computer Science, Information Systems Computer Science, Theory & Methods %B Reasoning Web %E Koubarakis, Manolis; Stamou, Giorgos; Stoilos, Giorgos; Horrocks, Ian; Kolaitis, Phokion; Lausen, Georg; Weikum, Gerhard %P 313 - 368 %I Springer %@ 978-3-319-10587-1 978-3-319-10586-4 %B Lecture Notes in Computer Science %N 8714
[110]
M. Dylla, “Efficient Querying and Learning in Probabilistic and Temporal Databases,” Universität des Saarlandes, Saarbrücken, 2014.
Abstract
Probabilistic databases store, query, and manage large amounts of uncertain information. This thesis advances the state-of-the-art in probabilistic databases in three different ways: 1. We present a closed and complete data model for temporal probabilistic databases and analyze its complexity. Queries are posed via temporal deduction rules which induce lineage formulas capturing both time and uncertainty. 2. We devise a methodology for computing the top-k most probable query answers. It is based on first-order lineage formulas representing sets of answer candidates. Theoretically derived probability bounds on these formulas enable pruning low-probability answers. 3. We introduce the problem of learning tuple probabilities which allows updating and cleaning of probabilistic databases. We study its complexity, characterize its solutions, cast it into an optimization problem, and devise an approximation algorithm based on stochastic gradient descent. All of the above contributions support consistency constraints and are evaluated experimentally.
Export
BibTeX
@phdthesis{DyllaPhDThesis2014,
  title        = {Efficient Querying and Learning in Probabilistic and Temporal Databases},
  author       = {Dylla, Maximilian},
  language     = {eng},
  url          = {urn:nbn:de:bsz:291-scidok-58146},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  abstract     = {Probabilistic databases store, query, and manage large amounts of uncertain information. This thesis advances the state-of-the-art in probabilistic databases in three different ways: 1. We present a closed and complete data model for temporal probabilistic databases and analyze its complexity. Queries are posed via temporal deduction rules which induce lineage formulas capturing both time and uncertainty. 2. We devise a methodology for computing the top-k most probable query answers. It is based on first-order lineage formulas representing sets of answer candidates. Theoretically derived probability bounds on these formulas enable pruning low-probability answers. 3. We introduce the problem of learning tuple probabilities which allows updating and cleaning of probabilistic databases. We study its complexity, characterize its solutions, cast it into an optimization problem, and devise an approximation algorithm based on stochastic gradient descent. All of the above contributions support consistency constraints and are evaluated experimentally.},
}
Endnote
%0 Thesis %A Dylla, Maximilian %Y Weikum, Gerhard %A referee: Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Efficient Querying and Learning in Probabilistic and Temporal Databases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-3C44-E %U urn:nbn:de:bsz:291-scidok-58146 %I Universität des Saarlandes %C Saarbrücken %D 2014 %P VIII, 169 p. %V phd %9 phd %X Probabilistic databases store, query, and manage large amounts of uncertain information. This thesis advances the state-of-the-art in probabilistic databases in three different ways: 1. We present a closed and complete data model for temporal probabilistic databases and analyze its complexity. Queries are posed via temporal deduction rules which induce lineage formulas capturing both time and uncertainty. 2. We devise a methodology for computing the top-k most probable query answers. It is based on first-order lineage formulas representing sets of answer candidates. Theoretically derived probability bounds on these formulas enable pruning low-probability answers. 3. We introduce the problem of learning tuple probabilities which allows updating and cleaning of probabilistic databases. We study its complexity, characterize its solutions, cast it into an optimization problem, and devise an approximation algorithm based on stochastic gradient descent. All of the above contributions support consistency constraints and are evaluated experimentally. %K Deduction Rules, Probabilistic Database, Temporal Database, Learning, Constraints, Top-k %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=dehttp://scidok.sulb.uni-saarland.de/volltexte/2014/5814/
[111]
M. Dylla and M. Theobald, “Learning Tuple Probabilities in Probabilistic Databases,” Max-Planck-Institut für Informatik, Saarbrücken, MPI-I-2014-5-001, 2014.
Abstract
Learning the parameters of complex probabilistic-relational models from labeled training data is a standard technique in machine learning, which has been intensively studied in the subfield of Statistical Relational Learning (SRL), but---so far---this is still an under-investigated topic in the context of Probabilistic Databases (PDBs). In this paper, we focus on learning the probability values of base tuples in a PDB from query answers, the latter of which are represented as labeled lineage formulas. Specifically, we consider labels in the form of pairs, each consisting of a Boolean lineage formula and a marginal probability that comes attached to the corresponding query answer. The resulting learning problem can be viewed as the inverse problem to confidence computations in PDBs: given a set of labeled query answers, learn the probability values of the base tuples, such that the marginal probabilities of the query answers again yield in the assigned probability labels. We analyze the learning problem from a theoretical perspective, devise two optimization-based objectives, and provide an efficient algorithm (based on Stochastic Gradient Descent) for solving these objectives. Finally, we conclude this work by an experimental evaluation on three real-world and one synthetic dataset, while competing with various techniques from SRL, reasoning in information extraction, and optimization.
Export
BibTeX
@techreport{Dylla-Learning2014,
  title        = {Learning Tuple Probabilities in Probabilistic Databases},
  author       = {Dylla, Maximilian and Theobald, Martin},
  language     = {eng},
  issn         = {0946-011X},
  number       = {MPI-I-2014-5-001},
  institution  = {Max-Planck-Institut f{\"u}r Informatik},
  address      = {Saarbr{\"u}cken},
  year         = {2014},
  marginalmark = {$\bullet$},
  abstract     = {Learning the parameters of complex probabilistic-relational models from labeled training data is a standard technique in machine learning, which has been intensively studied in the subfield of Statistical Relational Learning (SRL), but---so far---this is still an under-investigated topic in the context of Probabilistic Databases (PDBs). In this paper, we focus on learning the probability values of base tuples in a PDB from query answers, the latter of which are represented as labeled lineage formulas. Specifically, we consider labels in the form of pairs, each consisting of a Boolean lineage formula and a marginal probability that comes attached to the corresponding query answer. The resulting learning problem can be viewed as the inverse problem to confidence computations in PDBs: given a set of labeled query answers, learn the probability values of the base tuples, such that the marginal probabilities of the query answers again yield in the assigned probability labels. We analyze the learning problem from a theoretical perspective, devise two optimization-based objectives, and provide an efficient algorithm (based on Stochastic Gradient Descent) for solving these objectives. Finally, we conclude this work by an experimental evaluation on three real-world and one synthetic dataset, while competing with various techniques from SRL, reasoning in information extraction, and optimization.},
  type         = {Research Report},
}
Endnote
%0 Report %A Dylla, Maximilian %A Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Learning Tuple Probabilities in Probabilistic Databases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0019-8492-6 %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2014 %P 51 p. %X Learning the parameters of complex probabilistic-relational models from labeled training data is a standard technique in machine learning, which has been intensively studied in the subfield of Statistical Relational Learning (SRL), but---so far---this is still an under-investigated topic in the context of Probabilistic Databases (PDBs). In this paper, we focus on learning the probability values of base tuples in a PDB from query answers, the latter of which are represented as labeled lineage formulas. Specifically, we consider labels in the form of pairs, each consisting of a Boolean lineage formula and a marginal probability that comes attached to the corresponding query answer. The resulting learning problem can be viewed as the inverse problem to confidence computations in PDBs: given a set of labeled query answers, learn the probability values of the base tuples, such that the marginal probabilities of the query answers again yield in the assigned probability labels. We analyze the learning problem from a theoretical perspective, devise two optimization-based objectives, and provide an efficient algorithm (based on Stochastic Gradient Descent) for solving these objectives. Finally, we conclude this work by an experimental evaluation on three real-world and one synthetic dataset, while competing with various techniques from SRL, reasoning in information extraction, and optimization. %B Research Report %@ false
[112]
D. Erdős, R. Gemulla, and E. Terzi, “Reconstructing Graphs from Neighborhood Data,” ACM Transactions on Knowledge Discovery from Data, vol. 8, no. 4, 2014.
Export
BibTeX
@article{Erdos:2014:RGN:2663597.2641761,
  title        = {Reconstructing Graphs from Neighborhood Data},
  author       = {Erd{\H o}s, D{\'o}ra and Gemulla, Rainer and Terzi, Evimaria},
  language     = {eng},
  issn         = {1556-4681},
  doi          = {10.1145/2641761},
  publisher    = {ACM},
  address      = {New York, NY},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  journal      = {ACM Transactions on Knowledge Discovery from Data},
  volume       = {8},
  number       = {4},
  pages        = {1--22},
  eid          = {23},
}
Endnote
%0 Journal Article %A Erdős, Dóra %A Gemulla, Rainer %A Terzi, Evimaria %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Reconstructing Graphs from Neighborhood Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-692A-E %R 10.1145/2641761 %7 2014 %D 2014 %K Bipartite graph reconstruction, adjacency matrix, singular value decomposition %J ACM Transactions on Knowledge Discovery from Data %O TKDD %V 8 %N 4 %& 1 %P 1 - 22 %Z sequence number: 23 %I ACM %C New York, NY %@ false
[113]
P. Ernst, C. Meng, A. Siu, and G. Weikum, “KnowLife: A Knowledge Graph for Health and Life Sciences,” in 30th International Conference on Data Engineering (ICDE 2014), Chicago, IL, USA, 2014.
Export
BibTeX
@inproceedings{DBLP:conf/icde/ErnstMSW14,
  title        = {{KnowLife}: A Knowledge Graph for Health and Life Sciences},
  author       = {Ernst, Patrick and Meng, Cynthia and Siu, Amy and Weikum, Gerhard},
  language     = {eng},
  doi          = {10.1109/ICDE.2014.6816754},
  publisher    = {IEEE},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {30th International Conference on Data Engineering (ICDE 2014)},
  pages        = {1254--1257},
  address      = {Chicago, IL, USA},
}
Endnote
%0 Conference Proceedings %A Ernst, Patrick %A Meng, Cynthia %A Siu, Amy %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T KnowLife: A Knowledge Graph for Health and Life Sciences : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6BA0-1 %R 10.1109/ICDE.2014.6816754 %D 2014 %B 30th International Conference on Data Engineering %Z date of event: 2014-03-31 - 2014-04-04 %C Chicago, IL, USA %B 30th International Conference on Data Engineering %P 1254 - 1257 %I IEEE %U http://dx.doi.org/10.1109/ICDE.2014.6816754
[114]
E. Galbrun and P. Miettinen, “Interactive Redescription Mining,” in SIGMOD’14, ACM SIGMOD International Conference on Management of Data, Snowbird, UT, USA, 2014.
Abstract
Exploratory data analysis consists of multiple iterated steps: a data mining method is run on the data, the results are interpreted, new insights are formed, and the resulting knowledge is utilized when executing the method in a next round, and so on until satisfactory results are obtained. We focus on redescription mining, a powerful data analysis method that aims at finding alternative descriptions of the same entities, for example, ways to characterize geographical regions in terms of both the fauna that inhabits them and their bioclimatic conditions, so-called bioclimatic niches. We present Siren, a tool for interactive redescription mining. It is designed to facilitate the exploratory analysis of data by providing a seamless environment for mining, visualizing and editing redescriptions in an interactive fashion, supporting the analysis process in all its stages. We demonstrate its use for exploratory data mining. Simultaneously, Siren exemplifies the power of the various visualizations and means of interaction integrated into it; techniques that reach beyond the task of redescription mining considered here, to other analysis methods.
Export
BibTeX
@inproceedings{galbrun14interactive,
  title        = {Interactive Redescription Mining},
  author       = {Galbrun, Esther and Miettinen, Pauli},
  language     = {eng},
  isbn         = {978-1-4503-2376-5},
  doi          = {10.1145/2588555.2594520},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014-03},
  abstract     = {Exploratory data analysis consists of multiple iterated steps: a data mining method is run on the data, the results are interpreted, new insights are formed, and the resulting knowledge is utilized when executing the method in a next round, and so on until satisfactory results are obtained. We focus on redescription mining, a powerful data analysis method that aims at finding alternative descriptions of the same entities, for example, ways to characterize geographical regions in terms of both the fauna that inhabits them and their bioclimatic conditions, so-called bioclimatic niches. We present Siren, a tool for interactive redescription mining. It is designed to facilitate the exploratory analysis of data by providing a seamless environment for mining, visualizing and editing redescriptions in an interactive fashion, supporting the analysis process in all its stages. We demonstrate its use for exploratory data mining. Simultaneously, Siren exemplifies the power of the various visualizations and means of interaction integrated into it; techniques that reach beyond the task of redescription mining considered here, to other analysis methods.},
  booktitle    = {SIGMOD'14, ACM SIGMOD International Conference on Management of Data},
  editor       = {Dyreson, Curtis and Li, Feifei and {\"O}zsu, M. Tamer},
  pages        = {1079--1082},
  address      = {Snowbird, UT, USA},
}
Endnote
%0 Conference Proceedings %A Galbrun, Esther %A Miettinen, Pauli %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Interactive Redescription Mining : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4987-F %R 10.1145/2588555.2594520 %D 2014 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2014-06-22 - 2014-06-27 %C Snowbird, UT, USA %X Exploratory data analysis consists of multiple iterated steps: a data mining method is run on the data, the results are interpreted, new insights are formed, and the resulting knowl- edge is utilized when executing the method in a next round, and so on until satisfactory results are obtained. We focus on redescription mining, a powerful data analysis method that aims at finding alternative descriptions of the same entities, for example, ways to characterize geographical regions in terms of both the fauna that inhabits them and their bioclimatic conditions, so-called bioclimatic niches. We present Siren, a tool for interactive redescription min- ing. It is designed to facilitate the exploratory analysis of data by providing a seamless environment for mining, visu- alizing and editing redescriptions in an interactive fashion, supporting the analysis process in all its stages. We demon- strate its use for exploratory data mining. Simultaneously, Siren exemplifies the power of the various visualizations and means of interaction integrated into it; Techniques that reach beyond the task of redescription mining considered here, to other analysis methods. %B SIGMOD'14 %E Dyresson, Curtis; Li, Feifei; Özsu, M. Tamer %P 1079 - 1082 %I ACM %@ 978-1-4503-2376-5
[115]
A. Grycner, G. Weikum, J. Pujara, J. Foulds, and L. Getoor, “A Unified Probabilistic Approach for Semantic Clustering of Relational Phrases,” in AKBC 2014, 4th Workshop on Automated Knowledge Base Construction, Montreal, Canada, 2014.
Export
BibTeX
@inproceedings{grycner2014:AKBC,
  title        = {A Unified Probabilistic Approach for Semantic Clustering of Relational Phrases},
  author       = {Grycner, Adam and Weikum, Gerhard and Pujara, Jay and Foulds, James and Getoor, Lise},
  language     = {eng},
  url          = {http://www.akbc.ws/2014/submissions/akbc2014_submission_13.pdf},
  publisher    = {AKBC Board},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {AKBC 2014, 4th Workshop on Automated Knowledge Base Construction},
  address      = {Montreal, Canada},
}
Endnote
%0 Conference Proceedings %A Grycner, Adam %A Weikum, Gerhard %A Pujara, Jay %A Foulds, James %A Getoor, Lise %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T A Unified Probabilistic Approach for Semantic Clustering of Relational Phrases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5B22-D %U http://www.akbc.ws/2014/submissions/akbc2014_submission_13.pdf %D 2014 %B 4th Workshop on Automated Knowledge Base Construction %Z date of event: 2014-12-13 - 2014-12-13 %C Montreal, Canada %B AKBC 2014 %I AKBC Board %U http://www.akbc.ws/2014/submissions/akbc2014_submission_13.pdf
[116]
A. Grycner and G. Weikum, “HARPY: Hypernyms and Alignment of Relational Paraphrases,” in Proceedings of COLING 2014: Technical Papers, Dublin, Ireland, 2014.
Abstract
Collections of relational paraphrases have been automatically constructed from large text corpora, as a WordNet counterpart for the realm of binary predicates and their surface forms. However, these resources fall short in their coverage of hypernymy links (subsumptions) among the synsets of phrases. This paper closes this gap by computing a high-quality alignment between the relational phrases of the Patty taxonomy, one of the largest collections of this kind, and the verb senses of WordNet. To this end, we devise judicious features and develop a graph-based alignment algorithm by adapting and extending the SimRank random-walk method. The resulting taxonomy of relational phrases and verb senses, coined HARPY, contains 20,812 synsets organized into a Directed Acyclic Graph (DAG) with 616,792 hypernymy links. Our empirical assessment indicates that the alignment links between Patty and WordNet have high accuracy, with Mean Reciprocal Rank (MRR) score 0.7 and Normalized Discounted Cumulative Gain (NDCG) score 0.73. As an additional extrinsic value, HARPY provides fine-grained lexical types for the arguments of verb senses in WordNet.
Export
BibTeX
@inproceedings{grycner-weikum:2014:Coling,
  title        = {{HARPY}: {Hypernyms} and Alignment of Relational Paraphrases},
  author       = {Grycner, Adam and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-941643-26-6},
  url          = {http://www.aclweb.org/anthology/C14-1207},
  publisher    = {ACL},
  year         = {2014},
  marginalmark = {$\bullet$},
  abstract     = {Collections of relational paraphrases have been automatically constructed from large text corpora, as a WordNet counterpart for the realm of binary predicates and their surface forms. However, these resources fall short in their coverage of hypernymy links (subsumptions) among the synsets of phrases. This paper closes this gap by computing a high-quality alignment between the relational phrases of the Patty taxonomy, one of the largest collections of this kind, and the verb senses of WordNet. To this end, we devise judicious features and develop a graph-based alignment algorithm by adapting and extending the SimRank random-walk method. The resulting taxonomy of relational phrases and verb senses, coined HARPY, contains 20,812 synsets organized into a {\em Directed Acyclic Graph (DAG)} with 616,792 hypernymy links. Our empirical assessment indicates that the alignment links between Patty and WordNet have high accuracy, with {\em Mean Reciprocal Rank (MRR)} score 0.7 and {\em Normalized Discounted Cumulative Gain (NDCG)} score 0.73. As an additional extrinsic value, HARPY provides fine-grained lexical types for the arguments of verb senses in WordNet.},
  booktitle    = {Proceedings of COLING 2014: Technical Papers},
  editor       = {Hajic, Jan and Tsujii, Junichi},
  pages        = {2195--2204},
  eid          = {C14},
  address      = {Dublin, Ireland},
}
Endnote
%0 Conference Proceedings %A Grycner, Adam %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T HARPY: Hypernyms and Alignment of Relational Paraphrases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-3329-1 %U http://www.aclweb.org/anthology/C14-1207 %D 2014 %B 25th International Conference on Computational Linguistics %Z date of event: 2014-08-23 - 2014-08-29 %C Dublin, Ireland %X Collections of relational paraphrases have been automatically constructed from \u000Alarge text corpora, as a WordNet counterpart for the realm of binary predicates \u000Aand their surface forms.\u000AHowever, these resources fall short in their coverage of hypernymy links \u000A(subsumptions) among the synsets of phrases. \u000AThis paper closes this gap by computing a high‐quality alignment between the \u000Arelational phrases of the Patty taxonomy, one of the largest collections of \u000Athis kind, and the verb senses of WordNet. To this end, we devise judicious \u000Afeatures and develop a graph‐based alignment algorithm by adapting and \u000Aextending the SimRank random‐walk method.\u000AThe resulting taxonomy of relational phrases and verb senses, coined HARPY, \u000Acontains 20,812 synsets organized into a \em Directed Acyclic Graph (DAG)} \u000Awith 616,792 hypernymy links. \u000AOur empirical assessment, indicates that the alignment links between Patty and \u000AWordNet have high accuracy, with {\em Mean Reciprocal Rank (MRR)} score 0.7 and \u000A{\em Normalized Discounted Cumulative Gain (NDCG) score 0.73. \u000AAs an additional extrinsic value, HARPY provides fine‐grained lexical types for \u000Athe arguments of verb senses in WordNet. %B Proceedings of COLING 2014: Technical Papers %E Hajic, Jan; Tsujii, Junichi %P 2195 - 2204 %Z sequence number: C14 %I ACL %@ 978‐1‐941643‐26‐6
[117]
D. Gupta and K. Berberich, “Identifying Time Intervals of Interest to Queries,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{DBLP:conf/cikm/GuptaB14,
  title        = {Identifying Time Intervals of Interest to Queries},
  author       = {Gupta, Dhruv and Berberich, Klaus},
  language     = {eng},
  doi          = {10.1145/2661829.2661927},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  editor       = {Li, Jianzhong and Wang, Xiaoyang Sean and Garofalakis, Minos N. and Soboroff, Ian and Suel, Torsten and Wang, Min},
  pages        = {1835--1838},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Gupta, Dhruv %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Identifying Time Intervals of Interest to Queries : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5435-1 %R 10.1145/2661829.2661927 %D 2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %B CIKM'14 %E Li, Jianzhong; Wang, Xiaoyang Sean; Garofalakis, Minos N.; Soboroff, Ian; Suel, Torsten; Wang, Min %P 1835 - 1838 %I ACM
[118]
S. Gurajada, S. Seufert, I. Miliaraki, and M. Theobald, “TriAD: A Distributed Shared-nothing RDF Engine Based on Asynchronous Message Passing,” in SIGMOD’14, ACM SIGMOD International Conference on Management of Data, Snowbird, UT, USA, 2014.
Export
BibTeX
@inproceedings{Gurajada:2014:TDS:2588555.2610511,
  title        = {{TriAD}: A Distributed Shared-nothing {RDF} Engine Based on Asynchronous Message Passing},
  author       = {Gurajada, Sairam and Seufert, Stephan and Miliaraki, Iris and Theobald, Martin},
  language     = {eng},
  isbn         = {978-1-4503-2376-5},
  doi          = {10.1145/2588555.2610511},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {SIGMOD'14, ACM SIGMOD International Conference on Management of Data},
  editor       = {Dyreson, Curtis and Li, Feifei and {\"O}zsu, M. Tamer},
  pages        = {289--300},
  address      = {Snowbird, UT, USA},
}
Endnote
%0 Conference Proceedings %A Gurajada, Sairam %A Seufert, Stephan %A Miliaraki, Iris %A Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T TriAD: A Distributed Shared-nothing RDF Engine Based on Asynchronous Message Passing : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C81-2 %R 10.1145/2588555.2610511 %D 2014 %B ACM SIGMOD International Conference on Management of Data %Z date of event: 2014-06-22 - 2014-06-27 %C Snowbird, UT, USA %K asynchronous message passing, distributed RDF indexing 38; SparQL processing, join-ahead pruning, parallel join evaluation %B SIGMOD'14 %E Dyresson, Curtis; Li, Feifei; Özsu, M. Tamer %P 289 - 300 %I ACM %@ 978-1-4503-2376-5
[119]
S. Gurajada, S. Seufert, I. Miliaraki, and M. Theobald, “Using Graph Summarization for Join-ahead Pruning in a Distributed RDF Engine,” in SWIM’14, 6th International Workshop on Semantic Web Information Management, Snowbird, UT, USA, 2014.
Export
BibTeX
@inproceedings{Gurajada:2014:UGS:2630602.2630610,
  title        = {Using Graph Summarization for Join-ahead Pruning in a Distributed {RDF} Engine},
  author       = {Gurajada, Sairam and Seufert, Stephan and Miliaraki, Iris and Theobald, Martin},
  language     = {eng},
  isbn         = {978-1-4503-2994-1},
  doi          = {10.1145/2630602.2630610},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {SWIM'14, 6th International Workshop on Semantic Web Information Management},
  pages        = {1--4},
  eid          = {41},
  address      = {Snowbird, UT, USA},
}
Endnote
%0 Conference Proceedings %A Gurajada, Sairam %A Seufert, Stephan %A Miliaraki, Iris %A Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Using Graph Summarization for Join-ahead Pruning in a Distributed RDF Engine : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C65-2 %R 10.1145/2630602.2630610 %D 2014 %B 6th International Workshop on Semantic Web Information Management %Z date of event: 2014-06-22 - 2014-06-27 %C Snowbird, UT, USA %B SWIM'14 %P 1 - 4 %Z sequence number: 41 %I ACM %@ 978-1-4503-2994-1
[120]
A. Harth, K. Hose, and R. Schenkel, Eds., Linked Data Management. Boca Raton, FL: CRC Press, 2014.
Export
BibTeX
@book{LinkedDataBook2014,
  title        = {Linked Data Management},
  editor       = {Harth, Andreas and Hose, Katja and Schenkel, Ralf},
  language     = {eng},
  isbn         = {978-1466582408; 1466582405},
  publisher    = {CRC Press},
  address      = {Boca Raton, FL},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  pages        = {576 p.},
  series       = {Emerging Directions in Database Systems and Applications},
}
Endnote
%0 Edited Book %A Harth, Andreas %A Hose, Katja %A Schenkel, Ralf %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Linked Data Management : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0019-8478-2 %@ 978-1466582408 %@ 1466582405 %I CRC Press %C Boca Raton, FL %D 2014 %P 576 p. %B Emerging Directions in Database Systems and Applications
[121]
J. Hoffart, Y. Altun, and G. Weikum, “Discovering Emerging Entities with Ambiguous Names,” in WWW’14, 23rd International World Wide Web Conference, Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{Hoffart:2014hp,
  title        = {Discovering Emerging Entities with Ambiguous Names},
  author       = {Hoffart, Johannes and Altun, Yasemin and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-2744-2},
  doi          = {10.1145/2566486.2568003},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {WWW'14, 23rd International World Wide Web Conference},
  editor       = {Chung, Chin-Wan and Broder, Andrei and Shim, Kyuseok and Suel, Torsten},
  pages        = {385--395},
  address      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %A Altun, Yasemin %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering Emerging Entities with Ambiguous Names : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5364-0 %R 10.1145/2566486.2568003 %D 2014 %B 23rd International World Wide Web Conference %Z date of event: 2014-04-07 - 2014-04-11 %C Seoul, Korea %B WWW'14 %E Chung, Chin-Wan; Broder, Andrei; Shin, Kyuseok; Suel, Torsten %P 385 - 395 %I ACM %@ 978-1-4503-2744-2
[122]
J. Hoffart, D. Milchevski, and G. Weikum, “AESTHETICS: Analytics with Strings, Things, and Cats,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Hoffart:2014cy,
  title        = {{AESTHETICS}: Analytics with Strings, Things, and Cats},
  author       = {Hoffart, Johannes and Milchevski, Dragan and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-2598-1},
  doi          = {10.1145/2661829.2661835},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  editor       = {Li, Jianzhong and Wang, X. Sean and Garofalakis, Minos and Soboroff, Ian and Suel, Torsten and Wang, Min},
  pages        = {2018--2020},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %A Milchevski, Dragan %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T AESTHETICS: Analytics with Strings, Things, and Cats : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-536B-2 %R 10.1145/2661829.2661835 %D 2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %B CIKM'14 %E Li, Jianzhong; Wang, X. Sean; Garofalakis, Minos; Soboroff, Ian; Suel, Torsten; Wang, Min %P 2018 - 2020 %I ACM %@ 978-1-4503-2598-1
[123]
J. Hoffart, D. Milchevski, and G. Weikum, “STICS: Searching with Strings, Things, and Cats,” in SIGIR’14, 37th International ACM SIGIR Conference on Research and Development in Information Retrieval, Gold Coast, Australia, 2014.
Export
BibTeX
@inproceedings{Hoffart:2014dt,
  title        = {{STICS}: Searching with Strings, Things, and Cats},
  author       = {Hoffart, Johannes and Milchevski, Dragan and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-2257-7},
  doi          = {10.1145/2600428.2611177},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {SIGIR'14, 37th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages        = {1247--1248},
  address      = {Gold Coast, Australia},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %A Milchevski, Dragan %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T STICS: Searching with Strings, Things, and Cats : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5344-7 %R 10.1145/2600428.2611177 %D 2014 %B 37th International ACM SIGIR Conference on Research and Development in Information Retrieval %Z date of event: 2014-07-06 - 2014-07-11 %C Gold Coast, Australia %B SIGIR'14 %P 1247 - 1248 %I ACM %@ 978-1-4503-2257-7
[124]
K. Hui, “Towards Robust & Reusable Evaluation for Novelty & Diversity,” in PIKM’14, 7th PhD Workshop in Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Hui-pikm2014,
  title        = {Towards Robust \& Reusable Evaluation for Novelty \& Diversity},
  author       = {Hui, Kai},
  language     = {eng},
  isbn         = {978-1-4503-1481-7},
  doi          = {10.1145/2663714.2668045},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {PIKM'14, 7th PhD Workshop in Information and Knowledge Management},
  editor       = {de Melo, Gerard and Kacimi, Mouna and Varde, Aparna S.},
  pages        = {9--17},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Hui, Kai %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Towards Robust & Reusable Evaluation for Novelty & Diversity : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4F55-D %R 10.1145/2663714.2668045 %D 2014 %B 7th PhD Workshop in Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-03 %C Shanghai, China %B PIKM'14 %E de Melo, Gerard; Kacimi, Mouna; Varde, Aparna S. %P 9 - 17 %I ACM %@ 978-1-4503-1481-7
[125]
Y. Ibrahim, M. A. Yosef, and G. Weikum, “AIDA-Social: Entity Linking on the Social Stream,” in ESAIR’14, 7th International Workshop on Exploiting Semantic Annotations in Information Retrieval, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{mamir:2014:aida-social,
  title        = {{AIDA}-{Social}: {Entity} Linking on the Social Stream},
  author       = {Ibrahim, Yusra and Yosef, Mohamed Amir and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-1365-0},
  doi          = {10.1145/2663712.2666185},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {ESAIR'14, 7th International Workshop on Exploiting Semantic Annotations in Information Retrieval},
  editor       = {Alonso, Omar and Kamps, Jaap and Karlgren, Jussi},
  pages        = {17--19},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Ibrahim, Yusra %A Yosef, Mohamed Amir %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T AIDA-Social: Entity Linking on the Social Stream : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54A3-7 %R 10.1145/2663712.2666185 %D 2014 %B 7th International Workshop on Exploiting Semantic Annotations in Information Retrieval %Z date of event: 2014-11-07 - 2014-11-07 %C Shanghai, China %K information extraction, named entity linking, semantic annotation, social media %B ESAIR'14 %E Alonso, Omar; Kamps, Jaap; Karlgren, Jussi %P 17 - 19 %I ACM %@ 978-1-4503-1365-0 %U http://doi.acm.org/10.1145/2663712.2666185
[126]
S. Karaev, “NASSAU: Description Length Minimization for Boolean Matrix Factorization,” in ECML/PKDD 2014 PhD Session Proceedings, Nancy, France, 2014.
Export
BibTeX
@inproceedings{karaev2014nassau,
  TITLE        = {NASSAU: {D}escription Length Minimization for {Boolean} Matrix Factorization},
  AUTHOR       = {Karaev, Sanjar},
  LANGUAGE     = {eng},
  URL          = {https://phdsession-ecmlpkdd2014.greyc.fr/sites/phdsession-ecmlpkdd2014.greyc.fr/files/papers/Paper_20702.pdf},
  PUBLISHER    = {University of Caen},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {ECML/PKDD 2014 PhD Session Proceedings},
  EDITOR       = {Belohlavek, Radim and Cr{\'e}milleux, Bruno},
  PAGES        = {177--186},
  ADDRESS      = {Nancy, France},
}
Endnote
%0 Conference Proceedings %A Karaev, Sanjar %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T NASSAU: Description Length Minimization for Boolean Matrix Factorization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-51A9-6 %U https://phdsession-ecmlpkdd2014.greyc.fr/sites/phdsession-ecmlpkdd2014.greyc.fr/files/papers/Paper_20702.pdf %D 2014 %B The European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2014-09-15 - 2014-09-19 %C Nancy, France %B ECML/PKDD 2014 PhD Session Proceedings %E Belohlavek, Radim; Crémilleux, Bruno %P 177 - 186 %I University of Caen
[127]
S. K. Kondreddi, “Human Computing and Crowdsourcing Methods for Knowledge Acquisition,” Universität des Saarlandes, Saarbrücken, 2014.
Abstract
Ambiguity, complexity, and diversity in natural language textual expressions are major hindrances to automated knowledge extraction. As a result state-of-the-art methods for extracting entities and relationships from unstructured data make incorrect extractions or produce noise. With the advent of human computing, computationally hard tasks have been addressed through human inputs. While text-based knowledge acquisition can benefit from this approach, humans alone cannot bear the burden of extracting knowledge from the vast textual resources that exist today. Even making payments for crowdsourced acquisition can quickly become prohibitively expensive. In this thesis we present principled methods that effectively garner human computing inputs for improving the extraction of knowledge-base facts from natural language texts. Our methods complement automatic extraction techniques with human computing to reap the benefits of both while overcoming each other's limitations. We present the architecture and implementation of HIGGINS, a system that combines an information extraction (IE) engine with a human computing (HC) engine to produce high quality facts. The IE engine combines statistics derived from large Web corpora with semantic resources like WordNet and ConceptNet to construct a large dictionary of entity and relational phrases. It employs specifically designed statistical language models for phrase relatedness to come up with questions and relevant candidate answers that are presented to human workers. Through extensive experiments we establish the superiority of this approach in extracting relation-centric facts from text. In our experiments we extract facts about fictitious characters in narrative text, where the issues of diversity and complexity in expressing relations are far more pronounced. Finally, we also demonstrate how interesting human computing games can be designed for knowledge acquisition tasks.
Export
BibTeX
@phdthesis{Kondreddi2014b,
  TITLE        = {Human Computing and Crowdsourcing Methods for Knowledge Acquisition},
  AUTHOR       = {Kondreddi, Sarath Kumar},
  LANGUAGE     = {eng},
  URL          = {urn:nbn:de:bsz:291-scidok-57948},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  ABSTRACT     = {Ambiguity, complexity, and diversity in natural language textual expressions are major hindrances to automated knowledge extraction. As a result state-of-the-art methods for extracting entities and relationships from unstructured data make incorrect extractions or produce noise. With the advent of human computing, computationally hard tasks have been addressed through human inputs. While text-based knowledge acquisition can benefit from this approach, humans alone cannot bear the burden of extracting knowledge from the vast textual resources that exist today. Even making payments for crowdsourced acquisition can quickly become prohibitively expensive. In this thesis we present principled methods that effectively garner human computing inputs for improving the extraction of knowledge-base facts from natural language texts. Our methods complement automatic extraction techniques with human computing to reap the benefits of both while overcoming each other's limitations. We present the architecture and implementation of HIGGINS, a system that combines an information extraction (IE) engine with a human computing (HC) engine to produce high quality facts. The IE engine combines statistics derived from large Web corpora with semantic resources like WordNet and ConceptNet to construct a large dictionary of entity and relational phrases. It employs specifically designed statistical language models for phrase relatedness to come up with questions and relevant candidate answers that are presented to human workers. Through extensive experiments we establish the superiority of this approach in extracting relation-centric facts from text. In our experiments we extract facts about fictitious characters in narrative text, where the issues of diversity and complexity in expressing relations are far more pronounced. Finally, we also demonstrate how interesting human computing games can be designed for knowledge acquisition tasks.},
}
Endnote
%0 Thesis %A Kondreddi, Sarath Kumar %Y Triantafillou, Peter %A referee: Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Human Computing and Crowdsourcing Methods for Knowledge Acquisition : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-3C3D-F %U urn:nbn:de:bsz:291-scidok-57948 %I Universität des Saarlandes %C Saarbrücken %D 2014 %P 116 p. %V phd %9 phd %X Ambiguity, complexity, and diversity in natural language textual expressions are major hindrances to automated knowledge extraction. As a result state-of-the-art methods for extracting entities and relationships from unstructured data make incorrect extractions or produce noise. With the advent of human computing, computationally hard tasks have been addressed through human inputs. While text-based knowledge acquisition can benefit from this approach, humans alone cannot bear the burden of extracting knowledge from the vast textual resources that exist today. Even making payments for crowdsourced acquisition can quickly become prohibitively expensive. In this thesis we present principled methods that effectively garner human computing inputs for improving the extraction of knowledge-base facts from natural language texts. Our methods complement automatic extraction techniques with human computing to reap the benefits of both while overcoming each other's limitations. We present the architecture and implementation of HIGGINS, a system that combines an information extraction (IE) engine with a human computing (HC) engine to produce high quality facts. 
The IE engine combines statistics derived from large Web corpora with semantic resources like WordNet and ConceptNet to construct a large dictionary of entity and relational phrases. It employs specifically designed statistical language models for phrase relatedness to come up with questions and relevant candidate answers that are presented to human workers. Through extensive experiments we establish the superiority of this approach in extracting relation-centric facts from text. In our experiments we extract facts about fictitious characters in narrative text, where the issues of diversity and complexity in expressing relations are far more pronounced. Finally, we also demonstrate how interesting human computing games can be designed for knowledge acquisition tasks. %U http://scidok.sulb.uni-saarland.de/volltexte/2014/5794/ %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[128]
S. K. Kondreddi, P. Triantafillou, and G. Weikum, “Combining Information Extraction and Human Computing for Crowdsourced Knowledge Acquisition,” in 30th IEEE International Conference on Data Engineering (ICDE 2014), Chicago, IL, USA, 2014.
Abstract
Automatic information extraction (IE) enables the construction of very large knowledge bases (KBs), with relational facts on millions of entities from text corpora and Web sources. However, such KBs contain errors and they are far from being complete. This motivates the need for exploiting human intelligence and knowledge using crowd-based human computing (HC) for assessing the validity of facts and for gathering additional knowledge. This paper presents a novel system architecture, called Higgins, which shows how to effectively integrate an IE engine and a HC engine. Higgins generates game questions where players choose or fill in missing relations for subject-relation-object triples. For generating multiple-choice answer candidates, we have constructed a large dictionary of entity names and relational phrases, and have developed specifically designed statistical language models for phrase relatedness. To this end, we combine semantic resources like WordNet, ConceptNet, and others with statistics derived from a large Web corpus. We demonstrate the effectiveness of Higgins for knowledge acquisition by crowdsourced gathering of relationships between characters in narrative descriptions of movies and books.
Export
BibTeX
@inproceedings{Kondreddi2014a,
  TITLE        = {Combining Information Extraction and Human Computing for Crowdsourced Knowledge Acquisition},
  AUTHOR       = {Kondreddi, Sarath Kumar and Triantafillou, Peter and Weikum, Gerhard},
  LANGUAGE     = {eng},
  DOI          = {10.1109/ICDE.2014.6816717},
  PUBLISHER    = {IEEE},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  ABSTRACT     = {Automatic information extraction (IE) enables the construction of very large knowledge bases (KBs), with relational facts on millions of entities from text corpora and Web sources. However, such KBs contain errors and they are far from being complete. This motivates the need for exploiting human intelligence and knowledge using crowd-based human computing (HC) for assessing the validity of facts and for gathering additional knowledge. This paper presents a novel system architecture, called Higgins, which shows how to effectively integrate an IE engine and a HC engine. Higgins generates game questions where players choose or fill in missing relations for subject-relation-object triples. For generating multiple-choice answer candidates, we have constructed a large dictionary of entity names and relational phrases, and have developed specifically designed statistical language models for phrase relatedness. To this end, we combine semantic resources like WordNet, ConceptNet, and others with statistics derived from a large Web corpus. We demonstrate the effectiveness of Higgins for knowledge acquisition by crowdsourced gathering of relationships between characters in narrative descriptions of movies and books.},
  BOOKTITLE    = {30th IEEE International Conference on Data Engineering (ICDE 2014)},
  PAGES        = {988--999},
  ADDRESS      = {Chicago, IL, USA},
}
Endnote
%0 Conference Proceedings %A Kondreddi, Sarath Kumar %A Triantafillou, Peter %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Combining Information Extraction and Human Computing for Crowdsourced Knowledge Acquisition : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0023-C15D-6 %R 10.1109/ICDE.2014.6816717 %D 2014 %B 30th IEEE International Conference on Data Engineering %Z date of event: 2014-03-31 - 2014-04-04 %C Chicago, IL, USA %X Automatic information extraction (IE) enables the construction of very large knowledge bases (KBs), with relational facts on millions of entities from text corpora and Web sources. However, such KBs contain errors and they are far from being complete. This motivates the need for exploiting human intelligence and knowledge using crowd-based human computing (HC) for assessing the validity of facts and for gathering additional knowledge. This paper presents a novel system architecture, called Higgins, which shows how to effectively integrate an IE engine and a HC engine. Higgins generates game questions where players choose or fill in missing relations for subject-relation-object triples. For generating multiple-choice answer candidates, we have constructed a large dictionary of entity names and relational phrases, and have developed specifically designed statistical language models for phrase relatedness. To this end, we combine semantic resources like WordNet, ConceptNet, and others with statistics derived from a large Web corpus. We demonstrate the effectiveness of Higgins for knowledge acquisition by crowdsourced gathering of relationships between characters in narrative descriptions of movies and books. %B 30th IEEE International Conference on Data Engineering %P 988 - 999 %I IEEE
[129]
M. Koubarakis, G. B. Stamou, G. Stoilos, I. Horrocks, P. G. Kolaitis, G. Lausen, and G. Weikum, Eds., Reasoning Web. Springer, 2014.
Export
BibTeX
@proceedings{DBLP:conf/rweb/2014,
  TITLE        = {Reasoning Web},
  EDITOR       = {Koubarakis, Manolis and Stamou, Giorgos B. and Stoilos, Giorgos and Horrocks, Ian and Kolaitis, Phokion G. and Lausen, Georg and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-3-319-10586-4},
  DOI          = {10.1007/978-3-319-10587-1},
  PUBLISHER    = {Springer},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  PAGES        = {X, 390 p.},
  SERIES       = {Lecture Notes in Computer Science},
  VOLUME       = {8714},
  ADDRESS      = {Athens, Greece},
}
Endnote
%0 Conference Proceedings %E Koubarakis, Manolis %E Stamou, Giorgos B. %E Stoilos, Giorgos %E Horrocks, Ian %E Kolaitis, Phokion G. %E Lausen, Georg %E Weikum, Gerhard %+ External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Reasoning Web : Reasoning on the Web in the Big Data Era ; 10th International Summer School 2014, Athens, Greece, September 8-13, 2014. Proceedings %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6BD5-B %@ 978-3-319-10586-4 %R 10.1007/978-3-319-10587-1 %I Springer %D 2014 %B 10th Reasoning Web Summer School %Z date of event: 2014-09-08 - 2014-09-13 %D 2014 %C Athens, Greece %P X, 390 p. %S Lecture Notes in Computer Science %V 8714
[130]
D. Koutra, U. Kang, J. Vreeken, and C. Faloutsos, “VoG: Summarizing and Understanding Large Graphs,” in 2014 SIAM International Conference on Data Mining (SDM 2014), Philadelphia, PA, USA, 2014.
Export
BibTeX
@inproceedings{koutra:14:vog,
  TITLE        = {{VoG}: {Summarizing} and Understanding Large Graphs},
  AUTHOR       = {Koutra, Danai and Kang, U and Vreeken, Jilles and Faloutsos, Christos},
  LANGUAGE     = {eng},
  ISBN         = {978-1-61197-344-0},
  DOI          = {10.1137/1.9781611973440.11},
  PUBLISHER    = {SIAM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {2014 SIAM International Conference on Data Mining (SDM 2014)},
  PAGES        = {91--99},
  ADDRESS      = {Philadelphia, PA, USA},
}
Endnote
%0 Conference Proceedings %A Koutra, Danai %A Kang, U %A Vreeken, Jilles %A Faloutsos, Christos %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T VoG: Summarizing and Understanding Large Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53AF-A %R 10.1137/1.9781611973440.11 %D 2014 %B SIAM International Conference on Data Mining %Z date of event: 2014-04-24 - 2014-04-26 %C Philadelphia, PA, USA %B 2014 SIAM International Conference on Data Mining %P 91 - 99 %I SIAM %@ 978-1-61197-344-0
[131]
D. Koutra, U. Kang, J. Vreeken, and C. Faloutsos, “VoG: Summarizing and Understanding Large Graphs,” 2014. [Online]. Available: http://arxiv.org/abs/1406.3411. (arXiv: 1406.3411)
Abstract
How can we succinctly describe a million-node graph with a few simple sentences? How can we measure the "importance" of a set of discovered subgraphs in a large graph? These are exactly the problems we focus on. Our main ideas are to construct a "vocabulary" of subgraph-types that often occur in real graphs (e.g., stars, cliques, chains), and from a set of subgraphs, find the most succinct description of a graph in terms of this vocabulary. We measure success in a well-founded way by means of the Minimum Description Length (MDL) principle: a subgraph is included in the summary if it decreases the total description length of the graph. Our contributions are three-fold: (a) formulation: we provide a principled encoding scheme to choose vocabulary subgraphs; (b) algorithm: we develop \method, an efficient method to minimize the description cost, and (c) applicability: we report experimental results on multi-million-edge real graphs, including Flickr and the Notre Dame web graph.
Export
BibTeX
@online{KoutraKangVreekenFaloutsosarXiv2014,
  TITLE        = {{VoG}: {Summarizing} and Understanding Large Graphs},
  AUTHOR       = {Koutra, Danai and Kang, U and Vreeken, Jilles and Faloutsos, Christos},
  LANGUAGE     = {eng},
  URL          = {http://arxiv.org/abs/1406.3411},
  EPRINT       = {1406.3411},
  EPRINTTYPE   = {arXiv},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {How can we succinctly describe a million-node graph with a few simple sentences? How can we measure the "importance" of a set of discovered subgraphs in a large graph? These are exactly the problems we focus on. Our main ideas are to construct a "vocabulary" of subgraph-types that often occur in real graphs (e.g., stars, cliques, chains), and from a set of subgraphs, find the most succinct description of a graph in terms of this vocabulary. We measure success in a well-founded way by means of the Minimum Description Length (MDL) principle: a subgraph is included in the summary if it decreases the total description length of the graph. Our contributions are three-fold: (a) formulation: we provide a principled encoding scheme to choose vocabulary subgraphs; (b) algorithm: we develop \method, an efficient method to minimize the description cost, and (c) applicability: we report experimental results on multi-million-edge real graphs, including Flickr and the Notre Dame web graph.},
}
Endnote
%0 Report %A Koutra, Danai %A Kang, U %A Vreeken, Jilles %A Faloutsos, Christos %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T VoG: Summarizing and Understanding Large Graphs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-49A3-F %U http://arxiv.org/abs/1406.3411 %D 2014 %X How can we succinctly describe a million-node graph with a few simple sentences? How can we measure the "importance" of a set of discovered subgraphs in a large graph? These are exactly the problems we focus on. Our main ideas are to construct a "vocabulary" of subgraph-types that often occur in real graphs (e.g., stars, cliques, chains), and from a set of subgraphs, find the most succinct description of a graph in terms of this vocabulary. We measure success in a well-founded way by means of the Minimum Description Length (MDL) principle: a subgraph is included in the summary if it decreases the total description length of the graph. Our contributions are three-fold: (a) formulation: we provide a principled encoding scheme to choose vocabulary subgraphs; (b) algorithm: we develop \method, an efficient method to minimize the description cost, and (c) applicability: we report experimental results on multi-million-edge real graphs, including Flickr and the Notre Dame web graph. %K cs.SI, Physics, Physics and Society, physics.soc-ph
[132]
E. Kuzey and G. Weikum, “EVIN: Building a Knowledge Base of Events,” in WWW’14 Companion, Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{ekuzeyWWW14,
  TITLE        = {{EVIN}: Building a Knowledge Base of Events},
  AUTHOR       = {Kuzey, Erdal and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2745-9},
  URL          = {http://dl.acm.org/citation.cfm?id=2577009},
  DOI          = {10.1145/2567948.2577009},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {WWW'14 Companion},
  PAGES        = {103--106},
  ADDRESS      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Kuzey, Erdal %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T EVIN: Building a Knowledge Base of Events : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-525B-C %R 10.1145/2567948.2577009 %U http://dl.acm.org/citation.cfm?id=2577009 %D 2014 %B 23rd International Conference on World Wide Web %Z date of event: 2014-04-07 - 2014-04-11 %C Seoul, Korea %B WWW'14 Companion %P 103 - 106 %I ACM %@ 978-1-4503-2745-9
[133]
E. Kuzey, J. Vreeken, and G. Weikum, “A Fresh Look on Knowledge Bases: Distilling Named Events from News,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{ekuzeyCIKM14,
  TITLE        = {A Fresh Look on Knowledge Bases: Distilling Named Events from News},
  AUTHOR       = {Kuzey, Erdal and Vreeken, Jilles and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2598-1},
  DOI          = {10.1145/2661829.2661984},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  EDITOR       = {Li, Jianzhong and Wang, X. Sean and Garofalakis, Minos and Soboroff, Ian and Suel, Torsten and Wang, Min},
  PAGES        = {1689--1698},
  ADDRESS      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Kuzey, Erdal %A Vreeken, Jilles %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Fresh Look on Knowledge Bases: Distilling Named Events from News : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5263-9 %R 10.1145/2661829.2661984 %D 2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %B CIKM'14 %E Li, Jianzhong; Wang, X. Sean; Garofalakis, Minos; Soboroff, Ian; Suel, Torsten; Wang, Min %P 1689 - 1698 %I ACM %@ 978-1-4503-2598-1
[134]
F. Mahdisoltani, J. Biega, and F. Suchanek, “YAGO3: A Knowledge Base from Multilingual Wikipedias,” in 7th Biennial Conference on Innovative Data Systems Research (CIDR 2015), Asilomar, CA, USA, 2014.
Export
BibTeX
@inproceedings{Mahdisoltani:2015,
  TITLE        = {{YAGO}3: A Knowledge Base from Multilingual Wikipedias},
  AUTHOR       = {Mahdisoltani, Farzaneh and Biega, Joanna and Suchanek, Fabian},
  LANGUAGE     = {eng},
  URL          = {http://www.cidrdb.org/cidr2015/Papers/CIDR15_Paper1.pdf},
  PUBLISHER    = {CIDR Conference},
  YEAR         = {2015},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {7th Biennial Conference on Innovative Data Systems Research (CIDR 2015)},
  ADDRESS      = {Asilomar, CA, USA},
}
Endnote
%0 Conference Proceedings %A Mahdisoltani, Farzaneh %A Biega, Joanna %A Suchanek, Fabian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T YAGO3: A Knowledge Base from Multilingual Wikipedias : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-501C-6 %U http://www.cidrdb.org/cidr2015/Papers/CIDR15_Paper1.pdf %D 2014 %B 7th Biennial Conference on Innovative Data Systems Research %Z date of event: 2015-01-04 - 2015-01-07 %C Asilomar, CA, USA %B 7th Biennial Conference on Innovative Data Systems Research %I CIDR Conference
[135]
F. Makari, C. Teflioudi, R. Gemulla, P. Haas, and Y. Sismanis, “Shared-memory and Shared-nothing Stochastic Gradient Descent Algorithms for Matrix Completion,” Knowledge and Information Systems, vol. 42, no. 3, 2014.
Export
BibTeX
@article{MakariTeflioudiGemulla2014,
  TITLE        = {Shared-memory and Shared-nothing Stochastic Gradient Descent Algorithms for Matrix Completion},
  AUTHOR       = {Makari, Faraz and Teflioudi, Christina and Gemulla, Rainer and Haas, Peter and Sismanis, Yannis},
  LANGUAGE     = {eng},
  ISSN         = {0219-1377},
  DOI          = {10.1007/s10115-013-0718-7},
  PUBLISHER    = {Springer},
  ADDRESS      = {London},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  JOURNAL      = {Knowledge and Information Systems},
  VOLUME       = {42},
  NUMBER       = {3},
  PAGES        = {493--523},
}
Endnote
%0 Journal Article %A Makari, Faraz %A Teflioudi, Christina %A Gemulla, Rainer %A Haas, Peter %A Sismanis, Yannis %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Shared-memory and Shared-nothing Stochastic Gradient Descent Algorithms for Matrix Completion : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4F57-9 %R 10.1007/s10115-013-0718-7 %7 2014-02-15 %D 2014 %J Knowledge and Information Systems %V 42 %N 3 %& 493 %P 493 - 523 %I Springer %C London %@ false
[136]
F. Makari Manshadi, “Scalable Optimization Algorithms for Recommender Systems,” Universität des Saarlandes, Saarbrücken, 2014.
Export
BibTeX
@phdthesis{MakariManshadi2014,
  TITLE        = {Scalable Optimization Algorithms for Recommender Systems},
  AUTHOR       = {Makari Manshadi, Faraz},
  LANGUAGE     = {eng},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
}
Endnote
%0 Thesis %A Makari Manshadi, Faraz %Y Gemulla, Rainer %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Scalable Optimization Algorithms for Recommender Systems : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-96AA-5 %I Universität des Saarlandes %C Saarbrücken %D 2014 %P 121 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2014/5922/ %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[137]
S. Metzger, “User-centric Knowledge Extraction and Maintenance,” Universität des Saarlandes, Saarbrücken, 2014.
Export
BibTeX
@phdthesis{Metzger2014,
  TITLE        = {User-centric Knowledge Extraction and Maintenance},
  AUTHOR       = {Metzger, Steffen},
  LANGUAGE     = {eng},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
}
Endnote
%0 Thesis %A Metzger, Steffen %Y Schenkel, Ralf %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T User-centric Knowledge Extraction and Maintenance : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-96AE-E %I Universität des Saarlandes %C Saarbrücken %D 2014 %P 230 p. %V phd %9 phd %U http://scidok.sulb.uni-saarland.de/volltexte/2014/5763/ %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[138]
S. Metzler and P. Miettinen, “Clustering Boolean Tensors,” in ECML/PKDD 2014 PhD Session Proceedings, Nancy, France, 2014.
Export
BibTeX
@inproceedings{Metzler2014Clustering,
  TITLE        = {Clustering {Boolean} Tensors},
  AUTHOR       = {Metzler, Saskia and Miettinen, Pauli},
  LANGUAGE     = {eng},
  URL          = {https://phdsession-ecmlpkdd2014.greyc.fr/sites/phdsession-ecmlpkdd2014.greyc.fr/files/papers/Paper_20692.pdf},
  PUBLISHER    = {University of Caen},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {ECML/PKDD 2014 PhD Session Proceedings},
  EDITOR       = {Belohlavek, Radim and Cr{\'e}milleux, Bruno},
  PAGES        = {31--40},
  ADDRESS      = {Nancy, France},
}
Endnote
%0 Conference Proceedings %A Metzler, Saskia %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Clustering Boolean Tensors : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C44-C %U https://phdsession-ecmlpkdd2014.greyc.fr/sites/phdsession-ecmlpkdd2014.greyc.fr/files/papers/Paper_20692.pdf %D 2014 %B The European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2014-09-15 - 2014-09-19 %C Nancy, France %B ECML/PKDD 2014 PhD Session Proceedings %E Belohlavek, Radim; Crémilleux, Bruno %P 31 - 40 %I University of Caen
[139]
P. Miettinen and J. Vreeken, “MDL4BMF: Minimum Description Length for Boolean Matrix Factorization,” ACM Transactions on Knowledge Discovery from Data, vol. 8, no. 4, Oct. 2014.
Export
BibTeX
@article{miettinen14mdl4bmf,
  TITLE        = {{MDL4BMF}: {Minimum} {D}escription {L}ength for {Boolean} {M}atrix {F}actorization},
  AUTHOR       = {Miettinen, Pauli and Vreeken, Jilles},
  LANGUAGE     = {eng},
  DOI          = {10.1145/2601437},
  PUBLISHER    = {ACM},
  ADDRESS      = {New York, NY},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014-10},
  JOURNAL      = {ACM Transactions on Knowledge Discovery from Data},
  VOLUME       = {8},
  NUMBER       = {4},
  PAGES        = {1--31},
  EID          = {18},
}
Endnote
%0 Journal Article %A Miettinen, Pauli %A Vreeken, Jilles %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T MDL4BMF: Minimum Description Length for Boolean Matrix Factorization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4980-E %R 10.1145/2601437 %7 2014 %D 2014 %J ACM Transactions on Knowledge Discovery from Data %V 8 %N 4 %& 1 %P 1 - 31 %Z sequence number: 18 %I ACM %C New York, NY %U http://dl.acm.org/citation.cfm?id=2663597.2601437
[140]
P. Miettinen, “Interactive Data Mining Considered Harmful (If Done Wrong),” in Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics (IDEA 2014), New York, NY, USA, 2014.
Abstract
Interactive data mining can be a powerful tool for data analysis. But in this short opinion piece I argue that this power comes with new pitfalls that can undermine the value of interactive mining, if not properly addressed. Most notably, there is a serious risk that the user of powerful interactive data mining tools will only find the results she was expecting. The purpose of this piece is to raise awareness of this potential issue, stimulate discussion on it, and hopefully give rise to new research directions in addressing it.
Export
BibTeX
@inproceedings{miettinen14interactive,
  title        = {Interactive Data Mining Considered Harmful (If Done Wrong)},
  author       = {Miettinen, Pauli},
  language     = {eng},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014-07},
  abstract     = {Interactive data mining can be a powerful tool for data analysis. But in this short opinion piece I argue that this power comes with new pitfalls that can undermine the value of interactive mining, if not properly addressed. Most notably, there is a serious risk that the user of powerful interactive data mining tools will only find the results she was expecting. The purpose of this piece is to raise awareness of this potential issue, stimulate discussion on it, and hopefully give rise to new research directions in addressing it.},
  booktitle    = {Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics (IDEA 2014)},
  editor       = {Chau, Polo and Vreeken, Jilles and van Leeuwen, Matthijs and Faloutsos, Christos},
  pages        = {85--87},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Interactive Data Mining Considered Harmful (If Done Wrong) : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5567-9 %D 2014 %B ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics %Z date of event: 2014-08-24 - 2014-08-24 %C New York, NY, USA %X Interactive data mining can be a powerful tool for data analysis. But in this short opinion piece I argue that this power comes with new pitfalls that can undermine the value of interactive mining, if not properly addressed. Most notably, there is a serious risk that the user of powerful interactive data mining tools will only find the results she was expecting. The purpose of this piece is to raise awareness of this potential issue, stimulate discussion on it, and hopefully give rise to new research directions in addressing it. %B Proceedings of the ACM SIGKDD 2014 Full-day Workshop on Interactive Data Exploration and Analytics %E Chau, Polo; Vreeken, Jilles; van Leeuwen, Matthijs; Faloutsos, Christos %P 85 - 87 %U http://poloclub.gatech.edu/idea2014/papers/p85-miettinen.pdf
[141]
D. Milchevski and K. Berberich, “X-REC: Cross-category Entity Recommendation,” in Proceedings of the 5th Information Interaction in Context Conference (IIiX 2014), Regensburg, Germany, 2014.
Export
BibTeX
@inproceedings{DBLP:conf/iiix/MilchevskiB14,
  title        = {{X-REC}: Cross-category Entity Recommendation},
  author       = {Milchevski, Dragan and Berberich, Klaus},
  language     = {eng},
  isbn         = {978-1-4503-2976-7},
  doi          = {10.1145/2637002.2637049},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Proceedings of the 5th Information Interaction in Context Conference (IIiX 2014)},
  editor       = {Elsweiler, David and Ludwig, Bernd and Azzopardi, Leif and Wilson, Max L.},
  pages        = {308--311},
  address      = {Regensburg, Germany},
}
Endnote
%0 Conference Proceedings %A Milchevski, Dragan %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T X-REC: Cross-category Entity Recommendation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5430-B %R 10.1145/2637002.2637049 %D 2014 %B 5th Information Interaction in Context Conference %Z date of event: 2014-08-26 - 2014-08-29 %C Regensburg, Germany %B Proceedings of the 5th Information Interaction in Context Conference %E Elsweiler, David; Ludwig, Bernd; Azzopardi, Leif; Wilson, Max L. %P 308 - 311 %I ACM %@ 978-1-4503-2976-7
[142]
A. Mishra, “Linking Today’s Wikipedia and News from the Past,” in PIKM’14, 7th PhD Workshop in Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Mishra:2014:LTW:2663714.2668048,
  title        = {Linking Today's {Wikipedia} and News from the Past},
  author       = {Mishra, Arunav},
  language     = {eng},
  isbn         = {978-1-4503-1481-7},
  doi          = {10.1145/2663714.2668048},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {PIKM'14, 7th PhD Workshop in Information and Knowledge Management},
  editor       = {de Melo, Gerard and Kacimi, Mouna and Varde, Aparna S.},
  pages        = {1--8},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Mishra, Arunav %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Linking Today's Wikipedia and News from the Past : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6C6E-D %R 10.1145/2663714.2668048 %D 2014 %B 7th PhD Workshop in Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %K events, linking, time-aware language model, wikipedia %B PIKM'14 %E de Melo, Gerard; Kacimi, Mouna; Varde, Aparna S. %P 1 - 8 %I ACM %@ 978-1-4503-1481-7
[143]
A. Mishra, D. Milchevski, and K. Berberich, “Linking Wikipedia Events to Past News,” in SIGIR 2014 Workshop on Temporal, Social and Spatially-aware Information Access (TAIA 2014), Gold Coast, Australia, 2014.
Export
BibTeX
@inproceedings{Mishra2014a,
  title        = {Linking {Wikipedia} Events to Past News},
  author       = {Mishra, Arunav and Milchevski, Dragan and Berberich, Klaus},
  language     = {eng},
  url          = {http://research.microsoft.com/en-US/people/milads/taia2014-mishra.pdf},
  publisher    = {Microsoft Research},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {SIGIR 2014 Workshop on Temporal, Social and Spatially-aware Information Access (TAIA 2014)},
  pages        = {1--4},
  address      = {Gold Coast, Australia},
}
Endnote
%0 Conference Proceedings %A Mishra, Arunav %A Milchevski, Dragan %A Berberich, Klaus %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Linking Wikipedia Events to Past News : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-3C35-0 %U http://research.microsoft.com/en-US/people/milads/taia2014-mishra.pdf %D 2014 %B SIGIR 2014 Workshop on Temporal, Social and Spatially-aware Information Access %Z date of event: 2014-07-11 - 2014-07-11 %C Gold Coast, Australia %B SIGIR 2014 Workshop on Temporal, Social and Spatially-aware Information Access %P 1 - 4 %I Microsoft Research
[144]
S. Mukherjee, J. Ajmera, and S. Joshi, “Unsupervised Approach for Shallow Domain Ontology Construction from Corpus,” in WWW’14 Companion, Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:DCU,
  TITLE        = {Unsupervised Approach for Shallow Domain Ontology Construction from Corpus},
  AUTHOR       = {Mukherjee, Subhabrata and Ajmera, Jitendra and Joshi, Sachindra},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2745-9},
  URL          = {http://dl.acm.org/citation.cfm?id=2577350},
  DOI          = {10.1145/2567948.2577350},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {WWW'14 Companion},
  PAGES        = {349--350},
  ADDRESS      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Ajmera, Jitendra %A Joshi, Sachindra %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Unsupervised Approach for Shallow Domain Ontology Construction from Corpus : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4FFD-6 %R 10.1145/2567948.2577350 %U http://dl.acm.org/citation.cfm?id=2577350 %D 2014 %B 23rd International Conference on World Wide Web %Z date of event: 2014-04-07 - 2014-04-11 %C Seoul, Korea %B WWW'14 Companion %P 349 - 350 %I ACM %@ 978-1-4503-2745-9
[145]
S. Mukherjee, G. Weikum, and C. Danescu-Niculescu-Mizil, “People on Drugs: Credibility of User Statements in Health Communities,” in KDD’14, 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, New York, NY, USA, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:PeopleOnDrugs,
  TITLE        = {People on Drugs: Credibility of User Statements in Health Communities},
  AUTHOR       = {Mukherjee, Subhabrata and Weikum, Gerhard and Danescu-Niculescu-Mizil, Cristian},
  LANGUAGE     = {eng},
  DOI          = {10.1145/2623330.2623714},
  PUBLISHER    = {ACM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {KDD'14, 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  EDITOR       = {Macskassy, Sofus A. and Perlich, Claudia and Leskovec, Jure and Wang, Wei and Ghani, Rayid},
  PAGES        = {65--74},
  ADDRESS      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Weikum, Gerhard %A Danescu-Niculescu-Mizil, Cristian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Group C. Danescu-Niculescu-Mizil, Max Planck Institute for Software Systems, Max Planck Society %T People on Drugs: Credibility of User Statements in Health Communities : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4FF9-E %R 10.1145/2623330.2623714 %D 2014 %B 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining %Z date of event: 2014-08-24 - 2014-08-27 %C New York, NY, USA %B KDD'14 %E Macskassy, Sofus A.; Perlich, Claudia; Leskovec, Jure; Wang, Wei; Ghani, Rayid %P 65 - 74 %I ACM
[146]
S. Mukherjee and S. Joshi, “Help Yourself: A Virtual Self-assist System,” in WWW’14 Companion, Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:SelfAssist,
  title        = {Help Yourself: A Virtual Self-assist System},
  author       = {Mukherjee, Subhabrata and Joshi, Sachindra},
  language     = {eng},
  isbn         = {978-1-4503-2745-9},
  url          = {http://dl.acm.org/citation.cfm?id=2577021},
  doi          = {10.1145/2567948.2577021},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {WWW'14 Companion},
  pages        = {171--174},
  address      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Joshi, Sachindra %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Help Yourself: A Virtual Self-assist System : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5007-5 %R 10.1145/2567948.2577021 %U http://dl.acm.org/citation.cfm?id=2577021 %D 2014 %B 23rd International Conference on World Wide Web %Z date of event: 2014-04-07 - 2014-04-11 %C Seoul, Korea %B WWW'14 Companion %P 171 - 174 %I ACM %@ 978-1-4503-2745-9
[147]
S. Mukherjee, G. Basu, and S. Joshi, “Joint Author Sentiment Topic Model,” in 2014 SIAM International Conference on Data Mining (SDM 2014), Philadelphia, PA, USA, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:JAST,
  TITLE        = {Joint Author Sentiment Topic Model},
  AUTHOR       = {Mukherjee, Subhabrata and Basu, Gaurab and Joshi, Sachindra},
  LANGUAGE     = {eng},
  ISBN         = {978-1-61197-344-0},
  DOI          = {10.1137/1.9781611973440.43},
  PUBLISHER    = {SIAM},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {2014 SIAM International Conference on Data Mining (SDM 2014)},
  PAGES        = {370--378},
  ADDRESS      = {Philadelphia, PA, USA},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Basu, Gaurab %A Joshi, Sachindra %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Joint Author Sentiment Topic Model : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4F9D-B %R 10.1137/1.9781611973440.43 %D 2014 %B SIAM International Conference on Data Mining %Z date of event: 2014-04-24 - 2014-04-26 %C Philadelphia, PA, USA %B 2014 SIAM International Conference on Data Mining %P 370 - 378 %I SIAM %@ 978-1-61197-344-0
[148]
S. Mukherjee, J. Ajmera, and S. Joshi, “Domain Cartridge: Unsupervised Framework for Shallow Domain Ontology Construction from Corpus,” in CIKM’14, 23rd ACM International Conference on Information and Knowledge Management, Shanghai, China, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:DomainCartridge,
  title        = {Domain Cartridge: Unsupervised Framework for Shallow Domain Ontology Construction from Corpus},
  author       = {Mukherjee, Subhabrata and Ajmera, Jitendra and Joshi, Sachindra},
  language     = {eng},
  isbn         = {978-1-4503-2598-1},
  doi          = {10.1145/2661829.2662087},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {CIKM'14, 23rd ACM International Conference on Information and Knowledge Management},
  editor       = {Li, Jianzhong and Wang, X. Sean and Garofalakis, Minos and Soboroff, Ian and Suel, Torsten and Wang, Min},
  pages        = {929--938},
  address      = {Shanghai, China},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Ajmera, Jitendra %A Joshi, Sachindra %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Domain Cartridge: Unsupervised Framework for Shallow Domain Ontology Construction from Corpus : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4FB1-C %R 10.1145/2661829.2662087 %D 2014 %8 03.11.2014 %B 23rd ACM International Conference on Information and Knowledge Management %Z date of event: 2014-11-03 - 2014-11-07 %C Shanghai, China %B CIKM'14 %E Li, Jianzhong; Wang, X. Sean; Garofalakis, Minos; Soboroff, Ian; Suel, Torsten; Wang, Min %P 929 - 938 %I ACM %@ 978-1-4503-2598-1
[149]
S. Mukherjee and S. Joshi, “Author-Specific Sentiment Aggregation for Polarity Prediction of Reviews,” in Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC 2014), Reykjavik, Iceland, 2014.
Export
BibTeX
@inproceedings{Mukherjee:2014:PASOT,
  TITLE        = {Author-Specific Sentiment Aggregation for Polarity Prediction of Reviews},
  AUTHOR       = {Mukherjee, Subhabrata and Joshi, Sachindra},
  LANGUAGE     = {eng},
  ISBN         = {978-2-9517408-8-4},
  PUBLISHER    = {ELRA},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC 2014)},
  PAGES        = {3092--3099},
  ADDRESS      = {Reykjavik, Iceland},
}
Endnote
%0 Conference Proceedings %A Mukherjee, Subhabrata %A Joshi, Sachindra %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Author-Specific Sentiment Aggregation for Polarity Prediction of Reviews : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4FF7-1 %D 2014 %B Ninth International Conference on Language Resources and Evaluation %Z date of event: 2014-05-26 - 2014-05-31 %C Reykjavik, Iceland %B Proceedings of the Ninth International Conference on Language Resources and Evaluation %P 3092 - 3099 %I ELRA %@ 978-2-9517408-8-4 %U http://www.lrec-conf.org/proceedings/lrec2014/pdf/467_Paper.pdf
[150]
D. B. Nguyen, J. Hoffart, M. Theobald, and G. Weikum, “AIDA-light: High-throughput Named-entity Disambiguation,” in Linked Data on the Web (LDOW 2014), Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{Nguyen:2014wl,
  TITLE        = {{AIDA}-light: High-Throughput Named-entity Disambiguation},
  AUTHOR       = {Nguyen, Dat Ba and Hoffart, Johannes and Theobald, Martin and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISSN         = {1613-0073},
  URL          = {http://ceur-ws.org/Vol-1184/ldow2014_paper_03.pdf},
  PUBLISHER    = {CEUR-WS.org},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {Linked Data on the Web (LDOW 2014)},
  EDITOR       = {Bizer, Christian and Heath, Tom and Auer, S{\"o}ren and Berners-Lee, Tim},
  PAGES        = {1--10},
  SERIES       = {CEUR Workshop Proceedings},
  VOLUME       = {1184},
  ADDRESS      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Nguyen, Dat Ba %A Hoffart, Johannes %A Theobald, Martin %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T AIDA-light: High-throughput Named-entity Disambiguation : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5162-2 %U http://ceur-ws.org/Vol-1184/ldow2014_paper_03.pdf %D 2014 %B Workshop on Linked Data on the Web 2014 %Z date of event: 2014-04-08 - 2014-04-08 %C Seoul, Korea %B Linked Data on the Web %E Bizer, Christian; Heath, Tom; Auer, Sören; Berners-Lee, Tim %P 1 - 10 %I CEUR-WS.org %B CEUR Workshop Proceedings %N 1184 %@ false %U http://ceur-ws.org/Vol-1184/ldow2014_paper_03.pdf
[151]
H.-V. Nguyen, E. Müller, J. Vreeken, and K. Böhm, “Multivariate Maximal Correlation Analysis,” in Proceedings of The 31st International Conference on Machine Learning (ICML 2014), Beijing, China, 2014.
Export
BibTeX
@inproceedings{nguyen:14:mac,
  title        = {Multivariate Maximal Correlation Analysis},
  author       = {Nguyen, Hoang-Vu and M{\"u}ller, Emmanuel and Vreeken, Jilles and B{\"o}hm, Klemens},
  language     = {eng},
  issn         = {1938-7228},
  url          = {http://jmlr.csail.mit.edu/proceedings/papers/v32/nguyenc14.pdf},
  publisher    = {JMLR},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {Proceedings of The 31st International Conference on Machine Learning (ICML 2014)},
  editor       = {Xing, Eric P. and Jebara, Tony},
  pages        = {775--783},
  series       = {JMLR Workshop and Conference Proceedings},
  volume       = {32},
  address      = {Beijing, China},
}
Endnote
%0 Conference Proceedings %A Nguyen, Hoang-Vu %A Müller, Emmanuel %A Vreeken, Jilles %A Böhm, Klemens %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Multivariate Maximal Correlation Analysis : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53A7-9 %U http://jmlr.csail.mit.edu/proceedings/papers/v32/nguyenc14.pdf %D 2014 %B 31st International Conference on Machine Learning %Z date of event: 2014-06-21 - 2014-06-26 %C Beijing, China %B Proceedings of The 31st International Conference on Machine Learning %E Xing, Eric P.; Jebara, Tony %P 775 - 783 %I JMLR %B JMLR Workshop and Conference Proceedings %N 32 %@ false %U http://jmlr.csail.mit.edu/proceedings/papers/v32/nguyenc14.pdf
[152]
H.-V. Nguyen, E. Müller, J. Vreeken, and K. Böhm, “Unsupervised Interaction-preserving Discretization of Multivariate Data,” Data Mining and Knowledge Discovery, vol. 28, no. 5–6, 2014.
Export
BibTeX
@article{nguyen:14:unsupervised,
  TITLE        = {Unsupervised Interaction-preserving Discretization of Multivariate Data},
  AUTHOR       = {Nguyen, Hoang-Vu and M{\"u}ller, Emmanuel and Vreeken, Jilles and B{\"o}hm, Klemens},
  LANGUAGE     = {eng},
  DOI          = {10.1007/s10618-014-0350-5},
  PUBLISHER    = {Springer},
  ADDRESS      = {New York, NY},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  JOURNAL      = {Data Mining and Knowledge Discovery},
  VOLUME       = {28},
  NUMBER       = {5--6},
  PAGES        = {1366--1397},
}
Endnote
%0 Journal Article %A Nguyen, Hoang-Vu %A Müller, Emmanuel %A Vreeken, Jilles %A Böhm, Klemens %+ Karlsruhe Institute of Technology Karlsruhe Institute of Technology Databases and Information Systems, MPI for Informatics, Max Planck Society Karlsruhe Institute of Technology %T Unsupervised Interaction-preserving Discretization of Multivariate Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-49A7-7 %R 10.1007/s10618-014-0350-5 %7 2014-04-04 %D 2014 %J Data Mining and Knowledge Discovery %V 28 %N 5-6 %& 1366 %P 1366 - 1397 %I Springer %C New York, NY
[153]
K. Panev and K. Berberich, “Phrase Queries with Inverted + Direct Indexes,” in Web Information Systems Engineering - WISE 2014, Thessaloniki, Greece, 2014, vol. 8786.
Export
BibTeX
@inproceedings{DBLP:conf/wise/PanevB14,
  TITLE        = {Phrase Queries with Inverted + Direct Indexes},
  AUTHOR       = {Panev, Kiril and Berberich, Klaus},
  LANGUAGE     = {eng},
  ISBN         = {978-3-319-11748-5},
  DOI          = {10.1007/978-3-319-11749-2_13},
  PUBLISHER    = {Springer},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  DATE         = {2014},
  BOOKTITLE    = {Web Information Systems Engineering -- WISE 2014},
  EDITOR       = {Benatallah, Boualem and Bestavros, Azer and Manolopoulos, Yannis and Vakali, Athena and Zhang, Yanchun},
  PAGES        = {156--169},
  SERIES       = {Lecture Notes in Computer Science},
  VOLUME       = {8786},
  ADDRESS      = {Thessaloniki, Greece},
}
Endnote
%0 Conference Proceedings %A Panev, Kiril %A Berberich, Klaus %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Phrase Queries with Inverted + Direct Indexes : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53C6-1 %R 10.1007/978-3-319-11749-2_13 %D 2014 %B 15th International Conference on Web Information Systems Engineering %Z date of event: 2014-10-12 - 2014-10-14 %C Thessaloniki, Greece %B Web Information Systems Engineering - WISE 2014 %E Benatallah, Boualem; Bestavros, Azer; Manolopoulos, Yannis; Vakali, Athena; Zhang, Yanchun %V 8786 %P 156 - 169 %I Springer %@ 978-3-319-11748-5 %B Lecture Notes in Computer Science %N 8786 %U http://dx.doi.org/10.1007/978-3-319-11749-2_13
[154]
B. A. Prakash, J. Vreeken, and C. Faloutsos, “Efficiently Spotting the Starting Points of an Epidemic in a Large Graph,” Knowledge and Information Systems, vol. 38, no. 1, 2014.
Export
BibTeX
@article{prakash:14:culprits,
  title        = {Efficiently Spotting the Starting Points of an Epidemic in a Large Graph},
  author       = {Prakash, B. Aditya and Vreeken, Jilles and Faloutsos, Christos},
  language     = {eng},
  issn         = {0219-1377},
  doi          = {10.1007/s10115-013-0671-5},
  publisher    = {Springer},
  address      = {London},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  journal      = {Knowledge and Information Systems},
  volume       = {38},
  number       = {1},
  pages        = {35--59},
}
Endnote
%0 Journal Article %A Prakash, B. Aditya %A Vreeken, Jilles %A Faloutsos, Christos %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Efficiently Spotting the Starting Points of an Epidemic in a Large Graph : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53B3-D %R 10.1007/s10115-013-0671-5 %7 2013-07-17 %D 2014 %J Knowledge and Information Systems %V 38 %N 1 %& 35 %P 35 - 59 %I Springer %C London %@ false
[155]
L. Qu, Y. Zhang, R. Wang, L. Jiang, R. Gemulla, and G. Weikum, “Senti-LSSVM: Sentiment-oriented Multi-relation Extraction with Latent structural SVM,” Transactions of the Association for Computational Linguistics (Proc. ACL 2014), vol. 2, 2014.
Export
BibTeX
@article{Gemullaacl2014,
  TITLE        = {{Senti-LSSVM}: {Sentiment}-oriented Multi-relation Extraction with Latent structural {SVM}},
  AUTHOR       = {Qu, Lizhen and Zhang, Yi and Wang, Rui and Jiang, Lili and Gemulla, Rainer and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISSN         = {2307-387X},
  PUBLISHER    = {ACL},
  ADDRESS      = {Stroudsburg, PA},
  YEAR         = {2014},
  MARGINALMARK = {$\bullet$},
  JOURNAL      = {Transactions of the Association for Computational Linguistics (Proc. ACL)},
  VOLUME       = {2},
  PAGES        = {155--164},
  BOOKTITLE    = {The 52nd Annual Meeting of the Association for Computational Linguistics (ACL 2014)},
}
Endnote
%0 Journal Article %A Qu, Lizhen %A Zhang, Yi %A Wang, Rui %A Jiang, Lili %A Gemulla, Rainer %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Senti-LSSVM: Sentiment-oriented Multi-relation Extraction with Latent structural SVM : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6AF0-6 %7 2014 %D 2014 %J Transactions of the Association for Computational Linguistics %O TACL %V 2 %& 155 %P 155 - 164 %I ACL %C Stroudsburg, PA %@ false %B The 52nd Annual Meeting of the Association for Computational Linguistics %O ACL 2014
[156]
L. Qu and B. Andres, “Estimating Maximally Probable Constrained Relations by Mathematical Programming,” 2014. [Online]. Available: http://arxiv.org/abs/1408.0838. (arXiv: 1408.0838)
Abstract
Estimating a constrained relation is a fundamental problem in machine learning. Special cases are classification (the problem of estimating a map from a set of to-be-classified elements to a set of labels), clustering (the problem of estimating an equivalence relation on a set) and ranking (the problem of estimating a linear order on a set). We contribute a family of probability measures on the set of all relations between two finite, non-empty sets, which offers a joint abstraction of multi-label classification, correlation clustering and ranking by linear ordering. Estimating (learning) a maximally probable measure, given (a training set of) related and unrelated pairs, is a convex optimization problem. Estimating (inferring) a maximally probable relation, given a measure, is a 01-linear program. It is solved in linear time for maps. It is NP-hard for equivalence relations and linear orders. Practical solutions for all three cases are shown in experiments with real data. Finally, estimating a maximally probable measure and relation jointly is posed as a mixed-integer nonlinear program. This formulation suggests a mathematical programming approach to semi-supervised learning.
Export
BibTeX
@online{qu-2014,
  title        = {Estimating Maximally Probable Constrained Relations by Mathematical Programming},
  author       = {Qu, Lizhen and Andres, Bj{\"o}rn},
  language     = {eng},
  url          = {http://arxiv.org/abs/1408.0838},
  eprint       = {1408.0838},
  eprinttype   = {arXiv},
  year         = {2014},
  marginalmark = {$\bullet$},
  abstract     = {Estimating a constrained relation is a fundamental problem in machine learning. Special cases are classification (the problem of estimating a map from a set of to-be-classified elements to a set of labels), clustering (the problem of estimating an equivalence relation on a set) and ranking (the problem of estimating a linear order on a set). We contribute a family of probability measures on the set of all relations between two finite, non-empty sets, which offers a joint abstraction of multi-label classification, correlation clustering and ranking by linear ordering. Estimating (learning) a maximally probable measure, given (a training set of) related and unrelated pairs, is a convex optimization problem. Estimating (inferring) a maximally probable relation, given a measure, is a 01-linear program. It is solved in linear time for maps. It is NP-hard for equivalence relations and linear orders. Practical solutions for all three cases are shown in experiments with real data. Finally, estimating a maximally probable measure and relation jointly is posed as a mixed-integer nonlinear program. This formulation suggests a mathematical programming approach to semi-supervised learning.},
}
Endnote
%0 Report %A Qu, Lizhen %A Andres, Björn %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Computer Vision and Multimodal Computing, MPI for Informatics, Max Planck Society %T Estimating Maximally Probable Constrained Relations by Mathematical Programming : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-D324-6 %U http://arxiv.org/abs/1408.0838 %D 2014 %8 04.08.2014 %X Estimating a constrained relation is a fundamental problem in machine learning. Special cases are classification (the problem of estimating a map from a set of to-be-classified elements to a set of labels), clustering (the problem of estimating an equivalence relation on a set) and ranking (the problem of estimating a linear order on a set). We contribute a family of probability measures on the set of all relations between two finite, non-empty sets, which offers a joint abstraction of multi-label classification, correlation clustering and ranking by linear ordering. Estimating (learning) a maximally probable measure, given (a training set of) related and unrelated pairs, is a convex optimization problem. Estimating (inferring) a maximally probable relation, given a measure, is a 01-linear program. It is solved in linear time for maps. It is NP-hard for equivalence relations and linear orders. Practical solutions for all three cases are shown in experiments with real data. Finally, estimating a maximally probable measure and relation jointly is posed as a mixed-integer nonlinear program. This formulation suggests a mathematical programming approach to semi-supervised learning. %K Computer Science, Learning, cs.LG,Computer Science, Numerical Analysis, cs.NA,Mathematics, Optimization and Control, math.OC,Statistics, Machine Learning, stat.ML
[157]
P. Roy, J. Teubner, and R. Gemulla, “Low-latency Handshake Join,” Proceedings of the VLDB Endowment (Proc. VLDB 2014), vol. 7, no. 9, 2014.
Export
BibTeX
@article{GemullaVLDB2014,
  title        = {Low-latency Handshake Join},
  author       = {Roy, Pratanu and Teubner, Jens and Gemulla, Rainer},
  language     = {eng},
  publisher    = {ACM},
  address      = {New York, NY},
  year         = {2014},
  marginalmark = {$\bullet$},
  journal      = {Proceedings of the VLDB Endowment (Proc. VLDB)},
  volume       = {7},
  number       = {9},
  pages        = {709--720},
  booktitle    = {Proceedings of the 40th International Conference on Very Large Data Bases (VLDB 2014)},
  editor       = {Jagadish, H. V. and Zhou, Aoying},
}
Endnote
%0 Journal Article %A Roy, Pratanu %A Teubner, Jens %A Gemulla, Rainer %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Low-latency Handshake Join : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6AFF-8 %7 2014 %D 2014 %J Proceedings of the VLDB Endowment %O PVLDB %V 7 %N 9 %& 709 %P 709 - 720 %I ACM %C New York, NY %B Proceedings of the 40th International Conference on Very Large Data Bases %O VLDB 2014 Hangzhou, China, September 1st - 5th
[158]
F. M. Suchanek and G. Weikum, “Knowledge Bases in the Age of Big Data Analytics,” Proceedings of the VLDB Endowment (Proc. VLDB 2014), vol. 7, no. 13, 2014.
Export
BibTeX
@article{DBLP:journals/pvldb/SuchanekW14,
  title        = {Knowledge Bases in the Age of Big Data Analytics},
  author       = {Suchanek, Fabian M. and Weikum, Gerhard},
  language     = {eng},
  publisher    = {ACM},
  address      = {New York, NY},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  journal      = {Proceedings of the VLDB Endowment (Proc. VLDB)},
  volume       = {7},
  number       = {13},
  pages        = {1713--1714},
  booktitle    = {Proceedings of the 40th International Conference on Very Large Data Bases (VLDB 2014)},
  editor       = {Jagadish, H. V. and Zhou, Aoying},
}
Endnote
%0 Journal Article %A Suchanek, Fabian M. %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowledge Bases in the Age of Big Data Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-6B2A-F %7 2014 %D 2014 %J Proceedings of the VLDB Endowment %O PVLDB %V 7 %N 13 %& 1713 %P 1713 - 1714 %I ACM %C New York, NY %B Proceedings of the 40th International Conference on Very Large Data Bases %O VLDB 2014 Hangzhou, China, September 1st - 5th %U http://www.vldb.org/pvldb/vol7/p1713-suchanek.pdf
[159]
N. Tandon, G. de Melo, F. M. Suchanek, and G. Weikum, “WebChild: Harvesting and Organizing Commonsense Knowledge from the Web,” in WSDM’14, 7th ACM International Conference on Web Search and Data Mining, New York, NY, USA, 2014.
Export
BibTeX
@inproceedings{Tandon2013,
  title        = {{WebChild}: Harvesting and Organizing Commonsense Knowledge from the Web},
  author       = {Tandon, Niket and de Melo, Gerard and Suchanek, Fabian M. and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-4503-2351-2},
  doi          = {10.1145/2556195.2556245},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {WSDM'14, 7th ACM International Conference on Web Search and Data Mining},
  pages        = {523--532},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Tandon, Niket %A de Melo, Gerard %A Suchanek, Fabian M. %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T WebChild: Harvesting and Organizing Commonsense Knowledge from the Web : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0019-84C4-7 %R 10.1145/2556195.2556245 %D 2014 %B 7th ACM International Conference on Web Search and Data Mining %Z date of event: 2014-04-24 - 2014-04-28 %C New York, NY, USA %B WSDM'14 %P 523 - 532 %I ACM %@ 978-1-4503-2351-2
[160]
N. Tandon, G. de Melo, and G. Weikum, “Acquiring Comparative Commonsense Knowledge from the Web,” in Proceedings of the Twenty-Eighth AAAI Conference on Artificial Intelligence and the Twenty-Sixth Innovative Applications of Artificial Intelligence Conference, Québec City, Québec, Canada, 2014.
Export
BibTeX
@inproceedings{DBLP:conf/aaai/TandonMW14,
  title        = {Acquiring Comparative Commonsense Knowledge from the Web},
  author       = {Tandon, Niket and de Melo, Gerard and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-57735-661-5},
  publisher    = {AAAI Press},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Proceedings of the Twenty-Eighth AAAI Conference on Artificial Intelligence and the Twenty-Sixth Innovative Applications of Artificial Intelligence Conference},
  editor       = {Brodley, Carla E. and Stone, Peter},
  pages        = {166--172},
  address      = {Qu{\'e}bec City, Qu{\'e}bec, Canada},
}
Endnote
%0 Conference Proceedings %A Tandon, Niket %A de Melo, Gerard %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Acquiring Comparative Commonsense Knowledge from the Web : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-49A0-6 %D 2014 %B Twenty-Eighth AAAI Conference on Artificial Intelligence %Z date of event: 2014-07-27 - 2014-07-31 %C Québec City, Québec, Canada %B Proceedings of the Twenty-Eighth AAAI Conference on Artificial Intelligence and the Twenty-Sixth Innovative Applications of Artificial Intelligence Conference %E Brodley, Carla E.; Stone, Peter %P 166 - 172 %I AAAI Press %@ 978-1-57735-661-5 %U http://www.aaai.org/ocs/index.php/AAAI/AAAI14/paper/view/8649
[161]
T. Tylenda, S. K. Kondreddi, and G. Weikum, “Spotting Knowledge Base Facts in Web Texts,” in AKBC 2014, 4th Workshop on Automated Knowledge Base Construction, Montreal, Canada, 2014.
Export
BibTeX
@inproceedings{TylendaKW2014,
  title        = {Spotting Knowledge Base Facts in Web Texts},
  author       = {Tylenda, Tomasz and Kondreddi, Sarath Kumar and Weikum, Gerhard},
  language     = {eng},
  publisher    = {AKBC Board},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {AKBC 2014, 4th Workshop on Automated Knowledge Base Construction},
  address      = {Montreal, Canada},
}
Endnote
%0 Conference Proceedings %A Tylenda, Tomasz %A Kondreddi, Sarath Kumar %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Spotting Knowledge Base Facts in Web Texts : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-689C-7 %D 2014 %B 4th Workshop on Automated Knowledge Base Construction %Z date of event: 2014-12-13 - 2014-12-13 %C Montreal, Canada %B AKBC 2014 %I AKBC Board %U http://www.akbc.ws/2014/submissions/akbc2014_submission_8.pdf
[162]
T. Tylenda, Y. Wang, and G. Weikum, “Spotting Facts in the Wild,” in Workshop on Automatic Creation and Curation of Knowledge Bases at SIGMOD (WACCK 2014), Snowbird, UT, USA. (Accepted/in press)
Export
BibTeX
@inproceedings{TylendaWW2014,
  title        = {Spotting Facts in the Wild},
  author       = {Tylenda, Tomasz and Wang, Yafang and Weikum, Gerhard},
  language     = {eng},
  year         = {2014},
  publremark   = {Accepted},
  marginalmark = {$\bullet$},
  booktitle    = {Workshop on Automatic Creation and Curation of Knowledge Bases at SIGMOD (WACCK 2014)},
  address      = {Snowbird, UT, USA},
}
Endnote
%0 Conference Proceedings %A Tylenda, Tomasz %A Wang, Yafang %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Spotting Facts in the Wild : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-68A8-B %D 2014 %B Workshop on Automatic Creation and Curation of Knowledge Bases %Z date of event: 2014-06-27 - 2014-06-27 %C Snowbird, UT, USA %B Workshop on Automatic Creation and Curation of Knowledge Bases at SIGMOD
[163]
M. van Leeuwen and J. Vreeken, “Mining and Using Sets of Patterns through Compression,” in Frequent Pattern Mining, New York, NY: Springer, 2014.
Export
BibTeX
@incollection{leeuwen:14:compression,
  title        = {Mining and Using Sets of Patterns through Compression},
  author       = {van Leeuwen, Matthijs and Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-3-319-07820-5},
  doi          = {10.1007/978-3-319-07821-2_8},
  publisher    = {Springer},
  address      = {New York, NY},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Frequent Pattern Mining},
  editor       = {Aggarwal, Charu C. and Han, Jiawei},
  pages        = {165--198},
}
Endnote
%0 Book Section %A van Leeuwen, Matthijs %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Mining and Using Sets of Patterns through Compression : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53BB-E %R 10.1007/978-3-319-07821-2_8 %D 2014 %B Frequent Pattern Mining %E Aggarwal, Charu C.; Han, Jiawei %P 165 - 198 %I Springer %C New York, NY %@ 978-3-319-07820-5
[164]
J. Vreeken and N. Tatti, “Interesting Patterns,” in Frequent Pattern Mining, New York, NY: Springer, 2014.
Export
BibTeX
@incollection{vreeken:14:interesting,
  title        = {Interesting Patterns},
  author       = {Vreeken, Jilles and Tatti, Nikolaj},
  language     = {eng},
  isbn         = {978-3-319-07820-5},
  doi          = {10.1007/978-3-319-07821-2_5},
  publisher    = {Springer},
  address      = {New York, NY},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Frequent Pattern Mining},
  editor       = {Aggarwal, Charu C. and Han, Jiawei},
  pages        = {105--134},
}
Endnote
%0 Book Section %A Vreeken, Jilles %A Tatti, Nikolaj %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Interesting Patterns : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53B9-1 %R 10.1007/978-3-319-07821-2_5 %D 2014 %K Pattern mining; Interestingness measures; Statistics; Ranking; Pattern set mining %B Frequent Pattern Mining %E Aggarwal, Charu C.; Han, Jiawei %P 105 - 134 %I Springer %C New York, NY %@ 978-3-319-07820-5
[165]
G. I. Webb and J. Vreeken, “Efficient Discovery of the Most Interesting Associations,” ACM Transactions on Knowledge Discovery from Data, vol. 8, no. 3, 2014.
Export
BibTeX
@article{webb:14:selfsufs,
  title        = {Efficient Discovery of the Most Interesting Associations},
  author       = {Webb, Geoffrey I. and Vreeken, Jilles},
  language     = {eng},
  doi          = {10.1145/2601433},
  publisher    = {ACM},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  journal      = {ACM Transactions on Knowledge Discovery from Data},
  volume       = {8},
  number       = {3},
  pages        = {1--31},
  eid          = {15},
}
Endnote
%0 Journal Article %A Webb, Geoffrey I. %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Efficient Discovery of the Most Interesting Associations : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53B1-2 %R 10.1145/2601433 %7 2014 %D 2014 %J ACM Transactions on Knowledge Discovery from Data %O TKDD %V 8 %N 3 %& 1 %P 1 - 31 %Z sequence number: 15 %I ACM
[166]
G. Weikum, “Big Text: von Sprache zu Wissen,” in Informatik 2014: Big Data - Komplexität meistern, Stuttgart, Deutschland, 2014.
Export
BibTeX
@inproceedings{DBLP:conf/gi/Weikum14,
  title        = {Big {Text}: von {Sprache} zu {Wissen}},
  author       = {Weikum, Gerhard},
  language     = {deu},
  isbn         = {978-3-88579-626-8},
  publisher    = {GI},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Informatik 2014: Big Data -- Komplexit{\"a}t meistern},
  editor       = {Pl{\"o}dereder, Erhard and Grunske, Lars and Schneider, Eric and Ull, Dominik},
  pages        = {55},
  series       = {Lecture Notes in Informatics},
  volume       = {P-232},
  address      = {Stuttgart, Deutschland},
}
Endnote
%0 Conference Proceedings %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Big Text: von Sprache zu Wissen : %G deu %U http://hdl.handle.net/11858/00-001M-0000-0024-54D4-A %D 2014 %B 44. Jahrestagung der Gesellschaft für Informatik %Z date of event: 2014-09-22 - 2014-09-26 %C Stuttgart, Deutschland %B Informatik 2014: Big Data - Komplexität meistern %E Plödereder, Erhard; Grunske, Lars; Schneider, Eric; Ull, Dominik %P 55 %I GI %@ 978-388579626-8 %B Lecture Notes in Informatics %N P-232
[167]
H. Wu, J. Vreeken, N. Tatti, and N. Ramakrishnan, “Uncovering the Plot: Detecting Surprising Coalitions of Entities in Multi-relational Schemas,” Data Mining and Knowledge Discovery, vol. 28, no. 5–6, 2014.
Export
BibTeX
@article{wu:14:plots,
  title        = {Uncovering the Plot: {Detecting} Surprising Coalitions of Entities in Multi-relational Schemas},
  author       = {Wu, Hao and Vreeken, Jilles and Tatti, Nikolaj and Ramakrishnan, Naren},
  language     = {eng},
  doi          = {10.1007/s10618-014-0370-1},
  publisher    = {Springer},
  address      = {London},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  journal      = {Data Mining and Knowledge Discovery},
  volume       = {28},
  number       = {5--6},
  pages        = {1398--1428},
}
Endnote
%0 Journal Article %A Wu, Hao %A Vreeken, Jilles %A Tatti, Nikolaj %A Ramakrishnan, Naren %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Uncovering the Plot: Detecting Surprising Coalitions of Entities in Multi-relational Schemas : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53B7-5 %R 10.1007/s10618-014-0370-1 %7 2014-07-22 %D 2014 %J Data Mining and Knowledge Discovery %V 28 %N 5-6 %& 1398 %P 1398 - 1428 %I Springer %C London
[168]
M. Yahya, S. E. Whang, R. Gupta, and A. Halevy, “ReNoun: Fact Extraction for Nominal Attributes,” in The 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP 2014), Doha, Qatar, 2014.
Abstract
Search engines are increasingly relying on large knowledge bases of facts to provide direct answers to users' queries. However, the construction of these knowledge bases is largely manual and does not scale to the long and heavy tail of facts. Open information extraction tries to address this challenge, but typically assumes that facts are expressed with verb phrases, and therefore has had difficulty extracting facts for noun-based relations. We describe ReNoun, an open information extraction system that complements previous efforts by focusing on nominal attributes and on the long tail. ReNoun's approach is based on leveraging a large ontology of noun attributes mined from a text corpus and from user queries. ReNoun creates a seed set of training data by using specialized patterns and requiring that the facts mention an attribute in the ontology. ReNoun then generalizes from this seed set to produce a much larger set of extractions that are then scored. We describe experiments that show that we extract facts with high precision and for attributes that cannot be extracted with verb-based techniques.
Export
BibTeX
@inproceedings{YahyaSRA14,
  title        = {{ReNoun}: Fact Extraction for Nominal Attributes},
  author       = {Yahya, Mohamed and Whang, Steven Euijong and Gupta, Rahul and Halevy, Alon},
  language     = {eng},
  isbn         = {978-1-937284-96-1},
  publisher    = {ACL},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014-10},
  abstract     = {Search engines are increasingly relying on large knowledge bases of facts to provide direct answers to users' queries. However, the construction of these knowledge bases is largely manual and does not scale to the long and heavy tail of facts. Open information extraction tries to address this challenge, but typically assumes that facts are expressed with verb phrases, and therefore has had difficulty extracting facts for noun-based relations. We describe ReNoun, an open information extraction system that complements previous efforts by focusing on nominal attributes and on the long tail. ReNoun's approach is based on leveraging a large ontology of noun attributes mined from a text corpus and from user queries. ReNoun creates a seed set of training data by using specialized patterns and requiring that the facts mention an attribute in the ontology. ReNoun then generalizes from this seed set to produce a much larger set of extractions that are then scored. We describe experiments that show that we extract facts with high precision and for attributes that cannot be extracted with verb-based techniques.},
  booktitle    = {The 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP 2014)},
  pages        = {325--335},
  address      = {Doha, Qatar},
}
Endnote
%0 Conference Proceedings %A Yahya, Mohamed %A Whang, Steven Euijong %A Gupta, Rahul %A Halevy, Alon %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T ReNoun: Fact Extraction for Nominal Attributes : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-2589-7 %D 2014 %B 2014 Conference on Empirical Methods in Natural Language Processing %Z date of event: 2014-10-25 - 2014-10-29 %C Doha, Qatar %X Search engines are increasingly relying on large knowledge bases of facts to provide direct answers to users' queries. However, the construction of these knowledge bases is largely manual and does not scale to the long and heavy tail of facts. Open information extraction tries to address this challenge, but typically assumes that facts are expressed with verb phrases, and therefore has had difficulty extracting facts for noun-based relations. We describe ReNoun, an open information extraction system that complements previous efforts by focusing on nominal attributes and on the long tail. ReNoun's approach is based on leveraging a large ontology of noun attributes mined from a text corpus and from user queries. ReNoun creates a seed set of training data by using specialized patterns and requiring that the facts mention an attribute in the ontology. ReNoun then generalizes from this seed set to produce a much larger set of extractions that are then scored. We describe experiments that show that we extract facts with high precision and for attributes that cannot be extracted with verb-based techniques. %B The 2014 Conference on Empirical Methods in Natural Language Processing %P 325 - 335 %I ACL %@ 978-1-937284-96-1 %U http://emnlp2014.org/papers/pdf/EMNLP2014038.pdf
[169]
M. A. Yosef, M. Spaniol, and G. Weikum, “AIDArabic: A Named-entity Disambiguation Framework for Arabic Text,” in The EMNLP 2014 Workshop on Arabic Natural Language Processing (ANLP 2014), Doha, Qatar, 2014.
Export
BibTeX
@inproceedings{mamir:2014:aidarabic,
  title        = {{AIDArabic}: A Named-entity Disambiguation Framework for {Arabic} Text},
  author       = {Yosef, Mohamed Amir and Spaniol, Marc and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-937284-96-1},
  publisher    = {ACL},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {The EMNLP 2014 Workshop on Arabic Natural Language Processing (ANLP 2014)},
  pages        = {187--195},
  eid          = {W14-3626},
  address      = {Doha, Qatar},
}
Endnote
%0 Conference Proceedings %A Yosef, Mohamed Amir %A Spaniol, Marc %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T AIDArabic: A Named-entity Disambiguation Framework for Arabic Text : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-548F-A %D 2014 %B The EMNLP 2014 Workshop on Arabic Natural Language Processing %Z date of event: 2014-10-25 - 2014-10-25 %C Doha, Qatar %B The EMNLP 2014 Workshop on Arabic Natural Language Processing %P 187 - 195 %Z sequence number: W14-3626 %I ACL %@ 978-1-937284-96-1 %U http://www.aclweb.org/anthology/W14-3626
[170]
M. A. Yosef, J. Hoffart, Y. Ibrahim, A. Boldyrev, and G. Weikum, “Adapting AIDA for Tweets,” in Proceedings of the 4th Workshop on Making Sense of Microposts, Seoul, Korea, 2014.
Export
BibTeX
@inproceedings{mamir:2014:aida-for-tweets,
  title        = {Adapting {AIDA} for Tweets},
  author       = {Yosef, Mohamed Amir and Hoffart, Johannes and Ibrahim, Yusra and Boldyrev, Artem and Weikum, Gerhard},
  language     = {eng},
  issn         = {1613-0073},
  url          = {urn:nbn:de:0074-1141-0},
  publisher    = {CEUR-WS.org},
  year         = {2014},
  marginalmark = {$\bullet$},
  booktitle    = {Proceedings of the 4th Workshop on Making Sense of Microposts},
  editor       = {Rowe, Matthew and Stankovic, Milan and Dadzie, Aba-Sah},
  pages        = {68--69},
  series       = {CEUR Workshop Proceedings},
  volume       = {1141},
  address      = {Seoul, Korea},
}
Endnote
%0 Conference Proceedings %A Yosef, Mohamed Amir %A Hoffart, Johannes %A Ibrahim, Yusra %A Boldyrev, Artem %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Adapting AIDA for Tweets : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54AB-8 %D 2014 %B 4th Workshop on Making Sense of Microposts %Z date of event: 2014-04-07 - 2014-04-07 %C Seoul, Korea %B Proceedings of the 4th Workshop on Making Sense of Microposts %E Rowe, Matthew; Stankovic, Milan; Dadzie, Aba-Sah %P 68 - 69 %I CEUR-WS.org %B CEUR Workshop Proceedings %N 1141 %@ false %U http://ceur-ws.org/Vol-1141/paper_15.pdf
[171]
A. Zimek, I. Assent, and J. Vreeken, “Frequent Pattern Mining Algorithms for Data Clustering,” in Frequent Pattern Mining, New York, NY: Springer, 2014.
Export
BibTeX
@incollection{zimek:14:clustering,
  title        = {Frequent Pattern Mining Algorithms for Data Clustering},
  author       = {Zimek, Arthur and Assent, Ira and Vreeken, Jilles},
  language     = {eng},
  isbn         = {978-3-319-07820-5},
  doi          = {10.1007/978-3-319-07821-2_16},
  publisher    = {Springer},
  address      = {New York, NY},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
  booktitle    = {Frequent Pattern Mining},
  editor       = {Aggarwal, Charu C. and Han, Jiawei},
  pages        = {403--423},
}
Endnote
%0 Book Section %A Zimek, Arthur %A Assent, Ira %A Vreeken, Jilles %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Frequent Pattern Mining Algorithms for Data Clustering : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-53BD-A %R 10.1007/978-3-319-07821-2_16 %D 2014 %B Frequent Pattern Mining %E Aggarwal, Charu C.; Han, Jiawei %P 403 - 423 %I Springer %C New York, NY %@ 978-3-319-07820-5
[172]
T. Zinchenko, “Redescription Mining Over non-Binary Data Sets Using Decision Trees,” Universität des Saarlandes, Saarbrücken, 2014.
Export
BibTeX
@mastersthesis{ZinchenkoMaster2014,
  title        = {Redescription Mining Over non-Binary Data Sets Using Decision Trees},
  author       = {Zinchenko, Tetiana},
  language     = {eng},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2014},
  marginalmark = {$\bullet$},
  date         = {2014},
}
Endnote
%0 Thesis %A Zinchenko, Tetiana %Y Miettinen, Pauli %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Redescription Mining Over non-Binary Data Sets Using Decision Trees : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-B73A-5 %I Universität des Saarlandes %C Saarbrücken %D 2014 %P X, 118 p. %V master %9 master
2013
[173]
E. Aksehirli, B. Goethals, E. Müller, and J. Vreeken, “Cartification: A Neighborhood Preserving Transformation for Mining High Dimensional Data,” in IEEE 13th International Conference on Data Mining (ICDM 2013), Dallas, TX, USA, 2013.
Export
BibTeX
@inproceedings{Aksehirli2013a,
  title        = {Cartification: A Neighborhood Preserving Transformation for Mining High Dimensional Data},
  author       = {Aksehirli, Emin and Goethals, Bart and M{\"u}ller, Emmanuel and Vreeken, Jilles},
  language     = {eng},
  doi          = {10.1109/ICDM.2013.146},
  localid      = {Local-ID: 9972B38173345D64C1257C600054DB8E-Aksehirli2013a},
  publisher    = {IEEE},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  booktitle    = {IEEE 13th International Conference on Data Mining (ICDM 2013)},
  editor       = {Karypis, George and Xiong, Hui},
  pages        = {937--942},
  address      = {Dallas, TX, USA},
}
Endnote
%0 Conference Proceedings %A Aksehirli, Emin %A Goethals, Bart %A Müller, Emmanuel %A Vreeken, Jilles %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Cartification: A Neighborhood Preserving Transformation for Mining High Dimensional Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-19EA-5 %R 10.1109/ICDM.2013.146 %F OTHER: Local-ID: 9972B38173345D64C1257C600054DB8E-Aksehirli2013a %D 2013 %B 13th International Conference on Data Mining %Z date of event: 2013-12-07 - 2013-12-10 %C Dallas, TX, USA %B IEEE 13th International Conference on Data Mining %E Karypis, George; Xiong, Hui %P 937 - 942 %I IEEE
[174]
F. Alvanaki, E. Ilieva, S. Michel, and A. Stupar, “Interesting Event Detection through Hall of Fame Rankings,” in Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks (DBSocial 2013), New York, NY, USA, 2013.
Abstract
Everything is relative. Cars are compared by gas per mile, websites by page rank, students based on GPA, scientists by number of publications, and celebrities by beauty or wealth. In this paper, we study the characteristics of such entity rankings based on a set of rankings obtained from a popular Web portal. The obtained insights are integrated in our approach, coined Pantheon. Pantheon maintains sets of top-k rankings and reports identified changes in a way that appeals to users, using a novel combination of different characteristics like competitiveness, information entropy, and scale of change. Entity rankings are assembled by combining entity type attributes with data-driven categorical constraints and sorting criteria on numeric attributes. We report on the results of an experimental evaluation using real-world data obtained from a basketball statistics website.
Export
BibTeX
@inproceedings{Avlanaki2013b,
  title        = {Interesting Event Detection through Hall of Fame Rankings},
  author       = {Alvanaki, Foteini and Ilieva, Evica and Michel, Sebastian and Stupar, Aleksandar},
  language     = {eng},
  isbn         = {978-1-4503-2191-4},
  doi          = {10.1145/2484702.2484704},
  localid      = {Local-ID: BCF76B7E62BA3435C1257B9700501576-Avlanaki2013b},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {Everything is relative. Cars are compared by gas per mile, websites by page rank, students based on GPA, scientists by number of publications, and celebrities by beauty or wealth. In this paper, we study the characteristics of such entity rankings based on a set of rankings obtained from a popular Web portal. The obtained insights are integrated in our approach, coined Pantheon. Pantheon maintains sets of top-k rankings and reports identified changes in a way that appeals to users, using a novel combination of different characteristics like competitiveness, information entropy, and scale of change. Entity rankings are assembled by combining entity type attributes with data-driven categorical constraints and sorting criteria on numeric attributes. We report on the results of an experimental evaluation using real-world data obtained from a basketball statistics website.},
  booktitle    = {Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks (DBSocial 2013)},
  editor       = {LeFevre, Kristen and Machanavajjhala, Ashwin and Silberstein, Adam},
  pages        = {7--12},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Alvanaki, Foteini %A Ilieva, Evica %A Michel, Sebastian %A Stupar, Aleksandar %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Interesting Event Detection through Hall of Fame Rankings : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A8A-C %R 10.1145/2484702.2484704 %F OTHER: Local-ID: BCF76B7E62BA3435C1257B9700501576-Avlanaki2013b %D 2013 %B ACM SIGMOD Workshop on Databases and Social Networks %Z date of event: 2013-06-22 - 2013-06-27 %C New York, NY, USA %X Everything is relative. Cars are compared by gas per mile, websites by page rank, students based on GPA, scientists by number of publications, and celebrities by beauty or wealth. In this paper, we study the characteristics of such entity rankings based on a set of rankings obtained from a popular Web portal. The obtained insights are integrated in our approach, coined Pantheon. Pantheon maintains sets of top-k rankings and reports identified changes in a way that appeals to users, using a novel combination of different characteristics like competitiveness, information entropy, and scale of change. Entity rankings are assembled by combining entity type attributes with data-driven categorical constraints and sorting criteria on numeric attributes. We report on the results of an experimental evaluation using real-world data obtained from a basketball statistics website. %B Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks %E LeFevre, Kristen; Machanavajjhala, Ashwin; Silberstein, Adam %P 7 - 12 %I ACM %@ 978-1-4503-2191-4
[175]
F. Alvanaki and S. Michel, “Scalable, Continuous Tracking of Tag Co-occurrences Between Short Sets Using (Almost) Disjoint Tag Partitions,” in Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks (DBSocial 2013), New York, NY, USA, 2013.
Abstract
In this work we consider the continuous computation of set correlations over a stream of set-valued attributes, such as Tweets and their hashtags, social annotations of blog posts obtained through RSS, or updates to set-valued attributes of databases. In order to compute tag correlations in a distributed fashion, all necessary information has to be present at the computing node(s). Our approach makes use of a partitioning scheme based on set covers for efficient and replication-lean information flow. We report on the results of a preliminary performance evaluation using Tweets obtained through Twitter's streaming API.
Export
BibTeX
@inproceedings{Avlanaki2013a,
  title        = {Scalable, Continuous Tracking of Tag Co-occurrences Between Short Sets Using (Almost) Disjoint Tag Partitions},
  author       = {Alvanaki, Foteini and Michel, Sebastian},
  language     = {eng},
  isbn         = {978-1-4503-2191-4},
  doi          = {10.1145/2484702.2484705},
  localid      = {Local-ID: 305767E5408759CFC1257B97004FACE2-Avlanaki2013a},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {In this work we consider the continuous computation of set correlations over a stream of set-valued attributes, such as Tweets and their hashtags, social annotations of blog posts obtained through RSS, or updates to set-valued attributes of databases. In order to compute tag correlations in a distributed fashion, all necessary information has to be present at the computing node(s). Our approach makes use of a partitioning scheme based on set covers for efficient and replication-lean information flow. We report on the results of a preliminary performance evaluation using Tweets obtained through Twitter's streaming API.},
  booktitle    = {Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks (DBSocial 2013)},
  editor       = {LeFevre, Kristen and Machanavajjhala, Ashwin and Silberstein, Adam},
  pages        = {49--54},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Alvanaki, Foteini %A Michel, Sebastian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Scalable, Continuous Tracking of Tag Co-occurrences Between Short Sets Using (Almost) Disjoint Tag Partitions : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A81-D %R 10.1145/2484702.2484705 %F OTHER: Local-ID: 305767E5408759CFC1257B97004FACE2-Avlanaki2013a %D 2013 %B ACM SIGMOD Workshop on Databases and Social Networks %Z date of event: 2013-06-13 - 2013-06-13 %C New York, NY, USA %X In this work we consider the continuous computation of set correlations over a stream of set-valued attributes, such as Tweets and their hashtags, social annotations of blog posts obtained through RSS, or updates to set-valued attributes of databases. In order to compute tag correlations in a distributed fashion, all necessary information has to be present at the computing node(s). Our approach makes use of a partitioning scheme based on set covers for efficient and replication-lean information flow. We report on the results of a preliminary performance evaluation using Tweets obtained through Twitter's streaming API. %K Distributed Stream Processing, Tags, Twitter, Correlation, Continuous %B Proceedings of the ACM SIGMOD Workshop on Databases and Social Networks %E LeFevre, Kristen; Machanavajjhala, Ashwin; Silberstein, Adam %P 49 - 54 %I ACM %@ 978-1-4503-2191-4
[176]
F. Alvanaki and S. Michel, “A Thin Monitoring Layer for Top-k Aggregation Queries over a Database,” in 7th International Workshop on Ranking in Databases (DBRank 2013), Riva del Garda, Italy, 2013.
Export
BibTeX
@inproceedings{AlvanakiMichel2013c,
  TITLE        = {A Thin Monitoring Layer for Top-k Aggregation Queries over a Database},
  AUTHOR       = {Alvanaki, Foteini and Michel, Sebastian},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2497-7},
  DOI          = {10.1145/2524828.2524831},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {7th International Workshop on Ranking in Databases (DBRank 2013)},
  PAGES        = {1--6},
  EID          = {3},
  ADDRESS      = {Riva del Garda, Italy},
}
Endnote
%0 Conference Proceedings %A Alvanaki, Foteini %A Michel, Sebastian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Thin Monitoring Layer for Top-k Aggregation Queries over a Database : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-CCDD-E %R 10.1145/2524828.2524831 %D 2013 %B 7th International Workshop on Ranking in Databases %Z date of event: 2013-08-30 - 2013-08-30 %C Riva del Garda, Italy %B 7th International Workshop on Ranking in Databases %P 1 - 6 %Z sequence number: 3 %I ACM %@ 978-1-4503-2497-7
[177]
A. Anand, “Indexing Methods for Web Archives,” Universität des Saarlandes, Saarbrücken, 2013.
Abstract
There have been numerous efforts recently to digitize previously published content and preserving born-digital content leading to the widespread growth of large text repositories. Web archives are such continuously growing text collections which contain versions of documents spanning over long time periods. Web archives present many opportunities for historical, cultural and political analyses. Consequently there is a growing need for tools which can efficiently access and search them. In this work, we are interested in indexing methods for supporting text-search workloads over web archives like time-travel queries and phrase queries. To this end we make the following contributions: Time-travel queries are keyword queries with a temporal predicate, e.g., mpii saarland @ [06/2009], which return versions of documents in the past. We introduce a novel index organization strategy, called index sharding, for efficiently supporting time-travel queries without incurring additional index-size blowup. We also propose index-maintenance approaches which scale to such continuously growing collections. We develop query-optimization techniques for time-travel queries called partition selection which maximizes recall at any given query-execution stage. We propose indexing methods to support phrase queries, e.g., to be or not to be that is the question. We index multi-word sequences and devise novel query-optimization methods over the indexed sequences to efficiently answer phrase queries. We demonstrate the superior performance of our approaches over existing methods by extensive experimentation on real-world web archives.
Export
BibTeX
@phdthesis{Anand2013,
  TITLE        = {Indexing Methods for Web Archives},
  AUTHOR       = {Anand, Avishek},
  LANGUAGE     = {eng},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  ABSTRACT     = {There have been numerous efforts recently to digitize previously published content and preserving born-digital content leading to the widespread growth of large text repositories. Web archives are such continuously growing text collections which contain versions of documents spanning over long time periods. Web archives present many opportunities for historical, cultural and political analyses. Consequently there is a growing need for tools which can efficiently access and search them. In this work, we are interested in indexing methods for supporting text-search workloads over web archives like time-travel queries and phrase queries. To this end we make the following contributions: Time-travel queries are keyword queries with a temporal predicate, e.g., mpii saarland @ [06/2009], which return versions of documents in the past. We introduce a novel index organization strategy, called index sharding, for efficiently supporting time-travel queries without incurring additional index-size blowup. We also propose index-maintenance approaches which scale to such continuously growing collections. We develop query-optimization techniques for time-travel queries called partition selection which maximizes recall at any given query-execution stage. We propose indexing methods to support phrase queries, e.g., to be or not to be that is the question. We index multi-word sequences and devise novel query-optimization methods over the indexed sequences to efficiently answer phrase queries. We demonstrate the superior performance of our approaches over existing methods by extensive experimentation on real-world web archives.},
}
Endnote
%0 Thesis %A Anand, Avishek %Y Berberich, Klaus %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Indexing Methods for Web Archives : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0026-CB4B-0 %I Universität des Saarlandes %C Saarbrücken %D 2013 %V phd %9 phd %X There have been numerous efforts recently to digitize previously published content and preserving born-digital content leading to the widespread growth of large text repositories. Web archives are such continuously growing text collections which contain versions of documents spanning over long time periods. Web archives present many opportunities for historical, cultural and political analyses. Consequently there is a growing need for tools which can efficiently access and search them. In this work, we are interested in indexing methods for supporting text-search workloads over web archives like time-travel queries and phrase queries. To this end we make the following contributions: Time-travel queries are keyword queries with a temporal predicate, e.g., mpii saarland @ [06/2009], which return versions of documents in the past. We introduce a novel index organization strategy, called index sharding, for efficiently supporting time-travel queries without incurring additional index-size blowup. We also propose index-maintenance approaches which scale to such continuously growing collections. We develop query-optimization techniques for time-travel queries called partition selection which maximizes recall at any given query-execution stage. We propose indexing methods to support phrase queries, e.g., to be or not to be that is the question. 
We index multi-word sequences and devise novel query-optimization methods over the indexed sequences to efficiently answer phrase queries. We demonstrate the superior performance of our approaches over existing methods by extensive experimentation on real-world web archives. %U http://scidok.sulb.uni-saarland.de/volltexte/2013/5531/ %U http://scidok.sulb.uni-saarland.de/doku/lic_ohne_pod.php?la=de
[178]
F. Ansari, “A Comparative Study of MAX-SAT Solving Techniques with Soft and Hard Rules,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{AnsariMastersThesis2013,
  TITLE        = {A Comparative Study of {MAX}-{SAT} Solving Techniques with Soft and Hard Rules},
  AUTHOR       = {Ansari, Farzaneh},
  LANGUAGE     = {eng},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
}
Endnote
%0 Thesis %A Ansari, Farzaneh %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Comparative Study of MAX-SAT Solving Techniques with Soft and Hard Rules : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C6B-5 %I Universität des Saarlandes %C Saarbrücken %D 2013 %V master %9 master
[179]
R. Awadallah, M. Ramanath, and G. Weikum, “OpinioNetIt: A Structured and Faceted Knowledge-base of Opinions,” in Proceedings of the 12th IEEE International Conference on Data Mining Workshops (ICDMW 2012), Brussels, Belgium, 2013.
Export
BibTeX
@inproceedings{Awadallah2012i,
  TITLE        = {{OpinioNetIt}: A Structured and Faceted Knowledge-base of Opinions},
  AUTHOR       = {Awadallah, Rawia and Ramanath, Maya and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4673-5164-5},
  DOI          = {10.1109/ICDMW.2012.49},
  LOCALID      = {Local-ID: 04756AF15FFC805BC1257B12002D6750-Awadallah2012i},
  PUBLISHER    = {IEEE},
  YEAR         = {2012},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {Proceedings of the 12th IEEE International Conference on Data Mining Workshops (ICDMW 2012)},
  EDITOR       = {Vreeken, Jilles and Ling, Charles and Javeed Zaki, Mohammed and Siebes, Arno and Yu, Jeffrey Xu and Goethals, Bart and Webb, Geoffrey I. and Wu, Xindong},
  PAGES        = {878--881},
  ADDRESS      = {Brussels, Belgium},
}
Endnote
%0 Conference Proceedings %A Awadallah, Rawia %A Ramanath, Maya %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T OpinioNetIt: A Structured and Faceted Knowledge-base of Opinions : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-198C-C %F OTHER: Local-ID: 04756AF15FFC805BC1257B12002D6750-Awadallah2012i %R 10.1109/ICDMW.2012.49 %D 2013 %B 12th IEEE International Conference on Data Mining Workshops %Z date of event: 2012-12-10 - 2012-12-10 %C Brussels, Belgium %B Proceedings of the 12th IEEE International Conference on Data Mining Workshops %E Vreeken, Jilles; Ling, Charles; Javeed Zaki, Mohammed; Siebes, Arno; Yu, Jeffrey Xu; Goethals, Bart; Webb, Geoffrey I.; Wu, Xindong %P 878 - 881 %I IEEE %@ 978-1-4673-5164-5
[180]
S. Bedathur, K. Berberich, I. Patlakas, P. Triantafillou, and G. Weikum, “D-Hive: Data Bees Pollinating RDF, Text, and Time,” in Online Proceedings of Sixth Biennial Conference on Innovative Data Systems Research (CIDR 2013), Asilomar, CA, USA, 2013.
Export
BibTeX
@inproceedings{Bedathur2013,
  TITLE        = {{D-Hive}: Data Bees Pollinating {RDF}, Text, and Time},
  AUTHOR       = {Bedathur, Srikanta and Berberich, Klaus and Patlakas, Ioannis and Triantafillou, Peter and Weikum, Gerhard},
  LANGUAGE     = {eng},
  LOCALID      = {Local-ID: D3BAD8992F713EB5C1257B10002BB930-Bedathur2013},
  PUBLISHER    = {cidrdb.org},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {Online Proceedings of Sixth Biennial Conference on Innovative Data Systems Research (CIDR 2013)},
  EID          = {73},
  ADDRESS      = {Asilomar, CA, USA},
}
Endnote
%0 Conference Proceedings %A Bedathur, Srikanta %A Berberich, Klaus %A Patlakas, Ioannis %A Triantafillou, Peter %A Weikum, Gerhard %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T D-Hive: Data Bees Pollinating RDF, Text, and Time : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A7C-A %F OTHER: Local-ID: D3BAD8992F713EB5C1257B10002BB930-Bedathur2013 %D 2013 %B Sixth Biennial Conference on Innovative Data Systems Research %Z date of event: 2013-01-06 - 2013-01-09 %C Asilomar, CA, USA %B Online Proceedings of Sixth Biennial Conference on Innovative Data Systems Research %Z sequence number: 73 %I cidrdb.org %U http://www.cidrdb.org/cidr2013/Papers/CIDR13_Paper73.pdf
[181]
K. Beedkar, L. Del Corro, and R. Gemulla, “Fully Parallel Inference in Markov Logic Networks,” in 15th GI-Symposium Database Systems for Business, Technology and Web (BTW 2013), Magdeburg, Germany, 2013.
Export
BibTeX
@inproceedings{bcg-btw13,
  TITLE        = {Fully Parallel Inference in {M}arkov Logic Networks},
  AUTHOR       = {Beedkar, Kaustubh and Del Corro, Luciano and Gemulla, Rainer},
  LANGUAGE     = {eng},
  ISBN         = {978-3-88579-608-4},
  LOCALID      = {Local-ID: BB228B55B464BC71C1257B08003A6698-bcg-btw13},
  PUBLISHER    = {GI},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {15th GI-Symposium Database Systems for Business, Technology and Web (BTW 2013)},
  EDITOR       = {Saake, Gunther},
  ADDRESS      = {Magdeburg, Germany},
}
Endnote
%0 Conference Proceedings %A Beedkar, Kaustubh %A Del Corro, Luciano %A Gemulla, Rainer %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Fully Parallel Inference in Markov Logic Networks : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1989-1 %F OTHER: Local-ID: BB228B55B464BC71C1257B08003A6698-bcg-btw13 %D 2013 %B 15th GI-Symposium Database Systems for Business, Technology and Web %Z date of event: 2013-03-11 - 2013-03-15 %C Magdeburg, Germany %B 15th GI-Symposium Database Systems for Business, Technology and Web %E Saake, Gunther %I GI %@ 978-3-88579-608-4 %U http://www.btw-2013.de/proceedings/Fully%20Parallel%20Inference%20in%20Markov%20Logic%20Networks.pdf
[182]
R. Belet, “Leveraging Independence and Locality for Random Forests in a Distributed Environment,” Universität des Saarlandes, Saarbrücken, 2013.
Abstract
With the emergence of big data, inducting regression trees on very large data sets became a common data mining task. Even though centralized algorithms for computing ensembles of Classification/Regression trees are a well studied machine learning/data mining problem, their distributed versions still raise scalability, efficiency and accuracy issues. Most state of the art tree learning algorithms require data to reside in memory on a single machine. Adopting this approach for trees on big data is not feasible as the limited resources provided by only one machine lead to scalability problems. While more scalable implementations of tree learning algorithms have been proposed, they typically require specialized parallel computing architectures rendering those algorithms complex and error-prone. In this thesis we will introduce two approaches to computing ensembles of regression trees on very large training data sets using the MapReduce framework as an underlying tool. The first approach employs the entire MapReduce cluster to parallely and fully distributedly learn tree ensembles. The second approach exploits locality and independence in the tree learning process.
Export
BibTeX
@mastersthesis{Belet2013,
  TITLE        = {Leveraging Independence and Locality for Random Forests in a Distributed Environment},
  AUTHOR       = {Belet, Razvan},
  LANGUAGE     = {eng},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  ABSTRACT     = {With the emergence of big data, inducting regression trees on very large data sets became a common data mining task. Even though centralized algorithms for computing ensembles of Classification/Regression trees are a well studied machine learning/data mining problem, their distributed versions still raise scalability, efficiency and accuracy issues. Most state of the art tree learning algorithms require data to reside in memory on a single machine. Adopting this approach for trees on big data is not feasible as the limited resources provided by only one machine lead to scalability problems. While more scalable implementations of tree learning algorithms have been proposed, they typically require specialized parallel computing architectures rendering those algorithms complex and error-prone. In this thesis we will introduce two approaches to computing ensembles of regression trees on very large training data sets using the MapReduce framework as an underlying tool. The first approach employs the entire MapReduce cluster to parallely and fully distributedly learn tree ensembles. The second approach exploits locality and independence in the tree learning process.},
}
Endnote
%0 Thesis %A Belet, Razvan %Y Weikum, Gerhard %A referee: Schenkel, Ralf %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Leveraging Independence and Locality for Random Forests in a Distributed Environment : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-97B8-0 %I Universität des Saarlandes %C Saarbrücken %D 2013 %P 132 p. %V master %9 master %X With the emergence of big data, inducting regression trees on very large data sets became a common data mining task. Even though centralized algorithms for computing ensembles of Classification/Regression trees are a well studied machine learning/data mining problem, their distributed versions still raise scalability, efficiency and accuracy issues. Most state of the art tree learning algorithms require data to reside in memory on a single machine. Adopting this approach for trees on big data is not feasible as the limited resources provided by only one machine lead to scalability problems. While more scalable implementations of tree learning algorithms have been proposed, they typically require specialized parallel computing architectures rendering those algorithms complex and error-prone. In this thesis we will introduce two approaches to computing ensembles of regression trees on very large training data sets using the MapReduce framework as an underlying tool. The first approach employs the entire MapReduce cluster to parallely and fully distributedly learn tree ensembles. The second approach exploits locality and independence in the tree learning process.
[183]
P. Bellot, A. Doucet, S. Geva, S. Gurajada, J. Kamps, G. Kazai, M. Koolen, A. Mishra, V. Moriceau, J. Mothe, M. Preminger, E. SanJuan, R. Schenkel, X. Tannier, M. Theobald, M. Trappett, and Q. Wang, “Overview of INEX 2013,” in Information Access Evaluation : Multilinguality, Multimodality, and Visualization (CLEF 2013), Valencia, Spain, 2013.
Abstract
INEX investigates focused retrieval from structured documents by providing large test collections of structured documents, uniform evaluation measures, and a forum for organizations to compare their results. This paper reports on the INEX 2013 evaluation campaign, which consisted of a four activities addressing three themes: searching professional and user generated data (Social Book Search track); searching structured or semantic data (Linked Data track); and focused retrieval (Snippet Retrieval and Tweet Contextualization tracks). INEX 2013 was an exciting year for INEX in which we consolidated the collaboration with (other activities in) CLEF and for the second time ran our workshop as part of the CLEF labs in order to facilitate knowledge transfer between the evaluation forums. This paper gives an overview of all the INEX 2013 tracks, their aims and task, the built test-collections, and gives an initial analysis of the results.
Export
BibTeX
@inproceedings{INEX-Kamps2012,
  TITLE        = {Overview of {INEX} 2013},
  AUTHOR       = {Bellot, Patrice and Doucet, Antoine and Geva, Shlomo and Gurajada, Sairam and Kamps, Jaap and Kazai, Gabriella and Koolen, Marijn and Mishra, Arunav and Moriceau, Veronique and Mothe, Josiane and Preminger, Michael and SanJuan, Eric and Schenkel, Ralf and Tannier, Xavier and Theobald, Martin and Trappett, Matthew and Wang, Qiuyue},
  LANGUAGE     = {eng},
  ISSN         = {0302-9743},
  ISBN         = {978-3-642-40801-4},
  DOI          = {10.1007/978-3-642-40802-1_27},
  LOCALID      = {Local-ID: E0D7037ADFDDA1C6C1257BBB003D49E2-INEX-Kamps2012},
  PUBLISHER    = {Springer},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  ABSTRACT     = {INEX investigates focused retrieval from structured documents by providing large test collections of structured documents, uniform evaluation measures, and a forum for organizations to compare their results. This paper reports on the INEX 2013 evaluation campaign, which consisted of a four activities addressing three themes: searching professional and user generated data (Social Book Search track); searching structured or semantic data (Linked Data track); and focused retrieval (Snippet Retrieval and Tweet Contextualization tracks). INEX 2013 was an exciting year for INEX in which we consolidated the collaboration with (other activities in) CLEF and for the second time ran our workshop as part of the CLEF labs in order to facilitate knowledge transfer between the evaluation forums. This paper gives an overview of all the INEX 2013 tracks, their aims and task, the built test-collections, and gives an initial analysis of the results.},
  BOOKTITLE    = {Information Access Evaluation : Multilinguality, Multimodality, and Visualization (CLEF 2013)},
  EDITOR       = {Forner, Pamela and M{\"u}ller, Henning and Paredes, Roberto and Rosso, Paolo and Stein, Benno},
  PAGES        = {269--281},
  SERIES       = {Lecture Notes in Computer Science},
  VOLUME       = {8138},
  ADDRESS      = {Valencia, Spain},
}
Endnote
%0 Conference Proceedings %A Bellot, Patrice %A Doucet, Antoine %A Geva, Shlomo %A Gurajada, Sairam %A Kamps, Jaap %A Kazai, Gabriella %A Koolen, Marijn %A Mishra, Arunav %A Moriceau, Veronique %A Mothe, Josiane %A Preminger, Michael %A SanJuan, Eric %A Schenkel, Ralf %A Tannier, Xavier %A Theobald, Martin %A Trappett, Matthew %A Wang, Qiuyue %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations %T Overview of INEX 2013 : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A78-1 %R 10.1007/978-3-642-40802-1_27 %F OTHER: Local-ID: E0D7037ADFDDA1C6C1257BBB003D49E2-INEX-Kamps2012 %D 2013 %B 4th International Conference of the CLEF Initiative %Z date of event: 2013-09-23 - 2013-09-26 %C Valencia, Spain %X INEX investigates focused retrieval from structured documents by providing large test collections of structured documents, uniform evaluation measures, and a forum for organizations to compare their results. This paper reports on the INEX 2013 evaluation campaign, which consisted of a four activities addressing three themes: searching professional and user generated data (Social Book Search track); searching structured or semantic data (Linked Data track); and focused retrieval (Snippet Retrieval and Tweet Contextualization tracks). INEX 2013 was an exciting year for INEX in which we consolidated the collaboration with (other activities in) CLEF and for the second time ran our workshop as part of the CLEF labs in order to facilitate knowledge transfer between the evaluation forums. 
This paper gives an overview of all the INEX 2013 tracks, their aims and task, the built test-collections, and gives an initial analysis of the results. %B Information Access Evaluation : Multilinguality, Multimodality, and Visualization %E Forner, Pamela; Müller, Henning; Paredes, Roberto; Rosso, Paolo; Stein, Benno %P 269 - 281 %I Springer %@ 978-3-642-40801-4 %B Lecture Notes in Computer Science %N 8138 %@ false
[184]
P. Bellot, A. Doucet, S. Geva, S. Gurajada, J. Kamps, G. Kazai, M. Koolen, A. Mishra, V. Moriceau, J. Mothe, M. Preminger, E. SanJuan, R. Schenkel, X. Tannier, M. Theobald, M. Trappett, A. Trotman, M. Sanderson, F. Scholer, and Q. Wang, “Report on INEX 2013,” SIGIR Forum, vol. 47, no. 2, 2013.
Export
BibTeX
@article{INEX_SIGIRF2013,
  TITLE        = {Report on {INEX 2013}},
  AUTHOR       = {Bellot, Patrice and Doucet, Antoine and Geva, Shlomo and Gurajada, Sairam and Kamps, Jaap and Kazai, Gabriella and Koolen, Marijn and Mishra, Arunav and Moriceau, V{\'e}ronique and Mothe, Josiane and Preminger, Michael and SanJuan, Eric and Schenkel, Ralf and Tannier, Xavier and Theobald, Martin and Trappett, Matthew and Trotman, Andrew and Sanderson, Mark and Scholer, Falk and Wang, Qiuyue},
  LANGUAGE     = {eng},
  ISSN         = {0163-5840},
  DOI          = {10.1145/2568388.2568393},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  JOURNAL      = {SIGIR Forum},
  VOLUME       = {47},
  NUMBER       = {2},
  PAGES        = {21--32},
}
Endnote
%0 Journal Article %A Bellot, Patrice %A Doucet, Antoine %A Geva, Shlomo %A Gurajada, Sairam %A Kamps, Jaap %A Kazai, Gabriella %A Koolen, Marijn %A Mishra, Arunav %A Moriceau, Véronique %A Mothe, Josiane %A Preminger, Michael %A SanJuan, Eric %A Schenkel, Ralf %A Tannier, Xavier %A Theobald, Martin %A Trappett, Matthew %A Trotman, Andrew %A Sanderson, Mark %A Scholer, Falk %A Wang, Qiuyue %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations %T Report on INEX 2013 : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0019-82A3-0 %R 10.1145/2568388.2568393 %7 2013-12 %D 2013 %J SIGIR Forum %V 47 %N 2 %& 21 %P 21 - 32 %@ false
[185]
K. Berberich and S. Bedathur, “Computing n-gram Statistics in MapReduce,” in Advances in Database Technology (EDBT 2013), Genova, Italy, 2013.
Export
BibTeX
@inproceedings{Berberich2013b,
  TITLE        = {Computing n-gram Statistics in {MapReduce}},
  AUTHOR       = {Berberich, Klaus and Bedathur, Srikanta},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-1597-5},
  DOI          = {10.1145/2452376.2452389},
  LOCALID      = {Local-ID: 31F260D05B735433C1257B09003B1404-Berberich2013b},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {Advances in Database Technology (EDBT 2013)},
  PAGES        = {101--112},
  ADDRESS      = {Genova, Italy},
}
Endnote
%0 Conference Proceedings %A Berberich, Klaus %A Bedathur, Srikanta %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Computing n-gram Statistics in MapReduce : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-19CE-5 %F OTHER: Local-ID: 31F260D05B735433C1257B09003B1404-Berberich2013b %R 10.1145/2452376.2452389 %D 2013 %B 16th International Conference on Extending Database Technology %Z date of event: 2013-03-18 - 2013-03-22 %C Genova, Italy %B Advances in Database Technology %P 101 - 112 %I ACM %@ 978-1-4503-1597-5
[186]
K. Berberich and S. Bedathur, “Temporal Diversification of Search Results,” in SIGIR 2013 Workshop on Time-aware Information Access (TAIA 2013), Dublin, Ireland, 2013.
Export
BibTeX
@inproceedings{Berberich2013g,
  TITLE        = {Temporal Diversification of Search Results},
  AUTHOR       = {Berberich, Klaus and Bedathur, Srikanta},
  LANGUAGE     = {eng},
  URL          = {http://research.microsoft.com/en-us/people/milads/taia2013.proceedings.final.pdf},
  LOCALID      = {Local-ID: F06E854555530CFBC1257C6E0023BA55-Berberich2013g},
  PUBLISHER    = {Microsoft Research},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {SIGIR 2013 Workshop on Time-aware Information Access (TAIA 2013)},
  EDITOR       = {Diaz, Fernando and Dumais, Susan and Radinsky, Kira and de Rijke, Maarten and Shokouhi, Milad},
  ADDRESS      = {Dublin, Ireland},
}
Endnote
%0 Conference Proceedings %A Berberich, Klaus %A Bedathur, Srikanta %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Temporal Diversification of Search Results : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A73-B %F OTHER: Local-ID: F06E854555530CFBC1257C6E0023BA55-Berberich2013g %U http://research.microsoft.com/en-us/people/milads/taia2013.proceedings.final.pdf %D 2013 %B SIGIR 2013 Workshop on Time-aware Information Access %Z date of event: 2013-08-01 - 2013-08-01 %C Dublin, Ireland %B SIGIR 2013 Workshop on Time-aware Information Access %E Diaz, Fernando; Dumais, Susan; Radinsky, Kira; de Rijke, Maarten; Shokouhi, Milad %I Microsoft Research %U http://research.microsoft.com/en-us/people/milads/taia2013.proceedings.final.pdf
[187]
J. Biega, E. Kuzey, and F. M. Suchanek, “Inside YAGO2s: A Transparent Information Extraction Architecture,” in WWW’13, 22nd International Conference on World Wide Web, Rio de Janeiro, Brasil, 2013.
Export
BibTeX
@inproceedings{Biega:2013:IYT:2487788.2487935,
  TITLE        = {Inside {YAGO2s}: A Transparent Information Extraction Architecture},
  AUTHOR       = {Biega, Joanna and Kuzey, Erdal and Suchanek, Fabian M.},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2038-2},
  URL          = {http://dl.acm.org/citation.cfm?id=2487788.2487935},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {WWW'13, 22nd International Conference on World Wide Web},
  EDITOR       = {Schwabe, Daniel and Almeida, Virgilio and Glaser, Hartmut and Baeza-Yates, Ricardo and Moon, Sue},
  PAGES        = {325--328},
  ADDRESS      = {Rio de Janeiro, Brasil},
}
Endnote
%0 Conference Proceedings %A Biega, Joanna %A Kuzey, Erdal %A Suchanek, Fabian M. %+ Ontologies, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Ontologies, MPI for Informatics, Max Planck Society %T Inside YAGO2s: A Transparent Information Extraction Architecture : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-54E3-C %U http://dl.acm.org/citation.cfm?id=2487788.2487935 %D 2013 %B 22nd International Conference on World Wide Web %Z date of event: 2013-05-13 - 2013-05-17 %C Rio de Janeiro, Brasil %K information extraction, ontologies, yago %B WWW'13 %E Schwabe, Daniel; Almeida, Virgilio; Glaser, Hartmut; Baeza-Yates, Ricardo; Moon, Sue %P 325 - 328 %I ACM %@ 978-1-4503-2038-2
[188]
A. Boldyrev, “Dictionary-based Named Entity Recognition,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{BoldyrevMastersThesis2013,
  TITLE        = {Dictionary-based Named Entity Recognition},
  AUTHOR       = {Boldyrev, Artem},
  LANGUAGE     = {eng},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
}
Endnote
%0 Thesis %A Boldyrev, Artem %Y Weikum, Gerhard %A referee: Theobalt, Christian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Computer Graphics, MPI for Informatics, Max Planck Society %T Dictionary-based Named Entity Recognition : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C74-F %I Universität des Saarlandes %C Saarbrücken %D 2013 %V master %9 master
[189]
E. Cergani and P. Miettinen, “Discovering Relations Using Matrix Factorization Methods,” in CIKM’13, 22nd ACM International Conference on Information & Knowledge Management, San Francisco, CA, USA, 2013.
Abstract
Traditional relation extraction methods work on manually defined relations and typically expect manually labelled extraction patterns for each relation. This strongly limits the scalability of these systems. In Open Relation Extraction (ORE), the relations are identified automatically based on co-occurrences of ``surface relations'' (contexts) and entity pairs. The recently-proposed methods for ORE use partition clustering to find the relations. In this work we propose the use of matrix factorization methods instead of clustering. Specifically, we study Non-Negative Matrix Factorization (NMF) and Boolean Matrix Factorization (BMF). These methods overcome many problems inherent in clustering and perform better than the k-means clustering in our evaluation.
Export
BibTeX
@inproceedings{cergani13discovering,
  TITLE        = {Discovering Relations Using Matrix Factorization Methods},
  AUTHOR       = {Cergani, Ervina and Miettinen, Pauli},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2263-8},
  DOI          = {10.1145/2505515.2507841},
  LOCALID      = {Local-ID: B85EF949714E8A6EC1257C6A00608792-cergani13discovering},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  ABSTRACT     = {Traditional relation extraction methods work on manually defined relations and typically expect manually labelled extraction patterns for each relation. This strongly limits the scalability of these systems. In Open Relation Extraction (ORE), the relations are identified automatically based on co-occurrences of ``surface relations'' (contexts) and entity pairs. The recently-proposed methods for ORE use partition clustering to find the relations. In this work we propose the use of matrix factorization methods instead of clustering. Specifically, we study Non-Negative Matrix Factorization (NMF) and Boolean Matrix Factorization (BMF). These methods overcome many problems inherent in clustering and perform better than the k-means clustering in our evaluation.},
  BOOKTITLE    = {CIKM'13, 22nd ACM International Conference on Information \& Knowledge Management},
  EDITOR       = {Nejdl, Wolfgang and Pei, Jian and Rastogi, Rajeev},
  PAGES        = {1549--1552},
  ADDRESS      = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Cergani, Ervina %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering Relations Using Matrix Factorization Methods : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-19DB-7 %F OTHER: Local-ID: B85EF949714E8A6EC1257C6A00608792-cergani13discovering %R 10.1145/2505515.2507841 %D 2013 %B 22nd ACM International Conference on Information & Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %X Traditional relation extraction methods work on manually defined relations and typically expect manually labelled extraction patterns for each relation. This strongly limits the scalability of these systems. In Open Relation Extraction (ORE), the relations are identified automatically based on co-occurrences of ``surface relations'' (contexts) and entity pairs. The recently-proposed methods for ORE use partition clustering to find the relations. In this work we propose the use of matrix factorization methods instead of clustering. Specifically, we study Non-Negative Matrix Factorization (NMF) and Boolean Matrix Factorization (BMF). These methods overcome many problems inherent in clustering and perform better than the k-means clustering in our evaluation. %B CIKM’13 %E Nejdl, Wolfgang; Pei, Jian; Rastogi, Rajeev %P 1549 - 1552 %I ACM %@ 978-1-4503-2263-8
[190]
D. H. Chau, J. Vreeken, M. van Leeuwen, and C. Faloutsos, Eds., Proceedings of the ACM SIGKDD Full-day Workshop on Interactive Data Exploration and Analytics. ACM, 2013.
Export
BibTeX
@proceedings{Chau2013a,
  TITLE        = {Proceedings of the ACM SIGKDD Full-day Workshop on Interactive Data Exploration and Analytics (IDEA 2013)},
  EDITOR       = {Chau, Duen Horng and Vreeken, Jilles and van Leeuwen, Matthijs and Faloutsos, Christos},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2329-1},
  LOCALID      = {Local-ID: 1F669F9DC4CC9410C1257C60005593D1-Chau2013a},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  PAGES        = {103},
  ADDRESS      = {Chicago, IL, USA},
}
Endnote
%0 Conference Proceedings %E Chau, Duen Horng %E Vreeken, Jilles %E van Leeuwen, Matthijs %E Faloutsos, Christos %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Proceedings of the ACM SIGKDD Full-day Workshop on Interactive Data Exploration and Analytics : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-19E0-A %F OTHER: Local-ID: 1F669F9DC4CC9410C1257C60005593D1-Chau2013a %@ 978-1-4503-2329-1 %I ACM %D 2013 %B ACM SIGKDD Full-day Workshop on Interactive Data Exploration and Analytics %Z date of event: 2013-08-11 - 2013-08-11 %D 2013 %C Chicago, IL, USA %P 103
[191]
O. Čulo and G. de Melo, “Source-Path-Goal: Investigating the Cross-Linguistic Potential of Frame-Semantic Text Analysis,” Information Technology, vol. 54, no. 3, 2013.
Export
BibTeX
@article{CuloDeMelo2012,
  TITLE        = {Source-Path-Goal: Investigating the Cross-Linguistic Potential of Frame-Semantic Text Analysis},
  AUTHOR       = {{\v C}ulo, Oliver and de Melo, Gerard},
  LANGUAGE     = {eng},
  ISSN         = {1611-2776},
  LOCALID      = {Local-ID: 4B73EA65B090D965C1257B11002D73A2-CuloDeMelo2012},
  PUBLISHER    = {Oldenbourg Wissenschaftsverlag},
  ADDRESS      = {M{\"u}nchen},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  JOURNAL      = {Information Technology},
  VOLUME       = {54},
  NUMBER       = {3},
  PAGES        = {147--152},
}
Endnote
%0 Journal Article %A Čulo, Oliver %A de Melo, Gerard %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Source-Path-Goal: Investigating the Cross-Linguistic Potential of Frame-Semantic Text Analysis : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A44-6 %F OTHER: Local-ID: 4B73EA65B090D965C1257B11002D73A2-CuloDeMelo2012 %D 2013 %J Information Technology %O it %V 54 %N 3 %& 147 %P 147 - 152 %I Oldenbourg Wissenschaftsverlag %C München %@ false
[192]
M. Daivandy, D. Hünich, R. Jäkel, S. Metzger, R. Müller-Pfefferkorn, and B. Schuller, “Heterogeneous Resource Federation with a Centralized Security Model for Information Extraction,” Journal of Internet Services and Applications, vol. 4, 2013.
Export
BibTeX
@article{MetzgerJISA2012,
  TITLE        = {Heterogeneous Resource Federation with a Centralized Security Model for Information Extraction},
  AUTHOR       = {Daivandy, Milad and H{\"u}nich, Denis and J{\"a}kel, Rene and Metzger, Steffen and M{\"u}ller-Pfefferkorn, Ralph and Schuller, Bernd},
  LANGUAGE     = {eng},
  ISSN         = {1869-0238},
  DOI          = {10.1186/1869-0238-4-10},
  LOCALID      = {Local-ID: 9D149AAF29E33BCCC1257B83000D0937-MetzgerJISA2012},
  PUBLISHER    = {Springer},
  ADDRESS      = {New York, NY},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  JOURNAL      = {Journal of Internet Services and Applications},
  VOLUME       = {4},
  PAGES        = {1--14},
  EID          = {10},
}
Endnote
%0 Journal Article %A Daivandy, Milad %A Hünich, Denis %A Jäkel, Rene %A Metzger, Steffen %A Müller-Pfefferkorn, Ralph %A Schuller, Bernd %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Heterogeneous Resource Federation with a Centralized Security Model for Information Extraction : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-6395-C %R 10.1186/1869-0238-4-10 %F OTHER: Local-ID: 9D149AAF29E33BCCC1257B83000D0937-MetzgerJISA2012 %7 2013-03-20 %D 2013 %8 20.03.2013 %J Journal of Internet Services and Applications %V 4 %& 1 %P 1 - 14 %Z sequence number: 10 %I Springer %C New York, NY %@ false %U http://www.jisajournal.com/content/4/1/10
[193]
L. Del Corro and R. Gemulla, “ClausIE: Clause-Based Open Information Extraction,” in WWW’13, 22nd International Conference on World Wide Web, Rio de Janeiro, Brazil, 2013.
Export
BibTeX
@inproceedings{ClausIE,
  TITLE        = {{ClausIE}: Clause-Based Open Information Extraction},
  AUTHOR       = {Del Corro, Luciano and Gemulla, Rainer},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2035-1},
  URL          = {http://dl.acm.org/citation.cfm?id=2488388.2488420},
  LOCALID      = {Local-ID: 937BBDB401D54B01C1257B10003FDEFF-ClausIE},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {WWW'13, 22nd International Conference on World Wide Web},
  EDITOR       = {Schwabe, Daniel and Almeida, Virgilio and Glaser, Hartmut and Baeza-Yates, Ricardo and Moon, Sue},
  PAGES        = {355--366},
  ADDRESS      = {Rio de Janeiro, Brazil},
}
Endnote
%0 Conference Proceedings %A Del Corro, Luciano %A Gemulla, Rainer %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T ClausIE: Clause-Based Open Information Extraction : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A3A-B %F OTHER: Local-ID: 937BBDB401D54B01C1257B10003FDEFF-ClausIE %U http://dl.acm.org/citation.cfm?id=2488388.2488420 %D 2013 %B 22nd International Conference on World Wide Web %Z date of event: 2013-05-13 - 2013-05-17 %C Rio de Janeiro, Brazil %B WWW'13 %E Schwabe, Daniel; Almeida, Virgilio; Glaser, Hartmut; Baeza-Yates, Ricardo; Moon, Sue %P 355 - 366 %I ACM %@ 978-1-4503-2035-1
[194]
A. de Oliveira Melo, “Learning Rules With Categorical Attributes from Linked Data Sources,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{MeloMastersThesis2013,
  TITLE        = {Learning Rules With Categorical Attributes from Linked Data Sources},
  AUTHOR       = {de Oliveira Melo, Andr{\'e}},
  LANGUAGE     = {eng},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
}
Endnote
%0 Thesis %A de Oliveira Melo, André %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Learning Rules With Categorical Attributes from Linked Data Sources : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5C54-8 %I Universität des Saarlandes %C Saarbrücken %D 2013 %V master %9 master
[195]
S. Dutta, A. Narang, and S. K. Bera, “Streaming Quotient Filter: A Near Optimal Approximate Duplicate Detection Approach for Data Streams,” Proceedings of the VLDB Endowment (Proc. VLDB 2013), vol. 6, no. 8, 2013.
Export
BibTeX
@article{SouVLDB2013,
  TITLE        = {Streaming Quotient Filter: A Near Optimal Approximate Duplicate Detection Approach for Data Streams},
  AUTHOR       = {Dutta, Sourav and Narang, Ankur and Bera, Suman K.},
  LANGUAGE     = {eng},
  ISSN         = {2150-8097},
  PUBLISHER    = {ACM},
  ADDRESS      = {New York, NY},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  JOURNAL      = {Proceedings of the VLDB Endowment (Proc. VLDB)},
  VOLUME       = {6},
  NUMBER       = {8},
  PAGES        = {589--600},
  BOOKTITLE    = {Proceedings of the 39th International Conference on Very Large Data Bases (VLDB 2013)},
  EDITOR       = {B{\"o}hlen, Michael and Koch, Christoph},
}
Endnote
%0 Journal Article %A Dutta, Sourav %A Narang, Ankur %A Bera, Suman K. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Streaming Quotient Filter: A Near Optimal Approximate Duplicate Detection Approach for Data Streams : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-54B0-9 %D 2013 %J Proceedings of the VLDB Endowment %O PVLDB %V 6 %N 8 %& 589 %P 589 - 600 %I ACM %C New York, NY %@ false %B Proceedings of the 39th International Conference on Very Large Data Bases %O Riva del Garda, Trento, Italy VLDB 2013 %U http://www.vldb.org/pvldb/vol6/p589-dutta.pdf
[196]
M. Dylla, I. Miliaraki, and M. Theobald, “A Temporal-probabilistic Database Model for Information Extraction,” Proceedings of the VLDB Endowment (Proc. VLDB 2013), vol. 6, no. 14, 2013.
Export
BibTeX
@article{DBLP:journals/pvldb/DyllaMT13,
  TITLE        = {A Temporal-probabilistic Database Model for Information Extraction},
  AUTHOR       = {Dylla, Maximilian and Miliaraki, Iris and Theobald, Martin},
  LANGUAGE     = {eng},
  ISSN         = {2150-8097},
  URL          = {http://www.vldb.org/pvldb/vol6/p1810-miliaraki.pdf},
  LOCALID      = {Local-ID: F77B765948DFB562C1257BEF002A1315-Dylla-VLDB2013},
  PUBLISHER    = {ACM},
  ADDRESS      = {New York, NY},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  JOURNAL      = {Proceedings of the VLDB Endowment (Proc. VLDB)},
  VOLUME       = {6},
  NUMBER       = {14},
  PAGES        = {1810--1821},
  BOOKTITLE    = {Proceedings of the 39th International Conference on Very Large Data Bases (VLDB 2013)},
  EDITOR       = {B{\"o}hlen, Michael and Koch, Christoph},
}
Endnote
%0 Journal Article %A Dylla, Maximilian %A Miliaraki, Iris %A Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T A Temporal-probabilistic Database Model for Information Extraction : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1716-2 %F OTHER: Local-ID: F77B765948DFB562C1257BEF002A1315-Dylla-VLDB2013 %U http://www.vldb.org/pvldb/vol6/p1810-miliaraki.pdf %7 2013 %D 2013 %J Proceedings of the VLDB Endowment %O PVLDB %V 6 %N 14 %& 1810 %P 1810 - 1821 %I ACM %C New York, NY %@ false %B Proceedings of the 39th International Conference on Very Large Data Bases %O Riva del Garda, Trento, Italy VLDB 2013
[197]
M. Dylla, I. Miliaraki, and M. Theobald, “Top-k Query Processing in Probabilistic Databases with Non-materialized Views,” in 29th International IEEE Conference on Data Engineering (ICDE 2013), Brisbane, Australia, 2013.
Export
BibTeX
@inproceedings{DyllaICDE2013,
  TITLE        = {Top-k Query Processing in Probabilistic Databases with Non-materialized Views},
  AUTHOR       = {Dylla, Maximilian and Miliaraki, Iris and Theobald, Martin},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4673-4909-3 ; 978-1-4673-4908-6},
  DOI          = {10.1109/ICDE.2013.6544819},
  LOCALID      = {Local-ID: 41ABA8E9D9176C38C1257B0C00538601-DyllaICDE2013},
  PUBLISHER    = {IEEE},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {29th International IEEE Conference on Data Engineering (ICDE 2013)},
  PAGES        = {122--133},
  ADDRESS      = {Brisbane, Australia},
}
Endnote
%0 Conference Proceedings %A Dylla, Maximilian %A Miliaraki, Iris %A Theobald, Martin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Top-k Query Processing in Probabilistic Databases with Non-materialized Views : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-639B-F %R 10.1109/ICDE.2013.6544819 %F OTHER: Local-ID: 41ABA8E9D9176C38C1257B0C00538601-DyllaICDE2013 %D 2013 %B 29th International IEEE Conference on Data Engineering %Z date of event: 2013-04-08 - 2013-04-12 %C Brisbane, Australia %B 29th International IEEE Conference on Data Engineering %P 122 - 133 %I IEEE %@ 978-1-4673-4909-3 978-1-4673-4908-6
[198]
D. Erdős and P. Miettinen, “Walk’n’Merge: A Scalable Algorithm for Boolean Tensor Factorization,” in IEEE 13th International Conference on Data Mining (ICDM 2013), Dallas, TX, USA, 2013.
Abstract
Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure.
Export
BibTeX
@inproceedings{erdos13walknmerge,
  TITLE        = {{Walk'n'Merge}: A Scalable Algorithm for {Boolean} Tensor Factorization},
  AUTHOR       = {Erd{\H o}s, D{\'o}ra and Miettinen, Pauli},
  LANGUAGE     = {eng},
  DOI          = {10.1109/ICDM.2013.141},
  LOCALID      = {Local-ID: 4CE63F9DEBEF8E5EC1257C6A00610B3D-erdos13discovering},
  ANNOTE       = {NOTE(review): LOCALID suffix reads "erdos13discovering" although this entry's key is erdos13walknmerge -- suffix appears swapped with the sibling entry erdos13discovering; verify against the institutional repository before changing the hash-bearing LOCALID},
  PUBLISHER    = {IEEE},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure.},
  BOOKTITLE    = {IEEE 13th International Conference on Data Mining (ICDM 2013)},
  PAGES        = {1037--1042},
  ADDRESS      = {Dallas, TX, USA},
}
Endnote
%0 Conference Proceedings %A Erdős, Dóra %A Miettinen, Pauli %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Walk'n'Merge: A Scalable Algorithm for Boolean Tensor Factorization : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A48-B %F OTHER: Local-ID: 4CE63F9DEBEF8E5EC1257C6A00610B3D-erdos13discovering %R 10.1109/ICDM.2013.141 %D 2013 %8 31.10.2013 %B 13th International Conference on Data Mining %Z date of event: 2013-10-07 - 2013-10-10 %C Dallas, TX, USA %X Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure. %B IEEE 13th International Conference on Data Mining %P 1037 - 1042 %I IEEE
[199]
D. Erdős and P. Miettinen, “Discovering Facts with Boolean Tensor Tucker Decomposition,” in CIKM’13, 22nd ACM International Conference on Information & Knowledge Management, San Francisco, CA, USA, 2013.
Abstract
Open Information Extraction (Open IE) has gained increasing research interest in recent years. The first step in Open IE is to extract raw subject--predicate--object triples from the data. These raw triples are rarely usable per se, and need additional post-processing. To that end, we proposed the use of Boolean Tucker tensor decomposition to simultaneously find the entity and relation synonyms and the facts connecting them from the raw triples. Our method represents the synonym sets and facts using (sparse) binary matrices and tensor that can be efficiently stored and manipulated. We consider the presentation of the problem as a Boolean tensor decomposition as one of this paper's main contributions. To study the validity of this approach, we use a recent algorithm for scalable Boolean Tucker decomposition. We validate the results with empirical evaluation on a new semi-synthetic data set, generated to faithfully reproduce real-world data features, as well as with real-world data from existing Open IE extractor. We show that our method obtains high precision while the low recall can easily be remedied by considering the original data together with the decomposition.
Export
BibTeX
@inproceedings{erdos13discovering,
  TITLE        = {Discovering Facts with {B}oolean Tensor Tucker Decomposition},
  AUTHOR       = {Erd{\H o}s, D{\'o}ra and Miettinen, Pauli},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2263-8},
  DOI          = {10.1145/2505515.2507846},
  LOCALID      = {Local-ID: 65F19E1E95609D3CC1257C6A0061B38E-erdos13walknmerge},
  ANNOTE       = {NOTE(review): LOCALID suffix reads "erdos13walknmerge" although this entry's key is erdos13discovering -- suffix appears swapped with the sibling entry erdos13walknmerge; verify against the institutional repository before changing the hash-bearing LOCALID},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  ABSTRACT     = {Open Information Extraction (Open IE) has gained increasing research interest in recent years. The first step in Open IE is to extract raw subject--predicate--object triples from the data. These raw triples are rarely usable per se, and need additional post-processing. To that end, we proposed the use of Boolean Tucker tensor decomposition to simultaneously find the entity and relation synonyms and the facts connecting them from the raw triples. Our method represents the synonym sets and facts using (sparse) binary matrices and tensor that can be efficiently stored and manipulated. We consider the presentation of the problem as a Boolean tensor decomposition as one of this paper's main contributions. To study the validity of this approach, we use a recent algorithm for scalable Boolean Tucker decomposition. We validate the results with empirical evaluation on a new semi-synthetic data set, generated to faithfully reproduce real-world data features, as well as with real-world data from existing Open IE extractor. We show that our method obtains high precision while the low recall can easily be remedied by considering the original data together with the decomposition.},
  BOOKTITLE    = {CIKM'13, 22nd ACM International Conference on Information \& Knowledge Management},
  EDITOR       = {Nejdl, Wolfgang and Pei, Jian and Rastogi, Rajeev},
  PAGES        = {1569--1572},
  ADDRESS      = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Erdős, Dóra %A Miettinen, Pauli %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering Facts with Boolean Tensor Tucker Decomposition : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A43-6 %F OTHER: Local-ID: 65F19E1E95609D3CC1257C6A0061B38E-erdos13walknmerge %R 10.1145/2505515.2507846 %D 2013 %B 22nd ACM International Conference on Information & Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %X Open Information Extraction (Open IE) has gained increasing research interest in recent years. The first step in Open IE is to extract raw subject--predicate--object triples from the data. These raw triples are rarely usable per se, and need additional post-processing. To that end, we proposed the use of Boolean Tucker tensor decomposition to simultaneously find the entity and relation synonyms and the facts connecting them from the raw triples. Our method represents the synonym sets and facts using (sparse) binary matrices and tensor that can be efficiently stored and manipulated. We consider the presentation of the problem as a Boolean tensor decomposition as one of this paper's main contributions. To study the validity of this approach, we use a recent algorithm for scalable Boolean Tucker decomposition. We validate the results with empirical evaluation on a new semi-synthetic data set, generated to faithfully reproduce real-world data features, as well as with real-world data from existing Open IE extractor. We show that our method obtains high precision while the low recall can easily be remedied by considering the original data together with the decomposition. %B CIKM'13 %E Nejdl, Wolfgang; Pei, Jian; Rastogi, Rajeev %P 1569 - 1572 %I ACM %@ 978-1-4503-2263-8
[200]
D. Erdős and P. Miettinen, “Scalable Boolean Tensor Factorizations using Random Walks,” 2013. [Online]. Available: http://arxiv.org/abs/1310.4843. (arXiv: 1310.4843)
Abstract
Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more and more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure.
Export
BibTeX
@online{ErdosMiettinenarXiv2013,
  TITLE        = {Scalable Boolean Tensor Factorizations using Random Walks},
  AUTHOR       = {Erd{\H o}s, D{\'o}ra and Miettinen, Pauli},
  LANGUAGE     = {eng},
  URL          = {http://arxiv.org/abs/1310.4843},
  EPRINT       = {1310.4843},
  EPRINTTYPE   = {arXiv},
  EPRINTCLASS  = {cs.DS},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more and more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure.},
}
Endnote
%0 Report %A Erdős, Dóra %A Miettinen, Pauli %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Scalable Boolean Tensor Factorizations using Random Walks : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-4971-0 %U http://arxiv.org/abs/1310.4843 %D 2013 %X Tensors are becoming increasingly common in data mining, and consequently, tensor factorizations are becoming more and more important tools for data miners. When the data is binary, it is natural to ask if we can factorize it into binary factors while simultaneously making sure that the reconstructed tensor is still binary. Such factorizations, called Boolean tensor factorizations, can provide improved interpretability and find Boolean structure that is hard to express using normal factorizations. Unfortunately the algorithms for computing Boolean tensor factorizations do not usually scale well. In this paper we present a novel algorithm for finding Boolean CP and Tucker decompositions of large and sparse binary tensors. In our experimental evaluation we show that our algorithm can handle large tensors and accurately reconstructs the latent Boolean structure. %K Computer Science, Data Structures and Algorithms, cs.DS
[201]
L. Galárraga, N. Preda, and F. M. Suchanek, “Mining Rules to Align Knowledge Bases,” in AKBC’13, 22nd ACM International Conference on Information and Knowledge Management, San Francisco, CA, USA, 2013.
Export
BibTeX
@inproceedings{rosaakbc2013,
  TITLE        = {Mining Rules to Align Knowledge Bases},
  AUTHOR       = {Gal{\'a}rraga, Luis and Preda, Nicoleta and Suchanek, Fabian M.},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2411-3},
  DOI          = {10.1145/2509558.2509566},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {AKBC'13, 22nd ACM International Conference on Information and Knowledge Management},
  EDITOR       = {Suchanek, Fabian and Riedel, Sebastian and Singh, Sameer and Talukdar, Partha P.},
  PAGES        = {43--48},
  ADDRESS      = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Galárraga, Luis %A Preda, Nicoleta %A Suchanek, Fabian M. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Ontologies, MPI for Informatics, Max Planck Society %T Mining Rules to Align Knowledge Bases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-B902-2 %R 10.1145/2509558.2509566 %D 2013 %B 22nd ACM International Conference on Information and Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %B AKBC'13 %E Suchanek, Fabian; Riedel, Sebastian; Singh, Sameer; Talukdar, Partha P. %P 43 - 48 %I ACM %@ 978-1-4503-2411-3
[202]
L. Galárraga, C. Teflioudi, K. Hose, and F. M. Suchanek, “AMIE: Association Rule Mining under Incomplete Evidence in Ontological Knowledge Bases,” in WWW’13, 22nd International Conference on World Wide Web, Rio de Janeiro, Brazil, 2013.
Export
BibTeX
@inproceedings{amie2013,
  TITLE        = {{AMIE}: Association Rule Mining under Incomplete Evidence in Ontological Knowledge Bases},
  AUTHOR       = {Gal{\'a}rraga, Luis and Teflioudi, Christina and Hose, Katja and Suchanek, Fabian M.},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2035-1},
  URL          = {http://dl.acm.org/citation.cfm?id=2488388.2488425},
  LOCALID      = {Local-ID:C1257ACD0050F94E-F2B50FB8A380EA8EC1257B16005F42C3-amie2013},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {WWW'13, 22nd International Conference on World Wide Web},
  EDITOR       = {Schwabe, Daniel and Almeida, Virgilio and Glaser, Hartmut and Baeza-Yates, Ricardo and Moon, Sue},
  PAGES        = {413--422},
  ADDRESS      = {Rio de Janeiro, Brazil},
}
Endnote
%0 Conference Proceedings %A Galárraga, Luis %A Teflioudi, Christina %A Hose, Katja %A Suchanek, Fabian M. %+ Ontologies, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Ontologies, MPI for Informatics, Max Planck Society %T AMIE: Association Rule Mining under Incomplete Evidence in Ontological Knowledge Bases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-544F-D %U http://dl.acm.org/citation.cfm?id=2488388.2488425 %F OTHER: Local-ID:C1257ACD0050F94E-F2B50FB8A380EA8EC1257B16005F42C3-amie2013 %D 2013 %B 22nd International Conference on World Wide Web %Z date of event: 2013-05-13 - 2013-05-17 %C Rio de Janeiro, Brazil %B WWW’13 %E Schwabe, Daniel; Almeida, Virgilio; Glaser, Hartmut; Baeza-Yates, Ricardo; Moon, Sue %P 413 - 422 %I ACM %@ 978-1-4503-2035-1
[203]
R. Gemulla, P. J. Haas, and W. Lehner, “Non-uniformity Issues and Workarounds in Bounded-size Sampling,” The VLDB Journal, vol. 22, no. 6, 2013.
Abstract
A variety of schemes have been proposed in the literature to speed up query processing and analytics by incrementally maintaining a bounded-size uniform sample from a dataset in the presence of a sequence of insertion, deletion, and update transactions. These algorithms vary according to whether the dataset is an ordinary set or a multiset and whether the transaction sequence consists only of insertions or can include deletions and updates. We report on subtle non-uniformity issues that we found in a number of these prior bounded-size sampling schemes, including some of our own. We provide workarounds that can avoid the non-uniformity problem; these workarounds are easy to implement and incur negligible additional cost. We also consider the impact of non-uniformity in practice and describe simple statistical tests that can help detect non-uniformity in new algorithms.
Export
BibTeX
@article{Gemulla2012,
  title        = {Non-uniformity Issues and Workarounds in Bounded-size Sampling},
  author       = {Gemulla, Rainer and Haas, Peter J. and Lehner, Wolfgang},
  language     = {eng},
  issn         = {1066-8888},
  doi          = {10.1007/s00778-013-0307-0},
  localid      = {Local-ID: AE61AAD9E8EE81FCC1257B0B00394134-Gemulla2012},
  publisher    = {Springer},
  address      = {Berlin},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {A variety of schemes have been proposed in the literature to speed up query processing and analytics by incrementally maintaining a bounded-size uniform sample from a dataset in the presence of a sequence of insertion, deletion, and update transactions. These algorithms vary according to whether the dataset is an ordinary set or a multiset and whether the transaction sequence consists only of insertions or can include deletions and updates. We report on subtle non-uniformity issues that we found in a number of these prior bounded-size sampling schemes, including some of our own. We provide workarounds that can avoid the non-uniformity problem; these workarounds are easy to implement and incur negligible additional cost. We also consider the impact of non-uniformity in practice and describe simple statistical tests that can help detect non-uniformity in new algorithms.},
  journal      = {The VLDB Journal},
  volume       = {22},
  number       = {6},
  pages        = {753--772},
}
Endnote
%0 Journal Article %A Gemulla, Rainer %A Haas, P. J. %A Lehner, W. %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Non-uniformity Issues and Workarounds in Bounded-size Sampling : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A4D-1 %F OTHER: Local-ID: AE61AAD9E8EE81FCC1257B0B00394134-Gemulla2012 %R 10.1007/s00778-013-0307-0 %7 2013-02-14 %D 2013 %X A variety of schemes have been proposed in the literature to speed up query processing and analytics by incrementally maintaining a bounded-size uniform sample from a dataset in the presence of a sequence of insertion, deletion, and update transactions. These algorithms vary according to whether the dataset is an ordinary set or a multiset and whether the transaction sequence consists only of insertions or can include deletions and updates. We report on subtle non-uniformity issues that we found in a number of these prior bounded-size sampling schemes, including some of our own. We provide workarounds that can avoid the non-uniformity problem; these workarounds are easy to implement and incur negligible additional cost. We also consider the impact of non-uniformity in practice and describe simple statistical tests that can help detect non-uniformity in new algorithms. %K Database sampling, Reservoir sampling, Bernoulli sampling, Sample maintenance %J The VLDB Journal %V 22 %N 6 %& 753 %P 753 - 772 %I Springer %C Berlin %@ false
[204]
F. Grandoni, A. Gupta, S. Leonardi, P. Miettinen, P. Sankowski, and M. Singh, “Set Covering with Our Eyes Closed,” SIAM Journal on Computing, vol. 42, no. 3, 2013.
Abstract
Given a universe $U$ of $n$ elements and a weighted collection $\mathscr{S}$ of $m$ subsets of $U$, the universal set cover problem is to a priori map each element $u \in U$ to a set $S(u) \in \mathscr{S}$ containing $u$ such that any set $X \subseteq U$ is covered by $S(X)=\cup_{u\in X}S(u)$. The aim is to find a mapping such that the cost of $S(X)$ is as close as possible to the optimal set cover cost for $X$. (Such problems are also called oblivious or a priori optimization problems.) Unfortunately, for every universal mapping, the cost of $S(X)$ can be $\Omega(\sqrt{n})$ times larger than optimal if the set $X$ is adversarially chosen. In this paper we study the performance on average, when $X$ is a set of randomly chosen elements from the universe: we show how to efficiently find a universal map whose expected cost is $O(\log mn)$ times the expected optimal cost. In fact, we give a slightly improved analysis and show that this is the best possible. We generalize these ideas to weighted set cover and show similar guarantees to (nonmetric) facility location, where we have to balance the facility opening cost with the cost of connecting clients to the facilities. We show applications of our results to universal multicut and disc-covering problems and show how all these universal mappings give us algorithms for the stochastic online variants of the problems with the same competitive factors.
Export
BibTeX
@article{grandoni13set,
  title        = {Set Covering with Our Eyes Closed},
  author       = {Grandoni, Fabrizio and Gupta, Anupam and Leonardi, Stefano and Miettinen, Pauli and Sankowski, Piotr and Singh, Mohit},
  language     = {eng},
  issn         = {0097-5397},
  doi          = {10.1137/100802888},
  localid      = {Local-ID: 53C36AED23EF085AC1257C6A005E9F4B-grandoni13set},
  publisher    = {SIAM},
  address      = {Philadelphia, PA},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {Given a universe $U$ of $n$ elements and a weighted collection $\mathscr{S}$ of $m$ subsets of $U$, the universal set cover problem is to a priori map each element $u \in U$ to a set $S(u) \in \mathscr{S}$ containing $u$ such that any set $X{\subseteq U}$ is covered by $S(X)=\cup_{u\in X}S(u)$. The aim is to find a mapping such that the cost of $S(X)$ is as close as possible to the optimal set cover cost for $X$. (Such problems are also called oblivious or a priori optimization problems.) Unfortunately, for every universal mapping, the cost of $S(X)$ can be $\Omega(\sqrt{n})$ times larger than optimal if the set $X$ is adversarially chosen. In this paper we study the performance on average, when $X$ is a set of randomly chosen elements from the universe: we show how to efficiently find a universal map whose expected cost is $O(\log mn)$ times the expected optimal cost. In fact, we give a slightly improved analysis and show that this is the best possible. We generalize these ideas to weighted set cover and show similar guarantees to (nonmetric) facility location, where we have to balance the facility opening cost with the cost of connecting clients to the facilities. We show applications of our results to universal multicut and disc-covering problems and show how all these universal mappings give us algorithms for the stochastic online variants of the problems with the same competitive factors.},
  journal      = {SIAM Journal on Computing},
  volume       = {42},
  number       = {3},
  pages        = {808--830},
}
Endnote
%0 Journal Article %A Grandoni, Fabrizio %A Gupta, Anupam %A Leonardi, Stefano %A Miettinen, Pauli %A Sankowski, Piotr %A Singh, Mohit %+ External Organizations External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T Set Covering with Our Eyes Closed : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1C37-0 %R 10.1137/100802888 %F OTHER: Local-ID: 53C36AED23EF085AC1257C6A005E9F4B-grandoni13set %7 2013-05-09 %D 2013 %X Given a universe $U$ of $n$ elements and a weighted collection $\mathscr{S}$ of $m$ subsets of $U$, the universal set cover problem is to a priori map each element $u \in U$ to a set $S(u) \in \mathscr{S}$ containing $u$ such that any set $X \subseteq U$ is covered by $S(X)=\cup_{u\in X}S(u)$. The aim is to find a mapping such that the cost of $S(X)$ is as close as possible to the optimal set cover cost for $X$. (Such problems are also called oblivious or a priori optimization problems.) Unfortunately, for every universal mapping, the cost of $S(X)$ can be $\Omega(\sqrt{n})$ times larger than optimal if the set $X$ is adversarially chosen. In this paper we study the performance on average, when $X$ is a set of randomly chosen elements from the universe: we show how to efficiently find a universal map whose expected cost is $O(\log mn)$ times the expected optimal cost. In fact, we give a slightly improved analysis and show that this is the best possible. We generalize these ideas to weighted set cover and show similar guarantees to (nonmetric) facility location, where we have to balance the facility opening cost with the cost of connecting clients to the facilities. We show applications of our results to universal multicut and disc-covering problems and show how all these universal mappings give us algorithms for the stochastic online variants of the problems with the same competitive factors.
%J SIAM Journal on Computing %V 42 %N 3 %& 808 %P 808 - 830 %I SIAM %C Philadelphia, PA %@ false
[205]
A. Grycner, P. Ernst, A. Siu, and G. Weikum, “Knowledge Discovery on Incompatibility of Medical Concepts,” in Computational Linguistics and Intelligent Text Processing (CICLing 2013), Samos, Greece, 2013.
Abstract
This work proposes a method for automatically discovering incompatible medical concepts in text corpora. The approach is distantly supervised based on a seed set of incompatible concept pairs like symptoms or conditions that rule each other out. Two concepts are considered incompatible if their definitions match a template, and contain an antonym pair derived from WordNet, VerbOcean, or a hand-crafted lexicon. Our method creates templates from dependency parse trees of definitional texts, using seed pairs. The templates are applied to a text corpus, and the resulting candidate pairs are categorized and ranked by statistical measures. Since experiments show that the results face semantic ambiguity problems, we further cluster the results into different categories. We applied this approach to the concepts in Unified Medical Language System, Human Phenotype Ontology, and Mammalian Phenotype Ontology. Out of 77,496 definitions, 1,958 concept pairs were detected as incompatible with an average precision of 0.80.
Export
BibTeX
@inproceedings{Grycner2013,
  title        = {Knowledge Discovery on Incompatibility of Medical Concepts},
  author       = {Grycner, Adam and Ernst, Patrick and Siu, Amy and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-3-642-37246-9},
  doi          = {10.1007/978-3-642-37247-6_10},
  localid      = {Local-ID: 2C3D152169C55F01C1257B160035B6E6-Grycner2013},
  publisher    = {Springer},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {This work proposes a method for automatically discovering incompatible medical concepts in text corpora. The approach is distantly supervised based on a seed set of incompatible concept pairs like symptoms or conditions that rule each other out. Two concepts are considered incompatible if their definitions match a template, and contain an antonym pair derived from WordNet, VerbOcean, or a hand-crafted lexicon. Our method creates templates from dependency parse trees of definitional texts, using seed pairs. The templates are applied to a text corpus, and the resulting candidate pairs are categorized and ranked by statistical measures. Since experiments show that the results face semantic ambiguity problems, we further cluster the results into different categories. We applied this approach to the concepts in Unified Medical Language System, Human Phenotype Ontology, and Mammalian Phenotype Ontology. Out of 77,496 definitions, 1,958 concept pairs were detected as incompatible with an average precision of 0.80.},
  booktitle    = {Computational Linguistics and Intelligent Text Processing (CICLing 2013)},
  editor       = {Gelbukh, Alexander},
  pages        = {114--125},
  series       = {Lecture Notes in Computer Science},
  volume       = {7816},
  address      = {Samos, Greece},
}
Endnote
%0 Conference Proceedings %A Grycner, Adam %A Ernst, Patrick %A Siu, Amy %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Knowledge Discovery on Incompatibility of Medical Concepts : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A54-F %F OTHER: Local-ID: 2C3D152169C55F01C1257B160035B6E6-Grycner2013 %R 10.1007/978-3-642-37247-6_10 %D 2013 %B 14th International Conference on Computational Linguistics and Intelligent Text Processing %Z date of event: 2013-03-24 - 2013-03-30 %C Samos, Greece %X This work proposes a method for automatically discovering incompatible medical concepts in text corpora. The approach is distantly supervised based on a seed set of incompatible concept pairs like symptoms or conditions that rule each other out. Two concepts are considered incompatible if their definitions match a template, and contain an antonym pair derived from WordNet, VerbOcean, or a hand-crafted lexicon. Our method creates templates from dependency parse trees of definitional texts, using seed pairs. The templates are applied to a text corpus, and the resulting candidate pairs are categorized and ranked by statistical measures. Since experiments show that the results face semantic ambiguity problems, we further cluster the results into different categories. We applied this approach to the concepts in Unified Medical Language System, Human Phenotype Ontology, and Mammalian Phenotype Ontology. Out of 77,496 definitions, 1,958 concept pairs were detected as incompatible with an average precision of 0.80. %B Computational Linguistics and Intelligent Text Processing %E Gelbukh, Alexander %P 114 - 125 %I Springer %@ 978-3-642-37246-9 %B Lecture Notes in Computer Science %N 7816
[206]
A. Gubichev, S. Bedathur, and S. Seufert, “Sparqling Kleene - Fast Property Paths in RDF-3X,” in First International Workshop on Graph Data Management Experiences and Systems (GRADES 2013), New York, NY, USA, 2013.
Export
BibTeX
@inproceedings{Gubichev2013,
  title        = {Sparqling {Kleene} -- Fast Property Paths in {RDF-3X}},
  author       = {Gubichev, Andrey and Bedathur, Srikanta and Seufert, Stephan},
  language     = {eng},
  isbn         = {978-1-4503-2188-4},
  doi          = {10.1145/2484425.2484443},
  localid      = {Local-ID: 2307D92E4A8D0ABFC1257C680057DFE6-Gubichev2013},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  booktitle    = {First International Workshop on Graph Data Management Experiences and Systems (GRADES 2013)},
  editor       = {Boncz, Peter A. and Neumann, Thomas},
  pages        = {1--7},
  eid          = {14},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Gubichev, Andrey %A Bedathur, Srikanta %A Seufert, Stephan %+ External Organizations External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Sparqling Kleene - Fast Property Paths in RDF-3X : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A87-C %R 10.1145/2484425.2484443 %F OTHER: Local-ID: 2307D92E4A8D0ABFC1257C680057DFE6-Gubichev2013 %D 2013 %B First International Workshop on Graph Data Management Experiences and Systems %Z date of event: 2013-06-22 - 2013-06-27 %C New York, NY, USA %B First International Workshop on Graph Data Management Experiences and Systems %E Boncz, Peter A.; Neumann, Thomas %P 1 - 7 %Z sequence number: 14 %I ACM %@ 978-1-4503-2188-4
[207]
S. Gurajada, J. Kamps, A. Mishra, R. Schenkel, M. Theobald, and Q. Wang, “Overview of the INEX 2013 Linked Data Track,” in Working Notes for the CLEF 2013 Conference, Valencia, Spain, 2013.
Abstract
This paper provides an overview of the INEX Linked Data Track, which went into its second iteration in 2013.
Export
BibTeX
@inproceedings{INEX-LD-2012,
  title        = {Overview of the {INEX} 2013 Linked Data Track},
  author       = {Gurajada, Sairam and Kamps, Jaap and Mishra, Arunav and Schenkel, Ralf and Theobald, Martin and Wang, Qiuyue},
  language     = {eng},
  localid      = {Local-ID: 60E4C9459DE8213AC1257BBB003DE4C9-INEX-LD-2012},
  publisher    = {CLEF Initiative},
  year         = {2013},
  marginalmark = {$\bullet$},
  abstract     = {This paper provides an overview of the INEX Linked Data Track, which went into its second iteration in 2013.},
  booktitle    = {Working Notes for the CLEF 2013 Conference},
  editor       = {Forner, Pamela and Navigli, Roberto and Tufis, Dan},
  address      = {Valencia, Spain},
}
Endnote
%0 Conference Proceedings %A Gurajada, Sairam %A Kamps, Jaap %A Mishra, Arunav %A Schenkel, Ralf %A Theobald, Martin %A Wang, Qiuyue %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations %T Overview of the INEX 2013 Linked Data Track : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-3A84-7 %F OTHER: Local-ID: 60E4C9459DE8213AC1257BBB003DE4C9-INEX-LD-2012 %D 2013 %B CLEF 2013 Evaluation Labs and Workshop %Z date of event: 2013-09-23 - 2013-09-26 %C Valencia, Spain %X This paper provides an overview of the INEX Linked Data Track, which went into its second iteration in 2013. %B Working Notes for the CLEF 2013 Conference %E Forner, Pamela; Navigli, Roberto; Tufis, Dan %I CLEF Initiative %U http://www.clef-initiative.eu/documents/71612/2b349f08-de37-41a9-bb62-40c91f1daa0b
[208]
J. Hoffart, F. M. Suchanek, K. Berberich, and G. Weikum, “YAGO2: A Spatially and Temporally Enhanced Knowledge Base from Wikipedia: Extended Abstract,” in 23rd International Joint Conference on Artificial Intelligence (IJCAI 2013), Beijing, China, 2013.
Abstract
We present YAGO2, an extension of the YAGO knowledge base, in which entities, facts, and events are anchored in both time and space. YAGO2 is built automatically from Wikipedia, GeoNames, and WordNet. It contains 447 million facts about 9.8 million entities. Human evaluation confirmed an accuracy of 95% of the facts in YAGO2. In this paper, we present the extraction methodology and the integration of the spatio-temporal dimension.
Export
BibTeX
@inproceedings{Hoffart2013ww,
  title        = {{YAGO2:} {A} Spatially and Temporally Enhanced Knowledge Base from {Wikipedia}: Extended Abstract},
  author       = {Hoffart, Johannes and Suchanek, Fabian M. and Berberich, Klaus and Weikum, Gerhard},
  language     = {eng},
  isbn         = {978-1-57735-633-2},
  localid      = {Local-ID: 0F08380C815DF7A8C1257C6100731377-Hoffart2013ww},
  publisher    = {AAAI},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {We present YAGO2, an extension of the YAGO knowledge base, in which entities, facts, and events are anchored in both time and space. YAGO2 is built automatically from Wikipedia, GeoNames, and WordNet. It contains 447 million facts about 9.8 million entities. Human evaluation confirmed an accuracy of 95\% of the facts in YAGO2. In this paper, we present the extraction methodology and the integration of the spatio-temporal dimension.},
  booktitle    = {23rd International Joint Conference on Artificial Intelligence (IJCAI 2013)},
  pages        = {3161--3165},
  address      = {Beijing, China},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %A Suchanek, Fabian M. %A Berberich, Klaus %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T YAGO2: A Spatially and Temporally Enhanced Knowledge Base from Wikipedia: Extended Abstract : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1AC8-9 %F OTHER: Local-ID: 0F08380C815DF7A8C1257C6100731377-Hoffart2013ww %D 2013 %B 23rd International Joint Conference on Artificial Intelligence %Z date of event: 2013-08-03 - 2013-08-09 %C Beijing, China %X We present YAGO2, an extension of the YAGO knowledge base, in which entities, facts, and events are anchored in both time and space. YAGO2 is built automatically from Wikipedia, GeoNames, and WordNet. It contains 447 million facts about 9.8 million entities. Human evaluation confirmed an accuracy of 95% of the facts in YAGO2. In this paper, we present the extraction methodology and the integration of the spatio-temporal dimension. %B 23rd International Joint Conference on Artificial Intelligence %P 3161 - 3165 %I AAAI %@ 978-1-57735-633-2 %U http://ijcai.org/papers13/Papers/IJCAI13-478.pdf
[209]
J. Hoffart, “Discovering and Disambiguating Named Entities in Text,” in SIGMOD’13 PhD Symposium, New York, NY, USA, 2013.
Abstract
Disambiguating named entities in natural language texts maps ambiguous names to canonical entities registered in a knowledge base such as DBpedia, Freebase, or YAGO. Knowing the specific entity is an important asset for several other tasks, e.g. entity-based information retrieval or higher-level information extraction. Our approach to named entity disambiguation makes use of several ingredients: the prior probability of an entity being mentioned, the similarity between the context of the mention in the text and an entity, as well as the coherence among the entities. Extending this method, we present a novel and highly efficient measure to compute the semantic coherence between entities. This measure is especially powerful for long-tail entities or such entities that are not yet present in the knowledge base. Reliably identifying names in the input text that are not part of the knowledge base is the current focus of our work.
Export
BibTeX
@inproceedings{Hoffart2013wk,
  title        = {Discovering and Disambiguating Named Entities in Text},
  author       = {Hoffart, Johannes},
  language     = {eng},
  isbn         = {978-1-4503-2155-6},
  doi          = {10.1145/2483574.2483582},
  localid      = {Local-iD: CA2056C02ACB8EDDC1257C6100744944-Hoffart2013wk},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  abstract     = {Disambiguating named entities in natural language texts maps ambiguous names to canonical entities registered in a knowledge base such as DBpedia, Freebase, or YAGO. Knowing the specific entity is an important asset for several other tasks, e.g. entity-based information retrieval or higher-level information extraction. Our approach to named entity disambiguation makes use of several ingredients: the prior probability of an entity being mentioned, the similarity between the context of the mention in the text and an entity, as well as the coherence among the entities. Extending this method, we present a novel and highly efficient measure to compute the semantic coherence between entities. This measure is especially powerful for long-tail entities or such entities that are not yet present in the knowledge base. Reliably identifying names in the input text that are not part of the knowledge base is the current focus of our work.},
  booktitle    = {SIGMOD{\textquoteright}13 PhD Symposium},
  editor       = {Lei, Chen and Dong, Xin Luna},
  pages        = {43--48},
  address      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Hoffart, Johannes %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Discovering and Disambiguating Named Entities in Text : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A8E-D %F OTHER: Local-iD: CA2056C02ACB8EDDC1257C6100744944-Hoffart2013wk %R 10.1145/2483574.2483582 %D 2013 %B SIGMOD/PODS PhD Symposium %Z date of event: 2013-06-23 - 2013-06-23 %C New York, NY, USA %X Disambiguating named entities in natural language texts maps ambiguous names to canonical entities registered in a knowledge base such as DBpedia, Freebase, or YAGO. Knowing the specific entity is an important asset for several other tasks, e.g. entity-based information retrieval or higher-level information extraction. Our approach to named entity disambiguation makes use of several ingredients: the prior probability of an entity being mentioned, the similarity between the context of the mention in the text and an entity, as well as the coherence among the entities. Extending this method, we present a novel and highly efficient measure to compute the semantic coherence between entities. This measure is especially powerful for long-tail entities or such entities that are not yet present in the knowledge base. Reliably identifying names in the input text that are not part of the knowledge base is the current focus of our work. %B SIGMOD’13 PhD Symposium %E Lei, Chen; Dong, Xin Luna %P 43 - 48 %I ACM %@ 978-1-4503-2155-6
[210]
J. Hoffart, F. M. Suchanek, K. Berberich, and G. Weikum, “YAGO2: A Spatially and Temporally Enhanced Knowledge Base from Wikipedia,” Artificial Intelligence, vol. 194, 2013.
Export
BibTeX
@article{yago2aij2013,
  title        = {{YAGO2}: A Spatially and Temporally Enhanced Knowledge Base from {Wikipedia}},
  author       = {Hoffart, Johannes and Suchanek, Fabian M. and Berberich, Klaus and Weikum, Gerhard},
  language     = {eng},
  issn         = {0004-3702},
  url          = {http://www.sciencedirect.com/science/article/pii/S0004370212000719},
  doi          = {10.1016/j.artint.2012.06.001},
  localid      = {Local-ID:C1257ACD0050F94E-8D0B6EF25CD7906FC1257B1600621DB6-yago2@aij2013},
  publisher    = {Elsevier},
  address      = {Amsterdam},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  journal      = {Artificial Intelligence},
  volume       = {194},
  pages        = {28--61},
}
Endnote
%0 Journal Article %A Hoffart, Johannes %A Suchanek, Fabian M. %A Berberich, Klaus %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Ontologies, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T YAGO2: A Spatially and Temporally Enhanced Knowledge Base from Wikipedia : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0014-543A-C %R 10.1016/j.artint.2012.06.001 %U http://www.sciencedirect.com/science/article/pii/S0004370212000719 %F OTHER: Local-ID:C1257ACD0050F94E-8D0B6EF25CD7906FC1257B1600621DB6-yago2@aij2013 %7 2012-06-18 %D 2013 %J Artificial Intelligence %O AI %V 194 %& 28 %P 28 - 61 %I Elsevier %C Amsterdam %@ false
[211]
K. Hose and R. Schenkel, “WARP: Workload-Aware Replication and Partitioning for RDF,” in 4th International Workshop on Data Engineering meets Semantic Web (DESWeb 2013), Brisbane, Australia, 2013.
Export
BibTeX
@inproceedings{HoseSchenkel_DESWeb2013,
  title        = {{WARP}: Workload-Aware Replication and Partitioning for {RDF}},
  author       = {Hose, Katja and Schenkel, Ralf},
  language     = {eng},
  isbn         = {978-1-4673-5303-8},
  doi          = {10.1109/ICDEW.2013.6547414},
  localid      = {Local-ID: 17425053968C448EC1257AD100350E0C-HoseSchenkel_DESWeb2013},
  publisher    = {IEEE},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
  booktitle    = {4th International Workshop on Data Engineering meets Semantic Web (DESWeb 2013)},
  pages        = {1--6},
  address      = {Brisbane, Australia},
}
Endnote
%0 Conference Proceedings %A Hose, Katja %A Schenkel, Ralf %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T WARP: Workload-Aware Replication and Partitioning for RDF : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A99-4 %F OTHER: Local-ID: 17425053968C448EC1257AD100350E0C-HoseSchenkel_DESWeb2013 %R 10.1109/ICDEW.2013.6547414 %D 2013 %B 4th International Workshop on Data Engineering meets Semantic Web %Z date of event: 2013-04-08 - 2013-04-12 %C Brisbane, Australia %B 4th International Workshop on Data Engineering meets Semantic Web %P 1 - 6 %I IEEE %@ 978-1-4673-5303-8
[212]
T. Huet, J. Biega, and F. Suchanek, “Mining History with Le Monde,” in AKBC’13, 22nd ACM International Conference on Information and Knowledge Management, San Francisco, CA, USA, 2013.
Export
BibTeX
@inproceedings{Huet:2013,
  title        = {Mining History with Le Monde},
  author       = {Huet, Thomas and Biega, Joanna and Suchanek, Fabian},
  language     = {eng},
  isbn         = {978-1-4503-2411-3},
  doi          = {10.1145/2509558.2509567},
  publisher    = {ACM},
  year         = {2013},
  marginalmark = {$\bullet$},
  booktitle    = {AKBC'13, 22nd ACM International Conference on Information and Knowledge Management},
  editor       = {Suchanek, Fabian and Riedel, Sebastian and Singh, Sameer and Talukdar, Partha P.},
  pages        = {49--54},
  address      = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Huet, Thomas %A Biega, Joanna %A Suchanek, Fabian %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Mining History with Le Monde : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-5149-B %R 10.1145/2509558.2509567 %D 2013 %8 27.10.2013 %B 22nd ACM International Conference on Information and Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %K culturomics, knowledge base, le monde, yago %B AKBC'13 %E Suchanek, Fabian; Riedel, Sebastian; Singh, Sameer; Talukdar, Partha P. %P 49 - 54 %I ACM %@ 978-1-4503-2411-3
[213]
E. Ilieva, “Analyzing and Creating Top-k Entity Rankings,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{Ilieva2013,
  title        = {Analyzing and Creating Top-k Entity Rankings},
  author       = {Ilieva, Evica},
  language     = {eng},
  localid      = {Local-ID: DDA2710C9D0C5B92C1257BF00027BC81-Ilieva2013z},
  school       = {Universit{\"a}t des Saarlandes},
  address      = {Saarbr{\"u}cken},
  year         = {2013},
  marginalmark = {$\bullet$},
  date         = {2013},
}
Endnote
%0 Thesis %A Ilieva, Evica %Y Michel, Sebastian %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society International Max Planck Research School, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Analyzing and Creating Top-k Entity Rankings : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1AC1-8 %F OTHER: Local-ID: DDA2710C9D0C5B92C1257BF00027BC81-Ilieva2013z %I Universität des Saarlandes %C Saarbrücken %D 2013 %P 67 p. %V master %9 master
[214]
E. Ilieva, S. Michel, and A. Stupar, “The Essence of Knowledge (bases) Through Entity Rankings,” in CIKM’13, 22nd ACM International Conference of Information & Knowledge Management, San Francisco, CA, USA, 2013.
Abstract
We consider the task of automatically phrasing and computing top-k rankings over the information contained in common knowledge bases (KBs), such as YAGO or DBPedia. We assemble the thematic focus and ranking criteria of rankings by inspecting the present Subject, Predicate, Object (SPO) triples. Making use of numerical attributes contained in the KB we are also able to compute the actual ranking content, i.e., entities and their performances. We further discuss the integration of existing rankings into the ranking generation process for increased coverage and ranking quality. We report on first results obtained using the YAGO knowledge base.
Export
BibTeX
@inproceedings{Ilieva2013z,
  TITLE        = {The Essence of Knowledge (bases) Through Entity Rankings},
  AUTHOR       = {Ilieva, Evica and Michel, Sebastian and Stupar, Aleksandar},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2263-8},
  DOI          = {10.1145/2505515.2507838},
  LOCALID      = {Local-ID: 62BCC454FD2DBDEEC1257C690042AF3D-Ilieva2013z},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  ABSTRACT     = {We consider the task of automatically phrasing and computing top-k rankings over the information contained in common knowledge bases (KBs), such as YAGO or DBPedia. We assemble the thematic focus and ranking criteria of rankings by inspecting the present Subject, Predicate, Object (SPO) triples. Making use of numerical attributes contained in the KB we are also able to compute the actual ranking content, i.e., entities and their performances. We further discuss the integration of existing rankings into the ranking generation process for increased coverage and ranking quality. We report on first results obtained using the YAGO knowledge base.},
  BOOKTITLE    = {CIKM'13, 22nd ACM International Conference on Information \& Knowledge Management},
  EDITOR       = {Nejdl, Wolfgang and Pei, Jian and Rastogi, Rajeev},
  PAGES        = {1537--1540},
  ADDRESS      = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Ilieva, Evica %A Michel, Sebastian %A Stupar, Aleksandar %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T The Essence of Knowledge (bases) Through Entity Rankings : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1A33-A %R 10.1145/2505515.2507838 %F OTHER: Local-ID: 62BCC454FD2DBDEEC1257C690042AF3D-Ilieva2013z %D 2013 %B 22nd ACM International Conference of Information & Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %X We consider the task of automatically phrasing and computing top-k rankings over the information contained in common knowledge bases (KBs), such as YAGO or DBPedia. We assemble the thematic focus and ranking criteria of rankings by inspecting the present Subject, Predicate, Object (SPO) triples. Making use of numerical attributes contained in the KB we are also able to compute the actual ranking content, i.e., entities and their performances. We further discuss the integration of existing rankings into the ranking generation process for increased coverage and ranking quality. We report on first results obtained using the YAGO knowledge base. %B CIKM'13 %E Nejdl, Wolfgang; Pei, Jian; Rastogi, Rajeev %P 1537 - 1540 %I ACM %@ 978-1-4503-2263-8
[215]
L. Jiang, P. Luo, J. Wang, Y. Xiong, B. Lin, M. Wang, and N. An, “GRIAS: An Entity-Relation Graph Based Framework For Discovering Entity Aliases,” in IEEE 13th International Conference on Data Mining (ICDM 2013), Dallas, TX, USA, 2013.
Export
BibTeX
@inproceedings{Jiang2013y,
  TITLE        = {{GRIAS}: An Entity-Relation Graph Based Framework For Discovering Entity Aliases},
  AUTHOR       = {Jiang, Lili and Luo, Ping and Wang, Jianyong and Xiong, Yuhong and Lin, Binduan and Wang, Min and An, Ning},
  LANGUAGE     = {eng},
  ISSN         = {1550-4786},
  DOI          = {10.1109/ICDM.2013.50},
  LOCALID      = {Local-ID: C5190A26C9118030C1257C68005B209A-Jiang2013y},
  PUBLISHER    = {IEEE},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {IEEE 13th International Conference on Data Mining (ICDM 2013)},
  PAGES        = {310--319},
  ADDRESS      = {Dallas, TX, USA},
}
Endnote
%0 Conference Proceedings %A Jiang, Lili %A Luo, Ping %A Wang, Jianyong %A Xiong, Yuhong %A Lin, Binduan %A Wang, Min %A An, Ning %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations External Organizations External Organizations External Organizations External Organizations %T GRIAS: An Entity-Relation Graph Based Framework For Discovering Entity Aliases : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1ADF-8 %R 10.1109/ICDM.2013.50 %F OTHER: Local-ID: C5190A26C9118030C1257C68005B209A-Jiang2013y %D 2013 %B 13th International Conference on Data Mining %Z date of event: 2013-12-07 - 2013-12-10 %C Dallas, TX, USA %B IEEE 13th International Conference on Data Mining %P 310 - 319 %I IEEE %@ false
[216]
L. Jiang, Y. Wang, J. Hoffart, and G. Weikum, “Crowdsourced Entity Markup,” in Proceedings of the 1st International Workshop on Crowdsourcing the Semantic Web co-located with 12th International Semantic Web Conference (ISWC 2013), Sydney, Australia, 2013.
Export
BibTeX
@inproceedings{Jiang2013,
  TITLE        = {Crowdsourced Entity Markup},
  AUTHOR       = {Jiang, Lili and Wang, Yafang and Hoffart, Johannes and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISSN         = {1613-0073},
  URL          = {urn:nbn:de:0074-1030-0},
  LOCALID      = {Local-ID: 4A6F03891D73CF9DC1257C68005859B2-Jiang2013},
  PUBLISHER    = {CEUR-WS.org},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {Proceedings of the 1st International Workshop on Crowdsourcing the Semantic Web co-located with 12th International Semantic Web Conference (ISWC 2013)},
  EDITOR       = {Acosta, Maribel and Aroyo, Lora and Bernstein, Abraham and Lehmann, Jens and Noy, Natasha and Simperl, Elena},
  PAGES        = {65--68},
  SERIES       = {CEUR Workshop Proceedings},
  VOLUME       = {1030},
  ADDRESS      = {Sydney, Australia},
}
Endnote
%0 Conference Proceedings %A Jiang, Lili %A Wang, Yafang %A Hoffart, Johannes %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Crowdsourced Entity Markup : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1AD0-6 %F OTHER: Local-ID: 4A6F03891D73CF9DC1257C68005859B2-Jiang2013 %U urn:nbn:de:0074-1030-0 %D 2013 %8 06.09.2013 %B 1st International Workshop on Crowdsourcing the Semantic Web co-located with 12th International Semantic Web Conference %Z date of event: 2013-10-21 - 2013-10-25 %C Sydney, Australia %B Proceedings of the 1st International Workshop on Crowdsourcing the Semantic Web co-located with 12th International Semantic Web Conference %E Acosta, Maribel; Aroyo, Lora; Bernstein, Abraham; Lehmann, Jens; Noy, Natasha; Simperl, Elena %P 65 - 68 %I CEUR-WS.org %B CEUR Workshop Proceedings %Y Acosta, Maribel %N 1030 %P 59 - 68 %@ false
[217]
S. Karaev, “Matrix Factorization over Max-times Algebra for Data Mining,” Universität des Saarlandes, Saarbrücken, 2013.
Export
BibTeX
@mastersthesis{KaraevMaster2013,
  TITLE        = {Matrix Factorization over Max-times Algebra for Data Mining},
  AUTHOR       = {Karaev, Sanjar},
  LANGUAGE     = {eng},
  SCHOOL       = {Universit{\"a}t des Saarlandes},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
}
Endnote
%0 Thesis %A Karaev, Sanjar %Y Miettinen, Pauli %A referee: Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T Matrix Factorization over Max-times Algebra for Data Mining : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-9DD1-8 %I Universität des Saarlandes %C Saarbrücken %D 2013 %P X, 57 p. %V master %9 master
[218]
S. K. Kondreddi, P. Triantafillou, and G. Weikum, “Human Computing Games for Knowledge Acquisition,” in CIKM’13, 22nd ACM International Conference on Information & Knowledge Management, San Francisco, CA, USA, 2013.
Abstract
Automatic information extraction techniques for knowledge acquisition are known to produce noise, incomplete or incorrect facts from textual sources. Human computing offers a natural alternative to expand and complement the output of automated information extraction methods, thereby enabling us to build high-quality knowledge bases. However, relying solely on human inputs for extraction can be prohibitively expensive in practice. We demonstrate human computing games for knowledge acquisition that employ human computing to overcome the limitations in automated fact acquisition methods. We provide a combined approach that tightly integrates automated extraction techniques with human computing for effective gathering of facts. The methods we provide gather facts in the form of relationships between entities. The games we demonstrate are specifically designed to capture hard-to-extract relations between entities in narrative text -- a task that automated systems find challenging.
Export
BibTeX
@inproceedings{Kondreddi2013b,
  TITLE        = {Human Computing Games for Knowledge Acquisition},
  AUTHOR       = {Kondreddi, Sarath Kumar and Triantafillou, Peter and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2263-8},
  DOI          = {10.1145/2505515.2508213},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  ABSTRACT     = {Automatic information extraction techniques for knowledge acquisition are known to produce noise, incomplete or incorrect facts from textual sources. Human computing offers a natural alternative to expand and complement the output of automated information extraction methods, thereby enabling us to build high-quality knowledge bases. However, relying solely on human inputs for extraction can be prohibitively expensive in practice. We demonstrate human computing games for knowledge acquisition that employ human computing to overcome the limitations in automated fact acquisition methods. We provide a combined approach that tightly integrates automated extraction techniques with human computing for effective gathering of facts. The methods we provide gather facts in the form of relationships between entities. The games we demonstrate are specifically designed to capture hard-to-extract relations between entities in narrative text -- a task that automated systems find challenging.},
  BOOKTITLE    = {CIKM'13, 22nd ACM International Conference on Information \& Knowledge Management},
  EDITOR       = {Nejdl, Wolfgang and Pei, Jian and Rastogi, Rajeev},
  PAGES        = {2513--2516},
  ADDRESS      = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Kondreddi, Sarath Kumar %A Triantafillou, Peter %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Human Computing Games for Knowledge Acquisition : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1C65-8 %@ 978-1-4503-2263-8 %R 10.1145/2505515.2508213 %D 2013 %B 22nd ACM International Conference on Information & Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %X Automatic information extraction techniques for knowledge acquisition are known to produce noise, incomplete or incorrect facts from textual sources. Human computing offers a natural alternative to expand and complement the output of automated information extraction methods, thereby enabling us to build high-quality knowledge bases. However, relying solely on human inputs for extraction can be prohibitively expensive in practice. We demonstrate human computing games for knowledge acquisition that employ human computing to overcome the limitations in automated fact acquisition methods. We provide a combined approach that tightly integrates automated extraction techniques with human computing for effective gathering of facts. The methods we provide gather facts in the form of relationships between entities. The games we demonstrate are specifically designed to capture hard-to-extract relations between entities in narrative text -- a task that automated systems find challenging. %B CIKM'13 %E Nejdl, Wolfgang; Pei, Jian; Rastogi, Rajeev %P 2513 - 2516 %I ACM
[219]
S. K. Kondreddi, P. Triantafillou, and G. Weikum, “HIGGINS: Knowledge Acquisition Meets the Crowds,” in WWW’13, 22nd International Conference on World Wide Web, Rio de Janeiro, Brazil, 2013.
Abstract
We present HIGGINS, a system for Knowledge Acquisition (KA), placing emphasis on its architecture. The distinguishing characteristic and novelty of HIGGINS lies in its blending of two engines: an automated Information Extraction (IE) engine, aided by semantic resources and statistics, and a game-based Human Computing (HC) engine. We focus on KA from web pages and text sources and, in particular, on deriving relationships between entities. As a running application we utilize movie narratives, from which we wish to derive relationships among movie characters.
Export
BibTeX
@inproceedings{Kondreddi2013a,
  TITLE        = {{HIGGINS}: Knowledge Acquisition Meets the Crowds},
  AUTHOR       = {Kondreddi, Sarath Kumar and Triantafillou, Peter and Weikum, Gerhard},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2038-2},
  URL          = {http://dl.acm.org/citation.cfm?id=2487788.2487825},
  LOCALID      = {Local-ID: 6A913522403405EBC1257B3A003B5625-Kondreddi2013a},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  ABSTRACT     = {We present HIGGINS, a system for {\em Knowledge Acquisition (KA)}, placing emphasis on its architecture. The distinguishing characteristic and novelty of HIGGINS lies in its blending of two engines: an automated {\em Information Extraction (IE)} engine, aided by {\em semantic resources} and {\em statistics}, and a game-based {\em Human Computing (HC)} engine. We focus on KA from web pages and text sources and, in particular, on deriving relationships between entities. As a running application we utilize movie narratives, from which we wish to derive relationships among movie characters.},
  BOOKTITLE    = {WWW'13, 22nd International Conference on World Wide Web},
  EDITOR       = {Schwabe, Daniel and Almeida, Virgilio and Glaser, Hartmut and Baeza-Yates, Ricardo and Moon, Sue},
  PAGES        = {85--86},
  ADDRESS      = {Rio de Janeiro, Brazil},
}
Endnote
%0 Conference Proceedings %A Kondreddi, Sarath Kumar %A Triantafillou, Peter %A Weikum, Gerhard %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T HIGGINS: Knowledge Acquisition Meets the Crowds : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1B79-4 %F OTHER: Local-ID: 6A913522403405EBC1257B3A003B5625-Kondreddi2013a %U http://dl.acm.org/citation.cfm?id=2487788.2487825 %D 2013 %B 22nd International Conference on World Wide Web %Z date of event: 2013-05-13 - 2013-05-17 %C Rio de Janeiro, Brazil %X We present HIGGINS, a system for \em Knowledge Acquisition (KA)}, placing emphasis on its architecture. The distinguishing characteristic and novelty of HIGGINS lies in its blending of two engines: an automated {\em Information Extraction (IE)} engine, aided by {\em semantic resources} and {\em statistics}, and a game-based {\em Human Computing (HC) engine. We focus on KA from web pages and text sources and, in particular, on deriving relationships between entities. As a running application we utilize movie narratives, from which we wish to derive relationships among movie characters. %B WWW'13 %E Schwabe, Daniel; Almeida, Virgilio; Glaser, Hartmut; Baeza-Yates, Ricardo; Moon, Sue %P 85 - 86 %I ACM %@ 978-1-4503-2038-2
[220]
K.-N. Kontonasios, J. Vreeken, and T. De Bie, “Maximum Entropy Models for Iteratively Identifying Subjectively Interesting Structure in Real-valued Data,” in Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2013), Prague, Czech Republic, 2013.
Export
BibTeX
@inproceedings{Konto2013a,
  TITLE        = {Maximum Entropy Models for Iteratively Identifying Subjectively Interesting Structure in Real-valued Data},
  AUTHOR       = {Kontonasios, Kleanthis-Nikolaos and Vreeken, Jilles and De Bie, Tijl},
  LANGUAGE     = {eng},
  ISBN         = {978-3-642-33485-6},
  DOI          = {10.1007/978-3-642-40991-2_17},
  LOCALID      = {Local-ID: ED5813E38D4C4066C1257C6000547D94-Konto2013a},
  PUBLISHER    = {Springer},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  BOOKTITLE    = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2013)},
  EDITOR       = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and {\v Z}elezn{\'y}, Filip},
  PAGES        = {256--271},
  SERIES       = {Lecture Notes in Artificial Intelligence},
  VOLUME       = {8189},
  ADDRESS      = {Prague, Czech Republic},
}
Endnote
%0 Conference Proceedings %A Kontonasios, Kleanthis-Nikolaos %A Vreeken, Jilles %A De Bie, Tijl %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations %T Maximum Entropy Models for Iteratively Identifying Subjectively Interesting Structure in Real-valued Data : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1CCC-4 %F OTHER: Local-ID: ED5813E38D4C4066C1257C6000547D94-Konto2013a %R 10.1007/978-3-642-40991-2_17 %D 2013 %B European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases %Z date of event: 2013-09-23 - 2013-09-27 %C Prague, Czech Republic %B Machine Learning and Knowledge Discovery in Databases %E Blockeel, Hendrik; Kersting, Kristian; Nijssen, Siegfried; Želenzný, Filip %P 256 - 271 %I Springer %@ 978-3-642-33485-6 %B Lecture Notes in Artificial Intelligence %N 8189
[221]
F. Makari, B. Awerbuch, R. Gemulla, R. Khandekar, J. Mestre, and M. Sozio, “A Distributed Algorithm for Large-scale Generalized Matching,” Proceedings of the VLDB Endowment (Proc. VLDB 2013), vol. 6, no. 9, 2013.
Export
BibTeX
@article{MakariAGKMS13,
  TITLE        = {A Distributed Algorithm for Large-scale Generalized Matching},
  AUTHOR       = {Makari, Faraz and Awerbuch, Baruch and Gemulla, Rainer and Khandekar, Rohit and Mestre, Juli{\'a}n and Sozio, Mauro},
  LANGUAGE     = {eng},
  PUBLISHER    = {ACM},
  ADDRESS      = {New York, NY},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  JOURNAL      = {Proceedings of the VLDB Endowment (Proc. VLDB)},
  VOLUME       = {6},
  NUMBER       = {9},
  PAGES        = {613--624},
  BOOKTITLE    = {Proceedings of the 39th International Conference on Very Large Data Bases (VLDB 2013)},
  EDITOR       = {B{\"o}hlen, Michael and Koch, Christoph},
}
Endnote
%0 Journal Article %A Makari, Faraz %A Awerbuch, Baruch %A Gemula, Rainer %A Khandekar, Rohit %A Mestre, Julián %A Sozio, Mauro %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations Algorithms and Complexity, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Distributed Algorithm for Large-scale Generalized Matching : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-9CB4-1 %7 2013 %D 2013 %J Proceedings of the VLDB Endowment %O PVLDB %V 6 %N 9 %& 613 %P 613 - 624 %I ACM %C New York, NY %B Proceedings of the 39th International Conference on Very Large Data Bases %O August 26th - 30th 2013, Riva del Garda, Trento, Italy VLDB 2013 %U http://www.vldb.org/pvldb/vol6/p613-makarimanshadi.pdf
[222]
F. Makari and R. Gemulla, “A Distributed Approximation Algorithm for Mixed Packing-covering Linear Programs,” in Proceedings of the NIPS Workshop on Big Learning, Lake Tahoe, NV, USA, 2013.
Export
BibTeX
@inproceedings{MakariG13,
  TITLE        = {A Distributed Approximation Algorithm for Mixed Packing-covering Linear Programs},
  AUTHOR       = {Makari, Faraz and Gemulla, Rainer},
  LANGUAGE     = {eng},
  URL          = {http://biglearn.org/2013/files/papers/biglearning2013_submission_14.pdf},
  PUBLISHER    = {NIPS},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  BOOKTITLE    = {Proceedings of the NIPS Workshop on Big Learning},
  ADDRESS      = {Lake Tahoe, NV, USA},
}
Endnote
%0 Conference Proceedings %A Makari, Faraz %A Gemulla, Rainer %+ Databases and Information Systems, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Distributed Approximation Algorithm for Mixed Packing-covering Linear Programs : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-9CC6-A %U http://biglearn.org/2013/files/papers/biglearning2013_submission_14.pdf %D 2013 %B NIPS 2013 Workshop on Big Learning %Z date of event: 2013-12-09 - 2013-12-09 %C Lake Tahoe, NV, USA %B Proceedings of the NIPS Workshop on Big Learning %I NIPS
[223]
F. Makari, B. Awerbuch, R. Gemulla, R. Khandekar, J. Mestre, and M. Sozio, “A Distributed Algorithm for Large-scale Generalized Matching,” Max-Planck-Institut für Informatik, Saarbrücken, MPI-I-2013-5-002, 2013.
Abstract
Generalized matching problems arise in a number of applications, including computational advertising, recommender systems, and trade markets. Consider, for example, the problem of recommending multimedia items (e.g., DVDs) to users such that (1) users are recommended items that they are likely to be interested in, (2) every user gets neither too few nor too many recommendations, and (3) only items available in stock are recommended to users. State-of-the-art matching algorithms fail at coping with large real-world instances, which may involve millions of users and items. We propose the first distributed algorithm for computing near-optimal solutions to large-scale generalized matching problems like the one above. Our algorithm is designed to run on a small cluster of commodity nodes (or in a MapReduce environment), has strong approximation guarantees, and requires only a poly-logarithmic number of passes over the input. In particular, we propose a novel distributed algorithm to approximately solve mixed packing-covering linear programs, which include but are not limited to generalized matching problems. Experiments on real-world and synthetic data suggest that our algorithm scales to very large problem sizes and can be orders of magnitude faster than alternative approaches.
Export
BibTeX
@techreport{MakariAwerbuchGemullaKhandekarMestreSozio2013,
  TITLE        = {A Distributed Algorithm for Large-scale Generalized Matching},
  AUTHOR       = {Makari, Faraz and Awerbuch, Baruch and Gemulla, Rainer and Khandekar, Rohit and Mestre, Juli{\'a}n and Sozio, Mauro},
  LANGUAGE     = {eng},
  ISSN         = {0946-011X},
  NUMBER       = {MPI-I-2013-5-002},
  INSTITUTION  = {Max-Planck-Institut f{\"u}r Informatik},
  ADDRESS      = {Saarbr{\"u}cken},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  ABSTRACT     = {Generalized matching problems arise in a number of applications, including computational advertising, recommender systems, and trade markets. Consider, for example, the problem of recommending multimedia items (e.g., DVDs) to users such that (1) users are recommended items that they are likely to be interested in, (2) every user gets neither too few nor too many recommendations, and (3) only items available in stock are recommended to users. State-of-the-art matching algorithms fail at coping with large real-world instances, which may involve millions of users and items. We propose the first distributed algorithm for computing near-optimal solutions to large-scale generalized matching problems like the one above. Our algorithm is designed to run on a small cluster of commodity nodes (or in a MapReduce environment), has strong approximation guarantees, and requires only a poly-logarithmic number of passes over the input. In particular, we propose a novel distributed algorithm to approximately solve mixed packing-covering linear programs, which include but are not limited to generalized matching problems. Experiments on real-world and synthetic data suggest that our algorithm scales to very large problem sizes and can be orders of magnitude faster than alternative approaches.},
  TYPE         = {Research Reports},
}
Endnote
%0 Report %A Makari, Faraz %A Awerbuch, Baruch %A Gemulla, Rainer %A Khandekar, Rohit %A Mestre, Julian %A Sozio, Mauro %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society Algorithms and Complexity, MPI for Informatics, Max Planck Society Algorithms and Complexity, MPI for Informatics, Max Planck Society Databases and Information Systems, MPI for Informatics, Max Planck Society %T A Distributed Algorithm for Large-scale Generalized Matching : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0024-03B4-3 %Y Max-Planck-Institut für Informatik %C Saarbrücken %D 2013 %P 39 p. %X Generalized matching problems arise in a number of applications, including computational advertising, recommender systems, and trade markets. Consider, for example, the problem of recommending multimedia items (e.g., DVDs) to users such that (1) users are recommended items that they are likely to be interested in, (2) every user gets neither too few nor too many recommendations, and (3) only items available in stock are recommended to users. State-of-the-art matching algorithms fail at coping with large real-world instances, which may involve millions of users and items. We propose the first distributed algorithm for computing near-optimal solutions to large-scale generalized matching problems like the one above. Our algorithm is designed to run on a small cluster of commodity nodes (or in a MapReduce environment), has strong approximation guarantees, and requires only a poly-logarithmic number of passes over the input. In particular, we propose a novel distributed algorithm to approximately solve mixed packing-covering linear programs, which include but are not limited to generalized matching problems. 
Experiments on real-world and synthetic data suggest that our algorithm scales to very large problem sizes and can be orders of magnitude faster than alternative approaches. %B Research Reports %@ false
[224]
M. Mampaey and J. Vreeken, “Summarizing Categorical Data by Clustering Attributes,” Data Mining and Knowledge Discovery, vol. 26, no. 1, 2013.
Export
BibTeX
@article{Mampaey2013a,
  TITLE        = {Summarizing Categorical Data by Clustering Attributes},
  AUTHOR       = {Mampaey, Michael and Vreeken, Jilles},
  LANGUAGE     = {eng},
  ISSN         = {1384-5810},
  DOI          = {10.1007/s10618-011-0246-6},
  LOCALID      = {Local-ID: 4366BFBB9FB411E9C1257C6000528295-Mampaey2013a},
  PUBLISHER    = {Springer},
  ADDRESS      = {Berlin},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  JOURNAL      = {Data Mining and Knowledge Discovery},
  VOLUME       = {26},
  NUMBER       = {1},
  PAGES        = {130--173},
}
Endnote
%0 Journal Article %A Mampaey, Michael %A Vreeken, Jilles %+ External Organizations Databases and Information Systems, MPI for Informatics, Max Planck Society %T Summarizing Categorical Data by Clustering Attributes : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1CD3-1 %R 10.1007/s10618-011-0246-6 %F OTHER: Local-ID: 4366BFBB9FB411E9C1257C6000528295-Mampaey2013a %7 2013-01 %D 2013 %J Data Mining and Knowledge Discovery %V 26 %N 1 %& 130 %P 130 - 173 %I Springer %C Berlin %@ false
[225]
S. Metzger, R. Schenkel, and M. Sydow, “QBEES: Query by Entity Examples,” in CIKM’13, 22nd ACM International Conference on Information & Knowledge Management, San Francisco, CA, USA, 2013.
Abstract
Structured knowledge bases are an increasingly important way for storing and retrieving information. Within such knowledge bases, an important search task is finding similar entities based on one or more example entities. We present QBEES, a novel framework for defining entity similarity based only on structural features, so-called aspects, of the entities, that includes query-dependent and query-independent entity ranking components. We present evaluation results with a number of existing entity list completion benchmarks, comparing to several state-of-the-art baselines.
Export
BibTeX
@inproceedings{MetzgerSS_CIKM2013,
  TITLE        = {{QBEES}: Query by Entity Examples},
  AUTHOR       = {Metzger, Steffen and Schenkel, Ralf and Sydow, Marcin},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2263-8},
  DOI          = {10.1145/2505515.2507873},
  LOCALID      = {Local-ID: D07B27BEBFE9E7D8C1257BB10024BD7C-MetzgerSS_CIKM2013},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  ABSTRACT     = {Structured knowledge bases are an increasingly important way for storing and retrieving information. Within such knowledge bases, an important search task is finding similar entities based on one or more example entities. We present QBEES, a novel framework for defining entity similarity based only on structural features, so-called aspects, of the entities, that includes query-dependent and query-independent entity ranking components. We present evaluation results with a number of existing entity list completion benchmarks, comparing to several state-of-the-art baselines.},
  BOOKTITLE    = {CIKM'13, 22nd ACM International Conference on Information \& Knowledge Management},
  EDITOR       = {Nejdl, Wolfgang and Pei, Jian and Rastogi, Rajeev},
  PAGES        = {1829--1832},
  ADDRESS      = {San Francisco, CA, USA},
}
Endnote
%0 Conference Proceedings %A Metzger, Steffen %A Schenkel, Ralf %A Sydow, Marcin %+ Databases and Information Systems, MPI for Informatics, Max Planck Society External Organizations External Organizations %T QBEES: Query by Entity Examples : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0015-1D0B-E %F OTHER: Local-ID: D07B27BEBFE9E7D8C1257BB10024BD7C-MetzgerSS_CIKM2013 %R 10.1145/2505515.2507873 %D 2013 %B 22nd ACM International Conference on Information & Knowledge Management %Z date of event: 2013-10-27 - 2013-11-01 %C San Francisco, CA, USA %X Structured knowledge bases are an increasingly important way for storing and retrieving information. Within such knowledge bases, an important search task is finding similar entities based on one or more example entities. We present QBEES, a novel framework for defining entity similarity based only on structural features, so-called aspects, of the entities, that includes query-dependent and query-independent entity ranking components. We present evaluation results with a number of existing entity list completion benchmarks, comparing to several state-of-the-art baselines. %B CIKM'13 %E Nejdl, Wolfgang; Pei, Jian; Rastogi, Rajeev %P 1829 - 1832 %I ACM %@ 978-1-4503-2263-8
[226]
P. Miettinen, “Fully Dynamic Quasi-Biclique Edge Covers via Boolean Matrix Factorizations,” in 1st ACM SIGMOD Workshop on Dynamic Networks Management and Mining (DyNetMM 2013), New York, NY, USA, 2013.
Abstract
An important way of summarizing a bipartite graph is to give a set of (quasi-) bicliques that contain (almost) all of its edges. These quasi-bicliques are somewhat similar to clustering of the nodes, giving sets of similar nodes. Unlike clustering, however, the quasi-bicliques are not required to partition the nodes, allowing greater flexibility when creating them. When we identify the bipartite graph with its bi-adjacency matrix, the problem of finding these quasi-bicliques turns into the problem of finding the Boolean matrix factorization of the bi-adjacency matrix -- a problem that has received increasing research interest in data mining in recent years. But many real-world graphs are dynamic and evolve over time. How can we update our bicliques without having to re-compute them from the scratch? An algorithm was recently proposed for this task (Miettinen, ICMD 2012). The algorithm, however, is only able to handle the case where the new 1s are added to the matrix~--~it cannot handle the removal of existing 1s. Furthermore, the algorithm cannot adjust the rank of the factorization. This paper extends said algorithm with the capability of working in fully dynamic setting (with both additions and deletions) and with capability of adjusting its rank dynamically, as well. The behaviour and performance of the algorithm is studied in experiments conducted with both real-world and synthetic data.
Export
BibTeX
@inproceedings{miettinen13fully,
  TITLE        = {Fully Dynamic Quasi-Biclique Edge Covers via {Boolean} Matrix Factorizations},
  AUTHOR       = {Miettinen, Pauli},
  LANGUAGE     = {eng},
  ISBN         = {978-1-4503-2209-6},
  DOI          = {10.1145/2489247.2489250},
  LOCALID      = {Local-ID: CA06050F2A3AFCF0C1257C6A005F39ED-miettinen13fully},
  PUBLISHER    = {ACM},
  YEAR         = {2013},
  MARGINALMARK = {$\bullet$},
  DATE         = {2013},
  ABSTRACT     = {An important way of summarizing a bipartite graph is to give a set of (quasi-) bicliques that contain (almost) all of its edges. These quasi-bicliques are somewhat similar to clustering of the nodes, giving sets of similar nodes. Unlike clustering, however, the quasi-bicliques are not required to partition the nodes, allowing greater flexibility when creating them. When we identify the bipartite graph with its bi-adjacency matrix, the problem of finding these quasi-bicliques turns into the problem of finding the Boolean matrix factorization of the bi-adjacency matrix -- a problem that has received increasing research interest in data mining in recent years. But many real-world graphs are dynamic and evolve over time. How can we update our bicliques without having to re-compute them from the scratch? An algorithm was recently proposed for this task (Miettinen, ICMD 2012). The algorithm, however, is only able to handle the case where the new 1s are added to the matrix~--~it cannot handle the removal of existing 1s. Furthermore, the algorithm cannot adjust the rank of the factorization. This paper extends said algorithm with the capability of working in fully dynamic setting (with both additions and deletions) and with capability of adjusting its rank dynamically, as well. The behaviour and performance of the algorithm is studied in experiments conducted with both real-world and synthetic data.},
  BOOKTITLE    = {1st ACM SIGMOD Workshop on Dynamic Networks Management and Mining (DyNetMM 2013)},
  PAGES        = {17--24},
  ADDRESS      = {New York, NY, USA},
}
Endnote
%0 Conference Proceedings %A Miettinen, Pauli %+ Databases and Information Systems, MPI for Informatics, Max Planck Society %T Fully Dynamic Quasi-Biclique Edge Covers via Boolean Matrix Factorizations : %G eng %U http://hdl.handle.net/11858/00-001M-0000-0018-EF87-D %F OTHER: Local-ID: CA06050F2A3AFCF0C1257C6A005F39ED-miettinen13fully %R 10.1145/2489247.2489250 %D 2013 %B 1st ACM SIGMOD Workshop on Dynamic Networks Management and Mining %Z date of event: 2013-06-23 - 2013-06-23 %C New York, NY, USA %X An important way of summarizing a bipartite graph is to give a set of (quasi-) bicliques that contain (almost) all of its edges. These quasi-bicliques are somewhat similar to clustering of the nodes, giving sets of similar nodes. Unlike clustering, however, the quasi-bicliques are not required to partition the nodes, allowing greater flexibility when creating them. When we identify the bipartite graph with its bi-adjacency matrix, the problem of finding these quasi-bicliques turns into the problem of finding the Boolean matrix factorization of the bi-adjacency matrix -- a problem that has received increasing research interest in data mining in recent years. But many real-world graphs are dynamic and evolve over time. How can we update our bicliques without having to re-compute them from the scratch? An algorithm was recently proposed for this task (Miettinen, ICMD 2012). The algorithm, however, is only able to handle the case where the new 1s are added to the matrix~--~it cannot handle the removal of existing 1s. Furthermore, the algorithm cannot adjust the rank of the factorization. This paper extends said algorithm with the capability of working in fully dynamic setting (with both additions and deletions) and with capability of adjusting its rank dynamically, as well. The behaviour and performance of the algorithm is studied in experiments conducted with both real-world and synthetic data. 
%B 1st ACM SIGMOD Workshop on Dynamic Networks Management and Mining %P 17 - 24 %I ACM %@ 978-1-4503-2209-6
[227]
D. Milchevski, “Entity Recommendation Based on Wikipedia,” Universität des Saarlandes, Saarbrücken, 2013.