% arXiv preprint; the export tagged it with the arXiv archive "math".
% Stored as a misc entry with eprint fields instead of a fake journal name.
% zscc is an ignored bookkeeping field (Zotero citation-count tag), kept out of note.
@misc{culbertson_bayesian_2013,
  author        = {Culbertson, Jared and Sturtz, Kirk},
  title         = {{Bayesian} machine learning via category theory},
  year          = {2013},
  month         = dec,
  eprint        = {1312.1445},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1312.1445},
  urldate       = {2019-11-22},
  abstract      = {From the Bayesian perspective, the category of conditional probabilities (a variant of the Kleisli category of the Giry monad, whose objects are measurable spaces and arrows are Markov kernels) gives a nice framework for conceptualization and analysis of many aspects of machine learning. Using categorical methods, we construct models for parametric and nonparametric Bayesian reasoning on function spaces, thus providing a basis for the supervised learning problem. In particular, stochastic processes are arrows to these function spaces which serve as prior probabilities. The resulting inference maps can often be analytically constructed in this symmetric monoidal weakly closed category. We also show how to view general stochastic processes using functor categories and demonstrate the Kalman filter as an archetype for the hidden Markov model.},
  keywords      = {Bayesianism, Categorical ML, Categorical probability theory, Purely theoretical},
  zscc          = {0000006},
}
% Journal article in Nature.
% doi is the full DOI (suffix matches the article id in the url); the export
% carried only the shortDOI alias 10/gdxwhq.
% zscc is an ignored bookkeeping field (Zotero citation-count tag), kept out of note.
@article{ghahramani_probabilistic_2015,
  author   = {Ghahramani, Zoubin},
  title    = {Probabilistic machine learning and artificial intelligence},
  journal  = {Nature},
  volume   = {521},
  number   = {7553},
  pages    = {452--459},
  year     = {2015},
  month    = may,
  issn     = {0028-0836, 1476-4687},
  doi      = {10.1038/nature14541},
  url      = {http://www.nature.com/articles/nature14541},
  urldate  = {2019-11-28},
  language = {en},
  keywords = {Bayesian inference, Classical ML, Machine learning, Probabilistic programming},
  zscc     = {0000611},
}
% Microsoft Research publication (see url); typed as a technical report because
% the record has no journal.
% NOTE(review): report number MSR-TR-95-06 is not in the original export;
% confirm against the Microsoft Research publication page.
% The abstract is truncated in the export.
% zscc is an ignored bookkeeping field (Zotero citation-count tag), kept out of note.
@techreport{heckerman_tutorial_1995,
  author      = {Heckerman, David},
  title       = {A Tutorial on Learning with {Bayesian} Networks},
  institution = {Microsoft Research},
  number      = {MSR-TR-95-06},
  year        = {1995},
  month       = mar,
  url         = {https://www.microsoft.com/en-us/research/publication/a-tutorial-on-learning-with-bayesian-networks/},
  urldate     = {2019-11-22},
  language    = {en-US},
  abstract    = {A Bayesian network is a graphical model that encodes probabilistic relationships among variables of interest. When used in conjunction with statistical techniques, the graphical model has several advantages for data analysis. One, because the model encodes dependencies among all variables, it readily handles situations where some data entries are missing. Two, a Bayesian network can …},
  keywords    = {Bayesianism, Classical ML, Machine learning},
  zscc        = {0000058},
}
% arXiv preprint; the export tagged it with the arXiv archive "cs".
% Stored as a misc entry with eprint fields instead of a fake journal name.
% zscc is an ignored bookkeeping field (Zotero citation-count tag), kept out of note.
@misc{jacobs_categorical_2018,
  author        = {Jacobs, Bart},
  title         = {Categorical Aspects of Parameter Learning},
  year          = {2018},
  month         = oct,
  eprint        = {1810.05814},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1810.05814},
  urldate       = {2019-11-21},
  abstract      = {Parameter learning is the technique for obtaining the probabilistic parameters in conditional probability tables in Bayesian networks from tables with (observed) data --- where it is assumed that the underlying graphical structure is known. There are basically two ways of doing so, referred to as maximal likelihood estimation (MLE) and as Bayesian learning. This paper provides a categorical analysis of these two techniques and describes them in terms of basic properties of the multiset monad M, the distribution monad D and the Giry monad G. In essence, learning is about the relationships between multisets (used for counting) on the one hand and probability distributions on the other. These relationships will be described as suitable natural transformations.},
  keywords      = {Bayesianism, Categorical ML, Categorical probability theory, Machine learning},
  zscc          = {0000001},
}
% Article in Electronic Notes in Theoretical Computer Science, vol. 325
% (the MFPS XXXII conference volume, recorded in series).
% NOTE(review): doi expanded from the shortDOI alias 10/ggdgbb; confirm that it
% resolves to the ScienceDirect record given in url.
% zscc is an ignored bookkeeping field (Zotero citation-count tag), kept out of note.
@article{jacobs_predicate/state_2016,
  author   = {Jacobs, Bart and Zanasi, Fabio},
  title    = {A Predicate/State Transformer Semantics for {Bayesian} Learning},
  journal  = {Electronic Notes in Theoretical Computer Science},
  series   = {The Thirty-second Conference on the Mathematical Foundations of Programming Semantics ({MFPS} {XXXII})},
  volume   = {325},
  pages    = {185--200},
  year     = {2016},
  month    = oct,
  issn     = {1571-0661},
  doi      = {10.1016/j.entcs.2016.09.038},
  url      = {http://www.sciencedirect.com/science/article/pii/S1571066116300883},
  urldate  = {2019-11-24},
  language = {en},
  abstract = {This paper establishes a link between Bayesian inference (learning) and predicate and state transformer operations from programming semantics and logic. Specifically, a very general definition of backward inference is given via first applying a predicate transformer and then conditioning. Analogously, forward inference involves first conditioning and then applying a state transformer. These definitions are illustrated in many examples in discrete and continuous probability theory and also in quantum theory.},
  keywords = {Bayesianism, Categorical ML, Categorical probability theory, Effectus theory, Programming language theory, Semantics},
  zscc     = {0000030},
}
% Journal article in The Annals of Statistics.
% doi is the full DOI (suffix matches the Project Euclid id in the url); the
% export carried only the shortDOI alias 10/bkts3m.
% zscc is an ignored bookkeeping field (Zotero citation-count tag), kept out of note.
@article{mccullagh_what_2002,
  author   = {McCullagh, Peter},
  title    = {What is a statistical model?},
  journal  = {The Annals of Statistics},
  volume   = {30},
  number   = {5},
  pages    = {1225--1310},
  year     = {2002},
  month    = oct,
  doi      = {10.1214/aos/1035844977},
  url      = {http://projecteuclid.org/euclid.aos/1035844977},
  urldate  = {2019-11-22},
  language = {en},
  keywords = {Bayesianism, Categorical ML, Categorical probability theory, Compendium, Purely theoretical, Statistical learning theory},
  zscc     = {0000230},
}
% arXiv preprint; the export tagged it with the arXiv archives "cs, stat".
% Stored as a misc entry with eprint fields instead of a fake journal name.
% zscc is an ignored bookkeeping field (Zotero citation-count tag), kept out of note.
@misc{tran_deep_2017,
  author        = {Tran, Dustin and Hoffman, Matthew D. and Saurous, Rif A. and Brevdo, Eugene and Murphy, Kevin and Blei, David M.},
  title         = {Deep Probabilistic Programming},
  year          = {2017},
  month         = mar,
  eprint        = {1701.03757},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1701.03757},
  urldate       = {2019-11-27},
  abstract      = {We propose Edward, a Turing-complete probabilistic programming language. Edward defines two compositional representations---random variables and inference. By treating inference as a first class citizen, on a par with modeling, we show that probabilistic programming can be as flexible and computationally efficient as traditional deep learning. For flexibility, Edward makes it easy to fit the same model using a variety of composable inference methods, ranging from point estimation to variational inference to MCMC. In addition, Edward can reuse the modeling representation as part of inference, facilitating the design of rich variational models and generative adversarial networks. For efficiency, Edward is integrated into TensorFlow, providing significant speedups over existing probabilistic systems. For example, we show on a benchmark logistic regression task that Edward is at least 35x faster than Stan and 6x faster than PyMC3. Further, Edward incurs no runtime overhead: it is as fast as handwritten TensorFlow.},
  keywords      = {Bayesian inference, Implementation, Machine learning, Probabilistic programming},
  zscc          = {0000108},
}
% Book published on Cambridge Core (DOI prefix 10.1017/CBO...), so typed as a book.
% The export's site-relative url is made absolute, and its scraped page title
% (which had been stored as the abstract) is dropped.
% NOTE(review): publisher inferred from the Cambridge Core record; confirm.
% zscc is an ignored bookkeeping field (Zotero citation-count tag), kept out of note.
@book{watanabe_algebraic_2009,
  author    = {Watanabe, Sumio},
  title     = {Algebraic Geometry and Statistical Learning Theory},
  publisher = {Cambridge University Press},
  year      = {2009},
  month     = aug,
  doi       = {10.1017/CBO9780511800474},
  url       = {https://www.cambridge.org/core/books/algebraic-geometry-and-statistical-learning-theory/9C8FD1BDC817E2FC79117C7F41544A3A},
  urldate   = {2019-11-22},
  language  = {en},
  keywords  = {Algebra, Bayesianism, Purely theoretical, Statistical learning theory},
  zscc      = {0000276},
}
% Encyclopedia article (chapter in an edited reference work).
% NOTE(review): author given names expanded from the initials in the export
% (N. Wermuth, D. R. Cox); confirm spelling.
% zscc is an ignored bookkeeping field (Zotero citation-count tag), kept out of note.
@incollection{wermuth_graphical_2001,
  author     = {Wermuth, Nanny and Cox, David R.},
  title      = {Graphical Models: Overview},
  shorttitle = {Graphical Models},
  booktitle  = {International Encyclopedia of the Social \& Behavioral Sciences},
  editor     = {Smelser, Neil J. and Baltes, Paul B.},
  publisher  = {Pergamon},
  address    = {Oxford},
  pages      = {6379--6386},
  year       = {2001},
  month      = jan,
  isbn       = {978-0-08-043076-8},
  doi        = {10.1016/B0-08-043076-7/00440-X},
  url        = {http://www.sciencedirect.com/science/article/pii/B008043076700440X},
  urldate    = {2019-11-22},
  language   = {en},
  abstract   = {Graphical Markov models provide a method of representing possibly complicated multivariate dependencies in such a way that the general qualitative features can be understood, that statistical independencies are highlighted, and that some properties can be derived directly. Variables are represented by the nodes of a graph. Pairs of nodes may be joined by an edge. Edges are directed if one variable is a response to the other variable considered as explanatory, but are undirected if the variables are on an equal footing. Absence of an edge typically implies statistical independence, conditional, or marginal depending on the kind of graph. The need for a number of types of graph arises because it is helpful to represent a number of different kinds of dependence structures. Of special importance are chain graphs in which variables are arranged in a sequence or chain of blocks, the variables in any one block being on an equal footing, some being possibly joint responses to variables in the past and some being jointly explanatory to variables in the future of the block considered. Some main properties of such systems are outlined, and recent research results are sketched. Suggestions for further reading are given. As an illustrative example, some analysis of data on the treatment of chronic pain is presented.},
  keywords   = {Bayesianism, Classical ML, Machine learning},
  zscc       = {NoCitationData[s0]},
}
% Book record (the export carried a Google Books id).
% zscc and googlebooksid are ignored bookkeeping fields, kept out of note so
% that styles do not print them.
% NOTE(review): only one author is listed in the export; confirm the author list.
@book{winn_model-based_2019,
  author        = {Winn, John Michael},
  title         = {Model-Based Machine Learning},
  publisher     = {Taylor \& Francis Incorporated},
  year          = {2019},
  month         = jun,
  isbn          = {978-1-4987-5681-5},
  language      = {en},
  abstract      = {This book is unusual for a machine learning text book in that the authors do not review dozens of different algorithms. Instead they introduce all of the key ideas through a series of case studies involving real-world applications. Case studies play a central role because it is only in the context of applications that it makes sense to discuss modelling assumptions. Each chapter therefore introduces one case study which is drawn from a real-world application that has been solved using a model-based approach.},
  keywords      = {Bayesian inference, Classical ML, Implementation},
  zscc          = {NoCitationData[s1]},
  googlebooksid = {84KRtgEACAAJ},
}