% arXiv preprint. The arXiv identifier is stored in eprint/archiveprefix/primaryclass
% instead of journal/note; the ZSCC annotation is kept in the non-printing
% internal-note field so it is not typeset in the bibliography.
@misc{culbertson_bayesian_2013,
  author        = {Culbertson, Jared and Sturtz, Kirk},
  title         = {{Bayesian} machine learning via category theory},
  year          = {2013},
  month         = dec,
  eprint        = {1312.1445},
  archiveprefix = {arXiv},
  primaryclass  = {math},
  url           = {http://arxiv.org/abs/1312.1445},
  urldate       = {2019-11-22},
  abstract      = {From the Bayesian perspective, the category of conditional probabilities (a variant of the Kleisli category of the Giry monad, whose objects are measurable spaces and arrows are Markov kernels) gives a nice framework for conceptualization and analysis of many aspects of machine learning. Using categorical methods, we construct models for parametric and nonparametric Bayesian reasoning on function spaces, thus providing a basis for the supervised learning problem. In particular, stochastic processes are arrows to these function spaces which serve as prior probabilities. The resulting inference maps can often be analytically constructed in this symmetric monoidal weakly closed category. We also show how to view general stochastic processes using functor categories and demonstrate the Kalman filter as an archetype for the hidden Markov model.},
  keywords      = {Bayesianism, Categorical ML, Categorical probability theory, Purely theoretical},
  internal-note = {ZSCC: 0000006}
}
% arXiv preprint. The arXiv identifier is stored in eprint/archiveprefix/primaryclass
% instead of journal/note; the ZSCC annotation is kept in the non-printing
% internal-note field so it is not typeset in the bibliography.
@misc{jacobs_categorical_2018,
  author        = {Jacobs, Bart},
  title         = {Categorical {Aspects} of {Parameter} {Learning}},
  year          = {2018},
  month         = oct,
  eprint        = {1810.05814},
  archiveprefix = {arXiv},
  primaryclass  = {cs},
  url           = {http://arxiv.org/abs/1810.05814},
  urldate       = {2019-11-21},
  abstract      = {Parameter learning is the technique for obtaining the probabilistic parameters in conditional probability tables in Bayesian networks from tables with (observed) data --- where it is assumed that the underlying graphical structure is known. There are basically two ways of doing so, referred to as maximal likelihood estimation (MLE) and as Bayesian learning. This paper provides a categorical analysis of these two techniques and describes them in terms of basic properties of the multiset monad M, the distribution monad D and the Giry monad G. In essence, learning is about the relationships between multisets (used for counting) on the one hand and probability distributions on the other. These relationships will be described as suitable natural transformations.},
  keywords      = {Bayesianism, Categorical ML, Categorical probability theory, Machine learning},
  internal-note = {ZSCC: 0000001}
}
% Journal article in Electronic Notes in Theoretical Computer Science, volume 325.
% The ZSCC annotation is kept in the non-printing internal-note field so that it
% is not typeset in the bibliography through the note field.
@article{jacobs_predicate/state_2016,
  author        = {Jacobs, Bart and Zanasi, Fabio},
  title         = {A {Predicate}/{State} {Transformer} {Semantics} for {Bayesian} {Learning}},
  journal       = {Electronic Notes in Theoretical Computer Science},
  series        = {The {Thirty}-second {Conference} on the {Mathematical} {Foundations} of {Programming} {Semantics} ({MFPS} {XXXII})},
  volume        = {325},
  pages         = {185--200},
  year          = {2016},
  month         = oct,
  issn          = {1571-0661},
  doi           = {10/ggdgbb},
  url           = {http://www.sciencedirect.com/science/article/pii/S1571066116300883},
  urldate       = {2019-11-24},
  language      = {en},
  abstract      = {This paper establishes a link between Bayesian inference (learning) and predicate and state transformer operations from programming semantics and logic. Specifically, a very general definition of backward inference is given via first applying a predicate transformer and then conditioning. Analogously, forward inference involves first conditioning and then applying a state transformer. These definitions are illustrated in many examples in discrete and continuous probability theory and also in quantum theory.},
  keywords      = {Bayesianism, Categorical ML, Categorical probability theory, Effectus theory, Programming language theory, Semantics},
  internal-note = {ZSCC: 0000030}
}
% Journal article in The Annals of Statistics, volume 30, number 5.
% The ZSCC annotation is kept in the non-printing internal-note field so that it
% is not typeset in the bibliography through the note field.
@article{mccullagh_what_2002,
  author        = {McCullagh, Peter},
  title         = {What is a statistical model?},
  journal       = {The Annals of Statistics},
  volume        = {30},
  number        = {5},
  pages         = {1225--1310},
  year          = {2002},
  month         = oct,
  doi           = {10/bkts3m},
  url           = {http://projecteuclid.org/euclid.aos/1035844977},
  urldate       = {2019-11-22},
  language      = {en},
  keywords      = {Bayesianism, Categorical ML, Categorical probability theory, Compendium, Purely theoretical, Statistical learning theory},
  internal-note = {ZSCC: 0000230}
}