% arXiv preprint; no journal publication is recorded for this entry.
% The arXiv identifier is carried by eprint/archiveprefix/primaryclass.
% zscc is tool metadata kept in a non-printing field (presumably a citation
% count written by a Zotero add-on -- TODO confirm).
@misc{culbertson_bayesian_2013,
  author        = {Culbertson, Jared and Sturtz, Kirk},
  title         = {Bayesian machine learning via category theory},
  month         = dec,
  year          = {2013},
  eprint        = {1312.1445},
  archiveprefix = {arXiv},
  primaryclass  = {math},
  url           = {http://arxiv.org/abs/1312.1445},
  urldate       = {2019-11-22},
  abstract      = {From the Bayesian perspective, the category of conditional probabilities (a variant of the Kleisli category of the Giry monad, whose objects are measurable spaces and arrows are Markov kernels) gives a nice framework for conceptualization and analysis of many aspects of machine learning. Using categorical methods, we construct models for parametric and nonparametric Bayesian reasoning on function spaces, thus providing a basis for the supervised learning problem. In particular, stochastic processes are arrows to these function spaces which serve as prior probabilities. The resulting inference maps can often be analytically constructed in this symmetric monoidal weakly closed category. We also show how to view general stochastic processes using functor categories and demonstrate the Kalman filter as an archetype for the hidden Markov model.},
  keywords      = {Bayesianism, Categorical ML, Categorical probability theory, Purely theoretical},
  zscc          = {0000006},
}
% arXiv preprint; no journal publication is recorded for this entry.
% The arXiv identifier is carried by eprint/archiveprefix. The archives
% recorded for this listing are cs and math; the exact primary class is not
% known here, so no primaryclass is set.
% zscc is tool metadata kept in a non-printing field (presumably a citation
% count written by a Zotero add-on -- TODO confirm).
@misc{fong_backprop_2019,
  author        = {Fong, Brendan and Spivak, David I. and Tuy{\'e}ras, R{\'e}my},
  title         = {Backprop as {Functor}: {A} compositional perspective on supervised learning},
  shorttitle    = {Backprop as {Functor}},
  month         = may,
  year          = {2019},
  eprint        = {1711.10455},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1711.10455},
  urldate       = {2019-11-23},
  abstract      = {A supervised learning algorithm searches over a set of functions $A \to B$ parametrised by a space $P$ to find the best approximation to some ideal function $f\colon A \to B$. It does this by taking examples $(a,f(a)) \in A\times B$, and updating the parameter according to some rule. We define a category where these update rules may be composed, and show that gradient descent---with respect to a fixed step size and an error function satisfying a certain property---defines a monoidal functor from a category of parametrised functions to this category of update rules. This provides a structural perspective on backpropagation, as well as a broad generalisation of neural networks.},
  keywords      = {Categorical ML, Machine learning, Purely theoretical},
  zscc          = {0000015},
}
% Journal article. doi holds the full DOI (NOTE(review): derived from the
% Project Euclid URL -- confirm it resolves); shortdoi keeps the shortDOI
% alias in a non-printing field.
% zscc is tool metadata kept in a non-printing field (presumably a citation
% count written by a Zotero add-on -- TODO confirm).
@article{mccullagh_what_2002,
  author   = {McCullagh, Peter},
  title    = {What is a statistical model?},
  journal  = {The Annals of Statistics},
  volume   = {30},
  number   = {5},
  pages    = {1225--1310},
  month    = oct,
  year     = {2002},
  doi      = {10.1214/aos/1035844977},
  shortdoi = {10/bkts3m},
  url      = {http://projecteuclid.org/euclid.aos/1035844977},
  urldate  = {2019-11-22},
  language = {en},
  keywords = {Bayesianism, Categorical ML, Categorical probability theory, Compendium, Purely theoretical, Statistical learning theory},
  zscc     = {0000230},
}