% arXiv preprint: the identifier is carried by eprint/archiveprefix instead of
% journal or note, so that styles can format and link it themselves.
% zscc and arxiv-categories are non-standard fields (ignored by styles); they
% keep export metadata without printing it in the bibliography.
@misc{culbertson_bayesian_2013,
  author           = {Culbertson, Jared and Sturtz, Kirk},
  title            = {Bayesian machine learning via category theory},
  year             = {2013},
  month            = dec,
  eprint           = {1312.1445},
  archiveprefix    = {arXiv},
  url              = {http://arxiv.org/abs/1312.1445},
  urldate          = {2019-11-22},
  abstract         = {From the Bayesian perspective, the category of conditional probabilities (a variant of the Kleisli category of the Giry monad, whose objects are measurable spaces and arrows are Markov kernels) gives a nice framework for conceptualization and analysis of many aspects of machine learning. Using categorical methods, we construct models for parametric and nonparametric Bayesian reasoning on function spaces, thus providing a basis for the supervised learning problem. In particular, stochastic processes are arrows to these function spaces which serve as prior probabilities. The resulting inference maps can often be analytically constructed in this symmetric monoidal weakly closed category. We also show how to view general stochastic processes using functor categories and demonstrate the Kalman filter as an archetype for the hidden Markov model.},
  keywords         = {Bayesianism, Categorical ML, Categorical probability theory, Purely theoretical},
  zscc             = {0000006},
  arxiv-categories = {math},
}
% arXiv preprint: the identifier is carried by eprint/archiveprefix instead of
% journal or note, so that styles can format and link it themselves.
% Accented letters in the author field use brace-wrapped accent commands so
% that classic BibTeX sorts and labels them correctly.
% zscc and arxiv-categories are non-standard fields (ignored by styles); they
% keep export metadata without printing it in the bibliography.
@misc{fong_backprop_2019,
  author           = {Fong, Brendan and Spivak, David I. and Tuy{\'e}ras, R{\'e}my},
  title            = {Backprop as {Functor}: {A} compositional perspective on supervised learning},
  shorttitle       = {Backprop as {Functor}},
  year             = {2019},
  month            = may,
  eprint           = {1711.10455},
  archiveprefix    = {arXiv},
  url              = {http://arxiv.org/abs/1711.10455},
  urldate          = {2019-11-23},
  abstract         = {A supervised learning algorithm searches over a set of functions $A \to B$ parametrised by a space $P$ to find the best approximation to some ideal function $f\colon A \to B$. It does this by taking examples $(a,f(a)) \in A\times B$, and updating the parameter according to some rule. We define a category where these update rules may be composed, and show that gradient descent---with respect to a fixed step size and an error function satisfying a certain property---defines a monoidal functor from a category of parametrised functions to this category of update rules. This provides a structural perspective on backpropagation, as well as a broad generalisation of neural networks.},
  keywords         = {Categorical ML, Machine learning, Purely theoretical},
  zscc             = {0000015},
  arxiv-categories = {cs, math},
}
% arXiv preprint: the identifier is carried by eprint/archiveprefix instead of
% journal or note, so that styles can format and link it themselves.
% arxiv-categories is a non-standard field (ignored by styles) that keeps the
% subject list from the export.
@misc{harris_characterizing_2019,
  author           = {Harris, Kenneth D.},
  title            = {Characterizing the invariances of learning algorithms using category theory},
  year             = {2019},
  month            = may,
  eprint           = {1905.02072},
  archiveprefix    = {arXiv},
  url              = {http://arxiv.org/abs/1905.02072},
  urldate          = {2019-10-10},
  abstract         = {Many learning algorithms have invariances: when their training data is transformed in certain ways, the function they learn transforms in a predictable manner. Here we formalize this notion using concepts from the mathematical field of category theory. The invariances that a supervised learning algorithm possesses are formalized by categories of predictor and target spaces, whose morphisms represent the algorithm's invariances, and an index category whose morphisms represent permutations of the training examples. An invariant learning algorithm is a natural transformation between two functors from the product of these categories to the category of sets, representing training datasets and learned functions respectively. We illustrate the framework by characterizing and contrasting the invariances of linear regression and ridge regression.},
  arxiv-categories = {cs, math, stat},
}
% NOTE(review): inproceedings entries require a booktitle and none is recorded
% here; the venue is not recoverable from this file -- confirm and add it, or
% switch to a better-fitting entry type.
@inproceedings{healy_neural_2004,
  author     = {Healy, Michael J. and Caudell, Thomas P.},
  title      = {Neural {Networks}, {Knowledge} and {Cognition}: {A} {Mathematical} {Semantic} {Model} {Based} upon {Category} {Theory}},
  shorttitle = {Neural {Networks}, {Knowledge} and {Cognition}},
  year       = {2004},
  abstract   = {Category theory can be applied to mathematically model the semantics of cognitive neural systems. We discuss semantics as a hierarchy of concepts, or symbolic descriptions of items sensed and represented in the connection weights distributed throughout a neural network. The hierarchy expresses subconcept relationships, and in a neural network it becomes represented incrementally through a Hebbian-like learning process. The categorical semantic model described here explains the learning process as the derivation of colimits and limits in a concept category. It explains the representation of the concept hierarchy in a neural network at each stage of learning as a system of functors and natural transformations, expressing knowledge coherence across the regions of a multi-regional network equipped with multiple sensors. The model yields design principles that constrain neural network designs capable of the most important aspects of cognitive behavior.},
}
% zscc is a non-standard field (ignored by styles); it keeps the export's ZSCC
% value out of note, which styles print verbatim in the bibliography.
% NOTE(review): booktitle holds only the venue acronym -- consider replacing it
% with the full proceedings title once confirmed.
@inproceedings{izbicki_algebraic_2013,
  author     = {Izbicki, Michael},
  title      = {Algebraic classifiers: a generic approach to fast cross-validation, online training, and parallel training},
  shorttitle = {Algebraic classifiers},
  booktitle  = {{ICML}},
  year       = {2013},
  abstract   = {We use abstract algebra to derive new algorithms for fast cross-validation, online learning, and parallel learning. To use these algorithms on a classification model, we must show that the model has appropriate algebraic structure. It is easy to give algebraic structure to some models, and we do this explicitly for Bayesian classifiers and a novel variation of decision stumps called HomStumps. But not all classifiers have an obvious structure, so we introduce the Free HomTrainer. This can be used to give a "generic" algebraic structure to any classifier. We use the Free HomTrainer to give algebraic structure to bagging and boosting. In so doing, we derive novel online and parallel algorithms, and present the first fast cross-validation schemes for these classifiers.},
  keywords   = {Algebra, Categorical ML, Machine learning},
  zscc       = {0000013},
}
% arXiv preprint: the identifier is carried by eprint/archiveprefix instead of
% journal or note, so that styles can format and link it themselves.
% zscc and arxiv-categories are non-standard fields (ignored by styles); they
% keep export metadata without printing it in the bibliography.
@misc{jacobs_categorical_2018,
  author           = {Jacobs, Bart},
  title            = {Categorical {Aspects} of {Parameter} {Learning}},
  year             = {2018},
  month            = oct,
  eprint           = {1810.05814},
  archiveprefix    = {arXiv},
  url              = {http://arxiv.org/abs/1810.05814},
  urldate          = {2019-11-21},
  abstract         = {Parameter learning is the technique for obtaining the probabilistic parameters in conditional probability tables in Bayesian networks from tables with (observed) data --- where it is assumed that the underlying graphical structure is known. There are basically two ways of doing so, referred to as maximal likelihood estimation (MLE) and as Bayesian learning. This paper provides a categorical analysis of these two techniques and describes them in terms of basic properties of the multiset monad M, the distribution monad D and the Giry monad G. In essence, learning is about the relationships between multisets (used for counting) on the one hand and probability distributions on the other. These relationships will be described as suitable natural transformations.},
  keywords         = {Bayesianism, Categorical ML, Categorical probability theory, Machine learning},
  zscc             = {0000001},
  arxiv-categories = {cs},
}
% arXiv preprint: the identifier is carried by eprint/archiveprefix instead of
% journal or note, so that styles can format and link it themselves.
% zscc and arxiv-categories are non-standard fields (ignored by styles); they
% keep export metadata without printing it in the bibliography.
@misc{jacobs_neural_2018,
  author           = {Jacobs, Bart and Sprunger, David},
  title            = {Neural {Nets} via {Forward} {State} {Transformation} and {Backward} {Loss} {Transformation}},
  year             = {2018},
  month            = mar,
  eprint           = {1803.09356},
  archiveprefix    = {arXiv},
  url              = {http://arxiv.org/abs/1803.09356},
  urldate          = {2019-11-21},
  abstract         = {This article studies (multilayer perceptron) neural networks with an emphasis on the transformations involved --- both forward and backward --- in order to develop a semantical/logical perspective that is in line with standard program semantics. The common two-pass neural network training algorithms make this viewpoint particularly fitting. In the forward direction, neural networks act as state transformers. In the reverse direction, however, neural networks change losses of outputs to losses of inputs, thereby acting like a (real-valued) predicate transformer. In this way, backpropagation is functorial by construction, as shown earlier in recent other work. We illustrate this perspective by training a simple instance of a neural network.},
  keywords         = {Categorical ML, Effectus theory, Machine learning},
  zscc             = {0000001},
  arxiv-categories = {cs},
}
% zscc is a non-standard field (ignored by styles); it keeps the export's ZSCC
% value out of note, which styles print verbatim in the bibliography.
@article{jacobs_predicate/state_2016,
  author   = {Jacobs, Bart and Zanasi, Fabio},
  title    = {A {Predicate}/{State} {Transformer} {Semantics} for {Bayesian} {Learning}},
  journal  = {Electronic Notes in Theoretical Computer Science},
  series   = {The {Thirty}-second {Conference} on the {Mathematical} {Foundations} of {Programming} {Semantics} ({MFPS} {XXXII})},
  volume   = {325},
  pages    = {185--200},
  year     = {2016},
  month    = oct,
  issn     = {1571-0661},
  doi      = {10/ggdgbb},
  url      = {http://www.sciencedirect.com/science/article/pii/S1571066116300883},
  urldate  = {2019-11-24},
  language = {en},
  abstract = {This paper establishes a link between Bayesian inference (learning) and predicate and state transformer operations from programming semantics and logic. Specifically, a very general definition of backward inference is given via first applying a predicate transformer and then conditioning. Analogously, forward inference involves first conditioning and then applying a state transformer. These definitions are illustrated in many examples in discrete and continuous probability theory and also in quantum theory.},
  keywords = {Bayesianism, Categorical ML, Categorical probability theory, Effectus theory, Programming language theory, Semantics},
  zscc     = {0000030},
}
% zscc is a non-standard field (ignored by styles); it keeps the export's ZSCC
% value out of note, which styles print verbatim in the bibliography.
@article{mccullagh_what_2002,
  author   = {McCullagh, Peter},
  title    = {What is a statistical model?},
  journal  = {The Annals of Statistics},
  volume   = {30},
  number   = {5},
  pages    = {1225--1310},
  year     = {2002},
  month    = oct,
  doi      = {10/bkts3m},
  url      = {http://projecteuclid.org/euclid.aos/1035844977},
  urldate  = {2019-11-22},
  language = {en},
  keywords = {Bayesianism, Categorical ML, Categorical probability theory, Compendium, Purely theoretical, Statistical learning theory},
  zscc     = {0000230},
}
% zscc and original-date are non-standard fields (ignored by styles); they keep
% the export's bookkeeping values out of note, which styles print verbatim.
@misc{murfet_dmurfet/deeplinearlogic_2018,
  author        = {Murfet, Daniel},
  title         = {dmurfet/deeplinearlogic},
  year          = {2018},
  month         = jul,
  url           = {https://github.com/dmurfet/deeplinearlogic},
  urldate       = {2019-11-22},
  abstract      = {Deep learning and linear logic. Contribute to dmurfet/deeplinearlogic development by creating an account on GitHub.},
  keywords      = {Categorical ML, Implementation, Linear logic, Machine learning, Semantics},
  zscc          = {NoCitationData[s0]},
  original-date = {2016-11-05T09:17:10Z},
}
% zscc and original-date are non-standard fields (ignored by styles); they keep
% the export's bookkeeping values out of note, which styles print verbatim.
@misc{murfet_dmurfet/polysemantics_2018,
  author        = {Murfet, Daniel},
  title         = {dmurfet/polysemantics},
  year          = {2018},
  month         = apr,
  url           = {https://github.com/dmurfet/polysemantics},
  urldate       = {2019-11-22},
  abstract      = {Polynomial semantics of linear logic. Contribute to dmurfet/polysemantics development by creating an account on GitHub.},
  keywords      = {Categorical ML, Implementation, Linear logic, Machine learning, Semantics},
  zscc          = {NoCitationData[s0]},
  original-date = {2016-02-23T03:29:42Z},
}
% arXiv preprint: the identifier is carried by eprint/archiveprefix instead of
% journal or note, so that styles can format and link it themselves.
% zscc and arxiv-categories are non-standard fields (ignored by styles); they
% keep export metadata without printing it in the bibliography.
@misc{murfet_derivatives_2019,
  author           = {Murfet, Daniel and Clift, James},
  title            = {Derivatives of {Turing} machines in {Linear} {Logic}},
  year             = {2019},
  month            = jan,
  eprint           = {1805.11813},
  archiveprefix    = {arXiv},
  url              = {http://arxiv.org/abs/1805.11813},
  urldate          = {2019-11-21},
  abstract         = {We calculate denotations under the Sweedler semantics of the Ehrhard-Regnier derivatives of various encodings of Turing machines into linear logic. We show that these derivatives calculate the rate of change of probabilities naturally arising in the Sweedler semantics of linear logic proofs. The resulting theory is applied to the problem of synthesising Turing machines by gradient descent.},
  keywords         = {Abstract machines, Categorical ML, Differentiation, Linear logic, Machine learning},
  zscc             = {NoCitationData[s0]},
  arxiv-categories = {math},
}
% zscc is a non-standard field (ignored by styles); it keeps the export's ZSCC
% value out of note, which styles print verbatim in the bibliography.
% NOTE(review): no year, url or howpublished is recorded for this item --
% confirm where it was published and add the missing data.
@misc{murfet_linear_nodate,
  author   = {Murfet, Daniel and Hu, Huiyi},
  title    = {Linear logic and deep learning},
  language = {en},
  keywords = {Categorical ML, Linear logic, Machine learning, Semantics},
  zscc     = {NoCitationData[s0]},
}
% zscc is a non-standard field (ignored by styles); it keeps the export's ZSCC
% value out of note, which styles print verbatim in the bibliography.
@inproceedings{sprunger_differentiable_2019,
  author    = {Sprunger, David and Katsumata, Shin-ya},
  title     = {Differentiable {Causal} {Computations} via {Delayed} {Trace}},
  booktitle = {2019 34th {Annual} {ACM}/{IEEE} {Symposium} on {Logic} in {Computer} {Science} ({LICS})},
  publisher = {IEEE},
  address   = {Vancouver, BC, Canada},
  pages     = {1--12},
  year      = {2019},
  month     = jun,
  isbn      = {978-1-72813-608-0},
  doi       = {10/ggdf98},
  url       = {https://ieeexplore.ieee.org/document/8785670/},
  urldate   = {2019-11-23},
  language  = {en},
  abstract  = {We investigate causal computations taking sequences of inputs to sequences of outputs where the nth output depends on the first n inputs only. We model these in category theory via a construction taking a Cartesian category C to another category St(C) with a novel trace-like operation called “delayed trace”, which misses yanking and dinaturality axioms of the usual trace. The delayed trace operation provides a feedback mechanism in St(C) with an implicit guardedness guarantee.},
  keywords  = {Categorical ML, Differentiation},
  zscc      = {0000002},
}