@article{hamrick_analogues_2019,
  series = {{SI}: 29: {Artificial} {Intelligence} (2019)},
  title = {Analogues of mental simulation and imagination in deep learning},
  volume = {29},
  issn = {2352-1546},
  url = {http://www.sciencedirect.com/science/article/pii/S2352154618301670},
  doi = {10.1016/j.cobeha.2018.12.011},
  abstract = {Mental simulation—the capacity to imagine what will or what could be—is a salient feature of human cognition, playing a key role in a wide range of cognitive abilities. In artificial intelligence, the last few years have seen the development of methods which are analogous to mental models and mental simulation. This paper outlines recent methods in deep learning for constructing such models from data and learning to use them via reinforcement learning, and compares such approaches to human mental simulation. Model-based methods in deep learning can serve as powerful tools for building and scaling cognitive models. However, a number of challenges remain in matching the capacity of human mental simulation for efficiency, compositionality, generalization, and creativity.},
  urldate = {2019-10-10},
  journal = {Current Opinion in Behavioral Sciences},
  author = {Hamrick, Jessica B.},
  month = oct,
  year = {2019},
  pages = {8--16}
}
@book{winn_model-based_2019,
  title = {Model-{Based} {Machine} {Learning}},
  isbn = {978-1-4987-5681-5},
  abstract = {This book is unusual for a machine learning text book in that the authors do not review dozens of different algorithms. Instead they introduce all of the key ideas through a series of case studies involving real-world applications. Case studies play a central role because it is only in the context of applications that it makes sense to discuss modelling assumptions. Each chapter therefore introduces one case study which is drawn from a real-world application that has been solved using a model-based approach.},
  language = {en},
  publisher = {Taylor \& Francis Incorporated},
  author = {Winn, John Michael},
  month = jun,
  year = {2019},
  note = {Google-Books-ID: 84KRtgEACAAJ},
  keywords = {Bayesian inference, Classical ML, Implementation}
}
@article{battaglia_relational_2018,
  title = {Relational inductive biases, deep learning, and graph networks},
  url = {http://arxiv.org/abs/1806.01261},
  abstract = {Artificial intelligence (AI) has undergone a renaissance recently, making major progress in key domains such as vision, language, control, and decision-making. This has been due, in part, to cheap data and cheap compute resources, which have fit the natural strengths of deep learning. However, many defining characteristics of human intelligence, which developed under much different pressures, remain out of reach for current approaches. In particular, generalizing beyond one's experiences--a hallmark of human intelligence from infancy--remains a formidable challenge for modern AI. The following is part position paper, part review, and part unification. We argue that combinatorial generalization must be a top priority for AI to achieve human-like abilities, and that structured representations and computations are key to realizing this objective. Just as biology uses nature and nurture cooperatively, we reject the false choice between "hand-engineering" and "end-to-end" learning, and instead advocate for an approach which benefits from their complementary strengths. We explore how using relational inductive biases within deep learning architectures can facilitate learning about entities, relations, and rules for composing them. We present a new building block for the AI toolkit with a strong relational inductive bias--the graph network--which generalizes and extends various approaches for neural networks that operate on graphs, and provides a straightforward interface for manipulating structured knowledge and producing structured behaviors. We discuss how graph networks can support relational reasoning and combinatorial generalization, laying the foundation for more sophisticated, interpretable, and flexible patterns of reasoning. As a companion to this paper, we have released an open-source software library for building graph networks, with demonstrations of how to use them in practice.},
  urldate = {2019-10-10},
  eprint = {1806.01261},
  eprinttype = {arXiv},
  author = {Battaglia, Peter W. and Hamrick, Jessica B. and Bapst, Victor and Sanchez-Gonzalez, Alvaro and Zambaldi, Vinicius and Malinowski, Mateusz and Tacchetti, Andrea and Raposo, David and Santoro, Adam and Faulkner, Ryan and Gulcehre, Caglar and Song, Francis and Ballard, Andrew and Gilmer, Justin and Dahl, George and Vaswani, Ashish and Allen, Kelsey and Nash, Charles and Langston, Victoria and Dyer, Chris and Heess, Nicolas and Wierstra, Daan and Kohli, Pushmeet and Botvinick, Matt and Vinyals, Oriol and Li, Yujia and Pascanu, Razvan},
  month = jun,
  year = {2018}
}
@article{brown_adversarial_2018,
  title = {Adversarial {Patch}},
  url = {http://arxiv.org/abs/1712.09665},
  abstract = {We present a method to create universal, robust, targeted adversarial image patches in the real world. The patches are universal because they can be used to attack any scene, robust because they work under a wide variety of transformations, and targeted because they can cause a classifier to output any target class. These adversarial patches can be printed, added to any scene, photographed, and presented to image classifiers; even when the patches are small, they cause the classifiers to ignore the other items in the scene and report a chosen target class. To reproduce the results from the paper, our code is available at https://github.com/tensorflow/cleverhans/tree/master/examples/adversarial\_patch},
  urldate = {2019-11-23},
  eprint = {1712.09665},
  eprinttype = {arXiv},
  author = {Brown, Tom B. and Mané, Dandelion and Roy, Aurko and Abadi, Martín and Gilmer, Justin},
  month = may,
  year = {2018},
  keywords = {Adversarial attacks, Classical ML, Machine learning}
}
@article{eykholt_robust_2018,
  title = {Robust {Physical}-{World} {Attacks} on {Deep} {Learning} {Models}},
  url = {http://arxiv.org/abs/1707.08945},
  abstract = {Recent studies show that the state-of-the-art deep neural networks (DNNs) are vulnerable to adversarial examples, resulting from small-magnitude perturbations added to the input. Given that that emerging physical systems are using DNNs in safety-critical situations, adversarial examples could mislead these systems and cause dangerous situations.Therefore, understanding adversarial examples in the physical world is an important step towards developing resilient learning algorithms. We propose a general attack algorithm,Robust Physical Perturbations (RP2), to generate robust visual adversarial perturbations under different physical conditions. Using the real-world case of road sign classification, we show that adversarial examples generated using RP2 achieve high targeted misclassification rates against standard-architecture road sign classifiers in the physical world under various environmental conditions, including viewpoints. Due to the current lack of a standardized testing method, we propose a two-stage evaluation methodology for robust physical adversarial examples consisting of lab and field tests. Using this methodology, we evaluate the efficacy of physical adversarial manipulations on real objects. Witha perturbation in the form of only black and white stickers,we attack a real stop sign, causing targeted misclassification in 100\% of the images obtained in lab settings, and in 84.8\%of the captured video frames obtained on a moving vehicle(field test) for the target classifier.},
  urldate = {2019-11-23},
  eprint = {1707.08945},
  eprinttype = {arXiv},
  author = {Eykholt, Kevin and Evtimov, Ivan and Fernandes, Earlence and Li, Bo and Rahmati, Amir and Xiao, Chaowei and Prakash, Atul and Kohno, Tadayoshi and Song, Dawn},
  month = apr,
  year = {2018},
  keywords = {Adversarial attacks, Classical ML, Machine learning}
}
@article{baydin_automatic_2018,
  title = {Automatic differentiation in machine learning: a survey},
  shorttitle = {Automatic differentiation in machine learning},
  url = {http://arxiv.org/abs/1502.05767},
  abstract = {Derivatives, mostly in the form of gradients and Hessians, are ubiquitous in machine learning. Automatic differentiation (AD), also called algorithmic differentiation or simply "autodiff", is a family of techniques similar to but more general than backpropagation for efficiently and accurately evaluating derivatives of numeric functions expressed as computer programs. AD is a small but established field with applications in areas including computational fluid dynamics, atmospheric sciences, and engineering design optimization. Until very recently, the fields of machine learning and AD have largely been unaware of each other and, in some cases, have independently discovered each other's results. Despite its relevance, general-purpose AD has been missing from the machine learning toolbox, a situation slowly changing with its ongoing adoption under the names "dynamic computational graphs" and "differentiable programming". We survey the intersection of AD and machine learning, cover applications where AD has direct relevance, and address the main implementation techniques. By precisely defining the main differentiation techniques and their interrelationships, we aim to bring clarity to the usage of the terms "autodiff", "automatic differentiation", and "symbolic differentiation" as these are encountered more and more in machine learning settings.},
  urldate = {2019-11-22},
  eprint = {1502.05767},
  eprinttype = {arXiv},
  author = {Baydin, Atilim Gunes and Pearlmutter, Barak A. and Radul, Alexey Andreyevich and Siskind, Jeffrey Mark},
  month = feb,
  year = {2018},
  keywords = {Automatic differentiation, Classical ML, Differentiation, Machine learning}
}
@article{zhang_understanding_2017,
  title = {Understanding deep learning requires rethinking generalization},
  url = {http://arxiv.org/abs/1611.03530},
  abstract = {Despite their massive size, successful deep artificial neural networks can exhibit a remarkably small difference between training and test performance. Conventional wisdom attributes small generalization error either to properties of the model family, or to the regularization techniques used during training. Through extensive systematic experiments, we show how these traditional approaches fail to explain why large neural networks generalize well in practice. Specifically, our experiments establish that state-of-the-art convolutional networks for image classification trained with stochastic gradient methods easily fit a random labeling of the training data. This phenomenon is qualitatively unaffected by explicit regularization, and occurs even if we replace the true images by completely unstructured random noise. We corroborate these experimental findings with a theoretical construction showing that simple depth two neural networks already have perfect finite sample expressivity as soon as the number of parameters exceeds the number of data points as it usually does in practice. We interpret our experimental findings by comparison with traditional models.},
  urldate = {2019-11-22},
  eprint = {1611.03530},
  eprinttype = {arXiv},
  author = {Zhang, Chiyuan and Bengio, Samy and Hardt, Moritz and Recht, Benjamin and Vinyals, Oriol},
  month = feb,
  year = {2017},
  keywords = {Classical ML, Machine learning}
}
@article{kurakin_adversarial_2017,
  title = {Adversarial examples in the physical world},
  url = {http://arxiv.org/abs/1607.02533},
  abstract = {Most existing machine learning classifiers are highly vulnerable to adversarial examples. An adversarial example is a sample of input data which has been modified very slightly in a way that is intended to cause a machine learning classifier to misclassify it. In many cases, these modifications can be so subtle that a human observer does not even notice the modification at all, yet the classifier still makes a mistake. Adversarial examples pose security concerns because they could be used to perform an attack on machine learning systems, even if the adversary has no access to the underlying model. Up to now, all previous work have assumed a threat model in which the adversary can feed data directly into the machine learning classifier. This is not always the case for systems operating in the physical world, for example those which are using signals from cameras and other sensors as an input. This paper shows that even in such physical world scenarios, machine learning systems are vulnerable to adversarial examples. We demonstrate this by feeding adversarial images obtained from cell-phone camera to an ImageNet Inception classifier and measuring the classification accuracy of the system. We find that a large fraction of adversarial examples are classified incorrectly even when perceived through the camera.},
  urldate = {2019-11-23},
  eprint = {1607.02533},
  eprinttype = {arXiv},
  author = {Kurakin, Alexey and Goodfellow, Ian and Bengio, Samy},
  month = feb,
  year = {2017},
  keywords = {Adversarial attacks, Classical ML, Machine learning}
}
@article{olah_attention_2016,
  title = {Attention and {Augmented} {Recurrent} {Neural} {Networks}},
  volume = {1},
  issn = {2476-0757},
  url = {http://distill.pub/2016/augmented-rnns},
  doi = {10.23915/distill.00001},
  abstract = {A visual overview of neural attention, and the powerful extensions of neural networks being built on top of it.},
  language = {en},
  number = {9},
  urldate = {2019-11-22},
  journal = {Distill},
  author = {Olah, Chris and Carter, Shan},
  month = sep,
  year = {2016},
  keywords = {Classical ML, Machine learning},
  pages = {e1}
}
@article{goodfellow_explaining_2015,
  title = {Explaining and {Harnessing} {Adversarial} {Examples}},
  url = {http://arxiv.org/abs/1412.6572},
  abstract = {Several machine learning models, including neural networks, consistently misclassify adversarial examples---inputs formed by applying small but intentionally worst-case perturbations to examples from the dataset, such that the perturbed input results in the model outputting an incorrect answer with high confidence. Early attempts at explaining this phenomenon focused on nonlinearity and overfitting. We argue instead that the primary cause of neural networks' vulnerability to adversarial perturbation is their linear nature. This explanation is supported by new quantitative results while giving the first explanation of the most intriguing fact about them: their generalization across architectures and training sets. Moreover, this view yields a simple and fast method of generating adversarial examples. Using this approach to provide examples for adversarial training, we reduce the test set error of a maxout network on the MNIST dataset.},
  urldate = {2019-11-23},
  eprint = {1412.6572},
  eprinttype = {arXiv},
  author = {Goodfellow, Ian J. and Shlens, Jonathon and Szegedy, Christian},
  month = mar,
  year = {2015},
  keywords = {Adversarial attacks, Classical ML, Machine learning}
}
@article{paul_why_2015,
  title = {Why does {Deep} {Learning} work? - {A} perspective from {Group} {Theory}},
  shorttitle = {Why does {Deep} {Learning} work?},
  url = {http://arxiv.org/abs/1412.6621},
  abstract = {Why does Deep Learning work? What representations does it capture? How do higher-order representations emerge? We study these questions from the perspective of group theory, thereby opening a new approach towards a theory of Deep learning. One factor behind the recent resurgence of the subject is a key algorithmic step called pre-training: first search for a good generative model for the input samples, and repeat the process one layer at a time. We show deeper implications of this simple principle, by establishing a connection with the interplay of orbits and stabilizers of group actions. Although the neural networks themselves may not form groups, we show the existence of \{{\textbackslash}em shadow\} groups whose elements serve as close approximations. Over the shadow groups, the pre-training step, originally introduced as a mechanism to better initialize a network, becomes equivalent to a search for features with minimal orbits. Intuitively, these features are in a way the \{{\textbackslash}em simplest\}. Which explains why a deep learning network learns simple features first. Next, we show how the same principle, when repeated in the deeper layers, can capture higher order representations, and why representation complexity increases as the layers get deeper.},
  urldate = {2019-11-22},
  eprint = {1412.6621},
  eprinttype = {arXiv},
  author = {Paul, Arnab and Venkatasubramanian, Suresh},
  month = feb,
  year = {2015},
  keywords = {Classical ML, Machine learning}
}
@article{ghahramani_probabilistic_2015,
  title = {Probabilistic machine learning and artificial intelligence},
  volume = {521},
  issn = {0028-0836, 1476-4687},
  url = {http://www.nature.com/articles/nature14541},
  doi = {10.1038/nature14541},
  language = {en},
  number = {7553},
  urldate = {2019-11-28},
  journal = {Nature},
  author = {Ghahramani, Zoubin},
  month = may,
  year = {2015},
  keywords = {Bayesian inference, Classical ML, Machine learning, Probabilistic programming},
  pages = {452--459}
}
@article{graves_neural_2014,
  title = {Neural {Turing} {Machines}},
  url = {http://arxiv.org/abs/1410.5401},
  abstract = {We extend the capabilities of neural networks by coupling them to external memory resources, which they can interact with by attentional processes. The combined system is analogous to a Turing Machine or Von Neumann architecture but is differentiable end-to-end, allowing it to be efficiently trained with gradient descent. Preliminary results demonstrate that Neural Turing Machines can infer simple algorithms such as copying, sorting, and associative recall from input and output examples.},
  urldate = {2019-11-21},
  eprint = {1410.5401},
  eprinttype = {arXiv},
  author = {Graves, Alex and Wayne, Greg and Danihelka, Ivo},
  month = dec,
  year = {2014},
  keywords = {Abstract machines, Classical ML, Machine learning}
}
@article{goodfellow_generative_2014,
  title = {Generative {Adversarial} {Networks}},
  url = {http://arxiv.org/abs/1406.2661},
  abstract = {We propose a new framework for estimating generative models via an adversarial process, in which we simultaneously train two models: a generative model G that captures the data distribution, and a discriminative model D that estimates the probability that a sample came from the training data rather than G. The training procedure for G is to maximize the probability of D making a mistake. This framework corresponds to a minimax two-player game. In the space of arbitrary functions G and D, a unique solution exists, with G recovering the training data distribution and D equal to 1/2 everywhere. In the case where G and D are defined by multilayer perceptrons, the entire system can be trained with backpropagation. There is no need for any Markov chains or unrolled approximate inference networks during either training or generation of samples. Experiments demonstrate the potential of the framework through qualitative and quantitative evaluation of the generated samples.},
  urldate = {2019-11-28},
  eprint = {1406.2661},
  eprinttype = {arXiv},
  author = {Goodfellow, Ian J. and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
  month = jun,
  year = {2014},
  keywords = {Adversarial attacks, Classical ML, Implementation, Machine learning}
}
@misc{poggio_tomaso_2013,
  title = {Tomaso {A}. {Poggio} autobiography},
  url = {http://poggio-lab.mit.edu/sites/default/files/cv/tomasopoggio.pdf},
  author = {Poggio, Tomaso},
  year = {2013},
  keywords = {Classical ML, Compendium, Machine learning},
  pages = {54}
}
@inproceedings{fages_machine_2006,
  address = {Berlin, Heidelberg},
  series = {Lecture {Notes} in {Computer} {Science}},
  volume = {4220},
  title = {Machine {Learning} {Biochemical} {Networks} from {Temporal} {Logic} {Properties}},
  isbn = {978-3-540-46236-1},
  doi = {10/dd8},
  abstract = {One central issue in systems biology is the definition of formal languages for describing complex biochemical systems and their behavior at different levels. The biochemical abstract machine BIOCHAM is based on two formal languages, one rule-based language used for modeling biochemical networks, at three abstraction levels corresponding to three semantics: boolean, concentration and population; and one temporal logic language used for formalizing the biological properties of the system. In this paper, we show how the temporal logic language can be turned into a specification language. We describe two algorithms for inferring reaction rules and kinetic parameter values from a temporal specification formalizing the biological data. Then, with an example of the cell cycle control, we illustrate how these machine learning techniques may be useful to the modeler.},
  language = {en},
  booktitle = {Transactions on {Computational} {Systems} {Biology} {VI}},
  publisher = {Springer},
  author = {Fages, François and Calzone, Laurence and Chabrier-Rivier, Nathalie and Soliman, Sylvain},
  editor = {Priami, Corrado and Plotkin, Gordon},
  year = {2006},
  keywords = {Abstract machines, Biology, Classical ML, Machine learning, Symbolic logic, Systems biology},
  pages = {68--94}
}
@incollection{wermuth_graphical_2001,
  address = {Oxford},
  title = {Graphical {Models}: {Overview}},
  isbn = {978-0-08-043076-8},
  shorttitle = {Graphical {Models}},
  url = {http://www.sciencedirect.com/science/article/pii/B008043076700440X},
  doi = {10.1016/B0-08-043076-7/00440-X},
  abstract = {Graphical Markov models provide a method of representing possibly complicated multivariate dependencies in such a way that the general qualitative features can be understood, that statistical independencies are highlighted, and that some properties can be derived directly. Variables are represented by the nodes of a graph. Pairs of nodes may be joined by an edge. Edges are directed if one variable is a response to the other variable considered as explanatory, but are undirected if the variables are on an equal footing. Absence of an edge typically implies statistical independence, conditional, or marginal depending on the kind of graph. The need for a number of types of graph arises because it is helpful to represent a number of different kinds of dependence structures. Of special importance are chain graphs in which variables are arranged in a sequence or chain of blocks, the variables in any one block being on an equal footing, some being possibly joint responses to variables in the past and some being jointly explanatory to variables in the future of the block considered. Some main properties of such systems are outlined, and recent research results are sketched. Suggestions for further reading are given. As an illustrative example, some analysis of data on the treatment of chronic pain is presented.},
  language = {en},
  urldate = {2019-11-22},
  booktitle = {International {Encyclopedia} of the {Social} \& {Behavioral} {Sciences}},
  publisher = {Pergamon},
  author = {Wermuth, N. and Cox, D. R.},
  editor = {Smelser, Neil J. and Baltes, Paul B.},
  month = jan,
  year = {2001},
  keywords = {Bayesianism, Classical ML, Machine learning},
  pages = {6379--6386}
}
@techreport{heckerman_tutorial_1995,
  title = {A {Tutorial} on {Learning} {With} {Bayesian} {Networks}},
  url = {https://www.microsoft.com/en-us/research/publication/a-tutorial-on-learning-with-bayesian-networks/},
  abstract = {A Bayesian network is a graphical model that encodes probabilistic relationships among variables of interest. When used in conjunction with statistical techniques, the graphical model has several advantages for data analysis. One, because the model encodes dependencies among all variables, it readily handles situations where some data entries are missing. Two, a Bayesian network can …},
  language = {en-US},
  urldate = {2019-11-22},
  institution = {Microsoft Research},
  number = {MSR-TR-95-06},
  author = {Heckerman, David},
  month = mar,
  year = {1995},
  keywords = {Bayesianism, Classical ML, Machine learning}
}
@article{siegelmann_computational_1995,
  title = {On the {Computational} {Power} of {Neural} {Nets}},
  volume = {50},
  issn = {0022-0000},
  url = {http://www.sciencedirect.com/science/article/pii/S0022000085710136},
  doi = {10.1006/jcss.1995.1013},
  abstract = {This paper deals with finite size networks which consist of interconnections of synchronously evolving processors. Each processor updates its state by applying a "sigmoidal" function to a linear combination of the previous states of all units. We prove that one may simulate all Turing machines by such nets. In particular, one can simulate any multi-stack Turing machine in real time, and there is a net made up of 886 processors which computes a universal partial-recursive function. Products (high order nets) are not required, contrary to what had been stated in the literature. Non-deterministic Turing machines can be simulated by non-deterministic rational nets, also in real time. The simulation result has many consequences regarding the decidability, or more generally the complexity, of questions about recursive nets.},
  language = {en},
  number = {1},
  urldate = {2019-11-28},
  journal = {Journal of Computer and System Sciences},
  author = {Siegelmann, H. T. and Sontag, E. D.},
  month = feb,
  year = {1995},
  keywords = {Classical ML, Machine learning},
  pages = {132--150}
}
@misc{murfet_mathematics_nodate,
  author = {Murfet, Daniel},
  title = {Mathematics of {AlphaGo}},
  note = {ZSCC: NoCitationData[s0]},
  keywords = {Classical ML, Machine learning}
}
@misc{murfet_algebra_nodate,
  title = {Algebra and {Artificial} {Intelligence}},
  language = {en},
  author = {Murfet, Daniel},
  note = {ZSCC: NoCitationData[s0]},
  keywords = {Algebra, Classical ML, Machine learning, Sketchy}
}