# Awesome Deep Reinforcement Learning

> **Mar 1 2024 update: HILP added**
>
> **July 2022 update: EDDICT added**
>
> **Mar 2022 update: a few papers released in early 2022**
>
> **Dec 2021 update: Unsupervised RL**

## Introduction to awesome drl

Reinforcement learning is a fundamental framework for building AGI, so this project collects and shares important contributions to deep reinforcement learning.

## Landscape of Deep RL

![updated Landscape of DRL](images/awesome-drl.png)

## Content

- [Awesome Deep Reinforcement Learning](#awesome-deep-reinforcement-learning)
  - [Introduction to awesome drl](#introduction-to-awesome-drl)
  - [Landscape of Deep RL](#landscape-of-deep-rl)
  - [Content](#content)
  - [General guidances](#general-guidances)
  - [2024](#2024)
  - [2022](#2022)
  - [Generalist policies](#generalist-policies)
  - [Foundations and theory](#foundations-and-theory)
  - [General benchmark frameworks](#general-benchmark-frameworks)
  - [Unsupervised](#unsupervised)
  - [Offline](#offline)
  - [Value based](#value-based)
  - [Policy gradient](#policy-gradient)
  - [Explorations](#explorations)
  - [Actor-Critic](#actor-critic)
  - [Model-based](#model-based)
  - [Model-free + Model-based](#model-free--model-based)
  - [Hierarchical](#hierarchical)
  - [Option](#option)
  - [Connection with other methods](#connection-with-other-methods)
  - [Connecting value and policy methods](#connecting-value-and-policy-methods)
  - [Reward design](#reward-design)
  - [Unifying](#unifying)
  - [Faster DRL](#faster-drl)
  - [Multi-agent](#multi-agent)
  - [New design](#new-design)
  - [Multitask](#multitask)
  - [Observational Learning](#observational-learning)
  - [Meta Learning](#meta-learning)
  - [Distributional](#distributional)
  - [Planning](#planning)
  - [Safety](#safety)
  - [Inverse RL](#inverse-rl)
  - [No reward RL](#no-reward-rl)
  - [Time](#time)
  - [Adversarial learning](#adversarial-learning)
  - [Use Natural Language](#use-natural-language)
  - [Generative and contrastive representation learning](#generative-and-contrastive-representation-learning)
  - [Belief](#belief)
  - [PAC](#pac)
  - [Applications](#applications)

Illustrations:

![](images/ACER.png)

**Recommendations and suggestions are welcome**.

## General guidances

- [Awesome Offline RL](https://github.com/hanjuku-kaso/awesome-offline-rl)
- [Reinforcement Learning Today](http://reinforcementlearning.today/)
- [Multiagent Reinforcement Learning by Marc Lanctot, RLSS @ Lille](http://mlanctot.info/files/papers/Lanctot_MARL_RLSS2019_Lille.pdf) 11 July 2019
- [RLDM 2019 Notes by David Abel](https://david-abel.github.io/notes/rldm_2019.pdf) 11 July 2019
- [A Survey of Reinforcement Learning Informed by Natural Language](RLNL.md) 10 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.03926.pdf)
- [Challenges of Real-World Reinforcement Learning](ChallengesRealWorldRL.md) 29 Apr 2019 [arxiv](https://arxiv.org/pdf/1904.12901.pdf)
- [Ray Interference: a Source of Plateaus in Deep Reinforcement Learning](RayInterference.md) 25 Apr 2019 [arxiv](https://arxiv.org/pdf/1904.11455.pdf)
- [Principles of Deep RL by David Silver](p10.md)
- [University AI's general introduction to deep RL (in Chinese)](https://www.jianshu.com/p/dfd987aa765a)
- [OpenAI's Spinning Up](https://spinningup.openai.com/en/latest/)
- [The Promise of Hierarchical Reinforcement Learning](https://thegradient.pub/the-promise-of-hierarchical-reinforcement-learning/) 9 Mar 2019
- [Deep Reinforcement Learning that Matters](reproducing.md) 30 Jan 2019 [arxiv](https://arxiv.org/pdf/1709.06560.pdf)

## 2024

- [Foundation Policies with Hilbert Representations](HILP.md) [arxiv](https://arxiv.org/abs/2402.15567) [repo](https://github.com/seohongpark/HILP) 23 Feb 2024

## 2022

- Reinforcement Learning with Action-Free Pre-Training from Videos [arxiv](https://arxiv.org/abs/2203.13880) [repo](https://github.com/younggyoseo/apv)

## Generalist policies

- [Foundation Policies with Hilbert Representations](HILP.md) [arxiv](https://arxiv.org/abs/2402.15567) [repo](https://github.com/seohongpark/HILP) 23 Feb 2024

## Foundations and theory

- [General non-linear Bellman equations](GNLBE.md) 9 July 2019 [arxiv](https://arxiv.org/pdf/1907.07331.pdf)
- [Monte Carlo Gradient Estimation in Machine Learning](MCGE.md) 25 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.10652.pdf)

## General benchmark frameworks

- [Brax](https://github.com/google/brax/)
  ![](https://github.com/google/brax/raw/main/docs/img/fetch.gif)
- [Android-Env](https://github.com/deepmind/android_env)
  ![](https://github.com/deepmind/android_env/raw/main/docs/images/device_control.gif)
- [MuJoCo](http://mujoco.org/) | [MuJoCo Chinese version](https://github.com/tigerneil/mujoco-zh)
- [Unsupervised RL Benchmark](https://github.com/rll-research/url_benchmark)
- [Dataset for Offline RL](https://github.com/rail-berkeley/d4rl)
- [Spriteworld: a flexible, configurable python-based reinforcement learning environment](https://github.com/deepmind/spriteworld)
- [ChainerRL Visualizer](https://github.com/chainer/chainerrl-visualizer)
- [Behaviour Suite for Reinforcement Learning](BSRL.md) 13 Aug 2019 [arxiv](https://arxiv.org/pdf/1908.03568.pdf) | [code](https://github.com/deepmind/bsuite)
- [Quantifying Generalization in Reinforcement Learning](Coinrun.md) 20 Dec 2018 [arxiv](https://arxiv.org/pdf/1812.02341.pdf)
- [S-RL Toolbox: Environments, Datasets and Evaluation Metrics for State Representation Learning](SRL.md) 25 Sept 2018
- [dopamine](https://github.com/google/dopamine)
- [StarCraft II](https://github.com/deepmind/pysc2)
- [trfl](https://github.com/deepmind/trfl)
- [chainerrl](https://github.com/chainer/chainerrl)
- [PARL](https://github.com/PaddlePaddle/PARL)
- [DI-engine: a generalized decision intelligence engine. It supports various Deep RL algorithms](https://github.com/opendilab/DI-engine)
- [PPO x Family: Course in Chinese for Deep RL](https://github.com/opendilab/PPOxFamily)
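
Most of the suites above expose a Gym-style `reset`/`step` interface. As a minimal sketch of that interaction loop (assuming the `gymnasium` package and its bundled `CartPole-v1` task, with a random policy standing in for an agent):

```python
import gymnasium as gym

# Create an environment; CartPole-v1 ships with gymnasium and needs no extras.
env = gym.make("CartPole-v1")

obs, info = env.reset(seed=0)
episode_return = 0.0

for _ in range(500):
    action = env.action_space.sample()  # random policy as a placeholder agent
    obs, reward, terminated, truncated, info = env.step(action)
    episode_return += reward
    if terminated or truncated:  # episode finished or time limit reached
        print(f"episode return: {episode_return:.1f}")
        obs, info = env.reset()
        episode_return = 0.0

env.close()
```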

## Unsupervised

- [URLB: Unsupervised Reinforcement Learning Benchmark](https://arxiv.org/abs/2110.15191) 28 Oct 2021
- [APS: Active Pretraining with Successor Features](https://arxiv.org/abs/2108.13956) 31 Aug 2021
- [Behavior From the Void: Unsupervised Active Pre-Training](https://arxiv.org/abs/2103.04551) 8 Mar 2021
- [Reinforcement Learning with Prototypical Representations](https://arxiv.org/abs/2102.11271) 22 Feb 2021
- [Efficient Exploration via State Marginal Matching](https://arxiv.org/abs/1906.05274) 12 Jun 2019
- [Self-Supervised Exploration via Disagreement](https://arxiv.org/abs/1906.04161) 10 Jun 2019
- [Exploration by Random Network Distillation](https://arxiv.org/abs/1810.12894) 30 Oct 2018
- [Diversity is All You Need: Learning Skills without a Reward Function](https://arxiv.org/abs/1802.06070) 16 Feb 2018
- [Curiosity-driven Exploration by Self-supervised Prediction](https://arxiv.org/pdf/1705.05363) 15 May 2017
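
Several entries above (RND, curiosity-driven prediction, unsupervised pre-training) reward the agent for visiting states a learned model predicts poorly. A toy numpy sketch of the Random Network Distillation idea, with made-up state vectors and tiny linear maps standing in for the real feature networks:

```python
import numpy as np

rng = np.random.default_rng(0)
state_dim, feat_dim = 8, 16

# Fixed, randomly initialised target network (never trained).
W_target = rng.normal(size=(state_dim, feat_dim))
# Predictor network, trained to imitate the target on visited states.
W_pred = np.zeros((state_dim, feat_dim))

def intrinsic_reward(state):
    """Prediction error of the predictor against the frozen target features."""
    err = state @ W_pred - state @ W_target
    return float(np.mean(err ** 2))

def train_predictor(state, lr=1e-2):
    """One gradient step pulling the predictor towards the target features."""
    global W_pred
    err = state @ W_pred - state @ W_target        # shape: (feat_dim,)
    W_pred -= lr * np.outer(state, err)            # gradient of 0.5 * ||err||^2

# A novel state gives a large bonus; repeated training on the same state
# drives the bonus towards zero, mimicking decaying novelty.
s = rng.normal(size=state_dim)
for step in range(5):
    print(f"step {step}: intrinsic reward = {intrinsic_reward(s):.4f}")
    train_predictor(s)
```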

## Offline

- [PerSim: Data-efficient Offline Reinforcement Learning with Heterogeneous Agents via Personalized Simulators](https://arxiv.org/abs/2102.06961) 10 Nov 2021
- A General Offline Reinforcement Learning Framework for Interactive Recommendation, AAAI 2021

## Value based

- [Harnessing Structures for Value-Based Planning and Reinforcement Learning](SVRL.md) 5 Feb 2020 [arxiv](https://arxiv.org/abs/1909.12255) | [code](https://github.com/YyzHarry/SV-RL)
- [Recurrent Value Functions](RVF.md) 23 May 2019 [arxiv](https://arxiv.org/pdf/1905.09562.pdf)
- [Stochastic Lipschitz Q-Learning](LipschitzQ.md) 24 Apr 2019 [arxiv](https://arxiv.org/pdf/1904.10653.pdf)
- [TreeQN and ATreeC: Differentiable Tree-Structured Models for Deep Reinforcement Learning](https://arxiv.org/pdf/1710.11417) 8 Mar 2018
- [Distributed Prioritized Experience Replay](https://arxiv.org/pdf/1803.00933.pdf) 2 Mar 2018
- [Rainbow: Combining Improvements in Deep Reinforcement Learning](Rainbow.md) 6 Oct 2017
- [Learning from Demonstrations for Real World Reinforcement Learning](DQfD.md) 12 Apr 2017
- [Dueling Network Architecture](Dueling.md)
- [Double DQN](DDQN.md)
- [Prioritized Experience Replay](PER.md)
- [Deep Q-Networks](DQN.md)
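
Most entries in this section refine the same one-step temporal-difference target that DQN bootstraps from. As a reference point, here is a small numpy sketch contrasting the vanilla DQN target with the Double DQN variant; the Q tables are random placeholders rather than trained networks:

```python
import numpy as np

rng = np.random.default_rng(1)
n_states, n_actions, gamma = 5, 3, 0.99

# Stand-in Q tables; in deep RL these would be neural networks.
q_online = rng.normal(size=(n_states, n_actions))
q_target = rng.normal(size=(n_states, n_actions))

def dqn_target(reward, next_state, done):
    # Vanilla DQN: the target network both selects and evaluates the action.
    bootstrap = np.max(q_target[next_state])
    return reward + gamma * (1.0 - done) * bootstrap

def double_dqn_target(reward, next_state, done):
    # Double DQN: the online network selects, the target network evaluates,
    # which reduces the over-estimation bias of the max operator.
    best_action = int(np.argmax(q_online[next_state]))
    bootstrap = q_target[next_state, best_action]
    return reward + gamma * (1.0 - done) * bootstrap

print("DQN target:       ", dqn_target(1.0, next_state=2, done=0.0))
print("Double DQN target:", double_dqn_target(1.0, next_state=2, done=0.0))
```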

## Policy gradient

- [Phasic Policy Gradient](PPG.md) 9 Sep 2020 [arxiv](https://arxiv.org/pdf/2009.04416.pdf) [code](https://github.com/openai/phasic-policy-gradient)
- [An operator view of policy gradient methods](OVPG.md) 22 Jun 2020 [arxiv](https://arxiv.org/pdf/2006.11266.pdf)
- [Direct Policy Gradients: Direct Optimization of Policies in Discrete Action Spaces](DirPG.md) 14 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.06062.pdf)
- [Policy Gradient Search: Online Planning and Expert Iteration without Search Trees](PGS.md) 7 Apr 2019 [arxiv](https://arxiv.org/pdf/1904.03646.pdf)
- [Supervised Policy Update for Deep Reinforcement Learning](SPU.md) 24 Dec 2018 [arxiv](https://arxiv.org/pdf/1805.11706v4.pdf)
- [PPO-CMA: Proximal Policy Optimization with Covariance Matrix Adaptation](PPO-CMA.md) 5 Oct 2018 [arxiv](https://arxiv.org/pdf/1810.02541v6.pdf)
- [Clipped Action Policy Gradient](CAPG.md) 22 June 2018
- [Expected Policy Gradients for Reinforcement Learning](EPG.md) 10 Jan 2018
- [Proximal Policy Optimization Algorithms](PPO.md) 20 July 2017
- [Emergence of Locomotion Behaviours in Rich Environments](DPPO.md) 7 July 2017
- [Interpolated Policy Gradient: Merging On-Policy and Off-Policy Gradient Estimation for Deep Reinforcement Learning](IPG.md) 1 Jun 2017
- [Equivalence Between Policy Gradients and Soft Q-Learning](PGSQL.md)
- [Trust Region Policy Optimization](TRPO.md)
- [Reinforcement Learning with Deep Energy-Based Policies](DEBP.md)
- [Q-Prop: Sample-Efficient Policy Gradient with an Off-Policy Critic](QPROP.md)
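
Several of the papers above (PPO, PPG, PPO-CMA, DPPO) build on the clipped surrogate objective. A compact numpy sketch of that loss on a batch of synthetic log-probabilities and advantages, assuming the advantages have already been estimated (e.g. with GAE):

```python
import numpy as np

def ppo_clip_loss(logp_new, logp_old, advantages, clip_eps=0.2):
    """Negative clipped surrogate objective, averaged over the batch."""
    ratio = np.exp(logp_new - logp_old)              # pi_new(a|s) / pi_old(a|s)
    unclipped = ratio * advantages
    clipped = np.clip(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * advantages
    # The element-wise minimum makes the objective pessimistic, so large
    # policy steps away from the old policy are not rewarded.
    return -np.mean(np.minimum(unclipped, clipped))

rng = np.random.default_rng(0)
logp_old = rng.normal(-1.0, 0.1, size=64)
logp_new = logp_old + rng.normal(0.0, 0.05, size=64)  # small policy change
advantages = rng.normal(size=64)

print("PPO clipped loss:", ppo_clip_loss(logp_new, logp_old, advantages))
```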

## Explorations

- [Entropic Desired Dynamics for Intrinsic Control](EDDICT.md) 2021 [openreview](https://openreview.net/pdf?id=lBSSxTgXmiK)
- [Self-Supervised Exploration via Disagreement](Disagreement.md) 10 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.04161.pdf)
- [Approximate Exploration through State Abstraction](MBIE-EB.md) 24 Jan 2019
- [The Uncertainty Bellman Equation and Exploration](UBE.md) 15 Sep 2017
- [Noisy Networks for Exploration](NoisyNet.md) 30 Jun 2017 [implementation](https://github.com/Kaixhin/NoisyNet-A3C)
- [Count-Based Exploration in Feature Space for Reinforcement Learning](PhiEB.md) 25 Jun 2017
- [Count-Based Exploration with Neural Density Models](NDM.md) 14 Jun 2017
- [UCB and InfoGain Exploration via Q-Ensembles](QEnsemble.md) 11 Jun 2017
- [Minimax Regret Bounds for Reinforcement Learning](MMRB.md) 16 Mar 2017
- [Incentivizing Exploration In Reinforcement Learning With Deep Predictive Models](incentivizing.md)
- [EX2: Exploration with Exemplar Models for Deep Reinforcement Learning](EX2.md)

## Actor-Critic

- [Generalized Off-Policy Actor-Critic](Geoff-PAC.md) 27 Mar 2019
- [Soft Actor-Critic Algorithms and Applications](https://arxiv.org/pdf/1812.05905.pdf) 29 Jan 2019
- [The Reactor: A Sample-Efficient Actor-Critic Architecture](REACTOR.md) 15 Apr 2017
- [Sample Efficient Actor-Critic with Experience Replay](ACER.md)
- [Reinforcement Learning with Unsupervised Auxiliary Tasks](UNREAL.md)
- [Continuous control with deep reinforcement learning](DDPG.md)

## Model-based

- [Self-Consistent Models and Values](sc.md) 25 Oct 2021 [arxiv](https://arxiv.org/pdf/2110.12840.pdf)
- [When to use parametric models in reinforcement learning?](parametric.md) 12 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.05243.pdf)
- [Model Based Reinforcement Learning for Atari](https://arxiv.org/pdf/1903.00374.pdf) 5 Mar 2019
- [Model-Based Stabilisation of Deep Reinforcement Learning](MBDQN.md) 6 Sep 2018
- [Learning model-based planning from scratch](IBP.md) 19 July 2017

## Model-free + Model-based

- [Imagination-Augmented Agents for Deep Reinforcement Learning](I2As.md) 19 July 2017

## Hierarchical

- [Why Does Hierarchy (Sometimes) Work So Well in Reinforcement Learning?](HIRO.md) 23 Sep 2019 [arxiv](https://arxiv.org/pdf/1909.10618.pdf)
- [Language as an Abstraction for Hierarchical Deep Reinforcement Learning](HAL.md) 18 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.07343.pdf)

## Option

- [Variational Option Discovery Algorithms](VALOR.md) 26 July 2018
- [A Laplacian Framework for Option Discovery in Reinforcement Learning](LFOD.md) 16 Jun 2017

## Connection with other methods

- [Robust Imitation of Diverse Behaviors](GVG.md)
- [Learning human behaviors from motion capture by adversarial imitation](GAIL.md)
- [Connecting Generative Adversarial Networks and Actor-Critic Methods](GANAC.md)

## Connecting value and policy methods

- [Bridging the Gap Between Value and Policy Based Reinforcement Learning](PCL.md)
- [Policy gradient and Q-learning](PGQ.md)

## Reward design

- [End-to-End Robotic Reinforcement Learning without Reward Engineering](VICE.md) 16 Apr 2019 [arxiv](https://arxiv.org/pdf/1904.07854.pdf)
- [Reinforcement Learning with Corrupted Reward Channel](RLCRC.md) 23 May 2017

## Unifying

- [Multi-step Reinforcement Learning: A Unifying Algorithm](MSRL.md)

## Faster DRL

- [Neural Episodic Control](NEC.md)

## Multi-agent

- [No Press Diplomacy: Modeling Multi-Agent Gameplay](Dip.md) 4 Sep 2019 [arxiv](https://arxiv.org/pdf/1909.02128.pdf)
- [Options as responses: Grounding behavioural hierarchies in multi-agent RL](OPRE) 6 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.01470.pdf)
- [Evolutionary Reinforcement Learning for Sample-Efficient Multiagent Coordination](MERL.md) 18 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.07315.pdf)
- [A Regularized Opponent Model with Maximum Entropy Objective](ROMMEO.md) 17 May 2019 [arxiv](https://arxiv.org/pdf/1905.08087.pdf)
- [Deep Q-Learning for Nash Equilibria: Nash-DQN](NashDQN.md) 23 Apr 2019 [arxiv](https://arxiv.org/pdf/1904.10554.pdf)
- [Malthusian Reinforcement Learning](MRL.md) 3 Mar 2019 [arxiv](https://arxiv.org/pdf/1812.07019.pdf)
- [Bayesian Action Decoder for Deep Multi-Agent Reinforcement Learning](bad.md) 4 Nov 2018
- [Intrinsic Social Motivation via Causal Influence in Multi-Agent RL](ISMCI.md) 19 Oct 2018
- [QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning](http://www.cs.ox.ac.uk/people/shimon.whiteson/pubs/rashidicml18.pdf) 30 Mar 2018
- [Modeling Others using Oneself in Multi-Agent Reinforcement Learning](SOM.md) 26 Feb 2018
- [The Mechanics of n-Player Differentiable Games](SGA.md) 15 Feb 2018
- [Continuous Adaptation via Meta-Learning in Nonstationary and Competitive Environments](RoboSumo.md) 10 Oct 2017
- [Learning with Opponent-Learning Awareness](LOLA.md) 13 Sep 2017
- [Counterfactual Multi-Agent Policy Gradients](COMA.md)
- [Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments](MADDPG.md) 7 Jun 2017
- [Multiagent Bidirectionally-Coordinated Nets for Learning to Play StarCraft Combat Games](BiCNet.md) 29 Mar 2017

## New design

- [IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Architectures](https://arxiv.org/pdf/1802.01561.pdf) 9 Feb 2018
- [Reverse Curriculum Generation for Reinforcement Learning](RECUR.md)
- [Trial without Error: Towards Safe Reinforcement Learning via Human Intervention](HIRL.md)
- [Learning to Design Games: Strategic Environments in Deep Reinforcement Learning](DualMDP.md) 5 July 2017

## Multitask

- [Kickstarting Deep Reinforcement Learning](https://arxiv.org/pdf/1803.03835.pdf) 10 Mar 2018
- [Zero-Shot Task Generalization with Multi-Task Deep Reinforcement Learning](ZSTG.md) 7 Nov 2017
- [Distral: Robust Multitask Reinforcement Learning](Distral.md) 13 July 2017

## Observational Learning

- [Observational Learning by Reinforcement Learning](OLRL.md) 20 Jun 2017

## Meta Learning

- [Discovery of Useful Questions as Auxiliary Tasks](GVF.md) 10 Sep 2019 [arxiv](https://arxiv.org/pdf/1909.04607.pdf)
- [Meta-learning of Sequential Strategies](MetaSS.md) 8 May 2019 [arxiv](https://arxiv.org/pdf/1905.03030.pdf)
- [Efficient Off-Policy Meta-Reinforcement Learning via Probabilistic Context Variables](PEARL.md) 19 Mar 2019 [arxiv](https://arxiv.org/pdf/1903.08254.pdf)
- [Some Considerations on Learning to Explore via Meta-Reinforcement Learning](E2.md) 11 Jan 2019 [arxiv](https://arxiv.org/pdf/1803.01118.pdf)
- [Meta-Gradient Reinforcement Learning](MGRL.md) 24 May 2018 [arxiv](https://arxiv.org/pdf/1805.09801.pdf)
- [ProMP: Proximal Meta-Policy Search](ProMP.md) 16 Oct 2018 [arxiv](https://arxiv.org/pdf/1810.06784)
- [Unsupervised Meta-Learning for Reinforcement Learning](UML.md) 12 Jun 2018

## Distributional

- [GAN Q-learning](GANQL.md) 20 July 2018
- [Implicit Quantile Networks for Distributional Reinforcement Learning](IQN.md) 14 Jun 2018
- [Nonlinear Distributional Gradient Temporal-Difference Learning](GTD.md) 20 May 2018
- [Distributed Distributional Deterministic Policy Gradients](D4PG.md) 23 Apr 2018
- [An Analysis of Categorical Distributional Reinforcement Learning](C51-analysis.md) 22 Feb 2018
- [Distributional Reinforcement Learning with Quantile Regression](QR-DQN.md) 27 Oct 2017
- [A Distributional Perspective on Reinforcement Learning](C51.md) 21 July 2017

## Planning

- [Search on the Replay Buffer: Bridging Planning and Reinforcement Learning](SoRB.md) 12 June 2019 [arxiv](https://arxiv.org/pdf/1906.05253.pdf)

## Safety

- [Robust Reinforcement Learning for Continuous Control with Model Misspecification](MPO.md) 18 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.07516.pdf)
- [Verifiable Reinforcement Learning via Policy Extraction](Viper.md) 22 May 2018 [arxiv](https://arxiv.org/pdf/1805.08328.pdf)

## Inverse RL

- [Addressing Sample Inefficiency and Reward Bias in Inverse Reinforcement Learning](OP-GAIL.md) 9 Sep 2018

## No reward RL

- [Fast Task Inference with Variational Intrinsic Successor Features](VISR.md) 2 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.05030.pdf)
- [Curiosity-driven Exploration by Self-supervised Prediction](https://arxiv.org/pdf/1705.05363) 15 May 2017

## Time

- [Interval timing in deep reinforcement learning agents](Intervaltime.md) 31 May 2019 [arxiv](https://arxiv.org/pdf/1905.13469.pdf)
- [Time Limits in Reinforcement Learning](PEB.md)

## Adversarial learning

- [Sample-efficient Adversarial Imitation Learning from Observation](LQR+GAIfO.md) 18 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.07374.pdf)

## Use Natural Language

- [Using Natural Language for Reward Shaping in Reinforcement Learning](LEARN.md) 31 May 2019 [arxiv](https://www.cs.utexas.edu/~ai-lab/downloadPublication.php?filename=http://www.cs.utexas.edu/users/ml/papers/goyal.ijcai19.pdf&pubid=127757)

## Generative and contrastive representation learning

- [Unsupervised State Representation Learning in Atari](ST-DIM.md) 19 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.08226.pdf)

## Belief

- [Shaping Belief States with Generative Environment Models for RL](GenerativeBelief.md) 24 Jun 2019 [arxiv](https://arxiv.org/pdf/1906.09237v2.pdf)

## PAC

- [Provably Convergent Off-Policy Actor-Critic with Function Approximation](COF-PAC.md) 11 Nov 2019 [arxiv](https://arxiv.org/pdf/1911.04384.pdf)

## Applications

- [Benchmarks for Deep Off-Policy Evaluation](bdope.md) 30 Mar 2021 [arxiv](https://arxiv.org/pdf/2103.16596.pdf)
- [Learning Reciprocity in Complex Sequential Social Dilemmas](Reciprocity.md) 19 Mar 2019 [arxiv](https://arxiv.org/pdf/1903.08082.pdf)
- [DeepMimic: Example-Guided Deep Reinforcement Learning of Physics-Based Character Skills](dmimic.md) 9 Apr 2018
- [Tuning Recurrent Neural Networks with Reinforcement Learning](RLTUNER.md)