Acme : サンプル : クイックスタート (強化学習) (翻訳/解説)

翻訳 : (株)クラスキャットセールスインフォメーション
作成日時 : 06/13/2020

* 本ページは、Acme の以下のドキュメントを翻訳した上で適宜、補足説明したものです：

Acme: Quickstart

* サンプルコードの動作確認はしておりますが、必要な場合には適宜、追加改変しています。
* ご自由にリンクを張って頂いてかまいませんが、sales-info@classcat.com までご一報いただけると嬉しいです。

サンプル : クイックスタート

◆ Acme をインストールして貴方の最初の D4PG エージェントを訓練するためのガイド

環境ライブラリを選択する

Note: dm_control は有効な Mujoco ライセンスを必要とします。

environment_library = 'gym'  # @param ['dm_control', 'gym']

Mujoco ライセンスをここで追加する

Note: dm_control のためだけに必要です。

# MuJoCo license key. Paste the key text between the triple quotes; it is
# written to `mjkey.txt` by the installation cell when `dm_control` is selected.
mjkey = """
""".strip()

# Fail early with a clear message when `dm_control` is selected without a key.
if not mjkey and environment_library == 'dm_control':
  raise ValueError(
      'A Mujoco license is required for `dm_control`, if you do not have one '
      'consider selecting `gym` from the dropdown menu in the cell above.')

インストール

Acme をインストールする

!pip install dm-acme
!pip install dm-acme[reverb]
!pip install dm-acme[tf]

環境ライブラリをインストールする

有効なライセンスなしでは dm_control 環境を利用することはできませんが、gym 環境を利用して依然としてこの colab に従うことができます。

個人用 (personal) の Mujoco ライセンス (機関用 (institutional) ではないもの) を持っている場合は、https://research.google.com/colaboratory/local-runtimes.html の手順に従って、ローカルマシン上で Jupyter カーネルを実行する必要があるかもしれません。そうすることで、https://github.com/deepmind/dm_control の手順に従い、個人用 Mujoco ライセンスを使って dm_control をインストールできるようになります。

#@test {"skip": true}
if environment_library == 'dm_control':
  mujoco_dir = "$HOME/.mujoco"

  # Install OpenGL dependencies
  !apt-get update && apt-get install -y --no-install-recommends \
    libgl1-mesa-glx libosmesa6 libglew2.0

  # Get MuJoCo binaries
  !wget -q https://www.roboti.us/download/mujoco200_linux.zip -O mujoco.zip
  !unzip -o -q mujoco.zip -d "$mujoco_dir"

  # Copy over MuJoCo license
  !echo "$mjkey" > "$mujoco_dir/mjkey.txt"

  # Install dm_control
  !pip install dm_control

  # Configure dm_control to use the OSMesa rendering backend
  %env MUJOCO_GL=osmesa

  # Check that the installation succeeded
  try:
    from dm_control import suite
    env = suite.load('cartpole', 'swingup')
    pixels = env.physics.render()
  except Exception as e:
    raise e from RuntimeError(
        'Something went wrong during installation. Check the shell output above '
        'for more information. If you do not have a valid Mujoco license, '
        'consider selecting `gym` in the dropdown menu at the top of this Colab.')
  else:
    del suite, env, pixels

elif environment_library == 'gym':
  !pip install gym

可視化パッケージをインストールする

!sudo apt-get install -y xvfb ffmpeg
!pip install imageio
!pip install PILLOW
!pip install pyvirtualdisplay

モジュールをインポートする

import IPython

from acme import environment_loop
from acme import specs
from acme import wrappers
from acme.agents.tf import d4pg
from acme.tf import networks
from acme.tf import utils as tf2_utils
from acme.utils import loggers
import numpy as np
import sonnet as snt

# Import the selected environment lib
if environment_library == 'dm_control':
  from dm_control import suite
elif environment_library == 'gym':
  import gym

# Imports required for visualization
import pyvirtualdisplay
import imageio
import base64

# Set up a virtual display for rendering.
# NOTE(review): presumably backed by the `xvfb` package installed in the
# visualization cell; `visible=0` appears to keep the display off-screen — confirm.
display = pyvirtualdisplay.Display(visible=0, size=(1400, 900)).start()

環境をロードする

これで環境をロードできるようになりました。以下では環境を作成し、その環境の仕様 (spec) を取得します。

# Build the environment for the library selected in `environment_library`.
if environment_library == 'dm_control':
  environment = suite.load('cartpole', 'balance')

elif environment_library == 'gym':
  environment = gym.make('MountainCarContinuous-v0')
  environment = wrappers.GymWrapper(environment)  # To dm_env interface.

else:
  # Report the unsupported selection (the variable holding it is
  # `environment_library`).
  raise ValueError(
      "Unknown environment library: {};".format(environment_library) +
      "choose among ['dm_control', 'gym'].")

# Make sure the environment outputs single-precision floats.
environment = wrappers.SinglePrecisionWrapper(environment)

# Grab the spec of the environment.
environment_spec = specs.make_environment_spec(environment)

D4PG エージェントを作成する

#@title Build agent networks

# Get total number of action dimensions from action spec.
num_dimensions = np.prod(environment_spec.actions.shape, dtype=int)

# Create the shared observation network; here simply a state-less operation.
observation_network = tf2_utils.batch_concat

# Create the deterministic policy network: an MLP followed by a linear layer
# with one output per action dimension, then `TanhToSpec` built from the
# action spec.
policy_network = snt.Sequential([
    networks.LayerNormMLP((256, 256, 256), activate_final=True),
    networks.NearZeroInitializedLinear(num_dimensions),
    networks.TanhToSpec(environment_spec.actions),
])

# Create the distributional critic network.
# NOTE(review): `vmin`/`vmax`/`num_atoms` presumably define the support of the
# value distribution (51 atoms over [-150, 150]) — confirm against the
# `DiscreteValuedHead` documentation.
critic_network = snt.Sequential([
    # The multiplexer concatenates the observations/actions.
    networks.CriticMultiplexer(),
    networks.LayerNormMLP((512, 512, 256), activate_final=True),
    networks.DiscreteValuedHead(vmin=-150., vmax=150., num_atoms=51),
])

# Create a logger for the agent and environment loop.
# NOTE(review): `time_delta` is presumably the minimum interval between
# printed log lines — confirm units.
agent_logger = loggers.TerminalLogger(label='agent', time_delta=10.)
env_loop_logger = loggers.TerminalLogger(label='env_loop', time_delta=10.)

# Create the D4PG agent from the environment spec and the networks above.
# NOTE(review): `sigma` is presumably the scale of the exploration noise and
# `checkpoint=False` presumably disables checkpointing — confirm.
agent = d4pg.D4PG(
    environment_spec=environment_spec,
    policy_network=policy_network,
    critic_network=critic_network,
    observation_network=observation_network,
    sigma=1.0,
    logger=agent_logger,
    checkpoint=False
)

# Create a loop connecting this agent to the environment created above.
env_loop = environment_loop.EnvironmentLoop(
    environment, agent, logger=env_loop_logger)

訓練ループを実行する

# Run `num_episodes` training episodes.
# Rerun this cell until the agent has learned the given task.
env_loop.run(num_episodes=100)

評価ループを可視化する

レンダリングと可視化のためのヘルパー関数

# Create a simple helper function to render a frame from the current state of
# the environment. The definition depends on the selected environment library.
if environment_library == 'dm_control':
  def render(env):
    """Render the current physics state of `env` using camera 0."""
    return env.physics.render(camera_id=0)
elif environment_library == 'gym':
  def render(env):
    """Render `env.environment` in `rgb_array` mode and return the result."""
    return env.environment.render(mode='rgb_array')
else:
  # Report the unsupported selection (the variable holding it is
  # `environment_library`).
  raise ValueError(
      "Unknown environment library: {};".format(environment_library) +
      "choose among ['dm_control', 'gym'].")

def display_video(frames, filename='temp.mp4'):
  """Save `frames` as a video file and return an inline HTML player for it.

  Args:
    frames: Iterable of image frames; each one is appended to the video.
    filename: Path of the video file to write.

  Returns:
    An `IPython.display.HTML` object embedding the video as base64 data.
  """

  # Write video
  with imageio.get_writer(filename, fps=60) as video:
    for frame in frames:
      video.append_data(frame)

  # Read the encoded file back; the context manager closes the handle.
  with open(filename, 'rb') as video_file:
    b64_video = base64.b64encode(video_file.read())

  # NOTE(review): the HTML tag was lost when this page was extracted; the tag
  # below is a reconstruction — confirm the attributes against the upstream
  # notebook.
  video_tag = ('<video  width="320" height="240" controls alt="test" '
               'src="data:video/mp4;base64,{0}">').format(b64_video.decode())

  return IPython.display.HTML(video_tag)

環境内でエージェントを 1 エピソード実行して可視化する

# Begin a new episode; the first recorded frame is the state right after reset.
timestep = environment.reset()
frames = []
frames.append(render(environment))

# Act, step and record one frame per transition until the episode ends.
while True:
  if timestep.last():
    break

  # Let the agent pick an action for the current observation and apply it.
  action = agent.select_action(timestep.observation)
  timestep = environment.step(action)

  # Capture the scene after the step.
  frames.append(render(environment))

# Stack the collected frames and show the resulting video of the behaviour.
display_video(np.array(frames))

以上

月	火	水	木	金	土	日
1	2	3	4	5	6	7
8	9	10	11	12	13	14
15	16	17	18	19	20	21
22	23	24	25	26	27	28
29	30