{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advanced usage of the package\n",
    "This short example shows the interfaces of the `StableRLS` class that a custom environment can override."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# this contains the environment class\n",
    "import stablerls.gymFMU as gymFMU\n",
    "# this will read our config file\n",
    "import stablerls.configreader as cfg_reader\n",
    "\n",
    "import gymnasium as gym\n",
    "import numpy as np\n",
    "import logging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class my_env(gymFMU.StableRLS):\n",
    "    def get_action_space(self):\n",
    "        \"\"\"My custom action space\"\"\"\n",
    "        # We derive the shape of this space from the FMU, but we could also define it on our own.\n",
    "        # We can do the same for \"get_observation_space\", but then have to derive the\n",
    "        # shape from self.fmu.output\n",
    "        high = np.arange(len(self.fmu.input)).astype(np.float32)\n",
    "        high[:] = np.inf\n",
    "        low = high[:] * 0\n",
    "        return gym.spaces.Box(low, high)\n",
    "    \n",
    "    def reset_(self, seed=None):\n",
    "        \"\"\"My custom reset function called during each reset\"\"\"\n",
    "        # normally we get the first observation by calling \n",
    "        # self._next_observation(steps=1)\n",
    "        # in this case we hard-code it and don't set all inputs to zero,\n",
    "        # which is the default behavior\n",
    "        first_observation = 5\n",
    "        return first_observation\n",
    "\n",
    "    def obs_processing(self, observation):\n",
    "        \"\"\"If we don't want to train the agent with all observations, we can modify them\"\"\"\n",
    "        return 2\n",
    "\n",
    "    def FMU_external_input(self):\n",
    "        \"\"\"We can set specific inputs of the FMU independent of the agent\"\"\"\n",
    "        self.fmu.fmu.setReal([self.fmu.input[0].valueReference], [4])\n",
    "\n",
    "    def export_results(self):\n",
    "        \"\"\"This function is not called by default but allows access to save the internal results\"\"\"\n",
    "        pass\n",
    "    "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For simplicity, we already included the compiled FMU models for Linux and Windows. However, if you own MATLAB, you can compile the `*.slx` models on your own. If you want to compile the model, you can keep the default `FMU_path` in the config file. Otherwise, please change it to `00-Simulink_Windows.fmu` or `00-Simulink_Linux.fmu`, depending on your operating system."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First of all we have to read the config file\n",
    "config = cfg_reader.configreader('00-config.cfg')\n",
    "\n",
    "# If we want to, we can compile the Simulink model.\n",
    "# MATLAB and the MATLAB Engine for Python are required!\n",
    "if False:\n",
    "    import stablerls.createFMU as createFMU\n",
    "    createFMU.createFMU(config,'SimulinkExample00.slx')"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The FMU is now available, so we can run our custom class."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "We can see our first observation: 5\n",
      "Action: [1 2 3 4]\n",
      "Observation: 2\n",
      "Reward: 1\n",
      "\n",
      "Action: [2 4 6 8]\n",
      "Observation: 2\n",
      "Reward: 1\n",
      "\n",
      "Action: [ 4  8 12 16]\n",
      "Observation: 2\n",
      "Reward: 1\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# create instance of the model\n",
    "env = my_env(config)\n",
    "\n",
    "# the default reset call before the simulation starts now returns 5\n",
    "obs = env.reset()\n",
    "print('We can see our first observation:', obs)\n",
    "\n",
    "# initial action; it is doubled after every step (see the loop below)\n",
    "action = np.array([1,2,3,4])\n",
    "\n",
    "terminated = False\n",
    "truncated = False\n",
    "# Now we always observe 2 as specified\n",
    "while not (terminated or truncated):\n",
    "    observation, reward, terminated, truncated, info  = env.step(action)\n",
    "    print(f'Action: {action}\\nObservation: {observation}\\nReward: {reward}\\n')\n",
    "    action = action * 2\n",
    "        \n",
    "env.close()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can also see that the first input is now always equal to 4 and therefore independent of the agent's actions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 4.,  2.,  3.,  4.],\n",
       "       [ 4.,  4.,  6.,  8.],\n",
       "       [ 4.,  8., 12., 16.]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.inputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}