dl_stock

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import LSTM\n",
    "from tensorflow.keras.layers import Dense\n",
    "from tensorflow.keras.layers import Dropout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_train = pd.read_csv('Google_Stock_Price_Train.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_train.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "training_set = dataset_train.iloc[:, 1: 2].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "training_set.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc = MinMaxScaler(feature_range = (0, 1))\n",
    "#fit: get min/max of train data\n",
    "training_set_scaled = sc.fit_transform(training_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 60 timesteps and 1 output\n",
    "X_train = []\n",
    "y_train = []\n",
    "for i in range(60, len(training_set_scaled)):\n",
    " X_train.append(training_set_scaled[i-60: i, 0])\n",
    " y_train.append(training_set_scaled[i, 0])\n",
    "X_train, y_train = np.array(X_train), np.array(y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#data reshaping\n",
    "X_train = np.reshape(X_train, newshape =\n",
    " (X_train.shape[0], X_train.shape[1], 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(18, 8))\n",
    "plt.plot(dataset_train['Open'])\n",
    "plt.title(\"Google Stock Open Prices\")\n",
    "plt.xlabel(\"Time (oldest -> latest)\")\n",
    "plt.ylabel(\"Stock Open Price\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(18, 8))\n",
    "plt.plot(dataset_train['Low'])\n",
    "plt.title(\"Google Stock Low Prices\")\n",
    "plt.xlabel(\"Time (oldest -> latest)\")\n",
    "plt.ylabel(\"Stock Lowest Price\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "regressor = Sequential()\n",
    "#add 1st lstm layer\n",
    "regressor.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 1)))\n",
    "regressor.add(Dropout(rate = 0.2))\n",
    "##add 2nd lstm layer: 50 neurons\n",
    "regressor.add(LSTM(units = 50, return_sequences = True))\n",
    "regressor.add(Dropout(rate = 0.2))\n",
    "##add 3rd lstm layer\n",
    "regressor.add(LSTM(units = 50, return_sequences = True))\n",
    "regressor.add(Dropout(rate = 0.2))\n",
    "##add 4th lstm layer\n",
    "regressor.add(LSTM(units = 50, return_sequences = False))\n",
    "regressor.add(Dropout(rate = 0.2))\n",
    "##add output layer\n",
    "regressor.add(Dense(units = 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "regressor.fit(x = X_train, y = y_train, batch_size = 32, epochs = 100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_test = pd.read_csv('Google_Stock_Price_Test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "#keras only takes numpy array\n",
    "real_stock_price = dataset_test.iloc[:, 1: 2].values\n",
    "real_stock_price.shape\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']),\n",
    " axis = 0)\n",
    "##use .values to make numpy array\n",
    "inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#reshape data to only have 1 col\n",
    "inputs = inputs.reshape(-1, 1)\n",
    "#scale input\n",
    "inputs = sc.transform(inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_test = []\n",
    "for i in range(60, len(inputs)):\n",
    " X_test.append(inputs[i-60:i, 0])\n",
    "X_test = np.array(X_test)\n",
    "#add dimension of indicator\n",
    "X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predicted_stock_price = regressor.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#inverse the scaled value\n",
    "predicted_stock_price = sc.inverse_transform(predicted_stock_price)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(real_stock_price, color = 'red', label = 'Real price')\n",
    "plt.plot(predicted_stock_price, color = 'blue', label = 'Predicted price')\n",
    "plt.title('Google price prediction')\n",
    "plt.xlabel('Time')\n",
    "plt.ylabel('Price')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}