{ "cells": [ { "cell_type": "code", "execution_count": 27, "id": "2c3b23c4-de99-422e-89ae-3815b332f66a", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import LabelEncoder\n", "import torch\n", "from torch.utils.data import TensorDataset, DataLoader\n", "\n", "url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/ionosphere.csv'\n", "df = pd.read_csv(url, header=None)\n", "\n", "# Separate features and target\n", "X = df.iloc[:, :-1].astype('float32')\n", "y = df.iloc[:, -1]\n", "\n", "label_encoder = LabelEncoder()\n", "y = label_encoder.fit_transform(y).astype('float32')" ] }, { "cell_type": "code", "execution_count": 31, "id": "8bd6fa56-50dc-4cbf-9552-28392cab5095", "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(\n", " X, y, test_size=0.33, random_state=42\n", ")" ] }, { "cell_type": "code", "execution_count": 32, "id": "e42d2740-983e-4869-af4d-e04f894fe801", "metadata": {}, "outputs": [], "source": [ "# convert to torch tensors\n", "X_train_tensor = torch.tensor(X_train.values)\n", "y_train_tensor = torch.tensor(y_train).unsqueeze(1) # Add dimension for compatibility\n", "X_test_tensor = torch.tensor(X_test.values)\n", "y_test_tensor = torch.tensor(y_test).unsqueeze(1)\n", "\n", "# dataset\n", "train_dataset = TensorDataset(X_train_tensor, y_train_tensor)\n", "test_dataset = TensorDataset(X_test_tensor, y_test_tensor)\n", "\n", "# dataloader\n", "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n", "test_loader = DataLoader(test_dataset, batch_size=1024, shuffle=False)\n" ] }, { "cell_type": "code", "execution_count": 19, "id": "1c385811-7f26-4163-a04b-b63ffa32591f", "metadata": {}, "outputs": [], "source": [ "import torch.nn as nn\n", "import torch.nn.init as init\n", "\n", "class MLP(nn.Module):\n", " def __init__(self, input_size):\n", " super(MLP, self).__init__()\n", " self.hidden1 = nn.Linear(input_size, 10)\n", " init.kaiming_uniform_(self.hidden1.weight, nonlinearity='relu') # designed for ReLU\n", " self.act1 = nn.ReLU()\n", " self.hidden2 = nn.Linear(10, 8)\n", " init.kaiming_uniform_(self.hidden2.weight, nonlinearity='relu') # you must always remember to do this.\n", " self.act2 = nn.ReLU()\n", " self.hidden3 = nn.Linear(8, 1)\n", " init.xavier_uniform_(self.hidden3.weight) # designed for sigmoid, tanh activations. 
{ "cell_type": "code", "execution_count": 20, "id": "aeb7e1b3-9ec0-4c4c-96b9-1a68fae936ce", "metadata": {}, "outputs": [], "source": [ "import torch.optim as optim\n", "\n", "def train_model(model, train_loader, epochs=100):\n", "    criterion = nn.BCELoss()  # binary cross-entropy on the sigmoid outputs\n", "    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)\n", "    model.train()\n", "    for epoch in range(epochs):\n", "        for inputs, targets in train_loader:\n", "            optimizer.zero_grad()\n", "            outputs = model(inputs)\n", "            loss = criterion(outputs, targets)\n", "            loss.backward()\n", "            optimizer.step()\n" ] }, { "cell_type": "code", "execution_count": 33, "id": "f3586d5f-b020-4803-b768-4424b1e79fa1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(351, 34)" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.metrics import accuracy_score\n", "import numpy as np\n", "\n", "def evaluate_model(model, test_loader):\n", "    model.eval()\n", "    predictions, actuals = [], []\n", "    with torch.no_grad():\n", "        for inputs, targets in test_loader:\n", "            outputs = model(inputs)\n", "            predicted = outputs.round()  # threshold the sigmoid outputs at 0.5\n", "            predictions.append(predicted.numpy())  # already on CPU and detached under torch.no_grad()\n", "            actuals.append(targets.numpy())\n", "    predictions = np.vstack(predictions)\n", "    actuals = np.vstack(actuals)\n", "    accuracy = accuracy_score(actuals, predictions)\n", "    return accuracy\n", "\n", "X.shape  # (samples, features) of the full design matrix" ] }, { "cell_type": "code", "execution_count": 22, "id": "f905bf7a-4f1c-497f-8f49-9e0cbfe00545", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.853\n" ] } ], "source": [ "model = MLP(input_size=X.shape[1])  # X.shape is (samples, features), so X.shape[1] is the number of input features\n", "\n", "train_model(model, train_loader)\n", "\n", "accuracy = evaluate_model(model, test_loader)\n", "print(f'Accuracy: {accuracy:.3f}')" ] },
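{ "cell_type": "markdown", "id": "added-metrics-note", "metadata": {}, "source": [ "Accuracy alone can hide class-specific behaviour; the dataset notes below mention that accuracy on \"good\" returns was much higher than on \"bad\" returns in the original study. The next cell is an optional sketch added for illustration (not executed here): assuming the trained `model`, the test tensors and `label_encoder` from the cells above are available, it prints a confusion matrix and per-class precision/recall with scikit-learn. The names `y_true` and `y_pred` are purely illustrative." ] }, { "cell_type": "code", "execution_count": null, "id": "added-metrics-report", "metadata": {}, "outputs": [], "source": [ "# Per-class evaluation sketch (assumes the cells above have been executed; illustrative only)\n", "from sklearn.metrics import classification_report, confusion_matrix\n", "\n", "model.eval()\n", "with torch.no_grad():\n", "    y_pred = model(X_test_tensor).round().numpy().ravel()  # hard 0/1 predictions on the held-out set\n", "y_true = y_test_tensor.numpy().ravel()\n", "\n", "# label_encoder.classes_ holds the original string labels in encoded order\n", "print(confusion_matrix(y_true, y_pred))\n", "print(classification_report(y_true, y_pred, target_names=list(label_encoder.classes_)))" ] },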
Accuracy\n", " on \"good\" instances was much higher than for \"bad\" instances. Backprop\n", " was tested with several different numbers of hidden units (in [0,15])\n", " and incremental results were also reported (corresponding to how well\n", " the different variants of backprop did after a periodic number of \n", " epochs).\n", "\n", " David Aha (aha@ics.uci.edu) briefly investigated this database.\n", " He found that nearest neighbor attains an accuracy of 92.1%, that\n", " Ross Quinlan's C4 algorithm attains 94.0% (no windowing), and that\n", " IB3 (Aha \\& Kibler, IJCAI-1989) attained 96.7% (parameter settings:\n", " 70% and 80% for acceptance and dropping respectively).\n", "\n", "4. Relevant Information:\n", " This radar data was collected by a system in Goose Bay, Labrador. This\n", " system consists of a phased array of 16 high-frequency antennas with a\n", " total transmitted power on the order of 6.4 kilowatts. See the paper\n", " for more details. The targets were free electrons in the ionosphere.\n", " \"Good\" radar returns are those showing evidence of some type of structure \n", " in the ionosphere. \"Bad\" returns are those that do not; their signals pass\n", " through the ionosphere. \n", "\n", " Received signals were processed using an autocorrelation function whose\n", " arguments are the time of a pulse and the pulse number. There were 17\n", " pulse numbers for the Goose Bay system. Instances in this databse are\n", " described by 2 attributes per pulse number, corresponding to the complex\n", " values returned by the function resulting from the complex electromagnetic\n", " signal.\n", "\n", "5. Number of Instances: 351\n", "\n", "6. Number of Attributes: 34 plus the class attribute\n", " -- All 34 predictor attributes are continuous\n", "\n", "7. Attribute Information: \n", " -- All 34 are continuous, as described above\n", " -- The 35th attribute is either \"good\" or \"bad\" according to the definition\n", " summarized above. This is a binary classification task.\n", "\n", "8. Missing Values: None" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.9" } }, "nbformat": 4, "nbformat_minor": 5 }