Compare revisions (keras-smoke-detection)

Changes are shown as if the source revision was being merged into the target revision.
Commits on Source (219)
Showing with 28310 additions and 20 deletions
image: gcr.io/kaniko-project/executor:debug-v0.16.0
stages:
  - build-and-push
build-and-push-job:
  stage: build-and-push
  tags:
    - build-as-docker
  except:
    changes:
      - "**/*.yaml"
      - "README.md"
      - "screenshots/*.PNG"
  script:
    - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json
    - /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/Dockerfile --destination $CI_REGISTRY_IMAGE:${CI_COMMIT_SHA:0:8} --destination $CI_REGISTRY_IMAGE:latest
FROM tensorflow/tensorflow:2.0.0a0-gpu-py3
RUN apt-get update
RUN apt-get install -y nano
RUN pip install --upgrade pip
RUN pip install numpy scipy scikit-learn pillow h5py keras
RUN pip install --upgrade imutils
RUN pip install --upgrade scikit-learn
RUN pip install --upgrade matplotlib
RUN pip install -q tensorflow==2.0.0-beta1
RUN apt-get install -y locate
ARG cuda_version=10.1
ARG cudnn_version=7
FROM nvidia/cuda:${cuda_version}-cudnn${cudnn_version}-devel
ENV NB_USER kerasTester
ENV NB_UID 1000
# RUN mkdir /userdata/kerasData
RUN apt-get update && \
    apt-get -y install sudo
RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \
    # chown $NB_USER $CONDA_DIR -R && \
    # chown $NB_USER /userdata/kerasData -R && \
    # chown $NB_USER / -R && \
    # mkdir -p / && \
    sh -c 'echo "$NB_USER:test" | chpasswd' && \
    usermod -aG sudo $NB_USER
WORKDIR /userdata/kerasData
# Install system packages
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
    bzip2 \
    g++ \
    git \
    graphviz \
    libgl1-mesa-glx \
    libhdf5-dev \
    openmpi-bin \
    xvfb \
    screen \
    wget && \
    rm -rf /var/lib/apt/lists/*
# Install conda
ENV CONDA_DIR /opt/conda
ENV PATH $CONDA_DIR/bin:$PATH
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.7.12.1-Linux-x86_64.sh -O ~/miniconda.sh && \
    /bin/bash ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh && \
    /opt/conda/bin/conda clean -tipsy && \
    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc
# Install Python packages and keras
ARG python_version=3.6
RUN conda config --append channels conda-forge
RUN conda install -y python=${python_version} && \
    pip install --upgrade pip && \
    pip install \
      sklearn_pandas \
      opencv-python && \
    conda install -y \
      bcolz \
      h5py \
      statsmodels \
      matplotlib \
      mkl \
      nose \
      notebook \
      Pillow \
      pandas \
      pydot \
      pyyaml \
      scikit-learn \
      tensorflow-gpu \
      six \
      theano \
      mkdocs \
      numpy
RUN pip install keras
# RUN git clone git://github.com/keras-team/keras.git /src && pip install -e /src[tests]
RUN conda clean -yt
# pip install git+git://github.com/keras-team/keras.git && \
USER $NB_USER
#ADD theanorc /home/keras/.theanorc
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
ENV PYTHONPATH='/src/:$PYTHONPATH'
# Keras Container on Nautilus
This project lets you use Keras from a Jupyter notebook on Nautilus (as an importable package). With it, you can train Keras models on the Nautilus cloud.
## Getting Started
These instructions will get you a copy of the project up and running on your namespace.
### Prerequisites
- A Nautilus namespace
- An Nvidia GPU
## Components
The project has the following components:
```
- Dockerfile (Dockerfile)
- Continuous Integration YAML (.gitlab-ci.yml)
- An example Jupyter notebook (ClassificationExample.ipynb)
- Nautilus deployment YAML (kerasDeloyment.yaml)
```
### Dockerfile
```
This file builds the environment needed to run Keras in a Jupyter notebook. Unless
truly needed, please avoid editing this file.
```
### Continuous Integration YAML
```
This file drives GitLab's continuous integration feature. Nautilus uses kaniko instead of Docker; to switch back to a Docker-based build, replace the current .gitlab-ci.yml with the "dockerBased-ci.yml" file.
```
### Jupyter notebook
```
This is the notebook I used to train a wildfire classification model. Its structure and import commands can be reused to run Keras in
other notebooks. I go over the specific details below.
```
### Nautilus Deployment Yaml
If you are planning to use this implementation on another Nautilus namespace, this portion of the README is especially important. Here are the important aspects of this YAML:
1. Change the namespace and deployment name <br /> <br />
![Changing the names](screenshots/nautilusDeploymentNamespaceName.PNG) <br />
**Change the name and the namespace entries to the current working namespace and a suitable name**
2. Change the resource requests <br /> <br />
![Change the resource limits and requests](screenshots/nautilusDeploymentResources.PNG) <br />
**Change the numbers to suit the task**
3. Mount volume <br /><br />
![Mount the volume onto a path if already created. To find out how to create a persistent volume claim, refer to the Nautilus documentation](screenshots/nautilusDeploymentStorage.PNG) <br />
**Very important for crash resistance. I highly recommend saving all work onto the mounted directory**
4. Choose GPU type <br /><br />
![Choose correctly](screenshots/nautilusDeploymentGPUrequest.PNG) <br />
If doing intensive training, choose larger/more expensive GPUs (the commented nodeAffinity block in kerasDeloyment.yaml pins GPU types; see the sketch below)
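For reference, here is a minimal sketch of the manifest fields these four steps touch. The names, resource numbers, and GPU type are placeholders taken from the sample kerasDeloyment.yaml; adjust them to your namespace:
```
metadata:
  name: keras-2               # step 1: a suitable deployment name
  namespace: digits           # step 1: your working namespace
spec:
  template:
    spec:
      containers:
      - name: keras-kube
        resources:            # step 2: size requests/limits to the task
          limits:
            memory: "64Gi"
            cpu: "8"
            nvidia.com/gpu: 1
          requests:
            memory: "32Gi"
            cpu: "2"
            nvidia.com/gpu: 1
        volumeMounts:         # step 3: mount your persistent volume claim
        - mountPath: /userdata/kerasData
          name: modeltraining
      affinity:               # step 4: pin the GPU type (optional)
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: gpu-type
                operator: In
                values: ["V100"]
```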
## Using the Components
### Starting the development and accessing jupyter notebook
1. Open the kerasDeloyment.yaml file
2. Choose the RAW file format <br />
![](screenshots/rawfile.PNG) <br />
3. Copy the URL of the RAW file <br />
![](screenshots/rawaddress.PNG) <br />
4. Execute the YAML file on the Nautilus namespace (see the command sketch after this list) <br />
![](screenshots/kubectinit.PNG)
5. Exec into the Nautilus pod <br />
![](screenshots/execinto.PNG)
6. Navigate to /userdata/kerasData and start Jupyter Notebook <br /><br />
![](screenshots/startjupyter.PNG)<br />
**Note: the choice of port number does not matter, as long as no other process is running on that port. If a port is already in use, Jupyter will automatically assign another port. Make sure to match the port number in the next step** <br /> <br />
![](screenshots/usingotherports.PNG)<br />
_What happens when a wrong port is chosen_ <br />
7. Go to your computer's terminal and start a port-forward matching the port in the pod <br />
![](screenshots/portforward.PNG)<br />
8. Go to the localhost address<br />
![](screenshots/localhostaddress.png)<br />
9. Test for keras <br />
- Create a new notebook or use the ClassificationExample.ipynb file
- Run the following tests (see the notebook check after this list) <br />
![](screenshots/firstBatch.PNG) <br /><br />
<!-- ![](screenshots/secondBatch.PNG)<br /><br /> -->
**_Make sure the outputs return True or a device name._**<br />
**You are now ready to use Keras in a Jupyter notebook hosted on Kubernetes**
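The screenshots above boil down to the following commands. This is a sketch, not a verbatim transcript: the raw-file URL, pod name, and port are illustrative, so substitute your own (steps 4–8):

```
# Step 4: create the deployment from the RAW file URL (URL is illustrative)
kubectl create -f https://gitlab.nautilus.optiputer.net/ar-noc/keras-smoke-detection/raw/master/kerasDeloyment.yaml

# Step 5: look up the generated pod name, then exec into it
kubectl get pods
kubectl exec -it <keras-2-pod-name> -- /bin/bash

# Step 6: inside the pod, start Jupyter from the mounted directory
cd /userdata/kerasData
jupyter notebook --ip=0.0.0.0 --port=8888 --no-browser --allow-root

# Step 7: on your own machine, forward the same port
kubectl port-forward <keras-2-pod-name> 8888:8888

# Step 8: open http://localhost:8888 and paste the token Jupyter printed
```

For step 9, a minimal in-notebook check that Keras imports and TensorFlow sees the GPU (the exact cells in the screenshot may differ):

```
import keras
import tensorflow as tf

# The first print should return True, the second a GPU
# device name such as /device:GPU:0.
print(tf.test.is_gpu_available())
print(tf.test.gpu_device_name())
```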
### Using Keras in Notebook
#### EXTREMELY IMPORTANT!
To prevent Keras from claiming too much GPU memory and stalling later training runs, run this:
![](screenshots/hickups.PNG) <br />
If you see an error, shut down the notebook server and try again <br />
![](screenshots/toolate.PNG)<br/>
If you see the nvidia-smi memory allocation at 0, you have succeeded in resetting the GPU <br />
![](screenshots/nvidiasmireg.PNG)<br />
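The cell in the screenshot caps TensorFlow's up-front GPU allocation. A minimal sketch using the TF 2.x memory-growth API (the notebook's exact code may differ):

```
import tensorflow as tf

# Run this before building any model: memory growth must be set
# before the GPU is initialized and cannot be toggled afterwards.
for gpu in tf.config.experimental.list_physical_devices("GPU"):
    # Allocate GPU memory on demand instead of grabbing it all at startup.
    tf.config.experimental.set_memory_growth(gpu, True)
```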
Please refer to [Keras Documentation](https://keras.io/) for instructions and information on using Keras
I used the notebook for the following:
- Training a CNN on the notebook for reference
- Using a LearningRateFinder to find the optimal learning rate
## Using the Fire-Classification training
1. Write the network using Keras layers <br />
![](screenshots/modelbuild.PNG) <br /> <br />
2. Set the paths <br />
![](screenshots/pathfields.PNG) <br />
The following must be set:
- FIRE_PATH = path of the directory with fire images
- Non_FIRE_PATH = path of the directory with images without fire
- MODEL_PATH = path where the saved model file should go
- LRFIND_PLOT_PATH = where the learning-rate-finder graph should go
- TRAINING_PLOT_PATH = where the training plot (loss & accuracy curves) should go
3. Loading Data
- Use the load_dataset() function of the load_data notebook ONLY for the HPWREN HWB database
- Otherwise, if you want to use the prebuilt loader in the training notebook, change the script so that the following numpy arrays are generated:
MAKE SURE THESE in load_data.ipynb ![](screenshots/loading_in.PNG)<br /> MATCH THESE in ClassificationExample.ipynb <br /> ![](screenshots/THEOTHEREND.PNG)
- Use the loadData(pathToFiles) function in ClassificationExample.ipynb
4. Image Load Tester
Tests the loaded images to verify that loading worked
5. Model Initialization <br />
![](screenshots/init.png)<br />
- The width, height, and depth describe the input data format. Classes is the number of conditions in the data; in our case: ["Fire", "Not-Fire"]
- Change the optimizer only if you know what you are doing. We are using standard SGD (see the sketch after this list)
6. Learning Rate Finder <br />
Run it to find the point where the network starts to learn
![](screenshots/lrf.png) <br />
![](screenshots/lrfplot.png) <br />
More information is available at [pyimagesearch](https://www.pyimagesearch.com/2019/08/05/keras-learning-rate-finder/)
Finally, set INIT_LR based on what you learned above
![](screenshots/initlr.png)<br />
7. Train <br />
![](screenshots/startTraining.PNG) <br />
8. Get results <br />
![](screenshots/results.PNG) <br />
You will find the accuracy measures in the table; the saved model is in fire_detection.model
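For reference, steps 5–7 fit together roughly as sketched below. This is not the notebook's exact code: the pyimagesearch LearningRateFinder class is replaced with a minimal callback-based sweep, and the path, dimensions, and hyperparameters are illustrative.

```
import tensorflow as tf
from tensorflow.keras.optimizers import SGD

# Step 3: load data with the loader from load_data.ipynb (path is illustrative)
# trainX, testX, trainY, testY, classWeight = load_dataset(
#     "/userdata/kerasData/HPWREN-FIgLib", (128, 128))

# Step 5: compile the model (built in step 1) with standard SGD;
# INIT_LR is a placeholder until the LR finder (step 6) says otherwise.
INIT_LR = 1e-2
# model.compile(loss="categorical_crossentropy",
#               optimizer=SGD(lr=INIT_LR, momentum=0.9),
#               metrics=["accuracy"])

# Step 6: a minimal LR range test - raise the learning rate
# exponentially each batch and record the loss.
def find_lr(model, x, y, start_lr=1e-10, end_lr=1e1, epochs=5, batch_size=64):
    steps = epochs * (len(x) // batch_size)
    factor = (end_lr / start_lr) ** (1.0 / steps)
    lrs, losses = [], []

    def on_batch_end(batch, logs):
        lr = float(tf.keras.backend.get_value(model.optimizer.lr))
        lrs.append(lr)
        losses.append(logs["loss"])
        tf.keras.backend.set_value(model.optimizer.lr, lr * factor)

    tf.keras.backend.set_value(model.optimizer.lr, start_lr)
    sweep = tf.keras.callbacks.LambdaCallback(on_batch_end=on_batch_end)
    model.fit(x, y, batch_size=batch_size, epochs=epochs, callbacks=[sweep])
    return lrs, losses
```

Plot losses against lrs on a log axis, set INIT_LR just before the loss begins its steepest drop, recompile, and run the real training (step 7).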
## TF Versions
- TF 2.0 should be on the pip path
- TF 1.15 is available in the conda environment tf15
- To activate it, type: `conda activate tf15`
## Contributors
* **Byungheon Jeong** - [byungheon-jeong](https://gitlab.nautilus.optiputer.net/byungheon-jeong)
* **Spence Chen** - [Spencer](https://gitlab.nautilus.optiputer.net/Spencer123)
* **Isaac Nealey** - [Isaac](https://gitlab.nautilus.optiputer.net/inealey)
* **John Graham** - [John](https://gitlab.nautilus.optiputer.net/jjgraham)
## Acknowledgments
* The Dockerfile is from the Keras team's Docker Hub
* The fire CNN and the learning rate finder are adapted from Adrian Rosebrock's excellent blog post on fire detection - [Pyimagesearch](https://www.pyimagesearch.com/2019/11/18/fire-and-smoke-detection-with-keras-and-deep-learning/)
image: docker:git
before_script:
  - docker login -u gitlab-ci-token -p $CI_BUILD_TOKEN gitlab-registry.nautilus.optiputer.net
stages:
  - build-and-push
build-and-push-job:
  stage: build-and-push
  tags:
    - build-as-docker
  script:
    - docker build --no-cache -t gitlab-registry.nautilus.optiputer.net/${CI_PROJECT_NAMESPACE}/${CI_PROJECT_NAME}:${CI_COMMIT_SHA:0:8} .
    - docker tag gitlab-registry.nautilus.optiputer.net/${CI_PROJECT_NAMESPACE}/${CI_PROJECT_NAME}:${CI_COMMIT_SHA:0:8} gitlab-registry.nautilus.optiputer.net/${CI_PROJECT_NAMESPACE}/${CI_PROJECT_NAME}:latest
    - docker push gitlab-registry.nautilus.optiputer.net/${CI_PROJECT_NAMESPACE}/${CI_PROJECT_NAME}
# Remove everything in the current directory except entries ending in "-c"
ls | grep -v ".*-c$" | xargs -I {} rm -rf {}
# List hidden .mp4 files under the HPWREN FIgLib archive
find /userdata/kerasData/hpwren.ucsd.edu/HWB/HPWREN-FIgLib/ -type f -name ".*.mp4" -print
# Delete anything matching "html" up to two levels below the current directory
find $PWD -maxdepth 2 | grep ".*html.*" | xargs -I {} rm -rf {}
# image_dimensions is typically (128, 128)
import os
import re

import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

def load_dataset(datasetPath, image_dimensions):
    # grab the paths to all images in our dataset directory, then
    # initialize our lists of images
    imagePaths = os.listdir(datasetPath)
    trainXList = []
    testXList = []
    trainY = np.array([])
    testY = np.array([])
    # loop over the image directories
    for directories in imagePaths:
        tempF = []   # fire images ("+" in the filename)
        tempNF = []  # non-fire images
        for element in os.listdir(datasetPath + "/" + directories):
            if re.search(".jpg", element):
                image = cv2.imread(datasetPath + "/" + directories + "/" + element)
                image = cv2.resize(image, image_dimensions)
                if "+" in element:
                    tempF.append(image)
                else:
                    tempNF.append(image)
        tempF = np.array(tempF, dtype="float32")
        tempNF = np.array(tempNF, dtype="float32")
        fireLabels = np.ones((tempF.shape[0],))
        nonFireLabels = np.zeros((tempNF.shape[0],))
        data = np.vstack([tempF, tempNF])
        labels = np.hstack([fireLabels, nonFireLabels])
        # one-hot encode once, here; do not re-encode later
        labels = to_categorical(labels, num_classes=2)
        data /= 255
        (t_trainX, t_testX, t_trainY, t_testY) = train_test_split(
            data, labels, test_size=0.2, random_state=42)
        trainXList.append(t_trainX)
        testXList.append(t_testX)
        if trainY.size == 0:
            trainY = t_trainY
            testY = t_testY
        else:
            trainY = np.append(trainY, t_trainY, axis=0)
            testY = np.append(testY, t_testY, axis=0)
    trainX = np.vstack(trainXList)
    testX = np.vstack(testXList)
    # class weights from the already one-hot labels (fixes the original's
    # second to_categorical call, which flattened and re-encoded them)
    labels = np.vstack([trainY, testY])
    classTotals = labels.sum(axis=0)
    classWeight = classTotals.max() / classTotals
    print(trainX.shape, testX.shape, trainY.shape, testY.shape)
    return trainX, testX, trainY, testY, classWeight
apiVersion: apps/v1
kind: Deployment
metadata:
  name: keras-2
  namespace: digits
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: keras-2
  template:
    metadata:
      labels:
        k8s-app: keras-2
    spec:
      tolerations:
      - key: "region"
        operator: "Equal"
        value: "allow"
        effect: "NoSchedule"
      containers:
      - name: keras-kube
        image: gitlab-registry.nautilus.optiputer.net/ar-noc/keras-smoke-detection:latest
        securityContext:
          capabilities:
            add:
            - NET_ADMIN
        command: ["/bin/sh", "-c"]
        args: ["sleep infinity"]
        resources:
          limits:
            memory: "64Gi"
            cpu: "8"
            nvidia.com/gpu: 1
          requests:
            memory: "32Gi"
            cpu: "2"
            nvidia.com/gpu: 1
        volumeMounts:
        - mountPath: /userdata/kerasData
          name: modeltraining
      volumes:
      - name: modeltraining
        persistentVolumeClaim:
          claimName: modeltraining
      # affinity:
      #   nodeAffinity:
      #     requiredDuringSchedulingIgnoredDuringExecution:
      #       nodeSelectorTerms:
      #       - matchExpressions:
      #         - key: gpu-type
      #           operator: In   # Use NotIn for other types
      #           values:
      #           - K40
      #           - V100
Screenshots added under screenshots/: THEOTHEREND.PNG (47.7 KiB), eng.PNG (15.2 KiB), execinto.PNG (5.4 KiB), firstBatch.PNG (12.9 KiB), hickups.PNG (11.7 KiB), init.png (29.7 KiB)