diff --git a/kubernestes-configs/gpu-job.yml b/kubernestes-configs/gpu-job.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a19453d29e5a2e8fd2a61a1e31d2753a83fb01b3
--- /dev/null
+++ b/kubernestes-configs/gpu-job.yml
@@ -0,0 +1,89 @@
+# batch/v1 tells it to use the JOB API
+apiVersion: batch/v1
+# we are running a Job, not a Pod
+kind: Job
+
+# set the name of the job
+metadata:
+  name: peicongjob
+
+spec:
+  # how many times should the system
+  # retry before calling it a failure
+  backoffLimit: 0
+  template:
+    spec:
+      # should we restart on failure
+      restartPolicy: Never
+      # what containers will we need
+      containers:
+        # the name of the container
+        - name: traffic-sign-recognition-system
+          # the image: can be from any public-facing registry
+          image: gitlab-registry.nrp-nautilus.io/peicongcheng/traffic-sign-recognition-system
+          # the working dir when the container starts
+          workingDir: /path/to/mydir  # TODO(review): placeholder — set to the real project dir containing train.py
+          # should Kube pull it
+          imagePullPolicy: IfNotPresent
+          # we need to expose the port
+          # that will be used for DDP
+          ports:
+            - containerPort: 8880
+          # setting of env variables
+          env:
+            # which interface to use
+            - name: NCCL_SOCKET_IFNAME
+              value: eth0
+            # prints some INFO level
+            # NCCL logs
+            - name: NCCL_DEBUG
+              value: INFO
+          # run the script directly: "-m train.py" is invalid (-m expects a module name, not a file)
+          command: ["python", "train.py", "./train_cfg.py"]
+          # define the resources for this container
+          resources:
+            # limits - the max given to the container
+            limits:
+              # RAM
+              memory: 64Gi
+              # cores
+              cpu: 32
+              # NVIDIA GPUs
+              nvidia.com/gpu: 4
+            # requests - what we'd like
+            requests:
+              # RAM
+              memory: 64Gi
+              # CPU Cores
+              cpu: 32
+              # GPUs
+              nvidia.com/gpu: 4
+          # what volumes should we mount
+          volumeMounts:
+            # my datasets PVC should mount to /data
+            - mountPath: /data
+              name: peicong
+            # IMPORTANT: we need SHM for DDP
+            - mountPath: /dev/shm
+              name: dshm
+      # tell Kube where to find the volumes we want to use
+      volumes:
+        # which PVC is my data
+        - name: peicong
+          persistentVolumeClaim:
+            claimName: peicong
+        # setup shared memory as a RAM volume
+        - name: dshm
+          emptyDir:
+            medium: Memory
+      # Tell Kube what type of GPUs we want
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: nvidia.com/gpu.product
+                    operator: In
+                    values:
+                      # asking for 3090s only
+                      - NVIDIA-GeForce-RTX-3090
diff --git a/kubernestes-configs/gpu-pod.yml b/kubernestes-configs/gpu-pod.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a2d3ff0bee7a7827e0d1a57f6f04623a3b9791e0
--- /dev/null
+++ b/kubernestes-configs/gpu-pod.yml
@@ -0,0 +1,45 @@
+apiVersion: v1
+kind: Pod
+
+metadata:
+  name: peicongpod
+
+spec:
+  containers:
+    - name: peicongpod
+      image: gitlab-registry.nrp-nautilus.io/peicongcheng/traffic-sign-recognition-system
+      imagePullPolicy: IfNotPresent  # only pull when the image is not already cached on the node
+      command: ["sleep", "infinity"]  # idle forever so the pod stays up for interactive use (kubectl exec)
+      resources:
+        limits:  # hard caps enforced on the container
+          memory: 12Gi
+          cpu: 2
+          nvidia.com/gpu: 1
+        requests:  # what the scheduler reserves (set equal to limits here)
+          memory: 12Gi
+          cpu: 2
+          nvidia.com/gpu: 1
+      volumeMounts:
+        - mountPath: /data  # dataset PVC mount
+          name: peicong
+        # IMPORTANT: we need SHM for PyTorch
+        - mountPath: /dev/shm
+          name: dshm
+  volumes:
+    - name: peicong
+      persistentVolumeClaim:
+        claimName: peicong  # must match an existing PVC in this namespace
+    # setup shared memory as a RAM volume
+    - name: dshm
+      emptyDir:
+        medium: Memory
+  affinity:
+    nodeAffinity:  # restrict scheduling to nodes exposing one of the GPU models below
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+          - matchExpressions:
+              - key: nvidia.com/gpu.product
+                operator: In
+                values:
+                  - NVIDIA-GeForce-RTX-3090
+                  - Tesla-T4
diff --git a/kubernestes-configs/persistent_volume.yml b/kubernestes-configs/persistent_volume.yml
new file mode 100644
index 0000000000000000000000000000000000000000..354ef89e19b7e926290a45ff05e197f008094c19
--- /dev/null
+++ b/kubernestes-configs/persistent_volume.yml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: peicong
+spec:
+  # CephFS storage class: supports ReadWriteMany so multiple pods/jobs can mount it
+  storageClassName: rook-cephfs
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: 50Gi
diff --git a/kubernestes-configs/pod_pvc.yml b/kubernestes-configs/pod_pvc.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e9f963c97082567e60dabb3e617dc021efbce562
--- /dev/null
+++ b/kubernestes-configs/pod_pvc.yml
@@ -0,0 +1,24 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: peicongpod # YOUR POD NAME HERE (NOTE(review): same name as the pod in gpu-pod.yml — only one can exist per namespace)
+spec:
+  containers:
+    - name: peicongpod # YOUR CONTAINER NAME HERE
+      image: gitlab-registry.nrp-nautilus.io/peicongcheng/traffic-sign-recognition-system
+      command: ["sh", "-c", "echo 'Im a new pod' && sleep infinity"]  # print a banner, then idle so the pod keeps running
+      resources:
+        limits:  # hard caps enforced on the container
+          memory: 12Gi
+          cpu: 2
+        requests:  # scheduler reservation (slightly below the limits)
+          memory: 10Gi
+          cpu: 2
+      volumeMounts:
+        - mountPath: /data  # dataset PVC mount
+          name: peicong # YOUR PVC NAME HERE
+  volumes:
+    - name: peicong # YOUR PVC NAME HERE
+      persistentVolumeClaim:
+        claimName: peicong # YOUR PVC NAME HERE (must match the PVC from persistent_volume.yml)
+