
Upload New File

Merged: jim davidson requested to merge patch-1 into master
1 file changed: mvect-cuda.py (new file, mode 100644), +255 −0
import pycuda.driver as cuda
import pycuda.autoinit
import pycuda.gpuarray
from pycuda.compiler import SourceModule
import cv2 as cv
import numpy as np
import sys
import os
import time
import math
#
# Simplified motion vector code that does the color coding and velocity coding using the Jetson Nano GPU.
# Primarily done as a test of functionality and speed, without regard to programming style or completeness,
# specifically in the areas of error checking and recovery. Also, I'm a complete neophyte in Python, so no laughing!
downsizeRatio = 0.5
#
# CUDA Code:
#
mod = SourceModule("""
#include <stdint.h>
#include <cuda.h>
#include <math_constants.h>
__global__ void imageProc(int32_t n,              // input - count of items in the arrays
                          float *flow,            // input - coordinates from the optical flow
                          uint8_t *color,         // output - color angles
                          uint8_t *velocity,      // output - velocity
                          uint8_t *prev_velocity, // input/output - previous velocity
                          uint8_t *deltav)        // output - delta velocity
{
    const float vmax = 255.0/12.0; // scale factor: a displacement of 12 pixels/frame maps to 255
    const uint8_t vCutOff = 3;     // minimum pixel velocity to display colors for
    uint8_t vel;
    float clr;
    int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
    if(idx < n)
    {
        int color_idx = idx * 3;
        int flow_idx = idx * 2;
        float v = vmax * sqrtf((flow[flow_idx]*flow[flow_idx]) + (flow[flow_idx+1]*flow[flow_idx+1]));
        // OpenCV reports angles as 0-360 degrees rather than the more common -180 to 180,
        // so shift atan2f's result into that range.
        float ang = (180.0/CUDART_PI_F) * (CUDART_PI_F + atan2f(flow[flow_idx+1], flow[flow_idx]));
        vel = 0xff & __float2uint_rn(v);
        if(v > 255.0) {
            vel = 255;
        }
        velocity[idx] = vel;
        deltav[idx] = 0xff & __float2uint_rn(abs(vel - prev_velocity[idx]));
        prev_velocity[idx] = velocity[idx];
        color[color_idx + 0] = 0; // red
        color[color_idx + 1] = 0; // green
        color[color_idx + 2] = 0; // blue
        if(vel > vCutOff) {
            // Map the angle onto the first 4/6 of a six-segment color wheel
            clr = (ang/360.) * 4./6.;
            if(clr <= (1./6.)) {
                color[color_idx + 0] = 0;                        // red
                color[color_idx + 1] = 255 & int(255*(6*clr));   // green
                color[color_idx + 2] = 255;                      // blue
            } else if(clr <= (2./6.)) {
                color[color_idx + 0] = 0;                        // red
                color[color_idx + 1] = 255;                      // green
                color[color_idx + 2] = 255 & int(255*(2-6*clr)); // blue
            } else if(clr <= (3./6.)) {
                color[color_idx + 0] = 255 & int(255*(6*clr-2)); // red
                color[color_idx + 1] = 255;                      // green
                color[color_idx + 2] = 0;                        // blue
            } else if(clr <= (4./6.)) {
                color[color_idx + 0] = 255;                      // red
                color[color_idx + 1] = 255 & int(255*(4-6*clr)); // green
                color[color_idx + 2] = 0;                        // blue
            } else if(clr <= (5./6.)) {
                color[color_idx + 0] = 255;                      // red
                color[color_idx + 1] = 0;                        // green
                color[color_idx + 2] = 255 & int(255*(6*clr-4)); // blue
            } else {
                color[color_idx + 0] = 255 & int(255*(6-6*clr)); // red
                color[color_idx + 1] = 0;                        // green
                color[color_idx + 2] = 255;                      // blue
            }
        }
    }
}
""")
title = sys.argv[1]  # path or URL of the video source (required command-line argument)
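# Usage sketch (assumptions: Python 3, and that the Data/ directory the outputs are
# written to already exists; the sample file name is ours):
#   mkdir -p Data
#   python3 mvect-cuda.py input.mp4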
#cap = cv.VideoCapture('http://login:tinkering@132.239.4.196/control/faststream.jpg?stream=full')
cap = cv.VideoCapture(title)
#ret, first_frame = cap.read()
#motion_frame = cv.resize(first_frame, (0,0), fx=downsizeRatio, fy=downsizeRatio)
ret, motion_frame = cap.read()
prev_gray = cv.cvtColor(motion_frame, cv.COLOR_BGR2GRAY)
mask = np.zeros_like(motion_frame)
color_array = pycuda.gpuarray.zeros(prev_gray.size * 3, np.uint8)
velocity_array = pycuda.gpuarray.zeros(prev_gray.size, np.uint8)
prev_velocity = pycuda.gpuarray.zeros(prev_gray.size, np.uint8)
prev_velocity.fill(0xaa)
deltav_array = pycuda.gpuarray.zeros(prev_gray.shape, np.uint8)
block_size = np.int32(prev_gray.size)  # total pixel count (one kernel thread per pixel), not a CUDA block size
print("Block Size: {}".format(block_size))
print("motion_frame shape: {}".format(motion_frame.shape))
print("motion_frame size: {}".format(motion_frame.size))
print("color_array size: {}".format(color_array.size))
print("color_array shape: {}".format(color_array.shape))
print("deltav size: {}".format(deltav_array.size))
print("deltav shape: {}".format(deltav_array.shape))
print("color_array size: {}".format(color_array.size))
print("color_array shape: {}".format(color_array.shape))
print("prev_gray size: {}".format(prev_gray.size))
print("prev_gray shape: {}".format(prev_gray.shape))
mask[..., 1] = 255
frameCount = 0
totalProcessingTime = 0
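#
# Main loop: read a frame, compute dense optical flow against the previous frame, run the
# CUDA kernel to color-code flow direction and scale velocity, then write the results out
# as PPM/PGM images along with the original frame.
#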
while cap.isOpened():
    ret, newFrame = cap.read()
    if not ret:
        # End of stream (or read failure): leave the loop so the cleanup below runs
        break
    frameCount += 1
    start = time.time()
    # (disabled) Open a new window and display the input frame
    try:
        # newFrame = cv.resize(frame, (0,0), fx=downsizeRatio, fy=downsizeRatio)
        # cv.imshow("input", newFrame)
        # Converts each frame to grayscale - we previously only converted the first frame to grayscale
        gray = cv.cvtColor(newFrame, cv.COLOR_BGR2GRAY)
        # Calculates dense optical flow by the Farneback method
        # https://docs.opencv.org/3.0-beta/modules/video/doc/motion_analysis_and_object_tracking.html#calcopticalflowfarneback
        flow = cv.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
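        # Farneback arguments above, in order (per the OpenCV docs linked above):
        # pyr_scale=0.5, levels=3, winsize=15, iterations=3, poly_n=5, poly_sigma=1.2, flags=0.
        # The returned flow has shape (rows, cols, 2): x displacement in [..., 0], y in [..., 1].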
        flow_array = pycuda.gpuarray.to_gpu(flow)
        # Compute the magnitude and angle of the 2D flow vectors on the GPU,
        # then output the magnitudes and the angles.
        # Angle colors are black unless the velocity exceeds vCutOff.
        #
        a = open("Data/angles-{:06d}.ppm".format(frameCount), "w+b")
        v = open("Data/velocities-{:06d}.pgm".format(frameCount), "w+b")
        dv = open("Data/deltav-{:06d}.pgm".format(frameCount), "w+b")
        #
        # Write out the PPM/PGM file headers (the files are binary, so encode the
        # header strings to bytes)
        #
        a.write("P6\n{:d} {:d}\n255\n".format(len(flow[0]), len(flow)).encode('ascii'))
        v.write("P5\n{:d} {:d}\n255\n".format(len(flow[0]), len(flow)).encode('ascii'))
        dv.write("P5\n{:d} {:d}\n255\n".format(len(flow[0]), len(flow)).encode('ascii'))
        # Device 0: "NVIDIA Tegra X1"
        # CUDA Driver Version / Runtime Version 10.0 / 10.0
        # CUDA Capability Major/Minor version number: 5.3
        # Total amount of global memory: 3957 MBytes (4148756480 bytes)
        # ( 1) Multiprocessors, (128) CUDA Cores/MP: 128 CUDA Cores
        # GPU Max Clock rate: 922 MHz (0.92 GHz)
        # Memory Clock rate: 13 Mhz
        # Memory Bus Width: 64-bit
        # L2 Cache Size: 262144 bytes
        # Maximum Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536, 65536), 3D=(4096, 4096, 4096)
        # Maximum Layered 1D Texture Size, (num) layers 1D=(16384), 2048 layers
        # Maximum Layered 2D Texture Size, (num) layers 2D=(16384, 16384), 2048 layers
        # Total amount of constant memory: 65536 bytes
        # Total amount of shared memory per block: 49152 bytes
        # Total number of registers available per block: 32768
        # Warp size: 32
        # Maximum number of threads per multiprocessor: 2048
        # Maximum number of threads per block: 1024
        # Max dimension size of a thread block (x,y,z): (1024, 1024, 64)
        # Max dimension size of a grid size (x,y,z): (2147483647, 65535, 65535)
        # Maximum memory pitch: 2147483647 bytes
        # Texture alignment: 512 bytes
        # Concurrent copy and kernel execution: Yes with 1 copy engine(s)
        # Run time limit on kernels: Yes
        # Integrated GPU sharing Host Memory: Yes
        # Support host page-locked memory mapping: Yes
        # Alignment requirement for Surfaces: Yes
        # Device has ECC support: Disabled
        # Device supports Unified Addressing (UVA): Yes
        # Device supports Compute Preemption: No
        # Supports Cooperative Kernel Launch: No
        # Supports MultiDevice Co-op Kernel Launch: No
        # Device PCI Domain ID / Bus ID / location ID: 0 / 0 / 0
        # Compute Mode:
        # < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >
        #
        # deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 10.0, CUDA Runtime Version = 10.0, NumDevs = 1
        #
        # Give the GPU pointers to the color, velocity and delta V outputs
        # and to the flow inputs
        #
        bdim = (1024, 1, 1)
        gridSize = int((block_size + 1024 - 1) // 1024)  # ceiling division: enough 1024-thread blocks to cover every pixel
        gdim = (gridSize, 1)
        # print("gdim: {}".format(gdim))
        # print("Velocity_array.nbytes = {}".format(velocity_array.nbytes))
        # print("deltav_array.nbytes = {}".format(deltav_array.nbytes))
        arg_types = ('I', 'P', 'P', 'P', 'P', 'P')  # Tell the prepare function what to expect
        func = mod.get_function("imageProc")
        func.prepare(arg_types)
        func.prepared_call(gdim,                    # grid dimension (x, y)
                           bdim,                    # block dimension (x, y, z)
                           velocity_array.nbytes,   # element count n (equals the pixel count, since the array is uint8)
                           flow_array.gpudata,      # GPU pointer to the flow
                           color_array.gpudata,     # GPU pointer to the color array to return
                           velocity_array.gpudata,  # GPU pointer to the velocity data
                           prev_velocity.gpudata,   # GPU pointer to the previous velocity (persists between frames)
                           deltav_array.gpudata)    # GPU pointer to the delta V array
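        # Note: the kernel launch is asynchronous; the .get() calls below copy the results
        # back to the host and implicitly wait for the kernel to finish.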
        b = color_array.get()
        b.tofile(a)
        a.close()
        velocity_array.get().tofile(v)
        v.close()
        deltav_array.get().tofile(dv)
        dv.close()
        wname = 'Data/org' + str(frameCount).zfill(6) + '.ppm'
        cv.imwrite(wname, newFrame)
        # Update the previous frame
        prev_gray = gray
        # Frames are read at 1 millisecond intervals; the program breaks out of the while loop when the user presses the 'q' key
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
        print("Frame: {} Elapsed: {}".format(frameCount, time.time() - start))
        totalProcessingTime += time.time() - start
        print("Average frame processing time: {}".format(totalProcessingTime / frameCount))
    except:
        # Re-raise so failures are visible during development; the break is only reached
        # if the raise above is removed.
        raise
        break
# The following frees up resources and closes all windows
cap.release()
cv.destroyAllWindows()