大版本更新。

This commit is contained in:
xinyang
2019-07-06 20:20:48 +08:00
parent 9f9050e04a
commit e9a0e9ad7b
23 changed files with 15297 additions and 20022 deletions

154
tools/TrainCNN/backward.py Normal file → Executable file
View File

@@ -5,8 +5,8 @@ from tqdm import tqdm
import generate
import forward
import cv2
import sys
import numpy as np
import mvsdk
print("Finish!")
def save_kernal(fp, val):
@@ -54,7 +54,7 @@ def save_para(folder, paras):
save_bias(fp, paras[7])
STEPS = 100000
STEPS = 5000
BATCH = 30
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.99
@@ -62,12 +62,16 @@ MOVING_AVERAGE_DECAY = 0.99
def train(dataset, show_bar=False):
test_images, test_labels = dataset.all_test_sets()
x = tf.placeholder(tf.float32, [None, generate.SRC_ROWS, generate.SRC_COLS, generate.SRC_CHANNELS])
y_= tf.placeholder(tf.float32, [None, forward.OUTPUT_NODES])
nodes, vars = forward.forward(x, 0.001)
keep_rate = tf.placeholder(tf.float32)
nodes, vars = forward.forward(x, 0.01)
y = nodes[-1]
ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
# ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
ce = tf.nn.weighted_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1), pos_weight=1)
cem = tf.reduce_mean(ce)
loss= cem + tf.add_n(tf.get_collection("losses"))
@@ -87,72 +91,118 @@ def train(dataset, show_bar=False):
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
with tf.Session(config=config) as sess:
init_op = tf.global_variables_initializer()
sess.run(init_op)
bar = tqdm(range(STEPS), dynamic_ncols=True)
bar = tqdm(range(STEPS), ascii=True, dynamic_ncols=True)
for i in bar:
images_samples, labels_samples = dataset.sample_train_sets(BATCH)
_, loss_value, step = sess.run(
[train_op, loss, global_step],
feed_dict={x: images_samples, y_: labels_samples}
feed_dict={x: images_samples, y_: labels_samples, keep_rate:0.7}
)
if i % 100 == 0:
if i % 1000 == 0:
test_samples, test_labels = dataset.sample_test_sets(1000)
acc = sess.run(accuracy, feed_dict={x: test_samples, y_: test_labels})
acc = sess.run(accuracy, feed_dict={x: test_images, y_: test_labels, keep_rate:1.0})
bar.set_postfix({"loss": loss_value, "acc": acc})
# video = cv2.VideoCapture("/home/xinyang/Desktop/Video.mp4")
# _ = True
# while _:
# _, frame = video.read()
# cv2.imshow("Video", frame)
# k = cv2.waitKey(10)
# if k == ord(" "):
# bbox = cv2.selectROI("frame", frame, False)
# print(bbox)
# roi = frame[bbox[1]:bbox[1]+bbox[3], bbox[0]:bbox[0]+bbox[2]]
# roi = cv2.resize(roi, (48, 36))
# cv2.imshow("roi", roi)
# cv2.waitKey(0)
# roi = roi.astype(np.float32)
# roi /= 255.0
# roi = roi.reshape([1, 36, 48, 3])
# res = sess.run(y, feed_dict={x: roi})
# res = res.reshape([forward.OUTPUT_NODES])
# print(np.argmax(res))
# elif k==ord("q"):
# break
# keep = True
# while keep:
# n = input()
# im = cv2.imread(n)
# im = cv2.resize(im, (48, 36))
# cv2.imshow("im", im)
# if cv2.waitKey(0) == ord("q"):
# keep = False
# im = im.astype(np.float32)
# im /= 255.0
# im = im.reshape([1, 36, 48, 3])
# res = sess.run(y, feed_dict={x: im})
# res = res.reshape([forward.OUTPUT_NODES])
# print(np.argmax(res))
test_samples, test_labels = dataset.sample_test_sets(100)
vars_val = sess.run(vars)
save_para("/home/xinyang/Desktop/AutoAim/tools/para", vars_val)
nodes_val = sess.run(nodes, feed_dict={x:test_samples})
return vars_val, nodes_val, test_samples
save_para("/home/xinyang/Desktop/RM_auto-aim/tools/para", vars_val)
print("save done!")
# nodes_val = sess.run(nodes, feed_dict={x:test_images})
# return vars_val, nodes_val
DevList = mvsdk.CameraEnumerateDevice()
nDev = len(DevList)
if nDev < 1:
print("No camera was found!")
return
for i, DevInfo in enumerate(DevList):
print("{}: {} {}".format(i, DevInfo.GetFriendlyName(), DevInfo.GetPortType()))
i = 0 if nDev == 1 else int(input("Select camera: "))
DevInfo = DevList[i]
print(DevInfo)
# 打开相机
hCamera = 0
try:
hCamera = mvsdk.CameraInit(DevInfo, -1, -1)
except mvsdk.CameraException as e:
print("CameraInit Failed({}): {}".format(e.error_code, e.message) )
return
# 获取相机特性描述
cap = mvsdk.CameraGetCapability(hCamera)
# 判断是黑白相机还是彩色相机
monoCamera = (cap.sIspCapacity.bMonoSensor != 0)
# 黑白相机让ISP直接输出MONO数据而不是扩展成R=G=B的24位灰度
if monoCamera:
mvsdk.CameraSetIspOutFormat(hCamera, mvsdk.CAMERA_MEDIA_TYPE_MONO8)
else:
mvsdk.CameraSetIspOutFormat(hCamera, mvsdk.CAMERA_MEDIA_TYPE_BGR8)
# 相机模式切换成连续采集
mvsdk.CameraSetTriggerMode(hCamera, 0)
# 手动曝光曝光时间30ms
mvsdk.CameraSetAeState(hCamera, 0)
mvsdk.CameraSetExposureTime(hCamera, 30 * 1000)
# 让SDK内部取图线程开始工作
mvsdk.CameraPlay(hCamera)
# 计算RGB buffer所需的大小这里直接按照相机的最大分辨率来分配
FrameBufferSize = cap.sResolutionRange.iWidthMax * cap.sResolutionRange.iHeightMax * (1 if monoCamera else 3)
# 分配RGB buffer用来存放ISP输出的图像
# 备注从相机传输到PC端的是RAW数据在PC端通过软件ISP转为RGB数据如果是黑白相机就不需要转换格式但是ISP还有其它处理所以也需要分配这个buffer
pFrameBuffer = mvsdk.CameraAlignMalloc(FrameBufferSize, 16)
while (cv2.waitKey(1) & 0xFF) != ord('q'):
# 从相机取一帧图片
try:
pRawData, FrameHead = mvsdk.CameraGetImageBuffer(hCamera, 200)
mvsdk.CameraImageProcess(hCamera, pRawData, pFrameBuffer, FrameHead)
mvsdk.CameraReleaseImageBuffer(hCamera, pRawData)
# 此时图片已经存储在pFrameBuffer中对于彩色相机pFrameBuffer=RGB数据黑白相机pFrameBuffer=8位灰度数据
# 把pFrameBuffer转换成opencv的图像格式以进行后续算法处理
frame_data = (mvsdk.c_ubyte * FrameHead.uBytes).from_address(pFrameBuffer)
frame = np.frombuffer(frame_data, dtype=np.uint8)
frame = frame.reshape((FrameHead.iHeight, FrameHead.iWidth, 1 if FrameHead.uiMediaType == mvsdk.CAMERA_MEDIA_TYPE_MONO8 else 3) )
frame = cv2.resize(frame, (640,480), interpolation = cv2.INTER_LINEAR)
cv2.imshow("Press q to end", frame)
if (cv2.waitKey(1)&0xFF) == ord(' '):
roi = cv2.selectROI("roi", frame)
roi = frame[roi[1]:roi[1]+roi[3], roi[0]:roi[0]+roi[2]]
print(roi)
cv2.imshow("box", roi)
image = cv2.resize(roi, (48, 36))
image = image.astype(np.float32) / 255.0
out = sess.run(y, feed_dict={x:[image]})
print(out)
print(np.argmax(out))
except mvsdk.CameraException as e:
if e.error_code != mvsdk.CAMERA_STATUS_TIME_OUT:
print("CameraGetImageBuffer failed({}): {}".format(e.error_code, e.message) )
# 关闭相机
mvsdk.CameraUnInit(hCamera)
# 释放帧缓存
mvsdk.CameraAlignFree(pFrameBuffer)
if __name__ == "__main__":
print("Loading data sets...")
dataset = generate.DataSet("/home/xinyang/Desktop/dataset/box")
print("Finish!")
dataset = generate.DataSet("/home/xinyang/Desktop/box_cut")
train(dataset, show_bar=True)
input("Press any key to end...")

94
tools/TrainCNN/cv_grab.py Normal file
View File

@@ -0,0 +1,94 @@
#coding=utf-8
import cv2
import numpy as np
import mvsdk
def main_loop():
# 枚举相机
DevList = mvsdk.CameraEnumerateDevice()
nDev = len(DevList)
if nDev < 1:
print("No camera was found!")
return
for i, DevInfo in enumerate(DevList):
print("{}: {} {}".format(i, DevInfo.GetFriendlyName(), DevInfo.GetPortType()))
i = 0 if nDev == 1 else int(input("Select camera: "))
DevInfo = DevList[i]
print(DevInfo)
# 打开相机
hCamera = 0
try:
hCamera = mvsdk.CameraInit(DevInfo, -1, -1)
except mvsdk.CameraException as e:
print("CameraInit Failed({}): {}".format(e.error_code, e.message) )
return
# 获取相机特性描述
cap = mvsdk.CameraGetCapability(hCamera)
# 判断是黑白相机还是彩色相机
monoCamera = (cap.sIspCapacity.bMonoSensor != 0)
# 黑白相机让ISP直接输出MONO数据而不是扩展成R=G=B的24位灰度
if monoCamera:
mvsdk.CameraSetIspOutFormat(hCamera, mvsdk.CAMERA_MEDIA_TYPE_MONO8)
else:
mvsdk.CameraSetIspOutFormat(hCamera, mvsdk.CAMERA_MEDIA_TYPE_BGR8)
# 相机模式切换成连续采集
mvsdk.CameraSetTriggerMode(hCamera, 0)
# 手动曝光曝光时间30ms
mvsdk.CameraSetAeState(hCamera, 0)
mvsdk.CameraSetExposureTime(hCamera, 30 * 1000)
# 让SDK内部取图线程开始工作
mvsdk.CameraPlay(hCamera)
# 计算RGB buffer所需的大小这里直接按照相机的最大分辨率来分配
FrameBufferSize = cap.sResolutionRange.iWidthMax * cap.sResolutionRange.iHeightMax * (1 if monoCamera else 3)
# 分配RGB buffer用来存放ISP输出的图像
# 备注从相机传输到PC端的是RAW数据在PC端通过软件ISP转为RGB数据如果是黑白相机就不需要转换格式但是ISP还有其它处理所以也需要分配这个buffer
pFrameBuffer = mvsdk.CameraAlignMalloc(FrameBufferSize, 16)
while (cv2.waitKey(1) & 0xFF) != ord('q'):
# 从相机取一帧图片
try:
pRawData, FrameHead = mvsdk.CameraGetImageBuffer(hCamera, 200)
mvsdk.CameraImageProcess(hCamera, pRawData, pFrameBuffer, FrameHead)
mvsdk.CameraReleaseImageBuffer(hCamera, pRawData)
# 此时图片已经存储在pFrameBuffer中对于彩色相机pFrameBuffer=RGB数据黑白相机pFrameBuffer=8位灰度数据
# 把pFrameBuffer转换成opencv的图像格式以进行后续算法处理
frame_data = (mvsdk.c_ubyte * FrameHead.uBytes).from_address(pFrameBuffer)
frame = np.frombuffer(frame_data, dtype=np.uint8)
frame = frame.reshape((FrameHead.iHeight, FrameHead.iWidth, 1 if FrameHead.uiMediaType == mvsdk.CAMERA_MEDIA_TYPE_MONO8 else 3) )
frame = cv2.resize(frame, (640,480), interpolation = cv2.INTER_LINEAR)
cv2.imshow("Press q to end", frame)
roi = cv2.selectROI("roi", frame)
roi = frame[roi[1]:roi[1]+roi[3], roi[0]:roi[0]+roi[2]]
print(roi)
cv2.imshow("box", roi)
except mvsdk.CameraException as e:
if e.error_code != mvsdk.CAMERA_STATUS_TIME_OUT:
print("CameraGetImageBuffer failed({}): {}".format(e.error_code, e.message) )
# 关闭相机
mvsdk.CameraUnInit(hCamera)
# 释放帧缓存
mvsdk.CameraAlignFree(pFrameBuffer)
def main():
try:
main_loop()
finally:
cv2.destroyAllWindows()
main()

View File

@@ -29,24 +29,25 @@ def max_pool_2x2(x):
CONV1_KERNAL_SIZE = 5
# 第一层卷积输出通道数
CONV1_OUTPUT_CHANNELS = 8
CONV1_OUTPUT_CHANNELS = 6
# 第二层卷积核大小
CONV2_KERNAL_SIZE = 3
# 第二层卷积输出通道数
CONV2_OUTPUT_CHANNELS = 16
CONV2_OUTPUT_CHANNELS = 10
# 第一层全连接宽度
FC1_OUTPUT_NODES = 16
# 第二层全连接宽度(输出标签类型数)
FC2_OUTPUT_NODES = 15
# 输出标签类型数
OUTPUT_NODES = FC2_OUTPUT_NODES
def forward(x, regularizer=None):
def forward(x, regularizer=None, keep_rate=tf.constant(1.0)):
vars = []
nodes = []
@@ -71,16 +72,19 @@ def forward(x, regularizer=None):
pool_shape = pool2.get_shape().as_list()
node = pool_shape[1] * pool_shape[2] * pool_shape[3]
reshaped = tf.reshape(pool2, [-1, node])
reshaped = tf.nn.dropout(reshaped, keep_rate)
fc1_w = tf.nn.dropout(get_weight([node, FC1_OUTPUT_NODES], regularizer), 0.1)
fc1_w = get_weight([node, FC1_OUTPUT_NODES], regularizer)
fc1_b = get_bias([FC1_OUTPUT_NODES])
fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_w) + fc1_b)
fc1 = tf.nn.dropout(fc1, keep_rate)
vars.extend([fc1_w, fc1_b])
nodes.extend([fc1])
fc2_w = tf.nn.dropout(get_weight([FC1_OUTPUT_NODES, FC2_OUTPUT_NODES], regularizer), 0.1)
fc2_w = get_weight([FC1_OUTPUT_NODES, FC2_OUTPUT_NODES], regularizer)
fc2_b = get_bias([FC2_OUTPUT_NODES])
fc2 = tf.nn.softmax(tf.matmul(fc1, fc2_w) + fc2_b)
# fc2 = tf.nn.softmax(tf.matmul(fc1, fc2_w) + fc2_b)
fc2 = tf.matmul(fc1, fc2_w) + fc2_b
vars.extend([fc2_w, fc2_b])
nodes.extend([fc2])

View File

@@ -2,10 +2,8 @@ import numpy as np
import os
import cv2
import random
from forward import OUTPUT_NODES
import sys
import os
from tqdm import tqdm
from forward import OUTPUT_NODES
# 原图像行数
SRC_ROWS = 36
@@ -24,7 +22,7 @@ class DataSet:
self.test_labels = []
self.generate_data_sets(folder)
def file2nparray(self, name, random=False):
def file2nparray(self, name):
image = cv2.imread(name)
image = cv2.resize(image, (SRC_COLS, SRC_ROWS))
image = image.astype(np.float32)
@@ -42,16 +40,12 @@ class DataSet:
files = os.listdir(dir)
for file in tqdm(files, postfix={"loading id": i}, dynamic_ncols=True):
if file[-3:] == "jpg":
try:
if random.random() > 0.2:
self.train_samples.append(self.file2nparray("%s/%s" % (dir, file)))
self.train_labels.append(self.id2label(i))
else:
self.test_samples.append(self.file2nparray("%s/%s" % (dir, file)))
self.test_labels.append(self.id2label(i))
except:
print("%s/%s" % (dir, file))
continue
if random.random() > 0.2:
self.train_samples.append(self.file2nparray("%s/%s" % (dir, file)))
self.train_labels.append(self.id2label(i))
else:
self.test_samples.append(self.file2nparray("%s/%s" % (dir, file)))
self.test_labels.append(self.id2label(i))
self.train_samples = np.array(self.train_samples)
self.train_labels = np.array(self.train_labels)
self.test_samples = np.array(self.test_samples)
@@ -67,15 +61,6 @@ class DataSet:
labels.append(self.train_labels[id])
return np.array(samples), np.array(labels)
def sample_test_sets(self, length):
samples = []
labels = []
for i in range(length):
id = random.randint(0, len(self.test_samples)-1)
samples.append(self.test_samples[id])
labels.append(self.test_labels[id])
return np.array(samples), np.array(labels)
def all_train_sets(self):
return self.train_samples[:], self.train_labels[:]

111
tools/TrainCNN/grab.py Normal file
View File

@@ -0,0 +1,111 @@
#coding=utf-8
import mvsdk
def main():
# 枚举相机
DevList = mvsdk.CameraEnumerateDevice()
nDev = len(DevList)
if nDev < 1:
print("No camera was found!")
return
for i, DevInfo in enumerate(DevList):
print("{}: {} {}".format(i, DevInfo.GetFriendlyName(), DevInfo.GetPortType()))
i = 0 if nDev == 1 else int(input("Select camera: "))
DevInfo = DevList[i]
print(DevInfo)
# 打开相机
hCamera = 0
try:
hCamera = mvsdk.CameraInit(DevInfo, -1, -1)
except mvsdk.CameraException as e:
print("CameraInit Failed({}): {}".format(e.error_code, e.message) )
return
# 获取相机特性描述
cap = mvsdk.CameraGetCapability(hCamera)
PrintCapbility(cap)
# 判断是黑白相机还是彩色相机
monoCamera = (cap.sIspCapacity.bMonoSensor != 0)
# 黑白相机让ISP直接输出MONO数据而不是扩展成R=G=B的24位灰度
if monoCamera:
mvsdk.CameraSetIspOutFormat(hCamera, mvsdk.CAMERA_MEDIA_TYPE_MONO8)
# 相机模式切换成连续采集
mvsdk.CameraSetTriggerMode(hCamera, 0)
# 手动曝光曝光时间30ms
mvsdk.CameraSetAeState(hCamera, 0)
mvsdk.CameraSetExposureTime(hCamera, 30 * 1000)
# 让SDK内部取图线程开始工作
mvsdk.CameraPlay(hCamera)
# 计算RGB buffer所需的大小这里直接按照相机的最大分辨率来分配
FrameBufferSize = cap.sResolutionRange.iWidthMax * cap.sResolutionRange.iHeightMax * (1 if monoCamera else 3)
# 分配RGB buffer用来存放ISP输出的图像
# 备注从相机传输到PC端的是RAW数据在PC端通过软件ISP转为RGB数据如果是黑白相机就不需要转换格式但是ISP还有其它处理所以也需要分配这个buffer
pFrameBuffer = mvsdk.CameraAlignMalloc(FrameBufferSize, 16)
# 从相机取一帧图片
try:
pRawData, FrameHead = mvsdk.CameraGetImageBuffer(hCamera, 2000)
mvsdk.CameraImageProcess(hCamera, pRawData, pFrameBuffer, FrameHead)
mvsdk.CameraReleaseImageBuffer(hCamera, pRawData)
# 此时图片已经存储在pFrameBuffer中对于彩色相机pFrameBuffer=RGB数据黑白相机pFrameBuffer=8位灰度数据
# 该示例中我们只是把图片保存到硬盘文件中
status = mvsdk.CameraSaveImage(hCamera, "./grab.bmp", pFrameBuffer, FrameHead, mvsdk.FILE_BMP, 100)
if status == mvsdk.CAMERA_STATUS_SUCCESS:
print("Save image successfully. image_size = {}X{}".format(FrameHead.iWidth, FrameHead.iHeight) )
else:
print("Save image failed. err={}".format(status) )
except mvsdk.CameraException as e:
print("CameraGetImageBuffer failed({}): {}".format(e.error_code, e.message) )
# 关闭相机
mvsdk.CameraUnInit(hCamera)
# 释放帧缓存
mvsdk.CameraAlignFree(pFrameBuffer)
def PrintCapbility(cap):
for i in range(cap.iTriggerDesc):
desc = cap.pTriggerDesc[i]
print("{}: {}".format(desc.iIndex, desc.GetDescription()) )
for i in range(cap.iImageSizeDesc):
desc = cap.pImageSizeDesc[i]
print("{}: {}".format(desc.iIndex, desc.GetDescription()) )
for i in range(cap.iClrTempDesc):
desc = cap.pClrTempDesc[i]
print("{}: {}".format(desc.iIndex, desc.GetDescription()) )
for i in range(cap.iMediaTypeDesc):
desc = cap.pMediaTypeDesc[i]
print("{}: {}".format(desc.iIndex, desc.GetDescription()) )
for i in range(cap.iFrameSpeedDesc):
desc = cap.pFrameSpeedDesc[i]
print("{}: {}".format(desc.iIndex, desc.GetDescription()) )
for i in range(cap.iPackLenDesc):
desc = cap.pPackLenDesc[i]
print("{}: {}".format(desc.iIndex, desc.GetDescription()) )
for i in range(cap.iPresetLut):
desc = cap.pPresetLutDesc[i]
print("{}: {}".format(desc.iIndex, desc.GetDescription()) )
for i in range(cap.iAeAlmSwDesc):
desc = cap.pAeAlmSwDesc[i]
print("{}: {}".format(desc.iIndex, desc.GetDescription()) )
for i in range(cap.iAeAlmHdDesc):
desc = cap.pAeAlmHdDesc[i]
print("{}: {}".format(desc.iIndex, desc.GetDescription()) )
for i in range(cap.iBayerDecAlmSwDesc):
desc = cap.pBayerDecAlmSwDesc[i]
print("{}: {}".format(desc.iIndex, desc.GetDescription()) )
for i in range(cap.iBayerDecAlmHdDesc):
desc = cap.pBayerDecAlmHdDesc[i]
print("{}: {}".format(desc.iIndex, desc.GetDescription()) )
main()

2344
tools/TrainCNN/mvsdk.py Normal file

File diff suppressed because it is too large Load Diff