增加SGBM算法测距,增加手识别算法,双目标定完成

This commit is contained in:
shenchenyang 2026-01-26 18:40:32 +08:00
parent f6391b9b63
commit f3104482c1
10 changed files with 488 additions and 113 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
build/ build/
install/ install/
log/ log/
*.png

Binary file not shown.

26
calibrationdata/left.yaml Normal file
View File

@ -0,0 +1,26 @@
image_width: 640
image_height: 480
camera_name: left
camera_matrix:
rows: 3
cols: 3
data: [468.55945, 0. , 320.99324,
0. , 468.7278 , 220.6343 ,
0. , 0. , 1. ]
distortion_model: plumb_bob
distortion_coefficients:
rows: 1
cols: 5
data: [0.009422, -0.000327, -0.000980, -0.019506, 0.000000]
rectification_matrix:
rows: 3
cols: 3
data: [ 0.99819422, -0.00430947, -0.0599143 ,
0.00540717, 0.99982027, 0.01817108,
0.05982523, -0.01846223, 0.99803812]
projection_matrix:
rows: 3
cols: 4
data: [487.63246, 0. , 338.03998, 0. ,
0. , 487.63246, 206.55779, 0. ,
0. , 0. , 1. , 0. ]

60
calibrationdata/ost.txt Normal file
View File

@ -0,0 +1,60 @@
# oST version 5.0 parameters
[image]
width
640
height
480
[narrow_stereo/left]
camera matrix
468.559453 0.000000 320.993242
0.000000 468.727800 220.634299
0.000000 0.000000 1.000000
distortion
0.009422 -0.000327 -0.000980 -0.019506 0.000000
rectification
0.998194 -0.004309 -0.059914
0.005407 0.999820 0.018171
0.059825 -0.018462 0.998038
projection
487.632463 0.000000 338.039978 0.000000
0.000000 487.632463 206.557787 0.000000
0.000000 0.000000 1.000000 0.000000
# oST version 5.0 parameters
[image]
width
640
height
480
[narrow_stereo/right]
camera matrix
469.733732 0.000000 320.579182
0.000000 471.914916 200.728954
0.000000 0.000000 1.000000
distortion
-0.022394 0.008106 -0.011521 -0.018003 0.000000
rectification
0.999423 -0.010289 -0.032380
0.009694 0.999782 -0.018485
0.032563 0.018160 0.999305
projection
487.632463 0.000000 338.039978 -41.780241
0.000000 487.632463 206.557787 0.000000
0.000000 0.000000 1.000000 0.000000

View File

@ -0,0 +1,26 @@
image_width: 640
image_height: 480
camera_name: right
camera_matrix:
rows: 3
cols: 3
data: [469.73373, 0. , 320.57918,
0. , 471.91492, 200.72895,
0. , 0. , 1. ]
distortion_model: plumb_bob
distortion_coefficients:
rows: 1
cols: 5
data: [-0.022394, 0.008106, -0.011521, -0.018003, 0.000000]
rectification_matrix:
rows: 3
cols: 3
data: [ 0.99942267, -0.01028889, -0.03237995,
0.00969371, 0.99978215, -0.01848456,
0.03256308, 0.01816001, 0.99930469]
projection_matrix:
rows: 3
cols: 4
data: [487.63246, 0. , 338.03998, -41.78024,
0. , 487.63246, 206.55779, 0. ,
0. , 0. , 1. , 0. ]

View File

@ -1,130 +1,65 @@
import rclpy import rclpy
from rclpy.node import Node from rclpy.node import Node
import cv2
import numpy as np
from sensor_msgs.msg import Image from sensor_msgs.msg import Image
from geometry_msgs.msg import PointStamped from geometry_msgs.msg import PointStamped
from cv_bridge import CvBridge from cv_bridge import CvBridge
import cv2
import mediapipe as mp
class StereoHandDepthNode(Node): class HandTrackingNode(Node):
def __init__(self): def __init__(self):
super().__init__('stereo_hand_depth_node') super().__init__('hand_tracking_node')
# ---------- 参数 ----------
self.fx = 600.0 # 焦距(像素)👉 后面用标定值替换
self.fy = 600.0
self.cx = 320.0
self.cy = 240.0
self.baseline = 0.06 # 双目基线(米)👉 换成你真实的
# ---------- OpenCV ----------
self.bridge = CvBridge() self.bridge = CvBridge()
self.stereo = cv2.StereoSGBM_create( self.sub = self.create_subscription(
minDisparity=0,
numDisparities=128,
blockSize=7,
P1=8 * 3 * 7 ** 2,
P2=32 * 3 * 7 ** 2,
disp12MaxDiff=1,
uniquenessRatio=10,
speckleWindowSize=100,
speckleRange=32
)
# ---------- ROS ----------
# self.left = self.create_subscription(
# Image,
# '/camera/left/image_raw',
# self.image_left_callback,
# 10
# )
# self.right = self.create_subscription(
# Image,
# '/camera/right/image_raw',
# self.image_right_callback,
# 10
# )
self.image = self.create_subscription(
Image, Image,
'/image', '/camera/left/image_raw',
self.image_callback, self.image_cb,
10 10
) )
self.pub = self.create_publisher( self.pub = self.create_publisher(
PointStamped, PointStamped,
'/hand_3d', '/hand/wrist_pixel',
10 10
) )
self.get_logger().info('StereoHandDepthNode started') self.mp_hands = mp.solutions.hands.Hands(
static_image_mode=False,
max_num_hands=1,
min_detection_confidence=0.7,
min_tracking_confidence=0.7
)
def image_callback(self, msg: Image): def image_cb(self, msg):
# 转 OpenCV img = self.bridge.imgmsg_to_cv2(msg, 'bgr8')
frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8') rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
h, w, _ = frame.shape results = self.mp_hands.process(rgb)
mid = w // 2 if not results.multi_hand_landmarks:
# 左右拆分
left = frame[:, :mid]
right = frame[:, mid:]
cv2.imshow("left", left)
cv2.imshow("right", right)
# 转灰度
left_gray = cv2.cvtColor(left, cv2.COLOR_BGR2GRAY)
right_gray = cv2.cvtColor(right, cv2.COLOR_BGR2GRAY)
# 计算视差
disparity = self.stereo.compute(left_gray, right_gray).astype(np.float32) / 16.0
# ----------- 这里先用“假手点”(画面中心)-----------
x = int(self.cx)
y = int(self.cy)
# ROI 取中值,稳很多
roi = disparity[y-3:y+3, x-3:x+3]
d = np.nanmedian(roi)
if d <= 0:
return return
# 深度 h, w, _ = img.shape
Z = (self.fx * self.baseline) / d wrist = results.multi_hand_landmarks[0].landmark[0]
X = (x - self.cx) * Z / self.fx
Y = (y - self.cy) * Z / self.fy
# 发布 u = int(wrist.x * w)
msg_out = PointStamped() v = int(wrist.y * h)
msg_out.header.stamp = self.get_clock().now().to_msg()
msg_out.header.frame_id = 'camera_link'
msg_out.point.x = float(X)
msg_out.point.y = float(Y)
msg_out.point.z = float(Z)
self.pub.publish(msg_out) p = PointStamped()
p.header = msg.header
p.point.x = float(u)
p.point.y = float(v)
p.point.z = 0.0 # 暂时不用
# ----------- 可视化(调试用)----------- self.pub.publish(p)
disp_vis = cv2.normalize(disparity, None, 0, 255, cv2.NORM_MINMAX)
disp_vis = disp_vis.astype(np.uint8)
cv2.circle(disp_vis, (x, y), 5, 255, -1)
cv2.imshow('disparity', disp_vis) def main():
cv2.waitKey(10) rclpy.init()
node = HandTrackingNode()
def main(args=None):
rclpy.init(args=args)
node = StereoHandDepthNode()
rclpy.spin(node) rclpy.spin(node)
node.destroy_node() node.destroy_node()
rclpy.shutdown() rclpy.shutdown()
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -5,26 +5,61 @@ from cv_bridge import CvBridge
import cv2 import cv2
import numpy as np import numpy as np
from camera_info_manager import CameraInfoManager
from sensor_msgs.msg import CameraInfo
CAMERA_WIDTH = int(1280)
CAMERA_HEIGHT = int(480)
FRAME_WIDTH = int(CAMERA_WIDTH / 2)
FRAME_HEIGHT = int(CAMERA_HEIGHT / 2)
print(FRAME_WIDTH, FRAME_HEIGHT)
class CameraNode(Node): class CameraNode(Node):
def __init__(self): def __init__(self):
super().__init__('camera_node') super().__init__('camera_node')
left_camera_info_url = 'file:///home/shen/openarm_test/calibrationdata/left.yaml'
right_camera_info_url = 'file:///home/shen/openarm_test/calibrationdata/right.yaml'
self.left_camera_info_manager = CameraInfoManager(self, cname='left', url=left_camera_info_url, namespace='/stereo_camera/left')
self.right_camera_info_manager = CameraInfoManager(self, cname='right', url=right_camera_info_url, namespace='/stereo_camera/right')
# 设置相机信息文件的路径(不带.yaml后缀
# 假设你的yaml文件放在 /home/username/camera_info/ 目录下,名为 left.yaml 和 right.yaml
# self.left_camera_info_manager.setCameraInfoURL(left_camera_info_url)
# self.right_camera_info_manager.setCameraInfoURL(right_camera_info_url)
# self.left_camera_info_manager.setURL(left_camera_info_url)
# self.right_camera_info_manager.setURL(right_camera_info_url)
print("2ds1as23d123sa1d32as", self.left_camera_info_manager.getURL())
self.left_camera_info_manager.loadCameraInfo()
self.right_camera_info_manager.loadCameraInfo()
# 然后你可以通过CameraInfoManager获取CameraInfo消息
self.left_camera_info = self.left_camera_info_manager.getCameraInfo()
self.right_camera_info = self.right_camera_info_manager.getCameraInfo()
# 注意你需要确保yaml文件已经正确放置在指定路径并且格式正确。
self.left_camera_info_pub = self.create_publisher(CameraInfo, '/stereo_camera/left/camera_info', 10)
self.right_camera_info_pub = self.create_publisher(CameraInfo, '/stereo_camera/right/camera_info', 10)
self.bridge = CvBridge() self.bridge = CvBridge()
self.publisher_left = self.create_publisher(Image, '/camera/left/image_raw', 10) self.publisher_left = self.create_publisher(Image, '/camera/left/image_raw', 10)
self.publisher_right = self.create_publisher(Image, '/camera/right/image_raw', 10) self.publisher_right = self.create_publisher(Image, '/camera/right/image_raw', 10)
self.publisher_raw = self.create_publisher(Image, '/image', 10) self.publisher_raw = self.create_publisher(Image, '/image_1', 10)
self.get_logger().info('正在初始化摄像头...') self.get_logger().info('正在初始化摄像头...')
# 尝试打开物理摄像头 # 尝试打开物理摄像头
self.cap = None self.cap = None
for i in range(1): # 尝试设备0,1,2 for i in range(2): # 尝试设备0,1,2
self.get_logger().info(f'尝试打开摄像头设备 /dev/video{i}...') self.get_logger().info(f'尝试打开摄像头设备 /dev/video{i}...')
self.cap = cv2.VideoCapture(i) self.cap = cv2.VideoCapture(i)
if self.cap.isOpened(): if self.cap.isOpened():
# 设置摄像头参数 # 设置摄像头参数
self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1600) self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 600) self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)
self.cap.set(cv2.CAP_PROP_FPS, 30) self.cap.set(cv2.CAP_PROP_FPS, 30)
# 让摄像头预热 # 让摄像头预热
@ -39,7 +74,7 @@ class CameraNode(Node):
break break
self.timer = self.create_timer(0.033, self.timer_callback) # 30Hz self.timer = self.create_timer(0.001, self.timer_callback) # 30Hz
self.get_logger().info('摄像头节点已启动') self.get_logger().info('摄像头节点已启动')
def timer_callback(self): def timer_callback(self):
@ -48,19 +83,23 @@ class CameraNode(Node):
# frameDist = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE) # frameDist = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
# cv2.imshow("camera_left", frame[:, :800]) # cv2.imshow("camera_left", frame[:, :800])
# cv2.imshow("camera_right", frame[:, 800:]) # cv2.imshow("camera_right", frame[:, 800:])
cv2.waitKey(10) cv2.waitKey(0)
msg_left= self.bridge.cv2_to_imgmsg(frame[:, :800], 'bgr8') time = self.get_clock().now().to_msg()
msg_right= self.bridge.cv2_to_imgmsg(frame[:, 800:], 'bgr8') msg_left= self.bridge.cv2_to_imgmsg(frame[:, :FRAME_WIDTH], encoding='bgr8')
msg_raw= self.bridge.cv2_to_imgmsg(frame, 'bgr8') msg_right= self.bridge.cv2_to_imgmsg(frame[:, FRAME_WIDTH:], encoding='bgr8')
msg_left.header.stamp = self.get_clock().now().to_msg() msg_raw= self.bridge.cv2_to_imgmsg(frame, encoding='bgr8')
msg_right.header.stamp = self.get_clock().now().to_msg() msg_left.header.stamp = time
msg_raw.header.stamp = self.get_clock().now().to_msg() msg_right.header.stamp = time
msg_raw.header.stamp = time
msg_left.header.frame_id = 'camera_frame_left' msg_left.header.frame_id = 'camera_frame_left'
msg_right.header.frame_id = 'camera_frame_right' msg_right.header.frame_id = 'camera_frame_right'
msg_raw.header.frame_id = 'camera_frame_raw' msg_raw.header.frame_id = 'camera_frame_raw'
self.publisher_left.publish(msg_left) self.publisher_left.publish(msg_left)
self.publisher_right.publish(msg_right) self.publisher_right.publish(msg_right)
self.publisher_raw.publish(msg_raw) self.publisher_raw.publish(msg_raw)
# 发布相机信息
self.left_camera_info_pub.publish(self.left_camera_info)
self.right_camera_info_pub.publish(self.right_camera_info)

View File

@ -0,0 +1,92 @@
import rclpy
from rclpy.node import Node
from geometry_msgs.msg import PoseStamped, PointStamped
from sensor_msgs.msg import Image, CameraInfo
from cv_bridge import CvBridge
import numpy as np
class Hand3DNode(Node):
    """Fuses a 2-D wrist pixel with a depth image and the left camera
    intrinsics, publishing the wrist pose in the camera frame.

    Subscribes:
        /hand/wrist_pixel   (PointStamped)  wrist pixel (u, v) in the image
        /camera/depth/image (Image)         depth frame, passthrough encoding
        /camera/left/camera_info (CameraInfo) pinhole intrinsics
    Publishes:
        /hand/pose_camera   (PoseStamped)   wrist position, identity orientation
    """

    def __init__(self):
        super().__init__('hand_3d_node')
        self.bridge = CvBridge()
        # Pinhole intrinsics; populated by the first CameraInfo message.
        self.fx = self.fy = self.cx = self.cy = None

        self.sub_pixel = self.create_subscription(
            PointStamped,
            '/hand/wrist_pixel',
            self.pixel_cb,
            10
        )
        self.sub_depth = self.create_subscription(
            Image,
            '/camera/depth/image',
            self.depth_cb,
            10
        )
        self.sub_info = self.create_subscription(
            CameraInfo,
            '/camera/left/camera_info',
            self.info_cb,
            10
        )
        self.pub = self.create_publisher(
            PoseStamped,
            '/hand/pose_camera',
            10
        )

        # Latest inputs, combined in try_publish().
        self.last_pixel = None
        self.depth_img = None

    def info_cb(self, msg):
        """Cache fx, fy, cx, cy from the camera calibration matrix K."""
        self.fx = msg.k[0]
        self.fy = msg.k[4]
        self.cx = msg.k[2]
        self.cy = msg.k[5]

    def pixel_cb(self, msg):
        """Remember the most recent wrist pixel (u, v) and its header."""
        self.last_pixel = (int(msg.point.x), int(msg.point.y), msg.header)

    def depth_cb(self, msg):
        """Store the newest depth frame and attempt a pose publication."""
        self.depth_img = self.bridge.imgmsg_to_cv2(msg, desired_encoding='passthrough')
        self.try_publish()

    def try_publish(self):
        """Back-project the cached wrist pixel through the cached depth frame.

        Silently returns when any input (pixel, depth, intrinsics) is missing,
        the pixel lies outside the depth image, or the depth sample is invalid.
        """
        if self.last_pixel is None or self.depth_img is None:
            return
        if self.fx is None:
            return  # intrinsics not received yet

        u, v, header = self.last_pixel
        h, w = self.depth_img.shape[:2]
        if not (0 <= u < w and 0 <= v < h):
            return  # wrist pixel outside the depth image — avoid IndexError

        depth = float(self.depth_img[v, u])
        # Guard NaN explicitly: `NaN <= 0.0` is False and would slip through.
        # NOTE(review): assumes the depth image is 32FC1 in metres; a 16UC1
        # frame would be in millimetres — confirm against the depth source.
        if not np.isfinite(depth) or depth <= 0.0:
            return  # missing / invalid depth sample

        # Pinhole back-projection.
        Z = depth
        X = (u - self.cx) * Z / self.fx
        Y = (v - self.cy) * Z / self.fy

        pose = PoseStamped()
        pose.header = header
        pose.header.frame_id = 'camera_frame'
        pose.pose.position.x = float(X)
        pose.pose.position.y = float(Y)
        pose.pose.position.z = float(Z)
        pose.pose.orientation.w = 1.0  # orientation fixed to identity for now
        self.pub.publish(pose)
def main():
    """Entry point: spin the node until interrupted, then clean up."""
    rclpy.init()
    node = Hand3DNode()
    rclpy.spin(node)
    # Destroy explicitly before shutdown (matches the other nodes in this
    # package and releases subscriptions/publishers deterministically).
    node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,195 @@
import rclpy
from rclpy.node import Node
import cv2
import numpy as np
from sensor_msgs.msg import Image
from geometry_msgs.msg import PointStamped
from cv_bridge import CvBridge
from sensor_msgs.msg import CameraInfo
import mediapipe as mp
class StereoHandDepthNode(Node):
    """Splits a side-by-side stereo frame, rectifies both halves with the
    calibrated camera models, locates the hand wrist with MediaPipe and
    publishes its 3-D position computed from SGBM disparity.

    Subscribes:
        /image_1                          (Image)      side-by-side stereo frame
        /stereo_camera/left/camera_info   (CameraInfo) left camera model
        /stereo_camera/right/camera_info  (CameraInfo) right camera model
    Publishes:
        /hand_3d (PointStamped) wrist position in the left camera frame
    """

    def __init__(self):
        super().__init__('stereo_hand_depth_node')
        # ---------- parameters ----------
        # Fallback intrinsics; overwritten with calibrated values as soon as
        # both CameraInfo messages have arrived.
        self.fx = 600.0  # focal length in pixels
        self.fy = 600.0
        self.cx = 320.0
        self.cy = 240.0
        self.baseline = 0.06  # stereo baseline in metres

        # ---------- OpenCV ----------
        self.numDisparities = 128  # must be a multiple of 16
        self.bridge = CvBridge()
        self.stereo = cv2.StereoSGBM_create(
            minDisparity=0,
            numDisparities=self.numDisparities,
            blockSize=7,
            P1=8 * 3 * 7 ** 2,
            P2=32 * 3 * 7 ** 2,
            disp12MaxDiff=1,
            uniquenessRatio=10,
            speckleWindowSize=100,
            speckleRange=32
        )

        # ---------- ROS ----------
        self.left_info = None
        self.right_info = None
        self.rectify_maps = None  # built lazily, rebuilt if calibration changes
        # Each subscription is kept on its own attribute: rebinding a single
        # `self.sub` drops the earlier handles, which rclpy may then
        # garbage-collect, silently killing the callbacks.
        self.sub_image = self.create_subscription(
            Image,
            '/image_1',
            self.image_callback,
            10
        )
        self.sub_left_info = self.create_subscription(
            CameraInfo,
            '/stereo_camera/left/camera_info',
            self.left_camera_info_callback,
            10
        )
        self.sub_right_info = self.create_subscription(
            CameraInfo,
            '/stereo_camera/right/camera_info',
            self.right_camera_info_callback,
            10
        )
        self.pub = self.create_publisher(
            PointStamped,
            '/hand_3d',
            10
        )
        self.mp_hands = mp.solutions.hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7
        )
        self.get_logger().info('StereoHandDepthNode started')

    def init_rectify_maps(self, left_info, right_info, image_size):
        """Build the undistort/rectify lookup maps for both cameras.

        image_size is (width, height) of ONE camera image.
        Returns (map1_l, map2_l, map1_r, map2_r).
        """
        K1 = np.array(left_info.k).reshape(3, 3)
        D1 = np.array(left_info.d)
        K2 = np.array(right_info.k).reshape(3, 3)
        D2 = np.array(right_info.d)
        R1 = np.array(left_info.r).reshape(3, 3)
        R2 = np.array(right_info.r).reshape(3, 3)
        P1 = np.array(left_info.p).reshape(3, 4)
        P2 = np.array(right_info.p).reshape(3, 4)
        map1_l, map2_l = cv2.initUndistortRectifyMap(
            K1, D1, R1, P1[:3, :3], image_size, cv2.CV_32FC1
        )
        map1_r, map2_r = cv2.initUndistortRectifyMap(
            K2, D2, R2, P2[:3, :3], image_size, cv2.CV_32FC1
        )
        return map1_l, map2_l, map1_r, map2_r

    def left_camera_info_callback(self, msg: CameraInfo):
        """Cache the left camera model; rebuild maps only if it changed."""
        if self.left_info is None or tuple(msg.p) != tuple(self.left_info.p):
            self.rectify_maps = None
        self.left_info = msg

    def right_camera_info_callback(self, msg: CameraInfo):
        """Cache the right camera model; rebuild maps only if it changed."""
        if self.right_info is None or tuple(msg.p) != tuple(self.right_info.p):
            self.rectify_maps = None
        self.right_info = msg

    def image_callback(self, msg: Image):
        """Process one side-by-side frame and publish the wrist 3-D point."""
        if self.left_info is None or self.right_info is None:
            return  # wait until both camera models are available

        # Convert to OpenCV and split the side-by-side frame.
        frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
        h, w, _ = frame.shape
        mid = w // 2
        left = frame[:, :mid]
        right = frame[:, mid:]

        # Build the rectification maps once (not on every frame) and derive
        # the rectified intrinsics from the projection matrices.
        if self.rectify_maps is None:
            self.rectify_maps = self.init_rectify_maps(
                self.left_info, self.right_info, (mid, h))
            self.fx = self.left_info.p[0]
            self.fy = self.left_info.p[5]
            self.cx = self.left_info.p[2]
            self.cy = self.left_info.p[6]
            # For a rectified pair, P_right[0, 3] == -fx * baseline.
            self.baseline = -self.right_info.p[3] / self.fx
        map1_l, map2_l, map1_r, map2_r = self.rectify_maps

        left_rect = cv2.remap(left, map1_l, map2_l, cv2.INTER_LINEAR)
        right_rect = cv2.remap(right, map1_r, map2_r, cv2.INTER_LINEAR)

        # MediaPipe Hands expects RGB input; left_rect is BGR.
        results = self.mp_hands.process(
            cv2.cvtColor(left_rect, cv2.COLOR_BGR2RGB))
        if not results.multi_hand_landmarks:
            return  # no hand detected — do not sample disparity at (0, 0)

        # Landmark coordinates are normalised to the LEFT image, whose width
        # is `mid` (half of the stitched frame), not `w`.
        wrist = results.multi_hand_landmarks[0].landmark[0]
        x = int(wrist.x * mid)
        y = int(wrist.y * h)

        # Disparity from SGBM; the raw output is fixed-point scaled by 16.
        left_gray = cv2.cvtColor(left_rect, cv2.COLOR_BGR2GRAY)
        right_gray = cv2.cvtColor(right_rect, cv2.COLOR_BGR2GRAY)
        disparity = self.stereo.compute(left_gray, right_gray).astype(np.float32) / 16.0

        # Median over a small ROI around the wrist is far more stable than a
        # single pixel; clamp the window so border pixels do not wrap the
        # slice with negative indices.
        y0, y1 = max(y - 3, 0), min(y + 3, disparity.shape[0])
        x0, x1 = max(x - 3, 0), min(x + 3, disparity.shape[1])
        roi = disparity[y0:y1, x0:x1]
        if roi.size == 0:
            return
        d = np.nanmedian(roi)
        if not np.isfinite(d) or d <= 0:
            return  # invalid disparity — depth unknown

        # Pinhole back-projection: Z = f * B / d.
        Z = (self.fx * self.baseline) / d
        X = (x - self.cx) * Z / self.fx
        Y = (y - self.cy) * Z / self.fy

        msg_out = PointStamped()
        msg_out.header.stamp = self.get_clock().now().to_msg()
        msg_out.header.frame_id = 'camera_link'
        msg_out.point.x = float(X)
        msg_out.point.y = float(Y)
        msg_out.point.z = float(Z)
        self.pub.publish(msg_out)

        # ---------- debug visualisation ----------
        disp_vis = cv2.normalize(disparity, None, 0, 255, cv2.NORM_MINMAX)
        disp_vis = disp_vis.astype(np.uint8)
        cv2.circle(disp_vis, (x, y), 5, 255, -1)
        # The leftmost numDisparities columns never have valid matches.
        disp_vis = disp_vis[:, self.numDisparities:]
        cv2.imshow('disparity', disp_vis)
        cv2.waitKey(1)
def main(args=None):
    """Entry point: create the stereo hand-depth node and spin it."""
    rclpy.init(args=args)
    stereo_node = StereoHandDepthNode()
    rclpy.spin(stereo_node)
    stereo_node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@ -31,6 +31,7 @@ setup(
'console_scripts': [ 'console_scripts': [
'camera_node = openarm_camera.camera:main', 'camera_node = openarm_camera.camera:main',
'camera_pub_node = openarm_camera.camera_pub:main', 'camera_pub_node = openarm_camera.camera_pub:main',
'stereo_hand_depth_node = openarm_camera.stereo_hand_depth_node:main',
], ],
}, },
) )