增加SGBM算法测距,增加手识别算法,双目标定完成

This commit is contained in:
shenchenyang 2026-01-26 18:40:32 +08:00
parent f6391b9b63
commit f3104482c1
10 changed files with 488 additions and 113 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
build/ build/
install/ install/
log/ log/
*.png

Binary file not shown.

26
calibrationdata/left.yaml Normal file
View File

@ -0,0 +1,26 @@
image_width: 640
image_height: 480
camera_name: left
camera_matrix:
rows: 3
cols: 3
data: [468.55945, 0. , 320.99324,
0. , 468.7278 , 220.6343 ,
0. , 0. , 1. ]
distortion_model: plumb_bob
distortion_coefficients:
rows: 1
cols: 5
data: [0.009422, -0.000327, -0.000980, -0.019506, 0.000000]
rectification_matrix:
rows: 3
cols: 3
data: [ 0.99819422, -0.00430947, -0.0599143 ,
0.00540717, 0.99982027, 0.01817108,
0.05982523, -0.01846223, 0.99803812]
projection_matrix:
rows: 3
cols: 4
data: [487.63246, 0. , 338.03998, 0. ,
0. , 487.63246, 206.55779, 0. ,
0. , 0. , 1. , 0. ]

60
calibrationdata/ost.txt Normal file
View File

@ -0,0 +1,60 @@
# oST version 5.0 parameters
[image]
width
640
height
480
[narrow_stereo/left]
camera matrix
468.559453 0.000000 320.993242
0.000000 468.727800 220.634299
0.000000 0.000000 1.000000
distortion
0.009422 -0.000327 -0.000980 -0.019506 0.000000
rectification
0.998194 -0.004309 -0.059914
0.005407 0.999820 0.018171
0.059825 -0.018462 0.998038
projection
487.632463 0.000000 338.039978 0.000000
0.000000 487.632463 206.557787 0.000000
0.000000 0.000000 1.000000 0.000000
# oST version 5.0 parameters
[image]
width
640
height
480
[narrow_stereo/right]
camera matrix
469.733732 0.000000 320.579182
0.000000 471.914916 200.728954
0.000000 0.000000 1.000000
distortion
-0.022394 0.008106 -0.011521 -0.018003 0.000000
rectification
0.999423 -0.010289 -0.032380
0.009694 0.999782 -0.018485
0.032563 0.018160 0.999305
projection
487.632463 0.000000 338.039978 -41.780241
0.000000 487.632463 206.557787 0.000000
0.000000 0.000000 1.000000 0.000000

View File

@ -0,0 +1,26 @@
image_width: 640
image_height: 480
camera_name: right
camera_matrix:
rows: 3
cols: 3
data: [469.73373, 0. , 320.57918,
0. , 471.91492, 200.72895,
0. , 0. , 1. ]
distortion_model: plumb_bob
distortion_coefficients:
rows: 1
cols: 5
data: [-0.022394, 0.008106, -0.011521, -0.018003, 0.000000]
rectification_matrix:
rows: 3
cols: 3
data: [ 0.99942267, -0.01028889, -0.03237995,
0.00969371, 0.99978215, -0.01848456,
0.03256308, 0.01816001, 0.99930469]
projection_matrix:
rows: 3
cols: 4
data: [487.63246, 0. , 338.03998, -41.78024,
0. , 487.63246, 206.55779, 0. ,
0. , 0. , 1. , 0. ]

View File

@ -1,130 +1,65 @@
import rclpy import rclpy
from rclpy.node import Node from rclpy.node import Node
import cv2
import numpy as np
from sensor_msgs.msg import Image from sensor_msgs.msg import Image
from geometry_msgs.msg import PointStamped from geometry_msgs.msg import PointStamped
from cv_bridge import CvBridge from cv_bridge import CvBridge
import cv2
import mediapipe as mp
class StereoHandDepthNode(Node): class HandTrackingNode(Node):
def __init__(self): def __init__(self):
super().__init__('stereo_hand_depth_node') super().__init__('hand_tracking_node')
# ---------- 参数 ----------
self.fx = 600.0 # 焦距(像素)👉 后面用标定值替换
self.fy = 600.0
self.cx = 320.0
self.cy = 240.0
self.baseline = 0.06 # 双目基线(米)👉 换成你真实的
# ---------- OpenCV ----------
self.bridge = CvBridge() self.bridge = CvBridge()
self.stereo = cv2.StereoSGBM_create( self.sub = self.create_subscription(
minDisparity=0,
numDisparities=128,
blockSize=7,
P1=8 * 3 * 7 ** 2,
P2=32 * 3 * 7 ** 2,
disp12MaxDiff=1,
uniquenessRatio=10,
speckleWindowSize=100,
speckleRange=32
)
# ---------- ROS ----------
# self.left = self.create_subscription(
# Image,
# '/camera/left/image_raw',
# self.image_left_callback,
# 10
# )
# self.right = self.create_subscription(
# Image,
# '/camera/right/image_raw',
# self.image_right_callback,
# 10
# )
self.image = self.create_subscription(
Image, Image,
'/image', '/camera/left/image_raw',
self.image_callback, self.image_cb,
10 10
) )
self.pub = self.create_publisher( self.pub = self.create_publisher(
PointStamped, PointStamped,
'/hand_3d', '/hand/wrist_pixel',
10 10
) )
self.get_logger().info('StereoHandDepthNode started') self.mp_hands = mp.solutions.hands.Hands(
static_image_mode=False,
max_num_hands=1,
min_detection_confidence=0.7,
min_tracking_confidence=0.7
)
def image_callback(self, msg: Image): def image_cb(self, msg):
# 转 OpenCV img = self.bridge.imgmsg_to_cv2(msg, 'bgr8')
frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8') rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
h, w, _ = frame.shape results = self.mp_hands.process(rgb)
mid = w // 2 if not results.multi_hand_landmarks:
# 左右拆分
left = frame[:, :mid]
right = frame[:, mid:]
cv2.imshow("left", left)
cv2.imshow("right", right)
# 转灰度
left_gray = cv2.cvtColor(left, cv2.COLOR_BGR2GRAY)
right_gray = cv2.cvtColor(right, cv2.COLOR_BGR2GRAY)
# 计算视差
disparity = self.stereo.compute(left_gray, right_gray).astype(np.float32) / 16.0
# ----------- 这里先用“假手点”(画面中心)-----------
x = int(self.cx)
y = int(self.cy)
# ROI 取中值,稳很多
roi = disparity[y-3:y+3, x-3:x+3]
d = np.nanmedian(roi)
if d <= 0:
return return
# 深度 h, w, _ = img.shape
Z = (self.fx * self.baseline) / d wrist = results.multi_hand_landmarks[0].landmark[0]
X = (x - self.cx) * Z / self.fx
Y = (y - self.cy) * Z / self.fy
# 发布 u = int(wrist.x * w)
msg_out = PointStamped() v = int(wrist.y * h)
msg_out.header.stamp = self.get_clock().now().to_msg()
msg_out.header.frame_id = 'camera_link'
msg_out.point.x = float(X)
msg_out.point.y = float(Y)
msg_out.point.z = float(Z)
self.pub.publish(msg_out) p = PointStamped()
p.header = msg.header
p.point.x = float(u)
p.point.y = float(v)
p.point.z = 0.0 # 暂时不用
# ----------- 可视化(调试用)----------- self.pub.publish(p)
disp_vis = cv2.normalize(disparity, None, 0, 255, cv2.NORM_MINMAX)
disp_vis = disp_vis.astype(np.uint8)
cv2.circle(disp_vis, (x, y), 5, 255, -1)
cv2.imshow('disparity', disp_vis) def main():
cv2.waitKey(10) rclpy.init()
node = HandTrackingNode()
def main(args=None):
rclpy.init(args=args)
node = StereoHandDepthNode()
rclpy.spin(node) rclpy.spin(node)
node.destroy_node() node.destroy_node()
rclpy.shutdown() rclpy.shutdown()
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -5,26 +5,61 @@ from cv_bridge import CvBridge
import cv2 import cv2
import numpy as np import numpy as np
from camera_info_manager import CameraInfoManager
from sensor_msgs.msg import CameraInfo
CAMERA_WIDTH = int(1280)
CAMERA_HEIGHT = int(480)
FRAME_WIDTH = int(CAMERA_WIDTH / 2)
FRAME_HEIGHT = int(CAMERA_HEIGHT / 2)
print(FRAME_WIDTH, FRAME_HEIGHT)
class CameraNode(Node): class CameraNode(Node):
def __init__(self): def __init__(self):
super().__init__('camera_node') super().__init__('camera_node')
left_camera_info_url = 'file:///home/shen/openarm_test/calibrationdata/left.yaml'
right_camera_info_url = 'file:///home/shen/openarm_test/calibrationdata/right.yaml'
self.left_camera_info_manager = CameraInfoManager(self, cname='left', url=left_camera_info_url, namespace='/stereo_camera/left')
self.right_camera_info_manager = CameraInfoManager(self, cname='right', url=right_camera_info_url, namespace='/stereo_camera/right')
# 设置相机信息文件的路径(不带.yaml后缀
# 假设你的yaml文件放在 /home/username/camera_info/ 目录下,名为 left.yaml 和 right.yaml
# self.left_camera_info_manager.setCameraInfoURL(left_camera_info_url)
# self.right_camera_info_manager.setCameraInfoURL(right_camera_info_url)
# self.left_camera_info_manager.setURL(left_camera_info_url)
# self.right_camera_info_manager.setURL(right_camera_info_url)
print("2ds1as23d123sa1d32as", self.left_camera_info_manager.getURL())
self.left_camera_info_manager.loadCameraInfo()
self.right_camera_info_manager.loadCameraInfo()
# 然后你可以通过CameraInfoManager获取CameraInfo消息
self.left_camera_info = self.left_camera_info_manager.getCameraInfo()
self.right_camera_info = self.right_camera_info_manager.getCameraInfo()
# 注意你需要确保yaml文件已经正确放置在指定路径并且格式正确。
self.left_camera_info_pub = self.create_publisher(CameraInfo, '/stereo_camera/left/camera_info', 10)
self.right_camera_info_pub = self.create_publisher(CameraInfo, '/stereo_camera/right/camera_info', 10)
self.bridge = CvBridge() self.bridge = CvBridge()
self.publisher_left = self.create_publisher(Image, '/camera/left/image_raw', 10) self.publisher_left = self.create_publisher(Image, '/camera/left/image_raw', 10)
self.publisher_right = self.create_publisher(Image, '/camera/right/image_raw', 10) self.publisher_right = self.create_publisher(Image, '/camera/right/image_raw', 10)
self.publisher_raw = self.create_publisher(Image, '/image', 10) self.publisher_raw = self.create_publisher(Image, '/image_1', 10)
self.get_logger().info('正在初始化摄像头...') self.get_logger().info('正在初始化摄像头...')
# 尝试打开物理摄像头 # 尝试打开物理摄像头
self.cap = None self.cap = None
for i in range(1): # 尝试设备0,1,2 for i in range(2): # 尝试设备0,1,2
self.get_logger().info(f'尝试打开摄像头设备 /dev/video{i}...') self.get_logger().info(f'尝试打开摄像头设备 /dev/video{i}...')
self.cap = cv2.VideoCapture(i) self.cap = cv2.VideoCapture(i)
if self.cap.isOpened(): if self.cap.isOpened():
# 设置摄像头参数 # 设置摄像头参数
self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1600) self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 600) self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)
self.cap.set(cv2.CAP_PROP_FPS, 30) self.cap.set(cv2.CAP_PROP_FPS, 30)
# 让摄像头预热 # 让摄像头预热
@ -39,7 +74,7 @@ class CameraNode(Node):
break break
self.timer = self.create_timer(0.033, self.timer_callback) # 30Hz self.timer = self.create_timer(0.001, self.timer_callback) # 30Hz
self.get_logger().info('摄像头节点已启动') self.get_logger().info('摄像头节点已启动')
def timer_callback(self): def timer_callback(self):
@ -48,19 +83,23 @@ class CameraNode(Node):
# frameDist = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE) # frameDist = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
# cv2.imshow("camera_left", frame[:, :800]) # cv2.imshow("camera_left", frame[:, :800])
# cv2.imshow("camera_right", frame[:, 800:]) # cv2.imshow("camera_right", frame[:, 800:])
cv2.waitKey(10) cv2.waitKey(0)
msg_left= self.bridge.cv2_to_imgmsg(frame[:, :800], 'bgr8') time = self.get_clock().now().to_msg()
msg_right= self.bridge.cv2_to_imgmsg(frame[:, 800:], 'bgr8') msg_left= self.bridge.cv2_to_imgmsg(frame[:, :FRAME_WIDTH], encoding='bgr8')
msg_raw= self.bridge.cv2_to_imgmsg(frame, 'bgr8') msg_right= self.bridge.cv2_to_imgmsg(frame[:, FRAME_WIDTH:], encoding='bgr8')
msg_left.header.stamp = self.get_clock().now().to_msg() msg_raw= self.bridge.cv2_to_imgmsg(frame, encoding='bgr8')
msg_right.header.stamp = self.get_clock().now().to_msg() msg_left.header.stamp = time
msg_raw.header.stamp = self.get_clock().now().to_msg() msg_right.header.stamp = time
msg_raw.header.stamp = time
msg_left.header.frame_id = 'camera_frame_left' msg_left.header.frame_id = 'camera_frame_left'
msg_right.header.frame_id = 'camera_frame_right' msg_right.header.frame_id = 'camera_frame_right'
msg_raw.header.frame_id = 'camera_frame_raw' msg_raw.header.frame_id = 'camera_frame_raw'
self.publisher_left.publish(msg_left) self.publisher_left.publish(msg_left)
self.publisher_right.publish(msg_right) self.publisher_right.publish(msg_right)
self.publisher_raw.publish(msg_raw) self.publisher_raw.publish(msg_raw)
# 发布相机信息
self.left_camera_info_pub.publish(self.left_camera_info)
self.right_camera_info_pub.publish(self.right_camera_info)

View File

@ -0,0 +1,92 @@
import rclpy
from rclpy.node import Node
from geometry_msgs.msg import PoseStamped, PointStamped
from sensor_msgs.msg import Image, CameraInfo
from cv_bridge import CvBridge
import numpy as np
class Hand3DNode(Node):
    """Fuses a 2-D wrist pixel with a depth image and the left camera
    intrinsics, publishing the wrist pose in the camera frame.

    Subscribes:
        /hand/wrist_pixel   (PointStamped)  wrist pixel (u, v) in the image
        /camera/depth/image (Image)         depth frame, passthrough encoding
        /camera/left/camera_info (CameraInfo) pinhole intrinsics
    Publishes:
        /hand/pose_camera   (PoseStamped)   wrist position, identity orientation
    """

    def __init__(self):
        super().__init__('hand_3d_node')
        self.bridge = CvBridge()
        # Pinhole intrinsics; populated by the first CameraInfo message.
        self.fx = self.fy = self.cx = self.cy = None

        self.sub_pixel = self.create_subscription(
            PointStamped,
            '/hand/wrist_pixel',
            self.pixel_cb,
            10
        )
        self.sub_depth = self.create_subscription(
            Image,
            '/camera/depth/image',
            self.depth_cb,
            10
        )
        self.sub_info = self.create_subscription(
            CameraInfo,
            '/camera/left/camera_info',
            self.info_cb,
            10
        )
        self.pub = self.create_publisher(
            PoseStamped,
            '/hand/pose_camera',
            10
        )

        # Latest inputs, combined in try_publish().
        self.last_pixel = None
        self.depth_img = None

    def info_cb(self, msg):
        """Cache fx, fy, cx, cy from the camera calibration matrix K."""
        self.fx = msg.k[0]
        self.fy = msg.k[4]
        self.cx = msg.k[2]
        self.cy = msg.k[5]

    def pixel_cb(self, msg):
        """Remember the most recent wrist pixel (u, v) and its header."""
        self.last_pixel = (int(msg.point.x), int(msg.point.y), msg.header)

    def depth_cb(self, msg):
        """Store the newest depth frame and attempt a pose publication."""
        self.depth_img = self.bridge.imgmsg_to_cv2(msg, desired_encoding='passthrough')
        self.try_publish()

    def try_publish(self):
        """Back-project the cached wrist pixel through the cached depth frame.

        Silently returns when any input (pixel, depth, intrinsics) is missing,
        the pixel lies outside the depth image, or the depth sample is invalid.
        """
        if self.last_pixel is None or self.depth_img is None:
            return
        if self.fx is None:
            return  # intrinsics not received yet

        u, v, header = self.last_pixel
        h, w = self.depth_img.shape[:2]
        if not (0 <= u < w and 0 <= v < h):
            return  # wrist pixel outside the depth image — avoid IndexError

        depth = float(self.depth_img[v, u])
        # Guard NaN explicitly: `NaN <= 0.0` is False and would slip through.
        # NOTE(review): assumes the depth image is 32FC1 in metres; a 16UC1
        # frame would be in millimetres — confirm against the depth source.
        if not np.isfinite(depth) or depth <= 0.0:
            return  # missing / invalid depth sample

        # Pinhole back-projection.
        Z = depth
        X = (u - self.cx) * Z / self.fx
        Y = (v - self.cy) * Z / self.fy

        pose = PoseStamped()
        pose.header = header
        pose.header.frame_id = 'camera_frame'
        pose.pose.position.x = float(X)
        pose.pose.position.y = float(Y)
        pose.pose.position.z = float(Z)
        pose.pose.orientation.w = 1.0  # orientation fixed to identity for now
        self.pub.publish(pose)
def main():
    """Entry point: spin the node until interrupted, then clean up."""
    rclpy.init()
    node = Hand3DNode()
    rclpy.spin(node)
    # Destroy explicitly before shutdown (matches the other nodes in this
    # package and releases subscriptions/publishers deterministically).
    node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,195 @@
import rclpy
from rclpy.node import Node
import cv2
import numpy as np
from sensor_msgs.msg import Image
from geometry_msgs.msg import PointStamped
from cv_bridge import CvBridge
from sensor_msgs.msg import CameraInfo
import mediapipe as mp
class StereoHandDepthNode(Node):
    """Splits a side-by-side stereo frame, rectifies both halves with the
    calibrated camera models, locates the hand wrist with MediaPipe and
    publishes its 3-D position computed from SGBM disparity.

    Subscribes:
        /image_1                          (Image)      side-by-side stereo frame
        /stereo_camera/left/camera_info   (CameraInfo) left camera model
        /stereo_camera/right/camera_info  (CameraInfo) right camera model
    Publishes:
        /hand_3d (PointStamped) wrist position in the left camera frame
    """

    def __init__(self):
        super().__init__('stereo_hand_depth_node')
        # ---------- parameters ----------
        # Fallback intrinsics; overwritten with calibrated values as soon as
        # both CameraInfo messages have arrived.
        self.fx = 600.0  # focal length in pixels
        self.fy = 600.0
        self.cx = 320.0
        self.cy = 240.0
        self.baseline = 0.06  # stereo baseline in metres

        # ---------- OpenCV ----------
        self.numDisparities = 128  # must be a multiple of 16
        self.bridge = CvBridge()
        self.stereo = cv2.StereoSGBM_create(
            minDisparity=0,
            numDisparities=self.numDisparities,
            blockSize=7,
            P1=8 * 3 * 7 ** 2,
            P2=32 * 3 * 7 ** 2,
            disp12MaxDiff=1,
            uniquenessRatio=10,
            speckleWindowSize=100,
            speckleRange=32
        )

        # ---------- ROS ----------
        self.left_info = None
        self.right_info = None
        self.rectify_maps = None  # built lazily, rebuilt if calibration changes
        # Each subscription is kept on its own attribute: rebinding a single
        # `self.sub` drops the earlier handles, which rclpy may then
        # garbage-collect, silently killing the callbacks.
        self.sub_image = self.create_subscription(
            Image,
            '/image_1',
            self.image_callback,
            10
        )
        self.sub_left_info = self.create_subscription(
            CameraInfo,
            '/stereo_camera/left/camera_info',
            self.left_camera_info_callback,
            10
        )
        self.sub_right_info = self.create_subscription(
            CameraInfo,
            '/stereo_camera/right/camera_info',
            self.right_camera_info_callback,
            10
        )
        self.pub = self.create_publisher(
            PointStamped,
            '/hand_3d',
            10
        )
        self.mp_hands = mp.solutions.hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.7
        )
        self.get_logger().info('StereoHandDepthNode started')

    def init_rectify_maps(self, left_info, right_info, image_size):
        """Build the undistort/rectify lookup maps for both cameras.

        image_size is (width, height) of ONE camera image.
        Returns (map1_l, map2_l, map1_r, map2_r).
        """
        K1 = np.array(left_info.k).reshape(3, 3)
        D1 = np.array(left_info.d)
        K2 = np.array(right_info.k).reshape(3, 3)
        D2 = np.array(right_info.d)
        R1 = np.array(left_info.r).reshape(3, 3)
        R2 = np.array(right_info.r).reshape(3, 3)
        P1 = np.array(left_info.p).reshape(3, 4)
        P2 = np.array(right_info.p).reshape(3, 4)
        map1_l, map2_l = cv2.initUndistortRectifyMap(
            K1, D1, R1, P1[:3, :3], image_size, cv2.CV_32FC1
        )
        map1_r, map2_r = cv2.initUndistortRectifyMap(
            K2, D2, R2, P2[:3, :3], image_size, cv2.CV_32FC1
        )
        return map1_l, map2_l, map1_r, map2_r

    def left_camera_info_callback(self, msg: CameraInfo):
        """Cache the left camera model; rebuild maps only if it changed."""
        if self.left_info is None or tuple(msg.p) != tuple(self.left_info.p):
            self.rectify_maps = None
        self.left_info = msg

    def right_camera_info_callback(self, msg: CameraInfo):
        """Cache the right camera model; rebuild maps only if it changed."""
        if self.right_info is None or tuple(msg.p) != tuple(self.right_info.p):
            self.rectify_maps = None
        self.right_info = msg

    def image_callback(self, msg: Image):
        """Process one side-by-side frame and publish the wrist 3-D point."""
        if self.left_info is None or self.right_info is None:
            return  # wait until both camera models are available

        # Convert to OpenCV and split the side-by-side frame.
        frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
        h, w, _ = frame.shape
        mid = w // 2
        left = frame[:, :mid]
        right = frame[:, mid:]

        # Build the rectification maps once (not on every frame) and derive
        # the rectified intrinsics from the projection matrices.
        if self.rectify_maps is None:
            self.rectify_maps = self.init_rectify_maps(
                self.left_info, self.right_info, (mid, h))
            self.fx = self.left_info.p[0]
            self.fy = self.left_info.p[5]
            self.cx = self.left_info.p[2]
            self.cy = self.left_info.p[6]
            # For a rectified pair, P_right[0, 3] == -fx * baseline.
            self.baseline = -self.right_info.p[3] / self.fx
        map1_l, map2_l, map1_r, map2_r = self.rectify_maps

        left_rect = cv2.remap(left, map1_l, map2_l, cv2.INTER_LINEAR)
        right_rect = cv2.remap(right, map1_r, map2_r, cv2.INTER_LINEAR)

        # MediaPipe Hands expects RGB input; left_rect is BGR.
        results = self.mp_hands.process(
            cv2.cvtColor(left_rect, cv2.COLOR_BGR2RGB))
        if not results.multi_hand_landmarks:
            return  # no hand detected — do not sample disparity at (0, 0)

        # Landmark coordinates are normalised to the LEFT image, whose width
        # is `mid` (half of the stitched frame), not `w`.
        wrist = results.multi_hand_landmarks[0].landmark[0]
        x = int(wrist.x * mid)
        y = int(wrist.y * h)

        # Disparity from SGBM; the raw output is fixed-point scaled by 16.
        left_gray = cv2.cvtColor(left_rect, cv2.COLOR_BGR2GRAY)
        right_gray = cv2.cvtColor(right_rect, cv2.COLOR_BGR2GRAY)
        disparity = self.stereo.compute(left_gray, right_gray).astype(np.float32) / 16.0

        # Median over a small ROI around the wrist is far more stable than a
        # single pixel; clamp the window so border pixels do not wrap the
        # slice with negative indices.
        y0, y1 = max(y - 3, 0), min(y + 3, disparity.shape[0])
        x0, x1 = max(x - 3, 0), min(x + 3, disparity.shape[1])
        roi = disparity[y0:y1, x0:x1]
        if roi.size == 0:
            return
        d = np.nanmedian(roi)
        if not np.isfinite(d) or d <= 0:
            return  # invalid disparity — depth unknown

        # Pinhole back-projection: Z = f * B / d.
        Z = (self.fx * self.baseline) / d
        X = (x - self.cx) * Z / self.fx
        Y = (y - self.cy) * Z / self.fy

        msg_out = PointStamped()
        msg_out.header.stamp = self.get_clock().now().to_msg()
        msg_out.header.frame_id = 'camera_link'
        msg_out.point.x = float(X)
        msg_out.point.y = float(Y)
        msg_out.point.z = float(Z)
        self.pub.publish(msg_out)

        # ---------- debug visualisation ----------
        disp_vis = cv2.normalize(disparity, None, 0, 255, cv2.NORM_MINMAX)
        disp_vis = disp_vis.astype(np.uint8)
        cv2.circle(disp_vis, (x, y), 5, 255, -1)
        # The leftmost numDisparities columns never have valid matches.
        disp_vis = disp_vis[:, self.numDisparities:]
        cv2.imshow('disparity', disp_vis)
        cv2.waitKey(1)
def main(args=None):
    """Entry point: create the stereo hand-depth node and spin it."""
    rclpy.init(args=args)
    stereo_node = StereoHandDepthNode()
    rclpy.spin(stereo_node)
    stereo_node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@ -31,6 +31,7 @@ setup(
'console_scripts': [ 'console_scripts': [
'camera_node = openarm_camera.camera:main', 'camera_node = openarm_camera.camera:main',
'camera_pub_node = openarm_camera.camera_pub:main', 'camera_pub_node = openarm_camera.camera_pub:main',
'stereo_hand_depth_node = openarm_camera.stereo_hand_depth_node:main',
], ],
}, },
) )