使用懒人yolo模型寻找声骸

2025-04-24 08:25:16 +00:00 · 2025-04-04 02:44:11 +08:00 · 2025-04-04 02:44:11 +08:00 · 1de903320e
commit 1de903320e
parent a3783da2e9
13 changed files with 279 additions and 91 deletions
--- a/README.md
+++ b/README.md
@ -97,4 +97,5 @@ malicious code, leading to the theft of game accounts or computer data, which is
 ### Credits

 [https://github.com/lazydog28/mc_auto_boss](https://github.com/lazydog28/mc_auto_boss) 
+[https://gitee.com/LanRenZhiNeng/ming-chao-ai](https://gitee.com/LanRenZhiNeng/ming-chao-ai)
  
--- a/README_cn.md
+++ b/README_cn.md
@ -90,4 +90,5 @@ ok-ww.exe -t 1 -e
 ### 致谢

 [https://github.com/lazydog28/mc_auto_boss](https://github.com/lazydog28/mc_auto_boss) 后台点击代码
+[https://gitee.com/LanRenZhiNeng/ming-chao-ai](https://gitee.com/LanRenZhiNeng/ming-chao-ai) 声骸yolo模型
  
--- a/assets/yolo/yolov5s_320.onnx
+++ b/assets/yolo/yolov5s_320.onnx
--- a/config.py
+++ b/config.py
@ -78,6 +78,7 @@ config = {
        'lib': 'rapidocr_openvino',
        'target_height': 1080,
    },
+    'my_app': ['src.globals', 'Globals'],
    'start_timeout': 120,  # default 60
    'wait_until_settle_time': 0,
    # required if using feature detection
--- a/requirements.txt
+++ b/requirements.txt
@ -54,9 +54,9 @@ numpy==2.2.4
    #   onnxruntime-directml
    #   opencv-python
    #   shapely
-ok-rapidocr-dml==0.0.5
+rapidocr==2.0.6
    # via -r .\requirements.in
-ok-script==0.0.514
+ok-script==0.0.515
    # via -r .\requirements.in
 omegaconf==2.3.0
    # via ok-rapidocr-dml
--- a/src/YoloDetect.py
+++ b/src/YoloDetect.py
@ -0,0 +1,135 @@
+import os
+import random
+import time
+import onnxruntime as ort
+import cv2
+import numpy as np
+
+import ok
+from ok import Logger, Box
+
+logger = Logger.get_logger(__name__)
+
+class LanRenOnnxYolov():
+    """YOLO ONNX detector executed through onnxruntime.
+
+    A frame is letterboxed to the model input size, run through the session,
+    filtered by score and NMS, and returned as ``Box`` objects expressed in
+    the coordinates of the original frame.
+    """
+
+    def __init__(self, weights='yolov5s_320.onnx', model_h=320, model_w=320, conf_thres=0.5, iou_thres=0.45):
+        """
+        Create the inference session.
+
+        Args:
+            weights: Path of the ONNX model file.
+            model_h: Model input height in pixels.
+            model_w: Model input width in pixels.
+            conf_thres: Minimum score (column 4 times the best class column) kept before NMS.
+            iou_thres: Overlap threshold handed to NMS.
+
+        Raises:
+            Exception: Whatever onnxruntime raises when the session cannot be
+                created even with the CPU provider alone.
+        """
+        # Class id -> label name; the names are runtime data and stay as shipped with the model.
+        self.dic_labels = {0: '玩家', 1: '副本开关', 2: '怪物', 3: '关闭', 4: '重新挑战', 5: 'F交互', 6: '滑翔翼', 7: '矿石', 8: '确定', 9: '取消', 10: '在水面', 11: '宠物', 12: '声骸', 13: '退出副本', 14: '在爬墙', 15: '无音区开关', 16: '血条', 17: '点击', 18: '复苏'}
+        providers = []
+        if ok.og.use_dml:
+            providers.append('DmlExecutionProvider')
+        providers.append('CPUExecutionProvider')
+        self.weights = weights
+        self.model_size = (model_w, model_h)
+        self.conf_threshold = conf_thres
+        self.iou_threshold = iou_thres
+        self.openfile_name_model = weights  # model path (kept alongside ``weights`` for compatibility)
+
+        try:
+            self.session = ort.InferenceSession(self.openfile_name_model, providers=providers)
+        except Exception as e:
+            # The preferred provider list could not be initialised; retry on CPU only.
+            logger.error('yolo session init failed with preferred providers, falling back to CPU', e)
+            self.session = ort.InferenceSession(self.openfile_name_model,
+                                                providers=['CPUExecutionProvider'])
+        # Report the providers actually in use rather than the ones requested.
+        logger.debug(f'yolo providers: {self.session.get_providers()}')
+
+    def _preprocess(self, image):
+        """Letterbox ``image`` to the model size and build the input tensor.
+
+        Args:
+            image: Frame as an OpenCV array (2-D grayscale, or 3-D with 3 or 4 channels).
+
+        Returns:
+            tuple: ``(tensor, scale, (left_pad, top_pad))`` where ``tensor`` is a
+            contiguous float32 CHW array scaled to 0..1, ``scale`` is the resize
+            factor and the pads are the border widths added on the left and top.
+        """
+        if image.ndim == 2:
+            # Single-channel frames have no shape[2]; expand them to three channels.
+            image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+        elif image.shape[2] == 4:
+            image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
+        h, w = image.shape[:2]
+        target_w, target_h = self.model_size
+        # Use the smaller ratio so the whole frame fits while keeping its aspect ratio.
+        scale = min(target_w / w, target_h / h)
+        new_w = int(w * scale)
+        new_h = int(h * scale)
+        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
+        dw = target_w - new_w
+        dh = target_h - new_h
+        top = dh // 2
+        bottom = dh - top
+        left = dw // 2
+        right = dw - left
+        padded = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))
+        # Reverse the channel order and move channels first (HWC -> CHW).
+        padded = padded[:, :, ::-1].transpose(2, 0, 1)
+        padded = np.ascontiguousarray(padded, dtype=np.float32) / 255.0
+        return (padded, scale, (left, top))
+
+    def _postprocess(self, outputs, scale, padding, orig_shape):
+        """Map raw predictions back to frame coordinates and apply NMS.
+
+        Args:
+            outputs: Batched prediction array; row layout is
+                ``[cx, cy, w, h, score, class scores...]`` as indexed below.
+            scale: Resize factor returned by ``_preprocess``.
+            padding: ``(left_pad, top_pad)`` returned by ``_preprocess``.
+            orig_shape: ``(height, width)`` of the original frame, used for clipping.
+
+        Returns:
+            list: ``(box_xyxy, score, class_id)`` tuples for the detections kept by NMS.
+        """
+        outputs = outputs[0]
+        scores = outputs[:, 4] * outputs[:, 5:].max(axis=1)
+        valid_mask = scores > self.conf_threshold
+        outputs = outputs[valid_mask]
+        if outputs.size == 0:
+            return []
+        scores = scores[valid_mask]
+        cxcy = outputs[:, 0:2]
+        wh = outputs[:, 2:4]
+        x1y1 = cxcy - wh / 2
+        x2y2 = cxcy + wh / 2
+        # Undo the letterbox: remove the border offset, then the resize factor.
+        left_pad, top_pad = padding
+        x1y1 -= np.array([left_pad, top_pad])
+        x2y2 -= np.array([left_pad, top_pad])
+        boxes = np.concatenate((x1y1, x2y2), axis=1) / scale
+        boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, orig_shape[1])
+        boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, orig_shape[0])
+        class_ids = np.argmax(outputs[:, 5:], axis=1)
+        # cv2.dnn.NMSBoxes takes boxes as [x, y, w, h]; feeding it corner pairs
+        # makes it compute overlap on the wrong rectangles.
+        nms_boxes = np.concatenate((boxes[:, :2], boxes[:, 2:] - boxes[:, :2]), axis=1)
+        indices = cv2.dnn.NMSBoxes(nms_boxes.tolist(), scores.tolist(), self.conf_threshold, self.iou_threshold)
+        # Depending on the OpenCV build the result is a flat array, an (N, 1)
+        # array or an empty tuple; normalise it to a flat integer array.
+        indices = np.asarray(indices, dtype=int).reshape(-1)
+        return [(boxes[i], scores[i], class_ids[i]) for i in indices]
+
+    # Inference entry point
+    def detect(self, image, threshold=0.5, label=-1):
+        """
+        Run detection on one frame.
+
+        Args:
+            image: Frame as an OpenCV array.
+            threshold: Minimum score a detection needs to be returned.
+            label: Class id to keep; a negative value keeps every class.
+
+        Returns:
+            list: ``Box`` objects with ``name`` and ``confidence`` set. An empty
+            list is returned when nothing matches or when any step raises
+            (the error is logged, not propagated).
+        """
+        try:
+            h, w = image.shape[:2]
+            preprocessed, scale, padding = self._preprocess(image)
+            input_tensor = np.expand_dims(preprocessed, axis=0)
+            outputs = self.session.run(None, {self.session.get_inputs()[0].name: input_tensor})[0]
+            detections = self._postprocess(outputs, scale, padding, (h, w))
+            results = []
+            for box, score, class_id in detections:
+                if score < threshold:
+                    continue
+                if label >= 0 and label != int(class_id):
+                    continue
+                x1, y1, x2, y2 = map(int, box)
+                # Box is built from top-left corner plus width and height.
+                box = Box(x1, y1, x2 - x1, y2 - y1)
+                box.name = self.dic_labels.get(int(class_id), 'unknown')
+                box.confidence = score
+                results.append(box)
+            logger.debug(f'results {results}')
+            return results
+        except Exception as e:
+            logger.error('yolo detect error', e)
+            return []
+
+
+if __name__ == '__main__':
+    # Manual smoke test: run the echo class (label 12) on two sample screenshots
+    # and print the per-image inference time in milliseconds.
+    weights = "assets/yolo/yolov5s_320.onnx"
+    model_h = 320
+    model_w = 320
+    yolov = LanRenOnnxYolov(weights=weights, model_w=model_w, model_h=model_h)
+
+    image_path = "tests/images/echo.png"
+    sample_images = (
+        cv2.imdecode(np.fromfile(file=image_path, dtype=np.uint8), cv2.IMREAD_COLOR),
+        cv2.imread("tests/images/echo2.png"),
+    )
+    for big_img in sample_images:
+        # Restart the clock for every image so each line reports one inference only.
+        old_time = time.time()
+        res_loc = yolov.detect(big_img, label=12)
+        # detect() returns an empty list when nothing is found; do not index into it blindly.
+        print((time.time() - old_time) * 1000, res_loc[0] if res_loc else None)
+
+
--- a/src/globals.py
+++ b/src/globals.py
@ -0,0 +1,33 @@
+import os.path
+from os import path
+
+import cv2
+from PySide6.QtCore import Signal, QObject
+
+from ok import Config, Logger, get_path_relative_to_exe
+from src.YoloDetect import LanRenOnnxYolov
+
+logger = Logger.get_logger(__name__)
+
+
+class Globals(QObject):
+    """Holds shared state: login flag, minimap arrow and a lazily built YOLO detector."""
+
+    def __init__(self, exit_event):
+        super().__init__()
+        self.logged_in = False
+        self.mini_map_arrow = None
+        # Created on first access of ``yolo_model`` so start-up does not load the model.
+        self._yolo_model = None
+
+    @property
+    def yolo_model(self):
+        """Return the detector, constructing it from the bundled model on first use."""
+        if self._yolo_model is not None:
+            return self._yolo_model
+        model_path = get_path_relative_to_exe(os.path.join("assets", "yolo", "yolov5s_320.onnx"))
+        self._yolo_model = LanRenOnnxYolov(weights=model_path)
+        return self._yolo_model
+
+    def yolo_detect(self, image, threshold=0.5, label=-1):
+        """Run the detector on ``image`` and return its result list unchanged."""
+        detector = self.yolo_model
+        return detector.detect(image, threshold=threshold, label=label)
+
+
+
+if __name__ == "__main__":
+    # Manual check: Globals can be constructed without a real exit event.
+    glbs = Globals(exit_event=None)
--- a/src/task/BaseWWTask.py
+++ b/src/task/BaseWWTask.py
@ -2,9 +2,11 @@ import re
 import time
 from datetime import datetime, timedelta

-from ok import BaseTask, Logger, find_boxes_by_name
+import numpy as np
+
+from ok import BaseTask, Logger, find_boxes_by_name, og, Box
 from ok import CannotFindException
-from ok import ConfigOption
+import cv2

 logger = Logger.get_logger(__name__)
 number_re = re.compile(r'^(\d+)$')
@ -119,17 +121,26 @@ class BaseWWTask(BaseTask):
                return None
        return f

-    def walk_to_box(self, find_function, time_out=30):
+    def walk_to_box(self, find_function, time_out=30, end_condition=None):
        if not find_function():
            self.log_info('find_function not found, break')
            return False
        last_direction = None
        start = time.time()
+        ended = False
        while time.time() - start < time_out:
+            if end_condition:
+                ended = end_condition()
+                if ended:
+                    break
            treasure_icon = find_function()
            if not treasure_icon:
-                self.log_info('find_function not found, break')
-                break
+                if not end_condition:
+                    self.log_info('find_function not found, break')
+                    break
+                else:
+                    self.next_frame()
+                    continue
            x, y = treasure_icon.center()
            y = max(0, y - self.height_of_screen(0.05))
            next_direction = self.get_direction(x, y, self.width, self.height)
@ -143,38 +154,70 @@ class BaseWWTask(BaseTask):
        if last_direction:
            self.send_key_up(last_direction)
            self.sleep(0.02)
-        return last_direction is not None
+        if not end_condition:
+            return last_direction is not None
+        else:
+            return ended

    def get_direction(self, location_x, location_y, screen_width, screen_height):
        """
-        Determines the location (top, left, bottom, right) of a point
-        on a screen divided by two diagonal lines.
-
+        Map a screen point to a movement key ("w", "a", "s" or "d").
+        The middle 2/3 of the screen width is split into four wedges by two diagonals;
+        points left of that strip map to "a" and points right of it map to "d".
        Args:
            location_x: The x-coordinate of the point.
            location_y: The y-coordinate of the point.
            screen_width: The width of the screen.
            screen_height: The height of the screen.
-
        Returns:
-            A string representing the location of the point:
-            "top", "left", "bottom", or "right".
+            A string "w", "a", "s", or "d". Points lying exactly on a diagonal fall through to "s".
        """
-
-        # Diagonal line 1: Top-left to bottom-right (y = (height/width) * x)
-        diagonal1_y = (screen_height / screen_width) * location_x
-
-        # Diagonal line 2: Top-right to bottom-left (y = - (height/width) * x + height)
-        diagonal2_y = - (screen_height / screen_width) * location_x + screen_height
-
-        if location_y < diagonal1_y and location_y < diagonal2_y:
-            return "w"
-        elif location_y > diagonal1_y and location_y > diagonal2_y:
-            return "s"
-        elif location_y < diagonal1_y and location_y > diagonal2_y:
-            return "d"
-        else:  # location_y > diagonal1_y and location_y < diagonal2_y:
-            return "a"
+        # Prevent division by zero or invalid inputs
+        if screen_width <= 0 or screen_height <= 0:
+            # Degenerate screen size: no wedges can be built, so only pick left or right
+            # by comparing x against half the (non-positive or unusable) width.
+            return "a" if location_x < screen_width / 2 else "d"
+        # --- Define the central strip ---
+        strip_width = (2 / 3) * screen_width
+        # Start x-coordinate of the strip (left edge)
+        x_start = (screen_width - strip_width) / 2
+        # End x-coordinate of the strip (right edge)
+        x_end = x_start + strip_width
+        # --- Handle points outside the strip ---
+        if location_x < x_start:
+            return "a"  # Point is in the left 1/6th of the screen
+        elif location_x > x_end:
+            return "d"  # Point is in the right 1/6th of the screen
+        else:
+            # --- Point is within the central strip ---
+            # Calculate diagonals as if the strip (width = strip_width) was the full screen
+            # NOTE(review): screen_width > 0 is guaranteed above, so this guard only
+            # triggers for widths below ~1.5e-9; it is unreachable for pixel sizes.
+            if strip_width < 1e-9: # Use tolerance for float comparison
+                 return "a" if location_x < screen_width / 2 else "d"
+            # Slope magnitude: rise (screen_height) / run (strip_width)
+            slope = screen_height / strip_width
+            # Adjusted Diagonal 1: From (x_start, 0) to (x_end, screen_height)
+            # Equation: y - 0 = slope * (x - x_start)
+            # Calculate y value on this diagonal *at the point's location_x*
+            adj_diagonal1_y = slope * (location_x - x_start)
+            # Adjusted Diagonal 2: From (x_end, 0) to (x_start, screen_height)
+            # Equation: y - 0 = -slope * (x - x_end)
+            # Calculate y value on this diagonal *at the point's location_x*
+            adj_diagonal2_y = -slope * (location_x - x_end)
+            # --- Apply original comparison logic using adjusted diagonals ---
+            # The comparisons below are purely numeric: "smaller y" is nearer the top of the
+            # screen only if y grows downward (assumed for screen coordinates; confirm).
+            # The branch order differs from the removed code but the four conditions are the same.
+            if location_y < adj_diagonal1_y and location_y > adj_diagonal2_y:
+                 # y smaller than diagonal 1 and larger than diagonal 2 -> 'd' (right wedge)
+                return "d"
+            elif location_y > adj_diagonal1_y and location_y < adj_diagonal2_y:
+                 # y larger than diagonal 1 and smaller than diagonal 2 -> 'a' (left wedge)
+                return "a"
+            elif location_y < adj_diagonal1_y and location_y < adj_diagonal2_y:
+                 # y smaller than both diagonals -> 'w' (top wedge)
+                return "w"
+            else: # location_y >= adj_diagonal1_y and location_y >= adj_diagonal2_y:
+                 # Everything else, including points exactly on a diagonal -> 's' (bottom wedge)
+                return "s"

    def find_treasure_icon(self):
        return self.find_one('treasure_icon', box=self.box_of_screen(0.1, 0.2, 0.9, 0.8))
@ -209,12 +252,6 @@ class BaseWWTask(BaseTask):
                                            use_gray_scale=False, threshold=0.5)
        return illusive_realm_exit is not None

-    def walk_find_echo(self, backward_time=1):
-        if self.walk_until_f(time_out=6, backward_time=backward_time, target_text=self.absorb_echo_text(),
-                             raise_if_not_found=False):  # find and pick echo
-            logger.debug(f'farm echo found echo move forward walk_until_f to find echo')
-            return True
-
    def walk_until_f(self, direction='w', time_out=0, raise_if_not_found=True, backward_time=0, target_text=None,
                     cancel=True):
        logger.info(f'walk_until_f direction {direction} target_text: {target_text}')
@ -338,49 +375,46 @@ class BaseWWTask(BaseTask):
            logger.info(f"found a claim reward")
            return True

-    def turn_and_find_echo(self):
-        if self.walk_until_f(target_text=self.absorb_echo_text(), raise_if_not_found=False):
+    def find_echo(self, threshold=0.5) -> 'Box | None':
+        """
+        Detect echoes in the current frame with the YOLO model and return the best match.
+
+        Args:
+            threshold (float): Minimum detection confidence, forwarded to the detector.
+
+        Returns:
+            Box | None: The echo detection (label 12) with the highest confidence,
+            or None when the detector returns no boxes.
+        """
+        # Forward the caller's threshold; it used to be accepted but silently ignored.
+        boxes = og.my_app.yolo_detect(self.frame, threshold=threshold, label=12)
+        return max(boxes, key=lambda detection: detection.confidence, default=None)
+
+    def pick_echo(self):
+        """Press 'f' when the absorb-echo prompt is found.
+
+        Returns True only when the prompt was found and ``handle_claim_button``
+        reported nothing to handle afterwards; otherwise returns None.
+        """
+        prompt = self.find_f_with_text(target_text=self.absorb_echo_text())
+        if not prompt:
+            return None
+        self.send_key('f')
+        if self.handle_claim_button():
+            return None
+        return True
+
+    def yolo_find_echo(self):
+        """Try to pick up an echo, searching with the YOLO detector when none is in reach.
+
+        First attempts an immediate pickup. Otherwise makes up to four attempts:
+        middle-click the screen centre, look for an echo, and either tap 'a' and
+        retry or walk towards the detection until ``pick_echo`` succeeds or 15 s pass.
+
+        Returns:
+            True after an immediate pickup or after a walk was started;
+            False when the fourth attempt still finds no usable echo.
+        """
+        if self.pick_echo():
+            self.sleep(0.5)
            return True
-        box = self.box_of_screen(0.25, 0.20, 0.75, 0.53, hcenter=True)
-        highest_percent = 0
-        highest_index = 0
-        threshold = 0.02
-        highest_frame = None
        for i in range(4):
            self.middle_click_relative(0.5, 0.5, down_time=0.2)
            self.sleep(1)
-            color_percent = self.calculate_color_percentage(echo_color, box)
-            if color_percent > highest_percent:
-                highest_percent = color_percent
-                highest_index = i
-                if self.debug:
-                    highest_frame = self.frame.copy()
-                if color_percent > threshold:
-                    found = self.walk_find_echo(backward_time=0.5)
-                    if found and self.debug and highest_frame is not None:
-                        self.screenshot('echo_picked', frame=highest_frame)
-                    self.log_debug(f'found color_percent {color_percent} > {threshold}, walk now')
-                    return found
-            # if self.debug:
-            #     self.screenshot(f'find_echo_{highest_index}_{float(color_percent):.3f}_{float(highest_percent):.3f}')
-            logger.debug(f'searching for echo {i} {float(color_percent):.3f} {float(highest_percent):.3f}')
-            # self.click_relative(0.25, 0.25)
-            self.send_key('a', down_time=0.05)
-            self.sleep(0.5)
-
-        if highest_percent > 0.0001:
-            for i in range((highest_index + 1) % 4):
-                self.middle_click_relative(0.5, 0.5)
-                self.sleep(0.5)
+            echo = self.find_echo()
+            # NOTE(review): echo may be None here; confirm draw_boxes accepts None.
+            self.draw_boxes('echo', echo)
+            # Skip this attempt when nothing was detected or the detection's centre y is
+            # greater than height_of_screen(0.5) (presumably the lower half of the screen).
+            if not echo or echo.center()[1] > self.height_of_screen(0.5):
+                if i == 3:
+                    # Last attempt failed as well: give up.
+                    return False
                self.send_key('a', down_time=0.05)
                self.sleep(0.5)
-            # if self.debug:
-            #     self.screenshot(f'pick_echo_{highest_index}')
-            logger.info(f'found echo {highest_index} walk')
-            found = self.walk_find_echo(backward_time=0)
-            if found and self.debug and highest_frame is not None:
-                self.screenshot('echo_picked', frame=highest_frame)
-            return found
+                continue
+            # NOTE(review): the result of walk_to_box is discarded, so True is returned
+            # even if the walk timed out without pick_echo succeeding; confirm this is intended.
+            self.walk_to_box(self.find_echo, time_out=15, end_condition=self.pick_echo)
+            return True

    def incr_drop(self, dropped):
        if dropped:
--- a/src/task/FarmEchoTask.py
+++ b/src/task/FarmEchoTask.py
@ -54,12 +54,7 @@ class FarmEchoTask(WWOneTimeTask, BaseCombatTask):

            self.combat_once()
            logger.info(f'farm echo move {self.config.get("Boss")} walk_until_f to find echo')
-            if self.config.get('Boss') != 'Jue':
-                dropped = self.turn_and_find_echo()
-                logger.debug(f'farm echo turn_and_find_echo')
-            else:
-                self.sleep(2)
-                dropped = self.turn_and_find_echo()
+            dropped = self.yolo_find_echo()
            self.incr_drop(dropped)
            self.sleep(0.5)
            self.send_key('esc')
--- a/src/task/FarmWorldBossTask.py
+++ b/src/task/FarmWorldBossTask.py
@ -28,16 +28,10 @@ class FarmWorldBossTask(WWOneTimeTask, BaseCombatTask):

        default_config = {
            'Boss1': 'N/A',
-            'Boss1 Echo Pickup Method': 'Turn Around and Search',
            'Boss2': 'N/A',
-            'Boss2 Echo Pickup Method': 'Turn Around and Search',
            'Boss3': 'N/A',
-            'Boss3 Echo Pickup Method': 'Turn Around and Search',
            'Repeat Farm Count': 1000
        }
-        self.config_type['Boss1 Echo Pickup Method'] = {'type': "drop_down", 'options': self.find_echo_method}
-        self.config_type['Boss2 Echo Pickup Method'] = {'type': "drop_down", 'options': self.find_echo_method}
-        self.config_type['Boss3 Echo Pickup Method'] = {'type': "drop_down", 'options': self.find_echo_method}
        default_config.update(self.default_config)
        self.default_config = default_config
        self.config_type["Boss1"] = {'type': "drop_down", 'options': self.boss_names}
@ -95,14 +89,8 @@ class FarmWorldBossTask(WWOneTimeTask, BaseCombatTask):
                            logger.info(f'sleep for the Boss model to disappear')
                            self.sleep(5)
                        logger.info(f'farm echo move forward walk_until_f to find echo')
-                        method = self.config.get(f'Boss{i} Echo Pickup Method', 'Walk')

-                        if method == 'Run in Circle':
-                            dropped = self.run_in_circle_to_find_echo()
-                        elif method == 'Turn Around and Search':
-                            dropped = self.turn_and_find_echo()
-                        else:
-                            dropped = self.walk_find_echo()
+                        dropped = self.yolo_find_echo()
                        self.incr_drop(dropped)

            if count < 2:
--- a/test.jpg
+++ b/test.jpg
--- a/tests/images/echo.png
+++ b/tests/images/echo.png
--- a/tests/images/echo2.png
+++ b/tests/images/echo2.png