import base64
import functools
import random
import time
from io import BytesIO

import cv2
import numpy as np
import onnxruntime
import requests
import torch
import torchvision
from lxml import etree
from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


def padded_resize(im, new_shape=(640, 640), stride=32):
    """Letterbox an HWC uint8 image to ``new_shape`` and return a model input.

    The image is resized with its aspect ratio preserved, padded with the
    constant value 114 to exactly ``new_shape`` (height, width), converted to
    CHW with the channel axis reversed, scaled to ``[0, 1]`` float32 and given
    a leading batch axis.

    Args:
        im: Image array indexed as (height, width, channels).
        new_shape: Target (height, width).
        stride: Unused; kept for signature compatibility.

    Returns:
        ``numpy.ndarray`` of shape ``(1, C, new_shape[0], new_shape[1])``.

    Errors from NumPy/OpenCV propagate to the caller (the function used to
    swallow every exception and silently return ``None``).
    """
    shape = im.shape[:2]  # current (height, width)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])  # scale ratio
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # total padding
    dw /= 2  # split the padding between both sides
    dh /= 2
    if shape[::-1] != new_unpad:  # resize only when the size actually changes
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The +/-0.1 nudges make an odd padding total split as floor/ceil.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))

    # HWC -> CHW, then reverse the channel axis.
    # NOTE(review): the reversal only yields RGB if the caller passes BGR;
    # onnx_model_main passes a PIL-decoded array - confirm the order the
    # model was exported with before changing this.
    im = im.transpose((2, 0, 1))[::-1]
    im = np.ascontiguousarray(im)
    im = torch.from_numpy(im)
    im = im.float()
    im /= 255  # 0-255 -> 0.0-1.0
    im = im[None]  # add batch dimension
    return im.cpu().numpy()


def xywh2xyxy(x):
    """Convert nx4 boxes from [cx, cy, w, h] to [x1, y1, x2, y2].

    (x1, y1) is the top-left corner and (x2, y2) the bottom-right corner.
    Accepts a ``torch.Tensor`` or a NumPy array and returns a copy of the
    same kind; the input is not modified.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def box_iou(box1, box2):
    """Return intersection-over-union (Jaccard index) of boxes.

    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])

    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    """Run Non-Maximum Suppression (NMS) on raw inference results.

    Args:
        prediction: Tensor indexed as (image, candidate, 5 + nc) holding
            [cx, cy, w, h, obj_conf, cls_conf...] per candidate. It is
            modified in place (objectness of out-of-range boxes is zeroed and
            class scores are multiplied by objectness).
        conf_thres: Minimum confidence, in [0, 1].
        iou_thres: IoU threshold passed to ``torchvision.ops.nms``, in [0, 1].
        classes: Optional list of class indices to keep.
        agnostic: When true, boxes of different classes suppress each other.
        multi_label: Allow several labels per box (only when nc > 1).
        labels: Optional per-image a-priori labels to concatenate.
        max_det: Maximum number of detections kept per image.

    Returns:
        List with one (n, 6) tensor per image: [x1, y1, x2, y2, conf, cls].
    """
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    min_wh, max_wh = 2, 7680  # (pixels) minimum and maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    # The placeholder tensor is shared between entries; that is safe because
    # entries are only ever replaced, never mutated.
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS: offsetting boxes by class keeps classes from
        # suppressing each other unless ``agnostic`` is set.
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output


def xyxy2xywh(x):
    """Convert nx4 boxes from [x1, y1, x2, y2] to [cx, cy, w, h].

    Accepts a ``torch.Tensor`` or a NumPy array and returns a copy of the
    same kind; the input is not modified.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
    y[:, 2] = x[:, 2] - x[:, 0]  # width
    y[:, 3] = x[:, 3] - x[:, 1]  # height
    return y


def is_ascii(s=''):
    """Return True when ``str(s)`` contains only ASCII characters."""
    s = str(s)  # convert list, tuple, None, etc. to str
    return len(s.encode().decode('ascii', 'ignore')) == len(s)


def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
    """Draw one xyxy box, with an optional text label, onto an image.

    NOTE(review): this is a module-level function that takes ``self``; it
    reads ``self.pil``, ``self.draw``, ``self.lw``, ``self.font`` and
    ``self.im``, so it presumably was lifted from an annotator class and must
    be called with such an object. Nothing in this file calls it.
    """
    if self.pil or not is_ascii(label):
        self.draw.rectangle(box, width=self.lw, outline=color)  # box
        if label:
            w, h = self.font.getsize(label)  # text width, height
            outside = box[1] - h >= 0  # label fits outside box
            self.draw.rectangle((box[0],
                                 box[1] - h if outside else box[1],
                                 box[0] + w + 1,
                                 box[1] + 1 if outside else box[1] + h + 1), fill=color)
            self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
    else:  # cv2
        p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
        cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
        if label:
            tf = max(self.lw - 1, 1)  # font thickness
            w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0]  # text width, height
            outside = p1[1] - h - 3 >= 0  # label fits outside box
            p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
            cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA)  # filled
            cv2.putText(self.im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, self.lw / 3, txt_color,
                        thickness=tf, lineType=cv2.LINE_AA)


def return_coordinates(xyxy, conf):
    """Turn one detection into a plain dict of integer corners and confidence.

    The box is grown by ``gain``/``pad`` around its centre and then shrunk by
    6 px on every side before the corners are reported.

    Args:
        xyxy: Four box coordinates [x1, y1, x2, y2].
        conf: Detection confidence (scalar tensor or number).

    Returns:
        ``{"leftTop": (x1, y1), "rightBottom": (x2, y2), "Confidence": float}``.
    """
    conf = float(conf)  # works for 0-d tensors and plain numbers alike
    gain = 1.02
    pad = 10
    xyxy = torch.tensor(xyxy).view(-1, 4)
    b = xyxy2xywh(xyxy)  # boxes
    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
    xyxy = xywh2xyxy(b).long()
    c1 = (int(xyxy[0, 0]) + 6, int(xyxy[0, 1]) + 6)
    c2 = (int(xyxy[0, 2]) - 6, int(xyxy[0, 3]) - 6)
    return {"leftTop": c1, "rightBottom": c2, "Confidence": conf}


def clip_coords(boxes, shape):
    """Clip xyxy boxes in place to an image of ``shape`` (height, width)."""
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[:, 0].clamp_(0, shape[1])  # x1
        boxes[:, 1].clamp_(0, shape[0])  # y1
        boxes[:, 2].clamp_(0, shape[1])  # x2
        boxes[:, 3].clamp_(0, shape[0])  # y2
    else:  # np.array (faster grouped)
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale xyxy ``coords`` in place from ``img1_shape`` to ``img0_shape``.

    Args:
        img1_shape: (height, width) of the letterboxed model input.
        coords: nx4 xyxy boxes expressed in ``img1_shape`` pixels.
        img0_shape: Shape of the original image; only [0] and [1] are read.
        ratio_pad: Optional ``((gain, ...), (pad_x, pad_y))`` override.

    Returns:
        The same ``coords`` object, rescaled and clipped to ``img0_shape``.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


@functools.lru_cache(maxsize=None)
def _load_session(model_path):
    """Create the CPU ONNX Runtime session for ``model_path`` once and reuse it."""
    return onnxruntime.InferenceSession(model_path, providers=["CPUExecutionProvider"])


def onnx_model_main(path, model_path="last.onnx"):
    """Detect the target in an image and report the most confident box.

    Args:
        path: Path of the image to analyse.
        model_path: ONNX model file; defaults to ``"last.onnx"`` as before.

    Returns:
        On success ``{'message': 'success', 'time': <seconds as str>,
        'point': 'x|y|w|h', 'leftTop': (x, y), 'rightBottom': (x, y),
        'Confidence': float}``; when nothing is detected
        ``{'message': 'error', 'time': <seconds as str>}``.
        The dict is also printed.
    """
    session = _load_session(model_path)
    start = time.time()

    # Decode inside a context manager so the file handle is closed, and force
    # three channels so grayscale / palette / RGBA inputs all work.
    with Image.open(path) as pil_image:
        img = np.array(pil_image.convert("RGB"))

    # Pre-process and run the model.
    im = padded_resize(img)
    pred = session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: im})[0]
    pred = torch.tensor(pred)
    # Confidence threshold is 0.25; 0.60 is the IoU threshold used by NMS.
    pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.60, max_det=1000)

    coordinate_list = []
    for det in pred:
        # Map boxes from the letterboxed input back to the original image.
        det[:, :4] = scale_coords(im.shape[2:], det[:, :4], img.shape).round()
        for *xyxy, conf, _cls in reversed(det):
            coordinate_list.append(return_coordinates(xyxy, conf))

    # Ascending by confidence, so the best detection is last.
    coordinate = sorted(coordinate_list, key=lambda a: a["Confidence"])
    duration = str(time.time() - start)

    if not coordinate:
        data = {'message': 'error', 'time': duration}
    else:
        best = coordinate[-1]
        x, y = best['leftTop']
        w = best['rightBottom'][0] - x
        h = best['rightBottom'][1] - y
        data = {'message': 'success', 'time': duration, 'point': f"{x}|{y}|{w}|{h}"}
        data.update(best)
    print(data)
    return data


def drow_rectangle(coordinate, path):
    """Draw the detected box on the image at ``path`` and save the result.

    The annotated image is written to ``drow_rectangle.jpg`` in the current
    working directory.

    Args:
        coordinate: Result dict from ``onnx_model_main``; must contain
            ``"leftTop"`` and ``"rightBottom"``.
        path: Path of the image the detection was run on.

    Raises:
        ValueError: ``coordinate`` carries no box (no detection).
        FileNotFoundError: OpenCV could not read ``path``.
    """
    left_top = coordinate.get("leftTop")
    right_bottom = coordinate.get("rightBottom")
    if left_top is None or right_bottom is None:
        raise ValueError(f"no detection to draw: {coordinate!r}")

    img = cv2.imread(path)
    if img is None:  # cv2.imread signals failure by returning None
        raise FileNotFoundError(f"cannot read image: {path}")

    result = cv2.rectangle(img, left_top, right_bottom, (0, 0, 255), 2)
    cv2.imwrite("drow_rectangle.jpg", result)  # image with the box drawn on it
    print("返回坐标矩形成功")


# Example usage: "1.png" is the path of the image to analyse.
coordinate_onnx = onnx_model_main("1.png")
# Example of the returned content:
# {'message': 'success', 'time': '0.5251204967498779', 'point': '260|90|59|56',
#  'leftTop': (260, 90), 'rightBottom': (319, 146), 'Confidence': 0.31054314970970154}

# Write a copy of the image with the detected box drawn on it next to the
# script. Only do so when a box was found: the error result has no corners.
if coordinate_onnx.get("message") == "success":
    drow_rectangle(coordinate_onnx, "1.png")



class Test():
    """Manual browser test that drags a slider captcha to the detected gap.

    A Chrome window is opened on construction; each ``*_test`` method drives
    one vendor demo page and ends with a blocking ``input()`` so the browser
    stays open for inspection.
    """

    def __init__(self):
        option = Options()
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        option.add_argument('--disable-blink-features=AutomationControlled')
        self.driver = webdriver.Chrome(options=option)

    def __ease_out_expo(self, sep):
        """Exponential ease-out: map progress ``sep`` in [0, 1] to [0, 1]."""
        if sep == 1:
            return 1
        return 1 - pow(2, -10 * sep)

    def generate_tracks(self, distance: int) -> list:
        """Build a slide trajectory for a horizontal drag of ``distance`` px.

        Args:
            distance: Distance to slide; must be an integer >= 0.

        Returns:
            A list of ``[x, y, t]`` points, where ``x`` is the horizontal
            distance covered so far, ``y`` the vertical one (0 for every
            point except the random starting point) and ``t`` the elapsed
            time in milliseconds. The final point is repeated once.

        Raises:
            ValueError: ``distance`` is not a non-negative integer.
        """
        if not isinstance(distance, int) or distance < 0:
            raise ValueError(f"distance类型必须是大于等于0的整数: distance: {distance}, type: {type(distance)}")

        # Start from a random point up and to the left, then the origin.
        slide_track = [
            [random.randint(-50, -10), random.randint(-50, -10), 0],
            [0, 0, 0],
        ]
        count = 30 + int(distance / 2)  # number of sampled positions
        t = random.randint(50, 100)  # elapsed time so far
        _x = 0  # last recorded horizontal position
        _y = 0
        for i in range(count):
            x = round(self.__ease_out_expo(i / count) * distance)
            t += random.randint(10, 20)
            if x == _x:  # skip samples that do not move the slider
                continue
            slide_track.append([x, _y, t])
            _x = x
        # Repeat the last point as a copy, so the two entries do not alias.
        slide_track.append(list(slide_track[-1]))
        return slide_track

    def _detect_and_drag(self, slider_xpath):
        """Detect the gap in ``1.png`` and drag the slider at ``slider_xpath``.

        Raises:
            RuntimeError: the detector found nothing in the image.
        """
        result = onnx_model_main('1.png')
        if 'leftTop' not in result:
            raise RuntimeError(f"no target detected in 1.png: {result}")
        drow_rectangle(result, '1.png')
        time.sleep(1)
        # NOTE(review): the 30 px presumably compensates for the slider's
        # starting offset inside the image - confirm per captcha vendor.
        distence = int(result['leftTop'][0]) - 30
        source = self.driver.find_element(By.XPATH, slider_xpath)
        action = ActionChains(self.driver, duration=20)
        action.click_and_hold(source)
        a = 0  # horizontal position already queued
        time.sleep(1)
        for x in self.generate_tracks(distence):
            print(x)
            # Track x values are cumulative; the action wants a delta.
            action.move_by_offset(xoffset=x[0] - a, yoffset=x[1])
            a = x[0]
        time.sleep(0.5)
        action.release().perform()
        input()  # keep the browser open until the operator presses Enter

    # Dingxiang demo
    def dx_test(self):
        """Run the slider demo on the Dingxiang captcha page."""
        # NOTE(review): the host part of this URL looks truncated
        # ("www.dingxiang-/") - restore the real address before running.
        url = 'https://www.dingxiang-/business/captcha?utm_source=baidu1sem&utm_medium=%E4%BA%A7%E5%93%81%E8%AF%8D&utm_campaign=%E4%BA%A7%E5%93%81%E8%AF%8D-%E9%AA%8C%E8%AF%81%E7%A0%81&utm_term=%E9%AA%8C%E8%AF%81%E7%A0%81&e_matchtype=1&e_keywordid=317912325143&bd_vid=9281303599717237405'
        self.driver.get(url=url)
        self.driver.maximize_window()
        # Scroll the page down by 500 px so the demo widget is in view.
        js_button = 'q=document.body.scrollTop=500'
        self.driver.execute_script(js_button)
        self.driver.find_element(By.XPATH, '//li[@class="item-2"]/h3').click()
        time.sleep(2)
        self.driver.find_element(By.XPATH, '//div[@id="dx_captcha_oneclick_bar-inform_3"]').click()
        time.sleep(2)
        self.download_yzm()
        self._detect_and_drag('//div[@id="dx_captcha_basic_slider-img-animated-wrap_4"]/span[1]')

    # NetEase demo
    def wy_test(self):
        """Run the slider demo on the NetEase jigsaw captcha page."""
        # NOTE(review): this URL has no scheme or host - it looks truncated.
        url = '/trial/jigsaw'
        self.driver.get(url=url)
        self.driver.maximize_window()
        # Scroll the page down by 500 px so the demo widget is in view.
        js_button = 'q=document.body.scrollTop=500'
        self.driver.execute_script(js_button)
        self.driver.find_element(By.XPATH, '//li[@class="tcapt-tabs__tab"][1]').click()
        time.sleep(2)
        self.driver.find_element(By.XPATH, '//button[@class="yidun_refresh"]').click()
        html = self.driver.page_source
        html = etree.HTML(html)
        url = html.xpath('//img[@class="yidun_bg-img"]/@src')[0]
        # Bound the download and fail loudly on an HTTP error instead of
        # saving an error page as the captcha image.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        print(url)
        with open('1.png', 'wb') as f:
            f.write(response.content)
        time.sleep(2)
        input(11111111111111)  # manual pause before continuing
        # NOTE(review): download_yzm and the slider XPath below use the
        # Dingxiang "dx_captcha" selectors, and download_yzm overwrites the
        # 1.png just saved - this looks copied from dx_test; confirm the
        # selectors that apply to this page.
        self.download_yzm()
        self._detect_and_drag('//div[@id="dx_captcha_basic_slider-img-animated-wrap_4"]/span[1]')

    def download_yzm(self):
        """Save the captcha background canvas of the current page to ``1.png``."""
        js = "return document.querySelector('.dx_captcha_basic_bg > canvas').toDataURL('image/png')"
        image_data = self.driver.execute_script(js)  # data URL of the canvas
        data = image_data.split(',')[1]  # drop the "data:image/png;base64" prefix
        image_data = base64.b64decode(data)
        with open('1.png', 'wb') as f:
            f.write(image_data)


if __name__ == '__main__':
    a = Test()
    a.dx_test()
    # To run detection only, without a browser:
    #   coordinate_onnx = onnx_model_main("1.png")
    #   drow_rectangle(coordinate_onnx, "1.png")



2、action = ActionChains(self.driver, duration=20) 这里报错


4、self.driver = webdriver.Chrome(options=option) 这里报错




2、删掉 duration=20 参数即可，或者升级 selenium 版本。

3、（find_element(By.XPATH, 'xxx') 报错时）改成 find_element_by_xpath('xxx')，或者升级 selenium 版本。

4、指定 chromedriver.exe 的位置，或者把它放在 python 目录下。




2、百度云下载: /s/1Nt2Z2Pu45RaQBs4dsS2kKg?pwd=82xg

提取码:82xg (老慢了)



github: /Bump-mann/simple_ocr











