# QRCode_det/python/QRCode_onnx_infer_v8.py
import onnxruntime as ort
import cv2
import numpy as np
import time
import yaml
import glob
import os
import pyzbar.pyzbar as pyzbar
names=['QRCode']
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
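    """
    Resize and pad `im` to `new_shape` while preserving aspect ratio (YOLO-style
    letterbox). Returns the padded image, the (w, h) scale ratio, and the
    (dw, dh) padding applied per side.
    """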
shape = im.shape[:2]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup:
r = min(r, 1.0)
ratio = r, r
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
if auto:
dw, dh = np.mod(dw, stride), np.mod(dh, stride)
elif scaleFill:
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]
dw /= 2
dh /= 2
if shape[::-1] != new_unpad:
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
return im, ratio, (dw, dh)
def data_process_cv2(frame, input_shape):
    '''
    Preprocess the input image for inference.
    :param frame: path to the input image
    :param input_shape: target (height, width) of the model input
    :return: normalized NCHW float32 tensor, the original image, and the letterboxed image
    '''
im0 = cv2.imread(frame)
img = letterbox(im0, input_shape, auto=False, stride=32)[0]
org_data = img.copy()
img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))
img = np.asarray(img, dtype=np.float32)
img = np.expand_dims(img, 0)
img /= 255.0
return img, im0, org_data
def non_max_suppression(prediction,
conf_thres=0.25,
iou_thres=0.45,
classes=None,
agnostic=False,
multi_label=False,
labels=(),
max_det=300,
nm=0 # number of masks
):
"""
Perform Non-Maximum Suppression (NMS) on the boxes to filter out overlapping boxes.
Parameters:
prediction (ndarray): Predictions from the model.
conf_thres (float): Confidence threshold to filter boxes.
iou_thres (float): Intersection over Union (IoU) threshold for NMS.
classes (list): Filter boxes by classes.
agnostic (bool): If True, perform class-agnostic NMS.
multi_label (bool): If True, perform multi-label NMS.
labels (list): Labels for auto-labelling.
max_det (int): Maximum number of detections.
nm (int): Number of masks.
Returns:
list: A list of filtered boxes.
"""
bs = prediction.shape[0] # batch size
nc = prediction.shape[2] - nm - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates
max_wh = 7680 # (pixels) maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
time_limit = 0.5 + 0.05 * bs # seconds to quit after
# redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
# merge = False # use merge-NMS
t = time.time()
mi = 5 + nc # mask start index
output = [np.zeros((0, 6 + nm))] * bs
for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints
x = x[xc[xi]] # confidence
# Cat apriori labels if autolabelling
if labels and len(labels[xi]):
lb = labels[xi]
v = np.zeros((len(lb), nc + nm + 5))
v[:, :4] = lb[:, 1:5] # box
v[:, 4] = 1.0 # conf
v[np.arange(len(lb)), lb[:, 0].astype(int) + 5] = 1.0 # cls
x = np.concatenate((x, v), 0)
# If none remain process next image
if not x.shape[0]:
continue
# Compute conf
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box/Mask
        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
mask = x[:, mi:] # zero columns if no masks
# Detections matrix nx6 (xyxy, conf, cls)
if multi_label:
i, j = np.nonzero(x[:, 5:mi] > conf_thres)
x = np.concatenate((box[i], x[i, 5 + j][:, None], j[:, None].astype(float), mask[i]), 1)
else: # best class only
# conf = x[:, 5:mi].max(1, keepdims=True)
# j = x[:, 5:mi].argmax(1,keepdims=True)
            conf = np.max(x[:, 5:mi], 1).reshape(-1, 1)
            j = np.argmax(x[:, 5:mi], 1).reshape(-1, 1)
x = np.concatenate((box, conf, j.astype(float), mask), 1)[conf[:, 0] > conf_thres]
# Filter by class
if classes is not None:
x = x[(x[:, 5:6] == np.array(classes)[:, None]).any(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
sorted_indices = np.argsort(x[:, 4])[::-1]
x = x[sorted_indices][:max_nms] # sort by confidence and remove excess boxes
# Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = nms(boxes, scores, iou_thres) # NMS
i = i[:max_det] # limit detections
output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit:.3f}s exceeded')
            break  # time limit exceeded
return output
# Define the function for NMS using numpy
def nms(boxes, scores, iou_threshold):
"""
Perform Non-Maximum Suppression (NMS) on the given boxes with scores using numpy.
Parameters:
boxes (ndarray): The bounding boxes, shaped (N, 4).
scores (ndarray): The confidence scores for each box, shaped (N,).
iou_threshold (float): The IoU threshold for suppressing overlapping boxes.
Returns:
ndarray: The indices of the selected boxes after NMS.
"""
if len(boxes) == 0:
return []
# Sort boxes by their scores
indices = np.argsort(scores)[::-1]
selected_indices = []
while len(indices) > 0:
# Select the box with the highest score
current_index = indices[0]
selected_indices.append(current_index)
# Compute IoU between the current box and all other boxes
current_box = boxes[current_index]
other_boxes = boxes[indices[1:]]
iou = calculate_iou(current_box, other_boxes)
# Remove boxes with IoU higher than the threshold
indices = indices[1:][iou <= iou_threshold]
return np.array(selected_indices)
def calculate_iou(box, boxes):
"""
Calculate the Intersection over Union (IoU) between a given box and a set of boxes.
Parameters:
box (ndarray): The coordinates of the first box, shaped (4,).
boxes (ndarray): The coordinates of the other boxes, shaped (N, 4).
Returns:
ndarray: The IoU between the given box and each box in the set, shaped (N,).
"""
# Calculate intersection coordinates
x1 = np.maximum(box[0], boxes[:, 0])
y1 = np.maximum(box[1], boxes[:, 1])
x2 = np.minimum(box[2], boxes[:, 2])
y2 = np.minimum(box[3], boxes[:, 3])
# Calculate intersection area
intersection_area = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
# Calculate areas of both bounding boxes
box_area = (box[2] - box[0]) * (box[3] - box[1])
boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
# Calculate IoU
iou = intersection_area / (box_area + boxes_area - intersection_area)
return iou
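# Worked example: two 1x1 boxes offset by half a width intersect over area 0.5
# with union 1.5, so calculate_iou(np.array([0, 0, 1, 1]),
# np.array([[0.5, 0, 1.5, 1]])) returns approximately [0.333].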
# Define xywh2xyxy function for converting bounding box format
def xywh2xyxy(x):
"""
Convert bounding boxes from (center_x, center_y, width, height) to (x1, y1, x2, y2) format.
Parameters:
x (ndarray): Bounding boxes in (center_x, center_y, width, height) format, shaped (N, 4).
Returns:
ndarray: Bounding boxes in (x1, y1, x2, y2) format, shaped (N, 4).
"""
y = x.copy()
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
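# e.g. xywh2xyxy(np.array([[10., 10., 4., 6.]])) -> [[8., 7., 12., 13.]]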
def xyxy2xywh(x):
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
y = np.copy(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
y[:, 2] = x[:, 2] - x[:, 0] # width
y[:, 3] = x[:, 3] - x[:, 1] # height
return y
def post_process_yolo(det, im, im0, gn, save_path, img_name):
detections = []
if len(det):
det[:, :4] = scale_boxes(im.shape[:2], det[:, :4], im0.shape).round()
colors = Colors()
for *xyxy, conf, cls in reversed(det):
# print("class:",int(cls), "left:%.0f" % xyxy[0],"top:%.0f" % xyxy[1],"right:%.0f" % xyxy[2],"bottom:%.0f" % xyxy[3], "conf:",'{:.0f}%'.format(float(conf)*100))
            int_coords = [int(coord.item()) for coord in xyxy]
# print(int_coords)
detections.append(int_coords)
# c = int(cls)
# label = names[c]
# res_img = plot_one_box(xyxy, im0, label=f'{label}:{conf:.2f}', color=colors(c, True), line_thickness=4)
# cv2.imwrite(f'{save_path}/{img_name}.jpg',res_img)
# xywh = (xyxy2xywh(np.array(xyxy,dtype=np.float32).reshape(1, 4)) / gn).reshape(-1).tolist() # normalized xywh
# line = (cls, *xywh) # label format
# with open(f'{save_path}/{img_name}.txt', 'a') as f:
# f.write(('%g ' * len(line)).rstrip() % line + '\n')
return detections
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
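    """
    Rescale `boxes` from the letterboxed inference shape `img1_shape` back to the
    original image shape `img0_shape`, undoing the resize gain and padding.
    """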
if ratio_pad is None:
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
boxes[..., [0, 2]] -= pad[0]
boxes[..., [1, 3]] -= pad[1]
boxes[..., :4] /= gain
clip_boxes(boxes, img0_shape)
return boxes
def clip_boxes(boxes, shape):
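    """Clip box coordinates in place to the image bounds, where `shape` is (h, w, ...)."""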
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])
def yaml_load(file='coco128.yaml'):
with open(file, errors='ignore') as f:
return yaml.safe_load(f)
class Colors:
# Ultralytics color palette https://ultralytics.com/
def __init__(self):
"""
Initializes the Colors class with a palette derived from Ultralytics color scheme, converting hex codes to RGB.
Colors derived from `hex = matplotlib.colors.TABLEAU_COLORS.values()`.
"""
hexs = (
"FF3838",
"FF9D97",
"FF701F",
"FFB21D",
"CFD231",
"48F90A",
"92CC17",
"3DDB86",
"1A9334",
"00D4BB",
"2C99A8",
"00C2FF",
"344593",
"6473FF",
"0018EC",
"8438FF",
"520085",
"CB38FF",
"FF95C8",
"FF37C7",
)
self.palette = [self.hex2rgb(f"#{c}") for c in hexs]
self.n = len(self.palette)
def __call__(self, i, bgr=False):
"""Returns color from palette by index `i`, in BGR format if `bgr=True`, else RGB; `i` is an integer index."""
c = self.palette[int(i) % self.n]
return (c[2], c[1], c[0]) if bgr else c
@staticmethod
def hex2rgb(h):
"""Converts hex color codes to RGB values (i.e. default PIL order)."""
return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4))
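# e.g. Colors.hex2rgb('#FF3838') -> (255, 56, 56)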
def plot_one_box(x, im, color=None, label=None, line_thickness=3, steps=2, orig_shape=None):
assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(im, c1, c2, color, thickness=max(tl // 3, 1), lineType=cv2.LINE_AA)
if label:
if len(label.split(':')) > 1:
tf = max(tl - 1, 1)
t_size = cv2.getTextSize(label, 0, fontScale=tl / 6, thickness=tf)[0]
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)
            cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 6, [225, 255, 255], thickness=max(tf // 2, 1), lineType=cv2.LINE_AA)
return im
def model_load(model):
providers = ['CPUExecutionProvider']
session = ort.InferenceSession(model, providers=providers)
input_name = session.get_inputs()[0].name
    output_names = [x.name for x in session.get_outputs()]
return session, output_names
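# Minimal usage sketch, assuming the ./yolov8n.onnx model used in __main__ and a
# preprocessed NCHW float32 blob:
#   session, output_names = model_load('./yolov8n.onnx')
#   preds = session.run(output_names, {session.get_inputs()[0].name: blob})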
def make_anchors(feats, strides, grid_cell_offset=0.5):
"""Generate anchors from features."""
anchor_points, stride_tensor = [], []
assert feats is not None
dtype = feats[0].dtype
for i, stride in enumerate(strides):
_, _, h, w = feats[i].shape
sx = np.arange(w, dtype=dtype) + grid_cell_offset # shift x
sy = np.arange(h, dtype=dtype) + grid_cell_offset # shift y
sy, sx = np.meshgrid(sy, sx, indexing='ij')
anchor_points.append(np.stack((sx, sy), axis=-1).reshape(-1, 2))
stride_tensor.append(np.full((h * w, 1), stride, dtype=dtype))
return np.concatenate(anchor_points), np.concatenate(stride_tensor)
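# For a 640x640 input and strides (8, 16, 32), the feature maps are 80x80, 40x40
# and 20x20, giving 6400 + 1600 + 400 = 8400 anchor points in total.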
def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
"""Transform distance(ltrb) to box(xywh or xyxy)."""
lt, rb = np.split(distance, 2, axis=dim)
x1y1 = anchor_points - lt
x2y2 = anchor_points + rb
if xywh:
c_xy = (x1y1 + x2y2) / 2
wh = x2y2 - x1y1
return np.concatenate((c_xy, wh), axis=dim) # xywh bbox
return np.concatenate((x1y1, x2y2), axis=dim) # xyxy bbox
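# Worked example: an anchor at (8.5, 8.5) with ltrb distances (2, 2, 2, 2)
# decodes to the xywh box (8.5, 8.5, 4.0, 4.0), a 4x4 box centered on the anchor.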
class DFL:
"""
NumPy implementation of Distribution Focal Loss (DFL) integral module.
Original paper: Generalized Focal Loss (IEEE TPAMI 2023)
"""
def __init__(self, c1=16):
"""Initialize with given number of distribution channels"""
self.c1 = c1
        # Weight matrix equivalent to the fixed weights of the original 1x1 conv layer
self.weights = np.arange(c1, dtype=np.float32).reshape(1, c1, 1, 1)
def __call__(self, x):
"""
前向传播逻辑
参数:
x: 输入张量,形状为(batch, channels, anchors)
返回:
处理后的张量,形状为(batch, 4, anchors)
"""
b, c, a = x.shape
        # Equivalent to the original view -> transpose -> softmax sequence;
        # subtract the per-bin max before exp for numerical stability.
        x_reshaped = x.reshape(b, 4, self.c1, a)
        x_transposed = np.transpose(x_reshaped, (0, 2, 1, 3))
        x_exp = np.exp(x_transposed - np.max(x_transposed, axis=1, keepdims=True))
        x_softmax = x_exp / np.sum(x_exp, axis=1, keepdims=True)
        # Equivalent 1x1 convolution, implemented as a weighted sum over the bins
conv_result = np.sum(self.weights * x_softmax, axis=1)
return conv_result.reshape(b, 4, a)
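# Illustrative sanity check (not part of the original script): with reg_max=16,
# a distribution that puts all its mass on bin 3 for each of the 4 sides should
# decode to a distance of 3.0:
#   dfl = DFL(16)
#   logits = np.full((1, 64, 1), -1e9, dtype=np.float32)
#   logits[:, 3::16, :] = 0.0  # channel layout is side*16 + bin after the reshape
#   dfl(logits)                # -> all 3.0, shape (1, 4, 1)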
class YOLOV8Detector:
def __init__(self, model_path, imgsz=[640,640]):
self.model_path = model_path
self.session, self.output_names = model_load(self.model_path)
self.imgsz = imgsz
self.stride = [8.,16.,32.]
self.reg_max = 16
self.nc = 1
self.no = self.nc + self.reg_max * 4
self.dfl = DFL(self.reg_max)
def detect_objects(self, image, save_path):
im, im0, org_data = data_process_cv2(image, self.imgsz)
img_name = os.path.basename(image).split('.')[0]
infer_start_time = time.time()
x = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
infer_end_time = time.time()
print(f"infer time: {infer_end_time - infer_start_time:.4f}s")
        x = [np.transpose(x[i], (0, 3, 1, 2)) for i in range(3)]  # outputs to NCHW
        anchors, strides = (np.transpose(t, (1, 0)) for t in make_anchors(x, self.stride, 0.5))
x_cat = np.concatenate([xi.reshape(1, self.no, -1) for xi in x], axis=2)
box = x_cat[:, :self.reg_max * 4,:]
cls = x_cat[:, self.reg_max * 4:,:]
dbox = dist2bbox(self.dfl(box), np.expand_dims(anchors, axis=0), xywh=True, dim=1) * strides
y = np.concatenate((dbox, 1/(1 + np.exp(-cls))), axis=1)
pred = y.transpose([0, 2, 1])
pred_class = pred[..., 4:]
pred_conf = np.max(pred_class, axis=-1)
pred = np.insert(pred, 4, pred_conf, axis=-1)
pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, max_det=1000)
gn = np.array(org_data.shape)[[1, 0, 1, 0]].astype(np.float32)
res = post_process_yolo(pred[0], org_data, im0, gn, save_path, img_name)
return res, im0
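# Hedged usage sketch (the image filename below is illustrative):
#   detector = YOLOV8Detector(model_path='./yolov8n.onnx', imgsz=[640, 640])
#   boxes, original = detector.detect_objects('./images/sample.jpg', './v8_det_res')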
class QRCodeDecoder:
def crop_qr_regions(self, image, regions):
"""
        Crop QR code regions from the image according to the detected bounding boxes.
"""
cropped_images = []
for idx, region in enumerate(regions):
x1, y1, x2, y2 = region
            # Expand the box by 15 px to mitigate decode failures caused by
            # truncated detections (tune to your detector); clamp at 0 so negative
            # coordinates cannot wrap around via negative indexing.
            x1 = max(x1 - 15, 0)
            y1 = max(y1 - 15, 0)
            x2 += 15
            y2 += 15
            # Crop the region of interest
            cropped = image[y1:y2, x1:x2]
if cropped.size > 0:
cropped_images.append({
'image': cropped,
'bbox': region,
})
# cv2.imwrite(f'cropped_qr_{idx}.jpg', cropped)
return cropped_images
def decode_qrcode_pyzbar(self, cropped_image):
"""
        Decode QR codes in a cropped image with pyzbar.
"""
try:
            # Convert to grayscale
if len(cropped_image.shape) == 3:
gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
else:
gray = cropped_image
# cv2.imwrite('cropped_gray.jpg',gray)
            # Decode with pyzbar
decoded_objects = pyzbar.decode(gray)
results = []
for obj in decoded_objects:
try:
data = obj.data.decode('utf-8')
results.append({
'data': data,
'type': obj.type,
'points': obj.polygon
})
                except UnicodeDecodeError:
                    continue
return results
except Exception as e:
print(f"decode error: {e}")
return []
if __name__ == '__main__':
    detector = YOLOV8Detector(model_path='./yolov8n.onnx', imgsz=[640, 640])
    decoder = QRCodeDecoder()
    img_path = './images'
    det_path = './v8_det_res'
    crop_path = './v8_crop_res'
os.makedirs(det_path, exist_ok=True)
os.makedirs(crop_path, exist_ok=True)
imgs = glob.glob(f"{img_path}/*.jpg")
    total = len(imgs)
success = 0
fail = 0
start_time = time.time()
    for idx, img in enumerate(imgs):
        pic_name = os.path.basename(img).split('.')[0]
        loop_start_time = time.time()
        det_result, res_img = detector.detect_objects(img, det_path)
# cv2.imwrite(os.path.join(det_path, pic_name+'.jpg'), res_img)
        # Crop detected QR codes and decode them with pyzbar
cropped_images = decoder.crop_qr_regions(res_img, det_result)
# for i,cropped in enumerate(cropped_images):
# cv2.imwrite(os.path.join(crop_path, f'{pic_name}_crop_{i}.jpg'), cropped['image'])
all_decoded_results = []
for i, cropped_data in enumerate(cropped_images):
decoded_results = decoder.decode_qrcode_pyzbar(cropped_data['image'])
all_decoded_results.extend(decoded_results)
# for result in decoded_results:
# print(f"decode result: {result['data']} (type: {result['type']})")
        if all_decoded_results:
            success += 1
            print("Decode succeeded!")
        else:
            fail += 1
            print("Decode failed!")
        loop_end_time = time.time()
        print(f"Image {img} processed in {loop_end_time - loop_start_time:.4f} s")
    end_time = time.time()  # record the overall end time
    total_time = end_time - start_time  # total elapsed time
    print(f"Total images tested: {total}")
    print(f"Successful decodes: {success}")
    print(f"Failed decodes: {fail}")
    print(f"Decode success rate: {success/total*100:.2f}%")
    print(f"Total processing time: {total_time:.4f} s")
    print(f"Average time per image: {total_time/total:.4f} s")