#!/usr/bin/env python
# -*- coding: utf-8 -*-
import _init_paths
import caffe
import cv2
import numpy as np
def bbreg(boundingbox, reg):
reg = reg.T
# calibrate bouding boxes
if reg.shape[1] == 1:
pass # reshape of reg
w = boundingbox[:,2] - boundingbox[:,0] + 1
h = boundingbox[:,3] - boundingbox[:,1] + 1
bb0 = boundingbox[:,0] + reg[:,0]*w
bb1 = boundingbox[:,1] + reg[:,1]*h
bb2 = boundingbox[:,2] + reg[:,2]*w
bb3 = boundingbox[:,3] + reg[:,3]*h
boundingbox[:,0:4] = np.array([bb0, bb1, bb2, bb3]).T
#print "bb", boundingbox
return boundingbox
def pad(boxesA, w, h):
boxes = boxesA.copy() # shit, value parameter!!!
tmph = boxes[:,3] - boxes[:,1] + 1
tmpw = boxes[:,2] - boxes[:,0] + 1
numbox = boxes.shape[0]
dx = np.ones(numbox)
dy = np.ones(numbox)
edx = tmpw
edy = tmph
x = boxes[:,0:1][:,0]
y = boxes[:,1:2][:,0]
ex = boxes[:,2:3][:,0]
ey = boxes[:,3:4][:,0]
tmp = np.where(ex > w)[0]
if tmp.shape[0] != 0:
edx[tmp] = -ex[tmp] + w-1 + tmpw[tmp]
ex[tmp] = w-1
tmp = np.where(ey > h)[0]
if tmp.shape[0] != 0:
edy[tmp] = -ey[tmp] + h-1 + tmph[tmp]
ey[tmp] = h-1
tmp = np.where(x < 1)[0]
if tmp.shape[0] != 0:
dx[tmp] = 2 - x[tmp]
x[tmp] = np.ones_like(x[tmp])
tmp = np.where(y < 1)[0]
if tmp.shape[0] != 0:
dy[tmp] = 2 - y[tmp]
y[tmp] = np.ones_like(y[tmp])
# for python index from 0, while matlab from 1
dy = np.maximum(0, dy-1)
dx = np.maximum(0, dx-1)
y = np.maximum(0, y-1)
x = np.maximum(0, x-1)
edy = np.maximum(0, edy-1)
edx = np.maximum(0, edx-1)
ey = np.maximum(0, ey-1)
ex = np.maximum(0, ex-1)
#print 'boxes', boxes
return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
def rerec(bboxA):
# convert bboxA to square
w = bboxA[:,2] - bboxA[:,0]
h = bboxA[:,3] - bboxA[:,1]
l = np.maximum(w,h).T
bboxA[:,0] = bboxA[:,0] + w*0.5 - l*0.5
bboxA[:,1] = bboxA[:,1] + h*0.5 - l*0.5
bboxA[:,2:4] = bboxA[:,0:2] + np.repeat([l], 2, axis = 0).T
return bboxA
def nms(boxes, threshold, type):
"""nms
:boxes: [:,0:5]
:threshold: 0.5 like
:type: 'Min' or others
:returns: TODO"""
if boxes.shape[0] == 0:
return np.array([])
x1 = boxes[:,0]
y1 = boxes[:,1]
x2 = boxes[:,2]
y2 = boxes[:,3]
s = boxes[:,4]
area = np.multiply(x2-x1+1, y2-y1+1)
I = np.array(s.argsort()) # read s using I
pick = [];
while len(I) > 0:
xx1 = np.maximum(x1[I[-1]], x1[I[0:-1]])
yy1 = np.maximum(y1[I[-1]], y1[I[0:-1]])
xx2 = np.minimum(x2[I[-1]], x2[I[0:-1]])
yy2 = np.minimum(y2[I[-1]], y2[I[0:-1]])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
if type == 'Min':
o = inter / np.minimum(area[I[-1]], area[I[0:-1]])
else:
o = inter / (area[I[-1]] + area[I[0:-1]] - inter)
pick.append(I[-1])
I = I[np.where( o <= threshold)[0]]
return pick
def generateBoundingBox(map, reg, scale, t):
stride = 2
cellsize = 12
map = map.T
dx1 = reg[0,:,:].T
dy1 = reg[1,:,:].T
dx2 = reg[2,:,:].T
dy2 = reg[3,:,:].T
(x, y) = np.where(map >= t)
yy = y
xx = x
'''
if y.shape[0] == 1: # only one point exceed threshold
y = y.T
x = x.T
score = map[x,y].T
dx1 = dx1.T
dy1 = dy1.T
dx2 = dx2.T
dy2 = dy2.T
# a little stange, when there is only one bb created by PNet
#print "1: x,y", x,y
a = (x*map.shape[1]) + (y+1)
x = a/map.shape[0]
y = a%map.shape[0] - 1
#print "2: x,y", x,y
else:
score = map[x,y]'''
score = map[x,y]
reg = np.array([dx1[x,y], dy1[x,y], dx2[x,y], dy2[x,y]])
if reg.shape[0] == 0:
pass
boundingbox = np.array([yy, xx]).T
bb1 = np.fix((stride * (boundingbox) + 1) / scale).T # matlab index from 1, so with "boundingbox-1"
bb2 = np.fix((stride * (boundingbox) + cellsize - 1 + 1) / scale).T # while python don't have to
score = np.array([score])
boundingbox_out = np.concatenate((bb1, bb2, score, reg), axis=0)
return boundingbox_out.T
def drawBoxes(im, boxes):
x1 = boxes[:,0]
y1 = boxes[:,1]
x2 = boxes[:,2]
y2 = boxes[:,3]
for i in range(x1.shape[0]):
cv2.rectangle(im, (int(x1[i]), int(y1[i])), (int(x2[i]), int(y2[i])), (0,255,0), 1)
return im
def drawPoints(im,points):
for i in range(points.shape[0]):
left_eye = (int(points[i][0]),int(points[i][5]))
cv2.circle(im, left_eye,2, (0,0,255), 2)
right_eye = (int(points[i][1]),int(points[i][6]))
cv2.circle(im, right_eye,2, (0,0,255), 2)
nose = (int(points[i][2]),int(points[i][7]))
cv2.circle(im, nose,2, (0,0,255), 2)
left_mouth = (int(points[i][3]),int(points[i][8]))
cv2.circle(im, left_mouth,2, (0,0,255), 2)
right_mouth = (int(points[i][4]),int(points[i][9]))
cv2.circle(im, right_mouth,2, (0,0,255), 2)
return im
def drawPatch(im,points):
for i in range(points.shape[0]):
left_eye = (int(points[i][0]),int(points[i][5]))
right_eye = (int(points[i][1]),int(points[i][6]))
nose = (int(points[i][2]),int(points[i][7]))
left_mouth = (int(points[i][3]),int(points[i][8]))
right_mouth = (int(points[i][4]),int(points[i][9]))
eye_length = np.sqrt((left_eye[0]-right_eye[0])*(left_eye[0]-right_eye[0])+(left_eye[1]-right_eye[1])*(left_eye[1]-right_eye[1]))
mouth_length = np.sqrt((left_mouth[0]-right_mouth[0])*(left_mouth[0]-right_mouth[0])+(left_mouth[1]-right_mouth[1])*(left_mouth[1]-right_mouth[1]))
t11_x = left_eye[0]
t11_y = left_eye[1] - 0.8*eye_length
t12_x = left_eye[0] + eye_length
t12_y = left_eye[1] - 0.4*eye_length
cv2.rectangle(im, (int(t11_x), int(t11_y)), (int(t12_x), int(t12_y)), (0,255,0), 1)
t21_x = (left_eye[0] + right_eye[0])/2 - 0.15*eye_length
t21_y = (left_eye[1] + right_eye[1])/2 - 0.3*eye_length
t22_x = (left_eye[0] + right_eye[0])/2 + 0.15*eye_length
t22_y = (left_eye[1] + right_eye[1])/2
cv2.rectangle(im, (int(t21_x), int(t21_y)), (int(t22_x), int(t22_y)), (0,255,0), 1)
t31_x = (left_eye[0] +nose[0])/2-0.1*eye_length
t31_y = ((left_eye[1] + right_eye[1])/2 + nose[1])/2 - 0.1*eye_length
t32_x = (left_eye[0] +nose[0])/2+0.1*eye_length
t32_y = ((left_eye[1] + right_eye[1])/2 + nose[1])/2 + 0.1*eye_length
cv2.rectangle(im, (int(t31_x), int(t31_y)), (int(t32_x), int(t32_y)), (0,255,0), 1)
t41_x = (right_eye[0] +nose[0])/2-0.1*eye_length
t41_y = ((left_eye[1] + right_eye[1])/2 + nose[1])/2 - 0.1*eye_length
t42_x = (right_eye[0] +nose[0])/2+0.1*eye_length
t42_y = ((left_eye[1] + right_eye[1])/2 + nose[1])/2 + 0.1*eye_length
cv2.rectangle(im, (int(t41_x), int(t41_y)), (int(t42_x), int(t42_y)), (0,255,0), 1)
t51_x = nose[0]-0.1*eye_length
t51_y = nose[1]-0.1*eye_length
t52_x = nose[0]+0.1*eye_length
t52_y = nose[1]+0.1*eye_length
cv2.rectangle(im, (int(t51_x), int(t51_y)), (int(t52_x), int(t52_y)), (0,255,0), 1)
t61_x = nose[0]-0.1*eye_length
t61_y = nose[1]+0.2*eye_length
t62_x = nose[0]+0.1*eye_length
t62_y = nose[1]+0.4*eye_length
cv2.rectangle(im, (int(t61_x), int(t61_y)), (int(t62_x), int(t62_y)), (0,255,0), 1)
t71_x = left_mouth[0] - 0.7*mouth_length
t71_y = left_mouth[1] - mouth_length
t72_x = left_mouth[0]
t72_y = left_mouth[1]
cv2.rectangle(im, (int(t71_x), int(t71_y)), (int(t72_x), int(t72_y)), (0,255,0), 1)
t81_x = right_mouth[0]
t81_y = right_mouth[1] - mouth_length
t82_x = right_mouth[0] + 0.7*mouth_length
t82_y = left_mouth[1]
cv2.rectangle(im, (int(t81_x), int(t81_y)), (int(t82_x), int(t82_y)), (0,255,0), 1)
return im
from time import time
_tstart_stack = []
def tic():
_tstart_stack.append(time())
def toc(fmt="Elapsed: %s s"):
print fmt % (time()-_tstart_stack.pop())
def detect_face(img, minsize, PNet, RNet, ONet, threshold, fastresize, factor):
img2 = img.copy()
factor_count = 0
total_boxes = np.zeros((0,9), np.float)
points = []
h = img.shape[0]
w = img.shape[1]
minl = min(h, w)
img = img.astype(float)
m = 12.0/minsize
minl = minl*m
# create scale pyramid
scales = []
while minl >= 12:
scales.append(m * pow(factor, factor_count))
minl *= factor
factor_count += 1
# first stage
for scale in scales:
hs = int(np.ceil(h*scale))
ws = int(np.ceil(w*scale))
if fastresize:
im_data = (img-127.5)*0.0078125 # [0,255] -> [-1,1]
im_data = cv2.resize(im_data, (ws,hs)) # default is bilinear
else:
im_data = cv2.resize(img, (ws,hs)) # default is bilinear
im_data = (im_data-127.5)*0.0078125 # [0,255] -> [-1,1]
im_data = np.swapaxes(im_data, 0, 2)
im_data = np.array([im_data], dtype = np.float)
PNet.blobs['data'].reshape(1, 3, ws, hs)
PNet.blobs['data'].data[...] = im_data
out = PNet.forward()
boxes = generateBoundingBox(out['prob1'][0,1,:,:], out['conv4-2'][0], scale, threshold[0])
if boxes.shape[0] != 0:
pick = nms(boxes, 0.5, 'Union')
if len(pick) > 0 :
boxes = boxes[pick, :]
if boxes.shape[0] != 0:
total_boxes = np.concatenate((total_boxes, boxes), axis=0)
#####
# 1 #
#####
numbox = total_boxes.shape[0]
if numbox > 0:
# nms
pick = nms(total_boxes, 0.7, 'Union')
total_boxes = total_boxes[pick, :]
# revise and convert to square
regh = total_boxes[:,3] - total_boxes[:,1]
regw = total_boxes[:,2] - total_boxes[:,0]
t1 = total_boxes[:,0] + total_boxes[:,5]*regw
t2 = total_boxes[:,1] + total_boxes[:,6]*regh
t3 = total_boxes[:,2] + total_boxes[:,7]*regw
t4 = total_boxes[:,3] + total_boxes[:,8]*regh
t5 = total_boxes[:,4]
total_boxes = np.array([t1,t2,t3,t4,t5]).T
total_boxes = rerec(total_boxes) # convert box to square
total_boxes[:,0:4] = np.fix(total_boxes[:,0:4])
[dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h)
#print total_boxes.shape
#print total_boxes
numbox = total_boxes.shape[0]
if numbox > 0:
# second stage
# construct input for RNet
tempimg = np.zeros((numbox, 24, 24, 3)) # (24, 24, 3, numbox)
for k in range(numbox):
tmp = np.zeros((tmph[k], tmpw[k],3))
tmp[dy[k]:edy[k]+1, dx[k]:edx[k]+1] = img[y[k]:ey[k]+1, x[k]:ex[k]+1]
tempimg[k,:,:,:] = cv2.resize(tmp, (24, 24))
tempimg = (tempimg-127.5)*0.0078125 # done in imResample function wrapped by python
# RNet
tempimg = np.swapaxes(tempimg, 1, 3)
RNet.blobs['data'].reshape(numbox, 3, 24, 24)
RNet.blobs['data'].data[...] = tempimg
out = RNet.forward()
score = out['prob1'][:,1]
pass_t = np.where(score>threshold[1])[0]
score = np.array([score[pass_t]]).T
total_boxes = np.concatenate( (total_boxes[pass_t, 0:4], score), axis = 1)
mv = out['conv5-2'][pass_t, :].T
if total_boxes.shape[0] > 0:
pick = nms(total_boxes, 0.7, 'Union')
if len(pick) > 0 :
total_boxes = total_boxes[pick, :]
total_boxes = bbreg(total_boxes, mv[:, pick])
total_boxes = rerec(total_boxes)
#####
# 2 #
#####
numbox = total_boxes.shape[0]
if numbox > 0:
# third stage
total_boxes = np.fix(total_boxes)
[dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h)
tempimg = np.zeros((numbox, 48, 48, 3))
for k in range(numbox):
tmp = np.zeros((tmph[k], tmpw[k],3))
tmp[dy[k]:edy[k]+1, dx[k]:edx[k]+1] = img[y[k]:ey[k]+1, x[k]:ex[k]+1]
tempimg[k,:,:,:] = cv2.resize(tmp, (48, 48))
tempimg = (tempimg-127.5)*0.0078125 # [0,255] -> [-1,1]
# ONet
tempimg = np.swapaxes(tempimg, 1, 3)
ONet.blobs['data'].reshape(numbox, 3, 48, 48)
ONet.blobs['data'].data[...] = tempimg
out = ONet.forward()
score = out['prob1'][:,1]
points = out['conv6-3']
pass_t = np.where(score>threshold[2])[0]
points = points[pass_t, :]
score = np.array([score[pass_t]]).T
total_boxes = np.concatenate( (total_boxes[pass_t, 0:4], score), axis=1)
mv = out['conv6-2'][pass_t, :].T
w = total_boxes[:,3] - total_boxes[:,1] + 1
h = total_boxes[:,2] - total_boxes[:,0] + 1
points[:, 0:5] = np.tile(w, (5,1)).T * points[:, 0:5] + np.tile(total_boxes[:,0], (5,1)).T - 1
points[:, 5:10] = np.tile(h, (5,1)).T * points[:, 5:10] + np.tile(total_boxes[:,1], (5,1)).T -1
if total_boxes.shape[0] > 0:
total_boxes = bbreg(total_boxes, mv[:,:])
pick = nms(total_boxes, 0.7, 'Min')
if len(pick) > 0 :
total_boxes = total_boxes[pick, :]
points = points[pick, :]
#####
# 3 #
#####
return total_boxes, points
def initFaceDetector():
minsize = 20
caffe_model_path = "/home/duino/iactive/mtcnn/model"
threshold = [0.6, 0.7, 0.7]
factor = 0.709
caffe.set_mode_cpu()
PNet = caffe.Net(caffe_model_path+"/det1.prototxt", caffe_model_path+"/det1.caffemodel", caffe.TEST)
RNet = caffe.Net(caffe_model_path+"/det2.prototxt", caffe_model_path+"/det2.caffemodel", caffe.TEST)
ONet = caffe.Net(caffe_model_path+"/det3.prototxt", caffe_model_path+"/det3.caffemodel", caffe.TEST)
return (minsize, PNet, RNet, ONet, threshold, factor)
def haveFace(img, facedetector):
minsize = facedetector[0]
PNet = facedetector[1]
RNet = facedetector[2]
ONet = facedetector[3]
threshold = facedetector[4]
factor = facedetector[5]
if max(img.shape[0], img.shape[1]) < minsize:
return False, []
img_matlab = img.copy()
tmp = img_matlab[:,:,2].copy()
img_matlab[:,:,2] = img_matlab[:,:,0]
img_matlab[:,:,0] = tmp
#tic()
boundingboxes, points = detect_face(img_matlab, minsize, PNet, RNet, ONet, threshold, False, factor)
#toc()
containFace = (True, False)[boundingboxes.shape[0]==0]
return containFace, boundingboxes
def main():
minsize = 50
caffe_model_path = "/home/zou/mtcnn/model"
threshold = [0.6, 0.7, 0.7]
factor = 0.709
caffe.set_mode_gpu()
PNet = caffe.Net(caffe_model_path+"/det1.prototxt", caffe_model_path+"/det1.caffemodel", caffe.TEST)
RNet = caffe.Net(caffe_model_path+"/det2.prototxt", caffe_model_path+"/det2.caffemodel", caffe.TEST)
ONet = caffe.Net(caffe_model_path+"/det3.prototxt", caffe_model_path+"/det3.caffemodel", caffe.TEST)
camera = cv2.VideoCapture(0)
while True:
_, img = camera.read()
h,w = img.shape[:2]
if h>=w:
w = int(w/(h/500.0))
h = 500;
else:
h = int(h/(w/500.0))
w = 500
img = cv2.resize(img,(w,h))
img_matlab = img.copy()
tmp = img_matlab[:,:,2].copy()
img_matlab[:,:,2] = img_matlab[:,:,0]
img_matlab[:,:,0] = tmp
# check rgb position
tic()
boundingboxes, points = detect_face(img_matlab, minsize, PNet, RNet, ONet, threshold, False, factor)
toc()
if (len(boundingboxes)>0)&1:
img = drawBoxes(img, boundingboxes)
img = drawPoints(img, points)
if (len(boundingboxes)>0)&0:
img = drawBoxes(img, boundingboxes)
img = drawPatch(img, points)
cv2.imshow('img', img)
ch = cv2.waitKey(1)
if ch == 27:
break
#if boundingboxes.shape[0] > 0:
# error.append[imgpath]
#print error
if __name__ == "__main__":
main()