
init project

zangruirui 8 years ago
commit 75237aa108

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+models/*

+ 5 - 5
README.md

@@ -1,15 +1,15 @@
 # facenet_face_regonistant
 Uses facenet to detect the faces in an image and store the recognized face vectors in a database; a new image can then be submitted via POST and information about similar faces in the database is returned.
 
-###Setup
-#####Install Python packages 
+### Setup
+##### Install Python packages 
 Install all of the packages listed in requirements.txt (I installed mysql-connector-python via yum install).
 
-#####Create the database in advance 
+##### Create the database in advance 
 The table creation statement is in database.sql.
 (The database itself must be created beforehand; name it whatever you like.)
 
-#####Model preparation
+##### Model preparation
 This project extracts the key code from [facenet](https://github.com/davidsandberg/facenet) and wraps it for use here,
 so the model provided by facenet must be downloaded in advance: [model link](https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk/edit) (requires access to Google).
 
@@ -17,7 +17,7 @@
 After downloading, store it following the directory structure models\facenet\20170512-110547.
 Baidu Netdisk link: http://pan.baidu.com/s/1i4YhAdB  password: avbl
 
-###How to use?
+### How to use?
 Simulate a POST request as shown in the screenshots;
 they show the insert and query scenarios in turn.
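
For reference, a minimal client sketch of those two requests (the endpoints /face/insert and /face/query, the port 8088, and the form fields uid/ugroup/imagefile all come from face_recognition_api.py below; the image paths are placeholders):

```python
# Minimal sketch of the two POST requests, using the `requests` library
# (already listed in requirements.txt).
import requests

BASE = 'http://127.0.0.1:8088'

# insert a face: uid/ugroup are arbitrary identifiers chosen by the caller
with open('person1.jpg', 'rb') as f:
    r = requests.post(BASE + '/face/insert',
                      data={'uid': 'user001', 'ugroup': 'staff'},
                      files={'imagefile': f})
print(r.json())   # e.g. {"id": "1", "state": "success"}

# query: returns the closest stored face for each face detected in the image
with open('person2.jpg', 'rb') as f:
    r = requests.post(BASE + '/face/query',
                      data={'ugroup': 'staff'},
                      files={'imagefile': f})
print(r.json())   # e.g. [{"uid": "user001", "distance": 0.73, "pic_name": "..."}]
```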
 

+ 10 - 0
database.sql

@@ -0,0 +1,10 @@
+CREATE TABLE `face_json` (
+  `id` int(32) NOT NULL AUTO_INCREMENT COMMENT 'auto-increment id',
+  `ugroup` varchar(255) DEFAULT NULL COMMENT 'user group',
+  `uid` varchar(64) DEFAULT NULL COMMENT 'user id of the image',
+  `json` text COMMENT 'face embedding vector',
+  `pic_name` varchar(255) DEFAULT NULL COMMENT 'image name',
+  `date` datetime DEFAULT NULL COMMENT 'insert time',
+  `state` tinyint(1) DEFAULT NULL,
+  PRIMARY KEY (`id`)
+) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
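
The `json` column holds each 128-dimensional facenet embedding as comma-separated text; that is how face_recognition_api.py serializes it and how matrix_fun.py parses it back. A round-trip sketch of that format:

```python
# Sketch: serialize/deserialize an embedding the way the `json` text column stores it.
import numpy as np

emb = np.random.rand(128)  # stand-in for a real facenet embedding
as_text = ",".join(str(v) for v in emb.tolist())              # written at insert time
restored = np.array([float(v) for v in as_text.split(',')])   # parsed at query time
assert np.allclose(emb, restored)
```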

+ 45 - 0
face_mysql.py

@@ -0,0 +1,45 @@
+# -*- coding:utf-8 -*-
+
+import mysql.connector
+import datetime
+
+
+class face_mysql:
+    def __init__(self):
+        pass
+    # configure the database connection and credentials
+    def conn_mysql(self):
+        db = mysql.connector.connect(user='root', password='123456', host='127.0.0.1', database='face_data')
+        return db
+
+    def insert_facejson(self, pic_name, pic_json, uid, ugroup):
+        db = self.conn_mysql()
+        cursor = db.cursor()
+        dt = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        # NOTE: the values are interpolated directly into the SQL string, so the
+        # inputs must be trusted (see the parameterized sketch below)
+        sql = "insert into face_json(json,pic_name,date,state,uid,ugroup) values('%s' ,'%s','%s','%d','%s','%s') ;" % (
+            pic_json, pic_name, dt, 1, uid, ugroup)
+        lastid = -1
+        try:
+            # execute the SQL statement
+            cursor.execute(sql)
+            lastid = int(cursor.lastrowid)
+            # commit the transaction
+            db.commit()
+        except:
+            # roll back in case there is any error
+            db.rollback()
+        db.close()
+        return lastid
+
+    def findall_facejson(self, ugroup):
+        db = self.conn_mysql()
+        cursor = db.cursor()
+
+        sql = "select * from face_json where state=1 and ugroup='%s' ;" % (ugroup)
+        results = None
+        try:
+            cursor.execute(sql)
+            results = cursor.fetchall()
+        except:
+            print("Error: unable to fetch data")
+        db.close()
+        return results
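
Because the SQL above is assembled by string interpolation, a hardened variant would hand the values to the driver instead. A sketch of the same insert using mysql-connector-python's placeholder syntax (the function name is hypothetical; the schema is the one from database.sql):

```python
# Sketch: parameterized variant of insert_facejson; the driver escapes the values,
# so uid/ugroup/pic_json may contain arbitrary text without breaking the statement.
import datetime
import mysql.connector

def insert_facejson_safe(db, pic_name, pic_json, uid, ugroup):
    cursor = db.cursor()
    dt = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    sql = ("insert into face_json(json, pic_name, date, state, uid, ugroup) "
           "values (%s, %s, %s, %s, %s, %s)")
    cursor.execute(sql, (pic_json, pic_name, dt, 1, uid, ugroup))
    db.commit()
    return cursor.lastrowid
```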

+ 257 - 0
face_recognition_api.py

@@ -0,0 +1,257 @@
+# -*- coding:utf-8 -*-
+from flask import Flask, jsonify, abort, make_response, request, url_for
+from flask_httpauth import HTTPBasicAuth
+import json
+
+import os
+import ntpath
+import argparse
+
+import face_mysql
+import tensorflow as tf
+
+import src.facenet
+import src.align.detect_face
+import numpy as np
+from scipy import misc
+import matrix_fun
+
+import urllib
+
+app = Flask(__name__)
+# images may be at most 16 MB
+app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
+auth = HTTPBasicAuth()
+
+# maximum allowed similarity distance; 1.22 is the threshold facenet derived on LFW
+MAX_DISTINCT = 1.22
+
+# configure the upload path and the allowed image formats
+from werkzeug import secure_filename
+
+# directory where images received via POST are saved
+UPLOAD_FOLDER = './pic_tmp/'
+if not os.path.exists(UPLOAD_FOLDER):
+    os.makedirs(UPLOAD_FOLDER)
+ALLOWED_EXTENSIONS = set(['png', 'jpg', 'jpeg'])
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+
+
+def allowed_file(filename):
+    return '.' in filename and filename.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS
+
+
+with tf.Graph().as_default():
+    gpu_memory_fraction = 1.0
+    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
+    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
+    with sess.as_default():
+        pnet, rnet, onet = src.align.detect_face.create_mtcnn(sess, None)
+
+# path to the trained model
+modelpath = "./models/facenet/20170512-110547"
+with tf.Graph().as_default():
+    sess = tf.Session()
+    # src.facenet.load_model(modelpath)
+    # load the model
+    meta_file, ckpt_file = src.facenet.get_model_filenames(modelpath)
+    saver = tf.train.import_meta_graph(os.path.join(modelpath, meta_file))
+    saver.restore(sess, os.path.join(modelpath, ckpt_file))
+    # Get input and output tensors
+    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
+    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
+    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
+
+    # create and load the face recognition networks
+    print('Creating networks and loading parameters')
+
+    # take the image from the POST request, insert it into the database, and return the stored id
+    @app.route('/face/insert', methods=['POST'])
+    def face_insert():
+        # read uid and ugroup from the POST form as the image's metadata
+        uid = request.form['uid']
+        ugroup = request.form['ugroup']
+        upload_files = request.files['imagefile']
+
+        # save the image from the POST request to the local upload folder
+        file = upload_files
+        if file and allowed_file(file.filename):
+            filename = secure_filename(file.filename)
+            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
+        image_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+        print(image_path)
+
+
+        # read the image and start face detection
+        img = misc.imread(os.path.expanduser(image_path), mode='RGB')
+        # detect_multiple_faces=False detects only one face in the image, True detects several;
+        # normally only one face is detected at insert time, while queries detect multiple faces
+        images = image_array_align_data(img, image_path, pnet, rnet, onet, detect_multiple_faces=False)
+
+        feed_dict = {images_placeholder: images, phase_train_placeholder: False}
+        # emb_array holds the 128-dimensional vectors produced by facenet
+        emb_array = sess.run(embeddings, feed_dict=feed_dict)
+        filename_base, file_extension = os.path.splitext(image_path)
+        id_list = []
+        # store each face in the database
+        for j in range(0, len(emb_array)):
+            face_mysql_instant = face_mysql.face_mysql()
+            last_id = face_mysql_instant.insert_facejson(filename_base + "_" + str(j),
+                                                         ",".join(str(li) for li in emb_array[j].tolist()), uid, ugroup)
+            id_list.append(str(last_id))
+
+        # build the response
+        request_result = {}
+        request_result['id'] = ",".join(id_list)
+        if len(id_list) > 0:
+            request_result['state'] = 'success'
+        else:
+            request_result['state'] = 'error'
+
+        print(request_result)
+        return json.dumps(request_result)
+
+
+    @app.route('/face/query', methods=['POST'])
+    def face_query():
+
+        # read the query condition: search for similar faces within ugroup
+        ugroup = request.form['ugroup']
+        upload_files = request.files['imagefile']
+
+        # save the image from the POST request locally
+        file = upload_files
+        if file and allowed_file(file.filename):
+            filename = secure_filename(file.filename)
+            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
+        image_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+        print(image_path)
+
+        # read the local image
+        img = misc.imread(os.path.expanduser(image_path), mode='RGB')
+        images = image_array_align_data(img, image_path, pnet, rnet, onet)
+
+        # if no face was detected in the image, return immediately
+        # (image_array_align_data returns a 2-D zero array in that case, while a real face batch is 4-D)
+        if images.ndim == 2: return json.dumps({'error': "not found face"})
+
+        feed_dict = {images_placeholder: images, phase_train_placeholder: False}
+        emb_array = sess.run(embeddings, feed_dict=feed_dict)
+        face_query = matrix_fun.matrix()
+        # for each face in the image, fetch the closest stored face
+        # pic_min_scores: the face distances (facenet measures similarity by embedding distance)
+        # pic_min_names: the file names saved at insert time
+        # pic_min_uid: the corresponding user ids
+        pic_min_scores, pic_min_names, pic_min_uid = face_query.get_socres(emb_array, ugroup)
+
+        # if the submitted ugroup has no records, return
+        if len(pic_min_scores) == 0: return json.dumps({'error': "not found user group"})
+
+        # build the response
+        result = []
+        for i in range(0, len(pic_min_scores)):
+            if pic_min_scores[i] < MAX_DISTINCT:
+                rdict = {'uid': pic_min_uid[i],
+                         'distance': pic_min_scores[i],
+                         'pic_name': pic_min_names[i]}
+                result.append(rdict)
+        print(result)
+        return json.dumps(result)
+
+
+# detect the faces in an image; image_arr is the 3-D array of the decoded image.
+# Returns the aligned, whitened face crops ready for embedding.
+def image_array_align_data(image_arr, image_path, pnet, rnet, onet, image_size=160, margin=32, gpu_memory_fraction=1.0,
+                           detect_multiple_faces=True):
+    minsize = 20  # minimum size of face
+    threshold = [0.6, 0.7, 0.7]  # thresholds for the three detection stages
+    factor = 0.709  # scale factor
+
+    img = image_arr
+    bounding_boxes, _ = src.align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
+    nrof_faces = bounding_boxes.shape[0]
+
+    nrof_successfully_aligned = 0
+    if nrof_faces > 0:
+        det = bounding_boxes[:, 0:4]
+        det_arr = []
+        img_size = np.asarray(img.shape)[0:2]
+        if nrof_faces > 1:
+            if detect_multiple_faces:
+                for i in range(nrof_faces):
+                    det_arr.append(np.squeeze(det[i]))
+            else:
+                bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
+                img_center = img_size / 2
+                offsets = np.vstack(
+                    [(det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
+                offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
+                index = np.argmax(
+                    bounding_box_size - offset_dist_squared * 2.0)  # some extra weight on the centering
+                det_arr.append(det[index, :])
+        else:
+            det_arr.append(np.squeeze(det))
+
+        images = np.zeros((len(det_arr), image_size, image_size, 3))
+        for i, det in enumerate(det_arr):
+            det = np.squeeze(det)
+            bb = np.zeros(4, dtype=np.int32)
+            bb[0] = np.maximum(det[0] - margin / 2, 0)
+            bb[1] = np.maximum(det[1] - margin / 2, 0)
+            bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
+            bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
+            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
+            # resize the crop to image_size x image_size
+            scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
+            nrof_successfully_aligned += 1
+
+            # save the detected face crop
+            filename_base = './pic_tmp'
+            filename = os.path.basename(image_path)
+            filename_name, file_extension = os.path.splitext(filename)
+            # with multiple faces, append _0, _1, _2, ... to the picture name
+            output_filename_n = "{}/{}_{}{}".format(filename_base, filename_name, i, file_extension)
+            misc.imsave(output_filename_n, scaled)
+
+            scaled = src.facenet.prewhiten(scaled)
+            scaled = src.facenet.crop(scaled, False, 160)
+            scaled = src.facenet.flip(scaled, False)
+
+            images[i] = scaled
+    if nrof_faces > 0:
+        return images
+    else:
+        # if no face was detected, return a 1x3 zero matrix; the exact shape only needs to be distinguishable from a real image batch
+        return np.zeros((1, 3))
+
+
+# utility: fetch an image from a remote URL via urllib,
+# so an image URL can be submitted instead; the file is saved locally and then processed as usual
+def get_url_imgae(picurl):
+    response = urllib.urlopen(picurl)  # Python 2 API; on Python 3 use urllib.request.urlopen
+    pic = response.read()
+    pic_name = "./pic_tmp/" + os.path.basename(picurl)
+    with open(pic_name, 'wb') as f:
+        f.write(pic)
+    return pic_name
+
+
+@auth.get_password
+def get_password(username):
+    if username == 'face':
+        return 'face'
+    return None
+
+
+@auth.error_handler
+def unauthorized():
+    return make_response(jsonify({'error': 'Unauthorized access'}), 401)
+
+
+@app.errorhandler(400)
+def not_found(error):
+    return make_response(jsonify({'error': 'Invalid data!'}), 400)
+
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=8088)
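
The query endpoint above accepts a match only when the embedding distance stays below MAX_DISTINCT (1.22). A self-contained sketch of that decision for two embeddings taken from the `embeddings` tensor (emb_a/emb_b are placeholders):

```python
# Sketch: the same-person decision used by /face/query, applied to two
# 128-d embeddings; facenet compares faces by Euclidean embedding distance.
import numpy as np

MAX_DISTINCT = 1.22  # LFW-derived threshold, as above

def same_person(emb_a, emb_b, threshold=MAX_DISTINCT):
    distance = np.linalg.norm(emb_a - emb_b)
    return distance < threshold, distance
```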

BIN
img/insert.png


BIN
img/query.png


BIN
img/query1.png


+ 71 - 0
matrix_fun.py

@@ -0,0 +1,71 @@
+# -*- coding:utf-8 -*-
+import numpy as np
+import face_mysql
+
+
+class matrix:
+    def __init__(self):
+        pass
+
+    # pairwise Euclidean distances between the rows of matrices A and B
+    def EuclideanDistances(self, A, B):
+        BT = B.transpose()
+        # vecProd = A * BT
+        vecProd = np.dot(A, BT)
+        # print(vecProd)
+        SqA = A ** 2
+        # print(SqA)
+        sumSqA = np.matrix(np.sum(SqA, axis=1))
+        sumSqAEx = np.tile(sumSqA.transpose(), (1, vecProd.shape[1]))
+        # print(sumSqAEx)
+
+        SqB = B ** 2
+        sumSqB = np.sum(SqB, axis=1)
+        sumSqBEx = np.tile(sumSqB, (vecProd.shape[0], 1))
+        SqED = sumSqBEx + sumSqAEx - 2 * vecProd
+        SqED[SqED < 0] = 0.0
+        ED = np.sqrt(SqED)
+        return ED.transpose()
+
+    def get_socres(self, A, ugroup):
+        # maximum number of database records processed per batch;
+        # if the table grows large, the records can be processed in batches
+        maxlen = 128
+
+        fmysql = face_mysql.face_mysql()
+        results = np.array(fmysql.findall_facejson(ugroup))
+
+        # if nothing is found in the database, return empty lists
+        if results.shape[0] == 0: return [], [], []
+
+        pic_scores_all = []
+        # image names stored at insert time; pic_name is column index 4 of the result rows
+        pic_names = results[:, 4]
+        # uid of the image's owner at insert time; uid is column index 2 of the result rows
+        pic_uid = results[:, 2]
+        for i in range(0, len(results), maxlen):
+            pic_vectors = results[i:i + maxlen, 3]
+            # efficiency could be improved; each stored vector is currently parsed row by row
+            pic_vectors = [[float(x) for x in row.split(',')] for row in pic_vectors]
+            pic_scores = self.EuclideanDistances(A, np.array(pic_vectors))
+            pic_scores_list = np.array(pic_scores).tolist()
+
+            pic_scores_all.extend(pic_scores_list)
+        pic_scores_all = np.array(pic_scores_all).transpose()
+
+        # take the smallest distance for each query face
+        # (np.argsort() would return the sorted indices instead)
+        pic_min_scores = np.amin(pic_scores_all, axis=1)
+        pic_min_names = []
+        pic_min_uid = []
+        for i in range(0, len(pic_min_scores)):
+            # index of the minimum value
+            index = np.where(pic_scores_all[i] == pic_min_scores[i])
+            # if several rows tie, take only the first one
+            pic_min_names.append(pic_names[index[0][0]])
+            pic_min_uid.append(pic_uid[index[0][0]])
+        return pic_min_scores.tolist(), pic_min_names, pic_min_uid
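
EuclideanDistances vectorizes the identity ||a-b||^2 = ||a||^2 + ||b||^2 - 2·a·b over all row pairs; note that it returns the transposed matrix, i.e. one row per row of B. A quick sanity check against scipy's cdist (scipy is already in requirements.txt):

```python
# Sketch: cross-check matrix.EuclideanDistances against scipy.
import numpy as np
from scipy.spatial.distance import cdist
import matrix_fun

A = np.random.rand(3, 128)   # query embeddings
B = np.random.rand(5, 128)   # stored embeddings
m = matrix_fun.matrix()
# EuclideanDistances(A, B) is (rows of B) x (rows of A), hence cdist(B, A)
assert np.allclose(np.asarray(m.EuclideanDistances(A, B)), cdist(B, A))
```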
+
+

+ 13 - 0
requirements.txt

@@ -0,0 +1,13 @@
+tensorflow==1.2
+scipy
+scikit-learn
+opencv-python
+h5py
+matplotlib
+Pillow
+requests
+psutil
+mysql-connector-python
+Werkzeug
+Flask
+Flask-HTTPAuth

+ 3 - 0
src/__init__.py

@@ -0,0 +1,3 @@
+# flake8: noqa
+
+

BIN
src/__init__.pyc


+ 0 - 0
src/align/__init__.py


BIN
src/align/__init__.pyc


BIN
src/align/__pycache__/__init__.cpython-36.pyc


BIN
src/align/__pycache__/detect_face.cpython-36.pyc


+ 137 - 0
src/align/align_dataset.py

@@ -0,0 +1,137 @@
+"""Performs face alignment and stores face thumbnails in the output directory."""
+
+# MIT License
+# 
+# Copyright (c) 2016 David Sandberg
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from scipy import misc
+import sys
+import os
+import argparse
+import random
+import align_dlib  # @UnresolvedImport
+import facenet
+
+def main(args):
+    align = align_dlib.AlignDlib(os.path.expanduser(args.dlib_face_predictor))
+    landmarkIndices = align_dlib.AlignDlib.OUTER_EYES_AND_NOSE
+    output_dir = os.path.expanduser(args.output_dir)
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    # Store some git revision info in a text file in the log directory
+    src_path,_ = os.path.split(os.path.realpath(__file__))
+    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
+    dataset = facenet.get_dataset(args.input_dir)
+    random.shuffle(dataset)
+    # Scale the image such that the face fills the frame when cropped to crop_size
+    scale = float(args.face_size) / args.image_size
+    nrof_images_total = 0
+    nrof_prealigned_images = 0
+    nrof_successfully_aligned = 0
+    for cls in dataset:
+        output_class_dir = os.path.join(output_dir, cls.name)
+        if not os.path.exists(output_class_dir):
+            os.makedirs(output_class_dir)
+        random.shuffle(cls.image_paths)
+        for image_path in cls.image_paths:
+            nrof_images_total += 1
+            filename = os.path.splitext(os.path.split(image_path)[1])[0]
+            output_filename = os.path.join(output_class_dir, filename+'.png')
+            if not os.path.exists(output_filename):
+                try:
+                    img = misc.imread(image_path)
+                except (IOError, ValueError, IndexError) as e:
+                    errorMessage = '{}: {}'.format(image_path, e)
+                    print(errorMessage)
+                else:
+                    if img.ndim == 2:
+                        img = facenet.to_rgb(img)
+                    if args.use_center_crop:
+                        scaled = misc.imresize(img, args.prealigned_scale, interp='bilinear')
+                        sz1 = scaled.shape[1] // 2
+                        sz2 = args.image_size // 2
+                        aligned = scaled[(sz1-sz2):(sz1+sz2),(sz1-sz2):(sz1+sz2),:]
+                    else:
+                        aligned = align.align(args.image_size, img, landmarkIndices=landmarkIndices, 
+                                              skipMulti=False, scale=scale)
+                    if aligned is not None:
+                        print(image_path)
+                        nrof_successfully_aligned += 1
+                        misc.imsave(output_filename, aligned)
+                    elif args.prealigned_dir:
+                        # Face detection failed. Use center crop from pre-aligned dataset
+                        class_name = os.path.split(output_class_dir)[1]
+                        image_path_without_ext = os.path.join(os.path.expanduser(args.prealigned_dir), 
+                                                              class_name, filename)
+                        # Find the extension of the image
+                        exts = ('jpg', 'png')
+                        for ext in exts:
+                            temp_path = image_path_without_ext + '.' + ext
+                            image_path = ''
+                            if os.path.exists(temp_path):
+                                image_path = temp_path
+                                break
+                        try:
+                            img = misc.imread(image_path)
+                        except (IOError, ValueError, IndexError) as e:
+                            errorMessage = '{}: {}'.format(image_path, e)
+                            print(errorMessage)
+                        else:
+                            scaled = misc.imresize(img, args.prealigned_scale, interp='bilinear')
+                            sz1 = scaled.shape[1] // 2
+                            sz2 = args.image_size // 2
+                            cropped = scaled[(sz1-sz2):(sz1+sz2),(sz1-sz2):(sz1+sz2),:]
+                            print(image_path)
+                            nrof_prealigned_images += 1
+                            misc.imsave(output_filename, cropped)
+                    else:
+                        print('Unable to align "%s"' % image_path)
+                            
+    print('Total number of images: %d' % nrof_images_total)
+    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
+    print('Number of pre-aligned images: %d' % nrof_prealigned_images)
+            
+
+def parse_arguments(argv):
+    parser = argparse.ArgumentParser()
+    
+    parser.add_argument('input_dir', type=str, help='Directory with unaligned images.')
+    parser.add_argument('output_dir', type=str, help='Directory with aligned face thumbnails.')
+    parser.add_argument('--dlib_face_predictor', type=str,
+        help='File containing the dlib face predictor.', default='../data/shape_predictor_68_face_landmarks.dat')
+    parser.add_argument('--image_size', type=int,
+        help='Image size (height, width) in pixels.', default=110)
+    parser.add_argument('--face_size', type=int,
+        help='Size of the face thumbnail (height, width) in pixels.', default=96)
+    parser.add_argument('--use_center_crop', 
+        help='Use the center crop of the original image after scaling the image using prealigned_scale.', action='store_true')
+    parser.add_argument('--prealigned_dir', type=str,
+        help='Replace image with a pre-aligned version when face detection fails.', default='')
+    parser.add_argument('--prealigned_scale', type=float,
+        help='The amount of scaling to apply to prealigned images before taking the center crop.', default=0.87)
+    return parser.parse_args(argv)
+
+if __name__ == '__main__':
+    main(parse_arguments(sys.argv[1:]))

+ 162 - 0
src/align/align_dataset_mtcnn.py

@@ -0,0 +1,162 @@
+"""Performs face alignment and stores face thumbnails in the output directory."""
+# MIT License
+# 
+# Copyright (c) 2016 David Sandberg
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from scipy import misc
+import sys
+import os
+import argparse
+import tensorflow as tf
+import numpy as np
+import facenet
+import align.detect_face
+import random
+from time import sleep
+
+def main(args):
+    sleep(random.random())
+    output_dir = os.path.expanduser(args.output_dir)
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    # Store some git revision info in a text file in the log directory
+    src_path,_ = os.path.split(os.path.realpath(__file__))
+    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
+    dataset = facenet.get_dataset(args.input_dir)
+    
+    print('Creating networks and loading parameters')
+    
+    with tf.Graph().as_default():
+        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
+        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
+        with sess.as_default():
+            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)
+    
+    minsize = 20 # minimum size of face
+    threshold = [ 0.6, 0.7, 0.7 ]  # thresholds for the three stages
+    factor = 0.709 # scale factor
+
+    # Add a random key to the filename to allow alignment using multiple processes
+    random_key = np.random.randint(0, high=99999)
+    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)
+    
+    with open(bounding_boxes_filename, "w") as text_file:
+        nrof_images_total = 0
+        nrof_successfully_aligned = 0
+        if args.random_order:
+            random.shuffle(dataset)
+        for cls in dataset:
+            output_class_dir = os.path.join(output_dir, cls.name)
+            if not os.path.exists(output_class_dir):
+                os.makedirs(output_class_dir)
+                if args.random_order:
+                    random.shuffle(cls.image_paths)
+            for image_path in cls.image_paths:
+                nrof_images_total += 1
+                filename = os.path.splitext(os.path.split(image_path)[1])[0]
+                output_filename = os.path.join(output_class_dir, filename+'.png')
+                print(image_path)
+                if not os.path.exists(output_filename):
+                    try:
+                        img = misc.imread(image_path)
+                    except (IOError, ValueError, IndexError) as e:
+                        errorMessage = '{}: {}'.format(image_path, e)
+                        print(errorMessage)
+                    else:
+                        if img.ndim<2:
+                            print('Unable to align "%s"' % image_path)
+                            text_file.write('%s\n' % (output_filename))
+                            continue
+                        # if the array is 2-D (a grayscale image), convert it to three channels
+                        if img.ndim == 2:
+                            img = facenet.to_rgb(img)
+                        # keep only the first three channels (drops alpha if present)
+                        img = img[:,:,0:3]
+
+
+                        # bounding_boxes is an n*5 array of the detected faces, where n is the number of faces
+                        bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
+                        # number of detected faces
+                        nrof_faces = bounding_boxes.shape[0]
+                        if nrof_faces>0:
+                            det = bounding_boxes[:,0:4]
+                            det_arr = []
+                            img_size = np.asarray(img.shape)[0:2]
+                            if nrof_faces>1:
+                                if args.detect_multiple_faces:
+                                    for i in range(nrof_faces):
+                                        det_arr.append(np.squeeze(det[i]))
+                                else:
+                                    bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
+                                    img_center = img_size / 2
+                                    offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
+                                    offset_dist_squared = np.sum(np.power(offsets,2.0),0)
+                                    index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
+                                    det_arr.append(det[index,:])
+                            else:
+                                det_arr.append(np.squeeze(det))
+
+                            for i, det in enumerate(det_arr):
+                                det = np.squeeze(det)
+                                bb = np.zeros(4, dtype=np.int32)
+                                bb[0] = np.maximum(det[0]-args.margin/2, 0)
+                                bb[1] = np.maximum(det[1]-args.margin/2, 0)
+                                bb[2] = np.minimum(det[2]+args.margin/2, img_size[1])
+                                bb[3] = np.minimum(det[3]+args.margin/2, img_size[0])
+                                cropped = img[bb[1]:bb[3],bb[0]:bb[2],:]
+                                # resize the crop to image_size x image_size
+                                scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
+                                nrof_successfully_aligned += 1
+                                filename_base, file_extension = os.path.splitext(output_filename)
+                                output_filename_n = "{}_{}{}".format(filename_base, i, file_extension)
+                                misc.imsave(output_filename_n, scaled)
+                                text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3]))
+                        else:
+                            print('Unable to align "%s"' % image_path)
+                            text_file.write('%s\n' % (output_filename))
+                            
+    print('Total number of images: %d' % nrof_images_total)
+    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
+            
+
+def parse_arguments(argv):
+    parser = argparse.ArgumentParser()
+    
+    parser.add_argument('input_dir', type=str, help='Directory with unaligned images.')
+    parser.add_argument('output_dir', type=str, help='Directory with aligned face thumbnails.')
+    parser.add_argument('--image_size', type=int,
+        help='Image size (height, width) in pixels.', default=182)
+    parser.add_argument('--margin', type=int,
+        help='Margin for the crop around the bounding box (height, width) in pixels.', default=44)
+    parser.add_argument('--random_order', 
+        help='Shuffles the order of images to enable alignment using multiple processes.', action='store_true')
+    parser.add_argument('--gpu_memory_fraction', type=float,
+        help='Upper bound on the amount of GPU memory that will be used by the process.', default=1.0)
+    parser.add_argument('--detect_multiple_faces', type=bool,
+                        help='Detect and align multiple faces per image.', default=False)
+    return parser.parse_args(argv)
+
+if __name__ == '__main__':
+    main(parse_arguments(sys.argv[1:]))
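
A sketch of driving this aligner in-process through its own parse_arguments (the directory paths are placeholders, and src/ plus src/align/ are assumed to be on PYTHONPATH so that facenet and align.detect_face import cleanly):

```python
# Sketch: programmatic equivalent of
#   python src/align/align_dataset_mtcnn.py ~/raw ~/aligned --image_size 182 --margin 44
import align_dataset_mtcnn as aligner  # assumes src/align is on PYTHONPATH

args = aligner.parse_arguments(
    ['~/datasets/raw', '~/datasets/aligned_182',
     '--image_size', '182', '--margin', '44', '--random_order'])
aligner.main(args)
```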

+ 204 - 0
src/align/align_dlib.py

@@ -0,0 +1,204 @@
+# Copyright 2015-2016 Carnegie Mellon University
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Module for dlib-based alignment."""
+
+# NOTE: This file has been copied from the openface project.
+#  https://github.com/cmusatyalab/openface/blob/master/openface/align_dlib.py
+
+import cv2
+import dlib
+import numpy as np
+
+TEMPLATE = np.float32([
+    (0.0792396913815, 0.339223741112), (0.0829219487236, 0.456955367943),
+    (0.0967927109165, 0.575648016728), (0.122141515615, 0.691921601066),
+    (0.168687863544, 0.800341263616), (0.239789390707, 0.895732504778),
+    (0.325662452515, 0.977068762493), (0.422318282013, 1.04329000149),
+    (0.531777802068, 1.06080371126), (0.641296298053, 1.03981924107),
+    (0.738105872266, 0.972268833998), (0.824444363295, 0.889624082279),
+    (0.894792677532, 0.792494155836), (0.939395486253, 0.681546643421),
+    (0.96111933829, 0.562238253072), (0.970579841181, 0.441758925744),
+    (0.971193274221, 0.322118743967), (0.163846223133, 0.249151738053),
+    (0.21780354657, 0.204255863861), (0.291299351124, 0.192367318323),
+    (0.367460241458, 0.203582210627), (0.4392945113, 0.233135599851),
+    (0.586445962425, 0.228141644834), (0.660152671635, 0.195923841854),
+    (0.737466449096, 0.182360984545), (0.813236546239, 0.192828009114),
+    (0.8707571886, 0.235293377042), (0.51534533827, 0.31863546193),
+    (0.516221448289, 0.396200446263), (0.517118861835, 0.473797687758),
+    (0.51816430343, 0.553157797772), (0.433701156035, 0.604054457668),
+    (0.475501237769, 0.62076344024), (0.520712933176, 0.634268222208),
+    (0.565874114041, 0.618796581487), (0.607054002672, 0.60157671656),
+    (0.252418718401, 0.331052263829), (0.298663015648, 0.302646354002),
+    (0.355749724218, 0.303020650651), (0.403718978315, 0.33867711083),
+    (0.352507175597, 0.349987615384), (0.296791759886, 0.350478978225),
+    (0.631326076346, 0.334136672344), (0.679073381078, 0.29645404267),
+    (0.73597236153, 0.294721285802), (0.782865376271, 0.321305281656),
+    (0.740312274764, 0.341849376713), (0.68499850091, 0.343734332172),
+    (0.353167761422, 0.746189164237), (0.414587777921, 0.719053835073),
+    (0.477677654595, 0.706835892494), (0.522732900812, 0.717092275768),
+    (0.569832064287, 0.705414478982), (0.635195811927, 0.71565572516),
+    (0.69951672331, 0.739419187253), (0.639447159575, 0.805236879972),
+    (0.576410514055, 0.835436670169), (0.525398405766, 0.841706377792),
+    (0.47641545769, 0.837505914975), (0.41379548902, 0.810045601727),
+    (0.380084785646, 0.749979603086), (0.477955996282, 0.74513234612),
+    (0.523389793327, 0.748924302636), (0.571057789237, 0.74332894691),
+    (0.672409137852, 0.744177032192), (0.572539621444, 0.776609286626),
+    (0.5240106503, 0.783370783245), (0.477561227414, 0.778476346951)])
+
+INV_TEMPLATE = np.float32([
+                            (-0.04099179660567834, -0.008425234314031194, 2.575498465013183),
+                            (0.04062510634554352, -0.009678089746831375, -1.2534351452524177),
+                            (0.0003666902601348179, 0.01810332406086298, -0.32206331976076663)])
+
+TPL_MIN, TPL_MAX = np.min(TEMPLATE, axis=0), np.max(TEMPLATE, axis=0)
+MINMAX_TEMPLATE = (TEMPLATE - TPL_MIN) / (TPL_MAX - TPL_MIN)
+
+
+class AlignDlib:
+    """
+    Use `dlib's landmark estimation <http://blog.dlib.net/2014/08/real-time-face-pose-estimation.html>`_ to align faces.
+
+    The alignment preprocesses faces for input into a neural network.
+    Faces are resized to the same size (such as 96x96) and transformed
+    to make landmarks (such as the eyes and nose) appear at the same
+    location on every image.
+
+    Normalized landmarks:
+
+    .. image:: ../images/dlib-landmark-mean.png
+    """
+
+    #: Landmark indices corresponding to the inner eyes and bottom lip.
+    INNER_EYES_AND_BOTTOM_LIP = [39, 42, 57]
+
+    #: Landmark indices corresponding to the outer eyes and nose.
+    OUTER_EYES_AND_NOSE = [36, 45, 33]
+
+    def __init__(self, facePredictor):
+        """
+        Instantiate an 'AlignDlib' object.
+
+        :param facePredictor: The path to dlib's shape predictor model file.
+        :type facePredictor: str
+        """
+        assert facePredictor is not None
+
+        #pylint: disable=no-member
+        self.detector = dlib.get_frontal_face_detector()
+        self.predictor = dlib.shape_predictor(facePredictor)
+
+    def getAllFaceBoundingBoxes(self, rgbImg):
+        """
+        Find all face bounding boxes in an image.
+
+        :param rgbImg: RGB image to process. Shape: (height, width, 3)
+        :type rgbImg: numpy.ndarray
+        :return: All face bounding boxes in an image.
+        :rtype: dlib.rectangles
+        """
+        assert rgbImg is not None
+
+        try:
+            return self.detector(rgbImg, 1)
+        except Exception as e: #pylint: disable=broad-except
+            print("Warning: {}".format(e))
+            # In rare cases, exceptions are thrown.
+            return []
+
+    def getLargestFaceBoundingBox(self, rgbImg, skipMulti=False):
+        """
+        Find the largest face bounding box in an image.
+
+        :param rgbImg: RGB image to process. Shape: (height, width, 3)
+        :type rgbImg: numpy.ndarray
+        :param skipMulti: Skip image if more than one face detected.
+        :type skipMulti: bool
+        :return: The largest face bounding box in an image, or None.
+        :rtype: dlib.rectangle
+        """
+        assert rgbImg is not None
+
+        faces = self.getAllFaceBoundingBoxes(rgbImg)
+        if (not skipMulti and len(faces) > 0) or len(faces) == 1:
+            return max(faces, key=lambda rect: rect.width() * rect.height())
+        else:
+            return None
+
+    def findLandmarks(self, rgbImg, bb):
+        """
+        Find the landmarks of a face.
+
+        :param rgbImg: RGB image to process. Shape: (height, width, 3)
+        :type rgbImg: numpy.ndarray
+        :param bb: Bounding box around the face to find landmarks for.
+        :type bb: dlib.rectangle
+        :return: Detected landmark locations.
+        :rtype: list of (x,y) tuples
+        """
+        assert rgbImg is not None
+        assert bb is not None
+
+        points = self.predictor(rgbImg, bb)
+        #return list(map(lambda p: (p.x, p.y), points.parts()))
+        return [(p.x, p.y) for p in points.parts()]
+
+    #pylint: disable=dangerous-default-value
+    def align(self, imgDim, rgbImg, bb=None,
+              landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP,
+              skipMulti=False, scale=1.0):
+        r"""align(imgDim, rgbImg, bb=None, landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP)
+
+        Transform and align a face in an image.
+
+        :param imgDim: The edge length in pixels of the square the image is resized to.
+        :type imgDim: int
+        :param rgbImg: RGB image to process. Shape: (height, width, 3)
+        :type rgbImg: numpy.ndarray
+        :param bb: Bounding box around the face to align. \
+                   Defaults to the largest face.
+        :type bb: dlib.rectangle
+        :param landmarks: Detected landmark locations. \
+                          Landmarks found on `bb` if not provided.
+        :type landmarks: list of (x,y) tuples
+        :param landmarkIndices: The indices to transform to.
+        :type landmarkIndices: list of ints
+        :param skipMulti: Skip image if more than one face detected.
+        :type skipMulti: bool
+        :param scale: Scale image before cropping to the size given by imgDim.
+        :type scale: float
+        :return: The aligned RGB image. Shape: (imgDim, imgDim, 3)
+        :rtype: numpy.ndarray
+        """
+        assert imgDim is not None
+        assert rgbImg is not None
+        assert landmarkIndices is not None
+
+        if bb is None:
+            bb = self.getLargestFaceBoundingBox(rgbImg, skipMulti)
+            if bb is None:
+                return
+
+        if landmarks is None:
+            landmarks = self.findLandmarks(rgbImg, bb)
+
+        npLandmarks = np.float32(landmarks)
+        npLandmarkIndices = np.array(landmarkIndices)
+
+        #pylint: disable=maybe-no-member
+        H = cv2.getAffineTransform(npLandmarks[npLandmarkIndices],
+                                   imgDim * MINMAX_TEMPLATE[npLandmarkIndices]*scale + imgDim*(1-scale)/2)
+        thumbnail = cv2.warpAffine(rgbImg, H, (imgDim, imgDim))
+        
+        return thumbnail
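
A minimal usage sketch of the AlignDlib flow described in the docstrings above (the predictor file is dlib's standard 68-landmark model, downloaded separately; the image path is a placeholder):

```python
# Sketch: align the largest face in one image with AlignDlib.
import cv2
from align_dlib import AlignDlib

align = AlignDlib('shape_predictor_68_face_landmarks.dat')  # dlib 68-landmark model
bgr = cv2.imread('face.jpg')                   # placeholder image path
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)     # AlignDlib expects RGB input
thumb = align.align(96, rgb, landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)
if thumb is not None:
    cv2.imwrite('face_aligned.png', cv2.cvtColor(thumb, cv2.COLOR_RGB2BGR))
```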

BIN
src/align/det1.npy


BIN
src/align/det2.npy


BIN
src/align/det3.npy


+ 778 - 0
src/align/detect_face.py

@@ -0,0 +1,778 @@
+""" Tensorflow implementation of the face detection / alignment algorithm found at
+https://github.com/kpzhang93/MTCNN_face_detection_alignment
+"""
+# MIT License
+# 
+# Copyright (c) 2016 David Sandberg
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from six import string_types, iteritems
+
+import numpy as np
+import tensorflow as tf
+#from math import floor
+import cv2
+import os
+
+def layer(op):
+    '''Decorator for composable network layers.'''
+
+    def layer_decorated(self, *args, **kwargs):
+        # Automatically set a name if not provided.
+        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
+        # Figure out the layer inputs.
+        if len(self.terminals) == 0:
+            raise RuntimeError('No input variables found for layer %s.' % name)
+        elif len(self.terminals) == 1:
+            layer_input = self.terminals[0]
+        else:
+            layer_input = list(self.terminals)
+        # Perform the operation and get the output.
+        layer_output = op(self, layer_input, *args, **kwargs)
+        # Add to layer LUT.
+        self.layers[name] = layer_output
+        # This output is now the input for the next layer.
+        self.feed(layer_output)
+        # Return self for chained calls.
+        return self
+
+    return layer_decorated
+
+class Network(object):
+
+    def __init__(self, inputs, trainable=True):
+        # The input nodes for this network
+        self.inputs = inputs
+        # The current list of terminal nodes
+        self.terminals = []
+        # Mapping from layer names to layers
+        self.layers = dict(inputs)
+        # If true, the resulting variables are set as trainable
+        self.trainable = trainable
+
+        self.setup()
+
+    def setup(self):
+        '''Construct the network. '''
+        raise NotImplementedError('Must be implemented by the subclass.')
+
+    def load(self, data_path, session, ignore_missing=False):
+        '''Load network weights.
+        data_path: The path to the numpy-serialized network weights
+        session: The current TensorFlow session
+        ignore_missing: If true, serialized weights for missing layers are ignored.
+        '''
+        data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member
+
+        for op_name in data_dict:
+            with tf.variable_scope(op_name, reuse=True):
+                for param_name, data in iteritems(data_dict[op_name]):
+                    try:
+                        var = tf.get_variable(param_name)
+                        session.run(var.assign(data))
+                    except ValueError:
+                        if not ignore_missing:
+                            raise
+
+    def feed(self, *args):
+        '''Set the input(s) for the next operation by replacing the terminal nodes.
+        The arguments can be either layer names or the actual layers.
+        '''
+        assert len(args) != 0
+        self.terminals = []
+        for fed_layer in args:
+            if isinstance(fed_layer, string_types):
+                try:
+                    fed_layer = self.layers[fed_layer]
+                except KeyError:
+                    raise KeyError('Unknown layer name fed: %s' % fed_layer)
+            self.terminals.append(fed_layer)
+        return self
+
+    def get_output(self):
+        '''Returns the current network output.'''
+        return self.terminals[-1]
+
+    def get_unique_name(self, prefix):
+        '''Returns an index-suffixed unique name for the given prefix.
+        This is used for auto-generating layer names based on the type-prefix.
+        '''
+        ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
+        return '%s_%d' % (prefix, ident)
+
+    def make_var(self, name, shape):
+        '''Creates a new TensorFlow variable.'''
+        return tf.get_variable(name, shape, trainable=self.trainable)
+
+    def validate_padding(self, padding):
+        '''Verifies that the padding is one of the supported ones.'''
+        assert padding in ('SAME', 'VALID')
+
+    @layer
+    def conv(self,
+             inp,
+             k_h,
+             k_w,
+             c_o,
+             s_h,
+             s_w,
+             name,
+             relu=True,
+             padding='SAME',
+             group=1,
+             biased=True):
+        # Verify that the padding is acceptable
+        self.validate_padding(padding)
+        # Get the number of channels in the input
+        c_i = int(inp.get_shape()[-1])
+        # Verify that the grouping parameter is valid
+        assert c_i % group == 0
+        assert c_o % group == 0
+        # Convolution for a given input and kernel
+        convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
+        with tf.variable_scope(name) as scope:
+            kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
+            # This is the common-case. Convolve the input without any further complications.
+            output = convolve(inp, kernel)
+            # Add the biases
+            if biased:
+                biases = self.make_var('biases', [c_o])
+                output = tf.nn.bias_add(output, biases)
+            if relu:
+                # ReLU non-linearity
+                output = tf.nn.relu(output, name=scope.name)
+            return output
+
+    @layer
+    def prelu(self, inp, name):
+        with tf.variable_scope(name):
+            i = int(inp.get_shape()[-1])
+            alpha = self.make_var('alpha', shape=(i,))
+            output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp))
+        return output
+
+    @layer
+    def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'):
+        self.validate_padding(padding)
+        return tf.nn.max_pool(inp,
+                              ksize=[1, k_h, k_w, 1],
+                              strides=[1, s_h, s_w, 1],
+                              padding=padding,
+                              name=name)
+
+    @layer
+    def fc(self, inp, num_out, name, relu=True):
+        with tf.variable_scope(name):
+            input_shape = inp.get_shape()
+            if input_shape.ndims == 4:
+                # The input is spatial. Vectorize it first.
+                dim = 1
+                for d in input_shape[1:].as_list():
+                    dim *= int(d)
+                feed_in = tf.reshape(inp, [-1, dim])
+            else:
+                feed_in, dim = (inp, input_shape[-1].value)
+            weights = self.make_var('weights', shape=[dim, num_out])
+            biases = self.make_var('biases', [num_out])
+            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
+            fc = op(feed_in, weights, biases, name=name)
+            return fc
+
+
+    """
+    Multi dimensional softmax,
+    refer to https://github.com/tensorflow/tensorflow/issues/210
+    compute softmax along the dimension of target
+    the native softmax only supports batch_size x dimension
+    """
+    @layer
+    def softmax(self, target, axis, name=None):
+        max_axis = tf.reduce_max(target, axis, keep_dims=True)
+        target_exp = tf.exp(target-max_axis)
+        normalize = tf.reduce_sum(target_exp, axis, keep_dims=True)
+        softmax = tf.div(target_exp, normalize, name)
+        return softmax
+    
+class PNet(Network):
+    def setup(self):
+        (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
+             .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1')
+             .prelu(name='PReLU1')
+             .max_pool(2, 2, 2, 2, name='pool1')
+             .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2')
+             .prelu(name='PReLU2')
+             .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3')
+             .prelu(name='PReLU3')
+             .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1')
+             .softmax(3,name='prob1'))
+
+        (self.feed('PReLU3') #pylint: disable=no-value-for-parameter
+             .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2'))
+        
+class RNet(Network):
+    def setup(self):
+        (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
+             .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1')
+             .prelu(name='prelu1')
+             .max_pool(3, 3, 2, 2, name='pool1')
+             .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2')
+             .prelu(name='prelu2')
+             .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
+             .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3')
+             .prelu(name='prelu3')
+             .fc(128, relu=False, name='conv4')
+             .prelu(name='prelu4')
+             .fc(2, relu=False, name='conv5-1')
+             .softmax(1,name='prob1'))
+
+        (self.feed('prelu4') #pylint: disable=no-value-for-parameter
+             .fc(4, relu=False, name='conv5-2'))
+
+class ONet(Network):
+    def setup(self):
+        (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
+             .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1')
+             .prelu(name='prelu1')
+             .max_pool(3, 3, 2, 2, name='pool1')
+             .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2')
+             .prelu(name='prelu2')
+             .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
+             .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3')
+             .prelu(name='prelu3')
+             .max_pool(2, 2, 2, 2, name='pool3')
+             .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4')
+             .prelu(name='prelu4')
+             .fc(256, relu=False, name='conv5')
+             .prelu(name='prelu5')
+             .fc(2, relu=False, name='conv6-1')
+             .softmax(1, name='prob1'))
+
+        (self.feed('prelu5') #pylint: disable=no-value-for-parameter
+             .fc(4, relu=False, name='conv6-2'))
+
+        (self.feed('prelu5') #pylint: disable=no-value-for-parameter
+             .fc(10, relu=False, name='conv6-3'))
+
+def create_mtcnn(sess, model_path):
+    if not model_path:
+        model_path,_ = os.path.split(os.path.realpath(__file__))
+
+    with tf.variable_scope('pnet'):
+        data = tf.placeholder(tf.float32, (None,None,None,3), 'input')
+        pnet = PNet({'data':data})
+        pnet.load(os.path.join(model_path, 'det1.npy'), sess)
+    with tf.variable_scope('rnet'):
+        data = tf.placeholder(tf.float32, (None,24,24,3), 'input')
+        rnet = RNet({'data':data})
+        rnet.load(os.path.join(model_path, 'det2.npy'), sess)
+    with tf.variable_scope('onet'):
+        data = tf.placeholder(tf.float32, (None,48,48,3), 'input')
+        onet = ONet({'data':data})
+        onet.load(os.path.join(model_path, 'det3.npy'), sess)
+        
+    pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img})
+    rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img})
+    onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img})
+    return pnet_fun, rnet_fun, onet_fun
+
+def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
+    # img: input image
+    # minsize: minimum size of the faces
+    # pnet, rnet, onet: caffemodel
+    # threshold: threshold=[th1 th2 th3], the thresholds for the three stages
+    # fastresize: resize img from the last scale (for high-resolution images) if fastresize==true
+    factor_count=0
+    total_boxes=np.empty((0,9))
+    points=np.empty(0)
+    h=img.shape[0]
+    w=img.shape[1]
+    minl=np.amin([h, w])
+    m=12.0/minsize
+    minl=minl*m
+    # create scale pyramid
+    scales=[]
+    while minl>=12:
+        scales += [m*np.power(factor, factor_count)]
+        minl = minl*factor
+        factor_count += 1
+
+    # first stage
+    for j in range(len(scales)):
+        scale=scales[j]
+        hs=int(np.ceil(h*scale))
+        ws=int(np.ceil(w*scale))
+        im_data = imresample(img, (hs, ws))
+        im_data = (im_data-127.5)*0.0078125
+        img_x = np.expand_dims(im_data, 0)
+        img_y = np.transpose(img_x, (0,2,1,3))
+        out = pnet(img_y)
+        out0 = np.transpose(out[0], (0,2,1,3))
+        out1 = np.transpose(out[1], (0,2,1,3))
+        
+        boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])
+        
+        # inter-scale nms
+        pick = nms(boxes.copy(), 0.5, 'Union')
+        if boxes.size>0 and pick.size>0:
+            boxes = boxes[pick,:]
+            total_boxes = np.append(total_boxes, boxes, axis=0)
+
+    numbox = total_boxes.shape[0]
+    if numbox>0:
+        pick = nms(total_boxes.copy(), 0.7, 'Union')
+        total_boxes = total_boxes[pick,:]
+        regw = total_boxes[:,2]-total_boxes[:,0]
+        regh = total_boxes[:,3]-total_boxes[:,1]
+        qq1 = total_boxes[:,0]+total_boxes[:,5]*regw
+        qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
+        qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
+        qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
+        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
+        total_boxes = rerec(total_boxes.copy())
+        total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
+        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
+
+    numbox = total_boxes.shape[0]
+    if numbox>0:
+        # second stage
+        tempimg = np.zeros((24,24,3,numbox))
+        for k in range(0,numbox):
+            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
+            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
+            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
+                tempimg[:,:,:,k] = imresample(tmp, (24, 24))
+            else:
+                return np.empty()
+        tempimg = (tempimg-127.5)*0.0078125
+        tempimg1 = np.transpose(tempimg, (3,1,0,2))
+        out = rnet(tempimg1)
+        out0 = np.transpose(out[0])
+        out1 = np.transpose(out[1])
+        score = out1[1,:]
+        ipass = np.where(score>threshold[1])
+        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
+        mv = out0[:,ipass[0]]
+        if total_boxes.shape[0]>0:
+            pick = nms(total_boxes, 0.7, 'Union')
+            total_boxes = total_boxes[pick,:]
+            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
+            total_boxes = rerec(total_boxes.copy())
+
+    numbox = total_boxes.shape[0]
+    if numbox>0:
+        # third stage
+        total_boxes = np.fix(total_boxes).astype(np.int32)
+        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
+        tempimg = np.zeros((48,48,3,numbox))
+        for k in range(0,numbox):
+            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
+            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
+            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
+                tempimg[:,:,:,k] = imresample(tmp, (48, 48))
+            else:
+                return np.empty()
+        tempimg = (tempimg-127.5)*0.0078125
+        tempimg1 = np.transpose(tempimg, (3,1,0,2))
+        out = onet(tempimg1)
+        out0 = np.transpose(out[0])
+        out1 = np.transpose(out[1])
+        out2 = np.transpose(out[2])
+        score = out2[1,:]
+        points = out1
+        ipass = np.where(score>threshold[2])
+        points = points[:,ipass[0]]
+        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
+        mv = out0[:,ipass[0]]
+
+        w = total_boxes[:,2]-total_boxes[:,0]+1
+        h = total_boxes[:,3]-total_boxes[:,1]+1
+        points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
+        points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
+        if total_boxes.shape[0]>0:
+            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
+            pick = nms(total_boxes.copy(), 0.7, 'Min')
+            total_boxes = total_boxes[pick,:]
+            points = points[:,pick]
+                
+    return total_boxes, points
+
+
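
The three-stage cascade above is the standard MTCNN pipeline: PNet proposes candidate windows over an image pyramid, RNet and ONet refine them, and ONet also regresses five facial landmarks. A minimal usage sketch, assuming (as in the upstream facenet code this file is taken from) that a `create_mtcnn(sess, model_path)` helper defined earlier in this file builds the `pnet`/`rnet`/`onet` callables; the thresholds and pyramid factor below are the reference defaults, and `face.jpg` is a hypothetical input:

```python
import tensorflow as tf
from scipy import misc
import src.align.detect_face as detect_face

minsize = 20                  # smallest face to detect, in pixels
threshold = [0.6, 0.7, 0.7]   # per-stage score thresholds (pnet, rnet, onet)
factor = 0.709                # image-pyramid scale factor

with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

img = misc.imread('face.jpg')  # hypothetical input image
boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet,
                                        threshold, factor)
print('%d face(s); each row of boxes is [x1, y1, x2, y2, score]' % boxes.shape[0])
```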
+def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
+    # images: list of input images
+    # detection_window_size_ratio: minimum face size, as a fraction of min(image width, image height)
+    # pnet, rnet, onet: the three cascade networks
+    # threshold: threshold=[th1 th2 th3], th1-3 are the thresholds for the three stages [0-1]
+    # factor: scale factor for the image pyramid
+
+    all_scales = [None] * len(images)
+    images_with_boxes = [None] * len(images)
+
+    for i in range(len(images)):
+        images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}
+
+    # create scale pyramid
+    for index, img in enumerate(images):
+        all_scales[index] = []
+        h = img.shape[0]
+        w = img.shape[1]
+        minsize = int(detection_window_size_ratio * np.minimum(w, h))
+        factor_count = 0
+        minl = np.amin([h, w])
+        if minsize <= 12:
+            minsize = 12
+
+        m = 12.0 / minsize
+        minl = minl * m
+        while minl >= 12:
+            all_scales[index].append(m * np.power(factor, factor_count))
+            minl = minl * factor
+            factor_count += 1
+
+    # # # # # # # # # # # # #
+    # first stage - fast proposal network (pnet) to obtain face candidates
+    # # # # # # # # # # # # #
+
+    images_obj_per_resolution = {}
+
+    # TODO: round pyramid sizes to a multiple of 8 to increase the probability that pyramid images share the same resolution across input images
+
+    for index, scales in enumerate(all_scales):
+        h = images[index].shape[0]
+        w = images[index].shape[1]
+
+        for scale in scales:
+            hs = int(np.ceil(h * scale))
+            ws = int(np.ceil(w * scale))
+
+            if (ws, hs) not in images_obj_per_resolution:
+                images_obj_per_resolution[(ws, hs)] = []
+
+            im_data = imresample(images[index], (hs, ws))
+            im_data = (im_data - 127.5) * 0.0078125
+            img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses different dimensions ordering
+            images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})
+
+    for resolution in images_obj_per_resolution:
+        images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
+        outs = pnet(images_per_resolution)
+
+        for index in range(len(outs[0])):
+            scale = images_obj_per_resolution[resolution][index]['scale']
+            image_index = images_obj_per_resolution[resolution][index]['index']
+            out0 = np.transpose(outs[0][index], (1, 0, 2))
+            out1 = np.transpose(outs[1][index], (1, 0, 2))
+
+            boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])
+
+            # inter-scale nms
+            pick = nms(boxes.copy(), 0.5, 'Union')
+            if boxes.size > 0 and pick.size > 0:
+                boxes = boxes[pick, :]
+                images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
+                                                                          boxes,
+                                                                          axis=0)
+
+    for index, image_obj in enumerate(images_with_boxes):
+        numbox = image_obj['total_boxes'].shape[0]
+        if numbox > 0:
+            h = images[index].shape[0]
+            w = images[index].shape[1]
+            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
+            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
+            regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
+            regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
+            qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
+            qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
+            qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
+            qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
+            image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
+            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
+            image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
+            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
+
+            numbox = image_obj['total_boxes'].shape[0]
+            tempimg = np.zeros((24, 24, 3, numbox))
+
+            if numbox > 0:
+                for k in range(0, numbox):
+                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
+                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
+                    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
+                        tempimg[:, :, :, k] = imresample(tmp, (24, 24))
+                    else:
+                        # np.empty() takes a required shape argument; keep the per-image contract instead of raising TypeError
+                        return [None] * len(images)
+
+                tempimg = (tempimg - 127.5) * 0.0078125
+                image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
+
+    # # # # # # # # # # # # #
+    # second stage - refinement of face candidates with rnet
+    # # # # # # # # # # # # #
+
+    bulk_rnet_input = np.empty((0, 24, 24, 3))
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'rnet_input' in image_obj:
+            bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)
+
+    out = rnet(bulk_rnet_input)
+    out0 = np.transpose(out[0])
+    out1 = np.transpose(out[1])
+    score = out1[1, :]
+
+    i = 0
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'rnet_input' not in image_obj:
+            continue
+
+        rnet_input_count = image_obj['rnet_input'].shape[0]
+        score_per_image = score[i:i + rnet_input_count]
+        out0_per_image = out0[:, i:i + rnet_input_count]
+
+        ipass = np.where(score_per_image > threshold[1])
+        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
+                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
+
+        mv = out0_per_image[:, ipass[0]]
+
+        if image_obj['total_boxes'].shape[0] > 0:
+            h = images[index].shape[0]
+            w = images[index].shape[1]
+            pick = nms(image_obj['total_boxes'], 0.7, 'Union')
+            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
+            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
+            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
+
+            numbox = image_obj['total_boxes'].shape[0]
+
+            if numbox > 0:
+                tempimg = np.zeros((48, 48, 3, numbox))
+                image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
+                dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
+
+                for k in range(0, numbox):
+                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
+                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
+                    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
+                        tempimg[:, :, :, k] = imresample(tmp, (48, 48))
+                    else:
+                        # np.empty() takes a required shape argument; keep the per-image contract instead of raising TypeError
+                        return [None] * len(images)
+                tempimg = (tempimg - 127.5) * 0.0078125
+                image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
+
+        i += rnet_input_count
+
+    # # # # # # # # # # # # #
+    # third stage - further refinement and facial landmarks positions with onet
+    # # # # # # # # # # # # #
+
+    bulk_onet_input = np.empty((0, 48, 48, 3))
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'onet_input' in image_obj:
+            bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)
+
+    out = onet(bulk_onet_input)
+
+    out0 = np.transpose(out[0])
+    out1 = np.transpose(out[1])
+    out2 = np.transpose(out[2])
+    score = out2[1, :]
+    points = out1
+
+    i = 0
+    ret = []
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'onet_input' not in image_obj:
+            ret.append(None)
+            continue
+
+        onet_input_count = image_obj['onet_input'].shape[0]
+
+        out0_per_image = out0[:, i:i + onet_input_count]
+        score_per_image = score[i:i + onet_input_count]
+        points_per_image = points[:, i:i + onet_input_count]
+
+        ipass = np.where(score_per_image > threshold[2])
+        points_per_image = points_per_image[:, ipass[0]]
+
+        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
+                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
+        mv = out0_per_image[:, ipass[0]]
+
+        w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
+        h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
+        points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
+            image_obj['total_boxes'][:, 0], (5, 1)) - 1
+        points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
+            image_obj['total_boxes'][:, 1], (5, 1)) - 1
+
+        if image_obj['total_boxes'].shape[0] > 0:
+            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
+            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
+            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
+            points_per_image = points_per_image[:, pick]
+
+            ret.append((image_obj['total_boxes'], points_per_image))
+        else:
+            ret.append(None)
+
+        i += onet_input_count
+
+    return ret
+
+
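
`bulk_detect_face` batches the pyramid levels of several images through each network in one pass instead of looping over images; `detection_window_size_ratio` replaces the absolute `minsize` of `detect_face`. A hedged sketch of calling it, reusing the `pnet`/`rnet`/`onet` callables from the sketch above (the file names are hypothetical):

```python
from scipy import misc

images = [misc.imread(p) for p in ['a.jpg', 'b.jpg']]  # hypothetical paths
results = bulk_detect_face(images, 0.1, pnet, rnet, onet,
                           threshold=[0.6, 0.7, 0.7], factor=0.709)
# Each entry is (boxes, points), or None for images where no face survived.
for i, r in enumerate(results):
    if r is not None:
        boxes, points = r
        print('image %d: %d face(s)' % (i, boxes.shape[0]))
```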
+# function [boundingbox] = bbreg(boundingbox,reg)
+def bbreg(boundingbox,reg):
+    # calibrate bounding boxes
+    if reg.shape[1]==1:
+        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
+
+    w = boundingbox[:,2]-boundingbox[:,0]+1
+    h = boundingbox[:,3]-boundingbox[:,1]+1
+    b1 = boundingbox[:,0]+reg[:,0]*w
+    b2 = boundingbox[:,1]+reg[:,1]*h
+    b3 = boundingbox[:,2]+reg[:,2]*w
+    b4 = boundingbox[:,3]+reg[:,3]*h
+    boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ]))
+    return boundingbox
+ 
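
`bbreg` applies the regression offsets predicted by RNet/ONet as fractions of each box's width and height. A self-contained numeric check of the formula:

```python
import numpy as np

# One 100x100 box at (10, 20) with score 0.9, shifted right by 10% of its width.
box = np.array([[10., 20., 109., 119., 0.9]])
reg = np.array([[0.1, 0.0, 0.1, 0.0]])   # offsets for (x1, y1, x2, y2)
print(bbreg(box.copy(), reg))            # -> [[ 20.  20. 119. 119.   0.9]]
```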
+def generateBoundingBox(imap, reg, scale, t):
+    # use heatmap to generate bounding boxes
+    stride=2
+    cellsize=12
+
+    imap = np.transpose(imap)
+    dx1 = np.transpose(reg[:,:,0])
+    dy1 = np.transpose(reg[:,:,1])
+    dx2 = np.transpose(reg[:,:,2])
+    dy2 = np.transpose(reg[:,:,3])
+    y, x = np.where(imap >= t)
+    if y.shape[0]==1:
+        dx1 = np.flipud(dx1)
+        dy1 = np.flipud(dy1)
+        dx2 = np.flipud(dx2)
+        dy2 = np.flipud(dy2)
+    score = imap[(y,x)]
+    reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
+    if reg.size==0:
+        reg = np.empty((0,3))
+    bb = np.transpose(np.vstack([y,x]))
+    q1 = np.fix((stride*bb+1)/scale)
+    q2 = np.fix((stride*bb+cellsize-1+1)/scale)
+    boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
+    return boundingbox, reg
+ 
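
Each cell `(x, y)` of the PNet heatmap corresponds to a 12x12 window sampled at stride 2 in the scaled image; `q1`/`q2` map that window back to original-image coordinates by dividing out the pyramid scale. A quick numeric check of the mapping:

```python
import numpy as np

stride, cellsize, scale = 2, 12, 0.5
bb = np.array([10, 10])                                # heatmap cell (y, x)
q1 = np.fix((stride * bb + 1) / scale)                 # top-left     -> [42. 42.]
q2 = np.fix((stride * bb + cellsize - 1 + 1) / scale)  # bottom-right -> [64. 64.]
print(q1, q2)
```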
+# function pick = nms(boxes,threshold,type)
+def nms(boxes, threshold, method):
+    if boxes.size==0:
+        return np.empty((0,3))
+    x1 = boxes[:,0]
+    y1 = boxes[:,1]
+    x2 = boxes[:,2]
+    y2 = boxes[:,3]
+    s = boxes[:,4]
+    area = (x2-x1+1) * (y2-y1+1)
+    I = np.argsort(s)
+    pick = np.zeros_like(s, dtype=np.int16)
+    counter = 0
+    while I.size>0:
+        i = I[-1]
+        pick[counter] = i
+        counter += 1
+        idx = I[0:-1]
+        xx1 = np.maximum(x1[i], x1[idx])
+        yy1 = np.maximum(y1[i], y1[idx])
+        xx2 = np.minimum(x2[i], x2[idx])
+        yy2 = np.minimum(y2[i], y2[idx])
+        w = np.maximum(0.0, xx2-xx1+1)
+        h = np.maximum(0.0, yy2-yy1+1)
+        inter = w * h
+        if method == 'Min':  # 'is' would compare identity, not string equality
+            o = inter / np.minimum(area[i], area[idx])
+        else:
+            o = inter / (area[i] + area[idx] - inter)
+        I = I[np.where(o<=threshold)]
+    pick = pick[0:counter]
+    return pick
+
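
`nms` greedily keeps the highest-scoring box and suppresses neighbours whose overlap exceeds `threshold`; with `'Union'` the overlap is IoU, with `'Min'` it is intersection over the smaller box's area. A toy run:

```python
import numpy as np

boxes = np.array([
    [0.,  0., 10., 10., 0.9],   # kept: highest score
    [1.,  1., 11., 11., 0.8],   # suppressed: IoU with the first box is ~0.70
    [20., 20., 30., 30., 0.7],  # kept: disjoint from the first box
])
print(nms(boxes, 0.5, 'Union'))  # -> [0 2]
```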
+# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
+def pad(total_boxes, w, h):
+    # compute the padding coordinates (pad the bounding boxes to square)
+    tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
+    tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
+    numbox = total_boxes.shape[0]
+
+    dx = np.ones((numbox), dtype=np.int32)
+    dy = np.ones((numbox), dtype=np.int32)
+    edx = tmpw.copy().astype(np.int32)
+    edy = tmph.copy().astype(np.int32)
+
+    x = total_boxes[:,0].copy().astype(np.int32)
+    y = total_boxes[:,1].copy().astype(np.int32)
+    ex = total_boxes[:,2].copy().astype(np.int32)
+    ey = total_boxes[:,3].copy().astype(np.int32)
+
+    tmp = np.where(ex>w)
+    edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
+    ex[tmp] = w
+    
+    tmp = np.where(ey>h)
+    edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
+    ey[tmp] = h
+
+    tmp = np.where(x<1)
+    dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
+    x[tmp] = 1
+
+    tmp = np.where(y<1)
+    dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
+    y[tmp] = 1
+    
+    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
+
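
`pad` keeps the original MATLAB code's 1-based coordinates: `x/y/ex/ey` are the clipped crop region in the source image, while `dx/dy/edx/edy` say where that region lands inside the `tmpw` x `tmph` destination patch, so boxes hanging off the image edge get zero-padded. For example, a box extending past the left edge:

```python
import numpy as np

# Box from x = -4 to x = 15 (1-based) in a 100x100 image.
tb = np.array([[-4., 10., 15., 29., 0.9]])
dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(tb.copy(), 100, 100)
# tmpw = 15 - (-4) + 1 = 20; x is clipped to 1 and dx = 2 - (-4) = 6, so the
# 15 visible columns fill positions 6..20 of the 20-wide destination patch.
print(tmpw[0], x[0], dx[0], edx[0])  # -> 20 1 6 20
```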
+# function [bboxA] = rerec(bboxA)
+def rerec(bboxA):
+    # convert bboxA to square
+    h = bboxA[:,3]-bboxA[:,1]
+    w = bboxA[:,2]-bboxA[:,0]
+    l = np.maximum(w, h)
+    bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
+    bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
+    bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
+    return bboxA
+
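
`rerec` grows the shorter side of every box into a square centred on the original box, so the fixed-size crops fed to RNet/ONet are not distorted. For example:

```python
import numpy as np

b = np.array([[0., 0., 20., 10., 0.9]])   # 20 wide, 10 tall
print(rerec(b.copy()))                    # -> [[ 0. -5. 20. 15.  0.9]]
```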
+def imresample(img, sz):
+    im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable
+    return im_data
+
+    # The nested-loop resampler below is kept, commented out, for debugging purposes
+#     h=img.shape[0]
+#     w=img.shape[1]
+#     hs, ws = sz
+#     dx = float(w) / ws
+#     dy = float(h) / hs
+#     im_data = np.zeros((hs,ws,3))
+#     for a1 in range(0,hs):
+#         for a2 in range(0,ws):
+#             for a3 in range(0,3):
+#                 im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
+#     return im_data
+

BIN
src/align/detect_face.pyc


+ 544 - 0
src/facenet.py

@@ -0,0 +1,544 @@
+"""Functions for building the face recognition network.
+"""
+# MIT License
+# 
+# Copyright (c) 2016 David Sandberg
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# pylint: disable=missing-docstring
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+from subprocess import Popen, PIPE
+import tensorflow as tf
+from tensorflow.python.framework import ops
+import numpy as np
+from scipy import misc
+from sklearn.model_selection import KFold
+from scipy import interpolate
+from tensorflow.python.training import training
+import random
+import re
+from tensorflow.python.platform import gfile
+from six import iteritems
+
+def triplet_loss(anchor, positive, negative, alpha):
+    """Calculate the triplet loss according to the FaceNet paper
+    
+    Args:
+      anchor: the embeddings for the anchor images.
+      positive: the embeddings for the positive images.
+      negative: the embeddings for the negative images.
+  
+    Returns:
+      the triplet loss according to the FaceNet paper as a float tensor.
+    """
+    with tf.variable_scope('triplet_loss'):
+        pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
+        neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)
+        
+        basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha)
+        loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0)
+      
+    return loss
+  
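
In symbols the loss is `mean(max(||a - p||^2 - ||a - n||^2 + alpha, 0))` over the batch. A NumPy mirror of the graph above, handy for sanity checks:

```python
import numpy as np

def triplet_loss_np(anchor, positive, negative, alpha):
    pos_dist = np.sum(np.square(anchor - positive), axis=1)
    neg_dist = np.sum(np.square(anchor - negative), axis=1)
    return np.mean(np.maximum(pos_dist - neg_dist + alpha, 0.0))

a, p, n = np.array([[0., 0.]]), np.array([[1., 0.]]), np.array([[3., 0.]])
print(triplet_loss_np(a, p, n, alpha=0.2))  # 1 - 9 + 0.2 < 0  ->  0.0
```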
+def decov_loss(xs):
+    """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
+    'Reducing Overfitting In Deep Networks by Decorrelating Representation'
+    """
+    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
+    m = tf.reduce_mean(x, 0, True)
+    z = tf.expand_dims(x-m, 2)
+    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0)
+    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
+    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
+    loss = 0.5*(corr_frob_sqr - corr_diag_sqr)
+    return loss 
+  
+def center_loss(features, label, alfa, nrof_classes):
+    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
+       (http://ydwen.github.io/papers/WenECCV16.pdf)
+    """
+    nrof_features = features.get_shape()[1]
+    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
+        initializer=tf.constant_initializer(0), trainable=False)
+    label = tf.reshape(label, [-1])
+    centers_batch = tf.gather(centers, label)
+    diff = (1 - alfa) * (centers_batch - features)
+    centers = tf.scatter_sub(centers, label, diff)
+    loss = tf.reduce_mean(tf.square(features - centers_batch))
+    return loss, centers
+
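
Each step pulls the centre of class `y` toward the batch's features: `c_y <- c_y - (1 - alfa) * (c_y - x)`. A NumPy mirror of the update; note that `np.subtract.at`, like `tf.scatter_sub`, accumulates over duplicate labels in a batch:

```python
import numpy as np

centers = np.zeros((3, 2))                 # 3 classes, 2-d features
features = np.array([[1., 1.], [2., 0.]])
label = np.array([0, 0])
alfa = 0.95

diff = (1 - alfa) * (centers[label] - features)
np.subtract.at(centers, label, diff)       # c_y <- c_y - sum of per-example diffs
print(centers[0])                          # -> [0.15 0.05]
```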
+def get_image_paths_and_labels(dataset):
+    image_paths_flat = []
+    labels_flat = []
+    for i in range(len(dataset)):
+        image_paths_flat += dataset[i].image_paths
+        labels_flat += [i] * len(dataset[i].image_paths)
+    return image_paths_flat, labels_flat
+
+def shuffle_examples(image_paths, labels):
+    shuffle_list = list(zip(image_paths, labels))
+    random.shuffle(shuffle_list)
+    image_paths_shuff, labels_shuff = zip(*shuffle_list)
+    return image_paths_shuff, labels_shuff
+
+def read_images_from_disk(input_queue):
+    """Consumes a single filename and label as a ' '-delimited string.
+    Args:
+      filename_and_label_tensor: A scalar string tensor.
+    Returns:
+      Two tensors: the decoded image, and the string label.
+    """
+    label = input_queue[1]
+    file_contents = tf.read_file(input_queue[0])
+    example = tf.image.decode_image(file_contents, channels=3)
+    return example, label
+  
+def random_rotate_image(image):
+    angle = np.random.uniform(low=-10.0, high=10.0)
+    return misc.imrotate(image, angle, 'bicubic')
+  
+def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs, 
+        random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True):
+    
+    images = ops.convert_to_tensor(image_list, dtype=tf.string)
+    labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
+    
+    # Makes an input queue
+    input_queue = tf.train.slice_input_producer([images, labels],
+        num_epochs=max_nrof_epochs, shuffle=shuffle)
+
+    images_and_labels = []
+    for _ in range(nrof_preprocess_threads):
+        image, label = read_images_from_disk(input_queue)
+        if random_rotate:
+            image = tf.py_func(random_rotate_image, [image], tf.uint8)
+        if random_crop:
+            image = tf.random_crop(image, [image_size, image_size, 3])
+        else:
+            image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size)
+        if random_flip:
+            image = tf.image.random_flip_left_right(image)
+        #pylint: disable=no-member
+        image.set_shape((image_size, image_size, 3))
+        image = tf.image.per_image_standardization(image)
+        images_and_labels.append([image, label])
+
+    image_batch, label_batch = tf.train.batch_join(
+        images_and_labels, batch_size=batch_size,
+        capacity=4 * nrof_preprocess_threads * batch_size,
+        allow_smaller_final_batch=True)
+  
+    return image_batch, label_batch
+  
+def _add_loss_summaries(total_loss):
+    """Add summaries for losses.
+  
+    Generates moving average for all losses and associated summaries for
+    visualizing the performance of the network.
+  
+    Args:
+      total_loss: Total loss from loss().
+    Returns:
+      loss_averages_op: op for generating moving averages of losses.
+    """
+    # Compute the moving average of all individual losses and the total loss.
+    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
+    losses = tf.get_collection('losses')
+    loss_averages_op = loss_averages.apply(losses + [total_loss])
+  
+    # Attach a scalar summary to all individual losses and the total loss; do the
+    # same for the averaged version of the losses.
+    for l in losses + [total_loss]:
+        # Name each loss as '(raw)' and name the moving average version of the loss
+        # as the original loss name.
+        tf.summary.scalar(l.op.name +' (raw)', l)
+        tf.summary.scalar(l.op.name, loss_averages.average(l))
+  
+    return loss_averages_op
+
+def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True):
+    # Generate moving averages of all losses and associated summaries.
+    loss_averages_op = _add_loss_summaries(total_loss)
+
+    # Compute gradients.
+    with tf.control_dependencies([loss_averages_op]):
+        if optimizer=='ADAGRAD':
+            opt = tf.train.AdagradOptimizer(learning_rate)
+        elif optimizer=='ADADELTA':
+            opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6)
+        elif optimizer=='ADAM':
+            opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1)
+        elif optimizer=='RMSPROP':
+            opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0)
+        elif optimizer=='MOM':
+            opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)
+        else:
+            raise ValueError('Invalid optimization algorithm')
+    
+        grads = opt.compute_gradients(total_loss, update_gradient_vars)
+        
+    # Apply gradients.
+    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+  
+    # Add histograms for trainable variables.
+    if log_histograms:
+        for var in tf.trainable_variables():
+            tf.summary.histogram(var.op.name, var)
+   
+    # Add histograms for gradients.
+    if log_histograms:
+        for grad, var in grads:
+            if grad is not None:
+                tf.summary.histogram(var.op.name + '/gradients', grad)
+  
+    # Track the moving averages of all trainable variables.
+    variable_averages = tf.train.ExponentialMovingAverage(
+        moving_average_decay, global_step)
+    variables_averages_op = variable_averages.apply(tf.trainable_variables())
+  
+    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
+        train_op = tf.no_op(name='train')
+  
+    return train_op
+
+def prewhiten(x):
+    mean = np.mean(x)
+    std = np.std(x)
+    std_adj = np.maximum(std, 1.0/np.sqrt(x.size))
+    y = np.multiply(np.subtract(x, mean), 1/std_adj)
+    return y  
+
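
`prewhiten` normalises a whole image to zero mean and unit variance, flooring the std at `1/sqrt(x.size)` so near-constant images do not divide by ~0. For example:

```python
import numpy as np

img = np.random.randint(0, 256, (160, 160, 3)).astype(np.float32)
out = prewhiten(img)
print(round(out.mean(), 3), round(out.std(), 3))  # ~0.0, ~1.0
```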
+def crop(image, random_crop, image_size):
+    if image.shape[1]>image_size:
+        sz1 = int(image.shape[1]//2)
+        sz2 = int(image_size//2)
+        if random_crop:
+            diff = sz1-sz2
+            (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1))
+        else:
+            (h, v) = (0,0)
+        image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:]
+    return image
+  
+def flip(image, random_flip):
+    if random_flip and np.random.choice([True, False]):
+        image = np.fliplr(image)
+    return image
+
+def to_rgb(img):
+    w, h = img.shape
+    ret = np.empty((w, h, 3), dtype=np.uint8)
+    ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
+    return ret
+  
+def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True):
+    nrof_samples = len(image_paths)
+    images = np.zeros((nrof_samples, image_size, image_size, 3))
+    for i in range(nrof_samples):
+        img = misc.imread(image_paths[i])
+        if img.ndim == 2:
+            img = to_rgb(img)
+        if do_prewhiten:
+            img = prewhiten(img)
+        img = crop(img, do_random_crop, image_size)
+        img = flip(img, do_random_flip)
+        images[i,:,:,:] = img
+    return images
+
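
Together with `load_model`, `load_data` is how this project turns aligned face crops into embeddings. A hedged sketch: the tensor names `input:0`, `embeddings:0` and `phase_train:0` are the ones the pretrained facenet models export (treat them as assumptions), the model directory follows the layout from the README, and the image paths are hypothetical:

```python
import tensorflow as tf
import src.facenet as facenet

with tf.Graph().as_default(), tf.Session() as sess:
    facenet.load_model('models/facenet/20170512-110547')
    g = tf.get_default_graph()
    images = facenet.load_data(['face1.png', 'face2.png'],
                               do_random_crop=False, do_random_flip=False,
                               image_size=160)
    emb = sess.run(g.get_tensor_by_name('embeddings:0'),
                   feed_dict={g.get_tensor_by_name('input:0'): images,
                              g.get_tensor_by_name('phase_train:0'): False})
    print(emb.shape)  # e.g. (2, 128)
```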
+def get_label_batch(label_data, batch_size, batch_index):
+    nrof_examples = np.size(label_data, 0)
+    j = batch_index*batch_size % nrof_examples
+    if j+batch_size<=nrof_examples:
+        batch = label_data[j:j+batch_size]
+    else:
+        x1 = label_data[j:nrof_examples]
+        x2 = label_data[0:nrof_examples-j]
+        batch = np.vstack([x1,x2])
+    batch_int = batch.astype(np.int64)
+    return batch_int
+
+def get_batch(image_data, batch_size, batch_index):
+    nrof_examples = np.size(image_data, 0)
+    j = batch_index*batch_size % nrof_examples
+    if j+batch_size<=nrof_examples:
+        batch = image_data[j:j+batch_size,:,:,:]
+    else:
+        x1 = image_data[j:nrof_examples,:,:,:]
+        x2 = image_data[0:nrof_examples-j,:,:,:]
+        batch = np.vstack([x1,x2])
+    batch_float = batch.astype(np.float32)
+    return batch_float
+
+def get_triplet_batch(triplets, batch_index, batch_size):
+    ax, px, nx = triplets
+    a = get_batch(ax, int(batch_size/3), batch_index)
+    p = get_batch(px, int(batch_size/3), batch_index)
+    n = get_batch(nx, int(batch_size/3), batch_index)
+    batch = np.vstack([a, p, n])
+    return batch
+
+def get_learning_rate_from_file(filename, epoch):
+    learning_rate = None  # guard against an UnboundLocalError when the first schedule entry is beyond `epoch`
+    with open(filename, 'r') as f:
+        for line in f.readlines():
+            line = line.split('#', 1)[0]
+            if line:
+                par = line.strip().split(':')
+                e = int(par[0])
+                lr = float(par[1])
+                if e <= epoch:
+                    learning_rate = lr
+                else:
+                    return learning_rate
+    return learning_rate  # also return the last rate when the schedule ends before `epoch`
+
+class ImageClass():
+    "Stores the paths to images for a given class"
+    def __init__(self, name, image_paths):
+        self.name = name
+        self.image_paths = image_paths
+  
+    def __str__(self):
+        return self.name + ', ' + str(len(self.image_paths)) + ' images'
+  
+    def __len__(self):
+        return len(self.image_paths)
+  
+def get_dataset(paths, has_class_directories=True):
+    dataset = []
+    for path in paths.split(':'):
+        path_exp = os.path.expanduser(path)
+        classes = os.listdir(path_exp)
+        classes.sort()
+        nrof_classes = len(classes)
+        for i in range(nrof_classes):
+            class_name = classes[i]
+            facedir = os.path.join(path_exp, class_name)
+            image_paths = get_image_paths(facedir)
+            dataset.append(ImageClass(class_name, image_paths))
+  
+    return dataset
+
+def get_image_paths(facedir):
+    image_paths = []
+    if os.path.isdir(facedir):
+        images = os.listdir(facedir)
+        image_paths = [os.path.join(facedir,img) for img in images]
+    return image_paths
+  
+def split_dataset(dataset, split_ratio, mode):
+    if mode=='SPLIT_CLASSES':
+        nrof_classes = len(dataset)
+        class_indices = np.arange(nrof_classes)
+        np.random.shuffle(class_indices)
+        split = int(round(nrof_classes*split_ratio))
+        train_set = [dataset[i] for i in class_indices[0:split]]
+        test_set = [dataset[i] for i in class_indices[split:]]  # [split:], not [split:-1], which silently drops the last class
+    elif mode=='SPLIT_IMAGES':
+        train_set = []
+        test_set = []
+        min_nrof_images = 2
+        for cls in dataset:
+            paths = cls.image_paths
+            np.random.shuffle(paths)
+            split = int(round(len(paths)*split_ratio))
+            if split<min_nrof_images:
+                continue  # Not enough images for test set. Skip class...
+            train_set.append(ImageClass(cls.name, paths[0:split]))
+            test_set.append(ImageClass(cls.name, paths[split:]))  # [split:], not [split:-1], which silently drops the last image
+    else:
+        raise ValueError('Invalid train/test split mode "%s"' % mode)
+    return train_set, test_set
+
+def load_model(model):
+    # Check if the model is a model directory (containing a metagraph and a checkpoint file)
+    #  or if it is a protobuf file with a frozen graph
+    model_exp = os.path.expanduser(model)
+    if os.path.isfile(model_exp):
+        print('Model filename: %s' % model_exp)
+        with gfile.FastGFile(model_exp,'rb') as f:
+            graph_def = tf.GraphDef()
+            graph_def.ParseFromString(f.read())
+            tf.import_graph_def(graph_def, name='')
+    else:
+        print('Model directory: %s' % model_exp)
+        meta_file, ckpt_file = get_model_filenames(model_exp)
+        
+        print('Metagraph file: %s' % meta_file)
+        print('Checkpoint file: %s' % ckpt_file)
+      
+        saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file))
+        saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
+    
+def get_model_filenames(model_dir):
+    files = os.listdir(model_dir)
+    meta_files = [s for s in files if s.endswith('.meta')]
+    if len(meta_files)==0:
+        raise ValueError('No meta file found in the model directory (%s)' % model_dir)
+    elif len(meta_files)>1:
+        raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
+    meta_file = meta_files[0]
+    meta_files = [s for s in files if '.ckpt' in s]
+    max_step = -1
+    for f in files:
+        step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
+        if step_str is not None and len(step_str.groups())>=2:
+            step = int(step_str.groups()[1])
+            if step > max_step:
+                max_step = step
+                ckpt_file = step_str.groups()[0]
+    return meta_file, ckpt_file
+
+def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
+    assert(embeddings1.shape[0] == embeddings2.shape[0])
+    assert(embeddings1.shape[1] == embeddings2.shape[1])
+    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
+    nrof_thresholds = len(thresholds)
+    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
+    
+    tprs = np.zeros((nrof_folds,nrof_thresholds))
+    fprs = np.zeros((nrof_folds,nrof_thresholds))
+    accuracy = np.zeros((nrof_folds))
+    
+    diff = np.subtract(embeddings1, embeddings2)
+    dist = np.sum(np.square(diff),1)
+    indices = np.arange(nrof_pairs)
+    
+    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
+        
+        # Find the best threshold for the fold
+        acc_train = np.zeros((nrof_thresholds))
+        for threshold_idx, threshold in enumerate(thresholds):
+            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
+        best_threshold_index = np.argmax(acc_train)
+        for threshold_idx, threshold in enumerate(thresholds):
+            tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
+        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])
+          
+    tpr = np.mean(tprs,0)
+    fpr = np.mean(fprs,0)
+    return tpr, fpr, accuracy
+
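
`calculate_roc` sweeps distance thresholds with 10-fold cross-validation, choosing the best threshold on each training fold and reporting TPR/FPR/accuracy on the held-out fold. A call on synthetic unit-norm embeddings (the 0-4 threshold range covers squared L2 distances between unit vectors, whose maximum is 4):

```python
import numpy as np

emb1 = np.random.randn(60, 128)
emb1 /= np.linalg.norm(emb1, axis=1, keepdims=True)
emb2 = np.random.randn(60, 128)
emb2 /= np.linalg.norm(emb2, axis=1, keepdims=True)
issame = np.random.rand(60) > 0.5           # synthetic same/different labels

thresholds = np.arange(0, 4, 0.01)
tpr, fpr, accuracy = calculate_roc(thresholds, emb1, emb2, issame, nrof_folds=10)
print(accuracy.mean())
```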
+def calculate_accuracy(threshold, dist, actual_issame):
+    predict_issame = np.less(dist, threshold)
+    tp = np.sum(np.logical_and(predict_issame, actual_issame))
+    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
+    tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
+    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))
+  
+    tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn)
+    fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn)
+    acc = float(tp+tn)/dist.size
+    return tpr, fpr, acc
+
+
+  
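
A quick numeric check of `calculate_accuracy`:

```python
import numpy as np

dist = np.array([0.5, 1.5, 0.8, 2.0])
actual = np.array([True, True, False, False])
print(calculate_accuracy(1.0, dist, actual))  # -> (0.5, 0.5, 0.5)
```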
+def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
+    assert(embeddings1.shape[0] == embeddings2.shape[0])
+    assert(embeddings1.shape[1] == embeddings2.shape[1])
+    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
+    nrof_thresholds = len(thresholds)
+    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
+    
+    val = np.zeros(nrof_folds)
+    far = np.zeros(nrof_folds)
+    
+    diff = np.subtract(embeddings1, embeddings2)
+    dist = np.sum(np.square(diff),1)
+    indices = np.arange(nrof_pairs)
+    
+    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
+      
+        # Find the threshold that gives FAR = far_target
+        far_train = np.zeros(nrof_thresholds)
+        for threshold_idx, threshold in enumerate(thresholds):
+            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
+        if np.max(far_train)>=far_target:
+            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
+            threshold = f(far_target)
+        else:
+            threshold = 0.0
+    
+        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])
+  
+    val_mean = np.mean(val)
+    far_mean = np.mean(far)
+    val_std = np.std(val)
+    return val_mean, val_std, far_mean
+
+
+def calculate_val_far(threshold, dist, actual_issame):
+    predict_issame = np.less(dist, threshold)
+    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
+    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
+    n_same = np.sum(actual_issame)
+    n_diff = np.sum(np.logical_not(actual_issame))
+    val = float(true_accept) / float(n_same)
+    far = float(false_accept) / float(n_diff)
+    return val, far
+
+def store_revision_info(src_path, output_dir, arg_string):
+  
+    # Get git hash
+    gitproc = Popen(['git', 'rev-parse', 'HEAD'], stdout = PIPE, cwd=src_path)
+    (stdout, _) = gitproc.communicate()
+    git_hash = stdout.strip()
+  
+    # Get local changes
+    gitproc = Popen(['git', 'diff', 'HEAD'], stdout = PIPE, cwd=src_path)
+    (stdout, _) = gitproc.communicate()
+    git_diff = stdout.strip()
+    
+    # Store a text file in the log directory
+    rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
+    with open(rev_info_filename, "w") as text_file:
+        text_file.write('arguments: %s\n--------------------\n' % arg_string)
+        text_file.write('git hash: %s\n--------------------\n' % git_hash)
+        text_file.write('%s' % git_diff)
+
+def list_variables(filename):
+    reader = training.NewCheckpointReader(filename)
+    variable_map = reader.get_variable_to_shape_map()
+    names = sorted(variable_map.keys())
+    return names
+
+def put_images_on_grid(images, shape=(16,8)):
+    nrof_images = images.shape[0]
+    img_size = images.shape[1]
+    bw = 3
+    img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32)
+    for i in range(shape[1]):
+        x_start = i*(img_size+bw)+bw
+        for j in range(shape[0]):
+            img_index = i*shape[0]+j
+            if img_index>=nrof_images:
+                break
+            y_start = j*(img_size+bw)+bw
+            img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :]
+        if img_index>=nrof_images:
+            break
+    return img
+
+def write_arguments_to_file(args, filename):
+    with open(filename, 'w') as f:
+        for key, value in iteritems(vars(args)):
+            f.write('%s: %s\n' % (key, str(value)))

BIN
src/facenet.pyc