Tuesday, May 10, 2011

Head Pose Estimation by using POSIT in OpenCV

I wanted to use OpenCV functions to do "Head Pose Estimation", and I found an interesting post by "Roy" at http://www.morethantechnical.com/2010/03/19/quick-and-easy-head-pose-estimation-with-opencv-w-code.
In his post, OpenCV's solvePnP is used to calculate the pose (rotation and translation) that maps the 3D points of the model to 2D points in the image. But I found that OpenCV also provides a POSIT implementation and a very nice POSIT tutorial at http://opencv.willowgarage.com/wiki/Posit. So I decided to take Roy's data and apply it to the POSIT function of OpenCV.

#include <cxcore.h>
#include <cv.h>
#include <highgui.h>


using namespace std;
using namespace cv;

// Focal length handed to cvPOSIT and reused for the manual perspective
// projection in loadNext(). NOTE(review): presumably expressed in pixels;
// the value is hard-coded rather than calibrated — confirm for your camera.
#define FOCAL_LENGTH 1000
// Not referenced anywhere in the code shown in this post.
#define CUBE_SIZE 10

// Directory prefix prepended to every sample name; this is an absolute path
// on the author's machine and must be adapted before running.
static const char* IMAGE_DATA_PATH = "/Users/mhrinc/Desktop/image_data/";
// Suffix appended to a sample name to locate its image file.
#define IMAGE_EXTENSION ".jpeg"
// Suffix appended to a sample name to locate its landmark data file.
#define DATA_EXTENSION ".txt"

// 3D model points; filled in main() and used to create the POSIT object.
std::vector<CvPoint3D32f> modelPoints;
// 3D points around the eyes; filled in main() and projected onto the image
// in loadNext() using the estimated pose.
std::vector<CvPoint3D32f> glassesPoints;

void getImagePath(char* result,const char* imageFileName) {
    sprintf(result,"%s%s%s", IMAGE_DATA_PATH, imageFileName,IMAGE_EXTENSION);
}
void getDataPath(char* result,const char* dataFileName) {
    sprintf(result,"%s%s%s", IMAGE_DATA_PATH, dataFileName,DATA_EXTENSION);
}

// Returns the sample names to process, in processing order. Each entry is a
// path relative to IMAGE_DATA_PATH without a file extension; the pointers
// refer to string literals and stay valid for the program's lifetime.
vector<const char* > listOfImages() {
    static const char* const kSampleNames[] = {
        // Man
        "man/barack_obama_1",
        "man/barack_obama_2",
        "man/bill-gates_1",
        "man/bill-gates_2",
        "man/david_beckham_1",
        "man/david_beckham_2",
        "man/nguyen_tan_dung_1",
        "man/nguyen_tan_dung_2",
        "man/bill_clinton_1",
        "man/bill_clinton_2",
        // Woman
        "woman/britney_spears_1",
        "woman/cheryl_cole_2",
        "woman/jang_nara_1",
        "woman/kim_tae_hee_2",
        "woman/britney_spears_2",
        "woman/harry_clinton_1",
        "woman/jang_nara_2",
        "woman/michelle_obama_1",
        "woman/cheryl_cole_1",
        "woman/harry_clinton_2",
        "woman/kim_tae_hee_1",
        "woman/michelle_obama_2",
        "woman/Angelina_Jolie_0002",
        "woman/Angelina_Jolie_0003",
        "woman/Angelina_Jolie_0004",
        "woman/Angelina_Jolie_0005",
        "woman/Angelina_Jolie_0006",
        "woman/Angelina_Jolie_0007",
        "woman/Angelina_Jolie_0008"
    };
    const size_t sampleCount = sizeof(kSampleNames) / sizeof(kSampleNames[0]);

    return vector<const char* >(kSampleNames, kSampleNames + sampleCount);
}

void loadNext(CvPOSITObject* positObject, const char* imageFileName) {
    int _w, _h;
    char buf[256] = {0};
    getDataPath(&buf[0], imageFileName);
    cout << "Data:"<< buf << endl;
    vector<CvPoint2D32f > points;
    FILE* f;
    f = fopen(buf,"r");
    fscanf(f,"%i",&_w);
    fscanf(f,"%i",&_h);
    cout << "widht:"<<_w<<",height:"<<_h<<endl;
    float _width = _w/2.0;
    float _height = _h/2.0;
    for(int i=0;i<7;i++) {
        int x,y;
        fscanf(f,"%d",&x);
        fscanf(f,"%d",&y);
        cout << x<<","<<y<<",";
        points.push_back(cvPoint2D32f(-_width+x,_height - y));
    }
    cout << endl;
    fclose(f);
    
    getImagePath(&buf[0], imageFileName);
    cout << "Image:"<< buf << endl;
    
    Mat img = imread(buf);
    for(unsigned int i=0;i<points.size();i++) {
        CvPoint2D32f p = cvPoint2D32f(points[i].x+_width,_height-points[i].y);
        circle(img,p,2,Scalar(255,0,255),CV_FILLED);
    }

    //Estimate the pose
    CvMatr32f rotation_matrix = new float[9];
    CvVect32f translation_vector = new float[3];    
    CvTermCriteria criteria = cvTermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 100, 1.0e-4f);
    cvPOSIT( positObject, &points[0], FOCAL_LENGTH, criteria, rotation_matrix, translation_vector );

    cout << "\n-.- SOURCE IMAGE POINTS -.-\n";
    for ( size_t p=0; p<points.size(); p++ )
        cout << points[p].x << ", " << points[p].y << " \n";
    
    cout << "\n-.- ESTIMATED ROTATION\n";
    for ( size_t p=0; p<3; p++ )
        cout << rotation_matrix[p*3] << " | " << rotation_matrix[p*3+1] << " | " << rotation_matrix[p*3+2] << "\n";
    
    cout << "\n-.- ESTIMATED TRANSLATION\n";
    cout << translation_vector[0] << " | " << translation_vector[1] << " | " << translation_vector[2] << "\n";
    
    //Project the model points with the estimated pose
    std::vector<CvPoint2D32f> projectedPoints;
    for ( size_t  p=0; p<glassesPoints.size(); p++ )
    {
        CvPoint3D32f point3D;
        point3D.x = rotation_matrix[0] * glassesPoints[p].x + 
        rotation_matrix[1] * glassesPoints[p].y +
        rotation_matrix[2] * glassesPoints[p].z +
        translation_vector[0];
        point3D.y = rotation_matrix[3] * glassesPoints[p].x + 
        rotation_matrix[4] * glassesPoints[p].y +
        rotation_matrix[5] * glassesPoints[p].z +
        translation_vector[1];
        point3D.z = rotation_matrix[6] * glassesPoints[p].x + 
        rotation_matrix[7] * glassesPoints[p].y +
        rotation_matrix[8] * glassesPoints[p].z +
        translation_vector[2];
        CvPoint2D32f point2D = cvPoint2D32f( 0.0, 0.0 );
        if ( point3D.z != 0 )
        {
            point2D.x = FOCAL_LENGTH * point3D.x / point3D.z; 
            point2D.y = FOCAL_LENGTH * point3D.y / point3D.z;    
        }
        projectedPoints.push_back( point2D );
    }
    cout << "\n-.- PROJECTED POINTS -.-\n";
    for ( size_t p=0; p<projectedPoints.size(); p++ )
        cout << projectedPoints[p].x << ", " << projectedPoints[p].y << " \n";
    for(unsigned int i=0;i<projectedPoints.size();i++) {
        CvPoint2D32f p = cvPoint2D32f(projectedPoints[i].x+_width,_height-projectedPoints[i].y);
        circle(img,p,2,Scalar(255,0,0),CV_FILLED);
    }
    
    delete rotation_matrix;
    delete translation_vector;
    imshow("1", img);
    cvWaitKey(0);
}    

int main(int argc, char** argv)
{
    cout << "OpenCV POSIT tutorial" << endl;
    cout << "by Javier Barandiaran(jbarandiaran@gmail.com)" << endl;
    
    //Create the model pointss
    float xOffset = 36.9522f;
    float yOffset = -39.3518f;
    float zOffset = -47.1217f;
    modelPoints.push_back(cvPoint3D32f(xOffset+-36.9522f,yOffset+39.3518f,zOffset+47.1217f));    //l eye
    modelPoints.push_back(cvPoint3D32f(xOffset+35.446f,yOffset+38.4345f,zOffset+47.6468f));        //r eye
    modelPoints.push_back(cvPoint3D32f(xOffset+-0.0697709f,yOffset+18.6015f,zOffset+87.9695f)); //nose
    modelPoints.push_back(cvPoint3D32f(xOffset+-27.6439f,yOffset+-29.6388f,zOffset+73.8551f));    //l mouth
    modelPoints.push_back(cvPoint3D32f(xOffset+28.7793f,yOffset+-29.2935f,zOffset+72.7329f));    //r mouth
    
    float xx,yy;
    xx = 20, yy = 15;
    glassesPoints.push_back(cvPoint3D32f(xOffset+-36.9522f - xx,yOffset+39.3518f + yy,zOffset+47.1217f));    //l eye
    glassesPoints.push_back(cvPoint3D32f(xOffset+-36.9522f - xx,yOffset+39.3518f - yy,zOffset+47.1217f));    //l eye
    glassesPoints.push_back(cvPoint3D32f(xOffset+35.446f + xx,yOffset+38.4345f + yy,zOffset+47.6468f));            //r eye
    glassesPoints.push_back(cvPoint3D32f(xOffset+35.446f + xx,yOffset+38.4345f - yy,zOffset+47.6468f));            //r eye
//    modelPoints.push_back(Point3f(-87.2155f,15.5829f,-45.1352f));    //l ear
//    modelPoints.push_back(Point3f(85.8383f,14.9023f,-46.3169f));    //r ear
    cout << "\n-.- SOURCE MODEL POINTS -.-\n";
    for ( size_t  p=0; p<modelPoints.size(); p++ )
        cout << modelPoints[p].x << ", " << modelPoints[p].y << ", " << modelPoints[p].z << "\n";
    
    //Create the POSIT object with the model points
    CvPOSITObject* positObject;
    positObject = cvCreatePOSITObject( &modelPoints[0], (int)modelPoints.size() );
    
    //Do processing
    int imageIndex = 0;
    vector<const char* > images = listOfImages();
    while (imageIndex < images.size()) {
        const char* imageFileName = images.at(imageIndex);
        loadNext(positObject, imageFileName);
        imageIndex++;
    }

    cvReleasePOSITObject(&positObject);
    return 0;
}

6 comments:

  1. Nice post Mr. Xuvif!

    It is really hard to find good examples of the POSIT with openCV these days.

    I am working on a simple posit app. After reading your code I was so happy but got stuck now. How come my compiler does not recognize the CvMatr32f and CvVect32f types?

    I've got OpenCV 2.3 and Qt 4.8 working fine, but I cannot compile the code because of these 2 "alien-types" =]

    Is there any hope for me?

    Best regards,

    Arnaldo

    ReplyDelete
  2. Hi! I had exactly the same problem when I was trying to compile the POSIT example program: nehegl_glut (http://opencv.willowgarage.com/wiki/Posit?action=AttachFile&do=view&target=POSIT.rar)

    Finally I included the next headers:
    #include <opencv2/legacy/blobtrack.hpp>
    #include <opencv2/legacy/compat.hpp>
    #include <opencv2/legacy/legacy.hpp>
    #include <opencv2/legacy/streams.hpp>
    #include <opencv2/opencv.hpp>
    #include <opencv2/highgui/highgui.hpp>

    And commented out the next headers:
    //#include <cxcore.h>
    //#include <cv.h>

    Then it compiled nicely. The main trick was to link the legacy headers. I hope this will help you too.

    ReplyDelete
    Replies
    1. You can just use the new datatypes, and remove the deprecated ones:

      Mat rotation_matrix(3, 3, CV_32FC1);
      Mat translation_vector(1, 3, CV_32FC1); // 1 row, 3 columns
      CvTermCriteria criteria = cvTermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 100, 1.0e-4f);
      cvPOSIT(positObject.get(), &srcImagePoints[0], FOCAL_LENGTH, criteria, rotation_matrix.ptr(0), translation_vector.ptr(0));

      That way you have safe and clean modern C++ code.

      Delete
  3. Hi there,
    I'm trying POSIT too as part of a larger project. But I don't understand what to do after I have the points of interest in the image to be analyzed. The image is acquired from a camera that can provide me the depth, so X, Y and Z values are available.
    What do I assign to the 2D array, the X Y coordinates? And for the positobject, do I assign the same X Y coordinates BUT including Z?

    Thanks in advance!

    ReplyDelete