Monday, September 12, 2011

2D/3D estimation using solvePnP in opencv (NOT SOLVED)

In OpenCV, "solvePnP" is used to estimate the pose of a known 3D object from the image positions of known points on it. In doing so, the object's orientation relative to the camera coordinate system can be found.
The function is equivalent to finding the extrinsic camera parameters, which makes me believe it is more suited to planar objects. I need to do a few more experiments to find out why.

I'm using code from:
http://www.morethantechnical.com/2010/03/19/quick-and-easy-head-pose-estimation-with-opencv-w-code/  




#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include <opencv2/highgui/highgui.hpp>

#include <GL/gl.h>
#include <GL/glu.h>
#include <glut.h>

using namespace cv;
using namespace std;

// Forward declarations: both functions are defined further down, after the
// GLUT callbacks that call them.
void loadNext();
void loadWithPoints(Mat& ip, Mat& img);

// Parameters for GL_LIGHT0; handed to glLightfv() in myGLinit().
const GLfloat light_ambient[]  = { 0.0f, 0.0f, 0.0f, 1.0f };
const GLfloat light_diffuse[]  = { 1.0f, 1.0f, 1.0f, 1.0f };
const GLfloat light_specular[] = { 1.0f, 1.0f, 1.0f, 1.0f };
const GLfloat light_position[] = { 2.0f, 5.0f, 5.0f, 0.0f };

// Front-face material parameters; handed to glMaterialfv() in myGLinit().
const GLfloat mat_ambient[]    = { 0.7f, 0.7f, 0.7f, 1.0f };
const GLfloat mat_diffuse[]    = { 0.8f, 0.8f, 0.8f, 1.0f };
const GLfloat mat_specular[]   = { 1.0f, 1.0f, 1.0f, 1.0f };
const GLfloat high_shininess[] = { 100.0f };

// Storage for the 3x3 rotation matrix: loadWithPoints() wraps it in a Mat and
// lets Rodrigues() write into it; display() reads it to orient the axes.
double rot[9] = {0};
// Receives the last output argument of decomposeProjectionMatrix() in
// loadWithPoints() (presumably the Euler angles -- confirm against the OpenCV docs).
Vec3d eav;
// Texture name generated in myGLinit() and bound in key().
GLuint textureID;
// Pixel source for the texture upload in key(); never assigned in the visible code.
Mat backPxls;
// Three-element buffers for the rotation and translation vectors.
vector<double> rv(3), tv(3);
// Mat views constructed from rv/tv; passed to solvePnP() as the pose in/out arguments.
// NOTE(review): presumably these share storage with rv/tv rather than copying -- confirm.
Mat rvec(rv),tvec(tv);
// Camera matrix; set up in main() and passed to solvePnP() in loadWithPoints().
Mat camMatrix;

void resize(int width, int height)
{
    const float ar = (float) width / (float) height;

    glViewport(0, 0, width, height);

    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(40,1.0,0.01,1000.0);

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity() ;
}

// Window dimensions. Not referenced anywhere else in the visible code: the
// window size is passed as literals in start_opengl_with_stereo().
// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation in C++ -- consider renaming if nothing outside this file uses them.
int __w=250,__h=250;

void key(unsigned char key, int x, int y)
{
    //static int counter = 0;

    switch (key)
    {
    case 27 :
    case 'Q':
    case 'q':
        break;
    case ' ':
        loadNext();

        glBindTexture(GL_TEXTURE_2D, textureID);//This binds the texture to a texture target
        glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR);//set our filter
        glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR);    //set our filter
        glTexImage2D(GL_TEXTURE_2D, 0, 3, backPxls.cols, backPxls.rows, 0, GL_RGB, GL_UNSIGNED_BYTE, backPxls.data);

        break;
    default:
        break;
    }

    glutPostRedisplay();
}

void idle(void)
{
    glutPostRedisplay();
}



void myGLinit() {
    glClearColor(1,1,1,1);

    glShadeModel(GL_SMOOTH);

    glEnable(GL_DEPTH_TEST);
    glDepthFunc(GL_LEQUAL);

    glEnable(GL_LIGHT0);
    glEnable(GL_NORMALIZE);
    glEnable(GL_COLOR_MATERIAL);
    glColorMaterial ( GL_FRONT, GL_AMBIENT_AND_DIFFUSE );

    glLightfv(GL_LIGHT0, GL_AMBIENT,  light_ambient);
    glLightfv(GL_LIGHT0, GL_DIFFUSE,  light_diffuse);
    glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular);
    glLightfv(GL_LIGHT0, GL_POSITION, light_position);

    glMaterialfv(GL_FRONT, GL_AMBIENT,   mat_ambient);
    glMaterialfv(GL_FRONT, GL_DIFFUSE,   mat_diffuse);
    glMaterialfv(GL_FRONT, GL_SPECULAR,  mat_specular);
    glMaterialfv(GL_FRONT, GL_SHININESS, high_shininess);

    glEnable(GL_LIGHTING);

    glGenTextures(1, &textureID);
}



void display(void)
{
    glClearColor(1.0f, 1.0f, 1.0f, 0.5f);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);    // Clear Screen And Depth Buffer

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    gluLookAt(0,0,0,0,0,1,0,1,0);

    //----------Axes
    glPushMatrix();
    glTranslated(0,0,5);

    glPushMatrix();
    double _d[16] = {    rot[0],rot[1],rot[2],0,
                        rot[3],rot[4],rot[5],0,
                        rot[6],rot[7],rot[8],0,
                        0,       0,      0        ,1};
    glMultMatrixd(_d);
    glRotated(180,1,0,0);

    //Z = red
    glPushMatrix();
    glRotated(180,0,1,0);
    glColor3d(1,0,0);
    glutSolidCone(0.05,1,15,20);
    glTranslated(0,0,1);
    glScaled(.1,.1,.1);
    glutSolidTetrahedron();
    glPopMatrix();

    //Y = green
    glPushMatrix();
    glRotated(-90,1,0,0);
    glColor3d(0,1,0);
    glutSolidCone(0.05,1,15,20);
    glTranslated(0,0,1);
    glScaled(.1,.1,.1);
    glutSolidTetrahedron();
    glPopMatrix();

    //X = blue
    glPushMatrix();
    glRotated(-90,0,1,0);
    glColor3d(0,0,1);
    glutSolidCone(0.05,1,15,20);
   
    glTranslated(0,0,1);
    glScaled(.1,.1,.1);
    glutSolidTetrahedron();
    glPopMatrix();

    glPopMatrix();
    glPopMatrix();
    //----------End axes

    glutSwapBuffers();
}

int start_opengl_with_stereo(int argc,char** argv) {
    glutInitWindowSize(250,250);
    glutInitWindowPosition(40,40);
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH | GLUT_MULTISAMPLE);
    glutCreateWindow("3D points coordinate");

    myGLinit();

    glutReshapeFunc(resize);
    glutDisplayFunc(display);
    glutKeyboardFunc(key);
    glutIdleFunc(idle);

    glutMainLoop();

    return 1;
}

// 3D model (object) points: assigned in main() and passed as the first
// argument to solvePnP() in loadWithPoints().
Mat op;

void loadNext() {
    vector<Point2f > points;

float x,y;
 
  x=282;y=274;
  points.push_back(cv::Point2f(x,y));
  x=397;y=227;
  points.push_back(cv::Point2f(x,y));
  x=577;y=271;
  points.push_back(cv::Point2f(x,y));
  x=462;y=318;
  points.push_back(cv::Point2f(x,y));
  x=270;y=479;
  points.push_back(cv::Point2f(x,y));
  x=450;y=523;
  points.push_back(cv::Point2f(x,y));
  x=566;y=475;
  points.push_back(cv::Point2f(x,y));

    Mat ip(points);

    Mat img =  Mat::zeros( 800, 600, CV_8UC3 );
    for(unsigned int i = 0; i < points.size(); ++i)
    {
    std::cout << points[i] << std::endl;
    cv::circle(img,points[i],2,cv::Scalar(0, 0, 255, 0),1,8,0);
    }
    loadWithPoints(ip,img);

}

// Estimates the pose from the given 2D points and the global model points
// (op), stores the resulting rotation matrix in the global rot array, fills
// the global eav from decomposeProjectionMatrix(), and shows img in the "tmp"
// window.
//
// ip  - 2D image points passed to solvePnP()
// img - image displayed with imshow()
void loadWithPoints(Mat& ip, Mat& img) {
    // All-zero distortion coefficients.
    double zeroDistortion[] = {0,0,0,0};
    Mat distCoeffs(1, 4, CV_64FC1, zeroDistortion);

    // rvec/tvec are both input and output here (last argument is true).
    solvePnP(op, ip, camMatrix, distCoeffs, rvec, tvec, true);

    // rotM is a view over the global rot array, so Rodrigues() fills rot.
    Mat rotM(3, 3, CV_64FC1, rot);
    Rodrigues(rvec, rotM);
    const double* r = rotM.ptr<double>();

    // 3x4 matrix made of the rotation with a zero fourth column.
    double projection[12];
    for (int row = 0; row < 3; ++row)
    {
        for (int col = 0; col < 3; ++col)
        {
            projection[4 * row + col] = r[3 * row + col];
        }
        projection[4 * row + 3] = 0;
    }
    Mat projectionMat(3, 4, CV_64FC1, projection);

    // Only eav is kept; the six Mat outputs are required by the call but unused.
    Mat unusedA, unusedB, unusedC, unusedD, unusedE, unusedF;
    decomposeProjectionMatrix(projectionMat,
                              unusedA, unusedB, unusedC,
                              unusedD, unusedE, unusedF,
                              eav);

    imshow("tmp", img);
}


int main(int argc, char** argv)
{

    vector<Point3f > modelPoints;
    float x,y,z;
 
  x=.5;y=.5;z=-.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=.5;y=.5;z=.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=-.5;y=.5;z=.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=-.5;y=.5;z=-.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=.5;y=-.5;z=-.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=-.5;y=-.5;z=-.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=-.5;y=-.5;z=.5;
  modelPoints.push_back(cv::Point3f(x,y,z));


    op = Mat(modelPoints);

    rvec = Mat(rv);
    double _d[9] = {1,0,0,
                    0,-1,0,
                    0,0,-1};
    Rodrigues(Mat(3,3,CV_64FC1,_d),rvec);
    tv[0]=0;tv[1]=0;tv[2]=1;
    tvec = Mat(tv);
    double _cm[9] = { 40, 0, 400,
                      0, 40, 500,
                      0,  0,   40 }; //caliberation matrix PROBLEM!?
    camMatrix = Mat(3,3,CV_64FC1,_cm);

    namedWindow("tmp",1);
    loadNext();

    start_opengl_with_stereo(argc,argv);

    return 0;
}

3 comments:

  1. Hello,

    In my project i am required to use multiple Asus Xtion Pro live cameras in a fixed configuration inside a room to capture dynamic scenes. First step is to of course calibrate these cameras with respect to each other. I am using the stereo calibration functionality provided by OpenCV for this purpose calibrating each pair at a time first and then will move towards ICP registration both local and global. Since i have 3D data from both cameras i thought that i could also use the solvePnp() function provided by OpenCV with 2D points from one camera and 3D points from another camera as input and the R and T between them as output same as StereoCalibrate() function.

    The question i have is regarding the arrangement of the input data and interpretation of the resulting output. From my understanding StereoCalibrate() provides an R and T which transforms the points in camera 2's coordinate system to camera 1's coordinate system and it is not dependent on which camera is the left camera and which camera is the right camera. I would like to confirm this?

    Secondly, from what I have studied, solvePnP() transforms points in 3D from the object's coordinate system to points in the camera's 2D coordinate system. Therefore, if StereoCalibrate() gives an R and T which transform points from Camera 2 to Camera 1, then, to my understanding, to get equivalent results from solvePnP() we have to give it as input the 3D points from the second camera and the corresponding 2D points from the first camera. I would really like it if someone could confirm this or point out a mistake in it.

    Best regards,
    Hassan

    ReplyDelete
  2. Hi Hassan,

    I have the same problem you had. I am also using two Kinects in my project and need to know the absolute pose of one Kinect with respect to the other. So, did you solve it using solvePnP()? Was the point you were asking about above correct? If not, how did you solve this problem? Could you please let me know?

    Thanks,
    ndona.

    ReplyDelete
  3. double _cm[9] = { 40, 0, 400,
    0, 40, 500,
    0, 0, 1.0 }; //WRONG last element of matrix!!!!

    ReplyDelete