
2D/3D pose estimation using solvePnP in OpenCV (NOT SOLVED)

In OpenCV, solvePnP matches known points on a known 3D object to their detected locations in an image; from those 2D-3D correspondences it recovers the object's orientation and position relative to the camera coordinate system.
The function is equivalent to finding the extrinsic camera parameters, which makes me suspect it is geared more towards planar objects. I need to do a few more experiments to find out why.

I'm using code from:
http://www.morethantechnical.com/2010/03/19/quick-and-easy-head-pose-estimation-with-opencv-w-code/  
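
Before the full program, here is a minimal sketch of the basic solvePnP call. Everything in it is placeholder data (the point lists are left empty and fx/fy/cx/cy are made-up intrinsics), so it only shows the shape of the API, not a working calibration:

// Minimal solvePnP sketch -- placeholder data, not a real calibration.
std::vector<cv::Point3f> objectPoints; // known 3D points in the object's own coordinate system
std::vector<cv::Point2f> imagePoints;  // the same points as detected in the image (pixels)
// ... fill both vectors with matching 3D-2D correspondences ...

double fx = 500, fy = 500, cx = 320, cy = 240; // placeholder intrinsics
cv::Mat K = (cv::Mat_<double>(3,3) << fx,  0, cx,
                                       0, fy, cy,
                                       0,  0,  1);
cv::Mat distCoeffs = cv::Mat::zeros(4, 1, CV_64FC1); // assume no lens distortion

cv::Mat rvec, tvec;
cv::solvePnP(objectPoints, imagePoints, K, distCoeffs, rvec, tvec);

// rvec/tvec are the extrinsics: a model point X maps into the camera
// frame as R*X + t, where R = Rodrigues(rvec).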




#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include <opencv2/highgui/highgui.hpp>


#include <iostream>
#include <string>
#include <cstdlib> // for exit()


#include <vector>

using namespace cv;
using namespace std;

#include <GL/gl.h>
#include <GL/glu.h>
#include <GL/glut.h>

void loadNext();
void loadWithPoints(Mat& ip, Mat& img);

const GLfloat light_ambient[]  = { 0.0f, 0.0f, 0.0f, 1.0f };
const GLfloat light_diffuse[]  = { 1.0f, 1.0f, 1.0f, 1.0f };
const GLfloat light_specular[] = { 1.0f, 1.0f, 1.0f, 1.0f };
const GLfloat light_position[] = { 2.0f, 5.0f, 5.0f, 0.0f };

const GLfloat mat_ambient[]    = { 0.7f, 0.7f, 0.7f, 1.0f };
const GLfloat mat_diffuse[]    = { 0.8f, 0.8f, 0.8f, 1.0f };
const GLfloat mat_specular[]   = { 1.0f, 1.0f, 1.0f, 1.0f };
const GLfloat high_shininess[] = { 100.0f };

double rot[9] = {0};
Vec3d eav;
GLuint textureID;
Mat backPxls;
vector<double> rv(3), tv(3);
Mat rvec(rv),tvec(tv);
Mat camMatrix;

void resize(int width, int height)
{
    const float ar = (float) width / (float) height;

    glViewport(0, 0, width, height);

    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(40, ar, 0.01, 1000.0); // use the actual window aspect ratio

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity() ;
}

int __w=250,__h=250;

void key(unsigned char key, int x, int y)
{
    //static int counter = 0;

    switch (key)
    {
    case 27:
    case 'Q':
    case 'q':
        exit(0); // quit on ESC or Q; glutMainLoop() never returns on its own
        break;
    case ' ':
        loadNext();

        glBindTexture(GL_TEXTURE_2D, textureID); // bind the texture to a texture target
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // linear minification filter
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); // linear magnification filter
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, backPxls.cols, backPxls.rows, 0, GL_RGB, GL_UNSIGNED_BYTE, backPxls.data);

        break;
    default:
        break;
    }

    glutPostRedisplay();
}

void idle(void)
{
    glutPostRedisplay();
}



void myGLinit() {
    glClearColor(1,1,1,1);

    glShadeModel(GL_SMOOTH);

    glEnable(GL_DEPTH_TEST);
    glDepthFunc(GL_LEQUAL);

    glEnable(GL_LIGHT0);
    glEnable(GL_NORMALIZE);
    glEnable(GL_COLOR_MATERIAL);
    glColorMaterial ( GL_FRONT, GL_AMBIENT_AND_DIFFUSE );

    glLightfv(GL_LIGHT0, GL_AMBIENT,  light_ambient);
    glLightfv(GL_LIGHT0, GL_DIFFUSE,  light_diffuse);
    glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular);
    glLightfv(GL_LIGHT0, GL_POSITION, light_position);

    glMaterialfv(GL_FRONT, GL_AMBIENT,   mat_ambient);
    glMaterialfv(GL_FRONT, GL_DIFFUSE,   mat_diffuse);
    glMaterialfv(GL_FRONT, GL_SPECULAR,  mat_specular);
    glMaterialfv(GL_FRONT, GL_SHININESS, high_shininess);

    glEnable(GL_LIGHTING);

    glGenTextures(1, &textureID);
}



void display(void)
{
    glClearColor(1.0f, 1.0f, 1.0f, 0.5f);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);    // Clear Screen And Depth Buffer

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    gluLookAt(0,0,0,0,0,1,0,1,0);

    //----------Axes
    glPushMatrix();
    glTranslated(0,0,5);

    glPushMatrix();
    // OpenGL matrices are column-major, so loading rot[] row by row
    // here actually loads its transpose (the inverse rotation).
    double _d[16] = { rot[0], rot[1], rot[2], 0,
                      rot[3], rot[4], rot[5], 0,
                      rot[6], rot[7], rot[8], 0,
                      0,      0,      0,      1 };
    glMultMatrixd(_d);
    glRotated(180,1,0,0);

    //Z = red
    glPushMatrix();
    glRotated(180,0,1,0);
    glColor3d(1,0,0);
    glutSolidCone(0.05,1,15,20);
    glTranslated(0,0,1);
    glScaled(.1,.1,.1);
    glutSolidTetrahedron();
    glPopMatrix();

    //Y = green
    glPushMatrix();
    glRotated(-90,1,0,0);
    glColor3d(0,1,0);
    glutSolidCone(0.05,1,15,20);
    glTranslated(0,0,1);
    glScaled(.1,.1,.1);
    glutSolidTetrahedron();
    glPopMatrix();

    //X = blue
    glPushMatrix();
    glRotated(-90,0,1,0);
    glColor3d(0,0,1);
    glutSolidCone(0.05,1,15,20);
    glTranslated(0,0,1);
    glScaled(.1,.1,.1);
    glutSolidTetrahedron();
    glPopMatrix();

    glPopMatrix();
    glPopMatrix();
    //----------End axes

    glutSwapBuffers();
}

int start_opengl_with_stereo(int argc,char** argv) {
    glutInitWindowSize(250,250);
    glutInitWindowPosition(40,40);
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH | GLUT_MULTISAMPLE);
    glutCreateWindow("3D points coordinate");

    myGLinit();

    glutReshapeFunc(resize);
    glutDisplayFunc(display);
    glutKeyboardFunc(key);
    glutIdleFunc(idle);

    glutMainLoop();

    return 1;
}

Mat op;

void loadNext() {
    vector<Point2f> points;

    // 2D image points (pixels) corresponding to the 3D model points set up in main()
    float x, y;
    x = 282; y = 274; points.push_back(cv::Point2f(x, y));
    x = 397; y = 227; points.push_back(cv::Point2f(x, y));
    x = 577; y = 271; points.push_back(cv::Point2f(x, y));
    x = 462; y = 318; points.push_back(cv::Point2f(x, y));
    x = 270; y = 479; points.push_back(cv::Point2f(x, y));
    x = 450; y = 523; points.push_back(cv::Point2f(x, y));
    x = 566; y = 475; points.push_back(cv::Point2f(x, y));
    Mat ip(points);

    Mat img = Mat::zeros(800, 600, CV_8UC3); // 800 rows x 600 cols
    for (unsigned int i = 0; i < points.size(); ++i)
    {
        std::cout << points[i] << std::endl;
        cv::circle(img, points[i], 2, cv::Scalar(0, 0, 255, 0), 1, 8, 0);
    }
    loadWithPoints(ip,img);

}

void loadWithPoints(Mat& ip, Mat& img) {
    double _dc[] = {0,0,0,0}; // distortion coefficients: assume no lens distortion
    solvePnP(op, ip, camMatrix, Mat(1,4,CV_64FC1,_dc), rvec, tvec, true);

    Mat rotM(3,3,CV_64FC1,rot);
    Rodrigues(rvec, rotM); // rotation vector -> 3x3 rotation matrix (fills rot[] for OpenGL)
    double* _r = rotM.ptr<double>();

    // Build a 3x4 projection matrix [R|0] so decomposeProjectionMatrix
    // can return the Euler angles in eav.
    Mat tmp, tmp1, tmp2, tmp3, tmp4, tmp5;
    double _pm[12] = {_r[0], _r[1], _r[2], 0,
                      _r[3], _r[4], _r[5], 0,
                      _r[6], _r[7], _r[8], 0};
    decomposeProjectionMatrix(Mat(3,4,CV_64FC1,_pm), tmp, tmp1, tmp2, tmp3, tmp4, tmp5, eav);
    imshow("tmp",img);
}


int main(int argc, char** argv)
{

    // 3D model points: seven corners of a unit cube centred on the origin
    vector<Point3f> modelPoints;
    float x, y, z;
    x =  .5; y =  .5; z = -.5; modelPoints.push_back(cv::Point3f(x, y, z));
    x =  .5; y =  .5; z =  .5; modelPoints.push_back(cv::Point3f(x, y, z));
    x = -.5; y =  .5; z =  .5; modelPoints.push_back(cv::Point3f(x, y, z));
    x = -.5; y =  .5; z = -.5; modelPoints.push_back(cv::Point3f(x, y, z));
    x =  .5; y = -.5; z = -.5; modelPoints.push_back(cv::Point3f(x, y, z));
    x = -.5; y = -.5; z = -.5; modelPoints.push_back(cv::Point3f(x, y, z));
    x = -.5; y = -.5; z =  .5; modelPoints.push_back(cv::Point3f(x, y, z));

    op = Mat(modelPoints);

    // initial pose guess: 180 degrees about X, one unit in front of the camera
    rvec = Mat(rv);
    double _d[9] = {1,  0,  0,
                    0, -1,  0,
                    0,  0, -1};
    Rodrigues(Mat(3,3,CV_64FC1,_d), rvec);
    tv[0] = 0; tv[1] = 0; tv[2] = 1;
    tvec = Mat(tv);
    double _cm[9] = { 40,  0, 400,
                       0, 40, 500,
                       0,  0,  40 }; //calibration matrix PROBLEM!?
    camMatrix = Mat(3,3,CV_64FC1,_cm);

    namedWindow("tmp",1);
    loadNext();

    start_opengl_with_stereo(argc,argv);

    return 0;
}
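
Regarding the calibration matrix marked PROBLEM!? above: in a standard pinhole model, the intrinsic matrix has the focal lengths on the diagonal, the principal point in the last column, and a fixed 1 in the bottom-right corner, so the last element should be 1, not 40. A corrected sketch (the focal length of 40 and the principal point are still placeholder values, not a real calibration):

double _cm[9] = { 40,  0, 400,
                   0, 40, 500,
                   0,  0,   1 }; // bottom-right element of a pinhole intrinsic matrix is always 1
camMatrix = Mat(3,3,CV_64FC1,_cm);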

Comments

  1. Hello,

    In my project I am required to use multiple Asus Xtion Pro Live cameras in a fixed configuration inside a room to capture dynamic scenes. The first step is of course to calibrate these cameras with respect to each other. I am using the stereo calibration functionality provided by OpenCV for this, calibrating one pair at a time, and will then move towards ICP registration, both local and global. Since I have 3D data from both cameras, I thought I could also use the solvePnP() function provided by OpenCV, with 2D points from one camera and 3D points from the other camera as input, and the R and T between them as output, the same as the stereoCalibrate() function.

    My question is about the arrangement of the input data and the interpretation of the resulting output. From my understanding, stereoCalibrate() provides an R and T which transform points in camera 2's coordinate system into camera 1's coordinate system, and this does not depend on which camera is the left camera and which is the right camera. I would like to confirm this.

    Secondly, from what I have studied, solvePnP() transforms points from the object's 3D coordinate system to points in the camera's 2D image coordinates. Therefore, if stereoCalibrate() gives an R and T which transform points from camera 2 to camera 1, then to get equivalent results from solvePnP() we have to give it the 3D points from the second camera and the corresponding 2D points from the first camera as input, as far as I understand. I would really appreciate it if someone could confirm this or point out a mistake.

    Best regards,
    Hassan

  2. Hi Hassan,

    I have the same problem you had. I am also using two Kinects in my project and need to know the absolute pose of one Kinect with respect to the other. Did you solve it using solvePnP()? Was the point you raised above correct, or how did you solve this problem? Could you please let me know?

    Thanks,
    ndona.

  3. double _cm[9] = { 40,  0, 400,
                        0, 40, 500,
                        0,  0, 1.0 }; //WRONG last element of matrix!!!!


