Skip to main content

2D/3D estimation using solvePnP in opencv (NOT SOLVED)

In OpenCV, "solvePnP" is used to find known points on a known 3D object; doing so, the object's orientation relative to the camera coordinate system can be found.
The function is equivalent to finding the extrinsic camera parameters, which makes me believe it's more suited for planar objects. I need to do a few more experiments to find out why.

I'm using code from:
http://www.morethantechnical.com/2010/03/19/quick-and-easy-head-pose-estimation-with-opencv-w-code/




#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include <opencv2/highgui/highgui.hpp>

#include <GL/gl.h>
#include <GL/glu.h>
#include <glut.h>

using namespace cv;

using namespace std;

void loadNext();
void loadWithPoints(Mat& ip, Mat& img);

const GLfloat light_ambient[]  = { 0.0f, 0.0f, 0.0f, 1.0f };
const GLfloat light_diffuse[]  = { 1.0f, 1.0f, 1.0f, 1.0f };
const GLfloat light_specular[] = { 1.0f, 1.0f, 1.0f, 1.0f };
const GLfloat light_position[] = { 2.0f, 5.0f, 5.0f, 0.0f };

const GLfloat mat_ambient[]    = { 0.7f, 0.7f, 0.7f, 1.0f };
const GLfloat mat_diffuse[]    = { 0.8f, 0.8f, 0.8f, 1.0f };
const GLfloat mat_specular[]   = { 1.0f, 1.0f, 1.0f, 1.0f };
const GLfloat high_shininess[] = { 100.0f };

double rot[9] = {0};
Vec3d eav;
GLuint textureID;
Mat backPxls;
vector<double> rv(3), tv(3);
Mat rvec(rv),tvec(tv);
Mat camMatrix;

void resize(int width, int height)
{
    const float ar = (float) width / (float) height;

    glViewport(0, 0, width, height);

    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(40,1.0,0.01,1000.0);

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity() ;
}

int __w=250,__h=250;

void key(unsigned char key, int x, int y)
{
    //static int counter = 0;

    switch (key)
    {
    case 27 :
    case 'Q':
    case 'q':
        break;
    case ' ':
        loadNext();

        glBindTexture(GL_TEXTURE_2D, textureID);//This binds the texture to a texture target
        glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR);//set our filter
        glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR);    //set our filter
        glTexImage2D(GL_TEXTURE_2D, 0, 3, backPxls.cols, backPxls.rows, 0, GL_RGB, GL_UNSIGNED_BYTE, backPxls.data);

        break;
    default:
        break;
    }

    glutPostRedisplay();
}

void idle(void)
{
    glutPostRedisplay();
}



void myGLinit() {
    glClearColor(1,1,1,1);

    glShadeModel(GL_SMOOTH);

    glEnable(GL_DEPTH_TEST);
    glDepthFunc(GL_LEQUAL);

    glEnable(GL_LIGHT0);
    glEnable(GL_NORMALIZE);
    glEnable(GL_COLOR_MATERIAL);
    glColorMaterial ( GL_FRONT, GL_AMBIENT_AND_DIFFUSE );

    glLightfv(GL_LIGHT0, GL_AMBIENT,  light_ambient);
    glLightfv(GL_LIGHT0, GL_DIFFUSE,  light_diffuse);
    glLightfv(GL_LIGHT0, GL_SPECULAR, light_specular);
    glLightfv(GL_LIGHT0, GL_POSITION, light_position);

    glMaterialfv(GL_FRONT, GL_AMBIENT,   mat_ambient);
    glMaterialfv(GL_FRONT, GL_DIFFUSE,   mat_diffuse);
    glMaterialfv(GL_FRONT, GL_SPECULAR,  mat_specular);
    glMaterialfv(GL_FRONT, GL_SHININESS, high_shininess);

    glEnable(GL_LIGHTING);

    glGenTextures(1, &textureID);
}



void display(void)
{
    glClearColor(1.0f, 1.0f, 1.0f, 0.5f);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);    // Clear Screen And Depth Buffer

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

    gluLookAt(0,0,0,0,0,1,0,1,0);

    //----------Axes
    glPushMatrix();
    glTranslated(0,0,5);

    glPushMatrix();
    double _d[16] = {    rot[0],rot[1],rot[2],0,
                        rot[3],rot[4],rot[5],0,
                        rot[6],rot[7],rot[8],0,
                        0,       0,      0        ,1};
    glMultMatrixd(_d);
    glRotated(180,1,0,0);

    //Z = red
    glPushMatrix();
    glRotated(180,0,1,0);
    glColor3d(1,0,0);
    glutSolidCone(0.05,1,15,20);
    glTranslated(0,0,1);
    glScaled(.1,.1,.1);
    glutSolidTetrahedron();
    glPopMatrix();

    //Y = green
    glPushMatrix();
    glRotated(-90,1,0,0);
    glColor3d(0,1,0);
    glutSolidCone(0.05,1,15,20);
    glTranslated(0,0,1);
    glScaled(.1,.1,.1);
    glutSolidTetrahedron();
    glPopMatrix();

    //X = blue
    glPushMatrix();
    glRotated(-90,0,1,0);
    glColor3d(0,0,1);
    glutSolidCone(0.05,1,15,20);
   
    glTranslated(0,0,1);
    glScaled(.1,.1,.1);
    glutSolidTetrahedron();
    glPopMatrix();

    glPopMatrix();
    glPopMatrix();
    //----------End axes

    glutSwapBuffers();
}

int start_opengl_with_stereo(int argc,char** argv) {
    glutInitWindowSize(250,250);
    glutInitWindowPosition(40,40);
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH | GLUT_MULTISAMPLE);
    glutCreateWindow("3D points coordinate");

    myGLinit();

    glutReshapeFunc(resize);
    glutDisplayFunc(display);
    glutKeyboardFunc(key);
    glutIdleFunc(idle);

    glutMainLoop();

    return 1;
}

Mat op;

void loadNext() {
    vector<Point2f > points;

float x,y;
 
  x=282;y=274;
  points.push_back(cv::Point2f(x,y));
  x=397;y=227;
  points.push_back(cv::Point2f(x,y));
  x=577;y=271;
  points.push_back(cv::Point2f(x,y));
  x=462;y=318;
  points.push_back(cv::Point2f(x,y));
  x=270;y=479;
  points.push_back(cv::Point2f(x,y));
  x=450;y=523;
  points.push_back(cv::Point2f(x,y));
  x=566;y=475;
  points.push_back(cv::Point2f(x,y));

    Mat ip(points);

    Mat img =  Mat::zeros( 800, 600, CV_8UC3 );
    for(unsigned int i = 0; i < points.size(); ++i)
    {
    std::cout << points[i] << std::endl;
    cv::circle(img,points[i],2,cv::Scalar(0, 0, 255, 0),1,8,0);
    }
    loadWithPoints(ip,img);

}

void loadWithPoints(Mat& ip, Mat& img) {
    double _dc[] = {0,0,0,0};
    solvePnP(op,ip,camMatrix,Mat(1,4,CV_64FC1,_dc),rvec,tvec,true);

    Mat rotM(3,3,CV_64FC1,rot);
    Rodrigues(rvec,rotM);
    double* _r = rotM.ptr<double>();

    Mat tmp,tmp1,tmp2,tmp3,tmp4,tmp5;
    double _pm[12] = {_r[0],_r[1],_r[2],0,
                      _r[3],_r[4],_r[5],0,
                      _r[6],_r[7],_r[8],0};
    decomposeProjectionMatrix(Mat(3,4,CV_64FC1,_pm),tmp,tmp1,tmp2,tmp3,tmp4,tmp5,eav);   
    imshow("tmp",img);
}


int main(int argc, char** argv)
{

    vector<Point3f > modelPoints;
    float x,y,z;
 
  x=.5;y=.5;z=-.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=.5;y=.5;z=.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=-.5;y=.5;z=.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=-.5;y=.5;z=-.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=.5;y=-.5;z=-.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=-.5;y=-.5;z=-.5;
  modelPoints.push_back(cv::Point3f(x,y,z));
  x=-.5;y=-.5;z=.5;
  modelPoints.push_back(cv::Point3f(x,y,z));


    op = Mat(modelPoints);

    rvec = Mat(rv);
    double _d[9] = {1,0,0,
                    0,-1,0,
                    0,0,-1};
    Rodrigues(Mat(3,3,CV_64FC1,_d),rvec);
    tv[0]=0;tv[1]=0;tv[2]=1;
    tvec = Mat(tv);
    double _cm[9] = { 40, 0, 400,
                      0, 40, 500,
                      0,  0,   40 }; //caliberation matrix PROBLEM!?
    camMatrix = Mat(3,3,CV_64FC1,_cm);

    namedWindow("tmp",1);
    loadNext();

    start_opengl_with_stereo(argc,argv);

    return 0;
}

Comments

  1. Hello,

    In my project i am required to use multiple Asus Xtion Pro live cameras in a fixed configuration inside a room to capture dynamic scenes. First step is to of course calibrate these cameras with respect to each other. I am using the stereo calibration functionality provided by OpenCV for this purpose calibrating each pair at a time first and then will move towards ICP registration both local and global. Since i have 3D data from both cameras i thought that i could also use the solvePnp() function provided by OpenCV with 2D points from one camera and 3D points from another camera as input and the R and T between them as output same as StereoCalibrate() function.

    The question i have is regarding the arrangement of the input data and interpretation of the resulting output. From my understanding StereoCalibrate() provides an R and T which transforms the points in camera 2's coordinate system to camera 1's coordinate system and it is not dependent on which camera is the left camera and which camera is the right camera. I would like to confirm this?

    Secondly from what i have studied solvePnP() transforms the points in 3D from object's coordinate system to points in camera's 2D coordinate system. Therefore if StereoCalibrate() gives R and T which transforms points from Camera 2 to Camera 1 then to get equivalent results from solvePnP() we have to give it as input 3D points from the second camera and corresponding 2D points from the first camera upto my understanding. I would really like if someone could confirm this or point out a mistake in it?

    Best regards,
    Hassan

    ReplyDelete
  2. Hi Hassan,

    i got the same problem you had. I am also using two Kinects in my project and need to know the absolute pose of one kinect with respect to the other. so, did you solve it using SolvePnP()? the point you were asking above correct? or how did you solve this problem. Could you please let me know.

    Thanks,
    ndona.

    ReplyDelete
  3. double _cm[9] = { 40, 0, 400,
    0, 40, 500,
    0, 0, 1.0 }; //WRONG last element of matrix!!!!

    ReplyDelete

Post a Comment

Popular posts from this blog

Computing Entropy of an image (CORRECTED)

entropy is a measure of the uncertainty associated with a random variable. basically i want to get a single value representing the entropy of an image. 1. Assign 255 bins for the range of values between 0-255 2. separate the image into its 3 channels 3. compute histogram for each channel 4. normalize all 3 channels unifirmely 5. for each channel get the bin value (Hc) and use its absolute value (negative log is infinity) 6. compute Hc*log10(Hc) 7. add to entropy and continue with 5 until a single value converges 5. get the frequency of each channel - add all the values of the bin 6. for each bin get a probability - if bin 1 = 20 bin 2 = 30 then frequency is 50 and probability is 20/50 and 30/50 then compute using shannon formula  REFERENCE: http://people.revoledu.com/kardi/tutorial/DecisionTree/how-to-measure-impurity.htm class atsHistogram { public:     cv::Mat DrawHistogram(Mat src)     {         /// Separate the image in 3 places ( R, G and B )    

Artificial Intelligence (K Nearest Neighbor) in OPENCV

In pattern recognition , the k -nearest neighbor algorithm ( k -NN) is a method for classifying objects based on closest training examples in the feature space . k -NN is a type of instance-based learning , or lazy learning where the function is only approximated locally and all computation is deferred until classification. The k -nearest neighbor algorithm is amongst the simplest of all machine learning algorithms: an object is classified by a majority vote of its neighbors, with the object being assigned to the class most common amongst its k nearest neighbors ( k is a positive integer , typically small). If k = 1, then the object is simply assigned to the class of its nearest neighbor. The k -NN algorithm can also be adapted for use in estimating continuous variables. One such implementation uses an inverse distance weighted average of the k -nearest multivariate neighbors. This algorithm functions as follows: Compute Euclidean or Mahalanobis distance from target plo

Blob Detection, Connected Component (Pure Opencv)

Connected-component labeling (alternatively connected-component analysis, blob extraction, region labeling, blob discovery, or region extraction) is an algorithmic application of graph theory, where subsets of connected components are uniquely labeled based on a given heuristic. Connected-component labeling is not to be confused with segmentation. i got the initial code from this URL: http://nghiaho.com/?p=1102 However the code did not compile with my setup of OpenCV 2.2, im guessing it was an older version. so a refactored and corrected the errors to come up with this Class class atsBlobFinder     {     public:         atsBlobFinder()         {         }         ///Original Code by http://nghiaho.com/?p=1102         ///Changed and added commments. Removed Errors         ///works with VS2010 and OpenCV 2.2+         void FindBlobs(const cv::Mat &binary, vector < vector<cv::Point>  > &blobs)         {             blobs.clear();             // Fill the la