// OpenCV - bagofwords_scan.cpp
//
// bagofwords_scan.cpp
//
// Joel Mckay, 2012-05-04 09:18 pm

 /* Warning: This sample is still rough... I cleaned it up a bit, but have not tested it yet.. */
 /*****************************************************************************************
         This program reads in a generic trained VOC2010 sample xml params, vocabulary,
         and configuration. It works in conjunction with OpenCV's sample code
         bagofwords_classification.cpp training class, and is mostly based on its class design.
 Joel Mckay
         [email protected]
         Disclaimer:
         THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
         "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
         LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
         FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
         COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
         INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
         BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
         LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
         CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
         LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
         WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
         OF SUCH DAMAGE.
  *****************************************************************************************/
 #include "global_headers.hpp"
 // File and sub-directory names, all appended to the trained-output directory
 // given on the command line.
 const string paramsFile = "params.xml";                        // scanner/trainer configuration read by main()
 const string vocabularyFile = "vocabulary.xml.gz";             // visual vocabulary read by main()
 const string bowImageDescriptorsDir = "/bowImageDescriptors";  // not referenced by the code visible in this file
 const string svmsDir = "/svms";                                // directory scanned for trained SVM files
 const string plotsDir = "/plots";                              // not referenced by the code visible in this file
 /****************************************************************************************\
 *                          OpenCV's  Sample on image classification                             *
 \****************************************************************************************/
 //
 // This part of the code was refactored a little.
 //
 struct DDMParams
+{
     DDMParams() : detectorType("SURF"), descriptorType("SURF"), matcherType("BruteForce") {}
     DDMParams( const string _detectorType, const string _descriptorType, const string& _matcherType ) :
         detectorType(_detectorType), descriptorType(_descriptorType), matcherType(_matcherType){}
     void read( const FileNode& fn )
+    {
         fn["detectorType"] >> detectorType;
         fn["descriptorType"] >> descriptorType;
         fn["matcherType"] >> matcherType;
+    }
     void write( FileStorage& fs ) const
+    {
         fs << "detectorType" << detectorType;
         fs << "descriptorType" << descriptorType;
         fs << "matcherType" << matcherType;
+    }
     void print() const
+    {
         cout << "detectorType: " << detectorType << endl;
         cout << "descriptorType: " << descriptorType << endl;
         cout << "matcherType: " << matcherType << endl;
+    }
     string detectorType;
     string descriptorType;
     string matcherType;
 };
 struct VocabTrainParams
+{
     VocabTrainParams() : trainObjClass("chair"), vocabSize(VISUAL_VOCABULARY_FOR_BOG), memoryUse(VISUAL_VOCABULARY_MEMORY_LIMIT), descProportion(VISUAL_VOCABULARY_DESCRIPTORS_FROM_EACH_IMAGE_PROPORTION_OF_TOTAL) {}
     VocabTrainParams( const string _trainObjClass, size_t _vocabSize, size_t _memoryUse, float _descProportion ) :
             trainObjClass(_trainObjClass), vocabSize(_vocabSize), memoryUse(_memoryUse), descProportion(_descProportion) {}
     void read( const FileNode& fn )
+    {
         fn["trainObjClass"] >> trainObjClass;
         fn["vocabSize"] >> vocabSize;
         fn["memoryUse"] >> memoryUse;
         fn["descProportion"] >> descProportion;
+    }
     void write( FileStorage& fs ) const
+    {
         fs << "trainObjClass" << trainObjClass;
         fs << "vocabSize" << vocabSize;
         fs << "memoryUse" << memoryUse;
         fs << "descProportion" << descProportion;
+    }
     void print() const
+    {
         cout << "trainObjClass: " << trainObjClass << endl;
         cout << "vocabSize: " << vocabSize << endl;
         cout << "memoryUse: " << memoryUse << endl;
         cout << "descProportion: " << descProportion << endl;
+    }
     string trainObjClass; // Object class used for training visual vocabulary.
                           // It shouldn't matter which object class is specified here - visual vocab will still be the same.
     int vocabSize; //number of visual words in vocabulary to train
     int memoryUse; // Memory to preallocate (in MB) when training vocab.
                       // Change this depending on the size of the dataset/available memory.
     float descProportion; // Specifies the number of descriptors to use from each image as a proportion of the total num descs.
 };
 struct SVMTrainParamsExt
+{
     SVMTrainParamsExt() : descPercent(VISUAL_VOCABULARY_DESCRIPTORS_FROM_EACH_TRAINING_PROPORTION_IMAGE), targetRatio(VISUAL_VOCABULARY_TRAINING_SUCEESS_TARGET), balanceClasses(true) {}
     SVMTrainParamsExt( float _descPercent, float _targetRatio, bool _balanceClasses ) :
             descPercent(_descPercent), targetRatio(_targetRatio), balanceClasses(_balanceClasses) {}
     void read( const FileNode& fn )
+    {
         fn["descPercent"] >> descPercent;
         fn["targetRatio"] >> targetRatio;
         fn["balanceClasses"] >> balanceClasses;
+    }
     void write( FileStorage& fs ) const
+    {
         fs << "descPercent" << descPercent;
         fs << "targetRatio" << targetRatio;
         fs << "balanceClasses" << balanceClasses;
+    }
     void print() const
+    {
         cout << "descPercent: " << descPercent << endl;
         cout << "targetRatio: " << targetRatio << endl;
         cout << "balanceClasses: " << balanceClasses << endl;
+    }
     float descPercent; // Percentage of extracted descriptors to use for training.
     float targetRatio; // Try to get this ratio of positive to negative samples (minimum).
     bool balanceClasses;    // Balance class weights by number of samples in each (if true cSvmTrainTargetRatio is ignored).
 };
 void printUsedParams( const string& mediaPath, const string& resDir,
                       const DDMParams& ddmParams, const VocabTrainParams& vocabTrainParams,
                       const SVMTrainParamsExt& svmTrainParamsExt )
+{
     cout << "CURRENT SCANNER CONFIGURATION" << endl;
     cout << "----------------------------------------------------------------" << endl;
     cout << "mediaPath: " << mediaPath << endl;
     cout << "resDir: " << resDir << endl;
     cout << endl; ddmParams.print();
     cout << endl; vocabTrainParams.print();
     cout << endl; svmTrainParamsExt.print();
     cout << "----------------------------------------------------------------" << endl << endl;
+}
 bool readVocabulary( const string& filename, Mat& vocabulary )
+{
     #if defined(DEBUG_MODE)
     cout << "Reading vocabulary...";
     #endif
     FileStorage fs( filename, FileStorage::READ );
     if( fs.isOpened() )
+    {
         fs["vocabulary"] >> vocabulary;
         return true;
+    }
     return false;
+}
   bool writeBowImageDescriptor( const string& file, const Mat& bowImageDescriptor )
+{
     FileStorage fs( file, FileStorage::WRITE );
     if( fs.isOpened() )
+    {
         fs << "imageDescriptor" << bowImageDescriptor;
         return true;
+    }
     return false;
+}
 /***********************************************************************************/
 // Collects the base names of the XML / gzip files found directly inside `dir`
 // (the scan is not recursive).
 //
 // dir              - directory to scan.
 // m_object_classes - receives one entry per accepted file: the file name up
 //                    to (not including) its first '.'. Existing entries are
 //                    kept. A NULL pointer makes the call a no-op.
 //
 // A regular file is accepted when the text after its last '.' is "xml" or
 // "gz", compared case-insensitively (this also covers "*.xml.gz"). Files with
 // no '.' at all, and files whose base name would be empty (e.g. ".xml"), are
 // skipped. If the directory cannot be opened an error is printed to stdout
 // and the list is left untouched.
 void loadListFromDir( std::string dir , std::vector<std::string>* m_object_classes)
 {
     // Upper bound on the number of directory entries examined, so that a
     // pathological directory (ln -s tricks) cannot keep the scan running.
     const int maxEntries = 100000;

     if (m_object_classes == NULL)
         return;        // nowhere to store the result

     DIR *dp = opendir( dir.c_str() );        //try to open the directory
     if (dp == NULL)
     {
         std::cout << "Error opening " << dir << std::endl;
         return;
     }

     struct dirent *dirp;
     struct stat filestat;
     // Every entry counts towards the limit, including the ones skipped below.
     for (int scanned = 0; scanned < maxEntries && (dirp = readdir( dp )) != NULL; ++scanned)
     {
         const std::string filename = dirp->d_name;
         const std::string filepath = dir + "/" + filename;

         // file invalid? we'll skip it...
         if (stat( filepath.c_str(), &filestat ) != 0) continue;
         // only real files are of interest (no directories, devices, ...)
         if (!S_ISREG( filestat.st_mode )) continue;

         // Without a '.' there is no extension; do not mistake the whole
         // name for one.
         const std::string::size_type lastDot = filename.find_last_of('.');
         if (lastDot == std::string::npos) continue;

         std::string fileext = filename.substr(lastDot + 1);
         std::transform(fileext.begin(), fileext.end(), fileext.begin(), ::tolower);
         if (fileext != "xml" && fileext != "gz") continue;        //gzip or XML ?

         // Base name is everything before the FIRST dot, so "chair.xml.gz"
         // yields "chair".
         const std::string filebasename = filename.substr(0, filename.find_first_of('.'));
         if (filebasename.empty()) continue;        // e.g. ".xml"

         #if defined(DEBUG_MODE)
         std::cout << "Loaded: " << filebasename << "  " << filepath.c_str() << std::endl;
         #endif
         m_object_classes->push_back(filebasename);
     }
     closedir( dp );        //done loading list
 }
 /***********************************************************************************/
 int main(int argc, char** argv)
+{
     if( argc != 3 && argc != 6 )
+    {
         echo <<"\nbagofwords_scan </path/to/some/video/file.avi> </path/to/the/trained/BOW/VOCDATA/output> <SURF> <OpponentSURF> <BruteForce>"   << endl;
         exit(-1);
+    }
     CvMemStorage* storageTmp = cvCreateMemStorage(0);
     const string mediaPath = argv[1], resPath = argv[2];
     // Read default parameters file
     string vocName;
     DDMParams ddmParams;
     VocabTrainParams vocabTrainParams;
     SVMTrainParamsExt svmTrainParamsExt;
     FileStorage paramsFS( resPath + "/" + paramsFile, FileStorage::READ );
     if( paramsFS.isOpened() )
+    {
             const FileNode& fn=paramsFS.root();                        //parse the XML file for the type of trained data settings
             fn["vocName"] >> vocName;
             FileNode currFn = fn;
             currFn = fn["ddmParams"];
             ddmParams.read( currFn );
             currFn = fn["vocabTrainParams"];
             vocabTrainParams.read( currFn );
             currFn = fn["svmTrainParamsExt"];
             svmTrainParamsExt.read( currFn );
     }else{
         cout << "\n Could open the file " <<  resPath << "/" << paramsFile << endl;
         exit(-1);
+    }
     // Create detector, descriptor, matcher.
     Ptr<FeatureDetector> featureDetector = FeatureDetector::create( ddmParams.detectorType );
     Ptr<DescriptorExtractor> descExtractor = DescriptorExtractor::create( ddmParams.descriptorType );
         cout << "\nHeisenbug: descExtractor " << descExtractor->descriptorType() << "=" << CV_32FC1 << " ?\n";
     Ptr<BOWImgDescriptorExtractor> bowExtractor;
     if( featureDetector.empty() || descExtractor.empty() )
+    {
         cout << "featureDetector or descExtractor was not created" << endl;
         exit(-1);
     }else{
         Ptr<DescriptorMatcher> descMatcher = DescriptorMatcher::create( ddmParams.matcherType );
         if( featureDetector.empty() || descExtractor.empty() || descMatcher.empty() )
+        {
             cout << "descMatcher was not created" << endl;
             exit(-1);
+        }
         bowExtractor = new BOWImgDescriptorExtractor( descExtractor, descMatcher );
+    }
     // Print configuration to screen
         printUsedParams( mediaPath, resPath, ddmParams, vocabTrainParams, svmTrainParamsExt );
         cout << "\n Threshold for calculated " << MINIMUM_BOW_CONFIDENCE_SCORE << " class confidence...\n" << endl;
     // 1. Load visual word pre-calculated vocabulary file from previous run
         Mat vocabulary;
             string vocabularyFilename = resPath + "/" + vocabularyFile;
         if( !readVocabulary( vocabularyFilename, vocabulary) )
+        {
                 cout << "\n Could not load vocabulary file! \n" << vocabularyFilename << endl;
                 return -1;
+        }
         bowExtractor->setVocabulary( vocabulary );
         #if defined(DEBUG_MODE)
         cout << "\nSet Vocabulary: rows=" << vocabulary.rows << "  cols="<< vocabulary.cols << endl << endl;
         #endif
     // 2. check for classifier and run a query for each object
         //define available object_classes for VOC2010 dataset etc...
         //by scanning for svm trained object classes
         vector<string> m_object_classes;
         string svmFileLocation = resPath + svmsDir ;
         loadListFromDir(svmFileLocation, &m_object_classes);
         std::vector<BogClassifierTracker> bogClasses;        //track all hits to the BOG classes
         //TODO: Prepare to query objects  (ptr as we may add a context subset filter later)
          const vector<string>& objClasses=m_object_classes;
     #if defined(DEBUG_MODE)
                 cout << "\n Loaded Vocabulary: bowExtractor->descriptorSize()=" << bowExtractor->descriptorSize() << endl;
     #endif
     CV_Assert( !bowExtractor->getVocabulary().empty() );        //poll Vocabulary is valid ?
         cout << "Load SVM files for selected Visual Vocabulary:" << endl;
             for( size_t classIdx = 0; (classIdx < objClasses.size()); ++classIdx )
+            {
                 /* first check if a previously trained svm for the current class has been saved to file */
                 string svmFilename = resPath + svmsDir + "/" + objClasses[classIdx] + ".xml.gz";
                 FileStorage fs( svmFilename, FileStorage::READ);
                 if( fs.isOpened() )
+                {
                         // Load a classifier from the trainer dataset
                         BogClassifierTracker BOGCLassRecord = BogClassifierTracker(objClasses[classIdx], svmFilename);
                         bogClasses.push_back(BOGCLassRecord);
                         #if defined(DEBUG_MODE)
                         cout << "*** LOADING SVM CLASSIFIER FOR CLASS " << bogClasses[classIdx].nameOfClass << " ***" << endl;
                         cout << svmFilename << endl;
                         #endif
                         cout <<   bogClasses[classIdx].nameOfClass << " " << std::flush;
                         fs.release();
+                }
+            }
         cout << "\n---------------------------------------------------------------" << endl;
         /* probe reference video for valid data */
         IplImage        *imgBuffer, *img;
         CvCapture *capture=cvCreateFileCapture(mediaPath.c_str());
         int frameCounter = 0;
         /******************** Open target file buffer? ******************/
         cvGrabFrame(capture);
         imgBuffer = cvRetrieveFrame(capture);
         if( imgBuffer == 0 ) {
                 fprintf( stderr, "Cannot load video target file %s!\n", mediaPath.c_str());
                 exit(-1);
+        }
         img = cvCreateImage(cvGetSize(imgBuffer), IPL_DEPTH_8U, 3); // imgBuffer->depth,   imgBuffer->nChannels); //frame copy
         /* create new image for the grayscale version */
         IplImage *imgBufferGray = cvCreateImage( cvGetSize(imgBuffer), IPL_DEPTH_8U, 1 );
         //cvNamedWindow("image_src", 1);
         while(cvGrabFrame(capture))
+        {
                 imgBuffer = cvRetrieveFrame(capture);      //buffer frame
         //        cvShowImage("image_src",imgBuffer);
         //        cvWaitKey(0);
                 frameCounter++;
                 cout << "\rFrame number " << frameCounter << "                                             ";
                 Mat imgMat = imgBuffer;                 //Fast copy Pointer construct from buffer (not parallel)
                 //Mat imgMat(imgBuffer);                 //copy and construct from buffer (to go parallel later)
                 size_t i = 0;
                 vector<KeyPoint> keypoints;
                 vector<Mat> bowImageDescriptors;
                 #if defined(DEBUG_MODE)
                 cout << "\nComputing descriptors for image... " ;
                 #endif
                 featureDetector->detect( imgMat, keypoints );                //svn  r8280 breaks this call
                 #if defined(DEBUG_MODE)
                 cout << "\nGenerating BoW vector... " << endl ;
                 #endif
                 bowImageDescriptors.resize( (i+1) ); //images size = 1
                 bowExtractor->compute( imgMat, keypoints, bowImageDescriptors[i] );
                 float imageKeypointsSize = keypoints.size();
                 // Skip images for descriptors that could not be calculated
                 if( bowImageDescriptors[i].empty() || (bowImageDescriptors[i].cols == 0) || (bowImageDescriptors[i].rows == 0)  || (imageKeypointsSize < 1))
+                {
                         cout << "\n Error: bow image descriptor empty.\n" << endl;                //coomon if the image is a black screen etc...
                         //exit(-1);
                 }else{
                         #if defined(DEBUG_MODE)
                         cout << "\nNote: bowImageDescriptors.size=" << bowImageDescriptors.size()
                                 << " col=" << bowImageDescriptors[i].cols
                                 << " row="<< bowImageDescriptors[i].rows << endl;
                         #endif
                         //display frame keypoints for selected SVM checker
                         drawKeypoints(imgMat, keypoints, imgMat, Scalar(0,255,255));
                         float signMul = -1.f;                //1.f
                         for( size_t imageIdx = 0; imageIdx < bogClasses.size(); imageIdx++ )
+                        {
                                  // Use the bag of words vectors to calculate classifier output for each image in test set
                                 #if defined(DEBUG_MODE)
                                         cout << "\nFrame " << frameCounter
                                                 << ": CALCULATING CONFIDENCE SCORE FOR CLASS " << bogClasses[imageIdx].nameOfClass << endl;
                                 #endif
                                 #if defined(DEBUG_MODE)
                                 float svmFeaturesUsed = (*bogClasses[imageIdx].svm).get_var_count();
                                 #endif
                                 float scoreVal = (*bogClasses[imageIdx].svm).predict( bowImageDescriptors[i], true );
                                 float classVal=0;
                                 //no change in output seen
                         //        if( imageIdx == 0 )
+                                {
                                     // In the first iteration, determine the sign of the positive class
                                     classVal = (*bogClasses[imageIdx].svm).predict(bowImageDescriptors[i], false );
                                     signMul = (classVal < 0) == (scoreVal < 0) ? 1.f : -1.f;
+                                }
                                 // svm output of decision function
                                 float confidence = signMul * scoreVal;
                                 #if defined(DEBUG_MODE)
                                 cout << "\nConfidence=" << confidence << endl;
                                 #endif
                                 #if defined(DEBUG_MODE)
                                 cout << "\n classVal=" << classVal << "  scoreVal=" << scoreVal << endl;
                                 #endif
                                 #if defined(DEBUG_MODE)
                                 cout << "\n Keypoints=" << imageKeypointsSize << "    Used Points=" << svmFeaturesUsed <<endl;
                                 #endif
                                 if( (confidence > MINIMUM_BOW_CONFIDENCE_SCORE) && (confidence < MAXIMUM_BOW_CONFIDENCE_SCORE))
+                                {
                                         // Show support vector count
                                         int supportVectorCount     = (*bogClasses[imageIdx].svm).get_support_vector_count();
                                         #if defined(DEBUG_MODE)
                                                 cout << "\n  support vector count: " << supportVectorCount << endl ;
                                         #endif
                                         #if defined(DEBUG_MODE)
                                                 cout << "\n score: " << bogClasses[imageIdx].nameOfClass  << " = " << confidence << " [ " << scoreVal << " ] ";
                                         #endif
                                 }else {
                                         #if defined(DEBUG_MODE)
                                                 cout << "\n Skipped: " << bogClasses[imageIdx].nameOfClass  << " = " << confidence << " [ " << scoreVal << " ] ";
                                         #endif
+                                }
+                        }
                         #if defined(DEBUG_MODE)
                         imshow("image_keypoints",imgMat);
                         int sc = waitKey(1000);
                         cout << "\n---------------------------------------------------------------" << endl;
                         #else
                         imshow("image_keypoints",imgMat);
                         int sc = waitKey(0);
                         #endif
                         //debug
                         /*
                         if( !writeBowImageDescriptor( "example.jpg.xml.gz", bowImageDescriptors[i] ) )
+                        {
                                 cout << "Error: file example can not be opened to write bow image descriptor" << endl;
                                 exit(-1);
+                        }
                         */
+                }
+        }
         cvReleaseCapture(&capture);
         return 0;
+}

Login	Password