/************************************************************************ * $Id: emsocr.h 642 2011-03-02 21:30:33Z elzubeir $ * * ------------ * Description: * ------------ * * (C) Copyright 2009,2010 ALLCONTENT. All rights reserved. * * ----------------- * Revision Details: (Updated by Revision Control System) * ----------------- * $Date: 2011-03-03 01:30:33 +0400 (Thu, 03 Mar 2011) $ * $Author: elzubeir $ * $Revision: 642 $ * $HeadURL: file:///opt/svn/socialhose/trunk/app/emsOCR/emsocr.h $ * ************************************************************************/ #ifndef _emsOCR_H_ #define _emsOCR_H_ #include #include #include #include #include #include #include "word.h" #include "tag.h" using std::string; using std::vector; struct PageInfo { //! page id int m_pageName; //! id of the section pages entry int m_sectionPagesId; //! id for the publication int m_publicationId; //! type of the publication int m_publicationType; //! date of the issue QString m_publicationDate; //! publication issue int m_publicationIssue; //! country int m_country; //! remote path of the page QString m_remotePath; //! local path of the page QString m_localPath; //! custom name of the page QString m_customName; //! is this page arabic bool m_isArabic; //! has this page copied bool m_isCopied; //! is this page prcessed bool m_isProcessed; //! count of keywords found in this page int m_keywordsCount; //! processing time of this page double m_processingTime; }; //! struct to hold info of tag struct TagInfo { //! id of the tag int m_id; //! name of the tag QString m_name; }; class emsOCRDialog; class emsOCR : public QThread { public: //! \fn emsOCR() //! default constructor of the ocr thread //! \author elzubeir emsOCR(); //! \fn ~emsOCR() //! destructor //! \author elzubeir ~emsOCR(); //! \fn Initalize() //! initalizes the thread to work //! \author elzubeir bool Initalize(); //! \fn Process() //! start processing of the pages //! \author elzubeir void Process(); //! \fn Stop() //! stop processing of the pages //! \author elzubeir void Stop(); //! \fn SetOCRDialog() //! set the dialog to work with this thread //! \param[in] dialog object to set //! \author elzubeir void SetOCRDialog(emsOCRDialog *dlg); private: //! \fn ReadSettings() //! This function reads the settings for the application from a config.ini file //! and populate the member variables with the values read from the ini file //! \return bool //! - true if all the values are populated //! - false otherwise //! \author elzubeir bool ReadSettings(); //! \fn ConnectToDatabase() //! Connect to master and slave databases based on the valuess read from "config.ini" file for the connection strings //! \return bool //! - true if the two connections are established correctly //! - false if error occurs //! \author elzubeir bool ConnectToDatabase(); //! \fn GetIssuePath() //! get the issue path, and drive letter from configuration table in db //! \author elzubeir void GetIssuePath(); //! \fn ConstructPagesPath() //! construct the full paths for the pages in the queue to be processed and getting their values from the db //! \author elzubeir void ConstructPagesPath(); //! \fn SectionName() //! This functions returns the section name for the passed section id //! \param[in] section integer id of the section to query for its name //! \return QString the name of the section passed //! \author elzubeir QString SectionName(int id); //! \fn ProcessPage() //! send ths page to be processed by the ocr //! \param[in] path of the page to be processed //! \param[in] is this page arabic page or not //! \return bool //! - true if page processed successfully //! - false if error occurs //! \author elzubeir bool ProcessPage(QString &pagePath, bool isArabic); //! \fn GetPagesToProcess() //! get the list of pages to be processed by the ocr from the database and add them to the queue //! \author elzubeir void GetPagesToProcess(); //! \fn UpdateStatus() //! update the status of the processed pages to be set to 3 (prcessed) //! \param[in] page id of the pages to be updated //! \author elzubeir void UpdateStatus(int sectionPagesId); //! \fn ResetUnprocessedPages() //! reset the status of unprocessed pages //! \author elzubeir void ResetUnprocessedPages(); //! \fn SearchTags() //! search for tags in the processed pages, and return a list of the found tags //! \param[in] is the page arabic //! \returns a list of the tags found in the page //! \author elzubeir QList SearchTags(bool isArabic); //! \fn GetTagCoordinates() //! returns a list of the tag's coordinates //! \param[in] the tag to be searched for //! \param[in] is this word a multi-word //! \returns a list of the tags coordinates //! \author elzubeir QStringList GetTagCoordinates(QString tag, bool ismultiword); //! \fn DeleteFromDatabase() //! deletes pages text, word corrdinats, and tag coordinates from the database //! \param[in] the tag to be searched for //! \param[in] is this word a multi-word //! \returns a list of the tags coordinates //! \author elzubeir void DeleteFromDatabase(int id_section_pages); //! \fn AddToDatabase() //! adds the tags, words, and coordinates of them to the the database s //! \param[in] the page to which we should insert the info to //! \param[in] the list of tags to insert ot db //! \author elzubeir void AddToDatabase(PageInfo pi, QList &tags); //! \fn ConvertToSingleBox() //! converts the mutli-word tag (on the same line) from multiple coordinates to a single coordinate. //! \param[out] the string holding the coordinates of the multi-word seperated by :, and converted to a single coordinates on the same line //! \param[in] is this word arabic word //! \author elzubeir void ConvertToSingleBox(QString &str, bool isArabic); //! \fn RemoveDashAndTheFollowingSpace() //! remove the '- ' from the text, this is used on the english words //! \param[out] the text with its dashes removed //! \author elzubeir void RemoveDashAndTheFollowingSpace(QString &text); //! \fn GetTagsFromDatabase() //! get the full list of tags from the database, and saves them in tag list //! \param[in] are the tags arabic or not //! \author elzubeir void GetTagsFromDatabase(bool isArabic); //! \fn CurrentDateTime() //! returns the current date and time //! \author elzubeir QString CurrentDateTime(); //! \fn ShowException () //! show the exception for the IDRS. //! \param[in] the exception occuring //! \author elzubeir void ShowException ( IDRS::IDRSException & theIDRSException ); //! \fn SetPagesInView() //! set the pages info in the view of the dialog //! \author elzubeir void SetPagesInView(); //! \fn ClearPagesInView() //! clear the pages info from the view of the dialog //! \author elzubeir void ClearPagesInView(); //! \fn AddPageToProcessedTable() //! add the processed page to the list of processed pages in the view //! \param[in] is this a successful page //! \param[in] the date to add //! \param[in] the page name //! \param[in] the time of processing //! \param[in] the number of keywords //! \author elzubeir void AddPageToProcessedTable(bool successful, QString date, QString page, double time, int keywordCount); //! \fn IncrementTotalProcssedPages() //! increment the number of processed pages by 1 //! \author elzubeir void IncrementTotalProcssedPages(); void UpdateOCRStatus(int id_publication, int id_publication_issue, int latest_page_number); protected: //! \fn run() //! start the thread //! \author elzubeir void run(); private: //! the idrs reader IDRS::CReader m_reader; //! list of the pages to be processed QList m_pagesToProcess; //! list of words of the page QList m_words; //! list of tags from the page QList m_databaseTags; //! is the ocr engin ready to be processed bool m_isReady; //! database instance QSqlDatabase m_database; //! ocr database instance QSqlDatabase m_ocrdatabase; //! current page text QString m_pageText; //! issues path QString m_issuePath; //! database connection values QString m_databaseName; QString m_server; QString m_user; QString m_password; int m_port; //! ocr database connetion values QString m_ocrdatabaseName; QString m_ocrserver; QString m_ocruser; QString m_ocrpassword; int m_ocrport; //! current index of processed pages int m_currentIndex; //! start or stop the thread bool m_start; //! the dialog to show the results at emsOCRDialog* m_ocrDialog; //! debug mode on/off bool m_debug; //! sleep time int m_sleep; bool m_isAdvancedPreprocessingLoaded; bool m_processArabic; bool m_useAPPreprocessing; bool m_useAPDarkBorderRemoval; bool m_useAPBinarization; bool m_useAPLineRemoval; bool m_useAPDespeckle; int m_useAPDespeckleValue; bool m_useSkew; bool m_useBinarization; bool m_useLineRemoval; bool m_useDespeckle; int m_useDespeckleValue; int m_id; }; #endif //_emsOCR_H_