initial import

This commit is contained in:
Mo Elzubeir
2022-12-13 09:18:00 -06:00
commit d2d561810c
322 changed files with 1147261 additions and 0 deletions
+368
View File
@@ -0,0 +1,368 @@
/************************************************************************
* $Id: emsocr.h 642 2011-03-02 21:30:33Z elzubeir $
*
* ------------
* Description:
* ------------
*
* (C) Copyright 2009,2010 ALLCONTENT. All rights reserved.
*
* -----------------
* Revision Details: (Updated by Revision Control System)
* -----------------
* $Date: 2011-03-03 01:30:33 +0400 (Thu, 03 Mar 2011) $
* $Author: elzubeir $
* $Revision: 642 $
* $HeadURL: file:///opt/svn/socialhose/trunk/app/emsOCR/emsocr.h $
*
************************************************************************/
#ifndef _emsOCR_H_
#define _emsOCR_H_
#include <idrs.h>
#include <string>
#include <vector>
#include <QtSql/QtSql>
#include <QtCore/QMutex>
#include <QtCore/QThread>
#include "word.h"
#include "tag.h"
using std::string;
using std::vector;
struct PageInfo
{
//! page id
int m_pageName;
//! id of the section pages entry
int m_sectionPagesId;
//! id for the publication
int m_publicationId;
//! type of the publication
int m_publicationType;
//! date of the issue
QString m_publicationDate;
//! publication issue
int m_publicationIssue;
//! country
int m_country;
//! remote path of the page
QString m_remotePath;
//! local path of the page
QString m_localPath;
//! custom name of the page
QString m_customName;
//! is this page arabic
bool m_isArabic;
//! has this page copied
bool m_isCopied;
//! is this page prcessed
bool m_isProcessed;
//! count of keywords found in this page
int m_keywordsCount;
//! processing time of this page
double m_processingTime;
};
//! struct to hold info of tag
struct TagInfo
{
//! id of the tag
int m_id;
//! name of the tag
QString m_name;
};
class emsOCRDialog;
class emsOCR : public QThread
{
public:
//! \fn emsOCR()
//! default constructor of the ocr thread
//! \author elzubeir
emsOCR();
//! \fn ~emsOCR()
//! destructor
//! \author elzubeir
~emsOCR();
//! \fn Initalize()
//! initalizes the thread to work
//! \author elzubeir
bool Initalize();
//! \fn Process()
//! start processing of the pages
//! \author elzubeir
void Process();
//! \fn Stop()
//! stop processing of the pages
//! \author elzubeir
void Stop();
//! \fn SetOCRDialog()
//! set the dialog to work with this thread
//! \param[in] dialog object to set
//! \author elzubeir
void SetOCRDialog(emsOCRDialog *dlg);
private:
//! \fn ReadSettings()
//! This function reads the settings for the application from a config.ini file
//! and populate the member variables with the values read from the ini file
//! \return bool
//! - true if all the values are populated
//! - false otherwise
//! \author elzubeir
bool ReadSettings();
//! \fn ConnectToDatabase()
//! Connect to master and slave databases based on the valuess read from "config.ini" file for the connection strings
//! \return bool
//! - true if the two connections are established correctly
//! - false if error occurs
//! \author elzubeir
bool ConnectToDatabase();
//! \fn GetIssuePath()
//! get the issue path, and drive letter from configuration table in db
//! \author elzubeir
void GetIssuePath();
//! \fn ConstructPagesPath()
//! construct the full paths for the pages in the queue to be processed and getting their values from the db
//! \author elzubeir
void ConstructPagesPath();
//! \fn SectionName()
//! This functions returns the section name for the passed section id
//! \param[in] section integer id of the section to query for its name
//! \return QString the name of the section passed
//! \author elzubeir
QString SectionName(int id);
//! \fn ProcessPage()
//! send ths page to be processed by the ocr
//! \param[in] path of the page to be processed
//! \param[in] is this page arabic page or not
//! \return bool
//! - true if page processed successfully
//! - false if error occurs
//! \author elzubeir
bool ProcessPage(QString &pagePath, bool isArabic);
//! \fn GetPagesToProcess()
//! get the list of pages to be processed by the ocr from the database and add them to the queue
//! \author elzubeir
void GetPagesToProcess();
//! \fn UpdateStatus()
//! update the status of the processed pages to be set to 3 (prcessed)
//! \param[in] page id of the pages to be updated
//! \author elzubeir
void UpdateStatus(int sectionPagesId);
//! \fn ResetUnprocessedPages()
//! reset the status of unprocessed pages
//! \author elzubeir
void ResetUnprocessedPages();
//! \fn SearchTags()
//! search for tags in the processed pages, and return a list of the found tags
//! \param[in] is the page arabic
//! \returns a list of the tags found in the page
//! \author elzubeir
QList<Tag> SearchTags(bool isArabic);
//! \fn GetTagCoordinates()
//! returns a list of the tag's coordinates
//! \param[in] the tag to be searched for
//! \param[in] is this word a multi-word
//! \returns a list of the tags coordinates
//! \author elzubeir
QStringList GetTagCoordinates(QString tag, bool ismultiword);
//! \fn DeleteFromDatabase()
//! deletes pages text, word corrdinats, and tag coordinates from the database
//! \param[in] the tag to be searched for
//! \param[in] is this word a multi-word
//! \returns a list of the tags coordinates
//! \author elzubeir
void DeleteFromDatabase(int id_section_pages);
//! \fn AddToDatabase()
//! adds the tags, words, and coordinates of them to the the database s
//! \param[in] the page to which we should insert the info to
//! \param[in] the list of tags to insert ot db
//! \author elzubeir
void AddToDatabase(PageInfo pi, QList<Tag> &tags);
//! \fn ConvertToSingleBox()
//! converts the mutli-word tag (on the same line) from multiple coordinates to a single coordinate.
//! \param[out] the string holding the coordinates of the multi-word seperated by :, and converted to a single coordinates on the same line
//! \param[in] is this word arabic word
//! \author elzubeir
void ConvertToSingleBox(QString &str, bool isArabic);
//! \fn RemoveDashAndTheFollowingSpace()
//! remove the '- ' from the text, this is used on the english words
//! \param[out] the text with its dashes removed
//! \author elzubeir
void RemoveDashAndTheFollowingSpace(QString &text);
//! \fn GetTagsFromDatabase()
//! get the full list of tags from the database, and saves them in tag list
//! \param[in] are the tags arabic or not
//! \author elzubeir
void GetTagsFromDatabase(bool isArabic);
//! \fn CurrentDateTime()
//! returns the current date and time
//! \author elzubeir
QString CurrentDateTime();
//! \fn ShowException ()
//! show the exception for the IDRS.
//! \param[in] the exception occuring
//! \author elzubeir
void ShowException ( IDRS::IDRSException & theIDRSException );
//! \fn SetPagesInView()
//! set the pages info in the view of the dialog
//! \author elzubeir
void SetPagesInView();
//! \fn ClearPagesInView()
//! clear the pages info from the view of the dialog
//! \author elzubeir
void ClearPagesInView();
//! \fn AddPageToProcessedTable()
//! add the processed page to the list of processed pages in the view
//! \param[in] is this a successful page
//! \param[in] the date to add
//! \param[in] the page name
//! \param[in] the time of processing
//! \param[in] the number of keywords
//! \author elzubeir
void AddPageToProcessedTable(bool successful, QString date, QString page, double time, int keywordCount);
//! \fn IncrementTotalProcssedPages()
//! increment the number of processed pages by 1
//! \author elzubeir
void IncrementTotalProcssedPages();
void UpdateOCRStatus(int id_publication, int id_publication_issue, int latest_page_number);
protected:
//! \fn run()
//! start the thread
//! \author elzubeir
void run();
private:
//! the idrs reader
IDRS::CReader m_reader;
//! list of the pages to be processed
QList<PageInfo> m_pagesToProcess;
//! list of words of the page
QList<Word> m_words;
//! list of tags from the page
QList<TagInfo> m_databaseTags;
//! is the ocr engin ready to be processed
bool m_isReady;
//! database instance
QSqlDatabase m_database;
//! ocr database instance
QSqlDatabase m_ocrdatabase;
//! current page text
QString m_pageText;
//! issues path
QString m_issuePath;
//! database connection values
QString m_databaseName;
QString m_server;
QString m_user;
QString m_password;
int m_port;
//! ocr database connetion values
QString m_ocrdatabaseName;
QString m_ocrserver;
QString m_ocruser;
QString m_ocrpassword;
int m_ocrport;
//! current index of processed pages
int m_currentIndex;
//! start or stop the thread
bool m_start;
//! the dialog to show the results at
emsOCRDialog* m_ocrDialog;
//! debug mode on/off
bool m_debug;
//! sleep time
int m_sleep;
bool m_isAdvancedPreprocessingLoaded;
bool m_processArabic;
bool m_useAPPreprocessing;
bool m_useAPDarkBorderRemoval;
bool m_useAPBinarization;
bool m_useAPLineRemoval;
bool m_useAPDespeckle;
int m_useAPDespeckleValue;
bool m_useSkew;
bool m_useBinarization;
bool m_useLineRemoval;
bool m_useDespeckle;
int m_useDespeckleValue;
int m_id;
};
#endif //_emsOCR_H_