Improved Tesseract dictionary accuracy + corrected OCR misread 'ami' to 'amphi'
This commit is contained in:
parent
ea77b64c32
commit
5dae2ef258
|
@ -51,9 +51,7 @@
|
|||
=========================================================*/ {
|
||||
|
||||
/* (1) Process tesseract */
|
||||
$read = shell_exec("tesseract ".$this->fname." stdout -l fra --user-words ".__ROOT__."/config/edt.user-words -c language_model_penalty_non_freq_dict_word=0.1 -c language_model_penalty_non_dict_word=.15 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 2>/dev/null");
|
||||
|
||||
// var_dump($read);
|
||||
$read = shell_exec("tesseract ".$this->fname." stdout -l fra --user-words ".__ROOT__."/config/edt.user-words -c language_model_penalty_non_freq_dict_word=0.3 -c language_model_penalty_non_dict_word=.25 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 2>/dev/null");
|
||||
|
||||
/* (2) If empty */
|
||||
if( is_null($read) || !preg_match('@\n@m', $read) )
|
||||
|
@ -88,7 +86,7 @@
|
|||
for( $i = count($lines)-1 ; $i > 0 ; $i-- ){
|
||||
|
||||
// Amphi ... //
|
||||
if( preg_match('@^a[nm][bp][hl]i ?(.+)$@i', $lines[$i], $m) ) // 'amphi A', 'amphi 600 droit'
|
||||
if( preg_match('@^a[nm]([bp][hln])?[ir] ?(.+)$@i', $lines[$i], $m) ) // 'amphi A', 'amphi 600 droit'
|
||||
return [ $title, "Amphi ${m[1]}" ];
|
||||
|
||||
|
||||
|
|
|
@ -16,4 +16,15 @@ droit
|
|||
gauche
|
||||
grammaire
|
||||
methodologie
|
||||
Cours
|
||||
Cours
|
||||
Modélisation
|
||||
Systèmes
|
||||
concurrents
|
||||
synthèse
|
||||
d'image
|
||||
3D
|
||||
LV1
|
||||
LV2
|
||||
Génie
|
||||
logiciel
|
||||
Interface
|
Loading…
Reference in New Issue