Improved tesseract dictionary accuracy + fixed 'ami' to 'amphi'
This commit is contained in:
parent
ea77b64c32
commit
5dae2ef258
|
@ -51,9 +51,7 @@
|
||||||
=========================================================*/ {
|
=========================================================*/ {
|
||||||
|
|
||||||
/* (1) Process tesseract */
|
/* (1) Process tesseract */
|
||||||
$read = shell_exec("tesseract ".$this->fname." stdout -l fra --user-words ".__ROOT__."/config/edt.user-words -c language_model_penalty_non_freq_dict_word=0.1 -c language_model_penalty_non_dict_word=.15 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 2>/dev/null");
|
$read = shell_exec("tesseract ".$this->fname." stdout -l fra --user-words ".__ROOT__."/config/edt.user-words -c language_model_penalty_non_freq_dict_word=0.3 -c language_model_penalty_non_dict_word=.25 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 2>/dev/null");
|
||||||
|
|
||||||
// var_dump($read);
|
|
||||||
|
|
||||||
/* (2) If empty */
|
/* (2) If empty */
|
||||||
if( is_null($read) || !preg_match('@\n@m', $read) )
|
if( is_null($read) || !preg_match('@\n@m', $read) )
|
||||||
|
@ -88,7 +86,7 @@
|
||||||
for( $i = count($lines)-1 ; $i > 0 ; $i-- ){
|
for( $i = count($lines)-1 ; $i > 0 ; $i-- ){
|
||||||
|
|
||||||
// Amphi ... //
|
// Amphi ... //
|
||||||
if( preg_match('@^a[nm][bp][hl]i ?(.+)$@i', $lines[$i], $m) ) // 'amphi A', 'amphi 600 droit'
|
if( preg_match('@^a[nm]([bp][hln])?[ir] ?(.+)$@i', $lines[$i], $m) ) // 'amphi A', 'amphi 600 droit'
|
||||||
return [ $title, "Amphi ${m[1]}" ];
|
return [ $title, "Amphi ${m[1]}" ];
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -17,3 +17,14 @@ gauche
|
||||||
grammaire
|
grammaire
|
||||||
methodologie
|
methodologie
|
||||||
Cours
|
Cours
|
||||||
|
Modélisation
|
||||||
|
Systèmes
|
||||||
|
concurrents
|
||||||
|
synthèse
|
||||||
|
d'image
|
||||||
|
3D
|
||||||
|
LV1
|
||||||
|
LV2
|
||||||
|
Génie
|
||||||
|
logiciel
|
||||||
|
Interface
|
Loading…
Reference in New Issue