diff --git a/build/service/Tesseract.php b/build/service/Tesseract.php index d96b643..4d08ab5 100755 --- a/build/service/Tesseract.php +++ b/build/service/Tesseract.php @@ -51,9 +51,7 @@ =========================================================*/ { /* (1) Process tesseract */ - $read = shell_exec("tesseract ".$this->fname." stdout -l fra --user-words ".__ROOT__."/config/edt.user-words -c language_model_penalty_non_freq_dict_word=0.1 -c language_model_penalty_non_dict_word=.15 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 2>/dev/null"); - - // var_dump($read); + $read = shell_exec("tesseract ".$this->fname." stdout -l fra --user-words ".__ROOT__."/config/edt.user-words -c language_model_penalty_non_freq_dict_word=0.3 -c language_model_penalty_non_dict_word=.25 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 2>/dev/null"); /* (2) If empty */ if( is_null($read) || !preg_match('@\n@m', $read) ) @@ -88,7 +86,7 @@ for( $i = count($lines)-1 ; $i > 0 ; $i-- ){ // Amphi ... // - if( preg_match('@^a[nm][bp][hl]i ?(.+)$@i', $lines[$i], $m) ) // 'amphi A', 'amphi 600 droit' + if( preg_match('@^a[nm](?:[bp][hln])?[ir] ?(.+)$@i', $lines[$i], $m) ) // 'amphi A', 'amphi 600 droit' (OCR-variant prefix is non-capturing so $m[1] stays the room label) return [ $title, "Amphi ${m[1]}" ]; diff --git a/config/edt.user-words b/config/edt.user-words index 483d38e..ca0a986 100644 --- a/config/edt.user-words +++ b/config/edt.user-words @@ -16,4 +16,15 @@ droit gauche grammaire methodologie -Cours \ No newline at end of file +Cours +Modélisation +Systèmes +concurrents +synthèse +d'image +3D +LV1 +LV2 +Génie +logiciel +Interface \ No newline at end of file