Improved location read + tesseract
This commit is contained in:
parent
89bf9722c4
commit
960f490beb
|
@ -266,7 +266,7 @@
|
||||||
|
|
||||||
/* (2) Manage copy error */
|
/* (2) Manage copy error */
|
||||||
if( !$copied )
|
if( !$copied )
|
||||||
throw new \Exception("Cannot clip image");
|
return [ null, null ];
|
||||||
|
|
||||||
/* (3) Save to jpeg */
|
/* (3) Save to jpeg */
|
||||||
\imagesavealpha($clip, true);
|
\imagesavealpha($clip, true);
|
||||||
|
@ -292,7 +292,7 @@
|
||||||
/* (2) Manage error */
|
/* (2) Manage error */
|
||||||
}catch(\Exception $e){
|
}catch(\Exception $e){
|
||||||
|
|
||||||
$read = [ 'unkown', 'unknown' ];
|
$read = [ null, null ];
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -430,9 +430,10 @@
|
||||||
$RAW .= "DTSTART:${start_t}\n";
|
$RAW .= "DTSTART:${start_t}\n";
|
||||||
$RAW .= "DTEND:${data[0]}\n";
|
$RAW .= "DTEND:${data[0]}\n";
|
||||||
$RAW .= "UID:$start_t-univ-pau-ics\n"; // required
|
$RAW .= "UID:$start_t-univ-pau-ics\n"; // required
|
||||||
$RAW .= "SUMMARY:${data[1]}\n";
|
if( !is_null($data[1]) )
|
||||||
$RAW .= "LOCATION:${data[2]}\n";
|
$RAW .= "SUMMARY:${data[1]}\n";
|
||||||
// $RAW .= "ATTACH;ENCODING=BASE64;VALUE=BINARY;FILENAME=att.jpg:${data[1]}\n";
|
if( !is_null($data[2]) )
|
||||||
|
$RAW .= "LOCATION:${data[2]}\n";
|
||||||
$RAW .= "CATEGORIES: UPPA Calendar\n";
|
$RAW .= "CATEGORIES: UPPA Calendar\n";
|
||||||
$RAW .= "END:VEVENT\n";
|
$RAW .= "END:VEVENT\n";
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,37 +48,60 @@
|
||||||
/**
 * Runs tesseract OCR on the image at $this->fname and extracts the event
 * title and, when recognisable, its location.
 *
 * @return array [ title, location ]: title is the first non-empty OCR line;
 *               location is "Amphi <name>" or "S. <number>", or null when no
 *               later line looks like a location.
 * @throws \Exception "Nothing read" when OCR fails or yields fewer than two
 *                    non-empty lines.
 */
public function read(){

    /* [1] Record the text from the image
    =========================================================*/ {

        /* (1) Process tesseract */
        // Paths are shell-quoted so that spaces or metacharacters cannot break (or inject into) the command
        $command  = "tesseract ".escapeshellarg($this->fname)." stdout -l fra";
        $command .= " --user-words ".escapeshellarg(__ROOT__."/config/edt.user-words");
        $command .= " -c language_model_penalty_non_freq_dict_word=0.1";
        $command .= " -c language_model_penalty_non_dict_word=.15";
        $command .= " -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        $command .= " 2>/dev/null";

        $read = shell_exec($command);

        /* (2) If empty */
        // shell_exec() yields null (no output / error) or false (pipe failure): reject anything that is not text
        if( !is_string($read) || strpos($read, "\n") === false )
            throw new \Exception("Nothing read");

        /* (3) Split by lines + (4) Remove empty lines */
        $lines = [];

        foreach( explode("\n", $read) as $raw_line ){

            // Trimmed so the '^'-anchored patterns below are not defeated by stray whitespace
            $line = trim($raw_line);

            // empty() kept on purpose: a lone "0" line is still discarded, as before
            if( !empty($line) )
                $lines[] = $line;

        }

        /* (5) Manage if empty */
        // At least a title line and one candidate location line are required
        if( count($lines) < 2 )
            throw new \Exception("Nothing read");

    }


    /* [2] Extract data
    =========================================================*/ {

        /* (1) Get first non-empty line (title) */
        $title = $lines[0];

        /* (2) Search a location from the last line upwards (index 0 is the title, so it is skipped) */
        for( $i = count($lines)-1 ; $i > 0 ; $i-- ){

            // Amphi ... (tolerates common OCR confusions n/m, b/p, h/l) //
            if( preg_match('@^a[nm][bp][hl]i ?(.+)$@i', $lines[$i], $m) ) // 'amphi A', 'amphi 600 droit'
                return [ $title, "Amphi {$m[1]}" ];

            // S... OR 5... ('S' is often read as '5') //
            if( preg_match('@^[S5] ?(\d+)@i', $lines[$i], $m) ) // 'S10', 'S22'
                return [ $title, "S. {$m[1]}" ];

        }

    }

    /* [3] No location recognised */
    return [ $title, null ];

}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue