diff --git a/build/router/controller/ics.php b/build/router/controller/ics.php index aa326fb..2f0e188 100644 --- a/build/router/controller/ics.php +++ b/build/router/controller/ics.php @@ -39,11 +39,13 @@ /* [2] Display file =========================================================*/ /* (1) Headers */ - header('Content-Type: text/calendar; charset=utf-8'); - header('Content-Disposition: attachment; filename='.$this->diplome_id.'.ics'); + // header('Content-Type: text/calendar; charset=utf-8'); + // header('Content-Disposition: attachment; filename='.$this->diplome_id.'.ics'); /* (2) Body */ + echo "
";
 			readfile($file_name);
+			echo "
"; } diff --git a/build/service/CalendarExtractor.php b/build/service/CalendarExtractor.php index 399246c..44c9fe2 100644 --- a/build/service/CalendarExtractor.php +++ b/build/service/CalendarExtractor.php @@ -3,6 +3,7 @@ namespace service; use \lightdb\core\lightdb; + use \service\Tesseract; class CalendarExtractor{ @@ -197,7 +198,9 @@ $this->event[$uid][$time] = [ $this->yToTime($day_n, $y) ]; // {6} Exctract event's image // - $this->event[$uid][$time][1] = $this->extractEvent("$time-$uid", [$col_x, $start_y+1], [$col_ind[$day_n+1]-1, $y]); + $ev = $this->extractEvent("$time-$uid", [$col_x, $start_y+1], [$col_ind[$day_n+1]-1, $y]); + $this->event[$uid][$time][1] = $ev[0]; + $this->event[$uid][$time][2] = $ev[1]; } @@ -240,42 +243,72 @@ $link = __ROOT__."/tmp/$uid.jpeg"; $width = $stop[0]-$start[0]; $height = $stop[1]-$start[1]; + $resize_factor = 2; /* [1] Get the right clip =========================================================*/ { + /* (1) Create clipped copy */ - $clip = \imagecreatetruecolor($width, $height); + $clip = \imagecreatetruecolor($width*$resize_factor, $height*$resize_factor); $copied = \imagecopyresized( - $clip, // destin img - $this->img_res, // source img - 0, // dest x - 0, // dest y - $start[0], // src x - $start[1], // src y - $width, // dest w - $height, // dest h - $width, // src w - $height // src h + $clip, // destin img + $this->img_res, // source img + 0, // dest x + 0, // dest y + $start[0], // src x + $start[1], // src y + $width*$resize_factor, // dest w + $height*$resize_factor, // dest h + $width, // src w + $height // src h ); /* (2) Manage copy error */ if( !$copied ) throw new \Exception("Cannot clip image"); - /* (3) Save to base64 */ + /* (3) Save to jpeg */ \imagesavealpha($clip, true); - ob_start(); - \imagejpeg($clip); - $image_data = \base64_encode(ob_get_contents()); - ob_end_clean(); + // ob_start(); + \imagejpeg($clip, $link); + // $image_data = \base64_encode(ob_get_contents()); + // ob_end_clean(); + + } + + + + /* [2] Apply Tesseract + =========================================================*/ { + + /* (1) Load image with tesseract */ + try{ + + $tesseract = new Tesseract($link); + $read = $tesseract->read(); + + /* (2) Manage error */ + }catch(\Exception $e){ + + $read = [ 'unkown', 'unknown' ]; + + } } - return $image_data; + + /* [3] End procedure + =========================================================*/ + /* (1) Remove file */ + unlink($link); + + /* (2) Return read value */ + return $read; + } @@ -389,11 +422,6 @@ =========================================================*/ foreach($this->event as $event_col=>$events){ - $type = "unknown"; - - if( isset($col_assoc[$event_col]) ) - $type = $col_assoc[$event_col]; - /* (2) For each event of each type ---------------------------------------------------------*/ foreach($events as $start_t=>$data){ @@ -402,8 +430,9 @@ $RAW .= "DTSTART:${start_t}\n"; $RAW .= "DTEND:${data[0]}\n"; $RAW .= "UID:$start_t-univ-pau-ics\n"; // required - $RAW .= "SUMMARY:$type\n"; - $RAW .= "ATTACH;ENCODING=BASE64;VALUE=BINARY;FILENAME=att.jpg:${data[1]}\n"; + $RAW .= "SUMMARY:${data[1]}\n"; + $RAW .= "LOCATION:${data[2]}\n"; + // $RAW .= "ATTACH;ENCODING=BASE64;VALUE=BINARY;FILENAME=att.jpg:${data[1]}\n"; $RAW .= "CATEGORIES: UPPA Calendar\n"; $RAW .= "END:VEVENT\n"; } diff --git a/build/service/Tesseract.php b/build/service/Tesseract.php index e025f62..c4da651 100755 --- a/build/service/Tesseract.php +++ b/build/service/Tesseract.php @@ -6,86 +6,81 @@ /* [1] Attributes =========================================================*/ - private $filename = null; - private $content = null; - private $course = null; - private $teacher = null; - private $room = null; + private $fname = null; /* (1) Constructs and initialise a readed file * + * @fname Path of the image + * * @return instance New Tesseract * ---------------------------------------------------------*/ - public function __construct($filename){ - $this->filename = $filename; + public function __construct($fname=null){ + + /* [1] Check argument + =========================================================*/ { + + /* (1) Check type */ + if( !is_string($fname) ) + throw new \Exception("Tesseract.__construct() expected but Tesseract.__construct(<".gettype($fname).">) received"); + + /* (2) Check file validity */ + if( !file_exists($fname) ) + throw new \Exception("Tesseract.__construct() but is not valid"); + + } + + + /* [2] Store as attribute + =========================================================*/ + $this->fname = $fname; - return $this; } /* (2) Read the image file * - * @return this + * @return read The read content * ---------------------------------------------------------*/ public function read(){ /* [1] Record the text from the image =========================================================*/ - $filename = $this->filename; - $this->content = shell_exec("tesseract $filename stdout -l fra"); + /* (1) Process tesseract */ + $read = shell_exec("tesseract ".$this->fname." stdout -l fra --user-words ".__ROOT__."/config/edt.user-words -c language_model_penalty_non_freq_dict_word=0.1 -c language_model_penalty_non_dict_word=.15 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 2>/dev/null"); - $lists = explode(chr(10), $this->content); + /* (2) If empty */ + if( is_null($read) ) + throw new \Exception("Nothing read"); - if (count($lists) < 3) { - throw new \Exception('Result not interpreted'); - } + /* (3) Split by lines */ + $by_line = explode("\n", $read); - $this->course = $lists[0]; - $this->teacher = $lists[1]; - $this->room = $lists[2]; + /* (4) Get first line (title) */ + $title = $by_line[0]; + + /* (5) Get last non-empty line */ + for( $i = count($by_line)-1 ; $i > 0 ; $i-- ){ + + // {1} Check not empty // + if( empty($by_line[$i]) ) + continue; + + // {2} Matches // + if( preg_match('@^amphi@i', $by_line[$i]) || // 'amphi A', 'amphi 600 droit' + preg_match('@^S\d+@i', $by_line[$i]) // 'S10', 'S22' + ) + return [ $title, $by_line[$i] ]; + + } + + + return [ $title, 'unknown' ]; - return $this; } - /* (3) Return the text readed by the Tesseract OCR - * - * @return $this->content - * - ---------------------------------------------------------*/ - public function getContent() { - return $this->content; - } - - /* (4) Return the course readed by the Tesseract OCR - * - * @return $this->course - * - ---------------------------------------------------------*/ - public function getCourse() { - return $this->course; - } - - /* (5) Return the teacher readed by the Tesseract OCR - * - * @return $this->teacher - * - ---------------------------------------------------------*/ - public function getTeacher() { - return $this->teacher; - } - - /* (6) Return the room class readed by the Tesseract OCR - * - * @return $this->room - * - ---------------------------------------------------------*/ - public function getRoom() { - return $this->room; - } - } \ No newline at end of file diff --git a/config/edt.user-words b/config/edt.user-words new file mode 100644 index 0000000..483d38e --- /dev/null +++ b/config/edt.user-words @@ -0,0 +1,19 @@ +intro +introduction +diversite +evolution +mathematiques +general +Gestion +gestion +Molécules +cellulaire +biologie +physique +amphi +CTD +droit +gauche +grammaire +methodologie +Cours \ No newline at end of file diff --git a/install.sh b/install.sh index fa70fb0..abd261c 100644 --- a/install.sh +++ b/install.sh @@ -1,4 +1,5 @@ #!/bin/bash sudo apt-get install php7.0-gd; -sudo service apache2 restart; \ No newline at end of file +sudo service apache2 restart; +sudo apt-get install tesseract-ocr tesseract-ocr-fra; \ No newline at end of file