This commit is contained in:
xdrm-brackets 2017-09-14 15:54:41 +02:00
parent 8da9dff2a9
commit 6464bd57c9
5 changed files with 129 additions and 83 deletions

View File

@ -39,11 +39,13 @@
/* [2] Display file /* [2] Display file
=========================================================*/ =========================================================*/
/* (1) Headers */ /* (1) Headers */
header('Content-Type: text/calendar; charset=utf-8'); // header('Content-Type: text/calendar; charset=utf-8');
header('Content-Disposition: attachment; filename='.$this->diplome_id.'.ics'); // header('Content-Disposition: attachment; filename='.$this->diplome_id.'.ics');
/* (2) Body */ /* (2) Body */
echo "<pre>";
readfile($file_name); readfile($file_name);
echo "</pre>";
} }

View File

@ -3,6 +3,7 @@
namespace service; namespace service;
use \lightdb\core\lightdb; use \lightdb\core\lightdb;
use \service\Tesseract;
class CalendarExtractor{ class CalendarExtractor{
@ -197,7 +198,9 @@
$this->event[$uid][$time] = [ $this->yToTime($day_n, $y) ]; $this->event[$uid][$time] = [ $this->yToTime($day_n, $y) ];
// {6} Extract event's image // // {6} Extract event's image //
$this->event[$uid][$time][1] = $this->extractEvent("$time-$uid", [$col_x, $start_y+1], [$col_ind[$day_n+1]-1, $y]); $ev = $this->extractEvent("$time-$uid", [$col_x, $start_y+1], [$col_ind[$day_n+1]-1, $y]);
$this->event[$uid][$time][1] = $ev[0];
$this->event[$uid][$time][2] = $ev[1];
} }
@ -240,11 +243,13 @@
$link = __ROOT__."/tmp/$uid.jpeg"; $link = __ROOT__."/tmp/$uid.jpeg";
$width = $stop[0]-$start[0]; $width = $stop[0]-$start[0];
$height = $stop[1]-$start[1]; $height = $stop[1]-$start[1];
$resize_factor = 2;
/* [1] Get the right clip /* [1] Get the right clip
=========================================================*/ { =========================================================*/ {
/* (1) Create clipped copy */ /* (1) Create clipped copy */
$clip = \imagecreatetruecolor($width, $height); $clip = \imagecreatetruecolor($width*$resize_factor, $height*$resize_factor);
$copied = \imagecopyresized( $copied = \imagecopyresized(
$clip, // destin img $clip, // destin img
@ -253,8 +258,8 @@
0, // dest y 0, // dest y
$start[0], // src x $start[0], // src x
$start[1], // src y $start[1], // src y
$width, // dest w $width*$resize_factor, // dest w
$height, // dest h $height*$resize_factor, // dest h
$width, // src w $width, // src w
$height // src h $height // src h
); );
@ -263,19 +268,47 @@
if( !$copied ) if( !$copied )
throw new \Exception("Cannot clip image"); throw new \Exception("Cannot clip image");
/* (3) Save to base64 */ /* (3) Save to jpeg */
\imagesavealpha($clip, true); \imagesavealpha($clip, true);
ob_start(); // ob_start();
\imagejpeg($clip); \imagejpeg($clip, $link);
$image_data = \base64_encode(ob_get_contents()); // $image_data = \base64_encode(ob_get_contents());
ob_end_clean(); // ob_end_clean();
}
/* [2] Apply Tesseract
=========================================================*/ {
/* (1) Run the OCR on the clipped image saved at $link;
*      read() yields a 2-element array */
try{
    $tesseract = new Tesseract($link);
    $read      = $tesseract->read();

/* (2) On any failure, fall back to the same 'unknown' placeholder
*      for both entries */
}catch(\Exception $e){
    $read = [ 'unknown', 'unknown' ];
}
} }
return $image_data;
/* [3] End procedure
=========================================================*/
/* (1) Remove the temporary jpeg, if it was actually written:
*      imagejpeg()'s result is not checked above, so the file may be
*      missing and a bare unlink() would raise a warning */
if( is_file($link) )
    unlink($link);

/* (2) Return the 2-element OCR result (or the fallback placeholders) */
return $read;
} }
@ -389,11 +422,6 @@
=========================================================*/ =========================================================*/
foreach($this->event as $event_col=>$events){ foreach($this->event as $event_col=>$events){
$type = "unknown";
if( isset($col_assoc[$event_col]) )
$type = $col_assoc[$event_col];
/* (2) For each event of each type /* (2) For each event of each type
---------------------------------------------------------*/ ---------------------------------------------------------*/
foreach($events as $start_t=>$data){ foreach($events as $start_t=>$data){
@ -402,8 +430,9 @@
$RAW .= "DTSTART:${start_t}\n"; $RAW .= "DTSTART:${start_t}\n";
$RAW .= "DTEND:${data[0]}\n"; $RAW .= "DTEND:${data[0]}\n";
$RAW .= "UID:$start_t-univ-pau-ics\n"; // required $RAW .= "UID:$start_t-univ-pau-ics\n"; // required
$RAW .= "SUMMARY:$type\n"; $RAW .= "SUMMARY:${data[1]}\n";
$RAW .= "ATTACH;ENCODING=BASE64;VALUE=BINARY;FILENAME=att.jpg:${data[1]}\n"; $RAW .= "LOCATION:${data[2]}\n";
// $RAW .= "ATTACH;ENCODING=BASE64;VALUE=BINARY;FILENAME=att.jpg:${data[1]}\n";
$RAW .= "CATEGORIES: UPPA Calendar\n"; $RAW .= "CATEGORIES: UPPA Calendar\n";
$RAW .= "END:VEVENT\n"; $RAW .= "END:VEVENT\n";
} }

View File

@ -6,85 +6,80 @@
/* [1] Attributes /* [1] Attributes
=========================================================*/ =========================================================*/
private $filename = null; private $fname = null;
private $content = null;
private $course = null;
private $teacher = null;
private $room = null;
/* (1) Constructs and initialises an instance from an image file
* *
* @fname<String> Path of the image
*
* @return instance<Tesseract> New Tesseract * @return instance<Tesseract> New Tesseract
* *
---------------------------------------------------------*/ ---------------------------------------------------------*/
public function __construct($filename){ public function __construct($fname=null){
$this->filename = $filename;
/* [1] Check argument
=========================================================*/ {

    /* (1) Check type */
    if( !is_string($fname) )
        throw new \Exception("Tesseract.__construct(<String>) expected but Tesseract.__construct(<".gettype($fname).">) received");

    /* (2) Check file validity: must be an existing, readable regular file
    *      (file_exists() alone would also accept a directory) */
    if( !is_file($fname) || !is_readable($fname) )
        throw new \Exception("Tesseract.__construct(<PATH>) but <PATH> is not valid");

}

/* [2] Store as attribute
=========================================================*/
$this->fname = $fname;

// kept for compatibility; the value is ignored when invoked through `new`
return $this;
} }
/* (2) Read the image file /* (2) Read the image file
* *
* @return this * @return read<String> The read content
* *
---------------------------------------------------------*/ ---------------------------------------------------------*/
public function read(){ public function read(){
/* [1] Record the text from the image /* [1] Record the text from the image
=========================================================*/ =========================================================*/
$filename = $this->filename; /* (1) Process tesseract */
$this->content = shell_exec("tesseract $filename stdout -l fra"); $read = shell_exec("tesseract ".$this->fname." stdout -l fra --user-words ".__ROOT__."/config/edt.user-words -c language_model_penalty_non_freq_dict_word=0.1 -c language_model_penalty_non_dict_word=.15 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 2>/dev/null");
$lists = explode(chr(10), $this->content); /* (2) If empty */
if( is_null($read) )
throw new \Exception("Nothing read");
/* (3) Split by lines */
$by_line = explode("\n", $read);
/* (4) Get first line (title) */
$title = $by_line[0];
/* (5) Get last non-empty line */
for( $i = count($by_line)-1 ; $i > 0 ; $i-- ){
// {1} Check not empty //
if( empty($by_line[$i]) )
continue;
// {2} Matches //
if( preg_match('@^amphi@i', $by_line[$i]) || // 'amphi A', 'amphi 600 droit'
preg_match('@^S\d+@i', $by_line[$i]) // 'S10', 'S22'
)
return [ $title, $by_line[$i] ];
if (count($lists) < 3) {
throw new \Exception('Result not interpreted');
} }
$this->course = $lists[0];
$this->teacher = $lists[1];
$this->room = $lists[2];
return $this; return [ $title, 'unknown' ];
}
/* (3) Return the text readed by the Tesseract OCR
*
* @return $this->content
*
---------------------------------------------------------*/
public function getContent() {
return $this->content;
}
/* (4) Return the course readed by the Tesseract OCR
*
* @return $this->course
*
---------------------------------------------------------*/
public function getCourse() {
return $this->course;
}
/* (5) Return the teacher readed by the Tesseract OCR
*
* @return $this->teacher
*
---------------------------------------------------------*/
public function getTeacher() {
return $this->teacher;
}
/* (6) Return the room class readed by the Tesseract OCR
*
* @return $this->room
*
---------------------------------------------------------*/
public function getRoom() {
return $this->room;
} }

19
config/edt.user-words Normal file
View File

@ -0,0 +1,19 @@
intro
introduction
diversite
evolution
mathematiques
general
Gestion
gestion
Molécules
cellulaire
biologie
physique
amphi
CTD
droit
gauche
grammaire
methodologie
Cours

View File

@ -2,3 +2,4 @@
sudo apt-get install php7.0-gd; sudo apt-get install php7.0-gd;
sudo service apache2 restart; sudo service apache2 restart;
sudo apt-get install tesseract-ocr tesseract-ocr-fra;