PhpLabWare version 0.3 
/ -> cron.php

1  <?php
2 
3  // cron.php - Maintenance tasks. To be called from cron.
4  // cron.php - author: Nico Stuurman <nicost@sourceforge.net>
5 
6    /***************************************************************************
7    * Copyright (c) 2002 by Nico Stuurman *
8    * ------------------------------------------------------------------------ *
9    * This program is free software; you can redistribute it and/or modify it *
10    * under the terms of the GNU General Public License as published by the *
11    * Free Software Foundation; either version 2 of the License, or (at your *
12    * option) any later version. *
13    \**************************************************************************/
14 
15 
// Indexing every pending file can run for a long time, so lift the
// execution time limit:
ini_set('max_execution_time', '0');
18 
19  include ("includes/defines_inc.php");
20  include ("includes/functions_inc.php");
21  include ("includes/init_inc.php");
22  include ("includes/db_inc.php");
23 
////
// !Writes the index entries needed for full text searches of files
// $filetext is split on spaces, commas, periods, colons, semicolons, double
// quotes and newlines. Every word longer than 3 characters is looked up in
// table 'words' (and added there with a new id from sequence 'word_seq'
// when it is missing); a row (wordid,fileid,pagenr,recordid) is then
// inserted into $indextable.
// $db         - database connection (presumably ADOdb: Execute/GenID are used)
// $filetext   - text to index
// $fileid     - id of the file the text came from
// $indextable - name of the table receiving the index rows
// $recordid   - id of the record the file belongs to
// $pagenr     - page the text was found on; an empty value is stored as 1
// Always returns true.
function doindexfile ($db,$filetext,$fileid,$indextable,$recordid,$pagenr)
{
   if (!$pagenr)
      $pagenr=1;
   // preg_split replaces the POSIX split(), which no longer exists in PHP 7+
   $thetext=preg_split('/[ ,.:;"\n]/',$filetext);
   foreach ($thetext as $word) {
      if (strlen($word)>3) {
         // The word is bound as a parameter instead of being pasted into the
         // statement: a word such as "o'brien" would otherwise break (or
         // alter) the SQL.
         $r=$db->Execute("SELECT id FROM words WHERE word=?",array($word));
         // a failed query or an empty result both mean: word not known yet
         $wordid=false;
         if ($r && !$r->EOF)
            $wordid=$r->fields[0];
         if (!$wordid) {
            $wordid=$db->GenID("word_seq");
            $db->Execute("INSERT INTO words VALUES (?,?)",array($wordid,$word));
         }
         // the table name cannot be bound, the values can
         $db->Execute("INSERT INTO $indextable VALUES (?,?,?,?)",array($wordid,$fileid,$pagenr,$recordid));
      }
   }
   return true;
}
44 
45 
46  // main body
47 
48  // text/html files are indexed directly, pdf files are first converted
49  // with ghostscript
50  // words are entered in table 'words'
51  // links between files/records are kept in specific tables
52 
// we keep track of the time it takes to do the indexing
$starttime=microtime();

// Only serve requests that originate from this machine. The Host header is
// supplied by the client and can be forged, so on its own it proves nothing:
// the address of the connecting peer has to be a loopback address as well.
$host=getenv("HTTP_HOST");
$remoteaddr=getenv("REMOTE_ADDR");
$localhost=($host=="localhost" || $host=="127.0.0.1");
$localpeer=($remoteaddr=="127.0.0.1" || $remoteaddr=="::1" || $remoteaddr=="::ffff:127.0.0.1");
if (! ($localhost && $localpeer) ) {
   echo "This script should only be called by the CRON daemon.";
   exit ();
}
61 
$gs=$system_settings["gs"];
// pdf files can only be indexed when the ghostscript binary is usable
$gsusable=($gs && @is_readable($gs));
if (!$gsusable)
   echo "Could not read ghostscipt binary (gs) at '$gs'.<br>";

// number of files indexed in this run, reported at the end of the script
$textfilecounter=0;
$pdffilecounter=0;

// find unindexed files with mime types we can work with
$rfiles=$db->Execute("SELECT id,filename,tablesfk,ftableid,mime,ftablecolumnid FROM files WHERE indexed IS NULL AND (mime LIKE '%text%' OR mime LIKE '%pdf%')");

while ($rfiles && !($rfiles->EOF)) {
   $fileid=$rfiles->fields['id'];
   $mime=$rfiles->fields['mime'];
   // set once the file has really been written to the index
   $indexed=false;

   // find out to which table we are going to write the index
   // (ids are cast to int so that a NULL value cannot produce broken SQL,
   // and failed lookups are skipped instead of being dereferenced)
   $indextable=false;
   $rdesc=$db->Execute("SELECT table_desc_name FROM tableoftables WHERE id=".(int)$rfiles->fields['tablesfk']);
   if ($rdesc && !$rdesc->EOF && $rdesc->fields['table_desc_name']) {
      $rindextable=$db->Execute("SELECT associated_table FROM ".$rdesc->fields['table_desc_name']." WHERE id=".(int)$rfiles->fields['ftablecolumnid']);
      if ($rindextable && !$rindextable->EOF && $rindextable->fields['associated_table'])
         $indextable=$rindextable->fields['associated_table'];
   }

   if ($indextable) {
      // treat text files and pdf files differently
      if (strstr($mime,"text")) {
         $fp=fopen(file_path($db,$fileid),"r");
         // a file that cannot be opened stays unindexed, so that a later run
         // can try again
         if ($fp) {
            // start with an empty buffer for every file; text of previously
            // handled files must not end up in this file's index
            $filetext='';
            while (!feof($fp)) {
               $chunk=fread($fp,64000);
               if ($chunk===false)
                  break;
               $filetext.=$chunk;
            }
            fclose($fp);
            // strip_tags on the complete text replaces the line-wise
            // fgetss(), which was removed in PHP 8
            $filetext=strtolower(strip_tags($filetext));
            if (doindexfile ($db,$filetext,$fileid,$indextable,$rfiles->fields['ftableid'],1)) {
               $db->Execute ("UPDATE files SET indexed=1 WHERE id=".(int)$fileid);
               $textfilecounter++;
               $indexed=true;
            }
         }
      }
      // for pdf files we use ghostscript. Part of this code was taken from docmgr
      // Without a usable gs binary the file is left unindexed instead of
      // being flagged as indexed with no words stored.
      elseif (strstr($mime,"pdf") && $gsusable) {
         // both the binary and the file path are escaped before they are
         // handed to the shell
         $gscmd=escapeshellarg($gs);
         $pdfarg=escapeshellarg(file_path($db,$fileid));

         //first we have to figure out how many pages
         //are in the file. this is a rough method.
         //we have gs kick up an error after it opens
         //the file and sees how many pages there are
         $gsoutput=(string)shell_exec("$gscmd -dNODISPLAY $pdfarg -c quit");
         // the page count is the text between "through " and the next "."
         $numpages=0;
         $pos1=strpos($gsoutput,"through");
         if ($pos1!==false) {
            $tail=substr($gsoutput,$pos1);
            $pos2=strpos($tail,".");
            if ($pos2!==false)
               $numpages=(int)trim(substr($tail,8,$pos2-8));
         }

         for ($page=1;$page<=$numpages;$page++) {
            //gs the page and return as a string
            $tempstring=shell_exec("$gscmd -q -dNODISPLAY -dNOBIND -dWRITESYSTEMDICT -dSIMPLE -dFirstPage=$page -dLastPage=$page -c save -f ps2ascii.ps $pdfarg -c quit");
            $filetext=strtolower((string)$tempstring);
            doindexfile ($db,$filetext,$fileid,$indextable,$rfiles->fields['ftableid'],$page);
         }
         // as before, a pdf whose page count could not be read is still
         // flagged, so it is not retried on every run
         $db->Execute ("UPDATE files SET indexed=1 WHERE id=".(int)$fileid);
         $pdffilecounter++;
         $indexed=true;
      }
   }
   // only report files that were really handled
   if ($indexed)
      echo 'Indexed file: '.$rfiles->fields['filename'].'.<br>';
   $rfiles->MoveNext();
}
122 
// The rest just serves to report some statistics..
// Counters that were never set (no file of that kind was indexed) are
// reported as 0; empty() avoids an undefined-variable message in the output.
if (empty($textfilecounter))
   $textfilecounter=0;
if (empty($pdffilecounter))
   $pdffilecounter=0;
// microtime() yields "<fraction> <seconds>"; both parts are subtracted
// separately and added up to the elapsed time in seconds
$endtime=microtime();
list($startmu,$starts)=explode(" ",$starttime);
list($endmu,$ends)=explode(" ",$endtime);
$process=$ends-$starts;
$procesmu=$endmu-$startmu;
$pt=$process+$procesmu;
$ptime=sprintf("%0f",$pt);

echo "Indexed $textfilecounter text files and $pdffilecounter pdf files in $ptime seconds<br>";
137 
// load plugin php code if it has been defined
// The request parameter is read from $_GET; $HTTP_GET_VARS no longer exists
// in PHP 5.4+ and is only consulted as a fallback for very old installations.
$tablename=false;
if (isset($_GET['tablename']))
   $tablename=$_GET['tablename'];
elseif (isset($HTTP_GET_VARS['tablename']))
   $tablename=$HTTP_GET_VARS['tablename'];
if ($tablename) {
   // NOTE(review): tableinfo presumably resolves the table from the request
   // itself - its constructor is not part of this file, confirm there
   $tableinfo=new tableinfo($db);
   $plugin_code=get_cell($db,"tableoftables","plugin_code","id",$tableinfo->id);
   if ($plugin_code) {
      // NOTE(review): the included path comes straight from the database and
      // '@' hides a missing or broken plugin file
      @include($plugin_code);
      // and execute the cron plugin
      if (function_exists("plugin_cron"))
         plugin_cron($db,$tableinfo);
   }
}
149 
// we'll do the postgres maintenance
// VACUUM ANALYZE reclaims dead rows and refreshes the planner statistics in
// a single pass; running VACUUM and ANALYZE on their own beforehand only
// repeated the same work.
if (substr($db_type,0,8)=='postgres') {
   $db->Execute('VACUUM ANALYZE');
   echo "Finished postgres maintenance.<br>";
}
157  ?>


Generated: Sun Oct 5 21:17:35 2003 SourceForge Logo Generated by PHPXref 0.2