在文件(doc、docx、xlsx、pdf)中按短语搜索

到目前为止,我已经建立了一个带搜索功能的网站。用户可以把不同扩展名的文件(docx、doc、pdf 等)上传到数据库中,所以我需要按文件内容进行搜索,但我一直无法正确实现。我做了两个功能:1. 按文件名搜索;2. 按短语搜索。

按文件名搜索工作得很好,但按短语搜索的部分有问题。我可以把这些文件转换成文本文件,但不知道为什么无法在转换后的文件中搜索。谁能告诉我错在哪里,或者提供另一种解决方案?

这里是代码….

homepage.php

<!-- Search form: posts the query text ("name") and the chosen search mode ("search") to search1.php. -->
<form method="post" action="search1.php" class="container 50%" id="searchform">
  <input type="text" name="name" placeholder="Enter the terms you wish to search for" />
  <input type="submit" name="submit" value="Search" class="fit special" />
  <!-- Each radio button gets a visible label; without one the two search modes cannot be told apart. -->
  <input type="radio" id="name" name="search" value="name" class="fit special" />
  <label for="name">Search by file name</label>
  <input type="radio" id="phrase" name="search" value="phrase" class="fit special" />
  <label for="phrase">Search by phrase</label>
</form>

search1.php

  <?php // search1.php as posted in the question: lists uploaded files whose name, or whose extracted text, matches the submitted term.
// NOTE(review): the leading "/" makes both include paths absolute from the filesystem root - confirm that is intended.
require_once("/includes/functions.php"); ?> <?php require_once("/includes/class.php"); ?> <?php
// Connection settings for the mysql_* calls below.
$dbhost = "localhost"; $dbuser = "root"; $dbpass = "sandeep"; $dbname = "dbtuts";
mysql_connect($dbhost,$dbuser,$dbpass) or die('cannot connect to the server');
mysql_select_db($dbname) or die('database selection problem');
?> <!DOCTYPE html> <html> <head> <title>SEARCHED FILES</title> <link rel="stylesheet" href="assets/css/main.css" /> </head> <body> <section> <div class="table-wrapper"> <table class="alt"> <thead> <tr> <th>File Name</th> <th>View</th> </tr> </thead> <?php
// Only act when the request carries the form's "submit" field.
if(isset($_POST['submit'])){
  $name=$_POST['name'];
  // Loose comparison: an empty string also compares equal to NULL, so a blank search box lands in the final else branch.
  if($name!=NULL) {
    // The "search" field must be present and non-empty; "phrase" selects the content search, any other value the name search.
    if (!empty($_POST['search'])) {
      if ($_POST['search']=="phrase") {
        //search by phrase
        $searchthis = $name;
        $matches = array();
        // Collect the "file" column of all seven tables; UNION removes duplicate file names.
        $query = "SELECT file from ada ";
        $query .= "UNION ";
        $query .= "SELECT file from cdr ";
        $query .= "UNION ";
        $query .= "SELECT file from others ";
        $query .= "UNION ";
        $query .= "SELECT file from pdr ";
        $query .= "UNION ";
        $query .= "SELECT file from rr ";
        $query .= "UNION ";
        $query .= "SELECT file from sdd ";
        $query .= "UNION ";
        $query .= "SELECT file from tbl_uploads ";
        $result = mysql_query($query);
        // sample.txt is opened for writing once, before the loop, so the text of every document is appended through the same handle.
        $new_file = fopen("sample.txt","w") or die("Unable to open file!!");
        while($row=mysql_fetch_array($result)) {
          // Builds a Windows-style path: <cwd>\uploads\<file>.
          $filepath = getcwd() . "\uploads\\".$row['file'];
          $path = str_replace('//', '\\', $filepath);
          // DocxConversion (class.php) picks a reader based on the file extension.
          $Obj = new DocxConversion($path);
          $Text= $Obj->convertToText();
          fwrite($new_file,$Text);
          // $new_file is the stream resource from fopen() above, so this prints the resource, not a file name.
          echo $new_file."<br/>";
          // BUG: fopen() is given the write handle where a file name is expected, so sample.txt is never reopened for reading.
          // The write handle is also never closed.
          $handle = fopen($new_file, "r");
          if ($handle) {
            // Line-by-line, case-sensitive substring match; the first hit records the file and stops reading.
            while (!feof($handle)) {
              $buffer = fgets($handle);
              if(strpos($buffer, $searchthis) !== FALSE) {
                $matches[] = $row['file'];
                break;
              }
            }
            fclose($handle);
          }
        }
        // Without a callback, array_filter() drops empty entries.
        $matches = array_filter($matches);
        if (!empty($matches)) {
          // NOTE(review): file names are echoed into the HTML without escaping.
          foreach($matches as $row) { ?> <tr> <td><?php echo $row ?></td> <td><a href="uploads/<?php echo $row ?>" target="_blank">view file</a></td> </tr> <?php }
        } else {
          //echo " Phrase not found!!!";
          ?> <script> alert('Phrase not Found'); window.location.href='homepage.php'; </script> <?php
        }
      } else{
        //search by name
        // Tables to query, visited in this order with current()/next().
        $array = array( "db1" => "ada", "db2" => "cdr", "db3" => "others", "db4" => "pdr", "db5" => "rr", "db6" => "sdd", "db7" => "tbl_uploads", );
        //connect to the database
        // A second connection with the same credentials as the one opened at the top of the script.
        $db=mysql_connect("localhost","root","sandeep") or die ('I cannot connect to the database because:'.mysql_error());
        //-select the database to use
        $mydb=mysql_select_db("dbtuts");
        // Becomes true as soon as any table produced at least one row.
        $no_of_access = false;
        while ($db_name = current($array)) {
          //-query the database table
          // NOTE(review): $name comes straight from $_POST and is interpolated unescaped - SQL injection risk.
          $sql = "SELECT * FROM $db_name WHERE (file LIKE '%$name%')";
          //-run the query against the mysql query function
          $result=mysql_query($sql);
          $num_rows = mysql_num_rows($result);
          if($num_rows > 0) {
            //-create while loop and loop through result set
            $no_of_access = true;
            while($row=mysql_fetch_array($result)) { ?> <tr> <td><?php echo $row['file'] ?></td> <td><a href="uploads/<?php echo $row['file'] ?>" target="_blank">view file</a></td> </tr> <?php }
          } else {
            // The "not found" alert fires only on the last table (tbl_uploads), and only if no earlier table matched.
            if(!$no_of_access && $db_name == "tbl_uploads")
            //echo "<p> Result not found!!<p>";
            { ?> <script> alert('Result Not Found!!'); window.location.href='homepage.php'; </script> <?php }
          }
          next($array);
        }
      }
    } else {
      // Neither radio button was selected.
      //echo "<p>Please select an option</p>";
      ?> <script> alert('Please Select an option'); window.location.href='homepage.php'; </script> 
<?php }
  } else {
    // The search box was left empty.
    //echo "<p>Please enter a search query</p>";
    ?> <script> alert('Please enter a search query'); window.location.href='homepage.php'; </script> <?php
  }
} ?> </table> </div> </section> </body> </html> 

上面的代码可以完美地按文件名搜索,但按短语搜索的部分存在一些问题。

class.php

  <?php
// NOTE(review): the leading "/" makes this include path absolute from the filesystem root - confirm that is intended.
require_once("/includes/pdf.php");

/**
 * Extracts plain text from an uploaded document.
 *
 * Supported extensions (matched case-insensitively): doc, docx, xlsx, pptx, pdf.
 * PDF extraction is delegated to PDF2Text from pdf.php.
 */
class DocxConversion
{
    /** Path of the document to convert. */
    private $filename;

    /**
     * @param string $filePath Path of the document to convert.
     */
    public function __construct($filePath)
    {
        $this->filename = $filePath;
    }

    /************************doc file************************************/
    /**
     * Heuristic text extraction for legacy binary .doc files.
     *
     * The file is split on carriage returns; chunks that are empty or contain a
     * NUL byte are skipped, the rest are joined with spaces and stripped of every
     * character outside a small ASCII whitelist.
     *
     * @return string Extracted text, or '' when the file is empty or unreadable.
     */
    private function read_doc()
    {
        // file_get_contents() opens and closes the file itself, so no handle can leak.
        $raw = file_get_contents($this->filename);
        if ($raw === false || $raw === '') {
            return '';
        }

        $outtext = "";
        foreach (explode(chr(0x0D), $raw) as $thisline) {
            if ($thisline === '' || strpos($thisline, chr(0x00)) !== false) {
                continue;
            }
            $outtext .= $thisline . " ";
        }

        return preg_replace("/[^a-zA-Z0-9\s\,\.\-\n\r\t@\/\_\(\)]/", "", $outtext);
    }

    /************************docx file************************************/
    /**
     * Reads word/document.xml from a .docx archive and strips the markup.
     *
     * @return string|false Extracted text ('' when the archive has no
     *                      word/document.xml), or false when the archive cannot be opened.
     */
    private function read_docx()
    {
        // ZipArchive is what the xlsx/pptx readers use; the zip_* functions are deprecated.
        $zip = new ZipArchive;
        if ($zip->open($this->filename) !== true) {
            return false;
        }
        $content = $zip->getFromName("word/document.xml");
        $zip->close();
        if ($content === false) {
            return '';
        }

        // Keep table cells and paragraphs apart before the tags disappear.
        $content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
        $content = str_replace('</w:r></w:p>', "\r\n", $content);

        return strip_tags($content);
    }

    /************************PDF file************************************/
    /**
     * Extracts text from a PDF through PDF2Text.
     *
     * @return mixed Whatever PDF2Text::output() yields (presumably the decoded text).
     */
    private function read_pdf()
    {
        $a = new PDF2Text();
        $a->setFilename($this->filename);
        $a->decodePDF();

        // Return the text instead of echoing it, so convertToText() hands it to the caller
        // rather than printing it into the page and returning null.
        return $a->output();
    }

    /**
     * Parses an XML string taken from an uploaded archive and returns its text content.
     *
     * @param string|false $xml XML source, or false when the archive entry was missing.
     * @return string Text with all markup removed; '' when the input is missing or not parseable.
     */
    private function xml_to_text($xml)
    {
        if ($xml === false || $xml === '') {
            return '';
        }

        $dom = new DOMDocument();
        // Uploaded files are untrusted: LIBXML_NOENT is deliberately not passed (it enables
        // entity substitution, the XXE vector) and LIBXML_NONET blocks network access.
        if (!$dom->loadXML($xml, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
            return '';
        }

        return strip_tags($dom->saveXML());
    }

    /************************excel sheet************************************/
    /**
     * Extracts the shared strings of an .xlsx workbook.
     *
     * @param string $input_file Path of the .xlsx file.
     * @return string Text of xl/sharedStrings.xml, or '' when it cannot be read.
     */
    function xlsx_to_text($input_file)
    {
        $zip_handle = new ZipArchive;
        if ($zip_handle->open($input_file) !== true) {
            return "";
        }
        $xml_datas = $zip_handle->getFromName("xl/sharedStrings.xml");
        $zip_handle->close();

        return $this->xml_to_text($xml_datas);
    }

    /*************************power point files*****************************/
    /**
     * Extracts the text of every slide of a .pptx presentation.
     *
     * @param string $input_file Path of the .pptx file.
     * @return string Concatenated slide text, or '' when the archive cannot be opened.
     */
    function pptx_to_text($input_file)
    {
        $zip_handle = new ZipArchive;
        if ($zip_handle->open($input_file) !== true) {
            return "";
        }

        $output_text = "";
        // Slides are stored as ppt/slides/slide1.xml, slide2.xml, ...; stop at the first gap.
        $slide_number = 1;
        while (($xml_index = $zip_handle->locateName("ppt/slides/slide" . $slide_number . ".xml")) !== false) {
            $output_text .= $this->xml_to_text($zip_handle->getFromIndex($xml_index));
            $slide_number++;
        }
        $zip_handle->close();

        return $output_text;
    }

    /**
     * Converts the document given to the constructor into plain text.
     *
     * @return mixed The extracted text; the string "File Not exists" when the path does not
     *               exist; the string "Invalid File Type" for unsupported extensions;
     *               false when a .docx archive cannot be opened.
     */
    public function convertToText()
    {
        if (isset($this->filename) && !file_exists($this->filename)) {
            return "File Not exists";
        }

        // PATHINFO_EXTENSION yields '' for names without an extension (no undefined index),
        // and lower-casing lets "REPORT.DOCX" be handled like "report.docx".
        $file_ext = strtolower(pathinfo((string) $this->filename, PATHINFO_EXTENSION));

        switch ($file_ext) {
            case "doc":
                return $this->read_doc();
            case "docx":
                return $this->read_docx();
            case "xlsx":
                return $this->xlsx_to_text($this->filename);
            case "pptx":
                return $this->pptx_to_text($this->filename);
            case "pdf":
                return $this->read_pdf();
            default:
                return "Invalid File Type";
        }
    }
}
?> 

上面的 class.php 代码将 doc、docx、xlsx、pdf 转换为文本。

pdf.php:http://pastebin.com/dvwySU1a ,这个类将 PDF 文件转换为文本。

这部分是错的(我认为):

  // Quoted from search1.php: $new_file is the handle returned by fopen("sample.txt","w"), yet it is echoed and then passed to fopen() where a file name is expected.
  fwrite($new_file,$Text); echo $new_file."<br/>"; $handle = fopen($new_file, "r"); 

$new_file 中保存的是前面 fopen 返回的“文件指针或 FALSE”,而不是文件名。另外,你也没有关闭这个 txt 文件(如果打算重新打开它,应该在 fwrite 之后调用 fclose)。

为什么不直接在字符串中搜索短语?为什么要先把它写入另一个 txt 文件?你可以像这里一样直接搜索文本。

最后,我自己找到了解决方案。

search1.php

 <?php
/**
 * search1.php - lists uploaded files whose name, or whose extracted text,
 * contains the term submitted from homepage.php.
 *
 * POST fields read: "submit" (form was sent), "name" (search term) and
 * "search" ("phrase" searches file contents, any other value searches file names).
 * When nothing can be listed, a JavaScript alert is shown and the browser is
 * sent back to homepage.php.
 */

// NOTE(review): the leading "/" makes both include paths absolute from the filesystem root - confirm that is intended.
require_once("/includes/functions.php");
require_once("/includes/class.php");

$dbhost = "localhost";
$dbuser = "root";
$dbpass = "sandeep";
$dbname = "dbtuts";

// Tables that store uploaded file names. This is a fixed list, never user input,
// so the names can be interpolated into SQL safely.
$tables = array("ada", "cdr", "others", "pdr", "rr", "sdd", "tbl_uploads");

// mysqli replaces the mysql_* extension, which was removed in PHP 7. Reporting is switched
// off so failures come back as return values and the original die() messages still apply.
// NOTE(review): if functions.php itself calls mysql_* functions it needs the same migration - TODO confirm.
mysqli_report(MYSQLI_REPORT_OFF);
$db = new mysqli($dbhost, $dbuser, $dbpass);
if ($db->connect_errno) {
    die('cannot connect to the server');
}
if (!$db->select_db($dbname)) {
    die('database selection problem');
}

$found = array();  // file names to list in the result table
$alert = null;     // message for the alert-and-redirect script, if any

if (isset($_POST['submit'])) {
    $name = (isset($_POST['name']) && is_string($_POST['name'])) ? $_POST['name'] : '';

    if ($name === '') {
        $alert = 'Please enter a search query';
    } elseif (empty($_POST['search'])) {
        $alert = 'Please Select an option';
    } elseif ($_POST['search'] == "phrase") {
        // ---- search by phrase: case-insensitive match against each document's text ----
        foreach ($tables as $table) {
            $result = $db->query("SELECT file FROM `$table`");
            if (!$result) {
                continue; // table missing or query failed: skip it, keep searching the others
            }
            while ($row = $result->fetch_assoc()) {
                $file = $row['file'];
                if ($file === null || $file === '') {
                    continue;
                }

                // DIRECTORY_SEPARATOR instead of a hard-coded "\" keeps the path valid off Windows too.
                $path = getcwd() . DIRECTORY_SEPARATOR . 'uploads' . DIRECTORY_SEPARATOR . $file;
                if (!is_file($path)) {
                    continue;
                }

                // The PDF reader in class.php echoes its text instead of returning it, so anything
                // printed during conversion is captured and used when no string comes back.
                ob_start();
                $converter = new DocxConversion($path);
                $text = $converter->convertToText();
                $printed = ob_get_clean();
                if (!is_string($text)) {
                    $text = $printed;
                }

                // These are convertToText()'s status strings, not document content; searching
                // them would make words like "file" match every unsupported upload.
                if ($text === "Invalid File Type" || $text === "File Not exists") {
                    continue;
                }

                // Search the text in memory; a round trip through sample.txt is unnecessary.
                if (stripos($text, $name) !== false) {
                    $found[] = $file;
                }
            }
            $result->free();
        }
        if (empty($found)) {
            $alert = 'Phrase not Found';
        }
    } else {
        // ---- search by name: substring match on the stored file name ----
        // Escape LIKE wildcards so "%" and "_" in the term are matched literally.
        $like = '%' . addcslashes($name, '%_\\') . '%';
        foreach ($tables as $table) {
            // Prepared statement: the user's term is bound, never interpolated into the SQL.
            $stmt = $db->prepare("SELECT file FROM `$table` WHERE file LIKE ?");
            if (!$stmt) {
                continue;
            }
            $stmt->bind_param('s', $like);
            if ($stmt->execute()) {
                $stmt->bind_result($file);
                while ($stmt->fetch()) {
                    $found[] = $file;
                }
            }
            $stmt->close();
        }
        if (empty($found)) {
            $alert = 'Result Not Found!!';
        }
    }
}

$db->close();
?>
<!DOCTYPE html>
<html>
<head>
<title>SEARCHED FILES</title>
<link rel="stylesheet" href="assets/css/main.css" />
</head>
<body>
<section>
<div class="table-wrapper">
<table class="alt">
<thead> <tr> <th>File Name</th> <th>View</th> </tr> </thead>
<?php foreach ($found as $file) {
    // File names come from user uploads: escape them before they reach the page.
    $safe = htmlspecialchars($file, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?>
<tr> <td><?php echo $safe ?></td> <td><a href="uploads/<?php echo $safe ?>" target="_blank">view file</a></td> </tr>
<?php } ?>
<?php if ($alert !== null) { ?>
<script> alert(<?php echo json_encode($alert) ?>); window.location.href='homepage.php'; </script>
<?php } ?>
</table>
</div>
</section>
</body>
</html>