5) { $items_in_summary = 3; } // text type $text_type = isset($_REQUEST['contentType']) ? intval($_REQUEST['contentType']) : -1; if (abs($text_type) > 1) { $text_type = -1; } $summ_result = array(); if (isset($summ_type) && $summ_type != 'doc') { $summ_result = summarize($summ_text, 'getSummarizedSentences', $items_in_summary, $text_type, $summ_type); if (is_array($summ_result)) { $summary = markingSummaizedText($summ_result); if ($summ_type == "URL") { $original_text = 'View original text'; } else { $original_text = markingOriginalText($summ_text, $summ_result); } } else { $summary = nl2br(htmlspecialchars(strip_tags($summ_result))); $original_text = $summ_text; } } else { $doc_id = isset($_REQUEST['docNum']) ? intval($_REQUEST['docNum']) : 0; $original_text = $predefinedSummarizerTexts[$doc_id][1]; // summarizing predefined text $messageBefore = getMessage(); $sumContent = summarizeFromXML(getSummarizedCache($doc_id)); if ($messageBefore !== getMessage()) { //something wrong resetCache($docNum, $summarizedCacheFilename); } //$ar=markingSummaizedText($sumContent); $summary = nl2br(htmlspecialchars(strip_tags($sumContent))); } $tpl_content = str_replace('%items_in_summary%', $items_in_summary, $tpl_content); for ($i = -1; $i <= 1; $i++) { $tpl_content = str_replace('%content_type_' . $i . '%', (($i == $text_type) ? 'selected' : ''), $tpl_content); } for ($i = 1; $i <= 5; $i++) { $tpl_content = str_replace('%items_in_summary_' . $i . '%', (($i == $items_in_summary) ? 'selected' : ''), $tpl_content); } $tpl_content = str_replace('%sumtext%', htmlspecialchars($summ_text_orig), $tpl_content); $tpl_content = str_replace('%sumurl%' , htmlspecialchars($summ_url_orig), $tpl_content); $tpl_content = str_replace('%summary%', $summary, $tpl_content); $tpl_content = str_replace('%original text%', $original_text, $tpl_content); foreach (array('Text', 'URL') as $k) { $tpl_content = str_replace('%selected_method_' . $k . '%', (isset($summ_type) && $summ_type == $k) ? 
'summ_method_selected' : '', $tpl_content); } } else { // open template file if (file_exists($demoSummarizerSourceTemplate) && is_readable($demoSummarizerSourceTemplate)) { $tpl_content = file_get_contents($demoSummarizerSourceTemplate); } else { showError("Can't read template file[$demoSummarizerSourceTemplate]"); exit(); } list($head, $body, $foot) = splitTemplate($tpl_content); $docs = array(); $output = ''; for ($x = 0; $x < $exampleCount && $x < count($predefinedSummarizerTexts); $x++) { //summary document number $docNum = -1; while ( (in_array($docNum, $docs)) || ($docNum == -1) ) { $docNum = $x; //uncomment the line for random generated summaries //$docNum = rand(0, count($predefinedSummarizerTexts) - 1); } array_push($docs, $docNum); $sumTitle = $predefinedSummarizerTexts[$docNum][0]; $originalText = $predefinedSummarizerTexts[$docNum][1]; // summarizing predefined text $messageBefore = getMessage(); $sumContent = summarizeFromXML(getSummarizedCache($docNum)); if ($messageBefore !== getMessage()) { //something wrong resetCache($docNum, $summarizedCacheFilename); } $tBody = str_replace("%title%", $sumTitle, $body); $tBody = str_replace("%content%", $sumContent, $tBody); $tBody = str_replace("%docNum%", $docNum, $tBody); $output .= $tBody; } $output = $head . $output . $foot; $output = str_replace("%original text%", "", $output); $tpl_content = str_replace("%help_vis%", 'hidden', $output); } if(!empty($Message)) { $tpl_content = getFileContent($webServiceError); } print($tpl_content); putError("", "", "
"); ob_end_flush(); /* case "start": default: // splits template list($head, $body, $foot) = splitTemplate($templateBody); $docs = array(); for ($x = 0; $x < $exampleCount && $x < count($predefinedSummarizerTexts); $x++) //generates random examples { //summary document number $docNum = -1; while ( (in_array($docNum, $docs)) || ($docNum == -1) ) { $docNum = $x; //uncomment the line for random generated summaries //$docNum = rand(0, count($predefinedSummarizerTexts) - 1); } array_push($docs, $docNum); $sumTitle = $predefinedSummarizerTexts[$docNum][0]; $originalText = $predefinedSummarizerTexts[$docNum][1]; // summarizing predefined text $messageBefore = getMessage(); $sumContent = summarizeFromXML(getSummarizedCache($docNum)); if ($messageBefore !== getMessage()) { //something wrong resetCache($docNum, $summarizedCacheFilename); } $tBody = str_replace("%title%", $sumTitle, $body); $tBody = str_replace("%content%", $sumContent, $tBody); $tBody = str_replace("%docNum%", $docNum, $tBody); $output .= $tBody; } $output = $head . $output . $foot; $output = str_replace("%original text%", "", $output); $output = str_replace("%help_vis%", 'hidden', $output); break; } $exampleCount = 3; //number of examples to be shown on the start summarizer page //preformat text $originalText = preformatTextForSummarizer( getCleanString( $_POST["sumtext"] ) ); //original text (doesn't allow to write tags inside) $originalText = nl2br( htmlspecialchars( $originalText ) ); $docID = getCleanString($_POST["docNum"]); if (strlen($docID) < 1) { if (strlen($originalText) < 1) { $action = "start"; } else { $action = "summarize"; } } else { $action = "docID"; if ((!isWholeNumber($docID)) || ($docID < 0) || ($docID >= count($predefinedSummarizerTexts))) { showError("Wrong summarizer docID: $docID (predefinedText count: " . count($predefinedSummarizerTexts) . 
")"); $action = "start"; } } switch($action) { case "docID": case "summarize": $demoSummarizerTemplate = $demoSummarizerResultTemplate; break; case "start": default: $demoSummarizerTemplate = $demoSummarizerSourceTemplate; break; } $templateBody = getFileContent($demoSummarizerTemplate); switch ($action) { case "summarize": $restrItem = (($_POST["restrItem"] < 1) || empty($_POST["restrItem"])) ? 3 : ($_POST["restrItem"] > 5 ? 5 : $_POST["restrItem"]); $contentType = empty($_POST["contentType"]) ? -1 : $_POST["contentType"]; $sumResult = summarize($originalText, 'getSummarizedSentences', $restrItem, $contentType, $_POST["srcType"]); //trace($sumResult); if(is_array($sumResult)) { $output = str_replace("%summary%", markingSummaizedText($sumResult), $templateBody); if($_POST["srcType"] == "URL") $output = str_replace("%original text%", 'View original text', $output); else $output = str_replace("%original text%", markingOriginalText($originalText, $sumResult), $output); $output = str_replace("%help_vis%", 'visible', $output); } else { $output = str_replace("%summary%", nl2br(htmlspecialchars(strip_tags($sumResult))), $templateBody); // original text has been screened already $output = str_replace("%original text%", $originalText, $output); $output = str_replace("%help_vis%", 'hidden', $output); } break; case "docID": $originalText = $predefinedSummarizerTexts[$docID][1]; // summarizing predefined text $messageBefore = getMessage(); $sumContent = summarizeFromXML(getSummarizedCache($docID)); if ($messageBefore !== getMessage()) { //something wrong resetCache($docNum, $summarizedCacheFilename); } //$ar=markingSummaizedText($sumContent); $output = str_replace("%summary%", nl2br(htmlspecialchars(strip_tags($sumContent))), $templateBody); $output = str_replace("%original text%", $originalText, $output); $output = str_replace("%help_vis%", 'hidden', $output); break; } global $Message; if(!empty($Message)) { $output = getFileContent($webServiceError); } echo $output; 
// putError("", "", "
// "); ob_end_flush(); */
// ^ The two lines above are the tail of the commented-out legacy page
//   controller; the `*/` closes that block comment. The leading `//` only
//   keeps the fragment inert when this section is read on its own.

/**
 * Finds the summarized text inside a nested array.
 *
 * Walks $arr depth-first and returns element 0 of the first array stored
 * under the string key "SummarizedText". A "SummarizedText" entry whose value
 * is not an array is ignored. The search stops at the first nested hit, so a
 * later sibling branch can no longer overwrite a result that was already found.
 *
 * @param mixed $arr Nested array; summarizeFromXML() passes the parseXML() result here.
 * @return mixed|null The first matching value, or null when nothing matches.
 */
function getSummarizedText($arr)
{
    if (!is_array($arr)) {
        return null;
    }

    foreach ($arr as $k => $v) {
        if ($k === "SummarizedText") {
            if (is_array($v)) {
                return isset($v[0]) ? $v[0] : null;
            }
        } elseif (is_array($v)) {
            $found = getSummarizedText($v);
            if ($found !== null) {
                return $found;
            }
        }
    }

    return null;
}

/**
 * Finds the summary sentences inside a nested array.
 *
 * Same traversal as getSummarizedText(), but looks for the string key
 * "SummarySentences" and returns the "string" entry of its element 0.
 *
 * @param mixed $arr Nested array; summarizeFromXML() passes the parseXML() result here.
 * @return mixed|null The first matching value, or null when nothing matches
 *                    (previously an undefined variable was returned).
 */
function getSummarizedSentences($arr)
{
    if (!is_array($arr)) {
        return null;
    }

    foreach ($arr as $k => $v) {
        if ($k === "SummarySentences") {
            if (is_array($v)) {
                return isset($v[0]["string"]) ? $v[0]["string"] : null;
            }
        } elseif (is_array($v)) {
            $found = getSummarizedSentences($v);
            if ($found !== null) {
                return $found;
            }
        }
    }

    return null;
}

/**
 * Parses an XML response and extracts a summary from it.
 *
 * The XML is converted with parseXML() and the result is handed to the
 * extractor named by $resultType.
 *
 * The extractor used to be spliced into a string and run through eval(), which
 * would execute arbitrary code if $resultType were ever attacker-influenced.
 * It is now validated and invoked as a callable.
 *
 * @param string   $xml        XML document to parse.
 * @param callable $resultType Extractor to apply, normally 'getSummarizedText'
 *                             or 'getSummarizedSentences'.
 * @return mixed Whatever the extractor returns.
 * @throws InvalidArgumentException When $resultType is not callable.
 */
function summarizeFromXML($xml, $resultType = 'getSummarizedText')
{
    if (!is_callable($resultType)) {
        $label = is_string($resultType) ? $resultType : gettype($resultType);
        throw new InvalidArgumentException("Unknown summary extractor: " . $label);
    }

    $phpVars = parseXML($xml);
    //trace($phpVars);

    return call_user_func($resultType, $phpVars);
}

/**
 * Summarizes an input text (or URL) and logs the request.
 *
 * Calls summarizeInXML() with the "SummarizeURL" method when $srcType is "URL"
 * and "SummarizeVary" otherwise, extracts the result via summarizeFromXML(),
 * writes the source text to the $demoSummarizerLog log and records the summary
 * through dbLogSummary().
 *
 * @param string   $originalText Text (or URL) to summarize.
 * @param callable $resultType   Extractor passed on to summarizeFromXML().
 * @param int      $restrItem    Passed through to summarizeInXML().
 * @param int      $contentType  Passed through to summarizeInXML().
 * @param string   $srcType      "URL" selects the URL method; anything else the text method.
 * @return mixed The extractor result, unchanged (an array of sentences or a scalar).
 */
function summarize($originalText, $resultType = 'getSummarizedText', $restrItem = 3, $contentType = -1, $srcType = 'Text')
{
    global $demoSummarizerLog;

    $calledMethod = ($srcType == "URL") ? "SummarizeURL" : "SummarizeVary";
    $summaryResult = summarizeFromXML(
        summarizeInXML($originalText, $calledMethod, $restrItem, $contentType),
        $resultType
    );
    logMessage($demoSummarizerLog, html_entity_decode(br2nl($originalText)), 0);

    // Flatten an array of sentences into one space-separated string for the DB log.
    $summarizedText = is_array($summaryResult) ? implode(' ', $summaryResult) : $summaryResult;

    // Either value may be absent (non-POST request, CLI); pass null instead of
    // reading an undefined index.
    $postedText = isset($_POST["sumtext"]) ? $_POST["sumtext"] : null;
    $remoteAddr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : null;

    dbLogSummary($postedText, trim((string) $summarizedText), $remoteAddr, 0);

    return $summaryResult;
}

/**
 * Renders the summary sentences as escaped text.
 *
 * NOTE(review): this copy of the function has evidently lost its HTML markup
 * (empty string appended inside the loop, $markColor never used, $v read after
 * the loop so only the last sentence is emitted). The statements are kept
 * exactly as found; restore the original markup from version control before
 * relying on the output.
 *
 * @param array $sumSentences Summary sentences.
 * @return string Escaped text of the last sentence, wrapped in newlines.
 */
function markingSummaizedText($sumSentences)
{
    global $markColor;
    $retVal = "";
    foreach($sumSentences as $k=>$v) {
        $retVal .= "";
    }
    $retVal .= "
". nl2br(htmlspecialchars(strip_tags($v)))."
";
    return $retVal;
    /* concept Concept ". ++$k." */
}

/**
 * Inserts a marker string in front of every summary sentence found in the
 * original text.
 *
 * For each summary sentence a pattern is built with getPattern() and every
 * match in $originTxt is replaced by the marker followed by the matched text.
 *
 * NOTE(review): the markup inside the marker strings is missing in this copy,
 * and the loop that appended per-concept labels was syntactically invalid
 * (part of its header is gone). That statement is quarantined verbatim below
 * instead of being guessed at; restore it from version control.
 *
 * @param string $originTxt    Original text.
 * @param array  $sumSentences Summary sentences to locate in the text.
 * @return string The text with markers inserted.
 */
function markingOriginalText($originTxt, $sumSentences)
{
    global $markColor;

    foreach ($sumSentences as $k => $v) {
        $pattern = getPattern($v);
        preg_match_all($pattern, $originTxt, $matches);

        $list = "
";
        // TODO(restore): unparseable statement preserved as found in the source:
        //   for($i=0; $i Concept ".($i+1)." "; else $list .= ""; }
        $list .= '
Top Concept ".($i+1)." 
';

        // \${1} is a literal "${1}" for preg_replace: the matched sentence itself.
        $originTxt = preg_replace($pattern, "$list\${1}", $originTxt);
    }

    return $originTxt;
}

/**
 * Builds a regular expression that finds a sentence in running text.
 *
 * The word runs (\w+) of $string are joined with `[^a-zA-Z]*`, so any
 * non-letter characters may sit between two words, and optional trailing
 * `.`, `?` or `!` characters are included. The whole match is capture group 1.
 *
 * @param string $string Sentence to search for.
 * @return string A `/`-delimited PCRE pattern. When $string contains no word
 *                characters the pattern never matches (previously it matched
 *                the empty string at every position).
 */
function getPattern($string)
{
    preg_match_all('/\w+/', $string, $matches);

    if (empty($matches[0])) {
        // An empty negative lookahead always fails; group 1 still exists.
        return '/((?!))/';
    }

    $retPattern = implode('[^a-zA-Z]*', $matches[0]);

    return '/(' . $retPattern . '[.?!]*)/';
}