fetch(PDO::FETCH_OBJ)) {
            // Process HTML in row's columns
            $columnsChanged = $this->processInformationObjectI18nHtml($io);

            // Update total column values changed
            if ($columnsChanged) {
                $changedCount++;
                $columnsChangedCount += $columnsChanged;
            }

            // Report progress
            $message = 'Processed information object '.$io->id;

            if ($columnsChanged) {
                $message .= ' ('. $columnsChanged . ' changes)';
            }

            print $message ."\n";

            $rowCount++;
        }

        // Report summary of processing
        $message = 'Processed '. $rowCount .' information objects.';

        if ($changedCount) {
            $message .= ' Changed '. $changedCount .' information objects';
            $message .= ' ('. $columnsChangedCount .' field values changed).';
        }

        print $message ."\n";
    }

    /**
     * Determine what information object i18n columns are populated
     * and update them.
     *
     * A column is selected when its value is non-empty and differs from its
     * strip_tags() result; the selected columns are then passed on to
     * transformHtmlInI18nTableColumns() for the actual database update.
     *
     * @param stdClass $io row of information object i18n data
     *
     * @return integer number of columns changed
     */
    private function processInformationObjectI18nHtml(&$io)
    {
        // Determine what column values contain HTML
        $columnValues = array();

        foreach (i18nRemoveHtmlTags::$columns as $column) {
            // Store column name/value for processing if it contains tags
            if ($io->{$column} && ($io->{$column} != strip_tags($io->{$column}))) {
                $columnValues[$column] = $io->{$column};
            }
        }

        // Update database with transformed column values
        $this->transformHtmlInI18nTableColumns(
            'information_object_i18n',
            $io->id,
            $io->culture,
            $columnValues
        );

        return count($columnValues);
    }

    /**
     * Transform HTML column values into text and update specified i18n table row
     *
     * Only columns whose value differs from its strip_tags() result are
     * written. When no column qualifies, no query is executed.
     *
     * @param string $table i18n table name
     * @param integer $id ID of row in an i18n table
     * @param string $culture culture code of a row in an i18n table
     * @param array $columnValues key/value array of column/value data to process
     *
     * @return void
     */
    private function transformHtmlInI18nTableColumns($table, $id, $culture, $columnValues)
    {
        // Assemble SET assignments and note the transformed column values
        $assignments = array();
        $values = array();

        foreach ($columnValues as $column => $value) {
            // Only update if tags are found
            if ($value != strip_tags($value)) {
                $assignments[] = $column .'=?';
                $values[] = $this->transformHtmlToText($value);
            }
        }

        // Nothing contained tags, so there is nothing to write
        if (!count($assignments)) {
            return;
        }

        // Bind the row identifiers as parameters rather than concatenating
        // them into the SQL text; they follow the SET values in order
        $query = 'UPDATE '. $table .' SET '. implode(', ', $assignments) .' WHERE id=? AND culture=?';
        $values[] = $id;
        $values[] = $culture;

        QubitPdo::prepareAndExecute($query, $values);
    }

    /**
     * Transform HTML into text using the Document Object Model
     *
     * The HTML is parsed into a DOMDocument, tag-specific transformations are
     * applied, and the serialized result is passed through strip_tags() and
     * trim().
     *
     * @param string $html HTML to transform into text
     *
     * @return string transformed text
     */
    private function transformHtmlToText($html)
    {
        // Parse HTML
        // NOTE(review): the 'HTML-ENTITIES' target of mb_convert_encoding() is
        // deprecated as of PHP 8.2 - revisit when raising the supported PHP version
        $doc = new DOMDocument();
        $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));

        // Apply transformations
        $this->transformDocument($doc);

        // Convert to string and strip leading/trailing whitespace
        // NOTE(review): saveXml() serializes text nodes with XML escaping, so
        // characters such as "&" may come back as entities (e.g. "&amp;") -
        // confirm whether they should be decoded before being stored
        return trim(strip_tags($doc->saveXml($doc->documentElement)));
    }

    /**
     * Transform specific tags within a DOM document
     *
     * @param DOMDocument $doc DOM document
     *
     * @return void
     */
    private function transformDocument(&$doc)
    {
        // Create text representations of various HTML tags
        $this->transformDocumentLinks($doc);
        $this->transformDocumentLists($doc);
        $this->transformDocumentDescriptionLists($doc);
        $this->transformDocumentBreaks($doc);

        // Deal with paragraphs last, as other transformations create them
        $this->transformDocumentParasIntoNewlines($doc);
    }

    /**
     * Transform link tags into text
     *
     * Each link is replaced by a text node holding its text content, followed
     * by the href in square brackets when an href is present. A leading
     * "mailto:" scheme (matched case-insensitively) is dropped from the href.
     *
     * @param DOMDocument $doc DOM document
     *
     * @return void
     */
    private function transformDocumentLinks(&$doc)
    {
        $linkList = $doc->getElementsByTagName('a');

        // The node list is live: replacing a link removes it from the list,
        // so keep taking the first item until none remain
        while ($linkList->length > 0) {
            $linkNode = $linkList->item(0);

            $linkText = $linkNode->textContent;
            $linkHref = $linkNode->getAttribute('href');

            if ($linkHref) {
                if (0 === strpos(strtolower($linkHref), 'mailto:')) {
                    // The check above established the prefix at offset 0, so
                    // cut it off by length (this also handles mixed case)
                    $linkHref = substr($linkHref, strlen('mailto:'));
                }

                $linkText .= ' ['. $linkHref .']';
            }

            $newTextNode = $doc->createTextNode($linkText);
            $linkNode->parentNode->replaceChild($newTextNode, $linkNode);
        }
    }

    /**
     * Transform unordered list-related tags into text and enclose in
     *

tags * * @param DOMDocument $doc DOM document * * @return void */ private function transformDocumentLists(&$doc) { $ulList = $doc->getElementsByTagName('ul'); // Loop through each