Strip XML- or HTML-like tags from a string
1 2 private String stripTags(String HTMLString) { 3 String noHTMLString = HTMLString.replaceAll("\\<.*?>",""); 4 return noHTMLString; 5 }
13482 users tagging and storing useful source code snippets
Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.
1 2 private String stripTags(String HTMLString) { 3 String noHTMLString = HTMLString.replaceAll("\\<.*?>",""); 4 return noHTMLString; 5 }
# REXML has to be loaded before its Document class can be reopened.
require 'rexml/document'

class REXML::Document
  # Returns the serialized document as plain text: every <span> / </span>
  # tag is replaced by a single space (so adjacent spans do not run
  # together) and every remaining tag is removed.
  def text()
    self.to_s.gsub(/<\/?span>/," ").gsub(/<\/?[^>]+>/, "")
  end
end

html = "<body><p><span>guardian.co.uk, UK -</span><span>11 hours ago</span>
</p><p>Baltimore is not the inner circle of <strong>hell</strong>. It is not
entirely devoured by a drug economy that serves as its last viable industry.
<strong>...</strong></p><div style='clear:both'/></body>"

# Fully qualified: a bare `Document` only resolves after `include REXML`.
doc_html = REXML::Document.new(html)
puts doc_html.text
using System.Text.RegularExpressions;
// ...
/// <summary>
/// Removes every XML/HTML-like tag (a '&lt;' up to and including the next '&gt;')
/// from the given text. Tags that contain line breaks are removed as well.
/// </summary>
/// <param name="in_HTML">The markup to clean.</param>
/// <returns>The input with all tags removed.</returns>
public static string RemoveHTML(string in_HTML)
{
    // The original body referenced an undefined variable (lv_HTML) instead of
    // the parameter. "[^>]*" matches the same text as the lazy "(.|\n)*?"
    // but without the alternation-driven backtracking.
    return Regex.Replace(in_HTML, "<[^>]*>", "");
}
# Evaluates to the tag wrapped in curly quotes when its first character is
# anything other than A-Z, a-z, 0-9, '-', '_' or '.'; otherwise evaluates to nil.
# \A anchors at the start of the whole string, so an empty tag simply does not
# match (tag[0] is nil there and calling match on it raised), and a tag that
# contains line breaks is quoted once rather than once per line.
"“#{tag}”" if tag =~ /\A[^A-Za-z0-9\-_.]/
#!/usr/bin/python
import os
import sys

# requires ID3 module, easily googled
from ID3 import *

# sys.argv[0] is this script itself, so only the real arguments are processed.
for arg in sys.argv[1:]:
    fullfilename = arg

    # This only works for mp3 files, I would love suggestions for mp4 tags
    id3info = ID3(fullfilename)

    # Print command useful for logging.
    print(id3info)

    # Check if album info exists
    if not id3info.has_key('ALBUM'):
        print('appending album tag')
        # Define the album from the name of the directory that contains the
        # file. os.path replaces the manual slicing with rfind(), which was
        # never imported and cut relative paths in the wrong place.
        folder = os.path.dirname(os.path.abspath(fullfilename))
        album = os.path.basename(folder)
        id3info.album = album
        if id3info.album == album:
            print('success!')
    else:
        print('nothing to change')
# Rewrite PHP short open tags ("<?") to the long form ("<?php") in every
# regular file below the current directory, editing the files in place.
#
# A single pass with a negative lookahead replaces the former two passes
# ("<?php" -> "<?" and then "<?" -> "<?php"), which also turned "<?xml" into
# "<?phpxml" and "<?=" into "<?php=". Tags that already read "<?php" are
# left alone.
#
# "./*" keeps the same set of files as "*" but protects against names that
# start with "-"; "{} +" hands many files to one perl process instead of
# starting one process per file.
find ./* -type f -exec perl -i -wpe 's/<\?(?!php|xml|=)/<?php/g' {} +
class String
  # Building blocks for the pattern that recognises one markup tag.
  # An attribute key and a tag name share the same pattern object.
  TYPO_ATTRIBUTE_KEY = /[\w:_-]+/
  TYPO_TAG_KEY = TYPO_ATTRIBUTE_KEY
  # An attribute value is either a bare alphanumeric word or a quoted string.
  TYPO_ATTRIBUTE_VALUE = /(?:[A-Za-z0-9]+|(?:'[^']*?'|"[^"]*?"))/
  # A key, optionally followed by "= value".
  TYPO_ATTRIBUTE = /(?:#{TYPO_ATTRIBUTE_KEY}(?:\s*=\s*#{TYPO_ATTRIBUTE_VALUE})?)/
  # One or more whitespace-separated attributes.
  TYPO_ATTRIBUTES = /(?:#{TYPO_ATTRIBUTE}(?:\s+#{TYPO_ATTRIBUTE})*)/
  # A complete opening, closing, self-closing, declaration or comment-like tag.
  TAG = %r{<[!/?\[]?(?:#{TYPO_TAG_KEY}|--)(?:\s+#{TYPO_ATTRIBUTES})?\s*(?:[!/?\]]+|--)?>}

  # Returns a copy of the string with all markup tags removed, every run of
  # whitespace collapsed to one space, and surrounding whitespace trimmed.
  def strip_html
    without_markup = gsub(TAG, '')
    without_markup.gsub(/\s+/, ' ').strip
  end

  # Returns the visible text of every <a ... rel="tag"> link in the string.
  # Links whose content is empty once the markup is stripped (for example a
  # link that only wraps an image) are left out.
  def tags
    anchors = scan(/<a\s+[^>]*\s*rel=\s*(.?)tag\1[^>]*>(.+?)<\/a>/i)
    labels = anchors.map do |captures|
      begin
        captures.last.strip_html
      rescue StandardError
        nil
      end
    end
    labels.compact.reject { |label| label.strip.empty? }
  end
end

# Example usage

s = %{<a href="http://www.docstrangelove.com/tag/civil-war" rel="tag">civil war</a> <a href="http://www.technorati.com/tag/civil+war" rel="tag"><img src="http://www.docstrangelove.com/wp-content/plugins/UltimateTagWarrior/technoratiicon.jpg" alt="Technorati tag page for civil war"/></a> <a href="http://www.docstrangelovecom/tag/iraq" rel="tag">iraq</a> <a href="http://www.technorati.com/tag/iraq" rel="tag"><img src="http://www.docstrangelove.com/wp-content/plugins/UltimateTagWarrior/technoratiicon.jpg" alt="Technorati tag page for iraq"/></a>}

s.tags
# => ["civil war", "iraq"]
/**
 * Parses a String of Tags
 *
 * Tags are space delimited. Either single or double quotes mark a phrase.
 * Odd quotes will cause everything on their right to reflect as one single
 * tag or phrase. All white-space within a phrase is converted to single
 * space characters. Quotes buried within tags are ignored! Duplicate tags
 * are ignored, even duplicate phrases that are equivalent.
 *
 * Tags are lower-cased before they are compared and stored. Duplicates are
 * detected with a strict string comparison, so numeric-looking tags such as
 * "10" and "1e1" are kept as two separate tags.
 *
 * The input is split with preg_split() rather than strtok(), so a strtok()
 * sequence that the caller has in progress is not disturbed.
 *
 * @param string $sTagString the raw tag string
 * @return array the list of tags, in order of first appearance
 */
function ParseTagString($sTagString)
{
    $arTags = array();      // Array of Output
    $cPhraseQuote = null;   // The quote that opened the current phrase, null when outside a phrase
    $sPhrase = '';          // The tag or phrase currently being built
    $sQuotes = "'\"";       // Single and Double Quotes

    // Split on runs of Space, Return, Newline and Tab; empty tokens are dropped
    $arTokens = preg_split('/[ \r\n\t]+/', (string) $sTagString, -1, PREG_SPLIT_NO_EMPTY);

    // A trailing null marks the end of input so an unterminated phrase is still emitted
    $arTokens[] = null;

    foreach ($arTokens as $sToken)
    {
        if ($sToken === null)
        {
            // End of input: ensure that the last phrase is marked as ended
            $cPhraseQuote = null;
        }
        elseif ($cPhraseQuote !== null)
        {
            // Within a phrase: the phrase ends when the token ends with the opening quote
            $bClosesPhrase = (substr($sToken, -1) === $cPhraseQuote);
            $sWord = $bClosesPhrase ? (string) substr($sToken, 0, -1) : $sToken;

            // Append the word, with a single leading space if necessary
            if ($sWord !== '') $sPhrase .= (($sPhrase !== '') ? ' ' : '') . $sWord;
            if ($bClosesPhrase) $cPhraseQuote = null;
        }
        elseif (strpos($sQuotes, $sToken[0]) === false)
        {
            // A plain tag without a leading quote
            $sPhrase = $sToken;
        }
        elseif ((strlen($sToken) > 1) && ($sToken[0] === substr($sToken, -1)))
        {
            // The token begins AND ends the phrase, trim both quotes
            $sPhrase = (string) substr($sToken, 1, -1);
        }
        else
        {
            // The token opens a phrase: remove the leading quote and remember it
            $sPhrase = (string) substr($sToken, 1);
            $cPhraseQuote = $sToken[0];
        }

        // Outside a phrase the prepared tag is complete and can be added to the output
        if (($cPhraseQuote === null) && ($sPhrase !== ''))
        {
            $sPhrase = strtolower($sPhrase);
            if (!in_array($sPhrase, $arTags, true)) $arTags[] = $sPhrase;
            $sPhrase = '';
        }
    }

    return $arTags;
}
/**
 * Reverses ParseTagString()
 *
 * Joins the tags with single spaces. A tag that contains a space is wrapped
 * in quotes: double quotes by default, single quotes when the tag contains a
 * double quote directly followed by a space (inside double quotes that
 * sequence would end the phrase early when the string is parsed again).
 *
 * The array is walked by key, so arrays whose keys are not 0..n-1 (for
 * example the result of array_unique() or array_filter()) are handled too.
 *
 * @param array $arTags the tags to join
 * @return string the tag string
 */
function CreateTagString($arTags)
{
    foreach ($arTags as $mKey => $sTag)
    {
        $sTag = (string) $sTag;

        // Tags without a space need no quoting
        if (strpos($sTag, ' ') === false) continue;

        // We cant handle double-mixed quotes, so the first matching rule wins
        $cRequiredQuote = (strpos($sTag, '" ') !== false) ? "'" : '"';
        $arTags[$mKey] = $cRequiredQuote . $sTag . $cRequiredQuote;
    }
    return implode(' ', $arTags);
}
// Sample tag strings, one per behaviour of ParseTagString() / CreateTagString()
$arTestInputs = array(
    "this test ensures that words are correctly split",
    "in this test \"phrases\" and \"multi-word phrases\" are tested",
    "this test shows the behaviour if an \"odd quote is detected",
    "this test shows that 'different quotes' work too",
    "but mixed quotes fail: \"test phrase' does not stop on the quote",
    "which can be usefull in some cases where \"the systems' requirements\" state that it is necessary",
    "quotes need not be attached to \" their phrase \"",
    "embedded\"quotes are ignored!",
    "this is also usefull and demonstrates the system's coolness",
    "redundant white-space is removed from \" tags and phrases\"",
    "\"\"double quotes\"\" will result in single quotes!",
    "remember that 'double-quotes\" may be nested within single quotes'",
    "TaGs ArE NOT case SENsITiVE!",
    "a duplicate tag will be removed from the tag list",
    "even a \" complex phrase\" that is equivalent to another 'compleX PHrASe '"
);

// For every input print the raw string, the parsed tag array and the tag
// string rebuilt from that array, followed by a horizontal rule.
foreach ($arTestInputs as $sTest)
{
    $arParsed = ParseTagString($sTest);

    echo "<pre>$sTest</pre>";
    echo "<pre>" . print_r($arParsed, true) . "</pre>";
    echo "<pre>" . CreateTagString($arParsed) . "</pre>";
    echo "<hr />";
}
1 2 # for tag 'series60' AND 'sql' 3 http://www.bigbold.com/snippets/tags/series60/sql/ 4 5 # this doesn't work 6 http://www.bigbold.com/snippets/tag/series60/sql/ 7 8 # all of these work (single tag case) 9 http://bigbold.com/snippets/tag/series60/ 10 http://bigbold.com/snippets/tags/series60/ 11 http://www.bigbold.com/snippets/tag/series60/ 12 http://www.bigbold.com/snippets/tags/series60/
/**
 * Category handling: records which tags were used together.
 *
 * $tags is a semicolon-separated list. For every tag in it, the row of
 * $table whose tagname equals that tag is created or updated so that its
 * relatedtags column (also semicolon-separated) contains all other tags of
 * the list, merged with whatever was stored there before.
 *
 * @param string   $tags  semicolon-separated tag names
 * @param string   $table table with the columns tagname and relatedtags
 * @param resource $db    link identifier passed on to the mysql_* functions
 * @return void
 */
function updateTags($tags, $table, $db)
{
    // include_once: a plain include() would declare recordExists() again on
    // the second call of updateTags() and abort with a fatal error.
    include_once("recordExists.php"); // see http://bigbold.com/snippets/posts/show/464

    // Parse tags into array cats; drop empty entries ("a;;b", "") and repeats
    $cats = array_unique(array_filter(explode(";", $tags), 'strlen'));

    // NOTE(review): $table is concatenated into the SQL as an identifier and
    // cannot be escaped like a value - it must never come from user input.
    foreach ($cats as $cat)
    {
        // Everything in cats that is not cat
        $others = array_diff($cats, array($cat));

        // Tag values go into quoted SQL literals, so they must be escaped
        $catSql = mysql_real_escape_string($cat, $db);

        // NOTE(review): recordExists() receives the raw tag as before;
        // confirm that it escapes its arguments itself.
        if (!recordExists("tagname", $cat, $table, $db))
        {
            // There is no category by this name yet: insert it
            $relatedSql = mysql_real_escape_string(implode(";", $others), $db);
            mysql_query("INSERT INTO ".$table." (tagname,relatedtags) VALUES ('$catSql','$relatedSql')", $db) or die(mysql_error($db));
        }
        else
        {
            // Select the already related tags
            $query = mysql_query("SELECT relatedtags FROM ".$table." WHERE tagname='$catSql'", $db) or die(mysql_error($db));
            $row = mysql_fetch_array($query);

            // An empty column would explode() into one empty tag, so treat it as "no tags"
            $stored = ($row && strlen((string) $row['relatedtags']) > 0) ? explode(";", $row['relatedtags']) : array();

            // Merge the other tags of this call with the stored ones and strip duplicates
            $merged = array_unique(array_merge($others, $stored));
            $relatedSql = mysql_real_escape_string(implode(";", $merged), $db);
            mysql_query("UPDATE ".$table." SET relatedtags='$relatedSql' WHERE tagname='$catSql'", $db) or die(mysql_error($db));
        }
    }
}