summary | refs | log | tree | commit | diff | stats
path: root/lib/class
diff options
context:
space:
mode:
author: Paul Arthur <paul.arthur@flowerysong.com> 2013-01-29 11:33:32 -0500
committer: Paul Arthur <paul.arthur@flowerysong.com> 2013-01-29 11:33:32 -0500
commit: 04347b9a5906c5545ffe1ec7dc539fcc94c12f67 (patch)
tree: 711cc307305a70ae08d5144616c0cc5b3c603b32 /lib/class
parent: 4fe5de82b75b114144efa0959b312480b726b8df (diff)
download: ampache-04347b9a5906c5545ffe1ec7dc539fcc94c12f67.tar.gz
ampache-04347b9a5906c5545ffe1ec7dc539fcc94c12f67.tar.bz2
ampache-04347b9a5906c5545ffe1ec7dc539fcc94c12f67.zip
Move UTF-8 cleanup into its own method
Diffstat (limited to 'lib/class')
-rw-r--r-- lib/class/ui.class.php 19
1 files changed, 19 insertions, 0 deletions
diff --git a/lib/class/ui.class.php b/lib/class/ui.class.php
index 36607e19..1a3efa15 100644
--- a/lib/class/ui.class.php
+++ b/lib/class/ui.class.php
@@ -89,6 +89,25 @@ class UI {
}
/**
+ * clean_utf8
+ *
+ * Strips characters that aren't valid in XML 1.0 (a narrower set than valid
+ * UTF-8, but close enough for our purposes). Returns the cleaned string, or
+ * null (after logging) if PCRE fails, e.g. on malformed UTF-8 input.
+ * See http://www.w3.org/TR/2006/REC-xml-20060816/#charsets
+ */
+ public static function clean_utf8($string) {
+     if ($string === null || $string === '') {
+         return $string; // nothing to clean; '0' is real data and must not be skipped
+     }
+     $clean = preg_replace('/[^\x{9}\x{a}\x{d}\x{20}-\x{d7ff}\x{e000}-\x{fffd}\x{10000}-\x{10ffff}]|[\x{7f}-\x{84}\x{86}-\x{9f}\x{fdd0}-\x{fddf}\x{1fffe}-\x{1ffff}\x{2fffe}-\x{2ffff}\x{3fffe}-\x{3ffff}\x{4fffe}-\x{4ffff}\x{5fffe}-\x{5ffff}\x{6fffe}-\x{6ffff}\x{7fffe}-\x{7ffff}\x{8fffe}-\x{8ffff}\x{9fffe}-\x{9ffff}\x{afffe}-\x{affff}\x{bfffe}-\x{bffff}\x{cfffe}-\x{cffff}\x{dfffe}-\x{dffff}\x{efffe}-\x{effff}\x{ffffe}-\x{fffff}\x{10fffe}-\x{10ffff}]/u', '', $string);
+     if ($clean === null) { // null signals a PCRE error; '' or '0' are valid results
+         debug_event('UI', 'Charset cleanup failed, something might break', 1);
+     }
+     return $clean;
+ }
+
+ /**
* flip_class
*
* First initialised with an array of two class names. Subsequent calls