Remove and Replace non-alphanumeric Characters from Form Inputs with regex
Remove and Clean up Form inputs on POST and GET
For your contact-us forms, you do not want malicious users to inject malware into your email, so you need to scrub and slug the submitted values with regular expressions to remove any HTML code and any non-alphanumeric characters that could be used to harm end users.
Example: (on ssh console)
$ php slug.php
<HELLO@HELLO.COM>
DIRTY:<Joe_Johnson_1234@gmail.com><?>
CLEAN:joe_johnson_1234@gmail.com
DIRTY:Hello Mr. O'Leary, I am calling to ask your help with releasing $10000<br/> in lost cash from nigeria. Click Here: <a href="http://www.getavirus.com">Free Virus</a> You gotta "trust" \'me\'
CLEAN:hello mr. o-leary, i am calling to ask your help with releasing $10000br in lost cash from nigeria. click here a href-httpwww.getavirus.com-free virusa you gotta -trust- -me
For your contact-us forms, you do not want malicious users to inject malware into your email, so you need to scrub and slug the submitted values with regular expressions to remove any HTML code and any non-alphanumeric characters that could be used to harm end users.
Shared on GIST
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Transliterate accented Latin characters to their closest ASCII form.
 *
 * Every character listed in the map below is replaced by its ASCII
 * counterpart (ligatures expand to two letters, e.g. "Æ" => "AE");
 * anything not listed is returned untouched.
 *
 * The map is written as explicit "character => replacement" pairs so the
 * two sides can never drift out of alignment. This file must be saved as
 * UTF-8: the keys are matched byte-for-byte against the input.
 *
 * @param string|array $str Text (or array of texts) to transliterate.
 * @return string|array The input with the listed characters replaced.
 */
function remove_accent($str)
{
    static $map = array(
        // Latin-1 Supplement, upper case
        'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A',
        'Æ' => 'AE', 'Ç' => 'C',
        'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
        'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
        'Ð' => 'D', 'Ñ' => 'N',
        'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ø' => 'O',
        'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U',
        'Ý' => 'Y', 'ß' => 's',
        // Latin-1 Supplement, lower case
        'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a',
        'æ' => 'ae', 'ç' => 'c',
        'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
        'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
        'ñ' => 'n',
        'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
        'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u',
        'ý' => 'y', 'ÿ' => 'y',
        // Latin Extended-A
        'Ā' => 'A', 'ā' => 'a', 'Ă' => 'A', 'ă' => 'a', 'Ą' => 'A', 'ą' => 'a',
        'Ć' => 'C', 'ć' => 'c', 'Ĉ' => 'C', 'ĉ' => 'c',
        'Ċ' => 'C', 'ċ' => 'c', 'Č' => 'C', 'č' => 'c',
        'Ď' => 'D', 'ď' => 'd', 'Đ' => 'D', 'đ' => 'd',
        'Ē' => 'E', 'ē' => 'e', 'Ĕ' => 'E', 'ĕ' => 'e', 'Ė' => 'E', 'ė' => 'e',
        'Ę' => 'E', 'ę' => 'e', 'Ě' => 'E', 'ě' => 'e',
        'Ĝ' => 'G', 'ĝ' => 'g', 'Ğ' => 'G', 'ğ' => 'g',
        'Ġ' => 'G', 'ġ' => 'g', 'Ģ' => 'G', 'ģ' => 'g',
        'Ĥ' => 'H', 'ĥ' => 'h', 'Ħ' => 'H', 'ħ' => 'h',
        'Ĩ' => 'I', 'ĩ' => 'i', 'Ī' => 'I', 'ī' => 'i', 'Ĭ' => 'I', 'ĭ' => 'i',
        'Į' => 'I', 'į' => 'i', 'İ' => 'I', 'ı' => 'i',
        'Ĳ' => 'IJ', 'ĳ' => 'ij',
        'Ĵ' => 'J', 'ĵ' => 'j', 'Ķ' => 'K', 'ķ' => 'k',
        'Ĺ' => 'L', 'ĺ' => 'l', 'Ļ' => 'L', 'ļ' => 'l', 'Ľ' => 'L', 'ľ' => 'l',
        // "Ł" used to map to a lower-case "l"; it is an upper-case letter.
        'Ŀ' => 'L', 'ŀ' => 'l', 'Ł' => 'L', 'ł' => 'l',
        'Ń' => 'N', 'ń' => 'n', 'Ņ' => 'N', 'ņ' => 'n', 'Ň' => 'N', 'ň' => 'n',
        'ŉ' => 'n',
        'Ō' => 'O', 'ō' => 'o', 'Ŏ' => 'O', 'ŏ' => 'o', 'Ő' => 'O', 'ő' => 'o',
        'Œ' => 'OE', 'œ' => 'oe',
        'Ŕ' => 'R', 'ŕ' => 'r', 'Ŗ' => 'R', 'ŗ' => 'r', 'Ř' => 'R', 'ř' => 'r',
        'Ś' => 'S', 'ś' => 's', 'Ŝ' => 'S', 'ŝ' => 's',
        'Ş' => 'S', 'ş' => 's', 'Š' => 'S', 'š' => 's',
        'Ţ' => 'T', 'ţ' => 't', 'Ť' => 'T', 'ť' => 't', 'Ŧ' => 'T', 'ŧ' => 't',
        'Ũ' => 'U', 'ũ' => 'u', 'Ū' => 'U', 'ū' => 'u', 'Ŭ' => 'U', 'ŭ' => 'u',
        'Ů' => 'U', 'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u',
        'Ŵ' => 'W', 'ŵ' => 'w',
        'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y',
        'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z', 'Ž' => 'Z', 'ž' => 'z',
        'ſ' => 's',
        // Latin Extended-B
        'ƒ' => 'f',
        'Ơ' => 'O', 'ơ' => 'o', 'Ư' => 'U', 'ư' => 'u',
        'Ǎ' => 'A', 'ǎ' => 'a', 'Ǐ' => 'I', 'ǐ' => 'i',
        'Ǒ' => 'O', 'ǒ' => 'o', 'Ǔ' => 'U', 'ǔ' => 'u',
        'Ǖ' => 'U', 'ǖ' => 'u', 'Ǘ' => 'U', 'ǘ' => 'u',
        'Ǚ' => 'U', 'ǚ' => 'u', 'Ǜ' => 'U', 'ǜ' => 'u',
        'Ǻ' => 'A', 'ǻ' => 'a', 'Ǽ' => 'AE', 'ǽ' => 'ae', 'Ǿ' => 'O', 'ǿ' => 'o',
    );

    // Replacements are pure ASCII and keys are never ASCII, so one
    // replacement can never create a new match for a later key.
    return str_replace(array_keys($map), array_values($map), $str);
}
/**
 * Build a lowercase slug from a free-form string.
 *
 * Steps, in order:
 *  1. transliterate accented letters with remove_accent();
 *  2. drop every character that is not an ASCII letter, digit, space,
 *     hyphen, dot, underscore or at-sign;
 *  3. collapse each run of spaces and/or hyphens into a single hyphen;
 *  4. strip a hyphen at the very start and at the very end;
 *  5. lowercase the result.
 *
 * @param string $str Raw input, e.g. a submitted user name or e-mail address.
 * @return string The slug.
 */
function post_slug($str)
{
    $patterns = array(
        // The hyphen is escaped on purpose. Unescaped, " -." inside a
        // character class is the range 0x20-0x2E, which would also let
        // ! " # $ % & ' ( ) * + , through into the slug.
        '/[^a-zA-Z0-9 \-._@]/',
        '/[ -]+/',
        '/^-|-$/',
    );
    $replacements = array('', '-', '');

    return strtolower(preg_replace($patterns, $replacements, remove_accent($str)));
}
/**
 * Clean a free-form message: strip characters outside the allowed set,
 * turn quote characters into hyphens, and lowercase the result.
 *
 * The three patterns are applied in order to the output of remove_accent():
 *  1. remove every character not matched by the first character class.
 *     Inside that class " -." is a RANGE (0x20 through 0x2E), so besides
 *     letters, digits, "_" and "@" the characters
 *     space ! " # $ % & ' ( ) * + , - . are all kept;
 *  2. replace each run of double and/or single quotes with one hyphen
 *     (this only has something to match because step 1 keeps quotes);
 *  3. strip a hyphen at the very start and at the very end.
 *
 * NOTE(review): the range in step 1 looks accidental, but the published
 * sample output relies on it - confirm the intent before tightening it.
 *
 * @param string $str Raw message text.
 * @return string The cleaned, lowercased message.
 */
function post_slug2($str)
{
    $search = array('/[^a-zA-Z0-9 -._@]/', '/[\"\']+/', '/^-|-$/');
    $replace = array('', '-', '');

    $ascii = remove_accent($str);
    $cleaned = preg_replace($search, $replace, $ascii);

    return strtolower($cleaned);
}
/**
 * Demo routine: echoes three sample inputs next to their cleaned form.
 *
 *  - A bare preg_replace() call on "<HELLO@HELLO.COM>". The "^" sits
 *    outside the character class, so it anchors the match to the start of
 *    the string; the sample starts with "<", which the class does not
 *    contain, so the text is printed unchanged.
 *  - An e-mail-like user name run through post_slug().
 *  - A spam-like message containing HTML and quotes run through post_slug2().
 *
 * Takes no arguments and returns nothing; all output goes through echo.
 */
function test_slug()
{
    $sample = "<HELLO@HELLO.COM>";
    echo preg_replace("/^[a-zA-Z0-9@-]/", "", $sample);
    echo "\n ";

    // User-name style input.
    $rawName = "<Joe_Johnson_1234@gmail.com><?>";
    $cleanName = post_slug($rawName);
    echo "\n", "\nDIRTY:", $rawName, "\nCLEAN:", $cleanName;

    // Message style input, assembled from the same three pieces.
    $rawMessage = "Hello Mr. O'Leary, I am calling to ask your help with releasing $10000<br/>";
    $rawMessage .= " in lost cash from nigeria. Click Here: <a href=\"http://www.getavirus.com\">Free Virus</a>";
    $rawMessage .= " You gotta \"trust\" \'me\'";
    $cleanMessage = post_slug2($rawMessage);
    echo "\n", "\nDIRTY:", $rawMessage, "\nCLEAN:", $cleanMessage;
}
?> |
$ php slug.php
<HELLO@HELLO.COM>
DIRTY:<Joe_Johnson_1234@gmail.com><?>
CLEAN:joe_johnson_1234@gmail.com
DIRTY:Hello Mr. O'Leary, I am calling to ask your help with releasing $10000<br/> in lost cash from nigeria. Click Here: <a href="http://www.getavirus.com">Free Virus</a> You gotta "trust" \'me\'
CLEAN:hello mr. o-leary, i am calling to ask your help with releasing $10000br in lost cash from nigeria. click here a href-httpwww.getavirus.com-free virusa you gotta -trust- -me
Comments
Post a Comment