Clean Word HTML using Regular Expressions

function cleanHTML($html) {
    ///
    /// Removes all FONT and SPAN tags, and all Class and Style attributes.
    /// Designed to get rid of non-standard Microsoft Word HTML tags.
    ///
    // start by completely removing all unwanted tags
    $html = ereg_replace("<(/)?(font|span|del|ins)[^>]*>","",$html);
    // then run another pass over the html (twice), removing unwanted attributes
    $html = ereg_replace("<([^>]*)(class|lang|style|size|face)=(\"[^\"]*\"|'[^']*'|[^>]+)([^>]*)>","<\\1>",$html);
    …