True normalization for strings


function convert2Uri(name) {
	var result:String=name.toLowerCase();
	var pattern:java.util.regex.Pattern;
	
	result=@ReplaceSubstring(result, 
			["?","&","@","£","$","%","^","<",">","*","/","'","#","~","(",")","+","=","!",";","\"",":",",","|","\\","{","}","[","]"],
			[""]
	);

	result=@ReplaceSubstring(result, 
			["--"," - "," "],
			["-"]
	);
	
	result=@ReplaceSubstring(result, 
			[" - ","--","---"],
			["-"]
	);

	result=java.text.Normalizer.normalize(result, java.text.Normalizer.Form.NFKD);
	
	pattern = java.util.regex.Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
	result=pattern.matcher(result).replaceAll("");	
	
	// dotless i cannot be normalized by Java!:
	pattern = java.util.regex.Pattern.compile("\\u0131");
	result=pattern.matcher(result).replaceAll("i");
	
	return result;
}
All code submitted to OpenNTF XSnippets, whether submitted as a "Snippet" or in the body of a Comment, is provided under the Apache License Version 2.0. See Terms of Use for full details.
No comments yet. Log in first to comment...