This URI is not allowed. There should be no dots in the URI.

'; $result = ''; $httpstatus = 'not applicable.'; $uri = ''; } else { $result = load($uri,$options); #var_dump($result); $httpstatus = substr($result['headers']['firstline'],9); } // fail if not HTTP 200 if (! $fail && (substr($httpstatus,0,3) != '200') ) { $fail = true; $failuremessage = '

A problem occurred while trying to access the page. The HTTP error code is: '.$httpstatus.'

'; } // fail if mime-type not text/html or application/xhtml+xml // and set mimetype and mimetypename if (! $fail) { // get mimetype $mimetype = 'unknown'; $mimetypename = 'Unknown'; if (strpos($result['headers']['Content-Type'], 'xml')) { $mimetype = 'xml'; } else if (strpos($result['headers']['Content-Type'], 'html')) { $mimetype = 'html'; } if (strpos($result['headers']['Content-Type'], ';')) { $parts = explode(';', $result['headers']['Content-Type']); $mimetypename = $parts[0]; } else { $mimetypename = $result['headers']['Content-Type']; } if ($mimetypename != 'text/html' && $mimetypename != 'application/xhtml+xml') { $fail = true; $failuremessage = "

This application only supports analysis of pages served as text/html or application/xhtml+xml. You tried to access a page that was served as $mimetypename."; } } if (! $fail) { #============================== GET DATA ================================= $errors = array(); $warnings = array(); $comments = array(); $nonUTF8 = ''; $char_encoding = array(); // assoc array containing encoding values and code // top level can be 'http', 'bom', 'xmldecl', 'httpequiv', 'html5' // 2nd level is either 'value' or 'code' $morehttpequivs = ''; $langxmllangmismatches = ''; $incorrectvalues = ''; $onlylang = ''; $onlyxmllang = ''; $s_inappropriate_xmllang = ''; // list of elements containing xml:lang in an html file $languageattributearray = array(); $xmllangonlyarray = array(); // if this is an xhtml doc, will contain a list of elements containing xml:lang but no lang attribute $langonlyarray = array(); // if this is an xhtml doc, will contain a list of elements containing lang but no xml:lang attribute $s_xmllangonlyarray = ''; // a string containing xmllangonlyarray elements as list items $s_langonlyarray = ''; // a string containing langonlyarray elements as list items $mismatchedlanguagevalues = array(); // holds elements with mismatched lang and xml:lang attribute values $incorrectvalues = array(); // holds elements with non-well-formed language values // GET HTTP CONTENT-TYPE HEADER $httpcharsetValue = ''; $httpcontenttypeHeader = ''; $char_encoding['http']['value']=''; $char_encoding['http']['code']=''; if (isset($result['headers']['Content-Type'])) { $charset = strpos($result['headers']['Content-Type'], 'charset='); if ($charset === false) { $char_encoding['http']['code']='Content-Type: '.$result['headers']['Content-Type']; } else { $char_encoding['http']['value'] = substr($result['headers']['Content-Type'],$charset+8); $char_encoding['http']['code']='Content-Type: '.$result['headers']['Content-Type']; } } // BYTE ORDER MARK // if UTF-16, convert to UTF-8 
// ---------------------------------------------------------------------------
// BYTE ORDER MARK
// Inspect the first bytes of the response body for a UTF-8 or UTF-16 BOM and
// record the result in $char_encoding['bom'].
//
// The signatures are compared as literal byte strings. The previous
// $filestart{N} curly-brace offset syntax is a parse error on PHP 8, and it
// also read uninitialised offsets when the body was shorter than the BOM.
// ---------------------------------------------------------------------------
$char_encoding['bom']['value'] = '';
$char_encoding['bom']['code'] = '';

// UTF-8 BOM: EF BB BF (239 187 191)
$filestart = substr($result['body'], 0, 3);
if ($filestart === "\xEF\xBB\xBF") {
    $char_encoding['bom']['value'] = 'UTF-8';
}

// UTF-16 BOMs: FE FF (254 255) is big-endian, FF FE (255 254) is little-endian.
// The two signatures are mutually exclusive, so at most one branch runs.
$filestart = substr($result['body'], 0, 2);
if ($filestart === "\xFE\xFF") {
    $char_encoding['bom']['value'] = 'UTF-16BE';
} elseif ($filestart === "\xFF\xFE") {
    $char_encoding['bom']['value'] = 'UTF-16LE';
}

// The regular expressions below work on bytes, so a UTF-16 body is converted
// to UTF-8 before any further scanning.
if ($char_encoding['bom']['value'] == 'UTF-16LE') {
    $result['body'] = mb_convert_encoding($result['body'], 'UTF-8', 'UTF-16LE');
}
if ($char_encoding['bom']['value'] == 'UTF-16BE') {
    $result['body'] = mb_convert_encoding($result['body'], 'UTF-8', 'UTF-16BE');
}

if ($char_encoding['bom']['value'] != '') {
    $char_encoding['bom']['code'] = "Byte-order mark: {$char_encoding['bom']['value']}";
}

// ---------------------------------------------------------------------------
// DOCTYPE
// Classify the first DOCTYPE declaration found in the body. Sets $doctype to
// one of 'html', 'xhtml', 'xhtml11', 'html5', 'unknown' or 'none', and
// $doctypename to the matching display label.
// ---------------------------------------------------------------------------
$doctypename = 'No DOCTYPE';
if (preg_match_all("/!DOCTYPE [^>]+>/i", $result['body'], $match)) {
    $declaration = $match[0][0];

    // Every match starts with "!DOCTYPE ", so a needle can never sit at
    // offset 0; the explicit !== false only makes the intent clear.
    if (strpos($declaration, 'W3C//DTD HTML 4') !== false) {
        $doctype = 'html';
        $doctypename = "HTML";
    } elseif (strpos($declaration, 'W3C//DTD XHTML 1.0') !== false) {
        $doctype = 'xhtml';
        $doctypename = "XHTML 1.0";
    } elseif (strpos($declaration, 'W3C//DTD XHTML 1.1') !== false) {
        $doctype = 'xhtml11';
        $doctypename = "XHTML 1.1";
    } elseif (preg_match("/!DOCTYPE\s+html\s*>/i", $declaration)) {
        $doctype = 'html5';
        $doctypename = "HTML5";
    } else {
        $doctype = 'unknown';
        $doctypename = "Unknown";
    }
} else {
    $doctype = 'none';
}

// ---------------------------------------------------------------------------
// get html tag
// NOTE(review): this pattern looks truncated -- as written it matches up to
// the first '>' in the body. Presumably it was meant to match the opening
// html tag; confirm against the original file before relying on $htmltag.
// NOTE(review): the str_replace() call replaces '<' with itself as written;
// presumably the replacement was an HTML entity -- confirm.
// ---------------------------------------------------------------------------
$htmltag = '';
if (preg_match_all("/]*>/i", $result['body'], $match)) {
    $htmltag = str_replace('<','<',$match[0][0]);
}

// ---------------------------------------------------------------------------
// get body tag
// NOTE(review): same apparent truncation of the pattern and of the
// str_replace() replacement as for the html tag above -- confirm.
// ---------------------------------------------------------------------------
$bodytag = '';
if (preg_match_all("/]*>/i", $result['body'], $matches)) {
    $bodytag = str_replace('<','<',$matches[0][0]);
}

// Offsets (or false) of the language attributes within the captured body tag.
$bodylangfound = strpos($bodytag, ' lang');
$bodyxmllangfound = strpos($bodytag, ' xml:lang');

// XML DECLARATION
$xmlcharsetValue = '';
$xmldeclTag = '';
$char_encoding['xmldecl']['value']=''; $char_encoding['xmldecl']['code']=''; if (preg_match_all("/<\?xml.*? encoding=([\"\'][^\"\'>]*[\"\']|[^ \"\'>]+)[^>]*>/i", $result['body'], $xmldecltagA)) { $xmldeclTag = str_replace('<','<',$xmldecltagA[0][count($xmldecltagA[0])-1]); $char_encoding['xmldecl']['code']= $xmldeclTag; if (count($xmldecltagA[1]>0)) { $char_encoding['xmldecl']['value'] = $xmldecltagA[1][count($xmldecltagA[0])-1]; $char_encoding['xmldecl']['value'] = str_replace('\'','',$char_encoding['xmldecl']['value']); $char_encoding['xmldecl']['value'] = str_replace('"','',$char_encoding['xmldecl']['value']); } else { $char_encoding['xmldecl']['value'] = ''; } } // META CHARSET ELEMENT $char_encoding['httpequiv']['value']=''; $char_encoding['httpequiv']['code']=''; $metacharsetValue = ''; $metatagCode = ''; $metacharsetCount=0; if (preg_match_all("/]*>/i", $result['body'], $metatagA)) { $char_encoding['httpequiv']['code'] = $metatagA[0][count($metatagA[0])-1]; $char_encoding['httpequiv']['code'] = str_replace('<','<',$char_encoding['httpequiv']['code']); preg_match_all("/charset=([^\"\'>\s]+)/i", $char_encoding['httpequiv']['code'], $encvalueA); if (count($encvalueA)>0) { $char_encoding['httpequiv']['value'] = $encvalueA[1][0]; } else { $char_encoding['httpequiv']['code'] = ''; } $char_encoding['httpequiv']['value'] = str_replace('\'','',$char_encoding['httpequiv']['value']); $char_encoding['httpequiv']['value'] = str_replace('"','',$char_encoding['httpequiv']['value']); if (count($metatagA[0])>1) { for ($i=0;$i'; } } } /* HTML5 CHARSET META $html5charsetValue = ''; $html5charsetTag = ''; $charset = preg_match_all("//i", $result['body'], $match); if ($charset == true) { $matchstr = $match[0][0]; $start = strpos($matchstr, 'charset='); $encoding = substr($matchstr,$start+8); $end = preg_match_all("/[a-zA-Z0-9\-\:\_\.]+/i", $encoding, $match2); $html5charsetValue = $match2[0][0]; $html5charsetTag = str_replace('<','<',$matchstr); $html5charsetTag = 
str_replace('>','>',$html5charsetTag); } */ // HTML5 CHARSET META $char_encoding['html5']['value']=''; $char_encoding['html5']['code']=''; if (preg_match_all("//i", $result['body'], $match)) { $char_encoding['html5']['code'] = $match[0][count($match[0])-1]; $char_encoding['html5']['code'] = str_replace('<','<',$char_encoding['html5']['code']); //preg_match_all("/charset=([^\"\'>\s]+|[^\"\'>\s]+[\"\'>\s])/i", $char_encoding['html5']['code'], $encvalueA); preg_match_all("/charset=[\"\'>\s]*([^\"\'>\s]+)/i", $char_encoding['html5']['code'], $encvalueA); if (count($encvalueA)>0) { $char_encoding['html5']['value'] = $encvalueA[1][0]; } else { $char_encoding['html5']['code'] = ''; } $char_encoding['html5']['value'] = str_replace('\'','',$char_encoding['html5']['value']); $char_encoding['html5']['value'] = str_replace('"','',$char_encoding['html5']['value']); // if multiple meta charset declarations, add to morehttpequivs list if (count($metatagA[0])>1) { for ($i=0;$i'; } } } //var_dump($char_encoding); // check for non-UTF8 encodings $nonUTF8 = ''; foreach ($char_encoding as $enctype){ if (strtolower($enctype['value']) != 'utf-8' && $enctype['value'] != '') { $nonUTF8 .= '

'.$enctype['code'].'

'; } } // make list of encoding values $encodingslist = ''; foreach ($char_encoding as $value) { if ($value['value'] != '') { $encodingslist .= '

'.$value['code'].'

'; } } // determine page encoding using precedence rules $pageencoding = ''; if ($httpcharsetValue != '') { $pageencoding = $httpcharsetValue; } //else if ($mimetype == 'html' && // need to work out whether document is html5, and figure out preference order for meta charset vs meta content-type // also add a check that the declaration is within the first 512 bytes of start // also add an error if there is both an html5 and content-type declaration // warn against use of utf-32 // add warning about URL submissions to advice to use utf-8 // add a check for utf-8 or utf-1 BOM for html5 // HTML LANG $htmllangValue = ''; $langs = preg_match_all("/\slang=[\"\']?([^\s\"\'\\>]+)[\s\"\'\/>]/i", $htmltag, $match); if ($langs) { $htmllangValue = $match[1][0]; } // HTML XML:LANG $htmlxmllangValue = ''; $langs = preg_match_all("/\sxml:lang=[\"\']?([^\s\"\'\\>]+)[\s\"\'\/>]/i", $htmltag, $match); if ($langs) { $htmlxmllangValue = $match[1][0]; } // HTTP CONTENT-LANGUAGE $httpcontentlangValue = ''; $httpcontentlangHeader = ''; if (isset($result['headers']['Content-Language'])) { $httpcontentlangHeader = "Content-Language: ".$result['headers']['Content-Language']; $httpcontentlangValue = $result['headers']['Content-Language']; } // META CONTENT-LANGUAGE $metacontentlangValue = ''; $metacontentlangTag = ''; $metaCLfound = preg_match_all("/]+content-language[^<>]+content\=[a-zA-Z0-9\-\s\"\'\=,]+(\/)?>/i", $result['body'], $match); if ($metaCLfound == true) { $matchstr = $match[0][0]; $start = strpos($matchstr, 'content='); $encoding = substr($matchstr,$start+8); $end = preg_match_all("/[a-zA-Z0-9\-,\s]+/i", $encoding, $match2); $metacontentlangValue = $match2[0][0]; $matchstr = str_replace('<','<',$matchstr); $metacontentlangTag = str_replace('>','>',$matchstr); } // make a list of elements containing lang and/or xml:lang attributes $langs = preg_match_all("/<[^>]+( xml:lang=| lang=)[^>]+>/i", $result['body'], $languageattributearray); //var_dump($languageattributearray); if 
($doctype=='xhtml' && $mimetype=='html') { // make a list of elements that don't contain xml:lang foreach ($languageattributearray[0] as $tag) { if (strpos($tag, ' xml:lang=') === false) { // $xmllangonlyarray[] = $tag; $s_langonlyarray .= '