#!/usr/bin/env python

"""
Copyright (c) 2006-2023 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission
"""
2010-06-30 12:09:33 +00:00
import codecs
2013-07-13 19:25:49 +02:00
import gzip
2019-03-26 14:37:01 +01:00
import io
2011-04-19 22:54:13 +00:00
import logging
2008-10-15 15:38:22 +00:00
import re
2012-09-12 11:50:38 +02:00
import struct
2013-07-13 19:25:49 +02:00
import zlib
2008-10-15 15:38:22 +00:00
2016-05-31 11:08:23 +02:00
from lib . core . common import Backend
2010-12-25 10:16:20 +00:00
from lib . core . common import extractErrorMessage
2011-01-04 15:49:20 +00:00
from lib . core . common import extractRegexResult
2019-03-29 02:28:16 +01:00
from lib . core . common import filterNone
2014-01-02 12:09:58 +01:00
from lib . core . common import getPublicTypeMembers
2019-01-22 01:20:27 +01:00
from lib . core . common import getSafeExString
2018-01-02 00:42:20 +01:00
from lib . core . common import isListLike
2015-10-29 16:52:17 +01:00
from lib . core . common import randomStr
2012-01-11 14:28:08 +00:00
from lib . core . common import readInput
2012-03-08 10:19:34 +00:00
from lib . core . common import resetCookieJar
2011-06-08 14:42:48 +00:00
from lib . core . common import singleTimeLogMessage
2012-09-11 14:58:52 +02:00
from lib . core . common import singleTimeWarnMessage
2018-01-02 00:42:20 +01:00
from lib . core . common import unArrayizeValue
2019-05-03 13:20:15 +02:00
from lib . core . convert import decodeHex
from lib . core . convert import getBytes
2019-05-27 13:09:13 +02:00
from lib . core . convert import getText
2019-05-06 00:54:21 +02:00
from lib . core . convert import getUnicode
2008-10-15 15:38:22 +00:00
from lib . core . data import conf
from lib . core . data import kb
2010-06-30 12:09:33 +00:00
from lib . core . data import logger
2016-09-09 11:06:38 +02:00
from lib . core . decorators import cachedmethod
2019-10-31 20:59:14 +01:00
from lib . core . decorators import lockedmethod
2019-07-03 10:56:05 +02:00
from lib . core . dicts import HTML_ENTITIES
2016-05-31 11:08:23 +02:00
from lib . core . enums import DBMS
2013-03-20 11:10:24 +01:00
from lib . core . enums import HTTP_HEADER
2012-01-11 14:28:08 +00:00
from lib . core . enums import PLACE
2012-12-06 14:14:19 +01:00
from lib . core . exception import SqlmapCompressionException
2014-12-03 10:06:21 +01:00
from lib . core . settings import BLOCKED_IP_REGEX
2012-01-11 14:28:08 +00:00
from lib . core . settings import DEFAULT_COOKIE_DELIMITER
2012-10-19 11:02:14 +02:00
from lib . core . settings import EVENTVALIDATION_REGEX
2021-01-07 14:20:03 +01:00
from lib . core . settings import HEURISTIC_PAGE_SIZE_THRESHOLD
2019-05-28 14:12:35 +02:00
from lib . core . settings import IDENTYWAF_PARSE_LIMIT
2012-09-12 11:50:38 +02:00
from lib . core . settings import MAX_CONNECTION_TOTAL_SIZE
2011-01-04 15:49:20 +00:00
from lib . core . settings import META_CHARSET_REGEX
2011-11-22 12:18:24 +00:00
from lib . core . settings import PARSE_HEADERS_LIMIT
2021-06-08 21:48:43 +02:00
from lib . core . settings import PRINTABLE_BYTES
2016-05-31 11:08:23 +02:00
from lib . core . settings import SELECT_FROM_TABLE_REGEX
2019-11-11 12:24:42 +01:00
from lib . core . settings import UNICODE_ENCODING
2012-10-19 11:02:14 +02:00
from lib . core . settings import VIEWSTATE_REGEX
2008-11-17 00:00:54 +00:00
from lib . parse . headers import headersParser
2008-10-15 15:38:22 +00:00
from lib . parse . html import htmlParser
2019-04-19 11:24:34 +02:00
from thirdparty import six
2012-07-14 17:01:04 +02:00
from thirdparty . chardet import detect
2019-05-24 13:54:10 +02:00
from thirdparty . identywaf import identYwaf
2019-03-11 14:36:01 +01:00
from thirdparty . odict import OrderedDict
2019-05-15 10:57:22 +02:00
from thirdparty . six import unichr as _unichr
2019-05-24 13:54:10 +02:00
from thirdparty . six . moves import http_client as _http_client
2008-10-15 15:38:22 +00:00
2019-10-31 20:59:14 +01:00
@lockedmethod
def forgeHeaders(items=None, base=None):
    """
    Prepare HTTP Cookie, HTTP User-Agent and HTTP Referer headers to use when performing
    the HTTP requests

    items: optional dict of header name -> value overrides (None values are dropped)
    base: optional base header collection (defaults to conf.httpHeaders)

    Returns an OrderedDict of the final headers to send.
    """

    items = items or {}

    # drop entries explicitly set to None so they don't override base headers
    for _ in list(items.keys()):
        if items[_] is None:
            del items[_]

    headers = OrderedDict(conf.httpHeaders if base is None else base)
    headers.update(items.items())

    # str subclass whose capitalize()/title() are no-ops, used to preserve
    # the caller-provided header-name casing (see linked Python issue below)
    class _str(str):
        def capitalize(self):
            return _str(self)

        def title(self):
            return _str(self)

    _ = headers
    headers = OrderedDict()

    for key, value in _.items():
        success = False

        # case-insensitive de-duplication of header names
        for _ in headers:
            if _.upper() == key.upper():
                del headers[_]
                break

        if key.upper() not in (_.upper() for _ in getPublicTypeMembers(HTTP_HEADER, True)):
            try:
                headers[_str(key)] = value  # dirty hack for http://bugs.python.org/issue12455
            except UnicodeEncodeError:  # don't do the hack on non-ASCII header names (they have to be properly encoded later on)
                pass
            else:
                success = True

        if not success:
            # normalize well-known header names to canonical Word-Word casing
            key = '-'.join(_.capitalize() for _ in key.split('-'))
            headers[key] = value

    if conf.cj:
        if HTTP_HEADER.COOKIE in headers:
            for cookie in conf.cj:
                # skip cookies scoped to a different domain than the target
                if cookie is None or cookie.domain_specified and not (conf.hostname or "").endswith(cookie.domain):
                    continue

                if ("%s=" % getUnicode(cookie.name)) in getUnicode(headers[HTTP_HEADER.COOKIE]):
                    if conf.loadCookies:
                        # user-supplied cookie file takes precedence over the header value
                        conf.httpHeaders = filterNone((item if item[0] != HTTP_HEADER.COOKIE else None) for item in conf.httpHeaders)
                    elif kb.mergeCookies is None:
                        message = "you provided a HTTP %s header value, while " % HTTP_HEADER.COOKIE
                        message += "target URL provides its own cookies within "
                        message += "HTTP %s header which intersect with yours. " % HTTP_HEADER.SET_COOKIE
                        message += "Do you want to merge them in further requests? [Y/n] "

                        kb.mergeCookies = readInput(message, default='Y', boolean=True)

                    if kb.mergeCookies and kb.injection.place != PLACE.COOKIE:
                        # replace the existing cookie value (everywhere it appears)
                        # with the one coming from the cookie jar
                        def _(value):
                            return re.sub(r"(?i)\b%s=[^%s]+" % (re.escape(getUnicode(cookie.name)), conf.cookieDel or DEFAULT_COOKIE_DELIMITER), ("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value))).replace('\\', r'\\'), value)

                        headers[HTTP_HEADER.COOKIE] = _(headers[HTTP_HEADER.COOKIE])

                        if PLACE.COOKIE in conf.parameters:
                            conf.parameters[PLACE.COOKIE] = _(conf.parameters[PLACE.COOKIE])

                        conf.httpHeaders = [(item[0], item[1] if item[0] != HTTP_HEADER.COOKIE else _(item[1])) for item in conf.httpHeaders]

                elif not kb.testMode:
                    # cookie not present yet - append it to the outgoing header
                    headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (conf.cookieDel or DEFAULT_COOKIE_DELIMITER, getUnicode(cookie.name), getUnicode(cookie.value))

            if kb.testMode and not any((conf.csrfToken, conf.safeUrl)):
                resetCookieJar(conf.cj)

    return headers
2017-05-17 00:22:18 +02:00
def parseResponse(page, headers, status=None):
    """
    @param page: the page to parse to feed the knowledge base htmlFp
    (back-end DBMS fingerprint based upon DBMS error messages return
    through the web application) list and absFilePaths (absolute file
    paths) set.
    """

    if headers:
        headersParser(headers)

    if page:
        content = "%s\n\n%s" % (status, page) if status else page
        htmlParser(content)
2008-10-15 15:38:22 +00:00
2016-09-09 11:06:38 +02:00
@cachedmethod
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper namings used in codecs module

    encoding: candidate charset name (str, bytes or list-like wrapper)
    warn: when True, log a single warning for names codecs can't decode with

    Returns the normalized codec name, or None when unusable.

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if isinstance(encoding, six.binary_type):
        encoding = getUnicode(encoding)

    if isListLike(encoding):
        encoding = unArrayizeValue(encoding)

    if encoding:
        encoding = encoding.lower()
    else:
        # empty/None input is returned unchanged (preserves "" vs None)
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "iso-8859-0": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}

    # strip anything after a parameter/list delimiter (e.g. "utf-8;q=0.7")
    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    encoding = encoding.replace("&quot;", "")

    # popular typos/errors
    if "8858" in encoding:
        encoding = encoding.replace("8858", "8859")  # iso-8858 -> iso-8859
    elif "8559" in encoding:
        encoding = encoding.replace("8559", "8859")  # iso-8559 -> iso-8859
    elif "8895" in encoding:
        encoding = encoding.replace("8895", "8859")  # iso-8895 -> iso-8859
    elif "5889" in encoding:
        encoding = encoding.replace("5889", "8859")  # iso-5889 -> iso-8859
    elif "5589" in encoding:
        encoding = encoding.replace("5589", "8859")  # iso-5589 -> iso-8859
    elif "2313" in encoding:
        encoding = encoding.replace("2313", "2312")  # gb2313 -> gb2312
    elif encoding.startswith("x-"):
        encoding = encoding[len("x-"):]  # x-euc-kr -> euc-kr / x-mac-turkish -> mac-turkish
    elif "windows-cp" in encoding:
        encoding = encoding.replace("windows-cp", "windows")  # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"
    elif encoding.find("utf-8") > 0:
        encoding = "utf-8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "charset", "*") or not re.search(r"\w", encoding):
        # placeholder/garbage values carry no usable charset information
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except:
        encoding = None

    # final sanity check: the codec must actually be able to decode bytes
    if encoding:
        try:
            six.text_type(getBytes(randomStr()), encoding)
        except:
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
            encoding = None

    return encoding
2011-04-18 13:38:46 +00:00
def getHeuristicCharEncoding(page):
    """
    Returns page encoding charset detected by usage of heuristics

    Reference: https://chardet.readthedocs.io/en/latest/usage.html

    >>> getHeuristicCharEncoding(b"<html></html>")
    'ascii'
    """

    key = hash(page)

    # consult the per-page cache first; otherwise run chardet on a size-capped prefix
    if key in kb.cache.encoding:
        result = kb.cache.encoding[key]
    else:
        result = detect(page[:HEURISTIC_PAGE_SIZE_THRESHOLD])["encoding"]

    kb.cache.encoding[key] = result

    if result:
        if result.lower().replace('-', "") == UNICODE_ENCODING.lower().replace('-', ""):
            infoMsg = "heuristics detected web page charset '%s'" % result
            singleTimeLogMessage(infoMsg, logging.INFO, result)

    return result
2019-11-12 22:51:11 +01:00
def decodePage(page, contentEncoding, contentType, percentDecode=True):
    """
    Decode compressed/charset HTTP response

    page: raw response body (bytes)
    contentEncoding: value of the Content-Encoding header (or None)
    contentType: value of the Content-Type header (or None)
    percentDecode: when True, also decode %XX percent-escapes in text pages

    >>> getText(decodePage(b"<html>foo&amp;bar</html>", None, "text/html; charset=utf-8"))
    '<html>foo&bar</html>'
    >>> getText(decodePage(b"&#x9;", None, "text/html; charset=utf-8"))
    '\\t'
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    # normalize header values; non-string (e.g. None) becomes ""
    if hasattr(contentEncoding, "lower"):
        contentEncoding = contentEncoding.lower()
    else:
        contentEncoding = ""

    if hasattr(contentType, "lower"):
        contentType = contentType.lower()
    else:
        contentType = ""

    if contentEncoding in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
            if contentEncoding == "deflate":
                data = io.BytesIO(zlib.decompress(page, -15))  # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
            else:
                data = gzip.GzipFile("", "rb", 9, io.BytesIO(page))
                # gzip trailer carries the uncompressed size (bomb protection)
                size = struct.unpack("<l", page[-4:])[0]  # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py
                if size > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")

            page = data.read()
        except Exception as ex:
            if b"<html" not in page:  # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored)
                errMsg = "detected invalid data for declared content "
                errMsg += "encoding '%s' ('%s')" % (contentEncoding, getSafeExString(ex))
                singleTimeLogMessage(errMsg, logging.ERROR)

                warnMsg = "turning off page compression"
                singleTimeWarnMessage(warnMsg)

                kb.pageCompress = False
                raise SqlmapCompressionException

    if not conf.encoding:
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType.find("charset=") != -1:
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))

        # accept a charset declaration when only one source provides it, when the
        # page is pure printable ASCII bytes, or when both sources agree
        if (any((httpCharset, metaCharset)) and (not all((httpCharset, metaCharset)) or isinstance(page, six.binary_type) and all(_ in PRINTABLE_BYTES for _ in page))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
            kb.pageEncoding = httpCharset or metaCharset  # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence
            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
        else:
            kb.pageEncoding = None
    else:
        # user-forced encoding always wins
        kb.pageEncoding = conf.encoding

    # can't do for all responses because we need to support binary files too
    if isinstance(page, six.binary_type) and "text/" in contentType:
        if not kb.disableHtmlDecoding:
            # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
            if b"&#" in page:
                page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else b"0%s" % _.group(1)), page)
                page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

            # e.g. %20%28%29
            if percentDecode:
                if b"%" in page:
                    page = re.sub(b"%([0-9a-f]{2})", lambda _: decodeHex(_.group(1)), page)
                    page = re.sub(b"%([0-9A-F]{2})", lambda _: decodeHex(_.group(1)), page)  # Note: %DeepSee_SQL in CACHE

            # e.g. &amp;
            page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page)

            # fall back to chardet heuristics when no charset was declared
            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))

            if (kb.pageEncoding or "").lower() == "utf-8-sig":
                kb.pageEncoding = "utf-8"
                if page and page.startswith(b"\xef\xbb\xbf"):  # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling)
                    page = page[3:]

            page = getUnicode(page, kb.pageEncoding)

            # e.g. &#8217;&#8230;&#8482;
            if "&#" in page:
                def _(match):
                    retVal = match.group(0)
                    try:
                        retVal = _unichr(int(match.group(1)))
                    except (ValueError, OverflowError):
                        pass
                    return retVal
                page = re.sub(r"&#(\d+);", _, page)

            # e.g. &zeta;
            page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page)
        else:
            page = getUnicode(page, kb.pageEncoding)

    return page
2010-12-25 10:16:20 +00:00
2019-05-24 13:54:10 +02:00
def processResponse(page, responseHeaders, code=None, status=None):
    """
    Post-process a retrieved HTTP response: feed parsers, extract DBMS error
    messages, run identYwaf WAF/IPS detection, auto-adjust ASP.NET hidden
    fields (EVENTVALIDATION/VIEWSTATE) and flag CAPTCHA/browser-verification
    and IP-blocking protection mechanisms.

    page: decoded response body (may be None)
    responseHeaders: response header object (may be None)
    code: HTTP status code
    status: HTTP status line
    """

    kb.processResponseCounter += 1

    page = page or ""

    # stop parsing headers after a fixed number of responses (performance)
    parseResponse(page, responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None, status)

    if not kb.tableFrom and Backend.getIdentifiedDbms() in (DBMS.ACCESS,):
        kb.tableFrom = extractRegexResult(SELECT_FROM_TABLE_REGEX, page)
    else:
        kb.tableFrom = None

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.warning("parsed DBMS error message: '%s'" % msg.rstrip('.'))

    if not conf.skipWaf and kb.processResponseCounter < IDENTYWAF_PARSE_LIMIT:
        # reconstruct a raw HTTP response for identYwaf signature matching
        rawResponse = "%s %s %s\n%s\n%s" % (_http_client.HTTPConnection._http_vsn_str, code or "", status or "", "".join(getUnicode(responseHeaders.headers if responseHeaders else [])), page[:HEURISTIC_PAGE_SIZE_THRESHOLD])

        with kb.locks.identYwaf:
            identYwaf.non_blind.clear()

            if identYwaf.non_blind_check(rawResponse, silent=True):
                for waf in set(identYwaf.non_blind):
                    if waf not in kb.identifiedWafs:
                        kb.identifiedWafs.add(waf)
                        errMsg = "WAF/IPS identified as '%s'" % identYwaf.format_name(waf)
                        singleTimeLogMessage(errMsg, logging.CRITICAL)

    if kb.originalPage is None:
        # keep ASP.NET anti-tampering fields in sync with the latest response
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            match = re.search(regex, page)

            if match and PLACE.POST in conf.parameters:
                name, value = match.groups()

                if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                    if conf.paramDict[PLACE.POST][name] in page:
                        continue
                    else:
                        msg = "do you want to automatically adjust the value of '%s'? [y/N] " % name

                        if not readInput(msg, default='N', boolean=True):
                            continue

                        conf.paramDict[PLACE.POST][name] = value
                        conf.parameters[PLACE.POST] = re.sub(r"(?i)(%s=)[^&]+" % re.escape(name), r"\g<1>%s" % value.replace('\\', r'\\'), conf.parameters[PLACE.POST])

    if not kb.browserVerification and re.search(r"(?i)browser.?verification", page or ""):
        kb.browserVerification = True
        warnMsg = "potential browser verification protection mechanism detected"
        if re.search(r"(?i)CloudFlare", page):
            warnMsg += " (CloudFlare)"
        singleTimeWarnMessage(warnMsg)

    if not kb.captchaDetected and re.search(r"(?i)captcha", page or ""):
        for match in re.finditer(r"(?si)<form.+?</form>", page):
            if re.search(r"(?i)captcha", match.group(0)):
                kb.captchaDetected = True
                break

        if re.search(r"<meta[^>]+\brefresh\b[^>]+\bcaptcha\b", page):
            kb.captchaDetected = True

        if kb.captchaDetected:
            warnMsg = "potential CAPTCHA protection mechanism detected"
            if re.search(r"(?i)<title>[^<]*CloudFlare", page):
                warnMsg += " (CloudFlare)"
            singleTimeWarnMessage(warnMsg)

    if re.search(BLOCKED_IP_REGEX, page):
        warnMsg = "it appears that you have been blocked by the target server"
        singleTimeWarnMessage(warnMsg)