I would like to find a regular expression that could find (in given HTML) the following images:
- Those captured in: src=""
- Those captured in: src=''
- Those captured in: background=""
- Those captured in: background=''
- Those captured in: url("")
- Those captured in: url('')
- Those captured in: url()
So far I have come up with:
preg_match_all("/src=((\"|'|)?(.*\.(png|gif|jpg))(\"|'|))/Ui", $strHTML, $arrMatches);
preg_match_all("/background=((\"|'|)?(.*\.(png|gif|jpg))(\"|'|))/Ui", $strHTML, $arrMatches);
preg_match_all("/url\((\"|'|)?((.*\.(png|gif|jpg))(\"|'|))\)/Ui", $strHTML, $arrMatches);
But those are incomplete in that they don't include the prefix (src/background/url) in the captured result. Also, security-wise, I think they can be improved further to prevent somebody from entering src="http://somesite.com/someurl.exe?ext=jpg"
Any help in the right direction is appreciated.
Edit:
I think I got it, although the code can surely be improved, and possibly even combined and/or optimized :)
/* match CSS url() links */
preg_match_all("/(url\((\"|'|)(.*\.(png|gif|jpg|jpeg))(\"|'|)\))/Ui", $strHTML, $arrMatches);
Array
(
    [0] => Array
        (
            [0] => url('test1.gif')
            [1] => url(test2.gif)
            [2] => url("test3.gif")
        )
    [1] => Array
        (
            [0] => url('test1.gif')
            [1] => url(test2.gif)
            [2] => url("test3.gif")
        )
    [2] => Array
        (
            [0] => '
            [1] => 
            [2] => "
        )
    [3] => Array
        (
            [0] => test1.gif
            [1] => test2.gif
            [2] => test3.gif
        )
    [4] => Array
        (
            [0] => gif
            [1] => gif
            [2] => gif
        )
    [5] => Array
        (
            [0] => '
            [1] => 
            [2] => "
        )
)
/* match img links */
preg_match_all("/(src=(\"\'??)(.*\.(png|gif|jpg|jpeg))(\"\'??))/Ui", $strHTML, $arrMatches);
/* match background links */
preg_match_all("/(background=(\"\'??)(.*\.(png|gif|jpg|jpeg))(\"\'??))/Ui", $strHTML, $arrMatches);
 
     
    