I try crawl many url in the same domain. I have to url list in the string. I want to search regex in string and find urls. But re.match() always return none. I test my regex and it working. This is my code:
# -*- coding: UTF-8 -*-
import scrapy
import codecs 
import re
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy import Request
from scrapy.selector import HtmlXPathSelector
from hurriyet.items import HurriyetItem
class hurriyet_spider(CrawlSpider):
    name = 'hurriyet'
    allowed_domains = ['hurriyet.com.tr']
    start_urls = ['http://www.hurriyet.com.tr/gundem/']
    rules = (Rule(SgmlLinkExtractor(allow=('\/gundem(\/\S*)?.asp$')),'parse',follow=True),) 
    def parse(self, response):
        image = HurriyetItem()
        text =  response.xpath("//a/@href").extract()
        print text
        urls = ''.join(text)
        page_links = re.match("(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))", urls, re.M)
        image['title'] = response.xpath("//h1[@class = 'title selectionShareable'] | //h1[@itemprop = 'name']/text()").extract()
        image['body'] = response.xpath("//div[@class = 'detailSpot']").extract()
        image['body2'] = response.xpath("//div[@class = 'ctx_content'] ").extract()
        print page_links
        return image, text
 
    