I'm building a couple of scrapers, and now I'm trying to write a script that runs the corresponding spiders with URLs collected from a DB, but I can't find a way to do this.
I have this in my spider:
class ElCorteIngles(scrapy.Spider):
    name = 'ElCorteIngles'
    url = ''
    DEBUG = False

    def start_requests(self):
        if self.url != '':
            yield scrapy.Request(url=self.url, callback=self.parse)

    def parse(self, response):
        # Get product name
        try:
            self.p_name = response.xpath('//*[@id="product-info"]/h2[1]/a/text()').get()
        except Exception:
            print(f'{CERROR} Problem while getting product name from website - {self.name}')
        # Get product price
        try:
            self.price_no_cent = response.xpath('//*[@id="price-container"]/div/span[2]/text()').get()
            self.cent = response.xpath('//*[@id="price-container"]/div/span[2]/span[1]/text()').get()
            self.currency = response.xpath('//*[@id="price-container"]/div/span[2]/span[2]/text()').get()
            if self.currency is None:
                self.currency = response.xpath('//*[@id="price-container"]/div/span[2]/span[1]/text()').get()
                self.cent = None
        except Exception:
            print(f'{CERROR} Problem while getting product price from website - {self.name}')
        # Join self.price_no_cent with self.cent
        try:
            if self.cent is not None:
                self.price = str(self.price_no_cent) + str(self.cent)
                self.price = self.price.replace(',', '.')
            else:
                self.price = self.price_no_cent
        except Exception:
            print(f'{CERROR} Problem while joining price with cents - {self.name}')
        # Return data
        if self.DEBUG:
            print([self.p_name, self.price, self.currency])
        data_collected = ShopScrapersItems()
        data_collected['url'] = response.url
        data_collected['p_name'] = self.p_name
        data_collected['price'] = self.price
        data_collected['currency'] = self.currency
        yield data_collected
Normally when I run the spider from the console I do:
scrapy crawl ElCorteIngles -a url='https://www.elcorteingles.pt/electrodomesticos/A26601428-depiladora-braun-senso-smart-5-5500/'
and now I need a way to do the same from an external script and get back the data_collected item that the spider yields.
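For reference, from the docs I think the bare script equivalent of that command would be something like this (just a sketch, assuming ElCorteIngles can be imported from Ruby.Ruby.spiders; it starts the crawl but doesn't yet capture the yielded item):
from scrapy.crawler import CrawlerProcess
from Ruby.Ruby.spiders import ElCorteIngles

process = CrawlerProcess({})
process.crawl(ElCorteIngles, url='https://www.elcorteingles.pt/electrodomesticos/A26601428-depiladora-braun-senso-smart-5-5500/')
process.start()  # blocks until the crawl is finished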
What I currently have in my external script is this:
import scrapy
from scrapy.crawler import CrawlerProcess
import sqlalchemy as db
# Import internal libraries
from Ruby.Ruby.spiders import *
# Variables
engine = db.create_engine('mysql+pymysql://DATABASE_INFO')
class Worker(object):
    def __init__(self):
        self.crawler = CrawlerProcess({})
    def scrape_new_links(self):
        conn = engine.connect()
        # Get all new links from DB and scrape them
        query = 'SELECT * FROM Ruby.New_links'
        result = conn.execute(query)
        for x in result:
            telegram_id = x[1]
            email = x[2]
            phone_number = x[3]
            url = x[4]
            spider = x[5]
            
            # In this case the spider will be ElCorteIngles and the url
            # https://www.elcorteingles.pt/electrodomesticos/A26601428-depiladora-braun-senso-smart-5-5500/
            self.crawler.crawl(spider, url=url)
            self.crawler.start()
Worker().scrape_new_links()
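One thing I've read in the Scrapy docs is that start() blocks and the underlying reactor can't be restarted, so calling it inside the for loop probably only works for the first row; and to actually get data_collected back into the script I was thinking of connecting to the item_scraped signal. A rough sketch of what I mean (assuming the script runs inside the project so spiders can be looked up by their name attribute, with the DB rows hard-coded here):
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

collected = []  # every item that makes it through the pipeline ends up here

def handle_item(item, response, spider):
    collected.append(dict(item))

# these would come from the Ruby.New_links query; hard-coded for the example
rows = [
    ('ElCorteIngles',
     'https://www.elcorteingles.pt/electrodomesticos/A26601428-depiladora-braun-senso-smart-5-5500/'),
]

process = CrawlerProcess(get_project_settings())
for spider_name, url in rows:
    crawler = process.create_crawler(spider_name)  # look the spider up by its name
    crawler.signals.connect(handle_item, signal=signals.item_scraped)
    process.crawl(crawler, url=url)

process.start()  # single blocking call, after all crawls are scheduled
print(collected)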
I also don't know if passing url=url in self.crawler.crawl() is the proper way to give the URL to the spider, but let me know what you think.
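From the docs, my understanding is that keyword arguments passed to crawl() are forwarded to the spider's constructor and become attributes, exactly like -a url=... on the command line, so the url = '' class attribute should simply get overridden. Making that explicit in the spider would look something like this:
import scrapy

class ElCorteIngles(scrapy.Spider):
    name = 'ElCorteIngles'

    def __init__(self, url='', *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.url = url  # set both by -a url=... and by crawl(..., url=url)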
All of the yielded data is currently handled by a pipeline.
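Another idea I had, since the items already go through a pipeline, is a small collecting pipeline (hypothetical, it would need to be added to ITEM_PIPELINES) that stores everything in a class-level list the external script can read after start() returns:
class CollectorPipeline:
    items = []  # shared list; the script can read CollectorPipeline.items after the crawl

    def process_item(self, item, spider):
        CollectorPipeline.items.append(dict(item))
        return item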
I think there is no need for extra info but if you need any just let me know!