I need to scrape a lot of pages in paralel, while my UI thread must not be blocked. I am creating thread for each page (url) and instantiateing webBrowser control in that thread to execute javascript and get html after that. When the webBrowser gets the html I m raising event on UI thread to register that browser has done its job, beacuse I want to know when all the browsers have fetched html so I can merge all the data and display it.
1.)The first probem is, some threads never raise an event so I'm stuck waiting.
2.)The second problem is I cant dispose browser without causing external browser to fire, allways pulling the rug beneath the browser so he decides to continue by opening page in users default browser I guess. But if not disposing at all, I m running out of ram.
I v been searching around, found a lot of related stuff but I fail to implement it for my use case. Here s my code:
 [System.Runtime.InteropServices.ComVisibleAttribute(true)]
    public partial class Form1 : Form
    {
        public delegate void ThreadFinishedEventHandler(object source, EventArgs e);
        public event ThreadFinishedEventHandler threadFinishedEvent;
        int threadCount = 0;
        int threadReturnedCount = 0;
        List<string> linksGlobal;
        public Form1()
        {
            InitializeComponent();
            threadFinishedEvent += new ThreadFinishedEventHandler(OnThreadFinished); 
        }
        private void Form1_Load(object sender, EventArgs e)
        {
        }
        private void btnGO_Click(object sender, EventArgs e)
        {
            scrapeLinksWithBrowsersInSeparateThreads();
        }
        private void scrapeLinksWithBrowsersInSeparateThreads()
        {
            linksGlobal = getLinks(); //10 urls all the same -> https://sports.betway.com
            threadCount = linksGlobal.Count;
            Random rand = new Random(123);
            int waitTime = 0;//trying not to be registered as DOS attack or smth
            foreach (string url in linksGlobal)
            {
                runBrowserThread(url, waitTime);
                waitTime += rand.Next(500, 3000) + 500;//each browser will start navigating withing 1 - 4 seconds interval from each other
            }
        }
        public void runBrowserThread(string url, int waitTime)
        {
            var th = new Thread(() =>
            {
                try
                {
                    WebBrowserDocumentCompletedEventHandler completed = null;
                    WebBrowser wb = new WebBrowser();
                    completed = (sndr, e) =>
                    {
                        if (e.Url.AbsolutePath != (sndr as WebBrowser).Url.AbsolutePath)
                        {
                            wb.DocumentCompleted -= completed;
                            string html = (sndr as WebBrowser).Document.Body.InnerHtml;
                            threadFinishedEvent.Raise(this, EventArgs.Empty); // I have EventExtension allowing me this
                            //wb.Dispose(); //whenever and wherever I put this it causes external browser to fire
                            // Application.ExitThread();  //this sometimes seems to cause event never firing, not shure
                        }
                    };
                    wb.DocumentCompleted += completed;
                    wb.ScriptErrorsSuppressed = true;
                    Thread.Sleep(waitTime); //tryin not to get registerd as DOS attck or smth, each browser will start navigating withing 1 - 4 seconds interval from each other
                    wb.Navigate(url);
                    Application.Run();
                }
                catch (Exception ex)
                {
                    throw ex;
                }
            });
            th.SetApartmentState(ApartmentState.STA);
            th.Start();
        }
        private void OnThreadFinished(object source, EventArgs e)
        {
            threadReturnedCount++; // i get this for smth like 3 - 5 out od 11 threads, then this event stops being raised, dunno why
            if (threadReturnedCount == threadCount)
            {
                // Do work
                //this never happens cos a lot of threads never raise event, some do
            }
        }
        private List<string> getLinks()
        {
            List<string> links = new List<string>();
            links.Add("https://sports.betway.com");
            links.Add("https://sports.betway.com");
            links.Add("https://sports.betway.com");
            links.Add("https://sports.betway.com");
            links.Add("https://sports.betway.com");
            links.Add("https://sports.betway.com");
            links.Add("https://sports.betway.com");
            links.Add("https://sports.betway.com");
            links.Add("https://sports.betway.com");
            links.Add("https://sports.betway.com");
            links.Add("https://sports.betway.com");
            return links;
        }
      }
P.S. returnign data from threads is separate problem, I did not implement it yet but first I want to solve this. I will use objectFactory which will be called from each thread like Factory.createObject(html), I will have to use some kind of locking on that Factory since it will be located on main thread.
 
    