I need to calculate intersection between trade_name column in a grls_data table and data ->> 'product_info' in fsa_raw table by substring matching.
Value count in the tables:
- grls: 38596
- grls_data: 47434
- fsa_raw: 651380
I have a following SQL schema:
CREATE TABLE public.grls
(
  id integer NOT NULL DEFAULT nextval('grls_id_seq'::regclass),
  use_version integer,
  CONSTRAINT grls_pkey PRIMARY KEY (id),
  CONSTRAINT grls_use_version_foreign FOREIGN KEY (use_version)
      REFERENCES public.grls_data (id) MATCH SIMPLE
      ON UPDATE CASCADE ON DELETE RESTRICT
)
WITH (
  OIDS=FALSE
);
CREATE INDEX grls_use_version_index
  ON public.grls
  USING btree
  (use_version);
CREATE TABLE public.grls_data
(
  id integer NOT NULL DEFAULT nextval('grls_data_id_seq'::regclass),
  grls_id integer NOT NULL,
  trade_name text NOT NULL,
  // other columns
  CONSTRAINT grls_data_pkey PRIMARY KEY (id),
  CONSTRAINT grls_data_grls_id_foreign FOREIGN KEY (grls_id)
      REFERENCES public.grls (id) MATCH SIMPLE
      ON UPDATE CASCADE ON DELETE RESTRICT
)
WITH (
  OIDS=FALSE
);
CREATE INDEX grls_data_grls_id_index
  ON public.grls_data
  USING btree
  (grls_id);
CREATE INDEX grls_data_trade_name_index
  ON public.grls_data
  USING hash
  (trade_name COLLATE pg_catalog."default");
CREATE TABLE public.fsa_raw
(
  id integer NOT NULL DEFAULT nextval('fsa_raw_id_seq'::regclass),
  data jsonb,
  CONSTRAINT fsa_raw_pkey PRIMARY KEY (id)
)
WITH (
  OIDS=FALSE
);
CREATE INDEX fsa_raw_data_idx
  ON public.fsa_raw
  USING gin
  (data jsonb_path_ops);
CREATE INDEX words_trgm_gin
  ON public.fsa_raw
  USING gin
  ((data ->> 'product_info'::text) COLLATE pg_catalog."default" gin_trgm_ops);
So, i wrote a following SQL query for test:
WITH tns AS (
   SELECT DISTINCT ('%' || trade_name || '%') AS tn FROM grls JOIN grls_data ON
   grls.use_version = grls_data.id
)
SELECT COUNT(*) FROM fsa_raw
WHERE (data ->> 'product_info') ILIKE ANY (SELECT tn FROM tns)
But my query have a really giant cost:
Aggregate (cost=169494199.21..169494199.22 rows=1 width=8)
I can't wait for weeks, months to get query results. How do i optimize my SQL-query to get results much faster?
 
    