Я выполняю несколько запросов к базе данных Postgres SQL, и большинство из них выполняются очень быстро, но один выполняется очень медленно. Даже при тестировании, когда я ограничиваю только 100 записей из основной таблицы выпуска (все запросы основаны на таблице выпуска), это занимает 50 секунд, тогда как все остальные запросы занимают меньше секунды.
Вот ключевые таблицы, содержащие более нескольких строк
l_artist_recording
Column | Type | Modifiers
----------------+--------------------------+-----------------------------------------------------------------
id | integer | not null default nextval('l_artist_recording_id_seq'::regclass)
link | integer | not null
entity0 | integer | not null
entity1 | integer | not null
edits_pending | integer | not null default 0
last_updated | timestamp with time zone | default now()
link_order | integer | not null default 0
entity0_credit | text | not null default ''::text
entity1_credit | text | not null default ''::text
Indexes:
"l_artist_recording_pkey" PRIMARY KEY, btree (id)
"l_artist_recording_idx_uniq" UNIQUE, btree (entity0, entity1, link, link_order)
"l_artist_recording_entity1_idx" btree (entity1)
"l_artist_recording_idx_entity1" btree (entity1)
"l_artist_recording_link" btree (link)
Check constraints:
"l_artist_recording_edits_pending_check" CHECK (edits_pending >= 0)
"l_artist_recording_link_order_check" CHECK (link_order >= 0)
релиз_записи
Table "musicbrainz.recording_release"
Column | Type | Modifiers
--------------+---------+-----------
recording_id | integer | not null
release_id | integer | not null
Indexes:
"recording_release_pkey" PRIMARY KEY, btree (recording_id, release_id)
"recording_release_recording_id" btree (recording_id)
"recording_release_release_id" btree (release_id)
исполнитель
Column | Type | Modifiers
------------------+--------------------------+-----------------------------------------------------
id | integer | not null default nextval('artist_id_seq'::regclass)
gid | uuid | not null
name | character varying | not null
sort_name | character varying | not null
begin_date_year | smallint |
begin_date_month | smallint |
begin_date_day | smallint |
end_date_year | smallint |
end_date_month | smallint |
end_date_day | smallint |
type | integer |
area | integer |
gender | integer |
comment | character varying(255) | not null default ''::character varying
edits_pending | integer | not null default 0
last_updated | timestamp with time zone | default now()
ended | boolean | not null default false
begin_area | integer |
end_area | integer |
Indexes:
"artist_pkey" PRIMARY KEY, btree (id)
"artist_idx_gid" UNIQUE, btree (gid)
"artist_idx_area" btree (area)
"artist_idx_begin_area" btree (begin_area)
"artist_idx_end_area" btree (end_area)
"artist_idx_lower_name" btree (lower(name::text))
"artist_idx_name" btree (name)
"artist_idx_sort_name" btree (sort_name)
Check constraints:
"artist_edits_pending_check" CHECK (edits_pending >= 0)
"artist_ended_check" CHECK ((end_date_year IS NOT NULL OR end_date_month IS NOT NULL OR end_date_day IS NOT NULL) AND ended = true OR end_date_year IS NULL AND end_date_month IS NULL AND end_date_day IS NULL)
это мой запрос
SELECT DISTINCT t1.entity1 as recordingId,
t1.entity0 as artistId,
t2.id as linkid,
t3.name as type,
t4.name,
t4.sort_name,
t4.gid,
t9.latinname as latinName,
t9.islatin,
t7.name as linkattribute,
at.name as artistType
FROM l_artist_recording t1
INNER JOIN link t2 on t1.link=t2.id
INNER JOIN link_type t3 on t2.link_type=t3.id
AND (t3.name='performer' OR t3.name='instrument' OR t3.name='vocal' )
LEFT JOIN link_attribute t6
ON t6.link = t2.id
LEFT JOIN link_attribute_type t7
ON t6.attribute_type = t7.id
INNER JOIN artist t4 on t1.entity0=t4.id
INNER JOIN artist_latin t9 on t9.id=t4.id
LEFT JOIN artist_type at on t4.type=at.id
INNER JOIN recording t5 on t1.entity1=t5.id
INNER JOIN recording_release rr ON t5.id = rr.recording_id
WHERE rr.release_id BETWEEN ? AND ?
ORDER BY recordingId, artistId, linkid, linkattribute";
Я проверил все таблицы, чтобы убедиться, что у них есть индекс для любого присоединяемого поля, в некоторых случаях он отсутствовал, поэтому я добавил. Я также запускал VACUUM ANALYZE
на этих таблицах, но это не имело значения.
Это мой вывод объяснения (анализа, буферизации)
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Unique (cost=336796.42..336797.05 rows=21 width=99) (actual time=93229.472..93233.580 rows=1271 loops=1)
Buffers: shared hit=5215 read=97399, temp read=138990 written=138908
-> Sort (cost=336796.42..336796.48 rows=21 width=99) (actual time=93229.468..93230.653 rows=1286 loops=1)
Sort Key: t1.entity1, t1.entity0, t2.id, t7.name, t3.name, t4.name, t4.sort_name, t4.gid, t9.latinname, t9.islatin, at.name
Sort Method: quicksort Memory: 245kB
Buffers: shared hit=5215 read=97399, temp read=138990 written=138908
-> Nested Loop (cost=334176.87..336795.96 rows=21 width=99) (actual time=91632.892..93226.628 rows=1286 loops=1)
Buffers: shared hit=5197 read=97399, temp read=138990 written=138908
-> Hash Join (cost=334176.44..336665.45 rows=21 width=103) (actual time=91632.830..93211.718 rows=1286 loops=1)
Hash Cond: (rr.recording_id = t1.entity1)
Buffers: shared hit=151 read=97315, temp read=138990 written=138908
-> Index Scan using recording_release_release_id on recording_release rr (cost=0.44..1833.24 rows=891 width=4) (actual time=0.028..2.371 rows=825 loops=1)
Index Cond: ((release_id >= 1) AND (release_id <= 100))
Buffers: shared hit=130 read=121
-> Hash (cost=333485.62..333485.62 rows=55230 width=99) (actual time=91631.881..91631.881 rows=3813157 loops=1)
Buckets: 8192 Batches: 32 (originally 1) Memory Usage: 20481kB
Buffers: shared hit=21 read=97194, temp read=61835 written=116275
-> Hash Left Join (cost=132574.41..333485.62 rows=55230 width=99) (actual time=12380.770..84898.987 rows=3813157 loops=1)
Hash Cond: (t4.type = at.id)
.....
и это суть вопроса, мой EXPLAIN говорит мне, что запрос будет выполняться плохо или нет, я не могу сказать, хотя я замечаю некоторые последовательные сканирования, которые я считаю плохими.
Чтобы получить некоторое представление о размере базы данных, таблица RELEASE содержит чуть более двух миллионов записей.
Как я могу улучшить производительность этого запроса?
Обновить предложение по никам сработало
SELECT DISTINCT t1.entity1 as recordingId,
t1.entity0 as artistId, t2.id as linkid,
t3.name as type,
t4.name,
t4.sort_name,
t4.gid,
t9.latinname as latinName,
t9.islatin,
t7.name as linkattribute,
at.name as artistType
FROM
(
SELECT t1.*
FROM recording_release rr
INNER JOIN l_artist_recording t1 on t1.entity1=rr.recording_id
WHERE rr.release_id between ? AND ?
) AS t1
INNER JOIN link t2 on t1.link=t2.id
INNER JOIN link_type t3 on t2.link_type=t3.id
AND (t3.name='performer' OR t3.name='instrument' OR t3.name='vocal' )
LEFT JOIN link_attribute t6 ON t6.link = t2.id
LEFT JOIN link_attribute_type t7 ON t6.attribute_type = t7.id
INNER JOIN artist t4 on t1.entity0=t4.id
INNER JOIN artist_latin t9 on t9.id=t4.id
LEFT JOIN artist_type at on t4.type=at.id
ORDER BY recordingId, artistId, linkid, linkattribute;
Обновление 2 Хорошо, это работало нормально при выполнении от 1 до 100 записей, но это было только временное ограничение для тестирования, потому что оно работало очень медленно.
При увеличении от 1 до 1000 (что по-прежнему намного меньше обычного размера пакета в 20 000) это заняло 13 секунд, хотя это намного лучше, чем раньше, но все же намного медленнее, чем все остальные запросы tat al занимают меньше секунды даже при 1000 записях.
Таким образом, кажется, что здесь все еще есть какая-то основная проблема, вызывающая проблему
Объяснение новых запросов Sql от 1 до 100
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Unique (cost=49182.37..49183.00 rows=21 width=99) (actual time=917.393..920.229 rows=1271 loops=1)
Buffers: shared hit=21647
-> Sort (cost=49182.37..49182.43 rows=21 width=99) (actual time=917.390..918.206 rows=1286 loops=1)
Sort Key: t1.entity1, t1.entity0, t2.id, t7.name, t3.name, t4.name, t4.sort_name, t4.gid, t9.latinname, t9.islatin, at.name
Sort Method: quicksort Memory: 245kB
Buffers: shared hit=21647
-> Hash Left Join (cost=9574.07..49181.91 rows=21 width=99) (actual time=861.507..915.523 rows=1286 loops=1)
Hash Cond: (t4.type = at.id)
Buffers: shared hit=21629
-> Nested Loop (cost=9572.93..49180.54 rows=21 width=96) (actual time=861.479..913.388 rows=1286 loops=1)
Buffers: shared hit=21628
-> Nested Loop (cost=9572.50..49028.19 rows=21 width=86) (actual time=861.455..904.887 rows=1286 loops=1)
Buffers: shared hit=16484
-> Hash Left Join (cost=9572.08..48875.86 rows=21 width=34) (actual time=861.419..895.695 rows=1286 loops=1)
Hash Cond: (t6.attribute_type = t7.id)
Buffers: shared hit=11340
-> Nested Loop Left Join (cost=9530.00..48833.50 rows=21 width=28) (actual time=859.792..891.958 rows=1286 loops=1)
Buffers: shared hit=11320
-> Hash Join (cost=9529.58..48752.33 rows=21 width=24) (actual time=859.709..884.275 rows=1232 loops=1)
Hash Cond: (t1.link = t2.id)
Buffers: shared hit=7623
-> Nested Loop (cost=0.87..39213.26 rows=2708 width=12) (actual time=0.043..12.256 rows=2559 loops=1)
Buffers: shared hit=4005
-> Index Scan using recording_release_release_id on recording_release rr (cost=0.44..1833.24 rows=891 width=4) (actual time=0.020..1.477 rows=825 loops=1)
Index Cond: ((release_id >= 1) AND (release_id <= 100))
Buffers: shared hit=251
-> Index Scan using l_artist_recording_entity1_idx on l_artist_recording t1 (cost=0.43..41.84 rows=11 width=12) (actual time=0.003..0.007 rows=3 loops=825)
Index Cond: (entity1 = rr.recording_id)
Buffers: shared hit=3754
-> Hash (cost=9487.20..9487.20 rows=3321 width=16) (actual time=859.601..859.601 rows=179079 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 8069kB
Buffers: shared hit=3618
-> Hash Join (cost=23.60..9487.20 rows=3321 width=16) (actual time=0.095..721.936 rows=179079 loops=1)
Hash Cond: (t2.link_type = t3.id)
Buffers: shared hit=3618
-> Seq Scan on link t2 (cost=0.00..7842.74 rows=423374 width=8) (actual time=0.003..286.605 rows=423374 loops=1)
Buffers: shared hit=3609
-> Hash (cost=23.55..23.55 rows=4 width=16) (actual time=0.075..0.075 rows=6 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 1kB
Buffers: shared hit=9
-> Bitmap Heap Scan on link_type t3 (cost=12.84..23.55 rows=4 width=16) (actual time=0.044..0.068 rows=6 loops=1)
Recheck Cond: (((name)::text = 'performer'::text) OR ((name)::text = 'instrument'::text) OR ((name)::text = 'vocal'::text))
Buffers: shared hit=9
-> BitmapOr (cost=12.84..12.84 rows=4 width=0) (actual time=0.032..0.032 rows=0 loops=1)
Buffers: shared hit=6
-> Bitmap Index Scan on link_type_name_idx (cost=0.00..4.28 rows=1 width=0) (actual time=0.015..0.015 rows=2 loops=1)
Index Cond: ((name)::text = 'performer'::text)
Buffers: shared hit=2
-> Bitmap Index Scan on link_type_name_idx (cost=0.00..4.28 rows=1 width=0) (actual time=0.012..0.012 rows=2 loops=1)
Index Cond: ((name)::text = 'instrument'::text)
Buffers: shared hit=2
-> Bitmap Index Scan on link_type_name_idx (cost=0.00..4.28 rows=1 width=0) (actual time=0.003..0.003 rows=2 loops=1)
Index Cond: ((name)::text = 'vocal'::text)
Buffers: shared hit=2
-> Index Only Scan using link_attribute_pkey on link_attribute t6 (cost=0.42..3.84 rows=2 width=8) (actual time=0.003..0.003 rows=1 loops=1232)
Index Cond: (link = t2.id)
Heap Fetches: 0
Buffers: shared hit=3697
-> Hash (cost=29.81..29.81 rows=981 width=14) (actual time=1.602..1.602 rows=981 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 47kB
Buffers: shared hit=20
-> Seq Scan on link_attribute_type t7 (cost=0.00..29.81 rows=981 width=14) (actual time=0.011..0.827 rows=981 loops=1)
Buffers: shared hit=20
-> Index Scan using artist_pkey on artist t4 (cost=0.43..7.24 rows=1 width=52) (actual time=0.003..0.004 rows=1 loops=1286)
Index Cond: (id = t1.entity0)
Buffers: shared hit=5144
-> Index Scan using artist_latin_pkey on artist_latin t9 (cost=0.43..7.24 rows=1 width=18) (actual time=0.003..0.004 rows=1 loops=1286)
Index Cond: (id = t1.entity0)
Buffers: shared hit=5144
-> Hash (cost=1.06..1.06 rows=6 width=11) (actual time=0.014..0.014 rows=6 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 1kB
Buffers: shared hit=1
-> Seq Scan on artist_type at (cost=0.00..1.06 rows=6 width=11) (actual time=0.003..0.007 rows=6 loops=1)
Buffers: shared hit=1
Total runtime: 922.078 ms
(75 rows)
Объяснение новых запросов Sql от 1 до 1000
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Unique (cost=213157.51..213163.93 rows=214 width=99) (actual time=27763.225..27797.833 rows=11003 loops=1)
Buffers: shared hit=186910 read=59034, temp read=26077 written=26047
-> Sort (cost=213157.51..213158.04 rows=214 width=99) (actual time=27763.223..27774.013 rows=17087 loops=1)
Sort Key: t1.entity1, t1.entity0, t2.id, t7.name, t3.name, t4.name, t4.sort_name, t4.gid, t9.latinname, t9.islatin, at.name
Sort Method: quicksort Memory: 3291kB
Buffers: shared hit=186910 read=59034, temp read=26077 written=26047
-> Hash Left Join (cost=184770.48..213149.22 rows=214 width=99) (actual time=26757.748..27732.727 rows=17087 loops=1)
Hash Cond: (t4.type = at.id)
Buffers: shared hit=186892 read=59034, temp read=26077 written=26047
-> Nested Loop (cost=184769.35..213145.63 rows=214 width=96) (actual time=26757.717..27705.989 rows=17087 loops=1)
Buffers: shared hit=186891 read=59034, temp read=26077 written=26047
-> Nested Loop (cost=184768.92..211593.13 rows=214 width=86) (actual time=26757.687..27597.879 rows=17087 loops=1)
Buffers: shared hit=118447 read=59034, temp read=26077 written=26047
-> Hash Left Join (cost=184768.49..210040.78 rows=214 width=34) (actual time=26757.646..27484.937 rows=17087 loops=1)
Hash Cond: (t6.attribute_type = t7.id)
Buffers: shared hit=50080 read=59034, temp read=26077 written=26047
-> Nested Loop Left Join (cost=184726.42..209995.76 rows=214 width=28) (actual time=26756.055..27456.678 rows=17087 loops=1)
Buffers: shared hit=50060 read=59034, temp read=26077 written=26047
-> Hash Join (cost=184726.00..209168.66 rows=214 width=24) (actual time=26756.000..27370.294 rows=14507 loops=1)
Hash Cond: (rr.recording_id = t1.entity1)
Buffers: shared hit=6525 read=59034, temp read=26077 written=26047
-> Index Scan using recording_release_release_id on recording_release rr (cost=0.44..17820.60 rows=8992 width=4) (actual time=0.019..13.477 rows=10502 loops=1)
Index Cond: ((release_id >= 1) AND (release_id <= 1000))
Buffers: shared hit=2416
-> Hash (cost=184035.19..184035.19 rows=55230 width=24) (actual time=26755.546..26755.546 rows=3637342 loops=1)
Buckets: 8192 Batches: 16 (originally 1) Memory Usage: 20481kB
Buffers: shared hit=4109 read=59034, temp written=17302
-> Hash Join (cost=9528.71..184035.19 rows=55230 width=24) (actual time=850.530..23553.356 rows=3637342 loops=1)
Hash Cond: (t1.link = t2.id)
Buffers: shared hit=4109 read=59034
-> Seq Scan on l_artist_recording t1 (cost=0.00..129942.91 rows=7041791 width=12) (actual time=0.027..4867.525 rows=7041650 loops=1)
Buffers: shared hit=491 read=59034
-> Hash (cost=9487.20..9487.20 rows=3321 width=16) (actual time=850.421..850.421 rows=179079 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 8069kB
Buffers: shared hit=3618
-> Hash Join (cost=23.60..9487.20 rows=3321 width=16) (actual time=0.100..713.303 rows=179079 loops=1)
Hash Cond: (t2.link_type = t3.id)
Buffers: shared hit=3618
-> Seq Scan on link t2 (cost=0.00..7842.74 rows=423374 width=8) (actual time=0.003..281.553 rows=423374 loops=1)
Buffers: shared hit=3609
-> Hash (cost=23.55..23.55 rows=4 width=16) (actual time=0.079..0.079 rows=6 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 1kB
Buffers: shared hit=9
-> Bitmap Heap Scan on link_type t3 (cost=12.84..23.55 rows=4 width=16) (actual time=0.042..0.070 rows=6 loops=1)
Recheck Cond: (((name)::text = 'performer'::text) OR ((name)::text = 'instrument'::text) OR ((name)::text = 'vocal'::text))
Buffers: shared hit=9
-> BitmapOr (cost=12.84..12.84 rows=4 width=0) (actual time=0.034..0.034 rows=0 loops=1)
Buffers: shared hit=6
-> Bitmap Index Scan on link_type_name_idx (cost=0.00..4.28 rows=1 width=0) (actual time=0.016..0.016 rows=2 loops=1)
Index Cond: ((name)::text = 'performer'::text)
Buffers: shared hit=2
-> Bitmap Index Scan on link_type_name_idx (cost=0.00..4.28 rows=1 width=0) (actual time=0.011..0.011 rows=2 loops=1)
Index Cond: ((name)::text = 'instrument'::text)
Buffers: shared hit=2
-> Bitmap Index Scan on link_type_name_idx (cost=0.00..4.28 rows=1 width=0) (actual time=0.003..0.003 rows=2 loops=1)
Index Cond: ((name)::text = 'vocal'::text)
Buffers: shared hit=2
-> Index Only Scan using link_attribute_pkey on link_attribute t6 (cost=0.42..3.84 rows=2 width=8) (actual time=0.002..0.003 rows=1 loops=14507)
Index Cond: (link = t2.id)
Heap Fetches: 0
Buffers: shared hit=43535
-> Hash (cost=29.81..29.81 rows=981 width=14) (actual time=1.568..1.568 rows=981 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 47kB
Buffers: shared hit=20
-> Seq Scan on link_attribute_type t7 (cost=0.00..29.81 rows=981 width=14) (actual time=0.010..0.785 rows=981 loops=1)
Buffers: shared hit=20
-> Index Scan using artist_pkey on artist t4 (cost=0.43..7.24 rows=1 width=52) (actual time=0.003..0.004 rows=1 loops=17087)
Index Cond: (id = t1.entity0)
Buffers: shared hit=68367
-> Index Scan using artist_latin_pkey on artist_latin t9 (cost=0.43..7.24 rows=1 width=18) (actual time=0.003..0.004 rows=1 loops=17087)
Index Cond: (id = t1.entity0)
Buffers: shared hit=68444
-> Hash (cost=1.06..1.06 rows=6 width=11) (actual time=0.013..0.013 rows=6 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 1kB
Buffers: shared hit=1
-> Seq Scan on artist_type at (cost=0.00..1.06 rows=6 width=11) (actual time=0.003..0.006 rows=6 loops=1)
Buffers: shared hit=1
Total runtime: 27805.582 ms
(78 rows)
Обновление 3
Основываясь на предложении Патрика, этот запрос работает нормально для 100 записей, для 1000 он по-прежнему медленный, но немного быстрее на 12 секунд.
SELECT DISTINCT t1.entity1 as recordingId,
t1.entity0 as artistId,
t2.id as linkid,
t3.name as type,
t4.name,
t4.sort_name,
t4.gid,
t9.latinname as latinName,
t9.islatin,
t7.name as linkattribute,
at.name as artistType
FROM recording_release rr
INNER JOIN l_artist_recording t1 on t1.entity1=rr.recording_id
AND rr.release_id between ? AND ?
INNER JOIN link t2 on t1.link=t2.id
INNER JOIN link_type t3 on t2.link_type=t3.id
AND (t3.name='performer' OR t3.name='instrument' OR t3.name='vocal' )
LEFT JOIN link_attribute t6
ON t6.link = t2.id
LEFT JOIN link_attribute_type t7
ON t6.attribute_type = t7.id
INNER JOIN artist t4 on t1.entity0=t4.id
INNER JOIN artist_latin t9 on t9.id=t4.id
LEFT JOIN artist_type at on t4.type=at.id
ORDER BY recordingId, artistId, linkid, linkattribute;