СУБД. Лекция 3

СУБД
Стрелков Никита
Лекция 3

Выборка данных

Организационные моменты

Выборка данных

SELECT

[ WITH [ RECURSIVE ] with_query [, ...] ]
SELECT [ ALL | DISTINCT [ ON ( expression [, ...] ) ] ]
    [ * | expression [ [ AS ] output_name ] [, ...] ]
    [ FROM from_item [, ...] ]
    [ WHERE condition ]
    [ GROUP BY grouping_element [, ...] ]
    [ HAVING condition [, ...] ]
    [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] select ]
    [ ORDER BY expression [ ASC | DESC | USING operator ] [, ...] ]
    [ LIMIT { count | ALL } ]
    [ OFFSET start [ ROW | ROWS ] ]
    [ FETCH { FIRST | NEXT } [ count ] { ROW | ROWS } ONLY ]
    [ FOR { UPDATE | NO KEY UPDATE | SHARE | KEY SHARE }
        [ OF table_name [, ...] ] [ NOWAIT | SKIP LOCKED ] [...] ]

FROM. Типы таблиц

WHERE. Фильтрация

SELECT *
FROM persons
WHERE department_id IN (
    SELECT id FROM departments WHERE company_id = 42
)

CASE

CASE WHEN condition THEN result
     [WHEN ...]
     [ELSE result]
END
NULLIF(value1, value2)
GREATEST(value [, ...])
LEAST(value [, ...])

SELECT a,
   CASE WHEN a=1 THEN 'one'
        WHEN a=2 THEN 'two'
        ELSE 'other'
   END
FROM test;

ORDER BY. Сортировка

SELECT select_list
FROM table_expression
ORDER BY sort_expression1 [ASC | DESC] [NULLS { FIRST | LAST }]
      [, sort_expression2 [ASC | DESC] [NULLS { FIRST | LAST }] ...]

SELECT a, b FROM table1 ORDER BY a + b, c;

SELECT a + b AS sum, c FROM table1 ORDER BY sum;
SELECT a, max(b) FROM table1 GROUP BY a ORDER BY 1;

# SELECT a + b AS sum, c FROM table1 ORDER BY sum + c;
ERROR:  column "sum" does not exist
LINE 1: SELECT a + b AS sum, c FROM table1 ORDER BY sum + c;
                                                    ^

GROUP BY. Формирование групп

GROUP BY. Формирование групп

SELECT * FROM employee;
id name department grade
1 John HR A
2 Mark HR A
3 Smit IT A
4 Lili IT B
5 Alex PR B
6 Walton HR B
7 Kim PR B
8 Marry HR A
9 Paul PR C
10 Christ HR A
11 Jiji HR B
12 Bob PR B

GROUP BY. Формирование групп

SELECT grade, department, COUNT(*)
FROM employee
GROUP BY grade, department
ORDER BY grade, department;
grade department COUNT(*)
A HR 4
A IT 1
B HR 2
B IT 1
B PR 3
C PR 1

Агрегаторы

aggregate_name (
    [ ALL | DISTINCT ] expression [ , ... ]
    [ order_by_clause ]
) [ FILTER ( WHERE filter_clause ) ]

SELECT
  string_agg(name, ',' ORDER BY name),
  avg(age),
  count(*) FILTER (WHERE age BETWEEN 13 AND 19) AS teens
FROM persons;

Агрегаторы

Функция Описание
avg(expression) Среднее арифметическое
max(expression) Максимальное значение
min(expression) Минимальное значение
sum(expression) Сумма
count(*) Кол-во строк
count(expression) Кол-во не NULL-значений

Агрегаторы

Функция Описание
bit_and(expression) Битовое "И"
bit_or(expression) Битовое "ИЛИ"
bool_and(expression), every(expression) Логическое "И"
bool_or(expression) Логическое "ИЛИ"
stddev(expression) Стандартное отклонение
variance(expression) Дисперсия (квадрат отклонения)

Агрегаторы

Функция Описание
array_agg(expression) Сформировать массив значений
json_agg(expression) Сформировать JSON-массив
jsonb_agg(expression) Сформировать JSONB-массив
json_object_agg(key, value) Сформировать JSON-объект
jsonb_object_agg(key, value) Сформировать JSONB-объект
string_agg(expression, delimiter) Конкатенация строк
xmlagg(expression) Конкатенация XML-элементов

HAVING. Фильтрация после группировки

SELECT grade, department, COUNT(*)
FROM employee
GROUP BY grade, department
HAVING COUNT(*) > 3;

SELECT * FROM (
    SELECT grade, department, COUNT(*) as employee_cnt
    FROM employee
    GROUP BY grade, department
) a
WHERE employee_cnt > 3;

LIMIT. Разбивка на страницы

ISO SQL:2003 - Window Functions:

SELECT * FROM (
  SELECT
    ROW_NUMBER() OVER (ORDER BY age ASC) AS rownumber,
    person_id, person_name, age
  FROM person
) AS foo
WHERE rownumber <= 10;

SELECT * FROM (
  SELECT
    RANK() OVER (ORDER BY age ASC) AS ranking,
    person_id, person_name, age
  FROM person
) AS foo
WHERE ranking <= 10;

LIMIT. Разбивка на страницы

Синтаксис MySQL:

SELECT
  person_id, person_name, age
FROM person
LIMIT 10
OFFSET 20;

Синтаксис DB2:

SELECT
  person_id, person_name, age
FROM person
FETCH FIRST 10 ROWS ONLY;

PostgreSQL поддерживает оба синтаксиса

SUBQUERIES


SELECT
    col0,
    (SELECT col1 FROM table1 WHERE table1.id = table0.id),
    (SELECT col2 FROM table1 WHERE table1.id = table0.id)
FROM
    table0;

SELECT *
FROM t1
WHERE column1 = (SELECT MAX(column2) FROM t2);

SELECT *
FROM t1 AS t
WHERE 2 = (SELECT COUNT(*) FROM t1 WHERE t1.id = t.id);

SUBQUERIES

Коррелирующий подзапрос

Это подзапрос, который содержит ссылку на столбцы из включающего его запроса (назовем его основным). Таким образом, коррелирующий подзапрос будет выполняться для каждой строки основного запроса, так как значения столбцов основного запроса будут меняться.

Коррелирующий:

SELECT *
FROM t1 AS t
WHERE 2 = (SELECT COUNT(*) FROM t1 WHERE t1.id = t.id);
Не коррелирующий:

SELECT *
FROM t1
WHERE column1 = (SELECT MAX(column2) FROM t2);

SUBQUERIES: Expressions

SELECT s1 FROM t1 WHERE s1 > ANY (SELECT s1 FROM t2);

SELECT s1 FROM t1 WHERE s1 = ANY (SELECT s1 FROM t2);
SELECT s1 FROM t1 WHERE s1 IN (SELECT s1 FROM t2);

SELECT s1 FROM t1 WHERE s1 > ALL (SELECT s1 FROM t2);

SELECT s1 FROM t1 WHERE s1 <> ALL (SELECT s1 FROM t2);
SELECT s1 FROM t1 WHERE s1 NOT IN (SELECT s1 FROM t2);

ROW SUBQUERIES

SELECT *
FROM t1
WHERE (col1, col2)
	= (SELECT col3, col4 FROM t2 WHERE id = 10);
SELECT *
FROM t1
WHERE ROW (col1, col2)
	= (SELECT col3, col4 FROM t2 WHERE id = 10);

SELECT column1, column2, column3
FROM t1
WHERE (column1, column2, column3)
    IN (SELECT column1, column2, column3 FROM t2);

[NOT] EXISTS

SELECT DISTINCT store_type
FROM stores
WHERE EXISTS (
    SELECT *
    FROM cities_stores
    WHERE cities_stores.store_type = stores.store_type
);
SELECT DISTINCT store_type
FROM stores
WHERE NOT EXISTS (
    SELECT *
    FROM cities_stores
    WHERE cities_stores.store_type = stores.store_type
);

SUBQUERIES in FROM

SELECT ... FROM (subquery) [AS] name ...
SELECT AVG(sum_column1)
FROM (
    SELECT SUM(column1) AS sum_column1
    FROM t1
    GROUP BY column1
) AS t1;

UNION

SELECT ...
UNION [ALL | DISTINCT]
SELECT ...
[UNION [ALL | DISTINCT]
SELECT ...]
person amount
Joe 1000
Alex 2000
Bob 5000
person amount
Joe 2000
Alex 2000
Zach 35000
SELECT * FROM sales2010
UNION
SELECT * FROM sales2011;
person amount
Alex 2000
Bob 5000
Joe 1000
Joe 2000
Zach 35000
SELECT * FROM sales2010
UNION ALL
SELECT * FROM sales2011;
person amount
Joe 1000
Alex 2000
Bob 5000
Joe 2000
Alex 2000
Zach 35000

Объединения таблиц (JOIN)

Набор данных

Employee
LastName DepartmentID
Rafferty 31
Jones 33
Steinberg 33
Robinson 34
Smith 34
John
Department
DepartmentID DepartmentName
31 Sales
33 Engineering
34 Clerical
35 Marketing

CROSS JOIN

SELECT * FROM Employee CROSS JOIN Department;
Employee.
LastName
Employee.
DepartmentID
Department.
DepartmentID
Department.
DepartmentName
Rafferty 31 31 Sales
Jones 33 31 Sales
Steinberg 33 31 Sales
Robinson 34 31 Sales
Smith 34 31 Sales
John 31 Sales
Rafferty 31 33 Engineering
Jones 33 33 Engineering
Steinberg 33 33 Engineering
Robinson 34 33 Engineering
Smith 34 33 Engineering
John 33 Engineering
Rafferty 31 34 Clerical
Jones 33 34 Clerical
Steinberg 33 34 Clerical
Robinson 34 34 Clerical
Smith 34 34 Clerical
John 34 Clerical
Rafferty 31 35 Marketing
Jones 33 35 Marketing
Steinberg 33 35 Marketing
Robinson 34 35 Marketing
Smith 34 35 Marketing
John 35 Marketing

CROSS JOIN

SELECT * FROM Employee, Department;
Employee.
LastName
Employee.
DepartmentID
Department.
DepartmentID
Department.
DepartmentName
Rafferty 31 31 Sales
Jones 33 31 Sales
Steinberg 33 31 Sales
Robinson 34 31 Sales
Smith 34 31 Sales
John 31 Sales
Rafferty 31 33 Engineering
Jones 33 33 Engineering
Steinberg 33 33 Engineering
Robinson 34 33 Engineering
Smith 34 33 Engineering
John 33 Engineering
Rafferty 31 34 Clerical
Jones 33 34 Clerical
Steinberg 33 34 Clerical
Robinson 34 34 Clerical
Smith 34 34 Clerical
John 34 Clerical
Rafferty 31 35 Marketing
Jones 33 35 Marketing
Steinberg 33 35 Marketing
Robinson 34 35 Marketing
Smith 34 35 Marketing
John 35 Marketing

INNER JOIN

SELECT *
FROM Employee
INNER JOIN Department
ON Employee.DepartmentID = Department.DepartmentID;
Employee.
LastName
Employee.
DepartmentID
Department.
DepartmentID
Department.
DepartmentName
Rafferty 31 31 Sales
Jones 33 33 Engineering
Steinberg 33 33 Engineering
Robinson 34 34 Clerical
Smith 34 34 Clerical

INNER JOIN

SELECT *
FROM Employee, Department
WHERE Employee.DepartmentID = Department.DepartmentID;

Employee.
LastName
Employee.
DepartmentID
Department.
DepartmentID
Department.
DepartmentName
Rafferty 31 31 Sales
Jones 33 33 Engineering
Steinberg 33 33 Engineering
Robinson 34 34 Clerical
Smith 34 34 Clerical

NATURAL JOIN

SELECT *
FROM Employee
NATURAL JOIN Department;

DepartmentID Employee.
LastName
Department.
DepartmentName
31 Rafferty Sales
33 Jones Engineering
33 Steinberg Engineering
34 Robinson Clerical
34 Smith Clerical

JOIN USING

SELECT *
FROM Employee
JOIN Department USING (DepartmentID);

DepartmentID Employee.
LastName
Department.
DepartmentName
31 Rafferty Sales
33 Jones Engineering
33 Steinberg Engineering
34 Robinson Clerical
34 Smith Clerical

LEFT OUTER JOIN

SELECT *
FROM Employee
LEFT OUTER JOIN Department
ON Employee.DepartmentID = Department.DepartmentID;
Employee.
LastName
Employee.
DepartmentID
Department.
DepartmentID
Department.
DepartmentName
Rafferty 31 31 Sales
Jones 33 33 Engineering
Steinberg 33 33 Engineering
John
Robinson 34 34 Clerical
Smith 34 34 Clerical

RIGHT OUTER JOIN

SELECT Employee.*, Department.*
FROM Department
RIGHT OUTER JOIN Employee
ON Employee.DepartmentID = Department.DepartmentID;
Employee.
LastName
Employee.
DepartmentID
Department.
DepartmentID
Department.
DepartmentName
Rafferty 31 31 Sales
Jones 33 33 Engineering
Steinberg 33 33 Engineering
John
Robinson 34 34 Clerical
Smith 34 34 Clerical

FULL OUTER JOIN

SELECT *
FROM Employee
FULL OUTER JOIN Department
ON Employee.DepartmentID = Department.DepartmentID;
Employee.
LastName
Employee.
DepartmentID
Department.
DepartmentID
Department.
DepartmentName
Rafferty 31 31 Sales
Jones 33 33 Engineering
Steinberg 33 33 Engineering
John
Robinson 34 34 Clerical
Smith 34 34 Clerical
35 Marketing

FULL OUTER JOIN

SELECT *
FROM Employee
FULL OUTER JOIN Department
ON Employee.DepartmentID = Department.DepartmentID;

SELECT *
FROM Employee
LEFT OUTER JOIN Department
ON Employee.DepartmentID = Department.DepartmentID;
UNION ALL
SELECT *
FROM Employee
RIGHT OUTER JOIN Department
ON Employee.DepartmentID = Department.DepartmentID
WHERE Employee.DepartmentID IS NULL;

SELF-JOIN

SELECT F.EmployeeID, F.LastName, S.EmployeeID, S.LastName, F.Country
FROM Employee F
INNER JOIN Employee S ON F.Country = S.Country
WHERE F.EmployeeID < S.EmployeeID
ORDER BY F.EmployeeID, S.EmployeeID;
EmployeeID LastName DepartmentID Country
123 Rafferty 31 Australia
124 Jones 33 Australia
145 Steinberg 33 Australia
201 Robinson 34 United States
305 Smith 34 Germany
306 John Germany

SELF-JOIN

EmployeeID LastName DepartmentID Country
123 Rafferty 31 Australia
124 Jones 33 Australia
145 Steinberg 33 Australia
201 Robinson 34 United States
305 Smith 34 Germany
306 John Germany
F.EmployeeID F.LastName S.EmployeeID S.LastName F.Country
123 Rafferty 124 Jones Australia
123 Rafferty 145 Steinberg Australia
124 Jones 145 Steinberg Australia
305 Smith 306 John Germany

SUBQUERIES vs JOIN

Коррелирующий подзапрос

SELECT E.*
FROM Employee E WHERE EXISTS (
  SELECT *
  FROM Department D WHERE D.DepartmentID = E.DepartmentID
);

Не коррелирующий подзапрос

SELECT E.*
FROM Employee E WHERE E.DepartmentID IN (
  SELECT DepartmentID
  FROM Department D
);

JOIN

SELECT E.*
FROM Employee E
JOIN Department D ON (E.DepartmentID = D.DepartmentID);

FAKE TABLE

n
1
2
3
4
5
SELECT 1 AS n
UNION ALL
SELECT 2
UNION ALL
SELECT 3
UNION ALL
SELECT 4
UNION ALL
SELECT 5;

FAKE TABLE

n
1
2
3
4
5
WITH RECURSIVE series(n) AS (
  SELECT 1
  UNION ALL
  SELECT n + 1 FROM series WHERE n < 5
)
SELECT * FROM series ORDER BY n;

SELECT n FROM unnest(ARRAY[
    1, 2, 3, 4, 5
]) n ORDER BY n;

SELECT n
FROM generate_series(1, 5) n;

FAKE TABLE

a b
1 foo
2 bar
3 baz
SELECT * FROM unnest(ARRAY[
    1, 2, 3
], ARRAY[
    'foo', 'bar', 'baz'
]) n (a, b) ORDER BY a;

SELECT * FROM (
  VALUES (1, 'foo'), (2, 'bar'), (3, 'baz')
) n (a, b)

Получение идентификаторов до вставки

CREATE TABLE city (
    id bigserial NOT NULL PRIMARY KEY,
    region_id bigint NOT NULL,
    name varchar(128) NOT NULL
);

SELECT nextval(pg_get_serial_sequence('city', 'id'));

SELECT nextval(pg_get_serial_sequence('city', 'id'))
FROM generate_series(1, 100);

SELF UNION

SELECT col1 FROM tbl
UNION ALL
SELECT col2 FROM tbl;

SELECT unnest(array[col1, col2]) as col1 FROM tbl;

WITH (Common Table Expressions)

WITH regional_sales AS (
        SELECT region, SUM(amount) AS total_sales
        FROM orders
        GROUP BY region
     ),
     top_regions AS (
        SELECT region
        FROM regional_sales
        WHERE total_sales > (SELECT SUM(total_sales)/10 FROM regional_sales)
     )
SELECT region,
       product,
       SUM(quantity) AS product_units,
       SUM(amount) AS product_sales
FROM orders
WHERE region IN (SELECT region FROM top_regions)
GROUP BY region, product;

WITH (Common Table Expressions)

Главный плюс и минус CTE в том, что запрос описанный в WITH будет вычисляться единожды ДО выполнения основного запроса.

Если запрос WITH является нерекурсивным и не имеет побочных эффектов, он может быть свёрнут в родительский запрос, что позволит оптимизировать совместно два уровня запросов.

По-умолчанию это происходит, только если запрос WITH задействуется в родительском запросе всего в одном месте, а не в нескольких.

WITH w AS (
    SELECT * FROM big_table
)
SELECT * FROM w WHERE key = 123;

Будет аналогичен:

SELECT * FROM big_table WHERE key = 123;

WITH RECURSIVE

WITH RECURSIVE recursetree (id, parent_id, level, name) AS (
    SELECT id, parent_id, 0, name FROM tree
    WHERE parent_id = 0
  UNION ALL
    SELECT t.id, t.parent_id, level+1, t.name
    FROM tree t
    JOIN recursetree rt ON rt.id = t.parent_id
  )
SELECT * FROM recursetree;

id parent level name
1 0 0 Россия
2 0 0 США
3 1 1 Москва
4 2 1 Нью Йорк
5 2 1 Вашингтон
6 3 2 Бутово

WITH RECURSIVE

WITH RECURSIVE recursetree (id, path, name) AS (
    SELECT id, array_append('{}'::int[], id), name FROM tree
    WHERE parent_id = 0
  UNION ALL
    SELECT t.id, array_append(path, t.id), t.name
    FROM tree t
    JOIN recursetree rt ON rt.id = t.parent_id
  )
SELECT id, array_to_string(path, '.') as path, name FROM recursetree
ORDER BY path;
id path name
1 1 Россия
3 1.3 Москва
6 1.3.6 Бутово
2 2 США
4 2.4 Нью Йорк
5 2.5 Вашингтон

WITH RECURSIVE

WITH RECURSIVE t(n) AS (
    VALUES (1)
    UNION ALL
    SELECT n+1 FROM t WHERE n < 100
)
SELECT sum(n) FROM t;
    
sum
5050

Иерархические структуры в базах данных

Список смежных вершин (Adjacency List)

Список смежных вершин (Adjacency List)


CREATE TABLE IF NOT EXISTS category(
    id     serial primary key,
    name   varchar(20) not null,
    parent int default null references category (id)
);

INSERT INTO category VALUES
        (1,'ELECTRONICS',NULL),
        (2,'TELEVISIONS',1),(3,'TUBE',2),
        (4,'LCD',2),
        (5,'PLASMA',2),
        (6,'PORTABLE ELECTRONICS',1),
        (7,'MP3 PLAYERS',6),
        (8,'FLASH',7),
        (9,'CD PLAYERS',6),
        (10,'2 WAY RADIOS',6);

Список смежных вершин (Adjacency List)


SELECT * FROM category ORDER BY id;
id name parent
1ELECTRONICSnull
2TELEVISIONS1
3TUBE2
4LCD2
5PLASMA2
6PORTABLE ELECTRONICS1
7MP3 PLAYERS6
8FLASH7
9CD PLAYERS6
102 WAY RADIOS6

Список смежных вершин (Adjacency List)

Получение всего дерева


SELECT t1.name AS lvl1, t2.name as lvl2, t3.name as lvl3, t4.name as lvl4
FROM category AS t1
LEFT JOIN category AS t2 ON t2.parent = t1.id
LEFT JOIN category AS t3 ON t3.parent = t2.id
LEFT JOIN category AS t4 ON t4.parent = t3.id
WHERE t1.name = 'ELECTRONICS';
lvl1lvl2lvl3lvl4
ELECTRONICSPORTABLE ELECTRONICSMP3 PLAYERSFLASH
ELECTRONICSPORTABLE ELECTRONICS2 WAY RADIOSNULL
ELECTRONICSTELEVISIONSPLASMANULL
ELECTRONICSTELEVISIONSLCDNULL
ELECTRONICSTELEVISIONSTUBENULL
ELECTRONICSPORTABLE ELECTRONICSCD PLAYERSNULL

Список смежных вершин (Adjacency List)

Получение всего дерева рекурсивно


WITH RECURSIVE tree(id, name, parent, lvl) AS (
    SELECT t.id, t.name, t.parent, 0 from category t WHERE t.parent IS NULL
    UNION ALL
    SELECT t.id, t.name, t.parent, t1.lvl + 1 FROM category t
        JOIN tree AS t1 ON t1.id = t.parent
) SELECT CONCAT(REPEAT('-', lvl::int), name) FROM tree ORDER BY id;
name
ELECTRONICS
-TELEVISIONS
--TUBE
--LCD
--PLASMA
-PORTABLE ELECTRONICS
--MP3 PLAYERS
---FLASH
--CD PLAYERS
--2 WAY RADIOS

Список смежных вершин (Adjacency List)

Получение листовых узлов


SELECT t1.name
FROM category as t1
         LEFT JOIN category as t2 on t1.id = t2.parent
WHERE t2.id is null;
name
2 WAY RADIOS
FLASH
PLASMA
LCD
TUBE
CD PLAYERS

Список смежных вершин (Adjacency List)

Получение одной ветви


SELECT t1.name AS lvl1, t2.name as lvl2, t3.name as lvl3, t4.name as lvl4
FROM category AS t1
         LEFT JOIN category AS t2 ON t2.parent = t1.id
         LEFT JOIN category AS t3 ON t3.parent = t2.id
         LEFT JOIN category AS t4 ON t4.parent = t3.id
WHERE t1.name = 'ELECTRONICS' AND t4.name = 'FLASH';
lvl1lvl2lvl3lvl4
ELECTRONICSPORTABLE ELECTRONICSMP3 PLAYERSFLASH

Список смежных вершин (Adjacency List)

Получение путей рекурсивно


WITH RECURSIVE tree (id, path, name) AS (
    SELECT id, array[id], name FROM category WHERE parent is null
    UNION ALL
    SELECT t.id, array_append(path, t.id), t.name
        FROM category t JOIN tree rt ON rt.id = t.parent
)
SELECT array_to_string(path, '.') as path, name FROM tree ORDER BY path;
pathname
1ELECTRONICS
1.2TELEVISIONS
1.2.3TUBE
1.2.4LCD
1.2.5PLASMA
1.6PORTABLE ELECTRONICS
1.6.102 WAY RADIOS
1.6.7MP3 PLAYERS
1.6.7.8FLASH
1.6.9CD PLAYERS

Список смежных вершин (Adjacency List)

- Главный недостаток такого подхода — необходимо достоверно знать количество уровней вложенности в вашей иерархии, кроме того, чем больше иерархия, тем больше JOIN'ов — тем ниже производительность.

+ Тем не менее, данный способ обладает и существенными достоинствами — в дерево легко вносить изменения, менять местами и удалять узлы.

+ Данный алгоритм хорошо применим, если вы оперируете с небольшими древовидными структурами, которые часто поддаются изменениям.

+ Алгоритм также довольно уверенно себя чувствует и с большими деревьями, если считывать их порциями вида «знаю родителя — прочитать всех наследников». Хороший пример такого случая — динамически подгружаемые деревья.

- Но он плохо применим, когда нужно вычитывать какие-либо иные куски дерева, находить пути, предыдущие и следующие узлы при обходе и вычитывать ветки дерева целиком (на всю глубину).

Вложенное множество (Nested Set)

Вложенное множество (Nested Set)

Вложенное множество (Nested Set)

Вложенное множество (Nested Set)


CREATE TABLE IF NOT EXISTS nested_category
(
    id   serial primary key,
    name text not null,
    lft  int  not null,
    rgt  int  not null
);

INSERT INTO nested_category
VALUES (1, 'ELECTRONICS', 1, 20),
       (2, 'TELEVISIONS', 2, 9),
       (3, 'TUBE', 3, 4),
       (4, 'LCD', 5, 6),
       (5, 'PLASMA', 7, 8),
       (6, 'PORTABLE ELECTRONICS', 10, 19),
       (7, 'MP3 PLAYERS', 11, 14),
       (8, 'FLASH', 12, 13),
       (9, 'CD PLAYERS', 15, 16),
       (10, '2 WAY RADIOS', 17, 18);

Вложенное множество (Nested Set)


SELECT * FROM nested_category ORDER BY id;
idnamelftrgt
1ELECTRONICS120
2TELEVISIONS29
3TUBE34
4LCD56
5PLASMA78
6PORTABLE ELECTRONICS1019
7MP3 PLAYERS1114
8FLASH1213
9CD PLAYERS1516
102 WAY RADIOS1718

Вложенное множество (Nested Set)


SELECT node.name
FROM nested_category AS node,
        nested_category AS parent
WHERE node.lft BETWEEN parent.lft AND parent.rgt
        AND parent.name = 'ELECTRONICS'
ORDER BY node.lft;
idnamelftrgt
1ELECTRONICS120
2TELEVISIONS29
3TUBE34
4LCD56
5PLASMA78
6PORTABLE ELECTRONICS1019
7MP3 PLAYERS1114
8FLASH1213
9CD PLAYERS1516
102 WAY RADIOS1718

Вложенное множество (Nested Set)

Получение всего дерева


SELECT CONCAT(REPEAT('-', (COUNT(parent.name) - 1)::int), node.name) AS name
FROM nested_category AS node,
     nested_category AS parent
WHERE node.lft BETWEEN parent.lft AND parent.rgt
GROUP BY node.name, node.lft
ORDER BY node.lft;
name
ELECTRONICS
-TELEVISIONS
--TUBE
--LCD
--PLASMA
-PORTABLE ELECTRONICS
--MP3 PLAYERS
---FLASH
--CD PLAYERS
--2 WAY RADIOS

Вложенное множество (Nested Set)

Получение листовых узлов


SELECT name
FROM nested_category
WHERE rgt = lft + 1;
    
name
TUBE
LCD
PLASMA
FLASH
CD PLAYERS
2 WAY RADIOS

Вложенное множество (Nested Set)

Получение одной ветви


SELECT parent.name
FROM nested_category AS node, nested_category AS parent
WHERE node.lft BETWEEN parent.lft AND parent.rgt AND node.name = 'FLASH'
ORDER BY parent.lft;
    
name
ELECTRONICS
PORTABLE ELECTRONICS
MP3 PLAYERS
FLASH

Вложенное множество (Nested Set)

+ Nested Set действительно хорош, когда нам необходимо считывать структуру деревьев из БД. При этом он одинаково хорош для деревьев любого объема.

- Тем не менее, для иерархических структур, которые подвергаются частому изменению он, очевидно, не будет являться оптимальным выбором.

Материализованный путь (Materialized Path)

Материализованный путь (Materialized Path)


CREATE TABLE IF NOT EXISTS materialized_category
(
    id   SERIAL PRIMARY KEY,
    name TEXT  NOT NULL,
    path INT[] NOT NULL
);

INSERT INTO materialized_category
VALUES (1, 'ELECTRONICS', '{1}'),
       (2, 'TELEVISIONS', '{1,2}'),
       (3, 'TUBE', '{1,2,3}'),
       (4, 'LCD', '{1,2,4}'),
       (5, 'PLASMA', '{1,2,5}'),
       (6, 'PORTABLE ELECTRONICS', '{1,6}'),
       (7, 'MP3 PLAYERS', '{1,6,7}'),
       (8, 'FLASH', '{1,6,7,8}'),
       (9, 'CD PLAYERS', '{1,6,9}'),
       (10, '2 WAY RADIOS', '{1,6,10}');

Материализованный путь (Materialized Path)


SELECT * FROM materialized_category ORDER BY id;
idnamepath
1ELECTRONICS{1}
2TELEVISIONS{1,2}
3TUBE{1,2,3}
4LCD{1,2,4}
5PLASMA{1,2,5}
6PORTABLE ELECTRONICS{1,6}
7MP3 PLAYERS{1,6,7}
8FLASH{1,6,7,8}
9CD PLAYERS{1,6,9}
102 WAY RADIOS{1,6,10}

Материализованный путь (Materialized Path)

Получение всего дерева


SELECT id,
        CONCAT(REPEAT('-', (ARRAY_LENGTH(path, 1) - 1)::int), name),
        path
FROM materialized_category;
idnamepath
1ELECTRONICS{1}
2-TELEVISIONS{1,2}
3--TUBE{1,2,3}
4--LCD{1,2,4}
5--PLASMA{1,2,5}
6-PORTABLE ELECTRONICS{1,6}
7--MP3 PLAYERS{1,6,7}
8---FLASH{1,6,7,8}
9--CD PLAYERS{1,6,9}
10--2 WAY RADIOS{1,6,10}

Материализованный путь (Materialized Path)

+ алгоритм довольно уверенно себя чувствует на достаточно больших объемах данных.

+ удаление, добавление в конец или изменение узла — это операции довольно простые, и, как правило, не вызывают сложностей в данной модели.

- наиболее неприятной в данном алгоритме будет операция вставки узла в середину уже существующей структуры и перенос одной ветки в другую.

- эффективность алгоритма будет зависеть от конкретной реализации, например, от выбора типа данных для хранения пути



Полезное расширение Postgres для самостоятельного изучения - ltree

Комбинированный подход

Имеет смысл комбинировать следующие подходы:

Комбинировать же Nested Set и Materialized Path особого смысла не имеет, т.к. существенного выигрыша ни в чтении, ни в записи вы не получите.

Стрелков Никита
E-mail: nikita.strelkov@gmail.com
Спасибо за внимание!