PostgreSQL高级特性:JSON、全文搜索与高级索引
引言
PostgreSQL是功能最强大的开源关系型数据库,它的许多高级特性让开发者能够在保持SQL优势的同时,处理半结构化数据、实现复杂的搜索功能。本文将深入探讨PostgreSQL的JSON数据类型、全文搜索、以及高级索引技术。
一、JSON数据类型详解
1.1 JSON vs JSONB
PostgreSQL提供两种JSON数据类型:
| 特性 | JSON | JSONB |
|---|---|---|
| 存储格式 | 文本 | 二进制 |
| 解析时机 | 每次访问时解析 | 插入时解析 |
| 索引支持 | 无 | GIN索引 |
| 查询性能 | 较慢 | 较快 |
| 保留空白 | 是 | 否 |
| 保留键顺序 | 是 | 否 |
| 重复键处理 | 全部保留 | 只保留最后一个 |
1.2 JSON操作符
-- 创建JSONB列
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(255),
attributes JSONB,
metadata JSONB DEFAULT '{}'
);
-- 插入JSONB数据
INSERT INTO products (name, attributes) VALUES (
'Laptop',
'{
"brand": "Apple",
"specs": {
"cpu": "M2",
"ram": "16GB",
"storage": "512GB"
},
"tags": ["electronics", "computer", "premium"]
}'::JSONB
);
-- 查询JSONB字段
SELECT attributes->>'brand' AS brand FROM products;
SELECT attributes->'specs'->>'cpu' AS cpu FROM products;
SELECT attributes->'tags' FROM products;
-- 使用containment操作符
SELECT * FROM products WHERE attributes @> '{"brand": "Apple"}';
SELECT * FROM products WHERE attributes @> '{"specs": {"cpu": "M2"}}';
-- 使用keys existence
SELECT * FROM products WHERE attributes ? 'brand';
SELECT * FROM products WHERE attributes ?| ARRAY['brand', 'model'];
SELECT * FROM products WHERE attributes ?& ARRAY['brand', 'model'];
1.3 Go语言JSONB操作
package postgres
import (
"context"
"database/sql"
"encoding/json"
"fmt"
)
type JSONBOperations struct {
db *sql.DB
}
func NewJSONBOperations(db *sql.DB) *JSONBOperations {
return &JSONBOperations{db: db}
}
type Product struct {
ID int64
Name string
Attributes Attributes
}
type Attributes struct {
Brand string `json:"brand"`
Specs Specs `json:"specs"`
Tags []string `json:"tags"`
Price float64 `json:"price,omitempty"`
}
type Specs struct {
CPU string `json:"cpu"`
RAM string `json:"ram"`
Storage string `json:"storage"`
}
func (j *JSONBOperations) Insert(ctx context.Context, name string, attrs Attributes) error {
attrsJSON, err := json.Marshal(attrs)
if err != nil {
return fmt.Errorf("failed to marshal attributes: %w", err)
}
query := `
INSERT INTO products (name, attributes)
VALUES ($1, $2::JSONB)
`
_, err = j.db.ExecContext(ctx, query, name, attrsJSON)
if err != nil {
return fmt.Errorf("failed to insert product: %w", err)
}
return nil
}
func (j *JSONBOperations) FindByAttribute(ctx context.Context, key, value string) ([]*Product, error) {
query := `
SELECT id, name, attributes
FROM products
WHERE attributes @> $1::JSONB
`
rows, err := j.db.QueryContext(ctx, query, fmt.Sprintf(`{"%s": "%s"}`, key, value))
if err != nil {
return nil, fmt.Errorf("failed to query products: %w", err)
}
defer rows.Close()
return j.scanProducts(rows)
}
func (j *JSONBOperations) FindByNestedAttribute(ctx context.Context, path string, value interface{}) ([]*Product, error) {
query := `
SELECT id, name, attributes
FROM products
WHERE attributes #>> $1 = $2
`
rows, err := j.db.QueryContext(ctx, query, path, fmt.Sprintf("%v", value))
if err != nil {
return nil, fmt.Errorf("failed to query products: %w", err)
}
defer rows.Close()
return j.scanProducts(rows)
}
func (j *JSONBOperations) UpdateAttribute(ctx context.Context, id int64, key string, value interface{}) error {
query := `
UPDATE products
SET attributes = jsonb_set(attributes, $1, $2)
WHERE id = $3
`
_, err := j.db.ExecContext(ctx, query, []string{key}, fmt.Sprintf(`"%v"`, value), id)
return err
}
func (j *JSONBOperations) AddToArray(ctx context.Context, id int64, arrayPath string, value interface{}) error {
query := `
UPDATE products
SET attributes = jsonb_insert(attributes, $1, $2)
WHERE id = $3
`
_, err := j.db.ExecContext(ctx, query, arrayPath, fmt.Sprintf(`"%v"`, value), id)
return err
}
func (j *JSONBOperations) GetAllWithTag(ctx context.Context, tag string) ([]*Product, error) {
query := `
SELECT id, name, attributes
FROM products
WHERE attributes->'tags' ? $1
`
rows, err := j.db.QueryContext(ctx, query, tag)
if err != nil {
return nil, fmt.Errorf("failed to query products: %w", err)
}
defer rows.Close()
return j.scanProducts(rows)
}
func (j *JSONBOperations) scanProducts(rows *sql.Rows) ([]*Product, error) {
var products []*Product
for rows.Next() {
var id int64
var name string
var attrsJSON []byte
if err := rows.Scan(&id, &name, &attrsJSON); err != nil {
return nil, fmt.Errorf("failed to scan product: %w", err)
}
var attrs Attributes
if err := json.Unmarshal(attrsJSON, &attrs); err != nil {
return nil, fmt.Errorf("failed to unmarshal attributes: %w", err)
}
products = append(products, &Product{
ID: id,
Name: name,
Attributes: attrs,
})
}
return products, nil
}
二、JSONB索引
2.1 GIN索引
-- 为JSONB字段创建GIN索引
CREATE INDEX idx_products_attributes ON products USING GIN (attributes);
-- 为JSONB中的特定路径创建索引
CREATE INDEX idx_products_brand ON products ((attributes->>'brand'));
-- 创建表达式索引
CREATE INDEX idx_products_price ON products ((COALESCE((attributes->>'price')::numeric, 0)));
2.2 索引使用示例
package postgres
import (
"context"
"database/sql"
"fmt"
)
type JSONBIndex struct {
db *sql.DB
}
func NewJSONBIndex(db *sql.DB) *JSONBIndex {
return &JSONBIndex{db: db}
}
func (ji *JSONBIndex) CreateGINIndex(ctx context.Context, table, column string) error {
query := fmt.Sprintf(`
CREATE INDEX CONCURRENTLY idx_%s_%s_gin
ON %s USING GIN (%s)
`, table, column, table, column)
_, err := ji.db.ExecContext(ctx, query)
return err
}
func (ji *JSONBIndex) CreateExpressionIndex(ctx context.Context, table, column, path, indexName string) error {
query := fmt.Sprintf(`
CREATE INDEX CONCURRENTLY idx_%s
ON %s ((%s->>%s))
`, indexName, table, column, path)
_, err := ji.db.ExecContext(ctx, query)
return err
}
func (ji *JSONBIndex) ExplainIndexUsage(ctx context.Context, query string) (string, error) {
var plan string
err := ji.db.QueryRowContext(ctx, "EXPLAIN "+query).Scan(&plan)
return plan, err
}
三、全文搜索
3.1 全文搜索配置
-- 查看全文搜索配置
SHOW default_text_search_config;
-- 创建自定义全文搜索配置
CREATE TEXT SEARCH CONFIGURATION chinese_zh (COPY = simple);
-- 添加词典和映射
ALTER TEXT SEARCH CONFIGURATION chinese_zh
ADD MAPPING FOR hword1 WITH simple;
ALTER TEXT SEARCH CONFIGURATION chinese_zh
ADD MAPPING FOR hword2 WITH simple;
ALTER TEXT SEARCH CONFIGURATION chinese_zh
ADD MAPPING FOR hword3 WITH simple;
ALTER TEXT SEARCH CONFIGURATION chinese_zh
ADD MAPPING FOR hword4 WITH simple;
ALTER TEXT SEARCH CONFIGURATION chinese_zh
ADD MAPPING FOR ideograph WITH simple;
3.2 全文搜索实现
-- 创建支持全文搜索的表
CREATE TABLE articles (
id SERIAL PRIMARY KEY,
title VARCHAR(255),
content TEXT,
title_tsv TSVECTOR,
content_tsv TSVECTOR,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 创建自动更新TSVECTOR的触发器
CREATE FUNCTION article_search_trigger() RETURNS trigger AS $$
BEGIN
NEW.title_tsv :=
setweight(to_tsvector('simple', coalesce(NEW.title,'')), 'A') ||
setweight(to_tsvector('simple', coalesce(NEW.content,'')), 'B');
RETURN NEW;
END
$$ LANGUAGE plpgsql;
CREATE TRIGGER article_search_update
BEFORE INSERT OR UPDATE ON articles
FOR EACH ROW EXECUTE FUNCTION article_search_trigger();
-- 插入测试数据
INSERT INTO articles (title, content) VALUES
('PostgreSQL Advanced Features', 'PostgreSQL is a powerful open source object-relational database system.'),
('Full Text Search in PostgreSQL', 'Learn how to implement full text search using PostgreSQL.'),
('JSONB Data Type', 'PostgreSQL supports JSON data type with advanced indexing capabilities.');
3.3 Go语言全文搜索实现
package postgres
import (
"context"
"database/sql"
"fmt"
"strings"
)
type FullTextSearch struct {
db *sql.DB
}
func NewFullTextSearch(db *sql.DB) *FullTextSearch {
return &FullTextSearch{db: db}
}
type Article struct {
ID int64
Title string
Content string
Rank float64
}
func (fts *FullTextSearch) Search(ctx context.Context, query string, limit int) ([]*Article, error) {
searchQuery := strings.Join(strings.Fields(query), " & ")
sqlQuery := `
SELECT id, title, content, ts_rank(title_tsv || content_tsv, to_tsquery('simple', $1)) as rank
FROM articles
WHERE to_tsquery('simple', $1) @@ (title_tsv || content_tsv)
ORDER BY rank DESC
LIMIT $2
`
rows, err := fts.db.QueryContext(ctx, sqlQuery, searchQuery, limit)
if err != nil {
return nil, fmt.Errorf("failed to search articles: %w", err)
}
defer rows.Close()
var articles []*Article
for rows.Next() {
article := &Article{}
if err := rows.Scan(&article.ID, &article.Title, &article.Content, &article.Rank); err != nil {
return nil, fmt.Errorf("failed to scan article: %w", err)
}
articles = append(articles, article)
}
return articles, nil
}
func (fts *FullTextSearch) SearchWithHighlight(ctx context.Context, query string, limit int) ([]*Article, error) {
searchQuery := strings.Join(strings.Fields(query), " & ")
sqlQuery := `
SELECT
id,
title,
content,
ts_rank(title_tsv || content_tsv, to_tsquery('simple', $1)) as rank,
ts_headline('simple', title, to_tsquery('simple', $1)) as title_highlight,
ts_headline('simple', content, to_tsquery('simple', $1)) as content_highlight
FROM articles
WHERE to_tsquery('simple', $1) @@ (title_tsv || content_tsv)
ORDER BY rank DESC
LIMIT $2
`
rows, err := fts.db.QueryContext(ctx, sqlQuery, searchQuery, limit)
if err != nil {
return nil, fmt.Errorf("failed to search articles: %w", err)
}
defer rows.Close()
var articles []*Article
for rows.Next() {
article := &Article{}
var titleHighlight, contentHighlight string
if err := rows.Scan(
&article.ID,
&article.Title,
&article.Content,
&article.Rank,
&titleHighlight,
&contentHighlight,
); err != nil {
return nil, fmt.Errorf("failed to scan article: %w", err)
}
articles = append(articles, article)
}
return articles, nil
}
func (fts *FullTextSearch) Suggest(ctx context.Context, prefix string, limit int) ([]string, error) {
suggestQuery := strings.Join(strings.Fields(prefix), " & ")
sqlQuery := `
SELECT DISTINCT title
FROM articles
WHERE to_tsquery('simple', $1) @@ title_tsv
ORDER BY ts_rank(title_tsv, to_tsquery('simple', $1)) DESC
LIMIT $2
`
rows, err := fts.db.QueryContext(ctx, sqlQuery, suggestQuery, limit)
if err != nil {
return nil, fmt.Errorf("failed to get suggestions: %w", err)
}
defer rows.Close()
var suggestions []string
for rows.Next() {
var title string
if err := rows.Scan(&title); err != nil {
return nil, fmt.Errorf("failed to scan suggestion: %w", err)
}
suggestions = append(suggestions, title)
}
return suggestions, nil
}
四、高级索引类型
4.1 部分索引
-- 只对活跃用户创建索引
CREATE INDEX idx_active_users_email ON users (email)
WHERE status = 'active';
-- 只对未删除的订单创建索引
CREATE INDEX idx_pending_orders_user ON orders (user_id)
WHERE status = 'pending' AND deleted_at IS NULL;
4.2 表达式索引
-- 对email的小写形式创建索引
CREATE INDEX idx_users_email_lower ON users (lower(email));
-- 对日期创建索引
CREATE INDEX idx_orders_date ON orders (DATE(created_at));
-- 对计算表达式创建索引
CREATE INDEX idx_users_email_domain ON users ((split_part(email, '@', 2)));
4.3 复合索引设计
-- 复合索引,遵循最左前缀原则
CREATE INDEX idx_orders_user_status ON orders (user_id, status, created_at DESC);
-- 支持以下查询使用索引
SELECT * FROM orders WHERE user_id = 1;
SELECT * FROM orders WHERE user_id = 1 AND status = 'pending';
SELECT * FROM orders WHERE user_id = 1 AND status = 'pending' AND created_at > '2024-01-01';
4.4 Go语言创建索引
package postgres
import (
"context"
"database/sql"
"fmt"
)
type IndexManager struct {
db *sql.DB
}
func NewIndexManager(db *sql.DB) *IndexManager {
return &IndexManager{db: db}
}
type IndexDefinition struct {
Name string
Table string
Columns []string
IndexType string
Unique bool
Partial string
Expression string
}
func (im *IndexManager) CreateIndex(ctx context.Context, idx *IndexDefinition) error {
query := "CREATE "
if idx.Unique {
query += "UNIQUE "
}
if idx.IndexType != "" {
query += idx.IndexType + " "
}
query += "INDEX "
if idx.Partial != "" {
query += "INDEX IF NOT EXISTS "
}
query += fmt.Sprintf("%s ON %s ", idx.Name, idx.Table)
if len(idx.Columns) > 0 {
query += "(" + strings.Join(idx.Columns, ", ") + ")"
} else if idx.Expression != "" {
query += "(" + idx.Expression + ")"
}
if idx.Partial != "" {
query += " WHERE " + idx.Partial
}
_, err := im.db.ExecContext(ctx, query)
return err
}
func (im *IndexManager) CreatePartialIndex(ctx context.Context, name, table, where string, columns ...string) error {
query := fmt.Sprintf(`
CREATE INDEX IF NOT EXISTS %s ON %s (%s) WHERE %s
`, name, table, strings.Join(columns, ", "), where)
_, err := im.db.ExecContext(ctx, query)
return err
}
func (im *IndexManager) ListIndexes(ctx context.Context, table string) ([]string, error) {
query := `
SELECT indexname
FROM pg_indexes
WHERE tablename = $1
AND schemaname = 'public'
`
rows, err := im.db.QueryContext(ctx, query, table)
if err != nil {
return nil, fmt.Errorf("failed to list indexes: %w", err)
}
defer rows.Close()
var indexes []string
for rows.Next() {
var name string
if err := rows.Scan(&name); err != nil {
return nil, err
}
indexes = append(indexes, name)
}
return indexes, nil
}
func (im *IndexManager) CheckIndexUsage(ctx context.Context, indexName string) (int64, error) {
query := `
SELECT COALESCE(SUM(idx_scan), 0)
FROM pg_stat_user_indexes
WHERE indexrelname = $1
`
var scans int64
err := im.db.QueryRowContext(ctx, query, indexName).Scan(&scans)
return scans, err
}
import "strings"
五、分区表
5.1 范围分区
-- 创建分区表
CREATE TABLE orders (
id SERIAL,
user_id INT NOT NULL,
total DECIMAL(10, 2) NOT NULL,
status VARCHAR(20),
created_at DATE NOT NULL
) PARTITION BY RANGE (created_at);
-- 创建月度分区
CREATE TABLE orders_2024_01 PARTITION OF orders
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE orders_2024_02 PARTITION OF orders
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
CREATE TABLE orders_2024_03 PARTITION OF orders
FOR VALUES FROM ('2024-03-01') TO ('2024-04-01');
5.2 列表分区
-- 按地区分区
CREATE TABLE customers (
id SERIAL,
name VARCHAR(100),
region VARCHAR(50)
) PARTITION BY LIST (region);
CREATE TABLE customers_north PARTITION OF customers
FOR VALUES IN ('North', 'Northeast');
CREATE TABLE customers_south PARTITION OF customers
FOR VALUES IN ('South', 'Southeast');
CREATE TABLE customers_west PARTITION OF customers
FOR VALUES IN ('West', 'Southwest');
5.3 Go语言分区管理
package postgres
import (
"context"
"fmt"
"time"
)
type PartitionManager struct {
db *sql.DB
}
func NewPartitionManager(db *sql.DB) *PartitionManager {
return &PartitionManager{db: db}
}
func (pm *PartitionManager) CreateMonthlyPartition(ctx context.Context, table string, year, month int) error {
startDate := time.Date(year, time.Month(month), 1, 0, 0, 0, 0, time.UTC)
endDate := startDate.AddDate(0, 1, 0)
partitionName := fmt.Sprintf("%s_%d_%02d", table, year, month)
query := fmt.Sprintf(`
CREATE TABLE IF NOT EXISTS %s PARTITION OF %s
FOR VALUES FROM ('%s') TO ('%s')
`, partitionName, table, startDate.Format("2006-01-02"), endDate.Format("2006-01-02"))
_, err := pm.db.ExecContext(ctx, query)
return err
}
func (pm *PartitionManager) EnsureCurrentPartitions(ctx context.Context, table string, monthsAhead int) error {
now := time.Now()
for i := -1; i <= monthsAhead; i++ {
date := now.AddDate(0, i, 0)
if err := pm.CreateMonthlyPartition(ctx, table, date.Year(), int(date.Month())); err != nil {
return err
}
}
return nil
}
func (pm *PartitionManager) ListPartitions(ctx context.Context, table string) ([]string, error) {
query := `
SELECT child.relname::text
FROM pg_inherits
JOIN pg_class parent ON pg_inherits.inhparent = parent.oid
JOIN pg_class child ON pg_inherits.inhrelid = child.oid
WHERE parent.relname = $1
`
rows, err := pm.db.QueryContext(ctx, query, table)
if err != nil {
return nil, fmt.Errorf("failed to list partitions: %w", err)
}
defer rows.Close()
var partitions []string
for rows.Next() {
var name string
if err := rows.Scan(&name); err != nil {
return nil, err
}
partitions = append(partitions, name)
}
return partitions, nil
}
六、总结
PostgreSQL的高级特性让它成为处理复杂数据场景的利器:
- JSONB类型:适合存储半结构化数据,支持强大的查询能力
- 全文搜索:无需外部搜索引擎即可实现高效搜索
- GIN索引:为JSONB和全文搜索提供高效索引支持
- 部分索引:只为特定行创建索引,节省空间提高性能
- 表达式索引:支持在索引中使用函数和表达式
- 分区表:将大表拆分,提高查询性能便于管理
掌握这些高级特性,能够帮助您在实际项目中更好地利用PostgreSQL的强大能力。

1313

被折叠的 条评论
为什么被折叠?



