PostgreSQL高级特性:JSON、全文搜索与高级索引

PostgreSQL高级特性:JSON、全文搜索与高级索引

引言

PostgreSQL是功能最强大的开源关系型数据库,它的许多高级特性让开发者能够在保持SQL优势的同时,处理半结构化数据、实现复杂的搜索功能。本文将深入探讨PostgreSQL的JSON数据类型、全文搜索、以及高级索引技术。

一、JSON数据类型详解

1.1 JSON vs JSONB

PostgreSQL提供两种JSON数据类型:

特性JSONJSONB
存储格式文本二进制
解析时机每次访问时解析插入时解析
索引支持GIN索引
查询性能较慢较快
保留空白
保留键顺序
重复键处理全部保留只保留最后一个

1.2 JSON操作符

-- 创建JSONB列
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(255),
    attributes JSONB,
    metadata JSONB DEFAULT '{}'
);

-- 插入JSONB数据
INSERT INTO products (name, attributes) VALUES (
    'Laptop',
    '{
        "brand": "Apple",
        "specs": {
            "cpu": "M2",
            "ram": "16GB",
            "storage": "512GB"
        },
        "tags": ["electronics", "computer", "premium"]
    }'::JSONB
);

-- 查询JSONB字段
SELECT attributes->>'brand' AS brand FROM products;
SELECT attributes->'specs'->>'cpu' AS cpu FROM products;
SELECT attributes->'tags' FROM products;

-- 使用containment操作符
SELECT * FROM products WHERE attributes @> '{"brand": "Apple"}';
SELECT * FROM products WHERE attributes @> '{"specs": {"cpu": "M2"}}';

-- 使用keys existence
SELECT * FROM products WHERE attributes ? 'brand';
SELECT * FROM products WHERE attributes ?| ARRAY['brand', 'model'];
SELECT * FROM products WHERE attributes ?& ARRAY['brand', 'model'];

1.3 Go语言JSONB操作

package postgres

import (
    "context"
    "database/sql"
    "encoding/json"
    "fmt"
)

type JSONBOperations struct {
    db *sql.DB
}

func NewJSONBOperations(db *sql.DB) *JSONBOperations {
    return &JSONBOperations{db: db}
}

type Product struct {
    ID         int64
    Name       string
    Attributes Attributes
}

type Attributes struct {
    Brand   string       `json:"brand"`
    Specs   Specs        `json:"specs"`
    Tags    []string     `json:"tags"`
    Price   float64      `json:"price,omitempty"`
}

type Specs struct {
    CPU     string `json:"cpu"`
    RAM     string `json:"ram"`
    Storage string `json:"storage"`
}

func (j *JSONBOperations) Insert(ctx context.Context, name string, attrs Attributes) error {
    attrsJSON, err := json.Marshal(attrs)
    if err != nil {
        return fmt.Errorf("failed to marshal attributes: %w", err)
    }

    query := `
        INSERT INTO products (name, attributes)
        VALUES ($1, $2::JSONB)
    `

    _, err = j.db.ExecContext(ctx, query, name, attrsJSON)
    if err != nil {
        return fmt.Errorf("failed to insert product: %w", err)
    }

    return nil
}

func (j *JSONBOperations) FindByAttribute(ctx context.Context, key, value string) ([]*Product, error) {
    query := `
        SELECT id, name, attributes
        FROM products
        WHERE attributes @> $1::JSONB
    `

    rows, err := j.db.QueryContext(ctx, query, fmt.Sprintf(`{"%s": "%s"}`, key, value))
    if err != nil {
        return nil, fmt.Errorf("failed to query products: %w", err)
    }
    defer rows.Close()

    return j.scanProducts(rows)
}

func (j *JSONBOperations) FindByNestedAttribute(ctx context.Context, path string, value interface{}) ([]*Product, error) {
    query := `
        SELECT id, name, attributes
        FROM products
        WHERE attributes #>> $1 = $2
    `

    rows, err := j.db.QueryContext(ctx, query, path, fmt.Sprintf("%v", value))
    if err != nil {
        return nil, fmt.Errorf("failed to query products: %w", err)
    }
    defer rows.Close()

    return j.scanProducts(rows)
}

func (j *JSONBOperations) UpdateAttribute(ctx context.Context, id int64, key string, value interface{}) error {
    query := `
        UPDATE products
        SET attributes = jsonb_set(attributes, $1, $2)
        WHERE id = $3
    `

    _, err := j.db.ExecContext(ctx, query, []string{key}, fmt.Sprintf(`"%v"`, value), id)
    return err
}

func (j *JSONBOperations) AddToArray(ctx context.Context, id int64, arrayPath string, value interface{}) error {
    query := `
        UPDATE products
        SET attributes = jsonb_insert(attributes, $1, $2)
        WHERE id = $3
    `

    _, err := j.db.ExecContext(ctx, query, arrayPath, fmt.Sprintf(`"%v"`, value), id)
    return err
}

func (j *JSONBOperations) GetAllWithTag(ctx context.Context, tag string) ([]*Product, error) {
    query := `
        SELECT id, name, attributes
        FROM products
        WHERE attributes->'tags' ? $1
    `

    rows, err := j.db.QueryContext(ctx, query, tag)
    if err != nil {
        return nil, fmt.Errorf("failed to query products: %w", err)
    }
    defer rows.Close()

    return j.scanProducts(rows)
}

func (j *JSONBOperations) scanProducts(rows *sql.Rows) ([]*Product, error) {
    var products []*Product

    for rows.Next() {
        var id int64
        var name string
        var attrsJSON []byte

        if err := rows.Scan(&id, &name, &attrsJSON); err != nil {
            return nil, fmt.Errorf("failed to scan product: %w", err)
        }

        var attrs Attributes
        if err := json.Unmarshal(attrsJSON, &attrs); err != nil {
            return nil, fmt.Errorf("failed to unmarshal attributes: %w", err)
        }

        products = append(products, &Product{
            ID:         id,
            Name:       name,
            Attributes: attrs,
        })
    }

    return products, nil
}

二、JSONB索引

2.1 GIN索引

-- 为JSONB字段创建GIN索引
CREATE INDEX idx_products_attributes ON products USING GIN (attributes);

-- 为JSONB中的特定路径创建索引
CREATE INDEX idx_products_brand ON products ((attributes->>'brand'));

-- 创建表达式索引
CREATE INDEX idx_products_price ON products ((COALESCE((attributes->>'price')::numeric, 0)));

2.2 索引使用示例

package postgres

import (
    "context"
    "database/sql"
    "fmt"
)

type JSONBIndex struct {
    db *sql.DB
}

func NewJSONBIndex(db *sql.DB) *JSONBIndex {
    return &JSONBIndex{db: db}
}

func (ji *JSONBIndex) CreateGINIndex(ctx context.Context, table, column string) error {
    query := fmt.Sprintf(`
        CREATE INDEX CONCURRENTLY idx_%s_%s_gin
        ON %s USING GIN (%s)
    `, table, column, table, column)

    _, err := ji.db.ExecContext(ctx, query)
    return err
}

func (ji *JSONBIndex) CreateExpressionIndex(ctx context.Context, table, column, path, indexName string) error {
    query := fmt.Sprintf(`
        CREATE INDEX CONCURRENTLY idx_%s
        ON %s ((%s->>%s))
    `, indexName, table, column, path)

    _, err := ji.db.ExecContext(ctx, query)
    return err
}

func (ji *JSONBIndex) ExplainIndexUsage(ctx context.Context, query string) (string, error) {
    var plan string
    err := ji.db.QueryRowContext(ctx, "EXPLAIN "+query).Scan(&plan)
    return plan, err
}

三、全文搜索

3.1 全文搜索配置

-- 查看全文搜索配置
SHOW default_text_search_config;

-- 创建自定义全文搜索配置
CREATE TEXT SEARCH CONFIGURATION chinese_zh (COPY = simple);

-- 添加词典和映射
ALTER TEXT SEARCH CONFIGURATION chinese_zh
    ADD MAPPING FOR hword1 WITH simple;

ALTER TEXT SEARCH CONFIGURATION chinese_zh
    ADD MAPPING FOR hword2 WITH simple;

ALTER TEXT SEARCH CONFIGURATION chinese_zh
    ADD MAPPING FOR hword3 WITH simple;

ALTER TEXT SEARCH CONFIGURATION chinese_zh
    ADD MAPPING FOR hword4 WITH simple;

ALTER TEXT SEARCH CONFIGURATION chinese_zh
    ADD MAPPING FOR ideograph WITH simple;

3.2 全文搜索实现

-- 创建支持全文搜索的表
CREATE TABLE articles (
    id SERIAL PRIMARY KEY,
    title VARCHAR(255),
    content TEXT,
    title_tsv TSVECTOR,
    content_tsv TSVECTOR,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 创建自动更新TSVECTOR的触发器
CREATE FUNCTION article_search_trigger() RETURNS trigger AS $$
BEGIN
    NEW.title_tsv :=
        setweight(to_tsvector('simple', coalesce(NEW.title,'')), 'A') ||
        setweight(to_tsvector('simple', coalesce(NEW.content,'')), 'B');
    RETURN NEW;
END
$$ LANGUAGE plpgsql;

CREATE TRIGGER article_search_update
    BEFORE INSERT OR UPDATE ON articles
    FOR EACH ROW EXECUTE FUNCTION article_search_trigger();

-- 插入测试数据
INSERT INTO articles (title, content) VALUES
    ('PostgreSQL Advanced Features', 'PostgreSQL is a powerful open source object-relational database system.'),
    ('Full Text Search in PostgreSQL', 'Learn how to implement full text search using PostgreSQL.'),
    ('JSONB Data Type', 'PostgreSQL supports JSON data type with advanced indexing capabilities.');

3.3 Go语言全文搜索实现

package postgres

import (
    "context"
    "database/sql"
    "fmt"
    "strings"
)

type FullTextSearch struct {
    db *sql.DB
}

func NewFullTextSearch(db *sql.DB) *FullTextSearch {
    return &FullTextSearch{db: db}
}

type Article struct {
    ID        int64
    Title     string
    Content   string
    Rank      float64
}

func (fts *FullTextSearch) Search(ctx context.Context, query string, limit int) ([]*Article, error) {
    searchQuery := strings.Join(strings.Fields(query), " & ")

    sqlQuery := `
        SELECT id, title, content, ts_rank(title_tsv || content_tsv, to_tsquery('simple', $1)) as rank
        FROM articles
        WHERE to_tsquery('simple', $1) @@ (title_tsv || content_tsv)
        ORDER BY rank DESC
        LIMIT $2
    `

    rows, err := fts.db.QueryContext(ctx, sqlQuery, searchQuery, limit)
    if err != nil {
        return nil, fmt.Errorf("failed to search articles: %w", err)
    }
    defer rows.Close()

    var articles []*Article
    for rows.Next() {
        article := &Article{}
        if err := rows.Scan(&article.ID, &article.Title, &article.Content, &article.Rank); err != nil {
            return nil, fmt.Errorf("failed to scan article: %w", err)
        }
        articles = append(articles, article)
    }

    return articles, nil
}

func (fts *FullTextSearch) SearchWithHighlight(ctx context.Context, query string, limit int) ([]*Article, error) {
    searchQuery := strings.Join(strings.Fields(query), " & ")

    sqlQuery := `
        SELECT
            id,
            title,
            content,
            ts_rank(title_tsv || content_tsv, to_tsquery('simple', $1)) as rank,
            ts_headline('simple', title, to_tsquery('simple', $1)) as title_highlight,
            ts_headline('simple', content, to_tsquery('simple', $1)) as content_highlight
        FROM articles
        WHERE to_tsquery('simple', $1) @@ (title_tsv || content_tsv)
        ORDER BY rank DESC
        LIMIT $2
    `

    rows, err := fts.db.QueryContext(ctx, sqlQuery, searchQuery, limit)
    if err != nil {
        return nil, fmt.Errorf("failed to search articles: %w", err)
    }
    defer rows.Close()

    var articles []*Article
    for rows.Next() {
        article := &Article{}
        var titleHighlight, contentHighlight string
        if err := rows.Scan(
            &article.ID,
            &article.Title,
            &article.Content,
            &article.Rank,
            &titleHighlight,
            &contentHighlight,
        ); err != nil {
            return nil, fmt.Errorf("failed to scan article: %w", err)
        }
        articles = append(articles, article)
    }

    return articles, nil
}

func (fts *FullTextSearch) Suggest(ctx context.Context, prefix string, limit int) ([]string, error) {
    suggestQuery := strings.Join(strings.Fields(prefix), " & ")

    sqlQuery := `
        SELECT DISTINCT title
        FROM articles
        WHERE to_tsquery('simple', $1) @@ title_tsv
        ORDER BY ts_rank(title_tsv, to_tsquery('simple', $1)) DESC
        LIMIT $2
    `

    rows, err := fts.db.QueryContext(ctx, sqlQuery, suggestQuery, limit)
    if err != nil {
        return nil, fmt.Errorf("failed to get suggestions: %w", err)
    }
    defer rows.Close()

    var suggestions []string
    for rows.Next() {
        var title string
        if err := rows.Scan(&title); err != nil {
            return nil, fmt.Errorf("failed to scan suggestion: %w", err)
        }
        suggestions = append(suggestions, title)
    }

    return suggestions, nil
}

四、高级索引类型

4.1 部分索引

-- 只对活跃用户创建索引
CREATE INDEX idx_active_users_email ON users (email)
WHERE status = 'active';

-- 只对未删除的订单创建索引
CREATE INDEX idx_pending_orders_user ON orders (user_id)
WHERE status = 'pending' AND deleted_at IS NULL;

4.2 表达式索引

-- 对email的小写形式创建索引
CREATE INDEX idx_users_email_lower ON users (lower(email));

-- 对日期创建索引
CREATE INDEX idx_orders_date ON orders (DATE(created_at));

-- 对计算表达式创建索引
CREATE INDEX idx_users_email_domain ON users ((split_part(email, '@', 2)));

4.3 复合索引设计

-- 复合索引,遵循最左前缀原则
CREATE INDEX idx_orders_user_status ON orders (user_id, status, created_at DESC);

-- 支持以下查询使用索引
SELECT * FROM orders WHERE user_id = 1;
SELECT * FROM orders WHERE user_id = 1 AND status = 'pending';
SELECT * FROM orders WHERE user_id = 1 AND status = 'pending' AND created_at > '2024-01-01';

4.4 Go语言创建索引

package postgres

import (
    "context"
    "database/sql"
    "fmt"
)

type IndexManager struct {
    db *sql.DB
}

func NewIndexManager(db *sql.DB) *IndexManager {
    return &IndexManager{db: db}
}

type IndexDefinition struct {
    Name       string
    Table      string
    Columns    []string
    IndexType  string
    Unique     bool
    Partial    string
    Expression string
}

func (im *IndexManager) CreateIndex(ctx context.Context, idx *IndexDefinition) error {
    query := "CREATE "

    if idx.Unique {
        query += "UNIQUE "
    }

    if idx.IndexType != "" {
        query += idx.IndexType + " "
    }

    query += "INDEX "

    if idx.Partial != "" {
        query += "INDEX IF NOT EXISTS "
    }

    query += fmt.Sprintf("%s ON %s ", idx.Name, idx.Table)

    if len(idx.Columns) > 0 {
        query += "(" + strings.Join(idx.Columns, ", ") + ")"
    } else if idx.Expression != "" {
        query += "(" + idx.Expression + ")"
    }

    if idx.Partial != "" {
        query += " WHERE " + idx.Partial
    }

    _, err := im.db.ExecContext(ctx, query)
    return err
}

func (im *IndexManager) CreatePartialIndex(ctx context.Context, name, table, where string, columns ...string) error {
    query := fmt.Sprintf(`
        CREATE INDEX IF NOT EXISTS %s ON %s (%s) WHERE %s
    `, name, table, strings.Join(columns, ", "), where)

    _, err := im.db.ExecContext(ctx, query)
    return err
}

func (im *IndexManager) ListIndexes(ctx context.Context, table string) ([]string, error) {
    query := `
        SELECT indexname
        FROM pg_indexes
        WHERE tablename = $1
        AND schemaname = 'public'
    `

    rows, err := im.db.QueryContext(ctx, query, table)
    if err != nil {
        return nil, fmt.Errorf("failed to list indexes: %w", err)
    }
    defer rows.Close()

    var indexes []string
    for rows.Next() {
        var name string
        if err := rows.Scan(&name); err != nil {
            return nil, err
        }
        indexes = append(indexes, name)
    }

    return indexes, nil
}

func (im *IndexManager) CheckIndexUsage(ctx context.Context, indexName string) (int64, error) {
    query := `
        SELECT COALESCE(SUM(idx_scan), 0)
        FROM pg_stat_user_indexes
        WHERE indexrelname = $1
    `

    var scans int64
    err := im.db.QueryRowContext(ctx, query, indexName).Scan(&scans)
    return scans, err
}

import "strings"

五、分区表

5.1 范围分区

-- 创建分区表
CREATE TABLE orders (
    id SERIAL,
    user_id INT NOT NULL,
    total DECIMAL(10, 2) NOT NULL,
    status VARCHAR(20),
    created_at DATE NOT NULL
) PARTITION BY RANGE (created_at);

-- 创建月度分区
CREATE TABLE orders_2024_01 PARTITION OF orders
    FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');

CREATE TABLE orders_2024_02 PARTITION OF orders
    FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');

CREATE TABLE orders_2024_03 PARTITION OF orders
    FOR VALUES FROM ('2024-03-01') TO ('2024-04-01');

5.2 列表分区

-- 按地区分区
CREATE TABLE customers (
    id SERIAL,
    name VARCHAR(100),
    region VARCHAR(50)
) PARTITION BY LIST (region);

CREATE TABLE customers_north PARTITION OF customers
    FOR VALUES IN ('North', 'Northeast');

CREATE TABLE customers_south PARTITION OF customers
    FOR VALUES IN ('South', 'Southeast');

CREATE TABLE customers_west PARTITION OF customers
    FOR VALUES IN ('West', 'Southwest');

5.3 Go语言分区管理

package postgres

import (
    "context"
    "fmt"
    "time"
)

type PartitionManager struct {
    db *sql.DB
}

func NewPartitionManager(db *sql.DB) *PartitionManager {
    return &PartitionManager{db: db}
}

func (pm *PartitionManager) CreateMonthlyPartition(ctx context.Context, table string, year, month int) error {
    startDate := time.Date(year, time.Month(month), 1, 0, 0, 0, 0, time.UTC)
    endDate := startDate.AddDate(0, 1, 0)

    partitionName := fmt.Sprintf("%s_%d_%02d", table, year, month)

    query := fmt.Sprintf(`
        CREATE TABLE IF NOT EXISTS %s PARTITION OF %s
        FOR VALUES FROM ('%s') TO ('%s')
    `, partitionName, table, startDate.Format("2006-01-02"), endDate.Format("2006-01-02"))

    _, err := pm.db.ExecContext(ctx, query)
    return err
}

func (pm *PartitionManager) EnsureCurrentPartitions(ctx context.Context, table string, monthsAhead int) error {
    now := time.Now()

    for i := -1; i <= monthsAhead; i++ {
        date := now.AddDate(0, i, 0)
        if err := pm.CreateMonthlyPartition(ctx, table, date.Year(), int(date.Month())); err != nil {
            return err
        }
    }

    return nil
}

func (pm *PartitionManager) ListPartitions(ctx context.Context, table string) ([]string, error) {
    query := `
        SELECT child.relname::text
        FROM pg_inherits
        JOIN pg_class parent ON pg_inherits.inhparent = parent.oid
        JOIN pg_class child ON pg_inherits.inhrelid = child.oid
        WHERE parent.relname = $1
    `

    rows, err := pm.db.QueryContext(ctx, query, table)
    if err != nil {
        return nil, fmt.Errorf("failed to list partitions: %w", err)
    }
    defer rows.Close()

    var partitions []string
    for rows.Next() {
        var name string
        if err := rows.Scan(&name); err != nil {
            return nil, err
        }
        partitions = append(partitions, name)
    }

    return partitions, nil
}

六、总结

PostgreSQL的高级特性让它成为处理复杂数据场景的利器:

  1. JSONB类型:适合存储半结构化数据,支持强大的查询能力
  2. 全文搜索:无需外部搜索引擎即可实现高效搜索
  3. GIN索引:为JSONB和全文搜索提供高效索引支持
  4. 部分索引:只为特定行创建索引,节省空间提高性能
  5. 表达式索引:支持在索引中使用函数和表达式
  6. 分区表:将大表拆分,提高查询性能便于管理

掌握这些高级特性,能够帮助您在实际项目中更好地利用PostgreSQL的强大能力。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值