ac自动机

Python3.8

Python3.8

Conda
Python

Python 是一种高级、解释型、通用的编程语言,以其简洁易读的语法而闻名,适用于广泛的应用,包括Web开发、数据分析、人工智能和自动化脚本。

ac自动机

# -*- coding: utf-8 -*-
# @Time    : 2019/8/14 14:42
# @Author  :
# common包下的 py_ac.py

from collections import defaultdict, deque


class TrieNode(object):
    """One node of the pattern trie."""

    def __init__(self, value=None):
        # Child nodes keyed by their symbol: {value: TrieNode}.
        self.children = {}
        # End-of-pattern mark: i means the i-th pattern ends here, 0 means none.
        self.tail = 0
        # Failure link; None until something assigns it.
        self.fail = None
        # Symbol carried by this node.
        self.value = value


class Trie(object):
    """Aho-Corasick automaton over a collection of pattern strings.

    The constructor inserts every pattern into a trie and then builds the
    failure links; :meth:`search` afterwards reports every occurrence of
    every pattern while scanning the text once from left to right.
    """

    def __init__(self, words):
        """Build the automaton.

        :param words: indexable collection of pattern strings
        """
        # Root of the trie; it carries no symbol.
        self.root = TrieNode()
        # Number of patterns inserted so far; also the tail mark of the newest one.
        self.count = 0
        # Kept so that a tail mark can be mapped back to its pattern by index.
        self.words = words
        for word in words:
            self.insert(word)
        self.ac_automation()

    def insert(self, sequence):
        """Insert one pattern into the trie.

        The last node of the pattern is marked with the 1-based insertion
        number of the pattern.

        :param sequence: the pattern string
        :return: None
        """
        self.count += 1
        node = self.root
        for symbol in sequence:
            child = node.children.get(symbol)
            if child is None:
                child = TrieNode(value=symbol)
                node.children[symbol] = child
            node = child
        node.tail = self.count

    def ac_automation(self):
        """Build the failure links with a breadth-first walk over the trie.

        :return: None
        """
        # A deque gives O(1) pops from the front of the BFS queue.
        queue = deque([self.root])
        while queue:
            parent = queue.popleft()
            for child in parent.children.values():
                if parent is self.root:
                    # Children of the root always fall back to the root.
                    child.fail = self.root
                else:
                    # Follow the parent's failure chain until a node with an
                    # outgoing edge for this child's symbol is found.
                    fallback = parent.fail
                    while fallback is not None and child.value not in fallback.children:
                        fallback = fallback.fail
                    if fallback is None:
                        # The chain ran past the root: fall back to the root.
                        child.fail = self.root
                    else:
                        child.fail = fallback.children[child.value]
                queue.append(child)

    def search(self, text):
        """Find every occurrence of every pattern in ``text``.

        :param text: the text to scan
        :return: ``defaultdict(list)`` mapping each matched pattern to a list
            of ``(start, end)`` index pairs into ``text``, both inclusive
        """
        matches = defaultdict(list)
        state = self.root
        for end, char in enumerate(text):
            # Drop to shorter suffixes until one can be extended by ``char``.
            while char not in state.children and state is not self.root:
                state = state.fail
            # Extend the current suffix, or restart from the root.
            state = state.children.get(char, self.root)
            # Walk the failure chain so that patterns which are suffixes of
            # the current path are reported as well.
            node = state
            while node is not self.root:
                if node.tail:
                    # Tail marks are 1-based, the pattern list is 0-based.
                    word = self.words[node.tail - 1]
                    # The start is derived from the pattern length instead of
                    # being tracked during the scan, so it stays correct after
                    # failure transitions and for suffix patterns.
                    matches[word].append((end - len(word) + 1, end))
                node = node.fail
        return matches


def main():
    """Demo: build an automaton for one phrase and print its matches in a sample text."""
    patterns = ["air rifle scopes"]
    sample = """Choosing from among the dozens of air rifle scopes. A scope's job is to magnify an image
 Air rifle scopes are built to withstand the double recoil and the vibrations of the rifle. 
 Variable air rifle scopes operate differently than the fixed scope. 
 Air rifle scopes are an important addition to a shooter's air rifle and can really make a difference in the accuracy and quality of the shooting experience. Knowing what the differences are between air rifle scopes helps a shooter determine which will work best with their air rifle and their individual shooting style."""
    automaton = Trie(patterns)
    # The sample is lower-cased before the scan.
    found = automaton.search(sample.lower())
    print(dict(found))


# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

记录下标

# -*- coding: utf-8 -*-
# @Time    : 2019/7/31 15:52
# @Author  :
# common包中的 __init__.py

import os
import json


def set_json_data(filename, data):
    """Serialise ``data`` as JSON and write it to ``filename``.

    Missing parent directories are created first; an existing file is
    overwritten.

    :param filename: path of the JSON file to write
    :param data: any JSON-serialisable value
    """
    directory = os.path.dirname(filename)
    # A bare file name has an empty dirname and os.makedirs('') raises, so
    # directories are only created when the path has a directory part.
    # exist_ok avoids failing when the directory already exists.
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(json.dumps(data))


def get_json_data(filename):
    """Load and return the JSON value stored in ``filename``.

    If the file does not exist yet it is first created holding the JSON
    value 0, which is then what gets read back.
    """
    if not os.path.exists(filename):
        # Seed the file so the read below always finds one.
        set_json_data(filename, data=0)
    with open(filename, 'r', encoding='utf-8') as handle:
        raw = handle.read()
    return json.loads(raw)


def get_id(filename):
    """Return the id stored for ``filename``.

    The value is read from ``store/<basename>`` next to this module, where
    the basename of ``filename`` has '.py' replaced by '.json'.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    json_name = os.path.basename(filename).replace('.py', '.json')
    return get_json_data(module_dir + '/store/' + json_name)


def set_id(filename, last_pid):
    """Store ``last_pid`` as the id for ``filename``.

    The value is written to ``store/<basename>`` next to this module, where
    the basename of ``filename`` has '.py' replaced by '.json'.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    json_name = os.path.basename(filename).replace('.py', '.json')
    set_json_data(module_dir + '/store/' + json_name, last_pid)


# Nothing happens when this module is executed as a script.
if __name__ == '__main__':
    pass

mysql封装

# -*- coding: utf-8 -*-
# @Time : 2021/12/28 16:52
# @Author : Cocktail_py
import time
from traceback import format_exc
import eventlet
import pymysql
import sys
version = sys.version_info.major
try:
    from dbutils.pooled_db import PooledDB
except:
    from DBUtils.PooledDB import PooledDB
from pymysql.converters import escape_string
class MysqlHelper(object):
    """Read/write helper around a pooled PyMySQL connection."""

    def __init__(self, host='localhost', user='root', password='123456789',
                 database='adwords', port=3306,
                 charset='utf8'):
        """Store the connection settings and connect immediately."""
        self.host = host
        self.user = user
        self.password = password
        self.database = database
        self.port = port
        self.charset = charset
        self.connect()

    def connect(self):
        """Create the connection pool plus the shared connection and cursor.

        :return: tuple ``(connection, cursor)``; both are also stored on the
            instance as ``self.conn`` and ``self.cursor``
        """
        db_config = {
            "creator": pymysql,
            "maxconnections": 1000,
            "mincached": 2,
            "maxcached": 5,
            "maxshared": 3,
            "blocking": True,
            "maxusage": None,
            "setsession": [],
            "ping": 0,
            "host": self.host,
            "port": self.port,
            "user": self.user,
            "database": self.database,
            "charset": self.charset,
            "autocommit": True
        }
        # The password keyword depends on the interpreter's major version.
        if version == 2:
            db_config["passwd"] = self.password
        else:
            db_config["password"] = self.password
        self.__pool = PooledDB(**db_config)
        self.conn = self.__pool.connection()
        self.cursor = self.conn.cursor(pymysql.cursors.DictCursor)
        return self.conn, self.cursor

    def __edit(self, sql, params=None):
        """Execute one statement on the shared connection and commit it.

        :param sql: statement text, optionally with ``%s`` placeholders
        :param params: values for the placeholders, or None
        :return: True once the statement was executed and committed, False
            when an exception was raised
        """
        while True:
            try:
                finished = False
                # NOTE(review): eventlet.Timeout(30, False) is expected to
                # abandon the block silently after 30 seconds; ``finished``
                # then stays False and the statement is attempted again,
                # without an upper bound on retries - confirm this is intended.
                with eventlet.Timeout(30, False):
                    self.conn.ping(reconnect=True)
                    self.cursor.execute(sql, params)
                    self.conn.commit()
                    finished = True
                if finished:
                    return True
            except Exception:
                msg = format_exc()
                print(msg)
                if 'error: [Errno 32] Broken pipe' in msg:
                    # Rebuild pool, connection and cursor for later calls.
                    self.close()
                    self.connect()
                    time.sleep(3)
                return False

    def execute(self, sql, params=None):
        """Execute a statement; see ``__edit`` for the return value."""
        return self.__edit(sql, params=params)

    def escape(self, m_str):
        """Escape a string for use inside a MySQL string literal.

        :param m_str: the string to escape
        :return: the escaped string
        """
        return escape_string(m_str)

    @staticmethod
    def _sql_literal(value):
        """Render one Python value as the text of a SQL literal."""
        if value is None:
            return 'NULL'
        if isinstance(value, bool):
            return str(int(value))
        if isinstance(value, (int, float)):
            return str(value)
        return "'%s'" % escape_string(value)

    @staticmethod
    def _build_upsert(table, columns, values):
        """Assemble an INSERT ... ON DUPLICATE KEY UPDATE statement.

        ``values`` holds one piece of text per column (a placeholder or a
        rendered literal); it is used both in the VALUES list and in the
        UPDATE assignments.
        """
        assignments = ','.join(
            '{}= {}'.format(column, value) for column, value in zip(columns, values))
        return 'INSERT INTO {table}({keys})values ({values})ON DUPLICATE KEY UPDATE {updates};'.format(
            table=table, keys=','.join(columns), values=','.join(values), updates=assignments)

    def _open_cursor(self):
        """Take a dedicated connection from the pool and open a dict cursor on it."""
        conn = self.__pool.connection()
        return conn, conn.cursor(pymysql.cursors.DictCursor)

    @staticmethod
    def _fetch_and_release(conn, cursor, sql, params):
        """Run the query, return all rows, and always close cursor and connection."""
        try:
            cursor.execute(sql, params)
            return cursor.fetchall()
        finally:
            cursor.close()
            conn.close()

    # Read
    def get_all(self, sql, params=()):
        """Run a query on its own pooled connection and return all rows.

        A ``pymysql.OperationalError`` is printed and the query is retried
        once on a fresh connection; an error raised by the retry propagates
        unchanged.

        :param sql: query text, optionally with ``%s`` placeholders
        :param params: values for the placeholders
        :return: the rows returned by ``cursor.fetchall()``
        :raises Exception: carrying the formatted traceback when the first
            attempt fails with anything other than an operational error
        """
        # A separate connection per call avoids clashing with the shared one.
        conn, cursor = self._open_cursor()
        try:
            return self._fetch_and_release(conn, cursor, sql, params)
        except pymysql.OperationalError:
            print(format_exc())
        except Exception:
            msg = format_exc()
            if 'error: [Errno 32] Broken pipe' in msg:
                time.sleep(3)
            raise Exception(msg)
        # Only reached after an operational error: retry once.
        conn, cursor = self._open_cursor()
        return self._fetch_and_release(conn, cursor, sql, params)

    def just_insert(self, table, **kwargs):
        """Insert one row using a parameterized statement.

        The table and column names are placed into the statement text as
        given; only the values are passed as parameters.

        :param table: table name
        :param kwargs: column name -> value
        :return: result of :meth:`execute`
        """
        keys = ','.join(kwargs.keys())
        values = ','.join(['%s'] * len(kwargs))
        sql = 'INSERT INTO {table}({keys})values ({values})'.format(table=table, keys=keys, values=values)
        return self.execute(sql, tuple(kwargs.values()))

    def file_insert(self, table, **kwargs):
        """Append an ``INSERT ignore`` statement for one row to ``<table>.sql``.

        Values are rendered as SQL literals (None becomes NULL, strings are
        escaped and single-quoted) instead of Python ``repr`` text.

        :param table: table name; also the stem of the output file
        :param kwargs: column name -> value
        """
        keys = ','.join(kwargs.keys())
        values = ','.join(self._sql_literal(value) for value in kwargs.values())
        sql = 'INSERT ignore INTO {table}({keys})values ({values});'.format(table=table, keys=keys, values=values)
        with open('%s.sql' % table, 'a', encoding='utf-8') as f:
            f.write(sql + '\n')
        print('file_insert_success')

    # Update
    def update(self, sql):
        """Execute a complete statement and print the outcome.

        :param sql: the full statement text
        :return: None
        """
        result = self.execute(sql)
        print('update:', result)

    # Insert, or update when the key already exists
    def insert_or_update(self, table, **kwargs):
        """Insert one row, or update it when its key already exists.

        The values are passed as statement parameters (once for the VALUES
        list and once for the UPDATE assignments), so quoting is left to the
        driver and non-string values such as None or floats are accepted.
        The table and column names are placed into the statement as given.

        :param table: table name
        :param kwargs: column name -> value
        :return: result of :meth:`execute`
        """
        columns = list(kwargs)
        sql = self._build_upsert(table, columns, ['%s'] * len(columns))
        result = self.execute(sql, tuple(kwargs.values()) * 2)
        print('mysql_insert_or_update:', result)
        return result

    def file_insert_or_update(self, table, **kwargs):
        """Append an insert-or-update statement for one row to ``<table>.sql``.

        Values are rendered as SQL literals (None becomes NULL, numbers are
        written bare, strings are escaped and single-quoted).

        :param table: table name; also the stem of the output file
        :param kwargs: column name -> value
        :return: the string 'Successful'
        """
        columns = list(kwargs)
        literals = [self._sql_literal(value) for value in kwargs.values()]
        sql = self._build_upsert(table, columns, literals)
        with open('%s.sql' % table, 'a', encoding='utf-8') as f:
            f.write(sql + '\n')
        return 'Successful'

    def close(self):
        """Close the shared cursor and connection, printing any error instead of raising."""
        try:
            self.cursor.close()
        except Exception:
            print(format_exc())
        try:
            self.conn.close()
        except Exception:
            print(format_exc())
# Nothing happens when this module is executed as a script.
if __name__ == '__main__':
    pass


参考链接:https://blog.csdn.net/danengbinggan33/article/details/83338789

您可能感兴趣的与本文相关的镜像

Python3.8

Python3.8

Conda
Python

Python 是一种高级、解释型、通用的编程语言,以其简洁易读的语法而闻名,适用于广泛的应用,包括Web开发、数据分析、人工智能和自动化脚本

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Cocktail_py

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值