AC 自动机(Aho-Corasick 多模式匹配)
# -*- coding: utf-8 -*-
# @Time : 2019/8/14 14:42
# @Author :
# common包下的 py_ac.py
from collections import defaultdict, deque
class TrieNode(object):
    """One node of the pattern trie used by the Aho-Corasick matcher.

    The four attributes are fixed in ``__slots__`` because a trie holds one
    node per distinct pattern prefix; dropping the per-instance ``__dict__``
    keeps large dictionaries of patterns compact.
    """

    __slots__ = ('value', 'fail', 'tail', 'children')

    def __init__(self, value=None):
        # Item stored on this node; None when no value is given.
        self.value = value
        # Failure link; None until something assigns it.
        self.fail = None
        # End-of-pattern marker: i > 0 means the i-th pattern ends here,
        # 0 (the default) means no pattern ends at this node.
        self.tail = 0
        # Child nodes, keyed by the child's ``value``.
        self.children = {}

    def __repr__(self):
        return '{}(value={!r}, tail={!r})'.format(
            type(self).__name__, self.value, self.tail)
class Trie(object):
    """Aho-Corasick automaton built from a list of pattern strings.

    The constructor inserts every pattern into a trie and then computes the
    failure links, after which ``search`` finds all occurrences of all
    patterns in a text in a single left-to-right pass.
    """

    def __init__(self, words):
        """Build the automaton.

        :param words: indexable sequence of patterns; ``search`` looks the
            matched pattern up again as ``words[tail - 1]``.
        """
        # Root of the trie (its value is None).
        self.root = TrieNode()
        # Number of patterns inserted so far.
        self.count = 0
        self.words = words
        for word in words:
            self.insert(word)
        self.ac_automation()

    def insert(self, sequence):
        """Insert one pattern into the trie.

        The node reached by the last item is tagged with the 1-based
        insertion number of the pattern.

        :param sequence: the pattern to insert
        :return: None
        """
        self.count += 1
        cur_node = self.root
        for item in sequence:
            child = cur_node.children.get(item)
            if child is None:
                child = TrieNode(value=item)
                cur_node.children[item] = child
            cur_node = child
        cur_node.tail = self.count

    def ac_automation(self):
        """Compute the failure link of every node with a breadth-first walk.

        :return: None
        """
        # deque gives O(1) removal from the front; list.remove() shifts the
        # whole queue on every dequeue.
        queue = deque([self.root])
        while queue:
            parent = queue.popleft()
            for child in parent.children.values():
                if parent is self.root:
                    # Depth-1 nodes always fall back to the root.
                    child.fail = self.root
                else:
                    # Follow the parent's failure chain until some node has
                    # an outgoing edge labelled with this child's value.
                    p = parent.fail
                    while p is not None:
                        if child.value in p.children:
                            child.fail = p.children[child.value]
                            break
                        p = p.fail
                    else:
                        # Chain exhausted (ran past the root): no proper
                        # suffix is a prefix of any pattern.
                        child.fail = self.root
                queue.append(child)

    def search(self, text):
        """Find every occurrence of every pattern in ``text``.

        :param text: the text to scan
        :return: ``defaultdict(list)`` mapping each matched pattern to a list
            of ``(start, end)`` index pairs, both inclusive
        """
        p = self.root
        rst = defaultdict(list)
        for i, single_char in enumerate(text):
            # Drop to shorter suffixes until one can be extended by this
            # character, or until we are back at the root.
            while single_char not in p.children and p is not self.root:
                p = p.fail
            # Either advance along the edge or stay at / return to the root.
            p = p.children.get(single_char, self.root)
            # Walk the failure chain so patterns that are suffixes of the
            # current match are reported too.
            temp = p
            while temp is not self.root:
                if temp.tail:
                    # tail is 1-based, the pattern list is 0-based.
                    word = self.words[temp.tail - 1]
                    # Derive the start from the pattern length: tracking it
                    # while scanning goes stale once a failure link is taken.
                    rst[word].append((i - len(word) + 1, i))
                temp = temp.fail
        return rst
def main():
    """Demo: search a lower-cased sample paragraph for one phrase and print the hits."""
    sample_text = """Choosing from among the dozens of air rifle scopes. A scope's job is to magnify an image
Air rifle scopes are built to withstand the double recoil and the vibrations of the rifle.
Variable air rifle scopes operate differently than the fixed scope.
Air rifle scopes are an important addition to a shooter's air rifle and can really make a difference in the accuracy and quality of the shooting experience. Knowing what the differences are between air rifle scopes helps a shooter determine which will work best with their air rifle and their individual shooting style."""
    patterns = ["air rifle scopes"]
    automaton = Trie(patterns)
    # The text is lower-cased before searching; the pattern is already lower case.
    hits = automaton.search(sample_text.lower())
    print(dict(hits))


if __name__ == "__main__":
    main()
记录下标(将上次处理到的 id 保存到 JSON 文件)
# -*- coding: utf-8 -*-
# @Time : 2019/7/31 15:52
# @Author :
# common包中的 __init__.py
import os
import json
def set_json_data(filename, data):
    """Serialise ``data`` as JSON and write it to ``filename``.

    Missing parent directories are created first. An existing file is
    overwritten.

    :param filename: path of the JSON file to write
    :param data: any JSON-serialisable value
    """
    path = os.path.dirname(filename)
    # dirname() is '' for a bare file name and os.makedirs('') raises, so only
    # create directories when there is a directory part. exist_ok avoids the
    # race between checking for the directory and creating it.
    if path:
        os.makedirs(path, exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(json.dumps(data))
def get_json_data(filename):
    """Load and return the JSON value stored in ``filename``.

    When the file does not exist yet it is first created holding the JSON
    value ``0``, which is then what gets returned.
    """
    if not os.path.exists(filename):
        # Seed the store so the read below always has a file to open.
        set_json_data(filename, data=0)
    with open(filename, 'r', encoding='utf-8') as handle:
        return json.loads(handle.read())
def get_id(filename):
    """Return the id stored for ``filename``.

    The value is read from ``store/<basename>.json`` next to this module,
    where ``<basename>`` is the base name of ``filename`` with every ``.py``
    replaced by ``.json``.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    store_name = os.path.basename(filename).replace('.py', '.json')
    return get_json_data(module_dir + '/store/' + store_name)
def set_id(filename, last_pid):
    """Store ``last_pid`` as the id for ``filename``.

    The value is written to ``store/<basename>.json`` next to this module,
    where ``<basename>`` is the base name of ``filename`` with every ``.py``
    replaced by ``.json``.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    store_name = os.path.basename(filename).replace('.py', '.json')
    set_json_data(module_dir + '/store/' + store_name, last_pid)
# Running this module directly does nothing; the guard is a placeholder.
if __name__ == '__main__':
    pass
MySQL 封装
# -*- coding: utf-8 -*-
# @Time : 2021/12/28 16:52
# @Author : Cocktail_py
import time
from traceback import format_exc
import eventlet
import pymysql
import sys
version = sys.version_info.major
# Prefer the lowercase module path used by newer DBUtils releases and fall
# back to the legacy CamelCase path. Only a failed import triggers the
# fallback, so unrelated errors are no longer hidden.
try:
    from dbutils.pooled_db import PooledDB
except ImportError:
    from DBUtils.PooledDB import PooledDB
from pymysql.converters import escape_string
class MysqlHelper(object):
    """Read/write helper around a PyMySQL connection pool.

    Write statements (``execute`` and the insert/update helpers) run on one
    connection/cursor pair kept on the instance; ``get_all`` borrows its own
    connection from the pool for every call.

    Table and column names are interpolated into the SQL text as-is, so they
    must come from trusted code, never from user input.
    """

    # Maximum number of attempts ``execute`` makes for one statement.
    max_retries = 3
    # Seconds to pause after a connection-level failure before retrying.
    retry_delay = 3

    def __init__(self, host='localhost', user='root', password='123456789',
                 database='adwords', port=3306,
                 charset='utf8'):
        """Store the connection settings and connect immediately."""
        self.host = host
        self.user = user
        self.password = password
        self.database = database
        self.port = port
        self.charset = charset
        self.connect()

    def connect(self):
        """Create the connection pool and the instance's connection/cursor.

        :return: ``(connection, cursor)``; the cursor is a ``DictCursor``
        """
        db_config = {
            "creator": pymysql,
            "maxconnections": 1000,
            "mincached": 2,
            "maxcached": 5,
            "maxshared": 3,
            "blocking": True,
            "maxusage": None,
            "setsession": [],
            "ping": 0,
            "host": self.host,
            "port": self.port,
            "user": self.user,
            "database": self.database,
            "charset": self.charset,
            "autocommit": True
        }
        # The password keyword differs by interpreter major version.
        if version == 2:
            db_config["passwd"] = self.password
        else:
            db_config["password"] = self.password
        self.__pool = PooledDB(**db_config)
        self.conn = self.__pool.connection()
        self.cursor = self.conn.cursor(pymysql.cursors.DictCursor)
        return self.conn, self.cursor

    def _reconnect(self):
        """Drop the current connection and try to open a fresh one."""
        self.close()
        try:
            self.connect()
        except Exception:
            # Keep the bool contract of execute(): report and let the next
            # attempt (or the final ``return False``) decide.
            print(format_exc())

    def __edit(self, sql, params=None):
        """Run one write statement, retrying connection-level failures.

        :return: True once the statement has been committed; False when it
            failed for a non-connection reason or every attempt was used up
        """
        for attempt in range(self.max_retries):
            try:
                committed = False
                # With exception=False the timeout is swallowed by the
                # context manager, so a timed-out attempt simply leaves
                # ``committed`` False and the loop tries again.
                with eventlet.Timeout(30, False):
                    self.conn.ping(reconnect=True)
                    self.cursor.execute(sql, params)
                    self.conn.commit()
                    committed = True
                if committed:
                    return True
            except (pymysql.OperationalError, pymysql.InterfaceError, OSError):
                # Connection trouble (lost link, broken pipe, ...): a fresh
                # connection may succeed, so reconnect and retry.
                print(format_exc())
                self._reconnect()
                if attempt + 1 < self.max_retries:
                    time.sleep(self.retry_delay)
            except Exception:
                # Bad SQL, constraint violations etc. fail the same way every
                # time; retrying them would loop forever.
                print(format_exc())
                return False
        return False

    def execute(self, sql, params=None):
        """Execute a write statement; returns True on success, else False."""
        return self.__edit(sql, params=params)

    def escape(self, m_str):
        """Return ``m_str`` escaped for use inside a MySQL string literal.

        :param m_str: the string to escape
        :return: the escaped string
        """
        new_str = escape_string(m_str)
        return new_str

    @staticmethod
    def _sql_literal(value):
        """Render ``value`` as a SQL literal for statements written to a file."""
        if value is None:
            return 'NULL'
        if isinstance(value, bool):
            # bool is an int subclass; emit 1/0 instead of True/False.
            return str(int(value))
        if isinstance(value, (int, float)):
            return repr(value)
        return "'%s'" % escape_string(str(value))

    def _fetch_all(self, sql, params):
        """Run a query on a connection borrowed from the pool; return all rows."""
        conn = self.__pool.connection()
        try:
            cursor = conn.cursor(pymysql.cursors.DictCursor)
            try:
                cursor.execute(sql, params)
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            conn.close()

    # Read
    def get_all(self, sql, params=()):
        """Run a query and return every row.

        Each call uses its own pooled connection so concurrent readers do not
        share a cursor. An ``OperationalError`` is retried once on a fresh
        connection; if the retry fails too, that error propagates unchanged.

        :return: whatever ``cursor.fetchall()`` returns for a ``DictCursor``
        :raises Exception: for any other failure, carrying the formatted
            traceback as its message
        """
        try:
            return self._fetch_all(sql, params)
        except pymysql.OperationalError:
            print(format_exc())
        except Exception:
            raise Exception(format_exc())
        # Single retry after an operational error.
        return self._fetch_all(sql, params)

    def just_insert(self, table, **kwargs):
        """Insert one row.

        :param table: table name
        :param kwargs: column name -> value
        :return: True on success, False otherwise
        """
        keys = ','.join(kwargs.keys())
        values = ','.join(['%s'] * len(kwargs))
        sql = 'INSERT INTO {table}({keys})values ({values})'.format(table=table, keys=keys, values=values)
        result = self.execute(sql, tuple(kwargs.values()))
        return result

    def file_insert(self, table, **kwargs):
        """Append an ``INSERT ignore`` statement for one row to ``<table>.sql``.

        :param table: table name; also the base name of the output file
        :param kwargs: column name -> value
        """
        keys = ','.join(kwargs.keys())
        values = ','.join(self._sql_literal(value) for value in kwargs.values())
        sql = 'INSERT ignore INTO {table}({keys})values ({values});'.format(table=table, keys=keys, values=values)
        with open('%s.sql' % table, 'a', encoding='utf-8') as f:
            f.write(sql + '\n')
        print('file_insert_success')

    # Update
    def update(self, sql, params=None):
        """Execute an update statement.

        :param sql: the statement to run
        :param params: optional query parameters
        :return: True on success, False otherwise
        """
        result = self.execute(sql, params)
        print('update:', result)
        return result

    # Insert, or update when the row already exists
    def insert_or_update(self, table, **kwargs):
        """Insert one row, or update it when a unique key already exists.

        Values are sent as query parameters, so any type the driver can
        convert (str, int, float, None, ...) is accepted.

        :param table: table name
        :param kwargs: column name -> value
        :return: True on success, False otherwise
        """
        columns = list(kwargs)
        keys = ','.join(columns)
        values = ','.join(['%s'] * len(columns))
        update = ','.join('{key}= %s'.format(key=key) for key in columns)
        sql = 'INSERT INTO {table}({keys})values ({values})ON DUPLICATE KEY UPDATE {update};'.format(
            table=table, keys=keys, values=values, update=update)
        # The same values fill the VALUES list and then the UPDATE list.
        params = tuple(kwargs.values()) * 2
        result = self.execute(sql, params)
        print('mysql_insert_or_update:', result)
        return result

    def file_insert_or_update(self, table, **kwargs):
        """Append an ``INSERT ... ON DUPLICATE KEY UPDATE`` statement to ``<table>.sql``.

        :param table: table name; also the base name of the output file
        :param kwargs: column name -> value
        :return: the string ``'Successful'``
        """
        columns = list(kwargs)
        literals = [self._sql_literal(kwargs[key]) for key in columns]
        keys = ','.join(columns)
        values = ','.join(literals)
        update = ','.join(
            '{key}= {value}'.format(key=key, value=literal)
            for key, literal in zip(columns, literals))
        sql = 'INSERT INTO {table}({keys})values ({values})ON DUPLICATE KEY UPDATE {update};'.format(
            table=table, keys=keys, values=values, update=update)
        with open('%s.sql' % table, 'a', encoding='utf-8') as f:
            f.write(sql + '\n')
        return 'Successful'

    def close(self):
        """Close the instance's cursor and connection, reporting any error."""
        try:
            self.cursor.close()
        except Exception:
            print(format_exc())
        try:
            self.conn.close()
        except Exception:
            print(format_exc())
# Running this module directly does nothing; the guard is a placeholder.
if __name__ == '__main__':
    pass
参考链接 https://blog.csdn.net/danengbinggan33/article/details/83338789

8713

被折叠的 条评论
为什么被折叠?



