python(字节流、buffer、多进程)

二进制、字节流

  1. PIL.Image.Image 转二进制流
import io
import base64

def pil_image_byte(image):
    """Encode a PIL image as a base64 string of its PNG bytes.

    Args:
        image: A ``PIL.Image.Image``; any object exposing a compatible
            ``save(fp, format=...)`` method works as well.

    Returns:
        str: The PNG-encoded image bytes, base64-encoded and decoded to
        UTF-8 text.
    """
    # Serialize the image into an in-memory byte stream instead of a file.
    img_byte = io.BytesIO()
    image.save(img_byte, format='PNG')
    # getvalue() returns the whole buffer regardless of the stream position.
    image_data = img_byte.getvalue()
    # The string can be turned back into an OpenCV image with:
    #   cv2.imdecode(np.frombuffer(base64.b64decode(s), np.uint8),
    #                cv2.IMREAD_COLOR)
    return base64.b64encode(image_data).decode("utf-8")

def byte2image(byte_data):
    """Open raw encoded image bytes as a PIL image.

    Args:
        byte_data: Bytes of an encoded image file.

    Returns:
        The object returned by ``Image.open`` for the in-memory stream.

    Note:
        ``Image`` is not imported in this section; it is presumably
        ``PIL.Image`` (``from PIL import Image``) -- confirm where this
        function is used.
    """
    # Wrap the bytes in a file-like object so Image.open can read from them.
    return Image.open(io.BytesIO(byte_data))
  1. 二进制流处理pdf
from urllib import parse, request
import io
from pdfplumber.pdf import PDF

# Download a PDF over HTTP and parse it from memory, without a temp file.
img_url = 'http://apis.v30.edge.customs.dev.amiintellect.com/api/customs/core/storage/preview?id=/uploads/amiintellect-customs/dec/2020/3Q/9/7823107e-7d90-4200-814a-a29d63ed9cbf/gbdw/BG2020092500013/invoice_file/48852核注清单 - 副本.pdf'
# Percent-encode the URL (it contains non-ASCII characters and spaces) while
# leaving the delimiters listed in `safe` untouched.
image_path = parse.quote(img_url, encoding='utf8', safe='/:?=&')
# The context manager closes the HTTP response once the body has been read.
with request.urlopen(image_path) as resp:
    buffer = bytearray(resp.read())

# Wrap the downloaded bytes in a file-like object for the PDF parser.
stream = io.BytesIO(buffer)
pdf = PDF(stream)
for i, page in enumerate(pdf.pages):
    pdf_size = (page.height, page.width)
    page_words = page.extract_words()
    page_tables = page.find_tables()
    # Use a distinct name so the page index `i` is not visually shadowed.
    print([word['text'] for word in page_words])
  1. base64编码与解码
import cv2
import numpy as np
import base64

def np_img2base64(np_img):
    """Encode an image array as base64-encoded PNG bytes.

    Args:
        np_img: Image as a ``numpy.ndarray`` in a layout accepted by
            ``cv2.imencode``.

    Returns:
        bytes: Base64 encoding of the PNG file contents.

    Raises:
        ValueError: If ``cv2.imencode`` reports that encoding failed.
    """
    success, encoded = cv2.imencode('.png', np_img)
    if not success:
        raise ValueError("cv2.imencode could not encode the image as PNG")
    # Depending on the OpenCV version the encoded buffer is shaped (N, 1) or
    # (N,); tobytes() handles both, whereas np.squeeze(..., 1) raises on the
    # flat form.
    return base64.b64encode(encoded.tobytes())
    
def base642np_image(base64_str):
    """Decode a base64-encoded image file into an OpenCV image array.

    Args:
        base64_str: Base64 payload as ``bytes`` or ``str``. Trailing ``=``
            padding may be missing and is restored before decoding.

    Returns:
        The value returned by ``cv2.imdecode`` for the decoded bytes.
    """
    if isinstance(base64_str, str):
        # Work on bytes throughout so the padding below can be appended.
        base64_str = base64_str.encode('ascii')
    # -len % 4 is 0 for already-aligned input, so nothing is appended in that
    # case (the former `4 - len % 4` produced 4 and always added padding).
    missing_padding = -len(base64_str) % 4
    if missing_padding:
        base64_str += b'=' * missing_padding
    raw_bytes = base64.b64decode(base64_str)
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    np_img = np.frombuffer(raw_bytes, dtype=np.uint8)
    # imdecode expects an IMREAD_* flag; COLOR_RGB2BGR is a cvtColor
    # conversion code. IMREAD_COLOR matches the other decoders in this file.
    img = cv2.imdecode(np_img, cv2.IMREAD_COLOR)
    return img

buffer 与 stream

  1. PDF buffer转stream(二进制转stream)
import io
# Read the whole PDF into memory; the context manager closes the file handle
# that a bare open(...).read() would leave open.
with open('G://1.pdf', 'rb') as pdf_file:
    buffer = pdf_file.read()
# Wrap the bytes in an in-memory binary stream.
stream = io.BytesIO(buffer)
  1. 图片 buffer转stream
import numpy as np
import cv2
# Read the encoded image file into memory; the context manager closes the
# file handle that a bare open(...).read() would leave open.
with open('G://1.png', 'rb') as image_file:
    buffer = image_file.read()
# View the bytes as a uint8 array and let OpenCV decode them.
img = cv2.imdecode(np.frombuffer(buffer, np.uint8), cv2.IMREAD_COLOR)
  1. excel buffer处理
import pandas as pd

# Load tabular data from a response payload. `resp` and `suffix` are defined
# outside this snippet.
buffer = resp.body.buffer
if suffix != 'txt':
    # Everything that is not plain text is handed to the Excel reader.
    excel_data = pd.read_excel(buffer, header=None, keep_default_na=True)
else:
    excel_data = pd.read_table(buffer, header=None)
  1. word buffer处理
import io
import docx
# Parse a Word document from a response payload. `resp` is defined outside
# this snippet.
buffer = resp.body.buffer
# Wrap the payload in a file-like object before handing it to docx.
wordFile = io.BytesIO(buffer)
file = docx.Document(wordFile)
# Collect the text of every paragraph, then print the list in one go.
paragraph_texts = [paragraph.text for paragraph in file.paragraphs]
print(paragraph_texts)
  1. html buffer处理
from bs4 import BeautifulSoup
# Parse an HTML payload with the 'html.parser' backend. `resp` is defined
# outside this snippet.
buffer = resp.body.buffer
soup = BeautifulSoup(markup=buffer, features='html.parser')
  1. numpy转bytes
# Convert an image held as a numpy array to PNG bytes, then base64-encode it.
# `img` is assumed to be an np.ndarray defined before this snippet.
import io
import base64

import cv2

# np.ndarray has no save() method (that is the PIL.Image API), so the array
# is PNG-encoded with OpenCV instead.
encode_ok, png_array = cv2.imencode('.png', img)
if not encode_ok:
    raise ValueError("cv2.imencode could not encode the image as PNG")
# NOTE(review): OpenCV treats 3-channel arrays as BGR -- confirm the channel
# order of `img` before relying on the colors of the encoded PNG.
img_byte = io.BytesIO(png_array.tobytes())  # in-memory stream of PNG data
# getvalue() ignores the stream position and returns the full content as bytes.
image_data = img_byte.getvalue()
image_base64 = base64.b64encode(image_data).decode("utf-8")

多进程

  1. 多进程需要在main函数中运行
import time
import os
from glob import glob
from multiprocessing import Pool



def extract_frame(video_path):
    """Report the given path, pause for five seconds, and signal success.

    Args:
        video_path: Path handed to this worker; it is only printed.

    Returns:
        bool: Always ``True``.
    """
    print(video_path)
    # Presumably a stand-in for real per-video work in this demo.
    pause_seconds = 5
    time.sleep(pause_seconds)
    return True


if __name__ == '__main__':
    # Pool creation stays under the __main__ guard: on platforms that spawn
    # worker processes the module is re-imported in each worker, and an
    # unguarded Pool() would be created again there.
    file_path = 'E:/tumi/material/video/*/*'
    # The context manager shuts the pool down on exit; every map() call has
    # already returned its results by then, so no work is lost.
    with Pool(3) as pool:
        for _ in range(3):
            # Process files in a stable order, sorted by base name.
            file_list = sorted(glob(file_path), key=os.path.basename)
            pool.map(extract_frame, file_list)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值