4 读写文件_爬虫之旅4-CSV文件读写

本文受《Python爬虫开发与实战-从入门到实战》启发,介绍Python对CSV文件的读写操作。写入方式包括pandas写入、csv写入(字典、列表、单行、上下文写入),读取方式有pandas读取和csv模块读取,能让爬取数据的存储逻辑更易理解。

CSV文件如何读写

写这篇文章的主要原因是:最近在看《Python爬虫开发与实战-从入门到实战》一书,书中提到了csv这个模块。我立马进行了尝试,发现非常好用,比之前用的xlwt好用多了。

关键是爬取到数据之后,整个存储数据的逻辑更容易理解(可能还是自己太菜吧)。本文介绍如何通过pandas和csv模块对数据进行读写操作。

096b2580561c8d1ace1ef7645d00663b.png

写入文件

pandas写入

# 1 - write with pandas: start from a list of records (one dict per row)

import pandas as pd

data = [
    dict(name="yangming", age=32, height=180, address="shenzhen"),
    dict(name="xiaoming", age=24, height=168, address="guangzhou"),
    dict(name="zhoujun", age=29, height=184, address="shanghai"),
    dict(name="zhangshan", age=20, height=170, address="changsha"),
]

# Each dict becomes one row; the dict keys become the column names.
df = pd.DataFrame(data)
df

        name  age  height    address
0   yangming   32     180   shenzhen
1   xiaoming   24     168  guangzhou
2    zhoujun   29     184   shanghai
3  zhangshan   20     170   changsha

# Save the DataFrame as a CSV file. `index` controls whether the row labels
# are written; it defaults to True, so here the labels are written as an extra
# first column, which shows up as "Unnamed: 0" when the file is read back.

df.to_csv("tocsvfile-pandas.csv",sep=",")
pd.read_csv("tocsvfile-pandas.csv")

   Unnamed: 0       name  age  height    address
0           0   yangming   32     180   shenzhen
1           1   xiaoming   24     168  guangzhou
2           2    zhoujun   29     184   shanghai
3           3  zhangshan   20     170   changsha

# How to understand the index parameter: with index=False the row labels are
# not written to the file, so reading it back yields only the data columns.

df.to_csv("tocsvfile-pandas-1.csv",index=False,sep=",")
pd.read_csv("tocsvfile-pandas-1.csv")

        name  age  height    address
0   yangming   32     180   shenzhen
1   xiaoming   24     168  guangzhou
2    zhoujun   29     184   shanghai
3  zhangshan   20     170   changsha

1609630140e3f1ef8c4e603e625aa05b.png

csv写入

字典形式写入

# Store a list of dicts in a CSV file with csv.DictWriter, then read it back.

import csv

import pandas as pd

data = [
    {"name": "yangming", "age": 32, "height": 180, "address": "shenzhen"},
    {"name": "xiaoming", "age": 24, "height": 168, "address": "guangzhou"},
    {"name": "zhoujun", "age": 29, "height": 184, "address": "shanghai"},
    {"name": "zhangshan", "age": 20, "height": 170, "address": "changsha"},
]

# newline="" is required for files handed to a csv writer: without it the
# writer's "\r\n" line terminator is translated a second time on Windows and
# every record is followed by an empty line.
with open("information.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["name", "age", "height", "address"])
    writer.writeheader()
    writer.writerows(data)  # writerows: write a whole list of rows at once
    writer.writerow(  # writerow: write one single row
        {"name": "Peter", "age": 28, "height": 176, "address": "shenzhen"}
    )

# Read the file back with pandas to check what was written.
data = pd.read_csv("information.csv")
data

        name  age  height    address
0   yangming   32     180   shenzhen
1   xiaoming   24     168  guangzhou
2    zhoujun   29     184   shanghai
3  zhangshan   20     170   changsha
4      Peter   28     176   shenzhen

列表形式写入

# Write rows in batches (for example one batch per crawled page) with
# csv.DictWriter. The file is opened once, before the loop, so the header is
# handled in a single place instead of once per batch.
# Example: the Jianshu app crawl.

import csv

import pandas as pd

fieldnames = ["name", "age", "height", "address"]

# One batch of records; the same four people are written for every batch.
page_rows = [
    {"name": "xiaoming", "age": 19, "height": 176, "address": "shenzhen"},
    {"name": "yanghong", "age": 27, "height": 180, "address": "guangzhou"},
    {"name": "peter", "age": 32, "height": 172, "address": "shanghai"},
    {"name": "Tom", "age": 24, "height": 183, "address": "changsha"},
]

with open("information-1.csv", "a", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)

    # In append mode the stream starts at the end of the file, so position 0
    # means the file is new or empty. Writing the header only in that case
    # keeps a re-run from inserting a second header row in the middle of the
    # data.
    if f.tell() == 0:
        writer.writeheader()

    # Four batches of four rows: 16 data rows per run.
    for _ in range(4):
        writer.writerows(page_rows)

# Read the data back
data = pd.read_csv("information-1.csv")
data

        name  age  height    address
0   xiaoming   19     176   shenzhen
1   yanghong   27     180  guangzhou
2      peter   32     172   shanghai
3        Tom   24     183   changsha
4   xiaoming   19     176   shenzhen
5   yanghong   27     180  guangzhou
6      peter   32     172   shanghai
7        Tom   24     183   changsha
8   xiaoming   19     176   shenzhen
9   yanghong   27     180  guangzhou
10     peter   32     172   shanghai
11       Tom   24     183   changsha
12  xiaoming   19     176   shenzhen
13  yanghong   27     180  guangzhou
14     peter   32     172   shanghai
15       Tom   24     183   changsha

单行写入

import csv

import pandas as pd

# 1 - the header row
fileHeader = ["name", "score"]

# 2 - the three data rows to write
d1 = ["Wang", "100"]
d2 = ["Li", "80"]
d3 = ["xiaosi", "92"]

# 3 - open the file by hand (no `with`), so it also has to be closed by hand.
# newline="" stops the csv writer's "\r\n" terminator from being translated
# again on Windows, which would leave an empty line after every row.
f = open("instance_1.csv", "w", newline="", encoding="utf-8")
try:
    writer = csv.writer(f)  # create the writer object

    # Every row is handed to the writer as one list. A single
    # writer.writerows([fileHeader, d1, d2, d3]) call would be equivalent to
    # the four writerow calls below.

    # 4 - pass the rows one at a time
    writer.writerow(fileHeader)
    writer.writerow(d1)
    writer.writerow(d2)
    writer.writerow(d3)
finally:
    # 5 - the file must be closed, even when one of the writes fails
    f.close()

pd.read_csv("instance_1.csv")

     name  score
0    Wang    100
1      Li     80
2  xiaosi     92

上下文写入-with

import csv

import pandas as pd

# The header row
fileHeader = ["name", "score"]

# The three data rows to write
d1 = ["Wang", "100"]
d2 = ["Li", "80"]
d3 = ["xiaosi", "92"]

# Write the data. The `with` block closes the file automatically, so no
# explicit close() is needed. newline="" keeps the csv writer's "\r\n"
# terminator from being translated again on Windows.
with open("instance_2.csv", "a", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)

    # In append mode the stream starts at the end of the file, so position 0
    # means the file is new or empty. Only then is the header written, so a
    # re-run appends data rows without a second header row.
    if f.tell() == 0:
        writer.writerow(fileHeader)

    writer.writerows([d1, d2, d3])

pd.read_csv("instance_2.csv")

     name  score
0    Wang    100
1      Li     80
2  xiaosi     92

读文件

pandas读取文件

# 1 - read the file with pandas

import pandas as pd

# read_csv loads the whole file into a DataFrame; the first line of the file
# is used as the column names.
csvfile = pd.read_csv("information.csv")
csvfile

        name  age  height    address
0   yangming   32     180   shenzhen
1   xiaoming   24     168  guangzhou
2    zhoujun   29     184   shanghai
3  zhangshan   20     170   changsha
4      Peter   28     176   shenzhen

csv模块读取文件

# 2 - read the file with the csv module

import csv

# Read with the same encoding the file was written in (UTF-8) instead of the
# platform default, and pass newline="" so the csv module handles line
# endings itself, as its documentation asks for.
with open("information-1.csv", newline="", encoding="utf-8") as f:
    csvfile = csv.reader(f)
    for line in csvfile:  # the reader is iterable; no readlines() needed
        print(line)  # every row arrives as a list of strings

['name', 'age', 'height', 'address']
['xiaoming', '19', '176', 'shenzhen']
['yanghong', '27', '180', 'guangzhou']
['peter', '32', '172', 'shanghai']
['Tom', '24', '183', 'changsha']
['xiaoming', '19', '176', 'shenzhen']
['yanghong', '27', '180', 'guangzhou']
['peter', '32', '172', 'shanghai']
['Tom', '24', '183', 'changsha']
['xiaoming', '19', '176', 'shenzhen']
['yanghong', '27', '180', 'guangzhou']
['peter', '32', '172', 'shanghai']
['Tom', '24', '183', 'changsha']
['xiaoming', '19', '176', 'shenzhen']
['yanghong', '27', '180', 'guangzhou']
['peter', '32', '172', 'shanghai']
['Tom', '24', '183', 'changsha'] 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值