pandas 数据分析部分

小懒胖熊

1844人浏览 · 2019-06-04 20:44:54

小懒胖熊 · 2019-06-04 20:44:54 发布

pandas数据分析部分共8题

import pandas as pd
# Load the scraped job postings and report the raw (rows, columns) shape.
df = pd.read_csv("qiancheng.csv")
print(df.shape)

# Drop postings whose title contains '-', '/', '(' or the full-width '（'.
# One raw-string character class replaces four separate passes and avoids the
# invalid '\(' escape sequence of a non-raw literal; ~ negates the mask.
df = df[~df['work_name'].str.contains(r'[-/(（]')]

# Remove rows with a missing location *before* the substring filter:
# str.contains propagates NaN for missing values, and a mask containing NaN
# cannot be reliably negated with ~ or used for boolean indexing.
df = df.dropna(subset=['work_location'])
df = df[~df['work_location'].str.contains('异地招聘|空')]

# Renumber the surviving rows 0..n-1, since the later loops look rows up by
# df[column][i] with i taken from range(len(df)).
df = df.reset_index(drop=True)
def _salary_bounds(text):
    """Turn a 'low-high<unit>...' salary string into (min, max) strings.

    When the part after the first '-' contains '万', both bounds are scaled
    by 10000; otherwise that part is cut at '千' and both bounds are scaled
    by 1000. Each result is the scaled float rendered with str() and suffixed
    with "元/月".
    """
    parts = text.split('-')
    upper = parts[1]
    if '万' in upper:
        unit, factor = '万', 10000
    else:
        unit, factor = '千', 1000
    upper_number = upper.split(unit)[0]
    lower_text = str(float(parts[0]) * factor) + "元/月"
    upper_text = str(float(upper_number) * factor) + "元/月"
    return lower_text, upper_text


# Replace the single 'salary' column with separate min/max columns.
salary_min = []
salary_max = []
for raw_salary in df['salary']:
    lower_bound, upper_bound = _salary_bounds(raw_salary)
    salary_min.append(lower_bound)
    salary_max.append(upper_bound)

df['salary_min'] = salary_min
df['salary_max'] = salary_max
del df['salary']
# Normalise edu_level into six buckets:
#   无 | 初中及以下 高中/中技/中专 | 大专 | 本科 | 硕士 | 博士
# Rows are regrouped bucket by bucket; a row matching none of the patterns is
# dropped, and a row matching several buckets appears once in each of them.
# .assign() returns a new frame, so no value is written into a filtered slice
# of df (which would raise chained-assignment warnings).
edu = df["edu_level"].str
df1 = df[edu.contains("无|空")].assign(edu_level="无")
df2 = df[edu.contains("初中及以下|高中|中技|中专")].assign(edu_level="初中及以下 高中/中技/中专")
df3 = df[edu.contains("大专")].assign(edu_level="大专")
df4 = df[edu.contains("本科")].assign(edu_level="本科")
df5 = df[edu.contains("硕士")].assign(edu_level="硕士")
df6 = df[edu.contains("博士")].assign(edu_level="博士")

# Concatenate every bucket exactly once. Listing df2 twice and leaving df3 out
# would duplicate the secondary-school rows and lose all 大专 rows.
df = pd.concat([df1, df2, df3, df4, df5, df6], ignore_index=True)
def _headcount(text):
    """Return the text between the first '招' and the following '人'.

    When that text contains '若干' the integer 1 is returned instead; any
    other value is returned unchanged as a string.
    """
    count = text.split('招')[1].split('人')[0]
    return 1 if '若干' in count else count


# Overwrite the free-text headcount column with the extracted values.
people_list = [_headcount(raw_people) for raw_people in df['work_require_people']]
df['work_require_people'] = people_list

def _lower_bound_years(text):
    """Return the leading years figure of an experience string.

    The text before the first '年' is taken and, for a range such as 'a-b',
    only the part before the first '-' is kept (as a string). Strings without
    '年' yield the integer 0.
    """
    if '年' not in text:
        return 0
    return text.split('年')[0].split('-')[0]


# Overwrite the experience column with the extracted lower bound.
exp_list = [_lower_bound_years(raw_exp) for raw_exp in df['work_exp']]
df['work_exp'] = exp_list

# Keep only the text before the first '/' of each industry label; labels
# without a '/' are kept unchanged.
company_industry_list = [
    label.split('/')[0] if '/' in label else label
    for label in df['company_industry']
]
df['company_industry'] = company_industry_list

# Show the fully cleaned frame.
print(df)

NVIDIA DRIVE 智能汽车专区

更多推荐