# Python代码示例:数据清洗
import pandas as pd
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``df`` that are complete and have a positive age.

    Args:
        df: A pandas DataFrame that contains an ``age`` column.

    Returns:
        A new DataFrame holding only the rows that have no missing value in
        any column and whose ``age`` is strictly greater than 0. The original
        index labels are kept and the input frame is not modified.

    Raises:
        KeyError: If ``df`` has no ``age`` column.
    """
    # Drop every row that has at least one missing value in any column.
    complete_rows = df.dropna()
    # Keep only the records whose age is strictly positive.
    return complete_rows[complete_rows['age'] > 0]
# Read the raw records, clean them, and write the result without the index column.
data = pd.read_csv(filepath_or_buffer='data.csv')
cleaned_data = clean_data(df=data)
cleaned_data.to_csv(path_or_buf='cleaned_data.csv', index=False)
# ---- end of example: data cleaning ----
# Python代码示例:数据分析
import pandas as pd
import matplotlib.pyplot as plt
# Load the cleaned records and print their descriptive statistics.
data = pd.read_csv(filepath_or_buffer='cleaned_data.csv')
print(data.describe())

# Plot the age distribution as a 20-bin histogram, label it, and show it.
plt.hist(x=data['age'], bins=20)
plt.xlabel('Age')
plt.ylabel('Count')
plt.title('Age Distribution')
plt.show()
# ---- end of example: data analysis ----