import pandas as pd
# Load the CSV file into a DataFrame and print a preview of its first rows.
csv_path = 'example.csv'
data = pd.read_csv(csv_path)
preview = data.head()
print(preview)
# --- end of snippet ---
# Remove duplicate rows, modifying the DataFrame in place.
data.drop_duplicates(inplace=True)
# Drop columns with too many missing values: a column is kept only if its
# number of non-null entries reaches 70% of the (already de-duplicated) row
# count. The threshold is computed after de-duplication on purpose.
min_non_null = len(data) * 0.7
data.dropna(axis='columns', thresh=min_non_null, inplace=True)
# --- end of snippet ---
# Summary statistics for the 'field' column; select the column once and
# reuse it for both aggregations.
field_values = data['field']

# Mean of the column.
average_value = field_values.mean()
print(f"Average value: {average_value}")

# Largest value in the column.
max_value = field_values.max()
print(f"Maximum value: {max_value}")
# --- end of snippet ---
import matplotlib.pyplot as plt
# Draw a bar chart of the 'value' column against the 'category' column,
# using an explicit Figure/Axes pair created with figsize=(10, 6).
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(data['category'], data['value'])
ax.set_title('Category vs Value')
ax.set_xlabel('Category')
ax.set_ylabel('Value')
plt.show()
# --- end of snippet ---
from sklearn.linear_model import LinearRegression
# Assemble the training data: two feature columns and the target column.
feature_columns = ['feature1', 'feature2']
X = data[feature_columns]
y = data['target']

# Create the linear-regression model and fit it in one step
# (fit() returns the fitted estimator itself).
model = LinearRegression().fit(X, y)

# Predict on the same rows the model was fitted on and print the result.
predictions = model.predict(X)
print(predictions)
# --- end of snippet ---