import pandas as pd
# Load the raw customer records.
data = pd.read_csv('customer_data.csv')
# Clean the data: drop rows that are exact duplicates of an earlier row.
data = data.drop_duplicates()
# Fill missing values by carrying the previous row's value forward.
# DataFrame.ffill() is used instead of fillna(method='ffill') because the
# `method` argument of fillna is deprecated in recent pandas releases.
# Cells with no earlier valid value in their column remain NaN.
data = data.ffill()
# Combine several data sources into one table.
orders = pd.read_csv('orders.csv')
products = pd.read_csv('products.csv')
# Join customers to their orders, then orders to product details.
# how='inner' is the pandas default and is spelled out here so it is obvious
# that rows without a match on the join key are dropped.
merged_data = (
    data
    .merge(orders, on='CustomerID', how='inner')
    .merge(products, on='ProductID', how='inner')
)
# Refresh the dataset with newly arrived records (meant to be run periodically).
def update_data(new_data: pd.DataFrame) -> None:
    """Append *new_data* to the module-level ``data`` frame and de-duplicate.

    The global ``data`` is rebound to a new DataFrame containing the existing
    rows followed by the rows of ``new_data``, with exact duplicate rows
    removed (the first occurrence is kept).

    Args:
        new_data: Rows to append to the current ``data`` frame.

    Returns:
        None. The result is stored in the module-level ``data``.
    """
    global data
    combined = pd.concat([data, new_data], ignore_index=True)
    # ignore_index=True keeps the index contiguous (0..n-1) after duplicates
    # are removed; a plain drop_duplicates() would leave gaps in the fresh
    # index that concat just built.
    data = combined.drop_duplicates(ignore_index=True)
# Example call: fold a newly exported batch of customer records into `data`.
update_data(pd.read_csv('new_customer_data.csv'))