import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# 数据加载与预处理
def load_data(file_path) -> pd.DataFrame:
    """Read a CSV file into a DataFrame.

    Args:
        file_path: Location of the CSV data; passed unchanged to
            ``pandas.read_csv``.

    Returns:
        The parsed contents as a ``pandas.DataFrame``.
    """
    return pd.read_csv(file_path)
def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with every missing value replaced by 0.

    The input frame is left unmodified; callers must use the returned frame.
    Only missing-value filling is performed here -- no feature
    standardization is applied.

    Args:
        df: Raw data, possibly containing missing values.

    Returns:
        A new DataFrame in which all missing entries are 0.
    """
    # Without inplace=True, fillna returns a new frame, so the caller's
    # object is not mutated as a hidden side effect.
    return df.fillna(0)
# 数据建模
def build_model(X, y, *, test_size=0.2, random_state=None):
    """Fit a linear regression on a training split of ``X`` and ``y``.

    The data is split with ``train_test_split`` and only the training portion
    is used for fitting; the held-out portion is discarded by this function.

    Args:
        X: Feature matrix.
        y: Target values aligned with ``X``.
        test_size: Held-out share of the data, forwarded to
            ``train_test_split``. Defaults to 0.2.
        random_state: Seed forwarded to ``train_test_split``. The default
            ``None`` leaves the split unseeded; pass an int for a
            reproducible split.

    Returns:
        The fitted ``LinearRegression`` model.
    """
    # The test halves of the split are not needed here, so they are not bound
    # to named locals.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = LinearRegression()
    model.fit(X_train, y_train)
    return model
if __name__ == "__main__":
    # Script entry point: load the CSV, fill missing values, then fit the
    # model on every column except "target".
    data = load_data("student_performance.csv")
    processed_data = preprocess_data(data)
    X = processed_data.drop(columns=["target"])
    y = processed_data["target"]
    trained_model = build_model(X, y)