from pyspark import SparkContext
sc = SparkContext("local", "First App")
data = [1, 2, 3, 4, 5]
distData = sc.parallelize(data)
def compute_squared(x):
return x * x
squared_data = distData.map(compute_squared)
print(squared_data.collect())
]]>
from sklearn.linear_model import LinearRegression
import numpy as np
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
y = np.dot(X, np.array([1, 2])) + 3
reg = LinearRegression().fit(X, y)
print(reg.score(X, y))
]]>