Simulated Regression Data

To illustrate the basic usage of regression methods, we will often use the `make_regression` function from scikit-learn's `sklearn.datasets` module, which generates a random regression problem.

import numpy as np
from sklearn.datasets import make_regression
# Draw a small synthetic regression problem: 100 samples with 2 features,
# of which only 1 is informative for the target.
X, y = make_regression(
    n_samples=100,
    n_features=2,
    n_informative=1,
    noise=1,
    random_state=42,  # fixed seed so the values shown below are reproducible
)

# Round for compact display: features to 2 decimals, targets to 1 decimal.
X = np.round(X, decimals=2)
y = np.round(y, decimals=1)
X.shape, X.dtype  # feature matrix: one row per sample, one column per feature
((100, 2), dtype('float64'))
# Show the first ten rows of the feature matrix.
print(X[:10])
[[-1.42 -0.42]
 [ 0.52  0.3 ]
 [-0.89 -0.82]
 [-0.88  0.15]
 [ 0.74  0.17]
 [-0.26  2.72]
 [ 1.14  0.75]
 [ 0.36  1.54]
 [ 0.81  1.36]
 [-0.22  0.71]]
y.shape, y.dtype  # target vector: one value per sample
((100,), dtype('float64'))
print(y)  # all target values, rounded to one decimal above
[-123.8   46.4  -77.8  -79.3   64.8  -23.6   99.3   32.5   73.7  -19.2
  -98.3  -74.9  -52.5    1.7  -21.5 -140.1    8.2   28.5  -36.    -4.5
  -41.3  167.3  139.9   10.3   73.7 -102.4   49.5 -117.4  -10.7   44.5
  -19.7 -134.7 -151.1  163.1   -5.9 -169.6   57.9  -60.3  101.7    7.4
  -22.    21.5 -229.4   23.8   82.   -78.8  -61.6   27.   -61.3   70.7
  130.   -71.7  -72.1  -93.3   33.1  -40.3  -41.5 -100.   -88.2  -15.
  128.   -81.1  -46.8    8.7    5.    22.6   55.8  -15.5 -108.5   29.7
  -84.8  -68.7  -52.6  -28.4   90.1   20.4  -88.8   22.1   85.    68.1
   42.1   26.9   18.5   -3.1  -92.5   72.    19.3   -2.3   29.2    0.5
   14.5  -46.4   21.9    5.9   -3.1    3.9  123.  -131.5  -40.1  -38.3]