I am trying to train a model using linear regression and gradient descent to predict the house prices based on area in sq ft but idk what is wrong, the predicted hx (hypothesis) is larger than values. have not coded the for the prediction as i am getting error. help me out. ty
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
db = pd.read_csv("train.csv")
x = np.array(db.iloc[:40,5])
y = np.array(db.iloc[:40,11])
m = len(x)
t0=0
t1=1
alpha = 0.001
for i in range(m):
hx = t0 + t1*x
dt0 = (-2/m)*sum((hx - y))
dt1 = (-2/m)*sum(x*(hx - y))
t0 = t0 - dt0*alpha
t1 = t1 - dt1*alpha
print(t0,t1)
plt.scatter(x,y)
plt.plot(x,hx)
also dataset,
x = [1300.236407 , 1275. , 933.1597222, 929.9211427, 999.009247 , 1250. , 1495.053957 , 1181.012946 , 1040. , 879.1208791, 1350.308642 , 1333.010179 , 927.1779023, 1122.171946 , 649.9837504, 1394.117647 , 1800.08471 , 2124.896706 , 1100. , 2178.649237 , 881.1435285, 944.8818898, 1310.147689 , 630.00063 , 1219.80971 , 780.141844 , 1600. , 1180.412371 , 1000. , 1000. , 1400.107701 , 943.1266076, 1150.146382 , 864.0674394, 857.7861968, 1174.210077 , 1020.087884 , 1650.165017 , 1000. , 1300.052002 ]
y = [ 55. , 51. , 43. , 62.5, 60.5, 42. , 66.5, 52. , 41.6, 36. , 35. , 110. , 48. , 62. , 20. , 71.1, 85. , 180. , 22. , 120. , 45. , 42. , 55. , 300. , 50. , 27.5, 46. , 22.9, 39. , 12.5, 52. , 33. , 55. , 82. , 240. , 55. , 65. , 65. , 35. , 75. ]