# Gradient-descent linear-system solver demo.
import numpy as np
import sys

# solve linear system of equations by minimizing ||Ax - b||^2 by gradient descent
# solve linear system of equations by minimizing ||Ax - b||^2 by gradient descent
def SolveLinearSystemByGradientDescent(A, b, x0, eta, eps):
    """Minimize ||A @ x.T - b||^2 by batch gradient descent.

    Parameters
    ----------
    A : (m, n) array-like -- system matrix.
    b : (m, 1) array-like -- right-hand side.
    x0 : (1, n) array-like -- initial guess for the minimizer (row vector).
    eta : float -- step-size scaling factor for each update.
    eps : float -- stop once the gradient norm drops to or below this value.

    Returns
    -------
    (x, residual) on convergence, where residual = A @ x.T - b;
    None if the gradient norm is still above eps after maxI iterations.
    """
    x = x0
    i = 0
    maxI = 100000   # iteration cap, prevents the program from running infinitely
    printFreq = 1000  # only print some lines to speed up longer runs/condense output

    # Gradient of (1/2m)||A x^T - b||^2 with respect to x, up to the constant factor.
    loss = (A @ x.T) - b
    grad = A.T @ loss / A.shape[0]

    # Core of the algorithm: step against the gradient until it is small enough.
    while np.linalg.norm(grad) > eps:
        x = x - (eta * grad.T)  # update the estimate of the minimum
        loss = (A @ x.T) - b    # compute new gradient
        grad = A.T @ loss / A.shape[0]
        if not (i % printFreq):
            sys.stdout.write("Iteration " + str(i) + " x: ")
            sys.stdout.write(str(x))
        i += 1
        if i > maxI:
            print("Algorithm has not converged in " + str(maxI) + " iterations, halting execution.")
            return
    print("Converged in " + str(i) + " iterations.")
    return x, A @ x.T - b

################################################################################
# Demo: build a random square system and solve it by gradient descent.
n = 20  # number of unknowns
m = 20  # number of equations
A = np.random.rand(m, n)
b = np.random.rand(m, 1)
x0 = np.asmatrix(np.zeros(n))  # row-vector initial guess; matrix type so `*` is matmul
eta = 0.01  # step size
eps = 0.01  # gradient-norm convergence threshold

print(A)
print(b)
optimal = SolveLinearSystemByGradientDescent(A, b, x0, eta, eps)
# The solver returns None when it fails to converge within its iteration cap.
if optimal is not None:
    print("Optimal solution at " + str(optimal[0]) + " with error " + str(optimal[1].T))