The last chapter introduced a framework for thinking about supervised machine learning and explored a toy binary classification algorithm that found a threshold to apply to a single feature. It did that by exhaustively exploring the space of candidate thresholds and choosing the one that maximized accuracy on the training data. This chapter describes the Perceptron Learning Algorithm, which is better along a number of dimensions. Perceptrons consider the values of all features when making a classification decision instead of just one. For example, to know if a suitcase will fit into the overhead bin on a plane, I need to know its height, width, and length. Rather than exhaustively exploring a large or even intractable number of candidate functions $f$, the Perceptron Learning Algorithm iteratively updates a single candidate, driven by the goal of reducing the number of errors on the training set. Finally, the algorithm is well-motivated theoretically and admits an easy geometric interpretation. We'll start with the latter to get an intuitive understanding of Perceptrons and end with the former to see how the algorithm can be derived from first principles.

The term "Perceptron" refers to the functional form for a classifier. The term "Perceptron Learning Algorithm" refers to the algorithm for searching over the space of possible Perceptrons to find a good one. The text below may sometimes use the term Perceptron to refer to both the representation and the learning algorithm when the meaning is clear from the context.

The remainder of this chapter is organized as follows. Section 2.1 introduces weight vectors and their geometric interpretation as separating hyperplanes. Section 2.2 presents the learning algorithm itself. Section 2.3 runs the algorithm on two datasets. Section 2.4 derives the algorithm from first principles via loss functions and gradient descent. Section 2.5 defines linear separability and proves that the algorithm converges on linearly separable data. Section 2.6 extends the algorithm to multi-class problems, and Section 2.7 offers some final thoughts.

That's a lot to cover, so let's get started!

2.1 Weights and Hyperplanes

Before going any further, it will help to tighten up the notation a bit. Bold lower case letters, like ${\bf u}$ and ${\bf v}$, will denote vectors of real numbers. In a departure from the first chapter, $f$ will denote the true function that assigns class labels and $\hat f$ will denote a learned approximation to $f$. Likewise, $y$ will be the true label associated with feature vector ${\bf x}$, i.e., $f({\bf x}) = y$, and $\hat y$ will be the prediction made by $\hat f$ for ${\bf x}$, i.e., $\hat f({\bf x}) = \hat y$.

Using this notation, a training set contains $({\bf x}, f({\bf x}))$ pairs. A supervised learning algorithm, like the Perceptron, uses the training set to find a function $\hat f$ such that $\hat f({\bf x}) = f({\bf x})$ for instances not in the training set. That is, the goal is to find a model that generalizes. Think of $f$ as the function computed by Mother Nature to assign class labels, and $\hat f$ as an attempt to figure out what Mother Nature is doing.

Rather than representing possible class labels as a set of two tokens, like ${\tt \{poisonous, edible\}}$, it is often mathematically expedient to represent the two class labels as belonging to either ${\tt \{-1, +1\}}$ or ${\tt \{0, 1\}}$. The Perceptron assumes $y \in {\tt \{-1, +1\}}$ to make the algorithm and math simpler. It also assumes that each feature value in feature vector ${\bf x}$ is a real number. Given a training set with feature vectors ${\bf x} \in \mathbb{R}^n$ (i.e., vectors of length $n$ containing real numbers), the Perceptron Learning Algorithm seeks a weight vector ${\bf w}$ containing $n$ real numbers (i.e., ${\bf w} \in \mathbb{R}^n$) such that the following is true for all training instances $({\bf x}, y)$:

$${\tt sign}({\bf w} \cdot {\bf x}) = y$$

Recall that ${\bf w} \cdot {\bf x}$, the dot product, is just the sum of the products of the corresponding elements of ${\bf w}$ and ${\bf x}$:

$${\bf w} \cdot {\bf x} = \sum_{i=1}^n w_i x_i$$

In the sum above, $w_i$ is the $i^{th}$ element of the weight vector and ${x_i}$ is the $i^{th}$ element of the feature vector. Given a weight vector ${\bf w}$ and a feature vector ${\bf x}$, the Perceptron computes a weighted sum of feature values. It predicts class ${\tt -1}$ if that sum is negative and class ${\tt +1}$ if that sum is positive. That is:

$$\hat f({\bf x}) = {\tt sign}({\bf w} \cdot {\bf x})$$
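To make this concrete, here is a minimal sketch of the prediction rule in Python (assuming ${\tt numpy}$; the function and variable names are illustrative):

```python
import numpy as np

def predict(w, x):
    """Perceptron prediction: +1 for a positive dot product, -1 otherwise."""
    return 1 if np.dot(w, x) > 0 else -1

w = np.array([0.0, 1.0, -1.0])   # a weight vector
x = np.array([0.5, 0.8, 0.4])    # a feature vector
print(predict(w, x))             # prints 1 because w . x = 0.4 > 0
```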

Weight vectors are common in machine learning. Given a training set, many machine learning algorithms produce a weight vector that is used in some way to classify new instances, and the goal of the algorithm is to find "good" weights. For example, a good weight vector might be one that classifies all of the training data correctly. Consider the two mushroom instances shown below from Chapter 1.

Recall that short, wide mushrooms are poisonous; tall, thin mushrooms are edible; and the color attribute is irrelevant. For this dataset, the weight vector will contain three elements, one for each of the features. Let $w_c$ be the weight associated with color, $w_w$ be the weight associated with width, and $w_h$ be the weight associated with height. The class label assigned to a mushroom is thus:

$${\tt sign}(w_c * {\tt color} + w_w * {\tt width} + w_h * {\tt height})$$

Because the ${\tt sign()}$ function returns either ${\tt -1}$ or ${\tt +1}$, we need to map the original class labels, i.e., ${\tt poisonous}$ and ${\tt edible}$, to those values. Suppose that ${\tt poisonous}$ is mapped to ${\tt +1}$ and ${\tt edible}$ is mapped to ${\tt -1}$. What weights might work for the two instances above?

Setting $w_c = 0$ effectively ignores the value of the ${\tt color}$ attribute, which is good because it is irrelevant. No matter the value of ${\tt color}$, it is the case that $w_c * {\tt color} = 0$ and therefore the mushroom's color has no impact on ${\bf w} \cdot {\bf x}$. We want ${\bf w} \cdot {\bf x} > 0$ for poisonous mushrooms because they are the positive class. Poisonous mushrooms are short and wide. If $w_w > 0$ then the wider a mushroom is, the larger the value of $w_w * {\tt width}$ and the larger the value of ${\bf w} \cdot {\bf x}$. But we need to rule out any mushrooms that are tall. Setting $w_h < 0$ does that, because as mushrooms grow taller, the value of $w_h * {\tt height}$ grows more negative, pulling ${\bf w} \cdot {\bf x}$ in that direction. Note that this works for edible mushrooms as well because they are tall, making $w_h * {\tt height}$ a large negative number, and narrow, making $w_w * {\tt width}$ a small positive number. The net effect is that the weighted sum of feature values for edible mushrooms will be negative if the weights are chosen appropriately.

Putting all of this together, suppose ${\bf w} = [w_c, w_w, w_h] = [0, 1, -1]$. The value of ${\tt sign}({\bf w} \cdot {\bf x})$ for the first instance above is:

$${\tt sign}(0 * -0.311688 + 1 * 0.358501 -1 * 0.936567) = {\tt sign}(-0.578066) = -1$$

Therefore, the first mushroom is assigned class label ${\tt -1}$ which corresponds to ${\tt edible}$, which is correct. The value of ${\tt sign}({\bf w} \cdot {\bf x})$ for the second instance above is:

$${\tt sign}(0 * -0.472327 + 1 * 0.817906 -1 * 0.468387) = {\tt sign}(0.349519) = +1$$

The second mushroom is also classified correctly as belonging to class ${\tt +1}$, which corresponds to ${\tt poisonous}$. The Perceptron Learning Algorithm automatically and efficiently finds weight vectors that correctly classify the training data, even for datasets with so many features and instances that finding weights by hand, as we did above, would be impossible.
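As a quick check, here is a small ${\tt numpy}$ sketch of the same two computations:

```python
import numpy as np

w = np.array([0.0, 1.0, -1.0])                      # [w_c, w_w, w_h]
first = np.array([-0.311688, 0.358501, 0.936567])   # color, width, height
second = np.array([-0.472327, 0.817906, 0.468387])

print(np.sign(np.dot(w, first)))    # -1.0 -> edible (correct)
print(np.sign(np.dot(w, second)))   #  1.0 -> poisonous (correct)
```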

What does the separator found by the Perceptron look like geometrically? A point ${\bf x}$ is assigned $\hat y = {\tt +1}$ when ${\bf w} \cdot {\bf x} > 0$ and $\hat y = {\tt -1}$ when ${\bf w} \cdot {\bf x} < 0$, so when ${\bf w} \cdot {\bf x} = 0$ the point ${\bf x}$ is on the boundary between the two classes. Consider the dataset below where each instance has two numeric features, $x_1$ and $x_2$, and class labels in ${\tt \{-1, +1\}}$.

Because there are two features, the weight vector will have two weights, i.e., ${\bf w} = [w_1, w_2]$, so the boundary between the two classes consists of all points for which:

$${\bf w} \cdot {\bf x} = w_1 x_1 + w_2 x_2 = 0$$

The dataset is plotted below in the 2-dimensional feature space, with the standard red circles and green plusses to denote points belonging to the two different classes. Solving the equality above for $x_2$ yields the following, which is the equation for a line in the feature space:

$$x_2 = -\frac{w_1}{w_2} x_1$$

The slope of this line is ${-w_1/w_2}$, and because the equation has no constant term it must pass through the origin. (We'll remove that limitation shortly.) That is, the weight vector in a 2-D feature space defines a line that cuts the plane in half. Every point ${\bf x}$ on one side of the line will yield ${\bf w} \cdot {\bf x} > 0$ and thus be classified as a positive instance ($y = {\tt +1}$). Every point on the other side of the line will yield ${\bf w} \cdot {\bf x} < 0$ and thus be classified as a negative instance ($y = {\tt -1}$).

The cell below generates a plot of the dataset above, and takes as input a 2-D weight vector. The black line, which corresponds to all points ${\bf x}$ for which ${\bf w} \cdot {\bf x} = 0$, is called a separator, or linear separator, or separating hyperplane. That's because the line is meant to separate the points that belong to one class from those that belong to the other. It's also a line, and thus the separator is linear. Note that in one dimension, as we saw in the first chapter, a separator can be a point on the real line. To the left of that point is one class, and to the right is the other. As shown analytically above and graphically below, a linear separator in two dimensions is a line. A linear separator in three dimensions is a plane. Every point in the 3-D feature space on one side of the plane is a positive instance, and every point on the other side is a negative instance. More generally, for $n > 3$ features or dimensions the weight vector defines a separating hyperplane.
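The plotting cell itself might look something like the following minimal sketch (assuming ${\tt matplotlib}$ and a dataframe ${\tt df}$ with columns ${\tt x1}$, ${\tt x2}$, and ${\tt label}$; all names are illustrative):

```python
import matplotlib.pyplot as plt
import numpy as np

def plot_separator(df, w):
    """Plot a 2-D dataset and the line w . x = 0, i.e., x2 = -(w1/w2) * x1."""
    pos = df[df['label'] == 1]
    neg = df[df['label'] == -1]
    plt.scatter(pos['x1'], pos['x2'], marker='+', color='green')
    plt.scatter(neg['x1'], neg['x2'], marker='o', color='red')
    x1 = np.linspace(df['x1'].min(), df['x1'].max(), 2)
    plt.plot(x1, -(w[0] / w[1]) * x1, color='black')  # assumes w[1] != 0
    plt.xlabel('$x_1$')
    plt.ylabel('$x_2$')
    plt.show()
```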

<Explore>

Try a few different values for the weight vector. In particular, for any given set of weights, e.g., ${\bf w} = [2, 1]$, try the negative of those weights, i.e., ${\bf w} = [-2, -1]$. What you'll find is that you get exactly the same separating hyperplane plotted in the feature space. The line is the same; the difference is which side of the line is the positive half of the plane and which is the negative half. Before exploring that topic, let's clean up one more loose end.

</Explore>

Recall that weight vectors of the form ${\bf w} = [w_1, w_2]$ yield separators that pass through the origin because there is no constant or bias term. Adding a constant term so that separators can move off of the origin is typically required and handled in one of two ways. The first is to declare that the algorithm is looking for two things, a weight vector of $n$ real numbers (i.e., ${\bf w} \in \mathbb{R}^n$) and a bias term $b \in \mathbb{R}$, and that:

$$\hat f({\bf x}) = {\tt sign}({\bf w} \cdot {\bf x} + b)$$

The other is to augment each ${\bf x}$ vector with another element, $x_0$, that is always set to 1. The corresponding weight is $w_0$, so the dot product winds up with a constant term $w_0$ that plays the role of the bias:

$${\bf w} \cdot {\bf x} = \sum_{i=0}^n w_i x_i = w_0 x_0 + \sum_{i=1}^n w_i x_i = w_0 + \sum_{i=1}^n w_i x_i$$

Note that the starting index on the first sum is 0 and changes to 1 for the last two sums. We'll use this formulation as it tends to make the math a bit cleaner.
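In code, the augmentation is just a matter of prepending a constant feature (a sketch, assuming ${\tt numpy}$; the values are illustrative):

```python
import numpy as np

x = np.array([0.358501, 0.936567])      # original features
x_aug = np.concatenate(([1.0], x))      # x_0 = 1 plays the role of the bias
w = np.array([0.25, 1.0, -1.0])         # [w_0, w_1, w_2]; w_0 is the bias
print(np.dot(w, x_aug))                 # w_0 + w_1*x_1 + w_2*x_2
```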

OK, so which side of the separating hyperplane is the positive side and which is the negative side? It turns out that (1) the weight vector is perpendicular to the separating hyperplane and (2) it points in the direction of the positive side. To prove these two statements, it will help to recall that two vectors are perpendicular or orthogonal if their dot product is zero, i.e., ${\bf u} \cdot {\bf v} = 0$.

Proposition 1: For ${\bf w} \in \mathbb{R}^n$, ${\bf w}$ is orthogonal to the hyperplane defined by ${\bf w} \cdot {\bf x} = 0$.

Proof: Let ${\bf u}$ and ${\bf v}$ be points on the hyperplane defined by ${\bf w} \cdot {\bf x} = 0$. Then ${\bf u} - {\bf v}$ is a vector that is parallel to the hyperplane. Because ${\bf w} \cdot ({\bf u} - {\bf v}) = {\bf w} \cdot {\bf u} - {\bf w} \cdot {\bf v} = 0 - 0 = 0$, it is the case that ${\bf w}$ is perpendicular to ${\bf u} - {\bf v}$ and thus to the hyperplane as well.

<Explore>

The cell below shows the idea behind Proposition 1 visually. The sole argument to the Proposition_1 function is a weight vector of the form $[w_0, w_1, w_2]$. It then plots five things: the hyperplane defined by ${\bf w}$, two randomly chosen points ${\bf u}$ and ${\bf v}$ on that hyperplane, the vector ${\bf u} - {\bf v}$, and the weight vector ${\bf w}$.

The values of ${\bf u}$, ${\bf v}$, ${\bf u} - {\bf v}$, and ${\bf w}$ are printed below the plot.

You can run the cell multiple times to see different choices for ${\bf u}$ and ${\bf v}$. Also try different values for the weight vector. Because ${\bf u} - {\bf v}$ and ${\bf w}$ are drawn from the origin, it helps visually to make $w_0$ non-zero, otherwise it's hard to see ${\bf u}$ and ${\bf v}$. You might want to try the weight vectors ${\tt (-2, 0, 1)}$ and ${\tt (2, 0, -1)}$. Both correspond to horizontal separators but have their positive and negative half planes swapped.

</Explore>

The length of vector ${\bf u}$ is denoted $\|{\bf u}\|_2$ and is called the Euclidean norm or L2 norm. It is the square root of the sum of the squares of the elements of ${\bf u}$, i.e., $\|{\bf u}\|_2 = \sqrt{\sum_i u_i^2}$. That quantity appears in the proof of Proposition 2 below.

Proposition 2: For ${\bf w} \in \mathbb{R}^n$, ${\bf w}$ points in the direction of the positive half-space defined by ${\tt sign}({\bf w} \cdot {\bf x})$.

Proof: Let ${\bf u}$ be any point on the hyperplane defined by ${\bf w}$. The point ${\bf u} + \lambda {\bf w}$ for $\lambda > 0$ is obtained by starting at ${\bf u}$ and moving some distance (i.e., $\lambda \|{\bf w}\|_2$) in the direction of ${\bf w}$. Note that every vector ${\bf x}$ in the half-space to which ${\bf w}$ points can be written as ${\bf u} + \lambda {\bf w}$ for some ${\bf u}$ and $\lambda > 0$. It is the case that:

$$ \begin{eqnarray} {\bf w} \cdot ({\bf u} + \lambda {\bf w}) & = & {\bf w} \cdot {\bf u} + \lambda {\bf w} \cdot {\bf w} \\ & = & 0 + \lambda \|{\bf w}\|_2^2 \\ & > & 0 \end{eqnarray} $$

The first line expands the dot product of ${\bf w}$ and an arbitrary point in the half-space to which ${\bf w}$ points. The second line follows because ${\bf u}$ lies on the separator and thus ${\bf w} \cdot {\bf u} = 0$. The third line follows from the fact that $\lambda$ and the squared length of ${\bf w}$ are both greater than zero. Therefore, ${\bf w}$ points in the direction of the positive half-space defined by ${\tt sign}({\bf w} \cdot {\bf x})$.

<Explore>

The cell below shows the idea behind Proposition 2 visually. The Proposition_2 function takes a weight vector of the form $[w_0, w_1, w_2]$. It plots the corresponding hyperplane in black and plots a randomly selected point, ${\bf u}$, on the hyperplane in blue. It then chooses a random value for $\lambda > 0$ and plots the location of ${\bf u} + \lambda {\bf w}$ in green and connects the two points with the vector $\lambda {\bf w}$. Below that you'll see the values of ${\bf u}$, $\lambda$, and ${\bf w} \cdot ({\bf u} + \lambda {\bf w})$, which should be greater than zero.

Run the cell several times. You'll notice that points farther away from the hyperplane, i.e., with larger values of $\lambda$, have larger values of ${\bf w} \cdot ({\bf u} + \lambda {\bf w})$. The Proposition_2 function takes an optional second argument, which is the value of $\lambda$. Using $\lambda = 0$ will leave the point on the hyperplane and should result in the dot product being zero. Using $\lambda < 0$ will move away from the hyperplane in the opposite direction of ${\bf w}$ to the negative half of the plane, and should result in a dot product that is negative and an arrow that is red. Said differently, ${\bf -w}$ points in the direction of the negative half-space defined by ${\tt sign}({\bf w} \cdot {\bf x})$.

</Explore>

2.2 The Learning Algorithm

The Perceptron Learning Algorithm starts with an initial weight vector that contains all zeroes, so ${\bf w} \cdot {\bf x} = 0$ for all points in the feature space because the classifier has not yet seen any training data. It then makes a series of passes over the training set. Each pass is called an epoch, and involves using the current weight vector to classify each of the training instances in turn. If ${\tt sign}({\bf w} \cdot {\bf x}) = y$ for a training instance, the classifier is correct for that instance and nothing happens. If ${\tt sign}({\bf w} \cdot {\bf x}) \neq y$ for a training instance, the classifier is wrong for that instance and the weight vector is modified. The algorithm terminates when the weight vector was not updated during an epoch (i.e., it classified all of the training instances correctly) or a maximum number of epochs has been reached.

A simple implementation of the Perceptron Learning Algorithm is shown below. Its arguments are a dataframe containing the training instances and their class labels, the maximum number of epochs to run, and a flag indicating whether a constant column should be added to serve as the bias term.
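The original code cell is not reproduced here, so the following is a minimal sketch consistent with the surrounding description, with the lines arranged to match the numbered references below (it assumes ${\tt numpy}$ imported as ${\tt np}$ and class labels of ${\tt +1}$/${\tt -1}$ in a column named ${\tt label}$; all names are illustrative):

```python
def perceptron(df, max_epochs=100, add_bias=True):
    # sketch; assumes numpy as np and labels in df['label']
    if add_bias:
        # a column of 1's so the first weight acts as the bias term
        df.insert(0, 'bias', 1.0)

    w = np.zeros(df.shape[1] - 1)
    features = [c for c in df.columns if c != 'label']

    for epoch in range(max_epochs):
        mistakes = 0
        for _, row in df.iterrows():
            x = row[features].to_numpy(dtype=float)
            y = row['label']
            if y * np.dot(w, x) <= 0:
                w = w + y * x
                mistakes += 1
            yield w.copy()
        if mistakes == 0:
            break
```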

Hint: If you don't see line numbers in the cell above, try clicking in it and typing ${\tt <ESC>}$ and then ${\tt L}$.

The interesting part of the algorithm from a learning perspective is lines ${\bf 15 - 16}$, but we'll start at the top. Lines ${\bf 3 - 5}$ add a new column filled with 1's to the dataframe if a constant term is needed in the weight vector. Line ${\bf 7}$ initializes the weight vector to all zeroes, and line ${\bf 8}$ gets the names of the columns containing feature values. Next there are two nested loops. Each iteration of the outer loop represents an epoch, a complete pass over the data. Each iteration of the inner loop considers one training instance (row in the dataframe). Lines ${\bf 13}$ and ${\bf 14}$ get the values of ${\bf x}$ and $y$ for the current instance, respectively. Lines ${\bf 11}$, ${\bf 17}$, ${\bf 19}$, and ${\bf 20}$ keep track of the number of misclassified instances during each epoch and stop if an epoch is completed with no errors. Line ${\bf 18}$ ensures that the output of the algorithm is a Python generator that contains the sequence of weight vectors explored. If there are $n$ rows in the dataframe and the algorithm runs for $e$ epochs, the generator will contain $n*e$ items. The last of these weight vectors is the one that should be used to classify test instances, but the others are useful when animating the behavior of the algorithm below.

Line ${\bf 15}$, which is where the learning starts, asks whether $y * {\bf w} \cdot {\bf x} \leq 0$. If so, that instance is misclassified. Why? Let's assume that the misclassified instance is a positive one, i.e., $y = {\tt +1}$. If $y * {\bf w} \cdot {\bf x} < 0$ then ${\bf w} \cdot {\bf x}$ must be less than zero because $y = {\tt +1}$. In that case, ${\tt sign}({\bf w} \cdot {\bf x}) = -1$ and the predicted class label is not the same as the true class label. Likewise, if $y = {\tt -1}$ and $y * {\bf w} \cdot {\bf x} < 0$, then ${\bf w} \cdot {\bf x}$ must be greater than zero. In that case the instance is misclassified as well because ${\tt sign}({\bf w} \cdot {\bf x}) = {\tt +1}$ and the true class label is ${\tt -1}$. Alternatively, if the signs of $y$ and ${\bf w} \cdot {\bf x}$ are the same, either both ${\tt +1}$ or ${\tt -1}$, the instance is classified correctly and the product $y * {\bf w} \cdot {\bf x}$ is positive. The inequality in line 15 is $\leq$ rather than $<$ to catch the initial condition in which the weight vector is all zeroes and ${\bf w} \cdot {\bf x} = 0$ regardless of the value of ${\bf x}$.

Line ${\bf 16}$ is the weight update, where ${\bf w}$ is changed to correct the classifier's mistake on the current training instance. All it does is add $y * {\bf x}$ to the weight vector, which only happens when the corresponding instance is misclassified by the current weight vector. If the misclassified instance is a positive one, the weight update is ${\bf w} = {\bf w} + {\bf x}$. If the misclassified instance is a negative one, the weight update is ${\bf w} = {\bf w} - {\bf x}$.

There are two ways to think about what the weight update is doing. The first is analytically. If a positive instance is misclassified, then ${\bf w} \cdot {\bf x} < 0$. The weights need to be changed so that this dot product is increased and eventually becomes positive for that instance. If ${\bf w}$ is the weight vector before the update and ${\bf w} + y * {\bf x} = {\bf w} + {\bf x}$ is the weight vector after the update, the dot product after the update is:

$$ \begin{eqnarray} ({\bf w} + {\bf x}) \cdot {\bf x} & = & {\bf w} \cdot {\bf x} + {\bf x} \cdot {\bf x} \\ & = & {\bf w} \cdot {\bf x} + \|{\bf x}\|^2_2 \\ & > & {\bf w} \cdot {\bf x} \end{eqnarray} $$

By adding ${\bf x}$ to ${\bf w}$, the dot product has been increased by the square of the length of ${\bf x}$. Similar reasoning shows that for a misclassified negative instance, the dot product after the weight update (i.e., after subtracting ${\bf x}$ from ${\bf w}$) is decreased by $\|{\bf x}\|^2_2$. That is, the Perceptron update increases the value of ${\bf w} \cdot {\bf x}$ for misclassified positive instances and decreases that value for misclassified negative instances. With enough updates, it will eventually become the case (under conditions discussed below) that ${\tt sign}({\bf w} \cdot {\bf x}) = y$ for all training instances $({\bf x}, y)$.
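A quick numerical illustration of this (a sketch, with made-up numbers):

```python
import numpy as np

w = np.array([0.5, -1.0])
x = np.array([1.0, 1.0])     # a positive instance (y = +1) with w . x < 0
print(np.dot(w, x))          # -0.5, so the instance is misclassified
w_new = w + x                # the Perceptron update for y = +1
print(np.dot(w_new, x))      # 1.5 = -0.5 + ||x||^2 = -0.5 + 2.0
```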

The other way to think about the weight update is geometrically. Consider a misclassified positive instance. The fact that it is misclassified means that ${\bf w} \cdot {\bf x} < 0$. Recall that the cosine of the angle $\phi$ between two vectors, ${\bf u}$ and ${\bf v}$, is:

$$ \cos(\phi) = \frac{{\bf u} \cdot {\bf v}}{\|{\bf u}\|_2 \|{\bf v}\|_2} $$

If the dot product is negative then the cosine is negative and the angle between the vectors is obtuse, i.e., $90^{\circ} < \phi \leq 180^{\circ}$. The cell below shows randomly chosen weight (${\bf w}$) and attribute (${\bf x}$) vectors that have an obtuse angle between them. They are drawn in blue. Because the instance is positive and misclassified, the Perceptron update rule will add ${\bf x}$ to ${\bf w}$. The new weight vector, which is ${\bf w} + {\bf x}$, is drawn in red. Note that it points to the far corner of a parallelogram (the other sides of the parallelogram are drawn in black). It follows from the geometry of parallelograms that the angle between the new weight vector, ${\bf w} + {\bf x}$, and ${\bf x}$ is smaller than the angle between the original weight vector, ${\bf w}$, and ${\bf x}$. That is, the weight vector has turned in the direction of the misclassified instance. By Proposition 1 above, the separator has turned with the weight vector to be closer to ${\bf x}$. The misclassified instance is now either on the correct side of the separator or it is closer to the correct side.

<Explore>

Each time you run the cell below you'll see different configurations for the weight vector and hyperplane.

An analogous argument applies to misclassified negative instances for which ${\bf w} \cdot {\bf x} > 0$ and thus the angle between ${\bf w}$ and ${\bf x}$ is acute, i.e., $0^{\circ} \leq \phi < 90^{\circ}$. The weight vector after the update is ${\bf w} - {\bf x}$ and its angle with ${\bf x}$ is larger after the update than before. That is, the update causes the weight vector to turn away from a misclassified negative instance and thus moves the separator closer to the instance. The cell below shows that graphically, and can be run multiple times to see different configurations.

</Explore>

One way to make the geometry of Perceptron updates concrete is to imagine that your nose is the weight vector. If you hold your arms straight out on each side, they form a line that represents the separating hyperplane. As a classifier, you label every point in front of you positive and every point behind you negative. If a true negative point is in front of you, it is misclassified and the update subtracts its vector from the weight vector (your nose) turning your body away from it. As your body turns away from the point, your arms move. If you turn often enough or far enough, the point will wind up behind you where it belongs. The same principle is at work for positive points that are misclassified, except they are behind you and cause your nose, body, and arms (still straight out at your sides) to turn toward them. With enough turning, the misclassified positive point will be in front of you where it belongs.

How far you turn depends on two things. The first is the size of ${\bf x}$, the feature vector of the misclassified instance. All other things being equal, the larger ${\bf \|x\|_2}$ the bigger the influence on ${\bf w}$ and the further you turn. The second factor is the size of ${\bf w}$. When the algorithm makes its first mistake and ${\bf w}$ is a vector of zeroes, it is the case that ${\bf w} = {\bf x}$ after the update. But after many mistakes are made and the corresponding feature vectors are added to the weight vector, $\|{\bf w}\|_2$ tends to grow and a mistake on the same ${\bf x}$ will cause the weight vector to turn less. We'll derive the update from first principles in Section 2.4, at which time the reason for the exact form of the update will become clear.

2.3 Running the Perceptron

This section explores the behavior of the Perceptron Learning Algorithm on two datasets - the example dataset used earlier in this chapter, and the mushroom dataset from the first chapter. Consider the cell below. It creates a dataframe containing the example dataset and shuffles the order of the rows, runs the Perceptron Learning Algorithm to get the weight sequence, and passes the dataframe and weight sequence to the constructor of a class that animates the behavior of the algorithm. The cell starts with %%capture to suppress all output because the final call to the animate function would otherwise produce an extraneous plot.

The next cell produces the on-screen animation. It shows the instances in the two classes in the feature space colored by their class label. The instance being considered in the inner loop of the algorithm is drawn with a large black dot. As the inner loop runs, that dot will hop around randomly because the order of the instances in the dataframe was randomized. Below the data plot is a set of video controls whose meanings should be obvious. To start the animation, press the "play" button. You'll see the current separator appear as a black line with a small black arrow pointing in the direction of the positive half-plane. Each time the classifier makes a mistake on a training instance, the separator moves, but the arrow remains to give some history of how the separator's location evolved through time. To prevent the arrows from being drawn, change the value of the trail keyword in the cell above to False.

<Explore>

Run the code above a few times, starting with the cell that prepares the dataframe so that the order of the instances is different each time. You'll notice that the hyperplane only moves when the current instance (marked by the black dot) is on the wrong side. Though the learning algorithm is deterministic, the final hyperplane depends on the order of the instances in the dataframe. In particular, sometimes the hyperplane stops changing when it is very close to the red circles and very far away from the green plusses. We'll see in later chapters that this is undesirable and explore methods that produce hyperplanes that are "safer" and do so irrespective of the order of the instances.

</Explore>

The cell below prepares the mushroom dataset from the first chapter for the Perceptron Learning Algorithm. It converts the class labels so that ${\tt poisonous}$ maps to ${\tt +1}$ and ${\tt edible}$ maps to ${\tt -1}$, inserts a column of ${\tt 1}$'s to serve as the bias term, and shows the first two rows of the dataframe.

The next cell runs the learning algorithm, gets the final weight vector, and prints it. Recall that the perceptron function returns a generator object. The final weight vector is obtained by turning that object into a list and getting the value at index ${\tt -1}$. Note that the bias weight is 0, which means that the learned hyperplane passes through the origin. The weight on the ${\tt color}$ attribute is small, but not 0, which means that it has some effect on predicted class labels, but much less so than the weights on the ${\tt width}$ and ${\tt height}$ attributes which are about 3 times larger in magnitude. That's good because the color attribute is irrelevant to the class label. Recall from the discussion above in which we hand-coded weights for this dataset that a positive weight on ${\tt width}$ and a negative weight on ${\tt height}$ would lead to accurate predictions. Indeed, that's what the algorithm has discovered.

We can check the weights to see if they classify the two instances shown above correctly by computing the dot product of the weight vector and the feature values. That dot product should be less than 0 for the first instance and greater than zero for the second instance. As expected, they are classified correctly as can be seen in the cell below. It computes ${\bf w} \cdot {\bf x}$ for an instance whose index in the dataframe is specified by the value of ${\tt index}$.
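A sketch of those two cells (assuming the ${\tt perceptron}$ generator sketched earlier and a prepared dataframe ${\tt df}$; the bias column was already inserted by the preparation cell above):

```python
# Run the algorithm and keep only the final weight vector.
weights = list(perceptron(df, add_bias=False))
w = weights[-1]
print(w)

# Check the prediction for a single instance.
index = 0
features = [c for c in df.columns if c != 'label']
x = df.loc[index, features].to_numpy(dtype=float)
print(np.dot(w, x))   # negative -> edible (-1), positive -> poisonous (+1)
```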

2.4 Deriving the Perceptron Learning Algorithm from First Principles

Although the operation of the Perceptron Learning Algorithm makes sense, the discussion above was post hoc. Given the algorithm, we sought to explain its behavior. But where did the algorithm come from in the first place? This section introduces a number of key concepts related to specifying what we want a machine learning algorithm to accomplish, and describes an approach to deriving the algorithm given that specification. In the course of the discussion, we'll apply that approach to derive the Perceptron algorithm.

The idea is to write down a loss function that specifies how bad it is for the classifier to make mistakes and then cast learning as an optimization problem where the goal, in the case of the Perceptron, is to find a weight vector that minimizes the loss on the training data. An example of a loss function is the 0-1 loss, shown in tabular form below.

| $L_{0-1}$ | $\hat y = -1$ | $\hat y = +1$ |
|---|---|---|
| $y = -1$ | 0 | 1 |
| $y = +1$ | 1 | 0 |

For each of the four possible combinations of true ($y$) and predicted ($\hat y$) labels for a given instance, the table shows how much loss is incurred. It's called the 0-1 loss because the loss is 0 when the classifier is correct (i.e., $y = \hat y$) and the loss is 1 when the classifier is wrong ($y \neq \hat y$). The sum of $L_{0-1}$ over a training set is just the number of misclassified instances, which we want to minimize.

The 0-1 loss is a function of two things, the true and predicted class labels, which can be written $L_{0-1}(y, \hat y)$. For the Perceptron, $\hat y$ is a function of ${\bf w}$ and ${\bf x}$, so we can write the 0-1 loss as follows:

$$ L_{0-1}(y, {\bf w} \cdot {\bf x}) = \begin{cases} 0 & \text{if $y * {\bf w} \cdot {\bf x} > 0$}\\ 1 & \text{otherwise} \end{cases} $$

If the signs of $y$ and ${\bf w} \cdot {\bf x}$ are the same, the instance is classified correctly and there is no loss, otherwise the loss is 1. Because the $({\bf x}, y)$ pairs in the training set are given as input and fixed, the only way to change the loss is to modify ${\bf w}$. This reduces the learning problem to finding a weight vector that minimizes the loss given the training data.

$L_{0-1}$ is plotted above as a function of $y * {\bf w} \cdot {\bf x}$. Note that the loss is 0 when the sign of $y * {\bf w} \cdot {\bf x}$ is positive, and the loss is 1 when the sign of $y * {\bf w} \cdot {\bf x}$ is negative. The plot also shows the location of an instance for which $y * {\bf w} \cdot {\bf x} = -1.5$ and the associated loss. You and I can look at the plot and think "clearly there is lower loss to the right of that point, so the algorithm just needs to modify the weight vector so that $y * {\bf w} \cdot {\bf x}$ increases". But the learning algorithm does not have that global view of the loss surface. Exploring the loss for new weight vectors requires evaluating $L_{0-1}(y, {\bf w} \cdot {\bf x})$, and there are infinitely many possible weight vectors. The plot below gives you some idea of what it is like to be a Perceptron with the 0-1 loss function and a misclassified instance. You can compute the loss given the current weight vector and for weight vectors that are close to the current one, but they all look equally bad with a loss of 1.

The loss function needs to convey more information about how to modify the weights to reduce the loss. The loss function plotted below, which we'll call the Perceptron loss and denote $L_p$, does just that. It can be written as follows:

$$ L_{P}(y, {\bf w} \cdot {\bf x}) = \begin{cases} 0 & \text{if $y * {\bf w} \cdot {\bf x} > 0$}\\ -y * {\bf w} \cdot {\bf x} & \text{otherwise} \end{cases} $$

There is still 0 loss for correctly classified instances, but the loss for incorrectly classified instances is $-y * {\bf w} \cdot {\bf x}$. Why? First, note that $-y * {\bf w} \cdot {\bf x} > 0$ for misclassified instances, so the loss is a positive number. Suppose the misclassified instance is negative. Because it is misclassified, it sits in the positive half-plane defined by the weight vector. Recall that the weight vector points in the direction of the positive half-plane. Because ${\bf w} \cdot {\bf x}$ is proportional to the cosine of the angle between ${\bf w}$ and ${\bf x}$, as that angle gets smaller, ${\bf x}$ rotates to be more aligned with ${\bf w}$ and further away from the separating hyperplane, and ${\bf w} \cdot {\bf x}$ gets larger. Therefore, the larger the value of $-y * {\bf w} \cdot {\bf x}$ for a negative instance that is misclassified, the further it is from the correct side of the hyperplane. Similar reasoning leads to the same conclusion for positive instances that are misclassified. The $L_P$ loss is thus proportional to the distance of the instance from the hyperplane and assigns more loss to instances for which the current weight vector is "more wrong".
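In code, the two loss functions differ by a single line; a minimal sketch (assuming ${\tt numpy}$):

```python
import numpy as np

def zero_one_loss(y, w, x):
    """0-1 loss: 1 for a misclassified instance, 0 otherwise."""
    return 0.0 if y * np.dot(w, x) > 0 else 1.0

def perceptron_loss(y, w, x):
    """Perceptron loss: 0 if correct, else proportional to how wrong w is."""
    z = y * np.dot(w, x)
    return 0.0 if z > 0 else -z
```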

The nice thing about the $L_P$ loss from a learning or optimization standpoint is that it provides useful information about how to modify the weights to reduce the loss. The plot below again puts you in the shoes of a learning algorithm that has to figure out how to reduce the loss for a misclassified instance. Now it's clear that "moving to the right" will reduce the loss using just local information.

The last piece of the puzzle is an automated way to compute weight updates that reduce the loss. In this case, and many others we'll come across, that means using gradient descent, which is an iterative optimization algorithm. We'll introduce the main concepts by way of a simple example and then apply gradient descent to derive the Perceptron update rule.

Suppose you're given the function $f(w) = (w - 4)^2 + 1$ and asked to find the value of $w$ that minimizes $f$. Think of $f$ as a loss function and $w$ as a weight. The goal is to find the weight that minimizes the loss. Ignore for the moment that there is a simple analytical solution to this problem. That is typically not the case for loss functions used in machine learning. Gradient descent requires the function to be differentiable, which this one is with $f'(w) = 2(w - 4)$, and an initial value for $w$, which is often chosen at random. In this case, we'll use $w = 2.5$ as the initial guess. The animation below shows this situation, with the function plotted in blue and the current value of $w$ and $f(w)$ shown with a black line from $(w, 0)$ to $(w, f(w))$. You'll notice that the tangent to $f(w)$ is shown as well, which is crucial to the operation of the algorithm.

The slope of the tangent line in the first frame of the animation is $f'(2.5) = 2(2.5 - 4) = -3$. This linear or first-order approximation to the function at $w = 2.5$ suggests that increasing $w$ will cause $f(w)$ to decrease because the slope of the tangent line is negative. That is, for small values of $\epsilon > 0$, we expect $f(2.5 + \epsilon) < f(2.5)$. In contrast, if the initial guess for $w$ had been 5, the slope of the tangent line would be $f'(5) = 2(5 - 4) = 2$. Because the slope of the tangent line at $w = 5$ is positive, decreasing $w$ will cause $f(w)$ to decrease and we expect $f(5 - \epsilon) < f(5)$. In both cases, given the current value of $w$, adjusting $w$ by an amount that has the negative of the sign of $f'(w)$ leads to a smaller value of $f$.

In this simple case, gradient descent updates the weight using the following rule:

$$ w = w - \alpha * f'(w) $$

The new weight is just the old weight minus a constant multiple of the slope of the tangent line of the function at the old weight. The constant multiple is called the step size or learning rate, and $\alpha \in (0, 1]$. The steeper the tangent, the larger the value of $|f'(w)|$ and the larger the change in $w$. Because the update subtracts $\alpha f'(w)$, the change in $w$ has the opposite sign of the slope of the tangent, which we saw above leads to reducing $f(w)$ after the update.

What is the role of $\alpha$? Gradient descent approximates the function at a point with a straight line. Unless the function is in fact linear, the approximation gets increasingly less accurate the further you move from that point. For larger values of $\alpha$, you take a larger step and can wind up at a point where the function has increased rather than decreased.
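Here is a minimal sketch of the gradient descent loop for this example ($f$, $f'$, and the starting point come from the text; the step size of 0.1 and the iteration count are illustrative):

```python
def f(w):
    return (w - 4) ** 2 + 1

def f_prime(w):
    return 2 * (w - 4)

w = 2.5        # initial guess from the text
alpha = 0.1    # illustrative step size
for step in range(25):
    w = w - alpha * f_prime(w)   # gradient descent update

print(w, f(w))   # w is close to 4, the minimizer, and f(w) is close to 1
```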

<Explore>

If you haven't already done so, run the animation above. You'll notice that $w$ moves from 2.5 toward 4, which is the value that minimizes the function. It moves more quickly early on because the slope is larger. The closer $w$ gets to the value that minimizes $f$, the smaller the slope of the tangent line and the smaller the update to $w$. Now change the step size to 1. What happens? The weight oscillates between two values and never makes progress. The first weight update is as follows: $$ \begin{eqnarray} w & = & w - \alpha * f'(w) \\ & = & w - \alpha * 2 * (w - 4) \\ & = & 2.5 - 1 * 2 * (2.5 - 4) \\ & = & 5.5 \end{eqnarray} $$ The next weight update is: $$ \begin{eqnarray} w & = & w - \alpha * f'(w) \\ & = & w - \alpha * 2 * (w - 4) \\ & = & 5.5 - 1 * 2 * (5.5 - 4) \\ & = & 2.5 \end{eqnarray} $$ And we're right back where we started. The step was so large that it hopped right over the minimum to the other side of the parabola, where the slope has the same magnitude but the opposite sign. This is precisely the kind of behavior that can occur when $\alpha$ is too large, but it can get worse. What happens if $\alpha > 1$ in the example above? The value of $w$ takes larger and larger steps on each iteration, hopping back and forth over the minimum point and getting farther and farther away from it. Eventually, the weights will get so large that the code will throw a floating point overflow exception.

Finally, try running gradient descent in the cells below. What happens? The function $f(w) = w^4 + 2w^3 - 12w^2 - 2w + 6$ has two valleys of different heights. Given the starting point, gradient descent moves in the right direction (downhill) but settles into a local minimum, a point where the slope of the tangent line is 0 that is not where the function takes on its smallest value. Gradient descent is not guaranteed to find a global minimum, but if the step size is small enough it will find a local minimum. There will be more to say about this in later chapters.

</Explore>

We're finally ready to use gradient descent to derive the update rule for the Perceptron. There are just two more details to work out, both of which are seen in the plot below. The perceptron_loss_plot_3d function takes as input a vector of the form ${\bf x} = [x_1,x_2]$ and a class label $y$, and plots the $L_P$ loss as a function of ${\bf w} = [w_1, w_2]$. The plot represents a loss surface that shows the loss for a single instance as a function of the weights. Note that there is a large region in weight space where the loss is 0. All of the weights in that region result in a positive dot product with the input vector and thus produce zero loss because the class label is positive. In contrast, all of the weights in the sloped region result in a negative dot product with the input vector and thus produce positive loss. It's easy to see that the farther a weight is from the flat region the more loss it incurs.

The first detail to work out is that the Perceptron maintains a weight vector, not just a single weight as in the examples above, and every element of that vector needs to be updated. For each of the weights we want to know whether it should be increased or decreased, and by how much, to reduce the loss. In the plot above, we need to compute updates for $w_1$ and $w_2$ given the current training instance, i.e., $({\bf x}, y)$. That requires the use of the partial derivative of the loss with respect to each of the weights. For example, the update rule for the $i^{th}$ weight, $w_i$, would be:

$$ w_i = w_i - \alpha \frac{\partial L_P}{\partial {w_i}} $$

That is, we ask how $L_P$ changes as a function of $w_i$ if we hold all of the other weights constant and perform the standard update. The gradient of a multi-variate function is just a vector of all of the partial derivatives with respect to each of the variables. The gradient of the Perceptron loss is:

$$ \nabla L_P = \left( \frac{\partial L_P}{\partial {w_0}}, \frac{\partial L_P}{\partial {w_1}}, \ldots, \frac{\partial L_P}{\partial {w_n}} \right) $$

The resulting vector-valued Perceptron update becomes:

$$ {\bf w} = {\bf w} - \alpha \nabla L_P $$

The new weight vector is just the old weight vector minus a constant multiple of the slope of the tangent plane of the function at the old weight vector.

For the simple case with two weights shown in the $L_P$ plot above, the weight update looks like this:

$$ [w_1, w_2] = [w_1, w_2] - \alpha \left( \frac{\partial L_P}{\partial {w_1}}, \frac{\partial L_P}{\partial {w_2}} \right) $$

The vector-valued update is just doing gradient descent on each of the individual weights, but it is written in a compact form.

Recall that $L_P$ is:

$$ L_{P}(y, {\bf w} \cdot {\bf x}) = \begin{cases} 0 & \text{if $y * {\bf w} \cdot {\bf x} > 0$}\\ -y * {\bf w} \cdot {\bf x} & \text{otherwise} \end{cases} $$

When $y * {\bf w} \cdot {\bf x} > 0$, the instance is classified correctly and $L_P$ is a constant (0) with zero slope in all directions so the gradient is:

$$ \nabla L_P = (0, 0, \ldots, 0) $$

Using the vector-valued update ${\bf w} = {\bf w} - \alpha \nabla L_P = {\bf w} - \alpha * (0, \ldots, 0) = {\bf w}$ results in no change to the weight vector when the instance is classified correctly.

When $y * {\bf w} \cdot {\bf x} \leq 0$, the instance is classified incorrectly so we need to compute the partial derivatives of $L_P$ with respect to each of the weights:

$$ \frac{\partial L_P}{\partial {w_i}} = \frac{\partial}{\partial {w_i}} \left(-y \sum_j w_j x_j \right) = -y x_i $$

Therefore, when the instance is misclassified the gradient is:

$$ \nabla L_P = (-y x_0, -y x_1, \ldots, -y x_n) = -y {\bf x} $$

The vector-valued update ${\bf w} = {\bf w} - \alpha \nabla L_P$ becomes ${\bf w} = {\bf w} + \alpha y {\bf x}$. Setting $\alpha = 1$ gives precisely the Perceptron update! For positive instances, ${\bf x}$ is added to the weight vector, and for negative instances, ${\bf x}$ is subtracted from the weight vector.
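As a sanity check, the sketch below compares a finite-difference estimate of the gradient of $L_P$ with the analytical result $-y{\bf x}$ for a misclassified instance (all values are illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)
w = rng.normal(size=3)
x = rng.normal(size=3)
y = -np.sign(w @ x)   # choose the label so the instance is misclassified

def L_P(w, x, y):
    """Perceptron loss for a single instance."""
    z = y * (w @ x)
    return 0.0 if z > 0 else -z

# Estimate each partial derivative with central differences.
eps = 1e-6
grad = np.array([(L_P(w + eps * e, x, y) - L_P(w - eps * e, x, y)) / (2 * eps)
                 for e in np.eye(3)])

print(grad)     # approximately equal to...
print(-y * x)   # ...the analytical gradient -y * x
```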

The final detail to work out is how to handle the fact that $L_P$ is not differentiable everywhere. There is a kink at ${\bf w} \cdot {\bf x} = 0$ that we've conveniently ignored up to this point: the loss is continuous there, but its slope changes abruptly. That kink is clearly visible in the plot above where the sloped plane where the instance is classified incorrectly meets the region in weight space where the instance is classified correctly (i.e., where $L_P = 0$). There are infinitely many tangent planes that contain the line ${\bf w} \cdot {\bf x} = 0$, or in the plot above $-0.5 w_1 - w_2 = 0$. Fortunately, the theory of subgradients says that we can choose any tangent plane that lower bounds $L_P$ and use its slope as the gradient in gradient descent. One such plane is $L_P = 0$, which contains the half-plane where the classifier is correct for the instance. Another such plane is $L_P = -y * {\bf w} \cdot {\bf x}$, which contains the half-plane where the classifier is incorrect for the instance. Choosing the latter subgradient at the point of non-differentiability (i.e., where ${\bf w} \cdot {\bf x} = 0$) means that the vector-valued update rule is precisely the update rule as originally presented in the algorithm above.

2.5 Linear Separability and Convergence

The Perceptron Learning Algorithm is guaranteed to converge, or terminate in a finite number of steps, if the data are linearly separable. Intuitively, in two dimensions that means that there exists a line in the feature space such that all instances from one class are on one side of the line and all instances from the other class are on the other side of the line. In three or more dimensions, linear separability means that there exists a plane or hyperplane such that all instances from one class are on one side of the (hyper)plane and all instances from the other class are on the other side of the (hyper)plane.

What happens if the data are not linearly separable? The cell below shows the algorithm running on a dataset (the exclusive-or function) that is not linearly separable. There are four instances, two of each class, arranged on the corners of a square. Instances of the same class are diagonally opposite each other. There is no line that perfectly separates the two classes, even though there are only four instances. Run the animation and you'll see that the weight vector gets caught in a cycle where each update adjusts the weight vector so that the current instance is classified correctly, but doing so causes an instance previously classified correctly to be misclassified by the new weight vector. Sometimes the separator will disappear, which happens when it becomes a zero vector or lies completely outside of the region occupied by the data. The Perceptron will run forever if the data are not linearly separable, thus the use of an argument to the learning algorithm that limits the number of epochs - just in case.

Proving that the algorithm converges for linearly separable data requires a precise definition of linear separability. For the Perceptron, we will say that a dataset is linearly separable if for all instances $({\bf x}, y)$ there exists a weight vector ${\bf w}$ and a real number $\gamma > 0$ such that $y * {\bf w} \cdot {\bf x} \geq \gamma$. Note that this is slightly different from saying that the weight vector correctly classifies all of the instances. That simply requires that $y * {\bf w} \cdot {\bf x} > 0$. After proving convergence we'll use $\gamma$ to explore how easy or hard a dataset is for the learning algorithm.

Theorem 1: Given a dataset containing instances $({\bf x_i}, y_i)$, assume that (1) there exists ${\bf w^*}$ and $\gamma > 0$ such that $y_i * {\bf w^*} \cdot {\bf x_i} \geq \gamma$ for all $i$, and that (2) $\|{\bf x_i}\|_2 \leq R$ for all $i$. Then the Perceptron makes a number of mistakes/updates during learning that is at most $(R \|{\bf w^*}\|_2/\gamma)^2$.

Proof: The first assumption in the statement of the theorem is that the data are linearly separable. The second is that the length of the attribute vectors has an upper bound. We'll start by showing that the dot product between the weight vector being learned (${\bf w}$) and a weight vector that perfectly classifies the data (${\bf w^*}$) gets larger on every update.

Let ${\bf w^k}$ be the weight vector after the update for the $k^{th}$ mistake, so ${\bf w^0} = (0, \ldots, 0)$. Suppose the $k^{th}$ mistake is made on instance $({\bf x}, y)$. Then:

$$ \begin{eqnarray} {\bf w^*} \cdot {\bf w^{k}} & = & {\bf w^*} \cdot ({\bf w^{k - 1}} + y * {\bf x}) \\ & = & {\bf w^*} \cdot {\bf w^{k - 1}} + y * {\bf w^*} \cdot {\bf x} \\ & \geq & {\bf w^*} \cdot {\bf w^{k - 1}} + \gamma \end{eqnarray} $$

The first line follows from the fact that an error was made so ${\bf w^k}$ is obtained from ${\bf w^{k-1}}$ by adding $y * {\bf x}$. The second line distributes ${\bf w^*}$, and the third follows from the assumption that $y_i * {\bf w^*} \cdot {\bf x_i} \geq \gamma$.

It's easy to see by induction on $k$ that ${\bf w^*} \cdot {\bf w^k} \geq k\gamma$. That is, every time a mistake is made and the weight vector is updated, ${\bf w^*} \cdot {\bf w}$ grows by at least $\gamma$. Intuitively, that means that ${\bf w}$ is getting larger, or it is rotating to be more aligned with ${\bf w^*}$, or both.

Next we'll find an upper bound on the magnitude of ${\bf w^k}$ so that we can use the result above to lower bound the degree of alignment between ${\bf w^k}$ and ${\bf w^*}$ as $k$ grows. Again we assume that the $k^{th}$ mistake is made on instance $({\bf x}, y)$:

$$ \begin{eqnarray} \|{\bf w^k}\|_2^2 & = & \|{\bf w^{k - 1}} + y * {\bf x}\|_2^2 \\ & = & \|{\bf w^{k - 1}}\|^2_2 + 2 y * {\bf w^{k-1}} \cdot {\bf x} + \|{\bf x}\|_2^2 \\ & \leq & \|{\bf w^{k - 1}}\|^2_2 + R^2 \end{eqnarray} $$

The first line follows from the fact that an error was made so ${\bf w^k}$ is obtained from ${\bf w^{k-1}}$ by adding $y * {\bf x}$. The second line expands the quadratic term in the first line. The inequality in the third line follows from the fact that a mistake was made so $y * {\bf w^{k-1}} \cdot {\bf x}$ is at most zero, and by the assumption that $\|{\bf x}\|_2 \leq R$.

Again, by induction on $k$, it's easy to see that $\|{\bf w^k}\|_2^2 \leq k R^2$. This makes intuitive sense because the length of the longest vector in the training set is $R$ and each mistake adds one of those vectors to ${\bf w}$.

Finally, note that the cosine of the angle between ${\bf w^*}$ and ${\bf w^k}$ is:

$$ \begin{eqnarray} \cos({\bf w^*}, {\bf w^k}) & = & \frac{{\bf w^*} \cdot {\bf w^k}}{\|{\bf w^*}\|_2 \|{\bf w^k}\|_2} \\ 1 & \geq & \frac{k \gamma}{\|{\bf w^*}\|_2 \sqrt{k R^2}} \\ k & \leq & \frac{R^2 \|{\bf w^*}\|_2^2}{\gamma^2} \end{eqnarray} $$

The first line is just the definition of the cosine of the angle between two vectors. The second line makes several substitutions with upper and lower bounds. On the left-hand side, 1 is an upper bound on the cosine. In the right-hand side numerator, $k \gamma$ is our lower bound on ${\bf w^*} \cdot {\bf w^k}$. In the right-hand side denominator, $\sqrt{k R^2}$ is our upper bound on $\|{\bf w^k}\|_2$. These substitutions ensure that the $\leq$ inequality holds in the second line. The final line just solves the second one for $k$. The cosine of the angle between ${\bf w}$ and ${\bf w^*}$ increases with each mistake, and it can only increase so often until it hits the upper limit of 1.

What does this result mean? The most interesting quantity is $\gamma$, which is related to the margin of the classifier. The margin is the smallest distance from the hyperplane to any ${\bf x}$ in the training set. The plot below shows a 2-class dataset and a linear separator with what feels like a "large" margin. There is plenty of space between the separator and the closest points. If either the line or any of the instances move a bit, perhaps to adjust for newly discovered measurement noise in the feature values, the classifier would probably still be 100% accurate on the training data.

The situation is very different for the dataset below. There is very little space between the instances belonging to the two classes, and the separator is much more tightly constrained. If either it or any of the instances move around, even by a small amount, it's easy to imagine that the classifier would no longer perfectly separate the data. That's what a "small" margin looks like.

We can make the relationship between $\gamma$ and the margin precise, and thereby understand the role that $\gamma$ plays in the Perceptron's mistake bound. Suppose $({\bf x}, y)$ is a training instance for which $y * {\bf w^*} \cdot {\bf x} = \gamma$. That is, ${\bf x}$ is the closest point to the hyperplane when learning is done. There can be other points that are the same distance as ${\bf x}$ from the hyperplane, but none can be closer. There exists some point, ${\bf x^0}$, that sits on the hyperplane such that ${\bf x} = {\bf x^0} + y \lambda {\bf w^*}$ for some $\lambda > 0$. That is, we can get to ${\bf x}$ by starting somewhere on the hyperplane and moving perpendicular to it some distance in the direction of ${\bf w^*}$ for positive instances ($y = {\tt +1}$) and in the opposite direction of ${\bf w^*}$ for negative instances ($y = {\tt -1})$. The margin is $\lambda \|{\bf w^*}\|_2$ because that's the magnitude of the vector travelled to get from the hyperplane to ${\bf x}$, which is the closest point. Therefore:

$$ \begin{eqnarray} y * {\bf w^*} \cdot {\bf x} & = & y * {\bf w^*} \cdot ({\bf x^0} + y \lambda {\bf w^*}) \\ & = & y * {\bf w^*} \cdot {\bf x^0} + y^2 \lambda \|{\bf w^*}\|_2^2 \\ \gamma & = & \lambda \|{\bf w^*}\|_2^2 \\ \frac{\gamma}{\|{\bf w^*}\|_2} & = & \lambda \|{\bf w^*}\|_2 \end{eqnarray} $$

The first line expands ${\bf x}$. The second line distributes the dot product. The third line follows because ${\bf w^*} \cdot {\bf x^0} = 0$ and $y^2 = 1$ regardless of the value of $y$. To get to the fourth line we divide both sides by $\|{\bf w^*}\|_2$. For a given ${\bf w^*}$ that linearly separates the data (as specified in Theorem 1), $\gamma/{\|{\bf w^*}\|_2}$ is the value of the margin.

The mistake bound (see Theorem 1 above) is $R^2 \|{\bf w^*}\|_2^2 / \gamma^2$, and is thus inversely proportional to the square of the margin. That is, as the margin gets smaller the learning problem gets harder and we expect the Perceptron to make more mistakes before converging. But as the margin gets larger the learning problem gets easier and we expect the Perceptron to make fewer mistakes before converging.
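To make the bound concrete, the sketch below computes it for a toy separable dataset and a separator we happen to know (all values are illustrative):

```python
import numpy as np

# Toy separable data: label = sign(x1 + x2).
X = np.array([[2.0, 1.0], [1.0, 2.0], [-1.0, -2.0], [-2.0, -1.0]])
y = np.array([1, 1, -1, -1])
w_star = np.array([1.0, 1.0])   # a weight vector that separates the data

gamma = np.min(y * (X @ w_star))          # smallest value of y * w* . x
R = np.max(np.linalg.norm(X, axis=1))     # length of the longest instance
bound = (R * np.linalg.norm(w_star) / gamma) ** 2
print(gamma, R, bound)                    # mistake bound from Theorem 1
```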

2.6 The Multi-Class Perceptron

Up to this point we've dealt exclusively with binary classification problems where there are only two class labels - poisonous or edible, creditworthy or too risky, spam or valid email. However, many real-world problems involve more than two options. Sentiment classification applied to the text of movie or product reviews determines if a review is positive, negative, or neutral. Part-of-speech taggers learn to assign one of eight parts of speech to each word in a sentence - noun, pronoun, verb, adjective, adverb, preposition, conjunction, or interjection. And speech recognition systems can be trained to classify a sound according to which of the 44 English phonemes it represents.

A few small changes to the Perceptron Learning Algorithm allow it to handle multi-class classification problems, like the one shown below. There are many ways of dealing with multi-class problems, but the one used by the Perceptron is called "one vs. all" or "one vs. rest". The idea is to treat a problem with $k$ classes as $k$ binary classification problems. For the dataset below, there are $k = 3$ classes and the multi-class Perceptron will learn 3 linear separators simultaneously. One of them will separate the blue stars from the red circles and green plusses. Another will separate the red circles from the blue stars and green plusses, and the last one will separate the green plusses from the red circles and blue stars. Each hyperplane tries to separate "one" class from the "rest" of the classes.

The multi-class Perceptron algorithm is shown below, and it looks a lot like the binary Perceptron algorithm introduced earlier in this chapter. We'll focus on the differences. The first is that line ${\bf 7}$ initializes a weight vector of all 0's per unique class label in the dataset. (Recall that you can add line numbers to a code cell by clicking on it and typing ${\tt ESC}$ followed by ${\tt L}$.) The implementation stores the weight vectors in a dictionary keyed by class label. We'll denote the weight vector associated with class $c$ by ${\bf w_c}$. Lines ${\bf 16 - 18}$ are one logical line of code, and get the prediction of the Perceptron, $\hat y$, as the class $c$ for which ${\bf w_c} \cdot {\bf x}$ is the largest. Ties are broken randomly.
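As with the binary version, the original cell is not reproduced here; the sketch below is arranged so that the line numbers referenced in this section line up (same assumptions as before: ${\tt numpy}$ as ${\tt np}$ and labels in a column named ${\tt label}$):

```python
def multiclass_perceptron(df, max_epochs=100, add_bias=True):
    # sketch; assumes numpy as np and labels in df['label']
    if add_bias:
        # a column of 1's so the first weight acts as the bias term
        df.insert(0, 'bias', 1.0)

    w = {c: np.zeros(df.shape[1] - 1) for c in df['label'].unique()}
    features = [c for c in df.columns if c != 'label']

    for epoch in range(max_epochs):
        mistakes = 0
        for _, row in df.iterrows():
            x = row[features].to_numpy(dtype=float)
            y = row['label']
            # predict the highest-scoring class; ties broken randomly
            y_hat = max(
                w, key=lambda c: (np.dot(w[c], x),
                                  np.random.random()))
            # on a mistake, update both weight vectors
            if y_hat != y:
                w[y_hat] = w[y_hat] - x
                w[y] = w[y] + x
                mistakes += 1
            yield {c: v.copy() for c, v in w.items()}
        if mistakes == 0:
            break
```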

Recall that ${\bf w} \cdot {\bf x}$ is proportional to the distance of ${\bf x}$ from the hyperplane defined by ${\bf w}$. Choosing to predict the class $c$ that maximizes ${\bf w_c} \cdot {\bf x}$ corresponds to picking the binary classifier that is most "certain" about ${\bf x}$. If all of the dot products are negative, the chosen class will be the one for which ${\bf x}$ is closest to the positive side of the hyperplane. If just one dot product is positive, that class will be chosen. If more than one dot product is positive, the chosen class will be the one for which ${\bf x}$ is deepest into its positive half-plane.

All that's left is to understand the update. Line ${\bf 20}$ asks if the predicted class label is correct. If it is, the algorithm moves on to the next instance. Otherwise, the weights are updated in lines ${\bf 21}$ and ${\bf 22}$. Line ${\bf 21}$ updates the weight vector of the incorrectly predicted class by subtracting ${\bf x}$. As discussed above, subtracting ${\bf x}$ makes the weight vector "turn away" from ${\bf x}$ in an effort to get it into the negative half-plane. Line ${\bf 22}$ updates the vector of the true class by adding ${\bf x}$, which makes that weight vector "turn toward" ${\bf x}$ in an effort to get it into the positive half-plane. That's it!

A few other differences are worth discussing. First, note that the class labels do not appear in the updates. The binary Perceptron used the fact that the class labels are in $\{{\tt +1}, {\tt -1}\}$ so that the update ${\bf w} = {\bf w} + y * {\bf x}$ turns ${\bf w}$ toward misclassified positive instances and away from misclassified negative instances. The multi-class Perceptron breaks that single update into two parts when a mistake is made - one for the true class label and one for the incorrectly predicted class label.

Second, it may seem odd that for $k = 2$ classes we need one weight vector, but for $k = 3$ classes we need three weight vectors. What happened to two weight vectors? For the binary Perceptron, if a point is on the positive side of the hyperplane it is not on the negative side. And if a point is on the negative side of the hyperplane it is not on the positive side. However, as you can see in the plot below, the situation is more complicated for the multi-class Perceptron. The plot shows the 3-class dataset above, with the final learned separators. All of the blue stars are on the positive side of the blue hyperplane, all of the green plusses are on the positive side of the green hyperplane, and all of the red circles are on the positive side of the red hyperplane.

But just because a point is on the positive side of a particular hyperplane does not mean that it belongs to the corresponding class. For example, there are red circles on the positive side of the blue hyperplane. However, those seemingly misplaced red circles are farther from the red hyperplane than they are from the blue hyperplane and will thus be classified correctly. Remember, a point is assigned to the class $c$ for which ${\bf w_c} \cdot {\bf x}$ is maximized. In fact, there are red circles that are on the positive side of all three hyperplanes, but they are all farthest from the red hyperplane. Because you cannot rule a point in or out for a class by checking a single hyperplane like you can in the binary case, you need $k$ hyperplanes for $k > 2$ classes.

2.7 Final Thoughts

