# Example one-hot encoder that saves space by using only as many slots as there are unique values.

import numpy as np
import pandas as pd
class OneHot:
    """Compact one-hot encoder fitted to the distinct values of a column.

    Every distinct value seen while fitting is assigned a consecutive index
    (in sorted order), so an encoded vector is only as long as the number of
    distinct values rather than the span of the raw values.

    Attributes:
        h: Maps each distinct value to its position in the encoded vector.
        hh: Reverse mapping, from position back to the original value.
        length: Number of distinct values, i.e. the encoded vector length.
    """

    def __init__(self, i_dataFrame):
        """Create the encoder and fit it to ``i_dataFrame``.

        Args:
            i_dataFrame: The values to fit on. Anything exposing
                ``tolist()`` (e.g. a pandas Series or NumPy array) or any
                plain iterable of values.
        """
        self.h = {}
        self.hh = {}
        self.length = 0
        self.H(i_dataFrame)

    def H(self, n):
        """(Re)build the value <-> index mappings from the values in ``n``.

        Any mappings from a previous fit are discarded.

        Args:
            n: The values to fit on; see ``__init__``.
        """
        # Objects without tolist() (lists, tuples, ...) are accepted too.
        raw = n.tolist() if hasattr(n, "tolist") else list(n)
        # np.unique already returns the distinct values in sorted order, so
        # no additional sort is needed.
        distinct = np.unique(np.array(raw))
        self.h = {value: index for index, value in enumerate(distinct)}
        self.hh = dict(enumerate(distinct))
        self.length = len(distinct)

    def oH(self, i):
        """Return the one-hot vector for the value ``i``.

        Args:
            i: A value that was present when the encoder was fitted.

        Returns:
            A 1-D integer NumPy array of ``self.length`` zeros with a single
            1 at the position assigned to ``i``.

        Raises:
            KeyError: If ``i`` was not among the fitted values.
        """
        b = np.zeros(self.length, dtype=int)
        b[self.h[i]] = 1
        return b
# Example usage
# Build a small 3x4 data frame to work with.
d = pd.DataFrame(
    np.array(
        [
            [1, 2, 2, 3],
            [4, 5, 6, 7],
            [4, 8, 2, 7],
        ]
    )
)
# Fit the encoder on column 0, the column to be one-hot encoded.
oneHot = OneHot(d[0])
# Store each row's encoded vector in a new 'oh' column.
d['oh'] = d[0].apply(oneHot.oH)
# Returns
#    0  1  2  3      oh   <---- This row is the column names.
# 0  1  2  2  3  [1, 0]
# 1  4  5  6  7  [0, 1]
# 2  4  8  2  7  [0, 1]
# Okay, note that you want to encode 1, 4, 4 as a one-hot
# encoding, but you only have 2 unique values (1, 4).
# So, to save space, the encoding is only length 2: the
# unique values are mapped to consecutive positions, which
# fills in the gap between them.
# Source: oneHot.py, hosted by GitHub.