///////////////////////////////////program 2////////////////////////////////////////

# Candidate Elimination: find the maximally specific and maximally general
# hypotheses consistent with the training examples in pro2.csv
import pandas as pd
import numpy as np

data=pd.read_csv('pro2.csv')
data
features=np.array(data)[:,:-1]
features
target=np.array(data)[:,-1]
target

# initialise specific_h with the first positive example
for i,h in enumerate(features):
    if target[i]=='yes':
        specific_h=features[i].copy()
        break

print("initialization of specific_h and general_h")
print(specific_h)
general_h=[["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
print(general_h)

for i,h in enumerate(features):
    if target[i]=="yes":
        # positive example: generalise specific_h where it disagrees
        for x in range(len(specific_h)):
            if h[x]!=specific_h[x]:
                specific_h[x]='?'
                general_h[x][x]='?'
    if target[i]=="no":
        # negative example: specialise general_h where specific_h still commits to a value
        for x in range(len(specific_h)):
            if h[x]!=specific_h[x]:
                general_h[x][x]=specific_h[x]
            else:
                general_h[x][x]='?'
    print(specific_h,"\n")
    print(general_h,"\n")

# discard the fully general rows left over in general_h
indices=[i for i,val in enumerate(general_h) if val==['?','?','?','?','?','?']]
for i in indices:
    general_h.remove(['?','?','?','?','?','?'])

print("\n final specific_h:",specific_h,sep="\n")
print("\n final general_h:",general_h,sep="\n")

pro2.csv:
sky,temp,humidity,wind,water,forecast,enjoy sport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cloud,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes

///////////////////////////////////////program 3/////////////////////////////////////////

# Data preprocessing: handle missing values, encode categoricals, split and scale
import pandas as pd
import numpy as np

df=pd.read_csv("pro3.csv")
df
df.head()
df.tail()
df.describe()
df.info()
df.isnull().sum()

df.drop("name",axis=1,inplace=True)                   # drop the identifier column
df
df.drop(10,axis=0,inplace=True)                       # drop the row that is almost entirely empty
df
df["age"]=df["age"].fillna(df["age"].mean())          # impute missing ages with the mean
df
df["salary"]=df["salary"].fillna(df["salary"].max())  # impute missing salaries with the maximum
df
df=df.drop_duplicates()                               # assign back so the duplicate row is actually removed

x=np.array(df)[:,:-1]
x
y=np.array(df)[:,-1]
y

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# one-hot encode the city column (index 0), pass the remaining columns through unchanged
ct=ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[0])],remainder="passthrough")
x=np.array(ct.fit_transform(x))
x

from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
y=le.fit_transform(y)
y

from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)

from sklearn.preprocessing import StandardScaler
st_x=StandardScaler()
x_train=st_x.fit_transform(x_train)   # fit the scaler on the training split only
x_test=st_x.transform(x_test)         # reuse the training statistics on the test split
x_test
y_train
y_test

pro3.csv:
city,name,age,salary,eligible for bonus
mumbai,john,27,51000,yes
newyork,sam,27,48000,yes
mumbai,john,27,51000,yes
mumbai,ram,30,52000,no
newyork,ajay,,66000,no
tokyo,udgeeth,48,,yes
tokyo,sahaja,,51000,no
singapoor,rithesh,33,69000,no
newyork,rithvik,40,79000,yes
mumbai,srija,38,,yes
newyork,,,,
singapoor,ricky,35,38000,no
tokyo,mahesh,,56000,no
singapoor,jenifer,35,72000,no
newyork,peter,45,79000,yes
mumbai,charvi,31,81000,yes
singapoor,reyansh,37,49000,no
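The ColumnTransformer and StandardScaler fitted in program 3 can be reused on records that were not in pro3.csv, which is the usual next step once a model is trained on x_train. The lines below are a minimal sketch of that idea and are not part of the original program: the record values and the name new_record are hypothetical, and the sketch assumes the fitted ct and st_x objects are still in scope and that new data arrives in the same city, age, salary column order as x.

# hypothetical unseen record, same column order as x: [city, age, salary]
new_record=np.array([["tokyo",29,50000]],dtype=object)
new_encoded=ct.transform(new_record)     # reuse the one-hot encoding learned from pro3.csv
new_scaled=st_x.transform(new_encoded)   # reuse the scaling statistics learned on x_train
print(new_scaled)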
////////////////////////////////////program 4///////////////////////////////////////////////////

# ID3 decision tree induction on the PlayTennis data set
import pandas as pd
from collections import Counter
import math

tennis=pd.read_csv('pro4.csv')
print("\n given play tennis data set:\n\n",tennis)

def entropy(alist):
    # entropy of a list of class labels: -sum(p*log2(p))
    c=Counter(x for x in alist)
    instances=len(alist)
    prob=[x/instances for x in c.values()]
    return sum([-p*math.log(p,2) for p in prob])

def information_gain(d,split,target):
    # reduction in target entropy obtained by splitting on one attribute
    splitting=d.groupby(split)
    n=len(d.index)
    agent=splitting.agg({target:[entropy,lambda x:len(x)/n]})[target]
    agent.columns=['Entropy','observations']
    newentropy=sum(agent['Entropy']*agent['observations'])
    oldentropy=entropy(d[target])
    return oldentropy-newentropy

def id3(sub,target,a):
    count=Counter(x for x in sub[target])
    if len(count)==1:
        # all examples share one label: return it as a leaf
        return next(iter(count))
    else:
        gain=[information_gain(sub,attr,target) for attr in a]
        print("\n Gain=",gain)
        maximum=gain.index(max(gain))
        best=a[maximum]
        print("\nBest attribute:",best)
        tree={best:{}}
        remaining=[i for i in a if i!=best]
        for val,subset in sub.groupby(best):
            subtree=id3(subset,target,remaining)
            tree[best][val]=subtree
        return tree

names=list(tennis.columns)
print("\n list of attributes:",names)
names.remove('PlayTennis')
print("\n predicting attributes:",names)
tree=id3(tennis,'PlayTennis',names)
print("\n\nthe resultant decision tree is :")
print(tree)

pro4.csv:
PlayTennis,Outlook,Temperature,Humidity,Wind
No,Sunny,Hot,High,Weak
No,Sunny,Hot,High,Strong
Yes,Overcast,Hot,High,Weak
Yes,Rain,Mild,High,Weak
Yes,Rain,Cool,Normal,Weak
No,Rain,Cool,Normal,Strong
Yes,Overcast,Cool,Normal,Strong
No,Sunny,Mild,High,Weak
Yes,Sunny,Cool,Normal,Weak
Yes,Rain,Mild,Normal,Weak
Yes,Sunny,Mild,Normal,Strong
Yes,Overcast,Mild,High,Strong
Yes,Overcast,Hot,Normal,Weak
No,Rain,Mild,High,Strong

////////////////////////////////////////program 5////////////////////////////////////////

# Random forest classifier on the iris data set
from sklearn import datasets
import pandas as pd

iris=datasets.load_iris()
print(iris.target_names)
print(iris.feature_names)
X,y=datasets.load_iris(return_X_y=True)

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30)

from sklearn.ensemble import RandomForestClassifier
data=pd.DataFrame({'sepallength':iris.data[:,0],'sepalwidth':iris.data[:,1],
                   'petallength':iris.data[:,2],'petalwidth':iris.data[:,3],
                   'species':iris.target})
print(data.head())

clf=RandomForestClassifier(n_estimators=2)   # a very small forest; more trees usually improve accuracy
clf.fit(X_train,y_train)
y_pred=clf.predict(X_test)

from sklearn import metrics
print()
print("ACCURACY OF THE MODEL:",metrics.accuracy_score(y_test,y_pred))
print(clf.predict([[3,3,2,2]]))   # class predictions for two hand-made measurement vectors
print(clf.predict([[6,6,6,6]]))

///////////////////////////////////////////////////////////////////////////////////////////////
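A single accuracy number says little about which iris classes the program 5 forest confuses, so a short follow-up check is often added after the accuracy print. The lines below are a minimal sketch along those lines and are not part of the original program; they assume clf, y_test, y_pred and iris from program 5 are still in scope and use only standard scikit-learn calls.

from sklearn.metrics import confusion_matrix,classification_report
print(confusion_matrix(y_test,y_pred))          # rows: true class, columns: predicted class
print(classification_report(y_test,y_pred,target_names=iris.target_names))
for name,score in zip(iris.feature_names,clf.feature_importances_):
    print(name,round(score,3))                  # how much each feature contributed to the forest's splits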