
Write a Python program to find null values in a given dataset and remove them

# Importing libraries
import pandas as pd

# Reading the dataset
dataset = pd.read_csv('C:\\Users\\Desktop\\Desktop\\Soleha\\Mcs ml\\ML PRACTICAL SOLUTIONS\\CSV Files\\employees.csv')
print(dataset.describe())
print(dataset)

# Counting null values in each column
print(dataset.isnull().sum())

# Removing rows that contain null values
dataset.dropna(inplace=True)
print(dataset)

Write a Python program to implement K-Nearest Neighbors (KNN) classification (use iris Dataset)

# Import necessary modules
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt

irisData = load_iris()

# Create feature and target arrays
X = irisData.data
y = irisData.target

# Split into training and test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Set up arrays to store train and test accuracies
neighbors = np.arange(1, 9)
train_accuracy = np.empty(len(neighbors))
test_accuracy = np.empty(len(neighbors))

# Loop over different values of k
for i, k in enumerate(neighbors):
    # Set up a k-NN classifier with k neighbors: knn
    knn = KNeighborsClassifier(n_neighbors=k)
    # Fit the classifier to the training data
    knn.fit(X_train, y_train)
    ...
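The listing is truncated after the fit call. A minimal sketch of how the loop and plot are typically completed, reusing the arrays set up above (the exact original continuation is an assumption):

    # Compute accuracy on the training and test sets (assumed continuation)
    train_accuracy[i] = knn.score(X_train, y_train)
    test_accuracy[i] = knn.score(X_test, y_test)

# Plot accuracy against the number of neighbors
plt.plot(neighbors, test_accuracy, label='Testing dataset accuracy')
plt.plot(neighbors, train_accuracy, label='Training dataset accuracy')
plt.legend()
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.show()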

Write a Python program to implement Principal Component Analysis (PCA) (use breast cancer Dataset)

# Importing required libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Import the breast_cancer dataset
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
data.keys()

# Check the output classes
print(data['target_names'])

# Check the input attributes
print(data['feature_names'])

# Construct a dataframe using pandas
df1 = pd.DataFrame(data['data'], columns=data['feature_names'])

# Scale data before applying PCA
scaling = StandardScaler()

# Use the fit and transform methods
scaling.fit(df1)
Scaled_data = scaling.transform(df1)

# Set n_components=3
principal = PCA(n_components=3)
principal.fit(Scaled_data)
x = principal.transform(Scaled_data)

# Check the dimensions of data after PCA
print(x.shape)

# Check the eigenvectors produced by the principal components
principal.components_

plt.figure(figsi...
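The listing cuts off at the plotting call. A plausible finish that scatters the first two principal components colored by class (the figure size and colormap are assumptions):

plt.figure(figsize=(10, 10))
# Plot the first two principal components, colored by tumor class
plt.scatter(x[:, 0], x[:, 1], c=data['target'], cmap='plasma')
plt.xlabel('pc1')
plt.ylabel('pc2')
plt.show()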

Write a Python program to implement a Support Vector Machine (SVM)

# Importing required libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Importing make_blobs from scikit-learn
# (sklearn.datasets.samples_generator was removed in newer releases)
from sklearn.datasets import make_blobs

# Creating dataset: X containing n_samples points,
# Y containing two classes
X, Y = make_blobs(n_samples=500, centers=2,
                  random_state=0, cluster_std=0.40)

# Plotting scatters
plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap='spring')
plt.show()

# Creating line space between -1 and 3.5
xfit = np.linspace(-1, 3.5)

# Plotting scatter
plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap='spring')

# Plot candidate separating lines between the two sets of data
for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]:
    yfit = m * xfit + b
    plt.plot(xfit, yfit, '-k')
    plt.fill_between(xfit, yfit - d, yf...
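The fill_between call is truncated. A hedged completion that shades each candidate margin and then fits an actual linear SVM to the blobs (the shading style, kernel, and C value are assumptions):

    plt.fill_between(xfit, yfit - d, yfit + d, edgecolor='none',
                     color='#AAAAAA', alpha=0.4)

plt.xlim(-1, 3.5)
plt.show()

# Fit a linear support vector classifier to the data
from sklearn.svm import SVC
clf = SVC(kernel='linear', C=1e10)
clf.fit(X, Y)
print('Support vectors:')
print(clf.support_vectors_)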

Write a Python program to implement a decision tree to decide whether or not to play tennis

# Importing data
import numpy as np
import pandas as pd
df = pd.read_csv('H:\\ML_Practical\\CSV Files\\weather.csv')

# Converting categorical variables into dummy/indicator variables
df_getdummy = pd.get_dummies(data=df, columns=['Temperature', 'Outlook', 'Windy', 'Humidity'])

# Separating the training set and test set
from sklearn.model_selection import train_test_split
X = df_getdummy.drop('Played?', axis=1)
y = df_getdummy['Played?']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=101)

# Fitting the decision tree and predicting on the test set
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(max_depth=3)
dtree.fit(X_train, y_train)
predictions = dtree.predict(X_test)
print(predictions)

# Visualising the decision tree diagram
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(6, 6))
plot_tree(dtree, feature_names=d...
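The plot_tree call is truncated at its first keyword argument. A plausible finish, assuming the truncated argument names the dummy-encoded feature columns the tree was trained on:

plot_tree(dtree, feature_names=list(X.columns), filled=True)
plt.show()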

Write a Python program to implement Gaussian Naive Bayes classification (use iris Dataset)

# Load the iris dataset
from sklearn.datasets import load_iris
iris = load_iris()

# Store the feature matrix (X) and response vector (y)
X = iris.data
y = iris.target

# Splitting X and y into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)

# Training the model on the training set
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Making predictions on the testing set
y_pred = gnb.predict(X_test)

# Comparing actual response values (y_test) with predicted response values (y_pred)
from sklearn import metrics
print("Gaussian Naive Bayes model accuracy (in %):",
      metrics.accuracy_score(y_test, y_pred) * 100)

Write a Python program to implement polynomial regression for a given dataset.

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
datas = pd.read_csv('H:\\ML_Practical\\CSV Files\\data1.csv')
print(datas)
print(datas.head())

# Dividing the dataset into 2 components
X = datas.iloc[:, 1:2].values
y = datas.iloc[:, 2].values

# Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
line = LinearRegression()
line.fit(X, y)

# Visualising the Linear Regression results
plt.scatter(X, y, color='blue')
plt.plot(X, line.predict(X), color='red')
plt.title('Linear Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=8)
X_poly = poly.fit_transform(X)
lin2 = LinearRegression()
lin2.fit(X_poly, y)

plt.scatter(X, y, col...
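The polynomial-regression plot is cut off. A hedged completion mirroring the linear plot above (the exact original titles and colors are assumptions):

# Visualising the Polynomial Regression results
plt.scatter(X, y, color='blue')
plt.plot(X, lin2.predict(poly.fit_transform(X)), color='red')
plt.title('Polynomial Regression')
plt.xlabel('Temperature')
plt.ylabel('Pressure')
plt.show()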

Write a Python program to implement multiple linear regression for a given dataset

import pandas as pd
from sklearn import linear_model
import matplotlib.pyplot as plt
import statsmodels.api as sm

Stock_Market = {
    'Year': [2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
             2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016],
    'Month': [12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
              12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
    'Interest_Rate': [2.75, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.25, 2.25, 2.25, 2, 2,
                      2, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75],
    'Unemployment_Rate': [5.3, 5.3, 5.3, 5.3, 5.4, 5.6, 5.5, 5.5, 5.5, 5.6, 5.7, 5.9,
                          6, 5.9, 5.8, 6.1, 6.2, 6.1, 6.1, 6.1, 5.9, 6.2, 6.2, 6.1],
    '...
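The dictionary is truncated at its last key, which is presumably the dependent variable; its name and values are not shown, so they stay omitted ('Stock_Index_Price' below is an assumption). A sketch of how the regression is then typically fit with both imported libraries:

df = pd.DataFrame(Stock_Market)

# Independent variables and the (assumed) dependent variable
X = df[['Interest_Rate', 'Unemployment_Rate']]
y = df['Stock_Index_Price']  # assumed name of the truncated last column

# Fit with scikit-learn
regr = linear_model.LinearRegression()
regr.fit(X, y)
print('Intercept:', regr.intercept_)
print('Coefficients:', regr.coef_)

# Fit with statsmodels for a full regression summary
X_sm = sm.add_constant(X)
model = sm.OLS(y, X_sm).fit()
print(model.summary())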

Write a Python program to implement simple linear regression to predict house prices

# Importing libraries
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import train_test_split

# Load the Boston housing dataset from a CSV file and print it
# (previously: from sklearn.datasets import load_boston)
boston = pd.read_csv('H:\\ML_Practical\\CSV Files\\housing.csv')
print("************ Printing dataset ************")
print(boston)

# Transform the dataset into a data frame
# NOTE: boston.data = the data we want,
#       boston.feature_names = the column names of the data,
#       boston.target = our target variable, the price of the houses
# boston_data = pd.read_csv('housing.csv',
#                           usecols=['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM',
#                                    'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO'])
boston_feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', ...
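The feature list is truncated. The standard Boston housing columns are well documented, so a hedged completion of the fit follows (the target column name 'MEDV' in housing.csv is an assumption):

boston_feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                        'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']

X = boston[boston_feature_names]
y = boston['MEDV']  # assumed name of the house-price column

# Split the data and fit a linear regression model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=5)
reg = linear_model.LinearRegression()
reg.fit(X_train, y_train)

# Predict house prices on the test set
y_pred = reg.predict(X_test)
print('Predicted house prices:', y_pred)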

Write a Python program to encode categorical values into numeric values for a given dataset

# Importing libraries
import pandas as pd

# Loading dataset from a CSV file
dataset = pd.read_csv('H:\\ML_Practical\\CSV Files\\iris.csv')

# Converting a column to the category dtype using pandas
dataset["variety"] = dataset["variety"].astype('category')
print("******* Data types of fields in iris.csv ******")
print(dataset.dtypes)
print(dataset["variety"])

# Assigning the encoded variable to a new column using cat.codes
print("***** Adding new column ******")
dataset['variety_num'] = dataset['variety'].cat.codes
print(dataset.dtypes)
print()
print("_______________________________________________________")

# To view the full dataset
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_columns', 7)
print(dataset)

Write a Python program to prepare a scatter plot (use Forge/iris Dataset)

# Importing libraries
import matplotlib.pyplot as plt
import pandas as pd

# Loading data from a CSV file
dataset = pd.read_csv('C:\\Users\\Desktop\\Desktop\\MERAJ\\TYBCS\\ML\\ML Practical\\CSV Files\\iris.csv')

# Creating 3 different dataframes, one per variety value
setosa = dataset[dataset['variety'] == 'Setosa']
virginica = dataset[dataset['variety'] == 'Virginica']
versicolor = dataset[dataset['variety'] == 'Versicolor']

print(dataset.describe())

fig, ax = plt.subplots(1, 2, figsize=(9, 9))
setosa.plot(x="sepal.length", y="sepal.width", kind="scatter",
            ax=ax[0], label='Setosa', color='r')
versicolor.plot(x="sepal.length", y="sepal.width", kind="scatter",
                ax=ax[0], label='Versicolor', color='b...
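The last plot call is cut off inside its color argument. A plausible finish that completes the versicolor call and adds the virginica frame on the second axis (the panel assignment and green color are assumptions):

versicolor.plot(x="sepal.length", y="sepal.width", kind="scatter",
                ax=ax[0], label='Versicolor', color='b')
virginica.plot(x="sepal.length", y="sepal.width", kind="scatter",
               ax=ax[1], label='Virginica', color='g')
plt.show()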