ML

1. Write a Python program to perform data preprocessing on a given data set.


import pandas as pd


# Step 1: Load the dataset from the Excel file into a DataFrame

file_path = 'C:\\Users\\datset_noise.xlsx'

df = pd.read_excel(file_path, engine='openpyxl')


print("Original DataFrame:")

print(df.to_string(index=False))


# Step 2: Remove rows with missing values

df_no_missing = df.dropna()

print("\nDataFrame after removing rows with missing values:")

print(df_no_missing.to_string(index=False))

print(f"\nTotal count after removing rows with missing values: {len(df_no_missing)}")


# Store rows with missing values

df_missing_rows = df[df.isna().any(axis=1)] 


# Step 3: Remove duplicate rows

df_no_duplicates = df_no_missing.drop_duplicates(subset=["Name","DOB","Age","Date of Joining"])

print("\nDataFrame after removing duplicate rows:")

print(df_no_duplicates.to_string(index=False))

print(f"\nTotal count after removing duplicate rows: {len(df_no_duplicates)}")


# Store duplicate rows

df_duplicate_rows = df_no_missing[df_no_missing.duplicated(subset=["Name","DOB","Age","Date of Joining"])]


# Step 4: Remove outliers (assume Age > 80 is an outlier for example purposes)

df_no_outliers = df_no_duplicates[df_no_duplicates["Age"] <= 80]

print("\nDataFrame after removing outliers:")

print(df_no_outliers.to_string(index=False))

print(f"\nTotal count after removing outliers: {len(df_no_outliers)}")


# Store outlier rows

df_outlier_rows = df_no_duplicates[df_no_duplicates["Age"] > 80]


# Display the final cleaned DataFrame

print("\nFinal Cleaned DataFrame:")

print(df_no_outliers.to_string(index=False))

total_count = len(df_no_outliers)

print(f"\nTotal count of records in the cleaned data: {total_count}")


# Save the cleaned DataFrame to a CSV file

df_no_outliers.to_csv("Cleaned_Employee_Data.csv", index=False)


# Combine all deleted records

df_deleted_records = pd.concat([df_missing_rows, df_duplicate_rows, df_outlier_rows], ignore_index=True)


# Display deleted records

print("\nDeleted Records:")

print(df_deleted_records.to_string(index=False))

total_deleted = len(df) - len(df_no_outliers) 

print(f"\nTotal count of deleted records: {total_deleted}")




2. Write a Python program to illustrate the Naive Bayes algorithm.


from sklearn.naive_bayes import GaussianNB

import numpy as np

X = np.array([[-3,7],[1,5],[1,2],[-2,0],[2,3],[-4,0],[-1,1],[1,1],[-2,2],[2,7],[-4,1],[-2,7]])  # toy 2-D feature vectors

Y = np.array([3, 3, 3, 3, 4, 3, 3, 4, 3, 4, 4, 4])  # class labels (two classes: 3 and 4)

model = GaussianNB()

model.fit(X, Y)

prediction = model.predict([[-2,0]])

print(prediction)


**********************************************************

A second example, predicting Pass/Fail from study hours and past grades:

import numpy as np

from sklearn.naive_bayes import GaussianNB

 

# Sample dataset (Study hours, Past grades)

X = np.array([

 [1, 3], [2, 4], [3, 5], [4, 6], [5, 7],  # Fail group

 [6, 8], [7, 9], [8, 9], [9, 10]          # Pass group

])

 

# Labels (0 = Fail, 1 = Pass)

Y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1])

 

# Create and train the Naive Bayes classifier

model = GaussianNB()

model.fit(X, Y)

 

# Predict for a new student with 4 hours of study and a past grade of 7

new_student = [[4, 7]]

prediction = model.predict(new_student)

print("Predicted class (0 = Fail, 1 = Pass):", prediction[0])
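
As a small follow-on sketch (not in the original listing), GaussianNB also exposes predict_proba, which shows the per-class probabilities behind the prediction:

# Inspect the class probabilities behind the decision
probs = model.predict_proba(new_student)
print("P(Fail), P(Pass):", probs[0])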





3. Write a Python program to illustrate the Linear Regression algorithm.


from sklearn.linear_model import LinearRegression

X = [[2001,5.2],[2002,5.1],[2003,5.1],[2004,4.9],[2005,5.0],[2006,5.1],[2007,5.4],[2008,5.6],[2009,5.9]]

Y = [2.5,2.52,2.54,2.48,3.28,3.2,3.15,3.26,3.29]

print(len(X), len(Y))  # sanity check: both lists have 9 entries

LinR_model = LinearRegression()

LinR_model.fit(X, Y)

prediction = LinR_model.predict([[2010,5.1]])

print(prediction)

prediction_2011 = LinR_model.predict([[2011,5.2]])

print(prediction_2011)
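
A short optional addition: after fitting, the learned line can be read off the model's coef_ and intercept_ attributes, which is often more instructive than a single prediction:

# The fitted model is y = coef_[0]*x1 + coef_[1]*x2 + intercept_
print("Coefficients:", LinR_model.coef_)
print("Intercept:", LinR_model.intercept_)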




4. Write a Python program to illustrate the Logistic Regression algorithm.


from sklearn import linear_model

X = [[165,19],[175,32],[136,35],[174,65],[141,28],[176,15],[131,32],

[166,6],[128,32],[179,10],[136,34],[186,2],[126,25],[176,28],[112,38],

[169,9],[171,36],[116,25],[196,25]]


Y = ['Man','Woman','Woman','Man','Woman','Man','Woman','Man','Woman',

'Man','Woman','Man','Woman','Woman','Woman','Man','Woman','Woman','Man']

data_feature_names = ['height','age']

LR_model = linear_model.LogisticRegression()

LR_model.fit(X, Y)

prediction = LR_model.predict([[169,19]])

print(prediction)

print('Accuracy on the training subset is:', LR_model.score(X, Y))
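
Training-set accuracy is optimistic because the model is scored on the same points it was fitted on. As a hedged sketch on the same toy data (with so few samples the split is illustrative only), a train/test split gives a fairer estimate:

from sklearn.model_selection import train_test_split

# Hold out 30% of the data so accuracy is measured on unseen samples
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
LR_holdout = linear_model.LogisticRegression()
LR_holdout.fit(X_train, Y_train)
print('Accuracy on the held-out subset:', LR_holdout.score(X_test, Y_test))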




5. Write a Python program to illustrate the SVM algorithm.


from sklearn.svm import SVC

data_feature_names = ['height','age'] # The two feature columns of the dataset

X = [[165,19],[175,32],[136,35],[174,65],[141,28],[176,15],[131,32],

[166,6],[128,32],[179,10],[136,34],[186,2],[126,25],[176,28],[112,38],

[169,9],[171,36],[116,25],[196,25]]

Y = ['Man','Woman','Woman','Man','Woman','Man','Woman','Man','Woman',

'Man','Woman','Man','Woman','Woman','Woman','Man','Woman','Woman','Man']

SVC_model = SVC(gamma='auto') 

SVC_model.fit(X, Y)

print(SVC_model.predict([[156, 53]]))

print('Accuracy on the training subset:', SVC_model.score(X, Y))
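
SVMs are sensitive to feature scale, and height (roughly 112-196) dwarfs age here. An optional sketch, standardizing both features in a Pipeline before the SVC:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Scale both features to zero mean / unit variance before the SVM
scaled_svc = make_pipeline(StandardScaler(), SVC(gamma='auto'))
scaled_svc.fit(X, Y)
print(scaled_svc.predict([[156, 53]]))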




6. Write a Python program to illustrate the KNN algorithm.


import numpy as np # Numerical computations.

import matplotlib.pyplot as plt # Data Visualization

from sklearn.neighbors import NearestNeighbors

A = np.array(

              [[3.1, 2.3],[2.3, 4.2],[3.9, 3.5],[3.7, 6.4],[4.8, 1.9],[8.3, 3.1],[5.2, 7.5],[4.8, 4.7],[3.5, 5.1],[4.4, 2.9]]

            )

plt.figure()

plt.title('Input data')

plt.scatter(A[:,0], A[:,1], marker = 'x', s = 50, color = 'red')

test_data = [5.2, 2.9]

knn_model = NearestNeighbors(n_neighbors = 3, algorithm = 'auto')

knn_model.fit(A)

distances, indices = knn_model.kneighbors([test_data])

print(distances) 

print(indices)   

print("\nK Nearest Neighbors:")

# indices is a 2-D array with a single row holding the index of each nearest neighbour

for rank, index in enumerate(indices[0], start=1):

    print(str(rank) + " is", A[index])

plt.figure()

plt.title('Nearest neighbors')

plt.scatter(A[:, 0], A[:, 1], marker='x', s=100, color='red')

# Circle the k nearest neighbours so the plot matches its title
plt.scatter(A[indices[0], 0], A[indices[0], 1], s=200, facecolors='none', edgecolors='green')

plt.scatter(test_data[0], test_data[1], marker='x', s=100, color='blue')

plt.show()
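
NearestNeighbors only retrieves neighbours; it does not classify. As a hedged sketch, KNeighborsClassifier adds a majority vote over the k neighbours (the labels below are made up purely for illustration):

from sklearn.neighbors import KNeighborsClassifier

# Hypothetical labels for the ten points in A, for illustration only
labels = np.array([0, 0, 0, 1, 0, 1, 1, 1, 1, 0])
knn_clf = KNeighborsClassifier(n_neighbors=3)
knn_clf.fit(A, labels)
print(knn_clf.predict([test_data]))  # majority vote among the 3 nearest points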




7. Write a Python program to illustrate the K-means algorithm.


from sklearn.cluster import KMeans

import matplotlib.pyplot as plt

data_features = ["Height", "Age"]

X = [[165,19],[175,32],[136,35],[174,65],[141,28],[176,15],[131,32],

[166,6],[128,32],[179,10],[136,34],[186,20],[126,25],[176,28],[112,38],

[169,9],[171,36],[116,25],[196,25]]

model = KMeans(n_clusters=3, n_init=10, random_state=0)  # n_init set explicitly; newer scikit-learn versions warn if it is left to the default

# Fitting Model

model.fit(X)

cluster_labels = model.predict(X)

# Printing Predictions

print(cluster_labels)

x1 = [] # height

x2 = [] # age

for item in X:

    x1.append(item[0])

    x2.append(item[1])

print(x1)

print(x2)


plt.scatter(x1,x2, c=model.labels_)

plt.show()
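
An optional follow-up: the fitted centroids live in cluster_centers_, and scanning inertia_ over several k values (the elbow method) is a common sanity check on the choice of 3 clusters:

# Print the learned centroids, then scan k for the elbow method
print(model.cluster_centers_)
for k in range(1, 6):
    km = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X)
    print(f"k={k}: inertia={km.inertia_:.1f}")  # lower = tighter clusters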




8. Write a Python program to illustrate the Apriori algorithm.


# Importing the necessary library (pip install efficient-apriori)

from efficient_apriori import apriori

transactions=[

    ['butter','milk','bread'],

    ['butter','milk','apple'],

    ['bread','milk','banana'],

    ['milk','bread','butter']

]

itemsets,rules=apriori(transactions,min_support=0.3,min_confidence=0.8)

print("Frequent itemsets:")

for k,v in itemsets.items():

    print(f"level {k}:{v}")

print("\n Association Rules:")

for rule in rules:

    print(rule)
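
A hedged follow-up: each Rule object from efficient_apriori carries its support, confidence, and lift, so the rules can be ranked, for example by lift (how much more often the items co-occur than they would if independent):

# Rank the discovered rules by lift, strongest association first
for rule in sorted(rules, key=lambda r: r.lift, reverse=True):
    print(f"{rule} (support={rule.support:.2f}, confidence={rule.confidence:.2f}, lift={rule.lift:.2f})")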
