You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
104 lines
3.4 KiB
Python
104 lines
3.4 KiB
Python
10 months ago
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
import random
|
||
|
|
||
|
# Load the automobile dataset used by the tasks that follow.
input_data = pd.read_csv('Automobile_data.csv')

# Task 1: preview both ends of the dataset (head/tail return 5 rows by default).
print("---First task---")
print("First 5 rows:")
print(input_data.head())
print("Last 5 rows:")
print(input_data.tail())
|
||
|
|
||
|
print("---Second task---")
# Task 2: normalise missing values, fill them in, and save the cleaned data.

# Turn the textual placeholders into real missing values.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
input_data.replace(["?", "N.a"], np.nan, inplace=True)

# Numeric columns: fill gaps with each column's median.
# median() returns a Series keyed by column name, so fillna() works per column.
numeric_cols = input_data.select_dtypes(include=['number']).columns
input_data[numeric_cols] = input_data[numeric_cols].fillna(input_data[numeric_cols].median())

# Non-numeric columns: fill gaps with each column's most frequent value.
# DataFrame.mode() returns a frame (one row per tied mode). Passing that frame
# straight to fillna() aligns on the row index, so only the first row(s) would
# ever be filled. Taking its first row gives a Series keyed by column name,
# which fillna() applies column by column.
non_numeric_cols = input_data.select_dtypes(exclude=['number']).columns
non_numeric_modes = input_data[non_numeric_cols].mode()
if not non_numeric_modes.empty:  # empty when there is nothing to take a mode of
    input_data[non_numeric_cols] = input_data[non_numeric_cols].fillna(non_numeric_modes.iloc[0])

# Persist the cleaned dataset without the pandas row index.
input_data.to_csv("Automobile_Data_Cleaned.csv", sep=",", index=False)
|
||
|
|
||
|
print("---Third task---")

# Task 3: find the row holding the highest price and report its company.
price_column = input_data['price']
top_price_row = price_column.idxmax()
most_expensive_company = input_data.loc[top_price_row, 'company']
print(f"Most expensive company: {most_expensive_company}")

# List every row whose price equals the maximum (several rows if there is a tie).
is_top_price = price_column == price_column.max()
most_expensive_cars = input_data[is_top_price]
print("\nThe most expensive cars are:")
print(most_expensive_cars[['company', 'price']])
|
||
|
|
||
|
print("---Fourth task---")
# Task 4: keep only the rows whose "company" value is exactly "toyota".
is_toyota = input_data["company"] == "toyota"
toyota_cars = input_data.loc[is_toyota]
print(toyota_cars)
|
||
|
|
||
|
print("---Fifth task---")
# Task 5: count the non-null "index" values for each company.
cars_by_company = input_data.groupby(["company"])
count_group_by_company = cars_by_company["index"].count()
print(count_group_by_company)
|
||
|
|
||
|
print("---Sixth task---")
# Task 6: for each company, find the row label of its highest "price",
# then select those complete rows from the dataset.
price_by_company = input_data.groupby("company")["price"]
group_by_company = price_by_company.idxmax()
group_by_company_df = input_data.loc[group_by_company]
print(group_by_company_df)
|
||
|
|
||
|
print("---Seventh task---")
# Task 7: mean of "average-mileage" per company, converted from a Series
# back into a frame with "company" as a regular column.
mileage_by_company = input_data.groupby("company")["average-mileage"]
group_by_company = mileage_by_company.mean()
group_by_company_df = group_by_company.reset_index()
print(group_by_company_df)
|
||
|
|
||
|
# Banner spelling corrected ("Eigth" -> "Eighth").
print("---Eighth task---")

# Task 8: order all rows by "price", lowest first.
sorted_by_price = input_data.sort_values(by="price", ascending=True)
print(sorted_by_price)
|
||
|
|
||
|
print("---Ninth task---")
# Task 9: build one frame per hand-written price table and display both.
GermanCars = {
    'Company': ['Ford', 'Mercedes', 'BMV', 'Audi'],
    'Price': [23845, 171995, 135925, 71400],
}
japaneseCars = {
    'Company': ['Toyota', 'Honda', 'Nissan', 'Mitsubishi '],
    'Price': [29995, 23600, 61500, 58900],
}

# Each dict key becomes a column; the lists supply the rows.
german_cars_df = pd.DataFrame(data=GermanCars)
japanese_cars_df = pd.DataFrame(data=japaneseCars)

print(german_cars_df)
print(japanese_cars_df)
|
||
|
|
||
|
print("---Tenth task---")

# Task 10: two small tables that share the "Company" column.
Car_Price = {
    'Company': ['Toyota', 'Honda', 'BMV', 'Audi'],
    'Price': [23845, 17995, 135925, 71400],
}
car_Horsepower = {
    'Company': ['Toyota', 'Honda', 'BMV', 'Audi'],
    'horsepower': [141, 80, 182, 160],
}

car_price_df = pd.DataFrame(data=Car_Price)
car_horsepower_df = pd.DataFrame(data=car_Horsepower)

# Inner join on "Company": only companies present in both tables are kept.
merged_df = pd.merge(
    left=car_price_df,
    right=car_horsepower_df,
    on="Company",
    how="inner",
)

print(merged_df)
|
||
|
|
||
|
print("---===Second dataset===---")

# Load the second dataset used by the remaining tasks.
second_dataset = pd.read_csv("world_alcohol.csv")

print("---Eleventh task---")
# Task 11: show a random selection of rows; the row count is itself random,
# between 1 and 10 inclusive.
sample_size = random.randint(1, 10)
print(second_dataset.sample(n=sample_size))
|
||
|
|
||
|
print("---Twelfth task---")

# Task 12: group by (WHO region, Year) and print each region's 1989 group.
group_by_region = second_dataset.groupby(["WHO region", "Year"])
for group_key in group_by_region.groups:
    # Each key is a (region, year) tuple because two grouping columns are used.
    region, year = group_key
    if year != 1989:
        continue
    print(group_by_region.get_group(group_key))
|
||
|
|
||
|
# Banner now uses three leading dashes, matching the other task banners.
print("---Thirteenth task---")

# Task 13: rows whose "WHO region" is "Americas" and whose "Year" is 1985.
america_1985_data = second_dataset[(second_dataset["WHO region"] == "Americas") & (second_dataset["Year"] == 1985)]
print(america_1985_data)
|
||
|
|
||
|
print("---Fourteenth task---")

# Task 14: "Beer" rows whose "Display Value" is at least 5.
value_at_least_5 = second_dataset["Display Value"] >= 5
is_beer = second_dataset["Beverage Types"] == "Beer"
data_14 = second_dataset[value_at_least_5 & is_beer]
print(data_14)
|
||
|
|
||
|
print("---Fifteenth task---")
# Task 15: "Wine" rows whose "Display Value" is at least 2.
value_at_least_2 = second_dataset["Display Value"] >= 2
is_wine = second_dataset["Beverage Types"] == "Wine"
data_wine = second_dataset[value_at_least_2 & is_wine]
print(data_wine)
|