You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

104 lines
3.4 KiB
Python

import pandas as pd
import numpy as np
import random
# Load the automobile dataset; the tasks that follow all work on this frame.
input_data = pd.read_csv("Automobile_data.csv", sep=",")

print("---First task---")
# Show the top and the bottom five rows, each under its own caption.
for caption, preview in (
    ("First 5 rows:", input_data.head(5)),
    ("Last 5 rows:", input_data.tail(5)),
):
    print(caption)
    print(preview)
print("---Second task---")
# Turn the textual placeholders into real missing values.  ``np.nan`` is the
# canonical spelling; the ``np.NaN`` alias no longer exists in NumPy 2.0+.
input_data.replace(["?", "N.a"], np.nan, inplace=True)
# NOTE(review): a column that held placeholders keeps its object dtype after
# the replacement, so it is handled by the non-numeric branch below -- confirm
# whether such columns should be converted with pd.to_numeric first.

# Numeric columns: fill the gaps with each column's median.
numeric_cols = input_data.select_dtypes(include=['number']).columns
input_data[numeric_cols] = input_data[numeric_cols].fillna(input_data[numeric_cols].median())

# Non-numeric columns: fill the gaps with each column's most frequent value.
# DataFrame.mode() returns a DataFrame (one row per tied mode); handing that
# to fillna aligns on the row index and would only fill the first row(s).
# Its first row, taken as a Series keyed by column name, fills every row.
non_numeric_cols = input_data.select_dtypes(exclude=['number']).columns
column_modes = input_data[non_numeric_cols].mode()
if not column_modes.empty:
    input_data[non_numeric_cols] = input_data[non_numeric_cols].fillna(column_modes.iloc[0])

# Persist the cleaned data without the pandas row index.
input_data.to_csv("Automobile_Data_Cleaned.csv", sep=",", index=False)
print("---Third task---")
# Row label of the (first) highest price, then the company on that row.
priciest_row_label = input_data['price'].idxmax()
most_expensive_company = input_data.loc[priciest_row_label, 'company']
print(f"Most expensive company: {most_expensive_company}")

# Every row that shares the maximum price, not just the first one.
highest_price = input_data['price'].max()
has_highest_price = input_data['price'] == highest_price
most_expensive_cars = input_data[has_highest_price]
print("\nThe most expensive cars are:")
print(most_expensive_cars[['company', 'price']])
print("---Fourth task---")
# Keep only the rows whose company is exactly "toyota".
is_toyota = input_data["company"] == "toyota"
toyota_cars = input_data[is_toyota]
print(toyota_cars)

print("---Fifth task---")
# Number of non-missing "index" entries per company.
rows_per_company = input_data.groupby("company")["index"]
count_group_by_company = rows_per_company.count()
print(count_group_by_company)
# Both tasks below aggregate per company, so the grouping is built once.
cars_by_company = input_data.groupby("company")

print("---Sixth task---")
# Row label of the highest-priced car of each company, then the full rows.
group_by_company = cars_by_company["price"].idxmax()
group_by_company_df = input_data.loc[group_by_company]
print(group_by_company_df)

print("---Seventh task---")
# Mean of "average-mileage" per company, with the company as a regular column.
group_by_company = cars_by_company["average-mileage"].mean()
group_by_company_df = group_by_company.reset_index()
print(group_by_company_df)
print("---Eigth task---")
# Order the cars by price, lowest price first (ascending is the default).
sorted_by_price = input_data.sort_values("price")
print(sorted_by_price)
print("---Ninth task---")
# Two hand-written company/price tables.
GermanCars = {
    'Company': ['Ford', 'Mercedes', 'BMV', 'Audi'],
    'Price': [23845, 171995, 135925, 71400],
}
japaneseCars = {
    'Company': ['Toyota', 'Honda', 'Nissan', 'Mitsubishi '],
    'Price': [29995, 23600, 61500, 58900],
}

# Build one DataFrame per table, then print them in the same order.
german_cars_df, japanese_cars_df = (
    pd.DataFrame(table) for table in (GermanCars, japaneseCars)
)
for cars_frame in (german_cars_df, japanese_cars_df):
    print(cars_frame)
print("---Tenth task---")
# Price and horsepower tables that share the same "Company" key values.
Car_Price = {
    'Company': ['Toyota', 'Honda', 'BMV', 'Audi'],
    'Price': [23845, 17995, 135925, 71400],
}
car_Horsepower = {
    'Company': ['Toyota', 'Honda', 'BMV', 'Audi'],
    'horsepower': [141, 80, 182, 160],
}
car_price_df, car_horsepower_df = map(pd.DataFrame, (Car_Price, car_Horsepower))

# Inner join on the company name: one row per company present in both tables.
merged_df = pd.merge(
    left=car_price_df,
    right=car_horsepower_df,
    on="Company",
    how="inner",
)
print(merged_df)
print("---===Second dataset===---")
# Load the alcohol-consumption dataset used by the remaining tasks.
second_dataset = pd.read_csv("world_alcohol.csv", sep=",")

print("---Eleventh task---")
# Print a random selection of rows; the selection size is itself random (1-10).
sample_size = random.randint(1, 10)
print(second_dataset.sample(n=sample_size))
print("---Twelfth task---")
# Group the rows by (WHO region, year) and print, region by region, the
# group belonging to the year 1989.
group_by_region = second_dataset.groupby(["WHO region", "Year"])
# Each key of ``.groups`` is a (region, year) tuple.
for region, year in group_by_region.groups:
    if year == 1989:
        print(group_by_region.get_group((region, year)))
print("--Thirteenth task---")
# Rows recorded for the "Americas" region in 1985.
in_americas = second_dataset["WHO region"] == "Americas"
in_1985 = second_dataset["Year"] == 1985
america_1985_data = second_dataset[in_americas & in_1985]
print(america_1985_data)

print("---Fourteenth task---")
# Beer rows whose display value is at least 5.
value_at_least_5 = second_dataset["Display Value"] >= 5
is_beer = second_dataset["Beverage Types"] == "Beer"
data_14 = second_dataset[value_at_least_5 & is_beer]
print(data_14)

print("---Fifteenth task---")
# Wine rows whose display value is at least 2.
value_at_least_2 = second_dataset["Display Value"] >= 2
is_wine = second_dataset["Beverage Types"] == "Wine"
data_wine = second_dataset[value_at_least_2 & is_wine]
print(data_wine)