"""Pandas exercises over two CSV files read from the working directory.

The first part loads ``Automobile_data.csv``, fills in missing values, writes
the result to ``Automobile_Data_Cleaned.csv`` and prints several
selections/aggregations.  The second part loads ``world_alcohol.csv`` and
prints a random sample plus a few filtered views.

Everything runs at module top level and each task prints its own banner.
"""

import random

import numpy as np
import pandas as pd

input_data = pd.read_csv('Automobile_data.csv', sep=',')

print("---First task---")
print("First 5 rows:")
print(input_data.head(5))
print("Last 5 rows:")
print(input_data.tail(5))

print("---Second task---")
# Treat the textual placeholders as missing values.  ``np.nan`` is used
# because the ``np.NaN`` alias no longer exists in NumPy 2.0.
input_data.replace(["?", "N.a"], np.nan, inplace=True)

# Numeric columns: fill gaps with the per-column median (a Series keyed by
# column name, so it lines up with the columns being filled).
numeric_cols = input_data.select_dtypes(include=['number']).columns
input_data[numeric_cols] = input_data[numeric_cols].fillna(
    input_data[numeric_cols].median()
)

# Non-numeric columns: fill gaps with the most frequent value.
# ``DataFrame.mode()`` returns a DataFrame (one row per tied mode); passing
# it straight to ``fillna`` aligns on the row index as well, so only the
# first row(s) would be filled.  Taking the first row yields a Series keyed
# by column name, which fills every row.
non_numeric_cols = input_data.select_dtypes(exclude=['number']).columns
column_modes = input_data[non_numeric_cols].mode()
if not column_modes.empty:  # empty when there is nothing to compute a mode from
    input_data[non_numeric_cols] = input_data[non_numeric_cols].fillna(
        column_modes.iloc[0]
    )

input_data.to_csv("Automobile_Data_Cleaned.csv", sep=",", index=False)

print("---Third task---")
# Company of the row holding the maximum price.
most_expensive_company = input_data.loc[input_data['price'].idxmax(), 'company']
print(f"Most expensive company: {most_expensive_company}")
# All rows that share the maximum price.
most_expensive_cars = input_data[input_data['price'] == input_data['price'].max()]
print("\nThe most expensive cars are:")
print(most_expensive_cars[['company', 'price']])

print("---Fourth task---")
toyota_cars = input_data[input_data["company"] == "toyota"]
print(toyota_cars)

print("---Fifth task---")
# Number of non-missing "index" entries per company.
count_group_by_company = input_data.groupby("company")["index"].count()
print(count_group_by_company)

print("---Sixth task---")
# Row label of the highest-priced car per company, then the full rows.
group_by_company = input_data.groupby("company")["price"].idxmax()
group_by_company_df = input_data.loc[group_by_company]
print(group_by_company_df)

print("---Seventh task---")
# Mean "average-mileage" per company, as a two-column DataFrame.
group_by_company = input_data.groupby("company")["average-mileage"].mean()
group_by_company_df = group_by_company.reset_index()
print(group_by_company_df)

print("---Eigth task---")
sorted_by_price = input_data.sort_values(by="price", ascending=True)
print(sorted_by_price)

print("---Ninth task---")
GermanCars = {
    'Company': ['Ford', 'Mercedes', 'BMV', 'Audi'],
    'Price': [23845, 171995, 135925, 71400],
}
japaneseCars = {
    'Company': ['Toyota', 'Honda', 'Nissan', 'Mitsubishi '],
    'Price': [29995, 23600, 61500, 58900],
}
german_cars_df = pd.DataFrame(GermanCars)
japanese_cars_df = pd.DataFrame(japaneseCars)
print(german_cars_df)
print(japanese_cars_df)

print("---Tenth task---")
Car_Price = {
    'Company': ['Toyota', 'Honda', 'BMV', 'Audi'],
    'Price': [23845, 17995, 135925, 71400],
}
car_Horsepower = {
    'Company': ['Toyota', 'Honda', 'BMV', 'Audi'],
    'horsepower': [141, 80, 182, 160],
}
car_price_df = pd.DataFrame(Car_Price)
car_horsepower_df = pd.DataFrame(car_Horsepower)
# Inner join on the shared "Company" column.
merged_df = pd.merge(car_price_df, car_horsepower_df, how="inner", on="Company")
print(merged_df)

print("---===Second dataset===---")
second_dataset = pd.read_csv("world_alcohol.csv", sep=",")

print("---Eleventh task---")
# Random sample of between 1 and 10 rows (inclusive).
print(second_dataset.sample(n=random.randint(1, 10)))

print("---Twelfth task---")
# Print one frame per WHO region for the year 1989.
group_by_region = second_dataset.groupby(["WHO region", "Year"])
for region, year in group_by_region.groups:
    if year == 1989:
        print(group_by_region.get_group((region, year)))

print("--Thirteenth task---")
america_1985_data = second_dataset[
    (second_dataset["WHO region"] == "Americas") & (second_dataset["Year"] == 1985)
]
print(america_1985_data)

print("---Fourteenth task---")
data_14 = second_dataset[
    (second_dataset["Display Value"] >= 5)
    & (second_dataset["Beverage Types"] == "Beer")
]
print(data_14)

print("---Fifteenth task---")
data_wine = second_dataset[
    (second_dataset["Display Value"] >= 2)
    & (second_dataset["Beverage Types"] == "Wine")
]
print(data_wine)