""" author: Wojciech Janota Laboratory: Lab 2, ex. 3 """ import pandas as pd import numpy as np input_data = pd.read_csv('Diet_R.csv', sep=',') input_data["gender"].replace(" ", np.NaN, inplace=True) input_data["gender"].fillna(input_data["gender"].mode()[0], inplace=True) summary_stats = input_data.describe() median = summary_stats.loc['50%', :] first_quartile = summary_stats.loc['25%', :] third_quartile = summary_stats.loc['75%', :] mean = summary_stats.loc['mean', :] std_dev = summary_stats.loc['std', :] print("Statistics") # Print the results print("Median:\n", median) print("\nFirst Quartile:\n", first_quartile) print("\nThird Quartile:\n", third_quartile) print("\nMean:\n", mean) print("\nStandard Deviation:\n", std_dev) with open("result.txt", "w") as output_file: result = f""" \t\tGeneral statistics: \tMedian: {median} \tFirst Quartile: {first_quartile} \tThird Quartile: {third_quartile} \tMean: {mean} \tStd. deviation: {std_dev} """ output_file.write(result) grouped_stats = input_data.groupby('gender').describe() print("Statistics grouped by gender") # Extract specific values median = grouped_stats.xs(key='50%', level=1, axis=1) first_quartile = grouped_stats.xs(key='25%', level=1, axis=1) third_quartile = grouped_stats.xs(key='75%', level=1, axis=1) mean = grouped_stats.xs(key='mean', level=1, axis=1) std_dev = grouped_stats.xs(key='std', level=1, axis=1) # Print the results print("Median:\n", median) print("\nFirst Quartile:\n", first_quartile) print("\nThird Quartile:\n", third_quartile) print("\nMean:\n", mean) print("\nStandard Deviation:\n", std_dev) with open("result.txt", "a") as output_file: result = f""" \n\n \t\tStatistics per gender: \tMedian: {median} \tFirst Quartile: {first_quartile} \tThird Quartile: {third_quartile} \tMean: {mean} \tStd. deviation: {std_dev} """ output_file.write(result)