You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

80 lines
1.8 KiB
Python

"""
author: Wojciech Janota
Laboratory: Lab 2, ex. 3
"""
import pandas as pd
import numpy as np
# Load the diet data set; the relative path is resolved against the current
# working directory.
input_data = pd.read_csv('Diet_R.csv', sep=',')
# A single-space string is treated as a missing gender: turn it into NaN and
# then impute with the most frequent remaining value.
# The results are assigned back to the column rather than using
# inplace=True on input_data["gender"]: that chained in-place form operates
# on a temporary under pandas Copy-on-Write and would leave the frame
# unchanged. np.nan is used because the np.NaN alias was removed in NumPy 2.0.
input_data["gender"] = input_data["gender"].replace(" ", np.nan)
input_data["gender"] = input_data["gender"].fillna(
    input_data["gender"].mode()[0]
)
# Overall descriptive statistics: describe() returns one row per statistic,
# so each reported value is a single row lookup by its label.
summary_stats = input_data.describe()
median, first_quartile, third_quartile, mean, std_dev = (
    summary_stats.loc[row_label]
    for row_label in ('50%', '25%', '75%', 'mean', 'std')
)
print("Statistics")
# Echo every statistic to the console under its own heading.
for heading, values in (
    ("Median:\n", median),
    ("\nFirst Quartile:\n", first_quartile),
    ("\nThird Quartile:\n", third_quartile),
    ("\nMean:\n", mean),
    ("\nStandard Deviation:\n", std_dev),
):
    print(heading, values)
# Render the overall statistics report. The text is built before the file is
# opened so the handle is held only for the write itself.
result = f"""
\t\tGeneral statistics:
\tMedian:
{median}
\tFirst Quartile:
{first_quartile}
\tThird Quartile:
{third_quartile}
\tMean:
{mean}
\tStd. deviation:
{std_dev}
"""
# Mode "w" creates result.txt or truncates an existing one, so every run
# starts the report from scratch.
with open("result.txt", "w") as output_file:
    output_file.write(result)
# Per-gender descriptive statistics. The grouped describe() result has
# two-level columns (column name, statistic); a cross-section on level 1
# keeps one statistic for every column, with one row per gender.
grouped_stats = input_data.groupby('gender').describe()
print("Statistics grouped by gender")
median, first_quartile, third_quartile, mean, std_dev = (
    grouped_stats.xs(key=stat_label, level=1, axis=1)
    for stat_label in ('50%', '25%', '75%', 'mean', 'std')
)
# Echo every statistic to the console under its own heading.
for heading, values in (
    ("Median:\n", median),
    ("\nFirst Quartile:\n", first_quartile),
    ("\nThird Quartile:\n", third_quartile),
    ("\nMean:\n", mean),
    ("\nStandard Deviation:\n", std_dev),
):
    print(heading, values)
# Render the per-gender statistics report. The text is built before the file
# is opened so the handle is held only for the write itself.
result = f"""
\n\n
\t\tStatistics per gender:
\tMedian:
{median}
\tFirst Quartile:
{first_quartile}
\tThird Quartile:
{third_quartile}
\tMean:
{mean}
\tStd. deviation:
{std_dev}
"""
# Mode "a" appends, so this section is added after whatever result.txt
# already contains instead of overwriting it.
with open("result.txt", "a") as output_file:
    output_file.write(result)