You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
80 lines
1.8 KiB
Python
80 lines
1.8 KiB
Python
10 months ago
|
"""
author: Wojciech Janota
Laboratory: Lab 2, ex. 3

Loads Diet_R.csv, fills in missing gender values and reports descriptive
statistics (quartiles, mean, standard deviation) for the whole data set
and per gender, both on stdout and in result.txt.
"""
import pandas as pd
import numpy as np

# Load the data set; the path is relative to the current working directory.
input_data = pd.read_csv('Diet_R.csv', sep=',')

# A gender value consisting of a single space is treated as missing.
# The cleaned column is assigned back explicitly: in-place calls on a column
# selection (df["col"].replace(..., inplace=True)) do not update the frame
# under pandas Copy-on-Write, and `np.NaN` no longer exists in NumPy 2.x.
gender = input_data["gender"].replace(" ", np.nan)

# Impute missing entries with the most frequent value. mode() returns an empty
# Series when every value is missing, so only fill when a mode exists.
gender_modes = gender.mode()
if not gender_modes.empty:
    gender = gender.fillna(gender_modes.iloc[0])

input_data["gender"] = gender

# Descriptive statistics for the data set as a whole.
summary_stats = input_data.describe()

# Pull the rows of interest out of the describe() table in one pass.
median, first_quartile, third_quartile, mean, std_dev = (
    summary_stats.loc[row_label]
    for row_label in ('50%', '25%', '75%', 'mean', 'std')
)

print("Statistics")

# Print the results
for heading, values in (
    ("Median:\n", median),
    ("\nFirst Quartile:\n", first_quartile),
    ("\nThird Quartile:\n", third_quartile),
    ("\nMean:\n", mean),
    ("\nStandard Deviation:\n", std_dev),
):
    print(heading, values)

# Mode "w" starts result.txt from scratch with the general section.
with open("result.txt", "w") as output_file:
    # The empty first and last entries give the report its leading and
    # trailing newline once the lines are joined.
    result = "\n".join([
        "",
        "\t\tGeneral statistics:",
        "\tMedian:",
        str(median),
        "\tFirst Quartile:",
        str(first_quartile),
        "\tThird Quartile:",
        str(third_quartile),
        "\tMean:",
        str(mean),
        "\tStd. deviation:",
        str(std_dev),
        "",
    ])
    output_file.write(result)

# Descriptive statistics computed separately for each gender value.
grouped_stats = input_data.groupby('gender').describe()

print("Statistics grouped by gender")

# Extract specific values: describe() on a groupby yields two-level columns
# (variable, statistic); take a cross-section on the statistic level.
median, first_quartile, third_quartile, mean, std_dev = (
    grouped_stats.xs(stat_label, axis=1, level=1)
    for stat_label in ('50%', '25%', '75%', 'mean', 'std')
)

# Print the results
for heading, table in (
    ("Median:\n", median),
    ("\nFirst Quartile:\n", first_quartile),
    ("\nThird Quartile:\n", third_quartile),
    ("\nMean:\n", mean),
    ("\nStandard Deviation:\n", std_dev),
):
    print(heading, table)

# Mode "a" appends, so whatever result.txt already contains is kept.
with open("result.txt", "a") as output_file:
    # The empty first entry plus the "\n\n" entry produce the blank lines
    # that separate this section from the preceding file contents.
    result = "\n".join([
        "",
        "\n\n",
        "\t\tStatistics per gender:",
        "\tMedian:",
        str(median),
        "\tFirst Quartile:",
        str(first_quartile),
        "\tThird Quartile:",
        str(third_quartile),
        "\tMean:",
        str(mean),
        "\tStd. deviation:",
        str(std_dev),
        "",
    ])
    output_file.write(result)