Lab2
This commit is contained in:
commit
d24a3848a2
3
lab2/.idea/.gitignore
generated
vendored
Normal file
3
lab2/.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
30
lab2/.idea/csv-editor.xml
generated
Normal file
30
lab2/.idea/csv-editor.xml
generated
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="CsvFileAttributes">
|
||||||
|
<option name="attributeMap">
|
||||||
|
<map>
|
||||||
|
<entry key="/Diet_R.csv">
|
||||||
|
<value>
|
||||||
|
<Attribute>
|
||||||
|
<option name="separator" value="," />
|
||||||
|
</Attribute>
|
||||||
|
</value>
|
||||||
|
</entry>
|
||||||
|
<entry key="/detected_outliers.csv">
|
||||||
|
<value>
|
||||||
|
<Attribute>
|
||||||
|
<option name="separator" value="," />
|
||||||
|
</Attribute>
|
||||||
|
</value>
|
||||||
|
</entry>
|
||||||
|
<entry key="/zb_6_cleaned_outliers.csv">
|
||||||
|
<value>
|
||||||
|
<Attribute>
|
||||||
|
<option name="separator" value="," />
|
||||||
|
</Attribute>
|
||||||
|
</value>
|
||||||
|
</entry>
|
||||||
|
</map>
|
||||||
|
</option>
|
||||||
|
</component>
|
||||||
|
</project>
|
6
lab2/.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
lab2/.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
10
lab2/.idea/lab2.iml
generated
Normal file
10
lab2/.idea/lab2.iml
generated
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
7
lab2/.idea/misc.xml
generated
Normal file
7
lab2/.idea/misc.xml
generated
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Black">
|
||||||
|
<option name="sdkName" value="Python 3.11 (lab2)" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (lab2)" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
8
lab2/.idea/modules.xml
generated
Normal file
8
lab2/.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/lab2.iml" filepath="$PROJECT_DIR$/.idea/lab2.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
79
lab2/Diet_R.csv
Normal file
79
lab2/Diet_R.csv
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
Person,gender,Age,Height,pre.weight,Diet,weight6weeks
|
||||||
|
25, ,41,171,60,2,60
|
||||||
|
26, ,32,174,103,2,103
|
||||||
|
1,0,22,159,58,1,54.2
|
||||||
|
2,0,46,192,60,1,54
|
||||||
|
3,0,55,170,64,1,63.3
|
||||||
|
4,0,33,171,64,1,61.1
|
||||||
|
5,0,50,170,65,1,62.2
|
||||||
|
6,0,50,201,66,1,64
|
||||||
|
7,0,37,174,67,1,65
|
||||||
|
8,0,28,176,69,1,60.5
|
||||||
|
9,0,28,165,70,1,68.1
|
||||||
|
10,0,45,165,70,1,66.9
|
||||||
|
11,0,60,173,72,1,70.5
|
||||||
|
12,0,48,156,72,1,69
|
||||||
|
13,0,41,163,72,1,68.4
|
||||||
|
14,0,37,167,82,1,81.1
|
||||||
|
27,0,44,174,58,2,60.1
|
||||||
|
28,0,37,172,58,2,56
|
||||||
|
29,0,41,165,59,2,57.3
|
||||||
|
30,0,43,171,61,2,56.7
|
||||||
|
31,0,20,169,62,2,55
|
||||||
|
32,0,51,174,63,2,62.4
|
||||||
|
33,0,31,163,63,2,60.3
|
||||||
|
34,0,54,173,63,2,59.4
|
||||||
|
35,0,50,166,65,2,62
|
||||||
|
36,0,48,163,66,2,64
|
||||||
|
37,0,16,165,68,2,63.8
|
||||||
|
38,0,37,167,68,2,63.3
|
||||||
|
39,0,30,161,76,2,72.7
|
||||||
|
40,0,29,169,77,2,77.5
|
||||||
|
52,0,51,165,60,3,53
|
||||||
|
53,0,35,169,62,3,56.4
|
||||||
|
54,0,21,159,64,3,60.6
|
||||||
|
55,0,22,169,65,3,58.2
|
||||||
|
56,0,36,160,66,3,58.2
|
||||||
|
57,0,20,169,67,3,61.6
|
||||||
|
58,0,35,163,67,3,60.2
|
||||||
|
59,0,45,155,69,3,61.8
|
||||||
|
60,0,58,141,70,3,63
|
||||||
|
61,0,37,170,70,3,62.7
|
||||||
|
62,0,31,170,72,3,71.1
|
||||||
|
63,0,35,171,72,3,64.4
|
||||||
|
64,0,56,171,73,3,68.9
|
||||||
|
65,0,48,153,75,3,68.7
|
||||||
|
66,0,41,157,76,3,71
|
||||||
|
15,1,39,168,71,1,71.6
|
||||||
|
16,1,31,158,72,1,70.9
|
||||||
|
17,1,40,173,74,1,69.5
|
||||||
|
18,1,50,160,78,1,73.9
|
||||||
|
19,1,43,162,80,1,71
|
||||||
|
20,1,25,165,80,1,77.6
|
||||||
|
21,1,52,177,83,1,79.1
|
||||||
|
22,1,42,166,85,1,81.5
|
||||||
|
23,1,39,166,87,1,81.9
|
||||||
|
24,1,40,190,88,1,84.5
|
||||||
|
41,1,51,191,71,2,66.8
|
||||||
|
42,1,38,199,75,2,72.6
|
||||||
|
43,1,54,196,75,2,69.2
|
||||||
|
44,1,33,190,76,2,72.5
|
||||||
|
45,1,45,160,78,2,72.7
|
||||||
|
46,1,37,194,78,2,76.3
|
||||||
|
47,1,44,163,79,2,73.6
|
||||||
|
48,1,40,171,79,2,72.9
|
||||||
|
49,1,37,198,79,2,71.1
|
||||||
|
50,1,39,180,80,2,81.4
|
||||||
|
51,1,31,182,80,2,75.7
|
||||||
|
67,1,36,155,71,3,68.5
|
||||||
|
68,1,47,179,73,3,72.1
|
||||||
|
69,1,29,166,76,3,72.5
|
||||||
|
70,1,37,173,78,3,77.5
|
||||||
|
71,1,31,177,78,3,75.2
|
||||||
|
72,1,26,179,78,3,69.4
|
||||||
|
73,1,40,179,79,3,74.5
|
||||||
|
74,1,35,183,83,3,80.2
|
||||||
|
75,1,49,177,84,3,79.9
|
||||||
|
76,1,28,164,85,3,79.7
|
||||||
|
77,1,40,167,87,3,77.8
|
||||||
|
78,1,51,175,88,3,81.9
|
|
44
lab2/detected_outliers.csv
Normal file
44
lab2/detected_outliers.csv
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
index,a_1,a_2,a_3,id
|
||||||
|
34,138.485899170175,122.929095022928,126.172417916436,1
|
||||||
|
109,120.530449941131,130.750879252726,124.048371043226,1
|
||||||
|
110,127.311311345106,127.032667937049,126.663304429044,1
|
||||||
|
155,129.368231100468,118.021745958559,125.253403391051,1
|
||||||
|
201,121.425022474716,131.859816486622,129.904568828899,1
|
||||||
|
205,110.102275454427,132.655114719978,127.775782042087,1
|
||||||
|
259,131.435016375916,115.572010552953,115.944265693,1
|
||||||
|
273,128.44891783545,125.560167069906,126.983268672444,1
|
||||||
|
306,127.654182919591,124.13389221102,134.420434743023,1
|
||||||
|
341,122.598465207028,121.078619208422,127.882567550894,1
|
||||||
|
356,121.663994537573,130.691895715687,118.183688992492,1
|
||||||
|
396,131.196588840929,115.495838124466,125.804556729539,1
|
||||||
|
436,117.347471617317,126.427664977539,129.097661740584,1
|
||||||
|
466,131.541919997184,121.394265742161,116.734247753745,1
|
||||||
|
472,120.583043526426,127.066196882151,110.922208446728,1
|
||||||
|
478,127.969482196787,121.999166952041,118.756977485369,1
|
||||||
|
479,113.977630762684,129.563917897202,125.23699868556,1
|
||||||
|
541,120.84664843893,118.400886985925,129.174917770816,1
|
||||||
|
562,132.485442804093,122.712546708689,122.083320312769,1
|
||||||
|
569,118.55534283095,127.254749784513,120.393102285104,1
|
||||||
|
630,124.91219273033,127.27165803814,130.218763471469,1
|
||||||
|
701,129.902362319478,130.256075853809,120.196016971286,1
|
||||||
|
770,114.598223298961,118.93540130129,128.488285033541,1
|
||||||
|
821,116.951391359065,129.571989973166,125.814814692545,1
|
||||||
|
931,112.985171033967,130.427336194935,125.512936296744,1
|
||||||
|
1053,124.338390603879,122.151317355276,130.616547836666,1
|
||||||
|
1065,130.539775478334,119.028932834111,117.282837634037,1
|
||||||
|
1099,129.037419669311,127.107507791353,124.180827870454,1
|
||||||
|
1196,115.599094881676,129.136779334136,126.332356371468,1
|
||||||
|
1205,134.778364721566,118.962106577339,121.889179688135,1
|
||||||
|
1333,130.712202709264,109.484613386081,123.551624037055,1
|
||||||
|
1384,124.707512593542,132.008238660605,111.096035260158,1
|
||||||
|
1397,115.380552840751,129.059473129573,122.765601709126,1
|
||||||
|
1453,122.777272707575,126.947398223481,128.895048251861,1
|
||||||
|
1492,119.526488992333,127.734375888783,127.742296671876,1
|
||||||
|
1532,123.577214053843,130.265344604706,121.513138303347,1
|
||||||
|
1557,124.097805702169,130.251148713943,119.098551931573,1
|
||||||
|
1559,120.627976386864,126.519214114161,126.228370549419,1
|
||||||
|
1812,132.037558446089,113.806457986605,111.833483565089,1
|
||||||
|
1827,122.441410727444,124.761007127792,128.370446549678,1
|
||||||
|
1885,123.52845529026,126.684983276764,126.669140290452,1
|
||||||
|
1923,128.581623737032,129.397198754531,123.452343679533,1
|
||||||
|
1959,120.664772641529,137.454306930058,118.606096825007,1
|
|
79
lab2/lab1.py
Normal file
79
lab2/lab1.py
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
"""
|
||||||
|
author: Wojciech Janota
|
||||||
|
Laboratory: Lab 2, ex. 3
|
||||||
|
"""
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
input_data = pd.read_csv('Diet_R.csv', sep=',')
|
||||||
|
|
||||||
|
# Blank gender cells show up as the string " "; turn them into NaN so that
# they are treated as missing values.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the column selection: that chained in-place
# form is deprecated in pandas and does not modify the DataFrame once
# Copy-on-Write is enabled. np.nan is used because the np.NaN alias was
# removed in NumPy 2.0.
input_data["gender"] = input_data["gender"].replace(" ", np.nan)
# Fill the missing genders with the most frequent value of the column.
input_data["gender"] = input_data["gender"].fillna(input_data["gender"].mode()[0])
|
||||||
|
|
||||||
|
summary_stats = input_data.describe()
|
||||||
|
|
||||||
|
median = summary_stats.loc['50%', :]
|
||||||
|
first_quartile = summary_stats.loc['25%', :]
|
||||||
|
third_quartile = summary_stats.loc['75%', :]
|
||||||
|
mean = summary_stats.loc['mean', :]
|
||||||
|
std_dev = summary_stats.loc['std', :]
|
||||||
|
|
||||||
|
print("Statistics")
|
||||||
|
|
||||||
|
# Print the results
|
||||||
|
print("Median:\n", median)
|
||||||
|
print("\nFirst Quartile:\n", first_quartile)
|
||||||
|
print("\nThird Quartile:\n", third_quartile)
|
||||||
|
print("\nMean:\n", mean)
|
||||||
|
print("\nStandard Deviation:\n", std_dev)
|
||||||
|
|
||||||
|
with open("result.txt", "w") as output_file:
|
||||||
|
result = f"""
|
||||||
|
\t\tGeneral statistics:
|
||||||
|
\tMedian:
|
||||||
|
{median}
|
||||||
|
\tFirst Quartile:
|
||||||
|
{first_quartile}
|
||||||
|
\tThird Quartile:
|
||||||
|
{third_quartile}
|
||||||
|
\tMean:
|
||||||
|
{mean}
|
||||||
|
\tStd. deviation:
|
||||||
|
{std_dev}
|
||||||
|
"""
|
||||||
|
output_file.write(result)
|
||||||
|
|
||||||
|
grouped_stats = input_data.groupby('gender').describe()
|
||||||
|
|
||||||
|
print("Statistics grouped by gender")
|
||||||
|
|
||||||
|
# Extract specific values
|
||||||
|
median = grouped_stats.xs(key='50%', level=1, axis=1)
|
||||||
|
first_quartile = grouped_stats.xs(key='25%', level=1, axis=1)
|
||||||
|
third_quartile = grouped_stats.xs(key='75%', level=1, axis=1)
|
||||||
|
mean = grouped_stats.xs(key='mean', level=1, axis=1)
|
||||||
|
std_dev = grouped_stats.xs(key='std', level=1, axis=1)
|
||||||
|
|
||||||
|
# Print the results
|
||||||
|
print("Median:\n", median)
|
||||||
|
print("\nFirst Quartile:\n", first_quartile)
|
||||||
|
print("\nThird Quartile:\n", third_quartile)
|
||||||
|
print("\nMean:\n", mean)
|
||||||
|
print("\nStandard Deviation:\n", std_dev)
|
||||||
|
|
||||||
|
with open("result.txt", "a") as output_file:
|
||||||
|
result = f"""
|
||||||
|
\n\n
|
||||||
|
\t\tStatistics per gender:
|
||||||
|
\tMedian:
|
||||||
|
{median}
|
||||||
|
\tFirst Quartile:
|
||||||
|
{first_quartile}
|
||||||
|
\tThird Quartile:
|
||||||
|
{third_quartile}
|
||||||
|
\tMean:
|
||||||
|
{mean}
|
||||||
|
\tStd. deviation:
|
||||||
|
{std_dev}
|
||||||
|
"""
|
||||||
|
output_file.write(result)
|
40
lab2/lab2.py
Normal file
40
lab2/lab2.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
"""
|
||||||
|
author: Wojciech Janota
|
||||||
|
laboratory: Lab 2, ex 2
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from scipy.stats import zscore
|
||||||
|
|
||||||
|
# Pipeline: load zb_6.txt, coerce non-numeric cells to NaN, fill NaN with the
# column median, flag rows whose a_1/a_2/a_3 z-score magnitude exceeds the
# threshold, then write the flagged rows and the remaining rows to two CSVs.
input_data = pd.read_csv("zb_6.txt", sep=',')

# replace all non-numeric values with NaN
input_data = input_data.map(lambda x: pd.to_numeric(x, errors='coerce'))

# replace all NaN values with the median of their column
input_data.fillna(input_data.median(), inplace=True)

# # Check for NaN values in prepared DataFrame
# nan_mask = input_data.isna().any(axis=1)
# rows_with_nan = input_data[nan_mask]
# print(rows_with_nan)

# Find outliers using z-score (distance from the mean, in standard deviations)

numeric_columns = input_data[['a_1', 'a_2', 'a_3']]

z_scores = zscore(numeric_columns)

# Threshold for finding outliers (over and under 3 std dev)
threshold = 3

# Row mask of detected outliers: True when any of the three columns has
# |z-score| above the threshold.
# abs() has to wrap the z-scores, not the comparison result; with
# abs(z_scores > threshold) only values ABOVE +threshold were flagged and
# values below -threshold were silently kept.
outliers = (abs(z_scores) > threshold).any(axis=1)
# Filter out only the rows with outliers
outliers_rows = input_data[outliers]

outliers_rows.to_csv("detected_outliers.csv", sep=',', index_label="index")

# Remove detected outliers from the original data and write to new file
cleaned_data = input_data.drop(outliers_rows.index)
cleaned_data.to_csv("zb_6_cleaned_outliers.csv", sep=',', index_label="index")
|
73
lab2/result.txt
Normal file
73
lab2/result.txt
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
|
||||||
|
General statistics:
|
||||||
|
Median:
|
||||||
|
Person 39.50
|
||||||
|
Age 39.00
|
||||||
|
Height 169.50
|
||||||
|
pre.weight 72.00
|
||||||
|
Diet 2.00
|
||||||
|
weight6weeks 68.95
|
||||||
|
Name: 50%, dtype: float64
|
||||||
|
First Quartile:
|
||||||
|
Person 20.25
|
||||||
|
Age 32.25
|
||||||
|
Height 164.25
|
||||||
|
pre.weight 66.00
|
||||||
|
Diet 1.00
|
||||||
|
weight6weeks 61.85
|
||||||
|
Name: 25%, dtype: float64
|
||||||
|
Third Quartile:
|
||||||
|
Person 58.750
|
||||||
|
Age 46.750
|
||||||
|
Height 174.750
|
||||||
|
pre.weight 78.000
|
||||||
|
Diet 3.000
|
||||||
|
weight6weeks 73.825
|
||||||
|
Name: 75%, dtype: float64
|
||||||
|
Mean:
|
||||||
|
Person 39.500000
|
||||||
|
Age 39.153846
|
||||||
|
Height 170.820513
|
||||||
|
pre.weight 72.525641
|
||||||
|
Diet 2.038462
|
||||||
|
weight6weeks 68.680769
|
||||||
|
Name: mean, dtype: float64
|
||||||
|
Std. deviation:
|
||||||
|
Person 22.660538
|
||||||
|
Age 9.815277
|
||||||
|
Height 11.276621
|
||||||
|
pre.weight 8.723344
|
||||||
|
Diet 0.812920
|
||||||
|
weight6weeks 8.924504
|
||||||
|
Name: std, dtype: float64
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Statistics per gender:
|
||||||
|
Median:
|
||||||
|
Person Age Height pre.weight Diet weight6weeks
|
||||||
|
gender
|
||||||
|
0 33.0 37.0 169.0 67.0 2.0 62.4
|
||||||
|
1 47.0 39.0 175.0 79.0 2.0 73.9
|
||||||
|
First Quartile:
|
||||||
|
Person Age Height pre.weight Diet weight6weeks
|
||||||
|
gender
|
||||||
|
0 12.0 31.0 163.0 63.0 1.0 60.0
|
||||||
|
1 23.0 35.0 166.0 76.0 1.0 71.6
|
||||||
|
Third Quartile:
|
||||||
|
Person Age Height pre.weight Diet weight6weeks
|
||||||
|
gender
|
||||||
|
0 55.0 48.0 171.0 72.0 3.0 68.1
|
||||||
|
1 70.0 44.0 182.0 83.0 3.0 79.1
|
||||||
|
Mean:
|
||||||
|
Person Age Height pre.weight Diet weight6weeks
|
||||||
|
gender
|
||||||
|
0 33.555556 39.000000 167.577778 67.755556 2.022222 64.035556
|
||||||
|
1 47.606061 39.363636 175.242424 79.030303 2.060606 75.015152
|
||||||
|
Std. deviation:
|
||||||
|
Person Age Height pre.weight Diet weight6weeks
|
||||||
|
gender
|
||||||
|
0 21.363260 11.190499 9.306618 7.772783 0.811533 8.463670
|
||||||
|
1 22.149971 7.716938 12.326370 4.940172 0.826869 4.629398
|
||||||
|
|
2001
lab2/zb_6.txt
Normal file
2001
lab2/zb_6.txt
Normal file
File diff suppressed because it is too large
Load Diff
1958
lab2/zb_6_cleaned_outliers.csv
Normal file
1958
lab2/zb_6_cleaned_outliers.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user