This commit is contained in:
Wojciech Janota 2023-11-18 14:25:51 +01:00
commit d24a3848a2
13 changed files with 4338 additions and 0 deletions

3
lab2/.idea/.gitignore generated vendored Normal file
View File

@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml

30
lab2/.idea/csv-editor.xml generated Normal file
View File

@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- CSV editor attributes: each entry maps a project file path to the separator configured for it. -->
<project version="4">
  <component name="CsvFileAttributes">
    <option name="attributeMap">
      <map>
        <entry key="/Diet_R.csv">
          <value>
            <Attribute>
              <option name="separator" value="," />
            </Attribute>
          </value>
        </entry>
        <entry key="/detected_outliers.csv">
          <value>
            <Attribute>
              <option name="separator" value="," />
            </Attribute>
          </value>
        </entry>
        <entry key="/zb_6_cleaned_outliers.csv">
          <value>
            <Attribute>
              <option name="separator" value="," />
            </Attribute>
          </value>
        </entry>
      </map>
    </option>
  </component>
</project>

View File

@ -0,0 +1,6 @@
<!-- Inspection profile manager settings: USE_PROJECT_PROFILE is set to false. -->
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>

10
lab2/.idea/lab2.iml generated Normal file
View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Python module definition; the venv folder under the module directory is excluded from the content root. -->
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/venv" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

7
lab2/.idea/misc.xml generated Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level settings: the Black component and the project root manager both reference the "Python 3.11 (lab2)" SDK. -->
<project version="4">
  <component name="Black">
    <option name="sdkName" value="Python 3.11 (lab2)" />
  </component>
  <component name="ProjectRootManager"
             version="2"
             project-jdk-name="Python 3.11 (lab2)"
             project-jdk-type="Python SDK" />
</project>

8
lab2/.idea/modules.xml generated Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Module registry: lists the module files that belong to this project. -->
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/lab2.iml"
              filepath="$PROJECT_DIR$/.idea/lab2.iml" />
    </modules>
  </component>
</project>

79
lab2/Diet_R.csv Normal file
View File

@ -0,0 +1,79 @@
Person,gender,Age,Height,pre.weight,Diet,weight6weeks
25, ,41,171,60,2,60
26, ,32,174,103,2,103
1,0,22,159,58,1,54.2
2,0,46,192,60,1,54
3,0,55,170,64,1,63.3
4,0,33,171,64,1,61.1
5,0,50,170,65,1,62.2
6,0,50,201,66,1,64
7,0,37,174,67,1,65
8,0,28,176,69,1,60.5
9,0,28,165,70,1,68.1
10,0,45,165,70,1,66.9
11,0,60,173,72,1,70.5
12,0,48,156,72,1,69
13,0,41,163,72,1,68.4
14,0,37,167,82,1,81.1
27,0,44,174,58,2,60.1
28,0,37,172,58,2,56
29,0,41,165,59,2,57.3
30,0,43,171,61,2,56.7
31,0,20,169,62,2,55
32,0,51,174,63,2,62.4
33,0,31,163,63,2,60.3
34,0,54,173,63,2,59.4
35,0,50,166,65,2,62
36,0,48,163,66,2,64
37,0,16,165,68,2,63.8
38,0,37,167,68,2,63.3
39,0,30,161,76,2,72.7
40,0,29,169,77,2,77.5
52,0,51,165,60,3,53
53,0,35,169,62,3,56.4
54,0,21,159,64,3,60.6
55,0,22,169,65,3,58.2
56,0,36,160,66,3,58.2
57,0,20,169,67,3,61.6
58,0,35,163,67,3,60.2
59,0,45,155,69,3,61.8
60,0,58,141,70,3,63
61,0,37,170,70,3,62.7
62,0,31,170,72,3,71.1
63,0,35,171,72,3,64.4
64,0,56,171,73,3,68.9
65,0,48,153,75,3,68.7
66,0,41,157,76,3,71
15,1,39,168,71,1,71.6
16,1,31,158,72,1,70.9
17,1,40,173,74,1,69.5
18,1,50,160,78,1,73.9
19,1,43,162,80,1,71
20,1,25,165,80,1,77.6
21,1,52,177,83,1,79.1
22,1,42,166,85,1,81.5
23,1,39,166,87,1,81.9
24,1,40,190,88,1,84.5
41,1,51,191,71,2,66.8
42,1,38,199,75,2,72.6
43,1,54,196,75,2,69.2
44,1,33,190,76,2,72.5
45,1,45,160,78,2,72.7
46,1,37,194,78,2,76.3
47,1,44,163,79,2,73.6
48,1,40,171,79,2,72.9
49,1,37,198,79,2,71.1
50,1,39,180,80,2,81.4
51,1,31,182,80,2,75.7
67,1,36,155,71,3,68.5
68,1,47,179,73,3,72.1
69,1,29,166,76,3,72.5
70,1,37,173,78,3,77.5
71,1,31,177,78,3,75.2
72,1,26,179,78,3,69.4
73,1,40,179,79,3,74.5
74,1,35,183,83,3,80.2
75,1,49,177,84,3,79.9
76,1,28,164,85,3,79.7
77,1,40,167,87,3,77.8
78,1,51,175,88,3,81.9
1 Person gender Age Height pre.weight Diet weight6weeks
2 25 41 171 60 2 60
3 26 32 174 103 2 103
4 1 0 22 159 58 1 54.2
5 2 0 46 192 60 1 54
6 3 0 55 170 64 1 63.3
7 4 0 33 171 64 1 61.1
8 5 0 50 170 65 1 62.2
9 6 0 50 201 66 1 64
10 7 0 37 174 67 1 65
11 8 0 28 176 69 1 60.5
12 9 0 28 165 70 1 68.1
13 10 0 45 165 70 1 66.9
14 11 0 60 173 72 1 70.5
15 12 0 48 156 72 1 69
16 13 0 41 163 72 1 68.4
17 14 0 37 167 82 1 81.1
18 27 0 44 174 58 2 60.1
19 28 0 37 172 58 2 56
20 29 0 41 165 59 2 57.3
21 30 0 43 171 61 2 56.7
22 31 0 20 169 62 2 55
23 32 0 51 174 63 2 62.4
24 33 0 31 163 63 2 60.3
25 34 0 54 173 63 2 59.4
26 35 0 50 166 65 2 62
27 36 0 48 163 66 2 64
28 37 0 16 165 68 2 63.8
29 38 0 37 167 68 2 63.3
30 39 0 30 161 76 2 72.7
31 40 0 29 169 77 2 77.5
32 52 0 51 165 60 3 53
33 53 0 35 169 62 3 56.4
34 54 0 21 159 64 3 60.6
35 55 0 22 169 65 3 58.2
36 56 0 36 160 66 3 58.2
37 57 0 20 169 67 3 61.6
38 58 0 35 163 67 3 60.2
39 59 0 45 155 69 3 61.8
40 60 0 58 141 70 3 63
41 61 0 37 170 70 3 62.7
42 62 0 31 170 72 3 71.1
43 63 0 35 171 72 3 64.4
44 64 0 56 171 73 3 68.9
45 65 0 48 153 75 3 68.7
46 66 0 41 157 76 3 71
47 15 1 39 168 71 1 71.6
48 16 1 31 158 72 1 70.9
49 17 1 40 173 74 1 69.5
50 18 1 50 160 78 1 73.9
51 19 1 43 162 80 1 71
52 20 1 25 165 80 1 77.6
53 21 1 52 177 83 1 79.1
54 22 1 42 166 85 1 81.5
55 23 1 39 166 87 1 81.9
56 24 1 40 190 88 1 84.5
57 41 1 51 191 71 2 66.8
58 42 1 38 199 75 2 72.6
59 43 1 54 196 75 2 69.2
60 44 1 33 190 76 2 72.5
61 45 1 45 160 78 2 72.7
62 46 1 37 194 78 2 76.3
63 47 1 44 163 79 2 73.6
64 48 1 40 171 79 2 72.9
65 49 1 37 198 79 2 71.1
66 50 1 39 180 80 2 81.4
67 51 1 31 182 80 2 75.7
68 67 1 36 155 71 3 68.5
69 68 1 47 179 73 3 72.1
70 69 1 29 166 76 3 72.5
71 70 1 37 173 78 3 77.5
72 71 1 31 177 78 3 75.2
73 72 1 26 179 78 3 69.4
74 73 1 40 179 79 3 74.5
75 74 1 35 183 83 3 80.2
76 75 1 49 177 84 3 79.9
77 76 1 28 164 85 3 79.7
78 77 1 40 167 87 3 77.8
79 78 1 51 175 88 3 81.9

View File

@ -0,0 +1,44 @@
index,a_1,a_2,a_3,id
34,138.485899170175,122.929095022928,126.172417916436,1
109,120.530449941131,130.750879252726,124.048371043226,1
110,127.311311345106,127.032667937049,126.663304429044,1
155,129.368231100468,118.021745958559,125.253403391051,1
201,121.425022474716,131.859816486622,129.904568828899,1
205,110.102275454427,132.655114719978,127.775782042087,1
259,131.435016375916,115.572010552953,115.944265693,1
273,128.44891783545,125.560167069906,126.983268672444,1
306,127.654182919591,124.13389221102,134.420434743023,1
341,122.598465207028,121.078619208422,127.882567550894,1
356,121.663994537573,130.691895715687,118.183688992492,1
396,131.196588840929,115.495838124466,125.804556729539,1
436,117.347471617317,126.427664977539,129.097661740584,1
466,131.541919997184,121.394265742161,116.734247753745,1
472,120.583043526426,127.066196882151,110.922208446728,1
478,127.969482196787,121.999166952041,118.756977485369,1
479,113.977630762684,129.563917897202,125.23699868556,1
541,120.84664843893,118.400886985925,129.174917770816,1
562,132.485442804093,122.712546708689,122.083320312769,1
569,118.55534283095,127.254749784513,120.393102285104,1
630,124.91219273033,127.27165803814,130.218763471469,1
701,129.902362319478,130.256075853809,120.196016971286,1
770,114.598223298961,118.93540130129,128.488285033541,1
821,116.951391359065,129.571989973166,125.814814692545,1
931,112.985171033967,130.427336194935,125.512936296744,1
1053,124.338390603879,122.151317355276,130.616547836666,1
1065,130.539775478334,119.028932834111,117.282837634037,1
1099,129.037419669311,127.107507791353,124.180827870454,1
1196,115.599094881676,129.136779334136,126.332356371468,1
1205,134.778364721566,118.962106577339,121.889179688135,1
1333,130.712202709264,109.484613386081,123.551624037055,1
1384,124.707512593542,132.008238660605,111.096035260158,1
1397,115.380552840751,129.059473129573,122.765601709126,1
1453,122.777272707575,126.947398223481,128.895048251861,1
1492,119.526488992333,127.734375888783,127.742296671876,1
1532,123.577214053843,130.265344604706,121.513138303347,1
1557,124.097805702169,130.251148713943,119.098551931573,1
1559,120.627976386864,126.519214114161,126.228370549419,1
1812,132.037558446089,113.806457986605,111.833483565089,1
1827,122.441410727444,124.761007127792,128.370446549678,1
1885,123.52845529026,126.684983276764,126.669140290452,1
1923,128.581623737032,129.397198754531,123.452343679533,1
1959,120.664772641529,137.454306930058,118.606096825007,1
1 index a_1 a_2 a_3 id
2 34 138.485899170175 122.929095022928 126.172417916436 1
3 109 120.530449941131 130.750879252726 124.048371043226 1
4 110 127.311311345106 127.032667937049 126.663304429044 1
5 155 129.368231100468 118.021745958559 125.253403391051 1
6 201 121.425022474716 131.859816486622 129.904568828899 1
7 205 110.102275454427 132.655114719978 127.775782042087 1
8 259 131.435016375916 115.572010552953 115.944265693 1
9 273 128.44891783545 125.560167069906 126.983268672444 1
10 306 127.654182919591 124.13389221102 134.420434743023 1
11 341 122.598465207028 121.078619208422 127.882567550894 1
12 356 121.663994537573 130.691895715687 118.183688992492 1
13 396 131.196588840929 115.495838124466 125.804556729539 1
14 436 117.347471617317 126.427664977539 129.097661740584 1
15 466 131.541919997184 121.394265742161 116.734247753745 1
16 472 120.583043526426 127.066196882151 110.922208446728 1
17 478 127.969482196787 121.999166952041 118.756977485369 1
18 479 113.977630762684 129.563917897202 125.23699868556 1
19 541 120.84664843893 118.400886985925 129.174917770816 1
20 562 132.485442804093 122.712546708689 122.083320312769 1
21 569 118.55534283095 127.254749784513 120.393102285104 1
22 630 124.91219273033 127.27165803814 130.218763471469 1
23 701 129.902362319478 130.256075853809 120.196016971286 1
24 770 114.598223298961 118.93540130129 128.488285033541 1
25 821 116.951391359065 129.571989973166 125.814814692545 1
26 931 112.985171033967 130.427336194935 125.512936296744 1
27 1053 124.338390603879 122.151317355276 130.616547836666 1
28 1065 130.539775478334 119.028932834111 117.282837634037 1
29 1099 129.037419669311 127.107507791353 124.180827870454 1
30 1196 115.599094881676 129.136779334136 126.332356371468 1
31 1205 134.778364721566 118.962106577339 121.889179688135 1
32 1333 130.712202709264 109.484613386081 123.551624037055 1
33 1384 124.707512593542 132.008238660605 111.096035260158 1
34 1397 115.380552840751 129.059473129573 122.765601709126 1
35 1453 122.777272707575 126.947398223481 128.895048251861 1
36 1492 119.526488992333 127.734375888783 127.742296671876 1
37 1532 123.577214053843 130.265344604706 121.513138303347 1
38 1557 124.097805702169 130.251148713943 119.098551931573 1
39 1559 120.627976386864 126.519214114161 126.228370549419 1
40 1812 132.037558446089 113.806457986605 111.833483565089 1
41 1827 122.441410727444 124.761007127792 128.370446549678 1
42 1885 123.52845529026 126.684983276764 126.669140290452 1
43 1923 128.581623737032 129.397198754531 123.452343679533 1
44 1959 120.664772641529 137.454306930058 118.606096825007 1

79
lab2/lab1.py Normal file
View File

@ -0,0 +1,79 @@
"""
author: Wojciech Janota
Laboratory: Lab 2, ex. 3
"""
import pandas as pd
import numpy as np

# describe() row labels, in the order: median, first quartile, third quartile,
# mean, standard deviation. Every helper below relies on this ordering.
STAT_KEYS = ('50%', '25%', '75%', 'mean', 'std')


def fill_missing_gender(data):
    """Return a copy of ``data`` whose blank ``gender`` cells hold the most common gender.

    A cell counts as blank when it contains a single space (" "), which is how
    missing values appear in Diet_R.csv. The input frame is not modified.

    :param data: DataFrame with a ``gender`` column.
    :return: new DataFrame with the ``gender`` column filled.
    """
    # np.nan (lowercase) is the supported spelling; np.NaN was removed in NumPy 2.0.
    gender = data["gender"].replace(" ", np.nan)
    modes = gender.mode()
    # mode() is empty when every value is missing; leave the column as-is then.
    if not modes.empty:
        gender = gender.fillna(modes[0])
    # Assign the column back explicitly: in-place calls on a column selection
    # (data["gender"].replace(..., inplace=True)) do not reliably update the
    # parent frame and are deprecated by pandas.
    return data.assign(gender=gender)


def print_stats(median, first_quartile, third_quartile, mean, std_dev):
    """Print the five statistics to stdout, each under its own caption."""
    print("Median:\n", median)
    print("\nFirst Quartile:\n", first_quartile)
    print("\nThird Quartile:\n", third_quartile)
    print("\nMean:\n", mean)
    print("\nStandard Deviation:\n", std_dev)


def render_report(heading, median, first_quartile, third_quartile, mean, std_dev):
    """Build the text section written to result.txt for one set of statistics.

    :param heading: first line of the section (may carry leading newlines/tabs).
    :return: the formatted section, starting and ending with a newline.
    """
    return (
        f"\n{heading}\n"
        f"\tMedian:\n{median}\n"
        f"\tFirst Quartile:\n{first_quartile}\n"
        f"\tThird Quartile:\n{third_quartile}\n"
        f"\tMean:\n{mean}\n"
        f"\tStd. deviation:\n{std_dev}\n"
    )


def main():
    """Compute overall and per-gender statistics for Diet_R.csv and save them to result.txt."""
    input_data = fill_missing_gender(pd.read_csv('Diet_R.csv', sep=','))

    # Statistics over the whole data set
    summary_stats = input_data.describe()
    overall = [summary_stats.loc[key, :] for key in STAT_KEYS]
    print("Statistics")
    print_stats(*overall)
    with open("result.txt", "w") as output_file:
        output_file.write(render_report("\t\tGeneral statistics:", *overall))

    # Statistics per gender; describe() on a groupby yields (column, statistic)
    # column pairs, so each statistic is selected on the second column level.
    grouped_stats = input_data.groupby('gender').describe()
    print("Statistics grouped by gender")
    per_gender = [grouped_stats.xs(key=key, level=1, axis=1) for key in STAT_KEYS]
    print_stats(*per_gender)
    # Append so the general section written above is kept.
    with open("result.txt", "a") as output_file:
        output_file.write(render_report("\n\n\n\t\tStatistics per gender:", *per_gender))


if __name__ == "__main__":
    main()

40
lab2/lab2.py Normal file
View File

@ -0,0 +1,40 @@
"""
author: Wojciech Janota
laboratory: Lab 2, ex 2
"""
import pandas as pd
from scipy.stats import zscore

# Columns that take part in the z-score outlier test
VALUE_COLUMNS = ['a_1', 'a_2', 'a_3']


def find_outliers(data, threshold=3):
    """Return a boolean row mask marking rows that contain an outlier.

    A row is flagged when the z-score of any of its VALUE_COLUMNS values lies
    more than ``threshold`` standard deviations away from the column mean, in
    either direction.

    :param data: DataFrame holding the numeric columns listed in VALUE_COLUMNS.
    :param threshold: maximum allowed absolute z-score.
    :return: one boolean per row of ``data``; True marks an outlier row.
    """
    z_scores = zscore(data[VALUE_COLUMNS])
    # abs() has to wrap the scores, not the comparison: abs(z > t) only ever
    # flags values far above the mean and silently misses those far below it.
    return (abs(z_scores) > threshold).any(axis=1)


def main():
    """Clean zb_6.txt, then write the detected outliers and the remaining rows to CSV."""
    input_data = pd.read_csv("zb_6.txt", sep=',')

    # replace all non-numeric values with NaN (column-wise conversion)
    input_data = input_data.apply(pd.to_numeric, errors='coerce')

    # replace all NaN values with the column median
    input_data = input_data.fillna(input_data.median())

    # # Check for NaN values in prepared DataFrame
    # nan_mask = input_data.isna().any(axis=1)
    # rows_with_nan = input_data[nan_mask]
    # print(rows_with_nan)

    # Find outliers using the z-score (more than 3 std dev above or below the mean)
    outliers = find_outliers(input_data, threshold=3)

    # Keep only the rows with outliers
    outliers_rows = input_data[outliers]
    outliers_rows.to_csv("detected_outliers.csv", sep=',', index_label="index")

    # Remove detected outliers from the original data and write to a new file
    cleaned_data = input_data.drop(outliers_rows.index)
    cleaned_data.to_csv("zb_6_cleaned_outliers.csv", sep=',', index_label="index")


if __name__ == "__main__":
    main()

73
lab2/result.txt Normal file
View File

@ -0,0 +1,73 @@
General statistics:
Median:
Person 39.50
Age 39.00
Height 169.50
pre.weight 72.00
Diet 2.00
weight6weeks 68.95
Name: 50%, dtype: float64
First Quartile:
Person 20.25
Age 32.25
Height 164.25
pre.weight 66.00
Diet 1.00
weight6weeks 61.85
Name: 25%, dtype: float64
Third Quartile:
Person 58.750
Age 46.750
Height 174.750
pre.weight 78.000
Diet 3.000
weight6weeks 73.825
Name: 75%, dtype: float64
Mean:
Person 39.500000
Age 39.153846
Height 170.820513
pre.weight 72.525641
Diet 2.038462
weight6weeks 68.680769
Name: mean, dtype: float64
Std. deviation:
Person 22.660538
Age 9.815277
Height 11.276621
pre.weight 8.723344
Diet 0.812920
weight6weeks 8.924504
Name: std, dtype: float64
Statistics per gender:
Median:
Person Age Height pre.weight Diet weight6weeks
gender
0 33.0 37.0 169.0 67.0 2.0 62.4
1 47.0 39.0 175.0 79.0 2.0 73.9
First Quartile:
Person Age Height pre.weight Diet weight6weeks
gender
0 12.0 31.0 163.0 63.0 1.0 60.0
1 23.0 35.0 166.0 76.0 1.0 71.6
Third Quartile:
Person Age Height pre.weight Diet weight6weeks
gender
0 55.0 48.0 171.0 72.0 3.0 68.1
1 70.0 44.0 182.0 83.0 3.0 79.1
Mean:
Person Age Height pre.weight Diet weight6weeks
gender
0 33.555556 39.000000 167.577778 67.755556 2.022222 64.035556
1 47.606061 39.363636 175.242424 79.030303 2.060606 75.015152
Std. deviation:
Person Age Height pre.weight Diet weight6weeks
gender
0 21.363260 11.190499 9.306618 7.772783 0.811533 8.463670
1 22.149971 7.716938 12.326370 4.940172 0.826869 4.629398

2001
lab2/zb_6.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff