add display script, fix critical cleanup bug, first exports

This commit is contained in:
Federico Justus Denkena 2025-06-12 11:04:24 +02:00
parent fb104f9790
commit 1859f0913b
Signed by: f-denkena
GPG Key ID: 34D3C40435BDAACD
9 changed files with 67 additions and 33 deletions

1
.gitignore vendored
View File

@ -7,3 +7,4 @@ NONE
data/ data/
log_importer log_importer
secrets.py secrets.py
share/

View File

@ -1,34 +1,28 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import argparse import argparse
import csv import csv
import sys
import json
import logging import logging
import requests
import hashlib
import io
import datetime
import pandas as pd
from rich import progress from rich import progress
from rich.logging import RichHandler from rich.logging import RichHandler
from rich.console import Console from rich.console import Console
from rich.traceback import install from rich.traceback import install
install(show_locals=True, locals_max_length=150, locals_max_string=300) install(show_locals=True, locals_max_length=150, locals_max_string=300)
global YEARS global YEARS
YEARS = [2020, 2021, 2022, 2023, 2024] YEARS = [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]
global INFLATION_RATES global INFLATION_RATES
#Harmonisierter Verbraucherpreisindex des statistischen Bundesamts
INFLATION_RATES = { INFLATION_RATES = {
2014: 0.8, 2014: 0.008,
2015: 0.7, 2015: 0.007,
2016: 0.4, 2016: 0.004,
2017: 1.7, 2017: 0.017,
2018: 1.9, 2018: 0.019,
2019: 1.4, 2019: 0.014,
2020: 0.4, 2020: 0.004,
2021: 3.2, 2021: 0.032,
2022: 8.7, 2022: 0.087,
2023: 6.0, 2023: 0.060,
2024: 2.5, 2024: 0.025,
} }
class Company: class Company:
@ -78,11 +72,8 @@ class Company:
def calculate_all_tax(self) -> None: def calculate_all_tax(self) -> None:
"""Calculate tax for all relevant years.""" """Calculate tax for all relevant years."""
self.calculate_tax(2020) for year in YEARS:
self.calculate_tax(2021) self.calculate_tax(year)
self.calculate_tax(2022)
self.calculate_tax(2023)
self.calculate_tax(2024)
def calculate_tax(self, year: int) -> None: def calculate_tax(self, year: int) -> None:
"""Calculate simple tax from provided values.""" """Calculate simple tax from provided values."""
@ -111,14 +102,7 @@ class Company:
def write(self) -> None: def write(self) -> None:
"""Write the current dataset to CSV""" """Write the current dataset to CSV"""
with open(self.report.output) as out_csv: self.writer.writerow(self.cleaned_data)
try:
output_reader = pd.read_csv(out_csv)
bvd_id = output_reader["bvd_id"]
if not self.cleaned_data.get("bvd_id") in bvd_id:
self.writer.writerow(self.cleaned_data)
except pd.errors.EmptyDataError:
self.writer.writerow(self.cleaned_data)
class dataimport: class dataimport:

41
display_script.py Executable file
View File

@ -0,0 +1,41 @@
#!/usr/bin/env python3
from pandas import read_csv
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib as mpl
# Set the default resolution of every figure created after this point to 1200 dpi.
mpl.rcParams['figure.dpi'] = 1200
import numpy as np
import pathlib
class display:
    """Show one box plot per tax-rate variant, with one box per year."""

    # Years for which the export carries a ``<style>tax<year>`` column.
    YEARS = tuple(range(2014, 2025))
    # Column-name prefixes; the export has ``nomtax*``, ``realtax*`` and
    # ``realefftax*`` columns (presumably nominal / real / real effective
    # tax rates -- confirm against the importer).
    STYLES = ("nom", "real", "realeff")

    def __init__(self, filename) -> None:
        """Read the CSV at ``filename`` and display the box plots.

        For each prefix in ``STYLES`` a 12x6 inch figure is created that
        holds one box per year in ``YEARS``, built from the non-missing
        values of the column ``f"{style}tax{year}"``.

        Args:
            filename: Path of the CSV file to read. It is opened as
                ``utf-8-sig`` so that a leading byte-order mark is dropped.

        Raises:
            OSError: If ``filename`` cannot be opened.
            KeyError: If one of the expected columns is missing.
        """
        with open(filename, mode='r', encoding='utf-8-sig', newline='') as csv_file:
            frame = read_csv(csv_file)

        plt.style.use('_mpl-gallery')
        for style in self.STYLES:
            _fig, ax = plt.subplots(figsize=(12, 6))
            # One array per year; missing values are dropped per column, so
            # the arrays may differ in length.
            samples = [
                np.asarray(frame[f"{style}tax{year}"].dropna())
                for year in self.YEARS
            ]
            ax.boxplot(
                samples,
                positions=list(self.YEARS),
                patch_artist=True,
                showmeans=False,
                showfliers=False,
                medianprops={"color": "white", "linewidth": 0.5},
                boxprops={"facecolor": "C0", "edgecolor": "white",
                          "linewidth": 0.5},
                whiskerprops={"color": "C0", "linewidth": 1.5},
                capprops={"color": "C0", "linewidth": 1.5},
            )
            # Values are fractions: 1.0 is rendered as "100%", no decimals.
            ax.yaxis.set_major_formatter(mtick.PercentFormatter(1, 0))
            # NOTE(review): the original indentation was lost; this assumes
            # one show() call per figure -- confirm.
            plt.show()
# Script entry: plot the cleaned export from the project's data directory.
display(pathlib.Path('/home/user/bachelorarbeit_importer/data/') / 'cleaned_st2.csv')

1
export_cleaned.csv Normal file
View File

@ -0,0 +1 @@
bvd_id,name,gv2014,gn2014,st2014,ek2014,nomtax2014,realtax2014,realefftax2014,gv2015,gn2015,st2015,ek2015,nomtax2015,realtax2015,realefftax2015,gv2016,gn2016,st2016,ek2016,nomtax2016,realtax2016,realefftax2016,gv2017,gn2017,st2017,ek2017,nomtax2017,realtax2017,realefftax2017,gv2018,gn2018,st2018,ek2018,nomtax2018,realtax2018,realefftax2018,gv2019,gn2019,st2019,ek2019,nomtax2019,realtax2019,realefftax2019,gv2020,gn2020,st2020,ek2020,nomtax2020,realtax2020,realefftax2020,gv2021,gn2021,st2021,ek2021,nomtax2021,realtax2021,realefftax2021,gv2022,gn2022,st2022,ek2022,nomtax2022,realtax2022,realefftax2022,gv2023,gn2023,st2023,ek2023,nomtax2023,realtax2023,realefftax2023,gv2024,gn2024,st2024,ek2024,nomtax2024,realtax2024,realefftax2024
1 bvd_id name gv2014 gn2014 st2014 ek2014 nomtax2014 realtax2014 realefftax2014 gv2015 gn2015 st2015 ek2015 nomtax2015 realtax2015 realefftax2015 gv2016 gn2016 st2016 ek2016 nomtax2016 realtax2016 realefftax2016 gv2017 gn2017 st2017 ek2017 nomtax2017 realtax2017 realefftax2017 gv2018 gn2018 st2018 ek2018 nomtax2018 realtax2018 realefftax2018 gv2019 gn2019 st2019 ek2019 nomtax2019 realtax2019 realefftax2019 gv2020 gn2020 st2020 ek2020 nomtax2020 realtax2020 realefftax2020 gv2021 gn2021 st2021 ek2021 nomtax2021 realtax2021 realefftax2021 gv2022 gn2022 st2022 ek2022 nomtax2022 realtax2022 realefftax2022 gv2023 gn2023 st2023 ek2023 nomtax2023 realtax2023 realefftax2023 gv2024 gn2024 st2024 ek2024 nomtax2024 realtax2024 realefftax2024

1
pgpass
View File

@ -1 +0,0 @@
denkena-consulting.com:5432:bachelorarbeit:bachelorarbeit_w:TWVn2kDqhtGOXN+1qb0Nfb/PLjQI4a22/xtIGv/T2kpf

BIN
plot_export/nomial_tax.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 606 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 574 KiB

BIN
plot_export/real_tax.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 606 KiB

View File

@ -3,19 +3,27 @@ binapy==0.8.0
certifi==2025.1.31 certifi==2025.1.31
cffi==1.17.1 cffi==1.17.1
charset-normalizer==3.4.1 charset-normalizer==3.4.1
contourpy==1.3.2
cryptography==44.0.1 cryptography==44.0.1
cycler==0.12.1
fonttools==4.58.2
furl==2.1.3 furl==2.1.3
idna==3.10 idna==3.10
jwskate==0.11.1 jwskate==0.11.1
kiwisolver==1.4.8
markdown-it-py==3.0.0 markdown-it-py==3.0.0
matplotlib==3.10.3
mdurl==0.1.2 mdurl==0.1.2
numpy==2.3.0 numpy==2.3.0
orderedmultidict==1.0.1 orderedmultidict==1.0.1
packaging==25.0
pandas==2.3.0 pandas==2.3.0
pillow==11.2.1
psycopg==3.2.9 psycopg==3.2.9
psycopg-pool==3.2.6 psycopg-pool==3.2.6
pycparser==2.22 pycparser==2.22
Pygments==2.19.1 Pygments==2.19.1
pyparsing==3.2.3
python-dateutil==2.9.0.post0 python-dateutil==2.9.0.post0
pytz==2025.2 pytz==2025.2
requests==2.32.3 requests==2.32.3