diff --git a/.gitignore b/.gitignore
index 03a876f..801e060 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ NONE
 data/
 log_importer
 secrets.py
+share/
diff --git a/cleanup_script.py b/cleanup_script.py
index a4e57aa..aa2b338 100755
--- a/cleanup_script.py
+++ b/cleanup_script.py
@@ -1,34 +1,28 @@
 #!/usr/bin/env python3
 import argparse
 import csv
-import sys
-import json
 import logging
-import requests
-import hashlib
-import io
-import datetime
-import pandas as pd
 from rich import progress
 from rich.logging import RichHandler
 from rich.console import Console
 from rich.traceback import install
 install(show_locals=True, locals_max_length=150, locals_max_string=300)
 global YEARS
-YEARS = [2020, 2021, 2022, 2023, 2024]
+YEARS = [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]
 global INFLATION_RATES
+# Harmonised Index of Consumer Prices from the German Federal Statistical Office, as fractions (0.008 == 0.8 %)
 INFLATION_RATES = {
-    2014: 0.8,
-    2015: 0.7,
-    2016: 0.4,
-    2017: 1.7,
-    2018: 1.9,
-    2019: 1.4,
-    2020: 0.4,
-    2021: 3.2,
-    2022: 8.7,
-    2023: 6.0,
-    2024: 2.5,
+    2014: 0.008,
+    2015: 0.007,
+    2016: 0.004,
+    2017: 0.017,
+    2018: 0.019,
+    2019: 0.014,
+    2020: 0.004,
+    2021: 0.032,
+    2022: 0.087,
+    2023: 0.060,
+    2024: 0.025,
 }
 
 class Company:
@@ -78,11 +72,8 @@ class Company:
 
     def calculate_all_tax(self) -> None:
         """Calculate tax for all relevant years."""
-        self.calculate_tax(2020)
-        self.calculate_tax(2021)
-        self.calculate_tax(2022)
-        self.calculate_tax(2023)
-        self.calculate_tax(2024)
+        for year in YEARS:
+            self.calculate_tax(year)
 
     def calculate_tax(self, year: int) -> None:
         """Calculate simple tax from provided values."""
@@ -111,14 +102,7 @@ class Company:
 
     def write(self) -> None:
         """Write the current dataset to CSV"""
-        with open(self.report.output) as out_csv:
-            try:
-                output_reader = pd.read_csv(out_csv)
-                bvd_id = output_reader["bvd_id"]
-                if not self.cleaned_data.get("bvd_id") in bvd_id:
-                    self.writer.writerow(self.cleaned_data)
-            except pd.errors.EmptyDataError:
-                self.writer.writerow(self.cleaned_data)
+        self.writer.writerow(self.cleaned_data)
 
 
 class dataimport:
diff --git a/display_script.py b/display_script.py
new file mode 100755
index 0000000..e28785d
--- /dev/null
+++ b/display_script.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""Draw one box plot per tax-rate variant from a cleaned CSV export."""
+from pandas import read_csv
+import matplotlib.pyplot as plt
+import matplotlib.ticker as mtick
+import matplotlib as mpl
+mpl.rcParams['figure.dpi'] = 1200
+import numpy as np
+import pathlib
+
+# Years that have a "<variant>tax<year>" column; also the x positions of the boxes.
+YEARS = range(2014, 2025)
+# Column-name prefixes; one figure is drawn for each of them.
+VARIANTS = ("nom", "real", "realeff")
+
+
+class display:
+    def __init__(self, filename) -> None:
+        """Read the CSV at ``filename`` and show one box plot per variant.
+
+        For every prefix in VARIANTS the columns "<prefix>tax<year>" of all
+        YEARS are drawn side by side; NaN entries are dropped per column.
+        """
+        with open(filename, mode='r', encoding='utf-8-sig', newline='') as csv_file:
+            frame = read_csv(csv_file)
+        plt.style.use('_mpl-gallery')
+        for style in VARIANTS:
+            fig, ax = plt.subplots(figsize=(12, 6))
+            # One array of non-NaN observations per year, in YEARS order.
+            samples = [
+                np.asarray(frame[f"{style}tax{year}"].dropna())
+                for year in YEARS
+            ]
+            ax.boxplot(samples, positions=list(YEARS), patch_artist=True,
+                       showmeans=False, showfliers=False,
+                       medianprops={"color": "white", "linewidth": 0.5},
+                       boxprops={"facecolor": "C0", "edgecolor": "white",
+                                 "linewidth": 0.5},
+                       whiskerprops={"color": "C0", "linewidth": 1.5},
+                       capprops={"color": "C0", "linewidth": 1.5})
+            # xmax=1 renders 1.0 as "100%"; decimals=0 drops fractional digits.
+            ax.yaxis.set_major_formatter(mtick.PercentFormatter(1, 0))
+            plt.show()
+
+
+display(pathlib.Path('/home/user/bachelorarbeit_importer/data/', 'cleaned_st2.csv'))
diff --git a/export_cleaned.csv b/export_cleaned.csv
new file mode 100644
index 0000000..931d7f5
--- /dev/null
+++ b/export_cleaned.csv
@@ -0,0 +1 @@
+bvd_id,name,gv2014,gn2014,st2014,ek2014,nomtax2014,realtax2014,realefftax2014,gv2015,gn2015,st2015,ek2015,nomtax2015,realtax2015,realefftax2015,gv2016,gn2016,st2016,ek2016,nomtax2016,realtax2016,realefftax2016,gv2017,gn2017,st2017,ek2017,nomtax2017,realtax2017,realefftax2017,gv2018,gn2018,st2018,ek2018,nomtax2018,realtax2018,realefftax2018,gv2019,gn2019,st2019,ek2019,nomtax2019,realtax2019,realefftax2019,gv2020,gn2020,st2020,ek2020,nomtax2020,realtax2020,realefftax2020,gv2021,gn2021,st2021,ek2021,nomtax2021,realtax2021,realefftax2021,gv2022,gn2022,st2022,ek2022,nomtax2022,realtax2022,realefftax2022,gv2023,gn2023,st2023,ek2023,nomtax2023,realtax2023,realefftax2023,gv2024,gn2024,st2024,ek2024,nomtax2024,realtax2024,realefftax2024
diff --git a/pgpass b/pgpass
deleted file mode 100644
index 75c616a..0000000
--- a/pgpass
+++ /dev/null
@@ -1 +0,0 @@
-denkena-consulting.com:5432:bachelorarbeit:bachelorarbeit_w:TWVn2kDqhtGOXN+1qb0Nfb/PLjQI4a22/xtIGv/T2kpf
diff --git a/plot_export/nomial_tax.png b/plot_export/nomial_tax.png
new file mode 100644
index 0000000..5f78c65
Binary files /dev/null and b/plot_export/nomial_tax.png differ
diff --git a/plot_export/real_effective_tax.png b/plot_export/real_effective_tax.png
new file mode 100644
index 0000000..3d5aa4c
Binary files /dev/null and b/plot_export/real_effective_tax.png differ
diff --git a/plot_export/real_tax.png b/plot_export/real_tax.png
new file mode 100644
index 0000000..e3ca048
Binary files /dev/null and b/plot_export/real_tax.png differ
diff --git a/requirements.txt b/requirements.txt
index d56e416..94f6bae 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,19 +3,27 @@ binapy==0.8.0
 certifi==2025.1.31
 cffi==1.17.1
 charset-normalizer==3.4.1
+contourpy==1.3.2
 cryptography==44.0.1
+cycler==0.12.1
+fonttools==4.58.2
 furl==2.1.3
 idna==3.10
 jwskate==0.11.1
+kiwisolver==1.4.8
 markdown-it-py==3.0.0
+matplotlib==3.10.3
 mdurl==0.1.2
 numpy==2.3.0
 orderedmultidict==1.0.1
+packaging==25.0
 pandas==2.3.0
+pillow==11.2.1
 psycopg==3.2.9
 psycopg-pool==3.2.6
 pycparser==2.22
 Pygments==2.19.1
+pyparsing==3.2.3
 python-dateutil==2.9.0.post0
 pytz==2025.2
 requests==2.32.3