complete preliminary cleanup

This commit is contained in:
Federico Justus Denkena 2025-06-11 12:18:06 +02:00
parent f241db6b1b
commit 38b4ee44e6
Signed by: f-denkena
GPG Key ID: 34D3C40435BDAACD
2 changed files with 208 additions and 167 deletions

View File

@ -8,6 +8,7 @@ import requests
import hashlib import hashlib
import io import io
import datetime import datetime
import pandas as pd
from rich import progress from rich import progress
from rich.logging import RichHandler from rich.logging import RichHandler
from rich.console import Console from rich.console import Console
@ -15,189 +16,223 @@ from rich.traceback import install
install(show_locals=True, locals_max_length=150, locals_max_string=300) install(show_locals=True, locals_max_length=150, locals_max_string=300)
class Company:
    """One company row of the import CSV, normalised into ``cleaned_data``.

    ``cleaned_data`` always holds ``bvd_id`` and ``name``.  For every year in
    ``YEARS`` it may additionally hold the integer fields

    * ``gv<year>`` - "Gewinn/(Verlust) vor Steuern" (profit/loss before tax)
    * ``gn<year>`` - "Gewinn/(Verlust) nach Steuern" (profit/loss after tax)
    * ``st<year>`` - "Steuern" (taxes)
    * ``ek<year>`` - "Eigenkapital" (equity)

    A field is simply absent when the source cell is missing, empty or not an
    integer; callers therefore read values with ``cleaned_data.get(...)``.

    Args:
        data: Mapping of source column name to raw cell value (one CSV row).
        report: Object providing ``log`` (a logger), the counters
            ``valid_data`` / ``invalid_data`` and ``output`` (path of the
            output CSV).
        writer: ``csv.DictWriter``-like object used by :meth:`write`.

    Raises:
        KeyError: If ``data`` lacks "BvD ID Nummer" or "Unternehmensname".
    """

    # Fiscal years for which the import provides columns.
    YEARS = (2020, 2021, 2022, 2023, 2024)

    # cleaned_data key prefix -> source column templates, in priority order.
    # The profit columns exist in two spellings (with/without parentheses).
    COLUMNS = {
        "gv": ("Gewinn/(Verlust) vor Steuern EUR {year}",
               "Gewinn/Verlust vor Steuern EUR {year}"),
        "gn": ("Gewinn/(Verlust) nach Steuern EUR {year}",
               "Gewinn/Verlust nach Steuern EUR {year}"),
        "st": ("Steuern EUR {year}",),
        "ek": ("Eigenkapital EUR {year}",),
    }

    def __init__(self, data, report, writer):
        self.data = data
        self.writer = writer
        self.report = report
        self.cleaned_data = {
            "bvd_id": data["BvD ID Nummer"],
            "name": data["Unternehmensname"],
        }
        # Prefix-major order keeps the key order of the hand-written version
        # (all gv*, then gn*, st*, ek*).
        for prefix, templates in self.COLUMNS.items():
            for year in self.YEARS:
                key = f"{prefix}{year}"
                columns = [template.format(year=year) for template in templates]
                value = self._parse_int(key, columns)
                if value is not None:
                    self.cleaned_data[key] = value

    def _parse_int(self, key, columns):
        """Return the first non-empty cell among ``columns`` as int, else None.

        Missing columns and empty/``None`` cells are skipped instead of
        raising ``KeyError``/``TypeError``.  If the first non-empty cell is
        not an integer, no further column is tried (same as the original
        if/elif chain) and ``None`` is returned.  Both outcomes are logged at
        debug level.
        """
        name = self.cleaned_data["name"]
        for column in columns:
            raw = self.data.get(column)
            if raw is None or raw == "":
                continue
            try:
                return int(raw)
            except (TypeError, ValueError):
                self.report.log.debug(f"{name}: {key.upper()} ValueError")
                return None
        self.report.log.debug(f"{name}: {key.upper()} empty value")
        return None

    def calculate_tax(self):
        """Fill in missing ``st<year>`` as ``gv<year> - gn<year>``.

        A tax value that is absent *or zero* counts as missing (truthiness
        test, kept from the original); it is derived only when both profit
        figures are present.
        """
        cleaned = self.cleaned_data
        for year in self.YEARS:
            if cleaned.get(f"st{year}"):
                continue
            before_tax = cleaned.get(f"gv{year}")
            after_tax = cleaned.get(f"gn{year}")
            if before_tax is not None and after_tax is not None:
                cleaned[f"st{year}"] = before_tax - after_tax

    def validate(self):
        """Count each year as valid or invalid on ``report``.

        A year is valid when both its tax and its equity value are present
        and non-zero.  Increments ``report.valid_data`` or
        ``report.invalid_data`` once per year.
        """
        # Fallback, in case tax wasn't already calculated.
        self.calculate_tax()
        cleaned = self.cleaned_data
        for year in self.YEARS:
            if cleaned.get(f"st{year}") and cleaned.get(f"ek{year}"):
                self.report.valid_data += 1
            else:
                self.report.invalid_data += 1

    def calculate_data(self):
        """Derive the tax ratios for every year that has complete data.

        For a year whose ``st``, ``gv``, ``gn`` and ``ek`` values are all
        present and non-zero (which also rules out division by zero), stores
        ``nomtax<year>``, ``realtax<year>`` and ``realefftax<year>`` in
        ``cleaned_data``.  The formulas, including the 0.4 factor, are taken
        unchanged from the original 2020-only implementation; the meaning of
        that factor is not documented in this file.
        """
        cleaned = self.cleaned_data
        for year in self.YEARS:
            tax = cleaned.get(f"st{year}")
            before_tax = cleaned.get(f"gv{year}")
            after_tax = cleaned.get(f"gn{year}")
            equity = cleaned.get(f"ek{year}")
            if not (tax and before_tax and after_tax and equity):
                continue
            cleaned[f"nomtax{year}"] = tax / before_tax
            cleaned[f"realtax{year}"] = (tax + (0.4 * before_tax)) / before_tax
            cleaned[f"realefftax{year}"] = (
                tax + (0.4 * before_tax) + (0.4 * equity)
            ) / before_tax
            # Debug log instead of print() so per-row output does not flood
            # stdout.
            self.report.log.debug(
                f"{cleaned['name']}: {year} nomtax={cleaned[f'nomtax{year}']} "
                f"realtax={cleaned[f'realtax{year}']} "
                f"realefftax={cleaned[f'realefftax{year}']}"
            )

    def write(self):
        """Write the current (validated!) dataset to CSV.

        The row is written through ``self.writer`` unless its ``bvd_id``
        already occurs in the ``bvd_id`` column of the file at
        ``report.output``.  An empty output file, or one without a ``bvd_id``
        column, is treated as containing no ids.

        Raises:
            FileNotFoundError: If ``report.output`` does not exist.
        """
        # DictWriter rejects keys outside its fieldnames (extrasaction
        # defaults to "raise"), and calculate_data() adds derived keys, so
        # restrict the row to the columns the writer knows.
        fieldnames = getattr(self.writer, "fieldnames", None)
        if fieldnames:
            row = {key: value for key, value in self.cleaned_data.items()
                   if key in fieldnames}
        else:
            row = self.cleaned_data
        try:
            # dtype=str: ids read from the source CSV are strings, so compare
            # like with like.
            existing = pd.read_csv(self.report.output, dtype=str,
                                   encoding="utf-8-sig")
        except pd.errors.EmptyDataError:
            known_ids = set()
        else:
            # `x in series` would test the index labels, not the values.
            if "bvd_id" in existing.columns:
                known_ids = set(existing["bvd_id"])
            else:
                known_ids = set()
        if str(self.cleaned_data.get("bvd_id")) not in known_ids:
            self.writer.writerow(row)
class dataimport: class dataimport:
@ -216,14 +251,14 @@ class dataimport:
if self.logfile != "NONE": if self.logfile != "NONE":
self.logconsole = Console(file=self.logfile) self.logconsole = Console(file=self.logfile)
logging.basicConfig( logging.basicConfig(
level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[ level="INFO", format=FORMAT, datefmt="[%X]", handlers=[
RichHandler(rich_tracebacks=True, console=self.progress.console, RichHandler(rich_tracebacks=True, console=self.progress.console,
show_path=False, show_time=False, level="NOTSET"), show_path=False, show_time=False, level="NOTSET"),
RichHandler(rich_tracebacks=True, console=self.logconsole, RichHandler(rich_tracebacks=True, console=self.logconsole,
show_path=False, level="WARNING")]) show_path=False, level="WARNING")])
else: else:
logging.basicConfig( logging.basicConfig(
level="NOTSET", format=FORMAT, datefmt="[%X]", handlers=[ level="INFO", format=FORMAT, datefmt="[%X]", handlers=[
RichHandler(rich_tracebacks=True, console=self.progress.console, RichHandler(rich_tracebacks=True, console=self.progress.console,
show_path=False, show_time=False, level="NOTSET")]) show_path=False, show_time=False, level="NOTSET")])
@ -243,10 +278,9 @@ class dataimport:
with open(self.filename, mode='r', encoding='utf-8-sig', newline='') as csv_file: with open(self.filename, mode='r', encoding='utf-8-sig', newline='') as csv_file:
with open(self.output, mode='a+', encoding='utf-8-sig', newline='') as output_csv: with open(self.output, mode='a+', encoding='utf-8-sig', newline='') as output_csv:
csv_reader = csv.DictReader(csv_file, delimiter=',') csv_reader = csv.DictReader(csv_file, delimiter=',')
out_names = [] fieldnames = ['bvd_id', 'name', 'gv2020', 'gn2020', 'st2020', 'ek2020', 'gv2021', 'gn2021', 'st2021', 'ek2021', 'gv2022', 'gn2022', 'st2022', 'ek2022', 'gv2023', 'gn2023', 'st2023', 'ek2023', 'gv2024', 'gn2024', 'st2024', 'ek2024']
output_writer = csv.DictWriter(output_csv, fieldnames=out_names) output_writer = csv.DictWriter(output_csv, fieldnames=fieldnames)
self.log.warning(self.get_total(self.output)) if self.get_total(self.output) == -1:
if self.get_total(self.output) <= 0:
self.log.warning(f"WRITING HEADER FOR FILE {self.output}!") self.log.warning(f"WRITING HEADER FOR FILE {self.output}!")
output_writer.writeheader() output_writer.writeheader()
rownum = 0 rownum = 0
@ -276,9 +310,11 @@ class dataimport:
def get_total(self, file):
    """Return the number of lines in ``file`` minus one.

    The result is -1 for an empty file and 0 for a file with a single line.

    Args:
        file: Path of the file to count.

    Returns:
        int: Line count minus one.
    """
    # The context manager closes the handle deterministically; the original
    # opened the file inside the generator and never closed it.
    with open(file, mode='r') as handle:
        return sum(1 for _ in handle) - 1
def comp_import(self, data, writer):
    """Process one input row: build a Company, then validate, derive and write it.

    Args:
        data: One row of the input CSV, passed to ``Company`` unchanged.
        writer: Output writer handed to ``Company``.
    """
    company = Company(data, report=self, writer=writer)
    # The steps run strictly in this order; each one works on the state
    # left by the previous one.
    for step in (company.validate, company.calculate_data, company.write):
        step()

View File

@ -9,14 +9,19 @@ idna==3.10
jwskate==0.11.1 jwskate==0.11.1
markdown-it-py==3.0.0 markdown-it-py==3.0.0
mdurl==0.1.2 mdurl==0.1.2
numpy==2.3.0
orderedmultidict==1.0.1 orderedmultidict==1.0.1
pandas==2.3.0
psycopg==3.2.9 psycopg==3.2.9
psycopg-pool==3.2.6 psycopg-pool==3.2.6
pycparser==2.22 pycparser==2.22
Pygments==2.19.1 Pygments==2.19.1
python-dateutil==2.9.0.post0
pytz==2025.2
requests==2.32.3 requests==2.32.3
requests_oauth2client==1.6.0 requests_oauth2client==1.6.0
rich==13.9.4 rich==13.9.4
six==1.17.0 six==1.17.0
typing_extensions==4.12.2 typing_extensions==4.12.2
tzdata==2025.2
urllib3==2.3.0 urllib3==2.3.0