Remove the database connection; write results back to a CSV file instead

This commit is contained in:
Federico Justus Denkena 2025-06-09 20:32:50 +02:00
parent d1229cad84
commit b38af8dee5
Signed by: f-denkena
GPG Key ID: 34D3C40435BDAACD

View File

@ -3,14 +3,12 @@ import argparse
import csv import csv
import secrets import secrets
import sys import sys
import time
import json import json
import logging import logging
import requests import requests
import hashlib import hashlib
import io import io
import psycopg as ps import datetime
import psycopg_pool as ps_pool
from rich import progress from rich import progress
from rich.logging import RichHandler from rich.logging import RichHandler
from rich.console import Console from rich.console import Console
@ -18,8 +16,9 @@ from rich.traceback import install
install(show_locals=True, locals_max_length=150, locals_max_string=300) install(show_locals=True, locals_max_length=150, locals_max_string=300)
class Company: class Company:
def __init__(self, data, report): def __init__(self, data, report, out):
self.data = data self.data = data
self.out = out
self.bvdid = data["BvD ID Nummer"] self.bvdid = data["BvD ID Nummer"]
self.name = data["Unternehmensname"] self.name = data["Unternehmensname"]
self.gv2020 = None self.gv2020 = None
@ -179,27 +178,31 @@ class Company:
def validate(self): def validate(self):
#fallback, in case tax wasn't already calculated #fallback, in case tax wasn't already calculated
self.calculate_tax() self.calculate_tax()
if self.st2020 and self.ek2020: if True:
self.report.valid_data += 1 if self.st2020 and self.ek2020:
return True self.report.valid_data += 1
self.report.invalid_data +=1 else:
if self.st2021 and self.ek2021: self.report.invalid_data +=1
self.report.valid_data += 1 if self.st2021 and self.ek2021:
return True self.report.valid_data += 1
self.report.invalid_data +=1 else:
if self.st2022 and self.ek2022: self.report.invalid_data +=1
self.report.valid_data += 1 if self.st2022 and self.ek2022:
return True self.report.valid_data += 1
if self.st2024 and self.ek2024: else:
self.report.valid_data += 1 self.report.invalid_data +=1
return True if self.st2023 and self.ek2023:
self.report.invalid_data +=1 self.report.valid_data += 1
return False else:
self.report.invalid_data +=1
if self.st2024 and self.ek2024:
self.report.valid_data += 1
else:
self.report.invalid_data +=1
class dataimport: class dataimport:
def __init__(self, filename, logfile, seek=0): def __init__(self, filename, logfile, output, seek=0):
self.seek = seek self.seek = seek
self.progress = progress.Progress( self.progress = progress.Progress(
*progress.Progress.get_default_columns(), *progress.Progress.get_default_columns(),
@ -210,6 +213,7 @@ class dataimport:
self.filename = filename self.filename = filename
FORMAT = "%(message)s" FORMAT = "%(message)s"
self.logfile = open(logfile, 'a') self.logfile = open(logfile, 'a')
self.output = output
if self.logfile != "NONE": if self.logfile != "NONE":
self.logconsole = Console(file=self.logfile) self.logconsole = Console(file=self.logfile)
logging.basicConfig( logging.basicConfig(
@ -225,54 +229,38 @@ class dataimport:
show_path=False, show_time=False, level="NOTSET")]) show_path=False, show_time=False, level="NOTSET")])
self.log = logging.getLogger("import") self.log = logging.getLogger("import")
self.total_rows = self.get_total() self.total_rows = self.get_total(self.filename)
self.errors = 0 self.errors = 0
self.data = {} self.data = {}
self.duplicate_database_id = None self.duplicate_database_id = None
self.task = self.progress.add_task(f"Importing {self.filename.split('/')[-1]}", total=self.get_total()) self.task = self.progress.add_task(f"Importing {self.filename.split('/')[-1]}", total=self.get_total(self.filename))
self.progress.update(self.task, advance=self.seek) self.progress.update(self.task, advance=self.seek)
global AUTHTOKEN
AUTHTOKEN = None
self.valid_data = 0 self.valid_data = 0
self.invalid_data = 0 self.invalid_data = 0
#with ps_pool.ConnectionPool(conninfo="postgresql:///bachelorarbeit?sslmode=require&port=5432&host=denkena-consulting.com&passfile=/home/user/bachelorarbeit_importer/pgpass&user=bachelorarbeit_w&hostaddr=94.16.116.86", min_size=4, max_size=10, open=True, ) as pool:
# with pool.connection() as conn:
#self.db_setup()
self.importer() self.importer()
#AUTHTOKEN = self.authtoken
#self.log.info('AUTHTOKEN SET!')
def db_setup(self, conn):
with conn.cursor() as cur:
cur.execute("CREATE TABLE IF NOT EXISTS test( bvd_id serial PRIMARY KEY)")
pass
def importer(self): def importer(self):
with self.progress: with self.progress:
if AUTHTOKEN is not None:
self.authtoken = AUTHTOKEN
self.log.info('AUTHTOKEN obtained!')
else:
pass
with open(self.filename, mode='r', encoding='utf-8-sig', newline='') as csv_file: with open(self.filename, mode='r', encoding='utf-8-sig', newline='') as csv_file:
csv_reader = csv.DictReader(csv_file, delimiter=',') with open(self.output, mode='a+', encoding='utf-8-sig', newline='') as output_csv:
rownum = 0 csv_reader = csv.DictReader(csv_file, delimiter=',')
for row in csv_reader: out_names = []
if rownum < self.seek: output_writer = csv.DictWriter(output_csv, fieldnames=out_names)
self.log.warning(self.get_total(self.output))
if self.get_total(self.output) <= 0:
self.log.warning(f"WRITING HEADER FOR FILE {self.output}!")
output_writer.writeheader()
rownum = 0
for row in csv_reader:
if rownum < self.seek:
rownum += 1
continue
for key in csv_reader.fieldnames:
self.data[key] = row[key]
self.comp_import(self.data, output_writer)
self.data = {}
rownum += 1 rownum += 1
continue self.progress.update(self.task, advance=1)
for key in csv_reader.fieldnames:
self.data[key] = row[key]
self.comp_import(self.data)
#if self.check_duplicate(data):
# self.patch_record(data)
# self.duplicate_database_id = None
#else:
# self.create_record(data)
self.data = {}
rownum += 1
self.progress.update(self.task, advance=1)
self.progress.console.rule() self.progress.console.rule()
self.log.info(f"Rows: {self.total_rows}") self.log.info(f"Rows: {self.total_rows}")
self.log.info(f"Valid: {self.valid_data}") self.log.info(f"Valid: {self.valid_data}")
@ -286,24 +274,23 @@ class dataimport:
else: else:
self.log.critical("ERROR CALCULATION EXCEPTION") self.log.critical("ERROR CALCULATION EXCEPTION")
def get_total(self): def get_total(self, file):
return sum(1 for _ in open(self.filename, mode='r')) - 1 return sum(1 for _ in open(file, mode='r')) - 1
def comp_import(self, data): def comp_import(self, data, out):
current = Company(data, report=self) current = Company(data, report=self, out=out)
current.validate() current.validate()
parser = argparse.ArgumentParser(description='Import data from ORBIS', epilog='Copyright Denkena Consulting') parser = argparse.ArgumentParser(description='Import data from ORBIS', epilog='Copyright Denkena Consulting')
parser.add_argument('filename', nargs="+") parser.add_argument('filename', nargs="+")
parser.add_argument('-l', '--logfile', default="log_importer", nargs="?") parser.add_argument('-l', '--logfile', default="log_importer", nargs="?")
parser.add_argument('-o', '--output', default="export_cleaned.csv", nargs="?")
parser.add_argument('-s', '--seek', type=int, default=0) parser.add_argument('-s', '--seek', type=int, default=0)
args = parser.parse_args() args = parser.parse_args()
if len(args.filename) > 1 and args.seek > 0: if len(args.filename) > 1 and args.seek > 0:
parser.error("Seek combined with multiple files is a bad idea!") parser.error("Seek combined with multiple files is a bad idea!")
for filename in args.filename: for filename in args.filename:
dataimport(filename, args.logfile, args.seek) dataimport(filename, args.logfile, args.output, args.seek)