ich bin auf InfluxDB umgestiegen und habe folgendes erstes Konzept:
Code: Alles auswählen
#!/usr/bin/env python
from pathlib import Path
import pandas as pd
from influxdb import DataFrameClient
# Number of CSV rows written into each temporary chunk file.
CHUNK_SIZE = 500
# Source log file, looked up in the current user's home directory.
CSV_FILE_PATH = Path.home() / "DataLog.csv"
# Folder next to this script that receives the chunk files.
TEMPORARY_FOLDER = Path(__file__).parent / "Temp"
def split_file(file, temporary_folder, chunksize):
    """Split a CSV file into numbered chunk files inside ``temporary_folder``.

    The source is read in pieces of ``chunksize`` rows; malformed lines are
    skipped by the parser. Each piece is written without the index column as
    ``<stem>_<number>.csv``, where ``<stem>`` is the source file name without
    its suffix and numbering starts at 1.

    Args:
        file: ``pathlib.Path`` of the CSV file to split.
        temporary_folder: ``pathlib.Path`` of the folder receiving the chunk
            files. It is created (including parents) when it does not exist.
        chunksize: Maximum number of data rows per chunk file.

    Note:
        Chunk files left over from an earlier run are not removed here.
    """
    # DataFrame.to_csv does not create missing directories, so make sure the
    # target folder exists before the first chunk is written.
    temporary_folder.mkdir(parents=True, exist_ok=True)
    # The context manager closes the source file handle even if writing a
    # chunk fails part-way through.
    with pd.read_csv(file, on_bad_lines="skip", chunksize=chunksize) as reader:
        for number, chunk in enumerate(reader, 1):
            chunk.to_csv(
                temporary_folder / f"{file.stem}_{number}.csv", index=False
            )
def get_data(temporary_folder):
    """Yield one cleaned data frame per ``*.csv`` file in ``temporary_folder``.

    For every file, rows containing missing values are dropped, the ``Date``
    and ``Time`` text columns are concatenated and parsed into a ``Datetime``
    index, and both source columns are removed. Each remaining column is
    converted to ``float`` where possible; columns that cannot be converted
    keep their original values.

    Args:
        temporary_folder: ``pathlib.Path`` of the folder holding the chunk
            files.

    Yields:
        A ``pandas.DataFrame`` indexed by ``Datetime`` for each chunk file, in
        the order ``glob`` returns the files.
    """
    for chunk_path in temporary_folder.glob("*.csv"):
        frame = pd.read_csv(chunk_path).dropna()
        # Date and Time are joined without a separator, which is why the
        # format string has no space between %Y and %I.
        timestamps = pd.to_datetime(
            frame["Date"] + frame["Time"], format="%a %b %d %Y%I:%M:%S %p"
        )
        frame = (
            frame.assign(Datetime=timestamps)
            .set_index("Datetime")
            .drop(["Date", "Time"], axis=1)
        )
        for name in frame.columns:
            try:
                frame[name] = frame[name].astype(float)
            except ValueError:
                # Best effort: non-numeric columns are left untouched.
                continue
        yield frame
def main():
    """Split the CSV log into chunks and write every chunk to InfluxDB.

    The source file is first split into chunk files, then a client connected
    to ``localhost:8086`` selects the ``Test`` database and writes each
    prepared data frame to the ``Sensors`` measurement via the line protocol.
    """
    split_file(CSV_FILE_PATH, TEMPORARY_FOLDER, CHUNK_SIZE)
    with DataFrameClient(host="localhost", port=8086) as influx:
        influx.switch_database("Test")
        for frame in get_data(TEMPORARY_FOLDER):
            influx.write_points(frame, "Sensors", protocol="line")
# Run the import only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Habt ihr noch Optimierungen/Verbesserungen/Änderungen für mich?
Vielen Dank und Grüße
Dennis