import parse from 'csv-parse/lib/sync';
import detectNewline from 'detect-newline';
import { crlf } from 'crlf-normalize';

// Matches PEM2 export file names, which end in a dot followed by exactly three digits,
// e.g. "P2_11387.079" or "PEM File GMT -5.133". The previous pattern also had a
// "^P2.*\.\d{3}$" alternative, but every name it matched is matched by this one too.
const PEM2_FILE_TYPE_REGEX = /\.\d{3}$/;

export default class ImportParser {
	/**
	 * Detects the file type and builds the csv-parse configuration for it.
	 *
	 * Two things are detected here:
	 *  1. the file type, which decides how the parser is configured;
	 *  2. the metadata in the file header (columns, units, date format, sensor id, ...),
	 *     which is attached to the returned config as `metadata`.
	 *
	 * @param {{name: string}} file - the uploaded file; only `name` is read (by detectFileType)
	 * @param {string} filedata - full text content of the file
	 * @param {string} [declaredType] - type declared by the user; only 'hanwell' is honoured,
	 *     any other value falls back to auto-detection
	 * @param {string} [delimiter] - column delimiter; defaults to ',' where a default applies
	 * @returns {object} parser config, or an empty object when the type is not handled
	 * @throws {Error} for Hanwell v1 files without a 'Date Time' header line
	 */
	selectParserConfig = (file, filedata, declaredType, delimiter) => {
		let config = {};
		let metadata = {};

		const firstlines = this.parseFirstLines(filedata);

		// 1-based number of the first line mentioning "Time" (this also covers "Date Time").
		// csv-parse rejects from_line values below 1, so fall back to the first line when
		// no such header exists instead of handing back an unusable 0.
		const headerIndex = firstlines.findIndex((line) => line.includes('Time'));
		const fromLine = headerIndex === -1 ? 1 : headerIndex + 1;

		// A user can declare the hanwell type explicitly; everything else is guessed
		// with the (primitive) heuristics in detectFileType.
		const type =
			declaredType === 'hanwell'
				? 'hanwell'
				: this.detectFileType(file, firstlines);

		if (type === 'hanwell') {
			// version 2 is the hanwell hanlog32
			const isVersion2 = firstlines.some((line) => line.includes('Database Valid'));
			metadata = this.parseHanwellMetadata(firstlines, isVersion2, delimiter);

			// NOTE(review): skip_last_n_lines does not look like a csv-parse option;
			// presumably the caller strips the trailing footer lines itself - confirm.
			if (isVersion2) {
				config = {
					trim: true,
					delimiter: metadata.separator,
					columns: true,
					skip_empty_lines: true,
					skip_lines_with_error: true,
					metadata,
					bom: true,
					from_line: fromLine,
					skip_last_n_lines: 8
				};
			} else {
				config = {
					trim: true,
					delimiter: metadata.separator || ',',
					columns: true,
					skip_empty_lines: true,
					skip_lines_with_error: true,
					metadata,
					bom: true,
					from_line: fromLine,
					skip_last_n_lines: 13,
					relax_column_count: true,
					cast: false,
					// Re-key every record by position onto the cleaned header names and
					// drop blank cells.
					on_record: (record) => {
						const values = Object.values(record);
						const result = {};
						metadata.columns.forEach((colName, index) => {
							const value = values[index];
							if (value && value.trim() !== '') {
								result[colName] = value.trim();
							}
						});
						return result;
					}
				};
			}
		} else if (type === 'pem2') {
			// the PEM2 header length depends on the layout, see parsePem2Metadata
			metadata = this.parsePem2Metadata(firstlines);
			config = {
				trim: true,
				delimiter: '\t',
				skip_empty_lines: true,
				skip_lines_with_error: true,
				from_line: metadata.startData,
				columns: metadata.useColumns && metadata.columns,
				metadata,
				bom: true
			};
			if (!metadata.useColumns) {
				// Date and time arrive as two separate cells; merge them into one field.
				config.on_record = (record) => {
					const combinedRecord = {
						'DATE TIME': `${record[0]} ${record[1]}`
					};

					// Only add TEMP and RH if they exist.
					if (record[2] !== undefined) {
						combinedRecord['TEMP'] = record[2];
					}
					if (record[3] !== undefined) {
						combinedRecord['%RH'] = record[3];
					}

					return combinedRecord;
				};
			}
		} else if (type === 'hobowaresingle') {
			const delim = delimiter || ',';
			metadata = this.parseHoboMetadata(firstlines, delim);
			config = {
				trim: true,
				delimiter: delim,
				skip_empty_lines: false,
				skip_lines_with_error: true,
				from_line: metadata.startData,
				columns: metadata.columns,
				metadata,
				bom: true
			};
		} else if (type === 'conserv') {
			// this is our format, we can do as we please
			metadata = {
				columns: ['DATETIME', 'TEMP1', 'RH', 'VISIBLE', 'UV'],
				columnunits: ['YYYY-MM-DDTHH:mm:SSZ', 'F', '%', 'LUX', 'mW/M^2'],
				columntypes: { DATETIME: 'datetime', TEMP1: 'temp', RH: 'rh' },
				sourcedescription: 'Pre-mapped to Conserv',
				dateformat: 'YYYY-MM-DDTHH:mm:SS',
				// local offset from UTC in hours (getTimezoneOffset is minutes, sign inverted)
				timezoneoffset: (new Date().getTimezoneOffset() / 60) * -1
			};
			config = {
				trim: true,
				delimiter: ',',
				skip_empty_lines: true,
				skip_lines_with_error: true,
				from_line: 3,
				columns: metadata.columns,
				metadata,
				bom: true
			};
		} else if (type === 'csv') {
			// our default is to look for a basic CSV with column headers in the first row
			const delim = delimiter || ',';
			metadata = this.parseGenericCsvMetadata(firstlines, delim);
			config = {
				trim: true,
				delimiter: delim,
				columns: true,
				skip_empty_lines: true,
				skip_lines_with_error: true,
				metadata,
				bom: true,
				from_line: 1
			};
		}
		return config;
	};

	/**
	 * Placeholder, not implemented: currently does nothing and returns undefined.
	 *
	 * Some of our customer data formats have massive headers (up to 200 lines long).
	 */
	findFirstCSVLines = () => {};

	/**
	 * Attempts to detect the type of file from the file name and the given lines.
	 *
	 * Later checks override earlier ones, so the precedence is
	 * conserv > hobowaresingle > pem2 > csv.
	 *
	 * returns:
	 * pem2 - IPI PEM2 file exported from device
	 * hobowaresingle - A file exported from HOBOWare that contains info for a single logger
	 * conserv - A file received in the prescribed conserv format
	 * csv - the default when nothing else matched
	 *
	 * @param {{name: string}} file - the uploaded file; only `name` is read
	 * @param {string[]} firstlines - lines of the file (every line passed in is scanned)
	 * @returns {string} one of the type names listed above
	 */
	detectFileType = (file, firstlines) => {
		let type = 'csv';

		// A file directly from a PEM2 should carry the .pm2 extension (dot matched
		// literally), or end in three digits,
		// e.g. P2_11387.079, P2_02109.068, P2_02104.037, PEM File GMT -5.133.
		const hasPem2Name =
			/\.pm2/i.test(file.name) ||
			file.name.search(PEM2_FILE_TYPE_REGEX) !== -1;
		// double check file contents, looking for a PEM2 serial
		const hasPem2Serial = firstlines.some((line) => /P2_[0-9]/.test(line));
		if (hasPem2Name || hasPem2Serial) {
			type = 'pem2';
		}

		// HOBOWare exports data that looks something like this in the second line
		// "#","Date Time, GMT-05:00","Temp, °F (LGR S/N: 20535795, SEN S/N: 20535795)"
		const hoboMarkers = ['LGR S/N', 'SEN S/N', 'Host Connect', 'EOF'];
		const looksLikeHobo = firstlines.some((line) =>
			hoboMarkers.some((marker) => line.includes(marker))
		);
		if (looksLikeHobo) {
			type = 'hobowaresingle';
		}

		// whatever the checks above said, a line mentioning conserv marks our own format
		if (firstlines.some((line) => line.toLowerCase().includes('conserv'))) {
			type = 'conserv';
		}

		return type;
	};

	/**
	 * Returns true if the file is a valid file type for import.
	 *
	 * @param {{name: string, type: string}} uploadFile - object exposing `name` and MIME `type`;
	 *     a missing value is treated as an empty string
	 * @returns {boolean}
	 */
	isValidFile = uploadFile => {
		const mimeType = String(uploadFile.type || '').toLowerCase();
		const fileName = String(uploadFile.name || '');
		const lowerName = fileName.toLowerCase();

		return (
			// a text type (typically text/csv or text/plain)
			mimeType.includes('text/') ||
			// a PEM2 file by extension
			lowerName.includes('.pm2') ||
			// PEM2 files may also have a numeric extension,
			// e.g. P2_11387.079, P2_02109.068, P2_02104.037, PEM File GMT -5.133.
			fileName.search(PEM2_FILE_TYPE_REGEX) !== -1 ||
			// ...and those numeric extensions get detected by the browser as a RAR file
			mimeType.includes('rar') ||
			// could also have a .csv or .txt extension
			lowerName.includes('.csv') ||
			lowerName.includes('.txt')
		);
	};

	/**
	 * Splits the file content into lines.
	 *
	 * Despite the name, every line of the file is returned, not only the leading ones.
	 *
	 * @param {string} filedata - full text content of the file
	 * @returns {string[]} the lines, with NUL characters removed and line endings unified
	 */
	parseFirstLines = filedata => {
		// remove the null characters that OnSet includes for some reason...
		const cleaned = filedata.replace(/\0/g, '');

		// Detect the dominant newline. detectNewline yields undefined when the text has no
		// "\n" at all (single line or CR-only file); fall back to "\n" so that the split
		// below still works once the line endings have been normalized.
		const newLineChar = detectNewline(cleaned) || '\n';

		// normalize the line endings to the detected dominant.  Onset sometimes uses one for the
		// header and a different one for the data.
		return crlf(cleaned, newLineChar).split(newLineChar);
	};

	/**
	 * Builds metadata for a plain CSV whose first line holds the column names.
	 *
	 * @param {string[]} firstlines - lines of the file; line 1 is the header and line 2 a sample row
	 * @param {string} delimiter - column delimiter
	 * @returns {object} metadata with columns, columnunits (all empty), columntypes,
	 *     sourcedescription and the local timezoneoffset in hours
	 */
	parseGenericCsvMetadata = (firstlines, delimiter) => {
		// Parse one line into its cells; a missing or empty line yields no cells instead
		// of throwing.
		const parseCells = (line) => {
			if (typeof line !== 'string') {
				return [];
			}
			const rows = parse(line, { delimiter, trim: true });
			return rows[0] || [];
		};

		const columns = parseCells(firstlines[0]);
		const exampleValues = parseCells(firstlines[1]);

		const types = {};
		columns.forEach((column, index) => {
			const exampleval = exampleValues[index] ? exampleValues[index] : null;
			types[column] = this.guessMapping(column, '', exampleval);
		});

		return {
			sourcedescription: 'Unknown',
			columns,
			columnunits: columns.map(() => ''),
			columntypes: types,
			timezoneoffset: (new Date().getTimezoneOffset() / 60) * -1
		};
	};

	/**
	 * Reads the header of an IPI PEM2 export. Three layouts are recognised:
	 *
	 * Type 1: "DATE	TIME" tab-delimited.
	 *
	 * DATE	TIME	TEMP	%RH
	 * YYYY-MM-DD	HH:MM	F	%
	 * 11/5/2024	16:30	73.7	44
	 * 11/5/2024	17:00	73.5	44
	 *
	 * Type 2: "Location:" line, then "DATE	TIME" tab-delimited.
	 *
	 * Location:
	 * DATE	TIME	TEMP	%RH
	 * YYYY-MM-DD	HH:MM	F	%
	 * 11/5/2024	16:30	73.7	44
	 * 11/5/2024	17:00	73.5	44
	 *
	 * Type 3 (fallback): file name line, "Location:" line, then "DATE AND TIME" as single column.
	 *
	 * P2_18277.129 Uploaded: 2025-01-27 13:53:30 GMT  Decimal Sep = [.]
	 * Location:
	 * DATE AND TIME GMT	 TEMP	%RH
	 * YYYY-MM-DD HH:MM	  F	 %
	 * 2025-01-21 13:30	 68.7	  38
	 * 2025-01-21 14:00	 68.7	  37
	 *
	 * @param {string[]} firstlines - lines of the file; missing lines are treated as empty
	 * @returns {object} metadata; `startData` is the 1-based line of the first data row and
	 *     `useColumns` tells whether `columns` can be handed to the parser directly
	 */
	parsePem2Metadata = firstlines => {
		const LOCATION_LINE = 'Location:';
		const DATE_TIME_HEADER = 'DATE\tTIME';

		// Truncated files must not crash the import, so absent lines read as ''.
		const lineAt = (index) =>
			typeof firstlines[index] === 'string' ? firstlines[index] : '';
		const splitCells = (line) => line.split('\t').map((cell) => cell.trim());

		const metadata = {
			sourcedescription: 'IPI PEM2',
			deviceid: '',
			location: '',
			useColumns: false,
			columns: ['DATE TIME', 'TEMP', '%RH'],
			columnunits: ['M/D/YYYY H:mm', 'F', '%'],
			dateformat: 'M/D/YYYY H:mm',
			timezoneoffset: 0
		};

		const firstLine = lineAt(0);
		const secondLine = lineAt(1);

		if (firstLine.includes(DATE_TIME_HEADER)) {
			// Type 1: names + units, data starts on line 3
			metadata.startData = 3;
		} else if (
			firstLine.includes(LOCATION_LINE) &&
			secondLine.includes(DATE_TIME_HEADER)
		) {
			// Type 2: first line contains the location (if it was set in the device)
			metadata.location = firstLine.substring(LOCATION_LINE.length).trim();
			metadata.startData = 4;
		} else {
			// Fallback to Type 3.
			metadata.useColumns = true;
			// line 1 starts with the device id, up to the first space
			metadata.deviceid = firstLine
				.substring(0, firstLine.indexOf(' '))
				.trim();
			// second line contains the location (if it was set in the device)
			metadata.location = secondLine.substring(LOCATION_LINE.length).trim();
			// third line is the header names
			metadata.columns = splitCells(lineAt(2));
			// fourth line is the format of the data, unit of measurement, date format, etc
			metadata.columnunits = splitCells(lineAt(3));

			metadata.dateformat = 'YYYY-MM-DD HH:mm';
			metadata.startData = 5;
		}

		const types = {};
		metadata.columns.forEach((column, index) => {
			types[column] = this.guessMapping(column, metadata.columnunits[index]);
		});
		metadata.columntypes = types;

		return metadata;
	};

	/**
	 * Reads the header of an Onset HOBOWare export.
	 *
	 * @param {string[]} firstlines - lines of the file; only the first five are searched
	 *     for the column header
	 * @param {string} delimiter - column delimiter
	 * @returns {object} metadata with plottitle, columns, columnunits, columntypes,
	 *     startData and, when they can be derived, deviceid and timezoneoffset
	 */
	parseHoboMetadata = (firstlines, delimiter) => {
		const metadata = { sourcedescription: 'OnSet HOBOWare' };
		const units = [];
		const types = {};

		// line one contains the plot title
		// NOTE(review): the fixed offset 14 presumably accounts for a leading BOM plus
		// '"Plot Title: ' - confirm against real exports.
		const titleLine = typeof firstlines[0] === 'string' ? firstlines[0] : '';
		metadata.plottitle = titleLine
			.substring(14, titleLine.length - 1)
			.trim();

		// Blank lines show up in some hoboware files; ignore them while looking for the header.
		const testLines = firstlines
			.slice(0, 5)
			.filter((line) => line.trim().length > 0);

		// The header is the first row with at least two cells (datetime and at least one
		// reading) where some cell mentions one of these words.
		const headerHints = ['time', 'date', 'rh', 'temp', 'lgr', 'host'];
		let headerRow = [];
		let startData = 0;
		for (let i = 0; i < testLines.length; i++) {
			const rows = parse(testLines[i], { delimiter });
			// when nothing qualifies, the last inspected row is used as the header
			headerRow = rows[0] || [];
			const looksLikeHeader =
				headerRow.length > 1 &&
				headerRow.some((cell) => {
					const lowered = cell.toLowerCase();
					return headerHints.some((hint) => lowered.includes(hint));
				});
			if (looksLikeHeader) {
				// csv-parse starts at 1, so we need to add 2 to the zero-based index.
				startData = i + 2;
				break;
			}
		}

		// Column names have the unit and the sensor id intermingled, e.g.
		// "Temp, °F (LGR S/N: 20535795, SEN S/N: 20535795)". Split them apart.
		metadata.columns = headerRow.map((column) => {
			const subfields = column.split(',');
			const name = subfields[0];
			const detail = subfields[1];

			let unit = '';
			if (detail) {
				unit = detail.trim().substring(0, 10);
				if (detail.includes('LGR S/N:')) {
					metadata.deviceid = detail.substring(
						detail.indexOf('LGR S/N: ') + 9
					);
				}
			}
			units.push(unit);

			const type = this.guessMapping(column, unit);

			// For a datetime column the unit is the timezone offset, e.g. "GMT-05:00".
			// Columns without a usable offset must not clobber an offset found earlier.
			if (type === 'datetime') {
				const offset = unit.includes(':')
					? Number.parseInt(unit.substring(3, unit.indexOf(':')), 10)
					: Number.parseInt(unit.substring(unit.indexOf(' ')), 10) / 100;
				if (Number.isFinite(offset)) {
					metadata.timezoneoffset = offset;
				}
			}

			// Hobo files often contain several columns of one kind (temp, min, max, avg,
			// std deviation); only the first column of each type gets mapped.
			if (!Object.values(types).includes(type)) {
				types[name] = type;
			}

			return name;
		});

		metadata.columnunits = units;
		metadata.columntypes = types;
		metadata.startData = startData;
		return metadata;
	};

	/**
	 * Dispatches to the Hanwell metadata reader for the given format version.
	 *
	 * @param {string[]} firstlines - lines of the file
	 * @param {boolean} isVersion2 - true for hanlog32 (version 2) files
	 * @param {string} [separator] - column delimiter
	 * @returns {object} metadata
	 * @throws {Error} for version 1 files without a 'Date Time' header line
	 */
	parseHanwellMetadata = (firstlines, isVersion2, separator) =>
		isVersion2
			? this.parseHanwellV2Metadata(firstlines, separator)
			: this.parseHanwellV1Metadata(firstlines, separator);

	/**
	 * Reads the header of a Hanwell version 1 export.
	 *
	 * @param {string[]} firstlines - lines of the file
	 * @param {string} [separator] - column delimiter, defaults to ','
	 * @returns {object} metadata; `columns` are the trimmed, non-empty header cells
	 * @throws {Error} when no line contains 'Date Time'
	 */
	parseHanwellV1Metadata = (firstlines, separator) => {
		const sep = separator || ',';

		const headerLine = firstlines.find((line) => line.includes('Date Time'));
		if (headerLine === undefined) {
			throw new Error('No header line found in file');
		}

		const headers = headerLine
			.split(sep)
			.map((header) => header.trim())
			.filter((header) => header !== '');

		// Two known flavours are told apart by marker strings anywhere in the file.
		let sourceDescription = 'Hanwell';
		let dateFormat = 'DD/MM/YYYY HH:mm';
		if (firstlines.some((line) => line.includes('UKAS Rotronic'))) {
			sourceDescription += ' UKAS Rotronic';
			dateFormat = 'YYYY-MM-DD HH:mm:ss';
		} else if (firstlines.some((line) => line.includes('Livingroom'))) {
			sourceDescription += ' Livingroom';
		}

		const metadata = {
			columntypes: {
				'Date Time': 'datetime',
				'Temperature (C)': 'temp',
				'Humidity (%RH)': 'rh'
			},
			columnunits: [dateFormat, 'C', '%'],
			columns: headers,
			sensorInfo: {},
			dateformat: dateFormat,
			sourcedescription: sourceDescription,
			separator: sep
		};

		// a line whose first cell is exactly 'Livingroom' names the sensor
		const namesSensor = firstlines.some(
			(line) => line.split(sep)[0].trim() === 'Livingroom'
		);
		if (namesSensor) {
			metadata.sensorInfo.name = 'Livingroom';
		}

		return metadata;
	};

	/**
	 * Returns the fixed metadata for a Hanwell hanlog32 (version 2) export.
	 *
	 * @param {string[]} firstlines - lines of the file (currently unused)
	 * @param {string} [separator] - column delimiter, passed through unchanged
	 * @returns {object} metadata
	 */
	parseHanwellV2Metadata = (firstlines, separator) => {
		const dateFormat = 'HH:mm:ss  DD/MMM/YYYY';

		return {
			columntypes: {
				'Time': 'datetime',
				'Temperature (C)': 'temp',
				'Humidity (RH(%))': 'rh'
			},
			columns: ['Time', 'Temperature (C)', 'Humidity (RH(%))'],
			columnunits: [dateFormat, 'C', '%'],
			sensorInfo: {},
			dateformat: dateFormat,
			sourcedescription: 'Hanwell hanlog32',
			separator
		};
	};

	/**
	 * Guesses what a column holds from keywords in its name.
	 *
	 * The groups are checked in order (temp, rh, datetime, vis, sensor) and the first hit
	 * wins. Callers pass further arguments (unit, example value); they are ignored.
	 *
	 * @param {string} column - column name
	 * @returns {string|undefined} 'temp', 'rh', 'datetime', 'vis' or 'sensor'; undefined when
	 *     nothing matches or the name is not a string
	 */
	guessMapping = (column) => {
		if (typeof column !== 'string') {
			return undefined;
		}

		const lowered = column.toLowerCase();
		const keywordGroups = [
			['temp', ['temp', 'celsius', 'fahren']],
			['rh', ['rh', 'humidity']],
			['datetime', ['date', 'time']],
			['vis', ['light', 'lux', 'illuminance']],
			['sensor', ['sensor', 'logger']]
		];

		const hit = keywordGroups.find((group) =>
			group[1].some((keyword) => lowered.includes(keyword))
		);
		return hit ? hit[0] : undefined;
	};
}
