// ObservationSourceAnalyser.java

/**
 * VStar: a statistical analysis tool for variable star data.
 * Copyright (C) 2009  AAVSO (http://www.aavso.org/)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 */
package org.aavso.tools.vstar.input.text;

import java.io.IOException;
import java.io.LineNumberReader;

import org.aavso.tools.vstar.data.validation.AAVSODownloadFormatValidator;
import org.aavso.tools.vstar.data.validation.CommonTextFormatValidator;
import org.aavso.tools.vstar.data.validation.SimpleTextFormatValidator;
import org.aavso.tools.vstar.exception.ObservationReadError;
import org.aavso.tools.vstar.ui.mediator.NewStarType;

import com.csvreader.CsvReader;

/**
 * This class analyses an observation file (simple or download formats) and
 * makes information about the file available for use by consumers: the number
 * of lines read, the field delimiter, and the kind of "new star" the file
 * represents.
 * 
 * Typical use is to construct an instance, call {@link #analyse()} once, and
 * then query the getters. Until analyse() has successfully determined the
 * format, the delimiter and new star type are null and the line count is 0.
 */
public class ObservationSourceAnalyser {

	// Delimiters are applied with String.split() and so are treated as
	// regular expressions; SPACE_DELIM matches a run of one or more spaces.
	public static final String TAB_DELIM = "\t";
	public static final String COMMA_DELIM = ",";
	public static final String SPACE_DELIM = " +";

	private final LineNumberReader obsSource;
	private final String obsSourceIdentifier;
	private int lineCount;
	private NewStarType newStarType;
	private String delimiter;

	/**
	 * Constructor.
	 * 
	 * @param obsSource
	 *            The observation source to be analysed. It is read to the end
	 *            and closed by analyse().
	 * @param obsSourceIdentifier
	 *            An identifier for the source of the observations; used in
	 *            error messages.
	 */
	public ObservationSourceAnalyser(LineNumberReader obsSource,
			String obsSourceIdentifier) {
		this.obsSource = obsSource;
		this.obsSourceIdentifier = obsSourceIdentifier;
		this.lineCount = 0;
	}

	/**
	 * Analyse the source.
	 * 
	 * The first line that is neither a comment (starts with "#") nor blank is
	 * used to determine the delimiter and format. All remaining lines are
	 * read only so that a total line count can be obtained. The source is
	 * always closed before this method returns, whether it completes normally
	 * or throws.
	 * 
	 * If the source contains only comment or blank lines, no error is raised
	 * and the delimiter and new star type remain unset.
	 * 
	 * @throws IOException
	 *             If reading from or closing the source fails.
	 * @throws ObservationReadError
	 *             If the first data line cannot be split into at least two
	 *             fields using tab, comma or space delimiters.
	 */
	public void analyse() throws IOException, ObservationReadError {

		try {
			boolean gleanedFormat = false;

			String line = obsSource.readLine();
			while (line != null) {
				// Using one line of data, glean format information.
				// Other than doing this once, just read all lines
				// so we can get a line count.
				if (!gleanedFormat && !line.startsWith("#")
						&& !line.matches("^\\s*$")) {
					// Try the delimiter types in order of preference: tab,
					// comma, then space. Short-circuit evaluation stops at
					// the first delimiter that yields a recognised format.
					gleanedFormat = determinedFormat(line, TAB_DELIM)
							|| determinedFormat(line, COMMA_DELIM)
							|| determinedFormat(line, SPACE_DELIM);

					if (!gleanedFormat) {
						throw new ObservationReadError("'"
								+ obsSourceIdentifier
								+ "' is in an unknown format.");
					}
				}

				line = obsSource.readLine();
			}

			this.lineCount = obsSource.getLineNumber();
		} finally {
			// Release the underlying source even when the format is unknown
			// or a read fails part way through.
			obsSource.close();
		}
	}

	/**
	 * Try to determine the format of the file from a single line: the
	 * delimiter in use and simple vs download format.
	 * 
	 * A line that splits into 2 to 5 fields is taken to be simple format; more
	 * than 5 fields is taken to be download format. On success the delimiter
	 * and new star type members are updated; otherwise they are left
	 * untouched.
	 * 
	 * @param line
	 *            The line to be analysed.
	 * @param delimiter
	 *            The delimiter to try, as a regular expression: tab, comma or
	 *            a run of spaces.
	 * @return Whether or not the format was determined.
	 */
	private boolean determinedFormat(String line, String delimiter) {
		String[] fields = line.split(delimiter);

		// Fewer than two fields means this delimiter does not separate
		// anything on the line.
		if (fields.length < 2) {
			return false;
		}

		this.delimiter = delimiter;
		this.newStarType = fields.length <= 5 ? NewStarType.NEW_STAR_FROM_SIMPLE_FILE
				: NewStarType.NEW_STAR_FROM_DOWNLOAD_FILE;

		return true;
	}

	/**
	 * @return the number of lines read from the source by analyse(), including
	 *         comment and blank lines; 0 if analyse() has not completed.
	 */
	public int getLineCount() {
		return lineCount;
	}

	/**
	 * @return the newStarType determined by analyse(), or null if the format
	 *         has not been determined.
	 */
	public NewStarType getNewStarType() {
		return newStarType;
	}

	/**
	 * @return the delimiter (one of TAB_DELIM, COMMA_DELIM or SPACE_DELIM)
	 *         determined by analyse(), or null if the format has not been
	 *         determined.
	 */
	public String getDelimiter() {
		return delimiter;
	}

	/**
	 * @return the obsSourceIdentifier
	 */
	public String getObsSourceIdentifier() {
		return obsSourceIdentifier;
	}

	/**
	 * Return an instance of the text format validator class to be used for
	 * creating observation objects from a sequence of lines containing comma,
	 * tab or space delimited fields.
	 * 
	 * @param obsSource
	 *            The observation source to be analysed. Passing this in here
	 *            allows us to ensure we pass in an observation source that is
	 *            reset to the start, not the case for the one passed into the
	 *            constructor after analyse() has been invoked.
	 * @return The validator object corresponding to this "new star" type.
	 * @throws IllegalStateException
	 *             If the format has not been determined, i.e. analyse() has
	 *             not been successfully invoked on a source containing data.
	 */
	public CommonTextFormatValidator getTextFormatValidator(
			LineNumberReader obsSource) throws IOException {

		// Fail with a meaningful message rather than a NullPointerException
		// when assertions are disabled and no format has been determined.
		if (delimiter == null || newStarType == null) {
			throw new IllegalStateException(
					"The format of the observation source has not been determined; analyse() must be invoked first.");
		}

		assert (ObservationSourceAnalyser.TAB_DELIM.equals(delimiter)
				|| ObservationSourceAnalyser.COMMA_DELIM.equals(delimiter) || ObservationSourceAnalyser.SPACE_DELIM
				.equals(delimiter));

		CommonTextFormatValidator validator = null;

		CsvReader lineReader = new CsvReader(obsSource);
		// The reader is given a single delimiter character. For SPACE_DELIM
		// (a regular expression) the first character is the space itself.
		// NOTE(review): confirm how the reader treats runs of several spaces,
		// which SPACE_DELIM collapses into one separator during analysis.
		lineReader.setDelimiter(delimiter.charAt(0));

		if (NewStarType.NEW_STAR_FROM_SIMPLE_FILE.equals(newStarType)) {
			validator = new SimpleTextFormatValidator(lineReader, newStarType
					.getMinFields(), newStarType.getMaxFields(), newStarType
					.getFieldInfoSource());
		} else if (NewStarType.NEW_STAR_FROM_DOWNLOAD_FILE.equals(newStarType)) {
			validator = new AAVSODownloadFormatValidator(lineReader,
					newStarType.getMinFields(), newStarType.getMaxFields(),
					newStarType.getFieldInfoSource());
		}

		assert (validator != null);

		return validator;
	}
}