/*
 * KHeiseReg
 *
 * A utility to search for articles within the Heise register.
 *
 * Copyright (C) 2002 Oliver Gantz <Oliver.Gantz@epost.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef REGFILE_H
#define REGFILE_H

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <qvaluelist.h>
#include <qstring.h>
#include <qcstring.h>
#include <qfile.h>


/*
 * Magazine selector values. CT and IX occupy distinct bits; RegFile::editions()
 * tests a selector with a bitwise AND against REG_MAGAZINE_CT.
 * NOTE(review): presumably CT stands for the magazine "c't" and IX for "iX",
 * and the values may be OR-ed together -- confirm against the callers.
 */
/* No magazine selected. */
#define REG_MAGAZINE_NONE 0x00
/* Selector bit for the CT magazine. */
#define REG_MAGAZINE_CT   0x01
/* Selector bit for the IX magazine. */
#define REG_MAGAZINE_IX   0x02


/* Size in bytes of the buffer that holds one line of a RegEntry. */
#define REG_LINE_SIZE				512
/* Number of lines that make up one entry (valid line numbers are 0..8). */
#define REG_LINE_COUNT			9

/*
 * Line numbers within an entry. TITLE, SUBTITLE, AUTHOR, EDITOR, LINKS and
 * BYWORDS are used by the inline RegEntry accessors of the same name.
 * NOTE(review): PAGE, EDITION and MAGYEAR are presumably parsed by
 * RegEntry::page(), edition(), magazine() and year(), which are defined
 * outside this header -- confirm there.
 */
#define REG_LINE_TITLE			0
#define REG_LINE_SUBTITLE		1
#define REG_LINE_AUTHOR			2
#define REG_LINE_EDITOR			3
#define REG_LINE_PAGE				4
#define REG_LINE_EDITION		5
#define REG_LINE_MAGYEAR		6
#define REG_LINE_LINKS			7
#define REG_LINE_BYWORDS		8



/** List of 8-bit strings; used by RegSearchList to hold its keyword sets. */
typedef QValueList<QCString> RegCStrList;
/** List of integers; used for number filters and for edition lists. */
typedef QValueList<int> RegNumList;


/**
 * Container for "Altavista-style" keywords to be matched with a text.
 *
 * "word1 word2 word3"    means text must contain word1 OR word2 OR word3.
 * "+word1 +word2 +word3" means text must contain word1 AND word2 AND word3.
 * "-word1 -word2 -word3" means text must not contain word1 NOR word2 NOR word3.
 *
 * Keywords are set once with setKeyWords() and can then be tested against
 * any number of text lines with matches() or matchesFuzzy().
 */
class RegSearchList
{
public:
	/**
	 * Constructor.
	 */
	RegSearchList();

	/**
	 * Stores a line of keywords.
	 *
	 * @param text The line containing the keywords.
	 */
	void setKeyWords(const QString &text);

	/**
	 * Matches the given text line with the stored keywords.
	 *
	 * @param text An arbitrary text line (IBM character set).
	 * @param cs @p true for case sensitive search.
	 * @return @p true if the text matches, @p false otherwise.
	 */
	bool matches(const QCString &text, bool cs) const;

	/**
	 * Matches the given text line with the stored keywords, allowing for
	 * fuzziness.
	 *
	 * @param text An arbitrary text line (IBM character set).
	 * @param cs @p true for case sensitive search.
	 * @param threshold Fuzziness threshold. Must be between 0 and 100.
	 * @return @p true if the text matches, @p false otherwise.
	 */
	bool matchesFuzzy(const QCString &text, bool cs, int threshold) const;
	
private:
	/**
	 * Returns @p true if a given string contains a given substring with at least
	 * a given fuzziness threshold.
	 *
	 * @param str An arbitrary string.
	 * @param substr The substring to look for within the given string.
	 * @param cs @p true for case sensitive search.
	 * @param threshold Fuzziness threshold. Must be between 0 and 100.
	 * @return @p true if @p substr occurs within @p str, @p false otherwise.
	 */
	bool stringContainsFuzzy(const QCString &str, const QCString &substr, bool cs, int threshold) const;


	// NOTE(review): presumably the unprefixed (OR), '+'-prefixed (AND) and
	// '-'-prefixed (NOT) keywords respectively -- confirm in setKeyWords().
	RegCStrList m_include, m_require, m_exclude;
};


/**
 * Container for a list of numbers to be matched with a single number.
 *
 * The numbers are set from a text line with setNumbers() and tested with
 * matches().
 */
class RegSearchNumList
{
public:
	/**
	 * Constructor.
	 */
	RegSearchNumList();

	/**
	 * Stores a line of numbers.
	 *
	 * @param text The line containing the numbers.
	 * @param year @p true if two digits year conversion required, @p false otherwise.
	 */
	void setNumbers(const QString &text, bool year);

	/**
	 * Matches the given number with the stored numbers.
	 *
	 * @param num An arbitrary number.
	 * @return @p true if the number matches, @p false otherwise.
	 */
	bool matches(int num) const;

private:
	// Numbers stored by setNumbers().
	// NOTE(review): how an empty list is treated by matches() is defined in
	// the implementation file -- confirm there.
	RegNumList m_include;
};


/**
 * Class for a Heise register file entry. Each entry represents an article.
 *
 * An entry consists of nine text lines, addressed by the REG_LINE_*
 * line numbers, each kept in a fixed-size buffer of REG_LINE_SIZE bytes.
 */
class RegEntry
{
public:
	/**
	 * Constructor.
	 */
	RegEntry();

	/**
	 * Returns a pointer to the content of a certain entry line.
	 *
	 * The content is stored using the IBM (DOS) character set and includes a
	 * <CR><LF> linebreak sequence.
	 *
	 * The number is used as an array index without any range check.
	 *
	 * @param num The line number to refer to (must be between 0 and 8).
	 */
	char *line(int num);
	
	/**
	 * Returns the content of a certain entry line.
	 *
	 * The trailing <CR><LF> linebreak sequence is omitted and the character set is
	 * transformed to latin1.
	 *
	 * @param num The line number to refer to (must be between 0 and 8).
	 */
	QCString lineStr(int num) const;

	/** Returns the raw buffer of the title line (REG_LINE_TITLE). */
	const char *title() const;
	/** Returns the raw buffer of the subtitle line (REG_LINE_SUBTITLE). */
	const char *subTitle() const;
	/** Returns the raw buffer of the author line (REG_LINE_AUTHOR). */
	const char *author() const;
	/** Returns the raw buffer of the editor line (REG_LINE_EDITOR). */
	const char *editor() const;
	// NOTE(review): page(), edition(), magazine() and year() are defined
	// outside this header; presumably they parse the REG_LINE_PAGE,
	// REG_LINE_EDITION and REG_LINE_MAGYEAR lines, with magazine() yielding
	// one of the REG_MAGAZINE_* values -- confirm in the implementation.
	int page() const;
	int edition() const;
	unsigned char magazine() const;
	int year() const;
	/** Returns the raw buffer of the links line (REG_LINE_LINKS). */
	const char *links() const;
	/** Returns the raw buffer of the bywords line (REG_LINE_BYWORDS). */
	const char *bywords() const;

	// NOTE(review): presumably a consistency check of the entry's content;
	// the criteria are defined in the implementation file.
	bool verify() const;

private:
	// One buffer per entry line.
	// NOTE(review): the literal 9 duplicates REG_LINE_COUNT; consider using
	// the macro so that both cannot drift apart.
	char m_lines[9][REG_LINE_SIZE];
};


/**
 * Gives writable access to the buffer of entry line @p num.
 * The index is used as is; no range check is performed.
 */
inline char *RegEntry::line(int num)
{
	return &m_lines[num][0];
}

/** Read-only access to the buffer of the title line. */
inline const char *RegEntry::title() const
{
	return &m_lines[REG_LINE_TITLE][0];
}

/** Read-only access to the buffer of the subtitle line. */
inline const char *RegEntry::subTitle() const
{
	return &m_lines[REG_LINE_SUBTITLE][0];
}

/** Read-only access to the buffer of the author line. */
inline const char *RegEntry::author() const
{
	return &m_lines[REG_LINE_AUTHOR][0];
}

/** Read-only access to the buffer of the editor line. */
inline const char *RegEntry::editor() const
{
	return &m_lines[REG_LINE_EDITOR][0];
}

/** Read-only access to the buffer of the links line. */
inline const char *RegEntry::links() const
{
	return &m_lines[REG_LINE_LINKS][0];
}

/** Read-only access to the buffer of the bywords line. */
inline const char *RegEntry::bywords() const
{
	return &m_lines[REG_LINE_BYWORDS][0];
}



/**
 * Search mask bundling the criteria an entry is tested against:
 * keyword lists for bywords, authors and editors, number lists for
 * editions and years, a magazine selection and the search options
 * (case sensitivity, fuzzy matching and its threshold).
 *
 * NOTE(review): how the criteria are combined by matches() is defined in
 * the implementation file, not in this header.
 */
class RegMask
{
public:
	RegMask();

	// NOTE(review): the five text setters below presumably forward to
	// RegSearchList::setKeyWords() / RegSearchNumList::setNumbers() of the
	// corresponding member -- confirm in the implementation.
	void setBywords(const QString &text);
	void setAuthors(const QString &text);
	void setEditors(const QString &text);
	void setEditions(const QString &text);
	void setYears(const QString &text);
	/** Stores the magazine selection. */
	void setMagazines(unsigned char c);
	/** Stores the case sensitivity option. */
  void setCaseSensitive(bool cs);
	/** Stores whether fuzzy matching is switched on. */
  void setFuzzy(bool on);
	/** Stores the fuzzy matching threshold; the value is not range-checked. */
  void setThreshold(int value);
	
	/** Tests the given entry against this mask. */
	bool matches(const RegEntry &entry) const;
	
private:
	RegSearchList m_bywords;
	RegSearchList m_authors;
	RegSearchList m_editors;
	RegSearchNumList m_editions;
	RegSearchNumList m_years;
	// NOTE(review): presumably a combination of REG_MAGAZINE_* bits.
	unsigned char m_magazines;
	bool m_cs;
	bool m_fuzzy;
	int m_threshold;
};


inline void RegMask::setMagazines(unsigned char c)
{	m_magazines = c; }

inline void RegMask::setCaseSensitive(bool cs)
{	m_cs = cs; }

inline void RegMask::setFuzzy(bool on)
{	m_fuzzy = on; }

inline void RegMask::setThreshold(int value)
{	m_threshold = value; }



/**
 * A Heise register file, derived from QFile.
 *
 * Besides reading entries, the class keeps per-magazine figures (first and
 * last edition, article count, list of editions) for the CT and IX magazines.
 * NOTE(review): these figures are presumably filled in by scanEntries() --
 * confirm in the implementation file.
 */
class RegFile: public QFile
{
public:
	RegFile();
	RegFile(const QString &name);
	~RegFile();

	// NOTE(review): presumably reads the next entry from the file into
	// *entry and returns false on failure or end of file -- confirm.
	bool readEntry(RegEntry *entry);

	bool scanEntries();

	// Per-magazine figures; @p mag is a REG_MAGAZINE_* value.
	int firstEdition(unsigned char mag) const;
	int lastEdition(unsigned char mag) const;
	int articles(unsigned char mag) const;
	
	/**
	 * Returns the CT edition list if the REG_MAGAZINE_CT bit is set in
	 * @p mag, the IX edition list otherwise.
	 */
	RegNumList editions(unsigned char mag) const;

	bool containsEditions(unsigned char mag, const RegNumList &editions) const;
	
private:
	int ct_first_ed;
	int ct_last_ed;
	int ct_articles;
	int ix_first_ed;
	int ix_last_ed;
	int ix_articles;
	RegNumList ct_editions;
	RegNumList ix_editions;
};


/**
 * Returns a copy of the edition list for the requested magazine: the CT
 * list when the REG_MAGAZINE_CT bit is set in @p mag, the IX list in every
 * other case.
 */
inline RegNumList RegFile::editions(unsigned char mag) const
{
	if (mag & REG_MAGAZINE_CT)
		return ct_editions;
	return ix_editions;
}



#endif // REGFILE_H
