/************************************************************************
 * PREMIM, version 2.06
 * Copyright 2012,
 * Richard Howey
 * Institute of Genetic Medicine, Newcastle University
 *
 * richard.howey@ncl.ac.uk
 * http://www.staff.ncl.ac.uk/richard.howey/
 *
 * This file is part of PREMIM, the pedigree file processing program for EMIM.
 *
 * PREMIM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PREMIM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PREMIM.  If not, see <http://www.gnu.org/licenses/>.
 ************************************************************************/


/*! \file ProcessData.h
    \brief This file contains classes and methods for processing the pedigree data into separate genotype groups.
    
  
*/

#ifndef __PROCESSDATA
#define __PROCESSDATA

#include <string>
#include <map>
#include <set>
#include <iostream>
#include <fstream>

#include "Data.h"
#include "main.h"

#ifdef USING_GZIP
#include <gzstream.h>
#endif


//! Bring the "std" (standard library) namespace into scope so that its names can be used unqualified.
using namespace std;

//! Maps the names of subjects and pedigrees given in file to an ordinal number.
class MapIds
{
private:
	std::map<std::string, unsigned int> idMap; //!< original name in file, counted id

public:

	MapIds() : idMap() {}

	//! Create a new ID for an item, or return the ID the item already has.
	/*!
	    \param name the name of the item as given in file.
	    \return for a name not yet in the map, the number of stored items plus one;
	    for a name that is already stored, the ID it was given before.

	    A stored name must keep its ID: overwriting it with "size + 1" would leave
	    the size of the map unchanged, so the next new name would receive exactly
	    the same ID and two different names would end up sharing one ID.
	 */
	unsigned int addItem(const std::string & name)
	{
		const std::map<std::string, unsigned int>::const_iterator existing = idMap.find(name);
		if(existing != idMap.end()) return existing->second;

		const unsigned int newId = static_cast<unsigned int>(idMap.size()) + 1;
		idMap[name] = newId;
		return newId;
	}

	//! Look up ID from a name given from file.
	unsigned int getId(const std::string & name);
	//! Look up name from a ID
	std::string getName(const unsigned int & id);
};

//! Maps the allele names given in file to either a 0 or 1.

//! The alleles of a SNP may appear in the .ped pedigree file as letters, e.g. A and G.
//! In the code each of these letters is mapped to the value 0 or 1, and these values
//! in turn map to allele "1" and allele "2" respectively.
class MapSnpAlleleIds
{
public:

	MapSnpAlleleIds() {}

	~MapSnpAlleleIds() {}

	//! Get the allele id (0 or 1) for an allele name of the given SNP.
	//! Defined in the implementation file; how a name not seen before is treated is not visible from this header.
	bool getSnpAlleleId(const unsigned int & snpId, std::string & alleleName);

	//! Get a copy of the allele name to allele id map of the given SNP.
	std::map<std::string, bool> getSnpAlleleNames(const unsigned int & snpId) const;

private:

	std::map<unsigned int, std::map<std::string, bool> > mapSnpAlleles; //!< SNP ID no., allele name, allele id - 0 or 1
};

//! Contains a summary of the counted genotypes groups from pedigrees.
class AllCountedData
{
private:

	CountedTrios caseParentTrios;
	CountedDuos caseFatherDuos;
	CountedDuos caseMotherDuos;
	CountedDuos controlFatherDuos;
	CountedDuos controlMotherDuos;
	CountedParents parentsOfCases;
	CountedParents parentsOfControls;
	CountedSingleSubject fathersOfCases;
	CountedSingleSubject mothersOfCases;
	CountedSingleSubject cases;
	CountedSingleSubject controls;
	
public:

	//! Create an object to hold all of the processed pedigree infomation.
	/*!
	    The output file name for each pedigree subgroup type is set and
	 the number of different genotype groups for each. 
	 */
	AllCountedData(const string & dir, const string & en, const unsigned int & sso) : caseParentTrios("caseparenttrios", 15, "case parent trios", dir, en, sso), caseFatherDuos("casefatherduos", 7, "case father duos", dir, en, sso),  caseMotherDuos("casemotherduos", 7, "case mother duos", dir, en, sso),
		controlFatherDuos("confatherduos", 7, "control father duos", dir, en, sso), controlMotherDuos("conmotherduos", 7, "control mother duos", dir, en, sso), parentsOfCases("caseparents", 9, "case parents", dir, en, sso),
		parentsOfControls("conparents", 9, "control parents", dir, en, sso), fathersOfCases("casefathers", 3, "case fathers", dir, en, sso), mothersOfCases("casemothers", 3, "case mothers", dir, en, sso), cases("cases", 3, "cases", dir, en, sso),
		controls("cons", 3, "controls", dir, en, sso)  {};

	void addCaseParentTrio(const unsigned int & snpId, const bool & reverseAlleleLabels, Subject * father, Subject * mother, Subject * child);
	void addCaseFatherDuo(const unsigned int & snpId, const bool & reverseAlleleLabels, Subject * father, Subject * child);
	void addCaseMotherDuo(const unsigned int & snpId, const bool & reverseAlleleLabels, Subject * mother, Subject * child);
	void addControlFatherDuo(const unsigned int & snpId, const bool & reverseAlleleLabels, Subject * father, Subject * child);
	void addControlMotherDuo(const unsigned int & snpId, const bool & reverseAlleleLabels, Subject * mother, Subject * child);
	void addParentsOfCase(const unsigned int & snpId, const bool & reverseAlleleLabels, Subject * father, Subject * mother);
	void addParentsOfControl(const unsigned int & snpId, const bool & reverseAlleleLabels, Subject * father, Subject * mother);
	void addFatherOfCase(const unsigned int & snpId, const bool & reverseAlleleLabels, Subject * father);
	void addMotherOfCase(const unsigned int & snpId, const bool & reverseAlleleLabels, Subject * mother);
	void addCase(const unsigned int & snpId, const bool & reverseAlleleLabels, Subject * subject);
	void addControl(const unsigned int & snpId, const bool & reverseAlleleLabels, Subject * subject);
	void outputResults(const unsigned int & noOfSnps, const unsigned int & totalNoOfPedigrees, const unsigned int & splitSNPOutput);
	void outputFileSummaries(const unsigned int & noOfSnps, const unsigned int & totalNoOfPedigrees);
	void outputResultOneSnp(const unsigned int & snpId, const unsigned int & splitSNPOutput);
	void outputParameterFile(ofstream & fileOutPut, const unsigned int & noOfSnps, const bool & useCPG);
	
};

//! Contains a list of all pedigrees.
class AllPedigreeData
{
private:
	

public:

	map<unsigned int, Pedigree *> pedigrees; //!< pedigree ID, pedigree

	AllPedigreeData() : pedigrees() {};

	//! Delete all the pedigrees, these belong to this class
	~AllPedigreeData()
	{
		for(map<unsigned int, Pedigree *>::iterator p = pedigrees.begin(); p != pedigrees.end(); ++p)
		{
			delete p->second;
		};
	};

	void addSubjectToPedigree(unsigned int & subjectId, Subject * subject, unsigned int & pedigreeId);
	void outputSummary();
	void restoreAllSubjectsToAllPedigrees();
	void subjectExistsAndAffected(const unsigned int & pedId, const unsigned int & subjectId, MapIds & subjectIds);  
};

//! Used to organise the processing of files and data.
class ProcessData
{
private:
	// Analysis options, set once by the constructor.
	bool extraAffectedTrios, extraUnaffectedTrios, childGenotype, childTrend, motherGenotype,
		motherTrend, imprintingMaternal, imprintingPaternal,
		imprintingMaternalWeinberg, imprintingPaternalWeinberg, estimateAlleleFreq, useMajorAlleleAsRisk;
	unsigned int splitSNPOutput;
	std::string outputDirectory;
	std::string endName;
	std::string probandFileName;
	std::string riskAlleleInFileName;
	std::string riskAlleleOutFileName;
	std::map<unsigned int, std::string> snpNames;
	std::map<unsigned int, std::string> riskAlleleNames;//snp No., risk allele name
	unsigned int noOfSnps;
	std::string pedigreeName, subjectIdName, fatherIdName, motherIdName, sexIdName, affectedIdName;
	std::string allele1Name, allele2Name;
	unsigned int fileType; // 1 = .ped, 2 = .bed, 3 = .gzip (0 until it has been set)
	MapSnpAlleleIds snpAlleleIds;
	bool allele1Id, allele2Id;
	std::ifstream readPedigree;
	std::ifstream readBinaryGenotypeData;
#ifdef USING_GZIP
	igzstream readGzipPedigree;
#endif
	std::ofstream markersFile;
	bool snpMajor;
	unsigned int one;
	unsigned int aBit;
	unsigned int bitCount;
	char buffer[1];
	unsigned int allele1, allele2;


public:
	//! Create the processing object from the chosen options.
	/*!
	    Every argument is copied to the data member named in brackets:
	    \param xa (extraAffectedTrios) \param xu (extraUnaffectedTrios)
	    \param cg (childGenotype) \param ct (childTrend)
	    \param mg (motherGenotype) \param mt (motherTrend)
	    \param im (imprintingMaternal) \param ip (imprintingPaternal)
	    \param imw (imprintingMaternalWeinberg) \param ipw (imprintingPaternalWeinberg)
	    \param eaf (estimateAlleleFreq) \param uma (useMajorAlleleAsRisk)
	    \param so (splitSNPOutput) \param od (outputDirectory) \param en (endName)
	    \param pb (probandFileName) \param rain (riskAlleleInFileName) \param raout (riskAlleleOutFileName)

	    All remaining scalar members are given a defined starting value (zero or
	    false, and 1 for "one") so that none of them holds an indeterminate value
	    before the file reading code assigns it. The initialisers are listed in
	    the order in which the members are declared, which is the order in which
	    they are actually initialised.
	 */
	ProcessData(bool xa, bool xu, bool cg, bool ct, bool mg, bool mt, bool im, bool ip, bool imw,
		bool ipw, bool eaf, bool uma, unsigned int so, std::string od, std::string en, std::string pb, std::string rain, std::string raout) :
		extraAffectedTrios(xa), extraUnaffectedTrios(xu), childGenotype(cg), childTrend(ct), motherGenotype(mg),
		motherTrend(mt), imprintingMaternal(im), imprintingPaternal(ip),
		imprintingMaternalWeinberg(imw), imprintingPaternalWeinberg(ipw),
		estimateAlleleFreq(eaf), useMajorAlleleAsRisk(uma), splitSNPOutput(so), outputDirectory(od), endName(en),
		probandFileName(pb), riskAlleleInFileName(rain), riskAlleleOutFileName(raout),
		snpNames(), riskAlleleNames(), noOfSnps(0),
		fileType(0), allele1Id(false), allele2Id(false),
		snpMajor(false), one('\1'), aBit(0), bitCount(0),
		allele1(0), allele2(0)
	{
		// Arrays cannot portably be given a value in the initialiser list here.
		buffer[0] = 0;
	}

	~ProcessData() {}

	// The member functions below, other than setEstimateAlleleFreq, are defined in the implementation file.
	void process(std::string & fileName, std::string & mapFileName);
	void createMarkerFiles(const unsigned int & splitSNPOutput, std::string & markersFile, std::string & outputDirectory, std::string & endName);
	void createResultsFiles(std::string & outputDirectory, std::string & endName);
	void outputOptionInfo();
	void setNoOfSnps(std::string & fileName, std::string & mapFileName);
	void analyseData();
	bool getReverseAlleles(CountAlleles *& countAlleles, const unsigned int & snpNo) const;
	void setRiskAlleles();
	bool addProcessedPedigree(const unsigned int & snpId, const bool & reverseAlleleLabels, Pedigree * pedigree, AllCountedData & allCountedData,
		std::map<unsigned int, std::set<unsigned int> > & probandSubjectIds);
	bool addProcessedPedigreeProbandSubject(bool & found, bool & foundTrio, bool & foundParentsCon, unsigned int & probandSubjectId,
		const unsigned int & snpId, const bool & reverseAlleleLabels, Pedigree * pedigree, AllCountedData & allCountedData);
	void getSubjectNamesFromFile();
	void getAlleleNamesFromFile();
	//! Set whether allele frequencies are estimated; taken by const reference so that a literal or temporary may also be passed.
	void setEstimateAlleleFreq(const bool & eaf) {estimateAlleleFreq = eaf;}
	void addGenotypeDataToSubject(Subject * subject, const unsigned int & snpId, CountAlleles * countAlleles);
	void addGenotypeDataToSubjectPed(Subject * subject, const unsigned int & snpId, CountAlleles * countAlleles);
	void addGenotypeDataToSubjectBinary(Subject * subject, const unsigned int & snpId, CountAlleles * countAlleles);
	void outputParameterFile(AllCountedData & allCountedData, const unsigned int & noOfSnps) const;
	void outputRiskAlleles(CountAlleles *& countAlleles) const;
};


#endif

