#
# DataAnalysis2TPP.awk (c) Magnus Palmblad, The University of Reading 2006-
#
# DataAnalysis2TPP is a simple AWK script to convert the Mascot Generic Format (MGF) files exported
# by Bruker Daltonics DataAnalysis (e.g. by selecting File:Export:Compounds in the menu or by 
# "Analysis.Compounds.Export <output MGF file>, damgf" in a DataAnalysis script) to a TPP-friendly 
# MGF format for use with the XPRESS or ASAPRatio tools for quantitative proteomics in the Institute for 
# Systems Biology/Seattle Proteome Center Trans-Proteomic Pipeline. 
# 
# The scan range of merged MS/MS data and charge must be supplied in the "TITLE" field in the MGF file for 
# XPRESS and ASAPRatio to find the light and heavy peptide pairs for relative quantitation. DataAnalysis2TPP 
# does this and trims off the redundant ".mgf" extension from the filename in the title field, if present.
#
# Run DataAnalysis2TPP with "awk -f DataAnalysis2TPP.awk <DataAnalysis MGF file> > <output TPP-friendly MGF file>" 
# on the command line. AWK itself is included in the default Cygwin package and most Linux distributions. 
#

# Split fields on a space, '=', '/' or '+', so that e.g. "CHARGE=2+" yields
# $1=="CHARGE" and $2=="2", and a "TITLE=..." line yields $1=="TITLE".
BEGIN {FS="[ =/+]"}

# On the first input record, derive the title prefix from the input file name,
# dropping a trailing ".mgf" extension if present. The dot is escaped so that
# only a literal ".mgf" suffix is stripped; an unescaped "." matches any
# character and would wrongly truncate a name such as "samplemgf".
(NR==1) {title_filename=(FILENAME~/\.mgf$/)?substr(FILENAME,1,length(FILENAME)-4):FILENAME}

# Remember the scan range from the most recent "###MSMS:" line: the second
# field is taken as the first scan and the next-to-last field as the last scan.
# NOTE(review): this relies on the layout of DataAnalysis "###MSMS:" lines --
# confirm against a sample export.
($1=="###MSMS:") {first_scan=$2; last_scan=$(NF-1)}

# Pass every line through unchanged, except the original TITLE line, which is
# dropped.
($1!="TITLE") {print $0}

# The CHARGE line has just been echoed by the rule above; follow it with the
# replacement TITLE line of the form <file>.<first scan>.<last scan>.<charge>.
# %i converts each value to an integer.
($1=="CHARGE") {charge=$2; printf("TITLE=%s.%i.%i.%i\n",title_filename,first_scan,last_scan,charge)}