This file is indexed.

/usr/share/hyphy/TemplateBatchFiles/CleanGaps.bf is in hyphy-common 2.2.6+dfsg-3build3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
/*
	Return descriptive metadata about this standard analysis.

	_what : a string key selecting the piece of metadata to return; the
	        recognized keys are "Synopsis", "Input", "Output", "Options",
	        "Further", "Author", "Version" and "Date".

	Returns a string for every key except "Options", for which an
	associative array (option name -> description) is returned.
	Unrecognized keys yield an empty string.
*/
function _standardAnalysisBFHelp (_what)
{
	if (_what == "Synopsis")
	{
		/* the inner quotes must be escaped, otherwise they end the literal early */
		return "Filter \"gappy\" columns in a sequence alignment: i.e. those that contain fewer than a given proportion of sequences with fully or partially resolved characters";
	}
	if (_what == "Input")
	{
		return "A sequence alignment";
	}
	if (_what == "Output")
	{
		return "A sequence alignment with gappy columns stripped out";
	}
	if (_what == "Options")
	{
		_options = {};
		_options ["Filtering threshold"] = "Minimum percent of informative sequences per site to retain the site";
		_options ["Informative characters"] = "Define an informative character as either a fully resolved character (e.g. A) or a partial ambiguity (e.g. R)";
		return _options;
	}
	if (_what == "Further")
	{
		return "";
	}
	if (_what == "Author")
	{
		return "Sergei L Kosakovsky Pond (spond@ucsd.edu)";
	}
	if (_what == "Version")
	{
		return "1.00";
	}
	if (_what == "Date")
	{
		return "20081215";
	}
	/* fall-through for keys this analysis does not document */
	return "";
}

/*--------------------------------------------------------------------------*/

/*
	Main script: read an alignment, ask the user how to define an
	"informative" character and what proportion of sequences must be
	informative at a site, then write out a filter that keeps only the
	sites meeting that threshold.
*/

/* NOTE(review): prompt_for_a_value (used below) is not defined in this file;
   presumably it is supplied by this utility include -- confirm. */
ExecuteAFile ("Utility/GrabBag.bf");

SetDialogPrompt ("Please choose a data file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
fprintf (stdout, "\nRead an alignment on ", ds.species, " sequences with ", ds.sites, " sites from ", LAST_FILE_PATH);

/* echo the tree stored with the alignment, if the data file carried one */
if (IS_TREE_PRESENT_IN_DATA)
{
	fprintf (stdout, "\nTree In Data:", DATAFILE_TREE);
}

/* filter over the whole data set, one character per unit */
DataSetFilter	    all = CreateFilter (ds, 1, "", "");

/* row 0 => filteringOption 0, row 1 => filteringOption 1 */
options				={{"Completely resolved", "Only count completely unambiguious characters (e.g. A,C,G,T for nucleotides) as informative"}
					  {"Partially resolved",  "Also count partially resolved characters (e.g. R,Y,M,S etc for nucleotides)"}};
					  

ChoiceList (filteringOption,"Informative characters?",1,SKIP_NONE,options);
															   
/* a negative selection ends the script (presumably the user cancelled -- confirm) */
if (filteringOption < 0)
{
	return 0;
}
	

fprintf (stdout, "\n");
/* proportion of sequences that must be informative for a site to be kept;
   the numeric arguments are presumably default / lower bound / upper bound /
   integer flag -- verify against the helper's definition */
gating_thresh     = prompt_for_a_value ("Retain sites with at least this proportion of informative sites:",0.1,0,1,0);
/* convert the proportion into a sequence count; adding 0.5 before the
   `$1` step looks like round-to-nearest (assuming `$` is HBL integer
   division -- confirm) */
gating_thresh_seq = (gating_thresh * all.species+0.5)$1;

fprintf 	  (stdout, "Selected informative sites option '", options[filteringOption][0], "' and filtering threshold of '", gating_thresh, "'\n");
/* keyed by pattern index; an entry is set to 1 for every pattern to keep */
retainSites = {};

GetDataInfo     (charInfo, all, "CHARACTERS");
GetDataInfo		(siteToPatternMap,  all);

charCount	  = Columns (charInfo);
/* 1 x charCount row of ones, used below to sum the entries of thisChar */
template	  = {1,charCount}["1"];
/* a character counts as informative when its summed vector is below passcode:
   2 for option 0 (sum must be under 2), charCount for option 1 (sum must be
   under the alphabet size) */
passcode	  = 2;
if (filteringOption == 1)
{
	passcode = charCount;
}

/* work per unique pattern rather than per site; the site -> pattern map is applied when the final filter is built */
for (site = 0; site < all.unique_sites; site = site+1)
{
	seq_count = 0;
	for (sequence = 0; sequence < all.species; sequence = sequence + 1)
	{
		/* NOTE(review): thisChar is presumably a column vector flagging which
		   characters are compatible with this sequence's state -- confirm */
		GetDataInfo (thisChar, all, sequence, site);
		if ((template*thisChar)[0] < passcode)
		{
			seq_count = seq_count + 1;
			/* stop scanning sequences as soon as the threshold is reached */
			if (seq_count >= gating_thresh_seq)
			{
				break;
			}
		}
	}
	if (seq_count >= gating_thresh_seq)
	{
		retainSites [site] = 1;
	}
	SetParameter (STATUS_BAR_STATUS_STRING, "Processing pattern "+(site+1)+"/"+all.unique_sites,0);
}

/* keep a site when the pattern it maps to was flagged in retainSites */
DataSetFilter	filtered = CreateFilter (all, 1, retainSites[siteToPatternMap[siteIndex]]);
fprintf (stdout, "\nRetained ", filtered.sites, "/", all.sites, " sites\n");
SetDialogPrompt ("Saved the filtered alignment to:");

/* write the filtered alignment to a user-chosen file, clearing it first */
fprintf (PROMPT_FOR_FILE, CLEAR_FILE, filtered);