For a collection of strings, a larger string containing every one of the smaller strings as a substring is called a superstring. This may be useful, for example, if we have a large number of pieces of a DNA and want to figure out how the full DNA could look like.
For example, for the following strings
ATTAGACCTG
CCTGCCGGAA
AGACCTGCCG
GCCGGAATAC
The shortest superstring will be
ATTAGACCTGCCGGAATAC
The following code is a naive approach to solve the problem. The logic is as follows: Sort the list of strings by length. Take the longest one and call it a superstring. Next, iterate through the list to find the string that has the longest intersection with the superstring. Remove that string from the list and attach to the superstring. Continue until the list is empty, the resulting superstring should be the shortest possible.
public static string ShortestSuperstring(List<string> input) { input = input.OrderByDescending(x => x.Length).ToList(); string superstring = input[0]; input.RemoveAt(0); int counter = input.Count; for (int i = 0; i < counter; i++) { List<IntBoolString> items = new List<IntBoolString>(); for (int j = 0; j < input.Count; j++) { items.Add(GetIntersection(superstring, input[j])); } IntBoolString chosen = items.OrderByDescending(x => x.intValue).First(); superstring = CombineIntoSuper(superstring, chosen); input.Remove(chosen.stringValue); } return superstring; } private static IntBoolString GetIntersection(string super, string candidate) { IntBoolString result = new IntBoolString(); result.stringValue = candidate; int i = 0; while (candidate.Length > i) { int testlen = candidate.Length - i; string leftcan = candidate.Substring(0, testlen); string rightcan = candidate.Substring(i, testlen); string leftsuper = super.Substring(0, testlen); string rightsuper = super.Substring(super.Length - testlen, testlen); if (leftcan == rightsuper || rightcan == leftsuper) { result.boolValue = (leftcan == rightsuper) ? true : false; result.intValue = testlen; return result; } i++; } return result; } private static string CombineIntoSuper(string superstring, IntBoolString chosen) { string toAppend = string.Empty; int lenToAppend = chosen.stringValue.Length - chosen.intValue; toAppend = (chosen.boolValue == true) ? chosen.stringValue.Substring(chosen.stringValue.Length - lenToAppend, lenToAppend) : chosen.stringValue.Substring(0, lenToAppend); superstring = (chosen.boolValue == true) ? superstring + toAppend : toAppend + superstring; return superstring; } public struct IntBoolString { public string stringValue; public int intValue; public bool boolValue; }by Evgeny. Also posted on my website
No comments:
Post a Comment