import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.util.Vector; public class Distances { /** the q-gram length */ public static final int Q=7; /** alphabet size; the alphabet is {0,1, ..., asize-1} */ public static final int ASIZE=5; public static int editDistance(String a, String b){ // Edit-Matrix int[][] matrix = new int[a.length()+1][b.length()+1]; //Initialisierung for(int i = 0; i < matrix.length; i++){ matrix[i][0] = i; } for(int i = 0; i < matrix[0].length; i++){ matrix[0][i] = i; } //Berechnung der inneren Werte for(int i = 1; i < matrix.length; i++){ for(int j = 1; j < matrix[0].length; j++){ int insertion = matrix[i][j-1] + 1; int deletion = matrix[i-1][j] + 1; int copsub; if (a.charAt(i-1) == b.charAt(j-1)) copsub = matrix[i-1][j-1]; else copsub = matrix[i-1][j-1] + 1; // Minimum waehlen if (insertion < deletion && insertion < copsub) matrix[i][j] = insertion; else if (deletion < insertion && deletion < copsub) matrix[i][j] = deletion; else matrix[i][j] = copsub; } } return matrix[matrix.length-1][matrix[0].length-1]; } private static byte getCode(char c){ switch (Character.toUpperCase(c)) { case 'A': return 0; case 'C': return 1; case 'G': return 2; case 'T': return 3; case 'N': return 4; default: System.err.println("***error: getCode of character other than ACGTN in getCode(char c)!"); System.exit(1); } return -1; } public static int getRank(String s){ int rank=0; int p=1; // Berechne Rank für Präfix der Länge q (erstes q-Gram) for(int pos=0; pos readMultipleFasta(String filename) { File file = new File(filename); String line = null; Vector sequences = new Vector(); try { BufferedReader br = new BufferedReader(new FileReader(file)); String sequence=null; // Lies Datei zeilenweise while((line=br.readLine())!=null){ // Fasta-header gefunden? if(line.startsWith(">")){ if(sequence!=null){ // Speichere aktuelle Sequenz sequences.add(sequence); } // Initiiere neue Sequenz sequence=""; }else{ // erweitere aktuelle Sequenz sequence+=line.trim(); } } // Speichere letzte Sequenz sequences.add(sequence); br.close(); }catch (Exception e) { e.printStackTrace(); System.exit(1); } return sequences; } public static int[] getProfile(String s){ int[] profile = new int[/*TODO: höchstmöglicher Rank = Länge des Profils*/]; // Erstes q-Gram int rank=getRank(s); profile[rank]++; //TODO: weitere q-Gramme return profile; } public static int qGramDistance(String a, String b){ //TODO... } public static long compDistTimeQgram(String a, String b){ long startTime=System.currentTimeMillis(); // Mittel über 100 Läufe int runs=10; for(int i=0;i"); System.exit(1); } // Einlesen der Eingabe Vector sequences = readMultipleFasta(args[0]); System.err.println("Read "+sequences.size()+" sequences."); // Berechnung aller Distanzen der Sequenz-Paare (0,1), (2,3), etc. for(int i=0; i