package morphology.parser; import java.io.*; import java.text.*; import java.util.*; /** * Service for acquisition of external information, e.g. list of stems and configuration data. * * Project: NGSLT --> NLP --> Words --> Assignment #1 * @author Normunds Grūzītis, Gunta Nešpore, Baiba Saulīte * @version February-March 2006 */ public class Utility { /** * Reads configuration data from an external properties file. * @param properties pattern configuration file. * @return list of key-value pairs. */ public static Properties readConfiguration(File properties) throws IOException { Properties config = new Properties(); FileInputStream fis = new FileInputStream(properties); InputStreamReader isr = new InputStreamReader(fis, "Cp1257"); BufferedReader reader = new BufferedReader(isr); String line = null; while ((line = reader.readLine()) != null) { line = line.trim(); //Valid configuration entry satisfies pattern "KEY=VALUE". if (!line.startsWith("#") && line.indexOf("=") > 0 && line.indexOf("=") < line.length() - 1) { String key = line.substring(0, line.indexOf("=")); String value = line.substring(line.indexOf("=") + 1); config.setProperty(key, value); } } reader.close(); return config; } /** * Reads table of noun stems into memory. * @param lex lexicon data file. * @return list of stems mapped by their morphological descriptions. */ public static SortedMap readNounStems(File lex) throws IOException { SortedMap stems = new TreeMap(Collator.getInstance(new Locale("lv", "LV"))); FileInputStream fis = new FileInputStream(lex); InputStreamReader isr = new InputStreamReader(fis, "Cp1257"); BufferedReader reader = new BufferedReader(isr); String line = null; ArrayList features = null; while ((line = reader.readLine()) != null) { //entry[0] = stem; entry[1] = pos; entry[2] = paradigm. String[] entry = line.split("\t"); if (!stems.containsKey(entry[0])) stems.put(entry[0], new ArrayList()); features = (ArrayList)stems.get(entry[0]); features.add(new NounStemFeature(entry[1], entry[2])); stems.put(entry[0], features); } reader.close(); return stems; } /** * Reads table of morphotactics into memory. * @param lex lexicon data file. * @return list of rules. */ public static ArrayList readRegExRules(File lex) throws IOException { ArrayList rules = new ArrayList(); FileInputStream fis = new FileInputStream(lex); InputStreamReader isr = new InputStreamReader(fis, "Cp1257"); BufferedReader reader = new BufferedReader(isr); String line = null; while ((line = reader.readLine()) != null) { //entry[0]=order; entry[1] = IF rule; entry[2] = THEN rule. String[] entry = line.split("\t"); rules.add(new RegExRule(Integer.parseInt(entry[0]), entry[1], entry[2])); } reader.close(); return rules; } }