BST – Двоичные деревья поиска

24/12/2015 by Максим Швандт

Задача

Разработать класс, реализующий функциональность дерева двоичного поиска.

Ссылка на условие http://java.mazurok.com/word-of-week/bst

Тесты

input	output
put S 11.11	Ok
put E 22.22	Ok
put A 33.33	Ok
put R 44.44	Ok
put H 55.55	Ok
put C 66.66	Ok
put X 77.77	Ok
put M 88.88	Ok
size	8
min	(A->33.33)/2
max	(X->77.77)/1
print	L0: (S->11.11)/8 .L1: (E->22.22)/6 ..L2: (A->33.33)/2 ...L3: (C->66.66)/1 ..L2: (R->44.44)/3 ...L3: (H->55.55)/2 ....L4: (M->88.88)/1 .L1: (X->77.77)/1
delete E	Ok
size	7
min	(A->33.33)/2
max	(X->77.77)/1
print	L0: (S->11.11)/7 .L1: (H->55.55)/5 ..L2: (A->33.33)/2 ...L3: (C->66.66)/1 ..L2: (R->44.44)/2 ...L3: (M->88.88)/1 .L1: (X->77.77)/1
exit	N/A

Критерий тестирования

В ходе тестирования мы добавляем 8 ключей с уникальными значениями. Затем мы определяем размер, минимальный и максимальный узлы, а также распечатываем всё дерево, чтобы убедиться, что оно имеет ожидаемую конфигурацию. Затем мы удаляем 1 ключ с серединным значением и повторяем вышеперечисленные операции, чтобы увидеть ожидаемые изменения в дереве.

Формат вывода:

Команды min и max выводят одиночный узел в следующем формате: (ключ->значение)/кол-во узлов

Команда print выводит дерево по уровням, каждая строчка имеет следующий формат: уровень: (ключ->значение)/кол-во узлов

Структура программы

Код программы состоит из следующих элементов, объединенных в пакет odessa.uni.imem.maxim:

Класс CountingBST, реализующий функциональность дерева двоичного поиска с подсчетом количества узлов в дереве
Класс CountingBSTTestApp, реализующий тестовое приложение

Класс CountingBST

package odessa.uni.imem.maxim;

/**
 * Binary search tree symbol table whose nodes also record the size of the
 * subtree they root.
 *
 * @param <Key>   key type; keys are compared with {@code compareTo}
 * @param <Value> type of the value stored under each key
 */
class CountingBST<Key extends Comparable<Key>, Value>
{

   /** A tree node: key, value, child links and the size of its subtree. */
   public class Node
   {
      Key key;
      Value value;
      Node left, right;
      // number of nodes in the subtree rooted at this node, itself included
      int count;

      public Node( Key key, Value value, int count)
      {
         this.key = key;
         this.value = value;
         this.left = null;
         this.right = null;
         this.count = count;
      }
   }

   private Node root;

   /**
    * Stores {@code val} under {@code key}. If the key is already present,
    * only the value of its node is replaced.
    */
   public void put( Key key, Value val )
   {
      root = put( root, key, val );
   }

   /**
    * Inserts or updates {@code key} in the subtree rooted at {@code x}.
    *
    * @return the root of the updated subtree
    */
   private Node put( Node x, Key key, Value val )
   {
      if (x == null)
      {
         return new Node( key, val, 1 );
      }
      int order = key.compareTo( x.key );
      if (order == 0)
      {
         x.value = val;
      }
      else if (order < 0)
      {
         x.left = put( x.left, key, val );
      }
      else
      {
         x.right = put( x.right, key, val );
      }
      refreshCount( x );
      return x;
   }

   /**
    * Looks up the value stored under {@code key}.
    *
    * @return the value, or {@code null} when the key is not in the tree
    */
   public Value get( Key key )
   {
      Node current = root;
      while (current != null)
      {
         int order = key.compareTo( current.key );
         if (order == 0)
         {
            return current.value;
         }
         current = (order < 0) ? current.left : current.right;
      }
      return null;
   }

   /** @return the number of nodes in the whole tree */
   public int size()
   {
      return size( root );
   }

   /** @return the recorded subtree size of {@code x}, 0 for a missing node */
   private int size( Node x )
   {
      return (x == null) ? 0 : x.count;
   }

   /** Recomputes {@code x.count} from the recorded sizes of its children. */
   private void refreshCount( Node x )
   {
      x.count = 1 + size( x.left ) + size( x.right );
   }

   /** @return the node with the smallest key, or {@code null} for an empty tree */
   public Node min()
   {
      return min( root );
   }

   /** Follows left links from {@code x} down to the leftmost node. */
   private Node min( Node x )
   {
      if (x == null)
      {
         return null;
      }
      Node leftmost = x;
      while (leftmost.left != null)
      {
         leftmost = leftmost.left;
      }
      return leftmost;
   }

   /** @return the node with the largest key, or {@code null} for an empty tree */
   public Node max()
   {
      return max( root );
   }

   /** Follows right links from {@code x} down to the rightmost node. */
   private Node max( Node x )
   {
      if (x == null)
      {
         return null;
      }
      Node rightmost = x;
      while (rightmost.right != null)
      {
         rightmost = rightmost.right;
      }
      return rightmost;
   }

   /**
    * Unlinks the leftmost node of the subtree rooted at {@code x}.
    *
    * @return the root of the remaining subtree
    */
   private Node deleteMin( Node x )
   {
      if (x == null)
      {
         return null;
      }
      if (x.left == null)
      {
         return x.right;
      }
      x.left = deleteMin( x.left );
      refreshCount( x );
      return x;
   }

   /** Removes {@code key} from the tree; an absent key leaves the tree unchanged. */
   public void delete( Key key )
   {
      root = delete( root, key );
   }

   /**
    * Removes {@code key} from the subtree rooted at {@code x}. A node with two
    * children is replaced by the smallest node of its right subtree.
    *
    * @return the root of the updated subtree
    */
   private Node delete( Node x, Key key )
   {
      if (x == null)
      {
         return null;
      }
      int order = key.compareTo( x.key );
      if (order < 0)
      {
         x.left = delete( x.left, key );
      }
      else if (order > 0)
      {
         x.right = delete( x.right, key );
      }
      else if (x.right == null)
      {
         return x.left;
      }
      else if (x.left == null)
      {
         return x.right;
      }
      else
      {
         // the successor takes the place of the removed node
         Node successor = min( x.right );
         successor.right = deleteMin( x.right );
         successor.left = x.left;
         x = successor;
      }
      refreshCount( x );
      return x;
   }

   /**
    * Prints the subtree rooted at {@code node} in pre-order, one node per line:
    * {@code level} dots, then "L<level>: (key->value)/count".
    */
   private void print( Node node, int level )
   {
      if (node == null)
      {
         return;
      }
      StringBuilder indent = new StringBuilder();
      for( int depth = 0; depth < level; depth++ )
      {
         indent.append( '.' );
      }
      System.out.print( indent.toString() );
      System.out.printf( "L%d: (", level );
      System.out.print( node.key );
      System.out.print( "->" );
      System.out.print( node.value );
      System.out.printf( ")/%d", node.count );
      System.out.println();

      print( node.left, level + 1 );
      print( node.right, level + 1 );
   }

   /** Prints the whole tree to standard output, starting at level 0. */
   public void print()
   {
      print( root, 0 );
   }

   // also maxKey, minKey,

}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

package odessa.uni.imem.maxim;

/**
 * Binary search tree symbol table whose nodes also record the size of the
 * subtree they root.
 *
 * @param <Key>   key type; keys are compared with {@code compareTo}
 * @param <Value> type of the value stored under each key
 */
class CountingBST<Key extends Comparable<Key>, Value>
{

   /** A tree node: key, value, child links and the size of its subtree. */
   public class Node
   {
      Key key;
      Value value;
      Node left, right;
      // number of nodes in the subtree rooted at this node, itself included
      int count;

      public Node( Key key, Value value, int count)
      {
         this.key = key;
         this.value = value;
         this.left = null;
         this.right = null;
         this.count = count;
      }
   }

   private Node root;

   /**
    * Stores {@code val} under {@code key}. If the key is already present,
    * only the value of its node is replaced.
    */
   public void put( Key key, Value val )
   {
      root = put( root, key, val );
   }

   /**
    * Inserts or updates {@code key} in the subtree rooted at {@code x}.
    *
    * @return the root of the updated subtree
    */
   private Node put( Node x, Key key, Value val )
   {
      if (x == null)
      {
         return new Node( key, val, 1 );
      }
      int order = key.compareTo( x.key );
      if (order == 0)
      {
         x.value = val;
      }
      else if (order < 0)
      {
         x.left = put( x.left, key, val );
      }
      else
      {
         x.right = put( x.right, key, val );
      }
      refreshCount( x );
      return x;
   }

   /**
    * Looks up the value stored under {@code key}.
    *
    * @return the value, or {@code null} when the key is not in the tree
    */
   public Value get( Key key )
   {
      Node current = root;
      while (current != null)
      {
         int order = key.compareTo( current.key );
         if (order == 0)
         {
            return current.value;
         }
         current = (order < 0) ? current.left : current.right;
      }
      return null;
   }

   /** @return the number of nodes in the whole tree */
   public int size()
   {
      return size( root );
   }

   /** @return the recorded subtree size of {@code x}, 0 for a missing node */
   private int size( Node x )
   {
      return (x == null) ? 0 : x.count;
   }

   /** Recomputes {@code x.count} from the recorded sizes of its children. */
   private void refreshCount( Node x )
   {
      x.count = 1 + size( x.left ) + size( x.right );
   }

   /** @return the node with the smallest key, or {@code null} for an empty tree */
   public Node min()
   {
      return min( root );
   }

   /** Follows left links from {@code x} down to the leftmost node. */
   private Node min( Node x )
   {
      if (x == null)
      {
         return null;
      }
      Node leftmost = x;
      while (leftmost.left != null)
      {
         leftmost = leftmost.left;
      }
      return leftmost;
   }

   /** @return the node with the largest key, or {@code null} for an empty tree */
   public Node max()
   {
      return max( root );
   }

   /** Follows right links from {@code x} down to the rightmost node. */
   private Node max( Node x )
   {
      if (x == null)
      {
         return null;
      }
      Node rightmost = x;
      while (rightmost.right != null)
      {
         rightmost = rightmost.right;
      }
      return rightmost;
   }

   /**
    * Unlinks the leftmost node of the subtree rooted at {@code x}.
    *
    * @return the root of the remaining subtree
    */
   private Node deleteMin( Node x )
   {
      if (x == null)
      {
         return null;
      }
      if (x.left == null)
      {
         return x.right;
      }
      x.left = deleteMin( x.left );
      refreshCount( x );
      return x;
   }

   /** Removes {@code key} from the tree; an absent key leaves the tree unchanged. */
   public void delete( Key key )
   {
      root = delete( root, key );
   }

   /**
    * Removes {@code key} from the subtree rooted at {@code x}. A node with two
    * children is replaced by the smallest node of its right subtree.
    *
    * @return the root of the updated subtree
    */
   private Node delete( Node x, Key key )
   {
      if (x == null)
      {
         return null;
      }
      int order = key.compareTo( x.key );
      if (order < 0)
      {
         x.left = delete( x.left, key );
      }
      else if (order > 0)
      {
         x.right = delete( x.right, key );
      }
      else if (x.right == null)
      {
         return x.left;
      }
      else if (x.left == null)
      {
         return x.right;
      }
      else
      {
         // the successor takes the place of the removed node
         Node successor = min( x.right );
         successor.right = deleteMin( x.right );
         successor.left = x.left;
         x = successor;
      }
      refreshCount( x );
      return x;
   }

   /**
    * Prints the subtree rooted at {@code node} in pre-order, one node per line:
    * {@code level} dots, then "L<level>: (key->value)/count".
    */
   private void print( Node node, int level )
   {
      if (node == null)
      {
         return;
      }
      StringBuilder indent = new StringBuilder();
      for( int depth = 0; depth < level; depth++ )
      {
         indent.append( '.' );
      }
      System.out.print( indent.toString() );
      System.out.printf( "L%d: (", level );
      System.out.print( node.key );
      System.out.print( "->" );
      System.out.print( node.value );
      System.out.printf( ")/%d", node.count );
      System.out.println();

      print( node.left, level + 1 );
      print( node.right, level + 1 );
   }

   /** Prints the whole tree to standard output, starting at level 0. */
   public void print()
   {
      print( root, 0 );
   }

   // also maxKey, minKey,

}

Все методы разделены на пары — один метод пары приватный, а другой — публичный. Это необходимо для того, чтобы передавать первым аргументом корень дерева, начиная с которого выполняются все операции. Каждый приватный метод содержит первым аргументом узел, начиная с которого будет выполняться операция. Публичный метод вызывает приватный и передает ему в качестве первого аргумента корень дерева. Это относится к основным операциям на дереве:

put — помещает ключ и значение в дерево
get — извлекает значение по ключу
size — возвращает количество узлов дерева
min — находит узел с минимальным ключом
max — находит узел с максимальным ключом
delete — удаление по Хиббарду ( Coursera presentation )
print — печать дерева

Помимо основных полей, необходимых для BST, узел содержит поле count, в котором содержится количество всех узлов поддерева, корнем которого является данный узел, включая сам узел.

Класс CountingBSTTestApp

package odessa.uni.imem.maxim;

import java.util.*;

public class CountingBSTTestApp
{

   // parse text line into tokens, e.g. "put salary 120.22" to "put" "salary" "120.22"
   public static Vector<String> parseTextToTokens( String text )
   {
      Vector<String> tokens = new Vector<String>();
      StringTokenizer st = new StringTokenizer(text);        
      while (st.hasMoreTokens())
      {
         tokens.add(st.nextToken());
      }
      return tokens;
   }

   // helper function: converts string to double if possible or returns null
   public static Double strToDouble( String s )
   {
      Double x = null;
      try
      {
         x = Double.parseDouble( s );
      }
      catch( NumberFormatException e )
      {
      }
      return x;
   }

   // print content of one node into human readable form
   public static void printTreeNode( CountingBST<String,Double>.Node node )
   {
      if ( node != null )
      {
         System.out.print( "(" );
         System.out.print( node.key );
         System.out.print( "->" );
         System.out.print( node.value );
         System.out.print( ")/" );
         System.out.print( node.count );
         System.out.println();
      }
      else
      {
         System.out.println("null");
      }
   }
   
   // main method with command line interpreter
   public static void main(String[] args)
   {
      CountingBST<String,Double> tree = new CountingBST<String,Double>(); 
      
      Scanner in = new Scanner(System.in);
      
      for(;;)
      {
        String inputLine = in.nextLine();
        if ( inputLine.isEmpty() )
        {
           continue;  
        }
        
        String cmd = null;
        String arg1 = null;
        String arg2 = null;
       
        {
           Vector<String> inputTokens = parseTextToTokens( inputLine );
           if ( inputTokens.size() > 0 ) cmd  = inputTokens.get(0);
           if ( inputTokens.size() > 1 ) arg1 = inputTokens.get(1);
           if ( inputTokens.size() > 2 ) arg2 = inputTokens.get(2);
        }
        
        if ( cmd.equals("exit") )
        {
           break;  
        }
        else
        if ( cmd.equals("print") )
        {
           tree.print();
        }
        else 
        if (cmd.equals("size"))
        {
           System.out.println(tree.size());
        }
        else
        if ( cmd.equals("put") )
        {
           String key   = ( arg1 != null && !arg1.isEmpty()) ? arg1 : null ;
           Double value = ( arg2 != null ) ? strToDouble( arg2 ) : null ;
           if ( key == null || value == null )
           {
              System.out.printf("*** ERROR: wrong arguments: <%s> <%s> <%s>\n",cmd,arg1,arg2);
              continue;
           }
           tree.put( key, value );
           System.out.println("Ok");
        }
        else if ( cmd.equals("get") )
        {
           String key   = ( arg1 != null && !arg1.isEmpty()) ? arg1 : null ;
           if ( key == null )
           {
              System.out.printf("*** ERROR: wrong arguments: <%s> <%d>\n",cmd,arg1);
              continue;
           }
           Double value = tree.get( key );
           System.out.println(value);
        }
        else if ( cmd.equals("delete") )
        {
           String key   = ( arg1 != null && !arg1.isEmpty()) ? arg1 : null ;
           if ( key == null )
           {
              System.out.printf("*** ERROR: wrong arguments: <%s> <%d>\n",cmd,arg1);
              continue;
           }
           tree.delete( key );
           System.out.println("Ok");
        }
        else if ( cmd.equals("min") )
        {
           printTreeNode( tree.min() );
        } 
        else if ( cmd.equals("max") )
        {
           printTreeNode( tree.max() );
        }
        
      }
      
      in.close();

  } 
   
}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

package odessa.uni.imem.maxim;

import java.util.*;

public class CountingBSTTestApp

{

// parse text line into tokens, e.g. "put salary 120.22" to "put" "salary" "120.22"

public static Vector<String> parseTextToTokens( String text )

{

Vector<String> tokens = new Vector<String>();

StringTokenizer st = new StringTokenizer(text);

while (st.hasMoreTokens())

{

tokens.add(st.nextToken());

}

return tokens;

}

// helper function: converts string to double if possible or returns null

public static Double strToDouble( String s )

{

Double x = null;

try

{

x = Double.parseDouble( s );

}

catch( NumberFormatException e )

{

}

return x;

}

// print content of one node into human readable form

public static void printTreeNode( CountingBST<String,Double>.Node node )

{

if ( node != null )

{

System.out.print( "(" );

System.out.print( node.key );

System.out.print( "->" );

System.out.print( node.value );

System.out.print( ")/" );

System.out.print( node.count );

System.out.println();

}

else

{

System.out.println("null");

}

// main method with command line interpreter

public static void main(String[] args)

{

CountingBST<String,Double> tree = new CountingBST<String,Double>();

Scanner in = new Scanner(System.in);

for(;;)

{

String inputLine = in.nextLine();

if ( inputLine.isEmpty() )

{

continue;

}

String cmd = null;

String arg1 = null;

String arg2 = null;

{

Vector<String> inputTokens = parseTextToTokens( inputLine );

if ( inputTokens.size() > 0 ) cmd = inputTokens.get(0);

if ( inputTokens.size() > 1 ) arg1 = inputTokens.get(1);

if ( inputTokens.size() > 2 ) arg2 = inputTokens.get(2);

}

if ( cmd.equals("exit") )

{

break;

}

else

if ( cmd.equals("print") )

{

tree.print();

}

else

if (cmd.equals("size"))

{

System.out.println(tree.size());

}

else

if ( cmd.equals("put") )

{

String key = ( arg1 != null && !arg1.isEmpty()) ? arg1 : null ;

Double value = ( arg2 != null ) ? strToDouble( arg2 ) : null ;

if ( key == null || value == null )

{

System.out.printf("*** ERROR: wrong arguments: <%s> <%s> <%s>\n",cmd,arg1,arg2);

continue;

}

tree.put( key, value );

System.out.println("Ok");

}

else if ( cmd.equals("get") )

{

String key = ( arg1 != null && !arg1.isEmpty()) ? arg1 : null ;

if ( key == null )

{

System.out.printf("*** ERROR: wrong arguments: <%s> <%d>\n",cmd,arg1);

continue;

}

Double value = tree.get( key );

System.out.println(value);

}

else if ( cmd.equals("delete") )

{

String key = ( arg1 != null && !arg1.isEmpty()) ? arg1 : null ;

if ( key == null )

{

System.out.printf("*** ERROR: wrong arguments: <%s> <%d>\n",cmd,arg1);

continue;

}

tree.delete( key );

System.out.println("Ok");

}

else if ( cmd.equals("min") )

{

printTreeNode( tree.min() );

}

else if ( cmd.equals("max") )

{

printTreeNode( tree.max() );

}

in.close();

}

Данный класс реализует тестовое приложение для класса CountingBST. Он содержит вспомогательные функции ввода и функцию main, содержащую код теста. Он реализует командный интерпретатор, где каждая команда вызывает соответствующий публичный метод тестируемого класса. Поэтому тест представляет собой текстовый файл с командами для стандартного потока ввода с консоли.

Ссылка на Ideone: https://ideone.com/b0xrPX

acm.timus.ru №2002. Тестовое задание

22/12/2015 by Максим Швандт

Автор задачи: Кирилл Бороздин
Источник задачи: Уральская региональная командная олимпиада по программированию 2013

Ограничения:

Время:	0.5 секунды
Память	64 Мб

Условие

Это было обычное хмурое октябрьское утро. Небо было затянуто тяжёлыми серыми тучами, накрапывал дождь. Капли падали на стёкла автомобилей, били в окна домов. Илья сидел за компьютером и угрюмо взирал на унылый пейзаж за окном. Внезапно его взгляд привлекла надпись, появившаяся в правом нижнем углу экрана: «You have 1 unread email message(s)». Заранее приготовившись удалить бесполезный спам, Илья открыл письмо. Однако оно оказалось куда интереснее…

Вас приветствует отдел по работе с персоналом компании «Рутнок БКС»!

Мы рассмотрели вашу заявку на вакансию разработчика программного обеспечения и были заинтересованы вашей кандидатурой. Для оценки ваших профессиональных навыков мы предлагаем вам выполнить несложное тестовое задание: необходимо реализовать систему регистрации для форума. Она должна поддерживать три операции:

«register username password» — зарегистрировать нового пользователя с именем «username» и установить для него пароль «password». Если такой пользователь уже есть в базе данных, необходимо выдать ошибку «fail: user already exists». Иначе нужно вывести сообщение «success: new user added».
«login username password» — войти в систему от имени пользователя «username» с паролем «password». Если такого пользователя не существует в базе данных, необходимо выдать «fail: no such user». Иначе, если был введен неправильный пароль, нужно выдать «fail: incorrect password». Иначе, если пользователь уже находится в системе в данный момент, необходимо вывести «fail: already logged in». Иначе нужно вывести сообщение «success: user logged in».
«logout username» — выйти из системы пользователем «username». Если такого пользователя не существует, необходимо вывести «fail: no such user». Иначе, если пользователь не находится в системе в данный момент, следует выдать «fail: already logged out». Иначе необходимо выдать сообщение «success: user logged out».

Пользуйтесь этим письмом как формальным описанием алгоритма и строго соблюдайте порядок обработки ошибок. Желаем вам удачи!

И вот Илья, откинув все дела, уже решает тестовое задание. Попробуйте и вы выполнить его!

Исходные данные

В первой строке дано целое число [latex]n[/latex] — количество операций [latex]1\leq n\leq 100[/latex]. В каждой из следующих [latex]n[/latex] строк содержится один запрос в соответствии с форматом, описанным выше. В качестве «username» и «password» могут выступать любые непустые строки длиной до 30 символов включительно. Строки могут состоять только из символов с кодами от 33 до 126.

Результат

Для каждой операции выведите в отдельной строке сообщение в соответствии с форматом, описанным выше. Строго соблюдайте расстановку пробелов и знаков препинания в этих сообщениях.

Пример

Исходные данные

Результат

6

register vasya 12345

logout vasya

success: new user added

fail: incorrect password

success: user logged in

fail: no such user

success: user logged out

fail: already logged out

Код

import java.util.Scanner;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.TreeMap;
import java.lang.NumberFormatException;

/**
 * Forum registration system: reads an operation count and then that many
 * "register", "login" and "logout" commands from standard input, printing
 * one result message per recognised command.
 */
public class Timus2002App
{

   /** Attributes of one account: its password and whether it is logged in. */
   public class AccountInfo
   {
      public String password;
      public boolean loggedIn;

      public AccountInfo(String argPassword, boolean argLoggedIn)
      {
         password = argPassword;
         loggedIn = argLoggedIn;
      }
   }


   /** Account database: maps a user name to its {@link AccountInfo}. */
   public class AccountDatabase
   {
      public static final int OK = 0;
      public static final int NOT_REGISTERED = 1;
      public static final int WRONG_PASSWORD = 2;
      public static final int ALREADY_LOGGED_IN = 3;
      public static final int ALREADY_LOGGED_OUT = 4;

      private TreeMap<String, AccountInfo> map;

      /** Creates an empty database. */
      public AccountDatabase()
      {
         map = new TreeMap<String, AccountInfo>();
      }

      /**
       * Registers a new, logged-out account.
       *
       * @return true when the account was added, false when the name is taken
       */
      public boolean register(String name, String password)
      {
         if (map.get(name) != null)
         {
            return false;
         }

         map.put(name, new AccountInfo(password, false));
         return true;
      }

      /**
       * Logs a user in. Checks are made in this order: unknown user, wrong
       * password, already logged in.
       *
       * @return OK, NOT_REGISTERED, WRONG_PASSWORD or ALREADY_LOGGED_IN
       */
      public int login(String name, String password)
      {
         AccountInfo info = map.get(name);

         if (info == null)
         {
            return NOT_REGISTERED;
         }
         if (!info.password.equals(password))
         {
            return WRONG_PASSWORD;
         }
         if (info.loggedIn)
         {
            return ALREADY_LOGGED_IN;
         }

         info.loggedIn = true;
         return OK;
      }

      /**
       * Logs a user out.
       *
       * @return OK, NOT_REGISTERED or ALREADY_LOGGED_OUT
       */
      public int logout(String name)
      {
         AccountInfo info = map.get(name);

         if ( info == null )
         {
            return NOT_REGISTERED;
         }
         if ( !info.loggedIn )
         {
            return ALREADY_LOGGED_OUT;
         }

         info.loggedIn = false;
         return OK;
      }

   }

   /**
    * Reads the operation count from the first input line.
    *
    * @return the parsed count, or 0 when the line is missing or not an integer
    */
   private static int readOperationCount( Scanner in )
   {
      if ( !in.hasNextLine() )
      {
         return 0;
      }
      try
      {
         return Integer.parseInt( in.nextLine().trim() );
      }
      catch( NumberFormatException ignored )
      {
         // 0 is outside 1..100, so the caller reports the error
         return 0;
      }
   }

   /**
    * Executes one command against the database.
    *
    * @param name     user name, or null when the line did not contain one
    * @param password password, or null when the line did not contain one
    * @return the message to print, or null for an unrecognised command
    */
   private static String execute( AccountDatabase accounts, String command, String name, String password )
   {
      if ( command.equals("register") )
      {
         // both arguments are required; a null password must never be stored
         if ( name == null || password == null )
         {
            return "fail: missing user name or password";
         }
         return accounts.register(name, password)
            ? "success: new user added"
            : "fail: user already exists";
      }

      if ( command.equals("login") )
      {
         if ( name == null || password == null )
         {
            return "fail: missing user name or password";
         }
         switch( accounts.login(name, password) )
         {
            case AccountDatabase.NOT_REGISTERED:
               return "fail: no such user";
            case AccountDatabase.WRONG_PASSWORD:
               return "fail: incorrect password";
            case AccountDatabase.ALREADY_LOGGED_IN:
               return "fail: already logged in";
            default:
               return "success: user logged in";
         }
      }

      if ( command.equals("logout") )
      {
         if ( name == null )
         {
            return "fail: missing user name";
         }
         switch( accounts.logout(name) )
         {
            case AccountDatabase.ALREADY_LOGGED_OUT:
               return "fail: already logged out";
            case AccountDatabase.NOT_REGISTERED:
               return "fail: no such user";
            default:
               return "success: user logged out";
         }
      }

      return null;
   }

   /** Entry point: processes the whole input and prints the result messages. */
   public static void main(String[] args)
   {
      Scanner in = new Scanner(System.in);
      try
      {
         int loginCount = readOperationCount( in );
         if ( loginCount < 1 || loginCount > 100 )
         {
            System.out.println("**** ERROR: wrong number of logins ****");
            return;
         }

         Timus2002App mainObj = new Timus2002App();
         AccountDatabase accounts = mainObj.new AccountDatabase();

         for( int i = 0; i < loginCount; i++ )
         {
            // split the input line into tokens; a missing line yields none
            Vector<String> tokens = new Vector<String>();
            if ( in.hasNextLine() )
            {
               StringTokenizer st = new StringTokenizer(in.nextLine(), " ");
               while (st.hasMoreTokens())
               {
                  tokens.add( st.nextToken() );
               }
            }

            // a missing, empty or spaces-only line carries no command
            if ( tokens.isEmpty() )
            {
               System.out.println("**** ERROR: empty input, abort processing ****");
               break;
            }

            // turn the tokens into a command with its arguments
            String command   = tokens.get(0);
            String name      = ( tokens.size() > 1 ) ? tokens.get(1) : null;
            String password  = ( tokens.size() > 2 ) ? tokens.get(2) : null;

            String message = execute( accounts, command, name, password );
            if ( message != null )
            {
               System.out.println(message);
            }
         }
      }
      finally
      {
         in.close();
      }
   }
}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

import java.util.Scanner;

import java.util.StringTokenizer;

import java.util.Vector;

import java.util.TreeMap;

import java.lang.NumberFormatException;

/**
 * Forum registration system: reads an operation count and then that many
 * "register", "login" and "logout" commands from standard input, printing
 * one result message per recognised command.
 */
public class Timus2002App
{

   /** Attributes of one account: its password and whether it is logged in. */
   public class AccountInfo
   {
      public String password;
      public boolean loggedIn;

      public AccountInfo(String argPassword, boolean argLoggedIn)
      {
         password = argPassword;
         loggedIn = argLoggedIn;
      }
   }


   /** Account database: maps a user name to its {@link AccountInfo}. */
   public class AccountDatabase
   {
      public static final int OK = 0;
      public static final int NOT_REGISTERED = 1;
      public static final int WRONG_PASSWORD = 2;
      public static final int ALREADY_LOGGED_IN = 3;
      public static final int ALREADY_LOGGED_OUT = 4;

      private TreeMap<String, AccountInfo> map;

      /** Creates an empty database. */
      public AccountDatabase()
      {
         map = new TreeMap<String, AccountInfo>();
      }

      /**
       * Registers a new, logged-out account.
       *
       * @return true when the account was added, false when the name is taken
       */
      public boolean register(String name, String password)
      {
         if (map.get(name) != null)
         {
            return false;
         }

         map.put(name, new AccountInfo(password, false));
         return true;
      }

      /**
       * Logs a user in. Checks are made in this order: unknown user, wrong
       * password, already logged in.
       *
       * @return OK, NOT_REGISTERED, WRONG_PASSWORD or ALREADY_LOGGED_IN
       */
      public int login(String name, String password)
      {
         AccountInfo info = map.get(name);

         if (info == null)
         {
            return NOT_REGISTERED;
         }
         if (!info.password.equals(password))
         {
            return WRONG_PASSWORD;
         }
         if (info.loggedIn)
         {
            return ALREADY_LOGGED_IN;
         }

         info.loggedIn = true;
         return OK;
      }

      /**
       * Logs a user out.
       *
       * @return OK, NOT_REGISTERED or ALREADY_LOGGED_OUT
       */
      public int logout(String name)
      {
         AccountInfo info = map.get(name);

         if ( info == null )
         {
            return NOT_REGISTERED;
         }
         if ( !info.loggedIn )
         {
            return ALREADY_LOGGED_OUT;
         }

         info.loggedIn = false;
         return OK;
      }

   }

   /**
    * Reads the operation count from the first input line.
    *
    * @return the parsed count, or 0 when the line is missing or not an integer
    */
   private static int readOperationCount( Scanner in )
   {
      if ( !in.hasNextLine() )
      {
         return 0;
      }
      try
      {
         return Integer.parseInt( in.nextLine().trim() );
      }
      catch( NumberFormatException ignored )
      {
         // 0 is outside 1..100, so the caller reports the error
         return 0;
      }
   }

   /**
    * Executes one command against the database.
    *
    * @param name     user name, or null when the line did not contain one
    * @param password password, or null when the line did not contain one
    * @return the message to print, or null for an unrecognised command
    */
   private static String execute( AccountDatabase accounts, String command, String name, String password )
   {
      if ( command.equals("register") )
      {
         // both arguments are required; a null password must never be stored
         if ( name == null || password == null )
         {
            return "fail: missing user name or password";
         }
         return accounts.register(name, password)
            ? "success: new user added"
            : "fail: user already exists";
      }

      if ( command.equals("login") )
      {
         if ( name == null || password == null )
         {
            return "fail: missing user name or password";
         }
         switch( accounts.login(name, password) )
         {
            case AccountDatabase.NOT_REGISTERED:
               return "fail: no such user";
            case AccountDatabase.WRONG_PASSWORD:
               return "fail: incorrect password";
            case AccountDatabase.ALREADY_LOGGED_IN:
               return "fail: already logged in";
            default:
               return "success: user logged in";
         }
      }

      if ( command.equals("logout") )
      {
         if ( name == null )
         {
            return "fail: missing user name";
         }
         switch( accounts.logout(name) )
         {
            case AccountDatabase.ALREADY_LOGGED_OUT:
               return "fail: already logged out";
            case AccountDatabase.NOT_REGISTERED:
               return "fail: no such user";
            default:
               return "success: user logged out";
         }
      }

      return null;
   }

   /** Entry point: processes the whole input and prints the result messages. */
   public static void main(String[] args)
   {
      Scanner in = new Scanner(System.in);
      try
      {
         int loginCount = readOperationCount( in );
         if ( loginCount < 1 || loginCount > 100 )
         {
            System.out.println("**** ERROR: wrong number of logins ****");
            return;
         }

         Timus2002App mainObj = new Timus2002App();
         AccountDatabase accounts = mainObj.new AccountDatabase();

         for( int i = 0; i < loginCount; i++ )
         {
            // split the input line into tokens; a missing line yields none
            Vector<String> tokens = new Vector<String>();
            if ( in.hasNextLine() )
            {
               StringTokenizer st = new StringTokenizer(in.nextLine(), " ");
               while (st.hasMoreTokens())
               {
                  tokens.add( st.nextToken() );
               }
            }

            // a missing, empty or spaces-only line carries no command
            if ( tokens.isEmpty() )
            {
               System.out.println("**** ERROR: empty input, abort processing ****");
               break;
            }

            // turn the tokens into a command with its arguments
            String command   = tokens.get(0);
            String name      = ( tokens.size() > 1 ) ? tokens.get(1) : null;
            String password  = ( tokens.size() > 2 ) ? tokens.get(2) : null;

            String message = execute( accounts, command, name, password );
            if ( message != null )
            {
               System.out.println(message);
            }
         }
      }
      finally
      {
         in.close();
      }
   }
}

Данная программа представляет собой типичный пример использования таблицы символов, согласно терминологии Coursera. Для доступа к учетным записям используется интерфейс Map, а для реализации самой базы данных учетных записей — объект типа TreeMap. Учетная запись пользователя реализована в виде одного элемента типа Map.Entry, где имя пользователя — это ключ, а атрибуты учетной записи — пароль и флаг подключен/отключен — реализованы в виде отдельной структуры AccountInfo, которая является значением этого ключа.

Время работы	Выделено памяти
0.124	1 928 КБ

Ссылка на Ideone: https://ideone.com/3Y2W4z

Segmented Array

05/12/2015 by Максим Швандт

Задача

Необходимо реализовать некоторую коллекцию индексированных данных некоторого типа T, выглядящую как несколько необычный массив. Вместо операции присваивания значения элементу массива в нём имеется операция присваивания одинаковых значений всем элементам для некоторого диапазона индексов. Также и операции поиска минимума и максимума должны работать для некоторого диапазона значений индексов.

При написании класса, реализующего данный интерфейс необходимо стремиться к максимальной эффективности выполнения наиболее частых операций и учитывать особенности использования отражённые в следующей таблице с заданиями:

Big array	Small \|T\|	Often				Student
Big array	Small \|T\|	get()	set()	indexOf()	min()/max()	Student
1	0	0	0	1	0	Швандт

Тест

input	output
set 2 2 H	ok
set 3 3 E	ok
set 4 5 L	ok
set 6 6 O	ok
set 8 8 W	ok
set 9 9 O	ok
set 10 10 R	ok
set 11 11 L	ok
set 12 12 D	ok
set 13 13 !	ok
set 14 14 !	ok
set 15 15 !	ok
show	segment#0 { null null H E } segment#1 { L L O null } segment#2 { W O R L } segment#3 { D ! ! ! }
exit	N/A

Структура программы

Код программы состоит из следующих элементов, объединенных в пакет odessa.uni.imem.maxim:

Интерфейс SegmentedArray, который описывает заданную функциональность доступа к сегментированному массиву
Класс SegmentedArrayImpl, имплементирует данный интерфейс
Класс SegmentedArrayApp, который имплементирует тестовое приложение, он содержит метод main и вспомогательные методы ввода данных и вывода результата.

Интерфейс SegmentedArray

package odessa.uni.imem.maxim;

/**
 * Indexed collection whose elements are assigned by index range rather than
 * one at a time.
 *
 * @param <T> element type; the type parameter itself is unbounded, but
 *            implementations need an ordering on {@code T} to provide
 *            {@link #min(int, int)}
 */
public interface SegmentedArray<T>
{
   /**
    * Returns the absolute index of an element equal to {@code value},
    * searching from {@code fromIndex} onwards.
    */
   int indexOf(T value, int fromIndex);

   /**
    * Assigns the same {@code value} to every element in the range
    * {@code fromIndex}..{@code toIndex}.
    */
   void set(int fromIndex, int toIndex, T value);

   /**
    * Returns the minimal element in the range
    * {@code fromIndex}..{@code toIndex}.
    */
   T min(int fromIndex, int toIndex);
}

package odessa.uni.imem.maxim;

/**
 * Indexed collection whose elements are assigned by index range rather than
 * one at a time.
 *
 * @param <T> element type; the type parameter itself is unbounded, but
 *            implementations need an ordering on {@code T} to provide
 *            {@link #min(int, int)}
 */
public interface SegmentedArray<T>
{
   /**
    * Returns the absolute index of an element equal to {@code value},
    * searching from {@code fromIndex} onwards.
    */
   int indexOf(T value, int fromIndex);

   /**
    * Assigns the same {@code value} to every element in the range
    * {@code fromIndex}..{@code toIndex}.
    */
   void set(int fromIndex, int toIndex, T value);

   /**
    * Returns the minimal element in the range
    * {@code fromIndex}..{@code toIndex}.
    */
   T min(int fromIndex, int toIndex);
}

Методы интерфейса:

indexOf — возвращает абсолютный индекс элемента в массиве начиная с заданного индекса.
set — присваивает последовательности элементов массива одно и то же значение в заданном диапазоне
min — возвращает минимальный элемент в заданном диапазоне

Класс SegmentedArrayImpl

package odessa.uni.imem.maxim;

public class SegmentedArrayImpl<Item extends Comparable<Item>> implements SegmentedArray<Item>
{
   private int size;
   private int segmentSize;
   private Object[][] segments;

   public SegmentedArrayImpl(int aSegmentSize, int aSegmentCount)
   {
      segmentSize = aSegmentSize;
      size = segmentSize * aSegmentCount;

      segments = new Object[aSegmentCount][segmentSize];
   }

   public int indexOf(Item value, int fromIndex)
   {
      if (fromIndex >= size)
      {
         return -1;
      }

      int segmentIndex = findSegmentIndex(fromIndex);
      int indexInSegment = fromIndex - segmentIndex * segmentSize;

      while (segmentIndex < segments.length)
      {
         // @SuppressWarnings("unchecked")
         if (((Item) (segments[segmentIndex][indexInSegment])).compareTo(value) == 0)
         {
            return segmentIndex * indexInSegment;
         }

         indexInSegment++;

         if (indexInSegment == segmentSize)
         {
            indexInSegment = 0;
            segmentIndex++;
         }
      }

      return -1;
   }

   public void set(int fromIndex, int toIndex, Item value)
   {
      if (fromIndex >= size || toIndex >= size || fromIndex > toIndex)
      {
         return;
      }

      int index = fromIndex;
      int segmentIndex = findSegmentIndex(fromIndex);
      int indexInSegment = fromIndex - segmentIndex * segmentSize;

      while (segmentIndex < segments.length && index <= toIndex)
      {
         // @SuppressWarnings("unchecked")
         segments[segmentIndex][indexInSegment] = value;
         index++;
         indexInSegment++;

         if (indexInSegment == segmentSize)
         {
            indexInSegment = 0;
            segmentIndex++;
         }
      }
   }

   public Item min(int fromIndex, int toIndex)
   {
      if (fromIndex >= size || toIndex >= size || fromIndex > toIndex)
      {
         return null;
      }

      int index = fromIndex;
      int segmentIndex = findSegmentIndex(fromIndex);
      int indexInSegment = fromIndex - segmentIndex * segmentSize;
      Item minimum = null;

      while (segmentIndex < segments.length && index < toIndex)
      {
         // @SuppressWarnings("unchecked")
         Item val = (Item) segments[segmentIndex][indexInSegment];
         if (minimum == null || val.compareTo(minimum) < 0)
         {
            minimum = val;
         }

         index++;
         indexInSegment++;

         if (indexInSegment == segmentSize)
         {
            indexInSegment = 0;
            segmentIndex++;
         }
      }

      return null;
   }

   // check if the absolute index falls into given segment index
   private int compareIndexToSegment(int index, int segmentIndex)
   {
      int lowerIndex = segmentIndex * segmentSize;
      int higherIndex = (segmentIndex + 1) * segmentSize - 1;

      if (index < lowerIndex) // index falls before segment
      {
         return -1;
      }
      else if (index > higherIndex) // index falls after segment
      {
         return 1;
      }

      return 0;
   }

   // find segment that contains given absolute index using binary search
   // in array
   private int findSegmentIndex(int index)
   {
      int lo = 0; // lower segment index
      int hi = segments.length - 1; // higher segment index

      while (lo <= hi)
      {
         int mid = lo + (hi - lo) / 2; // middle segment index
         int cmp = compareIndexToSegment(index, mid);

         if (cmp < 0)
         {
            hi = mid - 1;
         }
         else if (cmp > 0)
         {
            lo = mid + 1;
         }
         else if (cmp == 0)
         {
            return mid;
         }
      }

      return lo;
   }

   public void show()
   {
      for (int i = 0; i < segments.length; i++)
      {
         System.out.printf("segment#%d { ", i);

         for (int j = 0; j < segmentSize; j++)
         {
            System.out.print((Item) segments[i][j]);
            System.out.print(" ");
         }

         System.out.printf("}\n");
      }
   }

}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

package odessa.uni.imem.maxim;

public class SegmentedArrayImpl<Item extends Comparable<Item>> implements SegmentedArray<Item>

{

private int size;

private int segmentSize;

private Object[][] segments;

public SegmentedArrayImpl(int aSegmentSize, int aSegmentCount)

{

segmentSize = aSegmentSize;

size = segmentSize * aSegmentCount;

segments = new Object[aSegmentCount][segmentSize];

}

public int indexOf(Item value, int fromIndex)

{

if (fromIndex >= size)

{

return -1;

}

int segmentIndex = findSegmentIndex(fromIndex);

int indexInSegment = fromIndex - segmentIndex * segmentSize;

while (segmentIndex < segments.length)

{

// @SuppressWarnings("unchecked")

if (((Item) (segments[segmentIndex][indexInSegment])).compareTo(value) == 0)

{

return segmentIndex * indexInSegment;

}

indexInSegment++;

if (indexInSegment == segmentSize)

{

indexInSegment = 0;

segmentIndex++;

}

return -1;

}

public void set(int fromIndex, int toIndex, Item value)

{

if (fromIndex >= size || toIndex >= size || fromIndex > toIndex)

{

return;

}

int index = fromIndex;

int segmentIndex = findSegmentIndex(fromIndex);

int indexInSegment = fromIndex - segmentIndex * segmentSize;

while (segmentIndex < segments.length && index <= toIndex)

{

// @SuppressWarnings("unchecked")

segments[segmentIndex][indexInSegment] = value;

index++;

indexInSegment++;

if (indexInSegment == segmentSize)

{

indexInSegment = 0;

segmentIndex++;

}

public Item min(int fromIndex, int toIndex)

{

if (fromIndex >= size || toIndex >= size || fromIndex > toIndex)

{

return null;

}

int index = fromIndex;

int segmentIndex = findSegmentIndex(fromIndex);

int indexInSegment = fromIndex - segmentIndex * segmentSize;

Item minimum = null;

while (segmentIndex < segments.length && index < toIndex)

{

// @SuppressWarnings("unchecked")

Item val = (Item) segments[segmentIndex][indexInSegment];

if (minimum == null || val.compareTo(minimum) < 0)

{

minimum = val;

}

index++;

indexInSegment++;

if (indexInSegment == segmentSize)

{

indexInSegment = 0;

segmentIndex++;

}

return null;

}

// check if the absolute index falls into given segment index

private int compareIndexToSegment(int index, int segmentIndex)

{

int lowerIndex = segmentIndex * segmentSize;

int higherIndex = (segmentIndex + 1) * segmentSize - 1;

if (index < lowerIndex) // index falls before segment

{

return -1;

}

else if (index > higherIndex) // index falls after segment

{

return 1;

}

return 0;

}

// find segment that contains given absolute index using binary search

// in array

private int findSegmentIndex(int index)

{

int lo = 0; // lower segment index

int hi = segments.length - 1; // higher segment index

while (lo <= hi)

{

int mid = lo + (hi - lo) / 2; // middle segment index

int cmp = compareIndexToSegment(index, mid);

if (cmp < 0)

{

hi = mid - 1;

}

else if (cmp > 0)

{

lo = mid + 1;

}

else if (cmp == 0)

{

return mid;

}

return lo;

}

public void show()

{

for (int i = 0; i < segments.length; i++)

{

System.out.printf("segment#%d { ", i);

for (int j = 0; j < segmentSize; j++)

{

System.out.print((Item) segments[i][j]);

System.out.print(" ");

}

System.out.printf("}\n");

}

Данный класс реализует интерфейс SegmentedArray с учетом требования задачи, а именно:

Допускаются массивы очень большого размера
Методы indexOf и max должны работать максимально быстро

Используемый алгоритм

С учетом условий задачи был выбран алгоритм, описанный в статье Segmented Tree.

Согласно условию задачи, массив может быть очень большим, а это означает очень широкий диапазон индексов элементов. Следовательно, стоит задача максимально быстро находить начальный и конечный сегменты, которые покрывают индексы диапазона, заданные в качестве параметров методов интерфейса.

Выбранный алгоритм позволяет быстро находить нужные сегменты благодаря бинарному дереву, которым описывается набор диапазонов.

Алгоритм поиска нужного сегмента реализован с помощью методов findSegmentIndex и compareIndexToSegment.

Первый метод — это обычный двоичный поиск в массиве отсортированных элементов, которыми являются номера сегментов в порядке их следования. Данный метод заимствован из Coursera. Внутри он использует второй метод compareIndexToSegment для проверки попадания заданного абсолютного индекса в сегмент с заданным номером, т. е. он реализует необходимый для бинарного поиска метод сравнения, где фактически абсолютный индекс сравнивается с сегментом с целью его выбора.

Методы родительского интерфейса

indexOf — находит нужный сегмент посредством findSegmentIndex и далее — индекс в сегменте, затем производит поиск в этом и последующих сегментах нужного значения
set — находит первый и последний сегменты посредством findSegmentIndex, которые покрывают входные индексы диапазона, затем выполняет запись начиная с начального сегмента и индекса в нем, и заканчивая последним сегментом и индексом в нем.
min — находит диапазон сегментов для поиска таким же образом, как и метод set, и находит минимальное значение в данном диапазоне сегментов

Собственные методы

конструктор — принимает 2 аргумента — размер и количество сегментов
compareIndexToSegment — проверяет, попадает ли абсолютный индекс в данный сегмент и возвращает одно из значений: -1 — если индекс перед сегментом, 0 — если попадает в сегмент, 1 — если после сегмента
findSegmentIndex — находит сегмент, который содержит данный абсолютный индекс используя бинарный поиск в массиве
show — диагностическая печать содержимого

Класс SegmentedArrayApp

package odessa.uni.imem.maxim;

import java.util.*;

/**
 * Console test driver for {@code SegmentedArrayImpl}: reads one command per
 * line from standard input and maps it onto a call on the array.
 *
 * Supported commands: {@code set <from> <to> <value>},
 * {@code index <value> <from>}, {@code min <from> <to>}, {@code show},
 * {@code exit}. Unknown commands are ignored.
 */
public class SegmentedArrayApp
{
   /** Splits {@code text} into whitespace-separated words. */
   public static Vector<String> parseTextToTokens(String text)
   {
      Vector<String> tokens = new Vector<String>();
      StringTokenizer st = new StringTokenizer(text);

      while (st.hasMoreTokens())
      {
         tokens.add(st.nextToken());
      }

      return tokens;
   }

   /**
    * Parses a non-negative decimal integer.
    *
    * @return the parsed value, or {@code null} if {@code s} is {@code null},
    *         is not a number, or does not fit into a non-negative {@code int}
    */
   public static Integer strToInt(String s)
   {
      try
      {
         int parsed = Integer.parseUnsignedInt(s);

         // parseUnsignedInt accepts values up to 2^32-1 and returns those
         // above Integer.MAX_VALUE as negative ints; they are not valid indices
         return (parsed < 0) ? null : Integer.valueOf(parsed);
      }
      catch (NumberFormatException ignored)
      {
         // not a number: reported to the caller as null
         return null;
      }
   }

   /** Runs the command loop until {@code exit} is entered or input ends. */
   public static void main(String[] args)
   {
      SegmentedArrayImpl<String> storage = new SegmentedArrayImpl<String>(4, 4);

      // try-with-resources closes the scanner on every exit path
      try (Scanner in = new Scanner(System.in))
      {
         // stop at end of input instead of failing inside nextLine()
         while (in.hasNextLine())
         {
            Vector<String> inputTokens = parseTextToTokens(in.nextLine());
            if (inputTokens.isEmpty())
            {
               continue; // empty or whitespace-only line
            }

            String cmd = inputTokens.get(0);
            String arg1 = (inputTokens.size() > 1) ? inputTokens.get(1) : null;
            String arg2 = (inputTokens.size() > 2) ? inputTokens.get(2) : null;
            String arg3 = (inputTokens.size() > 3) ? inputTokens.get(3) : null;

            if (cmd.equals("exit"))
            {
               break;
            }
            else if (cmd.equals("show"))
            {
               storage.show();
            }
            else if (cmd.equals("index"))
            {
               String value = arg1;
               Integer fromIndex = strToInt(arg2);

               if (value == null || fromIndex == null)
               {
                  // arguments are raw strings, so they are formatted with %s
                  System.out.printf("*** ERROR: wrong arguments: <%s> <%s> <%s>\n", cmd, arg1, arg2);
                  continue;
               }

               System.out.println(storage.indexOf(value, fromIndex));
            }
            else if (cmd.equals("set"))
            {
               Integer fromIndex = strToInt(arg1);
               Integer toIndex = strToInt(arg2);
               String value = arg3;

               if (fromIndex == null || toIndex == null || value == null)
               {
                  System.out.printf("*** ERROR: wrong arguments: <%s> <%s> <%s> <%s>\n", cmd, arg1, arg2, arg3);
                  continue;
               }

               storage.set(fromIndex, toIndex, value);
               System.out.println("Ok");
            }
            else if (cmd.equals("min"))
            {
               Integer fromIndex = strToInt(arg1);
               Integer toIndex = strToInt(arg2);

               if (fromIndex == null || toIndex == null)
               {
                  System.out.printf("*** ERROR: wrong arguments: <%s> <%s> <%s>\n", cmd, arg1, arg2);
                  continue;
               }

               System.out.println(storage.min(fromIndex, toIndex));
            }
         }
      }
   }

}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

package odessa.uni.imem.maxim;

import java.util.*;

public class SegmentedArrayApp

{

public static Vector<String> parseTextToTokens(String text)

{

Vector<String> tokens = new Vector<String>();

StringTokenizer st = new StringTokenizer(text);

while (st.hasMoreTokens())

{

tokens.add(st.nextToken());

}

return tokens;

}

public static Integer strToInt(String s)

{

Integer x = null;

try

{

x = Integer.parseUnsignedInt(s);

}

catch (NumberFormatException e)

{

}

return x;

}

public static void main(String[] args)

{

SegmentedArrayImpl<String> storage = new SegmentedArrayImpl<String>(4, 4);

Scanner in = new Scanner(System.in);

for (;;)

{

String inputLine = in.nextLine();

if (inputLine.isEmpty())

{

continue;

}

String cmd = null;

String arg1 = null;

String arg2 = null;

String arg3 = null;

{

Vector<String> inputTokens = parseTextToTokens(inputLine);

if (inputTokens.size() > 0)

{

cmd = inputTokens.get(0);

}

if (inputTokens.size() > 1)

{

arg1 = inputTokens.get(1);

}

if (inputTokens.size() > 2)

{

arg2 = inputTokens.get(2);

}

if (inputTokens.size() > 3)

{

arg3 = inputTokens.get(3);

}

if (cmd.equals("exit"))

{

break;

}

else if (cmd.equals("show"))

{

storage.show();

}

else if (cmd.equals("index"))

{

String value = (arg1 != null && !arg1.isEmpty()) ? arg1 : null;

Integer fromIndex = (arg2 != null) ? strToInt(arg2) : null;

if (value == null || fromIndex == null)

{

System.out.printf("*** ERROR: wrong arguments: <%s> <%s> <%d>\n", cmd, arg1, arg2);

continue;

}

System.out.println(storage.indexOf(value, fromIndex));

}

else if (cmd.equals("set"))

{

Integer fromIndex = (arg1 != null) ? strToInt(arg1) : null;

Integer toIndex = (arg2 != null) ? strToInt(arg2) : null;

String value = (arg3 != null && !arg3.isEmpty()) ? arg3 : null;

if (fromIndex == null || toIndex == null || value == null)

{

System.out.printf("*** ERROR: wrong arguments: <%s> <%d> <%d> <%s>\n", cmd, arg1, arg2, arg3);

continue;

}

storage.set(fromIndex, toIndex, value);

System.out.println("Ok");

}

else if (cmd.equals("min"))

{

Integer fromIndex = (arg1 != null) ? strToInt(arg1) : null;

Integer toIndex = (arg2 != null) ? strToInt(arg2) : null;

if (fromIndex == null || toIndex == null)

{

System.out.printf("*** ERROR: wrong arguments: <%s> <%d> <%d>\n", cmd, arg1, arg2);

continue;

}

System.out.println(storage.min(fromIndex, toIndex));

}

in.close();

}

Данный класс реализует тестовое приложение для интерфейса. Он содержит вспомогательные функции ввода и функцию main, содержащую код теста.

Методы класса

parseTextToTokens — разбирает входной текст на слова используя известный класс StringTokenizer и возвращает вектор слов
strToInt — превращает строку в число с подавлением исключения NumberFormatException
main — реализует простейший интерпретатор команд, в котором каждая команда представляет соответствующий интерфейсный метод, состоит из цикла, где на каждой итерации посредством объекта Scanner читается полностью входная строка, которая затем разбирается на команду и ее аргументы посредством parseTextToTokens, это в дальнейшем превращается в соответствующие вызовы интерфейсных методов.

Ссылка на Ideone: https://ideone.com/BknxiN

Java Collections Framework: Map. Частотный словарь.

29/11/2015 by Максим Швандт

Задача 1:

Получив на входе корпус языка (огромный набор атрибутированных текстов на каком-нибудь языке) построить частотный словарь. Знаки препинания, скобки, кавычки и числа должны быть удалены. Слова, содержащие в себе не буквенные символы, игнорируются целиком.

Задача 2:

При автоматическом переводе необходимо из нескольких слов выбрать наиболее употребительное. Необходимо построить эффективную реализацию функции, которая для данного множества (Set) слов-ключей, определит то, которому соответствует максимальное значение.

Тест

input

output

aaa for bbb aaa for if
%EOF%

Word frequency statistics:

aaa   ,   2,   0.250000
bbb   ,   1,   0.125000
for   ,   2,   0.250000
if   ,   1,   0.125000
—————————————

Select max frequent token from: for do to while if
=> selected is: for

To be, or not to be: that is the question:
Whether ’tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles,
And by opposing end them? To die: to sleep;
No more; and by a sleep to say we end
The heart-ache and the thousand natural shocks
That flesh is heir to, ’tis a consummation
Devoutly to be wish’d. To die, to sleep;
To sleep: perchance to dream: ay, there’s the rub;
For in that sleep of death what dreams may come
When we have shuffled off this mortal coil,
Must give us pause: there’s the respect
That makes calamity of so long life;
For who would bear the whips and scorns of time,
The oppressor’s wrong, the proud man’s contumely,
The pangs of despised love, the law’s delay,
The insolence of office and the spurns
That patient merit of the unworthy takes,
When he himself might his quietus make
With a bare bodkin? who would fardels bear,
To grunt and sweat under a weary life,
But that the dread of something after death,
The undiscover’d country from whose bourn
No traveller returns, puzzles the will
And makes us rather bear those ills we have
Than fly to others that we know not of?
Thus conscience does make cowards of us all;
And thus the native hue of resolution
Is sicklied o’er with the pale cast of thought,
And enterprises of great pith and moment
With this regard their currents turn awry,
And lose the name of action. — Soft you now!
The fair Ophelia! Nymph, in thy orisons
Be all my sins remember’d.

%EOF%

Word frequency statistics:

And   ,   5,   0.007321
Be   ,   1,   0.001464
But   ,   1,   0.001464
Devoutly   ,   1,   0.001464
For   ,   2,   0.002928
Is   ,   1,   0.001464
Must   ,   1,   0.001464
No   ,   2,   0.002928
Nymph   ,   1,   0.001464
Ophelia   ,   1,   0.001464
Or   ,   1,   0.001464
Soft   ,   1,   0.001464
Than   ,   1,   0.001464
That   ,   3,   0.004392
The   ,   7,   0.010249
Thus   ,   1,   0.001464
To   ,   5,   0.007321
When   ,   2,   0.002928
Whether   ,   1,   0.001464
With   ,   2,   0.002928
a   ,   5,   0.007321
ache   ,   1,   0.001464
action   ,   1,   0.001464
after   ,   1,   0.001464
against   ,   1,   0.001464
all   ,   2,   0.002928
and   ,   7,   0.010249
arms   ,   1,   0.001464
arrows   ,   1,   0.001464
awry   ,   1,   0.001464
ay   ,   1,   0.001464
bare   ,   1,   0.001464
be   ,   3,   0.004392
bear   ,   3,   0.004392
bodkin   ,   1,   0.001464
bourn   ,   1,   0.001464
by   ,   2,   0.002928
calamity   ,   1,   0.001464
cast   ,   1,   0.001464
coil   ,   1,   0.001464
come   ,   1,   0.001464
conscience   ,   1,   0.001464
consummation   ,   1,   0.001464
contumely   ,   1,   0.001464
country   ,   1,   0.001464
cowards   ,   1,   0.001464
currents   ,   1,   0.001464
d   ,   3,   0.004392
death   ,   2,   0.002928
delay   ,   1,   0.001464
despised   ,   1,   0.001464
die   ,   2,   0.002928
does   ,   1,   0.001464
dread   ,   1,   0.001464
dream   ,   1,   0.001464
dreams   ,   1,   0.001464
end   ,   2,   0.002928
enterprises   ,   1,   0.001464
er   ,   1,   0.001464
fair   ,   1,   0.001464
fardels   ,   1,   0.001464
flesh   ,   1,   0.001464
fly   ,   1,   0.001464
fortune   ,   1,   0.001464
from   ,   1,   0.001464
give   ,   1,   0.001464
great   ,   1,   0.001464
grunt   ,   1,   0.001464
have   ,   2,   0.002928
he   ,   1,   0.001464
heart   ,   1,   0.001464
heir   ,   1,   0.001464
himself   ,   1,   0.001464
his   ,   1,   0.001464
hue   ,   1,   0.001464
ills   ,   1,   0.001464
in   ,   3,   0.004392
insolence   ,   1,   0.001464
is   ,   2,   0.002928
know   ,   1,   0.001464
law   ,   1,   0.001464
life   ,   2,   0.002928
long   ,   1,   0.001464
lose   ,   1,   0.001464
love   ,   1,   0.001464
make   ,   2,   0.002928
makes   ,   2,   0.002928
man   ,   1,   0.001464
may   ,   1,   0.001464
merit   ,   1,   0.001464
might   ,   1,   0.001464
mind   ,   1,   0.001464
moment   ,   1,   0.001464
more   ,   1,   0.001464
mortal   ,   1,   0.001464
my   ,   1,   0.001464
name   ,   1,   0.001464
native   ,   1,   0.001464
natural   ,   1,   0.001464
nobler   ,   1,   0.001464
not   ,   2,   0.002928
now   ,   1,   0.001464
o   ,   1,   0.001464
of   ,   15,   0.021962
off   ,   1,   0.001464
office   ,   1,   0.001464
opposing   ,   1,   0.001464
oppressor   ,   1,   0.001464
or   ,   1,   0.001464
orisons   ,   1,   0.001464
others   ,   1,   0.001464
outrageous   ,   1,   0.001464
pale   ,   1,   0.001464
pangs   ,   1,   0.001464
patient   ,   1,   0.001464
pause   ,   1,   0.001464
perchance   ,   1,   0.001464
pith   ,   1,   0.001464
proud   ,   1,   0.001464
puzzles   ,   1,   0.001464
question   ,   1,   0.001464
quietus   ,   1,   0.001464
rather   ,   1,   0.001464
regard   ,   1,   0.001464
remember   ,   1,   0.001464
resolution   ,   1,   0.001464
respect   ,   1,   0.001464
returns   ,   1,   0.001464
rub   ,   1,   0.001464
s   ,   5,   0.007321
say   ,   1,   0.001464
scorns   ,   1,   0.001464
sea   ,   1,   0.001464
shocks   ,   1,   0.001464
shuffled   ,   1,   0.001464
sicklied   ,   1,   0.001464
sins   ,   1,   0.001464
sleep   ,   5,   0.007321
slings   ,   1,   0.001464
so   ,   1,   0.001464
something   ,   1,   0.001464
spurns   ,   1,   0.001464
suffer   ,   1,   0.001464
sweat   ,   1,   0.001464
take   ,   1,   0.001464
takes   ,   1,   0.001464
that   ,   4,   0.005857
the   ,   15,   0.021962
their   ,   1,   0.001464
them   ,   1,   0.001464
there   ,   2,   0.002928
this   ,   2,   0.002928
those   ,   1,   0.001464
thought   ,   1,   0.001464
thousand   ,   1,   0.001464
thus   ,   1,   0.001464
thy   ,   1,   0.001464
time   ,   1,   0.001464
tis   ,   2,   0.002928
to   ,   10,   0.014641
traveller   ,   1,   0.001464
troubles   ,   1,   0.001464
turn   ,   1,   0.001464
under   ,   1,   0.001464
undiscover   ,   1,   0.001464
unworthy   ,   1,   0.001464
us   ,   3,   0.004392
we   ,   4,   0.005857
weary   ,   1,   0.001464
what   ,   1,   0.001464
whips   ,   1,   0.001464
who   ,   2,   0.002928
whose   ,   1,   0.001464
will   ,   1,   0.001464
wish   ,   1,   0.001464
with   ,   1,   0.001464
would   ,   2,   0.002928
wrong   ,   1,   0.001464
you   ,   1,   0.001464
—————————————

Select max frequent token from: else for do to while if
=> selected is: to

#include <iostream>
using namespace std;

int main() {

for( int i = 0; i < 10; i++ )
{
if( i % 2 == 0 )
{
cout << «Even» << endl;
}
else
{
cout << «Odd» << endl;
}
}
return 0;
}

%EOF%

Word frequency statistics:

Even   ,   1,   0.032258
Odd   ,   1,   0.032258
cout   ,   2,   0.064516
else   ,   1,   0.032258
endl   ,   2,   0.064516
for   ,   1,   0.032258
i   ,   4,   0.129032
if   ,   1,   0.032258
include   ,   1,   0.032258
int   ,   2,   0.064516
iostream   ,   1,   0.032258
main   ,   1,   0.032258
namespace   ,   1,   0.032258
return   ,   1,   0.032258
std   ,   1,   0.032258
using   ,   1,   0.032258
—————————————

Select max frequent token from: else for do to while if
=> selected is: else

Структура программы

Код программы состоит из следующих элементов, объединенных в пакет odessa.uni.imem.maxim:

Интерфейс FrequencyVocabulary, который описывает заданную функциональность доступа к частотному словарю
Класс FrequencyVocabularyImpl, имплементирует данный интерфейс
Класс FrequencyVocabularyTest, который имплементирует тестовое приложение, он содержит метод main и вспомогательные методы ввода данных и вывода результата.

Интерфейс FrequencyVocabulary:

package odessa.uni.imem.maxim;

import java.util.Set;
import java.util.Map;
import java.util.TreeMap;

/**
 * Frequency vocabulary: maps every word to its usage statistic.
 */
public interface FrequencyVocabulary
{
   /**
    * Statistic of a single word: the number of occurrences and the relative
    * frequency. Both fields start at zero.
    */
   class Statistic
   {
      public int count = 0;
      public double frequency = 0;

      public Statistic()
      {
      }
   }

   /** Returns the vocabulary itself, keyed by word. */
   TreeMap<String, Statistic> getMap();

   /** Selects one word out of {@code tokens} according to the vocabulary statistics. */
   String selectMaxFreqToken(Set<String> tokens);

}

package odessa.uni.imem.maxim;

import java.util.Set;

import java.util.Map;

import java.util.TreeMap;

/**
 * Frequency vocabulary: maps every word to its usage statistic.
 */
public interface FrequencyVocabulary
{
   /**
    * Statistic of a single word: the number of occurrences and the relative
    * frequency. Both fields start at zero.
    */
   public class Statistic
   {
      public int count;
      public double frequency;

      public Statistic()
      {
         count = 0;
         frequency = 0;
      }
   }

   /** Returns the vocabulary itself, keyed by word. */
   public TreeMap<String, Statistic> getMap();

   /** Selects one word out of {@code tokens} according to the vocabulary statistics. */
   public String selectMaxFreqToken(Set<String> tokens);

}

Интерфейс подразумевает, что частотный словарь представлен в виде стандартного класса TreeMap. В качестве ключа типа String выступает само слово, а в качестве значения — специальная структура Statistic, содержащая статистику по данному слову, а именно — число вхождений count и частоту frequency.

Интерфейс содержит два метода:

getMap — возвращает сам частотный словарь как TreeMap объект.
selectMaxFreqToken — выполняет условие 2-го задания, а именно — выбирает наиболее часто встречаемое слово из заданного набора, передаваемого как аргумент типа Set

Класс FrequencyVocabularyImpl:

package odessa.uni.imem.maxim;

import java.util.Set;
import java.util.Map;
import java.util.TreeMap;
import java.util.StringTokenizer;

public class FrequencyVocabularyImpl implements FrequencyVocabulary
{
   TreeMap<String, Statistic> map;

   public FrequencyVocabularyImpl(String text, String delimiters)
   {
      map = new TreeMap<String, Statistic>();
      int totalCount = 0;
      
      // first we parse text to tokens (words) and fill the map where keys are words and values - statistic for given word
      StringTokenizer st = new StringTokenizer(text, delimiters);
      while (st.hasMoreTokens())
      {
         String key = st.nextToken();
         if ( containsDigits(key) )
         {
             continue;
         }
         Statistic value = map.get(key);
         if (null != value)
         {
            value.count++;
            map.replace(key, value);
         }
         else
         {
            value = new Statistic();
            value.count = 1;
            map.put( key, value );
         }
         totalCount += value.count;
      }

      // now we can calculate and set frequency field for all the entries
      for (Map.Entry<String, Statistic> entry : map.entrySet())
      {
         if ( entry.getValue().count != 0 )
         {
            entry.getValue().frequency = ((double)entry.getValue().count) / totalCount;
         }
      }
   }

   public TreeMap<String, Statistic> getMap()
   {
      return map;
   }

   // select token with max frequency from the given set of tokens
   public String selectMaxFreqToken( Set<String> tokens )
   {
      String maxFreqToken = null;
      Statistic maxFreqStat = null;
      for( String token : tokens )
      {
         Statistic stat = map.get(token);
         if ( stat != null )
         {
            if ( maxFreqToken == null )
            {
               maxFreqToken = token;
               maxFreqStat = stat;
            }
            else
            {
               if ( stat.count > maxFreqStat.count )
               {
                  maxFreqToken = token;
                  maxFreqStat = stat;
               }
            }
         }
      }
      
      return maxFreqToken;
   }
   
   public static boolean containsDigits( String token )
   {
       if (  token.indexOf('0') >= 0 || 
               token.indexOf('1') >= 0 ||
               token.indexOf('2') >= 0 ||
               token.indexOf('3') >= 0 ||
               token.indexOf('4') >= 0 ||
               token.indexOf('5') >= 0 ||
               token.indexOf('6') >= 0 ||
               token.indexOf('7') >= 0 ||
               token.indexOf('8') >= 0 ||
               token.indexOf('9') >= 0
               )
      {
          return true;
      }
       return false;
   }
}

100

101

102

103

package odessa.uni.imem.maxim;

import java.util.Set;

import java.util.Map;

import java.util.TreeMap;

import java.util.StringTokenizer;

public class FrequencyVocabularyImpl implements FrequencyVocabulary

{

TreeMap<String, Statistic> map;

public FrequencyVocabularyImpl(String text, String delimiters)

{

map = new TreeMap<String, Statistic>();

int totalCount = 0;

// first we parse text to tokens (words) and fill the map where keys are words and values - statistic for given word

StringTokenizer st = new StringTokenizer(text, delimiters);

while (st.hasMoreTokens())

{

String key = st.nextToken();

if ( containsDigits(key) )

{

continue;

}

Statistic value = map.get(key);

if (null != value)

{

value.count++;

map.replace(key, value);

}

else

{

value = new Statistic();

value.count = 1;

map.put( key, value );

}

totalCount += value.count;

}

// now we can calculate and set frequency field for all the entries

for (Map.Entry<String, Statistic> entry : map.entrySet())

{

if ( entry.getValue().count != 0 )

{

entry.getValue().frequency = ((double)entry.getValue().count) / totalCount;

}

public TreeMap<String, Statistic> getMap()

{

return map;

}

// select token with max frequency from the given set of tokens

public String selectMaxFreqToken( Set<String> tokens )

{

String maxFreqToken = null;

Statistic maxFreqStat = null;

for( String token : tokens )

{

Statistic stat = map.get(token);

if ( stat != null )

{

if ( maxFreqToken == null )

{

maxFreqToken = token;

maxFreqStat = stat;

}

else

{

if ( stat.count > maxFreqStat.count )

{

maxFreqToken = token;

maxFreqStat = stat;

}

return maxFreqToken;

}

public static boolean containsDigits( String token )

{

if ( token.indexOf('0') >= 0 ||

token.indexOf('1') >= 0 ||

token.indexOf('2') >= 0 ||

token.indexOf('3') >= 0 ||

token.indexOf('4') >= 0 ||

token.indexOf('5') >= 0 ||

token.indexOf('6') >= 0 ||

token.indexOf('7') >= 0 ||

token.indexOf('8') >= 0 ||

token.indexOf('9') >= 0

)

{

return true;

}

return false;

}

Класс строит частотный словарь на основе входного текста. Словарь хранится в виде приватного члена типа TreeMap. Для разбора текста на слова используется известный класс StringTokenizer. Конструктор создает класс в 2 прохода. За первый проход заполняется словарь, где для каждого слова заносится статистика с подсчитанным счетчиком вхождений count и пока еще нулевым frequency, однако, вычисляется общий счетчик всех слов totalCount. За второй проход по уже известной статистике каждого слова рассчитывается его frequency как count / totalCount.

Метод containsDigits добавляет проверку для отсеивания лексем, содержащих цифры, согласно требованию 1-й задачи.

Выборка слова с максимальной частотой согласно второй задаче реализована в методе selectMaxFreqToken, где выполняется цикл по всем заданным словам и выбирается слово с максимальным count.

Класс FrequencyVocabularyTest

package odessa.uni.imem.maxim;

import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.TreeMap;
import java.util.NoSuchElementException;
import java.io.IOException;

import odessa.uni.imem.maxim.FrequencyVocabulary.Statistic;

import java.util.Scanner;

public class FrequencyVocabularyTest
{

   public static void main(String[] args)
   {
      
      String text = "";
      
      Scanner in = new Scanner(System.in);
      
      for(;;)
      {
        if ( ! in.hasNextLine() ) break;
        String inputLine = in.nextLine();
        if ( inputLine == null || inputLine.equals("%EOF%") ) break;
        text = text + inputLine + "\n";
      }
      in.close();
      
      if ( text.isEmpty() )
      {
         System.out.println("*** Empty text ****");
         return;
      }
      
      //System.out.println("*** DBG:input text is: <"+text+">");
            
      FrequencyVocabulary fv = new FrequencyVocabularyImpl( text, " ?.,;:-+\n\t{}[]=()<>*&%$#@\"\'`~!" );
      
      TreeMap<String,FrequencyVocabulary.Statistic> statMap = fv.getMap();
      System.out.println("Word frequency statistics:\n");
      for (Map.Entry<String, Statistic> statEntry : statMap.entrySet())
      {
         System.out.printf("%s\t,\t%d,\t%f\n",
               statEntry.getKey(),
               statEntry.getValue().count,
               statEntry.getValue().frequency
               );
      }
      System.out.println("--------------------------------------\n");
      
      HashSet<String> tokens = new HashSet<String>();
      tokens.add("for");
      tokens.add("do");
      tokens.add("to");
      tokens.add("while");
      tokens.add("else");
      tokens.add("if");
      String caption = "Select max frequent token from: ";
      for( String s : tokens )
      {
         caption += " " + s;
      }
      System.out.println(caption);
      String s = fv.selectMaxFreqToken(tokens);
      if ( s == null ) s = "<null>";
      System.out.println("=> selected is: " + s );
      
   }

}

package odessa.uni.imem.maxim;

import java.util.Set;

import java.util.HashSet;

import java.util.Map;

import java.util.TreeMap;

import java.util.NoSuchElementException;

import java.io.IOException;

import odessa.uni.imem.maxim.FrequencyVocabulary.Statistic;

import java.util.Scanner;

public class FrequencyVocabularyTest

{

public static void main(String[] args)

{

String text = "";

Scanner in = new Scanner(System.in);

for(;;)

{

if ( ! in.hasNextLine() ) break;

String inputLine = in.nextLine();

if ( inputLine == null || inputLine.equals("%EOF%") ) break;

text = text + inputLine + "\n";

}

in.close();

if ( text.isEmpty() )

{

System.out.println("*** Empty text ****");

return;

}

//System.out.println("*** DBG:input text is: <"+text+">");

FrequencyVocabulary fv = new FrequencyVocabularyImpl( text, " ?.,;:-+\n\t{}[]=()<>*&%$#@\"\'`~!" );

TreeMap<String,FrequencyVocabulary.Statistic> statMap = fv.getMap();

System.out.println("Word frequency statistics:\n");

for (Map.Entry<String, Statistic> statEntry : statMap.entrySet())

{

System.out.printf("%s\t,\t%d,\t%f\n",

statEntry.getKey(),

statEntry.getValue().count,

statEntry.getValue().frequency

);

}

System.out.println("--------------------------------------\n");

HashSet<String> tokens = new HashSet<String>();

tokens.add("for");

tokens.add("do");

tokens.add("to");

tokens.add("while");

tokens.add("else");

tokens.add("if");

String caption = "Select max frequent token from: ";

for( String s : tokens )

{

caption += " " + s;

}

System.out.println(caption);

String s = fv.selectMaxFreqToken(tokens);

if ( s == null ) s = "<null>";

System.out.println("=> selected is: " + s );

}

В данном классе реализуется приложение, которое тестирует интерфейсные методы частотного словаря. Он состоит из следующих этапов:

Ввод исходного текста, так как текст может допускать любое количество символов новой строки, и поскольку затруднительно определить признак конца текста как конец файла, в качестве признака конца текста используется спец. лексема %EOF% , которая должна быть после последней строки текста.
Создание объекта частотного словаря на основе введенного текста; в качестве разделителей используются все известные символы, которые не являются буквой или цифрой.
Проход по элементам словаря и печать статистики.
Тестирование метода, определяющего наиболее часто встречающееся слово из набора заданных.

Примечание: Ввиду того, что затруднительно за один раз ввести с консоли исходный текст для 1-го задания и список слов для 2-го, набор слов для 2-го задания задается непосредственно в коде, а именно: for, do, to, while, else и if.

Код на Ideone: https://ideone.com/jxJ4rU

java@Cat

Учебные материалы по изучению основ языка программирования Java

Author Archives: Максим Швандт

BST – Двоичные деревья поиска

acm.timus.ru №2002. Тестовое задание

Условие

Исходные данные

Результат

Пример

Segmented Array

Java Collections Framework: Map. Частотный словарь.

Задача 1:

Задача 2: