greeness notes: 2008

Saturday, November 15, 2008

Repeated Squaring

http://www.algorithmist.com/index.php/Repeated_Squaring

Repeated squaring, or repeated doubling is an algorithm that computes integer powers of a number quickly. The general problem is to compute xy for an arbitrary integer y.

// Calculates n to the p power, where p is a positive number.
func power( var n as integer, var p as integer )
   if p = 0 return 1
   if p = 1 return n
   if p is odd
      return n * power( n * n, (p-1) / 2 )
   else
      return power( n * n, p / 2 )
end func

Tuesday, November 11, 2008

quadtree

http://en.wikipedia.org/wiki/Quadtree

UVa problem: 297: Quadtrees

refer to forum: http://acm.uva.es/board/viewtopic.php?f=2&t=700&p=2691&hilit=297&sid=156699fafa75aaeacf281e1af3a8006c#p2691

#include <iostream>

#define M 32
int p[M];
int idx;

int mask[] = {
  0, 0x1, 0x3, 0, // 0-3
  0xf, 0, 0, 0, 0xff, //4-8
  0,0,0,0,0,0,0,0xffff, // 16
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xffffffff // 32
};

#ifndef ONLINE_JUDGE
// couting 1's
int count_bit(int n) {
    int bits = 0;
    while(n) {
        n &= n-1;
        ++bits;
    }
    return bits;
}
#endif

void color (char *str, int size, int x, int y) {
  if (str[idx] == 'p') {
    idx ++;
    int sz = size>>1;
    color(str, sz, x+sz, y);
    color(str, sz, x, y);
    color(str, sz, x, y+sz);
    color(str, sz, x+sz, y+sz);
  }
  else if (str[idx] == 'f') {
    idx ++;
    for (int i = y; i < y + size; i ++)
      p[i] |= mask[size] << x;
  }
  else idx++;
}
int main () {
  int num;
  char tree1[1500], tree2[1500];

  scanf("%d\n", &num);
  while(num--) {
    scanf("%s\n%s\n", tree1, tree2);

    memset(p, 0, sizeof(p));

    idx=0;
    color(tree1, M, 0, 0);

    idx=0;
    color(tree2, M, 0, 0);

    int total = 0;
    for (int i = 0; i < M; i ++)
#ifndef ONLINE_JUDGE
      total += count_bit(p[i]);
#else
      total += __builtin_popcount(p[i]);
#endif

    printf("There are %d black pixels.\n", total);
  }
  return 0;
}

UVa problem: 752 - Unscrambling Images

#include <iostream>
#include <deque>
using namespace std;
int q[16][16];
int e[16][16]; // encrypted

int d[16][16]; // decoded
inline int getxy(int size, int k, int &x, int &y) {

  deque<int> Q;
  while(k > 0) {
    int r = k % 4;
    if (r == 0) {
      r = 4;
      k = (k-4) >> 2;
    } else
      k = k >>2;

    Q.push_front(r);
  }

  x = 0, y = 0;
  for(int i = 0; i < Q.size(); ++i) {
    size = size >> 1;
    int idx = Q[i];

    if (idx == 2)
      y += size;
    if (idx == 3)
      x += size;
    if (idx == 4) {
      x += size;
      y += size;
    }
  }
  return size;
}

int main() {
  int ndata;
  scanf("%d ", &ndata);
  int nd = 1;
  while(nd <= ndata) {
    if(nd > 1) printf("\n");
    printf("Case %d\n\n", nd++);
    int n;
    scanf("%d ", &n);

    // quadtree reset to all 0
    memset(q, 0, sizeof(q));

    int m; // number of leaf nodes

    scanf("%d ", &m); 
    int k;
    int intensity;
    for(int i = 0; i < m; ++ i) {
      scanf("%d %d ", &k, &intensity);
      int x, y;
      int size = getxy(n, k, x, y);

      for(int i = x; i < x+size; ++i)
        for(int j = y;j < y+size; ++j)
          q[i][j] = intensity;      
    }
/*
    for(int i = 0; i < n; ++i) {
      for(int j = 0; j < n; ++j)
        printf("%4d", q[i][j]);
      printf("\n");
    }
    printf("\n");
*/
    scanf("%d ", &m);

    // encrypted quadtree reset to all 0
    memset(e, 0, sizeof(e));
    for(int i = 0; i < m; ++ i) {
      scanf("%d %d ", &k, &intensity);
      int x, y;
      int size = getxy(n, k, x, y);

      for(int i = x; i < x+size; ++i)
        for(int j = y;j < y+size; ++j)
          e[i][j] = intensity;
    }

    for(int i = 0; i < n; ++i) {
      for(int j = 0; j < n; ++j) {
        int x = q[i][j] / n;
        int y = q[i][j] % n;
        d[x][y] = e[i][j];
      }
    }
    for(int i = 0; i < n; ++i) {
      for(int j = 0; j < n; ++j)
        printf("%4d", d[i][j]);
      printf("\n");
    }
  }
}

Wednesday, November 05, 2008

Segmentation fault: A segmentation fault occurs when a program attempts to access a memory location that it is not allowed to access, or attempts to access a memory location in a way that is not allowed. Segmentation is one approach to memory management and protection in the operating system.

Stack Overflow: Stack buffer overflow bugs are caused when a program writes more data to a buffer located on the stack than there was actually allocated for that buffer. This almost always results in corruption of adjacent data on the stack, and in cases where the overflow was triggered by mistake, will often cause the program to crash or operate incorrectly.

A core dump is the recorded state of the working memory of a computer program at a specific time, generally when the program has terminated abnormally (crashed).In practice, other key pieces of program state are usually dumped at the same time, including the processor registers, which may include the program counter and stack pointer, memory management information, and other processor and operating system flags and information.

Static function:

The differences between a static member function and non-static member functions are as follows.

A static member function can access only static member data, static member functions and data and functions outside the class. A non-static member function can access all of the above including the static data member.
A static member function can be called, even when a class is not instantiated, a non-static member function can be called only after instantiating the class as an object.
A static member function cannot be declared virtual, whereas a non-static member functions can be declared as virtual
A static member function cannot have access to the 'this' pointer of the class.

The static member functions are not used very frequently in programs. But nevertheless, they become useful whenever we need to have functions which are accessible even when the class is not instantiated.

A memory leak is a particular type of unintentional memory consumption by a computer program where the program fails to release memory when no longer needed. This condition is normally the result of a bug in a program that prevents it from freeing up memory that it no longer needs.

http://linux.die.net/man/2/fork

fork() creates a child process that differs from the parent process only in its PID and PPID, and in the fact that resource utilizations are set to 0. File locks and pending signals are not inherited.

Under Linux, fork() is implemented using copy-on-write pages, so the only penalty that it incurs is the time and memory required to duplicate the parent's page tables, and to create a unique task structure for the child.

Return Value

On success, the PID of the child process is returned in the parent's thread of execution, and a 0 is returned in the child's thread of execution. On failure, a -1 will be returned in the parent's context, no child process will be created, and errno will be set appropriately.

Shallow copies and deep copies

A shallow copy of an object copies all of the member field values. This works well if the fields are values, but may not be what you want for fields that point to dynamically allocated memory. The pointer will be copied. but the memory it points to will not be copied -- the field in both the original object and the copy will then point to the same dynamically allocated memory, which is not usually what you want. The default copy constructor and assignment operator make shallow copies.

A deep copy copies all fields, and makes copies of dynamically allocated memory pointed to by the fields. To make a deep copy, you must write a copy constructor and overload the assignment operator, otherwise the copy will point to the original, with disasterous consequences.

Deep copies need ...

If an object has pointers to dynamically allocated memory, and the dynamically allocated memory needs to be copied when the original object is copied, then a deep copy is required.

A class that requires deep copies generally needs:

A constructor to either make an initial allocation or set the pointer to NULL.
A destructor to delete the dynamically allocated memory.
A copy constructor to make a copy of the dynamically allocated memory.
An overloaded assignment operator to make a copy of the dynamically allocated memory.

Defunct Process: Defunct processes are corrupted processes that can no longer communicate between the parent and child one. Sometimes, they become “zombies” and remain in your system until you reboot your machine. You can try to apply “kill -9″ command, but most of the time you’ll be out of luck.

Stack unwinding:

When an exception is thrown and control passes from a try block to a handler, the C++ run time calls destructors for all automatic objects constructed since the beginning of the try block. This process is called stack unwinding. The automatic objects are destroyed in reverse order of their construction. (Automatic objects are local objects that have been declared auto or register, or not declared static or extern. An automatic object x is deleted whenever the program exits the block in which x is declared.)

If an exception is thrown during construction of an object consisting of subobjects or array elements, destructors are only called for those subobjects or array elements successfully constructed before the exception was thrown. A destructor for a local static object will only be called if the object was successfully constructed.

http://en.wikipedia.org/wiki/Data_segment

In the PC architecture there are four basic read-write memory regions in a program: Stack, Data, BSS, and Heap.

The Data segment contains constants used by the program that are not initialized to zero. For instance the string defined by char s[] = "hello world"; in C would exist in the data part.
The BSS segment starts at the end of the data segment and contains all global variables that are initialized to zero. For instance a variable declared static int i; would be contained in the BSS segment.
The heap area begins at the end of the data segment and grows to larger addresses from there. The Heap area is managed by malloc, realloc, and free, which use the brk and sbrk system calls to adjust its size. The Heap area is shared by all shared libraries and dynamic load modules in a process.
The stack is a LIFO structure, typically located in the higher parts of memory. It usually "grows down" with every register, immediate value or stack frame being added to it. A stack frame consists at minimum of a return address.

Virtual Table The virtual table is a lookup table of functions used to resolve function calls in a dynamic/late binding manner.

A Simple Garbage Collector for C++ http://www.devarticles.com/c/a/Cplusplus/A-Simple-Garbage-Collector-for-C-plus-plus/

A deadlock is a situation wherein two or more competing actions are waiting for the other to finish, and thus neither ever does.

In scheduling, priority inversion is the scenario where a low priority task holds a shared resource that is required by a high priority task. This causes the execution of the high priority task to be blocked until the low priority task has released the resource, effectively "inverting" the relative priorities of the two tasks. If some other medium priority task, one that does not depend on the shared resource, attempts to run in the interim, it will take precedence over both the low priority task and the high priority task.

3Sum problem

http://discuss.techinterview.org/default.asp?interview.11.613896.3

http://en.wikipedia.org/wiki/3SUM

Given an unsorted array of integers. Find any three numbers satisfying following condition:
c = a+b

First, sort the array in O(nlogn).
For each element c of the array try to find two numbers a and b such that a+b = c.
Time complexity:
O(n) to find a+b = c in sorted array. (We can use two pointers and move them towards the middle)

Since we do this for each element. O(n^2).

Monday, November 03, 2008

Maximun Flow

Residual network: (credit: http://www.topcoder.com/tc?module=Static&d1=tutorials&d2=maxFlow)

if the flow along the edge x-y is less than the capacity there is a forward edge x-y with a capacity equal to the difference between the capacity and the flow (this is called the residual capacity),
if the flow is positive there is a backward edge y-x with a capacity equal to the flow on x-y.

Augmenting path: a path from the source to the sink in the residual network, whose purpose is to increase the flow in the original one.

Ford-Fulkerson method: start with no flow everywhere and increase the total flow in the network while there is an augmenting path from the source to the sink with no full forward edges or empty backward edges - a path in the residual network.

A cut in a flow network is simply a partition of the vertices in two sets, let's call them A and B, in such a way that the source vertex is in A and the sink is in B.

The capacity of a cut is the sum of the capacities of the edges that go from a vertex in A to a vertex in B.

The flow of the cut is the difference of the flows that go from A to B (the sum of the flows along the edges that have the starting point in A and the ending point in B), respectively from B to A, which is exactly the value of the flow in the network, due to the entering flow equals leaving flow - property, which is true for every vertex other than the source and the sink.

The flow of the cut is less or equal to the capacity of the cut due to the constraint of the flow being less or equal to the capacity of every edge.

This implies that the maximum flow is less or equal to every cut of the network. This is where the max-flow min-cut theorem comes in and states that the value of the maximum flow through the network is exactly the value of the minimum cut of the network.

Maximum Bipartite Matching

http://shygypsy.com/tools/bpm.cpp (a bpm code)

UVa Problems:

10080: Gopher (II) (跟670几乎一样，先确定graph[][]即可,用scanf("%lf", ..) 和eps比较double型)
10092: The Problem with the Problem Setter (L: the number of problems that should be included; R: the total number of problems in the pool)
259: Software Allocation (每台机子只能run一个application, 又是典型的0-1 assignment)
670: The dog task

其实就是先填一个graph[][]即可，在每两个master点之间，确定dog能到达哪些interesting places. 然后就是一个0-1 assignment问题而已。另注意double型比较用了eps = 1e-18(开始用1e-15 WA: )

special case:

2 1
0 0 2 2
3 3

#include <iostream>
#include <vector>
#include <cmath>
using namespace std;

#define MAX 101
#define eps 1e-18
typedef pair<int,int> ii;

bool graph[MAX][MAX];
bool seen[MAX];
int matchL[MAX], matchR[MAX];
// n: dim of L; m: dim of R
int n, m;

bool bpm( int u ) {
    for( int v = 0; v < m; v++ ) if( graph[u][v] )
    {
        if( seen[v] ) continue;
        seen[v] = true;

        if( matchR[v] < 0 || bpm( matchR[v] ) )
        {
            matchL[u] = v;
            matchR[v] = u;
            return true;
        }
    }
    return false;
}

double dist(ii a, ii b) {
  return sqrt((a.first - b.first) * (a.first - b.first) + 
              (a.second - b.second) * (a.second - b.second) + 0.0);
}

int main() {
  int ndata;
  scanf("%d ", &ndata);
  while(ndata--) {
    scanf("%d %d ", &n, &m);

    vector<ii> master(n);
    vector<ii> place(m);
    for(int i = 0; i < n; ++i)
      scanf("%d %d ", &master[i].first, &master[i].second);

    for(int i = 0; i < m; ++i)
      scanf("%d %d ", &place[i].first, &place[i].second);

    memset(graph, false, sizeof(graph));
    memset( matchL, -1, sizeof( matchL ) );
    memset( matchR, -1, sizeof( matchR ) );

    for(int i = 0; i < n-1; ++i) {

      // master's walking distance doubled
      double d2 = dist(master[i], master[i+1]) * 2;
      for(int j = 0; j < m; ++j) {
        // dog's walking distance
        double dogdist = dist(master[i], place[j]) + dist(place[j], master[i+1]);
        // as long as dog's walking distance is no more than 2xmaster's, the dog can visit this place
        if(d2 - dogdist>= eps) 
          graph[i][j] = true;
      }
    }

    int cnt = 0;
    for( int i = 0; i < n; ++ i){
      memset(seen, 0, sizeof( seen ) );
      if(bpm(i)) cnt++;
    }

    printf("%d\n", n+cnt);
    printf("%d %d", master[0].first, master[0].second);
    for(int i = 0; i < n; ++i) {
      if(i) printf(" %d %d", master[i].first, master[i].second);
      if(matchL[i] != -1) {
        int p = matchL[i];
        printf(" %d %d", place[p].first, place[p].second);
      }
    }
    printf("\n");
    if(ndata) printf("\n");
  }
}

663: Sorting Slides (use point in rectangle to get graph[][])

这题难点在于要输出unique的matching, 我是run max bipartite matching 算法 |E|次(the number of edges), 每次检查uniqueness

#include <iostream>
#include <vector>
using namespace std;

struct Rect {
  int xmin;
  int xmax;
  int ymin;
  int ymax;
};

#define MAX 27 // the number of alphabet
bool graph[MAX][MAX];
bool seen[MAX];
int matchL[MAX], matchR[MAX];
int n;
Rect slides[MAX];

bool exist;      // if solution already exists
int savedR[MAX]; // save previous solution

// if a points is inside a rectangle
inline bool ptInRect(int x, int y, Rect r) {
  //No number will lie on a slide boundary.
  if (x > r.xmin && x < r.xmax && y > r.ymin && y < r.ymax) return true;
  return false; 
}

bool bpm( int u ) {
  for( int v = 0; v < n; v++ ) if( graph[u][v] )
  {
    if( seen[v] ) continue;
    seen[v] = true;

    if( matchR[v] < 0 || bpm( matchR[v] ) )
    {
      matchL[u] = v;
      matchR[v] = u;
      return true;
    }
  }
  return false;
}

int main() {
  int ndata = 0;
  while(scanf("%d ", &n) && n) {
    printf("Heap %d\n", ++ndata);

    for(int i = 0; i < n; ++i) {
      scanf("%d %d %d %d ", &slides[i].xmin, 
                            &slides[i].xmax,
                            &slides[i].ymin,
                            &slides[i].ymax);
    }

    memset(graph, false, sizeof(graph));
    memset(savedR, -1, sizeof(savedR));
    exist = false;

    int x, y;
    for(int i = 0; i < n; ++i) {
      scanf("%d %d ", &x, &y);
      for(int j = 0; j < n; ++j)
        if(ptInRect(x, y, slides[j]))
          graph[i][j] = true;
    }
    
    int cnt = 0;
    for(int u = 0; u < n; ++u) {
      for(int v = 0; v < n; ++v) {
        // each time, put one edge into the match
        // and try to construct the max bipartite match based on that
        if(!graph[u][v]) 
          continue;

        memset( matchL, -1, sizeof( matchL ) );
        memset( matchR, -1, sizeof( matchR ) );
        
        matchL[u] = v;
        matchR[v] = u;

        int cnt = 1;
        for( int i = 0; i < n; ++ i){
          if (i == u) continue;
          memset(seen, 0, sizeof( seen ));
          if(bpm(i)) cnt++;
        }

        if(exist) {
          for(int k = 0; k < n; ++k)
            //check uniqueness
            if (matchR[k] != savedR[k])
              savedR[k] = -1;  // not unique, reset to -1 
        } else {
          memcpy(savedR, matchR, sizeof(int)*n); // save the first solution
          exist = true;
        }
      }
    }

    int cardinal = 0;
    int i;
    for(i = 0; i < n; ++i) {
      if(savedR[i] != -1) {
        ++cardinal;
        printf("(%c,%d)", i + 'A', savedR[i] + 1);
        break;
      }
    }

    for(++i; i < n; ++i) {
      if(savedR[i] != -1) {
        ++cardinal;
        printf(" (%c,%d)", i + 'A', savedR[i] + 1);
      }
    }

    if(cardinal == 0) 
      printf("none");

    printf("\n\n");
  }
}

Hungarian Method

Easy to understand examples:

http://www.ee.oulu.fi/~mpa/matreng/eem1_2-1.htm

http://www.ams.jhu.edu/~castello/362/Handouts/hungarian.pdf

http://www.public.iastate.edu/~ddoty/HungarianAlgorithm.html

animation: http://www.ifors.ms.unimelb.edu.au/tutorial/hungarian/index.html

topcoder tutorial: Assignment Problem and Hungarian Algorithm (very good regarding theory and c++ code)

http://www.math.uwo.ca/~mdawes/courses/344/kuhn-munkres.pdf

Sunday, November 02, 2008

String Searching

http://allisons.org/ll/AlgDS/Strings/

http://en.wikipedia.org/wiki/String_searching_algorithm

http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm

Suffix Tree

Tries and suffix tree

wiki - Suffix Trees

http://allisons.org/ll/AlgDS/Tree/Suffix/

The suffix tree can be built in O(n) time (linear time regarding the length of strings) due to Ukkonen (1995).

Applications: [1,2,3] from http://allisons.org/ll/AlgDS/Tree/Suffix/

String Search: Searching for a substring, pat[1..m], in txt[1..n], can be solved in O(m) time (after the suffix tree for txt has been built in O(n) time).
Palindromes: The longest palindrome of txt[1..n] can be found in O(n) time, e.g. by building the suffix tree for txt$reverse(txt)# or by building the generalized suffix tree for txt and reverse(txt).
Longest Common String: The longest common substring of two strings, txt₁ and txt₂, can be found by building a generalized suffix tree for txt₁ and txt₂: Each node is marked to indicate if it represents a suffix of txt₁ or txt₂ or both. The deepest node marked for both txt₁ and txt₂ represents the longest common substring. Equivalently, one can build a (basic) suffix tree for the string txt₁$txt₂#, where `$' is a special terminator for txt₁ and `#' is a special terminator for txt₂. The longest common substring is indicated by the deepest fork node that has both `...$...' and `...#...' (no $) beneath it.

Friday, October 24, 2008

google c++ coding style

http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml?showone=Header_File_Dependencies#Header_File_Dependencies

Header File Dependencies

How can we use a class Foo in a header file without access to its definition?

We can declare data members of type Foo* or Foo&.
We can declare (but not define) functions with arguments, and/or return values, of type Foo.
We can declare static data members of type Foo. This is because static data members are defined outside the class definition.

On the other hand, you must include the header file for Foo if your class subclasses Foo or has a data member of type Foo.

Inline Functions

A decent rule of thumb is to not inline a function if it is more than 10 lines long. Beware of destructors, which are often longer than they appear because of implicit member- and base-destructor calls!

Another useful rule of thumb: it's typically not cost effective to inline functions with loops or switch statements (unless, in the common case, the loop or switch statement is never executed).

Function Parameter Ordering

When defining a function, parameter order is: inputs, then output

Parameters to C/C++ functions are either input to the function, output from the function, or both. Input parameters are usually values or const references, while output and input/output parameters will be non-const pointers. When ordering function parameters, put all input-only parameters before any output parameters. In particular, do not add new parameters to the end of the function just because they are new; place new input-only parameters before the output parameters.

Local Variables

There is one caveat: if the variable is an object, its constructor is invoked every time it enters scope and is created, and its destructor is invoked every time it goes out of scope.

// Inefficient implementation:
for (int i = 0; i < 1000000; ++i) {
  Foo f;  // My ctor and dtor get called 1000000 times each.
  f.DoSomething(i);
}

It may be more efficient to declare such a variable used in a loop outside that loop:

Foo f;  // My ctor and dtor get called once each.
for (int i = 0; i < 1000000; ++i) {
  f.DoSomething(i);
}

Doing Work in Constructors

Do only trivial initialization in a constructor. If at all possible, use an Init() method for non-trivial initialization.

Decision: If your object requires non-trivial initialization, consider having an explicit Init() method and/or adding a member flag that indicates whether the object was successfully initialized.

Explicit Constructors

Use the C++ keyword explicit for constructors with one argument.

Definition: Normally, if a constructor takes one argument, it can be used as a conversion. For instance, if you define Foo::Foo(string name) and then pass a string to a function that expects a Foo, the constructor will be called to convert the string into a Foo and will pass the Foo to your function for you. This can be convenient but is also a source of trouble when things get converted and new objects created without you meaning them to. Declaring a constructor explicit prevents it from being invoked implicitly as a conversion.

Pros: Avoids undesirable conversions.

Cons: None.

Decision:

We require all single argument constructors to be explicit. Always put explicit in front of one-argument constructors in the class definition: explicit Foo(string name);

The exception is copy constructors, which, in the rare cases when we allow them, should probably not be explicit. Classes that are intended to be transparent wrappers around other classes are also exceptions. Such exceptions should be clearly marked with comments.

Copy Constructors

Consider adding dummy declarations for the copy constructor and assignment operator in the class' private: section, without providing definitions. With these dummy routines marked private, a compilation error will be raised if other code attempts to use them. For convenience, a DISALLOW_COPY_AND_ASSIGN macro can be used:

// A macro to disallow the copy constructor and operator= functions
// This should be used in the private: declarations for a class
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&);               \
  void operator=(const TypeName&)Then, in class Foo: 

class Foo {
 public:
  Foo(int f);
  ~Foo();

 private:
  DISALLOW_COPY_AND_ASSIGN(Foo);
};

Structs vs. Classes

Use a struct only for passive objects that carry data; everything else is a class.

Declaration Order

Your class definition should start with its public: section, followed by its protected: section and then its private: section. If any of these sections are empty, omit them.

Within each section, the declarations generally should be in the following order:

Typedefs and Enums
Constants
Constructors
Destructor
Methods, including static methods
Data Members, including static data members

The DISALLOW_COPY_AND_ASSIGN macro invocation should be at the end of the private: section. It should be the last thing in the class.

Reference Arguments

All parameters passed by reference must be labeled const.

Within function parameter lists all references must be const:

void Foo(const string &in, string *out);

In fact it is a very strong convention that input arguments are values or const references while output arguments are pointers. Input parameters may be const pointers, but we never allow non-const reference parameters.

Casting

Use C++ casts like static_cast<>(). Do not use other cast formats like int y = (int)x; or int y = int(x);.

Do not use C-style casts. Instead, use these C++-style casts.

Use static_cast as the equivalent of a C-style cast that does value conversion, or when you need to explicitly up-cast a pointer from a class to its superclass.
Use const_cast to remove the const qualifier (see const).
Use reinterpret_cast to do unsafe conversions of pointer types to and from integer and other pointer types. Use this only if you know what you are doing and you understand the aliasing issues.
Do not use dynamic_cast except in test code. If you need to know type information at runtime in this way outside of a unittest, you probably have a design flaw.

sizeof

Use sizeof(varname) instead of sizeof(type) whenever possible.

Use sizeof(varname) because it will update appropriately if the type of the variable changes. sizeof(type) may make sense in some cases, but should generally be avoided because it can fall out of sync if the variable's type changes.

Struct data;
memset(&data, 0, sizeof(data));

memset(&data, 0, sizeof(Struct));

File Comments

Every file should contain the following items, in order:

a copyright statement (for example, Copyright 2008 Google Inc.)
a license boilerplate. Choose the appropriate boilerplate for the license used by the project (for example, Apache 2.0, BSD, LGPL, GPL)
an author line to identify the original author of the file

File Contents

Every file should have a comment at the top, below the and author line, that describes the contents of the file.

Generally a .h file will describe the classes that are declared in the file with an overview of what they are for and how they are used. A .cc file should contain more information about implementation details or discussions of tricky algorithms. If you feel the implementation details or a discussion of the algorithms would be useful for someone reading the .h, feel free to put it there instead, but mention in the .cc that the documentation is in the .h file.

Do not duplicate comments in both the .h and the .cc. Duplicated comments diverge.

Every class definition should have an accompanying comment that describes what it is for and how it should be used.

// Iterates over the contents of a GargantuanTable.  Sample usage:
//    GargantuanTable_Iterator* iter = table->NewIterator();
//    for (iter->Seek("foo"); !iter->done(); iter->Next()) {
//      process(iter->key(), iter->value());
//    }
//    delete iter;
class GargantuanTable_Iterator {
  ...
};

Function Comments

Declaration comments describe use of the function; comments at the definition of a function describe operation.

Function Declarations

Every function declaration should have comments immediately preceding it that describe what the function does and how to use it. These comments should be descriptive ("Opens the file") rather than imperative ("Open the file"); the comment describes the function, it does not tell the function what to do. In general, these comments do not describe how the function performs its task. Instead, that should be left to comments in the function definition.

Types of things to mention in comments at the function declaration:

What the inputs and outputs are.
For class member functions: whether the object remembers reference arguments beyond the duration of the method call, and whether it will free them or not.
If the function allocates memory that the caller must free.
Whether any of the arguments can be NULL.
If there are any performance implications of how a function is used.
If the function is re-entrant. What are its synchronization assumptions?

Here is an example:

// Returns an iterator for this table.  It is the client's
// responsibility to delete the iterator when it is done with it,
// and it must not use the iterator once the GargantuanTable object
// on which the iterator was created has been deleted.
//
// The iterator is initially positioned at the beginning of the table.
//
// This method is equivalent to:
//    Iterator* iter = table->NewIterator();
//    iter->Seek("");
//    return iter;
// If you are going to immediately seek to another place in the
// returned iterator, it will be faster to use NewIterator()
// and avoid the extra seek.
Iterator* GetIterator() const;

Thursday, October 23, 2008

random programs

摘几个programming pearls 上提到的random function.

1. knuth version. 从1-n之间生成有序的m个不同的随机数( the output is a sorted list of m random integers in the range 0...n-1 in which no integer occurs more than once).

void genknuth(int m, int n) {
  for (int i = 0; i < n; ++i) {
    /* select m of remaining n-i */
    if ((bigrand() % (n-i)) < m) {
      cout << i << "\n";
      --m;
    }
  }
}

2. shuffle the first m elements of the array. output the sorted list of the m random numbers.

void genshuf(int m, int n) {
  int i, j;
  int *x = new int[n];
  for(i = 0; i < n; ++i)
    x[i] = i;
  for(i = 0; i < m; ++i) {
    j = randint(i, n-1);
    int t = x[i]; x[i] = x[j]; x[j] = t;
  }
  sort(x, x+m);
  for(i = 0; i < m; ++i)
    cout << x[i] << "\n";
}

3. floyd. This algorithm uses only m random numbers, even in the worst case.

void genfloyd(int m, int n) {
  set<int> S;
  set<int>::iterator i;
  for(int j = n-m; j < n; ++j) {
    int t = bigrand() % (j+1);
    if(S.find(t) == S.end())
      S.insert(t); // t not in S
    else
      S.insert(j); // t in S
  }
  for(i = S.begin(); i != S.end(); ++i)
    cout << *i << "\n";
}

4. reservoir sampling. when we do not know the length of the data ( we see the data sequentially. )

We always select the first line, we select the second line with probability one half, the third line with probability one third, and so on. At the end of the process, each line has the same probability of being chosen (1/n, where n is the total number of lines in the article).

Sunday, October 19, 2008

counting sort + bit operation

UVa problem: Bridge Hands

每一花色牌不超过13张，4种花色，用一个long long表示，每16位一组表示一个花色

#include <stdio.h>
#include <string.h>

long long player[4];
int p[90];
char ip[4] = {'C', 'D', 'S', 'H'};
int seat[90];
char iseat[4] = {'S','W','N','E'};
int idx[90];
char inverse_idx[15] = {'0','1','2','3','4','5','6','7','8','9','T','J','Q','K','A'};
//int __builtin_ctz(long long);
int main() {
    char buff[60];
    seat['S'] = p['C'] = 0; 
    seat['W'] = p['D'] = 1;
    seat['N'] = p['S'] = 2;
    seat['E'] = p['H'] = 3;
    int i, j;
    for(i = 2; i <= 9; ++i)
        idx[i+48] = i;
    idx['T'] = 10;
    idx['J'] = 11;
    idx['Q'] = 12;
    idx['K'] = 13;
    idx['A'] = 14;       
    char t;
    while((t = getchar()) != '#') {
        while(gets(buff) && strlen(buff)==0){};
        int side = seat[t];
        memset(player, 0, sizeof(player));
        for(i = 0; i < 52; i += 2) {
            if(++side == 4) side = 0;
            /* get card, set its bit */
            player[side] |= 1LL<< (p[buff[i]]<<4LL) + idx[buff[i+1]]; 
        }
        gets(buff);
        for(i = 0; i < 52; i += 2) {
            if(++side == 4) side = 0;
            player[side] |= 1LL<< (p[buff[i]]<<4LL) + idx[buff[i+1]];
        }
        
        for(i = 0; i < 4; ++i) {
            int printed = 0;
            printf("%c: ", iseat[i]); 
            for(j = 0; j < 4; ++j) {                
                /* get current color */
                int c = player[i] & 0xffff;
                player[i] >>= 16; /* next color */
                while(c) {
                    int pos = __builtin_ctz(c);
                    c ^= 1LL << pos;  /* clear printed bit */
                    printf("%c%c", ip[j], inverse_idx[pos]); 
                    if(++printed < 13) printf(" ");
                }
            }
            printf("\n");
        }
    }
}

/* gcc has built in __builtin_ctz, vc not*/

/*
int __builtin_ctz(long long c) {

    for(int i = 0; i < 64; ++i)
        if(c & (1LL<<i)) return i;
}*/

Maximum interval sum

Or maximum contiguous subvector of the input.

Do a linear sweep from left to right, accumulate the sum. Start new interval whenever you encounter partial sum < 0.

/* 1-d linear alg O(n) */
inline int mis1d(int line[], int size) {
    int maxv = -INF;
    int p = 0;
    for(int i = 0; i < size; ++i) {
        p += line[i];
        if(p < 0) p = 0;
        if(p > maxv) maxv = p;
    }
    return maxv;
}

In a 2D problem, we are expected to find a maximum sum subrectangle in a mxn matrix.

We first calculate the accumulative sum in the dimension of length m (smaller dim).

We then use the above technique in the dimension of length n.

The total running time is O(m^2 n). (for nxn matrix, that is O(n^3) )

UVa problem: 108, 836

#include <iostream>
using namespace std;

#define INF 999999999
int M[101][101];
int partialSum[101][101];
int line[101];

/* 1-d linear alg O(n) */
inline int mis1d(int line[], int size) {
    int maxv = -INF;
    int p = 0;
    for(int i = 0; i < size; ++i) {
        p += line[i];
        if(p < 0) p = 0;
        if(p > maxv) maxv = p;
    }
    return maxv;
}

/* 2-D O(n^3) */
int mis2d(int size) {
    int maxv = -INF;
    for(int j = 0; j < size; ++j) {
        partialSum[0][j] = M[0][j];
        for (int i = 1; i < size; i++)
            partialSum[i][j] = partialSum[i-1][j] + M[i][j];
    }
    for(int i = 0; i < size; ++i) {   // use from the i-th row
        for(int j = i; j < size; ++j) {  // to the j-th row
            if(i == j) 
                memcpy(line, M[i], size*sizeof(int));
            else {
                // generate the data i to j-th row
                for(int k = 0; k < size; ++k)
                    line[k] = partialSum[j][k] - partialSum[i][k];
            }
            int v = mis1d(line, size);
            if(v > maxv) maxv = v;
        }
    }
    return maxv;
}

int main() {
    int n;
    while(scanf("%d ", &n) == 1) {
        for(int i = 0; i < n; ++i)
            for(int j = 0; j < n; ++j)
                scanf("%d", &M[i][j]);
        printf("%d\n", mis2d(n));
    }
}

Related problems:

find the subvector with the sum closest to zeros

calc the accumulative sum so that cum[i] = x[0] + ... + x[i]

The subvector x[l ... u] sums to zeros if cum[l-1] = cum[u]

the subvector with the sum closest to zeros is found by locating the two closest elements in cum

which can be done in O(nlogn) by sorting

find the subvector with the sum closest to a given real number t.

a simple extension from the above case

The subvector x[l ... u] sums to t if cum[l-1] + t = cum[u] and l-1<u

first nlogn sorting

then for each element, do a (log n) binary search for closest to (cum[i]+t)

initialize upon the first use

This is a technique coverd in Programming Pearls. (Chap 1, Problems 9)

This method use constant time for initialization and for each vector access, and use extra space proportional to the size of the vector. Because this method reduces initialization time by using even more space, it should be considered only when space is cheap, time is deer and the vector is sparse.

My test code here ( #define COL to enable 2-D matrix test case)

#include <iostream>
#include <ctime>
using namespace std;

// max row and col size
#define ROW 8000
// if 1-d, just comment the next line
#define COL 8000
int top = 0;

#ifndef COL
int data[ROW];
#define MAXE ROW
#else
int data[ROW][COL];
#define MAXE ROW*COL
#endif

// two additional array to implement a simple signature
int from[MAXE];
int to[MAXE];

// if element i in the array is initialised 
inline bool exist(int i, int j=0) {
#ifdef COL
    int idx = COL*i+j;
#else
    int idx = i;
#endif
    if(from[idx] < top && to[from[idx]] == idx)
        return true;
    return false;
}

// assign data value to the i-th element
#ifndef COL
inline void setdata(int i, int v) {
    if(!exist(i)) {        
        from[i] = top;
        to[top] = i;
        ++ top;
    }
    data[i] = v;
}

#else
inline void setdata(int i, int j, int v) {
    if(!exist(i, j)) {
        int idx = COL*i+j;
        from[idx] = top;
        to[top] = idx;
        ++ top;
    }
    data[i][j] = v;
}
#endif

int main(int argc, char* argv[]) {
    int startt = clock();
    printf("initialize at start time--\n");
    memset(data, 0, sizeof(data));

    int n;
    if(argc < 2 || atoi(argv[1]) > ROW) n = 100;
    else n = atoi(argv[1]);

#ifndef COL
    for(int i = 0; i < n; ++i)
        data[i] = (i*i-i)/(i+1);
    int starts = clock();
    printf("use %d\n", starts-startt);

    printf("no initialization--\n");
    for(int i = 0; i < n; i++)
        setdata(i, (i*i-i)/(i+1));
    printf("use %d\n", clock()-starts); 

#else
    for(int i = 0; i < n; i++)
        for(int j = 0; j < n; ++j)
            data[i][j] = (i*j-i)/(i+1);

    int starts = clock();
    printf("use %d\n", starts-startt);

    printf("no initialization--\n");
    for(int i = 0; i < n; i++)
        for(int j = 0; j < n; ++j)
            setdata(i, j, (i*j-i)/(i+1));

    printf("use %d\n", clock()-starts);
#endif
}

From the test result:

When the operations overhead is much less than the initialization cost, use init upon the first use.

When the vector/matrix is not sparse (e.g., > 10% full), memset is fast enough.

It seems the access speed of alg2 is 10 times slower than direct access speed

+------------+----------+-----------+----------+
|matrix dim  |#  op     |alg 1 (ms) |alg 2 (ms)| 
|8000 x 8000 |10 x 10   |312        |0         |
|8000 x 8000 |100x100   |312        |0         |
|8000 x 8000 |1000x1000 |328        |187       |
|8000 x 8000 |4000x4000 |531        |2922      |
+------------+----------+-----------+----------+

Saturday, October 18, 2008

bit operation again SRM 422

topcder: BirthdayCake

用gcc的__builtin_popcount()速度快了两倍多。

由于cake 那个是50位的，用了long long，然后调用__builtin_popcount两次分别处理高低32位然后求和。注意给常数1加上LL后缀。

// couting 1's
inline int count_bit(long long n) {
    return __builtin_popcount(n>>32) + __builtin_popcount(n &((1LL<<32)-1LL));
}

void checkmax(int &a, int b) {
    if(a < b) a = b;
}

int BirthdayCake::howManyFriends(vector <string> availableFruits, vector <string> f, int K) {

    int N = availableFruits.size(); // total number of available fruit
    int m = f.size(); // friend number
    map<string,int> fruit;
    for(int i = 0; i < N; ++i)
        fruit.insert(make_pair(availableFruits[i], fruit.size()));

    long long mask = (1LL<<N)-1LL;
    vector<long long> like(m, mask);
    for(int i = 0; i < m; ++i) {
        stringstream ss;
        ss << f[i];
        string tmp;
        while(ss >> tmp) { 
            if(!fruit.count(tmp)) continue;
            like[i] &= ~(1LL<< fruit[tmp]); // clear uliked bits
        }
    }
    int max = (1<<(m))-1; // all friends included
    
    int maxnf = 0; // max number of friends included
    // brute force traversal of all subsets
    for(int i = max; i >=0 ; --i) {
        long long cake = mask;
        for(int j = 0; j < m; ++j) if( i & (1<<j) ) {
            cake &= like[j];
            if( count_bit(cake) < K )
                goto next;
        }
        checkmax(maxnf, count_bit(i));
next:   continue;
    }
    return maxnf;
}

Friday, October 17, 2008

closest pair problem

Closest pair of points problem on wiki

nearest neighbor problem

k-d tree on wiki

Sweeping: http://www.cs.mcgill.ca/~cs251/ClosestPair/ClosestPair1D.html

every time the sweep line encounters a point p, we will perform the following actions:

Remove the points further than d to the left of p from the ordered set D that is storing the points in the strip.
Determine the point on the left of p that is closest to it
If the distance between this point and p is less than d (the current minimum distance), then update the closest current pair and d.

The summary and analysis of the plane sweep algorithm goes something like this:

sort the set according to x-coordinates (this is necessary for a plane sweep approach to determine the coordinates of the next point), which takes O(nlogn) time
inserting and removing each point once from the ordered set D (each insertion takes O(logn) time as shown in the section on (2,4)-trees), which takes a total of O(nlogn) time
comparing each point to a constant number of its neighbors (which takes O(n) time in total )

K-D Tree

Animation: http://donar.umiacs.umd.edu/quadtree/points/kdtree.html

Complexity

Building a static kd-tree from n points takes O(n log ² n) time if an O(n log n) sort is used to compute the median at each level. The complexity is O(n log n) if a linear median-finding algorithm such as the one described in Cormen et al^[3] is used.

Inserting a new point into a balanced kd-tree takes O(log n) time.
Removing a point from a balanced kd-tree takes O(log n) time.
Querying an axis-parallel range in a balanced kd-tree takes O(n^1-1/d +k) time, where k is the number of the reported points, and d the dimension of the kd-tree.
Finding the nearest point is an O(log N) operation.

Bit operations hacks

I find a few more interesting sites:

Bit Twiddling Hacks

The Aggregate Magic Algorithms

Thursday, October 16, 2008

Design Patterns (4) - Factory method and Abstract Factory

Factory method
Define an interface for creating an object, but let subclasses decide which class to instantiate. Factory Method lets a class defer instantiation to subclasses.

Why would anybody use such a design pattern?

Imagine you are writing the code for the “Client” object. The “client” object needs to use the “Product” object. You hard code the object creation process in the “Client” Now any changes in the object creation procedure would mean that you need to change the “client” too. The sole purpose of the “Factory” object is to create “Product” This gives us a certain amount of flexibility. The “client” delegates that task to “Factory”. Any changes in the object creation process do not affect the “client” and changes to “Factory” should be fairly simple as that is the only thing it does.

ref: http://vorleakchy.wordpress.com/2008/08/15/factory-method-design-pattern/

Principles:

No variable should hold a reference to a concrete class. If you use new, you' ll be holding a reference to a concrete class. Use a factory to get around that!

No class should derive from a concrete class. If you derive from a concret class, you're depending on a concrete class. Derive from an abstraction, like an interface or an abstract class.

No method should override an implemented method of any its base class. If you override an implemented method, then your base class wasn't really an abstraction to start with. Those methods implemented in the base class are meant to be shared by all your subclasses.

My simple factory method example:

//Pizza example:

#include <string>
#include <iostream>
class Pizza {
public:
    virtual void get_price() = 0;
};
 
class HamAndMushroomPizza: public Pizza {
public:
    virtual void get_price(){
        std::cout << "Ham and Mushroom: $8.5" << std::endl;
    }
};
 
class DeluxePizza : public Pizza {
public:
    virtual void get_price() {
        std::cout << "Deluxe: $10.5" << std::endl;
    }
};
 
class SeafoodPizza : public Pizza {
public:
    virtual void get_price(){
        std::cout << "Seafood: $11.5" << std::endl;
    }
};
 
class PizzaFactory {
public:
    static Pizza* create_pizza(const std::string type) {
        if (type == "Ham and Mushroom")
            return new HamAndMushroomPizza();
        else if (type == "Seafood Pizza")
            return new SeafoodPizza();
        else
            return new DeluxePizza();
    }
};
//usage
int main() {
    PizzaFactory factory;
    Pizza *pz = factory.create_pizza("Default");
    pz->get_price();
 
    Pizza *ph = factory.create_pizza("Ham and Mushroom");
    ph->get_price();
 
    delete pz;
    pz = factory.create_pizza("Seafood Pizza");
 
    pz->get_price();
    delete pz;
    delete ph;
}

Design Patterns (3) - Decorator

Decorator: Attach additional responsibilities to an object dynamically keeping the same interface. Decorators provide a flexible alternative to subclassing for extending functionality.

The idea:

Start with a basic component and "decorate" it with condiments at runtime.
Decorators have the same supertype as the objects they decorate.
Given that the decorator has the same supertype as the object it decorates, we can pass around a decorated object in place of the original object.
The decorator adds its own behavior either before and/or after delegating to the object it decorates to do the rest of the job.
Objects can be decorated at any time, so we can decorate objects dynamically at runtime with as many decorators as we like.

Design Patterns (2) - Observer

Observer :

Define a one-to-many dependency between objects so that when one object changes state, all its dependents are notified and updated automatically.

Design principles of Observer ( from head first Design Pattern Chap 2)

The thing that varies in the Observer Pattern is the state of the Subject and the number and types of Observers. With this pattern, you can vary the objects that are dependent on the state of the Subject, without having to change that Subject. That's called planning ahead!
Both the Subject and Observer use interfaces. the subject keeps track of objects implementing the Observer interface, while the observers register with, and get notified by the Subject interface. As we've seen, this keeps things nice and loosely coupled.
The Observer Pattern uses composition to compose any number of Observers with their Subjects. These relationships aren't set up by some kind of inheritance hierarchy. No, they are set up at runtime by composition!

wiki's c++ example shows an implementation of head first's chap 2 problem. It is a nice example.

Just refer to it for deep understanding of the pattern.

Design Patterns (1) - Strategy

book reference:

Gang-of-four: Design Patterns
Head first design patterns

resource reference:

Strategy Pattern:

Define a family of algorithms, encapsulate each one, and make them interchangeable. Strategy lets the algorithm vary independently from clients that use it.

The Context has-a behavior of Strategy, which can be implemented with different algorithms.
We define a private member in Context: a pointer to the general superclass Strategy.
We have a set_strategy method in Context class to change behavior at runtime.
We also have a superclass: Strategy and a bunch of subclass : ConcreteStrategy to implement the behavior.
In client code, we have one or multiple instances of Context.
At any given time, only one ConcreteStrategy is availabe to be executed a single Context object.

Strategy Pattern example:

#include <iostream>
 
using namespace std;
 
class StrategyInterface
{
    public:
        virtual void execute() = 0;
};
 
class ConcreteStrategyA: public StrategyInterface
{
    public:
        virtual void execute()
        {
            cout << "Called ConcreteStrategyA execute method" << endl;
        }
};
 
class ConcreteStrategyB: public StrategyInterface
{
    public:
        virtual void execute()
        {
            cout << "Called ConcreteStrategyB execute method" << endl;
        }
};
 
class ConcreteStrategyC: public StrategyInterface
{
    public:
        virtual void execute()
        {
            cout << "Called ConcreteStrategyC execute method" << endl;
        }
};
 
class Context
{
    private:
        StrategyInterface *_strategy;
 
    public:
        Context(StrategyInterface *strategy):_strategy(strategy)
        {
        }
        void set_strategy(StrategyInterface *strategy) 
        {
            _strategy = strategy;
        }
        void execute() 
        {
            _strategy->execute();
        }
};
 
int main(int argc, char *argv[])
{
    ConcreteStrategyA concreteStrategyA;
    ConcreteStrategyB concreteStrategyB;
    ConcreteStrategyC concreteStrategyC;

    Context contextA(&concreteStrategyA);
    Context contextB(&concreteStrategyB);
    Context contextC(&concreteStrategyC);
 
    contextA.execute();
    contextB.execute();
    contextC.execute();

    contextA.set_strategy(&concreteStrategyB);
    contextA.execute();
 
    return 0;
}